Spaces:

Ayushnangia
/

Whispercpp_yt

Paused

App Files Files Community

Whispercpp_yt / app.py

Ayushnangia

updated markdown

da3774a about 2 years ago

raw

history blame contribute delete

2.34 kB

	import gradio as gr
	import yt_dlp as ydlp
	from transformers import pipeline

	from whispercpp import Whisper
	# Summarization pipeline built once at import time with the
	# "knkarthick/MEETING_SUMMARY" model; summarizing() calls it once per chunk.
	summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")

	def download_audio(youtube_url, output_folder='.'):
	    """Download the audio track of a YouTube video and convert it to WAV.

	    The download is delegated to yt-dlp; its ``FFmpegExtractAudio``
	    post-processor is configured with the ``wav`` codec, so the finished
	    file is ``<output_folder>/audio.wav``.

	    Args:
	        youtube_url: URL of the video to fetch.
	        output_folder: Directory the file is written to (defaults to the
	            current working directory).

	    Returns:
	        The path of the converted WAV file.
	    """
	    ydl_opts = {
	        'format': 'bestaudio/best',
	        # A watch URL that carries a ``list=`` parameter would otherwise
	        # pull the whole playlist, every entry overwriting the same file.
	        'noplaylist': True,
	        'postprocessors': [{
	            'key': 'FFmpegExtractAudio',
	            'preferredcodec': 'wav',
	            'preferredquality': '192',
	        }],
	        # No extension here: the audio post-processor supplies ".wav".
	        'outtmpl': f'{output_folder}/audio',
	    }

	    with ydlp.YoutubeDL(ydl_opts) as ydl:
	        ydl.download([youtube_url])

	    return f'{output_folder}/audio.wav'


	# Shared Whisper instance created once at import time with the 'tiny'
	# argument (presumably the model size - confirm against whispercpp);
	# transcribe_sum_youtube() uses it to transcribe and extract text.
	w = Whisper('tiny')


	def process_general_transcription(transcription):
	    """Join transcription entries into one newline-separated string.

	    An entry wrapped in square brackets (for example ``[music]``) is turned
	    into an upper-cased section marker ``--- MUSIC ---`` surrounded by
	    newlines; every other entry is kept verbatim.

	    Args:
	        transcription: Iterable of strings, one per transcribed entry.

	    Returns:
	        The formatted entries joined with ``"\\n"``.
	    """
	    def _render(entry):
	        # Bracketed entries are annotations rather than spoken text.
	        is_marker = entry.startswith('[') and entry.endswith(']')
	        if not is_marker:
	            return entry
	        return f'\n--- {entry[1:-1].upper()} ---\n'

	    return "\n".join(_render(entry) for entry in transcription)
	def chunk_to_tokens(text, n):
	    """Split *text* into chunks of whitespace-separated words.

	    The chunk size starts from ``min(word_count, 512)`` and is reduced by
	    ``n`` percent, never dropping below one word per chunk.

	    Args:
	        text: The text to split.
	        n: Percentage (0-100) by which the base chunk size is reduced.
	            Values outside that range are clamped to it.

	    Returns:
	        A list of strings, each holding one chunk's words joined by single
	        spaces. The list is empty when *text* contains no words.
	    """
	    tokens = text.split()
	    max_chunk_size = min(len(tokens), 512)

	    # Clamp so a negative percentage cannot push a chunk past the 512 cap.
	    n = min(max(n, 0), 100)

	    # Multiply before dividing: ``size * (1 - n / 100)`` loses a word to
	    # float rounding (20 words at n=90 gives 1.9999999999999996 -> 1, not 2).
	    token_size = max(1, int(max_chunk_size * (100 - n) / 100))

	    return [
	        " ".join(tokens[i:i + token_size])
	        for i in range(0, len(tokens), token_size)
	    ]
	def summarizing(text, n):
	    """Summarize *text* chunk by chunk.

	    The text is split with ``chunk_to_tokens(text, n)``; each chunk is passed
	    to the module-level ``summarizer`` and the ``summary_text`` of its first
	    result is kept.

	    Args:
	        text: The text to summarize.
	        n: Percentage forwarded to ``chunk_to_tokens``.

	    Returns:
	        The chunk summaries in order, each followed by a newline.
	    """
	    pieces = chunk_to_tokens(text, n)
	    summaries = [summarizer(piece)[0]['summary_text'] + '\n' for piece in pieces]
	    return "".join(summaries)
	def transcribe_sum_youtube(youtube_url, n):
	    """Download a video's audio, transcribe it and return a summary.

	    Steps: ``download_audio`` fetches the audio, the shared Whisper instance
	    ``w`` transcribes ``audio.wav`` and extracts its text,
	    ``process_general_transcription`` formats that text, and ``summarizing``
	    condenses it.

	    Args:
	        youtube_url: URL of the video to process.
	        n: Percentage forwarded to ``summarizing``.

	    Returns:
	        The summary text produced by ``summarizing``.
	    """
	    download_audio(youtube_url)
	    segments = w.extract_text(w.transcribe("audio.wav"))
	    return summarizing(process_general_transcription(segments), n)


	# Gradio UI: a URL box and a percentage slider on the left, the summary on
	# the right; the button runs transcribe_sum_youtube on the two inputs.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        """
	# CPP Whisperer - YouTube Videos Summarizer

	""")
	    with gr.Row():
	        with gr.Column():
	            inp = gr.Textbox(label="Youtube Url", placeholder="Insert YT Url here")
	            # Slider's range/step parameters are named minimum / maximum /
	            # step; min_value / max_value / step_size are not accepted names.
	            inp2 = gr.Slider(
	                label="Summarization Percentage",
	                minimum=0,
	                maximum=100,
	                step=1,
	            )
	            result_button_transcribe = gr.Button('Transcribe and Summarize')

	        with gr.Column():
	            out = gr.Textbox(label="Transcribed and Summarize Text")

	    # Event wiring must happen inside the Blocks context.
	    result_button_transcribe.click(
	        transcribe_sum_youtube,
	        inputs=[inp, inp2],
	        outputs=out,
	    )


	demo.launch()