Spaces:

TaiYouWeb
/

whisper-multi-model

Build error

App Files Files

whisper-multi-model / subtitle.py

TaiYouWeb

Initial Commit

83922b2 about 1 year ago

raw

history blame

3.61 kB

	class Subtitle:
	    """Render transcription segments as SRT, WebVTT, plain-text or LRC output.

	    A segment is a mapping with a ``'text'`` string and a ``'timestamp'``
	    pair ``(start, end)`` expressed in seconds. ``end`` may be ``None``
	    (or 0), in which case the start time is reused as the end time.
	    """

	    def __init__(self, ext="srt"):
	        """Select the output format.

	        Args:
	            ext: One of ``"srt"``, ``"vtt"``, ``"txt"`` or ``"lrc"``.

	        Raises:
	            KeyError: If ``ext`` is not one of the supported formats.
	        """
	        sub_dict = {
	            "srt": {
	                "coma": ",",
	                "header": "",
	                "format": self._srt_format,
	            },
	            "vtt": {
	                "coma": ".",
	                # The WebVTT file signature is the case-sensitive string
	                # "WEBVTT"; parsers reject any other capitalisation.
	                "header": "WEBVTT\n\n",
	                "format": self._vtt_format,
	            },
	            "txt": {
	                "coma": "",
	                "header": "",
	                "format": self._txt_format,
	            },
	            "lrc": {
	                "coma": "",
	                "header": "",
	                "format": self._lrc_format,
	            },
	        }

	        settings = sub_dict[ext]
	        self.ext = ext
	        # Separator placed between the seconds and milliseconds fields.
	        self.coma = settings["coma"]
	        # Text emitted once before the first segment.
	        self.header = settings["header"]
	        # Bound method called as format_fn(index, segment) for each segment.
	        self.format_fn = settings["format"]

	    @staticmethod
	    def _to_milliseconds(time):
	        """Return ``time`` (seconds) rounded to a whole number of milliseconds.

	        Rounding once up front avoids the float truncation that turns
	        1.001 s into 0 ms, and lets any carry propagate into the
	        seconds/minutes/hours fields.
	        """
	        return int(round(time * 1000))

	    def timeformat(self, time):
	        """Format ``time`` in seconds as ``HH:MM:SS<sep>mmm``.

	        ``<sep>`` is the format-specific separator stored in ``self.coma``.
	        """
	        total_seconds, milliseconds = divmod(self._to_milliseconds(time), 1000)
	        total_minutes, seconds = divmod(total_seconds, 60)
	        hours, minutes = divmod(total_minutes, 60)
	        return f"{hours:02d}:{minutes:02d}:{seconds:02d}{self.coma}{milliseconds:03d}"

	    def seconds_to_lrc_timestamp(self, time):
	        """Format ``time`` in seconds as an LRC tag ``[MM:SS.mmm]``."""
	        # Integer arithmetic keeps the seconds field below 60 even when the
	        # fractional part rounds up (59.9996 s -> [01:00.000]).
	        minutes, remainder_ms = divmod(self._to_milliseconds(time), 60_000)
	        secs, millis = divmod(remainder_ms, 1000)
	        return f"[{minutes:02}:{secs:02}.{millis:03}]"

	    def _time_range(self, segment):
	        """Return the formatted ``(start, end)`` timestamps of ``segment``."""
	        start, end = segment['timestamp'][0], segment['timestamp'][1]
	        # A missing (falsy) end time falls back to the start time.
	        return self.timeformat(start), self.timeformat(end if end else start)

	    def _srt_format(self, i, segment):
	        """Return one SRT cue; cue numbers are 1-based."""
	        start_time, end_time = self._time_range(segment)
	        return f"{i + 1}\n{start_time} --> {end_time}\n{segment['text']}\n\n"

	    def _vtt_format(self, i, segment):
	        """Return one WebVTT cue (no cue identifier is written)."""
	        start_time, end_time = self._time_range(segment)
	        return f"{start_time} --> {end_time}\n{segment['text']}\n\n"

	    def _txt_format(self, i, segment):
	        """Return the segment text followed by a newline."""
	        return f"{segment['text']}\n"

	    def _lrc_format(self, i, segment):
	        """Return one LRC line: the start-time tag followed by the text."""
	        start_time = self.seconds_to_lrc_timestamp(segment['timestamp'][0])
	        return f"{start_time}{segment['text']}\n"

	    def get_subtitle(self, segments):
	        """Render all ``segments`` in the selected format.

	        Leading whitespace is stripped from each segment's text. Segments
	        that cannot be formatted are reported with ``print`` and skipped;
	        the remaining segments are still rendered.

	        Args:
	            segments: Iterable of segment mappings.

	        Returns:
	            The header followed by every successfully formatted segment.
	        """
	        parts = [self.header]
	        for i, segment in enumerate(segments):
	            # Format a shallow copy so the caller's segment is left untouched.
	            cleaned = dict(segment)
	            cleaned['text'] = segment['text'].lstrip()
	            try:
	                parts.append(self.format_fn(i, cleaned))
	            except Exception as e:
	                # Deliberately best-effort: one malformed segment must not
	                # abort the whole subtitle.
	                print(e, cleaned)
	        return "".join(parts)

	    def write_subtitle(self, segments, output_file):
	        """Render ``segments`` and write them to ``<output_file>.<ext>`` as UTF-8.

	        Args:
	            segments: Iterable of segment mappings.
	            output_file: Destination path without the extension.
	        """
	        output_file_with_ext = f"{output_file}.{self.ext}"
	        subtitle = self.get_subtitle(segments)

	        with open(output_file_with_ext, 'w', encoding='utf-8') as f:
	            f.write(subtitle)

	def write_file(output_file, subtitle):
	    """Write ``subtitle`` to ``output_file`` as UTF-8 text, replacing any existing file.

	    Args:
	        output_file: Path of the file to create or overwrite.
	        subtitle: Text content to write.
	    """
	    with open(output_file, mode='w', encoding='utf-8') as sink:
	        sink.write(subtitle)

	def subtitle_output(inputs, chunks):
	    """Write LRC, SRT, VTT and TXT subtitle files and return the LRC text.

	    The files are named after the base name of ``inputs`` (directory and
	    final extension removed) and are written relative to the current
	    working directory.

	    Args:
	        inputs: Path-like string of the input file; only its base name is used.
	        chunks: Segments passed to ``Subtitle.get_subtitle``.

	    Returns:
	        A tuple ``(lrc, files_out)``: the rendered LRC text and the list of
	        written file names in the order lrc, srt, vtt, txt.
	    """
	    import os  # function-scope import: the module has no import block to extend

	    # splitext removes only the final extension, so "my.song.mp3" yields
	    # "my.song" rather than being cut at the first dot.
	    file_name = os.path.splitext(os.path.basename(inputs))[0]

	    # Render every format before touching the disk so a rendering failure
	    # does not leave a partial set of files behind.
	    rendered = {
	        ext: Subtitle(ext).get_subtitle(chunks)
	        for ext in ("lrc", "srt", "vtt", "txt")
	    }

	    files_out = []
	    for ext, content in rendered.items():
	        path = f"{file_name}.{ext}"
	        write_file(path, content)
	        files_out.append(path)
	    return rendered["lrc"], files_out

	def text_output(inputs, text):
	    """Write ``text`` to ``<base name of inputs>.txt`` and return it with the file list.

	    The file is written relative to the current working directory.

	    Args:
	        inputs: Path-like string of the input file; only its base name is used.
	        text: Text content to write.

	    Returns:
	        A tuple ``(text, files_out)`` where ``files_out`` is a one-element
	        list holding the written file name.
	    """
	    import os  # function-scope import: the module has no import block to extend

	    # splitext removes only the final extension, so "my.song.mp3" yields
	    # "my.song" rather than being cut at the first dot.
	    file_name = os.path.splitext(os.path.basename(inputs))[0]
	    txt_path = f"{file_name}.txt"
	    write_file(txt_path, text)
	    return text, [txt_path]