# server/subtitle.py
import re
import os
import uuid
import json


def convert_time_to_srt_format(seconds):
    """Converts seconds to the standard SRT time format (HH:MM:SS,ms)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    milliseconds = round((seconds - int(seconds)) * 1000)
    # Rounding can push milliseconds to 1000; carry the overflow upward.
    if milliseconds == 1000:
        milliseconds = 0
        secs += 1
    if secs == 60:
        secs, minutes = 0, minutes + 1
    if minutes == 60:
        minutes, hours = 0, hours + 1
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"


def word_level_srt(words_timestamp, srt_path="word_level_subtitle.srt", shorts=False):
    """Generates an SRT file with one word per subtitle entry."""
    punctuation = re.compile(r'[.,!?;:"–—_~^+*|]')
    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for i, word_info in enumerate(words_timestamp, start=1):
            start = convert_time_to_srt_format(word_info['start'])
            end = convert_time_to_srt_format(word_info['end'])
            word = re.sub(punctuation, '', word_info['word'])
            # Normalize a bare lowercase "i" to the capitalized pronoun.
            if word.strip().lower() == 'i':
                word = "I"
            # Hyphens are kept for shorts/reels captions, stripped otherwise.
            if not shorts:
                word = word.replace("-", "")
            srt_file.write(f"{i}\n{start} --> {end}\n{word}\n\n")
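
# Usage sketch (assumes entries shaped like word-timestamp ASR output,
# i.e. dicts with "word", "start", and "end" in seconds):
#   word_level_srt([{"word": "Hello,", "start": 0.0, "end": 0.4}], srt_path="demo.srt")
#   demo.srt then contains:
#       1
#       00:00:00,000 --> 00:00:00,400
#       Hello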


def split_line_by_char_limit(text, max_chars_per_line=38):
    """Splits a string into multiple lines based on a character limit."""
    words = text.split()
    lines = []
    current_line = ""
    for word in words:
        if not current_line:
            current_line = word
        elif len(current_line + " " + word) <= max_chars_per_line:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return lines
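
# Usage sketch (hypothetical input, limit of 20 characters):
#   split_line_by_char_limit("the quick brown fox jumps over the lazy dog", 20)
#   -> ["the quick brown fox", "jumps over the lazy", "dog"]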


def merge_punctuation_glitches(subtitles):
    """Cleans up punctuation artifacts at the boundaries of subtitle entries."""
    if not subtitles:
        return []
    cleaned = [subtitles[0]]
    for i in range(1, len(subtitles)):
        prev = cleaned[-1]
        curr = subtitles[i]
        prev_text = prev["text"].rstrip()
        curr_text = curr["text"].lstrip()
        # If the current entry starts with punctuation, move it onto the
        # previous entry (unless the previous entry already ends with it).
        match = re.match(r'^([,.:;!?]+)(\s*)(.+)', curr_text)
        if match:
            punct, _, rest = match.groups()
            if not prev_text.endswith(tuple(punct)):
                prev["text"] = prev_text + punct
            curr_text = rest.strip()
        # Strip quote marks and stray separators entirely.
        unwanted_chars = ['"', '“', '”', ';', ':']
        for ch in unwanted_chars:
            curr_text = curr_text.replace(ch, '')
        curr_text = curr_text.strip()
        # If nothing but punctuation remains, fold the timing into the
        # previous entry and drop this one.
        if not curr_text or re.fullmatch(r'[.,!?]+', curr_text):
            prev["end"] = curr["end"]
            continue
        curr["text"] = curr_text
        prev["text"] = prev["text"].replace('"', '').replace('“', '').replace('”', '')
        cleaned.append(curr)
    return cleaned
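
# Usage sketch (hypothetical entries showing a leading-comma glitch being
# moved onto the previous subtitle):
#   merge_punctuation_glitches([
#       {"start": 0.0, "end": 1.0, "text": "Hello world"},
#       {"start": 1.0, "end": 2.0, "text": ", and welcome"},
#   ])
#   -> [{"start": 0.0, "end": 1.0, "text": "Hello world,"},
#       {"start": 1.0, "end": 2.0, "text": "and welcome"}]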


def write_sentence_srt(
    word_level_timestamps, output_file="subtitles_professional.srt", max_lines=2,
    max_duration_s=7.0, max_chars_per_line=38, hard_pause_threshold=0.5,
    merge_pause_threshold=0.4
):
    """Creates a professional-grade SRT file and a corresponding timestamps JSON file."""
    if not word_level_timestamps:
        return
    # Phase 1: Generate draft subtitles based on timing and length rules.
    draft_subtitles = []
    i = 0
    while i < len(word_level_timestamps):
        start_time = word_level_timestamps[i]["start"]
        # Store the full word objects, not just the text, so word-level
        # timings survive into the JSON output.
        current_word_objects = []
        j = i
        while j < len(word_level_timestamps):
            entry = word_level_timestamps[j]
            # Would adding this word exceed the line budget?
            potential_words = [w["word"] for w in current_word_objects] + [entry["word"]]
            potential_text = " ".join(potential_words)
            if len(split_line_by_char_limit(potential_text, max_chars_per_line)) > max_lines:
                break
            # Or the duration budget?
            if (entry["end"] - start_time) > max_duration_s and current_word_objects:
                break
            if j > i:
                prev_entry = word_level_timestamps[j - 1]
                # Break on long pauses and on sentence-ending punctuation.
                pause = entry["start"] - prev_entry["end"]
                if pause >= hard_pause_threshold:
                    break
                if prev_entry["word"].endswith(('.', '!', '?')):
                    break
            current_word_objects.append(entry)
            j += 1
        # Guarantee progress: an oversized single word still gets its own entry.
        if not current_word_objects:
            current_word_objects.append(word_level_timestamps[i])
            j = i + 1
        text = " ".join(w["word"] for w in current_word_objects)
        end_time = word_level_timestamps[j - 1]["end"]
        draft_subtitles.append({
            "start": start_time,
            "end": end_time,
            "text": text,
            "words": current_word_objects
        })
        i = j
    # Phase 2: Post-process to merge single-word "orphan" subtitles.
    if not draft_subtitles:
        return
    final_subtitles = [draft_subtitles[0]]
    for k in range(1, len(draft_subtitles)):
        prev_sub = final_subtitles[-1]
        current_sub = draft_subtitles[k]
        is_orphan = len(current_sub["text"].split()) == 1
        pause_from_prev = current_sub["start"] - prev_sub["end"]
        if is_orphan and pause_from_prev < merge_pause_threshold:
            merged_text = prev_sub["text"] + " " + current_sub["text"]
            if len(split_line_by_char_limit(merged_text, max_chars_per_line)) <= max_lines:
                prev_sub["text"] = merged_text
                prev_sub["end"] = current_sub["end"]
                # Merge the word-level data as well.
                prev_sub["words"].extend(current_sub["words"])
                continue
        final_subtitles.append(current_sub)
    final_subtitles = merge_punctuation_glitches(final_subtitles)
    # Phase 3: Write the final SRT file and collect the same data for JSON.
    timestamps_data = {}
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, sub in enumerate(final_subtitles, start=1):
            # Write the SRT entry.
            text = sub["text"].replace(" ,", ",").replace(" .", ".")
            formatted_lines = split_line_by_char_limit(text, max_chars_per_line)
            start_time_str = convert_time_to_srt_format(sub['start'])
            end_time_str = convert_time_to_srt_format(sub['end'])
            f.write(f"{idx}\n")
            f.write(f"{start_time_str} --> {end_time_str}\n")
            f.write("\n".join(formatted_lines) + "\n\n")
            # Build the per-word entries for the current subtitle.
            word_data = []
            for word_obj in sub["words"]:
                word_data.append({
                    "word": word_obj["word"],
                    "start": convert_time_to_srt_format(word_obj["start"]),
                    "end": convert_time_to_srt_format(word_obj["end"])
                })
            # Add the complete entry to the main dictionary, keyed by index.
            timestamps_data[str(idx)] = {
                "text": "\n".join(formatted_lines),
                "start": start_time_str,
                "end": end_time_str,
                "words": word_data
            }
    # Write the collected data to a JSON file alongside the SRT.
    json_output_file = output_file.replace(".srt", ".json")
    with open(json_output_file, "w", encoding="utf-8") as f_json:
        json.dump(timestamps_data, f_json, indent=4, ensure_ascii=False)
    return json_output_file
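
# The returned .json mirrors the SRT, keyed by entry index; illustrative shape:
#   {
#       "1": {
#           "text": "Hello world,\nand welcome",
#           "start": "00:00:00,000",
#           "end": "00:00:02,000",
#           "words": [{"word": "Hello", "start": "00:00:00,000",
#                      "end": "00:00:00,400"}, ...]
#       }
#   }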


def make_subtitle(word_level_timestamps, file_path):
    """Writes word-level, sentence-level, and shorts/reels SRT files for one input."""
    os.makedirs("./subtitles/", exist_ok=True)
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    unique_id = str(uuid.uuid4())[:6]
    word_level_srt_file = f"./subtitles/{file_name}_subtitle_word_level_{unique_id}.srt"
    sentence_srt_file = f"./subtitles/{file_name}_subtitle_sentences_{unique_id}.srt"
    shorts_srt_file = f"./subtitles/{file_name}_subtitle_reels_{unique_id}.srt"
    word_level_srt(
        word_level_timestamps,
        srt_path=word_level_srt_file,
        shorts=False
    )
    # Each call below also writes a matching .json timestamp file next to its SRT.
    write_sentence_srt(
        word_level_timestamps,
        output_file=sentence_srt_file,
        max_lines=2,
        max_duration_s=7.0,
        max_chars_per_line=38,
        hard_pause_threshold=0.5,
        merge_pause_threshold=0.4
    )
    write_sentence_srt(
        word_level_timestamps,
        output_file=shorts_srt_file,
        max_lines=1,
        max_duration_s=2.0,
        max_chars_per_line=17
    )
    return sentence_srt_file, word_level_srt_file, shorts_srt_file
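
# Usage sketch (hypothetical timestamps; real input comes from an ASR pass):
#   timestamps = [
#       {"word": "Hello,", "start": 0.00, "end": 0.40},
#       {"word": "world.", "start": 0.45, "end": 0.90},
#   ]
#   sentence_srt, word_srt, shorts_srt = make_subtitle(timestamps, "clip.mp4")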