# server/subtitle.py
import re
import os
import uuid
import json


def convert_time_to_srt_format(seconds):
    """Converts seconds to the standard SRT time format (HH:MM:SS,ms)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    milliseconds = round((seconds - int(seconds)) * 1000)
    # Rounding can push milliseconds to 1000; carry the overflow upward.
    if milliseconds == 1000:
        milliseconds = 0
        secs += 1
    if secs == 60:
        secs, minutes = 0, minutes + 1
    if minutes == 60:
        minutes, hours = 0, hours + 1
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"


def word_level_srt(words_timestamp, srt_path="word_level_subtitle.srt", shorts=False):
    """Generates an SRT file with one word per subtitle entry."""
    punctuation = re.compile(r'[.,!?;:"–—_~^+*|]')
    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for i, word_info in enumerate(words_timestamp, start=1):
            start = convert_time_to_srt_format(word_info['start'])
            end = convert_time_to_srt_format(word_info['end'])
            word = re.sub(punctuation, '', word_info['word'])
            # Normalize a bare lowercase "i" to the capitalized pronoun.
            if word.strip().lower() == 'i':
                word = "I"
            # Hyphens are kept for shorts/reels captions, stripped otherwise.
            if not shorts:
                word = word.replace("-", "")
            srt_file.write(f"{i}\n{start} --> {end}\n{word}\n\n")
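
# Usage sketch (assumes entries shaped like word-timestamp ASR output,
# i.e. dicts with "word", "start", and "end" in seconds):
#   word_level_srt([{"word": "Hello,", "start": 0.0, "end": 0.4}], srt_path="demo.srt")
#   demo.srt then contains:
#       1
#       00:00:00,000 --> 00:00:00,400
#       Hello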


def split_line_by_char_limit(text, max_chars_per_line=38):
    """Splits a string into multiple lines based on a character limit."""
    words = text.split()
    lines = []
    current_line = ""
    for word in words:
        if not current_line:
            current_line = word
        elif len(current_line + " " + word) <= max_chars_per_line:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return lines
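
# Usage sketch (hypothetical input, limit of 20 characters):
#   split_line_by_char_limit("the quick brown fox jumps over the lazy dog", 20)
#   -> ["the quick brown fox", "jumps over the lazy", "dog"]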


def merge_punctuation_glitches(subtitles):
    """Cleans up punctuation artifacts at the boundaries of subtitle entries."""
    if not subtitles:
        return []
    cleaned = [subtitles[0]]
    for i in range(1, len(subtitles)):
        prev = cleaned[-1]
        curr = subtitles[i]
        prev_text = prev["text"].rstrip()
        curr_text = curr["text"].lstrip()
        # If the current entry starts with punctuation, move it onto the
        # previous entry (unless the previous entry already ends with it).
        match = re.match(r'^([,.:;!?]+)(\s*)(.+)', curr_text)
        if match:
            punct, _, rest = match.groups()
            if not prev_text.endswith(tuple(punct)):
                prev["text"] = prev_text + punct
            curr_text = rest.strip()
        # Strip quote marks and stray separators entirely.
        unwanted_chars = ['"', '“', '”', ';', ':']
        for ch in unwanted_chars:
            curr_text = curr_text.replace(ch, '')
        curr_text = curr_text.strip()
        # If nothing but punctuation remains, fold the timing into the
        # previous entry and drop this one.
        if not curr_text or re.fullmatch(r'[.,!?]+', curr_text):
            prev["end"] = curr["end"]
            continue
        curr["text"] = curr_text
        prev["text"] = prev["text"].replace('"', '').replace('“', '').replace('”', '')
        cleaned.append(curr)
    return cleaned
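
# Usage sketch (hypothetical entries showing a leading-comma glitch being
# moved onto the previous subtitle):
#   merge_punctuation_glitches([
#       {"start": 0.0, "end": 1.0, "text": "Hello world"},
#       {"start": 1.0, "end": 2.0, "text": ", and welcome"},
#   ])
#   -> [{"start": 0.0, "end": 1.0, "text": "Hello world,"},
#       {"start": 1.0, "end": 2.0, "text": "and welcome"}]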


def write_sentence_srt(
    word_level_timestamps, output_file="subtitles_professional.srt", max_lines=2,
    max_duration_s=7.0, max_chars_per_line=38, hard_pause_threshold=0.5,
    merge_pause_threshold=0.4
):
    """Creates a professional-grade SRT file and a corresponding timestamps JSON file."""
    if not word_level_timestamps:
        return
    # Phase 1: Generate draft subtitles based on timing and length rules.
    draft_subtitles = []
    i = 0
    while i < len(word_level_timestamps):
        start_time = word_level_timestamps[i]["start"]
        # Store the full word objects, not just the text, so word-level
        # timings survive into the JSON output.
        current_word_objects = []
        j = i
        while j < len(word_level_timestamps):
            entry = word_level_timestamps[j]
            # Would adding this word exceed the line budget?
            potential_words = [w["word"] for w in current_word_objects] + [entry["word"]]
            potential_text = " ".join(potential_words)
            if len(split_line_by_char_limit(potential_text, max_chars_per_line)) > max_lines:
                break
            # Or the duration budget?
            if (entry["end"] - start_time) > max_duration_s and current_word_objects:
                break
            if j > i:
                prev_entry = word_level_timestamps[j - 1]
                # Break on long pauses and on sentence-ending punctuation.
                pause = entry["start"] - prev_entry["end"]
                if pause >= hard_pause_threshold:
                    break
                if prev_entry["word"].endswith(('.', '!', '?')):
                    break
            current_word_objects.append(entry)
            j += 1
        # Guarantee progress: an oversized single word still gets its own entry.
        if not current_word_objects:
            current_word_objects.append(word_level_timestamps[i])
            j = i + 1
        text = " ".join(w["word"] for w in current_word_objects)
        end_time = word_level_timestamps[j - 1]["end"]
        draft_subtitles.append({
            "start": start_time,
            "end": end_time,
            "text": text,
            "words": current_word_objects
        })
        i = j
    # Phase 2: Post-process to merge single-word "orphan" subtitles.
    if not draft_subtitles:
        return
    final_subtitles = [draft_subtitles[0]]
    for k in range(1, len(draft_subtitles)):
        prev_sub = final_subtitles[-1]
        current_sub = draft_subtitles[k]
        is_orphan = len(current_sub["text"].split()) == 1
        pause_from_prev = current_sub["start"] - prev_sub["end"]
        if is_orphan and pause_from_prev < merge_pause_threshold:
            merged_text = prev_sub["text"] + " " + current_sub["text"]
            if len(split_line_by_char_limit(merged_text, max_chars_per_line)) <= max_lines:
                prev_sub["text"] = merged_text
                prev_sub["end"] = current_sub["end"]
                # Merge the word-level data as well.
                prev_sub["words"].extend(current_sub["words"])
                continue
        final_subtitles.append(current_sub)
    final_subtitles = merge_punctuation_glitches(final_subtitles)
    # Phase 3: Write the final SRT file and collect the same data for JSON.
    timestamps_data = {}
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, sub in enumerate(final_subtitles, start=1):
            # Write the SRT entry.
            text = sub["text"].replace(" ,", ",").replace(" .", ".")
            formatted_lines = split_line_by_char_limit(text, max_chars_per_line)
            start_time_str = convert_time_to_srt_format(sub['start'])
            end_time_str = convert_time_to_srt_format(sub['end'])
            f.write(f"{idx}\n")
            f.write(f"{start_time_str} --> {end_time_str}\n")
            f.write("\n".join(formatted_lines) + "\n\n")
            # Build the per-word entries for the current subtitle.
            word_data = []
            for word_obj in sub["words"]:
                word_data.append({
                    "word": word_obj["word"],
                    "start": convert_time_to_srt_format(word_obj["start"]),
                    "end": convert_time_to_srt_format(word_obj["end"])
                })
            # Add the complete entry to the main dictionary, keyed by index.
            timestamps_data[str(idx)] = {
                "text": "\n".join(formatted_lines),
                "start": start_time_str,
                "end": end_time_str,
                "words": word_data
            }
    # Write the collected data to a JSON file alongside the SRT.
    json_output_file = output_file.replace(".srt", ".json")
    with open(json_output_file, "w", encoding="utf-8") as f_json:
        json.dump(timestamps_data, f_json, indent=4, ensure_ascii=False)
    return json_output_file
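
# The returned .json mirrors the SRT, keyed by entry index; illustrative shape:
#   {
#       "1": {
#           "text": "Hello world,\nand welcome",
#           "start": "00:00:00,000",
#           "end": "00:00:02,000",
#           "words": [{"word": "Hello", "start": "00:00:00,000",
#                      "end": "00:00:00,400"}, ...]
#       }
#   }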


def make_subtitle(word_level_timestamps, file_path):
    """Writes word-level, sentence-level, and shorts/reels SRT files for one input."""
    os.makedirs("./subtitles/", exist_ok=True)
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    unique_id = str(uuid.uuid4())[:6]
    word_level_srt_file = f"./subtitles/{file_name}_subtitle_word_level_{unique_id}.srt"
    sentence_srt_file = f"./subtitles/{file_name}_subtitle_sentences_{unique_id}.srt"
    shorts_srt_file = f"./subtitles/{file_name}_subtitle_reels_{unique_id}.srt"
    word_level_srt(
        word_level_timestamps,
        srt_path=word_level_srt_file,
        shorts=False
    )
    # Each call below also writes a matching .json timestamp file next to its SRT.
    write_sentence_srt(
        word_level_timestamps,
        output_file=sentence_srt_file,
        max_lines=2,
        max_duration_s=7.0,
        max_chars_per_line=38,
        hard_pause_threshold=0.5,
        merge_pause_threshold=0.4
    )
    write_sentence_srt(
        word_level_timestamps,
        output_file=shorts_srt_file,
        max_lines=1,
        max_duration_s=2.0,
        max_chars_per_line=17
    )
    return sentence_srt_file, word_level_srt_file, shorts_srt_file
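
# Usage sketch (hypothetical timestamps; real input comes from an ASR pass):
#   timestamps = [
#       {"word": "Hello,", "start": 0.00, "end": 0.40},
#       {"word": "world.", "start": 0.45, "end": 0.90},
#   ]
#   sentence_srt, word_srt, shorts_srt = make_subtitle(timestamps, "clip.mp4")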