| import re | |
| import os | |
| import uuid | |
| import json | |
def convert_time_to_srt_format(seconds):
    """Format a duration in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    whole_seconds = int(seconds)
    milliseconds = round((seconds - whole_seconds) * 1000)
    # Rounding the fractional part can produce exactly 1000 ms; carry it
    # into the seconds before splitting into H/M/S fields.
    if milliseconds == 1000:
        milliseconds = 0
        whole_seconds += 1
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
def word_level_srt(words_timestamp, srt_path="word_level_subtitle.srt", shorts=False):
    """Write an SRT file containing one subtitle cue per word.

    Each entry in words_timestamp is a dict with 'word', 'start' and 'end'
    (times in seconds). When shorts is False, hyphens are also stripped
    from the displayed word.
    """
    # NOTE(review): the 'β' characters below look like mojibake of curly
    # quotes from a bad encoding round-trip — confirm against the original
    # source before changing them; the pattern is reproduced verbatim here.
    strip_pattern = re.compile(r'[.,!?;:"\ββ_~^+*|]')
    cues = []
    for index, info in enumerate(words_timestamp, start=1):
        cue_start = convert_time_to_srt_format(info['start'])
        cue_end = convert_time_to_srt_format(info['end'])
        token = strip_pattern.sub('', info['word'])
        if token.strip().lower() == 'i':
            token = "I"
        if not shorts:
            token = token.replace("-", "")
        cues.append(f"{index}\n{cue_start} --> {cue_end}\n{token}\n\n")
    with open(srt_path, 'w', encoding='utf-8') as handle:
        handle.write("".join(cues))
def split_line_by_char_limit(text, max_chars_per_line=38):
    """Greedily wrap whitespace-separated words into lines of at most
    max_chars_per_line characters.

    A single word longer than the limit still occupies its own line; an
    empty or whitespace-only string yields an empty list.
    """
    lines = []
    buffer = []
    used = 0
    for token in text.split():
        # Joining onto a non-empty line costs one extra space character.
        extra = len(token) if not buffer else len(token) + 1
        if buffer and used + extra > max_chars_per_line:
            lines.append(" ".join(buffer))
            buffer = [token]
            used = len(token)
        else:
            buffer.append(token)
            used += extra
    if buffer:
        lines.append(" ".join(buffer))
    return lines
def merge_punctuation_glitches(subtitles):
    """Smooth out punctuation artifacts at subtitle boundaries.

    Leading punctuation on an entry is moved onto the previous entry (when
    the previous entry does not already end with one of those characters),
    stray quote/colon characters are dropped, and entries reduced to pure
    punctuation are folded into the predecessor's time span. Mutates the
    subtitle dicts in place and returns the cleaned list.
    """
    if not subtitles:
        return []
    # NOTE(review): the 'β' characters below look like mojibake of curly
    # quotes — confirm against the original encoding; reproduced verbatim.
    leading_punct = re.compile(r'^([,.:;!?]+)(\s*)(.+)')
    merged = [subtitles[0]]
    for entry in subtitles[1:]:
        previous = merged[-1]
        prev_text = previous["text"].rstrip()
        text = entry["text"].lstrip()
        hit = leading_punct.match(text)
        if hit:
            punct, _, remainder = hit.groups()
            if not prev_text.endswith(tuple(punct)):
                previous["text"] = prev_text + punct
                text = remainder.strip()
        for stray in ('"', 'β', 'β', ';', ':'):
            text = text.replace(stray, '')
        text = text.strip()
        if not text or re.fullmatch(r'[.,!?]+', text):
            # Nothing displayable left: extend the previous cue instead.
            previous["end"] = entry["end"]
            continue
        entry["text"] = text
        previous["text"] = previous["text"].replace('"', '').replace('β', '').replace('β', '')
        merged.append(entry)
    return merged
def write_sentence_srt(
    word_level_timestamps, output_file="subtitles_professional.srt", max_lines=2,
    max_duration_s=7.0, max_chars_per_line=38, hard_pause_threshold=0.5,
    merge_pause_threshold=0.4
):
    """Create a professional-grade SRT file plus a companion timestamp JSON file.

    Args:
        word_level_timestamps: list of dicts with "word", "start", "end"
            keys (times in seconds).
        output_file: path of the SRT file to write; the JSON sidecar is
            written next to it with a ".json" extension.
        max_lines: maximum rendered lines per subtitle.
        max_duration_s: maximum on-screen duration of one subtitle.
        max_chars_per_line: character budget used when wrapping lines.
        hard_pause_threshold: a pause (seconds) at or above this always
            starts a new subtitle.
        merge_pause_threshold: a single-word subtitle is merged into the
            previous one when the pause before it is below this value.

    Returns:
        str | None: path of the JSON file, or None when there is no input.
    """
    if not word_level_timestamps:
        return

    # Phase 1: draft subtitles from timing and length rules.
    draft_subtitles = _build_draft_subtitles(
        word_level_timestamps, max_lines, max_duration_s,
        max_chars_per_line, hard_pause_threshold
    )
    if not draft_subtitles:
        return

    # Phase 2: fold single-word "orphan" subtitles into their predecessor.
    final_subtitles = _merge_orphan_subtitles(
        draft_subtitles, max_lines, max_chars_per_line, merge_pause_threshold
    )
    final_subtitles = merge_punctuation_glitches(final_subtitles)

    # Phase 3: write the SRT file and collect per-subtitle JSON data.
    timestamps_data = {}
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, sub in enumerate(final_subtitles, start=1):
            text = sub["text"].replace(" ,", ",").replace(" .", ".")
            formatted_lines = split_line_by_char_limit(text, max_chars_per_line)
            start_time_str = convert_time_to_srt_format(sub['start'])
            end_time_str = convert_time_to_srt_format(sub['end'])
            f.write(f"{idx}\n")
            f.write(f"{start_time_str} --> {end_time_str}\n")
            f.write("\n".join(formatted_lines) + "\n\n")
            timestamps_data[str(idx)] = {
                "text": "\n".join(formatted_lines),
                "start": start_time_str,
                "end": end_time_str,
                "words": [
                    {
                        "word": w["word"],
                        "start": convert_time_to_srt_format(w["start"]),
                        "end": convert_time_to_srt_format(w["end"]),
                    }
                    for w in sub["words"]
                ],
            }

    # BUGFIX: str.replace(".srt", ".json") would corrupt any path containing
    # ".srt" elsewhere (e.g. "clip.srt.bak" or a ".srt" directory component);
    # swap only the real extension instead.
    json_output_file = os.path.splitext(output_file)[0] + ".json"
    with open(json_output_file, "w", encoding="utf-8") as f_json:
        json.dump(timestamps_data, f_json, indent=4, ensure_ascii=False)
    return json_output_file


def _build_draft_subtitles(word_level_timestamps, max_lines, max_duration_s,
                           max_chars_per_line, hard_pause_threshold):
    """Phase 1 helper: group word objects into draft subtitles using
    line-budget, duration, pause, and sentence-punctuation rules."""
    draft_subtitles = []
    total = len(word_level_timestamps)
    i = 0
    while i < total:
        start_time = word_level_timestamps[i]["start"]
        # Keep the full word objects so per-word timings survive into JSON.
        current_word_objects = []
        j = i
        while j < total:
            entry = word_level_timestamps[j]
            potential_text = " ".join(
                [w["word"] for w in current_word_objects] + [entry["word"]]
            )
            # Stop when the subtitle would exceed its rendered line budget.
            if len(split_line_by_char_limit(potential_text, max_chars_per_line)) > max_lines:
                break
            # Stop when the subtitle would stay on screen too long.
            if (entry["end"] - start_time) > max_duration_s and current_word_objects:
                break
            if j > i:
                prev_entry = word_level_timestamps[j - 1]
                # A long silence or sentence-ending punctuation forces a split.
                if entry["start"] - prev_entry["end"] >= hard_pause_threshold:
                    break
                if prev_entry["word"].endswith(('.', '!', '?')):
                    break
            current_word_objects.append(entry)
            j += 1
        if not current_word_objects:
            # Guarantee forward progress: a word that violates every rule
            # still gets a subtitle of its own.
            current_word_objects.append(word_level_timestamps[i])
            j = i + 1
        draft_subtitles.append({
            "start": start_time,
            "end": word_level_timestamps[j - 1]["end"],
            "text": " ".join(w["word"] for w in current_word_objects),
            "words": current_word_objects,
        })
        i = j
    return draft_subtitles


def _merge_orphan_subtitles(draft_subtitles, max_lines, max_chars_per_line,
                            merge_pause_threshold):
    """Phase 2 helper: merge a single-word subtitle into the previous one
    when the pause before it is short and the merged text still fits."""
    final_subtitles = [draft_subtitles[0]]
    for current_sub in draft_subtitles[1:]:
        prev_sub = final_subtitles[-1]
        is_orphan = len(current_sub["text"].split()) == 1
        pause_from_prev = current_sub["start"] - prev_sub["end"]
        if is_orphan and pause_from_prev < merge_pause_threshold:
            merged_text = prev_sub["text"] + " " + current_sub["text"]
            if len(split_line_by_char_limit(merged_text, max_chars_per_line)) <= max_lines:
                prev_sub["text"] = merged_text
                prev_sub["end"] = current_sub["end"]
                # Keep the word-level data in sync with the merged text.
                prev_sub["words"].extend(current_sub["words"])
                continue
        final_subtitles.append(current_sub)
    return final_subtitles
def make_subtitle(word_level_timestamps, file_path):
    """Generate word-level, sentence-level, and shorts/reels SRT files.

    Args:
        word_level_timestamps: list of dicts with "word", "start", "end"
            keys (times in seconds).
        file_path: source media path; its basename seeds the output names.

    Returns:
        tuple: (sentence_srt_file, word_level_srt_file, shorts_srt_file)
        paths under ./subtitles/. The sentence/shorts calls also write
        companion .json timestamp files next to each .srt.
    """
    os.makedirs("./subtitles/", exist_ok=True)
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    # Short random suffix to avoid collisions between repeated runs.
    unique_id = str(uuid.uuid4())[:6]
    word_level_srt_file = f"./subtitles/{file_name}_subtitle_word_level_{unique_id}.srt"
    sentence_srt_file = f"./subtitles/{file_name}_subtitle_sentences_{unique_id}.srt"
    shorts_srt_file = f"./subtitles/{file_name}_subtitle_reels_{unique_id}.srt"
    word_level_srt(
        word_level_timestamps,
        srt_path=word_level_srt_file,
        shorts=False
    )
    # The returned JSON paths are derivable from the .srt paths and unused
    # by callers, so the results are deliberately not bound to names.
    write_sentence_srt(
        word_level_timestamps,
        output_file=sentence_srt_file,
        max_lines=2,
        max_duration_s=7.0,
        max_chars_per_line=38,
        hard_pause_threshold=0.5,
        merge_pause_threshold=0.4
    )
    write_sentence_srt(
        word_level_timestamps,
        output_file=shorts_srt_file,
        max_lines=1,
        max_duration_s=2.0,
        max_chars_per_line=17
    )
    return sentence_srt_file, word_level_srt_file, shorts_srt_file