"""Utilities for generating SRT subtitle files (word-level, sentence-level, and
shorts/reels style) plus a per-subtitle timestamp JSON from word-level timestamps."""

import re
import os
import uuid
import json

def convert_time_to_srt_format(seconds):
    """Converts seconds to the standard SRT time format (HH:MM:SS,ms)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    milliseconds = round((seconds - int(seconds)) * 1000)

    if milliseconds == 1000:
        milliseconds = 0
        secs += 1
        if secs == 60:
            secs, minutes = 0, minutes + 1
            if minutes == 60:
                minutes, hours = 0, hours + 1

    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

def word_level_srt(words_timestamp, srt_path="word_level_subtitle.srt", shorts=False):
    """Generates an SRT file with one word per subtitle entry."""
    punctuation = re.compile(r'[.,!?;:"\–—_~^+*|]')
    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for i, word_info in enumerate(words_timestamp, start=1):
            start = convert_time_to_srt_format(word_info['start'])
            end = convert_time_to_srt_format(word_info['end'])
            word = re.sub(punctuation, '', word_info['word'])
            if word.strip().lower() == 'i': word = "I"
            if not shorts: word = word.replace("-", "")
            srt_file.write(f"{i}\n{start} --> {end}\n{word}\n\n")



def split_line_by_char_limit(text, max_chars_per_line=38):
    """Splits a string into multiple lines based on a character limit."""
    words = text.split()
    lines = []
    current_line = ""
    for word in words:
        if not current_line:
            current_line = word
        elif len(current_line + " " + word) <= max_chars_per_line:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return lines
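
# Example (illustrative):
#   split_line_by_char_limit("the quick brown fox jumps over the lazy dog", 15)
#   -> ["the quick brown", "fox jumps over", "the lazy dog"]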

def merge_punctuation_glitches(subtitles):
    """Cleans up punctuation artifacts at the boundaries of subtitle entries."""
    if not subtitles:
        return []

    cleaned = [subtitles[0]]
    for i in range(1, len(subtitles)):
        prev = cleaned[-1]
        curr = subtitles[i]

        prev_text = prev["text"].rstrip()
        curr_text = curr["text"].lstrip()

        match = re.match(r'^([,.:;!?]+)(\s*)(.+)', curr_text)
        if match:
            punct, _, rest = match.groups()
            if not prev_text.endswith(tuple(punct)):
                prev["text"] = prev_text + punct
            curr_text = rest.strip()

        unwanted_chars = ['"', '“', '”', ';', ':']
        for ch in unwanted_chars:
            curr_text = curr_text.replace(ch, '')
        curr_text = curr_text.strip()

        if not curr_text or re.fullmatch(r'[.,!?]+', curr_text):
            prev["end"] = curr["end"]
            continue

        curr["text"] = curr_text
        prev["text"] = prev["text"].replace('"', '').replace('β€œ', '').replace('”', '')
        cleaned.append(curr)

    return cleaned
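
# Example (illustrative): for entries whose texts are "so he said" and ", and then left",
# the leading comma is moved onto the first entry ("so he said,") and the second entry's
# text becomes "and then left"; entries reduced to bare punctuation are merged away entirely.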


def write_sentence_srt(
    word_level_timestamps, output_file="subtitles_professional.srt", max_lines=2,
    max_duration_s=7.0, max_chars_per_line=38, hard_pause_threshold=0.5,
    merge_pause_threshold=0.4
):
    """Creates professional-grade SRT files and a corresponding timestamp.json file."""
    if not word_level_timestamps:
        return

    # Phase 1: Generate draft subtitles based on timing and length rules
    draft_subtitles = []
    i = 0
    while i < len(word_level_timestamps):
        start_time = word_level_timestamps[i]["start"]

        # We'll now store the full word objects, not just the text
        current_word_objects = []

        j = i
        while j < len(word_level_timestamps):
            entry = word_level_timestamps[j]

            # Create potential text from the word objects
            potential_words = [w["word"] for w in current_word_objects] + [entry["word"]]
            potential_text = " ".join(potential_words)

            if len(split_line_by_char_limit(potential_text, max_chars_per_line)) > max_lines: break
            if (entry["end"] - start_time) > max_duration_s and current_word_objects: break

            if j > i:
                prev_entry = word_level_timestamps[j-1]
                pause = entry["start"] - prev_entry["end"]
                if pause >= hard_pause_threshold: break
                if prev_entry["word"].endswith(('.','!','?')): break

            # Append the full word object
            current_word_objects.append(entry)
            j += 1

        if not current_word_objects:
            current_word_objects.append(word_level_timestamps[i])
            j = i + 1

        text = " ".join([w["word"] for w in current_word_objects])
        end_time = word_level_timestamps[j - 1]["end"]

        # Include the list of word objects in our draft subtitle
        draft_subtitles.append({
            "start": start_time,
            "end": end_time,
            "text": text,
            "words": current_word_objects
        })
        i = j

    # Phase 2: Post-process to merge single-word "orphan" subtitles
    if not draft_subtitles: return
    final_subtitles = [draft_subtitles[0]]
    for k in range(1, len(draft_subtitles)):
        prev_sub = final_subtitles[-1]
        current_sub = draft_subtitles[k]
        is_orphan = len(current_sub["text"].split()) == 1
        pause_from_prev = current_sub["start"] - prev_sub["end"]

        if is_orphan and pause_from_prev < merge_pause_threshold:
            merged_text = prev_sub["text"] + " " + current_sub["text"]
            if len(split_line_by_char_limit(merged_text, max_chars_per_line)) <= max_lines:
                prev_sub["text"] = merged_text
                prev_sub["end"] = current_sub["end"]

                # Merge the word-level data as well
                prev_sub["words"].extend(current_sub["words"])
                continue

        final_subtitles.append(current_sub)

    final_subtitles = merge_punctuation_glitches(final_subtitles)

    # This dictionary will hold the data for our JSON file
    timestamps_data = {}

    # Phase 3: Write the final SRT file (and prepare JSON data)
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, sub in enumerate(final_subtitles, start=1):
            # --- SRT Writing (Unchanged) ---
            text = sub["text"].replace(" ,", ",").replace(" .", ".")
            formatted_lines = split_line_by_char_limit(text, max_chars_per_line)
            start_time_str = convert_time_to_srt_format(sub['start'])
            end_time_str = convert_time_to_srt_format(sub['end'])

            f.write(f"{idx}\n")
            f.write(f"{start_time_str} --> {end_time_str}\n")
            f.write("\n".join(formatted_lines) + "\n\n")

            # Create the list of word dictionaries for the current subtitle
            word_data = []
            for word_obj in sub["words"]:
                word_data.append({
                    "word": word_obj["word"],
                    "start": convert_time_to_srt_format(word_obj["start"]),
                    "end": convert_time_to_srt_format(word_obj["end"])
                })

            # Add the complete entry to our main dictionary
            timestamps_data[str(idx)] = {
                "text": "\n".join(formatted_lines),
                "start": start_time_str,
                "end": end_time_str,
                "words": word_data
            }

    # Write the collected data to the JSON file
    json_output_file = output_file.replace(".srt",".json")
    with open(json_output_file, "w", encoding="utf-8") as f_json:
        json.dump(timestamps_data, f_json, indent=4, ensure_ascii=False)

    # print(f"Successfully generated SRT file: {output_file}")
    # print(f"Successfully generated JSON file: {json_output_file}")
    return json_output_file


def make_subtitle(word_level_timestamps, file_path):
    """Generates word-level, sentence-level, and shorts/reels SRT files for a media file.

    Output names are derived from file_path's base name plus a short unique suffix.
    Returns the sentence-level, word-level, and shorts/reels SRT paths, in that order.
    """
    os.makedirs("./subtitles/", exist_ok=True)
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    unique_id = str(uuid.uuid4())[:6]
    word_level_srt_file = f"./subtitles/{file_name}_subtitle_word_level_{unique_id}.srt"
    sentence_srt_file = f"./subtitles/{file_name}_subtitle_sentences_{unique_id}.srt"
    shorts_srt_file = f"./subtitles/{file_name}_subtitle_reels_{unique_id}.srt"

    word_level_srt(
        word_level_timestamps,
        srt_path=word_level_srt_file,
        shorts=False
    )

    # write_sentence_srt also writes a timestamp JSON next to each SRT and returns its path.
    sentence_json = write_sentence_srt(
        word_level_timestamps,
        output_file=sentence_srt_file,
        max_lines=2,
        max_duration_s=7.0,
        max_chars_per_line=38,
        hard_pause_threshold=0.5,
        merge_pause_threshold=0.4
    )

    shorts_json = write_sentence_srt(
        word_level_timestamps,
        output_file=shorts_srt_file,
        max_lines=1,
        max_duration_s=2.0,
        max_chars_per_line=17
    )
    return sentence_srt_file, word_level_srt_file, shorts_srt_file
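

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the original pipeline): the list below is
    # hypothetical ASR output; real input should be word-level timestamps shaped like
    # {"word": ..., "start": <seconds>, "end": <seconds>}, which is what the functions above consume.
    demo_words = [
        {"word": "Hello", "start": 0.00, "end": 0.42},
        {"word": "everyone,", "start": 0.42, "end": 0.95},
        {"word": "welcome", "start": 1.10, "end": 1.55},
        {"word": "back.", "start": 1.55, "end": 2.00},
    ]
    # "demo_audio.wav" is a placeholder; it is only used to derive the output file names.
    sentence_srt, word_srt, shorts_srt = make_subtitle(demo_words, "demo_audio.wav")
    print("Sentence-level SRT:", sentence_srt)
    print("Word-level SRT:", word_srt)
    print("Shorts/reels SRT:", shorts_srt)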