Update app.py
app.py (CHANGED)
@@ -1,4 +1,4 @@
-#
+# Import Libraries
 import os
 import gc
 import torch
@@ -28,7 +28,7 @@ def setup_cookies():
 # Call cookie setup when app starts
 setup_cookies()

-# Hugging Face Login
+# Hugging Face Login Setup
 hf_token = os.getenv('HF_TOKEN')
 if hf_token:
     login(hf_token, add_to_git_credential=True)
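
For context on the retitled block: the Space authenticates to the Hugging Face Hub with a token stored as a Space secret. A minimal sketch of that pattern, assuming the standard huggingface_hub client (the surrounding app code is not shown in this hunk):

import os
from huggingface_hub import login

hf_token = os.getenv('HF_TOKEN')  # provided as a Space secret, never hard-coded
if hf_token:
    # add_to_git_credential=True also stores the token for git operations
    login(hf_token, add_to_git_credential=True)
else:
    print("HF_TOKEN not set; gated models will not be downloadable.")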
@@ -93,7 +93,7 @@ def transcription_whisper(source):
     torch.cuda.empty_cache()
     return formatted_output, result

-#
+# Prompts
 system_prompt = """
 You are an expert assistant that generates clear, concise, and well-structured
 Minutes of Meeting (MOM) documents from raw meeting transcripts.
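
The newly labelled Prompts section pairs this system prompt with a per-transcript user message via messages_for(), which the next hunk calls but whose body falls outside this diff. A hypothetical sketch of the usual shape:

# Hypothetical reconstruction: messages_for() is not shown in this diff,
# but chat-style models expect a system/user message list like this.
def messages_for(source):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Create Minutes of Meeting for this transcript:\n\n{source}"},
    ]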
@@ -143,7 +143,7 @@ quant_config = BitsAndBytesConfig(
     bnb_4bit_quant_type='nf4'
 )

-# Generate MOM
+# Generate MOM / Summarization
 def generate(model_name, source):
     messages = messages_for(source)
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
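
The context lines above show only the tail of a BitsAndBytesConfig. A self-contained sketch of the 4-bit NF4 setup this implies, using the standard transformers/bitsandbytes API; the model id and compute dtype are assumptions, since the Space's LLAMA constant is not visible in the diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store weights in 4 bits
    bnb_4bit_use_double_quant=True,         # quantize the quantization constants too
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed; not visible in the hunk
    bnb_4bit_quant_type='nf4',              # normalized-float 4-bit, as in the diff
)

model_name = "meta-llama/Llama-3.1-8B-Instruct"  # placeholder for the LLAMA constant
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
)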
@@ -177,7 +177,7 @@ def generate(model_name, source):

         yield mom_output.strip()

-# Translation Functions
+# Translation Functions : Valid Language or Not
 def valid_language(lang):
     return bool(
         pycountry.languages.get(name=lang.capitalize()) or
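
valid_language() checks a user-supplied language against ISO 639 via pycountry. The middle line of the expression (old line 184) falls outside this hunk; by symmetry it is presumably the alpha_2 lookup. A runnable sketch under that assumption:

import pycountry

def valid_language(lang):
    # pycountry's .get() returns None when nothing matches
    return bool(
        pycountry.languages.get(name=lang.capitalize()) or
        pycountry.languages.get(alpha_2=lang.lower()) or   # assumed hidden line
        pycountry.languages.get(alpha_3=lang.lower())
    )

print(valid_language("french"))  # True  (name match)
print(valid_language("fr"))      # True  (ISO 639-1 code)
print(valid_language("xx"))      # False (no such language)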
@@ -185,6 +185,7 @@ def valid_language(lang):
         pycountry.languages.get(alpha_3=lang.lower())
     )

+# Translate Prompts
 system_prompt_translate = "You are a translation assistant. Given a target language and some content, translate the content accurately into that language, preserving meaning, tone, and style, and return only the translated text. Also maintain proper format."

 def user_prompt_translate(source, lang):
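
The new # Translate Prompts header groups system_prompt_translate with user_prompt_translate(), whose body is cut off here. A hypothetical one-liner consistent with the system prompt's "target language and some content" contract:

# Hypothetical: the real user_prompt_translate() body is outside this hunk.
def user_prompt_translate(source, lang):
    return f"Target language: {lang}\n\nContent:\n{source}"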
@@ -306,7 +307,7 @@ def translate_transcribe_gemma(Gemma, source, lang):

         yield translate_output

-# Optimization Functions
+# Optimization Functions for MOM
 def optimize(model_name, source):
     if model_name == 'LLAMA':
         result = generate(LLAMA, source)
@@ -319,7 +320,8 @@ def optimize(model_name, source):

     for chunk in result:
         yield chunk
-
+
+# Optimization Functions for Translation
 def optimize_translate(model_name, source, lang):
     if model_name == 'LLAMA':
         translate = translate_transcribe(LLAMA, source, lang)
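
optimize() and optimize_translate() are thin dispatchers: they pick the concrete model generator and re-yield its chunks, so the Gradio handlers can stream either model the same way. Judging by yield mom_output.strip(), each yielded value is the output so far rather than a delta. Consumption looks like this (transcript_text is a placeholder):

# Streaming consumption sketch; in the UI each chunk overwrites a textbox
# with a progressively longer draft.
for partial in optimize('LLAMA', transcript_text):
    print(partial)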
@@ -335,7 +337,7 @@ def optimize_translate(model_name, source, lang):
     for chunk_tr in translate:
         yield chunk_tr

-# Helper
+# Helper Function for Gradio UI
 def get_source_input(file, link):
     if file is not None:
         return file.name if hasattr(file, 'name') else file
@@ -409,7 +411,7 @@ with gr.Blocks(css=css) as ui:
             for result in optimize_translate(model, source, lang):
                 yield result

-    # Event handlers with file
+    # Event handlers with file or link support
     transcribe.click(
         fn=lambda file, link: transcription_whisper(get_source_input(file, link))[0],
         inputs=[input_file, input_link],
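
The retitled event-handler block binds a button click to the transcription pipeline, with get_source_input() normalizing the two possible inputs (an uploaded file or a pasted link). A minimal self-contained sketch of that wiring pattern with placeholder components and a stub handler; the link fallback is assumed from the inputs list, since the helper's last line is outside the diff:

import gradio as gr

def get_source_input(file, link):
    # gr.File yields an object exposing .name (a temp path); a link is a plain string
    if file is not None:
        return file.name if hasattr(file, 'name') else file
    return link  # assumed fallback

with gr.Blocks() as demo:
    input_file = gr.File(label="Meeting audio")
    input_link = gr.Textbox(label="...or paste a link")
    transcript = gr.Textbox(label="Transcript")
    transcribe = gr.Button("Transcribe")
    transcribe.click(
        fn=lambda file, link: f"(stub) would transcribe: {get_source_input(file, link)}",
        inputs=[input_file, input_link],
        outputs=transcript,
    )

if __name__ == "__main__":
    demo.launch()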
@@ -430,4 +432,4 @@ with gr.Blocks(css=css) as ui:

 # Launch the app
 if __name__ == "__main__":
-ui.launch(server_name="0.0.0.0", server_port=7860)
+    ui.launch(server_name="0.0.0.0", server_port=7860)
|