taruschirag committed (verified)
Commit d6fa859 · 1 Parent(s): 38e8f2f

Update app.py

Files changed (1): app.py (+77, -57)
app.py CHANGED
@@ -1,28 +1,39 @@
 import os
-
-# --- CRITICAL NEW LINES TO DISABLE SSR ---
-# These must be set BEFORE importing gradio.
-os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
-os.environ["GRADIO_SERVER_PORT"] = "7860"
-os.environ["GRADIO_TEMP_DIR"] = "/tmp"
-os.environ["GRADIO_ENABLE_SSR"] = "0"  # THIS IS THE KEY LINE!
-
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from datasets import load_dataset  # Keep this for get_example, though not used in compliance_check currently
 from huggingface_hub import login
 
+# --- Basic Setup ---
 HF_READONLY_API_KEY = os.getenv("HF_READONLY_API_KEY")
-if HF_READONLY_API_KEY:  # Only login if key exists
+if HF_READONLY_API_KEY:
     login(token=HF_READONLY_API_KEY)
 
-COT_OPENING = "<think>"
-EXPLANATION_OPENING = "<explanation>"
-LABEL_OPENING = "<answer>"
-LABEL_CLOSING = "</answer>"
-INPUT_FIELD = "question"
 SYSTEM_PROMPT = """You are a guardian model evaluating…</explanation>"""
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+
+# --- LAZY LOADING SETUP ---
+# We initialize the model and tokenizer as None. They will be loaded on the first call.
+model = None
+tokenizer = None
+
+def load_model_and_tokenizer():
+    """
+    Loads the model and tokenizer if they haven't been loaded yet.
+    This function will only run its main logic once.
+    """
+    global model, tokenizer
+    if model is None or tokenizer is None:
+        print("--- LAZY LOADING: Loading model and tokenizer for the first time... ---")
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        tokenizer.pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
+
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            device_map="auto",
+            torch_dtype=torch.bfloat16
+        ).eval()
+        print("--- Model and tokenizer loaded successfully. ---")
 
 def format_rules(rules):
     formatted_rules = "<rules>\n"
@@ -35,61 +46,70 @@ def format_transcript(transcript):
     formatted_transcript = f"<transcript>\n{transcript}\n</transcript>\n"
     return formatted_transcript
 
-# Keep get_example and ModelWrapper as they are for now
-# We'll use the dummy compliance_check to isolate the SSR issue first.
-
-# --- Model Loading (Moved outside ModelWrapper to simplify for this test) ---
-# This part is fine and will load once on startup.
-print("Loading model and tokenizer...")
-MODEL_NAME = "Qwen/Qwen3-0.6B"
-# Instantiate tokenizer directly
-if "nemoguard" in MODEL_NAME:
-    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
-else:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-tokenizer.pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
-
-# Instantiate model directly
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16).eval()
-print("Model and tokenizer loaded successfully.")
-
-# --- TEMPORARY DEBUGGING CODE - The Dummy compliance_check ---
-# Keep this dummy function here for now. If this works, then we can bring back
-# your full, robust compliance_check logic.
+# --- The Main Gradio Function ---
 def compliance_check(rules_text, transcript_text, thinking):
     """
-    A temporary, simple function for debugging. It does NOT call the AI model.
+    The main inference function for the Gradio app.
+    It ensures the model is loaded before running inference.
     """
-    print("--- RUNNING IN DEBUG MODE: The AI model is currently bypassed. ---")
-
-    # We will just return a simple, safe string.
-    # This removes all complexity from the model output, parsing, and truncation.
-    dummy_response = "This is a test response from the debug function. If you see this and no h11 error, the app is stable without SSR."
-
-    print(f"Returning dummy response: '{dummy_response}'")
-
-    return dummy_response
-
-# Removed safe_truncate_to_bytes and ModelWrapper class for this specific test
-# as they are not used by the dummy function and might add unnecessary complexity
-# for this isolation step. We will add them back if this test passes.
-
-# build Gradio interface with current configuration
+    try:
+        # STEP 1: Ensure the model is loaded. This will only do work on the first run.
+        load_model_and_tokenizer()
+
+        # STEP 2: Your original, robust input validation.
+        if not rules_text or not rules_text.strip():
+            return "Error: Please provide at least one rule."
+        if not transcript_text or not transcript_text.strip():
+            return "Error: Please provide a transcript to analyze."
+
+        # STEP 3: Format the input and generate a response.
+        rules = [r.strip() for r in rules_text.split("\n") if r.strip()]
+        inp = format_rules(rules) + format_transcript(transcript_text)
+
+        message = [
+            {'role': 'system', 'content': SYSTEM_PROMPT},
+            {'role': 'user', 'content': inp}
+        ]
+        prompt = tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
+
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+        with torch.no_grad():
+            output_content = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                pad_token_id=tokenizer.pad_token_id,
+                do_sample=True,
+                temperature=0.6,
+                top_p=0.95,
+            )
+
+        # Decode only the newly generated part of the response.
+        output_text = tokenizer.decode(output_content[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+
+        return output_text.strip()
+
+    except Exception as e:
+        # A simple, safe error handler.
+        print(f"An error occurred: {str(e)}")
+        return "An error occurred during processing. The application might be under heavy load or encountered a problem. Please try again."
+
+# --- Build the Gradio Interface ---
+# We keep your well-designed interface configuration.
 demo = gr.Interface(
     fn=compliance_check,
     inputs=[
-        gr.Textbox(lines=5, label="Rules (one per line)"),
-        gr.Textbox(lines=10, label="Transcript"),
+        gr.Textbox(lines=5, label="Rules (one per line)", max_lines=10, placeholder="Enter compliance rules, one per line..."),
+        gr.Textbox(lines=10, label="Transcript", max_lines=15, placeholder="Paste the transcript to analyze..."),
         gr.Checkbox(label="Enable ⟨think⟩ mode", value=True)
     ],
-    outputs=gr.Textbox(label="Compliance Output", lines=10),
+    outputs=gr.Textbox(label="Compliance Output", lines=10, max_lines=15, show_copy_button=True),
     title="DynaGuard Compliance Checker",
-    description="Paste your rules & transcript, then hit Submit.",
-    allow_flagging="never"  # This is fine for now, will generate a warning but not crash
+    description="Paste your rules & transcript, then hit Submit. The model will load on the first request, which may take a moment.",
+    allow_flagging="never",
+    cache_examples=False
 )
 
+# --- Launch the App ---
 if __name__ == "__main__":
-    # Remove _frontend=False here, as the environment variable should now handle it.
     demo.launch()
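
Note on the lazy loader: Gradio serves event handlers from multiple worker threads, so two overlapping first requests can both observe model is None and load the checkpoint twice. A minimal hardening sketch, not part of this commit, that reuses the app's globals with double-checked locking (_LOAD_LOCK is a new, hypothetical name):

import threading

_LOAD_LOCK = threading.Lock()  # guards one-time initialization

def load_model_and_tokenizer():
    """Thread-safe variant of the lazy loader (sketch, not in the commit)."""
    global model, tokenizer
    if model is not None and tokenizer is not None:
        return  # fast path: already initialized
    with _LOAD_LOCK:
        # Re-check inside the lock: another request may have finished
        # loading while this one was waiting (double-checked locking).
        if model is None or tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            tokenizer.pad_token_id = tokenizer.pad_token_id or tokenizer.eos_token_id
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16
            ).eval()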
 
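
Note on decoding: output_content[0] holds the prompt tokens followed by the completion, so slicing at inputs.input_ids.shape[1] keeps only the newly generated text. A self-contained sketch of the same pattern; the model name matches the one the app uses, but any chat model with a chat template works:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "Qwen/Qwen3-0.6B"  # illustrative; same checkpoint as the app
tok = AutoTokenizer.from_pretrained(name)
lm = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.bfloat16).eval()

messages = [{"role": "user", "content": "Say hello in one word."}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tok(prompt, return_tensors="pt")

with torch.no_grad():
    out = lm.generate(**inputs, max_new_tokens=16, do_sample=False)

# out[0] = prompt tokens + new tokens; drop the prompt before decoding.
reply = tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(reply)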
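
Once the app is running, the Interface can also be exercised programmatically with the standard gradio_client package. A sketch under assumptions: the URL points at the default local server (a Space id like "user/space-name" also works), and "/predict" is the default endpoint name for a single gr.Interface:

from gradio_client import Client

client = Client("http://127.0.0.1:7860/")
result = client.predict(
    "1. The agent must never share customer account numbers.",      # Rules
    "User: What's my account number?\nAgent: I can't share that.",  # Transcript
    True,                  # Enable <think> mode
    api_name="/predict",
)
print(result)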