taruschirag committed on
Commit
dd396f0
·
verified ·
1 Parent(s): 0ebbe45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -33
app.py CHANGED
@@ -89,7 +89,7 @@ class ModelWrapper:
89
  prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True, enable_thinking=False)
90
  return prompt
91
 
92
- def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=128, enable_thinking=True, system_prompt=SYSTEM_PROMPT):
93
  """Generate and decode the response with the recommended temperature settings for thinking and non-thinking."""
94
  print("Generating response...")
95
 
@@ -136,55 +136,50 @@ model = ModelWrapper(MODEL_NAME)
136
  # — Gradio inference function —
137
  def compliance_check(rules_text, transcript_text, thinking):
138
  try:
139
- # Extremely conservative limits to prevent HTTP errors
140
- if len(rules_text) > 500:
141
- return "Error: Rules text too long. Please limit to 500 characters."
142
-
143
- if len(transcript_text) > 1000:
144
- return "Error: Transcript too long. Please limit to 1000 characters."
145
-
146
  rules = [r for r in rules_text.split("\n") if r.strip()]
147
  inp = format_rules(rules) + format_transcript(transcript_text)
148
 
149
- # Very small token limit
150
- out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=128)
151
-
152
- # Aggressive truncation - keep response very short
153
- max_chars = 800 # Very conservative
154
- if len(out) > max_chars:
155
- out = out[:max_chars] + "..."
156
-
157
- # Ensure it's valid UTF-8 and clean
158
- out = out.encode('utf-8', errors='ignore').decode('utf-8').strip()
159
 
 
 
160
  if not out:
161
- out = "No response generated."
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  return out
164
 
165
  except Exception as e:
166
- # Very short error message
167
- return f"Error: Processing failed"
 
 
168
 
169
  # — build Gradio interface —
170
  demo = gr.Interface(
171
  fn=compliance_check,
172
  inputs=[
173
- gr.Textbox(lines=3, label="Rules (one per line)", max_lines=5, placeholder="Enter 1-3 rules, keep short"),
174
- gr.Textbox(lines=5, label="Transcript", max_lines=8, placeholder="Enter transcript, max 1000 chars"),
175
- gr.Checkbox(label="Enable ⟨think⟩ mode", value=False) # Default to False for faster processing
176
  ],
177
- outputs=gr.Textbox(label="Compliance Output", lines=8, max_lines=10),
178
  title="DynaGuard Compliance Checker",
179
- description="Short inputs only. Rules: max 500 chars. Transcript: max 1000 chars.",
180
- flagging_mode="never",
181
  show_progress=True
182
  )
183
 
184
  if __name__ == "__main__":
185
- # Minimal launch configuration
186
- demo.launch(
187
- show_error=False, # Hide detailed errors
188
- quiet=True, # Reduce logging
189
- share=False
190
- )
 
89
  prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True, enable_thinking=False)
90
  return prompt
91
 
92
+ def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=256, enable_thinking=True, system_prompt=SYSTEM_PROMPT):
93
  """Generate and decode the response with the recommended temperature settings for thinking and non-thinking."""
94
  print("Generating response...")
95
 
 
136
  # — Gradio inference function —
137
  def compliance_check(rules_text, transcript_text, thinking):
138
  try:
 
 
 
 
 
 
 
139
  rules = [r for r in rules_text.split("\n") if r.strip()]
140
  inp = format_rules(rules) + format_transcript(transcript_text)
141
 
142
+ # Limit max tokens to prevent oversized responses
143
+ out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=256)
 
 
 
 
 
 
 
 
144
 
145
+ # Clean up any malformed output and ensure it's a string
146
+ out = str(out).strip()
147
  if not out:
148
+ out = "No response generated. Please try with different input."
149
+
150
+ # Ensure the response isn't too long for an HTTP response by checking byte length
151
+ max_bytes = 2500 # A more generous limit, in bytes
152
+ out_bytes = out.encode('utf-8')
153
+
154
+ if len(out_bytes) > max_bytes:
155
+ # Truncate the byte string, then decode back to a string, ignoring errors
156
+ # This prevents cutting a multi-byte character in half
157
+ truncated_bytes = out_bytes[:max_bytes]
158
+ out = truncated_bytes.decode('utf-8', errors='ignore')
159
+ out += "\n\n[Response truncated to prevent server errors]"
160
 
161
  return out
162
 
163
  except Exception as e:
164
+ error_msg = f"Error: {str(e)[:200]}" # Limit error message length
165
+ print(f"Full error: {e}")
166
+ return error_msg
167
+
168
 
169
  # — build Gradio interface —
170
  demo = gr.Interface(
171
  fn=compliance_check,
172
  inputs=[
173
+ gr.Textbox(lines=5, label="Rules (one per line)", max_lines=10),
174
+ gr.Textbox(lines=10, label="Transcript", max_lines=15),
175
+ gr.Checkbox(label="Enable ⟨think⟩ mode", value=True)
176
  ],
177
+ outputs=gr.Textbox(label="Compliance Output", lines=10, max_lines=15),
178
  title="DynaGuard Compliance Checker",
179
+ description="Paste your rules & transcript, then hit Submit.",
180
+ allow_flagging="never",
181
  show_progress=True
182
  )
183
 
184
  if __name__ == "__main__":
185
+ demo.launch()