taruschirag committed on
Commit
0ebbe45
·
verified ·
1 Parent(s): b7ecb8a

Update app.py

Browse files

testing just 800 chars

Files changed (1) hide show
  1. app.py +29 -24
app.py CHANGED
@@ -89,7 +89,7 @@ class ModelWrapper:
89
  prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True, enable_thinking=False)
90
  return prompt
91
 
92
- def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=256, enable_thinking=True, system_prompt=SYSTEM_PROMPT):
93
  """Generate and decode the response with the recommended temperature settings for thinking and non-thinking."""
94
  print("Generating response...")
95
 
@@ -136,50 +136,55 @@ model = ModelWrapper(MODEL_NAME)
136
  # — Gradio inference function —
137
  def compliance_check(rules_text, transcript_text, thinking):
138
  try:
 
 
 
 
 
 
 
139
  rules = [r for r in rules_text.split("\n") if r.strip()]
140
  inp = format_rules(rules) + format_transcript(transcript_text)
141
 
142
- # Limit max tokens to prevent oversized responses
143
- out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=256)
144
 
145
- # Ensure response isn't too long for HTTP response
146
- max_chars = 1500 # Conservative limit
147
  if len(out) > max_chars:
148
- out = out[:max_chars] + "\n\n[Response truncated to prevent server errors]"
 
 
 
149
 
150
- # Clean up any malformed output
151
- out = out.strip()
152
  if not out:
153
- out = "No response generated. Please try with different input."
154
 
155
  return out
156
 
157
  except Exception as e:
158
- error_msg = f"Error: {str(e)[:200]}" # Limit error message length too
159
- print(f"Full error: {e}")
160
- return error_msg
161
 
162
  # — build Gradio interface —
163
  demo = gr.Interface(
164
  fn=compliance_check,
165
  inputs=[
166
- gr.Textbox(lines=5, label="Rules (one per line)", max_lines=10),
167
- gr.Textbox(lines=10, label="Transcript", max_lines=15),
168
- gr.Checkbox(label="Enable ⟨think⟩ mode", value=True)
169
  ],
170
- outputs=gr.Textbox(label="Compliance Output", lines=10, max_lines=15),
171
  title="DynaGuard Compliance Checker",
172
- description="Paste your rules & transcript, then hit Submit.",
173
- allow_flagging="never",
174
  show_progress=True
175
  )
176
 
177
  if __name__ == "__main__":
178
- # Launch with specific server settings to handle response size issues
179
  demo.launch(
180
- server_name="0.0.0.0",
181
- server_port=7860,
182
- show_error=True,
183
- quiet=False,
184
- max_threads=1 # Limit concurrent requests
185
  )
 
89
  prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True, enable_thinking=False)
90
  return prompt
91
 
92
+ def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=128, enable_thinking=True, system_prompt=SYSTEM_PROMPT):
93
  """Generate and decode the response with the recommended temperature settings for thinking and non-thinking."""
94
  print("Generating response...")
95
 
 
136
  # — Gradio inference function —
def compliance_check(rules_text, transcript_text, thinking):
    """Check a transcript against a list of rules and return text for the UI.

    Args:
        rules_text: Rules, one per line; blank lines are dropped. ``None`` is
            treated as an empty string.
        transcript_text: Transcript to evaluate. ``None`` is treated as an
            empty string.
        thinking: Forwarded to ``model.get_response`` as ``enable_thinking``.

    Returns:
        The cleaned model output (at most 800 characters, followed by "..."
        when it was cut), "No response generated." when the output is empty,
        or a short "Error: ..." message. This function never raises: it is
        the UI boundary, so every failure is reported as a string.
    """
    # Deliberately conservative limits, kept in one place so the checks and
    # the user-facing messages cannot drift apart.
    max_rules_chars = 500
    max_transcript_chars = 1000
    max_new_tokens = 128
    max_output_chars = 800

    try:
        # Guard against None (e.g. a direct API call with a missing field) so
        # the length checks below report a real result instead of failing.
        rules_text = rules_text or ""
        transcript_text = transcript_text or ""

        if len(rules_text) > max_rules_chars:
            return f"Error: Rules text too long. Please limit to {max_rules_chars} characters."

        if len(transcript_text) > max_transcript_chars:
            return f"Error: Transcript too long. Please limit to {max_transcript_chars} characters."

        rules = [line for line in rules_text.split("\n") if line.strip()]
        inp = format_rules(rules) + format_transcript(transcript_text)

        out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=max_new_tokens)

        # Drop anything that cannot be encoded as UTF-8 (e.g. lone surrogates)
        # and trim whitespace BEFORE truncating, so padding does not eat into
        # the character budget.
        out = out.encode("utf-8", errors="ignore").decode("utf-8").strip()

        if not out:
            return "No response generated."

        if len(out) > max_output_chars:
            out = out[:max_output_chars] + "..."

        return out

    except Exception as exc:
        # Keep the user-facing message short, but leave a trace in the server
        # log; otherwise failures are impossible to diagnose.
        print(f"compliance_check failed: {exc!r}")
        return "Error: Processing failed"
 
168
 
169
  # — build Gradio interface —
# Single-page Gradio UI: two text inputs and a checkbox feed compliance_check,
# and its string result is shown in one output textbox.
demo = gr.Interface(
    title="DynaGuard Compliance Checker",
    description="Short inputs only. Rules: max 500 chars. Transcript: max 1000 chars.",
    fn=compliance_check,
    # Order matters: widgets map positionally onto
    # compliance_check(rules_text, transcript_text, thinking).
    inputs=[
        gr.Textbox(
            label="Rules (one per line)",
            placeholder="Enter 1-3 rules, keep short",
            lines=3,
            max_lines=5,
        ),
        gr.Textbox(
            label="Transcript",
            placeholder="Enter transcript, max 1000 chars",
            lines=5,
            max_lines=8,
        ),
        # Thinking mode is off by default for faster processing.
        gr.Checkbox(label="Enable ⟨think⟩ mode", value=False),
    ],
    outputs=gr.Textbox(label="Compliance Output", lines=8, max_lines=10),
    flagging_mode="never",
    show_progress=True,
)
183
 
if __name__ == "__main__":
    # Minimal launch configuration, gathered in one place:
    #   show_error=False -> hide detailed errors
    #   quiet=True       -> reduce logging
    #   share=False      -> passed explicitly, as before
    launch_options = {
        "show_error": False,
        "quiet": True,
        "share": False,
    }
    demo.launch(**launch_options)