Spaces:
Sleeping
Sleeping
Update app.py
Browse files
testing just 800 chars
app.py
CHANGED
|
@@ -89,7 +89,7 @@ class ModelWrapper:
|
|
| 89 |
prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True, enable_thinking=False)
|
| 90 |
return prompt
|
| 91 |
|
| 92 |
-
def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=
|
| 93 |
"""Generate and decode the response with the recommended temperature settings for thinking and non-thinking."""
|
| 94 |
print("Generating response...")
|
| 95 |
|
|
@@ -136,50 +136,55 @@ model = ModelWrapper(MODEL_NAME)
|
|
| 136 |
# — Gradio inference function —
|
| 137 |
def compliance_check(rules_text, transcript_text, thinking):
|
| 138 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
rules = [r for r in rules_text.split("\n") if r.strip()]
|
| 140 |
inp = format_rules(rules) + format_transcript(transcript_text)
|
| 141 |
|
| 142 |
-
#
|
| 143 |
-
out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=
|
| 144 |
|
| 145 |
-
#
|
| 146 |
-
max_chars =
|
| 147 |
if len(out) > max_chars:
|
| 148 |
-
out = out[:max_chars] + "
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
# Clean up any malformed output
|
| 151 |
-
out = out.strip()
|
| 152 |
if not out:
|
| 153 |
-
out = "No response generated.
|
| 154 |
|
| 155 |
return out
|
| 156 |
|
| 157 |
except Exception as e:
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
return error_msg
|
| 161 |
|
| 162 |
# — build Gradio interface —
|
| 163 |
demo = gr.Interface(
|
| 164 |
fn=compliance_check,
|
| 165 |
inputs=[
|
| 166 |
-
gr.Textbox(lines=
|
| 167 |
-
gr.Textbox(lines=
|
| 168 |
-
gr.Checkbox(label="Enable ⟨think⟩ mode", value=
|
| 169 |
],
|
| 170 |
-
outputs=gr.Textbox(label="Compliance Output", lines=
|
| 171 |
title="DynaGuard Compliance Checker",
|
| 172 |
-
description="
|
| 173 |
-
|
| 174 |
show_progress=True
|
| 175 |
)
|
| 176 |
|
| 177 |
if __name__ == "__main__":
|
| 178 |
-
#
|
| 179 |
demo.launch(
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
quiet=False,
|
| 184 |
-
max_threads=1 # Limit concurrent requests
|
| 185 |
)
|
|
|
|
| 89 |
prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True, enable_thinking=False)
|
| 90 |
return prompt
|
| 91 |
|
| 92 |
+
def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=128, enable_thinking=True, system_prompt=SYSTEM_PROMPT):
|
| 93 |
"""Generate and decode the response with the recommended temperature settings for thinking and non-thinking."""
|
| 94 |
print("Generating response...")
|
| 95 |
|
|
|
|
| 136 |
# — Gradio inference function —
|
| 137 |
def compliance_check(rules_text, transcript_text, thinking):
    """Run the compliance model on a transcript and return its text output.

    Args:
        rules_text: Rules, one per line; blank lines are dropped. Inputs
            longer than 500 characters are rejected. ``None`` is treated
            as an empty string.
        transcript_text: Transcript to evaluate. Inputs longer than 1000
            characters are rejected. ``None`` is treated as an empty string.
        thinking: Forwarded to ``model.get_response`` as ``enable_thinking``.

    Returns:
        The model output, cut to 800 characters (plus ``"..."``) and
        stripped, ``"No response generated."`` when that is empty, or a
        short ``"Error: ..."`` string. Ordinary exceptions are never
        propagated to the caller.
    """
    # Local import so this function does not depend on the file's import block.
    import traceback

    # Deliberately conservative limits; the messages below are built from
    # these values so the check and the text cannot drift apart.
    max_rules_chars = 500
    max_transcript_chars = 1000
    max_new_tokens = 128
    max_output_chars = 800

    try:
        rules_text = rules_text or ""
        transcript_text = transcript_text or ""

        if len(rules_text) > max_rules_chars:
            return f"Error: Rules text too long. Please limit to {max_rules_chars} characters."

        if len(transcript_text) > max_transcript_chars:
            return f"Error: Transcript too long. Please limit to {max_transcript_chars} characters."

        rules = [r for r in rules_text.split("\n") if r.strip()]
        inp = format_rules(rules) + format_transcript(transcript_text)

        out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=max_new_tokens)

        # Keep the response short; the ellipsis marks that it was cut.
        if len(out) > max_output_chars:
            out = out[:max_output_chars] + "..."

        # Drop anything that cannot be encoded as UTF-8 (e.g. lone surrogates).
        out = out.encode("utf-8", errors="ignore").decode("utf-8").strip()

        if not out:
            out = "No response generated."

        return out

    except Exception:
        # Keep the user-facing message short, but record the real cause so
        # the failure is not silently lost.
        traceback.print_exc()
        return "Error: Processing failed"
|
|
|
|
| 168 |
|
| 169 |
# — build Gradio interface —
|
| 170 |
demo = gr.Interface(
    fn=compliance_check,
    # Inputs in order: rules text, transcript text, thinking flag.
    inputs=[
        gr.Textbox(
            label="Rules (one per line)",
            placeholder="Enter 1-3 rules, keep short",
            lines=3,
            max_lines=5,
        ),
        gr.Textbox(
            label="Transcript",
            placeholder="Enter transcript, max 1000 chars",
            lines=5,
            max_lines=8,
        ),
        # Thinking mode is off by default for faster processing.
        gr.Checkbox(label="Enable ⟨think⟩ mode", value=False),
    ],
    outputs=gr.Textbox(label="Compliance Output", lines=8, max_lines=10),
    title="DynaGuard Compliance Checker",
    description="Short inputs only. Rules: max 500 chars. Transcript: max 1000 chars.",
    flagging_mode="never",
    show_progress=True,
)
|
| 183 |
|
| 184 |
if __name__ == "__main__":
    # Minimal launch configuration, collected in one place.
    launch_options = {
        "show_error": False,  # hide detailed errors
        "quiet": True,  # reduce logging
        "share": False,
    }
    demo.launch(**launch_options)
|