taruschirag committed on
Commit
6f8b750
·
verified ·
1 Parent(s): dd396f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -135,35 +135,43 @@ model = ModelWrapper(MODEL_NAME)
135
 
136
  # — Gradio inference function —
137
  def compliance_check(rules_text, transcript_text, thinking):
 
 
 
138
  try:
139
  rules = [r for r in rules_text.split("\n") if r.strip()]
140
  inp = format_rules(rules) + format_transcript(transcript_text)
141
 
142
- # Limit max tokens to prevent oversized responses
143
  out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=256)
144
 
145
- # Clean up any malformed output and ensure it's a string
146
  out = str(out).strip()
147
  if not out:
148
  out = "No response generated. Please try with different input."
149
-
150
- # Ensure the response isn't too long for an HTTP response by checking byte length
151
- max_bytes = 2500 # A more generous limit, in bytes
152
- out_bytes = out.encode('utf-8')
153
-
154
- if len(out_bytes) > max_bytes:
155
- # Truncate the byte string, then decode back to a string, ignoring errors
156
- # This prevents cutting a multi-byte character in half
157
- truncated_bytes = out_bytes[:max_bytes]
158
- out = truncated_bytes.decode('utf-8', errors='ignore')
159
- out += "\n\n[Response truncated to prevent server errors]"
160
 
161
- return out
162
-
163
  except Exception as e:
164
- error_msg = f"Error: {str(e)[:200]}" # Limit error message length
165
- print(f"Full error: {e}")
166
- return error_msg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
 
169
  # — build Gradio interface —
 
135
 
136
  # — Gradio inference function —
137
  def compliance_check(rules_text, transcript_text, thinking):
138
+ # This is the general byte limit for any response leaving this function
139
+ MAX_RESPONSE_BYTES = 4096
140
+
141
  try:
142
  rules = [r for r in rules_text.split("\n") if r.strip()]
143
  inp = format_rules(rules) + format_transcript(transcript_text)
144
 
 
145
  out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=256)
146
 
147
+ # Clean up the successful output
148
  out = str(out).strip()
149
  if not out:
150
  out = "No response generated. Please try with different input."
 
 
 
 
 
 
 
 
 
 
 
151
 
 
 
152
  except Exception as e:
153
+ # If any error happens, create an error message instead
154
+ full_error = str(e)
155
+ print(f"Full error: {full_error}") # Log the full error for debugging
156
+ # The output 'out' will now be this error message
157
+ out = f"An error occurred: {full_error}"
158
+
159
+ # --- UNIVERSAL BYTE CHECK FOR ALL OUTPUTS ---
160
+ # This block now runs for both successful results and error messages.
161
+
162
+ # Encode the final output (whether result or error) to bytes
163
+ out_bytes = out.encode('utf-8')
164
+
165
+ # Check if the byte length exceeds our limit
166
+ if len(out_bytes) > MAX_RESPONSE_BYTES:
167
+ # Truncate the byte string safely
168
+ truncated_bytes = out_bytes[:MAX_RESPONSE_BYTES]
169
+ out = truncated_bytes.decode('utf-8', errors='ignore')
170
+ # Add a clear message indicating it was truncated
171
+ out += "\n\n[Response truncated to prevent server errors]"
172
+
173
+ return out
174
+
175
 
176
 
177
  # — build Gradio interface —