KiWA001 committed on
Commit
1734552
·
1 Parent(s): fced2dc

fix: unescape newlines and quotes in gemini response

Browse files
Files changed (2) hide show
  1. sanitizer.py +10 -0
  2. test_unescape.py +19 -0
sanitizer.py CHANGED
@@ -109,6 +109,16 @@ def sanitize_response(text: str) -> str:
109
  # Fix user-reported "/N" artifact (treat as newline if standalone)
110
  cleaned = ARTIFACT_N_PATTERN.sub("\n", cleaned)
111
 
 
 
 
 
 
 
 
 
 
 
112
  # === Spam Removal ===
113
  for pattern in COMPILED_SPAM:
114
  cleaned = pattern.sub("", cleaned)
 
109
  # Fix user-reported "/N" artifact (treat as newline if standalone)
110
  cleaned = ARTIFACT_N_PATTERN.sub("\n", cleaned)
111
 
112
+ # === Unescape JSON/Raw Literals ===
113
+ # User wants "\n" to be actual newline and "\"" to be actual quote.
114
+ # We try to unescape, but carefully.
115
+ if "\\n" in cleaned or '\\"' in cleaned:
116
+ try:
117
+ # First try standard replace for safety and speed
118
+ cleaned = cleaned.replace("\\n", "\n").replace('\\"', '"')
119
+ except Exception:
120
+ pass
121
+
122
  # === Spam Removal ===
123
  for pattern in COMPILED_SPAM:
124
  cleaned = pattern.sub("", cleaned)
test_unescape.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sanitizer import sanitize_response
2
+
3
+ test_input = 'There are 3 \\"r\\"s in the word strawberry.\\n\\nHere is the breakdown:\\nstrawberry'
4
+ expected_output = 'There are 3 "r"s in the word strawberry.\n\nHere is the breakdown:\nstrawberry'
5
+
6
+ cleaned = sanitize_response(test_input)
7
+
8
+ print("--- INPUT ---")
9
+ print(test_input)
10
+ print("\n--- CLEANED ---")
11
+ print(cleaned)
12
+ print("\n--- EXPECTED ---")
13
+ print(expected_output)
14
+
15
+ if cleaned == expected_output:
16
+ print("\n✅ SUCCESS: Text was correctly unescaped.")
17
+ else:
18
+ print("\n❌ FAILURE: Output does not match expected.")
19
+ print(f"Got: {repr(cleaned)}")