Spaces:
Running
Running
| import json | |
# JSON input read by main(): supplies the "words" list and "audio_path".
WORDS_PATH = "output/word_timestamps_v2.json"
# JSON input read by main(): its optional "results" list becomes the madd issues.
MADD_PATH = "output/feedback_madd.json"
# JSON input read by main(): "ayahs" -> "word_info" entries provide
# "madd_positions_base_index" per (ayah, word).
CANON_FALLBACK_PATH = "data/fatiha_canonical_fallback.json"
# JSON output written by main(): the combined response document.
OUT_PATH = "output/api_response.json"
def _load_json(path):
    """Read and parse a UTF-8 JSON file, closing the handle deterministically."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def main(words_path=None, madd_path=None, canon_fallback_path=None, out_path=None):
    """Merge word, madd and canonical data into one JSON response file.

    Reads three JSON documents, combines them into a single response
    dictionary (word list, text-mismatch issues, madd issues, summary
    counts, notes), writes it as pretty-printed UTF-8 JSON, and prints a
    short summary to stdout.

    Args:
        words_path: JSON file with an "audio_path" value and a "words" list.
            Each word entry must have "index", "ayah", "word", "timestamp",
            "match" and "score". Defaults to WORDS_PATH.
        madd_path: JSON file with an optional "results" list. Each result
            must have "ayah", "word", "timestamp", "duration_sec",
            "classification", "confidence" and "tip". Defaults to MADD_PATH.
        canon_fallback_path: JSON file with an "ayahs" list. Each ayah has
            "ayah" and an optional "word_info" list of entries with "word"
            and optional "madd_positions_base_index". Defaults to
            CANON_FALLBACK_PATH.
        out_path: Destination of the response JSON. Defaults to OUT_PATH.

    Returns:
        None. The result is written to ``out_path``.

    Raises:
        OSError: if an input cannot be opened or the output cannot be written.
        json.JSONDecodeError: if an input file is not valid JSON.
        KeyError: if a required key listed above is missing.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth and main() without arguments behaves as before.
    words_path = WORDS_PATH if words_path is None else words_path
    madd_path = MADD_PATH if madd_path is None else madd_path
    canon_fallback_path = (
        CANON_FALLBACK_PATH if canon_fallback_path is None else canon_fallback_path
    )
    out_path = OUT_PATH if out_path is None else out_path

    words_doc = _load_json(words_path)
    madd_doc = _load_json(madd_path)
    canon_fb = _load_json(canon_fallback_path)

    # Quick lookup: (ayah, word) -> madd positions. If the same pair occurs
    # more than once, the last occurrence wins.
    madd_pos = {
        (ay["ayah"], wi["word"]): wi.get("madd_positions_base_index", [])
        for ay in canon_fb["ayahs"]
        for wi in ay.get("word_info", [])
    }

    # Word list for the UI: selected fields plus the looked-up madd positions
    # (empty list when the pair is not in the canonical data).
    ui_words = [
        {
            "index": w["index"],
            "ayah": w["ayah"],
            "word": w["word"],
            "timestamp": w["timestamp"],
            "match": w["match"],
            "score": w["score"],
            "madd_positions_base_index": madd_pos.get((w["ayah"], w["word"]), []),
        }
        for w in words_doc["words"]
    ]

    # Every word whose "match" flag is falsy is reported as a text mismatch.
    mismatches = [
        {
            "ayah": w["ayah"],
            "word": w["word"],
            "timestamp": w["timestamp"],
            "reason": "text_mismatch",
            "score": w["score"],
        }
        for w in words_doc["words"]
        if not w["match"]
    ]

    # Madd results already include timestamps; keep them as "issues",
    # copying only the fields exposed in the response.
    madd_issues = [
        {
            "type": "madd",
            "ayah": r["ayah"],
            "word": r["word"],
            "timestamp": r["timestamp"],
            "duration_sec": r["duration_sec"],
            "classification": r["classification"],
            "confidence": r["confidence"],
            "tip": r["tip"],
        }
        for r in madd_doc.get("results", [])
    ]

    out = {
        "surah": "Al-Fatiha",
        "audio_path": words_doc["audio_path"],
        "pipeline_version": "mvp-v1",
        "summary": {
            "words_total": len(ui_words),
            "text_mismatches": len(mismatches),
            "madd_issues": len(madd_issues),
        },
        "words": ui_words,
        "issues": {
            "text": mismatches,
            "madd": madd_issues,
        },
        "notes": [
            "Word timestamps are MVP (token-time interpolation).",
            "Text alignment uses global DP alignment for robustness.",
            "Madd detection uses intensity-based long voiced segments; replace with phoneme-level alignment later.",
        ],
    }

    # The context manager guarantees the file is flushed and closed before
    # success is reported.
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(out, fh, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", out_path)
    print("Summary:", out["summary"])
    if out["issues"]["text"]:
        print("Example text mismatch:", out["issues"]["text"][0])
    if out["issues"]["madd"]:
        print("Example madd issue:", out["issues"]["madd"][0])
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()