"""Merge word-timestamp, madd-feedback and canonical-fallback JSON into one API response file."""

import json

WORDS_PATH = "output/word_timestamps_v2.json"
MADD_PATH = "output/feedback_madd.json"
CANON_FALLBACK_PATH = "data/fatiha_canonical_fallback.json"
OUT_PATH = "output/api_response.json"


def _load_json(path):
    """Parse the UTF-8 JSON file at *path* and close the handle before returning."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def main():
    """Build the API response document and write it to ``OUT_PATH``.

    Reads ``WORDS_PATH``, ``MADD_PATH`` and ``CANON_FALLBACK_PATH`` (in that
    order), attaches the canonical ``madd_positions_base_index`` list to each
    word, collects words whose ``match`` flag is falsy as text mismatches,
    copies the madd results into an issues list, and dumps the combined
    document as indented, non-ASCII-escaped JSON. A short summary and the
    first issue of each kind (if any) are printed to stdout.

    Raises:
        OSError: if an input file cannot be opened or the output cannot be
            written.
        json.JSONDecodeError: if an input file is not valid JSON.
        KeyError: if a required key (e.g. ``words``, ``ayahs``, ``audio_path``
            or any per-word / per-result field read with ``[...]``) is absent.
    """
    words_doc = _load_json(WORDS_PATH)
    madd_doc = _load_json(MADD_PATH)
    canon_fb = _load_json(CANON_FALLBACK_PATH)

    # Build quick lookup: (ayah, word) -> madd_positions.
    # Plain assignment on purpose: if the same (ayah, word) pair occurs more
    # than once in the fallback data, the last entry wins.
    madd_pos = {}
    for ay in canon_fb["ayahs"]:
        for wi in ay.get("word_info", []):
            key = (ay["ayah"], wi["word"])
            madd_pos[key] = wi.get("madd_positions_base_index", [])

    # Word list for UI, plus the subset of words flagged as not matching.
    ui_words = []
    mismatches = []
    for w in words_doc["words"]:
        ay = w["ayah"]
        word = w["word"]
        ui_words.append({
            "index": w["index"],
            "ayah": ay,
            "word": word,
            "timestamp": w["timestamp"],
            "match": w["match"],
            "score": w["score"],
            # Words without a canonical entry get an empty position list.
            "madd_positions_base_index": madd_pos.get((ay, word), []),
        })
        if not w["match"]:
            mismatches.append({
                "ayah": ay,
                "word": word,
                "timestamp": w["timestamp"],
                "reason": "text_mismatch",
                "score": w["score"],
            })

    # Madd results already include timestamps; keep them as "issues".
    # Only the listed fields are copied; any other keys on a result are dropped.
    madd_issues = [
        {
            "type": "madd",
            "ayah": r["ayah"],
            "word": r["word"],
            "timestamp": r["timestamp"],
            "duration_sec": r["duration_sec"],
            "classification": r["classification"],
            "confidence": r["confidence"],
            "tip": r["tip"],
        }
        for r in madd_doc.get("results", [])
    ]

    out = {
        "surah": "Al-Fatiha",
        "audio_path": words_doc["audio_path"],
        "pipeline_version": "mvp-v1",
        "summary": {
            "words_total": len(ui_words),
            "text_mismatches": len(mismatches),
            "madd_issues": len(madd_issues),
        },
        "words": ui_words,
        "issues": {
            "text": mismatches,
            "madd": madd_issues,
        },
        "notes": [
            "Word timestamps are MVP (token-time interpolation).",
            "Text alignment uses global DP alignment for robustness.",
            "Madd detection uses intensity-based long voiced segments; replace with phoneme-level alignment later.",
        ],
    }

    # Context manager guarantees the output is flushed and closed before the
    # success message is printed.
    with open(OUT_PATH, "w", encoding="utf-8") as fh:
        json.dump(out, fh, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", OUT_PATH)
    print("Summary:", out["summary"])
    if out["issues"]["text"]:
        print("Example text mismatch:", out["issues"]["text"][0])
    if out["issues"]["madd"]:
        print("Example madd issue:", out["issues"]["madd"][0])


if __name__ == "__main__":
    main()