import json
import os

import numpy as np
import parselmouth

AUDIO_PATH = "sample_trim.wav"
CANON_PATH = "data/fatiha_canonical_fallback.json"
OUT_PATH = "output/feedback_madd.json"

# --- Heuristic thresholds (MVP) ---
# Quranic madd lengths depend on the rule; for the MVP we just classify by duration.
TOO_SHORT_SEC = 0.15
OK_MAX_SEC = 0.35
TOO_LONG_SEC = 0.35  # anything above OK_MAX_SEC is flagged as too long


def extract_long_voiced_segments(sound: parselmouth.Sound):
    """Find sustained high-intensity stretches that could correspond to elongated vowels."""
    intensity = sound.to_intensity(time_step=0.01)
    times = intensity.xs()
    vals = intensity.values[0]

    # Treat frames above the 60th intensity percentile as "voiced enough".
    thr = np.percentile(vals, 60)
    voiced = vals > thr

    segments = []
    in_seg = False
    start = None
    for t, v in zip(times, voiced):
        if v and not in_seg:
            in_seg = True
            start = float(t)
        elif (not v) and in_seg:
            in_seg = False
            end = float(t)
            if end - start >= 0.06:
                segments.append((start, end))
    # Close a segment that runs to the end of the recording.
    if in_seg and start is not None:
        end = float(times[-1])
        if end - start >= 0.06:
            segments.append((start, end))

    # Return only the longer ones as Madd candidates
    longish = [(s, e, e - s) for (s, e) in segments if (e - s) >= 0.18]
    return longish


def madd_words_in_order(canon):
    """Return dicts, in recitation order, for words where madd_positions_base_index exists."""
    items = []
    for ay in canon["ayahs"]:
        for w in ay["word_info"]:
            if w.get("madd_positions_base_index"):
                items.append({
                    "ayah": ay["ayah"],
                    "word": w["word"],
                    "base": w["base"],
                    "madd_positions_base_index": w["madd_positions_base_index"],
                    "phonemes_fallback": w.get("phonemes_fallback", "")
                })
    return items


def classify_duration(d):
    if d < TOO_SHORT_SEC:
        return "too_short"
    if d <= OK_MAX_SEC:
        return "ok"
    return "too_long"


def confidence_from_duration(d):
    # Crude confidence: the farther from the "ok" band, the higher the confidence.
    if d < TOO_SHORT_SEC:
        return min(0.95, 0.60 + (TOO_SHORT_SEC - d) * 2.0)
    if d <= OK_MAX_SEC:
        return 0.55
    return min(0.95, 0.60 + (d - OK_MAX_SEC) * 1.2)


def main():
    # Load canonical word info
    with open(CANON_PATH, "r", encoding="utf-8") as f:
        canon = json.load(f)
    madd_targets = madd_words_in_order(canon)

    # Load audio and pull out long voiced segments (Madd candidates)
    snd = parselmouth.Sound(AUDIO_PATH)
    longish = extract_long_voiced_segments(snd)

    feedback = {
        "surah": canon["surah"],
        "riwayah": canon["riwayah"],
        "rule": "Madd (MVP heuristic)",
        "audio_path": AUDIO_PATH,
        "notes": [
            "This MVP uses intensity-based voiced segments and maps long segments to Madd-eligible words in order.",
            "Replace with real forced alignment + Quranic-Phonemizer later for Tajweed-accurate placement."
        ],
        "segments_detected": [{"start": s, "end": e, "dur": d} for (s, e, d) in longish],
        "madd_targets": madd_targets,
        "results": []
    }

    # Map segments to madd targets sequentially
    n = min(len(longish), len(madd_targets))
    for i in range(n):
        s, e, d = longish[i]
        tgt = madd_targets[i]
        label = classify_duration(d)
        conf = float(round(confidence_from_duration(d), 3))

        # Simple user-facing tip
        if label == "too_short":
            tip = "Extend the vowel a bit more (madd)."
        elif label == "too_long":
            tip = "Shorten the vowel slightly (avoid over-stretching)."
        else:
            tip = "Madd length looks OK."
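        # NOTE: pairing segment i with madd target i is an MVP heuristic; one spurious
        # or missed voiced segment shifts every later match. Real forced alignment
        # (see "notes" above) is the intended replacement for this step.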
        feedback["results"].append({
            "index": i + 1,
            "ayah": tgt["ayah"],
            "word": tgt["word"],
            "timestamp": {"start": round(s, 3), "end": round(e, 3)},
            "duration_sec": round(d, 3),
            "classification": label,
            "confidence": conf,
            "tip": tip
        })

    # Make sure the output directory exists, then write the feedback report.
    os.makedirs(os.path.dirname(OUT_PATH), exist_ok=True)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(feedback, f, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", OUT_PATH)
    print("Long segments:", len(longish))
    print("Madd target words:", len(madd_targets))
    print("Mapped results:", len(feedback["results"]))
    if feedback["results"]:
        print("Sample result:", feedback["results"][0])


if __name__ == "__main__":
    main()
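# Illustrative shape of the canonical JSON this script assumes. Field names are taken
# from the accesses above; the values are placeholders, not real canonical data:
#
# {
#   "surah": "...",
#   "riwayah": "...",
#   "ayahs": [
#     {
#       "ayah": 1,
#       "word_info": [
#         {
#           "word": "<word as recited>",
#           "base": "<base letters>",
#           "madd_positions_base_index": [<int>, ...],
#           "phonemes_fallback": "<fallback phoneme string>"
#         }
#       ]
#     }
#   ]
# }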