# iRecite-MVP-API / step9_madd_feedback_json.py
# didodev
# Deploy iRecite MVP API (Docker + FastAPI)
# 4ca6263
import json
import numpy as np
import parselmouth
# Recording that main() loads for analysis.
AUDIO_PATH = "sample_trim.wav"
# Canonical word-level JSON; the code in this file reads its "surah",
# "riwayah" and "ayahs" keys.
CANON_PATH = "data/fatiha_canonical_fallback.json"
# Destination of the JSON feedback report written by main().
OUT_PATH = "output/feedback_madd.json"

# --- Heuristic thresholds (MVP) ---
# Quranic madd lengths depend on the rule; for the MVP we just classify by
# duration (all values are in seconds).
# Durations strictly below this are labelled "too_short".
TOO_SHORT_SEC = 0.15
# Durations from TOO_SHORT_SEC up to and including this are labelled "ok";
# anything longer is labelled "too_long".
OK_MAX_SEC = 0.35
# NOTE(review): not referenced by any function visible in this file --
# classify_duration() and confidence_from_duration() both use OK_MAX_SEC as
# the "too long" boundary. Confirm whether this constant is still needed.
TOO_LONG_SEC = 0.35
def extract_long_voiced_segments(
    sound: "parselmouth.Sound",
    *,
    time_step: float = 0.01,
    intensity_percentile: float = 60,
    min_segment_sec: float = 0.06,
    min_madd_sec: float = 0.18,
):
    """Find long high-intensity stretches of *sound* to use as Madd candidates.

    The intensity contour of the sound is thresholded at one of its own
    percentiles. Every maximal run of frames strictly above that threshold
    becomes a segment, spanning from the time of its first loud frame to the
    time of the first quiet frame after it (or to the last frame time when the
    run reaches the end of the contour).

    Args:
        sound: Object exposing ``to_intensity(time_step=...)`` whose result
            provides ``xs()`` (frame times) and ``values[0]`` (frame values).
        time_step: Frame step passed to ``to_intensity``.
        intensity_percentile: Percentile of the contour used as the
            loud/quiet threshold.
        min_segment_sec: Minimum duration for a run to count as a segment.
        min_madd_sec: Minimum duration for a segment to be returned as a
            Madd candidate.

    Returns:
        A list of ``(start, end, duration)`` tuples in time order. The list is
        empty when the contour has no frames or no run is long enough.
    """
    intensity = sound.to_intensity(time_step=time_step)
    times = intensity.xs()
    vals = intensity.values[0]

    # Nothing to threshold: bail out before asking for a percentile of an
    # empty array.
    if len(vals) == 0:
        return []

    loud = vals > np.percentile(vals, intensity_percentile)

    # A run must pass both duration filters, i.e. the stricter of the two.
    min_duration = max(min_segment_sec, min_madd_sec)

    bounds = []
    start = None  # start time of the run currently open, if any
    for t, is_loud in zip(times, loud):
        if is_loud and start is None:
            start = float(t)
        elif not is_loud and start is not None:
            bounds.append((start, float(t)))
            start = None

    # A run still open at the end of the contour is closed at the last frame.
    if start is not None:
        bounds.append((start, float(times[-1])))

    return [(s, e, e - s) for s, e in bounds if e - s >= min_duration]
def madd_words_in_order(canon):
    """
    Collect, in recitation order, every word that carries Madd positions.

    Walks ``canon["ayahs"]`` and each ayah's ``"word_info"`` list, keeping the
    words whose ``"madd_positions_base_index"`` entry is present and non-empty.
    Each kept word is returned as a dict with the keys ``ayah``, ``word``,
    ``base``, ``madd_positions_base_index`` and ``phonemes_fallback`` (the
    last one defaults to an empty string when the word has no such entry).
    """
    return [
        {
            "ayah": ayah_entry["ayah"],
            "word": word_entry["word"],
            "base": word_entry["base"],
            "madd_positions_base_index": word_entry["madd_positions_base_index"],
            "phonemes_fallback": word_entry.get("phonemes_fallback", ""),
        }
        for ayah_entry in canon["ayahs"]
        for word_entry in ayah_entry["word_info"]
        if word_entry.get("madd_positions_base_index")
    ]
def classify_duration(d):
    """Label duration *d* as ``"too_short"``, ``"ok"`` or ``"too_long"``.

    Values strictly below ``TOO_SHORT_SEC`` are too short, values up to and
    including ``OK_MAX_SEC`` are ok, and everything else is too long.
    """
    if d < TOO_SHORT_SEC:
        label = "too_short"
    elif d <= OK_MAX_SEC:
        label = "ok"
    else:
        label = "too_long"
    return label
def confidence_from_duration(d):
    """Return a crude confidence score for the classification of *d*.

    Inside the ok band the score is a flat 0.55. Outside it the score starts
    at 0.60 and grows linearly with the distance from the nearest band edge
    (slope 2.0 below ``TOO_SHORT_SEC``, slope 1.2 above ``OK_MAX_SEC``),
    capped at 0.95.
    """
    if d < TOO_SHORT_SEC:
        # Below the band: distance to the lower edge, steeper slope.
        score = min(0.95, 0.60 + (TOO_SHORT_SEC - d) * 2.0)
    elif d <= OK_MAX_SEC:
        score = 0.55
    else:
        # Above the band: distance to the upper edge, gentler slope.
        score = min(0.95, 0.60 + (d - OK_MAX_SEC) * 1.2)
    return score
def main():
    """Build the Madd feedback report for ``AUDIO_PATH`` and write it as JSON.

    Steps:
      1. Load the canonical JSON from ``CANON_PATH`` and list its
         Madd-eligible words in recitation order.
      2. Load the audio and detect long high-intensity segments.
      3. Pair segments with Madd words purely by position (first segment with
         first word, and so on); surplus segments or words are left unmapped.
      4. Classify each paired segment by duration and attach a confidence
         score and a user-facing tip.
      5. Write the report to ``OUT_PATH`` (creating its parent directory when
         needed) and print a short summary.
    """
    # Local import: only this entry point deals with the output directory.
    import os

    # Load canonical word info
    with open(CANON_PATH, "r", encoding="utf-8") as f:
        canon = json.load(f)
    madd_targets = madd_words_in_order(canon)

    # Load audio
    snd = parselmouth.Sound(AUDIO_PATH)
    longish = extract_long_voiced_segments(snd)

    feedback = {
        "surah": canon["surah"],
        "riwayah": canon["riwayah"],
        "rule": "Madd (MVP heuristic)",
        "audio_path": AUDIO_PATH,
        "notes": [
            "This MVP uses intensity-based voiced segments and maps long segments to Madd-eligible words in order.",
            "Replace with real forced alignment + Quranic-Phonemizer later for Tajweed-accurate placement."
        ],
        "segments_detected": [{"start": s, "end": e, "dur": d} for (s, e, d) in longish],
        "madd_targets": madd_targets,
        "results": []
    }

    # Simple user-facing tips; any label not listed falls back to the OK text.
    tips = {
        "too_short": "Extend the vowel a bit more (madd).",
        "too_long": "Shorten the vowel slightly (avoid over-stretching).",
    }
    ok_tip = "Madd length looks OK."

    # Map segments to madd targets sequentially; zip stops at the shorter list.
    for index, ((s, e, d), tgt) in enumerate(zip(longish, madd_targets), start=1):
        label = classify_duration(d)
        conf = float(round(confidence_from_duration(d), 3))
        feedback["results"].append({
            "index": index,
            "ayah": tgt["ayah"],
            "word": tgt["word"],
            "timestamp": {"start": round(s, 3), "end": round(e, 3)},
            "duration_sec": round(d, 3),
            "classification": label,
            "confidence": conf,
            "tip": tips.get(label, ok_tip)
        })

    # Make sure the output directory exists; otherwise open() raises
    # FileNotFoundError on a fresh checkout.
    out_dir = os.path.dirname(OUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(feedback, f, ensure_ascii=False, indent=2)

    print("OK ✅ wrote", OUT_PATH)
    print("Long segments:", len(longish))
    print("Madd target words:", len(madd_targets))
    print("Mapped results:", len(feedback["results"]))
    if feedback["results"]:
        print("Sample result:", feedback["results"][0])
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()