Spaces:

iRecite
/

iRecite-MVP-API

Running

iRecite-MVP-API / step9_madd_feedback_json.py

didodev

Deploy iRecite MVP API (Docker + FastAPI)

4ca6263 19 days ago

4.41 kB

	import json
	import numpy as np
	import parselmouth

	# File locations used by main(): the input recording, the canonical word
	# data (JSON), and the JSON report that main() writes.
	AUDIO_PATH = "sample_trim.wav"
	CANON_PATH = "data/fatiha_canonical_fallback.json"
	OUT_PATH = "output/feedback_madd.json"

	# --- Heuristic thresholds (MVP) ---
	# Quranic madd lengths depend on rule; for MVP we just classify by duration.
	# Durations strictly below TOO_SHORT_SEC are labelled "too_short".
	TOO_SHORT_SEC = 0.15
	# Durations up to and including OK_MAX_SEC are labelled "ok"; anything
	# longer is labelled "too_long".
	OK_MAX_SEC = 0.35
	# NOTE(review): same value as OK_MAX_SEC and not referenced by any code
	# visible in this file -- confirm whether it is still needed.
	TOO_LONG_SEC = 0.35

	def extract_long_voiced_segments(sound: parselmouth.Sound):
	    """Find sustained loud stretches of ``sound`` to use as Madd candidates.

	    An intensity contour is computed with a 0.01 time step. A frame is
	    treated as "voiced" when its intensity is above the 60th percentile of
	    the whole contour (no pitch analysis is involved). Consecutive voiced
	    frames form a run that starts at the time of its first voiced frame and
	    ends at the time of the first non-voiced frame after it, or at the last
	    frame time when the run reaches the end of the contour.

	    Returns:
	        A list of ``(start, end, duration)`` tuples of Python floats, in
	        time order, containing only the runs whose duration is at least
	        0.18 (the rest of this file treats these values as seconds).
	    """
	    contour = sound.to_intensity(time_step=0.01)
	    frame_times = contour.xs()
	    levels = contour.values[0]

	    # Frames louder than the 60th percentile count as "voiced".
	    loud_mask = levels > np.percentile(levels, 60)

	    runs = []
	    run_start = None  # None while we are outside a loud run

	    for frame_time, is_loud in zip(frame_times, loud_mask):
	        if is_loud:
	            # Only the first loud frame of a run sets the start time.
	            if run_start is None:
	                run_start = float(frame_time)
	        elif run_start is not None:
	            # First quiet frame after a run: close the run here.
	            run_end = float(frame_time)
	            if run_end - run_start >= 0.06:
	                runs.append((run_start, run_end))
	            run_start = None

	    # A run still open at the end of the contour is closed at the last frame.
	    if run_start is not None:
	        run_end = float(frame_times[-1])
	        if run_end - run_start >= 0.06:
	            runs.append((run_start, run_end))

	    # Keep only the longer runs as Madd candidates.
	    candidates = []
	    for begin, finish in runs:
	        length = finish - begin
	        if length >= 0.18:
	            candidates.append((begin, finish, length))
	    return candidates

	def madd_words_in_order(canon):
	    """Collect the Madd-eligible words of ``canon`` in their listed order.

	    Walks ``canon["ayahs"]`` and, within each ayah, its ``"word_info"``
	    list. A word is kept only when its ``"madd_positions_base_index"``
	    value is present and truthy (a missing key or an empty list is skipped).

	    Returns:
	        A list of dicts with the keys ``"ayah"``, ``"word"``, ``"base"``,
	        ``"madd_positions_base_index"`` and ``"phonemes_fallback"`` (the
	        latter defaults to ``""`` when the word has no such key).
	    """
	    return [
	        {
	            "ayah": verse["ayah"],
	            "word": entry["word"],
	            "base": entry["base"],
	            "madd_positions_base_index": entry["madd_positions_base_index"],
	            "phonemes_fallback": entry.get("phonemes_fallback", ""),
	        }
	        for verse in canon["ayahs"]
	        for entry in verse["word_info"]
	        if entry.get("madd_positions_base_index")
	    ]

	def classify_duration(d):
	    """Label a segment duration ``d`` against the module thresholds.

	    Returns ``"too_short"`` when ``d`` is strictly below ``TOO_SHORT_SEC``,
	    ``"ok"`` when it is at most ``OK_MAX_SEC``, and ``"too_long"`` otherwise.
	    """
	    if d < TOO_SHORT_SEC:
	        label = "too_short"
	    elif d <= OK_MAX_SEC:
	        label = "ok"
	    else:
	        label = "too_long"
	    return label

	def confidence_from_duration(d):
	    """Return a crude confidence score for the label given to duration ``d``.

	    Inside the "ok" band (``TOO_SHORT_SEC <= d <= OK_MAX_SEC``) the score is
	    a flat 0.55. Outside it the score starts at 0.60 and grows linearly with
	    the distance from the band (slope 2.0 below, 1.2 above), capped at 0.95.
	    """
	    # The farther the duration lies from the ok band, the higher the score.
	    if d < TOO_SHORT_SEC:
	        shortfall = TOO_SHORT_SEC - d
	        score = min(0.95, 0.60 + shortfall * 2.0)
	    elif d <= OK_MAX_SEC:
	        score = 0.55
	    else:
	        excess = d - OK_MAX_SEC
	        score = min(0.95, 0.60 + excess * 1.2)
	    return score

	def main():
	    """Run the Madd feedback heuristic once and write the JSON report.

	    Steps:
	      1. Load the canonical word data from ``CANON_PATH`` and list its
	         Madd-eligible words with ``madd_words_in_order``.
	      2. Load ``AUDIO_PATH`` and extract long loud segments with
	         ``extract_long_voiced_segments``.
	      3. Pair the i-th segment with the i-th Madd-eligible word; surplus
	         segments or words on either side are left unpaired.
	      4. Write the report to ``OUT_PATH`` (creating its parent directory if
	         needed) and print a short summary to stdout.

	    Raises:
	        OSError: if the canonical file cannot be read or the report cannot
	            be written.
	        KeyError: if the canonical JSON lacks ``"surah"`` or ``"riwayah"``
	            (or a key read by ``madd_words_in_order``).
	    """
	    # Local import so this function does not depend on a file-level import.
	    from pathlib import Path

	    # Load canonical word info
	    with open(CANON_PATH, "r", encoding="utf-8") as f:
	        canon = json.load(f)

	    madd_targets = madd_words_in_order(canon)

	    # Load audio
	    snd = parselmouth.Sound(AUDIO_PATH)
	    longish = extract_long_voiced_segments(snd)

	    feedback = {
	        "surah": canon["surah"],
	        "riwayah": canon["riwayah"],
	        "rule": "Madd (MVP heuristic)",
	        "audio_path": AUDIO_PATH,
	        "notes": [
	            "This MVP uses intensity-based voiced segments and maps long segments to Madd-eligible words in order.",
	            "Replace with real forced alignment + Quranic-Phonemizer later for Tajweed-accurate placement."
	        ],
	        "segments_detected": [{"start": s, "end": e, "dur": d} for (s, e, d) in longish],
	        "madd_targets": madd_targets,
	        "results": []
	    }

	    # Simple user-facing tips; any label not listed here gets the "OK" tip.
	    tips = {
	        "too_short": "Extend the vowel a bit more (madd).",
	        "too_long": "Shorten the vowel slightly (avoid over-stretching).",
	    }

	    # Map segments to madd targets sequentially. zip() stops at the shorter
	    # list, so only min(len(longish), len(madd_targets)) pairs are produced.
	    for index, ((s, e, d), tgt) in enumerate(zip(longish, madd_targets), start=1):
	        label = classify_duration(d)
	        conf = float(round(confidence_from_duration(d), 3))
	        tip = tips.get(label, "Madd length looks OK.")

	        feedback["results"].append({
	            "index": index,
	            "ayah": tgt["ayah"],
	            "word": tgt["word"],
	            "timestamp": {"start": round(s, 3), "end": round(e, 3)},
	            "duration_sec": round(d, 3),
	            "classification": label,
	            "confidence": conf,
	            "tip": tip
	        })

	    # open(..., "w") does not create missing directories, so make sure the
	    # report's parent directory exists before writing.
	    Path(OUT_PATH).parent.mkdir(parents=True, exist_ok=True)
	    with open(OUT_PATH, "w", encoding="utf-8") as f:
	        json.dump(feedback, f, ensure_ascii=False, indent=2)

	    print("OK ✅ wrote", OUT_PATH)
	    print("Long segments:", len(longish))
	    print("Madd target words:", len(madd_targets))
	    print("Mapped results:", len(feedback["results"]))
	    if feedback["results"]:
	        print("Sample result:", feedback["results"][0])


	# Run the pipeline only when this file is executed as a script.
	if __name__ == "__main__":
	    main()