#!/usr/bin/env python3 """ Parse backend/logs/intent/low_confidence.csv and export a Markdown backlog. """ from __future__ import annotations import argparse import csv from collections import defaultdict from datetime import datetime from pathlib import Path def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Export low-confidence intent queries.") parser.add_argument( "--output-date", default=datetime.utcnow().strftime("%Y-%m-%d"), help="Ngày (YYYY-MM-DD) dùng cho thư mục báo cáo.", ) return parser.parse_args() def load_backlog(log_path: Path) -> list[dict[str, str]]: if not log_path.exists(): return [] rows: list[dict[str, str]] = [] with log_path.open("r", encoding="utf-8") as fp: reader = csv.DictReader(fp) for row in reader: rows.append(row) return rows def render_markdown(rows: list[dict[str, str]]) -> str: lines = [ "", "# Backlog truy vấn low-confidence", "", f"Tổng số mẫu: {len(rows)}", "", ] by_intent: dict[str, list[dict[str, str]]] = defaultdict(list) for row in rows: by_intent[row.get("intent", "unknown")].append(row) for intent, samples in sorted(by_intent.items()): lines.append(f"## Intent: {intent} ({len(samples)} mẫu)") lines.append("") lines.append("| Thời gian | Confidence | Route | Query |") lines.append("| --- | --- | --- | --- |") for sample in samples[:50]: lines.append( f"| {sample.get('timestamp','')} | {sample.get('confidence','')} " f"| {sample.get('route','')} | {sample.get('query','').strip()} |" ) lines.append("") if not by_intent: lines.append("_Chưa có dữ liệu._") return "\n".join(lines).strip() + "\n" def main() -> None: repo_root = Path(__file__).resolve().parents[2] args = parse_args() log_path = repo_root / "backend" / "logs" / "intent" / "low_confidence.csv" rows = load_backlog(log_path) output_dir = repo_root / "tài nguyên" / "báo cáo" / args.output_date / "backend" output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / "intent_backlog.md" markdown = render_markdown(rows) output_path.write_text(markdown, encoding="utf-8") print(f"✅ Wrote {len(rows)} entries to {output_path}") if __name__ == "__main__": main()