Spaces:
Sleeping
Sleeping
| # src/merge_features.py | |
| from pathlib import Path | |
| import pandas as pd | |
| from src.semantic_features import add_semantic_similarity | |
| from src.features_q4 import q4_slot_features # <- берём функцию с признаками для Q4 | |
# Directory two levels above this file (the parent of the folder that
# contains this module).
ROOT = Path(__file__).resolve().parents[1]

# Input: baseline feature table read by main().
FEAT_PATH = ROOT.joinpath("data", "processed", "features_baseline.csv")

# Output: feature table written by main() after the extra columns are added.
OUT_PATH = ROOT.joinpath("data", "processed", "features_with_semantics_q4.csv")
def main():
    """Build the merged feature table and write it to ``OUT_PATH``.

    Steps, in order:

    1. Read the baseline features CSV from ``FEAT_PATH``.
    2. Pass the frame through ``add_semantic_similarity``.
    3. Pass the result through ``q4_slot_features``.
    4. Save the final frame to ``OUT_PATH`` as CSV (no index column).
    5. Print the head of a fixed set of columns as a preview.

    Progress messages are flushed immediately so they show up before each
    step starts.
    """

    def announce(message):
        # Single place for the "print and flush right away" behaviour.
        print(message, flush=True)

    preview_columns = [
        'question_number',
        'semantic_sim',
        'q4_slots_covered',
        'q4_answered_personal',
        'score',
    ]

    announce(f"🔹 Читаю базовые фичи: {FEAT_PATH}")
    features = pd.read_csv(FEAT_PATH, encoding="utf-8-sig")

    announce("🔹 Добавляю семантическую близость (ruSBERT)...")
    # Original author's note: this call will use the cache.
    features = add_semantic_similarity(features, batch_size=64)

    announce("🔹 Добавляю rule-based признаки для Q4...")
    features = q4_slot_features(features)

    announce(f"🔹 Сохраняю итог: {OUT_PATH}")
    features.to_csv(OUT_PATH, index=False, encoding="utf-8-sig")

    announce("✅ Готово. Превью:")
    preview = features[preview_columns].head()
    announce(preview)
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()