Spaces:
Runtime error
Runtime error
| from flask import Flask, request, jsonify, send_from_directory | |
| import base64 | |
| import os | |
| import shutil | |
| import numpy as np | |
| from pyannote.audio import Model, Inference | |
| from pydub import AudioSegment | |
# Hugging Face access token, read from the "HF" environment variable.
hf_token = os.environ.get("HF")
if not hf_token:
    # The message names the variable that is actually read ("HF"); an empty
    # value is rejected as well, since it cannot authenticate either.
    raise ValueError("環境変数 HF が設定されていません。")

# Use a writable cache directory.
cache_dir = "/tmp/hf_cache"
os.makedirs(cache_dir, exist_ok=True)

# Load the model, passing both the auth token and the cache directory.
model = Model.from_pretrained("pyannote/embedding", use_auth_token=hf_token, cache_dir=cache_dir)
inference = Inference(model)
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Both inputs are scaled to unit length and their dot product is returned.

    Args:
        vec1: First vector (array-like).
        vec2: Second vector (array-like), same length as ``vec1``.

    Returns:
        The dot product of the two normalized vectors, or ``0.0`` when either
        vector has zero length (its direction is undefined, so dividing by
        its norm would yield NaN).
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    # Guard against division by zero for all-zero vectors.
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(vec1 / norm1, vec2 / norm2)
def segment_audio(path, target_path='/tmp/setup_voice', seg_duration=1.0):
    """Split an audio file into consecutive fixed-length WAV segments.

    Segments are written to ``target_path`` as ``0.wav``, ``1.wav``, ... in
    playback order. The last segment is shorter when the audio length is not
    a multiple of ``seg_duration``.

    Args:
        path: Audio file to split; loaded with ``AudioSegment.from_file``.
        target_path: Directory that receives the segment files. It is created
            if missing, and numbered ``.wav`` files already in it are removed.
        seg_duration: Segment length in seconds.

    Returns:
        A ``(target_path, duration_ms)`` tuple, where ``duration_ms`` is
        ``len()`` of the loaded audio.

    Raises:
        ValueError: If ``seg_duration`` is shorter than one millisecond.
    """
    seg_duration_ms = int(seg_duration * 1000)
    if seg_duration_ms <= 0:
        # A zero step would make range() fail with an unrelated message and a
        # negative one would silently produce no segments at all.
        raise ValueError("seg_duration must be at least 0.001 seconds")

    os.makedirs(target_path, exist_ok=True)

    # Drop numbered segments left behind by an earlier call. The directory is
    # reused between calls, so leftovers from a longer recording would
    # otherwise be mistaken for segments of this one.
    for name in os.listdir(target_path):
        stem, ext = os.path.splitext(name)
        stale_file = os.path.join(target_path, name)
        if ext == '.wav' and stem.isdigit() and os.path.isfile(stale_file):
            os.remove(stale_file)

    base_sound = AudioSegment.from_file(path)
    duration_ms = len(base_sound)
    for i, start in enumerate(range(0, duration_ms, seg_duration_ms)):
        # Clamp the final slice to the end of the audio.
        end = min(start + seg_duration_ms, duration_ms)
        segment = base_sound[start:end]
        segment.export(os.path.join(target_path, f'{i}.wav'), format="wav")
    return target_path, duration_ms
def calculate_similarity(path1, path2):
    """Return the cosine similarity between the embeddings of two audio files.

    Each file is passed through the module-level ``inference`` object; the
    resulting ``.data`` is flattened to one dimension and the two vectors are
    compared with ``cosine_similarity``. The result is a plain ``float``.
    """
    # NOTE(review): the flattened vectors must have equal length for the dot
    # product to work; whether ``inference`` guarantees that for files of
    # different duration depends on its windowing configuration -- confirm.
    flattened = [inference(audio_path).data.flatten() for audio_path in (path1, path2)]
    score = cosine_similarity(flattened[0], flattened[1])
    return float(score)
def process_audio(reference_path, input_path, output_folder='/tmp/data/matched_segments', seg_duration=1.0, threshold=0.5):
    """Measure how much of an audio file is similar to a reference audio file.

    The input is split into ``seg_duration``-second segments; every segment
    whose similarity to the reference exceeds ``threshold`` is copied into
    ``output_folder`` and its length is added to the matched total.

    Args:
        reference_path: Audio file each segment is compared against.
        input_path: Audio file to analyse.
        output_folder: Directory that receives copies of matching segments;
            created if missing.
        seg_duration: Segment length in seconds.
        threshold: A segment matches when its similarity is strictly greater
            than this value.

    Returns:
        A ``(matched_time_ms, unmatched_time_ms)`` tuple; the two values sum
        to the total duration reported by ``segment_audio``.
    """
    os.makedirs(output_folder, exist_ok=True)
    base_path, total_duration_ms = segment_audio(input_path, seg_duration=seg_duration)

    # segment_audio writes one file per seg_duration step, named 0.wav,
    # 1.wav, ...  Walking exactly those indices (instead of listing the
    # directory) keeps unrelated or stale files out of the result and visits
    # the segments in numeric rather than lexicographic order.
    segment_count = len(range(0, total_duration_ms, int(seg_duration * 1000)))

    matched_time_ms = 0
    for index in range(segment_count):
        segment_file = os.path.join(base_path, f'{index}.wav')
        if calculate_similarity(segment_file, reference_path) > threshold:
            shutil.copy(segment_file, output_folder)
            matched_time_ms += len(AudioSegment.from_file(segment_file))

    unmatched_time_ms = total_duration_ms - matched_time_ms
    return matched_time_ms, unmatched_time_ms
app = Flask(__name__)


# NOTE(review): no route registration for this view is visible in the file,
# so it would never be reachable. The root path '/' is assumed -- confirm.
@app.route('/')
def index():
    """Serve the ``index.html`` file via ``send_from_directory('.', ...)``."""
    return send_from_directory('.', 'index.html')
# NOTE(review): no route registration for this view is visible in the file.
# The path is taken from the handler's own log message; POST is assumed
# because the handler reads a JSON request body -- confirm.
@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """Score an uploaded recording against the reference audio.

    Expects a JSON object with an ``audio_data`` key holding the
    base64-encoded audio. The decoded bytes are written to
    ``/tmp/data/recorded_audio.wav`` and compared segment by segment with
    ``./sample.wav`` using a similarity threshold of 0.1.

    Returns:
        ``({"rate": <percent>}, 200)`` where the rate is the matched share of
        the recording's duration (0 for an empty recording);
        ``({"error": ...}, 400)`` when the body is not a JSON object, lacks
        ``audio_data`` or the data is not valid base64;
        ``({"error": ..., "details": ...}, 500)`` when the reference file is
        missing or processing fails.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body instead
        # of raising, so bad requests get a 400 rather than falling into the
        # blanket handler below and being reported as a server error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'audio_data' not in data:
            return jsonify({"error": "音声データがありません"}), 400

        try:
            audio_binary = base64.b64decode(data['audio_data'])
        except (ValueError, TypeError):
            # binascii.Error (bad padding) is a ValueError; a non-string value
            # raises TypeError. Both are client errors.
            return jsonify({"error": "音声データのデコードに失敗しました"}), 400

        audio_path = "/tmp/data/recorded_audio.wav"
        os.makedirs(os.path.dirname(audio_path), exist_ok=True)
        with open(audio_path, 'wb') as f:
            f.write(audio_binary)

        # Make sure the reference audio path is correct: either change it to
        # the absolute path of sample.wav or place the file in this location.
        reference_audio = './sample.wav'
        if not os.path.exists(reference_audio):
            return jsonify({"error": "参照音声ファイルが見つかりません", "details": reference_audio}), 500

        matched_time, unmatched_time = process_audio(reference_audio, audio_path, threshold=0.1)
        total_time = matched_time + unmatched_time
        rate = (matched_time / total_time) * 100 if total_time > 0 else 0
        return jsonify({"rate": rate}), 200
    except Exception as e:
        # Top-level boundary: log the failure with its traceback and report a
        # generic server error to the client.
        app.logger.exception("Error in /upload_audio: %s", e)
        return jsonify({"error": "サーバーエラー", "details": str(e)}), 500
if __name__ == '__main__':
    # The server listens on every interface, and Flask's debug mode enables
    # the interactive debugger, which lets anyone who can reach it execute
    # code. Debug mode is therefore opt-in via FLASK_DEBUG instead of being
    # always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)