Spaces:

f2ai
/

exp-audio-recorder

Sleeping

App Files Files Community

exp-audio-recorder / model /Deep /model.py

f2ai

Upload folder using huggingface_hub

ad93d56 verified 7 months ago

raw

history blame contribute delete

5.73 kB

	import pandas as pd
	import re

	# =============================================================================
	# CONFIGURATION - input file locations; edit these to point at your data
	# =============================================================================
	ASR_RESULTS_PATH = "asr_results (2).csv"
	SUBMISSION_PATH = "75.csv"

	# =============================================================================
	# LOAD DATA
	# =============================================================================
	print("Loading data files...")
	# Read the ASR results first, then the submission table, in that order.
	df_asr, df_submission = (
	    pd.read_csv(csv_path) for csv_path in (ASR_RESULTS_PATH, SUBMISSION_PATH)
	)


	# =============================================================================
	# FUNCTION DEFINITIONS
	# =============================================================================
	def has_request_time(text):
	    """Return True if *text* contains a time request such as "ขอเวลา 5 นาที".

	    The pattern is the phrase "ขอเวลา", one whitespace character, a run of
	    digits, one whitespace character, and then either "นาที" (minutes) or
	    "ชั่วโมง" (hours).

	    Args:
	        text: Value to search. It is converted with ``str()`` first, so
	            non-string values (for example NaN) are accepted.

	    Returns:
	        bool: True if the pattern is found, False otherwise.
	    """
	    # The alternation must be a bare "|". Writing it as "\|" makes the regex
	    # look for a literal pipe character, i.e. the exact text "นาที|ชั่วโมง",
	    # instead of accepting either unit.
	    return bool(re.search(r"ขอเวลา\s\d+\s(?:นาที|ชั่วโมง)", str(text)))


	def name_match(row, transcription):
	    """Check whether both the first and last name appear in the transcription.

	    Args:
	        row: Mapping-like object (for example a DataFrame row) providing
	            "first_name" and "last_name" entries.
	        transcription: Text to search in. It is converted with ``str()``.

	    Returns:
	        bool: True if both names occur as substrings of the transcription,
	        False otherwise. A missing (None/NaN) or blank name never matches.
	    """
	    names = []
	    for key in ("first_name", "last_name"):
	        value = row[key]
	        # A missing value would otherwise be stringified to "nan"/"None" and
	        # compared as if it were a real name.
	        if pd.isna(value):
	            return False
	        name = str(value).strip()
	        # An empty string is a substring of every text, so a blank name
	        # would match every transcription.
	        if not name:
	            return False
	        names.append(name)

	    text = str(transcription)
	    return all(name in text for name in names)


	# =============================================================================
	# TASK 1: Find rows containing "สวัสดี" (greeting)
	# =============================================================================
	print("\n=== TASK 1: Finding greeting patterns ===")
	# Search every column (rendered as text) for the greeting, one vectorized
	# pass per column rather than one Python-level call per row. Starting from
	# an explicit all-False boolean Series keeps the mask a valid row filter
	# even when df_asr has no rows, where a row-wise apply() would not return
	# a boolean mask.
	greeting_mask = pd.Series(False, index=df_asr.index, dtype=bool)
	for _, column_values in df_asr.items():
	    greeting_mask |= column_values.astype(str).str.contains(
	        "สวัสดี", regex=False, na=False
	    )
	rows_with_sawasdee = df_asr[greeting_mask]
	print(f"Found {len(rows_with_sawasdee)} rows with greeting patterns")
	print(rows_with_sawasdee)

	# Update submission file for greeting column: only rows whose id had a
	# greeting AND that are currently False are counted and flipped.
	matching_ids_greeting = set(rows_with_sawasdee["id"])
	# "== False" is an element-wise pandas comparison here, not a Python truth
	# test, so it must not be rewritten as "is False" or "not ...".
	mask_greeting = df_submission["id"].isin(matching_ids_greeting) & (
	    df_submission["กล่าวสวัสดี"] == False
	)
	num_changed_greeting = mask_greeting.sum()

	# Apply changes to submission file
	df_submission.loc[mask_greeting, "กล่าวสวัสดี"] = True
	print(f"Number of rows updated for greeting: {num_changed_greeting}")

	# =============================================================================
	# TASK 2: Find rows with time request patterns
	# =============================================================================
	print("\n=== TASK 2: Finding time request patterns ===")
	# Flag each transcription that contains a time request, then keep those rows.
	is_time_request = df_asr["transcription"].apply(has_request_time)
	rows_with_request_time = df_asr[is_time_request]
	print(f"Found {len(rows_with_request_time)} rows with time request patterns")
	print(rows_with_request_time[["id", "transcription"]])

	# Submission rows to flip: id has a time request AND the flag is still False.
	request_time_ids = set(rows_with_request_time["id"])
	id_has_time_request = df_submission["id"].isin(request_time_ids)
	flag_still_false = df_submission["บอกระยะเวลาที่ใช้ในการเข้าพบ"].eq(False)
	mask_time = id_has_time_request & flag_still_false
	num_changed_time = mask_time.sum()

	# Write the new flag values into the submission table.
	df_submission.loc[mask_time, "บอกระยะเวลาที่ใช้ในการเข้าพบ"] = True
	print(f"Number of rows updated for time request: {num_changed_time}")

	# =============================================================================
	# TASK 3: Name matching analysis
	# =============================================================================
	print("\n=== TASK 3: Name matching analysis ===")

	# For every person in the (already updated) submission table, collect each
	# ASR row whose transcription mentions both of that person's names.
	matched_rows = []
	for _, sub_row in df_submission.iterrows():
	    both_names_present = df_asr["transcription"].apply(
	        lambda text, person=sub_row: name_match(person, text)
	    )
	    matches = df_asr[both_names_present]
	    matched_rows.extend(
	        {
	            "id": asr_row["id"],
	            "first_name": sub_row["first_name"],
	            "last_name": sub_row["last_name"],
	            "transcription": asr_row["transcription"],
	        }
	        for _, asr_row in matches.iterrows()
	    )

	# Tabulate the collected matches and show them.
	df_matched = pd.DataFrame(matched_rows)
	print(f"Found {len(df_matched)} name matches")
	print(df_matched)

	# =============================================================================
	# SAVE UPDATED SUBMISSION FILE
	# =============================================================================
	print("\n=== SAVING UPDATED SUBMISSION FILE ===")
	# NOTE: this writes back to the same path the submission was loaded from.
	df_submission.to_csv(SUBMISSION_PATH, index=False)
	print(f"Updated submission file saved as: {SUBMISSION_PATH}")

	# =============================================================================
	# SUMMARY
	# =============================================================================
	print("\n=== PROCESSING SUMMARY ===")
	summary_lines = (
	    f"✓ Greeting patterns found: {len(rows_with_sawasdee)}",
	    f"✓ Greeting updates applied: {num_changed_greeting}",
	    f"✓ Time request patterns found: {len(rows_with_request_time)}",
	    f"✓ Time request updates applied: {num_changed_time}",
	    f"✓ Name matches found: {len(df_matched)}",
	    f"✓ All changes applied to: {SUBMISSION_PATH}",
	    f"✓ Source data from: {ASR_RESULTS_PATH}",
	)
	for summary_line in summary_lines:
	    print(summary_line)