Spaces:
Running
on
Zero
Running
on
Zero
Seonghyeon Go
committed on
Commit
·
f62dfd2
1
Parent(s):
acb0b29
change design and result
Browse files
- app.py +35 -159
- inference.py +1 -2
app.py
CHANGED
|
@@ -5,10 +5,8 @@ import librosa
|
|
| 5 |
import numpy as np
|
| 6 |
from inference import inference
|
| 7 |
from huggingface_hub import hf_hub_download
|
| 8 |
-
import os
|
| 9 |
from pathlib import Path
|
| 10 |
|
| 11 |
-
|
| 12 |
def download_models_from_hub():
|
| 13 |
"""
|
| 14 |
Download model checkpoints from Hugging Face Model Hub
|
|
@@ -16,7 +14,6 @@ def download_models_from_hub():
|
|
| 16 |
model_dir = Path("checkpoints")
|
| 17 |
model_dir.mkdir(exist_ok=True)
|
| 18 |
|
| 19 |
-
# Original checkpoint filenames on HF Hub
|
| 20 |
models = {
|
| 21 |
"main": "EmbeddingModel_MERT_768-epoch=0073-val_loss=0.1058-val_acc=0.9585-val_f1=0.9366-val_precision=0.9936-val_recall=0.8857.ckpt",
|
| 22 |
"backup": "step=007000-val_loss=0.1831-val_acc=0.9278.ckpt"
|
|
@@ -47,179 +44,64 @@ def download_models_from_hub():
|
|
| 47 |
def detect_ai_audio(audio_file):
|
| 48 |
"""
|
| 49 |
Detect whether the uploaded audio file was generated by AI
|
|
|
|
| 50 |
"""
|
| 51 |
if audio_file is None:
|
| 52 |
-
return ""
|
| 53 |
-
|
| 54 |
-
<div style="font-size: 18px; color: #ff6b6b;">⚠️ Please upload an audio file</div>
|
| 55 |
-
</div>
|
| 56 |
-
"""
|
| 57 |
-
|
| 58 |
try:
|
| 59 |
-
result = inference(audio_file)
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
status = "Human Generated"
|
| 68 |
-
color = "#51cf66"
|
| 69 |
-
confidence = "High confidence this audio was created by humans"
|
| 70 |
-
|
| 71 |
formatted_result = f"""
|
| 72 |
-
<div style="text-align: center; padding:
|
| 73 |
-
<
|
| 74 |
-
<
|
| 75 |
-
<
|
|
|
|
|
|
|
| 76 |
</div>
|
| 77 |
"""
|
| 78 |
-
|
| 79 |
return formatted_result
|
| 80 |
-
|
| 81 |
except Exception as e:
|
| 82 |
-
|
| 83 |
-
<div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
|
| 84 |
-
<div style="font-size: 20px; font-weight: bold; color: #ff6b6b; margin-bottom: 8px;">Error</div>
|
| 85 |
-
<div style="font-size: 14px; color: #666;">Failed to process audio: {str(e)}</div>
|
| 86 |
-
</div>
|
| 87 |
-
"""
|
| 88 |
-
return error_result
|
| 89 |
|
| 90 |
-
#
|
| 91 |
custom_css = """
|
| 92 |
-
|
| 93 |
-
.
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
}
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
.
|
| 100 |
-
background: rgba(255, 255, 255, 0.95) !important;
|
| 101 |
-
backdrop-filter: blur(10px) !important;
|
| 102 |
-
border-radius: 20px !important;
|
| 103 |
-
box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
|
| 104 |
-
margin: 20px !important;
|
| 105 |
-
padding: 30px !important;
|
| 106 |
-
}
|
| 107 |
-
|
| 108 |
-
/* Title styling */
|
| 109 |
-
h1 {
|
| 110 |
-
background: linear-gradient(135deg, #667eea, #764ba2) !important;
|
| 111 |
-
-webkit-background-clip: text !important;
|
| 112 |
-
-webkit-text-fill-color: transparent !important;
|
| 113 |
-
text-align: center !important;
|
| 114 |
-
font-size: 3em !important;
|
| 115 |
-
font-weight: 800 !important;
|
| 116 |
-
margin-bottom: 10px !important;
|
| 117 |
-
}
|
| 118 |
-
|
| 119 |
-
/* Description text */
|
| 120 |
-
.gradio-markdown p {
|
| 121 |
-
text-align: center !important;
|
| 122 |
-
font-size: 1.2em !important;
|
| 123 |
-
color: #555 !important;
|
| 124 |
-
margin-bottom: 30px !important;
|
| 125 |
-
}
|
| 126 |
-
|
| 127 |
-
/* Audio upload component */
|
| 128 |
-
.upload-container {
|
| 129 |
-
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
|
| 130 |
-
border-radius: 15px !important;
|
| 131 |
-
padding: 20px !important;
|
| 132 |
-
border: none !important;
|
| 133 |
-
box-shadow: 0 10px 30px rgba(240, 147, 251, 0.3) !important;
|
| 134 |
-
transition: all 0.3s ease !important;
|
| 135 |
-
}
|
| 136 |
-
|
| 137 |
-
.upload-container:hover {
|
| 138 |
-
transform: translateY(-5px) !important;
|
| 139 |
-
box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important;
|
| 140 |
-
}
|
| 141 |
-
|
| 142 |
-
/* Output container */
|
| 143 |
-
.output-container {
|
| 144 |
-
background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important;
|
| 145 |
-
border-radius: 15px !important;
|
| 146 |
-
padding: 20px !important;
|
| 147 |
-
border: none !important;
|
| 148 |
-
box-shadow: 0 10px 30px rgba(168, 237, 234, 0.3) !important;
|
| 149 |
-
min-height: 150px !important;
|
| 150 |
-
}
|
| 151 |
-
|
| 152 |
-
/* Button styling */
|
| 153 |
-
.gr-button {
|
| 154 |
-
background: linear-gradient(135deg, #667eea, #764ba2) !important;
|
| 155 |
-
border: none !important;
|
| 156 |
-
border-radius: 25px !important;
|
| 157 |
-
padding: 12px 30px !important;
|
| 158 |
-
font-weight: 600 !important;
|
| 159 |
-
color: white !important;
|
| 160 |
-
box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4) !important;
|
| 161 |
-
transition: all 0.3s ease !important;
|
| 162 |
-
}
|
| 163 |
-
|
| 164 |
-
.gr-button:hover {
|
| 165 |
-
transform: translateY(-2px) !important;
|
| 166 |
-
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
|
| 167 |
-
}
|
| 168 |
-
|
| 169 |
-
/* Animation */
|
| 170 |
-
@keyframes fadeInUp {
|
| 171 |
-
from {
|
| 172 |
-
opacity: 0;
|
| 173 |
-
transform: translateY(30px);
|
| 174 |
-
}
|
| 175 |
-
to {
|
| 176 |
-
opacity: 1;
|
| 177 |
-
transform: translateY(0);
|
| 178 |
-
}
|
| 179 |
-
}
|
| 180 |
-
|
| 181 |
-
.gradio-container > div {
|
| 182 |
-
animation: fadeInUp 0.8s ease-out !important;
|
| 183 |
-
}
|
| 184 |
-
|
| 185 |
-
/* Responsive design */
|
| 186 |
@media (max-width: 768px) {
|
| 187 |
-
h1 {
|
| 188 |
-
|
| 189 |
-
}
|
| 190 |
-
|
| 191 |
-
.main-container {
|
| 192 |
-
margin: 10px !important;
|
| 193 |
-
padding: 20px !important;
|
| 194 |
-
}
|
| 195 |
}
|
| 196 |
"""
|
| 197 |
|
| 198 |
-
#
|
| 199 |
print("🚀 Starting FST AI Audio Detection App...")
|
| 200 |
print("📦 Initializing models...")
|
| 201 |
-
|
| 202 |
-
# Download models at startup
|
| 203 |
models = download_models_from_hub()
|
| 204 |
-
|
| 205 |
-
# Check if main model is available
|
| 206 |
if models.get("main"):
|
| 207 |
print("✅ Main model ready for inference")
|
| 208 |
else:
|
| 209 |
print("⚠️ Warning: Main model not available, app may not work properly")
|
| 210 |
|
| 211 |
-
#
|
| 212 |
demo = gr.Interface(
|
| 213 |
fn=detect_ai_audio,
|
| 214 |
-
inputs=gr.Audio(
|
| 215 |
-
|
| 216 |
-
label="Upload Audio File",
|
| 217 |
-
elem_classes=["upload-container"]
|
| 218 |
-
),
|
| 219 |
-
outputs=gr.HTML(
|
| 220 |
-
label="Detection Result",
|
| 221 |
-
elem_classes=["output-container"]
|
| 222 |
-
),
|
| 223 |
title="AI Audio Detector",
|
| 224 |
description="""
|
| 225 |
<div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
|
|
@@ -240,10 +122,4 @@ demo = gr.Interface(
|
|
| 240 |
)
|
| 241 |
|
| 242 |
if __name__ == "__main__":
|
| 243 |
-
demo.launch(
|
| 244 |
-
server_name="0.0.0.0",
|
| 245 |
-
server_port=7860,
|
| 246 |
-
share=True,
|
| 247 |
-
show_api=False,
|
| 248 |
-
show_error=True
|
| 249 |
-
)
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
from inference import inference
|
| 7 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
|
|
|
|
| 10 |
def download_models_from_hub():
|
| 11 |
"""
|
| 12 |
Download model checkpoints from Hugging Face Model Hub
|
|
|
|
| 14 |
model_dir = Path("checkpoints")
|
| 15 |
model_dir.mkdir(exist_ok=True)
|
| 16 |
|
|
|
|
| 17 |
models = {
|
| 18 |
"main": "EmbeddingModel_MERT_768-epoch=0073-val_loss=0.1058-val_acc=0.9585-val_f1=0.9366-val_precision=0.9936-val_recall=0.8857.ckpt",
|
| 19 |
"backup": "step=007000-val_loss=0.1831-val_acc=0.9278.ckpt"
|
|
|
|
| 44 |
def detect_ai_audio(audio_file):
|
| 45 |
"""
|
| 46 |
Detect whether the uploaded audio file was generated by AI
|
| 47 |
+
and format the result based on the standardized output.
|
| 48 |
"""
|
| 49 |
if audio_file is None:
|
| 50 |
+
return "<div>⚠️ Please upload an audio file.</div>"
|
| 51 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
try:
|
| 53 |
+
result = inference(audio_file) # {'prediction': 'Fake', 'confidence': '93.80', ...}
|
| 54 |
+
|
| 55 |
+
prediction = result.get('prediction', 'Unknown')
|
| 56 |
+
confidence = result.get('confidence', '0.00')
|
| 57 |
+
fake_prob = result.get('fake_probability', '0.0')
|
| 58 |
+
real_prob = result.get('real_probability', '0.0')
|
| 59 |
+
raw_output = result.get('raw_output', '')
|
| 60 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
formatted_result = f"""
|
| 62 |
+
<div style="text-align: center; padding: 15px; border-radius: 10px; border: 1px solid #ccc;">
|
| 63 |
+
<h2>Prediction: {prediction}</h2>
|
| 64 |
+
<p>Confidence: {confidence}%</p>
|
| 65 |
+
<p>Fake Probability: {float(fake_prob)*100:.2f}%</p>
|
| 66 |
+
<p>Real Probability: {float(real_prob)*100:.2f}%</p>
|
| 67 |
+
<p>Raw Output: {raw_output}</p>
|
| 68 |
</div>
|
| 69 |
"""
|
|
|
|
| 70 |
return formatted_result
|
| 71 |
+
|
| 72 |
except Exception as e:
|
| 73 |
+
return f"<div>Error processing audio: {str(e)}</div>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
# 심플하고 깔끔한 CSS
|
| 76 |
custom_css = """
|
| 77 |
+
.gradio-container { background: #f5f5f5 !important; min-height: 100vh; }
|
| 78 |
+
.main-container { background: #ffffff !important; border-radius: 15px !important; box-shadow: 0 8px 20px rgba(0,0,0,0.1) !important; margin: 20px auto !important; padding: 30px !important; max-width: 800px; }
|
| 79 |
+
h1 { text-align: center !important; font-size: 2.5em !important; font-weight: 700 !important; margin-bottom: 15px !important; color: #333 !important; }
|
| 80 |
+
.gradio-markdown p { text-align: center !important; font-size: 1.1em !important; color: #555 !important; margin-bottom: 20px !important; }
|
| 81 |
+
.upload-container { background: #f0f0f0 !important; border-radius: 10px !important; padding: 15px !important; border: 1px solid #ccc !important; margin-bottom: 20px !important; }
|
| 82 |
+
.output-container { background: #fafafa !important; border-radius: 10px !important; padding: 15px !important; border: 1px solid #ccc !important; min-height: 150px !important; }
|
| 83 |
+
.gr-button { background: #667eea !important; color: #fff !important; border: none !important; border-radius: 20px !important; padding: 10px 25px !important; font-weight: 600 !important; transition: all 0.2s ease !important; }
|
| 84 |
+
.gr-button:hover { background: #5563c1 !important; transform: translateY(-2px) !important; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
@media (max-width: 768px) {
|
| 86 |
+
h1 { font-size: 2em !important; }
|
| 87 |
+
.main-container { margin: 10px !important; padding: 20px !important; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
}
|
| 89 |
"""
|
| 90 |
|
| 91 |
+
# 초기화
|
| 92 |
print("🚀 Starting FST AI Audio Detection App...")
|
| 93 |
print("📦 Initializing models...")
|
|
|
|
|
|
|
| 94 |
models = download_models_from_hub()
|
|
|
|
|
|
|
| 95 |
if models.get("main"):
|
| 96 |
print("✅ Main model ready for inference")
|
| 97 |
else:
|
| 98 |
print("⚠️ Warning: Main model not available, app may not work properly")
|
| 99 |
|
| 100 |
+
# Gradio 인터페이스
|
| 101 |
demo = gr.Interface(
|
| 102 |
fn=detect_ai_audio,
|
| 103 |
+
inputs=gr.Audio(type="filepath", label="Upload Audio File", elem_classes=["upload-container"]),
|
| 104 |
+
outputs=gr.HTML(label="Detection Result", elem_classes=["output-container"]),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
title="AI Audio Detector",
|
| 106 |
description="""
|
| 107 |
<div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
|
|
|
|
| 122 |
)
|
| 123 |
|
| 124 |
if __name__ == "__main__":
|
| 125 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, share=True, show_api=False, show_error=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference.py
CHANGED
|
@@ -241,8 +241,7 @@ def inference(audio_path):
|
|
| 241 |
|
| 242 |
# 결과 출력
|
| 243 |
print(f"Results: {results}")
|
| 244 |
-
|
| 245 |
-
|
| 246 |
|
| 247 |
|
| 248 |
return results
|
|
|
|
| 241 |
|
| 242 |
# 결과 출력
|
| 243 |
print(f"Results: {results}")
|
| 244 |
+
|
|
|
|
| 245 |
|
| 246 |
|
| 247 |
return results
|