Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,8 @@ import torchvision.transforms as transforms
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import numpy as np
|
|
|
|
|
|
|
| 8 |
from huggingface_hub import snapshot_download, HfApi
|
| 9 |
from transformers import CLIPTokenizer
|
| 10 |
|
|
@@ -16,6 +18,7 @@ HUB_JSON = "leaderboard.json"
|
|
| 16 |
MODEL_PATH = "mobilenet_v2_fake_detector.onnx"
|
| 17 |
CLIP_IMAGE_ENCODER_PATH = "clip_image_encoder.onnx"
|
| 18 |
CLIP_TEXT_ENCODER_PATH = "clip_text_encoder.onnx"
|
|
|
|
| 19 |
PROMPT_MATCH_THRESHOLD = 10 # percent
|
| 20 |
|
| 21 |
# --- Download leaderboard + model checkpoint from HF Hub ---
|
|
@@ -26,13 +29,28 @@ def load_assets():
|
|
| 26 |
local_dir=".",
|
| 27 |
repo_type="dataset",
|
| 28 |
token=HF_TOKEN,
|
| 29 |
-
allow_patterns=[HUB_JSON, MODEL_PATH, CLIP_IMAGE_ENCODER_PATH, CLIP_TEXT_ENCODER_PATH]
|
| 30 |
)
|
| 31 |
except Exception as e:
|
| 32 |
print(f"Failed to load assets from HF Hub: {e}")
|
| 33 |
|
| 34 |
load_assets()
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# --- Load leaderboard ---
|
| 37 |
def load_leaderboard():
|
| 38 |
try:
|
|
@@ -82,12 +100,10 @@ transform = transforms.Compose([
|
|
| 82 |
|
| 83 |
def compute_prompt_match(image: Image.Image, prompt: str) -> float:
|
| 84 |
try:
|
| 85 |
-
# Encode image
|
| 86 |
img_tensor = transform(image).unsqueeze(0).numpy().astype(np.float32)
|
| 87 |
image_features = clip_image_sess.run(None, {clip_image_sess.get_inputs()[0].name: img_tensor})[0][0]
|
| 88 |
-
image_features /= np.linalg.norm(image_features)
|
| 89 |
|
| 90 |
-
# Encode text
|
| 91 |
inputs = clip_tokenizer(prompt, return_tensors="np", padding="max_length", truncation=True, max_length=77)
|
| 92 |
input_ids = inputs["input_ids"]
|
| 93 |
attention_mask = inputs["attention_mask"]
|
|
@@ -95,16 +111,14 @@ def compute_prompt_match(image: Image.Image, prompt: str) -> float:
|
|
| 95 |
clip_text_sess.get_inputs()[0].name: input_ids,
|
| 96 |
clip_text_sess.get_inputs()[1].name: attention_mask
|
| 97 |
})[0][0]
|
| 98 |
-
text_features /= np.linalg.norm(text_features)
|
| 99 |
|
| 100 |
-
# Cosine similarity
|
| 101 |
sim = np.dot(image_features, text_features)
|
| 102 |
return round(sim * 100, 2)
|
| 103 |
except Exception as e:
|
| 104 |
print(f"CLIP ONNX match failed: {e}")
|
| 105 |
return 0.0
|
| 106 |
|
| 107 |
-
|
| 108 |
# --- Main prediction logic ---
|
| 109 |
def detect_with_model(image: Image.Image, prompt: str, username: str):
|
| 110 |
if not username.strip():
|
|
@@ -118,7 +132,7 @@ def detect_with_model(image: Image.Image, prompt: str, username: str):
|
|
| 118 |
image_tensor = transforms.Resize((224, 224))(image)
|
| 119 |
image_tensor = transforms.ToTensor()(image_tensor).unsqueeze(0).numpy().astype(np.float32)
|
| 120 |
outputs = session.run(None, {input_name: image_tensor})
|
| 121 |
-
prob = round(1 / (1 + np.exp(-outputs[0][0][0])), 2)
|
| 122 |
prediction = "Real" if prob > 0.5 else "Fake"
|
| 123 |
|
| 124 |
score = 1 if prediction == "Real" else 0
|
|
@@ -146,6 +160,9 @@ def detect_with_model(image: Image.Image, prompt: str, username: str):
|
|
| 146 |
)
|
| 147 |
|
| 148 |
# --- UI Layout ---
|
|
|
|
|
|
|
|
|
|
| 149 |
with gr.Blocks(css=".gr-button {font-size: 16px !important}") as demo:
|
| 150 |
gr.Markdown("## ๐ OpenFake Arena")
|
| 151 |
gr.Markdown("Welcome to the OpenFake Arena!\n\n**Your mission:** Generate a synthetic image for the prompt, upload it, and try to fool the AI detector into thinking it’s real.\n\n**Rules:**\n- Only synthetic images allowed!\n- No cheating with real photos.\n- Licensing is your responsibility.\n\nMake it wild. Make it weird. Most of all — make it fun.")
|
|
@@ -157,7 +174,7 @@ with gr.Blocks(css=".gr-button {font-size: 16px !important}") as demo:
|
|
| 157 |
prompt_input = gr.Textbox(
|
| 158 |
label="Suggested Prompt",
|
| 159 |
placeholder="e.g., A portrait photograph of a politician delivering a speech...",
|
| 160 |
-
value=
|
| 161 |
lines=2
|
| 162 |
)
|
| 163 |
|
|
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import random
|
| 10 |
from huggingface_hub import snapshot_download, HfApi
|
| 11 |
from transformers import CLIPTokenizer
|
| 12 |
|
|
|
|
| 18 |
MODEL_PATH = "mobilenet_v2_fake_detector.onnx"
|
| 19 |
CLIP_IMAGE_ENCODER_PATH = "clip_image_encoder.onnx"
|
| 20 |
CLIP_TEXT_ENCODER_PATH = "clip_text_encoder.onnx"
|
| 21 |
+
PROMPT_CSV_PATH = "generate2_1.csv"
|
| 22 |
PROMPT_MATCH_THRESHOLD = 10 # percent
|
| 23 |
|
| 24 |
# --- Download leaderboard + model checkpoint from HF Hub ---
|
|
|
|
| 29 |
local_dir=".",
|
| 30 |
repo_type="dataset",
|
| 31 |
token=HF_TOKEN,
|
| 32 |
+
allow_patterns=[HUB_JSON, MODEL_PATH, CLIP_IMAGE_ENCODER_PATH, CLIP_TEXT_ENCODER_PATH, PROMPT_CSV_PATH]
|
| 33 |
)
|
| 34 |
except Exception as e:
|
| 35 |
print(f"Failed to load assets from HF Hub: {e}")
|
| 36 |
|
| 37 |
load_assets()
|
| 38 |
|
| 39 |
+
# --- Load prompts from CSV ---
|
| 40 |
+
def load_prompts(path=None):
    """Load the suggested prompts from the ``prompt`` column of a CSV file.

    Args:
        path: CSV file to read. When omitted, ``PROMPT_CSV_PATH`` is used,
            which keeps the original zero-argument call working.

    Returns:
        A list of prompt strings. Missing cells are dropped, every value is
        converted to ``str`` and stripped of surrounding whitespace, and
        entries that end up empty are discarded. An empty list is returned
        when the file cannot be read or has no ``prompt`` column.
    """
    csv_path = PROMPT_CSV_PATH if path is None else path
    try:
        df = pd.read_csv(csv_path)
        if "prompt" not in df.columns:
            print("CSV missing 'prompt' column.")
            return []
        # dropna() only removes NaN cells; whitespace-only cells survive it,
        # so normalise to stripped strings and filter the blanks explicitly.
        prompts = df["prompt"].dropna().astype(str).str.strip()
        return prompts[prompts != ""].tolist()
    except Exception as e:
        # Deliberately best-effort: a broken prompt file is reported and
        # yields no prompts instead of raising.
        print(f"Failed to load prompts: {e}")
        return []
|
| 51 |
+
|
| 52 |
+
PROMPT_LIST = load_prompts()
|
| 53 |
+
|
| 54 |
# --- Load leaderboard ---
|
| 55 |
def load_leaderboard():
|
| 56 |
try:
|
|
|
|
| 100 |
|
| 101 |
def compute_prompt_match(image: Image.Image, prompt: str) -> float:
|
| 102 |
try:
|
|
|
|
| 103 |
img_tensor = transform(image).unsqueeze(0).numpy().astype(np.float32)
|
| 104 |
image_features = clip_image_sess.run(None, {clip_image_sess.get_inputs()[0].name: img_tensor})[0][0]
|
| 105 |
+
image_features /= np.linalg.norm(image_features)
|
| 106 |
|
|
|
|
| 107 |
inputs = clip_tokenizer(prompt, return_tensors="np", padding="max_length", truncation=True, max_length=77)
|
| 108 |
input_ids = inputs["input_ids"]
|
| 109 |
attention_mask = inputs["attention_mask"]
|
|
|
|
| 111 |
clip_text_sess.get_inputs()[0].name: input_ids,
|
| 112 |
clip_text_sess.get_inputs()[1].name: attention_mask
|
| 113 |
})[0][0]
|
| 114 |
+
text_features /= np.linalg.norm(text_features)
|
| 115 |
|
|
|
|
| 116 |
sim = np.dot(image_features, text_features)
|
| 117 |
return round(sim * 100, 2)
|
| 118 |
except Exception as e:
|
| 119 |
print(f"CLIP ONNX match failed: {e}")
|
| 120 |
return 0.0
|
| 121 |
|
|
|
|
| 122 |
# --- Main prediction logic ---
|
| 123 |
def detect_with_model(image: Image.Image, prompt: str, username: str):
|
| 124 |
if not username.strip():
|
|
|
|
| 132 |
image_tensor = transforms.Resize((224, 224))(image)
|
| 133 |
image_tensor = transforms.ToTensor()(image_tensor).unsqueeze(0).numpy().astype(np.float32)
|
| 134 |
outputs = session.run(None, {input_name: image_tensor})
|
| 135 |
+
prob = round(1 / (1 + np.exp(-outputs[0][0][0])), 2)
|
| 136 |
prediction = "Real" if prob > 0.5 else "Fake"
|
| 137 |
|
| 138 |
score = 1 if prediction == "Real" else 0
|
|
|
|
| 160 |
)
|
| 161 |
|
| 162 |
# --- UI Layout ---
|
| 163 |
+
def get_random_prompt():
    """Return a prompt chosen at random from ``PROMPT_LIST``.

    When ``PROMPT_LIST`` is empty, a fixed fallback prompt is returned
    instead.
    """
    if not PROMPT_LIST:
        return "A synthetic scene with dramatic lighting"
    return random.choice(PROMPT_LIST)
|
| 165 |
+
|
| 166 |
with gr.Blocks(css=".gr-button {font-size: 16px !important}") as demo:
|
| 167 |
gr.Markdown("## ๐ OpenFake Arena")
|
| 168 |
gr.Markdown("Welcome to the OpenFake Arena!\n\n**Your mission:** Generate a synthetic image for the prompt, upload it, and try to fool the AI detector into thinking it’s real.\n\n**Rules:**\n- Only synthetic images allowed!\n- No cheating with real photos.\n- Licensing is your responsibility.\n\nMake it wild. Make it weird. Most of all — make it fun.")
|
|
|
|
| 174 |
prompt_input = gr.Textbox(
|
| 175 |
label="Suggested Prompt",
|
| 176 |
placeholder="e.g., A portrait photograph of a politician delivering a speech...",
|
| 177 |
+
value=get_random_prompt(),
|
| 178 |
lines=2
|
| 179 |
)
|
| 180 |
|