Update app.py
app.py CHANGED
Before:

import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import numpy as np
from openai import OpenAI

# Initialize the models
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

def analyze_images(image_a, image_b, api_key):
    # Generate a caption with BLIP
    def generate_caption(image):
        inputs = blip_processor(image, return_tensors="pt")
        caption = blip_model.generate(**inputs)
        return blip_processor.decode(caption[0], skip_special_tokens=True)

    # CLIP feature extraction
    def extract_features(image):
        inputs = clip_processor(images=image, return_tensors="pt")
        features = clip_model.get_image_features(**inputs)
        return features.detach().numpy()

    # The images are already PIL.Image objects; process them directly
    img_a = image_a.convert("RGB")
    img_b = image_b.convert("RGB")

    # Generate the captions
    caption_a = generate_caption(img_a)
    caption_b = generate_caption(img_b)

    # Extract the features
    features_a = extract_features(img_a)
    features_b = extract_features(img_b)

    # Compute embedding similarity
    cosine_similarity = np.dot(features_a, features_b.T) / (np.linalg.norm(features_a) * np.linalg.norm(features_b))
    latent_diff = np.abs(features_a - features_b).tolist()

    # Call the API
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
    gpt_response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            …
        ],
        stream=False
    )

    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        …
    }

with gr.Blocks() as demo:
    gr.Markdown("# …")

    with gr.Row():
        with gr.Column():
            image_a = gr.Image(label="Image A", type="pil")
        with gr.Column():
            image_b = gr.Image(label="Image B", type="pil")

    api_key_input = gr.Textbox(label="API Key", placeholder="Enter your DeepSeek API Key", type="password")

    analyze_button = gr.Button("Analyze")

    …

    # Analysis logic
    def process_analysis(img_a, img_b, api_key):
        results = analyze_images(img_a, img_b, api_key)
        …

    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b, api_key_input],
        outputs=[…]
    )

demo.launch()
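
The old version computed a CLIP cosine similarity inline; the update drops that line in favor of the element-wise latent_diff. If the scalar similarity score is still wanted, a minimal standalone sketch (assuming CLIP's usual (1, 512)-shaped feature arrays) would be:

import numpy as np

def cosine_similarity(features_a, features_b):
    # Flatten the (1, 512) feature matrices to vectors, then divide the
    # dot product by the product of the L2 norms.
    a, b = features_a.ravel(), features_b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))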
After:

import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
from PIL import Image, ImageChops, ImageFilter  # ImageFilter added: FIND_EDGES is used below
import numpy as np
import matplotlib.pyplot as plt
from openai import OpenAI

# Initialize the models
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Image-processing helpers
def compute_difference_images(img_a, img_b):
    # Line-sketch extraction
    def extract_sketch(image):
        grayscale = image.convert("L")
        inverted = ImageChops.invert(grayscale)
        sketch = ImageChops.screen(grayscale, inverted)
        return sketch

    # "Normal map" (approximated here as simple edge enhancement)
    def compute_normal_map(image):
        edges = image.filter(ImageFilter.FIND_EDGES)
        return edges

    # Blended difference of the two images
    diff_overlay = ImageChops.difference(img_a, img_b)

    return {
        "original_a": img_a,
        "original_b": img_b,
        "sketch_a": extract_sketch(img_a),
        "sketch_b": extract_sketch(img_b),
        "normal_a": compute_normal_map(img_a),
        "normal_b": compute_normal_map(img_b),
        "diff_overlay": diff_overlay
    }
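
# A quick way to exercise this helper outside the UI (hypothetical file
# names; ImageChops.difference expects two images of the same size):
#
#   a = Image.open("a.png").convert("RGB")
#   b = Image.open("b.png").convert("RGB")
#   compute_difference_images(a, b)["diff_overlay"].save("overlay.png")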

# More detailed BLIP captioning
def generate_detailed_caption(image):
    inputs = blip_processor(image, return_tensors="pt")
    caption = blip_model.generate(**inputs, max_length=128, num_beams=5, no_repeat_ngram_size=2)
    return blip_processor.decode(caption[0], skip_special_tokens=True)
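
# Compared with the old plain blip_model.generate(**inputs): num_beams=5
# enables beam search (slower, usually better captions), max_length=128
# allows longer output, and no_repeat_ngram_size=2 suppresses repeated
# phrases. All three are standard transformers generate() options.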

# Feature-difference visualization
def plot_feature_differences(latent_diff):
    diff_magnitude = [abs(x) for x in latent_diff[0]]
    indices = range(len(diff_magnitude))

    # Bar chart over all feature dimensions
    plt.figure(figsize=(8, 4))
    plt.bar(indices, diff_magnitude, alpha=0.7)
    plt.xlabel("Feature Index")
    plt.ylabel("Magnitude of Difference")
    plt.title("Feature Differences (Bar Chart)")
    bar_chart_path = "bar_chart.png"
    plt.savefig(bar_chart_path)
    plt.close()

    # Pie chart of the first ten dimensions
    plt.figure(figsize=(6, 6))
    plt.pie(diff_magnitude[:10], labels=range(10), autopct="%1.1f%%", startangle=140)
    plt.title("Top 10 Feature Differences (Pie Chart)")
    pie_chart_path = "pie_chart.png"
    plt.savefig(pie_chart_path)
    plt.close()

    return bar_chart_path, pie_chart_path
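
# Standalone check with fake features shaped like CLIP's (1, 512) output:
#
#   fake = np.abs(np.random.randn(1, 512) - np.random.randn(1, 512)).tolist()
#   plot_feature_differences(fake)   # writes bar_chart.png and pie_chart.png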

# Analysis entry point
def analyze_images(image_a, image_b, api_key):
    # Create the OpenAI-compatible client for the DeepSeek endpoint
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

    # Image-difference processing
    img_a = image_a.convert("RGB")
    img_b = image_b.convert("RGB")
    images_diff = compute_difference_images(img_a, img_b)

    # Generate captions with BLIP
    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    # CLIP feature extraction
    def extract_features(image):
        inputs = clip_processor(images=image, return_tensors="pt")
        features = clip_model.get_image_features(**inputs)
        return features.detach().numpy()

    features_a = extract_features(img_a)
    features_b = extract_features(img_b)
    latent_diff = np.abs(features_a - features_b).tolist()

    # Ask the chat model for a more detailed comparison
    gpt_response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            …
        ],
        stream=False
    )
    text_analysis = gpt_response.choices[0].message.content.strip()

    # Visualize the feature differences
    bar_chart_path, pie_chart_path = plot_feature_differences(latent_diff)

    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "images_diff": images_diff,
        "bar_chart": bar_chart_path,
        "pie_chart": pie_chart_path
    }
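
# Direct use without the UI (hypothetical paths and key):
#
#   out = analyze_images(Image.open("a.png"), Image.open("b.png"), "sk-…")
#   print(out["caption_a"], out["caption_b"])
#   print(out["text_analysis"])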

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Image Comparison Analysis Tool")
    api_key_input = gr.Textbox(label="API Key", placeholder="Enter your DeepSeek API Key", type="password")

    with gr.Row():
        with gr.Column():
            image_a = gr.Image(label="Image A", type="pil")
        with gr.Column():
            image_b = gr.Image(label="Image B", type="pil")

    analyze_button = gr.Button("Analyze")

    with gr.Row():
        gr.Markdown("## Image Differences")
        result_diff = gr.Gallery(label="Difference images").style(grid=3)  # Gradio 3.x styling API

    with gr.Row():
        result_caption_a = gr.Textbox(label="Image A caption", interactive=False)
        result_caption_b = gr.Textbox(label="Image B caption", interactive=False)

    with gr.Row():
        gr.Markdown("## Difference Analysis")
        result_text_analysis = gr.Textbox(label="Detailed analysis", interactive=False, lines=5)
        result_bar_chart = gr.Image(label="Feature-difference bar chart")
        result_pie_chart = gr.Image(label="Feature-difference pie chart")

    # Analysis logic
    def process_analysis(img_a, img_b, api_key):
        results = analyze_images(img_a, img_b, api_key)
        # gr.Gallery expects (image, caption) tuples, so the image comes first
        diff_images = [
            (results["images_diff"]["original_a"], "Original A"),
            (results["images_diff"]["original_b"], "Original B"),
            (results["images_diff"]["sketch_a"], "Sketch A"),
            (results["images_diff"]["sketch_b"], "Sketch B"),
            (results["images_diff"]["normal_a"], "Normal A"),
            (results["images_diff"]["normal_b"], "Normal B"),
            (results["images_diff"]["diff_overlay"], "Difference Overlay"),
        ]
        return (
            diff_images,
            results["caption_a"],
            results["caption_b"],
            results["text_analysis"],
            results["bar_chart"],
            results["pie_chart"]
        )

    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b, api_key_input],
        outputs=[
            result_diff,
            result_caption_a,
            result_caption_b,
            result_text_analysis,
            result_bar_chart,
            result_pie_chart
        ]
    )

demo.launch()
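
The prompt passed to deepseek-chat (the contents of the messages list) is collapsed in this capture. A minimal self-contained sketch of such a call with the OpenAI SDK, using a placeholder key and an assumed prompt shape (the app presumably sends the two captions for comparison), looks like:

from openai import OpenAI

client = OpenAI(api_key="sk-…", base_url="https://api.deepseek.com")
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        # Placeholder prompt, not the one from the commit
        {"role": "system", "content": "You compare two images from their captions."},
        {"role": "user", "content": "Caption A: …\nCaption B: …"},
    ],
    stream=False,
)
print(response.choices[0].message.content)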