Spaces:

adithya747
/

website-summarizer

Sleeping

App Files Community

adithya747 committed on Feb 13

Commit

90a9f3d

verified ·

1 Parent(s): 4a1e457

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -43

app.py CHANGED Viewed

@@ -3,23 +3,20 @@ import requests
 from bs4 import BeautifulSoup
 from transformers import pipeline
-# Use a more lightweight model for Hugging Face Spaces
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 def scrape_website(url):
     """Extracts text from a website with error handling"""
     try:
-        headers = {'User-Agent': 'Mozilla/5.0'}  # Add headers to prevent 403 errors
         response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()  # Raise HTTP errors
         soup = BeautifulSoup(response.text, "html.parser")
-        # Extract text from common content-containing tags
         text_elements = soup.find_all(['p', 'article', 'main', 'section'])
         text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])
-        return text if text.strip() else "No content found"
     except Exception as e:
         return f"Scraping Error: {str(e)}"
@@ -27,44 +24,97 @@ def scrape_website(url):
 def summarize_website(url):
     """Handles the full summarization pipeline"""
     try:
-        extracted_text = scrape_website(url)
-        if "Error" in extracted_text:
-            return extracted_text
-        # Check minimum text length
-        if len(extracted_text.split()) < 50:
-            return "Error: Insufficient content for summarization (minimum 50 words required)"
-        # Truncate text to model's max input length (1024 tokens for DistilBART)
-        max_input_length = 1000  # Conservative estimate for token count
-        truncated_text = extracted_text[:max_input_length]
-        # Generate summary
-        summary = summarizer(
-            truncated_text,
-            max_length=200,
-            min_length=50,
-            do_sample=False,
-            truncation=True  # Ensure truncation is enabled
-        )
-        return f"**Summary:**\n\n{summary[0]['summary_text']}"
     except Exception as e:
-        return f"Summarization Error: {str(e)}"
-# Gradio interface with improved configuration
-iface = gr.Interface(
-    fn=summarize_website,
-    inputs=gr.Textbox(label="Website URL", placeholder="Enter full URL (including https://)..."),
-    outputs=gr.Markdown(),
-    title="AI-Powered Website Summarizer",
-    description="Enter a website URL to get an AI-generated summary of its content",
-    examples=[
-        ["https://en.wikipedia.org/wiki/Large_language_model"],
-        ["https://www.bbc.com/news/technology-66510295"]
-    ]
-)
-iface.launch()

 from bs4 import BeautifulSoup
 from transformers import pipeline
+# Load summarization pipeline
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 def scrape_website(url):
     """Extracts text from a website with error handling"""
     try:
+        headers = {'User-Agent': 'Mozilla/5.0'}
         response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
         soup = BeautifulSoup(response.text, "html.parser")
         text_elements = soup.find_all(['p', 'article', 'main', 'section'])
         text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])
+        return text.strip() if text.strip() else "No content found"
     except Exception as e:
         return f"Scraping Error: {str(e)}"
 def summarize_website(url):
     """Handles the full summarization pipeline"""
     try:
+        with gr.Column(variant="panel"):
+            gr.Markdown("## ⚡ Processing...")
+            extracted_text = scrape_website(url)
+            if "Error" in extracted_text:
+                return f"❌ {extracted_text}"
+            if len(extracted_text.split()) < 50:
+                return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"
+            max_input_length = 1000
+            truncated_text = extracted_text[:max_input_length]
+            summary = summarizer(
+                truncated_text,
+                max_length=200,
+                min_length=50,
+                do_sample=False,
+                truncation=True
+            )
+            return f"## 📝 Summary\n\n{summary[0]['summary_text']}"
     except Exception as e:
+        return f"⛔ Summarization Error: {str(e)}"
+# Custom CSS for mobile optimization
+css = """
+@media screen and (max-width: 600px) {
+    .container {
+        padding: 10px !important;
+    }
+    .input-box textarea {
+        font-size: 16px !important;
+    }
+}
+"""
+# Mobile-optimized interface with Blocks API
+with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
+    gr.Markdown("# 🌐 AI Website Summarizer")
+    gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")
+    with gr.Row():
+        url_input = gr.Textbox(
+            label="Website URL",
+            placeholder="Enter full URL (https://...)",
+            lines=1,
+            max_lines=1,
+            elem_id="input-box"
+        )
+    with gr.Row():
+        submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
+        clear_btn = gr.Button("Clear 🔄")
+    output = gr.Markdown()
+    # Example section
+    gr.Examples(
+        examples=[
+            ["https://en.wikipedia.org/wiki/Large_language_model"],
+            ["https://www.bbc.com/news/technology-66510295"]
+        ],
+        inputs=url_input,
+        label="Try these examples:",
+        examples_per_page=2
+    )
+    # Progress indicator
+    progress = gr.Textbox(visible=False)
+    # Event handlers
+    submit_btn.click(
+        fn=summarize_website,
+        inputs=url_input,
+        outputs=output,
+        api_name="summarize"
+    )
+    clear_btn.click(
+        fn=lambda: ("", ""),
+        inputs=None,
+        outputs=[url_input, output],
+        queue=False
+    )
+# Mobile-friendly configuration
+app.launch(
+    server_name="0.0.0.0",
+    server_port=7860,
+    favicon_path="https://www.svgrepo.com/show/355037/huggingface.svg"
+)