Build error
Create app.py
app.py ADDED
@@ -0,0 +1,63 @@
import streamlit as st
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import trafilatura
# smolagents exposes CodeAgent (there is no generic `Agent` class, which is
# the likely cause of the build error above); HfApiModel supplies the LLM.
from smolagents import CodeAgent, HfApiModel

# Streamlit UI
def main():
    st.set_page_config(page_title="AI Documentation Assistant", layout="wide")
    st.title("📚 AI Documentation Assistant")

    st.write("Enter the top-level URL of your documentation, and I'll find the most relevant article to answer your question.")

    # User input
    doc_url = st.text_input("🔗 Documentation URL (Homepage)", "https://example.com/docs")
    user_question = st.text_area("❓ Your Question", "How do I reset my password?")

    if st.button("🔍 Find Answer"):
        with st.spinner("Searching for relevant information..."):
            article_url, extracted_text = find_relevant_article(doc_url, user_question)
        if article_url:
            answer = generate_answer(user_question, extracted_text)

            st.success("✅ Answer Found!")
            st.write(answer)
            st.write(f"[🔗 Read Full Article]({article_url})")
        else:
            st.error("⚠️ No relevant articles found.")

# Step 3 & 4: Crawling and Finding the Most Relevant Article
def find_relevant_article(base_url, question):
    """Crawls the top-domain docs, finds the most relevant article, and extracts text."""
    response = requests.get(base_url, timeout=10)
    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.text, "html.parser")
    # Resolve relative hrefs against the base URL, then keep same-site links only
    links = [urljoin(base_url, a["href"]) for a in soup.find_all("a", href=True)]
    links = [link for link in links if link.startswith(base_url)]

    best_match = None
    best_text = ""

    # Naive relevance check: the full question rarely appears verbatim in a
    # page, so match on any significant keyword from it instead.
    keywords = [word for word in question.lower().split() if len(word) > 3]
    for link in links[:10]:  # Limit to first 10 links for now
        page_text = trafilatura.extract(requests.get(link, timeout=10).text)
        if page_text and any(kw in page_text.lower() for kw in keywords):
            best_match = link
            best_text = page_text
            break  # Stop at first good match

    return best_match, best_text

# Step 5: Generate Answer using `smolagents`
def generate_answer(question, context):
    # Stands in for the original `Agent("Question-Answering Agent", ...)` call,
    # which does not exist in smolagents.
    agent = CodeAgent(tools=[], model=HfApiModel())
    prompt = f"""
    Context: {context}
    Question: {question}
    Provide a clear and concise answer.
    """
    return agent.run(prompt)

if __name__ == "__main__":
    main()
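
To try the Space locally, a minimal sketch of the environment, with package names assumed from the imports above (the Space itself would normally pin these in its own requirements.txt):

streamlit
requests
beautifulsoup4
trafilatura
smolagents

With those installed, `streamlit run app.py` starts the app; the HfApiModel call typically also needs a Hugging Face API token (e.g. an HF_TOKEN environment variable) for the hosted inference calls.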