Spaces:

PRIYANSHUDHAKED
/

Youtube_video_QA_ChatBot

Sleeping

App Files Files Community

PRIYANSHUDHAKED committed on Oct 7, 2024

Commit

02b4abe

verified ·

1 Parent(s): 75cdbf7

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -23

app.py CHANGED Viewed

@@ -4,51 +4,86 @@ from bs4 import BeautifulSoup
 import re
 from sentence_transformers import SentenceTransformer, util
 import torch
-# Initialize the sentence transformer model
 @st.cache_resource
-def load_model():
-    return SentenceTransformer('all-MiniLM-L6-v2')
-model = load_model()
 # Function to scrape documentation
 def scrape_documentation(url):
     response = requests.get(url)
     soup = BeautifulSoup(response.text, 'html.parser')
-    # This pattern might need adjustment based on the specific structure of the documentation
-    code_blocks = soup.find_all('pre')
-    return [block.text for block in code_blocks]
-# Function to find the most relevant code snippet
-def find_relevant_snippet(query, snippets):
     query_embedding = model.encode(query, convert_to_tensor=True)
-    snippet_embeddings = model.encode(snippets, convert_to_tensor=True)
-    cosine_scores = util.pytorch_cos_sim(query_embedding, snippet_embeddings)
     best_match = torch.argmax(cosine_scores)
-    return snippets[best_match]
 # Streamlit UI
-st.title("AI Code Assistant for Library Documentation")
 library = st.selectbox("Choose a library", ["llama-index", "langchain"])
 if library == "llama-index":
-    url = "https://gpt-index.readthedocs.io/en/latest/examples/index.html"
 elif library == "langchain":
     url = "https://python.langchain.com/docs/get_started/introduction"
-query = st.text_input("What kind of code snippet are you looking for?")
-if st.button("Find Code Snippet"):
-    with st.spinner("Searching for relevant code..."):
-        snippets = scrape_documentation(url)
-        if snippets:
-            relevant_snippet = find_relevant_snippet(query, snippets)
-            st.code(relevant_snippet, language="python")
         else:
-            st.error("No code snippets found in the documentation.")
-st.warning("Note: This tool scrapes the latest documentation, but may not always return perfect results. Always verify the code and check the official documentation.")

 import re
 from sentence_transformers import SentenceTransformer, util
 import torch
+from transformers import pipeline
+# Initialize the sentence transformer model and summarizer
 @st.cache_resource
+def load_models():
+    return SentenceTransformer('all-MiniLM-L6-v2'), pipeline("summarization")
+model, summarizer = load_models()
 # Function to scrape documentation
 def scrape_documentation(url):
     response = requests.get(url)
     soup = BeautifulSoup(response.text, 'html.parser')
+    sections = soup.find_all(['h1', 'h2', 'h3', 'p', 'pre'])
+    content = []
+    current_section = {"title": "", "content": "", "code": ""}
+    for section in sections:
+        if section.name in ['h1', 'h2', 'h3']:
+            if current_section["title"]:
+                content.append(current_section)
+            current_section = {"title": section.text.strip(), "content": "", "code": ""}
+        elif section.name == 'p':
+            current_section["content"] += section.text.strip() + " "
+        elif section.name == 'pre':
+            current_section["code"] += section.text.strip() + "\n"
+    if current_section["title"]:
+        content.append(current_section)
+    return content
+# Function to find the most relevant section
+def find_relevant_section(query, sections):
     query_embedding = model.encode(query, convert_to_tensor=True)
+    section_embeddings = model.encode([s["title"] + " " + s["content"] for s in sections], convert_to_tensor=True)
+    cosine_scores = util.pytorch_cos_sim(query_embedding, section_embeddings)
     best_match = torch.argmax(cosine_scores)
+    return sections[best_match]
+# Function to summarize text
+def summarize_text(text, max_length=150):
+    return summarizer(text, max_length=max_length, min_length=30, do_sample=False)[0]['summary_text']
 # Streamlit UI
+st.title("Enhanced AI Code Assistant for Library Documentation")
 library = st.selectbox("Choose a library", ["llama-index", "langchain"])
 if library == "llama-index":
+    url = "https://gpt-index.readthedocs.io/en/latest/getting_started/installation.html"
 elif library == "langchain":
     url = "https://python.langchain.com/docs/get_started/introduction"
+query = st.text_input("What would you like to know about the library?")
+if st.button("Get Information"):
+    with st.spinner("Searching and processing information..."):
+        sections = scrape_documentation(url)
+        if sections:
+            relevant_section = find_relevant_section(query, sections)
+            st.subheader(relevant_section["title"])
+            if relevant_section["content"]:
+                summary = summarize_text(relevant_section["content"])
+                st.write("Summary:", summary)
+            if relevant_section["code"]:
+                st.subheader("Code Example:")
+                st.code(relevant_section["code"], language="python")
+                code_summary = summarize_text(f"This code {relevant_section['code']}")
+                st.write("Code summary:", code_summary)
+            st.write("For more detailed information, please refer to the official documentation.")
         else:
+            st.error("No relevant information found in the documentation.")
+st.warning("Note: This tool provides information based on the latest documentation, but may not always return perfect results. Always verify the information and check the official documentation for the most up-to-date and accurate details.")