PRIYANSHUDHAKED committed on
Commit
02b4abe
·
verified ·
1 Parent(s): 75cdbf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -23
app.py CHANGED
@@ -4,51 +4,86 @@ from bs4 import BeautifulSoup
4
  import re
5
  from sentence_transformers import SentenceTransformer, util
6
  import torch
 
7
 
8
- # Initialize the sentence transformer model
9
  @st.cache_resource
10
- def load_model():
11
- return SentenceTransformer('all-MiniLM-L6-v2')
12
 
13
- model = load_model()
14
 
15
  # Function to scrape documentation
16
  def scrape_documentation(url):
17
  response = requests.get(url)
18
  soup = BeautifulSoup(response.text, 'html.parser')
19
- # This pattern might need adjustment based on the specific structure of the documentation
20
- code_blocks = soup.find_all('pre')
21
- return [block.text for block in code_blocks]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- # Function to find the most relevant code snippet
24
- def find_relevant_snippet(query, snippets):
25
  query_embedding = model.encode(query, convert_to_tensor=True)
26
- snippet_embeddings = model.encode(snippets, convert_to_tensor=True)
27
 
28
- cosine_scores = util.pytorch_cos_sim(query_embedding, snippet_embeddings)
29
  best_match = torch.argmax(cosine_scores)
30
 
31
- return snippets[best_match]
 
 
 
 
32
 
33
  # Streamlit UI
34
- st.title("AI Code Assistant for Library Documentation")
35
 
36
  library = st.selectbox("Choose a library", ["llama-index", "langchain"])
37
 
38
  if library == "llama-index":
39
- url = "https://gpt-index.readthedocs.io/en/latest/examples/index.html"
40
  elif library == "langchain":
41
  url = "https://python.langchain.com/docs/get_started/introduction"
42
 
43
- query = st.text_input("What kind of code snippet are you looking for?")
44
 
45
- if st.button("Find Code Snippet"):
46
- with st.spinner("Searching for relevant code..."):
47
- snippets = scrape_documentation(url)
48
- if snippets:
49
- relevant_snippet = find_relevant_snippet(query, snippets)
50
- st.code(relevant_snippet, language="python")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  else:
52
- st.error("No code snippets found in the documentation.")
53
 
54
- st.warning("Note: This tool scrapes the latest documentation, but may not always return perfect results. Always verify the code and check the official documentation.")
 
4
  import re
5
  from sentence_transformers import SentenceTransformer, util
6
  import torch
7
+ from transformers import pipeline
8
 
9
# Load and cache the heavy ML models once per Streamlit session.
@st.cache_resource
def load_models():
    """Return (sentence embedder, summarization pipeline), cached by Streamlit."""
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    summarizer_pipeline = pipeline("summarization")
    return embedder, summarizer_pipeline


model, summarizer = load_models()
15
 
16
# Function to scrape documentation
def scrape_documentation(url):
    """Fetch a documentation page and split it into titled sections.

    Each section is a dict with keys ``"title"``, ``"content"`` (concatenated
    paragraph text) and ``"code"`` (concatenated ``<pre>`` blocks).  Text that
    appears before the first heading is discarded — only sections with a
    non-empty title are returned.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status from the documentation site.
    """
    # Bounded timeout so the Streamlit spinner cannot hang forever, and
    # raise_for_status so 404/500 error pages are not parsed as docs.
    response = requests.get(url, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    sections = soup.find_all(['h1', 'h2', 'h3', 'p', 'pre'])

    content = []
    current_section = {"title": "", "content": "", "code": ""}

    for section in sections:
        if section.name in ['h1', 'h2', 'h3']:
            # A heading starts a new section; flush the previous one only if
            # it actually had a title (drops any preamble before the first heading).
            if current_section["title"]:
                content.append(current_section)
            current_section = {"title": section.text.strip(), "content": "", "code": ""}
        elif section.name == 'p':
            current_section["content"] += section.text.strip() + " "
        elif section.name == 'pre':
            current_section["code"] += section.text.strip() + "\n"

    # Flush the trailing section.
    if current_section["title"]:
        content.append(current_section)

    return content
39
 
40
# Function to find the most relevant section
def find_relevant_section(query, sections):
    """Return the section whose title+content embedding best matches *query*."""
    section_texts = [s["title"] + " " + s["content"] for s in sections]

    embedded_query = model.encode(query, convert_to_tensor=True)
    embedded_sections = model.encode(section_texts, convert_to_tensor=True)

    # Cosine similarity of the query against every section; pick the argmax.
    scores = util.pytorch_cos_sim(embedded_query, embedded_sections)
    best_index = int(torch.argmax(scores))

    return sections[best_index]
49
+
50
# Function to summarize text
def summarize_text(text, max_length=150, min_length=30):
    """Summarize *text* with the cached HF summarization pipeline.

    Args:
        text: Plain text to condense.
        max_length: Upper bound (in tokens) for the generated summary.
        min_length: Lower bound (in tokens) for the generated summary.
            Previously hard-coded to 30; now a backward-compatible parameter.

    Returns:
        The summary string, or *text* unchanged when the input has no more
        words than ``min_length`` — asking the model for a summary at least
        as long as the input only makes it pad and emit warnings.
    """
    if len(text.split()) <= min_length:
        return text
    return summarizer(
        text, max_length=max_length, min_length=min_length, do_sample=False
    )[0]['summary_text']
53
 
54
# Streamlit UI
st.title("Enhanced AI Code Assistant for Library Documentation")

library = st.selectbox("Choose a library", ["llama-index", "langchain"])

# Map the selected library to its documentation entry point.
if library == "llama-index":
    url = "https://gpt-index.readthedocs.io/en/latest/getting_started/installation.html"
elif library == "langchain":
    url = "https://python.langchain.com/docs/get_started/introduction"

query = st.text_input("What would you like to know about the library?")

if st.button("Get Information"):
    if not query.strip():
        # An empty query would embed to an arbitrary vector and return a
        # meaningless "closest" section — ask the user for input instead.
        st.error("Please enter a question first.")
    else:
        with st.spinner("Searching and processing information..."):
            sections = scrape_documentation(url)
            if sections:
                relevant_section = find_relevant_section(query, sections)

                st.subheader(relevant_section["title"])

                # Summarize the prose part of the section, if any.
                if relevant_section["content"]:
                    summary = summarize_text(relevant_section["content"])
                    st.write("Summary:", summary)

                # Show and summarize any code examples found in the section.
                if relevant_section["code"]:
                    st.subheader("Code Example:")
                    st.code(relevant_section["code"], language="python")

                    code_summary = summarize_text(f"This code {relevant_section['code']}")
                    st.write("Code summary:", code_summary)

                st.write("For more detailed information, please refer to the official documentation.")
            else:
                st.error("No relevant information found in the documentation.")

st.warning("Note: This tool provides information based on the latest documentation, but may not always return perfect results. Always verify the information and check the official documentation for the most up-to-date and accurate details.")