Spaces:
Runtime error
Runtime error
cleanup
Browse files
- app.py +18 -8
- cfg.py +2 -29
- rtd_scraper/tutorial/spiders/docs_spider.py +0 -1
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import
|
| 2 |
from typing import Optional, Tuple
|
| 3 |
|
| 4 |
import gradio as gr
|
|
@@ -6,17 +6,26 @@ import pandas as pd
|
|
| 6 |
from buster.completers import Completion
|
| 7 |
from buster.utils import extract_zip
|
| 8 |
|
|
|
|
| 9 |
import cfg
|
| 10 |
from cfg import setup_buster
|
| 11 |
|
| 12 |
-
# Create a handler to control where log messages go (e.g., console, file)
|
| 13 |
-
handler = (
|
| 14 |
-
logging.StreamHandler()
|
| 15 |
-
) # Console output, you can change it to a file handler if needed
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Typehint for chatbot history
|
| 22 |
ChatHistory = list[list[Optional[str], Optional[str]]]
|
|
@@ -108,6 +117,7 @@ with demo:
|
|
| 108 |
examples=[
|
| 109 |
"How can I install the library?",
|
| 110 |
"What dependencies are required?",
|
|
|
|
| 111 |
],
|
| 112 |
inputs=question,
|
| 113 |
)
|
|
|
|
| 1 |
+
import os
|
| 2 |
from typing import Optional, Tuple
|
| 3 |
|
| 4 |
import gradio as gr
|
|
|
|
| 6 |
from buster.completers import Completion
|
| 7 |
from buster.utils import extract_zip
|
| 8 |
|
| 9 |
+
from rtd_scraper.scrape_rtd import scrape_rtd
|
| 10 |
import cfg
|
| 11 |
from cfg import setup_buster
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
# Check if an openai key is set as an env. variable
|
| 15 |
+
if os.getenv("OPENAI_API_KEY") is None:
|
| 16 |
+
print(
|
| 17 |
+
"Warning: No openai key detected. You can set it with 'export OPENAI_API_KEY=sk-...'."
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
homepage_url = os.getenv("RTD_URL", "https://orion.readthedocs.io/")
|
| 22 |
+
target_version = os.getenv("RTD_VERSION", "en/stable")
|
| 23 |
+
|
| 24 |
+
# scrape and embed content from readthedocs website
|
| 25 |
+
scrape_rtd(
|
| 26 |
+
homepage_url=homepage_url, save_directory="outputs/", target_version=target_version
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
|
| 30 |
# Typehint for chatbot history
|
| 31 |
ChatHistory = list[list[Optional[str], Optional[str]]]
|
|
|
|
| 117 |
examples=[
|
| 118 |
"How can I install the library?",
|
| 119 |
"What dependencies are required?",
|
| 120 |
+
"Give a brief overview of the library."
|
| 121 |
],
|
| 122 |
inputs=question,
|
| 123 |
)
|
cfg.py
CHANGED
|
@@ -1,6 +1,3 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import logging
|
| 3 |
-
|
| 4 |
from buster.busterbot import Buster, BusterConfig
|
| 5 |
from buster.completers import ChatGPTCompleter, DocumentAnswerer
|
| 6 |
from buster.formatters.documents import DocumentsFormatterJSON
|
|
@@ -11,29 +8,6 @@ from buster.validators import QuestionAnswerValidator, Validator
|
|
| 11 |
|
| 12 |
from rtd_scraper.scrape_rtd import scrape_rtd
|
| 13 |
|
| 14 |
-
# Set the root logger's level to INFO
|
| 15 |
-
logging.basicConfig(level=logging.INFO)
|
| 16 |
-
|
| 17 |
-
# Check if an openai key is set as an env. variable
|
| 18 |
-
if os.getenv("OPENAI_API_KEY") is None:
|
| 19 |
-
print(
|
| 20 |
-
"Warning: No openai key detected. You can set it with 'export OPENAI_API_KEY=sk-...'."
|
| 21 |
-
)
|
| 22 |
-
|
| 23 |
-
homepage_url = os.getenv("RTD_URL", "https://orion.readthedocs.io/")
|
| 24 |
-
target_version = os.getenv("RTD_VERSION", "en/stable")
|
| 25 |
-
|
| 26 |
-
# scrape and embed content from readthedocs website
|
| 27 |
-
scrape_rtd(
|
| 28 |
-
homepage_url=homepage_url, save_directory="outputs/", target_version=target_version
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
-
# Disable logging for third-party libraries at DEBUG level
|
| 32 |
-
for name in logging.root.manager.loggerDict:
|
| 33 |
-
logger = logging.getLogger(name)
|
| 34 |
-
logger.setLevel(logging.INFO)
|
| 35 |
-
|
| 36 |
-
|
| 37 |
buster_cfg = BusterConfig(
|
| 38 |
validator_cfg={
|
| 39 |
"unknown_response_templates": [
|
|
@@ -43,15 +17,14 @@ buster_cfg = BusterConfig(
|
|
| 43 |
"embedding_model": "text-embedding-ada-002",
|
| 44 |
"use_reranking": True,
|
| 45 |
"invalid_question_response": "This question does not seem relevant to my current knowledge.",
|
| 46 |
-
"check_question_prompt": """You are an chatbot answering questions on
|
| 47 |
|
| 48 |
Your job is to determine wether or not a question is valid, and should be answered.
|
| 49 |
-
More general questions are not considered valid, even if you might know the response.
|
| 50 |
A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
|
| 51 |
|
| 52 |
For example:
|
| 53 |
|
| 54 |
-
Q:
|
| 55 |
true
|
| 56 |
|
| 57 |
Q: What is the meaning of life?
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from buster.busterbot import Buster, BusterConfig
|
| 2 |
from buster.completers import ChatGPTCompleter, DocumentAnswerer
|
| 3 |
from buster.formatters.documents import DocumentsFormatterJSON
|
|
|
|
| 8 |
|
| 9 |
from rtd_scraper.scrape_rtd import scrape_rtd
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
buster_cfg = BusterConfig(
|
| 12 |
validator_cfg={
|
| 13 |
"unknown_response_templates": [
|
|
|
|
| 17 |
"embedding_model": "text-embedding-ada-002",
|
| 18 |
"use_reranking": True,
|
| 19 |
"invalid_question_response": "This question does not seem relevant to my current knowledge.",
|
| 20 |
+
"check_question_prompt": """You are an chatbot answering questions on python libraries.
|
| 21 |
|
| 22 |
Your job is to determine wether or not a question is valid, and should be answered.
|
|
|
|
| 23 |
A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
|
| 24 |
|
| 25 |
For example:
|
| 26 |
|
| 27 |
+
Q: How can I install the library?
|
| 28 |
true
|
| 29 |
|
| 30 |
Q: What is the meaning of life?
|
rtd_scraper/tutorial/spiders/docs_spider.py
CHANGED
|
@@ -62,7 +62,6 @@ class DocsSpider(scrapy.Spider):
|
|
| 62 |
filepath = self.base_dir / parsed_uri.netloc / parsed_uri.path.strip("/")
|
| 63 |
filepath.parent.mkdir(parents=True, exist_ok=True)
|
| 64 |
|
| 65 |
-
print(f"{filepath=}")
|
| 66 |
with open(filepath, "wb") as f:
|
| 67 |
f.write(response.body)
|
| 68 |
|
|
|
|
| 62 |
filepath = self.base_dir / parsed_uri.netloc / parsed_uri.path.strip("/")
|
| 63 |
filepath.parent.mkdir(parents=True, exist_ok=True)
|
| 64 |
|
|
|
|
| 65 |
with open(filepath, "wb") as f:
|
| 66 |
f.write(response.body)
|
| 67 |
|