Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,11 +4,9 @@ import fitz # PyMuPDF
|
|
| 4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 6 |
from langchain_community.vectorstores import FAISS
|
| 7 |
-
from langchain.chains import RetrievalQA
|
| 8 |
-
from langchain_community.llms import HuggingFaceEndpoint
|
| 9 |
import requests
|
| 10 |
import os
|
| 11 |
-
import
|
| 12 |
|
| 13 |
# Page configuration
|
| 14 |
st.set_page_config(
|
|
@@ -68,17 +66,22 @@ st.markdown("""
|
|
| 68 |
animation: fadeIn 0.5s ease-in-out;
|
| 69 |
}
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
@keyframes fadeIn {
|
| 72 |
from { opacity: 0; }
|
| 73 |
to { opacity: 1; }
|
| 74 |
}
|
| 75 |
-
|
| 76 |
-
.spinner {
|
| 77 |
-
display: flex;
|
| 78 |
-
justify-content: center;
|
| 79 |
-
align-items: center;
|
| 80 |
-
height: 100px;
|
| 81 |
-
}
|
| 82 |
</style>
|
| 83 |
""", unsafe_allow_html=True)
|
| 84 |
|
|
@@ -97,10 +100,11 @@ if 'history' not in st.session_state:
|
|
| 97 |
def load_embedding_model():
|
| 98 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 99 |
|
| 100 |
-
def query_hf_inference_api(prompt,
|
| 101 |
-
"""Query Hugging Face Inference API
|
| 102 |
-
|
| 103 |
-
|
|
|
|
| 104 |
payload = {
|
| 105 |
"inputs": prompt,
|
| 106 |
"parameters": {
|
|
@@ -112,11 +116,36 @@ def query_hf_inference_api(prompt, model="google/flan-t5-xxl", max_tokens=200):
|
|
| 112 |
|
| 113 |
try:
|
| 114 |
response = requests.post(API_URL, headers=headers, json=payload)
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
except Exception as e:
|
| 119 |
-
st.error(f"
|
| 120 |
return ""
|
| 121 |
|
| 122 |
def process_pdf(pdf_file):
|
|
@@ -201,6 +230,8 @@ def generate_qa_for_chapter(start_page, end_page):
|
|
| 201 |
question = query_hf_inference_api(prompt, max_tokens=100)
|
| 202 |
if question and not question.endswith("?"):
|
| 203 |
question += "?"
|
|
|
|
|
|
|
| 204 |
else: # Generate answer
|
| 205 |
if qa_pairs: # Ensure we have a question to answer
|
| 206 |
prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
|
|
@@ -212,6 +243,20 @@ def generate_qa_for_chapter(start_page, end_page):
|
|
| 212 |
# App header
|
| 213 |
st.markdown("<h1 class='header'>📚 PDF Study Assistant</h1>", unsafe_allow_html=True)
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
# PDF Upload Section
|
| 216 |
with st.container():
|
| 217 |
st.subheader("📤 Upload Your Textbook/Notes")
|
|
@@ -246,11 +291,12 @@ if pdf_file:
|
|
| 246 |
if user_question:
|
| 247 |
with st.spinner("🤔 Thinking..."):
|
| 248 |
answer, docs = ask_question(user_question)
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
|
|
|
| 254 |
|
| 255 |
# Chapter Q&A Generation Tab
|
| 256 |
elif selected_tab == "Generate Chapter Q&A":
|
|
@@ -293,6 +339,6 @@ if pdf_file:
|
|
| 293 |
st.markdown("---")
|
| 294 |
st.markdown("""
|
| 295 |
<div style="text-align: center; padding: 20px;">
|
| 296 |
-
Built with ❤️ for students | PDF Study Assistant
|
| 297 |
</div>
|
| 298 |
""", unsafe_allow_html=True)
|
|
|
|
| 4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 6 |
from langchain_community.vectorstores import FAISS
|
|
|
|
|
|
|
| 7 |
import requests
|
| 8 |
import os
|
| 9 |
+
import time
|
| 10 |
|
| 11 |
# Page configuration
|
| 12 |
st.set_page_config(
|
|
|
|
| 66 |
animation: fadeIn 0.5s ease-in-out;
|
| 67 |
}
|
| 68 |
|
| 69 |
+
.error {
|
| 70 |
+
background-color: #ffebee;
|
| 71 |
+
border-left: 4px solid #f44336;
|
| 72 |
+
padding: 10px;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
.info {
|
| 76 |
+
background-color: #e3f2fd;
|
| 77 |
+
border-left: 4px solid #2196f3;
|
| 78 |
+
padding: 10px;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
@keyframes fadeIn {
|
| 82 |
from { opacity: 0; }
|
| 83 |
to { opacity: 1; }
|
| 84 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
</style>
|
| 86 |
""", unsafe_allow_html=True)
|
| 87 |
|
|
|
|
| 100 |
def load_embedding_model():
|
| 101 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 102 |
|
| 103 |
+
def query_hf_inference_api(prompt, max_tokens=200):
|
| 104 |
+
"""Query Hugging Face Inference API with error handling and retry"""
|
| 105 |
+
MODEL = "google/flan-t5-large" # Smaller, freely accessible model
|
| 106 |
+
API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
|
| 107 |
+
headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"} if os.getenv('HF_API_KEY') else {}
|
| 108 |
payload = {
|
| 109 |
"inputs": prompt,
|
| 110 |
"parameters": {
|
|
|
|
| 116 |
|
| 117 |
try:
|
| 118 |
response = requests.post(API_URL, headers=headers, json=payload)
|
| 119 |
+
|
| 120 |
+
if response.status_code == 200:
|
| 121 |
+
result = response.json()
|
| 122 |
+
return result[0]['generated_text'] if result else ""
|
| 123 |
+
|
| 124 |
+
elif response.status_code == 403:
|
| 125 |
+
st.error("403 Forbidden: Please check your Hugging Face API token and model access")
|
| 126 |
+
st.markdown("""
|
| 127 |
+
<div class="info">
|
| 128 |
+
<h4>How to fix this:</h4>
|
| 129 |
+
<ol>
|
| 130 |
+
<li>Get your free Hugging Face token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
|
| 131 |
+
<li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
|
| 132 |
+
<li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">https://huggingface.co/google/flan-t5-large</a></li>
|
| 133 |
+
</ol>
|
| 134 |
+
</div>
|
| 135 |
+
""", unsafe_allow_html=True)
|
| 136 |
+
return ""
|
| 137 |
+
|
| 138 |
+
elif response.status_code == 429:
|
| 139 |
+
st.warning("Rate limit exceeded. Waiting and retrying...")
|
| 140 |
+
time.sleep(5) # Wait 5 seconds before retrying
|
| 141 |
+
return query_hf_inference_api(prompt, max_tokens)
|
| 142 |
+
|
| 143 |
+
else:
|
| 144 |
+
st.error(f"API Error {response.status_code}: {response.text[:200]}")
|
| 145 |
+
return ""
|
| 146 |
+
|
| 147 |
except Exception as e:
|
| 148 |
+
st.error(f"Connection error: {str(e)}")
|
| 149 |
return ""
|
| 150 |
|
| 151 |
def process_pdf(pdf_file):
|
|
|
|
| 230 |
question = query_hf_inference_api(prompt, max_tokens=100)
|
| 231 |
if question and not question.endswith("?"):
|
| 232 |
question += "?"
|
| 233 |
+
if question: # Only add if we got a valid question
|
| 234 |
+
qa_pairs.append((question, ""))
|
| 235 |
else: # Generate answer
|
| 236 |
if qa_pairs: # Ensure we have a question to answer
|
| 237 |
prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
|
|
|
|
| 243 |
# App header
|
| 244 |
st.markdown("<h1 class='header'>📚 PDF Study Assistant</h1>", unsafe_allow_html=True)
|
| 245 |
|
| 246 |
+
# API Token Instructions
|
| 247 |
+
if not os.getenv("HF_API_KEY"):
|
| 248 |
+
st.markdown("""
|
| 249 |
+
<div class="info">
|
| 250 |
+
<h4>Setup Required:</h4>
|
| 251 |
+
<p>This app requires a free Hugging Face API token to work:</p>
|
| 252 |
+
<ol>
|
| 253 |
+
<li>Get your token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
|
| 254 |
+
<li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
|
| 255 |
+
<li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">google/flan-t5-large</a></li>
|
| 256 |
+
</ol>
|
| 257 |
+
</div>
|
| 258 |
+
""", unsafe_allow_html=True)
|
| 259 |
+
|
| 260 |
# PDF Upload Section
|
| 261 |
with st.container():
|
| 262 |
st.subheader("📤 Upload Your Textbook/Notes")
|
|
|
|
| 291 |
if user_question:
|
| 292 |
with st.spinner("🤔 Thinking..."):
|
| 293 |
answer, docs = ask_question(user_question)
|
| 294 |
+
if answer:
|
| 295 |
+
st.markdown(f"<div class='card'><b>Answer:</b> {answer}</div>", unsafe_allow_html=True)
|
| 296 |
+
|
| 297 |
+
with st.expander("🔍 See source passages"):
|
| 298 |
+
for i, doc in enumerate(docs):
|
| 299 |
+
st.markdown(f"**Passage {i+1}:** {doc.page_content[:500]}...")
|
| 300 |
|
| 301 |
# Chapter Q&A Generation Tab
|
| 302 |
elif selected_tab == "Generate Chapter Q&A":
|
|
|
|
| 339 |
st.markdown("---")
|
| 340 |
st.markdown("""
|
| 341 |
<div style="text-align: center; padding: 20px;">
|
| 342 |
+
Built with ❤️ for students | PDF Study Assistant v3.0
|
| 343 |
</div>
|
| 344 |
""", unsafe_allow_html=True)
|