Spaces:
Build error
Build error
revert to only pdfs
Browse files
app.py
CHANGED
|
@@ -32,21 +32,9 @@ def pdf_to_document_store(pdf_file):
|
|
| 32 |
preprocessed_docs=preprocessor.process(doc)
|
| 33 |
document_store.write_documents(preprocessed_docs)
|
| 34 |
temp_file.close()
|
| 35 |
-
|
| 36 |
-
def crawl_url(url):
|
| 37 |
-
crawler = Crawler(output_dir="crawled_files", overwrite_existing_files=True, crawler_depth=1)
|
| 38 |
-
try:
|
| 39 |
-
docs = crawler.crawl(urls=[url])
|
| 40 |
-
preprocessed_docs = preprocessor.process(docs)
|
| 41 |
-
document_store.write_documents(preprocessed_docs)
|
| 42 |
-
except:
|
| 43 |
-
st.write('We were unable to crawl the contents of that URL, please try something else')
|
| 44 |
|
| 45 |
def summarize(content):
|
| 46 |
-
|
| 47 |
-
pdf_to_document_store(content)
|
| 48 |
-
elif st.session_state.url:
|
| 49 |
-
crawl_url(content)
|
| 50 |
summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
| 51 |
return summaries
|
| 52 |
|
|
@@ -55,8 +43,6 @@ def set_state_if_absent(key, value):
|
|
| 55 |
st.session_state[key] = value
|
| 56 |
|
| 57 |
set_state_if_absent("summaries", None)
|
| 58 |
-
set_state_if_absent("url", False)
|
| 59 |
-
set_state_if_absent("pdf", False)
|
| 60 |
|
| 61 |
document_store, summarizer, preprocessor = start_haystack()
|
| 62 |
|
|
@@ -69,24 +55,11 @@ This Summarization demo uses a [Haystack TransformerSummarizer node](https://hay
|
|
| 69 |
""", unsafe_allow_html=True)
|
| 70 |
|
| 71 |
uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
|
| 72 |
-
url = st.text_input(label="enter a URL")
|
| 73 |
-
|
| 74 |
-
if (validators.url(url)) and (uploaded_file is None):
|
| 75 |
-
if st.button('Summarize contents of URL'):
|
| 76 |
-
with st.spinner("π Please wait while we produce a summary..."):
|
| 77 |
-
try:
|
| 78 |
-
st.session_state.pdf = False
|
| 79 |
-
st.session_state.url = True
|
| 80 |
-
st. session_state.summaries = summarize(url)
|
| 81 |
-
except Exception as e:
|
| 82 |
-
logging.exception(e)
|
| 83 |
|
| 84 |
-
if
|
| 85 |
if st.button('Summarize Document'):
|
| 86 |
with st.spinner("π Please wait while we produce a summary..."):
|
| 87 |
try:
|
| 88 |
-
st.session_state.pdf = True
|
| 89 |
-
st.session_state.url = False
|
| 90 |
st.session_state.summaries = summarize(uploaded_file)
|
| 91 |
except Exception as e:
|
| 92 |
logging.exception(e)
|
|
|
|
| 32 |
preprocessed_docs=preprocessor.process(doc)
|
| 33 |
document_store.write_documents(preprocessed_docs)
|
| 34 |
temp_file.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def summarize(content):
|
| 37 |
+
pdf_to_document_store(content)
|
|
|
|
|
|
|
|
|
|
| 38 |
summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
| 39 |
return summaries
|
| 40 |
|
|
|
|
| 43 |
st.session_state[key] = value
|
| 44 |
|
| 45 |
set_state_if_absent("summaries", None)
|
|
|
|
|
|
|
| 46 |
|
| 47 |
document_store, summarizer, preprocessor = start_haystack()
|
| 48 |
|
|
|
|
| 55 |
""", unsafe_allow_html=True)
|
| 56 |
|
| 57 |
uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
+
if uploaded_file is not None :
|
| 60 |
if st.button('Summarize Document'):
|
| 61 |
with st.spinner("π Please wait while we produce a summary..."):
|
| 62 |
try:
|
|
|
|
|
|
|
| 63 |
st.session_state.summaries = summarize(uploaded_file)
|
| 64 |
except Exception as e:
|
| 65 |
logging.exception(e)
|