Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,7 +32,7 @@ def create_embeddings(text):
|
|
| 32 |
print("Embeddings created successfully.")
|
| 33 |
return embeddings, sentences
|
| 34 |
|
| 35 |
-
def
|
| 36 |
logging.info("Generating plot.")
|
| 37 |
# Generate embeddings for the query
|
| 38 |
query_embedding = model.encode([query])[0]
|
|
@@ -79,36 +79,55 @@ def generate_plot(query, pdf_file):
|
|
| 79 |
save(p)
|
| 80 |
logging.info("Plot saved to file.")
|
| 81 |
return temp_file.name
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
def
|
| 84 |
-
logging.info("
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
-
#
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
logging.info("Returning iframe HTML content.")
|
| 101 |
-
return iframe_html
|
| 102 |
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
iface = gr.Interface(
|
| 105 |
fn=gradio_interface,
|
| 106 |
inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Query")],
|
| 107 |
-
outputs=gr.
|
| 108 |
title="PDF Content Visualizer",
|
| 109 |
description="Upload a PDF and enter a query to visualize the content."
|
| 110 |
)
|
| 111 |
|
| 112 |
-
|
| 113 |
if __name__ == "__main__":
|
| 114 |
iface.launch()
|
|
|
|
| 32 |
print("Embeddings created successfully.")
|
| 33 |
return embeddings, sentences
|
| 34 |
|
| 35 |
+
def generate_plot_bokeh(query, pdf_file):
|
| 36 |
logging.info("Generating plot.")
|
| 37 |
# Generate embeddings for the query
|
| 38 |
query_embedding = model.encode([query])[0]
|
|
|
|
| 79 |
save(p)
|
| 80 |
logging.info("Plot saved to file.")
|
| 81 |
return temp_file.name
|
| 82 |
+
import plotly.express as px
|
| 83 |
+
import plotly.graph_objects as go
|
| 84 |
|
| 85 |
+
def generate_plotly_figure(query, pdf_file):
    """Build a Plotly scatter of the PDF's sentences projected to 2-D by UMAP.

    The query is embedded with the module-level ``model``; the PDF at
    ``pdf_file.name`` is run through ``process_pdf`` and ``create_embeddings``.
    The sentence embeddings and the query embedding are projected together,
    but only the sentence points are drawn. The five sentences with the
    highest ``cosine_similarity`` score against the query are coloured red,
    all others blue.

    Args:
        query: Query text to compare against the PDF's sentences.
        pdf_file: Object whose ``.name`` attribute is handed to
            ``process_pdf`` (presumably the uploaded file's path -- confirm
            against the Gradio ``File`` component in use).

    Returns:
        The ``go.Figure`` containing the single scatter trace.
    """
    logging.info("Generating plot with Plotly.")
    # encode() is given a one-item batch, so the query vector is element 0.
    query_embedding = model.encode([query])[0]

    # Extract the PDF text, then embed its sentences.
    embeddings, sentences = create_embeddings(process_pdf(pdf_file.name))

    logging.info("Data prepared for UMAP.")
    # The query row goes last so that [:-1] later selects only the sentences.
    all_embeddings = np.vstack([embeddings, query_embedding])
    all_sentences = sentences + [query]

    # Fixed random_state keeps the 2-D layout reproducible between calls.
    reducer = umap.UMAP(n_neighbors=15, min_dist=0.0, n_components=2, random_state=42)
    projected = reducer.fit_transform(all_embeddings)

    logging.info("UMAP transformation completed.")
    # Rank sentences by similarity to the query; keep the top five, best first.
    similarities = cosine_similarity([query_embedding], embeddings)[0]
    top_matches = similarities.argsort()[-5:][::-1]  # Adjust the number as needed

    # One colour per sentence: red for a top match, blue otherwise.
    is_top_match = np.isin(np.arange(len(sentences)), top_matches)
    colors = ['red' if hit else 'blue' for hit in is_top_match]

    # Drop the final row (the query) before plotting.
    sentence_points = projected[:-1]
    scatter = go.Scatter(
        x=sentence_points[:, 0],
        y=sentence_points[:, 1],
        mode='markers',
        marker={'color': colors},
        text=all_sentences[:-1],
    )

    fig = go.Figure()
    fig.add_trace(scatter)
    fig.update_layout(
        title="UMAP Projection of Sentences",
        xaxis_title="UMAP 1",
        yaxis_title="UMAP 2",
    )

    logging.info("Plotly figure created successfully.")
    return fig
|
|
|
|
|
|
|
| 118 |
|
| 119 |
+
def gradio_interface(pdf_file, query):
    """Gradio callback: turn an uploaded PDF and a query into a Plotly figure.

    Args:
        pdf_file: Value supplied by the "Upload PDF" input. Expected to be
            ``None`` when the user submits without uploading a file
            (Gradio behaviour -- confirm for the installed version).
        query: Text from the "Query" input.

    Returns:
        The figure produced by ``generate_plotly_figure(query, pdf_file)``.

    Raises:
        gr.Error: If no PDF was provided, so the UI can show a readable
            message instead of an internal traceback.
    """
    logging.info("Gradio interface called.")

    # generate_plotly_figure reads pdf_file.name; without this guard a
    # missing upload surfaces as an opaque AttributeError on None.
    if pdf_file is None:
        logging.warning("No PDF file provided.")
        raise gr.Error("Please upload a PDF file before submitting.")

    fig = generate_plotly_figure(query, pdf_file)
    logging.info("Returning Plotly figure.")
    return fig
|
| 124 |
# Gradio UI: a PDF upload and a query box feed gradio_interface, and the
# value it returns is rendered by a Plot output component.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Query"),
    ],
    outputs=gr.Plot(),  # gradio_interface returns a Plotly figure
    title="PDF Content Visualizer",
    description="Upload a PDF and enter a query to visualize the content.",
)


# Start the app only when this file is executed directly.
if __name__ == "__main__":
    iface.launch()
|