Spaces:
Runtime error
Runtime error
| from umap_reducer import UMAPReducer | |
| from embeddings_encoder import EmbeddingsEncoder | |
| from flask import Flask, request, render_template, jsonify, make_response, session | |
| from flask_session import Session | |
| from flask_cors import CORS, cross_origin | |
| import os | |
| from dotenv import load_dotenv | |
| import feedparser | |
| import json | |
| from dateutil import parser | |
| import re | |
| import numpy as np | |
| import gzip | |
| import hashlib | |
# Load environment variables first: SECRET_KEY is read from os.environ below.
load_dotenv()

app = Flask(__name__, static_url_path='/static')

# Session settings: permanent sessions kept by the "filesystem" session type,
# with the session cookie flagged SameSite=None and Secure.
# NOTE(review): presumably SameSite=None is there so the cookie accompanies
# cross-site requests allowed by CORS below -- confirm against the front end.
app.config.update(
    SECRET_KEY=os.environ.get("SECRET_KEY"),
    SESSION_PERMANENT=True,
    SESSION_TYPE="filesystem",
    SESSION_COOKIE_SAMESITE="None",
    SESSION_COOKIE_SECURE=True,
)
Session(app)
CORS(app)

# Module-level singletons shared by every request handled by this process.
reducer = UMAPReducer()
encoder = EmbeddingsEncoder()
def index():
    """Render the ``index.html`` template and return the result."""
    # NOTE(review): no route decorator is visible on this function in this
    # copy of the file -- confirm the view is registered with the app.
    page = render_template('index.html')
    return page
def run_umap():
    """Embed the posted sentences, reduce them with UMAP and cluster the result.

    Expects a JSON request body shaped like::

        {"data": {"sentences": [...],
                  "umap_options": ...,
                  "cluster_options": ...}}

    Sentence embeddings are cached in the session under a SHA-256 digest of
    the sentence list, so re-running with different UMAP/cluster options on
    the same sentences skips the encoding step.

    Returns:
        On success, a gzip-compressed JSON response with the keys
        ``"embeddings"`` (reduced coordinates) and ``"clusters"`` (cluster
        labels). On any failure, ``({"error": <message>}, 400)`` as JSON.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # copy of the file -- confirm the view is registered with the app.
    input_data = request.get_json()

    # A malformed payload is a client error: report it with the same JSON/400
    # shape as every other failure instead of letting it surface as a 500.
    try:
        payload = input_data['data']
        sentences = payload['sentences']
        umap_options = payload['umap_options']
        cluster_options = payload['cluster_options']
    except (KeyError, TypeError) as e:
        return jsonify({"error": f"invalid request payload: {e!r}"}), 400

    # Create a unique hash for the input to avoid recalculating embeddings.
    # The JSON serialisation is hashed rather than ''.join(sentences) because
    # plain concatenation maps e.g. ["ab", "c"] and ["a", "bc"] to the same
    # digest, which would serve one input the other's cached embeddings.
    sentences_input_hash = hashlib.sha256(
        json.dumps(sentences).encode("utf-8")).hexdigest()
    print("input options:", sentences_input_hash,
          umap_options, cluster_options, "\n\n")

    try:
        cached_embeddings = session.get(sentences_input_hash)
        if not cached_embeddings:
            print("New input, calculating embeddings", "\n\n")
            embeddings = encoder.encode(sentences)
            # Stored as a plain list so the session can serialise it.
            session[sentences_input_hash] = embeddings.tolist()
        else:
            print("Input already calculated, using cached embeddings", "\n\n")
            # The cached value is the list stored above, not the encoder's
            # original return object.
            embeddings = cached_embeddings

        # UMAP embeddings
        reducer.setParams(umap_options, cluster_options)
        umap_embeddings = reducer.embed(embeddings)

        # HDBScan cluster analysis
        clusters = reducer.clusterAnalysis(umap_embeddings)

        body = json.dumps(
            {
                "embeddings": umap_embeddings.tolist(),
                "clusters": clusters.labels_.tolist()
            }
        ).encode('utf8')
        content = gzip.compress(body, 5)

        response = make_response(content)
        response.headers['Content-length'] = len(content)
        response.headers['Content-Encoding'] = 'gzip'
        # The decoded body is JSON; declare it instead of the default HTML type.
        response.headers['Content-Type'] = 'application/json'
        return response
    except Exception as e:
        # Top-level boundary for the request: report the failure to the client.
        return jsonify({"error": str(e)}), 400
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is unset.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)