Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -239,24 +239,20 @@ def get_embedding_for_text(text, tokenizer, model):
|
|
| 239 |
chunk_embeddings = []
|
| 240 |
|
| 241 |
for chunk in chunks:
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
|
| 256 |
chunk_embeddings.append(embedding[0])
|
| 257 |
-
except Exception as e:
|
| 258 |
-
st.warning(f"Error processing chunk: {str(e)}")
|
| 259 |
-
continue
|
| 260 |
|
| 261 |
if chunk_embeddings:
|
| 262 |
weights = np.array([len(chunk.split()) for chunk in chunks])
|
|
|
|
| 239 |
chunk_embeddings = []
|
| 240 |
|
| 241 |
for chunk in chunks:
|
| 242 |
+
inputs = tokenizer(
|
| 243 |
+
chunk,
|
| 244 |
+
return_tensors="pt",
|
| 245 |
+
padding=True,
|
| 246 |
+
truncation=True,
|
| 247 |
+
max_length=512
|
| 248 |
+
)
|
| 249 |
+
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
| 250 |
+
|
| 251 |
+
with torch.no_grad():
|
| 252 |
+
# Index 0 of the model output is the last hidden state (equivalent to outputs.last_hidden_state)
|
| 253 |
+
outputs = model(**inputs)[0]
|
| 254 |
+
embedding = outputs[:, 0, :].cpu().numpy()
|
|
|
|
| 255 |
chunk_embeddings.append(embedding[0])
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
if chunk_embeddings:
|
| 258 |
weights = np.array([len(chunk.split()) for chunk in chunks])
|