michaelfeil
committed on
Commit
·
c686254
1
Parent(s):
0255774
update demo to use actual index
Browse files
search.py
CHANGED
|
@@ -25,7 +25,7 @@ index = None
|
|
| 25 |
docs_index = None
|
| 26 |
|
| 27 |
|
| 28 |
-
def build_index(demo_mode=True):
|
| 29 |
global index, docs_index
|
| 30 |
index = Index(
|
| 31 |
ndim=embed_texts_sync(["Hi"]).shape[
|
|
@@ -48,11 +48,14 @@ def build_index(demo_mode=True):
|
|
| 48 |
index.add(np.arange(len(docs_index)), embeddings)
|
| 49 |
return
|
| 50 |
else:
|
|
|
|
| 51 |
ds = datasets.load_dataset("michaelfeil/mined_docstrings_pypi_embedded")
|
| 52 |
ds = ds["train"]
|
| 53 |
docs_index = ds["code"]
|
| 54 |
-
embeddings = ds["embed_func_code"]
|
|
|
|
| 55 |
index.add(np.arange(len(docs_index)), embeddings)
|
|
|
|
| 56 |
|
| 57 |
if index is None:
|
| 58 |
build_index()
|
|
|
|
| 25 |
docs_index = None
|
| 26 |
|
| 27 |
|
| 28 |
+
def build_index(demo_mode=False):
|
| 29 |
global index, docs_index
|
| 30 |
index = Index(
|
| 31 |
ndim=embed_texts_sync(["Hi"]).shape[
|
|
|
|
| 48 |
index.add(np.arange(len(docs_index)), embeddings)
|
| 49 |
return
|
| 50 |
else:
|
| 51 |
+
print("loading 280k dataset")
|
| 52 |
ds = datasets.load_dataset("michaelfeil/mined_docstrings_pypi_embedded")
|
| 53 |
ds = ds["train"]
|
| 54 |
docs_index = ds["code"]
|
| 55 |
+
embeddings = np.array(ds["embed_func_code"])
|
| 56 |
+
print("indexing the 280k vectors")
|
| 57 |
index.add(np.arange(len(docs_index)), embeddings)
|
| 58 |
+
print("usearch index done.")
|
| 59 |
|
| 60 |
if index is None:
|
| 61 |
build_index()
|