Update scripts/setup.py
Browse files — scripts/setup.py (+6 -6)
scripts/setup.py
CHANGED
|
@@ -20,7 +20,7 @@ load_dotenv()
|
|
| 20 |
def create_docs(input_file: str) -> list[Document]:
|
| 21 |
"""Reads a JSONL file and converts each line into a LlamaIndex Document."""
|
| 22 |
documents = []
|
| 23 |
-
with open(input_file, "r", encoding="utf-8") as f:
|
| 24 |
for line in f:
|
| 25 |
data = json.loads(line)
|
| 26 |
documents.append(
|
|
@@ -28,11 +28,11 @@ def create_docs(input_file: str) -> list[Document]:
|
|
| 28 |
doc_id=data["doc_id"],
|
| 29 |
text=data["content"],
|
| 30 |
metadata={
|
| 31 |
-
"url": data["url"],
|
| 32 |
-
"title": data["name"],
|
| 33 |
-
"tokens": data["tokens"],
|
| 34 |
-
"retrieve_doc": data["retrieve_doc"],
|
| 35 |
-
"source": data["source"],
|
| 36 |
},
|
| 37 |
excluded_llm_metadata_keys=[
|
| 38 |
"title",
|
|
|
|
| 20 |
def create_docs(input_file: str) -> list[Document]:
|
| 21 |
"""Reads a JSONL file and converts each line into a LlamaIndex Document."""
|
| 22 |
documents = []
|
| 23 |
+
with open(input_file, "r", encoding="utf-8") as f:
|
| 24 |
for line in f:
|
| 25 |
data = json.loads(line)
|
| 26 |
documents.append(
|
|
|
|
| 28 |
doc_id=data["doc_id"],
|
| 29 |
text=data["content"],
|
| 30 |
metadata={
|
| 31 |
+
"url": data["metadata"]["url"], # CORRECTED: Accessing 'url' from 'metadata'
|
| 32 |
+
"title": data["metadata"]["name"], # CORRECTED: Accessing 'name' (for title) from 'metadata'
|
| 33 |
+
"tokens": data["metadata"]["tokens"], # CORRECTED: Accessing 'tokens' from 'metadata'
|
| 34 |
+
"retrieve_doc": data["metadata"]["retrieve_doc"], # CORRECTED: Accessing 'retrieve_doc' from 'metadata'
|
| 35 |
+
"source": data["metadata"]["source"], # CORRECTED: Accessing 'source' from 'metadata'
|
| 36 |
},
|
| 37 |
excluded_llm_metadata_keys=[
|
| 38 |
"title",
|