Ahambrahmasmi committed on
Commit
cb75cb1
·
verified ·
1 Parent(s): 261254d

Update scripts/setup.py

Browse files
Files changed (1) hide show
  1. scripts/setup.py +6 -6
scripts/setup.py CHANGED
@@ -20,7 +20,7 @@ load_dotenv()
20
  def create_docs(input_file: str) -> list[Document]:
21
  """Reads a JSONL file and converts each line into a LlamaIndex Document."""
22
  documents = []
23
- with open(input_file, "r", encoding="utf-8") as f: # Added encoding for safety
24
  for line in f:
25
  data = json.loads(line)
26
  documents.append(
@@ -28,11 +28,11 @@ def create_docs(input_file: str) -> list[Document]:
28
  doc_id=data["doc_id"],
29
  text=data["content"],
30
  metadata={
31
- "url": data["url"],
32
- "title": data["name"],
33
- "tokens": data["tokens"],
34
- "retrieve_doc": data["retrieve_doc"],
35
- "source": data["source"],
36
  },
37
  excluded_llm_metadata_keys=[
38
  "title",
 
20
  def create_docs(input_file: str) -> list[Document]:
21
  """Reads a JSONL file and converts each line into a LlamaIndex Document."""
22
  documents = []
23
+ with open(input_file, "r", encoding="utf-8") as f:
24
  for line in f:
25
  data = json.loads(line)
26
  documents.append(
 
28
  doc_id=data["doc_id"],
29
  text=data["content"],
30
  metadata={
31
+ "url": data["metadata"]["url"], # CORRECTED: Accessing 'url' from 'metadata'
32
+ "title": data["metadata"]["name"], # CORRECTED: Accessing 'name' (for title) from 'metadata'
33
+ "tokens": data["metadata"]["tokens"], # CORRECTED: Accessing 'tokens' from 'metadata'
34
+ "retrieve_doc": data["metadata"]["retrieve_doc"], # CORRECTED: Accessing 'retrieve_doc' from 'metadata'
35
+ "source": data["metadata"]["source"], # CORRECTED: Accessing 'source' from 'metadata'
36
  },
37
  excluded_llm_metadata_keys=[
38
  "title",