| import os | |
| from langchain.chains.openai_functions import ( | |
| create_openai_fn_chain, | |
| create_structured_output_chain, | |
| ) | |
| from langchain_openai import ChatOpenAI | |
| from langchain.prompts import ChatPromptTemplate | |
| from typing import List, Optional | |
| from KG_classes import KnowledgeGraph | |
# Load the OpenAI API key from a local text file.
# NOTE(review): the path is relative, so it resolves against the current
# working directory of the process, not against this module's location.
with open("graphRAG/openai.txt", "r", encoding="utf-8") as f:
    # Text files usually end with a newline; strip surrounding whitespace so
    # it is not passed along as part of the key.
    api_key = f.read().strip()

# Module-level chat model shared by get_extraction_chain below.
llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=api_key)
def _format_allowed(heading: str, values: Optional[List[str]]) -> str:
    """Return a prompt bullet listing *values*, or "" when none are given."""
    if not values:
        return ""
    # The rendered text is later parsed by ChatPromptTemplate as an f-string
    # template, where single braces denote input variables. Double any braces
    # in caller-supplied labels so they stay literal text.
    joined = ", ".join(values).replace("{", "{{").replace("}", "}}")
    return f"- **{heading}:** {joined}"


def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None, allowed_rels: Optional[List[str]] = None
):
    """Build the knowledge-graph extraction chain.

    Assembles a chat prompt (one system message with the extraction rules,
    followed by two human messages) and passes it, together with the
    module-level ``llm`` and the ``KnowledgeGraph`` schema, to
    ``create_structured_output_chain``.

    Args:
        allowed_nodes: Optional node labels to list in the prompt. When
            ``None`` or empty, no "Allowed Node Labels" bullet is emitted.
        allowed_rels: Optional relationship types to list in the prompt. When
            ``None`` or empty, no "Allowed Relationship Types" bullet is
            emitted.

    Returns:
        The object returned by ``create_structured_output_chain``. The prompt
        expects a single template variable, ``input``, holding the text to
        extract from.
    """
    # NOTE(review): create_structured_output_chain appears to be deprecated in
    # newer LangChain releases in favour of ``with_structured_output``.
    # Switching would change the returned object, so it is kept as is here;
    # confirm against the pinned LangChain version.
    node_rule = _format_allowed("Allowed Node Labels", allowed_nodes)
    rel_rule = _format_allowed("Allowed Relationship Types", allowed_rels)

    system_prompt = f"""# Knowledge Graph Extraction Instructions
## 1. Purpose
You are a state-of-the-art system for extracting structured data to construct a **knowledge graph**. The graph consists of:
- **Nodes**: Entities or concepts.
- **Relationships**: Connections between nodes representing their interactions or associations.
## 2. Guidelines for Nodes
- Use **general labels** for node types (e.g., "person", "organization").
- Use **human-readable identifiers** for node IDs (no integers or generic IDs).
- Include attributes as key-value pairs with `camelCase` keys (e.g., `birthDate: "1990-01-01"`).
- Do **not create separate nodes** for numerical data or dates; these should always be node properties.
{node_rule}
## 3. Guidelines for Relationships
- Clearly define relationships between nodes, using concise and meaningful labels (e.g., "worksAt", "bornIn").
- Only use relationships allowed in the context.
{rel_rule}
- Avoid overly detailed or complex relationship labels.
## 4. Coreference Resolution
- Use the most complete identifier for entities across the graph. For example, use "John Doe" instead of "John" or "he".
## 5. Strict Compliance
Adhere to these rules exactly to ensure the generated graph is clear, coherent, and consistent."""

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            (
                "human",
                "Extract information from the following text using these rules: {input}",
            ),
            ("human", "Ensure the output is in the correct format."),
        ]
    )
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)