Spaces:
Running
Running
github-actions[bot]
commited on
Commit
·
e305bc7
1
Parent(s):
bccd595
Auto-sync from demo at Tue Nov 4 06:41:23 UTC 2025
Browse files
graphgen/operators/partition/partition_kg.py
CHANGED
|
@@ -66,3 +66,57 @@ async def partition_kg(
|
|
| 66 |
if image_data:
|
| 67 |
node_data["images"] = image_data
|
| 68 |
return batches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
if image_data:
|
| 67 |
node_data["images"] = image_data
|
| 68 |
return batches
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
async def attach_additional_data_to_node(
|
| 72 |
+
batches: list[
|
| 73 |
+
tuple[
|
| 74 |
+
list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]
|
| 75 |
+
]
|
| 76 |
+
],
|
| 77 |
+
chunk_storage: BaseKVStorage,
|
| 78 |
+
) -> list[
|
| 79 |
+
tuple[list[tuple[str, dict]], list[tuple[Any, Any, dict] | tuple[Any, Any, Any]]]
|
| 80 |
+
]:
|
| 81 |
+
"""
|
| 82 |
+
Attach additional data from chunk_storage to nodes in the batches.
|
| 83 |
+
:param batches:
|
| 84 |
+
:param chunk_storage:
|
| 85 |
+
:return:
|
| 86 |
+
"""
|
| 87 |
+
for batch in batches:
|
| 88 |
+
for node_id, node_data in batch[0]:
|
| 89 |
+
await _attach_by_type(node_id, node_data, chunk_storage)
|
| 90 |
+
return batches
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
async def _attach_by_type(
|
| 94 |
+
node_id: str,
|
| 95 |
+
node_data: dict,
|
| 96 |
+
chunk_storage: BaseKVStorage,
|
| 97 |
+
) -> None:
|
| 98 |
+
"""
|
| 99 |
+
Attach additional data to the node based on its entity type.
|
| 100 |
+
"""
|
| 101 |
+
entity_type = (node_data.get("entity_type") or "").lower()
|
| 102 |
+
if not entity_type:
|
| 103 |
+
return
|
| 104 |
+
|
| 105 |
+
source_ids = [
|
| 106 |
+
sid.strip()
|
| 107 |
+
for sid in node_data.get("source_id", "").split("<SEP>")
|
| 108 |
+
if sid.strip()
|
| 109 |
+
]
|
| 110 |
+
|
| 111 |
+
# Handle images
|
| 112 |
+
if "image" in entity_type:
|
| 113 |
+
image_chunks = [
|
| 114 |
+
data
|
| 115 |
+
for sid in source_ids
|
| 116 |
+
if "image" in sid.lower() and (data := await chunk_storage.get_by_id(sid))
|
| 117 |
+
]
|
| 118 |
+
if image_chunks:
|
| 119 |
+
# The generator expects a dictionary with an 'img_path' key, not a list of captions.
|
| 120 |
+
# We'll use the first image chunk found for this node.
|
| 121 |
+
node_data["images"] = image_chunks[0]
|
| 122 |
+
logger.debug("Attached image data to node %s", node_id)
|