Bellok committed on
Commit
82f450e
·
1 Parent(s): 382376e

feat(app/docker): integrate committed packs and optimize embedding provider

Browse files

Configure embedding provider with 4 workers, batch size 64, and caching for better throughput. Prioritize loading local committed packs (faction-politics, wisdom-scrolls) over HuggingFace downloads, suppress progress bars, and copy the committed packs into the Docker image via the Dockerfile. Update READMEs to fix dataset name references. Reduces startup time and ensures essential packs are always available locally.

Dockerfile CHANGED
@@ -21,6 +21,11 @@ RUN pip install --no-cache-dir -r requirements.txt \
21
  # Copy the warbler_cda module
22
  COPY warbler_cda/ ./warbler_cda/
23
 
 
 
 
 
 
24
  # Copy server startup script
25
  COPY start_server.py ./
26
 
 
21
  # Copy the warbler_cda module
22
  COPY warbler_cda/ ./warbler_cda/
23
 
24
+ # Copy committed packs that stay in repo (per .gitignore)
25
+ COPY packs/warbler-pack-core/ ./packs/warbler-pack-core/
26
+ COPY packs/warbler-pack-faction-politics/ ./packs/warbler-pack-faction-politics/
27
+ COPY packs/warbler-pack-wisdom-scrolls/ ./packs/warbler-pack-wisdom-scrolls/
28
+
29
  # Copy server startup script
30
  COPY start_server.py ./
31
 
app.py CHANGED
@@ -26,8 +26,12 @@ from warbler_cda.pack_loader import PackLoader
26
  print("🚀 Initializing Warbler CDA...")
27
 
28
  # Initialize the system components
29
- print("⚙️ Creating embedding provider...")
30
- embedding_provider = EmbeddingProviderFactory.get_default_provider()
 
 
 
 
31
  print(f"✅ Embedding provider: {embedding_provider.get_provider_info()['provider_id']}")
32
 
33
  print("⚙️ Initializing semantic anchors...")
@@ -56,15 +60,31 @@ documents = pack_loader.discover_documents()
56
  if len(documents) == 0:
57
  print("⚠️ No packs found locally. Attempting to download from HuggingFace...")
58
  try:
 
 
 
 
59
  from warbler_cda.utils.hf_warbler_ingest import HFWarblerIngestor
60
  ingestor = HFWarblerIngestor(packs_dir=pack_loader.packs_dir, verbose=True)
61
- # Download all required datasets for deployment
 
 
 
62
  datasets_to_download = [
63
  "arxiv-1", "arxiv-2", "arxiv-3", "arxiv-4", "arxiv-5", # First 5 arxiv chunks
64
- "novels", "manuals", "enterprise", "edustories", "npc-dialogue", "wisdom-scrolls",
65
- "faction-politics", "portuguese-edu", "prompt-report"
66
  ]
67
 
 
 
 
 
 
 
 
 
 
68
  total_docs = 0
69
  successful_downloads = 0
70
 
 
26
  print("🚀 Initializing Warbler CDA...")
27
 
28
  # Initialize the system components
29
+ print("⚙️ Creating embedding provider with 4 workers...")
30
+ embedding_provider = EmbeddingProviderFactory.create_provider("sentence_transformer", {
31
+ "num_workers": 4,
32
+ "batch_size": 64, # Larger batches for better throughput
33
+ "cache_dir": ".embedding_cache"
34
+ })
35
  print(f"✅ Embedding provider: {embedding_provider.get_provider_info()['provider_id']}")
36
 
37
  print("⚙️ Initializing semantic anchors...")
 
60
  if len(documents) == 0:
61
  print("⚠️ No packs found locally. Attempting to download from HuggingFace...")
62
  try:
63
+ # Suppress HF datasets progress bars for cleaner output
64
+ import os
65
+ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
66
+
67
  from warbler_cda.utils.hf_warbler_ingest import HFWarblerIngestor
68
  ingestor = HFWarblerIngestor(packs_dir=pack_loader.packs_dir, verbose=True)
69
+ # First, try to load packs that are committed to the repo (not HF datasets)
70
+ local_only_packs = ["warbler-pack-faction-politics", "warbler-pack-wisdom-scrolls"]
71
+
72
+ # Then download HF datasets
73
  datasets_to_download = [
74
  "arxiv-1", "arxiv-2", "arxiv-3", "arxiv-4", "arxiv-5", # First 5 arxiv chunks
75
+ "novels", "manuals", "enterprise", "edustories", "npc-dialogue",
76
+ "portuguese-edu", "prompt-report"
77
  ]
78
 
79
+ # Check if local packs exist and should be loaded
80
+ local_pack_count = 0
81
+ for pack_name in local_only_packs:
82
+ pack_path = pack_loader.packs_dir / pack_name
83
+ if pack_path.exists():
84
+ print(f"📚 Loading committed pack: {pack_name}")
85
+ # These are already in the repo, so they should be discoverable
86
+ local_pack_count += 1
87
+
88
  total_docs = 0
89
  successful_downloads = 0
90
 
packs/warbler-pack-core/README_HF_DATASET.md CHANGED
@@ -70,7 +70,7 @@ Part of **Warbler CDA** (Cognitive Development Architecture) - a production-read
70
 
71
  - [warbler-pack-faction-politics](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-faction-politics) - Political intrigue templates
72
  - [warbler-pack-wisdom-scrolls](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-wisdom-scrolls) - Wisdom generation templates
73
- - [warbler-pack-hf-npc-dialogue](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-hf-npc-dialogue) - NPC dialogue from HuggingFace sources
74
 
75
  ## License
76
 
 
70
 
71
  - [warbler-pack-faction-politics](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-faction-politics) - Political intrigue templates
72
  - [warbler-pack-wisdom-scrolls](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-wisdom-scrolls) - Wisdom generation templates
73
+ - [warbler-pack-npc-dialogue](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-npc-dialogue) - NPC dialogue from HuggingFace sources
74
 
75
  ## License
76
 
packs/warbler-pack-faction-politics/README_HF_DATASET.md CHANGED
@@ -81,7 +81,7 @@ Part of **Warbler CDA** (Cognitive Development Architecture) - a production-read
81
 
82
  - [warbler-pack-core](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-core) - Core conversation templates
83
  - [warbler-pack-wisdom-scrolls](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-wisdom-scrolls) - Wisdom generation templates
84
- - [warbler-pack-hf-npc-dialogue](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-hf-npc-dialogue) - NPC dialogue from HuggingFace sources
85
 
86
  ## License
87
 
 
81
 
82
  - [warbler-pack-core](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-core) - Core conversation templates
83
  - [warbler-pack-wisdom-scrolls](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-wisdom-scrolls) - Wisdom generation templates
84
+ - [warbler-pack-npc-dialogue](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-npc-dialogue) - NPC dialogue from HuggingFace sources
85
 
86
  ## License
87
 
packs/{warbler-pack-hf-npc-dialogue → warbler-pack-npc-dialogue}/package.json RENAMED
@@ -1,10 +1,10 @@
1
  {
2
- "name": "warbler-pack-hf-npc-dialogue",
3
  "version": "1.0.0",
4
- "description": "Warbler pack generated from HuggingFace datasets",
5
  "created_at": "2025-11-19T21:24:41.170415",
6
  "document_count": 1000,
7
- "source": "HuggingFace",
8
  "content_types": [
9
  "character_interaction"
10
  ],
 
1
  {
2
+ "name": "warbler-pack-npc-dialogue",
3
  "version": "1.0.0",
4
+ "description": "Warbler pack generated by Copilot. Contains NPC dialogue data for in-game characters.",
5
  "created_at": "2025-11-19T21:24:41.170415",
6
  "document_count": 1000,
7
+ "source": "Warbler CDA Ingestor",
8
  "content_types": [
9
  "character_interaction"
10
  ],
packs/warbler-pack-npc-dialogue/warbler-pack-npc-dialogue.jsonl ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"content":"The weather has been quite unusual lately in {{location}}. Some say it’s a sign of change.","content_id":"warbler-pack-core/flavor_weather","metadata":{"dialogue_type":"template","title":"Weather Flavor","description":"NPC comments on local weather conditions","pack":"warbler-pack-core","type":"template"}}
2
+ {"content":"Have you heard the latest rumors? They say something strange is happening near the {{landmark}}.","content_id":"warbler-pack-core/rumor_hook","metadata":{"dialogue_type":"template","title":"Rumor Hook","description":"NPC shares a rumor to spark curiosity","pack":"warbler-pack-core","type":"template"}}
3
+ {"content":"I’ve lived in {{location}} my whole life. It has its charms, but also its secrets.","content_id":"warbler-pack-core/local_lore","metadata":{"dialogue_type":"template","title":"Local Lore","description":"NPC shares a bit of local history or mystery","pack":"warbler-pack-core","type":"template"}}
4
+ {"content":"You look like you’ve traveled far, {{user_name}}. Care to share a tale from your journey?","content_id":"warbler-pack-core/traveler_prompt","metadata":{"dialogue_type":"template","title":"Traveler Prompt","description":"NPC invites the player to share their story","pack":"warbler-pack-core","type":"template"}}
5
+ {"content":"Be careful out there. The roads beyond {{location}} aren’t as safe as they used to be.","content_id":"warbler-pack-core/warning_general","metadata":{"dialogue_type":"template","title":"General Warning","description":"NPC warns the player about dangers ahead","pack":"warbler-pack-core","type":"template"}}
6
+ {"content":"Sometimes I wonder what lies beyond the {{location_type}}. Perhaps adventure, perhaps danger.","content_id":"warbler-pack-core/philosophical_idle","metadata":{"dialogue_type":"template","title":"Philosophical Idle","description":"NPC muses idly about the world","pack":"warbler-pack-core","type":"template"}}
7
+ {"content":"If you’re looking for work, I heard {{npc_name}} is seeking help with a task.","content_id":"warbler-pack-core/quest_hint","metadata":{"dialogue_type":"template","title":"Quest Hint","description":"NPC hints at a possible quest giver","pack":"warbler-pack-core","type":"template"}}
8
+ {"content":"Oh, pardon me! I didn’t mean to bump into you. These streets get crowded at {{time_of_day}}.","content_id":"warbler-pack-core/incidental_encounter","metadata":{"dialogue_type":"template","title":"Incidental Encounter","description":"NPC apologizes or reacts to a casual collision","pack":"warbler-pack-core","type":"template"}}
9
+ {"content":"I’m just passing the time. Not much happens around here, but it’s peaceful.","content_id":"warbler-pack-core/idle_chatter","metadata":{"dialogue_type":"template","title":"Idle Chatter","description":"NPC filler dialogue for downtime moments","pack":"warbler-pack-core","type":"template"}}
10
+ {"content":"Strange noises have been coming from the {{location_type}} at night. Gives me chills just thinking about it.","content_id":"warbler-pack-core/spooky_flavor","metadata":{"dialogue_type":"template","title":"Spooky Flavor","description":"NPC shares eerie environmental detail","pack":"warbler-pack-core","type":"template"}}
11
+ {"content":"The market in {{location}} has been bustling lately. Merchants from far and wide bring their wares.","content_id":"warbler-pack-core/flavor_market","metadata":{"dialogue_type":"template","title":"Market Flavor","description":"NPC comments on the busy marketplace","pack":"warbler-pack-core","type":"template"}}
12
+ {"content":"I can’t shake the feeling that someone’s been watching the {{location_type}} at night.","content_id":"warbler-pack-core/suspicion_idle","metadata":{"dialogue_type":"template","title":"Suspicious Idle","description":"NPC expresses unease about strange happenings","pack":"warbler-pack-core","type":"template"}}
13
+ {"content":"You remind me of someone I once knew, {{user_name}}. Strong spirit, determined eyes.","content_id":"warbler-pack-core/personal_connection","metadata":{"dialogue_type":"template","title":"Personal Connection","description":"NPC draws a personal parallel with the player","pack":"warbler-pack-core","type":"template"}}
14
+ {"content":"If you’re seeking adventure, the {{landmark}} is said to hold treasures and dangers alike.","content_id":"warbler-pack-core/adventure_hook","metadata":{"dialogue_type":"template","title":"Adventure Hook","description":"NPC hints at exploration opportunities","pack":"warbler-pack-core","type":"template"}}
15
+ {"content":"The children of {{location}} love to play near the {{landmark}}. It’s a joyful sight.","content_id":"warbler-pack-core/flavor_children","metadata":{"dialogue_type":"template","title":"Children Flavor","description":"NPC shares a wholesome local detail","pack":"warbler-pack-core","type":"template"}}
16
+ {"content":"I’ve seen many travelers pass through here, but few with your confidence, {{user_title}}.","content_id":"warbler-pack-core/traveler_observation","metadata":{"dialogue_type":"template","title":"Traveler Observation","description":"NPC remarks on the player’s presence","pack":"warbler-pack-core","type":"template"}}
17
+ {"content":"The {{npc_role}} has been looking for capable hands. Perhaps you should pay them a visit.","content_id":"warbler-pack-core/quest_pointer","metadata":{"dialogue_type":"template","title":"Quest Pointer","description":"NPC directs player toward a quest giver","pack":"warbler-pack-core","type":"template"}}
18
+ {"content":"Ah, the smell of fresh bread from the bakery in {{location}} always lifts my spirits.","content_id":"warbler-pack-core/flavor_food","metadata":{"dialogue_type":"template","title":"Food Flavor","description":"NPC comments on local food or drink","pack":"warbler-pack-core","type":"template"}}
19
+ {"content":"Stay vigilant, {{user_name}}. Trouble often hides where you least expect it.","content_id":"warbler-pack-core/general_encouragement","metadata":{"dialogue_type":"template","title":"General Encouragement","description":"NPC offers encouragement with a warning","pack":"warbler-pack-core","type":"template"}}
20
+ {"content":"I’ve heard songs sung about heroes like you. Perhaps one day, they’ll sing of your deeds.","content_id":"warbler-pack-core/heroic_flavor","metadata":{"dialogue_type":"template","title":"Heroic Flavor","description":"NPC elevates the player with mythic resonance","pack":"warbler-pack-core","type":"template"}}
21
+ {"content":"Care to test your skills? I’ve sparred with many, and I’d welcome the challenge.","content_id":"warbler-pack-core/combat_challenge","metadata":{"dialogue_type":"template","title":"Combat Challenge","description":"NPC invites the player to spar or fight","pack":"warbler-pack-core","type":"template"}}
22
+ {"content":"These roads are long and lonely. A bit of conversation makes the journey lighter, don’t you think?","content_id":"warbler-pack-core/travel_flavor","metadata":{"dialogue_type":"template","title":"Travel Flavor","description":"NPC shares a thought about travel","pack":"warbler-pack-core","type":"template"}}
23
+ {"content":"The {{location_type}} has been quiet lately. Too quiet, if you ask me.","content_id":"warbler-pack-core/flavor_quiet","metadata":{"dialogue_type":"template","title":"Quiet Flavor","description":"NPC comments on eerie silence","pack":"warbler-pack-core","type":"template"}}
24
+ {"content":"I once dreamed of leaving {{location}} behind, but life had other plans.","content_id":"warbler-pack-core/personal_idle","metadata":{"dialogue_type":"template","title":"Personal Idle","description":"NPC shares a personal reflection","pack":"warbler-pack-core","type":"template"}}
25
+ {"content":"If you’re trading, I’ve got rare goods from {{region}}. Interested?","content_id":"warbler-pack-core/trade_offer","metadata":{"dialogue_type":"template","title":"Trade Offer","description":"NPC offers specific trade goods","pack":"warbler-pack-core","type":"template"}}
26
+ {"content":"Legends say the {{landmark}} was built by hands not of this world.","content_id":"warbler-pack-core/legend_flavor","metadata":{"dialogue_type":"template","title":"Legend Flavor","description":"NPC shares a myth or legend","pack":"warbler-pack-core","type":"template"}}
27
+ {"content":"I’m just enjoying the sunshine. Days like this remind me why I love {{location}}.","content_id":"warbler-pack-core/flavor_sunshine","metadata":{"dialogue_type":"template","title":"Sunshine Flavor","description":"NPC comments on pleasant weather","pack":"warbler-pack-core","type":"template"}}
28
+ {"content":"You seem burdened, {{user_name}}. Is there something weighing on your mind?","content_id":"warbler-pack-core/empathy_prompt","metadata":{"dialogue_type":"template","title":"Empathy Prompt","description":"NPC expresses concern for the player","pack":"warbler-pack-core","type":"template"}}
29
+ {"content":"The bells of {{location}} ring at dawn and dusk. It’s a tradition we hold dear.","content_id":"warbler-pack-core/flavor_tradition","metadata":{"dialogue_type":"template","title":"Tradition Flavor","description":"NPC shares a cultural detail","pack":"warbler-pack-core","type":"template"}}
30
+ {"content":"I’m not much for adventure, but I admire those who seek it.","content_id":"warbler-pack-core/admiration_idle","metadata":{"dialogue_type":"template","title":"Admiration Idle","description":"NPC expresses admiration for adventurers","pack":"warbler-pack-core","type":"template"}}
packs/warbler-pack-wisdom-scrolls/README_HF_DATASET.md CHANGED
@@ -110,7 +110,7 @@ Part of **Warbler CDA** (Cognitive Development Architecture) and the **Living De
110
 
111
  - [warbler-pack-core](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-core) - Core conversation templates
112
  - [warbler-pack-faction-politics](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-faction-politics) - Political dialogue templates
113
- - [warbler-pack-hf-npc-dialogue](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-hf-npc-dialogue) - NPC dialogue from HuggingFace sources
114
 
115
  ## License
116
 
 
110
 
111
  - [warbler-pack-core](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-core) - Core conversation templates
112
  - [warbler-pack-faction-politics](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-faction-politics) - Political dialogue templates
113
+ - [warbler-pack-npc-dialogue](https://huggingface.co/datasets/tiny-walnut-games/warbler-pack-npc-dialogue) - NPC dialogue from HuggingFace sources
114
 
115
  ## License
116
 
warbler_cda/embeddings/sentence_transformer_provider.py CHANGED
@@ -33,6 +33,8 @@ class SentenceTransformerEmbeddingProvider(EmbeddingProvider):
33
  else model_name_default
34
  )
35
  self.batch_size: int = config.get("batch_size", 32) if config else 32
 
 
36
  cache_dir_default = ".embedding_cache"
37
  self.cache_dir: str = (
38
  config.get("cache_dir", cache_dir_default) if config else cache_dir_default
@@ -94,7 +96,7 @@ class SentenceTransformerEmbeddingProvider(EmbeddingProvider):
94
  def embed_batch(
95
  self, texts: List[str], show_progress: bool = False
96
  ) -> List[List[float]]:
97
- """Generate embeddings for multiple texts with batching and caching."""
98
  # Check model initialization first, before processing
99
  if texts and self.model is None:
100
  raise RuntimeError("Model not initialized. Call _initialize_model first.")
@@ -125,11 +127,12 @@ class SentenceTransformerEmbeddingProvider(EmbeddingProvider):
125
  raise ValueError("Model is not an instance of SentenceTransformer")
126
  elif SentenceTransformer is None:
127
  raise RuntimeError("SentenceTransformer not available but model is set")
128
- batch_embeddings: Any = self.model.encode(
 
 
129
  texts_to_embed,
130
  batch_size=self.batch_size,
131
- convert_to_tensor=False,
132
- show_progress_bar=show_progress,
133
  )
134
 
135
  for idx, batch_idx in enumerate(indices_to_embed):
@@ -150,6 +153,86 @@ class SentenceTransformerEmbeddingProvider(EmbeddingProvider):
150
 
151
  return result
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  def semantic_search(
154
  self, query_text: str, embeddings: List[List[float]], top_k: int = 5
155
  ) -> List[Tuple[int, float]]:
 
33
  else model_name_default
34
  )
35
  self.batch_size: int = config.get("batch_size", 32) if config else 32
36
+ # Add worker configuration for parallelism
37
+ self.num_workers: int = config.get("num_workers", 4) if config else 4
38
  cache_dir_default = ".embedding_cache"
39
  self.cache_dir: str = (
40
  config.get("cache_dir", cache_dir_default) if config else cache_dir_default
 
96
  def embed_batch(
97
  self, texts: List[str], show_progress: bool = False
98
  ) -> List[List[float]]:
99
+ """Generate embeddings for multiple texts with batching, caching, and multi-worker parallelism."""
100
  # Check model initialization first, before processing
101
  if texts and self.model is None:
102
  raise RuntimeError("Model not initialized. Call _initialize_model first.")
 
127
  raise ValueError("Model is not an instance of SentenceTransformer")
128
  elif SentenceTransformer is None:
129
  raise RuntimeError("SentenceTransformer not available but model is set")
130
+
131
+ # Use multi-worker processing for large batches
132
+ batch_embeddings: Any = self._encode_with_workers(
133
  texts_to_embed,
134
  batch_size=self.batch_size,
135
+ show_progress=show_progress
 
136
  )
137
 
138
  for idx, batch_idx in enumerate(indices_to_embed):
 
153
 
154
  return result
155
 
156
+ def _encode_with_workers(self, texts: List[str], batch_size: int = 32, show_progress: bool = False) -> Any:
157
+ """Encode texts using multiple workers when beneficial."""
158
+ total_texts = len(texts)
159
+
160
+ # Only use multi-worker processing for large batches to avoid overhead
161
+ if total_texts < 100 or self.num_workers == 1:
162
+ # Use standard single-threaded encoding for small batches
163
+ return self.model.encode(
164
+ texts,
165
+ batch_size=batch_size,
166
+ convert_to_tensor=False,
167
+ show_progress_bar=show_progress,
168
+ )
169
+
170
+ # Split texts into chunks for parallel processing
171
+ import threading
172
+ import queue
173
+
174
+ chunk_size = max(1, total_texts // self.num_workers)
175
+ text_chunks = [texts[i:i + chunk_size] for i in range(0, total_texts, chunk_size)]
176
+
177
+ results_queue: queue.Queue = queue.Queue()
178
+ embeddings_results = [None] * len(text_chunks)
179
+
180
+ def worker_encode(chunk_idx: int, chunk_texts: List[str]):
181
+ """Worker function for encoding text chunks."""
182
+ try:
183
+ chunk_embeddings = self.model.encode(
184
+ chunk_texts,
185
+ batch_size=batch_size,
186
+ convert_to_tensor=False,
187
+ show_progress_bar=False, # Disable individual progress bars
188
+ )
189
+ results_queue.put((chunk_idx, chunk_embeddings))
190
+ except Exception as e:
191
+ results_queue.put((chunk_idx, e))
192
+
193
+ # Start worker threads
194
+ threads = []
195
+ for i, chunk in enumerate(text_chunks):
196
+ thread = threading.Thread(
197
+ target=worker_encode,
198
+ args=(i, chunk),
199
+ daemon=True
200
+ )
201
+ threads.append(thread)
202
+ thread.start()
203
+
204
+ # Collect results
205
+ completed_workers = 0
206
+ while completed_workers < len(text_chunks):
207
+ try:
208
+ chunk_idx, result = results_queue.get(timeout=1.0)
209
+ if isinstance(result, Exception):
210
+ raise result
211
+ embeddings_results[chunk_idx] = result
212
+ completed_workers += 1
213
+
214
+ if show_progress:
215
+ print(f"Worker {chunk_idx + 1}/{len(text_chunks)} completed ({completed_workers}/{len(text_chunks)})")
216
+
217
+ except queue.Empty:
218
+ # Check if all threads are still alive
219
+ if not any(t.is_alive() for t in threads):
220
+ break
221
+ continue
222
+
223
+ # Wait for all threads to complete
224
+ for thread in threads:
225
+ thread.join()
226
+
227
+ # Combine results in original order
228
+ final_embeddings = []
229
+ for embeddings in embeddings_results:
230
+ if embeddings is None:
231
+ raise RuntimeError("Worker thread failed to complete")
232
+ final_embeddings.extend(embeddings)
233
+
234
+ return final_embeddings
235
+
236
  def semantic_search(
237
  self, query_text: str, embeddings: List[List[float]], top_k: int = 5
238
  ) -> List[Tuple[int, float]]: