nurulajt committed on
Commit
c240d75
·
verified ·
1 Parent(s): 31f5cc4

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +97 -4
api.py CHANGED
@@ -33,6 +33,7 @@ app.add_middleware(
33
  MODELS = {}
34
  VOYAGE_API_KEY = os.environ.get('VOYAGE_API_KEY', '')
35
  FIREWORKS_API_KEY = os.environ.get('FIREWORKS_API_KEY', '')
 
36
  API_KEY = os.environ.get('API_KEY', '')
37
  REQUIRE_API_KEY = os.environ.get('REQUIRE_API_KEY', 'false').lower() == 'true'
38
 
@@ -48,6 +49,7 @@ for cache_dir in [os.environ['TRANSFORMERS_CACHE'], os.environ['HF_HOME'], os.en
48
  security = HTTPBearer(auto_error=False)
49
  voyage_client = None
50
  fireworks_available = False
 
51
 
52
  logger.info(f"API Key authentication: {'ENABLED' if REQUIRE_API_KEY else 'DISABLED'}")
53
  if API_KEY:
@@ -88,6 +90,17 @@ if FIREWORKS_API_KEY:
88
  # Still mark as available if key is set
89
  fireworks_available = True if FIREWORKS_API_KEY else False
90
 
 
 
 
 
 
 
 
 
 
 
 
91
  def load_models():
92
  """Load embedding models on startup (gracefully handles failures)"""
93
 
@@ -115,14 +128,17 @@ def load_models():
115
  except Exception as e:
116
  logger.warning(f"⚠️ Jina AI v3 not loaded: {e}")
117
 
118
- # Qwen3-Embedding-8B via Fireworks AI (API-based, no download needed!)
119
  if fireworks_available:
120
  MODELS['qwen3'] = 'fireworks' # Mark as available via Fireworks AI
121
  logger.info("✓ Qwen3-Embedding-8B available via Fireworks AI API (MTEB #1, no local model needed)")
 
 
 
122
  else:
123
  logger.warning("⚠️ Qwen3-Embedding-8B not available")
124
- logger.warning(" To enable: Set FIREWORKS_API_KEY environment variable")
125
- logger.warning(" Get API key at: https://fireworks.ai")
126
  logger.warning(" This avoids 15GB local download!")
127
 
128
  # Check if at least one model loaded
@@ -205,6 +221,44 @@ def get_fireworks_embeddings(texts: List[str], task: Optional[str] = None) -> Li
205
 
206
  return embeddings
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  @app.on_event("startup")
209
  async def startup_event():
210
  load_models()
@@ -267,6 +321,7 @@ class HealthResponse(BaseModel):
267
  models_loaded: List[str]
268
  voyage_available: bool
269
  fireworks_available: bool
 
270
  api_key_required: bool
271
 
272
  @app.get("/", response_model=dict)
@@ -293,6 +348,7 @@ async def health():
293
  "models_loaded": models_loaded,
294
  "voyage_available": voyage_client is not None,
295
  "fireworks_available": fireworks_available,
 
296
  "api_key_required": REQUIRE_API_KEY
297
  }
298
 
@@ -315,7 +371,8 @@ async def create_embeddings_elasticsearch(
315
  - `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
316
  - `jobbertv3`: JobBERT-v3 (768-dim, job-specific, improved performance) - default
317
  - `jina`: Jina AI embeddings-v3 (1024-dim, general purpose)
318
- - `qwen3`: Qwen3-Embedding-8B (4096-dim, MTEB #1, multilingual, 32k context)
 
319
  - `voyage`: Voyage AI (1024-dim, requires API key)
320
 
321
  **Jina AI Tasks (via query parameter):**
@@ -371,6 +428,36 @@ async def create_embeddings_elasticsearch(
371
  except Exception as e:
372
  raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  elif model_name in MODELS:
375
  try:
376
  selected_model = MODELS[model_name]
@@ -378,6 +465,9 @@ async def create_embeddings_elasticsearch(
378
  # Qwen3 via Fireworks AI API (no local model)
379
  if model_name == "qwen3" and selected_model == 'fireworks':
380
  embeddings_list = get_fireworks_embeddings(texts, task=task)
 
 
 
381
  # Jina AI with task type
382
  elif model_name == "jina" and task:
383
  embeddings = selected_model.encode(
@@ -486,6 +576,9 @@ async def create_embeddings_batch(
486
  # Qwen3 via Fireworks AI API (no local model)
487
  if model_name == "qwen3" and selected_model == 'fireworks':
488
  embeddings_list = get_fireworks_embeddings(request.texts, task=request.task)
 
 
 
489
  # Jina AI with task type
490
  elif model_name == "jina" and request.task:
491
  embeddings = selected_model.encode(
 
33
  MODELS = {}
34
  VOYAGE_API_KEY = os.environ.get('VOYAGE_API_KEY', '')
35
  FIREWORKS_API_KEY = os.environ.get('FIREWORKS_API_KEY', '')
36
+ OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')
37
  API_KEY = os.environ.get('API_KEY', '')
38
  REQUIRE_API_KEY = os.environ.get('REQUIRE_API_KEY', 'false').lower() == 'true'
39
 
 
49
  security = HTTPBearer(auto_error=False)
50
  voyage_client = None
51
  fireworks_available = False
52
+ openrouter_available = False
53
 
54
  logger.info(f"API Key authentication: {'ENABLED' if REQUIRE_API_KEY else 'DISABLED'}")
55
  if API_KEY:
 
90
  # Still mark as available if key is set
91
  fireworks_available = True if FIREWORKS_API_KEY else False
92
 
93
+ if OPENROUTER_API_KEY:
94
+ try:
95
+ import requests
96
+ openrouter_available = True
97
+ logger.info("✓ OpenRouter API key configured (Qwen3, text-embedding-3-small, and more available)")
98
+ except ImportError:
99
+ logger.warning("⚠️ requests package not installed (needed for OpenRouter)")
100
+ except Exception as e:
101
+ logger.warning(f"⚠️ OpenRouter validation failed: {e}")
102
+ openrouter_available = True if OPENROUTER_API_KEY else False
103
+
104
  def load_models():
105
  """Load embedding models on startup (gracefully handles failures)"""
106
 
 
128
  except Exception as e:
129
  logger.warning(f"⚠️ Jina AI v3 not loaded: {e}")
130
 
131
+ # Qwen3-Embedding-8B via Fireworks AI or OpenRouter (API-based, no download needed!)
132
  if fireworks_available:
133
  MODELS['qwen3'] = 'fireworks' # Mark as available via Fireworks AI
134
  logger.info("✓ Qwen3-Embedding-8B available via Fireworks AI API (MTEB #1, no local model needed)")
135
+ elif openrouter_available:
136
+ MODELS['qwen3'] = 'openrouter' # Mark as available via OpenRouter
137
+ logger.info("✓ Qwen3-Embedding-8B available via OpenRouter API (MTEB #1, no local model needed)")
138
  else:
139
  logger.warning("⚠️ Qwen3-Embedding-8B not available")
140
+ logger.warning(" To enable: Set FIREWORKS_API_KEY or OPENROUTER_API_KEY environment variable")
141
+ logger.warning(" Fireworks: https://fireworks.ai | OpenRouter: https://openrouter.ai")
142
  logger.warning(" This avoids 15GB local download!")
143
 
144
  # Check if at least one model loaded
 
221
 
222
  return embeddings
223
 
224
def get_openrouter_embeddings(texts: List[str], model: str = "qwen/qwen3-embedding-8b") -> List[List[float]]:
    """
    Get embeddings from the OpenRouter API.

    Args:
        texts: List of texts to embed.
        model: Model to use (default: qwen/qwen3-embedding-8b).
            Also supports: openai/text-embedding-3-small, openai/text-embedding-3-large

    Returns:
        List of embedding vectors, one per input text, in input order.
        An empty ``texts`` list yields ``[]`` without contacting the API.

    Raises:
        RuntimeError: if OPENROUTER_API_KEY is not configured, the API answers
            with a non-200 status, or the response body is not JSON, has no
            usable ``data`` list, or holds a different number of embeddings
            than inputs.
    """
    # Nothing to embed: skip the network round-trip entirely.
    if not texts:
        return []

    if not OPENROUTER_API_KEY:
        raise RuntimeError("OPENROUTER_API_KEY not configured")

    # Imported lazily so the module still loads when `requests` is absent
    # and OpenRouter is simply not used.
    import requests

    response = requests.post(
        "https://openrouter.ai/api/v1/embeddings",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": model,
            "input": texts,
        },
        timeout=30,
    )

    if response.status_code != 200:
        raise RuntimeError(f"OpenRouter API error: {response.status_code} - {response.text}")

    try:
        result = response.json()
    except ValueError as exc:
        # A 200 status with a non-JSON body would otherwise surface as a bare
        # decode error that does not name the upstream service.
        raise RuntimeError(f"OpenRouter API returned a non-JSON response: {response.text}") from exc

    items = result.get("data") if isinstance(result, dict) else None
    if not isinstance(items, list):
        # A 200 status without a "data" list would otherwise surface as a
        # bare KeyError; report the payload instead.
        raise RuntimeError(f"OpenRouter API returned an unexpected response: {result}")

    # NOTE(review): presumably each item carries an OpenAI-style "index";
    # when it is absent the stable sort keeps the order as received.
    ordered = sorted(items, key=lambda item: item.get("index", 0))
    embeddings = [item["embedding"] for item in ordered]

    if len(embeddings) != len(texts):
        # Callers pair embeddings with texts by position, so a count
        # mismatch must not pass silently.
        raise RuntimeError(
            f"OpenRouter API returned {len(embeddings)} embeddings for {len(texts)} inputs"
        )

    return embeddings
261
+
262
  @app.on_event("startup")
263
  async def startup_event():
264
  load_models()
 
321
  models_loaded: List[str]
322
  voyage_available: bool
323
  fireworks_available: bool
324
+ openrouter_available: bool
325
  api_key_required: bool
326
 
327
  @app.get("/", response_model=dict)
 
348
  "models_loaded": models_loaded,
349
  "voyage_available": voyage_client is not None,
350
  "fireworks_available": fireworks_available,
351
+ "openrouter_available": openrouter_available,
352
  "api_key_required": REQUIRE_API_KEY
353
  }
354
 
 
371
  - `jobbertv2`: JobBERT-v2 (768-dim, job-specific)
372
  - `jobbertv3`: JobBERT-v3 (768-dim, job-specific, improved performance) - default
373
  - `jina`: Jina AI embeddings-v3 (1024-dim, general purpose)
374
+ - `qwen3`: Qwen3-Embedding-8B (4096-dim, MTEB #1, multilingual, 32k context, via Fireworks or OpenRouter)
375
+ - `openrouter`: OpenRouter embeddings (supports multiple models, requires API key)
376
  - `voyage`: Voyage AI (1024-dim, requires API key)
377
 
378
  **Jina AI Tasks (via query parameter):**
 
428
  except Exception as e:
429
  raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
430
 
431
+ elif model_name == "openrouter":
432
+ if not openrouter_available:
433
+ raise HTTPException(
434
+ status_code=503,
435
+ detail="OpenRouter not available. Set OPENROUTER_API_KEY environment variable."
436
+ )
437
+
438
+ try:
439
+ # Use OpenRouter with specified model or default
440
+ openrouter_model = task or "qwen/qwen3-embedding-8b" # Use task param as model selector
441
+ embeddings_list = get_openrouter_embeddings(texts, model=openrouter_model)
442
+
443
+ # Calculate token usage
444
+ token_count = estimate_token_count(texts)
445
+
446
+ # Create OpenAI-compatible response
447
+ data = [
448
+ EmbeddingObject(index=i, embedding=emb)
449
+ for i, emb in enumerate(embeddings_list)
450
+ ]
451
+
452
+ return OpenAIEmbeddingResponse(
453
+ model=f"openrouter/{openrouter_model}",
454
+ object="list",
455
+ usage=UsageInfo(total_tokens=token_count, prompt_tokens=token_count),
456
+ data=data
457
+ )
458
+ except Exception as e:
459
+ raise HTTPException(status_code=500, detail=f"OpenRouter error: {str(e)}")
460
+
461
  elif model_name in MODELS:
462
  try:
463
  selected_model = MODELS[model_name]
 
465
  # Qwen3 via Fireworks AI API (no local model)
466
  if model_name == "qwen3" and selected_model == 'fireworks':
467
  embeddings_list = get_fireworks_embeddings(texts, task=task)
468
+ # Qwen3 via OpenRouter API
469
+ elif model_name == "qwen3" and selected_model == 'openrouter':
470
+ embeddings_list = get_openrouter_embeddings(texts, model="qwen/qwen3-embedding-8b")
471
  # Jina AI with task type
472
  elif model_name == "jina" and task:
473
  embeddings = selected_model.encode(
 
576
  # Qwen3 via Fireworks AI API (no local model)
577
  if model_name == "qwen3" and selected_model == 'fireworks':
578
  embeddings_list = get_fireworks_embeddings(request.texts, task=request.task)
579
+ # Qwen3 via OpenRouter API
580
+ elif model_name == "qwen3" and selected_model == 'openrouter':
581
+ embeddings_list = get_openrouter_embeddings(request.texts, model="qwen/qwen3-embedding-8b")
582
  # Jina AI with task type
583
  elif model_name == "jina" and request.task:
584
  embeddings = selected_model.encode(