Spaces:

mycompanyajt
/

inference

Sleeping

App Files Files Community

nurulajt committed 24 days ago

Commit

fd3e04f

verified ·

1 Parent(s): cc89204

Update api.py

Browse files

Files changed (1) hide show

api.py +60 -9

api.py CHANGED Viewed

@@ -99,6 +99,12 @@ async def verify_api_key(credentials: Optional[HTTPAuthorizationCredentials] = S
     return True
 @app.on_event("startup")
 async def startup_event():
     load_models()
@@ -113,6 +119,22 @@ class ElasticsearchInferenceRequest(BaseModel):
             }
         }
 class ElasticsearchInferenceResponse(BaseModel):
     embedding: List[float] = Field(..., description="Embedding vector for single input")
@@ -172,7 +194,7 @@ async def health():
         "api_key_required": REQUIRE_API_KEY
     }
-@app.post("/embed", response_model=Union[ElasticsearchInferenceResponse, ElasticsearchInferenceBatchResponse])
 async def create_embeddings_elasticsearch(
     request: ElasticsearchInferenceRequest,
     model: str = Query("jobbertv3", description="Model: jobbertv2, jobbertv3, jina, or voyage"),
@@ -224,10 +246,21 @@ async def create_embeddings_elasticsearch(
             )
             embeddings = result.embeddings
-            if is_single:
-                return ElasticsearchInferenceResponse(embedding=embeddings[0])
-            else:
-                return ElasticsearchInferenceBatchResponse(embeddings=embeddings)
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
@@ -249,10 +282,28 @@ async def create_embeddings_elasticsearch(
             embeddings_list = embeddings.tolist()
-            if is_single:
-                return ElasticsearchInferenceResponse(embedding=embeddings_list[0])
-            else:
-                return ElasticsearchInferenceBatchResponse(embeddings=embeddings_list)
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Model error: {str(e)}")

     return True
+def estimate_token_count(texts: List[str]) -> int:
+    """Estimate token count for input texts (rough approximation)"""
+    # Simple estimation: ~1 token per 4 characters
+    total_chars = sum(len(text) for text in texts)
+    return max(1, total_chars // 4)
 @app.on_event("startup")
 async def startup_event():
     load_models()
             }
         }
+class EmbeddingObject(BaseModel):
+    object: str = Field("embedding", description="Object type")
+    index: int = Field(..., description="Index of the embedding")
+    embedding: List[float] = Field(..., description="Embedding vector")
+class UsageInfo(BaseModel):
+    total_tokens: int = Field(..., description="Total tokens processed")
+    prompt_tokens: int = Field(..., description="Prompt tokens")
+class OpenAIEmbeddingResponse(BaseModel):
+    model: str = Field(..., description="Model used for embeddings")
+    object: str = Field("list", description="Object type")
+    usage: UsageInfo = Field(..., description="Token usage information")
+    data: List[EmbeddingObject] = Field(..., description="List of embeddings")
+# Legacy response models (kept for backward compatibility if needed)
 class ElasticsearchInferenceResponse(BaseModel):
     embedding: List[float] = Field(..., description="Embedding vector for single input")
         "api_key_required": REQUIRE_API_KEY
     }
+@app.post("/embed", response_model=OpenAIEmbeddingResponse)
 async def create_embeddings_elasticsearch(
     request: ElasticsearchInferenceRequest,
     model: str = Query("jobbertv3", description="Model: jobbertv2, jobbertv3, jina, or voyage"),
             )
             embeddings = result.embeddings
+            # Calculate token usage
+            token_count = estimate_token_count(texts)
+            # Create OpenAI-compatible response
+            data = [
+                EmbeddingObject(index=i, embedding=emb)
+                for i, emb in enumerate(embeddings)
+            ]
+            return OpenAIEmbeddingResponse(
+                model="voyage-3",
+                object="list",
+                usage=UsageInfo(total_tokens=token_count, prompt_tokens=token_count),
+                data=data
+            )
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Voyage AI error: {str(e)}")
             embeddings_list = embeddings.tolist()
+            # Calculate token usage
+            token_count = estimate_token_count(texts)
+            # Create OpenAI-compatible response
+            data = [
+                EmbeddingObject(index=i, embedding=emb)
+                for i, emb in enumerate(embeddings_list)
+            ]
+            # Determine the full model name for response
+            model_display_name = {
+                "jobbertv2": "TechWolf/JobBERT-v2",
+                "jobbertv3": "TechWolf/JobBERT-v3",
+                "jina": "jina-embeddings-v3"
+            }.get(model_name, model_name)
+            return OpenAIEmbeddingResponse(
+                model=model_display_name,
+                object="list",
+                usage=UsageInfo(total_tokens=token_count, prompt_tokens=token_count),
+                data=data
+            )
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Model error: {str(e)}")