Spaces:

felixbet
/

biobert-emb

Running

felixbet committed on Nov 6

Commit

25bfd3b

verified ·

1 Parent(s): ac7f7f1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,30 +1,39 @@
 import os
-from transformers import BertTokenizer, BertConfig, TFBertModel
 from fastapi import FastAPI
 app = FastAPI()
 MODEL_DIR = os.environ.get("MODEL_DIR", "/app/bert_tf")
-# Guard: create dir if missing; avoid listing non-existent paths
 os.makedirs(MODEL_DIR, exist_ok=True)
-# Probe one level deep only if there are entries
-candidates = [MODEL_DIR]
-try:
-    for x in os.listdir(MODEL_DIR):
-        p = os.path.join(MODEL_DIR, x)
-        if os.path.isdir(p):
-            candidates.append(p)
-except FileNotFoundError:
-    pass
-for d in candidates:
-    if (os.path.isfile(os.path.join(d, "vocab.txt"))
-        and os.path.isfile(os.path.join(d, "config.json"))):
-        MODEL_DIR = d
-        break
-tok  = BertTokenizer(vocab_file=f"{MODEL_DIR}/vocab.txt", do_lower_case=True)
-cfg  = BertConfig.from_json_file(f"{MODEL_DIR}/config.json")
 model= TFBertModel.from_pretrained(MODEL_DIR, from_tf=True, config=cfg)

 import os
 from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import BertTokenizer, BertConfig, TFBertModel
+import tensorflow as tf
 app = FastAPI()
+# start.sh exports this after extraction; keep a fallback for local/dev
 MODEL_DIR = os.environ.get("MODEL_DIR", "/app/bert_tf")
 os.makedirs(MODEL_DIR, exist_ok=True)
+# extra safety: if no vocab here, search all subdirectories recursively (os.walk has no depth limit)
+if not os.path.isfile(os.path.join(MODEL_DIR, "vocab.txt")):
+    for root, dirs, files in os.walk(MODEL_DIR):
+        if "vocab.txt" in files and "config.json" in files:
+            MODEL_DIR = root
+            break
+print("[app] Using MODEL_DIR:", MODEL_DIR)
+tok  = BertTokenizer(vocab_file=os.path.join(MODEL_DIR, "vocab.txt"), do_lower_case=True)
+cfg  = BertConfig.from_json_file(os.path.join(MODEL_DIR, "config.json"))
 model= TFBertModel.from_pretrained(MODEL_DIR, from_tf=True, config=cfg)
+class EmbReq(BaseModel):
+    input: str
+@app.get("/health")
+def health():
+    return {"ok": True}
+@app.post("/v1/embeddings")
+def emb(req: EmbReq):
+    ids = tok(req.input, return_tensors="tf", truncation=True, max_length=128)
+    out = model(**ids)
+    # [CLS] pooled output
+    vec = out.pooler_output[0].numpy().tolist()
+    return {"embedding": vec, "dim": len(vec)}