Leonardo0711 committed
Commit 13c5ed6 · verified · 1 Parent(s): 1f4abcf

Upload 5 files

Files changed (5):
  1. .dockerignore +5 -0
  2. Dockerfile +29 -0
  3. README.md +13 -10
  4. app.py +136 -0
  5. requirements.txt +8 -0
.dockerignore ADDED
@@ -0,0 +1,5 @@
+ .git
+ __pycache__/
+ models/
+ *.ipynb
+ .env
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ FROM python:3.10-slim
+
+ ENV DEBIAN_FRONTEND=noninteractive \
+     PIP_NO_CACHE_DIR=1 \
+     HF_HUB_ENABLE_HF_TRANSFER=1 \
+     PORT=7860
+
+ # System dependencies (minimal)
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential curl ca-certificates ffmpeg libgl1 \
+     && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+
+ # Python deps
+ COPY requirements.txt .
+ RUN pip install -U pip && pip install --no-cache-dir -r requirements.txt
+
+ # Application code
+ COPY app.py ./app.py
+ COPY README.md ./README.md
+
+ EXPOSE 7860
+
+ # (Optional) Force a lighter model from Settings → Variables:
+ # ENV MODEL_PATTERN=qwen2.5-7b-instruct-q3_k_m-*.gguf
+
+ # Launch Uvicorn on the port Spaces provides in $PORT
+ CMD ["sh","-c","uvicorn app:app --host 0.0.0.0 --port ${PORT}"]
README.md CHANGED
@@ -1,10 +1,13 @@
- ---
- title: Astrohunters Llm
- emoji: 🌖
- colorFrom: yellow
- colorTo: indigo
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Astrohunters LLM API (Docker)
+ emoji: 🛰️
+ colorFrom: indigo
+ colorTo: purple
+ sdk: docker
+ license: apache-2.0
+ ---
+
+ Endpoints:
+ - `GET /healthz`
+ - `POST /run_predict` { "prompt": "...", "system": "" }
+ - `POST /run_predict_with_url` { "prompt": "...", "url": "https://...", "system": "" }
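A client sketch for the endpoints the README lists; the Space URL is a placeholder, and the `{"reply": ...}` response shape is an assumption (the README does not document it; see the note after `app.py` below):

```python
# Call the Space's HTTP API; SPACE_URL is hypothetical, adjust to your deployment.
import requests

SPACE_URL = "https://<user>-astrohunters-llm.hf.space"

# Liveness probe
print(requests.get(f"{SPACE_URL}/healthz", timeout=10).json())

# Plain prediction; an empty "system" falls back to the server-side default prompt
r = requests.post(
    f"{SPACE_URL}/run_predict",
    json={"prompt": "Explain what transit depth tells us about planet radius.", "system": ""},
    timeout=120,
)
r.raise_for_status()
print(r.json())
```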
app.py ADDED
@@ -0,0 +1,136 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ import os, glob, textwrap
+ from pathlib import Path
+
+ import gradio as gr
+ from huggingface_hub import snapshot_download
+ from llama_cpp import Llama
+ import requests
+ from bs4 import BeautifulSoup
+
+ # ===== Model (GGUF) =====
+ MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen2.5-7B-Instruct-GGUF")
+ # On CPU Basic you can drop to q3_k_m if RAM runs short
+ MODEL_PATTERN = os.getenv("MODEL_PATTERN", "qwen2.5-7b-instruct-q4_k_m-*.gguf")
+
+ LOCAL_DIR = Path("models"); LOCAL_DIR.mkdir(parents=True, exist_ok=True)
+ print(f"[Boot] Downloading {MODEL_REPO} with pattern {MODEL_PATTERN} ...")
+ snapshot_dir = snapshot_download(repo_id=MODEL_REPO, local_dir=str(LOCAL_DIR),
+                                  allow_patterns=[MODEL_PATTERN])
+ candidates = sorted(glob.glob(str(Path(snapshot_dir) / MODEL_PATTERN)))
+ if not candidates:
+     raise FileNotFoundError(f"No shards for {MODEL_PATTERN} in {snapshot_dir}")
+ MODEL_PATH = candidates[0]
+ print(f"[Boot] Using shard: {MODEL_PATH}")
+
+ N_THREADS = max(1, (os.cpu_count() or 2) - 1)
+
+ llm = Llama(
+     model_path=MODEL_PATH,
+     n_ctx=4096,
+     n_threads=N_THREADS,
+     n_batch=256,
+     n_gpu_layers=0,
+     verbose=False,
+ )
+
+ SYSTEM_DEFAULT = textwrap.dedent("""\
+     You are Astrohunters-Guide, an assistant that replies in Spanish.
+     - You answer accurately and never invent data.
+     - You can explain exoplanet results (period, duration, depth, SNR, radius).
+     - If given a URL, you read its content and use it as context.
+ """)
+
+ def fetch_url_text(url: str, max_chars: int = 6000) -> str:
+     try:
+         r = requests.get(url, timeout=15)
+         r.raise_for_status()
+         soup = BeautifulSoup(r.text, "html.parser")
+         for t in soup(["script", "style", "noscript"]): t.decompose()
+         txt = " ".join(soup.get_text(separator=" ").split())
+         return txt[:max_chars]
+     except Exception as e:
+         return f"[Could not load {url}: {e}]"
+
+ def run_llm(messages, temperature=0.6, top_p=0.95, max_tokens=768):
+     out = llm.create_chat_completion(
+         messages=messages,
+         temperature=temperature,
+         top_p=top_p,
+         max_tokens=max_tokens,
+         stream=False,
+     )
+     return out["choices"][0]["message"]["content"].strip()
+
+ # ====== API functions ======
+ def api_run_predict(prompt: str, system: str = "") -> str:
+     messages = [
+         {"role": "system", "content": system or SYSTEM_DEFAULT},
+         {"role": "user", "content": prompt},
+     ]
+     return run_llm(messages, max_tokens=512)
+
+ def api_run_predict_with_url(prompt: str, url: str = "", system: str = "") -> str:
+     web_ctx = fetch_url_text(url) if url else ""
+     user_msg = prompt if not web_ctx else f"{prompt}\n\n[WEB_CONTEXT]\n{web_ctx}"
+     messages = [
+         {"role": "system", "content": system or SYSTEM_DEFAULT},
+         {"role": "user", "content": user_msg},
+     ]
+     return run_llm(messages, max_tokens=700)
+
+ # ===== Chat UI =====
+ with gr.Blocks(title="Astrohunters LLM (Qwen2.5 7B)") as chat_ui:
+     gr.Markdown("## 🛰️ Astrohunters LLM (Qwen2.5 7B Instruct, GGUF — CPU Basic)")
+     with gr.Row():
+         with gr.Column(scale=3):
+             chat = gr.Chatbot(height=420, type="tuples")
+             with gr.Row():
+                 txt = gr.Textbox(placeholder="Type your question...", scale=4)
+                 btn = gr.Button("Send", scale=1, variant="primary")
+         with gr.Column(scale=2):
+             system_tb = gr.Textbox(label="System prompt", value=SYSTEM_DEFAULT, lines=10)
+             url_tb = gr.Textbox(label="URL (optional): load web content", placeholder="https://...")
+
+     def chat_infer(history, system_prompt, user, url_to_load):
+         web_ctx = fetch_url_text(url_to_load.strip()) if url_to_load and url_to_load.strip() else ""
+         messages = [{"role": "system", "content": system_prompt or SYSTEM_DEFAULT}]
+         for u, a in history:
+             if u: messages.append({"role": "user", "content": u})
+             if a: messages.append({"role": "assistant", "content": a})
+         user_msg = user or ""
+         if web_ctx:
+             user_msg = f"{user_msg}\n\n[WEB_CONTEXT]\n{web_ctx}"
+         messages.append({"role": "user", "content": user_msg})
+         reply = run_llm(messages, max_tokens=700)
+         history.append((user, reply))
+         return history, ""
+
+     btn.click(chat_infer, inputs=[chat, system_tb, txt, url_tb], outputs=[chat, txt])
+     txt.submit(chat_infer, inputs=[chat, system_tb, txt, url_tb], outputs=[chat, txt])
+
+ # ====== Named APIs (outside the Blocks) ======
+ api1 = gr.Interface(
+     fn=api_run_predict,
+     inputs=[gr.Textbox(label="prompt"), gr.Textbox(label="system")],
+     outputs=gr.Textbox(label="reply"),
+     api_name="run_predict",
+ )
+
+ api2 = gr.Interface(
+     fn=api_run_predict_with_url,
+     inputs=[gr.Textbox(label="prompt"), gr.Textbox(label="url"), gr.Textbox(label="system")],
+     outputs=gr.Textbox(label="reply"),
+     api_name="run_predict_with_url",
+ )
+
+ # Bundle everything into a single demo so Gradio registers the routes
+ demo = gr.TabbedInterface(
+     [chat_ui, api1, api2],
+     tab_names=["Chat", "API: run_predict", "API: run_predict_with_url"],
+ )
+
+ if __name__ == "__main__":
+     demo.queue(max_size=16).launch(server_name="0.0.0.0", server_port=7860)
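One mismatch worth flagging: the Dockerfile's CMD runs `uvicorn app:app`, but `app.py` defines only `demo` and launches it under `__main__`, so Uvicorn finds no `app` object to import. A minimal bridge sketch that also backs the endpoints the README advertises; the `{"reply": ...}` response shape and the `PredictIn`/`PredictUrlIn` names are illustrative assumptions, not part of this commit:

```python
# Hypothetical tail for app.py: expose an ASGI `app` so `uvicorn app:app` works.
from fastapi import FastAPI
from pydantic import BaseModel

class PredictIn(BaseModel):
    prompt: str
    system: str = ""

class PredictUrlIn(BaseModel):
    prompt: str
    url: str = ""
    system: str = ""

app = FastAPI()

@app.get("/healthz")
def healthz():
    # Cheap liveness probe; the model was already loaded at import time
    return {"status": "ok"}

@app.post("/run_predict")
def run_predict(body: PredictIn):
    return {"reply": api_run_predict(body.prompt, body.system)}

@app.post("/run_predict_with_url")
def run_predict_with_url(body: PredictUrlIn):
    return {"reply": api_run_predict_with_url(body.prompt, body.url, body.system)}

# Mount the Gradio UI last so the routes above take precedence
app = gr.mount_gradio_app(app, demo, path="/")
```

With such a bridge in place, the named Gradio endpoints remain callable as well, e.g. via `gradio_client` with `api_name="/run_predict"`.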
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio==4.44.0
+ huggingface_hub>=0.23.0
+ hf_transfer  # required: the Dockerfile sets HF_HUB_ENABLE_HF_TRANSFER=1, which errors without this package
+ llama-cpp-python==0.2.90
+ fastapi>=0.115.0
+ uvicorn>=0.24.0
+ requests
+ beautifulsoup4