Granite-4.0-Nano-WebGPU

Running

App Files Files Community

hantech committed 13 days ago

Commit

bfc3921

verified ·

1 Parent(s): 55b4471

Update worker.js

Browse files

Files changed (1) hide show

worker.js +62 -63

worker.js CHANGED Viewed

@@ -1,60 +1,54 @@
 import { pipeline, env, cos_sim } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.19/dist/transformers.min.js';
-// Cấu hình WebGPU
-env.backends.onnx.wasm.proxy = false;
-// --- CẤU HÌNH MODEL (KHÔNG ĐỔI THEO YÊU CẦU) ---
-// 1. Embedding Model
-const EMBEDDING_MODEL_ID = 'onnx-community/embeddinggemma-300m-ONNX';
-// 2. LLM Model: Granite 4.0 Nano
-// Lưu ý: Nếu phiên bản ONNX của Granite 4.0 chưa public dưới ID này,
-// bạn cần trỏ tới đúng repo onnx (ví dụ ibm-granite/granite-3.0-2b-instruct nếu 4.0 chưa có onnx).
-// Tuy nhiên, tôi giữ nguyên tham chiếu "Granite" như yêu cầu.
-const LLM_MODEL_ID = 'onnx-community/granite-4.0-1b-ONNX-web'; // Kiểm tra lại tên chính xác trên HF Hub nếu lỗi
 let extractor = null;
 let generator = null;
-let vectorStore = []; // Lưu trữ chunks và vectors: { text: string, vector: number[] }
 async function initModels() {
     try {
-        self.postMessage({ type: 'status', payload: "Đang tải Embedding Model (có thể lâu lần đầu)..." });
-        // Thêm cờ { use_cache: true } dù mặc định nó đã có, để đảm bảo
         extractor = await pipeline('feature-extraction', EMBEDDING_MODEL_ID, {
             device: 'webgpu',
             dtype: 'fp32',
-            use_cache: true
         });
-        self.postMessage({ type: 'status', payload: "Đang tải LLM Granite 4.0 (Model nặng, vui lòng chờ)..." });
         generator = await pipeline('text-generation', LLM_MODEL_ID, {
             device: 'webgpu',
             dtype: 'q4',
             use_external_data_format: true,
-            // Thêm progress_callback để UI không tưởng là bị treo
-            progress_callback: (data) => {
-                if (data.status === 'progress') {
-                    // Gửi tiến độ về main thread để hiện loading bar nếu cần
-                    self.postMessage({
-                        type: 'download_progress',
-                        payload: { file: data.file, progress: data.progress }
-                    });
-                }
-            }
         });
         self.postMessage({ type: 'init_complete' });
     } catch (e) {
-        console.error(e); // Log chi tiết ra console
-        self.postMessage({ type: 'error', payload: "Lỗi tải model (Kiểm tra Console F12): " + e.message });
     }
 }
-// Xử lý chunking văn bản
 function chunkText(text, chunkSize = 300, overlap = 50) {
     const sentences = text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) || [text];
     let chunks = [];
     let currentChunk = "";
@@ -62,49 +56,57 @@ function chunkText(text, chunkSize = 300, overlap = 50) {
     for (let sentence of sentences) {
         if ((currentChunk + sentence).length > chunkSize) {
             chunks.push(currentChunk.trim());
-            currentChunk = sentence.slice(-overlap); // Overlap đơn giản
         } else {
             currentChunk += " " + sentence;
         }
     }
     if (currentChunk) chunks.push(currentChunk.trim());
     return chunks;
 }
-// Tạo embeddings cho văn bản
 async function ingestText(text) {
     const chunks = chunkText(text);
-    vectorStore = []; // Reset store
-    for (const chunk of chunks) {
-        const output = await extractor(chunk, { pooling: 'mean', normalize: true });
         vectorStore.push({
-            text: chunk,
             vector: output.data
         });
     }
-    console.log(`Đã index ${vectorStore.length} đoạn văn bản.`);
 }
-// Tìm kiếm RAG
 async function retrieve(query) {
     const queryOutput = await extractor(query, { pooling: 'mean', normalize: true });
     const queryVector = queryOutput.data;
-    // Tính Cosine Similarity
-    const scored = vectorStore.map(item => {
-        return {
-            text: item.text,
-            score: cos_sim(queryVector, item.vector)
-        };
-    });
-    // Lấy top 3 đoạn liên quan nhất
     scored.sort((a, b) => b.score - a.score);
-    return scored.slice(0, 3).map(i => i.text).join("\n\n");
 }
-// Xử lý tin nhắn từ Main Thread
 self.onmessage = async (e) => {
     if (!extractor || !generator) {
         await initModels();
@@ -115,35 +117,33 @@ self.onmessage = async (e) => {
     if (type === 'ingest_text') {
         await ingestText(payload);
     } else if (type === 'query') {
-        // 1. Retrieve Context
         const context = await retrieve(payload);
-        // 2. Tạo Prompt cho Granite
-        // Định dạng prompt cơ bản cho instruction tuned model
         const prompt = `<|system|>
-Bạn là trợ lý AI hữu ích. Hãy trả lời câu hỏi dựa trên ngữ cảnh được cung cấp bên dưới bằng Tiếng Việt.
-Ngữ cảnh:
 ${context}
 <|user|>
 ${payload}
-<|assistant|>
-`;
-        // 3. Generate Answer
         try {
             const output = await generator(prompt, {
                 max_new_tokens: 256,
-                temperature: 0.7,
                 do_sample: true,
             });
-            // Lấy phần trả lời sau tag assistant (tuỳ thuộc format model)
             let answer = output[0].generated_text;
-            // Cắt bớt phần prompt nếu cần thiết
             if (answer.includes("<|assistant|>")) {
                 answer = answer.split("<|assistant|>")[1];
             }
             self.postMessage({ type: 'answer', payload: answer });
         } catch (err) {
             self.postMessage({ type: 'error', payload: err.message });
@@ -151,5 +151,4 @@ ${payload}
     }
 };
-// Khởi tạo ngay khi worker chạy
 initModels();

 import { pipeline, env, cos_sim } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.19/dist/transformers.min.js';
+// Runtime configuration: do not resolve models from local paths, and keep downloaded files in the browser cache.
+env.allowLocalModels = false;
+env.useBrowserCache = true;
+// Hàm log helper gửi về Main Thread
+function log(msg) {
+    self.postMessage({ type: 'log', payload: msg });
+}
 // Model id handed to the 'feature-extraction' pipeline in initModels().
+const EMBEDDING_MODEL_ID = 'onnx-community/embeddinggemma-300m-ONNX';
+// Granite 4.0 model id for the 'text-generation' pipeline (if loading fails with a 404, try ibm-granite/granite-3.0-2b-instruct).
+const LLM_MODEL_ID = 'ibm-granite/granite-4.0-350m-instruct';
 // NOTE(review): initModels() requests dtype 'q4' for this id — confirm the repo actually ships matching ONNX weights.
 // Pipeline handles; both stay null until initModels() assigns them.
 let extractor = null;
 let generator = null;
 // In-memory index filled by ingestText(): one { text, vector } entry per chunk.
+let vectorStore = [];
 // Shared handle for the model load that is currently running or has succeeded;
 // null when no load is in flight and none has completed successfully.
 let modelLoadPromise = null;
 /**
  * Load the embedding pipeline, then the LLM pipeline, reporting status,
  * download progress and log lines to the main thread along the way.
  *
  * Never rejects: any failure is posted as `{ type: 'error', payload }`.
  *
  * @returns {Promise<boolean>} true when both pipelines were loaded, false on failure.
  */
 async function loadModels() {
     // Both pipelines forward their raw progress events to the main thread.
     const reportProgress = (data) => self.postMessage({ type: 'download_progress', payload: data });
     try {
         log("🚀 Bắt đầu tải Embedding Model (Gemma)...");
         self.postMessage({ type: 'status', payload: "Đang tải Embedding..." });
         extractor = await pipeline('feature-extraction', EMBEDDING_MODEL_ID, {
             device: 'webgpu',
             dtype: 'fp32',
             progress_callback: reportProgress
         });
         log("✅ Embedding Model đã tải xong.");
         log("🚀 Bắt đầu tải LLM (Granite 4.0)...");
         self.postMessage({ type: 'status', payload: "Đang tải LLM..." });
         generator = await pipeline('text-generation', LLM_MODEL_ID, {
             device: 'webgpu',
             dtype: 'q4',
             use_external_data_format: true,
             progress_callback: reportProgress
         });
         log("✅ LLM đã tải xong.");
         self.postMessage({ type: 'init_complete' });
         return true;
     } catch (e) {
         // Anything can be thrown; only Error instances are guaranteed to carry .message.
         const reason = e instanceof Error ? e.message : String(e);
         self.postMessage({ type: 'error', payload: reason });
         return false;
     }
 }
 /**
  * Ensure the embedding and LLM pipelines are loaded.
  *
  * Concurrent callers share a single in-flight load, so invoking this again
  * while a load is still running does not download the models a second time.
  * After a failed load the shared handle is cleared, so the next call retries.
  *
  * @returns {Promise<void>} Resolves once the (shared) load attempt has settled;
  *     failures are reported via an 'error' message rather than a rejection.
  */
 async function initModels() {
     if (modelLoadPromise === null) {
         modelLoadPromise = loadModels().then((loaded) => {
             // Drop the handle on failure so a later call can try again.
             if (!loaded) modelLoadPromise = null;
         });
     }
     return modelLoadPromise;
 }
+// Hàm chia nhỏ văn bản (Chunking) có log
 function chunkText(text, chunkSize = 300, overlap = 50) {
+    log(`Bắt đầu chia nhỏ văn bản dài ${text.length} ký tự...`);
     const sentences = text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) || [text];
     let chunks = [];
     let currentChunk = "";
     for (let sentence of sentences) {
         if ((currentChunk + sentence).length > chunkSize) {
             chunks.push(currentChunk.trim());
+            currentChunk = sentence.slice(-overlap);
         } else {
             currentChunk += " " + sentence;
         }
     }
     if (currentChunk) chunks.push(currentChunk.trim());
+    log(`-> Đã chia thành ${chunks.length} chunks.`);
     return chunks;
 }
+// Tạo Embeddings
 async function ingestText(text) {
     const chunks = chunkText(text);
+    vectorStore = [];
+    log("🔄 Đang tạo vector cho từng chunk (Quá trình này tốn GPU)...");
+    let startTime = performance.now();
+    for (let i = 0; i < chunks.length; i++) {
+        const output = await extractor(chunks[i], { pooling: 'mean', normalize: true });
         vectorStore.push({
+            text: chunks[i],
             vector: output.data
         });
+        // Log mỗi 5 chunks để không spam
+        if ((i + 1) % 5 === 0) log(`Processed ${i + 1}/${chunks.length} chunks...`);
     }
+    let endTime = performance.now();
+    log(`✅ Indexing hoàn tất trong ${((endTime - startTime)/1000).toFixed(2)}s.`);
 }
 /**
  * Find the indexed chunks most similar to a query and join them into one
  * context string.
  *
  * The query is embedded with the same pooling/normalisation options used at
  * indexing time, every entry of `vectorStore` is scored with `cos_sim`, and
  * the best `topK` chunk texts are returned in descending score order.
  *
  * @param {string} query - User question to search for.
  * @param {number} [topK=3] - Maximum number of chunks to return (positive integer).
  * @returns {Promise<string>} Selected chunk texts separated by blank lines;
  *     an empty string when nothing has been indexed.
  */
 async function retrieve(query, topK = 3) {
     log(`🔍 Đang tìm kiếm ngữ cảnh cho: "${query}"`);
     if (vectorStore.length === 0) {
         // Nothing to rank, so skip the query embedding entirely.
         log(`-> Tìm thấy 0 đoạn khớp nhất (Scores: )`);
         return "";
     }
     const queryOutput = await extractor(query, { pooling: 'mean', normalize: true });
     const queryVector = queryOutput.data;
     const scored = vectorStore.map((item) => ({
         text: item.text,
         score: cos_sim(queryVector, item.vector)
     }));
     scored.sort((a, b) => b.score - a.score);
     const best = scored.slice(0, topK);
     // Report the number of chunks actually returned, which may be fewer than topK.
     log(`-> Tìm thấy ${best.length} đoạn khớp nhất (Scores: ${best.map((i) => i.score.toFixed(2)).join(', ')})`);
     return best.map((i) => i.text).join("\n\n");
 }
 // Worker message handler: handles 'ingest_text' (index a document) and
 // 'query' (retrieve context, prompt the LLM, post the answer) messages.
 // NOTE(review): this diff view elides lines between hunks inside the handler
 // (the binding of `type` and `payload` — presumably destructured from e.data —
 // and some closing braces), so the block is not syntactically complete as shown.
 self.onmessage = async (e) => {
     // Load the models on demand when either pipeline is still unset.
     // NOTE(review): initModels() is also called at module load; confirm it tolerates
     // being invoked again while that first load is still in flight.
     if (!extractor || !generator) {
         await initModels();
     if (type === 'ingest_text') {
         await ingestText(payload);
     } else if (type === 'query') {
         // Only warns when nothing has been indexed; retrieval and generation still run.
+        if (vectorStore.length === 0) {
+            log("⚠️ Cảnh báo: Chưa có dữ liệu PDF nào được index.");
+        }
         const context = await retrieve(payload);
         // Prompt layout: system instruction plus retrieved context, the user question,
         // then an open <|assistant|> tag for the model to continue from.
         const prompt = `<|system|>
+Bạn là trợ lý AI hữu ích. Trả lời dựa trên ngữ cảnh sau bằng Tiếng Việt:
 ${context}
 <|user|>
 ${payload}
+<|assistant|>`;
+        log("🤖 LLM đang suy nghĩ...");
         try {
             // Sampled generation capped at 256 new tokens.
             const output = await generator(prompt, {
                 max_new_tokens: 256,
+                temperature: 0.6,
                 do_sample: true,
+                // Log each time a new token is generated (beware of log spam if enabled)
+                // callback_function: (x) => log("Generated token...")
             });
             let answer = output[0].generated_text;
             // If the output contains the prompt, keep only the segment after the first <|assistant|> tag.
             if (answer.includes("<|assistant|>")) {
                 answer = answer.split("<|assistant|>")[1];
             }
             self.postMessage({ type: 'answer', payload: answer });
         } catch (err) {
             // Generation failures are reported to the main thread as an 'error' message.
             self.postMessage({ type: 'error', payload: err.message });
     }
 };
 // Start loading the models as soon as the worker script runs.
 initModels();