---
tags:
  - text-generation
  - transformers.js
  - 8bit
  - qwen
  - quantized
  - multilanguage
license: other
---

# qwen1.5-0.5b-chat

## Model Description

This is a quantized version of Alibaba Cloud's Qwen 1.5 0.5B Chat model, optimized for efficient inference on devices with limited memory. Quantization reduces the model's size and speeds up inference by storing weights as 8-bit integers instead of 32-bit floating-point numbers.
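
As a rough illustration of the idea only (not the exact scheme used to produce the ONNX file, which may use per-channel scales or zero-points), symmetric 8-bit linear quantization maps each floating-point weight to an integer in [-127, 127] with a single per-tensor scale:

```js
// Minimal sketch of symmetric 8-bit linear quantization.
// Illustrative only; the actual quantization applied to this model may differ.
function quantize8bit(weights) {
    const maxAbs = Math.max(...weights.map(Math.abs));
    const scale = maxAbs / 127;                              // one scale per tensor
    const q = Int8Array.from(weights, w => Math.round(w / scale));
    return { q, scale };
}

function dequantize8bit({ q, scale }) {
    // Approximate reconstruction of the original float weights
    return Float32Array.from(q, v => v * scale);
}

const { q, scale } = quantize8bit([0.12, -0.98, 0.33, 0.05]);
console.log(q, scale, dequantize8bit({ q, scale }));
```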

## Files

  • config.json
  • tokenizer.json
  • tokenizer_config.json
  • onnx/decoder_model_merged_quantized.onnx

## Usage in Transformers.js

```js
import { pipeline } from '@xenova/transformers';

async function runTextGeneration() {
    // Load the quantized ONNX weights from this repository
    const generator = await pipeline(
        'text-generation',
        'jestevesv/qwen1.5-0.5b-chat',
        { quantized: true }
    );

    // Spanish prompt: "Hello, how are you today?"
    const prompt = 'Hola, ¿cómo estás hoy?';

    const output = await generator(prompt, {
        max_length: 100,   // total length of prompt + generated tokens
        do_sample: true,   // sample instead of greedy decoding
        temperature: 0.7,
    });

    console.log(output);
}

runTextGeneration().catch(err => {
    console.error('Error:', err);
});
```