---
tags:
  - text-generation
  - transformers.js
  - 8bit
  - qwen
  - quantized
  - multilanguage
license: other
---

# qwen1.5-0.5b-chat

## Model Description

This is a quantized version of Alibaba Cloud's Qwen 1.5 0.5B Chat model, optimized for efficient inference on devices with limited memory. Quantization reduces the model's size and speeds up inference by storing weights as 8-bit integers instead of 32-bit floating-point numbers.
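
As a rough illustration of the idea only (not the exact scheme used to produce the ONNX file, which may use per-channel scales or zero-points), symmetric 8-bit linear quantization maps each floating-point weight to an integer in [-127, 127] with a single per-tensor scale:

```js
// Minimal sketch of symmetric 8-bit linear quantization.
// Illustrative only; the actual quantization applied to this model may differ.
function quantize8bit(weights) {
    const maxAbs = Math.max(...weights.map(Math.abs));
    const scale = maxAbs / 127;                              // one scale per tensor
    const q = Int8Array.from(weights, w => Math.round(w / scale));
    return { q, scale };
}

function dequantize8bit({ q, scale }) {
    // Approximate reconstruction of the original float weights
    return Float32Array.from(q, v => v * scale);
}

const { q, scale } = quantize8bit([0.12, -0.98, 0.33, 0.05]);
console.log(q, scale, dequantize8bit({ q, scale }));
```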

## Files

  • config.json
  • tokenizer.json
  • tokenizer_config.json
  • onnx/decoder_model_merged_quantized.onnx

## Usage in Transformers.js

```js
import { pipeline } from '@xenova/transformers';

async function runTextGeneration() {
    // Load the quantized ONNX weights from this repository
    const generator = await pipeline(
        'text-generation',
        'jestevesv/qwen1.5-0.5b-chat',
        { quantized: true }
    );

    // Spanish prompt: "Hello, how are you today?"
    const prompt = 'Hola, ¿cómo estás hoy?';

    const output = await generator(prompt, {
        max_length: 100,   // total length of prompt + generated tokens
        do_sample: true,   // sample instead of greedy decoding
        temperature: 0.7,
    });

    console.log(output);
}

runTextGeneration().catch(err => {
    console.error('Error:', err);
});
```