#!/usr/bin/env python3
"""
Test script for the Rax 3.5 Chat model
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


def test_rax_chat():
    print("Loading Rax 3.5 Chat model...")

    # Load the model and tokenizer from the current directory
    tokenizer = AutoTokenizer.from_pretrained(".")
    model = AutoModelForCausalLM.from_pretrained(
        ".",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    print("Model loaded successfully!")

    # Test conversation
    messages = [
        {"role": "system", "content": "You are Rax, a helpful AI assistant."},
        {"role": "user", "content": "Hello! Can you tell me about yourself?"},
    ]

    # Apply the chat template to build the prompt string
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(f"Input: {input_text}")

    # Tokenize and move the inputs to the model's device; without this,
    # generation fails with a device mismatch when device_map="auto"
    # places the model on a GPU while the inputs stay on the CPU.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate a response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    print(f"Rax: {response}")


if __name__ == "__main__":
    test_rax_chat()