# Qwen_AI_Coder / app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the model directly from the Hugging Face Hub.
# Note: this checkpoint is a 235B-parameter MoE model (22B active); it needs
# far more memory than a typical single-GPU machine or free Space provides.
model_name = "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # place layers on available GPU(s), else CPU
    torch_dtype=torch.float16,  # half-precision weights
    low_cpu_mem_usage=True,     # reduce peak RAM while loading
)
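
# A possible chat-style helper (a sketch, not part of the original file):
# Qwen chat/thinking checkpoints are normally prompted through the
# tokenizer's chat template rather than with raw completion text.
# apply_chat_template is a standard transformers API; the generate_chat name
# and the sampling values mirror the generate() function below.
def generate_chat(prompt):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs, max_new_tokens=500, temperature=0.7, top_p=0.9, do_sample=True
    )
    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)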
def generate(prompt):
    # Send inputs to the device the model was actually placed on,
    # rather than assuming CUDA is available
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
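
# A streaming variant (a sketch, not part of the original file), using
# transformers' TextIteratorStreamer: generation runs in a background thread
# while the streamer yields decoded text as it arrives. Gradio treats a
# generator function as incremental output, so wiring fn=generate_stream into
# the Interface below would stream tokens to the UI instead of waiting for
# the full completion.
from threading import Thread
from transformers import TextIteratorStreamer

def generate_stream(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    Thread(target=model.generate, kwargs=kwargs).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text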
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()
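# launch() serves on localhost:7860 by default; on Hugging Face Spaces the
# same call serves the hosted app. share=True (a standard Gradio option not
# used here) would create a temporary public link when running locally.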