Spaces: Runtime error (Space status shown by the scraped page)
# Model setup: load the tokenizer and the distributed StableBeluga2 model via
# the Petals network, then move the locally-held layers onto the GPU.
# NOTE(review): requires a CUDA device; `model.cuda()` will fail on CPU-only hosts.
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

model_name = "petals-team/StableBeluga2"  # 70B model served over the Petals swarm
# use_fast=False: use the slow (SentencePiece) tokenizer;
# add_bos_token=False: do not prepend a BOS token to encoded prompts.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
model = model.cuda()  # presumably only client-side layers move to GPU (Petals) — confirm
def gen(text):
    """Generate a short completion for *text* with the distributed model.

    Args:
        text: Prompt string (comes from the Gradio textbox).

    Returns:
        The decoded output string: the prompt followed by up to 16
        newly generated tokens.
    """
    # Bug fix: the original ignored `text` and always tokenized the
    # hard-coded prompt 'A cat in French is "', so the UI input never
    # reached the model. Tokenize the caller's text instead.
    inputs = tokenizer(text, return_tensors="pt")["input_ids"].cuda()
    outputs = model.generate(inputs, max_new_tokens=16)
    return tokenizer.decode(outputs[0])
import gradio as gr

# Minimal UI: one textbox in, one markdown panel out, wired through gen().
with gr.Blocks() as demo:
    gr.Markdown("# Run 70B models on CPU *\n\n* (sort of)\n\nPlease do not expect privacy when using this tool as inputs and outputs may be exposed.")
    prompt_box = gr.Textbox(label="INPUT")
    run_button = gr.Button("GO")
    result_panel = gr.Markdown("Output...")
    # Clicking the button feeds the textbox value to gen() and renders the result.
    run_button.click(gen, inputs=prompt_box, outputs=result_panel)

# queue() serializes requests so concurrent users don't contend for the model.
demo.queue().launch()