# Snapshot of a Hugging Face Space app file (874 bytes, commit 449c0ae).
# (The original page's line-number gutter and size header were stripped —
# they were viewer artifacts, not part of the program.)
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

# Checkpoint served over the public Petals swarm — presumably the remote
# peers host the transformer blocks; verify against the Petals docs.
model_name = "petals-team/StableBeluga2"

# Slow (SentencePiece) tokenizer, with no automatic BOS token prepended.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=False,
    add_bos_token=False,
)

# Local half of the distributed model lives on the GPU.
model = AutoDistributedModelForCausalLM.from_pretrained(model_name).cuda()
def gen(text):
    """Generate up to 16 new tokens continuing *text* and return the decoded string.

    Bug fix: the original tokenized a hard-coded prompt
    ('A cat in French is "') and silently ignored *text*, so the Gradio
    textbox had no effect on the output.
    """
    # Guard against an empty prompt: with add_bos_token=False an empty
    # string would produce an empty input_ids tensor and generate() may fail.
    if not text:
        return ""
    inputs = tokenizer(text, return_tensors="pt")["input_ids"].cuda()
    outputs = model.generate(inputs, max_new_tokens=16)
    # Decode the full sequence (prompt + continuation) for display.
    return tokenizer.decode(outputs[0])
import gradio as gr

# Minimal web front-end: a prompt box, a trigger button, and a markdown
# panel that displays the model's continuation.
with gr.Blocks() as demo:
    gr.Markdown("# Run 70B models on CPU *\n\n* (sort of)\n\nPlease do not expect privacy when using this tool as inputs and outputs may be exposed.")
    prompt_box = gr.Textbox(label="INPUT")
    run_button = gr.Button("GO")
    result_panel = gr.Markdown("Output...")
    # Wire the button: clicking runs gen() on the textbox contents and
    # renders the returned string into the markdown panel.
    run_button.click(gen, inputs=prompt_box, outputs=result_panel)

# queue() enables request queuing before the server starts accepting traffic.
demo.queue().launch()