manueldeprada
/

sampling_with_kvcache_hf_helpers

Text Generation

custom_generate

text-generation-inference

Model card · Files and versions

manueldeprada HF Staff committed on Jun 27

Commit

97c3e33

·

1 Parent(s): 47784f5

update

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -48,19 +48,19 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", devic
 inputs = tokenizer(["The quick brown"], return_tensors="pt").to(model.device)
 # Basic sampling
-gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache", trust_remote_code=True)
 # With temperature
-gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache", temperature=0.8, trust_remote_code=True)
 # With top-k
-gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache", top_k=50, trust_remote_code=True)
 # With top-p (nucleus sampling)
-gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache", top_p=0.9, trust_remote_code=True)
 # Greedy decoding (no sampling)
-gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache", do_sample=False, trust_remote_code=True)
 # Get detailed output with probabilities
 gen_out = model.generate(

 inputs = tokenizer(["The quick brown"], return_tensors="pt").to(model.device)
 # Basic sampling
+gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache_hf_helpers", trust_remote_code=True)
 # With temperature
+gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache_hf_helpers", temperature=0.8, trust_remote_code=True)
 # With top-k
+gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache_hf_helpers", top_k=50, trust_remote_code=True)
 # With top-p (nucleus sampling)
+gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache_hf_helpers", top_p=0.9, trust_remote_code=True)
 # Greedy decoding (no sampling)
+gen_out = model.generate(**inputs, custom_generate="manueldeprada/sampling_with_kvcache_hf_helpers", do_sample=False, trust_remote_code=True)
 # Get detailed output with probabilities
 gen_out = model.generate(