import time
import os
import psutil
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_cpp import Llama
from statistics import mean, stdev
# ---------- simple reasoning dataset ----------
TEST_PROMPTS = [
    ("27 * 14 = ?", "378"),
    ("102 + 589 = ?", "691"),
    ("If a train travels 60 km in 1 hour, how far in 5 hours?", "300"),
    ("15^2 = ?", "225"),
    ("1000 / 25 = ?", "40"),
]
CPU_TDP_WATTS = 65            # typical desktop CPU estimate
ELECTRICITY_PRICE_KWH = 0.90  # price per kWh; adjust for your region
def load_model(model_type, repo_id, file_name, n_ctx):
    if model_type == "transformers":
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
        model.eval()
        return (tokenizer, model)
    else:
        # Download the GGUF file once, then reuse the local copy.
        if not os.path.exists(file_name):
            from huggingface_hub import hf_hub_download
            hf_hub_download(repo_id=repo_id, filename=file_name, local_dir="./")
        llm = Llama(model_path=file_name, n_ctx=n_ctx)
        return llm
def generate(model_type, model_obj, prompt, max_new_tokens, temperature, top_p, top_k):
    if model_type == "transformers":
        tokenizer, model = model_obj
        inputs = tokenizer(prompt, return_tensors="pt")
        start = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,  # required for temperature/top_p/top_k to take effect
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
            )
        end = time.time()
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Count only newly generated tokens, not the prompt tokens echoed in the output.
        tokens = outputs.shape[-1] - inputs["input_ids"].shape[-1]
        return text, end - start, tokens
    else:
        start = time.time()
        out = model_obj(prompt, max_tokens=max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k)
        end = time.time()
        text = out["choices"][0]["text"]
        # llama-cpp-python reports exact token counts in "usage";
        # len(text.split()) would only approximate them with word counts.
        tokens = out["usage"]["completion_tokens"]
        return text, end - start, tokens
def reasoning_score(model_type, model_obj):
    correct = 0
    for q, ans in TEST_PROMPTS:
        out, _, _ = generate(model_type, model_obj, q, 32, 0.2, 0.9, 40)
        if ans in out:
            correct += 1
    return correct / len(TEST_PROMPTS)
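# Note: the substring check above is deliberately lenient; a short numeric
# answer such as "40" can match the output incidentally, so treat the score
# as a rough sanity signal rather than a rigorous accuracy metric.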
def energy_cost(seconds):
    kwh = (CPU_TDP_WATTS * seconds) / (1000 * 3600)
    return kwh * ELECTRICITY_PRICE_KWH
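# Worked example with the constants above: 10 s of inference at 65 W is
# (65 * 10) / 3_600_000 ≈ 0.00018 kWh, i.e. roughly 0.00016 in local
# currency at 0.90 per kWh.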
def benchmark(model_type, repo_id, file_name="", runs=3):
    process = psutil.Process(os.getpid())
    model_obj = load_model(model_type, repo_id, file_name, 2048)
    mem_mb = process.memory_info().rss / (1024 ** 2)  # resident memory after model load
    times = []
    toks = []
    for _ in range(runs):
        text, t, tok = generate(model_type, model_obj, "Explain gravity simply.", 128, 0.7, 0.9, 40)
        times.append(t)
        toks.append(tok / t)
    avg_time = mean(times)
    toksec = mean(toks)
    stability = stdev(times) if len(times) > 1 else 0
    score = reasoning_score(model_type, model_obj)
    energy = energy_cost(avg_time)
    return {
        "avg_inference_time": round(avg_time, 2),
        "tokens_per_sec": round(toksec, 2),
        "stability_std": round(stability, 3),
        "reasoning_score": round(score, 2),
        "memory_mb": round(mem_mb, 1),
        "energy_cost_estimate": round(energy, 5),
    }
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--type", choices=["transformers", "gguf"], required=True)
    parser.add_argument("--repo", required=True)
    parser.add_argument("--file", default="", help="GGUF filename (required when --type gguf)")
    args = parser.parse_args()
    if args.type == "gguf" and not args.file:
        parser.error("--file is required when --type is gguf")
    result = benchmark(args.type, args.repo, args.file)
    print(result)
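# Example invocations (the script name and model IDs below are illustrative;
# substitute any repo/file you actually use):
#   python benchmark.py --type transformers --repo gpt2
#   python benchmark.py --type gguf \
#       --repo TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF \
#       --file tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf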