Joel Lundgren committed on
Commit
c1ec8e5
·
1 Parent(s): dc90ed9
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -71,7 +71,11 @@ def get_llm(model_name):
71
  "gemma3:1b": "google/gemma-3-1b-it"
72
  }
73
  tokenizer = AutoTokenizer.from_pretrained(original_model_map[model_name])
74
- model = ORTModelForCausalLM.from_pretrained(hf_model_name)
 
 
 
 
75
 
76
  llm_cache[model_name] = (model, tokenizer)
77
  return model, tokenizer
 
71
  "gemma3:1b": "google/gemma-3-1b-it"
72
  }
73
  tokenizer = AutoTokenizer.from_pretrained(original_model_map[model_name])
74
+ model = ORTModelForCausalLM.from_pretrained(
75
+ hf_model_name,
76
+ file_name="model_quantized.onnx",
77
+ use_cache=False
78
+ )
79
 
80
  llm_cache[model_name] = (model, tokenizer)
81
  return model, tokenizer