# --- Provenance (captured from the Hugging Face file page; kept as comments so the file parses as YAML) ---
# Author: geoffmunn
# Commit: "Add Q2–Q8_0 quantized models with per-model cards, MODELFILE, CLI examples, and auto-upload"
# Revision: 632690f (verified)
# Size: 520 Bytes
# MODELFILE for Qwen3Guard-Stream-0.6B
# Used by LM Studio, OpenWebUI, etc.

# Context window size in tokens.
context_length: 2048
# This is a classifier model, not an embedding model.
embedding: false
# NOTE(review): presumably requests f16 precision on CPU — confirm the exact
# semantics against the consuming tool's MODELFILE schema.
f16: cpu

# Prompt template for real-time streaming classification.
# Expected output format from the model:
#   {"safe": true/false, "categories": [...], "partial": bool, "confidence": float}
# Folded block scalar (>-): content must be indented deeper than the key,
# and the trailing newline is stripped.
prompt_template: >-
  AnalyzeStream: {prompt}

# Default parameters for reliable streaming classification.
# temperature 0.0 + top_p 1.0 make generation deterministic (greedy decoding).
temperature: 0.0
top_p: 1.0
top_k: 30
repeat_penalty: 1.0
num_keep: 1
max_tokens: 128

# Stop tokens (optional). Written as a sequence: duplicate `stop:` keys are
# invalid YAML 1.2 and most parsers silently keep only the last value,
# which would drop the "{" stop token.
stop:
  - "{"
  - "}"