songxxzp committed
Commit · bcc35f0
1 Parent(s): fe0674f

Add assertion when loading cpu and cuda kernel fails

Files changed: quantization.py +3 -3
quantization.py CHANGED
@@ -441,10 +441,10 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
     try:
         load_cpu_kernel(**kwargs)
     except:
-        print("Cannot load cpu kernel, don't use quantized model on cpu.")
         if kernels is None: # CUDA kernels failed
-            print("Cannot load cuda kernel, quantization failed:")
-
+            print("Cannot load cpu or cuda kernel, quantization failed:")
+            assert kernels is not None
+        print("Cannot load cpu kernel, don't use quantized model on cpu.")
 
     current_device = model.device
 
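
For context, the control flow this commit leaves behind can be sketched as a small, self-contained example. The `kernels` handle and `load_cpu_kernel()` below are simplified stand-ins for quantization.py's module-level CUDA-kernel object and CPU-kernel loader, so this is an illustration of the pattern under those assumptions, not the repository's actual implementation.

# Minimal sketch of the post-commit fallback logic, with hypothetical
# stand-ins for quantization.py's `kernels` handle and load_cpu_kernel().

kernels = None  # would normally be set by the CUDA kernel loader


def load_cpu_kernel(**kwargs):
    # Stand-in: pretend CPU kernel compilation always fails.
    raise RuntimeError("cpu kernel unavailable")


def check_kernels(**kwargs):
    try:
        load_cpu_kernel(**kwargs)
    except Exception:
        if kernels is None:  # CUDA kernels failed as well
            print("Cannot load cpu or cuda kernel, quantization failed:")
            assert kernels is not None  # abort instead of quantizing with no backend
        print("Cannot load cpu kernel, don't use quantized model on cpu.")


if __name__ == "__main__":
    try:
        check_kernels()
    except AssertionError:
        print("quantization aborted: no kernel backend available")

With both backends unavailable the assertion raises and quantization stops, rather than silently continuing with no kernels; if only the CPU kernel is missing, the function just prints a warning and carries on.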