Update README.md
Browse files
README.md
CHANGED
|
@@ -53,11 +53,14 @@ qconfig_dict = {
|
|
| 53 |
# vllm
|
| 54 |
"model.decoder.layers.3.self_attn.qkv_proj": int4wo,
|
| 55 |
|
| 56 |
-
"model
|
| 57 |
-
"model
|
| 58 |
-
"model
|
|
|
|
|
|
|
|
|
|
| 59 |
# vllm
|
| 60 |
-
"model
|
| 61 |
|
| 62 |
"_default": intxwo,
|
| 63 |
}
|
|
|
|
| 53 |
# vllm
|
| 54 |
"model.decoder.layers.3.self_attn.qkv_proj": int4wo,
|
| 55 |
|
| 56 |
+
"re:model\.decoder\.layers\..+\.self_attn\.q_proj": float8dyn,
|
| 57 |
+
"re:model\.decoder\.layers\..+\.self_attn\.k_proj": float8dyn,
|
| 58 |
+
"re:model\.decoder\.layers\..+\.self_attn\.v_proj": float8dyn,
|
| 59 |
+
# this should not take effect and we'll fall back to _default
|
| 60 |
+
# since there is no full match (missing `j` at the end)
|
| 61 |
+
"re:model\.decoder\.layers\..+\.self_attn\.out_pro": float8dyn,
|
| 62 |
# vllm
|
| 63 |
+
"re:model\.decoder\.layers\..+\.self_attn\.qkv_proj": float8dyn,
|
| 64 |
|
| 65 |
"_default": intxwo,
|
| 66 |
}
|