Upload 3 files
Browse files- .gitattributes +1 -0
- higgs-gpt2.hgs +3 -0
- higgs_x.py +140 -0
- loader.py +135 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
higgs-gpt2.hgs filter=lfs diff=lfs merge=lfs -text
|
higgs-gpt2.hgs
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa72a226db892e0d6304fb1627abab33158608926031f5c038eebe6cc06f7a11
|
| 3 |
+
size 163049198
|
higgs_x.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torch.amp import GradScaler, autocast
|
| 4 |
+
from torch.quantization import quantize_dynamic, default_dynamic_qconfig
|
| 5 |
+
from loader import HGSFormat
|
| 6 |
+
|
| 7 |
+
class ALMPQOptimizer:
    """Track a precision label per module and apply dynamic INT8 quantization.

    Every entry of ``model.named_modules()`` receives one of the labels listed
    in ``precision_levels``.  Inside this class only the ``'int8'`` label has a
    direct effect (dynamic quantization); the log message emitted by
    ``apply_quantization`` says FP16/BF16 is handled through AMP, which is
    done by whoever drives the model, not here.
    """

    def __init__(self, model: nn.Module, device='cuda'):
        self.model = model
        self.device = device
        # Mean absolute gradient above/below which a layer moves one level
        # up/down in ``precision_levels``.
        self.grad_threshold_high = 1e-3
        self.grad_threshold_low = 1e-5
        # Ordered from the lowest to the highest level used by this class.
        self.precision_levels = ['int8', 'bfloat16', 'float16']
        self.layer_precisions = {}
        self._analyze_layers()

    def _analyze_layers(self):
        """
        Assign the initial precision label of every module.

        Linear/Conv2d modules are labelled by parameter count (> 1M ->
        'float16', > 100k -> 'bfloat16', otherwise 'int8'); every other
        module, containers included, is labelled 'float16'.
        """
        for name, module in self.model.named_modules():
            if not isinstance(module, (nn.Linear, nn.Conv2d)):
                self.layer_precisions[name] = 'float16'
                continue
            param_count = sum(p.numel() for p in module.parameters())
            if param_count > 1_000_000:
                self.layer_precisions[name] = 'float16'
            elif param_count > 100_000:
                self.layer_precisions[name] = 'bfloat16'
            else:
                self.layer_precisions[name] = 'int8'

    def apply_quantization(self):
        """
        Dynamically quantize to INT8 the modules currently labelled 'int8'.

        Only Linear/LSTM/GRU modules are eligible.  ``self.model`` is rebound
        to the value returned by ``quantize_dynamic``.
        NOTE(review): with its default ``inplace=False`` that call is
        documented to return a converted copy, so other holders of the
        original model object do not see the change - confirm callers re-read
        ``self.model`` if they need the quantized network.
        """
        # INT8 quantization
        int8_modules = [
            name
            for name, module in self.model.named_modules()
            if self.layer_precisions.get(name) == 'int8'
            and isinstance(module, (nn.Linear, nn.LSTM, nn.GRU))
        ]

        if int8_modules:
            # Pass the selected submodule *names* rather than ``{nn.Linear}``:
            # a type spec would quantize every Linear layer regardless of its
            # label, contradicting the list reported in the message below.
            self.model = quantize_dynamic(self.model, set(int8_modules), dtype=torch.qint8)
            print(f"ALMPQ: Применено динамическое INT8 квантование к слоям: {int8_modules}")

        print("ALMPQ: Квантование применено (FP16/BF16 через AMP, INT8)")

    def _resolve_layer(self, name):
        """Map a gradient key to a key of ``layer_precisions`` (or None).

        Accepts either a module name as-is or a parameter name such as
        ``'block.fc.weight'``, which resolves to its owning module
        ``'block.fc'`` (``'weight'`` resolves to the root module ``''``).
        """
        if name in self.layer_precisions:
            return name
        owner = name.rpartition('.')[0]
        return owner if owner in self.layer_precisions else None

    def adapt_during_training(self, grads):
        """
        Adaptively change layer precision from gradient magnitudes.

        Args:
            grads: mapping from a module name or parameter name to a gradient
                tensor (``None`` values are skipped).

        A layer moves one level up when its mean absolute gradient exceeds
        ``grad_threshold_high`` and one level down when it is below
        ``grad_threshold_low``.  When several parameters belong to the same
        module the largest of their mean absolute gradients is used, so a
        module moves at most one level per call.  If anything changed,
        ``apply_quantization`` is re-run.
        """
        layer_grads = {}
        for name, grad in grads.items():
            if grad is None:
                continue
            layer = self._resolve_layer(name)
            if layer is None:
                continue
            avg_grad = grad.abs().mean().item()
            layer_grads[layer] = max(avg_grad, layer_grads.get(layer, 0.0))

        changed = False
        top = len(self.precision_levels) - 1
        for name, avg_grad in layer_grads.items():
            current_precision = self.layer_precisions[name]
            if current_precision not in self.precision_levels:
                continue
            idx = self.precision_levels.index(current_precision)

            if avg_grad > self.grad_threshold_high and idx < top:
                new_precision = self.precision_levels[idx + 1]
                self.layer_precisions[name] = new_precision
                changed = True
                print(f"[ALMPQ] Повышаем точность слоя '{name}' до {new_precision}")
            elif avg_grad < self.grad_threshold_low and idx > 0:
                new_precision = self.precision_levels[idx - 1]
                self.layer_precisions[name] = new_precision
                changed = True
                print(f"[ALMPQ]: Понижаем точность слоя '{name}' до {new_precision}")

        if changed:
            self.apply_quantization()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class HIGGSXAccelerator:
    """
    Virtual HIGGS-X accelerator with ALMPQ support and loading from .hgs files.

    Wraps a model, an optimizer built from ``optimizer_cls``, a ``GradScaler``
    and an ``ALMPQOptimizer``, and exposes mixed-precision ``train_step`` and
    ``infer`` helpers.

    NOTE(review): training and inference here always run on ``self.model``;
    the ALMPQ helper rebinds its own ``model`` attribute when it quantizes, so
    the quantized network presumably lives in ``self.almpq.model`` - confirm
    which of the two callers are expected to use.
    """

    def __init__(self, model: nn.Module, optimizer_cls, device='cuda'):
        self.device = device
        self.model = model.to(device)
        self.optimizer = optimizer_cls(self.model.parameters())
        # GradScaler/autocast take a bare device type, not 'cuda:0' or a
        # torch.device, so normalise before handing the value over.
        self.scaler = GradScaler(device=self._device_type())
        self.almpq = ALMPQOptimizer(self.model, device)
        self._initialize_model()

    def _device_type(self) -> str:
        """Return the device type string ('cuda', 'cpu', ...) of ``self.device``."""
        return torch.device(self.device).type

    def _initialize_model(self):
        """Run the initial ALMPQ quantization pass and keep the model on the device."""
        self.almpq.apply_quantization()
        self.model.to(self.device)

    def load_hgs_model(self, hgs_filepath):
        """
        Load weights from an .hgs file and apply them to the model.

        After loading, the ALMPQ helper is pointed back at the freshly loaded
        model before re-quantizing; otherwise it would re-quantize whatever
        model object it held from an earlier pass, which does not contain the
        loaded weights.
        """
        state_dict = HGSFormat.load_hgs(hgs_filepath, device=self.device)
        self.model.load_state_dict(state_dict)
        print(f"HIGGS-X: Модель загружена из {hgs_filepath}")
        self.almpq.model = self.model
        self.almpq.apply_quantization()

    def train_step(self, data, target):
        """
        Run one mixed-precision optimisation step with a cross-entropy loss.

        Args:
            data: input batch, moved to ``self.device``.
            target: class targets for ``cross_entropy``, moved to ``self.device``.

        Returns:
            The loss of this step as a Python float.
        """
        self.model.train()
        self.optimizer.zero_grad()
        with autocast(self._device_type()):
            output = self.model(data.to(self.device))
            loss = nn.functional.cross_entropy(output, target.to(self.device))
        self.scaler.scale(loss).backward()

        # Gradients are still multiplied by the loss scale at this point.
        # Unscale them first so the ALMPQ thresholds compare against true
        # magnitudes; scaler.step() detects this and does not unscale again.
        self.scaler.unscale_(self.optimizer)

        # Keep gradients on their device: only a scalar statistic is needed,
        # so copying every tensor to the CPU each step is wasted transfer.
        grads = {
            name: param.grad.detach()
            for name, param in self.model.named_parameters()
            if param.grad is not None
        }

        self.almpq.adapt_during_training(grads)

        self.scaler.step(self.optimizer)
        self.scaler.update()
        return loss.item()

    def infer(self, data):
        """Run a forward pass in eval mode under ``no_grad`` and autocast."""
        self.model.eval()
        with torch.no_grad(), autocast(self._device_type()):
            output = self.model(data.to(self.device))
        return output

    @staticmethod
    def get_model_size_bytes(model: nn.Module) -> int:
        """Return the total byte size of the model's parameters (buffers are not counted)."""
        return sum(param.numel() * param.element_size() for param in model.parameters())

    @staticmethod
    def measure_load_time(load_func, *args, **kwargs) -> float:
        """Call ``load_func(*args, **kwargs)`` and return the elapsed seconds."""
        import time
        start = time.perf_counter()
        load_func(*args, **kwargs)
        end = time.perf_counter()
        return end - start
|
loader.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import struct
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
class HGSFormat:
    """Reader/writer for the ``.hgs`` container of 8-bit quantized tensors.

    File layout (all integers/floats little-endian):

    * ``MAGIC`` (4 bytes), layer count (``uint32``)
    * one index record per layer: name length (``uint32``), UTF-8 name,
      number of dims (``uint32``), dims (``uint64`` each), ``min_val``
      (``float32``), ``scale`` (``float32``), element count (``uint64``),
      payload length in bytes (``uint64``), absolute payload offset (``uint64``)
    * the ``uint8`` payloads, in index order
    """

    MAGIC = b'HGS1'

    @staticmethod
    def quantize_tensor(tensor: torch.Tensor, bits=8):
        """Affine-quantize ``tensor`` to unsigned integers stored as ``uint8``.

        Args:
            tensor: tensor to quantize (any device; processed on the CPU).
            bits: bit width of the quantized values, 1..8.

        Returns:
            ``(q_data, min_val, scale)`` where ``q_data`` is a ``uint8`` numpy
            array and ``value ~= q_data * scale + min_val``.

        Raises:
            ValueError: if ``bits`` is outside 1..8 (wider values cannot be
                held by the ``uint8`` storage and would silently wrap).
        """
        if not 1 <= bits <= 8:
            raise ValueError(f"bits must be in 1..8 for uint8 storage, got {bits}")

        tensor_cpu = tensor.detach().cpu()
        if not (tensor_cpu.is_floating_point() or tensor_cpu.is_complex()):
            # bool/integer tensors: bool does not support subtraction, so do
            # the arithmetic in float32.
            tensor_cpu = tensor_cpu.to(torch.float32)

        if tensor_cpu.numel() == 0:
            # min()/max() are undefined on empty tensors; nothing to quantize.
            return np.empty(tuple(tensor_cpu.shape), dtype=np.uint8), 0.0, 1.0

        levels = 2**bits - 1
        min_val = tensor_cpu.min()
        max_val = tensor_cpu.max()
        scale = (max_val - min_val) / levels
        if scale == 0:
            # Constant tensor: any non-zero scale maps every value to 0.
            scale = 1e-8
        q_tensor = ((tensor_cpu - min_val) / scale).round().clamp(0, levels).to(torch.uint8)
        return q_tensor.numpy(), float(min_val), float(scale)

    @staticmethod
    def dequantize_tensor(q_data, min_val, scale):
        """Return the float32 tensor ``q_data * scale + min_val``."""
        return torch.tensor(q_data, dtype=torch.float32) * scale + min_val

    @staticmethod
    def save_hgs(model_state_dict, filepath):
        """Quantize every tensor of ``model_state_dict`` to 8 bits and write it to ``filepath``.

        All tensors are quantized before the file is opened, and payload
        offsets are computed up front, so the file is written in one
        sequential pass.
        """
        entries = []
        for name, tensor in model_state_dict.items():
            q_data, min_val, scale = HGSFormat.quantize_tensor(tensor, bits=8)
            entries.append({
                'name_bytes': name.encode('utf-8'),
                'shape': tuple(tensor.shape),
                'numel': tensor.numel(),
                'min_val': min_val,
                'scale': scale,
                'q_data': q_data,
            })

        # Fixed part of an index record: name_len + ndim + min + scale +
        # numel + q_data_len + offset.
        fixed_size = 4 + 4 + 4 + 4 + 8 + 8 + 8
        offset = len(HGSFormat.MAGIC) + 4 + sum(
            fixed_size + len(e['name_bytes']) + 8 * len(e['shape']) for e in entries
        )

        with open(filepath, 'wb') as f:
            f.write(HGSFormat.MAGIC)
            f.write(struct.pack('<I', len(entries)))

            for entry in entries:
                payload_len = entry['q_data'].nbytes
                f.write(struct.pack('<I', len(entry['name_bytes'])))
                f.write(entry['name_bytes'])
                f.write(struct.pack('<I', len(entry['shape'])))
                f.write(b''.join(struct.pack('<Q', dim) for dim in entry['shape']))
                f.write(struct.pack(
                    '<ffQQQ',
                    entry['min_val'], entry['scale'], entry['numel'], payload_len, offset,
                ))
                offset += payload_len

            for entry in entries:
                f.write(entry['q_data'].tobytes())

    @staticmethod
    def load_hgs(filepath, device='cpu'):
        """Read an ``.hgs`` file and return a state dict of dequantized float32 tensors.

        Args:
            filepath: path of the file to read.
            device: device the resulting tensors are moved to.

        Raises:
            RuntimeError: if the magic bytes do not match or a payload is
                shorter than the index says.
        """
        state_dict = {}
        with open(filepath, 'rb') as f:
            magic = f.read(4)
            if magic != HGSFormat.MAGIC:
                raise RuntimeError("Неверный формат файла")

            num_layers = struct.unpack('<I', f.read(4))[0]

            index = []
            for _ in range(num_layers):
                name_len = struct.unpack('<I', f.read(4))[0]
                name = f.read(name_len).decode('utf-8')
                shape_len = struct.unpack('<I', f.read(4))[0]
                shape = tuple(struct.unpack('<Q', f.read(8))[0] for _ in range(shape_len))
                min_val, scale, numel, q_data_len, offset = struct.unpack('<ffQQQ', f.read(32))
                index.append({
                    'name': name,
                    'shape': shape,
                    'min_val': min_val,
                    'scale': scale,
                    'numel': numel,
                    'q_data_len': q_data_len,
                    'offset': offset
                })

            for entry in index:
                f.seek(entry['offset'])
                q_data_bytes = f.read(entry['q_data_len'])
                if len(q_data_bytes) != entry['q_data_len']:
                    raise RuntimeError(
                        f"Truncated .hgs file: layer '{entry['name']}' expects "
                        f"{entry['q_data_len']} bytes, got {len(q_data_bytes)}"
                    )
                q_data = np.frombuffer(q_data_bytes, dtype=np.uint8)
                tensor = HGSFormat.dequantize_tensor(q_data, entry['min_val'], entry['scale'])
                tensor = tensor.reshape(entry['shape']).to(device)
                state_dict[entry['name']] = tensor

        return state_dict
|