Darkester committed on
Commit 2020344 · verified · 1 Parent(s): c399515

Upload 3 files

Files changed (4)
  1. .gitattributes +1 -0
  2. higgs-gpt2.hgs +3 -0
  3. higgs_x.py +140 -0
  4. loader.py +135 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ higgs-gpt2.hgs filter=lfs diff=lfs merge=lfs -text
higgs-gpt2.hgs ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa72a226db892e0d6304fb1627abab33158608926031f5c038eebe6cc06f7a11
+ size 163049198
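The pointer above can be checked against the downloaded blob. A minimal sketch, assuming the resolved file sits in the working directory as higgs-gpt2.hgs:

    import hashlib

    # Recompute the sha256 oid of the downloaded LFS blob in chunks and
    # compare it with the pointer above.
    h = hashlib.sha256()
    with open('higgs-gpt2.hgs', 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    assert h.hexdigest() == 'aa72a226db892e0d6304fb1627abab33158608926031f5c038eebe6cc06f7a11'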
higgs_x.py ADDED
@@ -0,0 +1,140 @@
+ import torch
+ import torch.nn as nn
+ from torch.amp import GradScaler, autocast
+ from torch.quantization import quantize_dynamic
+ from loader import HGSFormat
+
+ class ALMPQOptimizer:
+     def __init__(self, model: nn.Module, device='cuda'):
+         self.model = model
+         self.device = device
+         self.layer_precisions = {}
+         self.grad_threshold_high = 1e-3
+         self.grad_threshold_low = 1e-5
+         # Ordered from lowest to highest precision.
+         self.precision_levels = ['int8', 'bfloat16', 'float16']
+         self._analyze_layers()
+
+     def _analyze_layers(self):
+         """Initialize per-layer precision from parameter counts."""
+         for name, module in self.model.named_modules():
+             if isinstance(module, (nn.Linear, nn.Conv2d)):
+                 param_count = sum(p.numel() for p in module.parameters())
+                 if param_count > 1_000_000:
+                     self.layer_precisions[name] = 'float16'
+                 elif param_count > 100_000:
+                     self.layer_precisions[name] = 'bfloat16'
+                 else:
+                     self.layer_precisions[name] = 'int8'
+             else:
+                 self.layer_precisions[name] = 'float16'
+
+     def apply_quantization(self):
+         # Dynamic INT8 quantization (CPU-only kernels in PyTorch). Note that
+         # quantize_dynamic converts every nn.Linear in the model, not only
+         # the layers collected below.
+         int8_modules = []
+         for name, module in self.model.named_modules():
+             if self.layer_precisions.get(name) == 'int8' and isinstance(module, (nn.Linear, nn.LSTM, nn.GRU)):
+                 int8_modules.append(name)
+
+         if int8_modules:
+             self.model = quantize_dynamic(self.model, {nn.Linear}, dtype=torch.qint8)
+             print(f"ALMPQ: dynamic INT8 quantization applied; INT8-marked layers: {int8_modules}")
+
+         print("ALMPQ: quantization applied (FP16/BF16 via AMP, dynamic INT8)")
+
+     def adapt_during_training(self, grads):
+         """Adaptively raise or lower per-layer precision from gradient magnitudes."""
+         changed = False
+         for name, grad in grads.items():
+             if grad is None:
+                 continue
+             # grads is keyed by parameter name ('layer.weight'); strip the
+             # suffix to recover the module name used in layer_precisions.
+             module_name = name.rsplit('.', 1)[0]
+             if module_name not in self.layer_precisions:
+                 continue
+             avg_grad = grad.abs().mean().item()
+             current_precision = self.layer_precisions[module_name]
+             if current_precision not in self.precision_levels:
+                 continue
+             idx = self.precision_levels.index(current_precision)
+
+             if avg_grad > self.grad_threshold_high and idx < len(self.precision_levels) - 1:
+                 new_precision = self.precision_levels[idx + 1]
+                 self.layer_precisions[module_name] = new_precision
+                 changed = True
+                 print(f"[ALMPQ] Raising precision of layer '{module_name}' to {new_precision}")
+             elif avg_grad < self.grad_threshold_low and idx > 0:
+                 new_precision = self.precision_levels[idx - 1]
+                 self.layer_precisions[module_name] = new_precision
+                 changed = True
+                 print(f"[ALMPQ] Lowering precision of layer '{module_name}' to {new_precision}")
+
+         if changed:
+             self.apply_quantization()
+
+
+ class HIGGSXAccelerator:
+     """Virtual HIGGS-X accelerator with ALMPQ support and .hgs loading."""
+
+     def __init__(self, model: nn.Module, optimizer_cls, device='cuda'):
+         self.device = device
+         self.model = model.to(device)
+         self.optimizer = optimizer_cls(self.model.parameters())
+         self.scaler = GradScaler(device=device)
+         self.almpq = ALMPQOptimizer(self.model, device)
+         self._initialize_model()
+
+     def _initialize_model(self):
+         self.almpq.apply_quantization()
+         # quantize_dynamic returns a new model, so re-sync the reference.
+         self.model = self.almpq.model.to(self.device)
+
+     def load_hgs_model(self, hgs_filepath):
+         """Load weights from a .hgs file and apply them to the model."""
+         state_dict = HGSFormat.load_hgs(hgs_filepath, device=self.device)
+         self.model.load_state_dict(state_dict)
+         print(f"HIGGS-X: model loaded from {hgs_filepath}")
+         self.almpq.apply_quantization()
+         self.model = self.almpq.model
+
+     def train_step(self, data, target):
+         self.model.train()
+         self.optimizer.zero_grad()
+         with autocast(self.device):
+             output = self.model(data.to(self.device))
+             loss = nn.functional.cross_entropy(output, target.to(self.device))
+         self.scaler.scale(loss).backward()
+
+         grads = {}
+         for name, param in self.model.named_parameters():
+             if param.grad is not None:
+                 grads[name] = param.grad.detach().cpu()
+
+         self.scaler.step(self.optimizer)
+         self.scaler.update()
+         # Adapt precision after the optimizer step so the step acts on the
+         # parameters the gradients were computed for.
+         self.almpq.adapt_during_training(grads)
+         self.model = self.almpq.model
+         return loss.item()
+
+     def infer(self, data):
+         self.model.eval()
+         with torch.no_grad(), autocast(self.device):
+             output = self.model(data.to(self.device))
+         return output
+
+     @staticmethod
+     def get_model_size_bytes(model: nn.Module) -> int:
+         total_bytes = 0
+         for param in model.parameters():
+             total_bytes += param.numel() * param.element_size()
+         return total_bytes
+
+     @staticmethod
+     def measure_load_time(load_func, *args, **kwargs) -> float:
+         import time
+         start = time.perf_counter()
+         load_func(*args, **kwargs)
+         end = time.perf_counter()
+         return end - start
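A minimal usage sketch of the classes above. The toy model, layer sizes, and batch are hypothetical placeholders, and device='cpu' is used because PyTorch's dynamic INT8 kernels run on CPU (assumes a recent PyTorch with CPU autocast/GradScaler support); load_hgs_model would apply only to an architecture matching the saved GPT-2 state dict:

    import torch
    import torch.nn as nn
    from higgs_x import HIGGSXAccelerator

    # Hypothetical toy model; any nn.Module works.
    model = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 512))
    acc = HIGGSXAccelerator(model, torch.optim.Adam, device='cpu')
    # acc.load_hgs_model('higgs-gpt2.hgs')  # only for a matching architecture

    data = torch.randn(32, 784)
    target = torch.randint(0, 512, (32,))
    loss = acc.train_step(data, target)
    print(f'loss={loss:.4f}, size={HIGGSXAccelerator.get_model_size_bytes(acc.model)} bytes')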
loader.py ADDED
@@ -0,0 +1,135 @@
+ import struct
+ import torch
+ import numpy as np
+
+ class HGSFormat:
+     """Simple quantized weight container.
+
+     Layout: MAGIC | uint32 layer count | index records | raw uint8 blobs.
+     Each index record: name_len | name | ndim | dims | min_val | scale
+     | numel | blob size | blob offset (patched after the blobs are written).
+     All integers are little-endian.
+     """
+
+     MAGIC = b'HGS1'
+
+     @staticmethod
+     def quantize_tensor(tensor: torch.Tensor, bits=8):
+         # Affine quantization: q = round((x - min) / scale), q in [0, 2**bits - 1].
+         tensor_cpu = tensor.detach().cpu()
+         min_val = tensor_cpu.min()
+         max_val = tensor_cpu.max()
+         scale = (max_val - min_val) / (2**bits - 1)
+         if scale == 0:
+             scale = 1e-8  # constant tensor: avoid division by zero
+         q_tensor = ((tensor_cpu - min_val) / scale).round().clamp(0, 2**bits - 1).to(torch.uint8)
+         return q_tensor.numpy(), float(min_val), float(scale)
+
+     @staticmethod
+     def dequantize_tensor(q_data, min_val, scale):
+         return torch.tensor(q_data, dtype=torch.float32) * scale + min_val
+
+     @staticmethod
+     def save_hgs(model_state_dict, filepath):
+         layers = list(model_state_dict.items())
+         with open(filepath, 'wb') as f:
+             f.write(HGSFormat.MAGIC)
+             f.write(struct.pack('<I', len(layers)))
+
+             index_pos = f.tell()
+             index_data = []
+             quantized_datas = []
+             for name, tensor in layers:
+                 name_bytes = name.encode('utf-8')
+                 q_data, min_val, scale = HGSFormat.quantize_tensor(tensor, bits=8)
+                 index_data.append({
+                     'name_bytes': name_bytes,
+                     'shape': tensor.shape,
+                     'numel': tensor.numel(),
+                     'min_val': min_val,
+                     'scale': scale,
+                     'q_data_len': q_data.nbytes,
+                     'offset': 0
+                 })
+                 quantized_datas.append(q_data)
+
+             # First pass: write the index with placeholder offsets.
+             for entry in index_data:
+                 f.write(struct.pack('<I', len(entry['name_bytes'])))
+                 f.write(entry['name_bytes'])
+                 f.write(struct.pack('<I', len(entry['shape'])))
+                 for dim in entry['shape']:
+                     f.write(struct.pack('<Q', dim))
+                 f.write(struct.pack('<f', entry['min_val']))
+                 f.write(struct.pack('<f', entry['scale']))
+                 f.write(struct.pack('<Q', entry['numel']))
+                 f.write(struct.pack('<Q', entry['q_data_len']))
+                 f.write(struct.pack('<Q', 0))
+
+             # Second pass: append the blobs, then patch the real offsets
+             # into the index records.
+             for i, q_data in enumerate(quantized_datas):
+                 index_data[i]['offset'] = f.tell()
+                 f.write(q_data.tobytes())
+             f.seek(index_pos)
+             for entry in index_data:
+                 # Skip every fixed field of the record to land on its offset slot.
+                 f.seek(f.tell() + 4 + len(entry['name_bytes']) + 4 + 8 * len(entry['shape']) + 4 + 4 + 8 + 8)
+                 f.write(struct.pack('<Q', entry['offset']))
+
+     @staticmethod
+     def load_hgs(filepath, device='cpu'):
+         state_dict = {}
+         with open(filepath, 'rb') as f:
+             magic = f.read(4)
+             if magic != HGSFormat.MAGIC:
+                 raise RuntimeError("Invalid .hgs file format")
+
+             num_layers = struct.unpack('<I', f.read(4))[0]
+
+             # Read the full index first, then seek to each blob.
+             index = []
+             for _ in range(num_layers):
+                 name_len = struct.unpack('<I', f.read(4))[0]
+                 name = f.read(name_len).decode('utf-8')
+                 shape_len = struct.unpack('<I', f.read(4))[0]
+                 shape = tuple(struct.unpack('<Q', f.read(8))[0] for _ in range(shape_len))
+                 min_val = struct.unpack('<f', f.read(4))[0]
+                 scale = struct.unpack('<f', f.read(4))[0]
+                 numel = struct.unpack('<Q', f.read(8))[0]
+                 q_data_len = struct.unpack('<Q', f.read(8))[0]
+                 offset = struct.unpack('<Q', f.read(8))[0]
+                 index.append({
+                     'name': name,
+                     'shape': shape,
+                     'min_val': min_val,
+                     'scale': scale,
+                     'numel': numel,
+                     'q_data_len': q_data_len,
+                     'offset': offset
+                 })
+
+             for entry in index:
+                 f.seek(entry['offset'])
+                 q_data_bytes = f.read(entry['q_data_len'])
+                 q_data = np.frombuffer(q_data_bytes, dtype=np.uint8)
+                 tensor = HGSFormat.dequantize_tensor(q_data, entry['min_val'], entry['scale'])
+                 state_dict[entry['name']] = tensor.reshape(entry['shape']).to(device)
+
+         return state_dict
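A round-trip sketch of the format (the file name is arbitrary): save a small state dict, reload it, and check that the dequantized weights match the originals up to 8-bit quantization error:

    import torch
    import torch.nn as nn
    from loader import HGSFormat

    model = nn.Linear(4, 2)
    HGSFormat.save_hgs(model.state_dict(), 'demo.hgs')
    restored = HGSFormat.load_hgs('demo.hgs')

    for name, tensor in model.state_dict().items():
        err = (tensor - restored[name]).abs().max().item()
        print(f'{name}: max abs error {err:.6f}')  # bounded by ~scale/2 per element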