Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
higgs-gpt2.hgs +3 -0
higgs_x.py +140 -0
loader.py +135 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+higgs-gpt2.hgs filter=lfs diff=lfs merge=lfs -text

higgs-gpt2.hgs ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa72a226db892e0d6304fb1627abab33158608926031f5c038eebe6cc06f7a11
+size 163049198

higgs_x.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import torch
+import torch.nn as nn
+from torch.amp import GradScaler, autocast
+from torch.quantization import quantize_dynamic, default_dynamic_qconfig
+from loader import HGSFormat
+class ALMPQOptimizer:
+    """Keep a precision label per module and dynamically quantize the INT8-labelled ones.
+
+    Labels are values from ``precision_levels`` and are keyed by the dotted
+    module names produced by ``model.named_modules()``. Only the 'int8' label
+    changes the model (see ``apply_quantization``); the other labels are
+    bookkeeping inside this class.
+    """
+    def __init__(self, model: nn.Module, device='cuda'):
+        """Store the model, set thresholds and assign an initial label to every module.
+
+        Args:
+            model: Module whose sub-modules are labelled.
+            device: Stored on the instance; not used by any method of this class.
+        """
+        self.model = model
+        self.device = device
+        # Configuration is assigned before the scan so the scan may rely on it.
+        self.grad_threshold_high = 1e-3
+        self.grad_threshold_low = 1e-5
+        # Ordered from lowest to highest level; adaptation moves one step at a time.
+        self.precision_levels = ['int8', 'bfloat16', 'float16']
+        self.layer_precisions = {}
+        self._analyze_layers()
+    def _analyze_layers(self):
+        """Assign the initial precision label of every module.
+
+        Linear/Conv2d modules are labelled by parameter count: more than
+        1,000,000 -> 'float16', more than 100,000 -> 'bfloat16', otherwise
+        'int8'. Every other module (containers included) gets 'float16'.
+        """
+        for name, module in self.model.named_modules():
+            if not isinstance(module, (nn.Linear, nn.Conv2d)):
+                self.layer_precisions[name] = 'float16'
+                continue
+            param_count = sum(p.numel() for p in module.parameters())
+            if param_count > 1_000_000:
+                self.layer_precisions[name] = 'float16'
+            elif param_count > 100_000:
+                self.layer_precisions[name] = 'bfloat16'
+            else:
+                self.layer_precisions[name] = 'int8'
+    def apply_quantization(self):
+        """Dynamically quantize the modules currently labelled 'int8'.
+
+        Only Linear/LSTM/GRU modules are eligible. The result of
+        ``quantize_dynamic`` is stored on ``self.model``.
+        NOTE(review): with its default ``inplace=False`` that call returns a
+        converted copy, so an object holding the original model reference does
+        not see the quantized layers - confirm this is intended.
+        NOTE(review): PyTorch documents dynamic quantization for CPU backends;
+        confirm behaviour when the model lives on a CUDA device.
+        """
+        int8_modules = [
+            name
+            for name, module in self.model.named_modules()
+            if self.layer_precisions.get(name) == 'int8'
+            and isinstance(module, (nn.Linear, nn.LSTM, nn.GRU))
+        ]
+        if int8_modules:
+            # Select by module name rather than by the nn.Linear type, so that only the
+            # layers reported below are converted and larger Linear layers stay untouched.
+            self.model = quantize_dynamic(self.model, set(int8_modules), dtype=torch.qint8)
+            print(f"ALMPQ: Применено динамическое INT8 квантование к слоям: {int8_modules}")
+        print("ALMPQ: Квантование применено (FP16/BF16 через AMP, INT8)")
+    def _owning_module(self, name):
+        """Map a gradient key to a key of ``layer_precisions``, or None if unknown.
+
+        Accepts module names as they are, and parameter names such as
+        'block.fc.weight', which resolve to the owning module 'block.fc'.
+        """
+        if name in self.layer_precisions:
+            return name
+        owner = name.rpartition('.')[0]
+        return owner if owner in self.layer_precisions else None
+    def adapt_during_training(self, grads):
+        """Move module labels one level up or down based on gradient magnitude.
+
+        Args:
+            grads: Mapping from parameter name (as in ``named_parameters()``)
+                or module name to a gradient tensor; None values and unknown
+                names are ignored.
+
+        The mean absolute gradient of a module is pooled over all of its
+        tensors. Above ``grad_threshold_high`` the label moves one level up,
+        below ``grad_threshold_low`` one level down. If any label changed,
+        ``apply_quantization`` is run again. Raising the label of a module
+        that has already been quantized only updates the label.
+        """
+        # Pool |grad| per module so weight and bias produce one decision per call.
+        pooled = {}
+        for name, grad in grads.items():
+            if grad is None or grad.numel() == 0:
+                continue
+            owner = self._owning_module(name)
+            if owner is None:
+                continue
+            abs_sum, count = pooled.get(owner, (0.0, 0))
+            # Accumulate in float32 so half-precision gradients cannot overflow the sum.
+            pooled[owner] = (abs_sum + grad.abs().float().sum().item(), count + grad.numel())
+        changed = False
+        for name, (abs_sum, count) in pooled.items():
+            avg_grad = abs_sum / count
+            current_precision = self.layer_precisions[name]
+            if current_precision not in self.precision_levels:
+                continue
+            idx = self.precision_levels.index(current_precision)
+            if avg_grad > self.grad_threshold_high and idx < len(self.precision_levels) - 1:
+                new_precision = self.precision_levels[idx + 1]
+                self.layer_precisions[name] = new_precision
+                changed = True
+                print(f"[ALMPQ] Повышаем точность слоя '{name}' до {new_precision}")
+            elif avg_grad < self.grad_threshold_low and idx > 0:
+                new_precision = self.precision_levels[idx - 1]
+                self.layer_precisions[name] = new_precision
+                changed = True
+                print(f"[ALMPQ]: Понижаем точность слоя '{name}' до {new_precision}")
+        if changed:
+            self.apply_quantization()
+class HIGGSXAccelerator:
+    """Virtual HIGGS-X accelerator: AMP training/inference with ALMPQ and .hgs loading."""
+    def __init__(self, model: nn.Module, optimizer_cls, device='cuda'):
+        """Move the model to ``device`` and build optimizer, grad scaler and ALMPQ.
+
+        Args:
+            model: Network to train or run.
+            optimizer_cls: Callable that receives ``model.parameters()`` and
+                returns an optimizer.
+            device: Target device, e.g. 'cuda', 'cuda:0' or 'cpu'.
+        """
+        self.device = device
+        # autocast and GradScaler expect a device *type* ('cuda', 'cpu'); an indexed
+        # device such as 'cuda:0' is reduced to its type here.
+        self._device_type = torch.device(device).type
+        self.model = model.to(device)
+        self.optimizer = optimizer_cls(self.model.parameters())
+        self.scaler = GradScaler(device=self._device_type)
+        self.almpq = ALMPQOptimizer(self.model, device)
+        self._initialize_model()
+    def _initialize_model(self):
+        """Run the initial ALMPQ quantization pass and keep the model on the device."""
+        self.almpq.apply_quantization()
+        self.model.to(self.device)
+    def load_hgs_model(self, hgs_filepath):
+        """Load weights from an .hgs file into the model, then re-run ALMPQ quantization.
+
+        Args:
+            hgs_filepath: Path handed to ``HGSFormat.load_hgs``.
+        """
+        state_dict = HGSFormat.load_hgs(hgs_filepath, device=self.device)
+        self.model.load_state_dict(state_dict)
+        print(f"HIGGS-X: Модель загружена из {hgs_filepath}")
+        self.almpq.apply_quantization()
+    def train_step(self, data, target):
+        """Run one optimisation step with cross-entropy loss and return the loss value.
+
+        Args:
+            data: Input batch; moved to the accelerator device.
+            target: Targets for ``cross_entropy``; moved to the accelerator device.
+
+        Returns:
+            The loss of this step as a Python float.
+        """
+        self.model.train()
+        self.optimizer.zero_grad()
+        with autocast(self._device_type):
+            output = self.model(data.to(self.device))
+            loss = nn.functional.cross_entropy(output, target.to(self.device))
+        self.scaler.scale(loss).backward()
+        # Remove the loss scale before looking at gradients: ALMPQ compares them against
+        # absolute thresholds. scaler.step() notices the unscale and does not repeat it.
+        self.scaler.unscale_(self.optimizer)
+        # Gradients stay on their device; only scalar statistics are read from them,
+        # so copying every full tensor to the host each step is unnecessary.
+        grads = {
+            name: param.grad.detach()
+            for name, param in self.model.named_parameters()
+            if param.grad is not None
+        }
+        self.almpq.adapt_during_training(grads)
+        self.scaler.step(self.optimizer)
+        self.scaler.update()
+        return loss.item()
+    def infer(self, data):
+        """Return the model output for ``data`` in eval mode, without gradients, under autocast."""
+        self.model.eval()
+        with torch.no_grad(), autocast(self._device_type):
+            output = self.model(data.to(self.device))
+        return output
+    @staticmethod
+    def get_model_size_bytes(model: nn.Module) -> int:
+        """Return the total size in bytes of the parameters of ``model`` (buffers excluded)."""
+        return sum(param.numel() * param.element_size() for param in model.parameters())
+    @staticmethod
+    def measure_load_time(load_func, *args, **kwargs) -> float:
+        """Call ``load_func(*args, **kwargs)`` and return the elapsed wall time in seconds.
+
+        The return value of ``load_func`` is discarded; exceptions propagate.
+        """
+        import time
+        start = time.perf_counter()
+        load_func(*args, **kwargs)
+        return time.perf_counter() - start

loader.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import struct
+import torch
+import numpy as np
+class HGSFormat:
+    """Reader and writer for ``.hgs`` files holding 8-bit affine-quantized tensors.
+
+    File layout (little-endian): magic ``HGS1``; uint32 tensor count; one index
+    record per tensor (uint32 name length, UTF-8 name, uint32 rank, one uint64
+    per dimension, float32 minimum, float32 scale, uint64 element count, uint64
+    payload size, uint64 absolute payload offset); then the uint8 payloads.
+    The original dtype is not stored: loading always yields float32 tensors.
+    """
+    MAGIC = b'HGS1'
+    @staticmethod
+    def quantize_tensor(tensor: torch.Tensor, bits=8):
+        """Quantize ``tensor`` to unsigned integers with a per-tensor minimum and scale.
+
+        Args:
+            tensor: Tensor of any dtype or device; it is processed as float32 on the CPU.
+            bits: Number of quantization bits, 1..8 (values are stored as uint8).
+
+        Returns:
+            ``(q_data, min_val, scale)``: a uint8 NumPy array of the same shape
+            and two Python floats such that ``q_data * scale + min_val``
+            approximates the input.
+
+        Raises:
+            ValueError: If ``bits`` is outside 1..8.
+        """
+        if not 1 <= bits <= 8:
+            # More than 8 bits would silently wrap around in the uint8 cast below.
+            raise ValueError(f"bits must be between 1 and 8, got {bits}")
+        # float32 keeps the arithmetic defined for bool/integer tensors and avoids
+        # computing the scale in half precision.
+        values = tensor.detach().cpu().to(torch.float32)
+        if values.numel() == 0:
+            # min()/max() are undefined for empty tensors; store a neutral mapping.
+            return np.empty(tuple(values.shape), dtype=np.uint8), 0.0, 1.0
+        levels = 2**bits - 1
+        min_val = values.min()
+        max_val = values.max()
+        scale = (max_val - min_val) / levels
+        if scale == 0:
+            # Constant tensor: any positive scale works because every code is 0.
+            scale = 1e-8
+        q_tensor = ((values - min_val) / scale).round().clamp(0, levels).to(torch.uint8)
+        return q_tensor.numpy(), float(min_val), float(scale)
+    @staticmethod
+    def dequantize_tensor(q_data, min_val, scale):
+        """Return the float32 tensor ``q_data * scale + min_val``."""
+        return torch.tensor(q_data, dtype=torch.float32) * scale + min_val
+    @staticmethod
+    def save_hgs(model_state_dict, filepath):
+        """Quantize every tensor of ``model_state_dict`` to 8 bits and write an .hgs file.
+
+        Args:
+            model_state_dict: Mapping from tensor name to ``torch.Tensor``.
+            filepath: Destination path; an existing file is overwritten.
+        """
+        # Everything is quantized before the file is opened, so a failure while
+        # quantizing does not leave a partially written file behind.
+        records = []
+        for name, tensor in model_state_dict.items():
+            q_data, min_val, scale = HGSFormat.quantize_tensor(tensor, bits=8)
+            name_bytes = name.encode('utf-8')
+            shape = tuple(tensor.shape)
+            header = b''.join((
+                struct.pack('<I', len(name_bytes)),
+                name_bytes,
+                struct.pack('<I', len(shape)),
+                struct.pack(f'<{len(shape)}Q', *shape),
+                struct.pack('<ffQQ', min_val, scale, tensor.numel(), q_data.nbytes),
+            ))
+            records.append((header, q_data))
+        # All sizes are known, so payload offsets are computed up front instead of
+        # seeking back to patch them. Each index record ends with an 8-byte offset.
+        offset = len(HGSFormat.MAGIC) + 4 + sum(len(header) + 8 for header, _ in records)
+        with open(filepath, 'wb') as f:
+            f.write(HGSFormat.MAGIC)
+            f.write(struct.pack('<I', len(records)))
+            for header, q_data in records:
+                f.write(header)
+                f.write(struct.pack('<Q', offset))
+                offset += q_data.nbytes
+            for _, q_data in records:
+                f.write(q_data.tobytes())
+    @staticmethod
+    def _read_exact(f, size):
+        """Read exactly ``size`` bytes from ``f`` or raise RuntimeError on a short read."""
+        data = f.read(size)
+        if len(data) != size:
+            raise RuntimeError(f"Truncated .hgs file: expected {size} bytes, got {len(data)}")
+        return data
+    @staticmethod
+    def load_hgs(filepath, device='cpu'):
+        """Read an .hgs file and return a state dict of dequantized float32 tensors.
+
+        Args:
+            filepath: Path of the file to read.
+            device: Device the resulting tensors are moved to.
+
+        Returns:
+            Dict mapping tensor name to tensor, in index order.
+
+        Raises:
+            RuntimeError: If the magic bytes do not match or the file is truncated.
+        """
+        read = HGSFormat._read_exact
+        state_dict = {}
+        with open(filepath, 'rb') as f:
+            if f.read(4) != HGSFormat.MAGIC:
+                raise RuntimeError("Неверный формат файла")
+            (num_layers,) = struct.unpack('<I', read(f, 4))
+            index = []
+            for _ in range(num_layers):
+                (name_len,) = struct.unpack('<I', read(f, 4))
+                name = read(f, name_len).decode('utf-8')
+                (rank,) = struct.unpack('<I', read(f, 4))
+                shape = struct.unpack(f'<{rank}Q', read(f, 8 * rank))
+                # The stored element count is read to stay aligned but is not needed:
+                # the shape already determines it.
+                min_val, scale, _numel, q_data_len, offset = struct.unpack('<ffQQQ', read(f, 32))
+                index.append((name, shape, min_val, scale, q_data_len, offset))
+            for name, shape, min_val, scale, q_data_len, offset in index:
+                f.seek(offset)
+                q_data = np.frombuffer(read(f, q_data_len), dtype=np.uint8)
+                tensor = HGSFormat.dequantize_tensor(q_data, min_val, scale)
+                state_dict[name] = tensor.reshape(shape).to(device)
+        return state_dict