Upload folder using huggingface_hub
- matrix.log +2 -0
- matrix.py +46 -0
matrix.log ADDED
@@ -0,0 +1,2 @@
+Starting 8-GPU full-load placeholder (VRAM + compute)...
+Press Ctrl+C to stop

matrix.py ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.multiprocessing as mp
+
+# Matrix size: large enough to keep the GPU's compute units saturated
+MATRIX_SIZE = 24576
+DTYPE = torch.bfloat16
+
+def worker(rank):
+    """Worker process for a single GPU."""
+    # 1. Bind to this worker's device
+    device = torch.device(f"cuda:{rank}")
+    torch.cuda.set_device(device)
+
+    # ==========================================
+    # Insertion point is here!
+    # ==========================================
+    try:
+        # 1.2e11 bytes ≈ 120 GB; an H200 has roughly 141 GB of VRAM.
+        # To occupy less, lower this to e.g. 6e10 (≈ 60 GB).
+        # Note: dtype=torch.int8 means each element takes exactly 1 byte.
+        filler = torch.empty(int(1.2e11), dtype=torch.int8, device=device)
+
+        # As long as `filler` is not deleted, the VRAM stays allocated.
+    except RuntimeError:
+        print(f"[GPU {rank}] VRAM allocation too large; skipping the VRAM filler and running only the compute load...")
+
+    # 2. Prepare the compute data
+    a = torch.randn(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
+    b = torch.randn(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
+    c = torch.empty(MATRIX_SIZE, MATRIX_SIZE, device=device, dtype=DTYPE)
+
+    # 3. Compute in an infinite loop (100% utilization)
+    while True:
+        torch.mm(a, b, out=c)
+
+def main():
+    world_size = torch.cuda.device_count()
+    print(f"Starting {world_size}-GPU full-load placeholder (VRAM + compute)...")
+    print("Press Ctrl+C to stop")
+
+    # Launch one worker process per GPU
+    mp.spawn(worker, nprocs=world_size, join=True)
+
+if __name__ == "__main__":
+    mp.set_start_method('spawn', force=True)
+    main()
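
A possible refinement, not part of this commit: instead of the hard-coded 1.2e11-byte filler, the allocation could be sized from the memory the device actually reports as free, so the same script also fits cards smaller than an H200. The sketch below is a minimal illustration of that idea; the make_filler helper, the 0.9 safety fraction, and the reserve estimate for the matmul buffers are assumptions, not values from matrix.py.

import torch

def make_filler(device, reserve_bytes, fraction=0.9):
    """Allocate an int8 tensor filling most of the currently free VRAM (sketch)."""
    free_bytes, _total_bytes = torch.cuda.mem_get_info(device)
    fill_bytes = int((free_bytes - reserve_bytes) * fraction)
    if fill_bytes <= 0:
        return None  # nothing left to fill; run the compute load only
    return torch.empty(fill_bytes, dtype=torch.int8, device=device)

# Hypothetical use inside worker(): reserve room for the three 24576x24576
# bf16 matrices (3 * 24576**2 * 2 bytes ≈ 3.6 GB) plus ~2 GB of headroom
# for the CUDA context and matmul workspaces.
# filler = make_filler(device, reserve_bytes=3 * 24576**2 * 2 + 2 * 1024**3)

Run with python matrix.py on the target node; nvidia-smi should then show close to full memory usage and high utilization on every visible GPU until the process is interrupted with Ctrl+C.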