Zihan Min
committed on
Commit
·
1e55ac0
1
Parent(s):
8704f55
upload 0.6+llama fuser
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- qwen3_0.6b+llam3.2_1b_Fuser/config.json +58 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json +1 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt +3 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json +20 -0
- qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt +3 -0
qwen3_0.6b+llam3.2_1b_Fuser/config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": {
|
| 3 |
+
"base_model": "Qwen/Qwen3-0.6B",
|
| 4 |
+
"teacher_model": "meta-llama/Llama-3.2-1B-Instruct",
|
| 5 |
+
"include_response": false,
|
| 6 |
+
"is_do_alignment": true,
|
| 7 |
+
"alignment_strategy": "longest",
|
| 8 |
+
"projector": {
|
| 9 |
+
"type": "C2CProjector",
|
| 10 |
+
"params": {
|
| 11 |
+
"hidden_dim": 1024,
|
| 12 |
+
"intermediate_dim": 1024,
|
| 13 |
+
"num_layers": 3,
|
| 14 |
+
"dropout": 0.1,
|
| 15 |
+
"initial_temperature": 1.0,
|
| 16 |
+
"final_temperature": 0.001,
|
| 17 |
+
"anneal_steps": 1929
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"mapping": "last_aligned"
|
| 21 |
+
},
|
| 22 |
+
"training": {
|
| 23 |
+
"learning_rate": 1e-4,
|
| 24 |
+
"weight_decay": 0.01,
|
| 25 |
+
"num_epochs": 1,
|
| 26 |
+
"max_length": 2048,
|
| 27 |
+
"device": "cuda",
|
| 28 |
+
"scheduler_type": "linear",
|
| 29 |
+
"warmup_ratio": 0.1,
|
| 30 |
+
"max_grad_norm": 1.0,
|
| 31 |
+
"gradient_accumulation_steps": 8,
|
| 32 |
+
"per_device_train_batch_size": 4,
|
| 33 |
+
"num_processes": 8,
|
| 34 |
+
"freeze": ["teacher","base"],
|
| 35 |
+
"seed": 42
|
| 36 |
+
},
|
| 37 |
+
"output": {
|
| 38 |
+
"output_dir": "local/checkpoints/0.6+llama3.2-1B-Instruct_general_fixed",
|
| 39 |
+
"save_steps": 50,
|
| 40 |
+
"eval_steps": 100,
|
| 41 |
+
"wandb_config": {
|
| 42 |
+
"project": "Rosetta",
|
| 43 |
+
"mode": "online",
|
| 44 |
+
"entity": "nics-efc",
|
| 45 |
+
"run_name": "0.6B+llama3.2-1B-Instruct_general_OpenHermes_500k_fixed"
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
"data": {
|
| 49 |
+
"type": "OpenHermesChatDataset",
|
| 50 |
+
"kwargs": {
|
| 51 |
+
"split": "train",
|
| 52 |
+
"max_word_count": 2048,
|
| 53 |
+
"num_samples": 500000,
|
| 54 |
+
"min_conversation_turns": 0
|
| 55 |
+
},
|
| 56 |
+
"train_ratio": 0.99
|
| 57 |
+
}
|
| 58 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6b84cf9a10360852217fe367e8248bf9791eee261219f52484fc5477d222408
|
| 3 |
+
size 35718087
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c15d5f248f8d22bb48d1f48c9ffe7b113037e7dc34c48ff3de436a0c2930c31
|
| 3 |
+
size 35718087
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:960672ea2eb83a231126fc6b5f61d6a5330c00e1f8420a077bd46430171cd412
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae84619ea1c415aeaa3114c0ce0e0c1b9a96f96f6f1e5eb216c5665806b4085a
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:324f5e9a3035c91f429e016f92332643d6cd7c25606f72d06e19ea58f4abdc7e
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca0b3e7cba3505885ef18c5e398ee6fd8eef213eb026694a24de3d13f604c92e
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84c6a85af62cfb9a7db925cf6a46f1c5a67f633e7bd18512c62d040ab05beb54
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b0fa8172377342dd7ac1afa7dcab3ed98ab8a23899b7687a916b99169ffe7af
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86ecbf2f970a1a68e2f3ef21d4cdbaa75e0fcef029e48aab02e4e6edbb2a28f8
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e965bb4920c9248bc0a7fc0bb46c340c0da7ee9b37110adac85d4f974e7ecde
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7702faa13547e2e405cb4f622c67e8ce62c091739c6030b963b3395d732bb663
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:717569636c5d5e58efb55004c0d06dd3b94d548778dc328d1d2f4d26b78bafbf
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecc2a14039f47670a23abf39ca7afacb0578acf8dc87a7b4109152e926e4ffcf
|
| 3 |
+
size 35718087
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ce76f68a19f64c90061aa245e3c5049526b7f4cfb11c0ee0fc78755ee32e120
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab2165aece78182eda4885fc91b5fb4137cc02e31b1eb515113cb1558835141e
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:091e0b61eeef5c52dc0e2602bc609d054062a8814f3f74914547d4663ab28435
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36e69a48162fde32a85786f9707023cf7140007d17243df9f5fbf3ac7e8af2e0
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:beaf18b4595bf5aa0c5b34b8064098afd441f36eb37b344b2e6f55523b833427
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb8e6844862a8201b2a1df359b3bfaa4277db0b6dc479b2958e6e7f5b01c42bd
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76891d9bf007e08bfdab6a9043cd03b00fbc49cf20e0c0ca6744b63c069cf850
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0166e78a5cc5d943ff244ce2687d9c38b47a5a735db6c1a208309482b019a854
|
| 3 |
+
size 35718124
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7dc733c67b74d24a10a4106b75e7bcb17815481ebcd7f99f67650878e79d312
|
| 3 |
+
size 35718087
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8037337fa3c7c58a42b660a5154e4b10bfa3dbbc84cbe1bdb721674d824686cb
|
| 3 |
+
size 35718087
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"class": "C2CProjector",
|
| 3 |
+
"init_args": {
|
| 4 |
+
"source_dim": 64,
|
| 5 |
+
"target_dim": 128,
|
| 6 |
+
"source_num_heads": 8,
|
| 7 |
+
"target_num_heads": 8,
|
| 8 |
+
"hidden_dim": 1024,
|
| 9 |
+
"intermediate_dim": 1024,
|
| 10 |
+
"num_layers": 3,
|
| 11 |
+
"dropout": 0.1,
|
| 12 |
+
"initial_temperature": 1.0,
|
| 13 |
+
"final_temperature": 0.001,
|
| 14 |
+
"anneal_steps": 1929,
|
| 15 |
+
"dtype": {
|
| 16 |
+
"__type__": "torch.dtype",
|
| 17 |
+
"value": "bfloat16"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05e36601f5cbfbde2ec1563a8b2f5cd9e4897b7f4fdacbe11628d13e707d661b
|
| 3 |
+
size 35718087
|