Zihan Min committed on
Commit
1e55ac0
·
1 Parent(s): 8704f55

upload 0.6+llama fuser

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. qwen3_0.6b+llam3.2_1b_Fuser/config.json +58 -0
  2. qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json +1 -0
  3. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json +20 -0
  4. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt +3 -0
  5. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json +20 -0
  6. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt +3 -0
  7. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json +20 -0
  8. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt +3 -0
  9. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json +20 -0
  10. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt +3 -0
  11. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json +20 -0
  12. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt +3 -0
  13. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json +20 -0
  14. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt +3 -0
  15. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json +20 -0
  16. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt +3 -0
  17. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json +20 -0
  18. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt +3 -0
  19. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json +20 -0
  20. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt +3 -0
  21. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json +20 -0
  22. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt +3 -0
  23. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json +20 -0
  24. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt +3 -0
  25. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json +20 -0
  26. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt +3 -0
  27. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json +20 -0
  28. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt +3 -0
  29. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json +20 -0
  30. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt +3 -0
  31. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json +20 -0
  32. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt +3 -0
  33. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json +20 -0
  34. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt +3 -0
  35. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json +20 -0
  36. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt +3 -0
  37. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json +20 -0
  38. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt +3 -0
  39. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json +20 -0
  40. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt +3 -0
  41. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json +20 -0
  42. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt +3 -0
  43. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json +20 -0
  44. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt +3 -0
  45. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json +20 -0
  46. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt +3 -0
  47. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json +20 -0
  48. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt +3 -0
  49. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json +20 -0
  50. qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt +3 -0
qwen3_0.6b+llam3.2_1b_Fuser/config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "base_model": "Qwen/Qwen3-0.6B",
4
+ "teacher_model": "meta-llama/Llama-3.2-1B-Instruct",
5
+ "include_response": false,
6
+ "is_do_alignment": true,
7
+ "alignment_strategy": "longest",
8
+ "projector": {
9
+ "type": "C2CProjector",
10
+ "params": {
11
+ "hidden_dim": 1024,
12
+ "intermediate_dim": 1024,
13
+ "num_layers": 3,
14
+ "dropout": 0.1,
15
+ "initial_temperature": 1.0,
16
+ "final_temperature": 0.001,
17
+ "anneal_steps": 1929
18
+ }
19
+ },
20
+ "mapping": "last_aligned"
21
+ },
22
+ "training": {
23
+ "learning_rate": 1e-4,
24
+ "weight_decay": 0.01,
25
+ "num_epochs": 1,
26
+ "max_length": 2048,
27
+ "device": "cuda",
28
+ "scheduler_type": "linear",
29
+ "warmup_ratio": 0.1,
30
+ "max_grad_norm": 1.0,
31
+ "gradient_accumulation_steps": 8,
32
+ "per_device_train_batch_size": 4,
33
+ "num_processes": 8,
34
+ "freeze": ["teacher","base"],
35
+ "seed": 42
36
+ },
37
+ "output": {
38
+ "output_dir": "local/checkpoints/0.6+llama3.2-1B-Instruct_general_fixed",
39
+ "save_steps": 50,
40
+ "eval_steps": 100,
41
+ "wandb_config": {
42
+ "project": "Rosetta",
43
+ "mode": "online",
44
+ "entity": "nics-efc",
45
+ "run_name": "0.6B+llama3.2-1B-Instruct_general_OpenHermes_500k_fixed"
46
+ }
47
+ },
48
+ "data": {
49
+ "type": "OpenHermesChatDataset",
50
+ "kwargs": {
51
+ "split": "train",
52
+ "max_word_count": 2048,
53
+ "num_samples": 500000,
54
+ "min_conversation_turns": 0
55
+ },
56
+ "train_ratio": 0.99
57
+ }
58
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6b84cf9a10360852217fe367e8248bf9791eee261219f52484fc5477d222408
3
+ size 35718087
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c15d5f248f8d22bb48d1f48c9ffe7b113037e7dc34c48ff3de436a0c2930c31
3
+ size 35718087
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:960672ea2eb83a231126fc6b5f61d6a5330c00e1f8420a077bd46430171cd412
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae84619ea1c415aeaa3114c0ce0e0c1b9a96f96f6f1e5eb216c5665806b4085a
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:324f5e9a3035c91f429e016f92332643d6cd7c25606f72d06e19ea58f4abdc7e
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0b3e7cba3505885ef18c5e398ee6fd8eef213eb026694a24de3d13f604c92e
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c6a85af62cfb9a7db925cf6a46f1c5a67f633e7bd18512c62d040ab05beb54
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b0fa8172377342dd7ac1afa7dcab3ed98ab8a23899b7687a916b99169ffe7af
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ecbf2f970a1a68e2f3ef21d4cdbaa75e0fcef029e48aab02e4e6edbb2a28f8
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e965bb4920c9248bc0a7fc0bb46c340c0da7ee9b37110adac85d4f974e7ecde
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7702faa13547e2e405cb4f622c67e8ce62c091739c6030b963b3395d732bb663
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:717569636c5d5e58efb55004c0d06dd3b94d548778dc328d1d2f4d26b78bafbf
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc2a14039f47670a23abf39ca7afacb0578acf8dc87a7b4109152e926e4ffcf
3
+ size 35718087
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce76f68a19f64c90061aa245e3c5049526b7f4cfb11c0ee0fc78755ee32e120
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab2165aece78182eda4885fc91b5fb4137cc02e31b1eb515113cb1558835141e
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091e0b61eeef5c52dc0e2602bc609d054062a8814f3f74914547d4663ab28435
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e69a48162fde32a85786f9707023cf7140007d17243df9f5fbf3ac7e8af2e0
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beaf18b4595bf5aa0c5b34b8064098afd441f36eb37b344b2e6f55523b833427
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8e6844862a8201b2a1df359b3bfaa4277db0b6dc479b2958e6e7f5b01c42bd
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76891d9bf007e08bfdab6a9043cd03b00fbc49cf20e0c0ca6744b63c069cf850
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0166e78a5cc5d943ff244ce2687d9c38b47a5a735db6c1a208309482b019a854
3
+ size 35718124
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7dc733c67b74d24a10a4106b75e7bcb17815481ebcd7f99f67650878e79d312
3
+ size 35718087
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8037337fa3c7c58a42b660a5154e4b10bfa3dbbc84cbe1bdb721674d824686cb
3
+ size 35718087
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "C2CProjector",
3
+ "init_args": {
4
+ "source_dim": 64,
5
+ "target_dim": 128,
6
+ "source_num_heads": 8,
7
+ "target_num_heads": 8,
8
+ "hidden_dim": 1024,
9
+ "intermediate_dim": 1024,
10
+ "num_layers": 3,
11
+ "dropout": 0.1,
12
+ "initial_temperature": 1.0,
13
+ "final_temperature": 0.001,
14
+ "anneal_steps": 1929,
15
+ "dtype": {
16
+ "__type__": "torch.dtype",
17
+ "value": "bfloat16"
18
+ }
19
+ }
20
+ }
qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05e36601f5cbfbde2ec1563a8b2f5cd9e4897b7f4fdacbe11628d13e707d661b
3
+ size 35718087