diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/config.json b/qwen3_0.6b+llam3.2_1b_Fuser/config.json new file mode 100755 index 0000000000000000000000000000000000000000..fa8107c1f0f0ce060b9a6f2a2df66732e93f8e49 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/config.json @@ -0,0 +1,58 @@ +{ + "model": { + "base_model": "Qwen/Qwen3-0.6B", + "teacher_model": "meta-llama/Llama-3.2-1B-Instruct", + "include_response": false, + "is_do_alignment": true, + "alignment_strategy": "longest", + "projector": { + "type": "C2CProjector", + "params": { + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929 + } + }, + "mapping": "last_aligned" + }, + "training": { + "learning_rate": 1e-4, + "weight_decay": 0.01, + "num_epochs": 1, + "max_length": 2048, + "device": "cuda", + "scheduler_type": "linear", + "warmup_ratio": 0.1, + "max_grad_norm": 1.0, + "gradient_accumulation_steps": 8, + "per_device_train_batch_size": 4, + "num_processes": 8, + "freeze": ["teacher","base"], + "seed": 42 + }, + "output": { + "output_dir": "local/checkpoints/0.6+llama3.2-1B-Instruct_general_fixed", + "save_steps": 50, + "eval_steps": 100, + "wandb_config": { + "project": "Rosetta", + "mode": "online", + "entity": "nics-efc", + "run_name": "0.6B+llama3.2-1B-Instruct_general_OpenHermes_500k_fixed" + } + }, + "data": { + "type": "OpenHermesChatDataset", + "kwargs": { + "split": "train", + "max_word_count": 2048, + "num_samples": 500000, + "min_conversation_turns": 0 + }, + "train_ratio": 0.99 + } +} diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json new file mode 100755 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/aggregator_config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt new file mode 100755 index 0000000000000000000000000000000000000000..829a6b2ad56ccc13ce43325dff608092b432cdc9 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b84cf9a10360852217fe367e8248bf9791eee261219f52484fc5477d222408 +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt new file mode 100755 index 0000000000000000000000000000000000000000..8f09375d36adb084f7931fe9363020d2916d484e --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c15d5f248f8d22bb48d1f48c9ffe7b113037e7dc34c48ff3de436a0c2930c31 +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt new file mode 100755 index 0000000000000000000000000000000000000000..50767e1a5399e05c269c3f1a7ccb9cce5bd93d5c --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960672ea2eb83a231126fc6b5f61d6a5330c00e1f8420a077bd46430171cd412 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt new file mode 100755 index 0000000000000000000000000000000000000000..5028ea13f1ccbf5a25405cb0336dcfe9ef22dfeb --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae84619ea1c415aeaa3114c0ce0e0c1b9a96f96f6f1e5eb216c5665806b4085a +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt new file mode 100755 index 0000000000000000000000000000000000000000..007d7d5c551509ff670c63ad2e64ab4739bc0f75 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324f5e9a3035c91f429e016f92332643d6cd7c25606f72d06e19ea58f4abdc7e +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt new file mode 100755 index 0000000000000000000000000000000000000000..45220079c30b1fe32aa538a57df8a3c167e507f6 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0b3e7cba3505885ef18c5e398ee6fd8eef213eb026694a24de3d13f604c92e +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt new file mode 100755 index 0000000000000000000000000000000000000000..dd3f3aff5d5471de012148781ed4db6eddcff6c4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c6a85af62cfb9a7db925cf6a46f1c5a67f633e7bd18512c62d040ab05beb54 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt new file mode 100755 index 0000000000000000000000000000000000000000..6960034d24f71157e3217411c9c5ad0bcfeadc84 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b0fa8172377342dd7ac1afa7dcab3ed98ab8a23899b7687a916b99169ffe7af +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt new file mode 100755 index 0000000000000000000000000000000000000000..6cad08af315e52c13ee7781ba2bb552b482dd6bc --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ecbf2f970a1a68e2f3ef21d4cdbaa75e0fcef029e48aab02e4e6edbb2a28f8 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt new file mode 100755 index 0000000000000000000000000000000000000000..8077a68effdbc466f266cb4d2bff96451a5e435f --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e965bb4920c9248bc0a7fc0bb46c340c0da7ee9b37110adac85d4f974e7ecde +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt new file mode 100755 index 0000000000000000000000000000000000000000..61b006701493e036211e2dc2850ba600192b38c6 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7702faa13547e2e405cb4f622c67e8ce62c091739c6030b963b3395d732bb663 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt new file mode 100755 index 0000000000000000000000000000000000000000..dd513bae789e6f0c734d44dc0ceb7bcc52574776 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717569636c5d5e58efb55004c0d06dd3b94d548778dc328d1d2f4d26b78bafbf +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt new file mode 100755 index 0000000000000000000000000000000000000000..f0229e082eafd244ab84ffc09ebb98d75e77b11e --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc2a14039f47670a23abf39ca7afacb0578acf8dc87a7b4109152e926e4ffcf +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt new file mode 100755 index 0000000000000000000000000000000000000000..1584ebca0f7bbf345e87e1133388827f3836f3cf --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce76f68a19f64c90061aa245e3c5049526b7f4cfb11c0ee0fc78755ee32e120 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt new file mode 100755 index 0000000000000000000000000000000000000000..3494eb6863080eadbb819de27e5b4204f28e6220 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2165aece78182eda4885fc91b5fb4137cc02e31b1eb515113cb1558835141e +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt new file mode 100755 index 0000000000000000000000000000000000000000..1a7276a85836d4a4ef198d2340d1b58ec76bde20 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091e0b61eeef5c52dc0e2602bc609d054062a8814f3f74914547d4663ab28435 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt new file mode 100755 index 0000000000000000000000000000000000000000..becd01c0c86733bb9c22847c173f1dc0c7a2b9df --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e69a48162fde32a85786f9707023cf7140007d17243df9f5fbf3ac7e8af2e0 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt new file mode 100755 index 0000000000000000000000000000000000000000..292f498abb645c8504fb27d562475d6c4c172414 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beaf18b4595bf5aa0c5b34b8064098afd441f36eb37b344b2e6f55523b833427 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt new file mode 100755 index 0000000000000000000000000000000000000000..bf4e81879d9879ecdb425ec59dd9aa2f2d51fcef --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8e6844862a8201b2a1df359b3bfaa4277db0b6dc479b2958e6e7f5b01c42bd +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt new file mode 100755 index 0000000000000000000000000000000000000000..a816baa5f8f0f9a88509845f5268266ac5c9357d --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76891d9bf007e08bfdab6a9043cd03b00fbc49cf20e0c0ca6744b63c069cf850 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt new file mode 100755 index 0000000000000000000000000000000000000000..a85e229d4a4af5ff4856fb6bf4fae1f1125c6793 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0166e78a5cc5d943ff244ce2687d9c38b47a5a735db6c1a208309482b019a854 +size 35718124 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt new file mode 100755 index 0000000000000000000000000000000000000000..2b14dba3a707886ad30faa376fd068924c844d23 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7dc733c67b74d24a10a4106b75e7bcb17815481ebcd7f99f67650878e79d312 +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt new file mode 100755 index 0000000000000000000000000000000000000000..0c37e7d98ed2af2346f7b1c90c19691ad06bb557 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8037337fa3c7c58a42b660a5154e4b10bfa3dbbc84cbe1bdb721674d824686cb +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt new file mode 100755 index 0000000000000000000000000000000000000000..53da1a3e3ef9f9709c3ae8f225b00c7c0942aa9b --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e36601f5cbfbde2ec1563a8b2f5cd9e4897b7f4fdacbe11628d13e707d661b +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_6.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_6.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_6.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_6.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_6.pt new file mode 100755 index 0000000000000000000000000000000000000000..833e6d0dc487f3ff48a8ac8cacb11e7c8575bc60 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17285bcb352491a5f1d6339e9f6d6f51c3f6fa5597626ea62b23f6f6f3ea9518 +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_7.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_7.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_7.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_7.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_7.pt new file mode 100755 index 0000000000000000000000000000000000000000..e19d1686f2c984235284f40137ebcdb8bbbbb71f --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066731386adda52bbc1654a06f22a8e8262f13308bcd592ad1abccfd574c7c52 +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_8.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_8.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_8.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_8.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_8.pt new file mode 100755 index 0000000000000000000000000000000000000000..a203e389c6a0cfc2c1cf1dd5a89af3bac1e73c0b --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:687b334192afe1607338c28014ef75df2a6ce8f468fbe7a8ff125ccedbba48e6 +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_9.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_9.json new file mode 100755 index 0000000000000000000000000000000000000000..b91a9d89770577a41a6ee49a18f7ef2e496d4ec4 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_9.json @@ -0,0 +1,20 @@ +{ + "class": "C2CProjector", + "init_args": { + "source_dim": 64, + "target_dim": 128, + "source_num_heads": 8, + "target_num_heads": 8, + "hidden_dim": 1024, + "intermediate_dim": 1024, + "num_layers": 3, + "dropout": 0.1, + "initial_temperature": 1.0, + "final_temperature": 0.001, + "anneal_steps": 1929, + "dtype": { + "__type__": "torch.dtype", + "value": "bfloat16" + } + } +} \ No newline at end of file diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_9.pt b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_9.pt new file mode 100755 index 0000000000000000000000000000000000000000..eea5a53955fbdb36bc71941c1c46eb34156fa1df --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c98eb2e4b5c57922e45a77d597a606f6881ed22620f6103aca19205aa672ea +size 35718087 diff --git a/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_config.json b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_config.json new file mode 100755 index 0000000000000000000000000000000000000000..bb9395ad08e2755560004319cb4f322ddb2b3a64 --- /dev/null +++ b/qwen3_0.6b+llam3.2_1b_Fuser/final/projector_config.json @@ -0,0 +1 @@ +{"0": {"1": {"0": [[0, 0]], "1": [[0, 1]], "2": [[0, 2]], "3": [[0, 3]], "4": [[0, 4]], "5": [[0, 5]], "6": [[0, 6]], "7": [[0, 7]], "8": [[0, 8]], "9": [[0, 9]], "10": [[0, 10]], "11": [[0, 11]], "12": [[0, 12]], "13": [[1, 13]], "14": [[2, 14]], "15": [[3, 15]], "16": [[4, 16]], "17": [[5, 17]], "18": [[6, 18]], "19": [[7, 19]], "20": [[8, 20]], "21": [[9, 21]], "22": [[10, 22]], "23": [[11, 23]], "24": [[12, 24]], "25": [[13, 25]], "26": [[14, 26]], "27": [[15, 27]]}}} \ No newline at end of file