Training in progress, step 1000

Files changed (5) hide show

adapter_config.json CHANGED Viewed

@@ -18,7 +18,7 @@
   "loftq_config": {},
   "lora_alpha": 64,
   "lora_bias": false,
-  "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
@@ -29,8 +29,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "loftq_config": {},
   "lora_alpha": 64,
   "lora_bias": false,
+  "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "v_proj",
+    "up_proj",
     "q_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04b0f13a2c6affd9364bb060c7fefc6371c514605e03ab7edc7f209655507dd3
-size 18365064

 version https://git-lfs.github.com/spec/v1
+oid sha256:93b61bcfb96c91772e8e5b41eb41f368dba655dbf54fdde2948457d4f63d42cc
+size 80792456

config.json CHANGED Viewed

@@ -283,11 +283,16 @@
   "length_penalty": 1.0,
   "llm_dim": 1024,
   "lora_alpha": 64,
-  "lora_dropout": 0.0,
   "lora_rank": 32,
   "lora_target_modules": [
     "q_proj",
-    "v_proj"
   ],
   "max_new_tokens": 128,
   "min_new_tokens": 0,
@@ -378,7 +383,7 @@
     "vocab_size": 151670
   },
   "text_model_id": "Qwen/Qwen3-0.6B",
-  "time_mask_length": 50,
   "transformers_version": "5.0.0.dev0",
   "use_cache": false,
   "use_lora": true,

   "length_penalty": 1.0,
   "llm_dim": 1024,
   "lora_alpha": 64,
+  "lora_dropout": 0.1,
   "lora_rank": 32,
   "lora_target_modules": [
     "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj"
   ],
   "max_new_tokens": 128,
   "min_new_tokens": 0,
     "vocab_size": 151670
   },
   "text_model_id": "Qwen/Qwen3-0.6B",
+  "time_mask_length": 100,
   "transformers_version": "5.0.0.dev0",
   "use_cache": false,
   "use_lora": true,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a36186bad47fadc818caba35c6ed996f863f4f17da2a91283af70bf15238f70
 size 25172384

 version https://git-lfs.github.com/spec/v1
+oid sha256:72848e0848e214362323629f760134c5450a8ec981d6e63cdde28fe7ab1be4d4
 size 25172384

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:330e4c09cde3d257e2265f96e24e2d447234d90af25450241177582c5bef7f8b
 size 5201

 version https://git-lfs.github.com/spec/v1
+oid sha256:da126f1151bd54f84aad0da286b4ef3ab3076094f0b22438b6bc8e0ef025a9c6
 size 5201