mazesmazes committed on
Commit
8d8b876
·
verified ·
1 Parent(s): 912697a

Training in progress, step 10000

Browse files
Files changed (3) hide show
  1. config.json +5 -5
  2. model.safetensors +2 -2
  3. training_args.bin +1 -1
config.json CHANGED
@@ -281,7 +281,7 @@
281
  "inference_warmup_tokens": 10,
282
  "label_smoothing": 0.0,
283
  "length_penalty": 1.0,
284
- "llm_dim": 1024,
285
  "lora_alpha": 32,
286
  "lora_dropout": 0.0,
287
  "lora_rank": 8,
@@ -321,7 +321,7 @@
321
  "router_aux_loss_coef": 0.01,
322
  "system_prompt": "",
323
  "text_config": {
324
- "_name_or_path": "Qwen/Qwen3-0.6B",
325
  "architectures": [
326
  "Qwen3ForCausalLM"
327
  ],
@@ -331,9 +331,9 @@
331
  "eos_token_id": 151645,
332
  "head_dim": 128,
333
  "hidden_act": "silu",
334
- "hidden_size": 1024,
335
  "initializer_range": 0.02,
336
- "intermediate_size": 3072,
337
  "layer_types": [
338
  "full_attention",
339
  "full_attention",
@@ -382,7 +382,7 @@
382
  "use_sliding_window": false,
383
  "vocab_size": 151670
384
  },
385
- "text_model_id": "Qwen/Qwen3-0.6B",
386
  "time_mask_length": 100,
387
  "transformers_version": "5.0.0.dev0",
388
  "use_cache": false,
 
281
  "inference_warmup_tokens": 10,
282
  "label_smoothing": 0.0,
283
  "length_penalty": 1.0,
284
+ "llm_dim": 2048,
285
  "lora_alpha": 32,
286
  "lora_dropout": 0.0,
287
  "lora_rank": 8,
 
321
  "router_aux_loss_coef": 0.01,
322
  "system_prompt": "",
323
  "text_config": {
324
+ "_name_or_path": "Qwen/Qwen3-1.7B",
325
  "architectures": [
326
  "Qwen3ForCausalLM"
327
  ],
 
331
  "eos_token_id": 151645,
332
  "head_dim": 128,
333
  "hidden_act": "silu",
334
+ "hidden_size": 2048,
335
  "initializer_range": 0.02,
336
+ "intermediate_size": 6144,
337
  "layer_types": [
338
  "full_attention",
339
  "full_attention",
 
382
  "use_sliding_window": false,
383
  "vocab_size": 151670
384
  },
385
+ "text_model_id": "Qwen/Qwen3-1.7B",
386
  "time_mask_length": 100,
387
  "transformers_version": "5.0.0.dev0",
388
  "use_cache": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d53c23dd303affd6818a84362bfde2a1d5a5df17dc8df88610bf12647f8fc252
3
- size 12587424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:768d4130a1c141e8c626ec9815cc5de075420e2970c37d7ce52763a18ca205cb
3
+ size 58732960
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18572cd996e881f07a0575e01bb0e0151fd4feb2cf38350b9c02d1ee7f580ec9
3
  size 5265
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a7b7022481c58a22f6d25ca3fc9a90f2337145b17d68d21726cddba4714e179
3
  size 5265