GphaHoa committed
Commit 6831f8b · verified · Parent: 68c90ab

Upload 13 files

Files changed (7):
  1. README.md +18 -17
  2. model.safetensors +1 -1
  3. optimizer.pt +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +46 -46
  7. training_args.bin +1 -1
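For reproducibility, files from this exact commit can be pinned by passing the commit hash as `revision` to `huggingface_hub`. A minimal sketch, with a hypothetical repository id (the repo name is not shown in this commit view):

```python
from huggingface_hub import hf_hub_download

# Hypothetical repository id -- substitute the actual "GphaHoa/..." repo name.
REPO_ID = "GphaHoa/cross_encoder_distilroberta_base_all_data"

# Pin the download to the commit shown above, so later pushes don't change the file.
# If the short hash is not resolved, use the full 40-character commit SHA instead.
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="model.safetensors",
    revision="6831f8b",
)
print(model_path)
```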
README.md CHANGED
@@ -3,7 +3,7 @@ tags:
 - sentence-transformers
 - cross-encoder
 - generated_from_trainer
-- dataset_size:1122150
+- dataset_size:1314940
 - loss:BinaryCrossEntropyLoss
 base_model: cross-encoder/stsb-distilroberta-base
 pipeline_tag: text-ranking
@@ -23,13 +23,13 @@ model-index:
       type: reranking-dev
     metrics:
     - type: map
-      value: 0.6701
+      value: 0.7207
       name: Map
     - type: mrr@50
-      value: 0.7572
+      value: 0.7903
      name: Mrr@50
     - type: ndcg@50
-      value: 0.775
+      value: 0.8072
       name: Ndcg@50
 ---
 
@@ -138,9 +138,9 @@ You can finetune this model on your own dataset.
 
 | Metric      | Value                |
 |:------------|:---------------------|
-| map         | 0.6701 (+0.0486)     |
-| mrr@50      | 0.7572 (+0.0196)     |
-| **ndcg@50** | **0.7750 (+0.0495)** |
+| map         | 0.7207 (+0.0992)     |
+| mrr@50      | 0.7903 (+0.0528)     |
+| **ndcg@50** | **0.8072 (+0.0817)** |
 
 <!--
 ## Bias, Risks and Limitations
@@ -160,13 +160,13 @@ You can finetune this model on your own dataset.
 
 #### Unnamed Dataset
 
-* Size: 1,122,150 training samples
+* Size: 1,314,940 training samples
 * Columns: <code>topic</code>, <code>content</code>, and <code>label</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | topic | content | label |
-  |:--------|:------|:--------|:------|
-  | type    | string | string | int |
-  | details | <ul><li>min: 42 characters</li><li>mean: 147.6 characters</li><li>max: 336 characters</li></ul> | <ul><li>min: 5 characters</li><li>mean: 148.86 characters</li><li>max: 376 characters</li></ul> | <ul><li>0: ~90.70%</li><li>1: ~9.30%</li></ul> |
+  |         | topic | content | label |
+  |:--------|:------|:--------|:------|
+  | type    | string | string | int |
+  | details | <ul><li>min: 42 characters</li><li>mean: 158.98 characters</li><li>max: 336 characters</li></ul> | <ul><li>min: 7 characters</li><li>mean: 150.81 characters</li><li>max: 353 characters</li></ul> | <ul><li>0: ~76.30%</li><li>1: ~23.70%</li></ul> |
 * Samples:
   | topic | content | label |
   |:------|:--------|:------|
@@ -177,7 +177,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "activation_fn": "torch.nn.modules.linear.Identity",
-    "pos_weight": 11.914752960205078
+    "pos_weight": 3.7016043663024902
 }
 ```
 
@@ -188,7 +188,6 @@ You can finetune this model on your own dataset.
 - `per_device_train_batch_size`: 128
 - `per_device_eval_batch_size`: 128
 - `learning_rate`: 2e-05
-- `num_train_epochs`: 2
 - `warmup_ratio`: 0.1
 - `seed`: 12
 - `bf16`: True
@@ -215,7 +214,7 @@ You can finetune this model on your own dataset.
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`: 2
+- `num_train_epochs`: 3
 - `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
@@ -317,8 +316,10 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step  | Training Loss | reranking-dev_ndcg@50 |
 |:------:|:-----:|:-------------:|:---------------------:|
-| 1.0001 | 8768  | 0.5739        | 0.7669 (+0.0414)      |
-| 1.5002 | 13152 | 0.6846        | 0.7750 (+0.0495)      |
+| 0.0001 | 1     | 1.0174        | -                     |
+| 0.9999 | 10272 | 0.6234        | 0.7913 (+0.0658)      |
+| 1.9998 | 20544 | 0.3901        | 0.8041 (+0.0786)      |
+| 2.9997 | 30816 | 0.2978        | 0.8072 (+0.0817)      |
 
 
 ### Framework Versions
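The loss-configuration change above (`pos_weight` dropping from ~11.91 to ~3.70) tracks the new label balance: BinaryCrossEntropyLoss up-weights the positive class to offset negative-heavy data. The sketch below, an assumption about the usual derivation rather than the author's script, computes that weight as the negative-to-positive ratio and passes it to `torch.nn.BCEWithLogitsLoss`; the value in the card comes from the full 1,314,940-sample set, so it won't match the approximate first-1000-sample statistics exactly.

```python
import torch

def pos_weight_from_labels(labels: list[int]) -> torch.Tensor:
    """Ratio of negatives to positives -- the usual choice for BCE pos_weight."""
    num_pos = sum(labels)
    num_neg = len(labels) - num_pos
    return torch.tensor(num_neg / num_pos)

# Toy split mirroring the card's approximate class balance (~23.70% positives).
toy_labels = [1] * 237 + [0] * 763
weight = pos_weight_from_labels(toy_labels)  # ~3.2 here; ~3.70 on the full data
loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=weight)

logits = torch.randn(8)
targets = torch.randint(0, 2, (8,)).float()
print(loss_fn(logits, targets))
```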
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55fcb1227953f704f05ee9c7b79e775047b794021e25a3e8ddbb78945305bef0
+oid sha256:ba9801b08b210d9426c107f45d8f4e5f1a12600b1008fe1240e7edc2789c2297
 size 328489204
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddac13df73512f6ec7dd409135468f93d79b7202904d70efb9d9824b2b9b4f27
+oid sha256:5c46a30eb406634ad08a97771e8c19c425bc50eb1095c3ea99e57d3476635f42
 size 657041466
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1542eb6fb59dc9864cd057e0dd24538894542520653336503f1ebcf817ebacb8
+oid sha256:2ad6885877c28968ac001ea127a0a5192a6e0348b793e71db3c573c337c34c62
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:279c12b1c3f0487a2b66dee686cc0dadaa4f214680c384a16910d2e2fd3d627d
+oid sha256:2e69362ae5a81a933052e1366ebf33c0ab751ddf0f891dce0d1f341fedec93fd
 size 1064
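The four binary files above (and training_args.bin further down) are tracked with Git LFS, so the diff only shows each blob's sha256 and byte size. A small sketch of how a downloaded copy can be checked against its pointer; the local path is a placeholder:

```python
import hashlib
from pathlib import Path

def verify_lfs_pointer(file_path: str, expected_sha256: str, expected_size: int) -> bool:
    """Check a downloaded blob against the oid/size recorded in its Git LFS pointer."""
    path = Path(file_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Values taken from the new model.safetensors pointer above; the path is hypothetical.
ok = verify_lfs_pointer(
    "model.safetensors",
    "ba9801b08b210d9426c107f45d8f4e5f1a12600b1008fe1240e7edc2789c2297",
    328489204,
)
print("pointer matches" if ok else "mismatch")
```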
trainer_state.json CHANGED
@@ -1,87 +1,87 @@
 {
-  "best_global_step": 13152,
-  "best_metric": 0.7750120213490577,
-  "best_model_checkpoint": "content/cross_encoder_distilroberta_base_all_data/checkpoint-13152",
-  "epoch": 2.0,
-  "eval_steps": 4384,
-  "global_step": 17534,
+  "best_global_step": 30816,
+  "best_metric": 0.807234452615663,
+  "best_model_checkpoint": "content/cross_encoder_distilroberta_base_all_data/checkpoint-30816",
+  "epoch": 3.0,
+  "eval_steps": 10272,
+  "global_step": 30819,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.00011406410402646287,
-      "grad_norm": 11.324646949768066,
+      "epoch": 9.734254842791784e-05,
+      "grad_norm": 5.287121772766113,
       "learning_rate": 0.0,
-      "loss": 1.2833,
+      "loss": 1.0174,
       "step": 1
     },
     {
-      "epoch": 0.5000570320520132,
-      "grad_norm": 8.051932334899902,
-      "learning_rate": 1.1112801013941699e-05,
-      "loss": 0.8934,
-      "step": 4384
+      "epoch": 0.999902657451572,
+      "grad_norm": 11.268630981445312,
+      "learning_rate": 1.4816310343584383e-05,
+      "loss": 0.6234,
+      "step": 10272
     },
     {
-      "epoch": 0.5000570320520132,
+      "epoch": 0.999902657451572,
       "eval_reranking-dev_base_map": 0.6214825536231217,
       "eval_reranking-dev_base_mrr@50": 0.7375349668670806,
       "eval_reranking-dev_base_ndcg@50": 0.725527756915131,
-      "eval_reranking-dev_map": 0.6377966435473792,
-      "eval_reranking-dev_mrr@50": 0.7335214157004677,
-      "eval_reranking-dev_ndcg@50": 0.7532420097623872,
-      "eval_runtime": 210.6845,
+      "eval_reranking-dev_map": 0.6945674925725406,
+      "eval_reranking-dev_mrr@50": 0.7747076964185934,
+      "eval_reranking-dev_ndcg@50": 0.7913486180328326,
+      "eval_runtime": 210.3713,
       "eval_samples_per_second": 0.0,
       "eval_steps_per_second": 0.0,
-      "step": 4384
+      "step": 10272
     },
     {
-      "epoch": 1.0001140641040265,
-      "grad_norm": 9.359151840209961,
-      "learning_rate": 0.0,
-      "loss": 0.5739,
-      "step": 8768
+      "epoch": 1.9998053149031443,
+      "grad_norm": 5.99316930770874,
+      "learning_rate": 7.4095972888199885e-06,
+      "loss": 0.3901,
+      "step": 20544
     },
     {
-      "epoch": 1.0001140641040265,
+      "epoch": 1.9998053149031443,
       "eval_reranking-dev_base_map": 0.6214825536231217,
       "eval_reranking-dev_base_mrr@50": 0.7375349668670806,
       "eval_reranking-dev_base_ndcg@50": 0.725527756915131,
-      "eval_reranking-dev_map": 0.658237243359361,
-      "eval_reranking-dev_mrr@50": 0.7475440647504192,
-      "eval_reranking-dev_ndcg@50": 0.7669254328128408,
-      "eval_runtime": 213.929,
+      "eval_reranking-dev_map": 0.7153961592162474,
+      "eval_reranking-dev_mrr@50": 0.7867533806941779,
+      "eval_reranking-dev_ndcg@50": 0.8041280710955544,
+      "eval_runtime": 206.6571,
       "eval_samples_per_second": 0.0,
       "eval_steps_per_second": 0.0,
-      "step": 8768
+      "step": 20544
     },
     {
-      "epoch": 1.5001710961560397,
-      "grad_norm": 12.708351135253906,
-      "learning_rate": 5.555133079847909e-06,
-      "loss": 0.6846,
-      "step": 13152
+      "epoch": 2.999707972354716,
+      "grad_norm": 9.407133102416992,
+      "learning_rate": 2.8842340555936117e-09,
+      "loss": 0.2978,
+      "step": 30816
     },
     {
-      "epoch": 1.5001710961560397,
+      "epoch": 2.999707972354716,
       "eval_reranking-dev_base_map": 0.6214825536231217,
       "eval_reranking-dev_base_mrr@50": 0.7375349668670806,
       "eval_reranking-dev_base_ndcg@50": 0.725527756915131,
-      "eval_reranking-dev_map": 0.6701267463119136,
-      "eval_reranking-dev_mrr@50": 0.7571781873839967,
-      "eval_reranking-dev_ndcg@50": 0.7750120213490577,
-      "eval_runtime": 212.3233,
+      "eval_reranking-dev_map": 0.7207063760806678,
+      "eval_reranking-dev_mrr@50": 0.7903183680827569,
+      "eval_reranking-dev_ndcg@50": 0.807234452615663,
+      "eval_runtime": 204.5125,
       "eval_samples_per_second": 0.0,
       "eval_steps_per_second": 0.0,
-      "step": 13152
+      "step": 30816
     }
   ],
-  "logging_steps": 4384,
-  "max_steps": 17534,
+  "logging_steps": 10272,
+  "max_steps": 30819,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
-  "save_steps": 4384,
+  "num_train_epochs": 3,
+  "save_steps": 10272,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "args": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e578021e182d911578884f2a78658440e44dd70363c563d6a765c69118df4dfd
+oid sha256:ed5197c8dfe171c31df88574dd24d5abfd88e1a5df6f32235b6164cb73890fa7
 size 5624
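Taken together, the commit updates a sentence-transformers cross-encoder for text ranking over (topic, content) pairs. A minimal inference sketch; the repository id is a placeholder, since this commit view does not show the repo name, and the texts are invented for illustration:

```python
from sentence_transformers import CrossEncoder

# Hypothetical repo id -- substitute the actual "GphaHoa/..." repository.
model = CrossEncoder("GphaHoa/cross_encoder_distilroberta_base_all_data")

topic = "Introduction to photosynthesis and how plants convert light into energy"
contents = [
    "Photosynthesis converts light energy into chemical energy stored in glucose.",
    "The French Revolution began in 1789 and reshaped European politics.",
]

# Higher score = content judged more relevant to the topic (raw logits, Identity activation).
scores = model.predict([(topic, c) for c in contents])
for score, content in sorted(zip(scores, contents), reverse=True):
    print(f"{score:.3f}  {content}")
```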