Update eole-config.yaml
Browse files
Show fact that huggingface datasets were used
- eole-config.yaml +9 -27
eole-config.yaml
CHANGED
|
@@ -18,42 +18,24 @@ n_sample: 0
|
|
| 18 |
|
| 19 |
data:
|
| 20 |
corpus_1:
|
| 21 |
-
path_src: train.ar
|
| 22 |
-
path_tgt: train.en
|
|
|
|
| 23 |
weight: 2
|
| 24 |
corpus_2:
|
| 25 |
-
path_src:
|
| 26 |
-
path_tgt:
|
|
|
|
| 27 |
weight: 1
|
| 28 |
corpus_3:
|
| 29 |
-
path_src:
|
| 30 |
-
path_tgt:
|
|
|
|
| 31 |
weight: 2
|
| 32 |
valid:
|
| 33 |
path_src: valid.ar
|
| 34 |
path_tgt: valid.en
|
| 35 |
|
| 36 |
-
# data:
|
| 37 |
-
# corpus_1:
|
| 38 |
-
# path_src: hf://quickmt/quickmt-train.ar-en/ar
|
| 39 |
-
# path_tgt: hf://quickmt/quickmt-train.ar-en/en
|
| 40 |
-
# path_sco: hf://quickmt/quickmt-train.ar-en/sco
|
| 41 |
-
# weight: 2
|
| 42 |
-
# corpus_2:
|
| 43 |
-
# path_src: hf://quickmt/newscrawl2024-en-backtranslated-ar/ar
|
| 44 |
-
# path_tgt: hf://quickmt/newscrawl2024-en-backtranslated-ar/en
|
| 45 |
-
# path_sco: hf://quickmt/newscrawl2024-en-backtranslated-ar/sco
|
| 46 |
-
# weight: 1
|
| 47 |
-
# corpus_3:
|
| 48 |
-
# path_src: hf://quickmt/madlad400-en-backtranslated-ar/ar
|
| 49 |
-
# path_tgt: hf://quickmt/madlad400-en-backtranslated-ar/en
|
| 50 |
-
# path_sco: hf://quickmt/madlad400-en-backtranslated-ar/sco
|
| 51 |
-
# weight: 2
|
| 52 |
-
# valid:
|
| 53 |
-
# path_src: valid.ar
|
| 54 |
-
# path_tgt: valid.en
|
| 55 |
-
|
| 56 |
-
|
| 57 |
|
| 58 |
transforms: [sentencepiece, filtertoolong]
|
| 59 |
transforms_configs:
|
|
|
|
| 18 |
|
| 19 |
data:
|
| 20 |
corpus_1:
|
| 21 |
+
path_src: hf://quickmt/quickmt-train.ar-en/ar
|
| 22 |
+
path_tgt: hf://quickmt/quickmt-train.ar-en/en
|
| 23 |
+
path_sco: hf://quickmt/quickmt-train.ar-en/sco
|
| 24 |
weight: 2
|
| 25 |
corpus_2:
|
| 26 |
+
path_src: hf://quickmt/newscrawl2024-en-backtranslated-ar/ar
|
| 27 |
+
path_tgt: hf://quickmt/newscrawl2024-en-backtranslated-ar/en
|
| 28 |
+
path_sco: hf://quickmt/newscrawl2024-en-backtranslated-ar/sco
|
| 29 |
weight: 1
|
| 30 |
corpus_3:
|
| 31 |
+
path_src: hf://quickmt/madlad400-en-backtranslated-ar/ar
|
| 32 |
+
path_tgt: hf://quickmt/madlad400-en-backtranslated-ar/en
|
| 33 |
+
path_sco: hf://quickmt/madlad400-en-backtranslated-ar/sco
|
| 34 |
weight: 2
|
| 35 |
valid:
|
| 36 |
path_src: valid.ar
|
| 37 |
path_tgt: valid.en
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
transforms: [sentencepiece, filtertoolong]
|
| 41 |
transforms_configs:
|