Translation
Arabic
English
Eval Results
radinplaid committed on
Commit
6464fd9
·
verified ·
1 Parent(s): 6c17a1e

Update eole-config.yaml

Browse files

Show the fact that Hugging Face datasets were used

Files changed (1) hide show
  1. eole-config.yaml +9 -27
eole-config.yaml CHANGED
@@ -18,42 +18,24 @@ n_sample: 0
18
 
19
  data:
20
  corpus_1:
21
- path_src: train.ar
22
- path_tgt: train.en
 
23
  weight: 2
24
  corpus_2:
25
- path_src: newscrawl.backtrans.ar
26
- path_tgt: newscrawl.2024.en
 
27
  weight: 1
28
  corpus_3:
29
- path_src: madlad.backtrans.ar
30
- path_tgt: madlad.en
 
31
  weight: 2
32
  valid:
33
  path_src: valid.ar
34
  path_tgt: valid.en
35
 
36
- # data:
37
- # corpus_1:
38
- # path_src: hf://quickmt/quickmt-train.ar-en/ar
39
- # path_tgt: hf://quickmt/quickmt-train.ar-en/en
40
- # path_sco: hf://quickmt/quickmt-train.ar-en/sco
41
- # weight: 2
42
- # corpus_2:
43
- # path_src: hf://quickmt/newscrawl2024-en-backtranslated-ar/ar
44
- # path_tgt: hf://quickmt/newscrawl2024-en-backtranslated-ar/en
45
- # path_sco: hf://quickmt/newscrawl2024-en-backtranslated-ar/sco
46
- # weight: 1
47
- # corpus_3:
48
- # path_src: hf://quickmt/madlad400-en-backtranslated-ar/ar
49
- # path_tgt: hf://quickmt/madlad400-en-backtranslated-ar/en
50
- # path_sco: hf://quickmt/madlad400-en-backtranslated-ar/sco
51
- # weight: 2
52
- # valid:
53
- # path_src: valid.ar
54
- # path_tgt: valid.en
55
-
56
-
57
 
58
  transforms: [sentencepiece, filtertoolong]
59
  transforms_configs:
 
18
 
19
  data:
20
  corpus_1:
21
+ path_src: hf://quickmt/quickmt-train.ar-en/ar
22
+ path_tgt: hf://quickmt/quickmt-train.ar-en/en
23
+ path_sco: hf://quickmt/quickmt-train.ar-en/sco
24
  weight: 2
25
  corpus_2:
26
+ path_src: hf://quickmt/newscrawl2024-en-backtranslated-ar/ar
27
+ path_tgt: hf://quickmt/newscrawl2024-en-backtranslated-ar/en
28
+ path_sco: hf://quickmt/newscrawl2024-en-backtranslated-ar/sco
29
  weight: 1
30
  corpus_3:
31
+ path_src: hf://quickmt/madlad400-en-backtranslated-ar/ar
32
+ path_tgt: hf://quickmt/madlad400-en-backtranslated-ar/en
33
+ path_sco: hf://quickmt/madlad400-en-backtranslated-ar/sco
34
  weight: 2
35
  valid:
36
  path_src: valid.ar
37
  path_tgt: valid.en
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  transforms: [sentencepiece, filtertoolong]
41
  transforms_configs: