saanikat committed on
Commit ·
8943500
1
Parent(s): 787b39b
config files
Browse files- README.md +4 -0
- bedbase/config_bedbase.yaml +6 -0
- encode/config_encode.yaml +6 -0
- fairtracks/config_fairtracks.yaml +6 -0
README.md
CHANGED
|
@@ -11,16 +11,19 @@ This repository hosts three pre-trained models designed for metadata attribute s
|
|
| 11 |
- label_encoder_bedbase.pkl # Unique label values derived from training data; the model classifies the output into these labels for BEDBASE schema
|
| 12 |
- model_bedbase.pth # BEDBASE schema trained model
|
| 13 |
- vectorizer_bedbase.pkl # CountVectorizer instance from the `scikit-learn` library for Bag of Words encoding used as input to the model
|
|
|
|
| 14 |
/encode
|
| 15 |
- encode_schema_design.yaml # ENCODE schema
|
| 16 |
- label_encoder_encode.pkl # Unique label values derived from training data; the model classifies the output into these labels for ENCODE schema
|
| 17 |
- model_encode.pth # ENCODE schema trained model
|
| 18 |
- vectorizer_encode.pkl # CountVectorizer instance from the `scikit-learn` library for Bag of Words encoding used as input to the model
|
|
|
|
| 19 |
/fairtracks
|
| 20 |
- fairtracks_schema_design.yaml # FAIRTRACKS schema
|
| 21 |
- label_encoder_fairtracks.pkl # Unique label values derived from training data; the model classifies the output into these labels for FAIRTRACKS schema
|
| 22 |
- model_fairtracks.pth # FAIRTRACKS schema trained model
|
| 23 |
- vectorizer_fairtracks.pkl # CountVectorizer instance from the `scikit-learn` library for Bag of Words encoding used as input to the model
|
|
|
|
| 24 |
```
|
| 25 |
|
| 26 |
### Usage
|
|
@@ -43,4 +46,5 @@ To add a schema model:
|
|
| 43 |
- label_encoder_new_schema.pkl
|
| 44 |
- model_new_schema.pth
|
| 45 |
- vectorizer_new_schema.pkl
|
|
|
|
| 46 |
```
|
|
|
|
| 11 |
- label_encoder_bedbase.pkl # Unique label values derived from training data; the model classifies the output into these labels for BEDBASE schema
|
| 12 |
- model_bedbase.pth # BEDBASE schema trained model
|
| 13 |
- vectorizer_bedbase.pkl # CountVectorizer instance from the `scikit-learn` library for Bag of Words encoding used as input to the model
|
| 14 |
+
- config_bedbase.yaml # Config file with model parameters
|
| 15 |
/encode
|
| 16 |
- encode_schema_design.yaml # ENCODE schema
|
| 17 |
- label_encoder_encode.pkl # Unique label values derived from training data; the model classifies the output into these labels for ENCODE schema
|
| 18 |
- model_encode.pth # ENCODE schema trained model
|
| 19 |
- vectorizer_encode.pkl # CountVectorizer instance from the `scikit-learn` library for Bag of Words encoding used as input to the model
|
| 20 |
+
- config_encode.yaml # Config file with model parameters
|
| 21 |
/fairtracks
|
| 22 |
- fairtracks_schema_design.yaml # FAIRTRACKS schema
|
| 23 |
- label_encoder_fairtracks.pkl # Unique label values derived from training data; the model classifies the output into these labels for FAIRTRACKS schema
|
| 24 |
- model_fairtracks.pth # FAIRTRACKS schema trained model
|
| 25 |
- vectorizer_fairtracks.pkl # CountVectorizer instance from the `scikit-learn` library for Bag of Words encoding used as input to the model
|
| 26 |
+
- config_fairtracks.yaml # Config file with model parameters
|
| 27 |
```
|
| 28 |
|
| 29 |
### Usage
|
|
|
|
| 46 |
- label_encoder_new_schema.pkl
|
| 47 |
- model_new_schema.pth
|
| 48 |
- vectorizer_new_schema.pkl
|
| 49 |
+
- config_new_schema.yaml
|
| 50 |
```
|
bedbase/config_bedbase.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
params:
|
| 2 |
+
input_size_bow: 13708
|
| 3 |
+
embedding_size: 384
|
| 4 |
+
hidden_size: 32
|
| 5 |
+
output_size: 12
|
| 6 |
+
dropout_prob: 0.113
|
encode/config_encode.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
params:
|
| 2 |
+
input_size_bow: 10459
|
| 3 |
+
embedding_size: 384
|
| 4 |
+
hidden_size: 32
|
| 5 |
+
output_size: 18
|
| 6 |
+
dropout_prob: 0.113
|
fairtracks/config_fairtracks.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
params:
|
| 2 |
+
input_size_bow: 13617
|
| 3 |
+
embedding_size: 384
|
| 4 |
+
hidden_size: 32
|
| 5 |
+
output_size: 15
|
| 6 |
+
dropout_prob: 0.113
|