lucas066001
committed on
Commit
·
07f841f
1
Parent(s):
90cfe35
feat(libs.langage_detection): Adding models saving and unittest
Browse files
- .github/workflows/build.yml +3 -1
- app/model.sav +0 -0
- app/travel_resolver/libs/nlp/langage_detection/trainer.py +16 -3
- app/travel_resolver/tests/langage_detection_test.py +84 -0
- models/langage_detection/model_0_699.sav +0 -0
- models/langage_detection/model_0_714.sav +0 -0
- models/langage_detection/{model.sav → model_0_746.sav} +0 -0
.github/workflows/build.yml
CHANGED
|
@@ -44,4 +44,6 @@ jobs:
|
|
| 44 |
|
| 45 |
- name: Test libs
|
| 46 |
run: |
|
| 47 |
-
cd ./app &&
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
- name: Test libs
|
| 46 |
run: |
|
| 47 |
+
cd ./app && \
|
| 48 |
+
python -m unittest travel_resolver.tests.sample_test && \
|
| 49 |
+
python -m unittest travel_resolver.tests.langage_detection_test
|
app/model.sav
ADDED
|
Binary file (2.62 kB). View file
|
|
|
app/travel_resolver/libs/nlp/langage_detection/trainer.py
CHANGED
|
@@ -15,7 +15,7 @@ def read_data():
|
|
| 15 |
i = 1
|
| 16 |
for lang in var.CORRESP_LANG:
|
| 17 |
first = True
|
| 18 |
-
current_file = "
|
| 19 |
current_file += lang + "_trainset.csv"
|
| 20 |
with open(current_file, "r") as csv_file:
|
| 21 |
csv_reader = csv.reader(csv_file)
|
|
@@ -37,8 +37,21 @@ def train():
|
|
| 37 |
|
| 38 |
model = SGDClassifier()
|
| 39 |
model.fit(x_train, y_train)
|
| 40 |
-
joblib.dump(model, "model.sav")
|
| 41 |
-
|
| 42 |
y_pred = model.predict(x_test)
|
| 43 |
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
|
| 44 |
print("Accuracy: {:.2f}%".format(accuracy * 100))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
i = 1
|
| 16 |
for lang in var.CORRESP_LANG:
|
| 17 |
first = True
|
| 18 |
+
current_file = "../data/langage_detection/trainset/"
|
| 19 |
current_file += lang + "_trainset.csv"
|
| 20 |
with open(current_file, "r") as csv_file:
|
| 21 |
csv_reader = csv.reader(csv_file)
|
|
|
|
| 37 |
|
| 38 |
model = SGDClassifier()
|
| 39 |
model.fit(x_train, y_train)
|
|
|
|
|
|
|
| 40 |
y_pred = model.predict(x_test)
|
| 41 |
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
|
| 42 |
print("Accuracy: {:.2f}%".format(accuracy * 100))
|
| 43 |
+
|
| 44 |
+
joblib.dump(
|
| 45 |
+
model,
|
| 46 |
+
"../models/langage_detection/model_"
|
| 47 |
+
+ str(round(accuracy, 3)).replace(".", "_")
|
| 48 |
+
+ ".sav",
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def main():
|
| 53 |
+
train()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
|
| 57 |
+
main()
|
app/travel_resolver/tests/langage_detection_test.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import unittest
|
| 2 |
+
from travel_resolver.libs.nlp.langage_detection.extractor import (
|
| 3 |
+
extract_data_from_string,
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestExtractor(unittest.TestCase):
|
| 8 |
+
|
| 9 |
+
def test_correct_extraction(self):
|
| 10 |
+
input = "aabccooeeeeyln"
|
| 11 |
+
result = extract_data_from_string(input)
|
| 12 |
+
self.assertEqual(
|
| 13 |
+
result,
|
| 14 |
+
[
|
| 15 |
+
25.0,
|
| 16 |
+
12.5,
|
| 17 |
+
25.0,
|
| 18 |
+
0.0,
|
| 19 |
+
50.0,
|
| 20 |
+
0.0,
|
| 21 |
+
0.0,
|
| 22 |
+
0.0,
|
| 23 |
+
0.0,
|
| 24 |
+
0.0,
|
| 25 |
+
0.0,
|
| 26 |
+
12.5,
|
| 27 |
+
0.0,
|
| 28 |
+
12.5,
|
| 29 |
+
25.0,
|
| 30 |
+
0.0,
|
| 31 |
+
0.0,
|
| 32 |
+
0.0,
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
0.0,
|
| 36 |
+
0.0,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0,
|
| 39 |
+
12.5,
|
| 40 |
+
0.0,
|
| 41 |
+
0.0,
|
| 42 |
+
0.0,
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
],
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
input2 = "aabccooeeeeylnñßãç"
|
| 49 |
+
result2 = extract_data_from_string(input2)
|
| 50 |
+
self.assertEqual(
|
| 51 |
+
result2,
|
| 52 |
+
[
|
| 53 |
+
16.67,
|
| 54 |
+
8.33,
|
| 55 |
+
16.67,
|
| 56 |
+
0.0,
|
| 57 |
+
33.33,
|
| 58 |
+
0.0,
|
| 59 |
+
0.0,
|
| 60 |
+
0.0,
|
| 61 |
+
0.0,
|
| 62 |
+
0.0,
|
| 63 |
+
0.0,
|
| 64 |
+
8.33,
|
| 65 |
+
0.0,
|
| 66 |
+
8.33,
|
| 67 |
+
16.67,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0,
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
8.33,
|
| 78 |
+
0.0,
|
| 79 |
+
5.56,
|
| 80 |
+
5.56,
|
| 81 |
+
5.56,
|
| 82 |
+
5.56,
|
| 83 |
+
],
|
| 84 |
+
)
|
models/langage_detection/model_0_699.sav
ADDED
|
Binary file (2.62 kB). View file
|
|
|
models/langage_detection/model_0_714.sav
ADDED
|
Binary file (2.62 kB). View file
|
|
|
models/langage_detection/{model.sav → model_0_746.sav}
RENAMED
|
File without changes
|