lucas066001 committed on
Commit
07f841f
·
1 Parent(s): 90cfe35

feat(libs.langage_detection): Adding models saving and unittest

Browse files
.github/workflows/build.yml CHANGED
@@ -44,4 +44,6 @@ jobs:
44
 
45
  - name: Test libs
46
  run: |
47
- cd ./app && python -m unittest travel_resolver.tests.sample_test
 
 
 
44
 
45
  - name: Test libs
46
  run: |
47
+ cd ./app && \
48
+ python -m unittest travel_resolver.tests.sample_test && \
49
+ python -m unittest travel_resolver.tests.langage_detection_test
app/model.sav ADDED
Binary file (2.62 kB). View file
 
app/travel_resolver/libs/nlp/langage_detection/trainer.py CHANGED
@@ -15,7 +15,7 @@ def read_data():
15
  i = 1
16
  for lang in var.CORRESP_LANG:
17
  first = True
18
- current_file = "../../../../data/langage_detection/trainset/"
19
  current_file += lang + "_trainset.csv"
20
  with open(current_file, "r") as csv_file:
21
  csv_reader = csv.reader(csv_file)
@@ -37,8 +37,21 @@ def train():
37
 
38
  model = SGDClassifier()
39
  model.fit(x_train, y_train)
40
- joblib.dump(model, "model.sav")
41
-
42
  y_pred = model.predict(x_test)
43
  accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
44
  print("Accuracy: {:.2f}%".format(accuracy * 100))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  i = 1
16
  for lang in var.CORRESP_LANG:
17
  first = True
18
+ current_file = "../data/langage_detection/trainset/"
19
  current_file += lang + "_trainset.csv"
20
  with open(current_file, "r") as csv_file:
21
  csv_reader = csv.reader(csv_file)
 
37
 
38
  model = SGDClassifier()
39
  model.fit(x_train, y_train)
 
 
40
  y_pred = model.predict(x_test)
41
  accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
42
  print("Accuracy: {:.2f}%".format(accuracy * 100))
43
+
44
+ joblib.dump(
45
+ model,
46
+ "../models/langage_detection/model_"
47
+ + str(round(accuracy, 3)).replace(".", "_")
48
+ + ".sav",
49
+ )
50
+
51
+
52
+ def main():
53
+ train()
54
+
55
+
56
+ if __name__ == "__main__":
57
+ main()
app/travel_resolver/tests/langage_detection_test.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from travel_resolver.libs.nlp.langage_detection.extractor import (
3
+ extract_data_from_string,
4
+ )
5
+
6
+
7
+ class TestExtractor(unittest.TestCase):
8
+
9
+ def test_correct_extraction(self):
10
+ input = "aabccooeeeeyln"
11
+ result = extract_data_from_string(input)
12
+ self.assertEqual(
13
+ result,
14
+ [
15
+ 25.0,
16
+ 12.5,
17
+ 25.0,
18
+ 0.0,
19
+ 50.0,
20
+ 0.0,
21
+ 0.0,
22
+ 0.0,
23
+ 0.0,
24
+ 0.0,
25
+ 0.0,
26
+ 12.5,
27
+ 0.0,
28
+ 12.5,
29
+ 25.0,
30
+ 0.0,
31
+ 0.0,
32
+ 0.0,
33
+ 0.0,
34
+ 0.0,
35
+ 0.0,
36
+ 0.0,
37
+ 0.0,
38
+ 0.0,
39
+ 12.5,
40
+ 0.0,
41
+ 0.0,
42
+ 0.0,
43
+ 0.0,
44
+ 0.0,
45
+ ],
46
+ )
47
+
48
+ input2 = "aabccooeeeeylnñßãç"
49
+ result2 = extract_data_from_string(input2)
50
+ self.assertEqual(
51
+ result2,
52
+ [
53
+ 16.67,
54
+ 8.33,
55
+ 16.67,
56
+ 0.0,
57
+ 33.33,
58
+ 0.0,
59
+ 0.0,
60
+ 0.0,
61
+ 0.0,
62
+ 0.0,
63
+ 0.0,
64
+ 8.33,
65
+ 0.0,
66
+ 8.33,
67
+ 16.67,
68
+ 0.0,
69
+ 0.0,
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0,
77
+ 8.33,
78
+ 0.0,
79
+ 5.56,
80
+ 5.56,
81
+ 5.56,
82
+ 5.56,
83
+ ],
84
+ )
models/langage_detection/model_0_699.sav ADDED
Binary file (2.62 kB). View file
 
models/langage_detection/model_0_714.sav ADDED
Binary file (2.62 kB). View file
 
models/langage_detection/{model.sav → model_0_746.sav} RENAMED
File without changes