Spaces:

Az-r-ow
/

TravelNER

Sleeping

App Files Files Community

lucas066001 committed on Sep 19, 2024

Commit

07f841f

1 Parent(s): 90cfe35

feat(libs.langage_detection): Adding models saving and unittest

Browse files

Files changed (7) hide show

.github/workflows/build.yml +3 -1
app/model.sav +0 -0
app/travel_resolver/libs/nlp/langage_detection/trainer.py +16 -3
app/travel_resolver/tests/langage_detection_test.py +84 -0
models/langage_detection/model_0_699.sav +0 -0
models/langage_detection/model_0_714.sav +0 -0
models/langage_detection/{model.sav → model_0_746.sav} +0 -0

.github/workflows/build.yml CHANGED Viewed

@@ -44,4 +44,6 @@ jobs:
     - name: Test libs
       run: |
-        cd ./app && python -m unittest travel_resolver.tests.sample_test

     - name: Test libs
       run: |
+        cd ./app && \
+        python -m unittest travel_resolver.tests.sample_test && \
+        python -m unittest travel_resolver.tests.langage_detection_test

app/model.sav ADDED Viewed

Binary file (2.62 kB). View file

app/travel_resolver/libs/nlp/langage_detection/trainer.py CHANGED Viewed

@@ -15,7 +15,7 @@ def read_data():
     i = 1
     for lang in var.CORRESP_LANG:
         first = True
-        current_file = "../../../../data/langage_detection/trainset/"
         current_file += lang + "_trainset.csv"
         with open(current_file, "r") as csv_file:
             csv_reader = csv.reader(csv_file)
@@ -37,8 +37,21 @@ def train():
     model = SGDClassifier()
     model.fit(x_train, y_train)
-    joblib.dump(model, "model.sav")
     y_pred = model.predict(x_test)
     accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
     print("Accuracy: {:.2f}%".format(accuracy * 100))

     i = 1
     for lang in var.CORRESP_LANG:
         first = True
+        current_file = "../data/langage_detection/trainset/"
         current_file += lang + "_trainset.csv"
         with open(current_file, "r") as csv_file:
             csv_reader = csv.reader(csv_file)
     model = SGDClassifier()
     model.fit(x_train, y_train)
     y_pred = model.predict(x_test)
     accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
     print("Accuracy: {:.2f}%".format(accuracy * 100))
+    joblib.dump(
+        model,
+        "../models/langage_detection/model_"
+        + str(round(accuracy, 3)).replace(".", "_")
+        + ".sav",
+    )
+def main():
+    train()
+if __name__ == "__main__":
+    main()

app/travel_resolver/tests/langage_detection_test.py ADDED Viewed

@@ -0,0 +1,84 @@

+import unittest
+from travel_resolver.libs.nlp.langage_detection.extractor import (
+    extract_data_from_string,
+)
+class TestExtractor(unittest.TestCase):
+    def test_correct_extraction(self):
+        input = "aabccooeeeeyln"
+        result = extract_data_from_string(input)
+        self.assertEqual(
+            result,
+            [
+                25.0,
+                12.5,
+                25.0,
+                0.0,
+                50.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                12.5,
+                0.0,
+                12.5,
+                25.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                12.5,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+            ],
+        )
+        input2 = "aabccooeeeeylnñßãç"
+        result2 = extract_data_from_string(input2)
+        self.assertEqual(
+            result2,
+            [
+                16.67,
+                8.33,
+                16.67,
+                0.0,
+                33.33,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                8.33,
+                0.0,
+                8.33,
+                16.67,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                8.33,
+                0.0,
+                5.56,
+                5.56,
+                5.56,
+                5.56,
+            ],
+        )

models/langage_detection/model_0_699.sav ADDED Viewed

Binary file (2.62 kB). View file

models/langage_detection/model_0_714.sav ADDED Viewed

Binary file (2.62 kB). View file

models/langage_detection/{model.sav → model_0_746.sav} RENAMED Viewed

File without changes