import pickle

import numpy as np
import pandas as pd

# Pickled word-embedding subset, resolved relative to the current working directory.
_EMBEDDINGS_PATH = "./data/word_embeddings_subset.p"


def _load_word_embeddings():
    """Load the pickled word embeddings used by the embedding-based checks.

    Returns:
        The object stored in the pickle file; the checks index it by word.

    Raises:
        OSError: If the data file cannot be opened.
    """
    # NOTE: unpickling can execute arbitrary code, so this must only ever be
    # pointed at the trusted data file that ships with these checks.
    with open(_EMBEDDINGS_PATH, "rb") as handle:
        return pickle.load(handle)


def _holds(predicate):
    """Evaluate a zero-argument check and report whether it passed.

    The value being graded comes from arbitrary user code, so the comparison
    itself may raise (e.g. comparing ``None`` with a float, or taking the truth
    value of a multi-element array). Any such error counts as a failed check
    rather than aborting the whole run. ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not intercepted.

    Args:
        predicate: Callable taking no arguments and returning a truthy/falsy value.

    Returns:
        True if the predicate returned a truthy value, False otherwise.
    """
    try:
        return bool(predicate())
    except Exception:
        return False


def _item_or_none(value, index):
    """Return ``value[index]``, or None when that element cannot be read."""
    try:
        return value[index]
    except (TypeError, IndexError, KeyError):
        return None


def _record_failure(failed_cases, name, expected, got, message):
    """Append a failure record to ``failed_cases`` and print a report for it.

    Args:
        failed_cases: List collecting one dict per failed check (mutated in place).
        name: Name of the test case that failed.
        expected: Value the check expected.
        got: Value that was actually observed.
        message: Text printed before the ``Expected``/``Got`` lines.
    """
    failed_cases.append({"name": name, "expected": expected, "got": got})
    print(f"{message}\n\tExpected: {expected}.\n\tGot: {got}.")


def _print_summary(successful_cases, failed_cases):
    """Print the coloured pass/fail summary for one group of checks."""
    if not failed_cases:
        print("\033[92m All tests passed")
    else:
        print("\033[92m", successful_cases, " Tests passed")
        print("\033[91m", len(failed_cases), " Tests failed")


def _run_scalar_checks(target, test_cases, message, **isclose_kwargs):
    """Run checks whose expectation is ``[low, high, reference]``.

    A case passes when the result is close to ``reference`` (per ``np.isclose``)
    or lies within the inclusive range ``[low, high]``.

    Args:
        target: Function under test; called with each case's ``input`` as keyword arguments.
        test_cases: List of dicts with ``name``, ``input`` and ``expected`` keys.
        message: Text printed before the ``Expected``/``Got`` lines on failure.
        **isclose_kwargs: Extra tolerances forwarded to ``np.isclose``.
    """
    successful_cases = 0
    failed_cases = []

    for test_case in test_cases:
        # Errors raised by the function under test are intentionally not caught.
        result = target(**test_case["input"])
        low, high, reference = test_case["expected"]

        if _holds(
            lambda: np.isclose(result, reference, **isclose_kwargs)
            or (low <= result <= high)
        ):
            successful_cases += 1
        else:
            _record_failure(failed_cases, test_case["name"], reference, result, message)

    _print_summary(successful_cases, failed_cases)


def test_cosine_similarity(target):
    """Check a cosine-similarity implementation against reference word pairs.

    Args:
        target: Function called as ``target(A=..., B=...)`` with two word embeddings.

    Returns:
        None. Results are reported on standard output.
    """
    word_embeddings = _load_word_embeddings()

    test_cases = [
        {
            "name": "cosine_score_1",
            "input": {"A": word_embeddings["king"], "B": word_embeddings["queen"]},
            "expected": [0.650, 0.6512, 0.6510957],
        },
        {
            "name": "cosine_score_2",
            "input": {"A": word_embeddings["Japan"], "B": word_embeddings["Tokyo"]},
            "expected": [0.699, 0.701, 0.70022535],
        },
        {
            "name": "cosine_score_3",
            "input": {"A": word_embeddings["Germany"], "B": word_embeddings["Beirut"]},
            "expected": [0.172, 0.174, 0.17339969],
        },
        {
            "name": "cosine_score_4_to_catch_alternate_solution",
            "input": {"A": word_embeddings["China"], "B": word_embeddings["Chile"]},
            "expected": [0.32, 0.381, 0.3801232],
        },
    ]

    _run_scalar_checks(
        target, test_cases, "Wrong output in cosine similarity function. "
    )


def test_euclidean(target):
    """Check a Euclidean-distance implementation against reference word pairs.

    Args:
        target: Function called as ``target(A=..., B=...)`` with two word embeddings.

    Returns:
        None. Results are reported on standard output.
    """
    word_embeddings = _load_word_embeddings()

    test_cases = [
        {
            "name": "euclidean_score_1",
            "input": {"A": word_embeddings["king"], "B": word_embeddings["queen"]},
            "expected": [2.47, 2.48, 2.4796925],
        },
        {
            "name": "euclidean_score_2",
            "input": {"A": word_embeddings["Japan"], "B": word_embeddings["Tokyo"]},
            "expected": [2.43, 2.44, 2.4345345],
        },
        {
            "name": "euclidean_score_3",
            "input": {"A": word_embeddings["Germany"], "B": word_embeddings["Beirut"]},
            "expected": [4.0, 4.1, 4.0416517],
        },
        {
            "name": "euclidean_score_4",
            "input": {"A": word_embeddings["China"], "B": word_embeddings["Chile"]},
            "expected": [3.2, 3.3, 3.2326782],
        },
    ]

    _run_scalar_checks(
        target,
        test_cases,
        "Wrong output in the euclidean distance function. ",
        rtol=1e-3,
        atol=1e-05,
    )


def test_get_country(target):
    """Check an analogy-lookup implementation against reference answers.

    Each case contributes three independent checks: the result is a tuple, its
    first element is the expected word, and its second element is close to the
    expected similarity.

    Args:
        target: Function called as
            ``target(city1=..., country1=..., city2=..., embeddings=...)``.

    Returns:
        None. Results are reported on standard output.
    """
    successful_cases = 0
    failed_cases = []
    word_embeddings = _load_word_embeddings()

    test_cases = [
        {
            "name": "get_country_score_1",
            "input": {
                "city1": "Athens",
                "country1": "Greece",
                "city2": "Cairo",
                "embeddings": word_embeddings,
            },
            "expected": ("Egypt", 0.7626821),
        },
        {
            "name": "get_country_score_2_for_wrong_cosine_similarity",
            "input": {
                "city1": "oil",
                "country1": "gas",
                "city2": "town",
                "embeddings": word_embeddings,
            },
            "expected": ("village", 0.5611889),
        },
        {
            "name": "get_country_score_3",
            "input": {
                "city1": "Doha",
                "country1": "Qatar",
                "city2": "Jakarta",
                "embeddings": word_embeddings,
            },
            "expected": ("Indonesia", 0.6782036),
        },
        {
            "name": "get_country_score_4",
            "input": {
                "city1": "Tokyo",
                "country1": "Japan",
                "city2": "Canberra",
                "embeddings": word_embeddings,
            },
            "expected": ("Australia", 0.7139509),
        },
        {
            "name": "get_country_score_5_for_wrong_cosine_similarity",
            "input": {
                "city1": "joyful",
                "country1": "happy",
                "city2": "sad",
                "embeddings": word_embeddings,
            },
            "expected": ("king", 0.09570546),
        },
        {
            "name": "get_country_score_6_for_wrong_cosine_similarity",
            "input": {
                "city1": "happy",
                "country1": "joyful",
                "city2": "sad",
                "embeddings": word_embeddings,
            },
            "expected": ("Lebanon", 0.14527377),
        },
    ]

    for test_case in test_cases:
        # Errors raised by the function under test are intentionally not caught.
        result = target(**test_case["input"])
        name = test_case["name"]
        expected_word, expected_similarity = test_case["expected"]

        # Read both elements defensively: a result that is not subscriptable or
        # too short must be reported as a failure, not crash the report itself.
        got_word = _item_or_none(result, 0)
        got_similarity = _item_or_none(result, 1)

        if isinstance(result, tuple):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases,
                name,
                type(test_case["expected"]),
                type(result),
                "Wrong output type. ",
            )

        if _holds(lambda: got_word == expected_word):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases, name, expected_word, got_word, "Wrong output word. "
            )

        if _holds(lambda: np.isclose(got_similarity, expected_similarity)):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases,
                name,
                expected_similarity,
                got_similarity,
                "Wrong output similarity. Maybe you should check your "
                "cosine_similarity implementation. ",
            )

    _print_summary(successful_cases, failed_cases)


def test_get_accuracy(target, data):
    """Check an accuracy computation on the full data and on a fixed sample.

    Args:
        target: Function called as ``target(word_embeddings=..., data=...)``.
        data: Dataset passed to ``target``; it must provide
            ``sample(frac=..., random_state=...)`` (presumably a pandas
            DataFrame -- confirm against the caller).

    Returns:
        None. Results are reported on standard output.
    """
    successful_cases = 0
    failed_cases = []
    word_embeddings = _load_word_embeddings()

    test_cases = [
        {
            "name": "default_check",
            "input": {"word_embeddings": word_embeddings, "data": data},
            "expected": 0.9192082407594425,
        },
        {
            "name": "smaller_check",
            "input": {
                "word_embeddings": word_embeddings,
                "data": data.sample(frac=0.15, random_state=3),
            },
            "expected": 0.9125168236877523,
        },
    ]

    for test_case in test_cases:
        # Errors raised by the function under test are intentionally not caught.
        result = target(**test_case["input"])
        expected = test_case["expected"]

        if _holds(lambda: np.isclose(result, expected)):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases,
                test_case["name"],
                expected,
                result,
                "Wrong accuracy output. ",
            )

    _print_summary(successful_cases, failed_cases)


def test_compute_pca(target):
    """Check a PCA implementation on two fixed input matrices.

    Each case contributes three independent checks: the result is a NumPy
    array, it has the expected shape, and its values are close to the
    reference projection.

    Args:
        target: Function called as ``target(X=..., n_components=...)``.

    Returns:
        None. Results are reported on standard output.
    """
    successful_cases = 0
    failed_cases = []

    test_cases = [
        {
            "name": "default_check",
            "input": {
                "X": np.array(
                    # np.random.seed(1)
                    [
                        [
                            4.17022005e-01,
                            7.20324493e-01,
                            1.14374817e-04,
                            3.02332573e-01,
                            1.46755891e-01,
                            9.23385948e-02,
                            1.86260211e-01,
                            3.45560727e-01,
                            3.96767474e-01,
                            5.38816734e-01,
                        ],
                        [
                            4.19194514e-01,
                            6.85219500e-01,
                            2.04452250e-01,
                            8.78117436e-01,
                            2.73875932e-02,
                            6.70467510e-01,
                            4.17304802e-01,
                            5.58689828e-01,
                            1.40386939e-01,
                            1.98101489e-01,
                        ],
                        [
                            8.00744569e-01,
                            9.68261576e-01,
                            3.13424178e-01,
                            6.92322616e-01,
                            8.76389152e-01,
                            8.94606664e-01,
                            8.50442114e-02,
                            3.90547832e-02,
                            1.69830420e-01,
                            8.78142503e-01,
                        ],
                    ]
                ),
                "n_components": 2,
            },
            "expected": np.array(
                [
                    [0.43437323, 0.49820384],
                    [0.42077249, -0.50351448],
                    [-0.85514571, 0.00531064],
                ]
            ),
        },
        {
            "name": "larger_check",
            "input": {
                "X": np.array(
                    # np.random.seed(2)
                    [
                        [
                            0.4359949,
                            0.02592623,
                            0.54966248,
                            0.43532239,
                            0.4203678,
                            0.33033482,
                            0.20464863,
                            0.61927097,
                            0.29965467,
                            0.26682728,
                            0.62113383,
                            0.52914209,
                            0.13457995,
                            0.51357812,
                            0.18443987,
                        ],
                        [
                            0.78533515,
                            0.85397529,
                            0.49423684,
                            0.84656149,
                            0.07964548,
                            0.50524609,
                            0.0652865,
                            0.42812233,
                            0.09653092,
                            0.12715997,
                            0.59674531,
                            0.226012,
                            0.10694568,
                            0.22030621,
                            0.34982629,
                        ],
                        [
                            0.46778748,
                            0.20174323,
                            0.64040673,
                            0.48306984,
                            0.50523672,
                            0.38689265,
                            0.79363745,
                            0.58000418,
                            0.1622986,
                            0.70075235,
                            0.96455108,
                            0.50000836,
                            0.88952006,
                            0.34161365,
                            0.56714413,
                        ],
                        [
                            0.42754596,
                            0.43674726,
                            0.77655918,
                            0.53560417,
                            0.95374223,
                            0.54420816,
                            0.08209492,
                            0.3663424,
                            0.8508505,
                            0.40627504,
                            0.02720237,
                            0.24717724,
                            0.06714437,
                            0.99385201,
                            0.97058031,
                        ],
                        [
                            0.80025835,
                            0.60181712,
                            0.76495986,
                            0.16922545,
                            0.29302323,
                            0.52406688,
                            0.35662428,
                            0.04567897,
                            0.98315345,
                            0.44135492,
                            0.50400044,
                            0.32354132,
                            0.25974475,
                            0.38688989,
                            0.8320169,
                        ],
                    ]
                ),
                "n_components": 3,
            },
            "expected": np.array(
                [
                    [-0.32462796, 0.01881248, -0.51389463],
                    [-0.36781354, 0.88364184, 0.05985815],
                    [-0.75767901, -0.69452194, 0.12223214],
                    [1.01698298, -0.17990871, -0.33555475],
                    [0.43313753, -0.02802368, 0.66735909],
                ]
            ),
        },
    ]

    for test_case in test_cases:
        # Errors raised by the function under test are intentionally not caught.
        result = target(**test_case["input"])
        name = test_case["name"]
        expected = test_case["expected"]

        if isinstance(result, np.ndarray):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases, name, np.ndarray, type(result), "Wrong output type. "
            )

        # A result without a ``shape`` attribute is reported with shape None
        # instead of raising while the failure report is being built.
        got_shape = getattr(result, "shape", None)
        if _holds(lambda: got_shape == expected.shape):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases,
                name,
                expected.shape,
                got_shape,
                "Wrong output shape. Check if you are taking the proper "
                "number of dimensions.",
            )

        # np.allclose raises for non-numeric or non-broadcastable results;
        # _holds turns that into a failed check.
        if _holds(lambda: np.allclose(result, expected)):
            successful_cases += 1
        else:
            _record_failure(
                failed_cases, name, expected, result, "Wrong output values."
            )

    _print_summary(successful_cases, failed_cases)


# These are grader entry points, not pytest tests: each needs a ``target``
# argument that pytest would try (and fail) to resolve as a fixture if it ever
# collected them, e.g. after a star-import into a test module.
for _grader in (
    test_cosine_similarity,
    test_euclidean,
    test_get_country,
    test_get_accuracy,
    test_compute_pca,
):
    _grader.__test__ = False
del _grader