Spaces:

edouardlgp
/

Job_Classification

Running

App Files Files Community

edouardlgp committed on May 10

Commit

be82510

verified ·

1 Parent(s): e0dce9c

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -38

app.py CHANGED Viewed

@@ -22,6 +22,44 @@ logging.getLogger('pdfminer').setLevel(logging.ERROR)
 # Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
 # Initialize OpenAI client
 def initialize_openai_client():
     try:
@@ -80,11 +118,6 @@ def extract_section_from_pdf(full_text: str, section_title: str) -> str:
 def classify_job_family(responsibilities: List[str]) -> str:
-    try:
-        job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
-    except Exception as e:
-        print(f"Error reading job_families1.csv: {e}")
-        job_families_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
@@ -126,12 +159,7 @@ def code_sanitize(input_string, valid_codes):
 def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
-    try:
-        occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
-    except Exception as e:
-        print(f"Error reading occupational_groups.csv: {e}")
-        occupational_groups_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
     result = {}
     try:
         for level in range(1, 5):
@@ -173,15 +201,7 @@ def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
         Dictionary containing classification information or error message
     """
-    esco_df = pd.read_csv(
-        "ISCOGroups_en.csv",
-        dtype={'code': str}  # Force 'code' to be read as string
-    )
-    esco_level5_df = pd.read_csv(
-        "occupations_en.csv",
-        dtype={'code': str, 'iscoGroup': str, }  # Force 'code' to be read as string
-    )
     result = {}
@@ -363,30 +383,12 @@ def get_level_ESCO_info(df, code, level_name):
 def get_skills_info_esco(Level_5_code):
-    try:
-        esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str})
-    except Exception as e:
-        print(f"Error reading occupations_en.csv: {e}")
-        esco_level5_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
     matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
     conceptUris = matches['conceptUri'].values.tolist()
-    try:
-        esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
-    except Exception as e:
-        print(f"Error reading occupationSkillRelations_en.csv: {e}")
-        esco_skill_map_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
     skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)]
     skillUris = skills['skillUri'].values.tolist()
-    try:
-        esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
-    except Exception as e:
-        print(f"Error reading skills_en.csv: {e}")
-        esco_skill_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
     thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)]
     result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()

 # Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
+# Global DataFrame initializations
+# Each reference CSV is read once here, at module level, instead of inside the
+# individual functions. If a file cannot be read, the error is printed and the
+# name is bound to an empty DataFrame so that it is always defined.
+try:
+    job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
+except Exception as e:
+    print(f"Error reading job_families1.csv: {e}")
+    job_families_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
+try:
+    occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
+except Exception as e:
+    print(f"Error reading occupational_groups.csv: {e}")
+    occupational_groups_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
+try:
+    # NOTE: the closing parenthesis must come before the trailing comment;
+    # otherwise the call is never closed and the module fails to parse.
+    esco_df = pd.read_csv("ISCOGroups_en.csv", on_bad_lines='skip', dtype={'code': str})  # Force 'code' to be read as string
+except Exception as e:
+    print(f"Error reading ISCOGroups_en.csv: {e}")
+    esco_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
+try:
+    esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str})  # Force 'code' and 'iscoGroup' to be read as strings
+except Exception as e:
+    print(f"Error reading occupations_en.csv: {e}")
+    esco_level5_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
+try:
+    esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
+except Exception as e:
+    print(f"Error reading skills_en.csv: {e}")
+    esco_skill_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
+try:
+    esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
+except Exception as e:
+    print(f"Error reading occupationSkillRelations_en.csv: {e}")
+    esco_skill_map_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
 # Initialize OpenAI client
 def initialize_openai_client():
     try:
 def classify_job_family(responsibilities: List[str]) -> str:
 def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
     result = {}
     try:
         for level in range(1, 5):
         Dictionary containing classification information or error message
     """
     result = {}
 def get_skills_info_esco(Level_5_code):
     matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
     conceptUris = matches['conceptUri'].values.tolist()
     skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)]
     skillUris = skills['skillUri'].values.tolist()
     thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)]
     result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()