edouardlgp committed on
Commit
be82510
·
verified ·
1 Parent(s): e0dce9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -22,6 +22,44 @@ logging.getLogger('pdfminer').setLevel(logging.ERROR)
22
  # Suppress specific warnings
23
  warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Initialize OpenAI client
26
  def initialize_openai_client():
27
  try:
@@ -80,11 +118,6 @@ def extract_section_from_pdf(full_text: str, section_title: str) -> str:
80
 
81
  def classify_job_family(responsibilities: List[str]) -> str:
82
 
83
- try:
84
- job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
85
- except Exception as e:
86
- print(f"Error reading job_families1.csv: {e}")
87
- job_families_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
88
 
89
 
90
 
@@ -126,12 +159,7 @@ def code_sanitize(input_string, valid_codes):
126
 
127
  def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
128
 
129
- try:
130
- occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
131
- except Exception as e:
132
- print(f"Error reading occupational_groups.csv: {e}")
133
- occupational_groups_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
134
-
135
  result = {}
136
  try:
137
  for level in range(1, 5):
@@ -173,15 +201,7 @@ def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
173
  Dictionary containing classification information or error message
174
  """
175
 
176
- esco_df = pd.read_csv(
177
- "ISCOGroups_en.csv",
178
- dtype={'code': str} # Force 'code' to be read as string
179
- )
180
 
181
- esco_level5_df = pd.read_csv(
182
- "occupations_en.csv",
183
- dtype={'code': str, 'iscoGroup': str, } # Force 'code' to be read as string
184
- )
185
 
186
  result = {}
187
 
@@ -363,30 +383,12 @@ def get_level_ESCO_info(df, code, level_name):
363
 
364
  def get_skills_info_esco(Level_5_code):
365
 
366
- try:
367
- esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str})
368
- except Exception as e:
369
- print(f"Error reading occupations_en.csv: {e}")
370
- esco_level5_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
371
-
372
  matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
373
  conceptUris = matches['conceptUri'].values.tolist()
374
-
375
- try:
376
- esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
377
- except Exception as e:
378
- print(f"Error reading occupationSkillRelations_en.csv: {e}")
379
- esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
380
-
381
  skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)]
382
  skillUris = skills['skillUri'].values.tolist()
383
 
384
-
385
- try:
386
- esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
387
- except Exception as e:
388
- print(f"Error reading skills_en.csv: {e}")
389
- esco_skill_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
390
 
391
  thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)]
392
  result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()
 
22
  # Suppress specific warnings
23
  warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
24
 
25
+ # Global DataFrame initializations
26
+ try:
27
+ job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
28
+ except Exception as e:
29
+ print(f"Error reading job_families1.csv: {e}")
30
+ job_families_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
31
+
32
+ try:
33
+ occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
34
+ except Exception as e:
35
+ print(f"Error reading occupational_groups.csv: {e}")
36
+ occupational_groups_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
37
+
38
+ try:
39
+ esco_df = pd.read_csv("ISCOGroups_en.csv", on_bad_lines='skip', dtype={'code': str})  # Force 'code' to be read as string
40
+ except Exception as e:
41
+ print(f"Error reading ISCOGroups_en.csv: {e}")
42
+ esco_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
43
+
44
+ try:
45
+ esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str})  # Force 'code' and 'iscoGroup' to be read as strings
46
+ except Exception as e:
47
+ print(f"Error reading occupations_en.csv: {e}")
48
+ esco_level5_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
49
+
50
+ try:
51
+ esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
52
+ except Exception as e:
53
+ print(f"Error reading skills_en.csv: {e}")
54
+ esco_skill_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
55
+
56
+ try:
57
+ esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
58
+ except Exception as e:
59
+ print(f"Error reading occupationSkillRelations_en.csv: {e}")
60
+ esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
61
+
62
+
63
  # Initialize OpenAI client
64
  def initialize_openai_client():
65
  try:
 
118
 
119
  def classify_job_family(responsibilities: List[str]) -> str:
120
 
 
 
 
 
 
121
 
122
 
123
 
 
159
 
160
  def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
161
 
162
+
 
 
 
 
 
163
  result = {}
164
  try:
165
  for level in range(1, 5):
 
201
  Dictionary containing classification information or error message
202
  """
203
 
 
 
 
 
204
 
 
 
 
 
205
 
206
  result = {}
207
 
 
383
 
384
  def get_skills_info_esco(Level_5_code):
385
 
 
 
 
 
 
 
386
  matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
387
  conceptUris = matches['conceptUri'].values.tolist()
388
+
 
 
 
 
 
 
389
  skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)]
390
  skillUris = skills['skillUri'].values.tolist()
391
 
 
 
 
 
 
 
392
 
393
  thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)]
394
  result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()