Corin1998 committed on
Commit
d6abadb
·
verified ·
1 Parent(s): 32cf7ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -24
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import json
3
  import hashlib
4
  import gradio as gr
@@ -26,25 +27,23 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
26
  partial_records = []
27
  raw_texts = []
28
 
29
- # gr.Files(type="filepath") を前提に、パスで受け取り→自前で read
30
- for p in files:
31
- fname = os.path.basename(p)
32
- with open(p, "rb") as fh:
33
- raw_bytes = fh.read()
34
-
35
- filetype = detect_filetype(fname, raw_bytes)
36
 
37
  # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
38
  if filetype in {"pdf", "image"}:
39
- text = extract_text_with_openai(raw_bytes, filename=fname, filetype=filetype)
40
  else:
41
  base_text = load_doc_text(filetype, raw_bytes)
42
- text = extract_text_with_openai(base_text.encode("utf-8"), filename=fname, filetype="txt")
 
43
 
44
- raw_texts.append({"filename": fname, "text": text})
45
 
46
- # 2) OpenAIでセクション構造化 → ルールベース正規化も適用
47
  structured = structure_with_openai(text)
 
48
  normalized = normalize_resume({
49
  "work_experience": structured.get("work_experience_raw", ""),
50
  "education": structured.get("education_raw", ""),
@@ -52,7 +51,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
52
  "skills": ", ".join(structured.get("skills_list", [])),
53
  })
54
  partial_records.append({
55
- "source": fname,
56
  "text": text,
57
  "structured": structured,
58
  "normalized": normalized,
@@ -83,7 +82,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
83
  # 8) 構造化出力
84
  result_json = {
85
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
86
- "files": [os.path.basename(p) for p in files],
87
  "merged": merged,
88
  "skills": skills,
89
  "quality_score": score,
@@ -108,14 +107,13 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
108
 
109
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
110
 
111
- # dict を gr.Code で安全表示するため、文字列化して返す
112
  return (
113
- json.dumps(result_json, ensure_ascii=False, indent=2),
114
- json.dumps(skills, ensure_ascii=False, indent=2),
115
- json.dumps(score, ensure_ascii=False, indent=2),
116
- summaries.get("300chars", ""),
117
- summaries.get("100chars", ""),
118
- summaries.get("onesent", ""),
119
  anon_pdf,
120
  json.dumps(commit_info or {"status": "skipped (DATASET_REPO not set)"}, ensure_ascii=False, indent=2),
121
  )
@@ -129,7 +127,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
129
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
130
  file_count="multiple",
131
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
132
- type="filepath", # ← 重要:パスで受け取る
133
  )
134
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
135
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -140,10 +138,11 @@ with gr.Blocks(title=APP_TITLE) as demo:
140
  out_json = gr.Code(label="統合出力 (JSON)")
141
 
142
  with gr.Tab("抽出スキル"):
143
- out_skills = gr.Code(label="スキル一覧 (JSON)") # gr.JSON をやめて文字列表示
 
144
 
145
  with gr.Tab("品質スコア"):
146
- out_score = gr.Code(label="品質評価 (JSON)")
147
 
148
  with gr.Tab("要約 (300/100/1文)"):
149
  out_sum_300 = gr.Textbox(label="300字要約")
@@ -164,4 +163,5 @@ with gr.Blocks(title=APP_TITLE) as demo:
164
 
165
 
166
  if __name__ == "__main__":
167
- demo.launch()
 
 
1
  import os
2
+ import io
3
  import json
4
  import hashlib
5
  import gradio as gr
 
27
  partial_records = []
28
  raw_texts = []
29
 
30
+ for f in files:
31
+ raw_bytes = f.read()
32
+ filetype = detect_filetype(f.name, raw_bytes)
 
 
 
 
33
 
34
  # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
35
  if filetype in {"pdf", "image"}:
36
+ text = extract_text_with_openai(raw_bytes, filename=f.name, filetype=filetype)
37
  else:
38
  base_text = load_doc_text(filetype, raw_bytes)
39
+ # 生テキストをそのままOpenAIへ渡し、軽く整形した全文を返す
40
+ text = extract_text_with_openai(base_text.encode("utf-8"), filename=f.name, filetype="txt")
41
 
42
+ raw_texts.append({"filename": f.name, "text": text})
43
 
44
+ # 2) OpenAIでセクション構造化
45
  structured = structure_with_openai(text)
46
+ # 念のためルールベース正規化も適用(期間抽出など補助)
47
  normalized = normalize_resume({
48
  "work_experience": structured.get("work_experience_raw", ""),
49
  "education": structured.get("education_raw", ""),
 
51
  "skills": ", ".join(structured.get("skills_list", [])),
52
  })
53
  partial_records.append({
54
+ "source": f.name,
55
  "text": text,
56
  "structured": structured,
57
  "normalized": normalized,
 
82
  # 8) 構造化出力
83
  result_json = {
84
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
85
+ "files": [f.name for f in files],
86
  "merged": merged,
87
  "skills": skills,
88
  "quality_score": score,
 
107
 
108
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
109
 
 
110
  return (
111
+ json.dumps(result_json, ensure_ascii=False, indent=2), # out_json(Codeへ文字列)
112
+ json.dumps(skills, ensure_ascii=False, indent=2), # ★ JSON→Code: ここを文字列で返す
113
+ json.dumps(score, ensure_ascii=False, indent=2), # out_score(Codeへ文字列)
114
+ summaries["300chars"],
115
+ summaries["100chars"],
116
+ summaries["onesent"],
117
  anon_pdf,
118
  json.dumps(commit_info or {"status": "skipped (DATASET_REPO not set)"}, ensure_ascii=False, indent=2),
119
  )
 
127
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
128
  file_count="multiple",
129
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
130
+ type="file"
131
  )
132
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
133
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
138
  out_json = gr.Code(label="統合出力 (JSON)")
139
 
140
  with gr.Tab("抽出スキル"):
141
+ # GradioのJSONスキーマ推論バグ回避のため Code に変更
142
+ out_skills = gr.Code(label="スキル一覧 (JSON)")
143
 
144
  with gr.Tab("品質スコア"):
145
+ out_score = gr.Code(label="品質評価")
146
 
147
  with gr.Tab("要約 (300/100/1文)"):
148
  out_sum_300 = gr.Textbox(label="300字要約")
 
163
 
164
 
165
  if __name__ == "__main__":
166
+ # ★ ローカル未到達環境での ValueError 回避(Space でも安全)
167
+ demo.launch(share=True, server_name="0.0.0.0", server_port=7860)