Corin1998 committed on
Commit
cfbd159
·
verified ·
1 Parent(s): 8607711

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -17,10 +17,14 @@ from pipelines.scoring import compute_quality_score
17
  from pipelines.storage import persist_to_hf
18
  from pipelines.utils import detect_filetype, load_doc_text
19
 
20
- APP_TITLE = "候補者インテーク & レジュメ標準化(OpenAI版)"
 
 
21
 
 
22
 
23
  def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
 
24
  if not filepaths:
25
  raise gr.Error("少なくとも1ファイルをアップロードしてください。")
26
 
@@ -31,6 +35,7 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
31
  filename = os.path.basename(path)
32
  with open(path, "rb") as f:
33
  raw_bytes = f.read()
 
34
  filetype = detect_filetype(filename, raw_bytes)
35
 
36
  # 1) テキスト抽出
@@ -79,7 +84,7 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
79
  # 8) 要約
80
  summaries = summarize_with_openai(merged_text)
81
 
82
- # 9) 構造化出力
83
  result_json = {
84
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
85
  "files": [os.path.basename(p) for p in filepaths],
@@ -107,7 +112,6 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
107
 
108
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
109
 
110
- # 重要:gradioのAPIスキーマ生成バグ回避のため、dictはすべて文字列で返す
111
  return (
112
  json.dumps(result_json, ensure_ascii=False, indent=2), # out_json -> Code(str)
113
  json.dumps(skills, ensure_ascii=False, indent=2), # out_skills -> Code(str)
@@ -115,7 +119,7 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
115
  summaries["300chars"],
116
  summaries["100chars"],
117
  summaries["onesent"],
118
- anon_pdf, # File(tuple)
119
  json.dumps(commit_info or {"status": "skipped (DATASET_REPO not set)"}, ensure_ascii=False, indent=2),
120
  )
121
 
@@ -128,7 +132,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
128
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
129
  file_count="multiple",
130
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
131
- type="filepath", # ← 'file' は 4.44 系で非推奨/不可
132
  )
133
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
134
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -136,13 +140,13 @@ with gr.Blocks(title=APP_TITLE) as demo:
136
  run_btn = gr.Button("実行")
137
 
138
  with gr.Tab("構造化JSON"):
139
- out_json = gr.Code(label="統合出力 (JSON)")
140
 
141
  with gr.Tab("抽出スキル"):
142
- out_skills = gr.Code(label="スキル一覧 (JSON)") # ← gr.JSON をやめる
143
 
144
  with gr.Tab("品質スコア"):
145
- out_score = gr.Code(label="品質評価 (JSON)")
146
 
147
  with gr.Tab("要約 (300/100/1文)"):
148
  out_sum_300 = gr.Textbox(label="300字要約")
@@ -153,7 +157,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
153
  out_pdf = gr.File(label="匿名PDFダウンロード")
154
 
155
  with gr.Tab("Datasets 保存ログ"):
156
- out_commit = gr.Code(label="コミット情報")
157
 
158
  run_btn.click(
159
  process_resumes,
@@ -162,5 +166,6 @@ with gr.Blocks(title=APP_TITLE) as demo:
162
  )
163
 
164
  if __name__ == "__main__":
165
- # ローカル不可環境に対応
166
- demo.launch(share=True, server_name="0.0.0.0")
 
 
17
  from pipelines.storage import persist_to_hf
18
  from pipelines.utils import detect_filetype, load_doc_text
19
 
20
+ # app.py の該当箇所を以下に置き換え
21
+
22
+ # ...(importsはそのまま)
23
 
24
+ APP_TITLE = "候補者インテーク & レジュメ標準化(OpenAI版)"
25
 
26
  def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
27
+ import os, json, hashlib, gradio as gr
28
  if not filepaths:
29
  raise gr.Error("少なくとも1ファイルをアップロードしてください。")
30
 
 
35
  filename = os.path.basename(path)
36
  with open(path, "rb") as f:
37
  raw_bytes = f.read()
38
+
39
  filetype = detect_filetype(filename, raw_bytes)
40
 
41
  # 1) テキスト抽出
 
84
  # 8) 要約
85
  summaries = summarize_with_openai(merged_text)
86
 
87
+ # 9) 構造化出力(UIバグ回避のため **すべて文字列** で返す)
88
  result_json = {
89
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
90
  "files": [os.path.basename(p) for p in filepaths],
 
112
 
113
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
114
 
 
115
  return (
116
  json.dumps(result_json, ensure_ascii=False, indent=2), # out_json -> Code(str)
117
  json.dumps(skills, ensure_ascii=False, indent=2), # out_skills -> Code(str)
 
119
  summaries["300chars"],
120
  summaries["100chars"],
121
  summaries["onesent"],
122
+ anon_pdf,
123
  json.dumps(commit_info or {"status": "skipped (DATASET_REPO not set)"}, ensure_ascii=False, indent=2),
124
  )
125
 
 
132
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
133
  file_count="multiple",
134
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
135
+ type="filepath", # ← ここが重要:'file' は非対応・例外の元
136
  )
137
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
138
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
140
  run_btn = gr.Button("実行")
141
 
142
  with gr.Tab("構造化JSON"):
143
+ out_json = gr.Code(label="統合出力 (JSON文字列)")
144
 
145
  with gr.Tab("抽出スキル"):
146
+ out_skills = gr.Code(label="スキル一覧 (JSON文字列)") # ← gr.JSON を **使わない**
147
 
148
  with gr.Tab("品質スコア"):
149
+ out_score = gr.Code(label="品質評価 (JSON文字列)")
150
 
151
  with gr.Tab("要約 (300/100/1文)"):
152
  out_sum_300 = gr.Textbox(label="300字要約")
 
157
  out_pdf = gr.File(label="匿名PDFダウンロード")
158
 
159
  with gr.Tab("Datasets 保存ログ"):
160
+ out_commit = gr.Code(label="コミット情報 (JSON文字列)")
161
 
162
  run_btn.click(
163
  process_resumes,
 
166
  )
167
 
168
  if __name__ == "__main__":
169
+ # ローカル(gradio.live)想定なら share=True が安全
170
+ # Spaces 内では demo.launch() のままでもOK
171
+ demo.launch(share=True, server_name="0.0.0.0")