semmyk committed
Commit b5547bd · 1 Parent(s): 74018a1

baseline08_beta01.5_28Sept25: fix oauth_token.token (convert_batch), log in

README.md CHANGED
@@ -7,13 +7,14 @@ sdk: gradio
 command: python main.py
 app_file: main.py
 hf_oauth: true
+ oauth_scopes: [read-access]
 python_version: 3.12
 license: mit
 pinned: true
 short_description: PDF & HTML parser to markdown
 models: [meta-llama/Llama-4-Maverick-17B-128E-Instruct, openai/gpt-oss-120b, openai/gpt-oss-20b]
 tags: [markdown, PDF, parser, converter, extractor]
- #preload_from_hub: [https://huggingface.co/datalab-to/surya_layout, https://huggingface.co/datalab-to/surya_tablerec, huggingface.co/datalab-to/line_detector0, https://huggingface.co/tarun-menta/ocr_error_detection/blob/main/config.json]
+ preload_from_hub: [https://huggingface.co/datalab-to/surya_layout, https://huggingface.co/datalab-to/surya_tablerec, huggingface.co/datalab-to/line_detector0, https://huggingface.co/tarun-menta/ocr_error_detection/blob/main/config.json]
 owner: research-semmyk
 #---
 #
@@ -39,18 +40,12 @@ version: 0.1.0
 readme: README.md
 requires-python: ">=3.12"
 #dependencies: []
- #owner: research-semmyk
 #preload_from_hub:
 # - https://huggingface.co/datalab-to/surya_layout
 # - https://huggingface.co/datalab-to/surya_tablerec
 # - huggingface.co/datalab-to/line_detector0
 # - https://huggingface.co/tarun-menta/ocr_error_detection/blob/main/config.json
 #owner: research-semmyk
- #preload_from_hub:
- # - https://huggingface.co/datalab-to/surya_layout
- # - https://huggingface.co/datalab-to/surya_tablerec
- # - huggingface.co/datalab-to/line_detector0
- # - https://huggingface.co/tarun-menta/ocr_error_detection/blob/main/config.json
 ---

 # parserPDF
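The two README additions work together: with `hf_oauth: true` and `oauth_scopes: [read-access]` in the Space metadata, Gradio can inject a read-scoped user token into any event handler that declares a `gr.OAuthToken` parameter, which is what `convert_batch` in `ui/gradio_ui.py` now does (see below). A minimal sketch of that wiring, with an illustrative handler name (`whoami_from_token` is not part of this repo):

import gradio as gr

def whoami_from_token(oauth_token: gr.OAuthToken | None = None) -> str:
    # Gradio fills this parameter with the signed-in user's OAuth token because
    # of the gr.OAuthToken annotation; it stays None until the user logs in.
    if oauth_token is None:
        return "Not signed in"
    return f"Received a read-scoped token ({len(oauth_token.token)} chars)"

with gr.Blocks() as demo:
    gr.LoginButton()
    status = gr.Markdown()
    gr.Button("Check login").click(whoami_from_token, inputs=None, outputs=status)

demo.launch()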
converters/extraction_converter.py CHANGED
@@ -133,7 +133,7 @@ class DocumentConverter:
 llm_service_str = str(self.llm_service).split("'")[1] ## SMY: split and slicing ##Gets the string value

 # sets api_key required by Marker
- os.environ["OPENAI_API_KEY"] = self.openai_api_key or api_token ## to handle Marker's assertion test on OpenAI
+ os.environ["OPENAI_API_KEY"] = api_token if api_token !='' or None else self.openai_api_key ## to handle Marker's assertion test on OpenAI
 logger.log(level=20, msg="self.converter: instantiating MarkerConverter:", extra={"llm_service_str": llm_service_str, "api_token": api_token}) ##debug

 #self.converter: MarkerConverter = MarkerConverter(
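Note that Python parses the new expression as `api_token if ((api_token != '') or None) else self.openai_api_key`, so a `None` token still passes the test. A more explicit sketch of the apparent intent (prefer the per-request token, otherwise fall back to the configured OpenAI key); `set_marker_openai_key` is an illustrative helper, not a function in this repo:

import os

def set_marker_openai_key(api_token: str | None, openai_api_key: str | None) -> None:
    # Prefer the token supplied for this request; otherwise fall back to the
    # configured OpenAI key. Marker asserts that OPENAI_API_KEY is set.
    key = api_token if api_token else openai_api_key
    if key:
        os.environ["OPENAI_API_KEY"] = key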
llm/llm_login.py CHANGED
@@ -47,7 +47,7 @@ def login_huggingface(token: Optional[str] = None):
 #return True
 except Exception as exc:
 # Respect common env var names; prefer explicit token arg when provided
- fallback_token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") or get_token()
+ fallback_token = token if token else get_token() or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
 if fallback_token:
 try:
 login(token=fallback_token)
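The reordered fallback now consults the cached Hub token (`get_token()`) before the environment variables, while an explicit `token` argument still takes precedence. A self-contained sketch of that resolution order, assuming `huggingface_hub` is installed; it is not a drop-in replacement for `login_huggingface`:

import os
from typing import Optional

from huggingface_hub import get_token, login

def resolve_hf_token(token: Optional[str] = None) -> Optional[str]:
    # Explicit argument first, then the cached CLI/login token, then env vars.
    return token if token else (
        get_token()
        or os.getenv("HF_TOKEN")
        or os.getenv("HUGGINGFACEHUB_API_TOKEN")
    )

def login_with_fallback(token: Optional[str] = None) -> None:
    fallback_token = resolve_hf_token(token)
    if fallback_token:
        login(token=fallback_token)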
requirements.txt CHANGED
@@ -1,4 +1,4 @@
- gradio>=4.0
+ gradio>=5.40.0
 marker-pdf[full]>=1.3.0 # pip install marker (GitHub: https://github.com/datalab-to/marker)
 weasyprint>=59.0 # optional fallback if pandoc is not available
 #pandoc==2.3 # for Markdown → PDF conversion
ui/gradio_ui.py CHANGED
@@ -43,7 +43,7 @@ def get_login_token( api_token_arg, oauth_token: gr.OAuthToken | None=None,):
 oauth_token = oauth_token
 else: get_token()

- return oauth_token
+ return oauth_token.token ##token value

 # pool executor to convert files called by Gradio
 ##SMY: TODO: future: refactor to gradio_process.py
@@ -74,6 +74,7 @@ def convert_batch(
 use_llm: bool = False, #Optional[bool] = False, #True,
 page_range: str = None, #Optional[str] = None,
 tz_hours: str = None,
+ oauth_token: gr.OAuthToken | None=None,
 ): #-> str:
 """
 Handles the conversion process using multiprocessing.
@@ -86,7 +87,7 @@ def convert_batch(
 yield gr.update(interactive=False), f"Commencing Processing ... Getting login", {"process": "Commencing Processing"}, f"__init__.py"

 # get token from logged-in user:
- api_token = get_login_token(api_token_gr)
+ api_token = get_login_token(api_token_arg=api_token_gr, oauth_token=oauth_token)
 ##SMY: Strictly debug. Must not be live
 logger.log(level=30, msg="Commencing: get_login_token", extra={"api_token]": api_token, "api_token_gr": api_token_gr})

@@ -262,8 +263,9 @@ def convert_batch(
 #return "\n".join(logs), "\n".join(logs_files_images) #"\n".join(logs_files)
 #return logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return
 #return gr.update(interactive=True), gr.update(value=logs_return_formatted_json_string), gr.update(value=logs_return_formatted_json_string, visible=True), gr.update(value=logs_files_images_return, visible=True)
- yield gr.update(interactive=True), gr.update(), gr.update(visible=True), gr.update(visible=True)
- yield gr.update(interactive=True), logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return
+ #yield gr.update(interactive=True), gr.update(), gr.update(visible=True), gr.update(visible=True)
+ #yield gr.update(interactive=True), logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return
+ return gr.update(interactive=True), logs_return_formatted_json_string, logs_return_formatted_json_string, logs_files_images_return

 except Exception as exc:
 tb = traceback.format_exc()
@@ -450,12 +452,6 @@ def build_interface() -> gr.Blocks:

 return updated_files, message

- def clear_state():
- """
- Clears the accumulated state of uloaded file list, output textbox, files and directory upload.
- """
- return [], "Files list cleared.", [], []
-
 # with gr.Blocks(title=TITLE) as demo
 with gr.Blocks(title=TITLE, css=custom_css) as demo:
 gr.Markdown(f"## {DESCRIPTION}")
@@ -584,11 +580,12 @@ def build_interface() -> gr.Blocks:

 with gr.Accordion("🤗 HuggingFace Client Logout", open=True): #, open=False):
 # Logout controls
-
- logout_status = gr.Markdown(visible=True) #visible=False)
 with gr.Row():
- hf_login_logout_btn = gr.LoginButton(value="Sign in to HuggingFace 🤗", logout_value="Clear Session & Logout of HF: ({})", variant="huggingface")
- #logout_btn = gr.Button("Logout from session and Hugging Face (inference) Client", variant="stop", )
+ #hf_login_logout_btn = gr.LoginButton(value="Sign in to HuggingFace 🤗", logout_value="Clear Session & Logout of HF: ({})", variant="huggingface")
+ hf_login_logout_btn = gr.LoginButton(value="Sign in to HuggingFace 🤗", logout_value="Logout of HF: ({}) 🤗", variant="huggingface")
+ #logout_btn = gr.Button("Logout from session & HF (inference) Client", variant="stop", )
+
+ logout_status_md = gr.Markdown(visible=True) #visible=False)

 # The gr.State component to hold the accumulated list of files
 uploaded_file_list = gr.State([]) ##NB: initial value of `gr.State` must be able to be deepcopied
@@ -759,7 +756,35 @@ def build_interface() -> gr.Blocks:
 )
 hf_provider_dd.change(on_provider_change, inputs=hf_provider_dd, outputs=None)

+
 # HuggingFace Client Logout
+ '''def get_login_token(state_api_token_arg, oauth_token: gr.OAuthToken | None=None):
+ #oauth_token = get_token() if oauth_token is not None else state_api_token
+ #oauth_token = oauth_token if oauth_token else state_api_token_arg
+ if oauth_token:
+ print(oauth_token)
+ return oauth_token
+ else:
+ oauth_token = get_token()
+ print(oauth_token)
+ return oauth_token'''
+ #'''
+ def do_logout(): ##SMY: use with clear_state() as needed
+ try:
+ #ok = docextractor.client.logout()
+ ok = docconverter.client.logout()
+ # Reset token textbox on successful logout
+ #msg = "✅ Logged out of HuggingFace and cleared tokens. Remember to log out of HuggingFace completely." if ok else "⚠️ Logout failed."
+ msg = "✅ Session Cleared. Remember to close browser." if ok else "⚠️ HF client closing failed."
+
+ return msg
+ #return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value="Clear session")
+ except AttributeError:
+ msg = "⚠️ HF client closing failed."
+
+ return msg
+ #return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value="Clear session", interactive=False)
+ #'''
 def do_logout_hf():
 try:
 ok = docconverter.client.logout()
@@ -772,18 +797,9 @@ def build_interface() -> gr.Blocks:
 msg = "⚠️ Logout. No HF session"
 return msg
 #yield msg ## generator for string
- '''def get_login_token(state_api_token_arg, oauth_token: gr.OAuthToken | None=None):
- #oauth_token = get_token() if oauth_token is not None else state_api_token
- #oauth_token = oauth_token if oauth_token else state_api_token_arg
- if oauth_token:
- print(oauth_token)
- return oauth_token
- else:
- oauth_token = get_token()
- print(oauth_token)
- return oauth_token'''

- def custom_do_logout(hf_login_logout_btn_arg: gr.LoginButton, state_api_token_arg: gr.State):
+ #def custom_do_logout(hf_login_logout_btn_arg: gr.LoginButton, state_api_token_arg: gr.State):
+ def custom_do_logout():
 #global state_api_token
 ''' ##SMY: TO DELETE
 try:
@@ -797,12 +813,25 @@ def build_interface() -> gr.Blocks:
 msg = do_logout_hf()
 ##debug
 #msg = "✅ Session Cleared. Remember to close browser." if "Clear Session & Logout of HF" in hf_login_logout_btn else "⚠️ Logout" # & Session Cleared"
- return gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg), state_api_token_arg
+ return gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg) #, state_api_token_arg
 #yield gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg)

+ # Files, status, session clearing
+ def clear_state():
+ """
+ Clears the accumulated state of uploaded file list, output textbox, files and directory upload.
+ """
+ #msg = f"Files list cleared: {do_logout()}" ## use as needed
+ msg = f"Files list cleared."
+ yield [], msg, '', ''
+ #return [], f"Files list cleared.", [], []
+
 #hf_login_logout_btn.click(fn=custom_do_logout, inputs=None, outputs=hf_login_logout_btn)
+ ##unused
+ ###hf_login_logout_btn.click(fn=custom_do_logout, inputs=[hf_login_logout_btn, state_api_token], outputs=[hf_login_logout_btn, api_token_tb, logout_status_md, state_api_token])
+ ###logout_btn.click(fn=do_logout, inputs=None, outputs=[api_token_tb, logout_status_md, hf_login_logout_btn, logout_btn])
+ #logout_btn.click(fn=clear_state, inputs=None, outputs=[uploaded_file_list, output_textbox, log_output, api_token_tb])
+ hf_login_logout_btn.click(fn=custom_do_logout, inputs=None, outputs=[hf_login_logout_btn, api_token_tb, logout_status_md]) #, state_api_token])

 # --- PDF & HTML → Markdown tab ---
 # Event handler for the multiple file upload button
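Taken together, the `ui/gradio_ui.py` changes route the OAuth token end to end: `convert_batch` now accepts an injected `gr.OAuthToken`, forwards it to `get_login_token`, and `get_login_token` returns the raw string via `oauth_token.token`. A condensed sketch of the intended flow (simplified signatures; the real `convert_batch` also handles batching, logging and yielded UI updates):

import gradio as gr
from huggingface_hub import get_token

def get_login_token(api_token_arg: str | None,
                    oauth_token: gr.OAuthToken | None = None) -> str | None:
    # Prefer the OAuth token injected by Gradio and return its string value
    # (oauth_token.token, as in the diff); otherwise fall back to the textbox
    # value or the locally cached Hub token.
    if oauth_token is not None:
        return oauth_token.token
    return api_token_arg or get_token()

def convert_batch(files: list[str],
                  api_token_gr: str | None = None,
                  oauth_token: gr.OAuthToken | None = None):
    api_token = get_login_token(api_token_arg=api_token_gr, oauth_token=oauth_token)
    # ... hand api_token to the converter, then report back to the UI
    return gr.update(interactive=True), f"token present: {bool(api_token)}"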