Spaces:
Sleeping
Sleeping
baseline08_beta01.1_28Sept25: trying oauth_token: gr.OAuthToken
Browse files
- main.py +1 -1
- ui/gradio_ui.py +140 -113
- utils/logger.py +9 -2
main.py
CHANGED
|
@@ -19,4 +19,4 @@ if __name__ == "__main__":
|
|
| 19 |
|
| 20 |
demo = build_interface()
|
| 21 |
#demo.launch(debug=True, show_error=True ,ssr_mode=True) #(share=True) # share=True for public link; remove in production
|
| 22 |
-
demo.launch(debug=True, show_error=True)
|
|
|
|
| 19 |
|
| 20 |
demo = build_interface()
|
| 21 |
#demo.launch(debug=True, show_error=True ,ssr_mode=True) #(share=True) # share=True for public link; remove in production
|
| 22 |
+
demo.launch(debug=True, show_error=True, ssr_mode=False)
|
ui/gradio_ui.py
CHANGED
|
@@ -33,6 +33,18 @@ pdf2md_converter = PdfToMarkdownConverter()
|
|
| 33 |
#html2md_converter = HtmlToMarkdownConverter()
|
| 34 |
#md2pdf_converter = MarkdownToPdfConverter()
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# pool executor to convert files called by Gradio
|
| 37 |
##SMY: TODO: future: refactor to gradio_process.py
|
| 38 |
def convert_batch(
|
|
@@ -49,7 +61,7 @@ def convert_batch(
|
|
| 49 |
temperature: float,
|
| 50 |
top_p: float,
|
| 51 |
stream: bool,
|
| 52 |
-
|
| 53 |
#max_workers: int,
|
| 54 |
#max_retries: int,
|
| 55 |
openai_base_url: str = "https://router.huggingface.co/v1",
|
|
@@ -70,13 +82,26 @@ def convert_batch(
|
|
| 70 |
Receives Gradio component values, starting with the list of uploaded file paths
|
| 71 |
"""
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
# Update the Gradio UI to improve user-friendly eXperience
|
| 77 |
-
#outputs=[process_button, log_output, files_individual_JSON, files_individual_downloads],
|
| 78 |
-
yield gr.update(interactive=False), f"Processing files...", {"process": "Processing files"}, f"__init__.py"
|
| 79 |
-
|
| 80 |
## debug
|
| 81 |
#logger.log(level=30, msg="pdf_files_inputs", extra={"input_arg[0]:": pdf_files[0]})
|
| 82 |
|
|
@@ -123,22 +148,6 @@ def convert_batch(
|
|
| 123 |
)
|
| 124 |
|
| 125 |
#global docextractor ##SMY: deprecated.
|
| 126 |
-
try:
|
| 127 |
-
##SMY: might deprecate. To replace with oauth login from Gradio ui or integrate cleanly.
|
| 128 |
-
login_huggingface(api_token) ## attempt login if not already logged in. NB: HF CLI login prompt would not display in Process Worker.
|
| 129 |
-
|
| 130 |
-
if is_loggedin_huggingface() and (api_token is None or api_token == ""):
|
| 131 |
-
api_token = get_token()
|
| 132 |
-
else:
|
| 133 |
-
login_huggingface()
|
| 134 |
-
# login: Update the Gradio UI to improve user-friendly eXperience
|
| 135 |
-
yield gr.update(interactive=False), f"login to HF: Processing files...", {"process": "Processing files"}, f"__init__.py"
|
| 136 |
-
|
| 137 |
-
except Exception as exc: # Catch all exceptions
|
| 138 |
-
tb = traceback.format_exc()
|
| 139 |
-
logger.exception(f"β Error during login_huggingface β {exc}\n{tb}", exc_info=True) # Log the full traceback
|
| 140 |
-
return gr.update(interactive=True), f"β An error occurred during login_huggingface β {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py" # return the exception message
|
| 141 |
-
|
| 142 |
try:
|
| 143 |
# Create a pool with init_worker initialiser
|
| 144 |
with ProcessPoolExecutor(
|
|
@@ -148,10 +157,13 @@ def convert_batch(
|
|
| 148 |
) as pool:
|
| 149 |
#global docextractor
|
| 150 |
logger.log(level=30, msg="Initialising ProcessPool: pool:", extra={"pdf_files": pdf_files, "files_len": len(pdf_files), "model_id": model_id, "output_dir": output_dir_string}) #pdf_files_count
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
# Map the files (pdf_files) to the conversion function (pdf2md_converter.convert_file)
|
| 153 |
# The 'docconverter' argument is implicitly handled by the initialiser
|
| 154 |
-
|
| 155 |
#futures = [pool.map(pdf2md_converter.convert_files, f) for f in pdf_files]
|
| 156 |
#logs = [f.result() for f in as_completed(futures)]
|
| 157 |
#futures = [pool.submit(pdf2md_converter.convert_files, file) for file in pdf_files]
|
|
@@ -511,18 +523,6 @@ def build_interface() -> gr.Blocks:
|
|
| 511 |
allow_custom_value=True, # let users type new providers as they appear
|
| 512 |
)
|
| 513 |
|
| 514 |
-
# Validate provider on change; warn but allow continue
|
| 515 |
-
def on_provider_change(provider_value: str):
|
| 516 |
-
if not provider_value:
|
| 517 |
-
return
|
| 518 |
-
if not is_valid_provider(provider_value):
|
| 519 |
-
sug = suggest_providers(provider_value)
|
| 520 |
-
extra = f" Suggestions: {', '.join(sug)}." if sug else ""
|
| 521 |
-
gr.Warning(
|
| 522 |
-
f"Provider not on HF provider list. See https://huggingface.co/docs/inference-providers/index.{extra}"
|
| 523 |
-
)
|
| 524 |
-
hf_provider_dd.change(on_provider_change, inputs=hf_provider_dd, outputs=None)
|
| 525 |
-
|
| 526 |
# Clean UI: Model parameters hidden in collapsible accordion
|
| 527 |
with gr.Accordion("⚙️ Marker Settings", open=False):
|
| 528 |
gr.Markdown(f"#### **Marker Configuration**")
|
|
@@ -576,64 +576,14 @@ def build_interface() -> gr.Blocks:
|
|
| 576 |
max_lines=1,
|
| 577 |
)
|
| 578 |
|
| 579 |
-
# Initialise gr.State
|
| 580 |
-
state_max_workers = gr.State(4) #max_workers_sl,
|
| 581 |
-
state_max_retries = gr.State(2) #max_retries_sl,
|
| 582 |
-
state_tz_hours = gr.State(value=None)
|
| 583 |
-
|
| 584 |
-
def update_state_stored_value(new_component_input):
|
| 585 |
-
""" Updates stored state: use for max_workers and max_retries """
|
| 586 |
-
return new_component_input
|
| 587 |
-
|
| 588 |
-
# Update gr.State values on slider components change. NB: initial value of `gr.State` must be able to be deepcopied
|
| 589 |
-
max_workers_sl.change(update_state_stored_value, inputs=max_workers_sl, outputs=state_max_workers)
|
| 590 |
-
max_retries_sl.change(update_state_stored_value, inputs=max_retries_sl, outputs=state_max_retries)
|
| 591 |
-
tz_hours_num.change(update_state_stored_value, inputs=tz_hours_num, outputs=state_tz_hours)
|
| 592 |
-
|
| 593 |
|
| 594 |
with gr.Accordion("🤗 HuggingFace Client Logout", open=True): #, open=False):
|
| 595 |
# Logout controls
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
#ok = docextractor.client.logout()
|
| 599 |
-
ok = docconverter.client.logout()
|
| 600 |
-
# Reset token textbox on successful logout
|
| 601 |
-
#msg = "✅ Logged out of HuggingFace and cleared tokens. Remember to log out of HuggingFace completely." if ok else "⚠️ Logout failed."
|
| 602 |
-
msg = "✅ Session Cleared. Remember to browser." if ok else "⚠️ Logout failed."
|
| 603 |
-
return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value="Clear session")
|
| 604 |
-
except AttributeError:
|
| 605 |
-
msg = "⚠️ Logout."
|
| 606 |
-
return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value="Clear session", interactive=False)
|
| 607 |
-
'''
|
| 608 |
-
def do_logout_hf():
|
| 609 |
-
try:
|
| 610 |
-
ok = docconverter.client.logout()
|
| 611 |
-
# Reset token textbox on successful logout
|
| 612 |
-
msg = "✅ Session Cleared. Remember to close browser." if ok else "⚠️ Logout & Session Cleared"
|
| 613 |
-
#return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value="Clear session", interactive=False)
|
| 614 |
-
#return msg
|
| 615 |
-
yield msg
|
| 616 |
-
except AttributeError:
|
| 617 |
-
msg = "⚠️ Logout. No HF session"
|
| 618 |
-
#return msg
|
| 619 |
-
yield msg
|
| 620 |
-
|
| 621 |
-
def custom_do_logout():
|
| 622 |
-
#do_logout()
|
| 623 |
-
#return gr.update(value="Sign in to HuggingFace 🤗")
|
| 624 |
-
msg = do_logout_hf()
|
| 625 |
-
#return gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg)
|
| 626 |
-
yield gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg)
|
| 627 |
-
|
| 628 |
-
logout_status = gr.Markdown(visible=False)
|
| 629 |
with gr.Row():
|
| 630 |
hf_login_logout_btn = gr.LoginButton(value="Sign in to HuggingFace 🤗", logout_value="Clear Session & Logout of HF: ({})", variant="huggingface")
|
| 631 |
#logout_btn = gr.Button("Logout from session and Hugging Face (inference) Client", variant="stop", )
|
| 632 |
-
|
| 633 |
-
#hf_login_logout_btn.click(fn=custom_do_logout, inputs=None, outputs=hf_login_logout_btn)
|
| 634 |
-
hf_login_logout_btn.click(fn=custom_do_logout, inputs=None, outputs=[hf_login_logout_btn, api_token_tb, logout_status])
|
| 635 |
-
#logout_btn.click(fn=do_logout, inputs=None, outputs=[api_token_tb, logout_status, hf_login_logout_btn, logout_btn])
|
| 636 |
-
|
| 637 |
|
| 638 |
# The gr.State component to hold the accumulated list of files
|
| 639 |
uploaded_file_list = gr.State([]) ##NB: initial value of `gr.State` must be able to be deepcopied
|
|
@@ -686,26 +636,6 @@ def build_interface() -> gr.Blocks:
|
|
| 686 |
process_button = gr.Button("Process All Uploaded Files", variant="primary")
|
| 687 |
clear_button = gr.Button("Clear All Uploads", variant="secondary")
|
| 688 |
|
| 689 |
-
# Event handler for the multiple file upload button
|
| 690 |
-
file_btn.upload(
|
| 691 |
-
fn=accumulate_files,
|
| 692 |
-
inputs=[file_btn, uploaded_file_list],
|
| 693 |
-
outputs=[uploaded_file_list, output_textbox]
|
| 694 |
-
)
|
| 695 |
-
|
| 696 |
-
# Event handler for the directory upload button
|
| 697 |
-
dir_btn.upload(
|
| 698 |
-
fn=accumulate_files,
|
| 699 |
-
inputs=[dir_btn, uploaded_file_list],
|
| 700 |
-
outputs=[uploaded_file_list, output_textbox]
|
| 701 |
-
)
|
| 702 |
-
|
| 703 |
-
# Event handler for the "Clear" button
|
| 704 |
-
clear_button.click(
|
| 705 |
-
fn=clear_state,
|
| 706 |
-
inputs=None,
|
| 707 |
-
outputs=[uploaded_file_list, output_textbox, file_btn, dir_btn],
|
| 708 |
-
)
|
| 709 |
|
| 710 |
# --- PDF β Markdown tab ---
|
| 711 |
with gr.Tab(" π PDF β Markdown (Flag for DEPRECATION)", interactive=False, visible=True): #False
|
|
@@ -773,8 +703,7 @@ def build_interface() -> gr.Blocks:
|
|
| 773 |
|
| 774 |
# A Files component to display individual processed files as download links
|
| 775 |
with gr.Accordion("⬇ View and Download processed files", open=True): #, open=False
|
| 776 |
-
|
| 777 |
-
|
| 778 |
##SMY: future
|
| 779 |
zip_btn = gr.DownloadButton("Download Zip file of all processed files", visible=False) #.Button()
|
| 780 |
|
|
@@ -793,7 +722,105 @@ def build_interface() -> gr.Blocks:
|
|
| 793 |
#max_lines=25,
|
| 794 |
#interactive=False
|
| 795 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 797 |
# file inputs
|
| 798 |
## [wierd] NB: inputs_arg is a list of Gradio component objects, not the values of those components.
|
| 799 |
## inputs_arg variable captures the state of these components at the time the list is created.
|
|
@@ -819,7 +846,7 @@ def build_interface() -> gr.Blocks:
|
|
| 819 |
temperature_sl,
|
| 820 |
top_p_sl,
|
| 821 |
stream_cb,
|
| 822 |
-
api_token_tb,
|
| 823 |
#gr.State(4), # max_workers
|
| 824 |
#gr.State(3), # max_retries
|
| 825 |
openai_base_url_tb,
|
|
@@ -830,7 +857,7 @@ def build_interface() -> gr.Blocks:
|
|
| 830 |
output_dir_tb,
|
| 831 |
use_llm_cb,
|
| 832 |
page_range_tb,
|
| 833 |
-
tz_hours_num,
|
| 834 |
]
|
| 835 |
|
| 836 |
## debug
|
|
|
|
| 33 |
#html2md_converter = HtmlToMarkdownConverter()
|
| 34 |
#md2pdf_converter = MarkdownToPdfConverter()
|
| 35 |
|
| 36 |
+
|
| 37 |
+
def get_login_token( api_token_arg, oauth_token: gr.OAuthToken | None=None,):
|
| 38 |
+
""" Use user's supplied token or Get token from logged-in users, else from token stored on the machine. Return token"""
|
| 39 |
+
#oauth_token = get_token() if oauth_token is not None else api_token_arg
|
| 40 |
+
if api_token_arg != '': # or not None: #| None:
|
| 41 |
+
oauth_token = api_token_arg
|
| 42 |
+
elif oauth_token:
|
| 43 |
+
oauth_token = oauth_token
|
| 44 |
+
else: get_token()
|
| 45 |
+
|
| 46 |
+
return oauth_token
|
| 47 |
+
|
| 48 |
# pool executor to convert files called by Gradio
|
| 49 |
##SMY: TODO: future: refactor to gradio_process.py
|
| 50 |
def convert_batch(
|
|
|
|
| 61 |
temperature: float,
|
| 62 |
top_p: float,
|
| 63 |
stream: bool,
|
| 64 |
+
api_token_gr: str,
|
| 65 |
#max_workers: int,
|
| 66 |
#max_retries: int,
|
| 67 |
openai_base_url: str = "https://router.huggingface.co/v1",
|
|
|
|
| 82 |
Receives Gradio component values, starting with the list of uploaded file paths
|
| 83 |
"""
|
| 84 |
|
| 85 |
+
# get token from logged-in user:
|
| 86 |
+
api_token = get_login_token(api_token_gr)
|
| 87 |
+
|
| 88 |
+
try:
|
| 89 |
+
##SMY: might deprecate. To replace with oauth login from Gradio ui or integrate cleanly.
|
| 90 |
+
login_huggingface(api_token) ## attempt login if not already logged in. NB: HF CLI login prompt would not display in Process Worker.
|
| 91 |
+
|
| 92 |
+
if is_loggedin_huggingface() and (api_token is None or api_token == ""):
|
| 93 |
+
api_token = get_token() ##SMY: might be redundant
|
| 94 |
+
else:
|
| 95 |
+
login_huggingface()
|
| 96 |
+
# login: Update the Gradio UI to improve user-friendly eXperience
|
| 97 |
+
yield gr.update(interactive=False), f"login to HF: Processing files...", {"process": "Processing files"}, f"__init__.py"
|
| 98 |
+
|
| 99 |
+
except Exception as exc: # Catch all exceptions
|
| 100 |
+
tb = traceback.format_exc()
|
| 101 |
+
logger.exception(f"β Error during login_huggingface β {exc}\n{tb}", exc_info=True) # Log the full traceback
|
| 102 |
+
return gr.update(interactive=True), f"β An error occurred during login_huggingface β {exc}\n{tb}", {"Error":f"Error: {exc}"}, f"__init__.py" # return the exception message
|
| 103 |
+
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
## debug
|
| 106 |
#logger.log(level=30, msg="pdf_files_inputs", extra={"input_arg[0]:": pdf_files[0]})
|
| 107 |
|
|
|
|
| 148 |
)
|
| 149 |
|
| 150 |
#global docextractor ##SMY: deprecated.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
try:
|
| 152 |
# Create a pool with init_worker initialiser
|
| 153 |
with ProcessPoolExecutor(
|
|
|
|
| 157 |
) as pool:
|
| 158 |
#global docextractor
|
| 159 |
logger.log(level=30, msg="Initialising ProcessPool: pool:", extra={"pdf_files": pdf_files, "files_len": len(pdf_files), "model_id": model_id, "output_dir": output_dir_string}) #pdf_files_count
|
| 160 |
+
|
| 161 |
+
# Update the Gradio UI to improve user-friendly eXperience
|
| 162 |
+
#outputs=[process_button, log_output, files_individual_JSON, files_individual_downloads],
|
| 163 |
+
yield gr.update(interactive=False), f"Starting ProcessPool: Processing files...", {"process": "Processing files ..."}, f"__init__.py"
|
| 164 |
+
|
| 165 |
# Map the files (pdf_files) to the conversion function (pdf2md_converter.convert_file)
|
| 166 |
# The 'docconverter' argument is implicitly handled by the initialiser
|
|
|
|
| 167 |
#futures = [pool.map(pdf2md_converter.convert_files, f) for f in pdf_files]
|
| 168 |
#logs = [f.result() for f in as_completed(futures)]
|
| 169 |
#futures = [pool.submit(pdf2md_converter.convert_files, file) for file in pdf_files]
|
|
|
|
| 523 |
allow_custom_value=True, # let users type new providers as they appear
|
| 524 |
)
|
| 525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
# Clean UI: Model parameters hidden in collapsible accordion
|
| 527 |
with gr.Accordion("⚙️ Marker Settings", open=False):
|
| 528 |
gr.Markdown(f"#### **Marker Configuration**")
|
|
|
|
| 576 |
max_lines=1,
|
| 577 |
)
|
| 578 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 579 |
|
| 580 |
with gr.Accordion("🤗 HuggingFace Client Logout", open=True): #, open=False):
|
| 581 |
# Logout controls
|
| 582 |
+
|
| 583 |
+
logout_status = gr.Markdown(visible=True) #visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
with gr.Row():
|
| 585 |
hf_login_logout_btn = gr.LoginButton(value="Sign in to HuggingFace 🤗", logout_value="Clear Session & Logout of HF: ({})", variant="huggingface")
|
| 586 |
#logout_btn = gr.Button("Logout from session and Hugging Face (inference) Client", variant="stop", )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
|
| 588 |
# The gr.State component to hold the accumulated list of files
|
| 589 |
uploaded_file_list = gr.State([]) ##NB: initial value of `gr.State` must be able to be deepcopied
|
|
|
|
| 636 |
process_button = gr.Button("Process All Uploaded Files", variant="primary")
|
| 637 |
clear_button = gr.Button("Clear All Uploads", variant="secondary")
|
| 638 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
# --- PDF β Markdown tab ---
|
| 641 |
with gr.Tab(" π PDF β Markdown (Flag for DEPRECATION)", interactive=False, visible=True): #False
|
|
|
|
| 703 |
|
| 704 |
# A Files component to display individual processed files as download links
|
| 705 |
with gr.Accordion("⬇ View and Download processed files", open=True): #, open=False
|
| 706 |
+
|
|
|
|
| 707 |
##SMY: future
|
| 708 |
zip_btn = gr.DownloadButton("Download Zip file of all processed files", visible=False) #.Button()
|
| 709 |
|
|
|
|
| 722 |
#max_lines=25,
|
| 723 |
#interactive=False
|
| 724 |
)
|
| 725 |
+
|
| 726 |
+
# Initialise gr.State
|
| 727 |
+
state_max_workers = gr.State(4) #max_workers_sl,
|
| 728 |
+
state_max_retries = gr.State(2) #max_retries_sl,
|
| 729 |
+
state_tz_hours = gr.State(value=None)
|
| 730 |
+
state_api_token = gr.State(None)
|
| 731 |
+
processed_file_state = gr.State([]) ##SMY: future: View and Download processed files
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
def update_state_stored_value(new_component_input):
|
| 735 |
+
""" Updates stored state: use for max_workers and max_retries """
|
| 736 |
+
return new_component_input
|
| 737 |
|
| 738 |
+
# Update gr.State values on slider components change. NB: initial value of `gr.State` must be able to be deepcopied
|
| 739 |
+
max_workers_sl.change(update_state_stored_value, inputs=max_workers_sl, outputs=state_max_workers)
|
| 740 |
+
max_retries_sl.change(update_state_stored_value, inputs=max_retries_sl, outputs=state_max_retries)
|
| 741 |
+
tz_hours_num.change(update_state_stored_value, inputs=tz_hours_num, outputs=state_tz_hours)
|
| 742 |
+
api_token_tb.change(update_state_stored_value, inputs=api_token_tb, outputs=state_api_token)
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
# LLM Setting: Validate provider on change; warn but allow continue
|
| 746 |
+
def on_provider_change(provider_value: str):
|
| 747 |
+
if not provider_value:
|
| 748 |
+
return
|
| 749 |
+
if not is_valid_provider(provider_value):
|
| 750 |
+
sug = suggest_providers(provider_value)
|
| 751 |
+
extra = f" Suggestions: {', '.join(sug)}." if sug else ""
|
| 752 |
+
gr.Warning(
|
| 753 |
+
f"Provider not on HF provider list. See https://huggingface.co/docs/inference-providers/index.{extra}"
|
| 754 |
+
)
|
| 755 |
+
hf_provider_dd.change(on_provider_change, inputs=hf_provider_dd, outputs=None)
|
| 756 |
+
|
| 757 |
+
# HuggingFace Client Logout
|
| 758 |
+
def do_logout_hf():
|
| 759 |
+
try:
|
| 760 |
+
ok = docconverter.client.logout()
|
| 761 |
+
# Reset token textbox on successful logout
|
| 762 |
+
msg = "✅ Session Cleared. Remember to close browser." if ok else "⚠️ Logout & Session Cleared"
|
| 763 |
+
#return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value="Clear session", interactive=False)
|
| 764 |
+
return msg
|
| 765 |
+
#yield msg ## generator for string
|
| 766 |
+
except AttributeError:
|
| 767 |
+
msg = "⚠️ Logout. No HF session"
|
| 768 |
+
return msg
|
| 769 |
+
#yield msg ## generator for string
|
| 770 |
+
'''def get_login_token(state_api_token_arg, oauth_token: gr.OAuthToken | None=None):
|
| 771 |
+
#oauth_token = get_token() if oauth_token is not None else state_api_token
|
| 772 |
+
#oauth_token = oauth_token if oauth_token else state_api_token_arg
|
| 773 |
+
if oauth_token:
|
| 774 |
+
print(oauth_token)
|
| 775 |
+
return oauth_token
|
| 776 |
+
else:
|
| 777 |
+
oauth_token = get_token()
|
| 778 |
+
print(oauth_token)
|
| 779 |
+
return oauth_token'''
|
| 780 |
+
|
| 781 |
+
def custom_do_logout(hf_login_logout_btn_arg: gr.LoginButton, state_api_token_arg: gr.State):
|
| 782 |
+
#global state_api_token
|
| 783 |
+
''' ##SMY: TO DELETE
|
| 784 |
+
try:
|
| 785 |
+
state_api_token_get= get_token() if "Clear Session & Logout of HF" in hf_login_logout_btn_arg.value else state_api_token_arg.value
|
| 786 |
+
except AttributeError:
|
| 787 |
+
#state_api_token_get= get_token() if "Clear Session & Logout of HF" in hf_login_logout_btn_arg else state_api_token_arg
|
| 788 |
+
state_api_token_get = get_login_token(state_api_token_arg)
|
| 789 |
+
'''
|
| 790 |
+
#do_logout()
|
| 791 |
+
#return gr.update(value="Sign in to HuggingFace 🤗")
|
| 792 |
+
msg = do_logout_hf()
|
| 793 |
+
##debug
|
| 794 |
+
#msg = "✅ Session Cleared. Remember to close browser." if "Clear Session & Logout of HF" in hf_login_logout_btn else "⚠️ Logout" # & Session Cleared"
|
| 795 |
+
return gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg), state_api_token_get
|
| 796 |
+
#yield gr.update(value="Sign in to HuggingFace 🤗"), gr.update(value=""), gr.update(visible=True, value=msg)
|
| 797 |
+
|
| 798 |
+
#hf_login_logout_btn.click(fn=custom_do_logout, inputs=None, outputs=hf_login_logout_btn)
|
| 799 |
+
hf_login_logout_btn.click(fn=custom_do_logout, inputs=[hf_login_logout_btn, state_api_token], outputs=[hf_login_logout_btn, api_token_tb, logout_status, state_api_token])
|
| 800 |
+
#logout_btn.click(fn=do_logout, inputs=None, outputs=[api_token_tb, logout_status, hf_login_logout_btn, logout_btn])
|
| 801 |
+
|
| 802 |
+
# --- PDF & HTML β Markdown tab ---
|
| 803 |
+
# Event handler for the multiple file upload button
|
| 804 |
+
file_btn.upload(
|
| 805 |
+
fn=accumulate_files,
|
| 806 |
+
inputs=[file_btn, uploaded_file_list],
|
| 807 |
+
outputs=[uploaded_file_list, output_textbox]
|
| 808 |
+
)
|
| 809 |
+
|
| 810 |
+
# Event handler for the directory upload button
|
| 811 |
+
dir_btn.upload(
|
| 812 |
+
fn=accumulate_files,
|
| 813 |
+
inputs=[dir_btn, uploaded_file_list],
|
| 814 |
+
outputs=[uploaded_file_list, output_textbox]
|
| 815 |
+
)
|
| 816 |
+
|
| 817 |
+
# Event handler for the "Clear" button
|
| 818 |
+
clear_button.click(
|
| 819 |
+
fn=clear_state,
|
| 820 |
+
inputs=None,
|
| 821 |
+
outputs=[uploaded_file_list, output_textbox, file_btn, dir_btn],
|
| 822 |
+
)
|
| 823 |
+
|
| 824 |
# file inputs
|
| 825 |
## [wierd] NB: inputs_arg is a list of Gradio component objects, not the values of those components.
|
| 826 |
## inputs_arg variable captures the state of these components at the time the list is created.
|
|
|
|
| 846 |
temperature_sl,
|
| 847 |
top_p_sl,
|
| 848 |
stream_cb,
|
| 849 |
+
api_token_tb, #state_api_token, #api_token_tb,
|
| 850 |
#gr.State(4), # max_workers
|
| 851 |
#gr.State(3), # max_retries
|
| 852 |
openai_base_url_tb,
|
|
|
|
| 857 |
output_dir_tb,
|
| 858 |
use_llm_cb,
|
| 859 |
page_range_tb,
|
| 860 |
+
tz_hours_num, #state_tz_hours
|
| 861 |
]
|
| 862 |
|
| 863 |
## debug
|
utils/logger.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
import sys
|
| 6 |
-
from datetime import datetime, timezone
|
| 7 |
|
| 8 |
''' ##SMY: discarded
|
| 9 |
def get_logger(name: str) -> logging.Logger:
|
|
@@ -26,10 +26,17 @@ def get_logger(name: str) -> logging.Logger:
|
|
| 26 |
class JsonFormatter(logging.Formatter):
|
| 27 |
"""Minimal JSON formatter for structured logs."""
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def format(self, record: logging.LogRecord) -> str: #
|
| 30 |
payload = {
|
| 31 |
#"ts": datetime.now(timezone.utc).isoformat(), ## default to 'YYYY-MM-DD HH:MM:SS.mmmmmm',
|
| 32 |
-
"ts": datetime.now(timezone.utc).strftime("%Y-%m-%d : %H:%M:%S"), ## SMY: interested in datefmt="%H:%M:%S",
|
|
|
|
| 33 |
"level": record.levelname,
|
| 34 |
"logger": record.name,
|
| 35 |
"message": record.getMessage(),
|
|
|
|
| 3 |
import json
|
| 4 |
import logging
|
| 5 |
import sys
|
| 6 |
+
from datetime import datetime, timezone, timedelta
|
| 7 |
|
| 8 |
''' ##SMY: discarded
|
| 9 |
def get_logger(name: str) -> logging.Logger:
|
|
|
|
| 26 |
class JsonFormatter(logging.Formatter):
|
| 27 |
"""Minimal JSON formatter for structured logs."""
|
| 28 |
|
| 29 |
+
def __init__(self, tz_hours=None, date_format:str="%Y-%m-%d : %H:%M:%S"):
|
| 30 |
+
##SMY: TODO: local time
|
| 31 |
+
self.tz_hours = tz_hours if tz_hours else 0
|
| 32 |
+
self.date_format = date_format
|
| 33 |
+
self.time = datetime.now(timezone.utc) + timedelta(hours=tz_hours if tz_hours else 0) #if tz_hours else self.time.utcoffset() # tzinfo=timezone(timedelta(hours=tz_hours))
|
| 34 |
+
|
| 35 |
def format(self, record: logging.LogRecord) -> str: #
|
| 36 |
payload = {
|
| 37 |
#"ts": datetime.now(timezone.utc).isoformat(), ## default to 'YYYY-MM-DD HH:MM:SS.mmmmmm',
|
| 38 |
+
#"ts": datetime.now(timezone.utc).strftime("%Y-%m-%d : %H:%M:%S"), ## SMY: interested in datefmt="%H:%M:%S",
|
| 39 |
+
"ts": self.time.strftime(self.date_format), ## SMY: interested in datefmt="%H:%M:%S",
|
| 40 |
"level": record.levelname,
|
| 41 |
"logger": record.name,
|
| 42 |
"message": record.getMessage(),
|