semmyk committed on
Commit
8df8835
·
1 Parent(s): 1510c2c

rebaseline07_21Sept25_01 - Errno13 log file permission - HF logout gr.LoginButton

Browse files
file_handler/file_utils.py CHANGED
@@ -25,7 +25,7 @@ def create_outputdir(root: Union[str, Path], output_dir_string:str = None) -> Pa
25
  ## map to img_path. Opt to putting output within same output_md folder rather than individual source folders
26
  output_dir_string = output_dir_string if output_dir_string else "output_dir" ##redundant SMY: default to outputdir in config file = "output_md"
27
  output_dir = Path("data") / output_dir_string #"output_md" ##SMY: concatenating output str with src Path
28
- output_dir.mkdir(mode=0o2644, parents=True, exist_ok=True)
29
  return output_dir
30
 
31
  def find_file(file_name: str) -> Path: #configparser.ConfigParser:
@@ -134,13 +134,12 @@ def check_create_logfile(filename: str, dir_path: Union[str, Path]="logs") -> Pa
134
  writable_dir = project_root / dir_path if isinstance(dir_path, str) else Path(dir_path)
135
 
136
  try:
137
-
138
- writable_dir.mkdir(mode=0o2644, parents=True, exist_ok=True)
139
  except PermissionError: ##[Errno 13] Permission denied: '/home/user/app/logs/app_logging_2025-09-18.log'
140
  warnings.warn("[Errno 13] Permission denied, possibly insufficient permission or Persistent Storage not enable: attempting chmod 0o2644")
141
  #writable_dir = Path(tempfile.gettempdir()) #
142
- writable_dir.mkdir(mode=0o2644, parents=True, exist_ok=True)
143
- writable_dir.chmod(0o2644)
144
  if not writable_dir.is_dir():
145
  warnings.warn(f"Working without log files in directory: {writable_dir}")
146
 
@@ -148,7 +147,7 @@ def check_create_logfile(filename: str, dir_path: Union[str, Path]="logs") -> Pa
148
  # `mkdir()` with `exist_ok=True` prevents a FileExistsError if the folder exists.
149
  logs_dir = writable_dir / dir_path #project_root / dir_path
150
  if not logs_dir.is_dir():
151
- logs_dir.mkdir(mode=0o2644, parents=True, exist_ok=True)
152
 
153
  # 4. Create log file with a timestamp inside the new logs directory.
154
  # This ensures a unique log file is created for the day the script runs.
@@ -196,7 +195,7 @@ def check_create_file(filename: str, dir_path: Union[str, Path]="logs") -> Path:
196
  # `exist_ok=True` prevents an error if the directory already exists.
197
  dir_path = project_root / dir_path
198
  if not dir_path.is_dir():
199
- dir_path.mkdir(parents=True, exist_ok=True, mode=0o2664) #, mode=0o2644)
200
  #dir_path.chmod(0)
201
 
202
  file_path = dir_path / filename # Concatenate directory and filename to get full path
@@ -344,7 +343,7 @@ def write_markdown(
344
  #md_path = Path("data\\pdf") / "output_md" / f"{src.stem}" / md_name ##debug
345
  md_path = Path("data") / output_dir / f"{src.stem}" / md_name ##debug
346
  ##SMY: [resolved] Permission Errno13 - https://stackoverflow.com/a/57454275
347
- md_path.parent.mkdir(mode=0o2644, parents=True, exist_ok=True) ##SMY: create nested md_path if not exists
348
  md_path.parent.chmod(0)
349
 
350
  try:
 
25
  ## map to img_path. Opt to putting output within same output_md folder rather than individual source folders
26
  output_dir_string = output_dir_string if output_dir_string else "output_dir" ##redundant SMY: default to outputdir in config file = "output_md"
27
  output_dir = Path("data") / output_dir_string #"output_md" ##SMY: concatenating output str with src Path
28
+ output_dir.mkdir(mode=0o2755, parents=True, exist_ok=True) #,mode=0o2755
29
  return output_dir
30
 
31
  def find_file(file_name: str) -> Path: #configparser.ConfigParser:
 
134
  writable_dir = project_root / dir_path if isinstance(dir_path, str) else Path(dir_path)
135
 
136
  try:
137
+ writable_dir.mkdir(mode=0o2755, parents=True, exist_ok=True)
 
138
  except PermissionError: ##[Errno 13] Permission denied: '/home/user/app/logs/app_logging_2025-09-18.log'
139
  warnings.warn("[Errno 13] Permission denied, possibly insufficient permission or Persistent Storage not enable: attempting chmod 0o2644")
140
  #writable_dir = Path(tempfile.gettempdir()) #
141
+ writable_dir.mkdir(mode=0o2755, parents=True, exist_ok=True)
142
+ writable_dir.chmod(0o2755)
143
  if not writable_dir.is_dir():
144
  warnings.warn(f"Working without log files in directory: {writable_dir}")
145
 
 
147
  # `mkdir()` with `exist_ok=True` prevents a FileExistsError if the folder exists.
148
  logs_dir = writable_dir / dir_path #project_root / dir_path
149
  if not logs_dir.is_dir():
150
+ logs_dir.mkdir(mode=0o2755, parents=True, exist_ok=True)
151
 
152
  # 4. Create log file with a timestamp inside the new logs directory.
153
  # This ensures a unique log file is created for the day the script runs.
 
195
  # `exist_ok=True` prevents an error if the directory already exists.
196
  dir_path = project_root / dir_path
197
  if not dir_path.is_dir():
198
+ dir_path.mkdir(parents=True, exist_ok=True, mode=0o2755) #, mode=0o2644)
199
  #dir_path.chmod(0)
200
 
201
  file_path = dir_path / filename # Concatenate directory and filename to get full path
 
343
  #md_path = Path("data\\pdf") / "output_md" / f"{src.stem}" / md_name ##debug
344
  md_path = Path("data") / output_dir / f"{src.stem}" / md_name ##debug
345
  ##SMY: [resolved] Permission Errno13 - https://stackoverflow.com/a/57454275
346
+ md_path.parent.mkdir(mode=0o2755, parents=True, exist_ok=True) ##SMY: create nested md_path if not exists
347
  md_path.parent.chmod(0)
348
 
349
  try:
llm/hf_client.py CHANGED
@@ -99,8 +99,11 @@ class HFChatClient:
99
  logger.warning("hf_login_failed", extra={"error": str(exc)})
100
  # Silent fallback; client will still work if token is passed directly
101
  #pass
102
- '''
103
- login_huggingface(self.token) if not is_login_huggingface() else logger.log(level=20, msg=f"logged in to HF Hub already") ## attempt login if not already logged in. NB: HF CLI login prompt would not display in Process Worker.
 
 
 
104
 
105
  @staticmethod
106
  def _normalise_history(history: list, system_message: str, latest_user_message: str) -> list[dict]:
 
99
  logger.warning("hf_login_failed", extra={"error": str(exc)})
100
  # Silent fallback; client will still work if token is passed directly
101
  #pass
102
+ '''
103
+
104
+ login_huggingface(self.token) if not is_login_huggingface() else logger.log(level=20, msg=f"You are logged in to HF Hub already") ## attempt login if not already logged in. NB: HF CLI login prompt would not display in Process Worker.
105
+ ##SMY: TODO: Mapped with openai_client.py
106
+ #self.islogged_in = is_login_huggingface()
107
 
108
  @staticmethod
109
  def _normalise_history(history: list, system_message: str, latest_user_message: str) -> list[dict]:
llm/llm_login.py CHANGED
@@ -1,4 +1,4 @@
1
- from huggingface_hub import login, logout
2
  import os
3
  import traceback
4
  from time import sleep
@@ -30,12 +30,17 @@ def login_huggingface(token: Optional[str] = None):
30
  token = token
31
  # Privacy-first login: try interactive CLI first; fallback to provided/env token only if needed
32
  try:
33
- login()
34
- sleep(5) ##SMY pause for login. Helpful: pool async opex
35
- logger.info("βœ”οΈ hf_login already", extra={"mode": "cli"})
 
 
 
 
 
36
  except Exception as exc:
37
  # Respect common env var names; prefer explicit token arg when provided
38
- fallback_token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
39
  if fallback_token:
40
  try:
41
  login(token=fallback_token)
 
1
+ from huggingface_hub import HfApi, login, logout, get_token
2
  import os
3
  import traceback
4
  from time import sleep
 
30
  token = token
31
  # Privacy-first login: try interactive CLI first; fallback to provided/env token only if needed
32
  try:
33
+ if HfApi.whoami():
34
+ logger.info("βœ”οΈ hf_login already", extra={"mode": "cli"})
35
+ #return True
36
+ else:
37
+ login()
38
+ sleep(5) ##SMY pause for login. Helpful: pool async opex
39
+ logger.info("βœ”οΈ hf_login already", extra={"mode": "cli"})
40
+ #return True
41
  except Exception as exc:
42
  # Respect common env var names; prefer explicit token arg when provided
43
+ fallback_token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") or get_token()
44
  if fallback_token:
45
  try:
46
  login(token=fallback_token)
llm/openai_client.py CHANGED
@@ -45,11 +45,12 @@ class OpenAIChatClient:
45
  #self.token = api_token if api_token else None ##debug
46
  self.token = openai_api_key_env if openai_api_key_env else api_token #dotenv.get_key(".env", "OPENAI_API_KEY")
47
  #self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") ## not preferred
48
- login_huggingface(self.token) if not is_login_huggingface() else logger.log(level=20, msg=f"logged in to HF Hub already") ## attempt login if not already logged in. NB: HF CLI login prompt would not display in Process Worker.
49
  #self.fake_token = api_token or "a1b2c3" #or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
50
  self.openai_api_key = self.token #self.fake_token
51
  self.temperature = temperature
52
  self.top_p = top_p
 
53
 
54
  logger.log(level=2, msg="initialised OpenAIChatClient:", extra={"base_url": self.base_url, "openai_api_key": self.openai_api_key})
55
 
 
45
  #self.token = api_token if api_token else None ##debug
46
  self.token = openai_api_key_env if openai_api_key_env else api_token #dotenv.get_key(".env", "OPENAI_API_KEY")
47
  #self.token = token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") ## not preferred
48
+ login_huggingface(self.token) if not is_login_huggingface() else logger.log(level=20, msg=f"You are logged in to HF Hub already") ## attempt login if not already logged in. NB: HF CLI login prompt would not display in Process Worker.
49
  #self.fake_token = api_token or "a1b2c3" #or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
50
  self.openai_api_key = self.token #self.fake_token
51
  self.temperature = temperature
52
  self.top_p = top_p
53
+ self.islogged_in = is_login_huggingface()
54
 
55
  logger.log(level=2, msg="initialised OpenAIChatClient:", extra={"base_url": self.base_url, "openai_api_key": self.openai_api_key})
56
 
ui/gradio_ui.py CHANGED
@@ -248,39 +248,7 @@ def convert_pdfs_to_md(file: gr.File | None, folder: str | None) -> dict:
248
  # Gradio expects a dict of {filename: content}
249
  return results
250
 
251
- ## SMY: to refactor and moved to logic file. Currently unused
252
- def convert_htmls_to_md(file: gr.File | None, folder: str | None) -> dict:
253
- """
254
- Gradio callback for HTML → Markdown.
255
- Accepts either a single file or a folder path (recursively).
256
- Returns a dictionary of filename → Markdown string.
257
- """
258
- if not file and not folder:
259
- return {"error": "Please provide a HTML file or a folder."}
260
-
261
- html_paths = []
262
-
263
- # Single file
264
- if file:
265
- html_path = Path(file.name)
266
- html_paths.append(html_path)
267
-
268
- # Folder (recursively)
269
- if folder:
270
- try:
271
- html_paths.extend(collect_html_paths(folder))
272
- except Exception as exc:
273
- logger.exception("Folder traversal failed.")
274
- return {"error": str(exc)}
275
-
276
- if not html_paths:
277
- return {"error": "No HTML files found."}
278
-
279
- results = html2md_converter.batch_convert(html_paths)
280
- # Gradio expects a dict of {filename: content}
281
- return results
282
-
283
- ## SMY: to refactor and moved to logic file
284
  def convert_md_to_pdf(file: gr.File | None, folder: str | None) -> list[gr.File]:
285
  """
286
  Gradio callback for Markdown → PDF.
@@ -314,6 +282,42 @@ def convert_md_to_pdf(file: gr.File | None, folder: str | None) -> list[gr.File]
314
  # Convert to Gradio File objects
315
  gr_files = [gr.File(path=str(p)) for p in pdf_files]
316
  return gr_files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  ##====================
318
 
319
  def build_interface() -> gr.Blocks:
@@ -530,19 +534,25 @@ def build_interface() -> gr.Blocks:
530
  max_retries_sl.change(update_state_stored_value, inputs=max_retries_sl, outputs=state_max_retries)
531
 
532
 
533
- with gr.Accordion("πŸ€— HuggingFace Logout", open=False):
534
  # Logout controls
535
  def do_logout():
536
  #ok = docextractor.client.logout()
537
  ok = docconverter.client.logout()
538
  # Reset token textbox on successful logout
539
- msg = "βœ… Logged out of Hugging Face and cleared tokens." if ok else "⚠️ Logout failed."
540
- return gr.update(value=""), gr.update(visible=True, value=msg)
 
 
 
541
 
542
  logout_status = gr.Markdown(visible=False)
543
- logout_btn = gr.Button("Logout from Hugging Face", variant="stop")
 
 
544
 
545
- logout_btn.click(fn=do_logout, inputs=None, outputs=[api_token_tb, logout_status])
 
546
 
547
 
548
  # The gr.State component to hold the accumulated list of files
 
248
  # Gradio expects a dict of {filename: content}
249
  return results
250
 
251
+ ## SMY: to be implemented AND to refactor and moved to logic file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def convert_md_to_pdf(file: gr.File | None, folder: str | None) -> list[gr.File]:
253
  """
254
  Gradio callback for Markdown → PDF.
 
282
  # Convert to Gradio File objects
283
  gr_files = [gr.File(path=str(p)) for p in pdf_files]
284
  return gr_files
285
+
286
+
287
+ ## SMY: to refactor and moved to logic file. Currently unused
288
+ '''
289
+ def convert_htmls_to_md(file: gr.File | None, folder: str | None) -> dict:
290
+ """
291
+ Gradio callback for HTML → Markdown.
292
+ Accepts either a single file or a folder path (recursively).
293
+ Returns a dictionary of filename → Markdown string.
294
+ """
295
+ if not file and not folder:
296
+ return {"error": "Please provide a HTML file or a folder."}
297
+
298
+ html_paths = []
299
+
300
+ # Single file
301
+ if file:
302
+ html_path = Path(file.name)
303
+ html_paths.append(html_path)
304
+
305
+ # Folder (recursively)
306
+ if folder:
307
+ try:
308
+ html_paths.extend(collect_html_paths(folder))
309
+ except Exception as exc:
310
+ logger.exception("Folder traversal failed.")
311
+ return {"error": str(exc)}
312
+
313
+ if not html_paths:
314
+ return {"error": "No HTML files found."}
315
+
316
+ results = html2md_converter.batch_convert(html_paths)
317
+ # Gradio expects a dict of {filename: content}
318
+ return results
319
+ '''
320
+
321
  ##====================
322
 
323
  def build_interface() -> gr.Blocks:
 
534
  max_retries_sl.change(update_state_stored_value, inputs=max_retries_sl, outputs=state_max_retries)
535
 
536
 
537
+ with gr.Accordion("πŸ€— HuggingFace Client Logout", open=True): #, open=False):
538
  # Logout controls
539
  def do_logout():
540
  #ok = docextractor.client.logout()
541
  ok = docconverter.client.logout()
542
  # Reset token textbox on successful logout
543
+ msg = "βœ… Logged out of HuggingFace and cleared tokens. Remember to log out of HuggingFace completely." if ok else "⚠️ Logout failed."
544
+ return gr.update(value=""), gr.update(visible=True, value=msg), gr.update(value="Sign in to HuggingFace πŸ€—")
545
+
546
+ def custom_do_logout():
547
+ return gr.update(value="Sign in to HuggingFace πŸ€—")
548
 
549
  logout_status = gr.Markdown(visible=False)
550
+ with gr.Row:
551
+ hf_login_logout_btn = gr.LoginButton(value="Sign in to HuggingFace πŸ€—", logout_value="Logout of HF: ({})", variant="huggingface")
552
+ logout_btn = gr.Button("Logout from session and Hugging Face (inference) Client", variant="stop", )
553
 
554
+ hf_login_logout_btn.click(fn=custom_do_logout, inputs=None, outputs=hf_login_logout_btn)
555
+ logout_btn.click(fn=do_logout, inputs=None, outputs=[api_token_tb, logout_status, hf_login_logout_btn])
556
 
557
 
558
  # The gr.State component to hold the accumulated list of files