semmyk committed on
Commit
75fe90d
·
1 Parent(s): 0237b88

baseline08_beta0.3.8_03Oct25: fixing slow Marker: moved the "@spaces.GPU" decorator to extract()

Browse files

@spaces.GPU AssertionError: daemonic processes are not allowed to have children
raise error("ZeroGPU worker error", res.error_cls)

Files changed (2) hide show
  1. converters/pdf_to_md.py +6 -0
  2. ui/gradio_ui.py +2 -2
converters/pdf_to_md.py CHANGED
@@ -5,6 +5,8 @@ from typing import List, Dict, Union, Optional
5
  import traceback ## Extract, format and print information about Python stack traces.
6
  import time
7
 
 
 
8
  from converters.extraction_converter import DocumentConverter #, DocumentExtractor #as docextractor #ExtractionConverter #get_extraction_converter ## SMY: should disuse
9
  from file_handler.file_utils import write_markdown, dump_images, collect_pdf_paths, collect_html_paths, collect_markdown_paths, create_outputdir
10
 
@@ -108,6 +110,10 @@ class PdfToMarkdownConverter:
108
 
109
  # This global will be set (re-initialised) in each worker after init_worker runs
110
 
 
 
 
 
111
  ## moved from extraction_converter ( to standalone extract_to_md)
112
  #def extract(self, src_path: str, output_dir: str) -> Dict[str, int, Union[str, Path]]:
113
  def extract(self, src_path: str, output_dir: str): #Dict:
 
5
  import traceback ## Extract, format and print information about Python stack traces.
6
  import time
7
 
8
+ import spaces
9
+
10
  from converters.extraction_converter import DocumentConverter #, DocumentExtractor #as docextractor #ExtractionConverter #get_extraction_converter ## SMY: should disuse
11
  from file_handler.file_utils import write_markdown, dump_images, collect_pdf_paths, collect_html_paths, collect_markdown_paths, create_outputdir
12
 
 
110
 
111
  # This global will be set (re-initialised) in each worker after init_worker runs
112
 
113
+
114
+ #duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
115
+ duration = 10
116
+ @spaces.GPU(duration=duration) ## HF Spaces GPU support
117
  ## moved from extraction_converter ( to standalone extract_to_md)
118
  #def extract(self, src_path: str, output_dir: str) -> Dict[str, int, Union[str, Path]]:
119
  def extract(self, src_path: str, output_dir: str): #Dict:
ui/gradio_ui.py CHANGED
@@ -261,8 +261,8 @@ def convert_batch(
261
  #progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
262
  #progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
263
  #time.sleep(0.25)'''
264
- duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
265
- @spaces.GPU(duration=duration) ## HF Spaces GPU support
266
  def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
267
  #Use progress.tqdm to integrate with the executor map
268
  #results = pool.map(pdf2md_converter.convert_files, pdf_files) ##SMY iterables #max_retries #output_dir_string)
 
261
  #progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
262
  #progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
263
  #time.sleep(0.25)'''
264
+ #duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
265
+ #@spaces.GPU(duration=duration) ## HF Spaces GPU support
266
  def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
267
  #Use progress.tqdm to integrate with the executor map
268
  #results = pool.map(pdf2md_converter.convert_files, pdf_files) ##SMY iterables #max_retries #output_dir_string)