Spaces:

semmyk
/

parserPDF

Sleeping

semmyk committed on Oct 3

Commit

75fe90d

1 Parent(s): 0237b88

baseline08_beta0.3.8_03Oct25: fixing slow Marker: moved @spaces.GPU to extract()

@spaces.GPU AssertionError: daemonic processes are not allowed to have children
raise error("ZeroGPU worker error", res.error_cls)

Files changed (2) hide show

converters/pdf_to_md.py CHANGED Viewed

@@ -5,6 +5,8 @@ from typing import List, Dict, Union, Optional
 import traceback  ## Extract, format and print information about Python stack traces.
 import time
 from converters.extraction_converter import DocumentConverter  #, DocumentExtractor #as docextractor #ExtractionConverter  #get_extraction_converter  ## SMY: should disuse
 from file_handler.file_utils import write_markdown, dump_images, collect_pdf_paths, collect_html_paths, collect_markdown_paths, create_outputdir
@@ -108,6 +110,10 @@ class PdfToMarkdownConverter:
     # This global will be set (re-initialised) in each worker after init_worker runs
     ## moved from extraction_converter ( to standalone extract_to_md)
     #def extract(self, src_path: str, output_dir: str) -> Dict[str, int, Union[str, Path]]:
     def extract(self, src_path: str, output_dir: str):   #Dict:

 import traceback  ## Extract, format and print information about Python stack traces.
 import time
+import spaces
 from converters.extraction_converter import DocumentConverter  #, DocumentExtractor #as docextractor #ExtractionConverter  #get_extraction_converter  ## SMY: should disuse
 from file_handler.file_utils import write_markdown, dump_images, collect_pdf_paths, collect_html_paths, collect_markdown_paths, create_outputdir
     # This global will be set (re-initialised) in each worker after init_worker runs
+    #duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
+    duration = 10
+    @spaces.GPU(duration=duration)   ## HF Spaces GPU support
     ## moved from extraction_converter ( to standalone extract_to_md)
     #def extract(self, src_path: str, output_dir: str) -> Dict[str, int, Union[str, Path]]:
     def extract(self, src_path: str, output_dir: str):   #Dict:

ui/gradio_ui.py CHANGED Viewed

@@ -261,8 +261,8 @@ def convert_batch(
                     #progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
                     #progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
                     #time.sleep(0.25)'''
-                duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
-                @spaces.GPU(duration=duration)   ## HF Spaces GPU support
                 def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
                     #Use progress.tqdm to integrate with the executor map
                     #results = pool.map(pdf2md_converter.convert_files, pdf_files)  ##SMY iterables  #max_retries #output_dir_string)

                     #progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
                     #progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
                     #time.sleep(0.25)'''
+                #duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
+                #@spaces.GPU(duration=duration)   ## HF Spaces GPU support
                 def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
                     #Use progress.tqdm to integrate with the executor map
                     #results = pool.map(pdf2md_converter.convert_files, pdf_files)  ##SMY iterables  #max_retries #output_dir_string)