Spaces:
Sleeping
Sleeping
baseline08_beta0.3.8_03Oct25: fixing slow Marker: moved @spaces.GPU to extract()
Browse files
@spaces.GPU AssertionError: daemonic processes are not allowed to have children
raise error("ZeroGPU worker error", res.error_cls)
- converters/pdf_to_md.py +6 -0
- ui/gradio_ui.py +2 -2
converters/pdf_to_md.py
CHANGED
|
@@ -5,6 +5,8 @@ from typing import List, Dict, Union, Optional
|
|
| 5 |
import traceback ## Extract, format and print information about Python stack traces.
|
| 6 |
import time
|
| 7 |
|
|
|
|
|
|
|
| 8 |
from converters.extraction_converter import DocumentConverter #, DocumentExtractor #as docextractor #ExtractionConverter #get_extraction_converter ## SMY: should disuse
|
| 9 |
from file_handler.file_utils import write_markdown, dump_images, collect_pdf_paths, collect_html_paths, collect_markdown_paths, create_outputdir
|
| 10 |
|
|
@@ -108,6 +110,10 @@ class PdfToMarkdownConverter:
|
|
| 108 |
|
| 109 |
# This global will be set (re-initialised) in each worker after init_worker runs
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
## moved from extraction_converter ( to standalone extract_to_md)
|
| 112 |
#def extract(self, src_path: str, output_dir: str) -> Dict[str, int, Union[str, Path]]:
|
| 113 |
def extract(self, src_path: str, output_dir: str): #Dict:
|
|
|
|
| 5 |
import traceback ## Extract, format and print information about Python stack traces.
|
| 6 |
import time
|
| 7 |
|
| 8 |
+
import spaces
|
| 9 |
+
|
| 10 |
from converters.extraction_converter import DocumentConverter #, DocumentExtractor #as docextractor #ExtractionConverter #get_extraction_converter ## SMY: should disuse
|
| 11 |
from file_handler.file_utils import write_markdown, dump_images, collect_pdf_paths, collect_html_paths, collect_markdown_paths, create_outputdir
|
| 12 |
|
|
|
|
| 110 |
|
| 111 |
# This global will be set (re-initialised) in each worker after init_worker runs
|
| 112 |
|
| 113 |
+
|
| 114 |
+
#duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
|
| 115 |
+
duration = 10
|
| 116 |
+
@spaces.GPU(duration=duration) ## HF Spaces GPU support
|
| 117 |
## moved from extraction_converter ( to standalone extract_to_md)
|
| 118 |
#def extract(self, src_path: str, output_dir: str) -> Dict[str, int, Union[str, Path]]:
|
| 119 |
def extract(self, src_path: str, output_dir: str): #Dict:
|
ui/gradio_ui.py
CHANGED
|
@@ -261,8 +261,8 @@ def convert_batch(
|
|
| 261 |
#progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 262 |
#progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 263 |
#time.sleep(0.25)'''
|
| 264 |
-
duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
|
| 265 |
-
|
| 266 |
def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
|
| 267 |
#Use progress.tqdm to integrate with the executor map
|
| 268 |
#results = pool.map(pdf2md_converter.convert_files, pdf_files) ##SMY iterables #max_retries #output_dir_string)
|
|
|
|
| 261 |
#progress((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 262 |
#progress2((10,16), desc=f"ProcessPoolExecutor: Pooling file conversion result: [{str(result_interim)}[:20]]")
|
| 263 |
#time.sleep(0.25)'''
|
| 264 |
+
#duration = 5.75 * pdf_files_count if pdf_files_count>=2 else 7
|
| 265 |
+
#@spaces.GPU(duration=duration) ## HF Spaces GPU support
|
| 266 |
def get_results_pool_map(pdf_files, pdf_files_count, progress2=gr.Progress()):
|
| 267 |
#Use progress.tqdm to integrate with the executor map
|
| 268 |
#results = pool.map(pdf2md_converter.convert_files, pdf_files) ##SMY iterables #max_retries #output_dir_string)
|