# MinerU / app.py
# Last commit: "Update app.py" by myhloli (ba4797d, verified)
# Copyright (c) Opendatalab. All rights reserved.
import os
import json
from loguru import logger
def run_command(command):
    """Run *command* through the shell and report a non-zero status.

    Startup is best-effort: a failing command is logged as a warning and
    never raised, so the remaining steps still execute.

    Args:
        command: Shell command line passed verbatim to ``os.system``.

    Returns:
        The raw status value returned by ``os.system``.
    """
    status = os.system(command)
    if status != 0:
        logger.warning('Command exited with status {}: {}', status, command)
    return status


def apply_config_overrides(config, api_key=None):
    """Apply this app's overrides to an already-parsed config, in place.

    The LaTeX delimiter override is always written. The LLM-aided title
    settings are only touched when *api_key* is truthy and the config
    already contains a ``llm-aided-config.title_aided`` mapping; the
    section is never created here, so its other keys stay whatever the
    config file shipped with.

    Args:
        config: Parsed JSON object (a ``dict``) to modify.
        api_key: API key for the title-aided LLM, or ``None``/empty to
            leave the LLM settings untouched.

    Returns:
        ``True`` if every requested override was applied, ``False`` if an
        API key was given but the ``title_aided`` section was missing.
    """
    config['latex-delimiter-config'] = {
        'display': {'left': '\\[', 'right': '\\]'},
        'inline': {'left': '\\(', 'right': '\\)'},
    }
    if not api_key:
        return True

    llm_section = config.get('llm-aided-config')
    title_aided = (
        llm_section.get('title_aided')
        if isinstance(llm_section, dict)
        else None
    )
    if not isinstance(title_aided, dict):
        return False

    title_aided['api_key'] = api_key
    title_aided['enable'] = True
    title_aided['model'] = "qwen3-next-80b-a3b-instruct"
    return True


def update_config_file(config_path, api_key=None):
    """Rewrite the JSON config at *config_path* with this app's overrides.

    Args:
        config_path: Path of an existing JSON config file.
        api_key: Forwarded to ``apply_config_overrides``.

    Raises:
        OSError: If the file cannot be opened (``'r+'`` requires it to
            exist already).
        ValueError: If the file does not contain valid JSON.
    """
    with open(config_path, 'r+', encoding='utf-8') as file:
        config = json.load(file)
        if not apply_config_overrides(config, api_key):
            logger.warning(
                "'apikey' is set but {} has no 'llm-aided-config' -> "
                "'title_aided' section; LLM-aided titles were not enabled",
                config_path,
            )
        # Serialise first so the file is only truncated once the new
        # content is known to be writable.
        payload = json.dumps(config, indent=4)
        file.seek(0)  # move the file pointer back to the start
        file.truncate()  # drop the previous content
        file.write(payload)  # write the updated config


if __name__ == '__main__':
    # Replace any preinstalled mineru with the dev branch, then fetch all
    # models from Hugging Face. To download from ModelScope instead, use
    # 'mineru-models-download -s modelscope -m all'.
    for setup_command in (
        'pip uninstall -y mineru',
        'pip install git+https://github.com/myhloli/Magic-PDF.git@dev',
        'pip install -U mineru_vl_utils',
        'mineru-models-download -s huggingface -m all',
    ):
        run_command(setup_command)

    # Set before the UI is launched; the child process inherits them.
    os.environ['MINERU_MODEL_SOURCE'] = "local"
    os.environ['GRADIO_SSR_MODE'] = "false"
    os.environ['MINERU_PDF_RENDER_TIMEOUT'] = "10"

    # NOTE(review): ~/mineru.json is presumably created by the model
    # download step above -- confirm. A failure here is logged and the UI
    # is still started.
    try:
        update_config_file(
            os.path.join(os.path.expanduser('~'), 'mineru.json'),
            os.getenv('apikey'),
        )
    except Exception as e:
        logger.exception(e)

    run_command('mineru-gradio --enable-vllm-engine true --enable-api false --max-convert-pages 20 --latex-delimiters-type b --gpu-memory-utilization 0.5')