File size: 14,722 Bytes
3edb646 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d13d3631",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using CPU. Note: This module is much faster with a GPU.\n"
]
},
{
"ename": "ValueError",
"evalue": "Please set API_KEY",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[5], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgradio\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgr\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ChemEagle \u001b[38;5;66;03m# 假设内部已经管理 API Key\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Chem\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mChem\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rdChemReactions, Draw, AllChem\n",
"File \u001b[0;32m/media/chenyufan/F/ChemEagle-hf/main.py:10\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mPIL\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Image\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mget_molecular_agent\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m process_reaction_image_with_multiple_products_and_text_correctR\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mget_reaction_agent\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_reaction_withatoms_correctR\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mget_R_group_sub_agent\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m process_reaction_image_with_table_R_group, process_reaction_image_with_product_variant_R_group,get_full_reaction,get_multi_molecular_full\n",
"File \u001b[0;32m/media/chenyufan/F/ChemEagle-hf/get_molecular_agent.py:35\u001b[0m\n\u001b[1;32m 33\u001b[0m API_KEY \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAPI_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m API_KEY:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease set API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 36\u001b[0m AZURE_ENDPOINT \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAZURE_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_multi_molecular\u001b[39m(image_path: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mlist\u001b[39m:\n",
"\u001b[0;31mValueError\u001b[0m: Please set API_KEY"
]
}
],
"source": [
"import ast\n",
"import html\n",
"import json\n",
"import os\n",
"import re\n",
"import tempfile\n",
"\n",
"import cairosvg\n",
"import gradio as gr\n",
"from rdkit import Chem\n",
"from rdkit.Chem import rdChemReactions, Draw, AllChem\n",
"from rdkit.Chem.Draw import rdMolDraw2D\n",
"\n",
"from main import ChemEagle  # assumes API-key handling is managed internally\n",
"\n",
"example_diagram = \"examples/exp.png\"\n",
"rdkit_image = \"examples/rdkit.png\"\n",
"\n",
"# 解析 ChemEagle 返回的结构化数据\n",
"def parse_reactions(output_json):\n",
"    \"\"\"Convert ChemEagle's structured result into display HTML and reaction SMILES.\n",
"\n",
"    Args:\n",
"        output_json: A JSON string or an already-decoded dict. Reactions are\n",
"            read from its \"reactions\" key; each reaction may provide\n",
"            \"reaction_id\", \"reactants\", \"condition\", \"products\" and\n",
"            \"additional_info\".\n",
"\n",
"    Returns:\n",
"        A (detailed_output, smiles_output) tuple of two lists with one entry\n",
"        per reaction: an HTML snippet describing the reaction, and the plain\n",
"        reactants>>products reaction SMILES.\n",
"\n",
"    Raises:\n",
"        json.JSONDecodeError: If output_json is a string that is not valid JSON.\n",
"    \"\"\"\n",
"\n",
"    def _esc(value):\n",
"        # Extracted values are free text produced upstream, so escape them\n",
"        # before embedding them in HTML; text without <, > or & is unchanged.\n",
"        return html.escape(str(value), quote=False)\n",
"\n",
"    def _smiles(entry):\n",
"        # Missing, null and empty SMILES all fall back to the same placeholder.\n",
"        return entry.get(\"smiles\") or \"Unknown\"\n",
"\n",
"    def _condition_label(cond):\n",
"        # Prefer the SMILES, then the free-text name, then a placeholder.\n",
"        name = cond.get(\"smiles\") or cond.get(\"text\") or \"Unknown\"\n",
"        role = cond.get(\"role\") or \"Unknown\"\n",
"        return _esc(f\"{name}[{role}]\")\n",
"\n",
"    def _spans(color, labels):\n",
"        # Wrap every (already escaped) label in a coloured <span>.\n",
"        return [f\"<span style='color:{color}'>{label}</span>\" for label in labels]\n",
"\n",
"    reactions_data = json.loads(output_json) if isinstance(output_json, str) else output_json\n",
"    # The \"or\" guards tolerate an explicit null where a dict/list is expected.\n",
"    reactions_list = (reactions_data or {}).get(\"reactions\") or []\n",
"    detailed_output = []\n",
"    smiles_output = []\n",
"\n",
"    for reaction in reactions_list:\n",
"        reaction_id = _esc(reaction.get(\"reaction_id\", \"Unknown ID\"))\n",
"        reactant_smiles = [_smiles(r) for r in reaction.get(\"reactants\") or []]\n",
"        product_smiles = [_smiles(p) for p in reaction.get(\"products\") or []]\n",
"        condition_labels = [_condition_label(c) for c in reaction.get(\"condition\") or []]\n",
"        additional_str = [_esc(x) for x in reaction.get(\"additional_info\") or [] if x]\n",
"\n",
"        reactants_html = [_esc(s) for s in reactant_smiles]\n",
"        products_html = [_esc(s) for s in product_smiles]\n",
"\n",
"        tail_str = \", \".join(_spans(\"black\", condition_labels) + additional_str)\n",
"        full_reaction = (\n",
"            f\"{'.'.join(reactants_html)}&gt;&gt;\"\n",
"            f\"{'.'.join(_spans('black', products_html))} | {tail_str}\"\n",
"        )\n",
"        full_reaction = f\"<span style='color:black'>{full_reaction}</span>\"\n",
"\n",
"        detailed_output.append(\n",
"            f\"<b>Reaction: </b> {reaction_id}<br>\"\n",
"            f\" Reactants: <span style='color:blue'>{', '.join(reactants_html)}</span><br>\"\n",
"            f\" Conditions: {', '.join(_spans('red', condition_labels))}<br>\"\n",
"            f\" Products: {', '.join(_spans('orange', products_html))}<br>\"\n",
"            f\" additional_info: {', '.join(additional_str)}<br>\"\n",
"            f\" <b>Full Reaction:</b> {full_reaction}<br><br>\"\n",
"        )\n",
"\n",
"        # The machine-readable reaction SMILES is plain text, so it stays unescaped.\n",
"        smiles_output.append(f\"{'.'.join(reactant_smiles)}>>{'.'.join(product_smiles)}\")\n",
"\n",
"    return detailed_output, smiles_output\n",
"\n",
"def process_chem_image(image):\n",
"    \"\"\"Run ChemEagle on an uploaded image and build the values for the Gradio outputs.\n",
"\n",
"    Args:\n",
"        image: The uploaded picture as a PIL image (the input component is\n",
"            declared with type=\"pil\"), or None when nothing was uploaded.\n",
"\n",
"    Returns:\n",
"        A 4-tuple: the joined HTML of all parsed reactions, the list of\n",
"        reaction SMILES, the path of the schematic diagram, and the path of\n",
"        the JSON file holding the raw ChemEagle result.\n",
"\n",
"    Raises:\n",
"        gr.Error: If no image was provided.\n",
"    \"\"\"\n",
"    if image is None:\n",
"        # Surface a readable message in the UI instead of an AttributeError on None.save.\n",
"        raise gr.Error(\"Please upload a reaction image first.\")\n",
"\n",
"    # One private directory per request, so concurrent sessions cannot\n",
"    # overwrite each other's input image or JSON result.\n",
"    work_dir = tempfile.mkdtemp(prefix=\"chemeagle_\")\n",
"    image_path = os.path.join(work_dir, \"temp_image.png\")\n",
"    image.save(image_path)\n",
"\n",
"    chemeagle_result = ChemEagle(image_path)\n",
"    detailed, smiles = parse_reactions(chemeagle_result)\n",
"\n",
"    json_path = os.path.join(work_dir, \"output.json\")\n",
"    with open(json_path, \"w\", encoding=\"utf-8\") as jf:\n",
"        # Keep non-ASCII text readable in the downloadable file.\n",
"        json.dump(chemeagle_result, jf, indent=2, ensure_ascii=False)\n",
"\n",
"    return \"\\n\\n\".join(detailed), smiles, example_diagram, json_path\n",
"\n",
"# Custom styling for the Run button. It is passed to gr.Blocks(css=...) below\n",
"# rather than assigned to demo.css after the layout has been built.\n",
"CUSTOM_CSS = \"\"\"\n",
"#submit-btn {\n",
"    background-color: #FF914D;\n",
"    color: white;\n",
"    font-weight: bold;\n",
"}\n",
"\"\"\"\n",
"\n",
"\n",
"def _split_reaction_smiles(raw):\n",
"    \"\"\"Recover the individual reaction SMILES from the hidden textbox value.\n",
"\n",
"    The Run callback hands a Python list to a Textbox output, so the value\n",
"    presumably arrives as the list's string form, e.g.\n",
"    ['CCO>>CC=O', 'C>>CO']. That form is parsed with ast.literal_eval so that\n",
"    brackets, quotes and backslashes belonging to a SMILES (e.g. [Na+]) are\n",
"    preserved. Plain comma-separated text is accepted as a fallback.\n",
"\n",
"    Args:\n",
"        raw: The textbox value (a string, a list/tuple, or None).\n",
"\n",
"    Returns:\n",
"        A list of non-empty reaction SMILES strings.\n",
"    \"\"\"\n",
"    if not raw:\n",
"        return []\n",
"    if isinstance(raw, (list, tuple)):\n",
"        items = list(raw)\n",
"    else:\n",
"        text = str(raw).strip()\n",
"        try:\n",
"            parsed = ast.literal_eval(text)\n",
"        except (ValueError, SyntaxError, TypeError):\n",
"            parsed = None\n",
"        if isinstance(parsed, (list, tuple)):\n",
"            items = list(parsed)\n",
"        elif isinstance(parsed, str):\n",
"            items = [parsed]\n",
"        else:\n",
"            # Not a Python literal: treat it as comma-separated SMILES and trim\n",
"            # only surrounding whitespace/quotes, never characters inside.\n",
"            quote_chars = \"'\" + '\"'\n",
"            items = [part.strip().strip(quote_chars) for part in text.split(\",\")]\n",
"    cleaned = [str(item).strip() for item in items]\n",
"    return [item for item in cleaned if item]\n",
"\n",
"\n",
"def _render_reaction_png(reaction_smiles, png_path):\n",
"    \"\"\"Draw one reaction with RDKit in ACS 1996 style and save it as a PNG.\n",
"\n",
"    Args:\n",
"        reaction_smiles: Reaction SMILES such as CCO>>CC=O.\n",
"        png_path: Destination path of the PNG file.\n",
"\n",
"    Returns:\n",
"        True when the image was written, False when RDKit returned no\n",
"        reaction object for the input.\n",
"    \"\"\"\n",
"    rxn = rdChemReactions.ReactionFromSmarts(reaction_smiles, useSmiles=True)\n",
"    if not rxn:\n",
"        return False\n",
"\n",
"    # Rebuild the reaction from templates round-tripped through a mol block,\n",
"    # exactly as before. NOTE(review): presumably this yields standalone\n",
"    # molecules suitable for drawing - confirm before simplifying.\n",
"    cleaned_rxn = AllChem.ChemicalReaction()\n",
"    for mol in rxn.GetReactants():\n",
"        cleaned_rxn.AddReactantTemplate(Chem.MolFromMolBlock(Chem.MolToMolBlock(mol)))\n",
"    for mol in rxn.GetProducts():\n",
"        cleaned_rxn.AddProductTemplate(Chem.MolFromMolBlock(Chem.MolToMolBlock(mol)))\n",
"\n",
"    templates = list(cleaned_rxn.GetReactants()) + list(cleaned_rxn.GetProducts())\n",
"\n",
"    # Remove atom-map numbers so they are not drawn.\n",
"    for mol in templates:\n",
"        for atom in mol.GetAtoms():\n",
"            atom.SetAtomMapNum(0)\n",
"\n",
"    # Reference bond length: the first template that has bonds (reactants\n",
"    # first, then products); 1.0 when no template has any bond. This also\n",
"    # covers reactions without reactants, which previously raised.\n",
"    bond_len = next(\n",
"        (Draw.MeanBondLength(mol) for mol in templates if mol.GetNumBonds() > 0),\n",
"        1.0,\n",
"    )\n",
"\n",
"    # Draw to SVG, then convert to PNG in memory (no intermediate SVG file).\n",
"    drawer = rdMolDraw2D.MolDraw2DSVG(-1, -1)\n",
"    dopts = drawer.drawOptions()\n",
"    dopts.padding = 0.1\n",
"    dopts.includeRadicals = True\n",
"    Draw.SetACS1996Mode(dopts, bond_len * 0.55)\n",
"    dopts.bondLineWidth = 1.5\n",
"    drawer.DrawReaction(cleaned_rxn)\n",
"    drawer.FinishDrawing()\n",
"    svg = drawer.GetDrawingText()\n",
"    cairosvg.svg2png(bytestring=svg.encode(\"utf-8\"), write_to=png_path)\n",
"    return True\n",
"\n",
"\n",
"with gr.Blocks(css=CUSTOM_CSS) as demo:\n",
"    gr.Markdown(\n",
"        \"\"\"\n",
"        <center><h1>ChemEagle: A Multi-Agent System for Multimodal Chemical Information Extraction</h1></center>\n",
"        Upload a multimodal reaction image to extract multimodal chemical information.\n",
"        \"\"\"\n",
"    )\n",
"\n",
"    with gr.Row():\n",
"        # Left column: image upload plus the Clear / Run buttons.\n",
"        with gr.Column(scale=1):\n",
"            image_input = gr.Image(type=\"pil\", label=\"Upload a multimodal reaction image\")\n",
"            with gr.Row():\n",
"                clear_btn = gr.Button(\"Clear\")\n",
"                run_btn = gr.Button(\"Run\", elem_id=\"submit-btn\")\n",
"\n",
"        # Middle column: human-readable reactions and the schematic diagram.\n",
"        with gr.Column(scale=1):\n",
"            gr.Markdown(\"### Parsed Reactions\")\n",
"            reaction_output = gr.HTML(label=\"Detailed Reaction Output\")\n",
"            gr.Markdown(\"### Schematic Diagram\")\n",
"            schematic_diagram = gr.Image(value=example_diagram, label=\"示意图\")\n",
"\n",
"        # Right column: per-reaction SMILES, RDKit drawings and the JSON download.\n",
"        with gr.Column(scale=1):\n",
"            gr.Markdown(\"### Machine-readable Output\")\n",
"            # Hidden holder for the reaction SMILES; show_split renders its content.\n",
"            smiles_output = gr.Textbox(\n",
"                label=\"Reaction SMILES\",\n",
"                show_copy_button=True,\n",
"                interactive=False,\n",
"                visible=False\n",
"            )\n",
"\n",
"            @gr.render(inputs=smiles_output)\n",
"            def show_split(inputs):\n",
"                \"\"\"Render one SMILES textbox and, when drawable, one image per reaction.\"\"\"\n",
"                smiles_list = _split_reaction_smiles(inputs)\n",
"                if not smiles_list:\n",
"                    # Nothing to show yet: render the placeholder pair.\n",
"                    return (\n",
"                        gr.Textbox(label=\"SMILES of Reaction i\"),\n",
"                        gr.Image(value=rdkit_image, label=\"RDKit Image of Reaction i\", height=100),\n",
"                    )\n",
"\n",
"                # Fresh directory per render so concurrent sessions never\n",
"                # overwrite each other's reaction_<n>.png files.\n",
"                out_dir = tempfile.mkdtemp(prefix=\"chemeagle_rxn_\")\n",
"                components = []\n",
"                for i, smiles in enumerate(smiles_list, start=1):\n",
"                    # The SMILES textbox is always added, even if drawing fails.\n",
"                    components.append(gr.Textbox(value=smiles, label=f\"SMILES of Reaction {i}\", show_copy_button=True, interactive=False))\n",
"                    png_file = os.path.join(out_dir, f\"reaction_{i}.png\")\n",
"                    try:\n",
"                        drawn = _render_reaction_png(smiles, png_file)\n",
"                    except Exception as e:\n",
"                        # Best effort: a reaction that cannot be drawn is logged and skipped.\n",
"                        print(f\"Failed to draw reaction {i} for SMILES '{smiles}': {e}\")\n",
"                        continue\n",
"                    if drawn:\n",
"                        components.append(gr.Image(value=png_file, label=f\"RDKit Image of Reaction {i}\"))\n",
"                return components\n",
"\n",
"            download_json = gr.File(label=\"Download JSON File\")\n",
"\n",
"    gr.Examples(\n",
"        examples=[\n",
"            [\"examples/reaction1.jpg\"],\n",
"            [\"examples/reaction2.png\"],\n",
"            [\"examples/reaction3.png\"],\n",
"            [\"examples/reaction4.png\"],\n",
"        ],\n",
"        inputs=[image_input],\n",
"        outputs=[reaction_output, smiles_output, schematic_diagram, download_json],\n",
"        cache_examples=False,\n",
"        examples_per_page=4,\n",
"    )\n",
"\n",
"    # Clear resets the upload and every generated output except the schematic diagram.\n",
"    clear_btn.click(\n",
"        lambda: (None, None, None, None),\n",
"        inputs=[],\n",
"        outputs=[image_input, reaction_output, smiles_output, download_json]\n",
"    )\n",
"    run_btn.click(\n",
"        process_chem_image,\n",
"        inputs=[image_input],\n",
"        outputs=[reaction_output, smiles_output, schematic_diagram, download_json]\n",
"    )\n",
"\n",
"# Launch once the layout context has been closed.\n",
"demo.launch()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "openchemie",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|