File size: 11,920 Bytes
3edb646 1d58fcb 3edb646 0b7d2b5 3edb646 1d58fcb 3edb646 fce40b2 3edb646 fce40b2 3edb646 fce40b2 3edb646 8d1c68f a1b2be0 8d1c68f a1b2be0 8d1c68f 3edb646 516fa01 8d1c68f 516fa01 3edb646 6c3e75e 8d1c68f 6c3e75e 3edb646 e5bd3d6 8d1a40d 3edb646 6c3e75e 3edb646 6c3e75e 0b7d2b5 6c3e75e 3edb646 8d1c68f 3edb646 2f29c70 3edb646 8d1c68f 3edb646 8d1c68f 3edb646 8d1c68f a1b2be0 8d1c68f 3edb646 1d58fcb 3edb646 6c3e75e 3edb646 6c3e75e 3edb646 6c3e75e 3edb646 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
import json
import os
import re
import tempfile

import cairosvg
import gradio as gr
from rdkit import Chem
from rdkit.Chem import rdChemReactions, Draw, AllChem
from rdkit.Chem.Draw import rdMolDraw2D

from main import ChemEagle
# Image shown in the "Schematic Diagram" panel and returned alongside every result.
example_diagram = "examples/exp.png"
# Placeholder picture used when there is nothing to draw or a SMILES cannot be rendered.
rdkit_image = "examples/rdkit.png"
def _smiles_or_unknown(entry):
    """Return ``entry["smiles"]``, or ``"Unknown"`` when it is missing or ``None``."""
    value = entry.get("smiles", "Unknown")
    return "Unknown" if value is None else value


def parse_reactions(output_json):
    """Format reaction data as HTML snippets plus plain reaction SMILES.

    Args:
        output_json: A JSON string or an already-decoded mapping holding a
            top-level ``"reactions"`` list. Each reaction may provide
            ``reaction_id``, ``reactants``, ``products``, ``conditions`` (or
            the singular ``condition``) and ``additional_info``. Any of these
            may be absent or ``null``.

    Returns:
        ``(detailed_output, smiles_output)``: one HTML description and one
        ``reactants>>products`` SMILES string per reaction, in input order.
    """
    reactions_data = json.loads(output_json) if isinstance(output_json, str) else output_json

    detailed_output = []
    smiles_output = []
    # ``or []`` treats an explicit JSON null like a missing key instead of
    # crashing when the value is iterated.
    for reaction in reactions_data.get("reactions") or []:
        reaction_id = reaction.get("reaction_id", "Unknown ID")
        reactants = [_smiles_or_unknown(r) for r in reaction.get("reactants") or []]
        product_smiles = [_smiles_or_unknown(p) for p in reaction.get("products") or []]

        # Fall back to the singular key only when "conditions" is absent/null;
        # an explicitly empty "conditions" list is respected.
        conds = reaction.get("conditions")
        if conds is None:
            conds = reaction.get("condition")
        condition_labels = [
            f"{c.get('smiles', c.get('text', 'Unknown'))}[{c.get('role', 'Unknown')}]"
            for c in conds or []
        ]
        conditions = [f"<span style='color:red'>{label}</span>" for label in condition_labels]
        conditions_black = [f"<span style='color:black'>{label}</span>" for label in condition_labels]

        products = [f"<span style='color:orange'>{s}</span>" for s in product_smiles]
        products_black = [f"<span style='color:black'>{s}</span>" for s in product_smiles]

        additional = reaction.get("additional_info") or []
        if isinstance(additional, str):
            # A bare string would otherwise be iterated character by character.
            additional = [additional]
        additional_str = [str(x) for x in additional if x]

        reactant_side = ".".join(reactants)
        tail_str = ", ".join(conditions_black + additional_str)
        full_reaction = f"{reactant_side}>>{'.'.join(products_black)} | {tail_str}"
        full_reaction = f"<span style='color:black'>{full_reaction}</span>"

        reaction_output = "".join((
            f"<b>Reaction: </b> {reaction_id}<br>",
            f" Reactants: <span style='color:blue'>{', '.join(reactants)}</span><br>",
            f" Conditions: {', '.join(conditions)}<br>",
            f" Products: {', '.join(products)}<br>",
            f" additional_info: {', '.join(additional_str)}<br>",
            f" <b>Full Reaction:</b> {full_reaction}<br><br>",
        ))
        detailed_output.append(reaction_output)
        smiles_output.append(f"{reactant_side}>>{'.'.join(product_smiles)}")
    return detailed_output, smiles_output
def parse_mol(output_json):
    """Format single- or multi-molecule output for display.

    Accepts a JSON string or an already-decoded mapping with a ``"molecules"``
    list and returns ``(detailed_output, smiles_output)`` in the same shape as
    ``parse_reactions``: one HTML snippet and one SMILES value per molecule.
    """
    payload = output_json
    if isinstance(payload, str):
        payload = json.loads(payload)

    html_blocks, smiles_values = [], []
    for position, entry in enumerate(payload.get("molecules", []), start=1):
        smiles_text = entry.get("smiles", "Unknown")
        # Molecules without a label are numbered from 1.
        name = entry.get("label", f"Mol {position}")
        box = entry.get("bbox", [])
        html_blocks.append("".join((
            f"<b>Molecule:</b> {name}<br>",
            f" SMILES: <span style='color:blue'>{smiles_text}</span><br>",
            f" bbox: {box}<br><br>",
        )))
        smiles_values.append(smiles_text)
    return html_blocks, smiles_values
def process_chem_image(image):
    """Run ChemEagle on an image and build the values for the UI outputs.

    Args:
        image: Image object exposing ``save(path)``; the UI supplies a PIL
            image.

    Returns:
        ``(html, smiles, example_diagram, json_path)``: the joined HTML
        description, the list of SMILES strings, the schematic image path and
        the path of the JSON dump of the raw ChemEagle result.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload a chemical graphic first.")

    # Each call gets its own directory so concurrent sessions cannot overwrite
    # each other's upload or downloadable JSON. The directory is left in place
    # because the returned JSON path must stay readable after this returns.
    work_dir = tempfile.mkdtemp(prefix="chemeagle_")
    image_path = os.path.join(work_dir, "temp_image.png")
    image.save(image_path)

    chemeagle_result = ChemEagle(image_path)
    # A "molecules" key marks molecule-only output; anything else is parsed
    # as reactions.
    if "molecules" in chemeagle_result:
        detailed, smiles = parse_mol(chemeagle_result)
    else:
        detailed, smiles = parse_reactions(chemeagle_result)

    json_path = os.path.join(work_dir, "output.json")
    with open(json_path, "w", encoding="utf-8") as jf:
        json.dump(chemeagle_result, jf, indent=2)
    return "\n\n".join(detailed), smiles, example_diagram, json_path
def process_chem_image_api(image, api_key, endpoint):
    """Run the image pipeline with caller-supplied Azure credentials.

    Args:
        image: Image object passed through to ``process_chem_image``.
        api_key: Value stored in the ``API_KEY`` environment variable;
            ``None`` is stored as an empty string.
        endpoint: Value stored in ``AZURE_ENDPOINT``; may be empty or ``None``.

    Returns:
        The same ``(html, smiles, example_diagram, json_path)`` tuple as
        ``process_chem_image``.
    """
    # Credentials are handed over through environment variables; this assumes
    # ChemEagle reads API_KEY / AZURE_ENDPOINT from the environment.
    # NOTE(review): os.environ is process-wide, so concurrent requests with
    # different keys can overwrite each other — confirm this is acceptable.
    os.environ["API_KEY"] = api_key or ""
    os.environ["AZURE_ENDPOINT"] = endpoint or ""
    # Delegate instead of duplicating the save / extract / parse / dump steps.
    return process_chem_image(image)
def _split_smiles_items(raw):
    """Split the textbox text (a stringified list of SMILES) into clean items."""
    # Strip a leading "['" / "'" and a trailing "']" / "'" from each piece,
    # then drop any quote characters that remain.
    pieces = [re.sub(r"^\s*\[?'?|']?\s*$", "", piece) for piece in raw.split(",")]
    return [piece.replace('"', '').replace("'", "") for piece in pieces]


def _render_reaction_png(rxn, out_dir, index):
    """Draw *rxn* with ACS-1996 settings and return the PNG path inside *out_dir*."""
    # Rebuild the reaction from mol-block round-tripped templates.
    # NOTE(review): presumably done to get plain, drawable molecules — confirm.
    cleaned_rxn = AllChem.ChemicalReaction()
    for mol in rxn.GetReactants():
        cleaned_rxn.AddReactantTemplate(Chem.MolFromMolBlock(Chem.MolToMolBlock(mol)))
    for mol in rxn.GetProducts():
        cleaned_rxn.AddProductTemplate(Chem.MolFromMolBlock(Chem.MolToMolBlock(mol)))

    # Clear atom-map numbers so they are not drawn.
    for react in cleaned_rxn.GetReactants():
        for atom in react.GetAtoms():
            atom.SetAtomMapNum(0)
    for prod in cleaned_rxn.GetProducts():
        for atom in prod.GetAtoms():
            atom.SetAtomMapNum(0)

    # Scale the drawing from the first reactant that actually has bonds.
    react0 = cleaned_rxn.GetReactantTemplate(0)
    react1 = cleaned_rxn.GetReactantTemplate(1) if cleaned_rxn.GetNumReactantTemplates() > 1 else None
    if react0.GetNumBonds() > 0:
        bond_len = Draw.MeanBondLength(react0)
    elif react1 and react1.GetNumBonds() > 0:
        bond_len = Draw.MeanBondLength(react1)
    else:
        bond_len = 1.0

    drawer = rdMolDraw2D.MolDraw2DSVG(-1, -1)
    dopts = drawer.drawOptions()
    dopts.padding = 0.1
    dopts.includeRadicals = True
    Draw.SetACS1996Mode(dopts, bond_len * 0.55)
    dopts.bondLineWidth = 1.5
    drawer.DrawReaction(cleaned_rxn)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()

    svg_file = os.path.join(out_dir, f"reaction_{index}.svg")
    with open(svg_file, "w") as f:
        f.write(svg)
    png_file = os.path.join(out_dir, f"reaction_{index}.png")
    cairosvg.svg2png(url=svg_file, write_to=png_file)
    return png_file


def _render_molecule_png(smiles, out_dir, index):
    """Draw a molecule SMILES to a PNG in *out_dir*; return its path or ``None`` if unparsable."""
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        return None
    img = Draw.MolToImage(mol, size=(350, 150))
    img_file = os.path.join(out_dir, f"mol_{index}.png")
    img.save(img_file)
    return img_file


# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened source — confirm it matches the intended design.
with gr.Blocks() as demo:
    gr.Markdown(
        "\n"
        "<center><h1>ChemEAGLE: A Multi-Agent System Enables Versatile Information Extraction from the Chemical Literature</h1></center>\n"
        "Upload a chemical graphic to extract machine-readable chemical data.\n"
    )

    # Inputs: image plus the Azure credentials forwarded to the API-backed run.
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload a chemical graphic")
            api_key_input = gr.Textbox(label="Azure API Key", type="password", placeholder="Enter your Azure API Key")
            endpoint_input = gr.Textbox(label="Azure Endpoint", placeholder="e.g. https://xxx.openai.azure.com/")
            with gr.Row():
                clear_btn = gr.Button("Clear")
                run_btn = gr.Button("Run", elem_id="submit-btn")

        with gr.Column(scale=1):
            gr.Markdown("### Parsed Reactions")
            reaction_output = gr.HTML(label="Detailed Reaction Output")
            gr.Markdown("### Schematic Diagram")
            schematic_diagram = gr.Image(value=example_diagram, label="示意图")

        with gr.Column(scale=1):
            gr.Markdown("### Machine-readable Output")
            # Hidden textbox that carries the SMILES list; its value drives
            # the dynamic rendering in show_split below.
            smiles_output = gr.Textbox(
                label="Reaction SMILES",
                show_copy_button=True,
                interactive=False,
                visible=False
            )

            @gr.render(inputs=smiles_output)
            def show_split(inputs):
                """Render one SMILES textbox and one RDKit picture per extracted item."""
                if not inputs or (isinstance(inputs, str) and inputs.strip() == ""):
                    return gr.Textbox(label="SMILES of Reaction or Molecule i"), gr.Image(value=rdkit_image, label="RDKit Image", height=100)

                # Per-render directory: fixed file names in the working
                # directory would be overwritten by concurrent sessions.
                out_dir = tempfile.mkdtemp(prefix="chemeagle_render_")
                components = []
                for i, smiles_clean in enumerate(_split_smiles_items(inputs)):
                    # The SMILES textbox is always shown, drawable or not.
                    components.append(gr.Textbox(value=smiles_clean, label=f"SMILES of Item {i}", show_copy_button=True, interactive=False))
                    try:
                        # Try to read the item as a reaction SMILES first.
                        rxn = rdChemReactions.ReactionFromSmarts(smiles_clean, useSmiles=True)
                        is_rxn = rxn is not None and rxn.GetNumProductTemplates() > 0
                    except Exception:
                        is_rxn = False

                    if is_rxn:
                        try:
                            png_file = _render_reaction_png(rxn, out_dir, i)
                            components.append(gr.Image(value=png_file, label=f"RDKit Image of Reaction {i}"))
                        except Exception as e:
                            print(f"Failed to draw reaction {i} for SMILES '{smiles_clean}': {e}")
                            # Show the placeholder so every textbox keeps a
                            # matching image, as in the molecule branch.
                            components.append(gr.Image(value=rdkit_image, label="Invalid Reaction"))
                    else:
                        # Otherwise try to draw the item as a single molecule.
                        try:
                            img_file = _render_molecule_png(smiles_clean, out_dir, i)
                            if img_file:
                                components.append(gr.Image(value=img_file, label=f"RDKit Image of Molecule {i}"))
                            else:
                                components.append(gr.Image(value=rdkit_image, label="Invalid Molecule"))
                        except Exception as e:
                            print(f"Failed to draw molecule {i} for SMILES '{smiles_clean}': {e}")
                            components.append(gr.Image(value=rdkit_image, label="Invalid Molecule"))
                return components

            download_json = gr.File(label="Download JSON File")

    gr.Examples(
        examples=[
            ["examples/reaction0.png"],
            ["examples/reaction1.jpg"],
            ["examples/reaction2.png"],
            ["examples/reaction3.png"],
            ["examples/reaction4.png"],
            ["examples/reaction5.png"],
            ["examples/template1.png"],
            ["examples/molecules1.png"],
        ],
        inputs=[image_input],
        outputs=[reaction_output, smiles_output, schematic_diagram, download_json],
        cache_examples=False,
        examples_per_page=10,
    )

    # Clear resets the upload and every result widget except the schematic.
    clear_btn.click(
        lambda: (None, None, None, None),
        inputs=[],
        outputs=[image_input, reaction_output, smiles_output, download_json]
    )

    # Run uses the credential-aware entry point.
    run_btn.click(
        process_chem_image_api,
        inputs=[image_input, api_key_input, endpoint_input],
        outputs=[reaction_output, smiles_output, schematic_diagram, download_json]
    )

# Styling for the Run button (matched by elem_id="submit-btn").
demo.css = (
    "\n"
    "#submit-btn {\n"
    "background-color: #FF914D;\n"
    "color: white;\n"
    "font-weight: bold;\n"
    "}\n"
)
demo.launch()
|