File size: 5,067 Bytes
5e9bd47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import html
import json
import os

import gradio as gr
import torch

from getReaction import generate_combined_image
from rxn.reaction import Reaction
from rxnim import RXNIM
# Directory that is scanned for task prompt files (*.txt).
PROMPT_DIR = "prompts/"
# Checkpoint path handed to the Reaction model constructed below.
ckpt_path = "./rxn/model/model.ckpt"
# Reaction model instance, created once at import time on the CPU device and
# reused by every request.
model = Reaction(ckpt_path, device=torch.device('cpu'))
# Mapping from prompt file name to the friendly name shown for it in the UI.
PROMPT_NAMES = {
    "2_RxnOCR.txt": "Reaction Image Parsing Workflow",
}
# Image path shown in the "Schematic Diagram" output of the interface.
example_diagram = "examples/exp.png"
def list_prompt_files_with_names():
    """
    List the ``.txt`` prompt files in ``PROMPT_DIR`` under their display names.

    A file that appears in ``PROMPT_NAMES`` uses its predefined friendly name;
    any other file gets the default name ``"Task: <file name without extension>"``.

    Files are visited in sorted order because ``os.listdir`` returns entries in
    arbitrary order, which would otherwise let the order of the returned
    mapping (and of anything built from it) change between runs.

    Returns:
        dict: ``{friendly_name: filename}`` for every ``.txt`` file found.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``PROMPT_DIR`` cannot be listed.
    """
    prompt_files = {}
    for filename in sorted(os.listdir(PROMPT_DIR)):
        if not filename.endswith(".txt"):
            continue
        # Fall back to a generated name when no friendly name is predefined.
        default_name = f"Task: {os.path.splitext(filename)[0]}"
        prompt_files[PROMPT_NAMES.get(filename, default_name)] = filename
    return prompt_files
def _escaped(value):
    """Return ``value`` as text with HTML special characters escaped."""
    return html.escape(str(value))


def _colored_span(color, content):
    """Wrap already-escaped ``content`` in a ``<span>`` with the given font color."""
    return f"<span style='color:{color}'>{content}</span>"


def parse_reactions(output_json):
    """
    Parse reaction data given as a JSON string into color-coded HTML snippets.

    The decoded JSON is expected to be an object whose optional ``"reactions"``
    entry is a list of reaction objects. Each reaction may carry
    ``"reaction_id"``, ``"reactants"``, ``"conditions"`` and ``"products"``;
    missing entries fall back to ``"Unknown ID"``, ``"Unknown"`` or an empty
    list. A condition is labelled by its ``"smiles"`` value, else its
    ``"text"`` value, followed by its ``"role"`` in square brackets.

    Every value taken from the JSON is HTML-escaped before it is placed in the
    markup, so characters such as ``<`` or ``&`` in the data are displayed
    literally instead of being interpreted as HTML.

    Args:
        output_json: JSON document as a string.

    Returns:
        list[str]: One HTML fragment per reaction, in input order.

    Raises:
        json.JSONDecodeError: If ``output_json`` is not valid JSON.
    """
    reactions_data = json.loads(output_json)
    detailed_output = []
    for reaction in reactions_data.get("reactions", []):
        reaction_id = _escaped(reaction.get("reaction_id", "Unknown ID"))
        reactants = [
            _escaped(r.get("smiles", "Unknown"))
            for r in reaction.get("reactants", [])
        ]
        # Uncolored "label[role]" texts; the color is applied per output line.
        conditions = [
            f"{_escaped(c.get('smiles', c.get('text', 'Unknown')))}"
            f"[{_escaped(c.get('role', 'Unknown'))}]"
            for c in reaction.get("conditions", [])
        ]
        products = [
            _escaped(p.get("smiles", "Unknown"))
            for p in reaction.get("products", [])
        ]

        # Full reaction string "reactants>>products | conditions", all in black.
        full_reaction = _colored_span(
            "black",
            f"{'.'.join(reactants)}>>"
            f"{'.'.join(_colored_span('black', p) for p in products)} | "
            f"{', '.join(_colored_span('black', c) for c in conditions)}",
        )

        # Detailed, color-coded listing for this reaction.
        lines = [
            f"<b>Reaction: </b> {reaction_id}<br>",
            f" Reactants: {_colored_span('blue', ', '.join(reactants))}<br>",
            f" Conditions: {', '.join(_colored_span('red', c) for c in conditions)}<br>",
            f" Products: {', '.join(_colored_span('orange', p) for p in products)}<br>",
            f" <b>Full Reaction:</b> {full_reaction}<br>",
            "<br>",
        ]
        detailed_output.append("".join(lines))
    return detailed_output
def process_chem_image(image, selected_task):
    """
    Run the reaction-parsing pipeline on an uploaded image for the Gradio UI.

    The image is saved to disk, passed together with the selected prompt file
    to ``RXNIM().process``, and the JSON string it returns is formatted with
    ``parse_reactions``. The same image file is then run through the
    module-level ``model`` and the predictions are handed to
    ``generate_combined_image``. The JSON result is also written to
    ``output.json`` so it can be downloaded.

    Args:
        image: Uploaded image; must provide ``save(path)`` (the interface is
            configured to pass a PIL image). ``None`` when nothing was uploaded.
        selected_task: Friendly task name; must be a key of
            ``prompts_with_names``. ``None`` when no task was selected.

    Returns:
        tuple: ``(html, combined_image_path, example_diagram, json_file_path)``
        where ``html`` is the per-reaction HTML joined by blank lines.

    Raises:
        gr.Error: If no image was uploaded or no known task was selected, so
            the UI shows a readable message instead of a raw traceback.
    """
    if image is None:
        raise gr.Error("Please upload a reaction image.")
    if selected_task not in prompts_with_names:
        raise gr.Error("Please select a predefined task.")

    chem_mllm = RXNIM()
    # Translate the friendly task name into the actual prompt file path.
    prompt_path = os.path.join(PROMPT_DIR, prompts_with_names[selected_task])

    # NOTE(review): the fixed file names below live in the working directory
    # and are shared by all requests; concurrent requests would overwrite each
    # other's files. Confirm whether per-request paths are needed.
    image_path = "temp_image.png"
    image.save(image_path)

    # Run RXNIM and turn its JSON result into structured HTML output.
    rxnim_result = chem_mllm.process(image_path, prompt_path)
    detailed_reactions = parse_reactions(rxnim_result)

    # Run the reaction model and build the combined visualization image.
    predictions = model.predict_image_file(image_path, molscribe=True, ocr=True)
    combined_image_path = generate_combined_image(predictions, image_path)

    # Re-serialize the result with indentation for the downloadable file.
    json_file_path = "output.json"
    with open(json_file_path, "w", encoding="utf-8") as json_file:
        json.dump(json.loads(rxnim_result), json_file, indent=4)

    return "\n\n".join(detailed_reactions), combined_image_path, example_diagram, json_file_path
# Resolve the available prompt files and their friendly names once at start-up.
prompts_with_names = list_prompt_files_with_names()

# Example rows offered in the UI: [image path, task name].
examples = [
    [example_image, "Reaction Image Parsing Workflow"]
    for example_image in (
        "examples/reaction1.png",
        "examples/reaction2.png",
        "examples/reaction3.png",
        "examples/reaction4.png",
    )
]

# Build the Gradio interface: an image plus a task choice in, four outputs back.
demo = gr.Interface(
    fn=process_chem_image,
    inputs=[
        gr.Image(type="pil", label="Upload Reaction Image"),
        # The radio buttons show the friendly task names.
        gr.Radio(choices=list(prompts_with_names), label="Select a predefined task"),
    ],
    outputs=[
        gr.HTML(label="Reaction outputs"),
        # Combined visualization image produced for the request.
        gr.Image(label="Visualization"),
        gr.Image(value=example_diagram, label="Schematic Diagram"),
        gr.File(label="Download JSON File"),
    ],
    title="Towards Large-scale Chemical Reaction Image Parsing via a Multimodal Large Language Model",
    description="Upload a reaction image and select a predefined task prompt.",
    # Examples are passed as a nested list, one inner list per row.
    examples=examples,
    examples_per_page=20,
)

demo.launch()
|