File size: 5,195 Bytes
a99c8d6 5ff3da0 a482122 5ff3da0 a482122 3331037 a482122 5ff3da0 a482122 5ff3da0 a482122 5ff3da0 a482122 5ff3da0 a482122 5ff3da0 d319fa6 a482122 d319fa6 5ff3da0 d319fa6 a482122 d319fa6 a482122 d319fa6 5ff3da0 e400a0b d319fa6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import base64
import mimetypes
import os

import gradio as gr
from openai import OpenAI
# OpenAI SDK client pointed at the Tencent Hunyuan API base URL; the API key
# is read from the HUNYUAN_API_KEY environment variable.
client = OpenAI(
    base_url="https://api.hunyuan.cloud.tencent.com/v1",
    api_key=os.environ.get("HUNYUAN_API_KEY"),
)
def generate_caption(image_path, question):
    """Stream the Hunyuan vision model's answer to a question about an image.

    The image file is read from disk, embedded in the request as a base64
    data URL, and sent together with the question in a single user message.

    Args:
        image_path: Filesystem path of the image to send.
        question: Text prompt sent alongside the image.

    Yields:
        The response text accumulated so far, once for every non-empty
        streamed token, so a consumer can display the answer as it grows.

    Raises:
        ValueError: If ``image_path`` is empty or ``None`` (no image given).
        OSError: If the image file cannot be opened or read.
    """
    # Fail with a clear message instead of the opaque TypeError that
    # open(None) would raise when no image has been provided.
    if not image_path:
        raise ValueError("请先上传图片")

    # Declare the real image type in the data URL rather than always claiming
    # JPEG; fall back to the previous default when the type cannot be guessed.
    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    # Convert the image to Base64
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Build the message structure: one user turn holding the text and the image
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}"
                }
            }
        ]
    }]

    # Call the Hunyuan vision model in streaming mode
    response = client.chat.completions.create(
        model="hunyuan-vision",
        messages=messages,
        stream=True,
        extra_body={
            "stream_moderation": True,
            "enable_enhancement": False
        }
    )

    # Consume the stream, yielding the cumulative text after each token
    full_response = ""
    for chunk in response:
        # A streamed chunk may carry no choices; skip it instead of
        # failing with IndexError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            full_response += token
            yield full_response
# ================= Custom styles =================
# Defined before the Blocks context so the stylesheet can be passed through
# the ``css=`` constructor argument instead of mutating ``demo.css`` later.
css = """
.preview-box img {border-radius: 10px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);}
.preview-box:hover img {transform: scale(1.02);}
button#generate {transition: all 0.3s ease;}
"""

# Build the Gradio interface
title = "Hunyuan-Vision图生文Demo"
theme = gr.themes.Soft(
    primary_hue="teal",
    secondary_hue="blue",
    font=[gr.themes.GoogleFont("Noto Sans SC"), "Arial", "sans-serif"]
)

with gr.Blocks(title=title, theme=theme, css=css) as demo:
    # ================= Header =================
    gr.Markdown(f"""
<div style="text-align: center;">
<h1 style="color: #2E86C1; border-bottom: 3px solid #AED6F1; padding-bottom: 10px;">🖼️ {title}</h1>
<p style="color: #616A6B;">上传图片并输入问题,体验腾讯混元视觉大模型的图像理解能力</p>
</div>
""")

    # ================= Main area =================
    with gr.Row(variant="panel"):
        # Left column: inputs
        with gr.Column(scale=3):
            # Input area. gr.Group's constructor has no ``label`` parameter,
            # so the section name is kept in this comment only.
            with gr.Group():
                image_input = gr.Image(
                    type="filepath",
                    label="上传图片",
                    height=400,
                    show_download_button=False,
                    elem_classes="preview-box"
                )
                question_input = gr.Textbox(
                    label="问题描述",
                    placeholder="请输入关于图片的问题...",
                    value="请详细描述图片中的场景、人物和细节",
                    lines=2
                )
                with gr.Row():
                    clear_btn = gr.Button("清空", variant="secondary")
                    # elem_id lets the ``button#generate`` CSS rule match
                    # this button; without it the rule selects nothing.
                    submit_btn = gr.Button(
                        "生成描述",
                        variant="primary",
                        elem_id="generate"
                    )

        # Right column: output
        with gr.Column(scale=4):
            # Result area (section name kept as a comment, see note above).
            with gr.Group():
                output = gr.Textbox(
                    label="描述内容",
                    interactive=False,
                    show_copy_button=True,
                    lines=12,
                    max_lines=20,
                    autoscroll=True
                )

    # ================= Examples =================
    with gr.Accordion("🖼️ 点击查看示例", open=False):
        with gr.Row():
            gr.Examples(
                examples=[
                    ["tencent.png", "图片中的天气状况如何?"],
                    ["tencent.png", "描述参会人员的衣着特征"]
                ],
                inputs=[image_input, question_input],
                label="快速示例"
            )

    # ================= Interaction logic =================
    # Stream the caption into the output box; also exposed as the
    # "generate" API endpoint.
    submit_btn.click(
        fn=generate_caption,
        inputs=[image_input, question_input],
        outputs=output,
        api_name="generate"
    )
    # Reset the image, the question and the output in one step.
    clear_btn.click(
        fn=lambda: [None, "", ""],
        outputs=[image_input, question_input, output],
        queue=False
    )
if __name__ == "__main__":
    # Enable the event queue with a default concurrency limit of 100.
    demo.queue(default_concurrency_limit=100)

    # Launch settings gathered in one place, then handed to launch().
    launch_kwargs = {
        "server_port": 7860,
        "show_error": True,
        "favicon_path": "favicon.ico",
        "max_threads": 100,
    }
    demo.launch(**launch_kwargs)
# # Create the Gradio interface (earlier minimal version, kept commented out for reference)
# title="Hunyuan-Vision图生文Demo"
# with gr.Blocks(title=title) as demo:
# gr.Markdown(f"# 🖼️ {title}")
# with gr.Row():
# with gr.Column():
# image_input = gr.Image(type="filepath", label="上传图片")
# question_input = gr.Textbox(label="输入问题", value="请描述图片内容")
# submit_btn = gr.Button("生成描述")
# output = gr.Textbox(label="描述结果", interactive=False)
# submit_btn.click(
# fn=generate_caption,
# inputs=[image_input, question_input],
# outputs=output
# )
# if __name__ == "__main__":
# demo.queue(default_concurrency_limit=100)
# demo.launch(max_threads=100) |