Spaces:

InternRobotics
/

InternNav-Eval-Demo

Running

File size: 12,585 Bytes

ad1357a

# main.py
# 主入口文件，负责启动 Gradio UI
import gradio as gr
from config import SCENE_CONFIGS, MODEL_CHOICES, MODE_CHOICES
from backend_api import submit_to_backend, get_task_status, get_task_result
from logging_utils import log_access, log_submission, is_request_allowed
from simulation import stream_simulation_results, convert_to_h264
from ui_components import update_history_display, update_scene_display, update_log_display, get_scene_instruction
import os
from datetime import datetime

# In-process registry mapping a Gradio session hash to the backend task id most
# recently submitted from that session. run_simulation() writes an entry after a
# successful submission; cleanup_session() pops it when the session unloads so
# the still-registered task can be terminated.
SESSION_TASKS = {}

def run_simulation(scene, model, mode, prompt, history, request: gr.Request):
    """Submit a navigation task to the backend and stream its video output.

    This function is a generator used as a Gradio event handler: every yielded
    ``(video, history)`` pair updates the two outputs it is wired to.

    Args:
        scene: Scene key selected in the UI; forwarded to the backend.
        model: Model choice; forwarded to the backend and written to the log.
        mode: Mode choice; forwarded to the backend.
        prompt: Navigation instruction text.
        history: List of previous result entries held in the session state.
        request: Gradio request object, used for the client IP and session hash.

    Yields:
        ``(video_path, history)`` while streaming, then a final pair: for a
        completed task ``(None, updated_history)``; for a terminated task the
        partial ``output.mp4`` (or ``None`` when it does not exist) with the
        unchanged history.

    Raises:
        gr.Error: when the IP is rate limited, the submission is rejected, the
            result folder is missing, streaming fails, or the task ends in a
            failed/unknown state.
    """
    import time

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # The request (or its client info) may be missing; degrade gracefully
    # instead of raising AttributeError.
    user_ip = request.client.host if request and request.client else "unknown"
    session_id = request.session_hash if request else None

    if not is_request_allowed(user_ip):
        log_submission(scene, prompt, model, user_ip, "IP blocked temporarily")
        raise gr.Error("Too many requests from this IP. Please wait and try again one minute later.")

    # Forward the selected mode and model to the backend.
    submission_result = submit_to_backend(scene, prompt, mode, model, user_ip)
    if submission_result.get("status") != "pending":
        log_submission(scene, prompt, model, user_ip, "Submission failed")
        raise gr.Error(f"Submission failed: {submission_result.get('message', 'unknown issue')}")

    try:
        task_id = submission_result["task_id"]
        if session_id is not None:
            # Remember the task so cleanup_session() can terminate it on unload.
            SESSION_TASKS[session_id] = task_id
        gr.Info(f"Simulation started, task_id: {task_id}")
        # Presumably gives the backend time to create the result folder before
        # the first status poll -- TODO confirm against the backend.
        time.sleep(5)
        status = get_task_status(task_id)
        result_folder = status.get("result", "")
    except Exception as e:
        log_submission(scene, prompt, model, user_ip, str(e))
        raise gr.Error(f"error occurred when parsing submission result from backend: {str(e)}") from e

    if not os.path.exists(result_folder):
        log_submission(scene, prompt, model, user_ip, "Result folder provided by backend doesn't exist")
        raise gr.Error(f"Result folder provided by backend doesn't exist: <PATH>{result_folder}")

    try:
        for video_path in stream_simulation_results(result_folder, task_id):
            if video_path:
                yield video_path, history
    except Exception as e:
        log_submission(scene, prompt, model, user_ip, str(e))
        raise gr.Error(f"流式输出过程中出错: {str(e)}") from e

    # Streaming has ended; ask the backend how the task finished.
    status = get_task_status(task_id)
    state = status.get("status")

    if state == "completed":
        # Fall back to the folder reported earlier if the final status omits it.
        final_folder = status.get("result") or result_folder
        video_path = convert_to_h264(os.path.join(final_folder, "output.mp4"))
        new_entry = {
            "timestamp": timestamp,
            "scene": scene,
            "model": model,
            "mode": mode,
            "prompt": prompt,
            "video_path": video_path,
        }
        # Entries are appended, so keep the LAST ten; slicing from the front
        # would silently discard every new result once the history is full.
        updated_history = (history + [new_entry])[-10:]
        log_submission(scene, prompt, model, user_ip, "success")
        gr.Info("Simulation completed successfully!")
        yield None, updated_history
    elif state == "failed":
        log_submission(scene, prompt, model, user_ip, status.get('result', 'backend error'))
        raise gr.Error(f"任务执行失败: {status.get('result', 'backend 未知错误')}")
    elif state == "terminated":
        log_submission(scene, prompt, model, user_ip, "terminated")
        video_path = os.path.join(result_folder, "output.mp4")
        # A generator's return value is discarded by Gradio, so surface the
        # message as a toast and yield values matching the two wired outputs.
        if os.path.exists(video_path):
            gr.Warning(f"⚠️ 任务 {task_id} 被终止，已生成部分结果")
            yield video_path, history
        else:
            gr.Warning(f"⚠️ 任务 {task_id} 被终止，未生成结果")
            yield None, history
    else:
        log_submission(scene, prompt, model, user_ip, "missing task's status from backend")
        raise gr.Error("missing task's status from backend")

def cleanup_session(request: gr.Request):
    """Ask the backend to terminate the task registered for a closing session.

    Removes the session's entry from ``SESSION_TASKS`` and, if a task id was
    registered, posts a terminate request for it. The call is best-effort: a
    failure is logged and never raised.

    Args:
        request: Gradio request object carrying the session hash.
    """
    session_id = getattr(request, "session_hash", None)
    task_id = SESSION_TASKS.pop(session_id, None)
    if not task_id:
        # Nothing was submitted from this session; skip the imports and the call.
        return

    import logging

    import requests
    from config import BACKEND_URL

    try:
        requests.post(f"{BACKEND_URL}/predict/terminate/{task_id}", timeout=3)
    except Exception:
        # Deliberately best-effort, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Failed to terminate task %s during session cleanup", task_id, exc_info=True
        )

def record_access(request: gr.Request):
    """Log one page access and return the refreshed log display.

    Args:
        request: Gradio request object; may be missing, in which case both the
            IP and the user agent are recorded as ``"unknown"``.

    Returns:
        Whatever ``update_log_display()`` returns, for the wired output.
    """
    if request:
        client = request.client
        user_ip = client.host if client else "unknown"
        user_agent = request.headers.get("user-agent", "unknown")
    else:
        # Guard both fields the same way; the header lookup would otherwise
        # raise AttributeError on a missing request.
        user_ip = "unknown"
        user_agent = "unknown"
    log_access(user_ip, user_agent)
    return update_log_display()



# Stylesheet passed to gr.Blocks(css=...). The #simulation-panel and
# #result-panel rules target the two columns created with those elem_id values;
# the ".dark" variants override their backgrounds in dark mode.
# NOTE(review): no component in the visible layout sets elem_classes to
# "history-container" or "history-accordion" -- confirm these rules still apply.
custom_css = """

#simulation-panel {

    border-radius: 8px;

    padding: 20px;

    background: #f9f9f9;

    box-shadow: 0 2px 4px rgba(0,0,0,0.1);

}

#result-panel {

    border-radius: 8px;

    padding: 20px;

    background: #f0f8ff;

}

.dark #simulation-panel { background: #2a2a2a; }

.dark #result-panel { background: #1a2a3a; }

.history-container {

    max-height: 600px;

    overflow-y: auto;

    margin-top: 20px;

}

.history-accordion {

    margin-bottom: 10px;

}

"""

# Static banner rendered at the top of the page via gr.HTML: institution logo,
# demo title/subtitle, and links to GitHub, Hugging Face and the sibling
# InternManip demo. The title and subtitle name InternNav so that they agree
# with the title given to gr.Blocks; the button intentionally still points to
# the InternManip demo.
header_html = """

<div style="display: flex; justify-content: space-between; align-items: center; width: 100%; margin-bottom: 20px; padding: 20px; background: linear-gradient(135deg, #e0e5ec 0%, #a7b5d0 100%); border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">

    <div style="display: flex; align-items: center;">

        <img src="https://www.shlab.org.cn/static/img/index_14.685f6559.png" alt="Institution Logo" style="height: 60px; margin-right: 20px;">

        <div>

            <h1 style="margin: 0; color: #2c3e50; font-weight: 600;">🤖 InternNav Model Inference Demo</h1>

            <p style="margin: 4px 0 0 0; color: #5d6d7e; font-size: 0.9em;">Model trained on InternNav framework</p>

        </div>

    </div>

    <div style="display: flex; gap: 15px; align-items: center;">

        <a href="https://github.com/OpenRobotLab" target="_blank" style="text-decoration: none; transition: transform 0.2s;" onmouseover="this.style.transform='scale(1.1)'" onmouseout="this.style.transform='scale(1)'">

            <img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" style="height: 30px;">

        </a>

        <a href="https://huggingface.co/OpenRobotLab" target="_blank" style="text-decoration: none; transition: transform 0.2s;" onmouseover="this.style.transform='scale(1.1)'" onmouseout="this.style.transform='scale(1)'">

            <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="HuggingFace" style="height: 30px;">

        </a>

        <a href="http://123.57.187.96:55004/" target="_blank">

            <button style="padding: 8px 15px; background: #3498db; color: white; border: none; border-radius: 4px; cursor: pointer; font-weight: 500; transition: all 0.2s;" 

                    onmouseover="this.style.backgroundColor='#2980b9'; this.style.transform='scale(1.05)'" 

                    onmouseout="this.style.backgroundColor='#3498db'; this.style.transform='scale(1)'">

                Go to InternManip Demo

            </button>

        </a>

    </div>

</div>

"""



def _refresh_scene(scene):
    """Return the values for the description, preview and prompt components.

    update_scene_display() is called once and its first two items are reused,
    instead of invoking it separately for each output.
    """
    scene_view = update_scene_display(scene)
    return [scene_view[0], scene_view[1], get_scene_instruction(scene)]


# Build the page: settings column on the left, live video plus history on the
# right, a log viewer, example inputs, and the event wiring.
with gr.Blocks(title="InternNav Model Inference Demo", css=custom_css) as demo:
    gr.HTML(header_html)

    # Per-session list of finished runs; run_simulation returns the updated list.
    history_state = gr.State([])
    with gr.Row():
        with gr.Column(elem_id="simulation-panel"):
            gr.Markdown("### Simulation Settings")
            scene_dropdown = gr.Dropdown(
                label="Choose a scene",
                choices=list(SCENE_CONFIGS.keys()),
                value="demo1",
                interactive=True
            )
            scene_description = gr.Markdown("")
            scene_preview = gr.Image(
                label="Scene Preview",
                elem_classes=["scene-preview"],
                interactive=False
            )
            prompt_input = gr.Textbox(
                label="Navigation Prompt",
                value="Walk past the left side of the bed and stop in the doorway.",
                placeholder="e.g.: 'Walk past the left side of the bed and stop in the doorway.'",
                lines=2,
                max_lines=4
            )
            model_dropdown = gr.Dropdown(
                label="Choose a pretrained model",
                choices=MODEL_CHOICES,
                value=MODEL_CHOICES[0],
                interactive=True
            )
            mode_dropdown = gr.Dropdown(
                label="Select Mode",
                choices=MODE_CHOICES,
                value=MODE_CHOICES[0],
                interactive=True
            )
            # Changing the scene refreshes its description, preview and prompt.
            scene_dropdown.change(
                fn=_refresh_scene,
                inputs=scene_dropdown,
                outputs=[scene_description, scene_preview, prompt_input]
            )

            submit_btn = gr.Button("Start Navigation Simulation", variant="primary")
        with gr.Column(elem_id="result-panel"):
            gr.Markdown("### Latest Simulation Result")
            video_output = gr.Video(
                label="Live",
                interactive=False,
                format="mp4",
                autoplay=True,
                streaming=True
            )
            with gr.Column() as history_container:
                gr.Markdown("### History")
                gr.Markdown("#### History will be reset after refresh")
                # Ten hidden slots are created up front; update_history_display
                # receives all of their components as outputs.
                history_slots = []
                for i in range(10):
                    with gr.Column(visible=False) as slot:
                        with gr.Accordion(visible=False, open=False) as accordion:
                            video = gr.Video(interactive=False)
                            detail_md = gr.Markdown()
                    history_slots.append((slot, accordion, video, detail_md))
    with gr.Accordion("查看系统访问日志(DEV ONLY)", open=False):
        logs_display = gr.Markdown()
        refresh_logs_btn = gr.Button("刷新日志", variant="secondary")
        refresh_logs_btn.click(
            update_log_display,
            outputs=logs_display
        )
    gr.Examples(
        examples=[
            ["demo1", "rdp", "vlnPE", "Walk past the left side of the bed and stop in the doorway."],
            ["demo2", "rdp", "vlnPE", "Walk through the bathroom, past the sink and toilet. Stop in front of the counter with the two suitcase."],
            ["demo3", "rdp", "vlnPE", "Do a U-turn. Walk forward through the kitchen, heading to the black door. Walk out of the door and take a right onto the deck. Walk out on to the deck and stop."],
            ["demo4", "rdp", "vlnPE", "Walk out of bathroom and stand on white bath mat."],
            ["demo5", "rdp", "vlnPE", "Walk straight through the double wood doors, follow the red carpet straight to the next doorway and stop where the carpet splits off."]
        ],
        inputs=[scene_dropdown, model_dropdown, mode_dropdown, prompt_input],
        label="Navigation Task Examples"
    )
    # Run the simulation, then redraw the history slots, then refresh the logs.
    submit_btn.click(
        fn=run_simulation,
        inputs=[scene_dropdown, model_dropdown, mode_dropdown, prompt_input, history_state],
        outputs=[video_output, history_state],
        queue=True,
        api_name="run_simulation"
    ).then(
        fn=update_history_display,
        inputs=history_state,
        outputs=[comp for slot in history_slots for comp in slot],
        queue=True
    ).then(
        fn=update_log_display,
        outputs=logs_display,
    )
    # On page load: show the default scene and the current logs.
    demo.load(
        fn=lambda: update_scene_display("demo1"),
        outputs=[scene_description, scene_preview]
    ).then(
        fn=update_log_display,
        outputs=logs_display
    )
    # On page load: record the visitor's access.
    demo.load(
        fn=record_access,
        inputs=None,
        outputs=logs_display,
        queue=False
    )
    demo.queue(default_concurrency_limit=8)
    # When the session unloads, terminate the task it still has registered.
    demo.unload(fn=cleanup_session)

if __name__ == "__main__":
    # Collect the launch settings first, then start the server with them.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,  # default port for a Hugging Face Space
        "share": False,
        "debug": False,  # keeping debug off is recommended in production
        "allowed_paths": ["./assets", "./logs"],  # changed to relative paths
    }
    demo.launch(**launch_options)