File size: 2,721 Bytes
c2cc0af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gradio as gr
from google import genai
from utils import *
from PIL import Image
import os


# Shared Gemini client; the API key is read from the GEMINI_API_KEY environment variable.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Every grading request reuses this configuration: the verifier prompt is sent
# as the system instruction, a fixed seed is passed, and the reply is requested
# as JSON matching list[Grading].
system_instruction = load_verifier_prompt()
generation_config = types.GenerateContentConfig(
    seed=1994,
    response_schema=list[Grading],
    response_mime_type="application/json",
    system_instruction=system_instruction,
)


def make_inputs(prompt, image):
    """Build the list of content parts for one grading request.

    Delegates to ``prepare_inputs`` and collects whatever it produces into a
    new list.
    """
    return list(prepare_inputs(prompt=prompt, image=image))


def format_response(response: dict) -> str:
    """Render a grading result as a Markdown bullet list.

    Args:
        response: Mapping from aspect name to a mapping holding that
            aspect's ``"score"`` and ``"explanation"``.

    Returns:
        One ``* **name**: score (explanation: ...)`` line per aspect, in
        iteration order, each terminated by a newline. An empty mapping
        yields an empty string.

    Raises:
        KeyError: If an entry lacks ``"score"`` or ``"explanation"``.
    """
    # Join once instead of growing a string with ``+=`` inside the loop.
    return "".join(
        f"* **{key}**: {value['score']} (explanation: {value['explanation']})\n"
        for key, value in response.items()
    )


def grade(prompt, image):
    """Grade ``image`` against ``prompt`` with Gemini and return Markdown.

    Args:
        prompt: Text prompt the image is graded against.
        image: Image to grade (the UI passes it as a PIL image).

    Returns:
        The first parsed grading, rendered by ``format_response``.

    Raises:
        gr.Error: If the response carries no parsed grading to display.
    """
    inputs = make_inputs(prompt, image)
    response = client.models.generate_content(
        model="gemini-2.0-flash", contents=types.Content(parts=inputs, role="user"), config=generation_config
    )
    # ``parsed`` is None when the reply could not be parsed against the
    # response schema, and it may also be an empty list. Indexing it blindly
    # would surface as an opaque TypeError/IndexError in the UI.
    gradings = response.parsed
    if not gradings:
        raise gr.Error("Gemini did not return a parsable grading. Please try again.")
    return format_response(gradings[0])


# (prompt, image file) pairs used to build the example rows below.
_example_specs = (
    ("realistic photo a shiny black SUV car with a mountain in the background.", "car.jpg"),
    ("photo a green and funny creature standing in front a lightweight forest.", "green_creature.jpg"),
)
# Each row is [prompt text, opened PIL image], in that order.
examples = [[text, Image.open(path)] for text, path in _example_specs]

# Page-level CSS: centers the #col-container element and caps its width at 520px.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 520px;\n"
    "}\n"
)

# Assemble the Gradio page: intro text, a prompt box with a Run button, an
# image input, a Markdown result area, and the predefined examples.
with gr.Blocks(css=css) as demo:
    # elem_id matches the #col-container rule defined in `css`.
    with gr.Column(elem_id="col-container"):
        # Intro: lists the grading aspects and links the system prompt and the paper.
        gr.Markdown(
            f"""# Grade images with Gemini 2.0 Flash 
        
        Following aspects are considered during grading:
        
        * Accuracy to Prompt
        * Creativity and Originality
        * Visual Quality and Realism
        * Consistency and Cohesion
        * Emotional or Thematic Resonance
        
        The [system prompt](./verifier_prompt.txt) comes from the paper: [Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps](https://arxiv.org/abs/2501.09732). 
        """
        )

        # The prompt textbox and the Run button share one row.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter the prompt that generated the image to be graded.",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        # type="pil" asks Gradio to hand the upload to `grade` as a PIL image.
        image = gr.Image(format="png", type="pil", label="Image", placeholder="The image to be graded.")

        # Displays the Markdown string returned by `grade`.
        result = gr.Markdown(label="Grading Output")

        # NOTE(review): cache_examples=True presumably makes Gradio run `grade`
        # on the examples ahead of time, which would call the Gemini API — confirm.
        gr.Examples(examples=examples, fn=grade, inputs=[prompt, image], outputs=[result], cache_examples=True)

    # Run `grade` when the Run button is clicked or the prompt is submitted.
    gr.on(triggers=[run_button.click, prompt.submit], fn=grade, inputs=[prompt, image], outputs=[result])

# Launches the app whenever this module is executed (there is no __main__ guard).
demo.launch()