File size: 3,157 Bytes
7bbce45
2b3583b
9a894d2
 
 
 
 
 
d10bbd6
9a894d2
 
a4a096c
42a1748
9a894d2
 
 
42a1748
9a894d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59a00f5
9a894d2
42a1748
d10bbd6
9a894d2
 
 
 
 
 
 
 
59a00f5
9a894d2
 
 
 
 
 
 
 
 
 
 
 
c7d77fa
9a894d2
 
d10bbd6
7bbce45
9a894d2
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
from smolagents import CodeAgent,HfApiModel, DuckDuckGoSearchTool, tool
from smolagents.agents import ActionStep
from time import sleep
import helium
from PIL import Image
from io import BytesIO
import os

# Browser-driven image search, exposed to the agent as a tool.
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches for images related to the given query on the web (using DuckDuckGo search).

    Args:
        query: The query to search for images or logos.

    Returns:
        The path of the image to display.
    """
    # Helium refuses to navigate without a browser session, and nothing else
    # in this file starts one, so launch a headless Chrome on first use.
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)

    helium.go_to("https://duckduckgo.com/")

    # `S`, `write`, `press` and `ENTER` are only imported via the `helium`
    # module here, so they must be referenced through it (the bare `S` / `Keys`
    # names and `helium.find` used previously do not exist in this file).
    helium.write(query, into=helium.S("input[type='text']"))
    helium.press(helium.ENTER)

    # Wait for search results to load
    sleep(3)

    # Scroll to the first element whose text mentions "image". This replaces a
    # call to `search_item_ctrl_f`, which is not defined anywhere in this file.
    # It is best-effort: if nothing matches, the page is simply left as-is.
    driver = helium.get_driver()
    matches = driver.find_elements("xpath", "//*[contains(text(), 'image')]")
    if matches:
        driver.execute_script("arguments[0].scrollIntoView(true);", matches[0])

    # TODO: still a hard-coded placeholder -- replace with actual image
    # capture logic; no file is written to this path by this function.
    image_path = "path/to/some_image.png"
    return image_path


# Step callback: screenshot the browser and record the current URL on the step.
def save_image_from_browser(memory_step: ActionStep, agent: CodeAgent) -> "str | None":
    """Capture a browser screenshot for the given agent step.

    Waits one second, then, if a Helium browser session exists, saves a PNG
    screenshot to ``images/captured_image_<step_number>.png``, attaches a copy
    of the image to ``memory_step.observations_images`` and appends the
    current URL to ``memory_step.observations``.

    Args:
        memory_step: The step record to annotate; its ``step_number`` is read
            and its ``observations_images`` / ``observations`` are written.
        agent: The running agent (unused here; part of the callback signature).

    Returns:
        The path of the saved screenshot, or ``None`` when no browser session
        is available.
    """
    sleep(1.0)  # Allow browser elements to load
    driver = helium.get_driver()
    if driver is None:
        # Without a browser there is nothing to capture. Previously this path
        # fell through to `driver.current_url` / `return image_path` and
        # crashed with AttributeError / UnboundLocalError.
        return None

    current_step = memory_step.step_number

    # Capture browser screenshot
    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    image_path = f"{image_dir}/captured_image_{current_step}.png"

    # Save image locally
    image.save(image_path)
    print(f"Captured and saved an image: {image_path}")

    # Store a copy so the step keeps its own image object.
    memory_step.observations_images = [image.copy()]

    # Update observations with the current URL for reference:
    url_info = f"Current url: {driver.current_url}"
    memory_step.observations = (
        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
    )
    return image_path


# Build the CodeAgent with its search tools and the screenshot step callback.
def initialize_agent(model):
    """Create the image-finding agent.

    Args:
        model: The model object handed to ``CodeAgent`` unchanged.

    Returns:
        A ``CodeAgent`` limited to 20 steps with verbosity level 2.
    """
    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), search_images_for_presentation],
        model=model,
        # `save_image_from_browser` takes (memory_step, agent), i.e. it is a
        # step callback rather than a tool, so it is registered here instead
        # of in `tools`.
        step_callbacks=[save_image_from_browser],
        max_steps=20,
        verbosity_level=2,
    )


# Instruction prefix prepended to every user query. The name was referenced by
# `run_agent` but never defined in this file, which raised NameError on every
# request. NOTE(review): the wording below is a default -- adjust as needed.
find_images_for_presentation_prompt = (
    "Find an image, logo or figure suitable for a presentation slide "
    "for the following request: "
)


def run_agent(
    query: str,
    model_id: str = 'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/',
):
    """Run a freshly built agent on the user's query.

    Args:
        query: The user's search request; appended to
            ``find_images_for_presentation_prompt``.
        model_id: Identifier passed to ``HfApiModel``. Defaults to the
            endpoint URL this file has always used.

    Returns:
        Whatever ``agent.run`` returns for the combined prompt.
    """
    model = HfApiModel(model_id=model_id)
    agent = initialize_agent(model)
    result = agent.run(find_images_for_presentation_prompt + query)
    return result


# Gradio callback: forwards the query to the agent.
def gradio_interface(query: str):
    """Run the agent on ``query`` and return its result to Gradio unchanged."""
    return run_agent(query)


# Build the Gradio app, then start serving it.
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",  # free-text search query
    outputs=gr.Image(),  # displays the image returned by the callback
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)
demo.launch()