# Spaces: Sleeping
# (page-status text captured alongside the source; not part of the program)
import os
from io import BytesIO
from time import sleep
from urllib.parse import quote_plus

import gradio as gr
import helium
from PIL import Image
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, tool
from smolagents.agents import ActionStep
# Tool that opens a DuckDuckGo image search and returns the path of a saved screenshot
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches for images related to the given query on the web (using DuckDuckGo search)
    and saves a screenshot of the results page.

    Args:
        query: The query to search for images or logos.

    Returns:
        The path of the PNG screenshot written under the local "images" directory.
    """
    # Nothing else in this file starts a browser, so start a headless Chrome on
    # first use instead of failing inside helium.go_to().
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)

    # Open the image results directly rather than typing into the home page:
    # the original relied on names (`S`, `Keys`, `search_item_ctrl_f`) that are
    # never imported or defined in this file.
    # NOTE(review): `iax=images&ia=images` is DuckDuckGo's image-tab URL form as
    # commonly used — confirm it is still current.
    helium.go_to(f"https://duckduckgo.com/?q={quote_plus(query)}&iax=images&ia=images")

    # Wait for search results to load
    sleep(3)

    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)

    # Derive a filesystem-safe file name from the query so different searches
    # do not overwrite each other.
    slug = "".join(ch if ch.isalnum() else "_" for ch in query)[:50] or "query"
    image_path = f"{image_dir}/search_{slug}.png"

    # Replace the former placeholder path with a real capture of the page.
    with open(image_path, "wb") as image_file:
        image_file.write(helium.get_driver().get_screenshot_as_png())
    return image_path
# Step callback that captures a browser screenshot and returns the saved image path
def save_image_from_browser(memory_step: ActionStep, agent: CodeAgent) -> str | None:
    """
    Saves a screenshot of the current Helium browser page and attaches it to the step.

    Args:
        memory_step: The step being recorded. Its `observations_images` and
            `observations` attributes are updated in place.
        agent: The running agent (not used by this function).

    Returns:
        The path of the saved PNG, or None when Helium has no active driver.
    """
    sleep(1.0)  # Allow browser elements to load
    driver = helium.get_driver()
    if driver is None:
        # No browser to capture. Returning early avoids touching
        # `driver.current_url` on None and returning an unassigned path.
        return None

    current_step = memory_step.step_number

    # Capture browser screenshot
    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))

    # Save image locally, one file per step number
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    image_path = f"{image_dir}/captured_image_{current_step}.png"
    image.save(image_path)
    print(f"Captured and saved an image: {image_path}")

    # Store a copy so the step keeps its own image object
    memory_step.observations_images = [image.copy()]

    # Update observations with the current URL for reference
    url_info = f"Current url: {driver.current_url}"
    memory_step.observations = (
        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
    )
    return image_path
# Initialize the agent with its search tools and the screenshot step callback
def initialize_agent(model):
    """
    Builds the CodeAgent used to look for presentation images.

    Args:
        model: The model instance handed to `CodeAgent`.

    Returns:
        A `CodeAgent` configured with max_steps=20 and verbosity_level=2.
    """
    from inspect import isfunction

    # CodeAgent expects tool objects. Wrap the search function with `tool`
    # only if it is still a plain function, so an already-decorated tool is
    # passed through unchanged.
    image_search = search_images_for_presentation
    if isfunction(image_search):
        image_search = tool(image_search)

    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), image_search],
        # save_image_from_browser takes (memory_step, agent), which is the
        # shape of a step callback rather than a tool, so register it here.
        step_callbacks=[save_image_from_browser],
        model=model,
        max_steps=20,
        verbosity_level=2,
    )
def run_agent(query: str):
    """
    Creates a model and an agent, runs the agent on the query, and returns its result.

    Args:
        query: The user's search request; appended to the prompt prefix.

    Returns:
        Whatever `agent.run` returns for the combined prompt.
    """
    # `find_images_for_presentation_prompt` is not defined in the visible file,
    # so a direct reference raises NameError. Use it when it exists at module
    # level, otherwise fall back to an inline instruction.
    prompt_prefix = globals().get(
        "find_images_for_presentation_prompt",
        "Find an image, logo, or figure suitable for a presentation and "
        "return the local file path of the saved image. Query: ",
    )
    model = HfApiModel(model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/')
    agent = initialize_agent(model)
    return agent.run(prompt_prefix + query)
# Gradio callback: hand the query to the agent and show what it returns
def gradio_interface(query: str):
    """
    Handles one Gradio request by delegating to `run_agent`.

    Args:
        query: The text entered by the user.

    Returns:
        The value returned by `run_agent`, which Gradio passes to the Image output.
    """
    return run_agent(query)
# Build the Gradio app (text query in, image out), then start serving it
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs=gr.Image(),
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)
demo.launch()