# Source: Hugging Face Space palbha/websearch_agent (status: Sleeping; file size: 3,157 bytes)

import os
from io import BytesIO
from time import sleep
from typing import Optional

import gradio as gr
import helium
from PIL import Image
from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool
from smolagents.agents import ActionStep

# Tool: run a DuckDuckGo search in the helium browser and return a screenshot path
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches for images related to the given query on the web (using DuckDuckGo search)
    and returns the path of a screenshot of the results page.
    Args:
        query: The query to search for images or logos.
    """
    # helium needs a running browser before go_to(); start a headless one
    # if none has been started yet.
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)

    helium.go_to("https://duckduckgo.com/")
    # Use helium's module-level API (write/press/S/ENTER). The previous code
    # relied on `S`, `Keys` and `helium.find`, none of which were available.
    helium.write(query, into=helium.S("input[type='text']"))
    helium.press(helium.ENTER)

    # Wait for search results to load
    sleep(3)

    driver = helium.get_driver()

    # Best-effort: bring the first element whose text mentions "image" into
    # view (replaces a call to an undefined `search_item_ctrl_f` helper).
    matches = driver.find_elements("xpath", "//*[contains(text(), 'image')]")
    if matches:
        driver.execute_script("arguments[0].scrollIntoView(true);", matches[0])

    # Save what the browser currently shows so the returned path points at a
    # real file instead of a placeholder.
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    # Keep only alphanumerics so the query is safe to use as a file name.
    safe_name = "".join(ch if ch.isalnum() else "_" for ch in query)[:50] or "query"
    image_path = f"{image_dir}/search_{safe_name}.png"
    driver.save_screenshot(image_path)
    return image_path


# Step callback: capture a browser screenshot and record the current URL
def save_image_from_browser(memory_step: ActionStep, agent: CodeAgent) -> Optional[str]:
    """Save a screenshot of the helium browser for the given agent step.

    The screenshot is written to ``images/captured_image_<step_number>.png``,
    a copy of it is attached to ``memory_step.observations_images``, and the
    browser's current URL is appended to ``memory_step.observations``.

    Args:
        memory_step: The step being recorded; its ``step_number`` names the
            file and its observation fields are updated in place.
        agent: The agent that produced the step. Unused in this function.

    Returns:
        The path of the saved PNG, or ``None`` when helium has no active
        browser driver (in which case ``memory_step`` is left untouched).
    """
    sleep(1.0)  # Allow browser elements to load
    driver = helium.get_driver()
    if driver is None:
        # Without a browser there is nothing to capture. Falling through
        # would hit `driver.current_url` on None and an unbound `image_path`.
        return None

    # Capture browser screenshot
    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    image_path = f"{image_dir}/captured_image_{memory_step.step_number}.png"

    # Save image locally
    image.save(image_path)
    print(f"Captured and saved an image: {image_path}")

    # Store a copy in the step's image observations.
    memory_step.observations_images = [image.copy()]

    # Update observations with the current URL for reference:
    url_info = f"Current url: {driver.current_url}"
    memory_step.observations = (
        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
    )
    return image_path


# Build the agent with its search tools and the per-step screenshot callback
def initialize_agent(model):
    """Create the CodeAgent used to answer image-search queries.

    Args:
        model: The model object handed to ``CodeAgent`` as ``model``.

    Returns:
        A ``CodeAgent`` with the DuckDuckGo search tool and
        ``search_images_for_presentation`` as tools, limited to 20 steps.
    """
    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), search_images_for_presentation],
        model=model,
        # save_image_from_browser takes (memory_step, agent): it is a step
        # callback, not a tool, so it must not be listed in `tools`.
        step_callbacks=[save_image_from_browser],
        max_steps=20,
        verbosity_level=2,
    )


# Instruction text prepended to every user query. The name was referenced by
# run_agent but never defined in this file, so each call raised NameError.
find_images_for_presentation_prompt = (
    "Find an image, logo or figure suitable for a presentation about the topic below. "
    "Use the `search_images_for_presentation` tool and give the path of the saved image "
    "as your final answer.\n\nTopic: "
)


def run_agent(query: str):
    """Run a freshly built agent on the user's query.

    Args:
        query: The search topic; appended to
            ``find_images_for_presentation_prompt`` to form the task.

    Returns:
        Whatever ``agent.run`` returns for the task.
    """
    # The model is served from a hard-coded inference endpoint URL.
    model = HfApiModel(model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/')
    agent = initialize_agent(model)
    return agent.run(find_images_for_presentation_prompt + query)


# Gradio callback: forward the query to the agent and hand back its result
def gradio_interface(query: str):
    """Run the agent on ``query`` and return its result for Gradio to display."""
    return run_agent(query)


# Build the Gradio UI (text query in, image out), then start serving it
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs=gr.Image(),
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)
demo.launch()