"""Gradio app that asks a smolagents ``CodeAgent`` to find presentation images.

The agent gets a DuckDuckGo text-search tool plus a browser-driven image
search tool.  After every agent step a screenshot of the helium-controlled
browser (if one is open) is saved under ``images/`` and attached to the
step's observations.
"""

import os
from io import BytesIO
from time import sleep
from typing import Optional

import gradio as gr
import helium
from PIL import Image
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, tool
from smolagents.agents import ActionStep

# Inference endpoint handed to HfApiModel as its ``model_id``.
MODEL_ENDPOINT = 'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/'

# Task prefix; the user's query is appended directly after it in run_agent().
# NOTE(review): this name was referenced but never defined in the original
# file, so the wording below is new -- adjust it to the intended instructions.
find_images_for_presentation_prompt = (
    "Find an image, logo, or figure that is suitable for a presentation and "
    "matches the request below. Return the local file path of the image as "
    "your final answer.\n"
    "Request: "
)


def _ensure_browser() -> None:
    """Start a headless Chrome session if helium is not driving a browser yet."""
    # helium.go_to()/write()/press() all require an open browser window.
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)


def search_item_ctrl_f(text: str, nth_result: int = 1) -> bool:
    """Scroll the current page to the nth element whose text contains ``text``.

    Args:
        text: Text to look for.  It is interpolated into a single-quoted
            XPath literal, so it must not contain a single quote.
        nth_result: 1-based index of the match to scroll to.

    Returns:
        True if a match was found and scrolled into view, False when no
        browser is open or there are fewer than ``nth_result`` matches.
    """
    driver = helium.get_driver()
    if driver is None or nth_result < 1:
        return False
    matches = driver.find_elements("xpath", f"//*[contains(text(), '{text}')]")
    if nth_result > len(matches):
        return False
    driver.execute_script("arguments[0].scrollIntoView(true);", matches[nth_result - 1])
    return True


# Define a function to search images and return the image path for display.
# The docstring below is the tool description smolagents shows to the model,
# so its wording is kept as-is.
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches for images related to the given query on the web (using DuckDuckGo search).

    Args:
        query: The query to search for images or logos.
    """
    _ensure_browser()
    helium.go_to("https://duckduckgo.com/")

    # Type the query into the search box and submit it.
    helium.write(query, into=helium.S("input[type='text']"))
    helium.press(helium.ENTER)

    # Wait for search results to load
    sleep(3)

    # Best effort: bring image-related content into view; a miss is not fatal.
    search_item_ctrl_f("image", nth_result=1)

    # TODO: still a placeholder -- replace with actual image capture logic.
    image_path = "path/to/some_image.png"
    return image_path


def save_image_from_browser(memory_step: ActionStep, agent: CodeAgent) -> Optional[str]:
    """Step callback: screenshot the browser and attach it to the agent step.

    The screenshot is written to ``images/captured_image_<step>.png``, a copy
    is stored in ``memory_step.observations_images``, and the current URL is
    appended to ``memory_step.observations``.

    Args:
        memory_step: The step that just finished; updated in place.
        agent: The running agent (unused; part of the callback signature).

    Returns:
        Path of the saved screenshot, or None when no browser is open.
    """
    sleep(1.0)  # Allow browser elements to load
    driver = helium.get_driver()
    if driver is None:
        # No browser yet: nothing to capture and no URL to report.
        return None

    current_step = memory_step.step_number

    # Capture browser screenshot
    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))

    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    image_path = f"{image_dir}/captured_image_{current_step}.png"

    # Save image locally
    image.save(image_path)
    print(f"Captured and saved an image: {image_path}")
    memory_step.observations_images = [image.copy()]

    # Update observations with the current URL for reference
    url_info = f"Current url: {driver.current_url}"
    memory_step.observations = (
        url_info
        if memory_step.observations is None
        else memory_step.observations + "\n" + url_info
    )
    return image_path


def initialize_agent(model):
    """Build the CodeAgent with its search tools and the screenshot callback.

    Args:
        model: The smolagents model the agent should use.

    Returns:
        A configured ``CodeAgent``.
    """
    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), search_images_for_presentation],
        model=model,
        # save_image_from_browser takes (memory_step, agent): it is a step
        # callback, not a Tool, so it must not be listed under ``tools``.
        step_callbacks=[save_image_from_browser],
        max_steps=20,
        verbosity_level=2,
    )


def run_agent(query: str):
    """Run a fresh agent on ``query`` and return whatever the agent returns."""
    model = HfApiModel(model_id=MODEL_ENDPOINT)
    agent = initialize_agent(model)
    return agent.run(find_images_for_presentation_prompt + query)


def gradio_interface(query: str):
    """Gradio handler: forward the query to the agent and return its result.

    The result is passed to the ``gr.Image`` output component.
    """
    return run_agent(query)


# Set up Gradio interface.  The app is launched at module level, exactly as in
# the original script.
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",  # Text input for the search query
    outputs=gr.Image(),  # Image output to display the fetched image
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)
demo.launch()