"""Gradio app in which a smolagents CodeAgent finds images, logos and figures for presentations."""

import hashlib
import os
import re
import tempfile
import urllib.parse
import urllib.request
from io import BytesIO
from pathlib import Path
from time import sleep

import gradio as gr
import helium
from PIL import Image
from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, tool
from smolagents.agents import ActionStep  # noqa: F401  (unused here; kept from the original import list)

# Inference endpoint used when the caller of run_agent() does not supply one.
DEFAULT_MODEL_ID = "https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/"

# Directory that receives downloaded images; override with the IMAGE_SAVE_DIR env var.
IMAGE_DIR = Path(
    os.environ.get("IMAGE_SAVE_DIR")
    or (Path(tempfile.gettempdir()) / "presentation_images")
)

PAGE_LOAD_SECONDS = 3          # fixed wait for the result page to render
DOWNLOAD_TIMEOUT_SECONDS = 15  # upper bound for a single image download
MIN_RESULT_WIDTH_PX = 64       # skip icons/sprites narrower than this


def _ensure_browser() -> None:
    """Start a headless Chrome session unless helium already drives a browser."""
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)


def _download_image(image_url: str) -> str:
    """Download *image_url*, verify that it decodes as an image, and store it in IMAGE_DIR.

    Args:
        image_url: Absolute http(s) URL of the image.

    Returns:
        The path of the saved file as a string.

    Raises:
        ValueError: If the URL scheme is not http(s) or the payload is not an image.
        OSError: If the download itself fails (``urllib.error.URLError`` is an ``OSError``).
    """
    parsed = urllib.parse.urlparse(image_url)
    # The URL is chosen by the agent: refuse file://, ftp:// etc. so it cannot read local files.
    if parsed.scheme not in ("http", "https"):
        raise ValueError(f"Only http(s) image URLs can be saved, got: {image_url!r}")

    request = urllib.request.Request(image_url, headers={"User-Agent": "Mozilla/5.0"})
    with urllib.request.urlopen(request, timeout=DOWNLOAD_TIMEOUT_SECONDS) as response:
        payload = response.read()

    # Let Pillow identify the payload so HTML error pages are never stored as "images",
    # and so the file extension reflects the real format rather than the URL.
    try:
        with Image.open(BytesIO(payload)) as probe:
            extension = (probe.format or "img").lower()
    except OSError as err:
        raise ValueError(f"URL did not return a decodable image: {image_url!r}") from err

    # Build a filesystem-safe name; the digest keeps different URLs with the same
    # basename (or no basename at all) from overwriting each other.
    stem = re.sub(r"[^A-Za-z0-9_-]+", "_", Path(parsed.path).stem).strip("_") or "image"
    digest = hashlib.sha256(image_url.encode("utf-8")).hexdigest()[:10]

    IMAGE_DIR.mkdir(parents=True, exist_ok=True)
    target = IMAGE_DIR / f"{stem[:60]}_{digest}.{extension}"
    target.write_bytes(payload)
    return str(target)


# Define a function to search images and return the image path for display
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches for images related to the given query on the web (using DuckDuckGo search) and saves the first usable result locally.

    Args:
        query: The query to search for images or logos.

    Returns:
        str: The path to the downloaded image file.
    """
    _ensure_browser()

    # NOTE(review): ia/iax=images are undocumented DuckDuckGo URL parameters that
    # appear to open the image tab directly - confirm they still behave that way.
    params = urllib.parse.urlencode({"q": query, "iax": "images", "ia": "images"})
    helium.go_to(f"https://duckduckgo.com/?{params}")

    # Wait for search results to load
    sleep(PAGE_LOAD_SECONDS)

    for candidate in helium.find_all(helium.S("img")):
        element = candidate.web_element
        source = element.get_attribute("src") or ""
        # Inline data: URIs and empty sources cannot be downloaded.
        if not source.startswith(("http://", "https://")):
            continue
        # Tiny images are assumed to be page chrome (logos, icons), not results.
        if element.size.get("width", 0) < MIN_RESULT_WIDTH_PX:
            continue
        try:
            return _download_image(source)
        except (OSError, ValueError):
            # This candidate failed to download or decode; try the next one.
            continue

    raise RuntimeError(f"No downloadable image found for query: {query!r}")


@tool
def save_image_from_browser(image_url: str) -> str:
    """
    Saves the image from the given URL to a file.

    Args:
        image_url: The URL of the image to be saved.

    Returns:
        str: The path to the saved image.
    """
    return _download_image(image_url)


# Initialize agent (same as before)
def initialize_agent(model):
    """Build the CodeAgent with the web-search tool and the two image tools.

    Args:
        model: The smolagents model object the agent should use.

    Returns:
        A configured ``CodeAgent`` (at most 20 steps, verbosity level 2).
    """
    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), save_image_from_browser, search_images_for_presentation],
        model=model,
        max_steps=20,
        verbosity_level=2,
    )


# The line break that SOURCE shows inside this literal is kept as an explicit "\n".
find_images_for_presentation_prompt = (
    "Find images related to presentations, such as slides, logos, or visuals. \n"
    "Search for the following: "
)

# Appended after the user query so the final answer has a shape gradio_interface can parse.
_ANSWER_FORMAT_HINT = (
    "\nReturn your final answer as a Python list of the local file paths of the saved images."
)


def run_agent(query: str, model_id: str = DEFAULT_MODEL_ID):
    """Run the image-finding agent for *query*.

    Args:
        query: Free-text description of the images to look for.
        model_id: Model id or inference endpoint URL passed to ``HfApiModel``.

    Returns:
        Whatever the agent produced as its final answer. The type is not
        guaranteed, so callers should normalise it before use.
    """
    model = HfApiModel(model_id=model_id)
    agent = initialize_agent(model)
    return agent.run(find_images_for_presentation_prompt + query + _ANSWER_FORMAT_HINT)


def _extract_image_paths(result: object) -> list[str]:
    """Normalise an agent answer into a list of paths that exist on disk.

    Accepts a list/tuple/set of paths, or a string holding one or more paths
    separated by commas or newlines (including the ``repr`` of a list).
    Anything that is not an existing file is dropped.
    """
    if isinstance(result, str):
        raw_items = re.split(r"[\n,]+", result)
    elif isinstance(result, (list, tuple, set)):
        raw_items = [str(item) for item in result]
    else:
        raw_items = [str(result)]

    # Strip the quotes/brackets left over when the answer is a stringified list.
    cleaned = (item.strip(" \t\r'\"[]()") for item in raw_items)
    return [path for path in cleaned if path and os.path.isfile(path)]


# Gradio interface to show images
def gradio_interface(query: str):
    """Gradio callback: run the agent and return the images it found.

    Args:
        query: Search text entered by the user.

    Returns:
        A list of ``PIL.Image.Image`` objects. The list is empty when the query
        is blank or when the agent returned no readable image files.
    """
    if not query or not query.strip():
        return []

    images = []
    for image_path in _extract_image_paths(run_agent(query)):
        try:
            # copy() loads the pixels so the file handle can be closed right away.
            with Image.open(image_path) as handle:
                images.append(handle.copy())
        except OSError:
            # Unreadable or corrupt file: show the remaining images instead of failing.
            continue
    return images


# Set up Gradio interface
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",  # Text input for the search query
    # `.style(grid=3)` was removed in Gradio 4; `columns` is the replacement.
    outputs=gr.Gallery(label="Found Images", columns=3),
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)

if __name__ == "__main__":
    # Launch only when executed as a script so importing this module has no side effects.
    demo.launch()