Spaces:
Sleeping
Sleeping
File size: 3,157 Bytes
7bbce45 2b3583b 9a894d2 d10bbd6 9a894d2 a4a096c 42a1748 9a894d2 42a1748 9a894d2 59a00f5 9a894d2 42a1748 d10bbd6 9a894d2 59a00f5 9a894d2 c7d77fa 9a894d2 d10bbd6 7bbce45 9a894d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import gradio as gr
from smolagents import CodeAgent,HfApiModel, DuckDuckGoSearchTool, tool
from smolagents.agents import ActionStep
from time import sleep
import helium
from PIL import Image
from io import BytesIO
import os
# Define a function to search images and return the image path for display
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches for images related to the given query on the web (using DuckDuckGo search) and saves a screenshot of the results page.

    Args:
        query: The query to search for images or logos.

    Returns:
        Path of the PNG screenshot saved under the local images directory.
    """
    # helium raises if no browser window exists, and nothing else in this file
    # starts one, so open a headless Chrome on first use.
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)

    helium.go_to("https://duckduckgo.com/")
    # helium exposes write()/press() as module-level actions; the previous
    # helium.find(...) / Keys.ENTER calls referenced names that were never
    # imported (and helium has no find()).
    helium.write(query, into=helium.S("input[type='text']"))
    helium.press(helium.ENTER)

    # Wait for search results to load
    sleep(3)

    driver = helium.get_driver()

    # Best-effort: bring the first element whose text mentions "image" into
    # view. This replaces a call to a helper that is not defined in this file;
    # when nothing matches we simply keep the current scroll position.
    matches = driver.find_elements("xpath", "//*[contains(text(), 'image')]")
    if matches:
        driver.execute_script("arguments[0].scrollIntoView(true);", matches[0])

    # Persist what the browser shows so the returned path points at a real
    # file instead of a placeholder.
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    safe_name = "".join(ch if ch.isalnum() else "_" for ch in query).strip("_") or "query"
    image_path = f"{image_dir}/search_{safe_name}.png"
    Image.open(BytesIO(driver.get_screenshot_as_png())).save(image_path)
    return image_path
# Step callback that captures a browser screenshot and returns the saved image path
def save_image_from_browser(memory_step: ActionStep, agent: CodeAgent) -> "str | None":
    """Capture the current browser view and attach it to an agent step.

    Has the (memory_step, agent) signature of an agent step callback. When a
    helium-managed browser is running, the screenshot is written to
    ``images/captured_image_<step_number>.png``, a copy is stored on
    ``memory_step.observations_images``, and the current URL is appended to
    ``memory_step.observations``.

    Args:
        memory_step: The step record to enrich with the screenshot and URL.
        agent: The agent that produced the step (unused here).

    Returns:
        The path of the saved screenshot, or ``None`` when no browser is open.
    """
    sleep(1.0)  # Allow browser elements to load

    driver = helium.get_driver()
    if driver is None:
        # Nothing to capture; previously this path fell through to an
        # unbound ``image_path`` (and a ``None.current_url`` lookup).
        return None

    step_number = memory_step.step_number

    # Capture browser screenshot
    screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))

    # Save image locally
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    image_path = f"{image_dir}/captured_image_{step_number}.png"
    screenshot.save(image_path)
    print(f"Captured and saved an image: {image_path}")

    # Store a copy so the step keeps its own image object.
    memory_step.observations_images = [screenshot.copy()]

    # Update observations with the current URL for reference
    url_info = f"Current url: {driver.current_url}"
    if memory_step.observations is None:
        memory_step.observations = url_info
    else:
        memory_step.observations = memory_step.observations + "\n" + url_info

    return image_path
# Build the CodeAgent used to answer image-search queries
def initialize_agent(model):
    """Create the CodeAgent that performs image searches.

    Args:
        model: The language-model wrapper the agent uses to plan and write code.

    Returns:
        A CodeAgent configured with the web-search tools and a screenshot
        callback that runs after each step.
    """
    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), search_images_for_presentation],
        model=model,
        # save_image_from_browser takes (memory_step, agent), i.e. it is a
        # per-step callback rather than a Tool, so it must not be listed in
        # ``tools``.
        step_callbacks=[save_image_from_browser],
        max_steps=20,
        verbosity_level=2,
    )
# Inference endpoint used when the caller does not supply one.
_DEFAULT_MODEL_ID = 'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/'

# Used only when the module does not define find_images_for_presentation_prompt.
_FALLBACK_IMAGE_PROMPT = (
    "Find an image, logo, or figure suitable for a presentation that matches "
    "the request below. Use the available tools to search the web, and give "
    "the path of the saved image file as your final answer.\n\nRequest: "
)


def run_agent(query: str, model_id: str = _DEFAULT_MODEL_ID):
    """Run the image-finding agent for a single user query.

    Args:
        query: The user's description of the image, logo, or figure wanted.
        model_id: Model identifier or endpoint URL passed to HfApiModel.
            Defaults to the endpoint this app was originally hard-wired to.

    Returns:
        Whatever the agent's run produces as its final answer.
    """
    # The prompt prefix is referenced by name but not defined alongside this
    # function; look it up at call time and fall back to a built-in prompt so
    # the call cannot fail with a NameError.
    prompt_prefix = globals().get(
        "find_images_for_presentation_prompt", _FALLBACK_IMAGE_PROMPT
    )
    agent = initialize_agent(HfApiModel(model_id=model_id))
    return agent.run(prompt_prefix + query)
# Gradio interface to show images
def gradio_interface(query: str):
    """Gradio handler: forward the query to the agent and return its result.

    Args:
        query: Search text entered by the user.

    Returns:
        The value produced by run_agent, handed to Gradio for display as an image.
    """
    return run_agent(query)
# Set up Gradio interface
# Single text box in, single image out; the handler delegates to the agent.
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",  # Text input for the search query
    outputs=gr.Image(),  # Image output to display the fetched image
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)
demo.launch()
|