palbha committed on
Commit
9a894d2
·
verified ·
1 Parent(s): 59a00f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -61
app.py CHANGED
@@ -1,72 +1,94 @@
1
  import gradio as gr
2
- from smolagents import CodeAgent, HfApiModel # adjust the import to your actual smolagents module
 
 
 
 
 
 
3
 
4
- # Step 1: Set up your smolagents agent.
5
def create_agent():
    """
    Build the smolagents ``CodeAgent`` used by the chat handler.

    The agent is given no tools and talks to the model served at the
    dedicated Hugging Face inference endpoint configured below.
    """
    endpoint_model = HfApiModel(
        model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/'
    )
    return CodeAgent(tools=[], model=endpoint_model)


# One module-level agent is built at import time and reused for every request.
agent = create_agent()
19
 
20
def combine_messages(messages: list[dict]) -> str:
    """
    Flatten a list of chat messages into a single prompt string.

    Each message becomes one ``"Role: content"`` line, with the role
    capitalized (e.g. ``'User'`` instead of ``'user'``) for clarity.

    Args:
        messages: Dictionaries that each carry a ``'role'`` and a
            ``'content'`` key.

    Returns:
        The lines joined by newlines, with leading and trailing whitespace
        stripped. An empty list yields an empty string.
    """
    # str.join builds the result in one pass instead of repeated `+=`.
    lines = (f"{msg['role'].capitalize()}: {msg['content']}" for msg in messages)
    return "\n".join(lines).strip()
30
 
31
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """
    Chat handler for Gradio: replay the conversation to the agent and yield its answer.

    The system message, every non-empty past user/assistant turn, and the
    new user message are collected as role-tagged dictionaries, flattened
    into a single prompt with ``combine_messages``, and passed to the
    module-level ``agent``.

    ``max_tokens``, ``temperature`` and ``top_p`` are accepted because the UI
    supplies them, but this function does not use them. The complete answer
    is yielded once; it is not streamed piece by piece.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        turn = (("user", user_msg), ("assistant", assistant_msg))
        # Empty or missing halves of a turn are left out of the prompt.
        messages.extend({"role": role, "content": text} for role, text in turn if text)

    # The latest user input always goes last.
    messages.append({"role": "user", "content": message})

    yield agent.run(combine_messages(messages))
52
 
53
# Step 3: Wire the chat handler into a Gradio ChatInterface. The extra widgets
# are passed to `respond` after the message and history, in the order listed.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

# Step 4: Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
1
import os
from io import BytesIO
from time import sleep

import gradio as gr
import helium
from PIL import Image
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, tool
from smolagents.agents import ActionStep
9
 
10
+ # Define a function to search images and return the image path for display
11
@tool
def search_images_for_presentation(query: str) -> str:
    """
    Searches DuckDuckGo Images for the given query and saves a screenshot of the results page.
    Returns the local file path of the saved PNG screenshot.

    Args:
        query: The query to search for images or logos.
    """
    # Function-scope imports keep the tool self-contained.
    import re
    from urllib.parse import quote_plus

    # Helium can only navigate once a browser session exists.
    if helium.get_driver() is None:
        helium.start_chrome(headless=True)

    # Open the image results directly rather than typing into the search box,
    # so the tool does not depend on the page's input selectors.
    helium.go_to(f"https://duckduckgo.com/?q={quote_plus(query)}&iax=images&ia=images")

    # Wait for search results to load
    sleep(3)

    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)

    # Derive a filesystem-safe file name from the query.
    slug = re.sub(r"[^A-Za-z0-9]+", "_", query).strip("_")[:50] or "query"
    image_path = os.path.join(image_dir, f"search_{slug}.png")

    helium.get_driver().save_screenshot(image_path)
    return image_path
32
+
33
+
34
# Step callback (not an agent tool): screenshots the browser after a step.
def save_image_from_browser(memory_step: ActionStep, agent: CodeAgent) -> "str | None":
    """
    Capture the current browser view and attach it to the given memory step.

    The screenshot is saved under ``images/`` with the step number in its
    file name, a copy is stored in ``memory_step.observations_images``, and
    the current URL is appended to ``memory_step.observations``.

    Args:
        memory_step: The step record to annotate; its ``step_number`` is
            used in the file name.
        agent: The running agent. Not used by this function.

    Returns:
        The path of the saved screenshot, or ``None`` when no Helium browser
        session exists (in which case ``memory_step`` is left untouched).
    """
    sleep(1.0)  # Allow browser elements to load
    driver = helium.get_driver()
    if driver is None:
        # Nothing to capture. Returning early also avoids touching
        # `driver.current_url` or an unassigned `image_path` below.
        return None

    current_step = memory_step.step_number

    # Capture browser screenshot
    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    image_dir = "images"
    os.makedirs(image_dir, exist_ok=True)
    image_path = f"{image_dir}/captured_image_{current_step}.png"

    # Save image locally
    image.save(image_path)
    print(f"Captured and saved an image: {image_path}")

    # Store a copy of the image on the step.
    memory_step.observations_images = [image.copy()]

    # Update observations with the current URL for reference:
    url_info = f"Current url: {driver.current_url}"
    memory_step.observations = (
        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
    )
    return image_path
59
 
 
 
60
 
61
# Build the image-finding agent around the supplied model.
def initialize_agent(model):
    """
    Create a ``CodeAgent`` equipped with web search and image search tools.

    ``save_image_from_browser`` takes ``(memory_step, agent)`` and is not
    decorated with ``@tool``, so it is registered as a step callback rather
    than listed among the tools.

    Args:
        model: The smolagents model object the agent should use.

    Returns:
        A ``CodeAgent`` limited to 20 steps with verbosity level 2.
    """
    return CodeAgent(
        tools=[DuckDuckGoSearchTool(), search_images_for_presentation],
        model=model,
        step_callbacks=[save_image_from_browser],
        max_steps=20,
        verbosity_level=2,
    )
 
 
69
 
70
+
71
# Instruction prefix placed before the user's query. The original code used
# this name without ever defining it, which raised NameError on first use.
find_images_for_presentation_prompt = (
    "You are helping to prepare a presentation. Use the available tools to "
    "find an image, logo, or figure that matches the request below, and "
    "return the local file path of the image as your final answer.\n\n"
    "Request: "
)


def run_agent(query: str):
    """
    Run a freshly built agent on the query and return its final answer.

    A new model client and agent are created on every call, so no agent
    state is shared between queries.

    Args:
        query: The user's description of the image, logo or figure wanted.

    Returns:
        Whatever ``agent.run`` returns for the prefixed prompt.
    """
    model = HfApiModel(model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud/')
    agent = initialize_agent(model)
    return agent.run(find_images_for_presentation_prompt + query)
76
+
77
+
78
# Gradio callback: turns the text query into the value shown by the image output.
def gradio_interface(query: str):
    """
    Forward the query to ``run_agent`` and return its result unchanged.

    The returned value is what the Gradio image component displays.
    """
    return run_agent(query)
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
# Set up Gradio interface. It is bound to `demo` so it can be imported or
# reloaded without starting a server.
demo = gr.Interface(
    fn=gradio_interface,
    inputs="text",  # Text input for the search query
    outputs=gr.Image(),  # Image output to display the fetched image
    title="Figure, Image & Logo Finder",
    description="Enter a query to search for relevant images, logos, or figures for your presentation.",
)

# Launch only when run as a script, so importing this module has no side effects.
if __name__ == "__main__":
    demo.launch()