daqc committed
Commit 3cb7acd · verified · 1 Parent(s): e1a0e74

Delete app-.py

Files changed (1)
  1. app-.py +0 -320
app-.py DELETED
@@ -1,320 +0,0 @@
import argparse
import json
import mimetypes  # used by GradioUI.upload_file
import os
import re  # used by GradioUI.upload_file
import shutil  # used by GradioUI.upload_file
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
from typing import List, Optional

import datasets
import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import login
import gradio as gr

from scripts.reformulator import prepare_response
from scripts.run_agents import (
    get_single_file_description,
    get_zip_description,
)
from scripts.text_inspector_tool import TextInspectorTool
from scripts.text_web_browser import (
    ArchiveSearchTool,
    FinderTool,
    FindNextTool,
    PageDownTool,
    PageUpTool,
    SearchInformationTool,
    SimpleTextBrowser,
    VisitTool,
)
from scripts.visual_qa import visualizer
from tqdm import tqdm

from smolagents import (
    # MANAGED_AGENT_PROMPT,
    CodeAgent,
    HfApiModel,
    LiteLLMModel,
    Model,
    ToolCallingAgent,
)
from smolagents.agent_types import AgentText, AgentImage, AgentAudio, handle_agent_output_types
from smolagents.gradio_ui import pull_messages_from_step  # , handle_agent_output_types


AUTHORIZED_IMPORTS = [
    "requests",
    "zipfile",
    "os",
    "pandas",
    "numpy",
    "sympy",
    "json",
    "bs4",
    "pubchempy",
    "xml",
    "yahoo_finance",
    "Bio",
    "sklearn",
    "scipy",
    "pydub",
    "io",
    "PIL",
    "chess",
    "PyPDF2",
    "pptx",
    "torch",
    "datetime",
    "fractions",
    "csv",
]
load_dotenv(override=True)
login(os.getenv("HF_TOKEN"))

append_answer_lock = threading.Lock()

SET = "validation"

custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}

# skip: GAIA evaluation-dataset loading, kept below for reference but disabled
_ = """
### LOAD EVALUATION DATASET

eval_ds = datasets.load_dataset("gaia-benchmark/GAIA", "2023_all")[SET]
eval_ds = eval_ds.rename_columns({"Question": "question", "Final answer": "true_answer", "Level": "task"})


def preprocess_file_paths(row):
    if len(row["file_name"]) > 0:
        row["file_name"] = f"data/gaia/{SET}/" + row["file_name"]
    return row


eval_ds = eval_ds.map(preprocess_file_paths)
eval_df = pd.DataFrame(eval_ds)
print("Loaded evaluation dataset:")
print(eval_df["task"].value_counts())
# """

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"

BROWSER_CONFIG = {
    "viewport_size": 1024 * 5,
    "downloads_folder": "downloads_folder",
    "request_kwargs": {
        "headers": {"User-Agent": user_agent},
        "timeout": 300,
    },
    "serpapi_key": os.getenv("SERPAPI_API_KEY"),
}

os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)

model = LiteLLMModel(
    # "gpt-4o",
    # os.getenv("MODEL_ID", "gpt-4o-mini"),
    os.getenv("MODEL_ID", "deepseek-ai/DeepSeek-V3"),
    custom_role_conversions=custom_role_conversions,
    api_base=os.getenv("OPENAI_API_BASE"),
    api_key=os.getenv("OPENAI_API_KEY"),
)

text_limit = 20000
ti_tool = TextInspectorTool(model, text_limit)

browser = SimpleTextBrowser(**BROWSER_CONFIG)

# Browsing and inspection tools handed to the agent
WEB_TOOLS = [
    SearchInformationTool(browser),
    VisitTool(browser),
    PageUpTool(browser),
    PageDownTool(browser),
    FinderTool(browser),
    FindNextTool(browser),
    ArchiveSearchTool(browser),
    TextInspectorTool(model, text_limit),
]

agent = CodeAgent(
    model=model,
    tools=[visualizer] + WEB_TOOLS,
    max_steps=5,
    verbosity_level=2,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
    planning_interval=4,
)

document_inspection_tool = TextInspectorTool(model, 20000)


# augmented_question = """You have one question to answer. It is paramount that you provide a correct answer.
# Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded.
# Run verification steps if that's needed, you must make sure you find the correct answer!
# Here is the task:
# """ + example["question"]

# if example["file_name"]:
#     prompt_use_files = "\n\nTo solve the task above, you will have to use this attached file:"
#     prompt_use_files += get_single_file_description(
#         example["file_name"], example["question"], visual_inspection_tool, document_inspection_tool
#     )
#     augmented_question += prompt_use_files


# final_result = agent.run(augmented_question)


def stream_to_gradio(
    agent,
    task: str,
    reset_agent_memory: bool = False,
    additional_args: Optional[dict] = None,
):
    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
        for message in pull_messages_from_step(
            step_log,
        ):
            yield message

    final_answer = step_log  # Last log is the run's final_answer
    final_answer = handle_agent_output_types(final_answer)

    if isinstance(final_answer, AgentText):
        yield gr.ChatMessage(
            role="assistant",
            content=f"**Final answer:**\n{final_answer.to_string()}\n",
        )
    elif isinstance(final_answer, AgentImage):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "image/png"},
        )
    elif isinstance(final_answer, AgentAudio):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
        )
    else:
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")


class GradioUI:
    """A one-line interface to launch your agent in Gradio"""

    def __init__(self, agent, file_upload_folder: str | None = None):
        self.agent = agent
        self.file_upload_folder = file_upload_folder
        if self.file_upload_folder is not None:
            if not os.path.exists(file_upload_folder):
                os.mkdir(file_upload_folder)

    def interact_with_agent(self, prompt, messages):
        messages.append(gr.ChatMessage(role="user", content=prompt))
        yield messages
        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
            messages.append(msg)
            yield messages
        yield messages

    def upload_file(
        self,
        file,
        file_uploads_log,
        allowed_file_types=[
            "application/pdf",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "text/plain",
        ],
    ):
        """
        Handle file uploads; the default allowed types are .pdf, .docx, and .txt
        """
        if file is None:
            return gr.Textbox("No file uploaded", visible=True), file_uploads_log

        try:
            mime_type, _ = mimetypes.guess_type(file.name)
        except Exception as e:
            return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log

        if mime_type not in allowed_file_types:
            return gr.Textbox("File type disallowed", visible=True), file_uploads_log

        # Sanitize the file name: replace any non-alphanumeric, non-dash, non-dot character with an underscore
        original_name = os.path.basename(file.name)
        sanitized_name = re.sub(r"[^\w\-.]", "_", original_name)

        # Build a reverse map from mime type to a canonical extension
        type_to_ext = {}
        for ext, t in mimetypes.types_map.items():
            if t not in type_to_ext:
                type_to_ext[t] = ext

        # Ensure the extension matches the detected mime type
        sanitized_name = sanitized_name.split(".")[:-1]
        sanitized_name.append("" + type_to_ext[mime_type])
        sanitized_name = "".join(sanitized_name)

        # Save the uploaded file to the upload folder
        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
        shutil.copy(file.name, file_path)

        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]

    def log_user_message(self, text_input, file_uploads_log):
        return (
            text_input
            + (
                f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
                if len(file_uploads_log) > 0
                else ""
            ),
            "",
        )

    def launch(self, **kwargs):
        with gr.Blocks(theme="ocean", fill_height=True) as demo:
            gr.Markdown("""# open Deep Research - free the AI agents!

OpenAI just published [Deep Research](https://openai.com/index/introducing-deep-research/), a very nice assistant that can perform deep searches on the web to answer user questions.

However, their agent has a huge downside: it's not open. So we've started a 24-hour rush to replicate and open-source it. Our resulting [open-Deep-Research agent](https://github.com/huggingface/smolagents/tree/main/examples/open_deep_research) took the #1 rank of any open submission on the GAIA leaderboard! ✨

You can try a simplified version below. 👇""")
            stored_messages = gr.State([])
            file_uploads_log = gr.State([])
            chatbot = gr.Chatbot(
                label="Open-Deep-Research",
                type="messages",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
                ),
                # resizable=True,  # new version
                resizeable=True,
                scale=1,
            )
            # If an upload folder is provided, enable the upload feature
            if self.file_upload_folder is not None:
                upload_file = gr.File(label="Upload a file")
                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
                upload_file.change(
                    self.upload_file,
                    [upload_file, file_uploads_log],
                    [upload_status, file_uploads_log],
                )
            text_input = gr.Textbox(lines=1, label="Your request")
            text_input.submit(
                self.log_user_message,
                [text_input, file_uploads_log],
                [stored_messages, text_input],
            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])

        demo.launch(debug=True, share=True, **kwargs)


GradioUI(agent).launch()