File size: 6,087 Bytes
2cd4061 0704ce3 b01cc46 2cd4061 0704ce3 2cd4061 25554bb 2cd4061 b01cc46 64dec74 2cd4061 fd2037f 2cd4061 fd2037f 2cd4061 b01cc46 fd2037f 2cd4061 fd2037f b01cc46 fd2037f 2cd4061 fd2037f b01cc46 fd2037f b01cc46 0704ce3 2cd4061 fd2037f 0704ce3 fd2037f 2cd4061 fd2037f 2cd4061 fd2037f 0704ce3 fd2037f 6dc1ca6 2cd4061 b01cc46 0704ce3 2cd4061 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import gradio as gr
from huggingface_hub import InferenceClient
import json
from bs4 import BeautifulSoup
import requests
def extract_text_from_webpage(html_content):
    """Return the visible text of an HTML document.

    ``<script>``, ``<style>``, ``<header>`` and ``<footer>`` elements are
    removed from the parsed tree before the text is collected.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The remaining text; each text fragment is stripped of surrounding
        whitespace and the fragments are joined with single spaces.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    for tag in soup(["script", "style", "header", "footer"]):
        tag.extract()
    # With the default empty separator, strip=True glues neighbouring
    # fragments together ("<h1>Title</h1><p>Body</p>" -> "TitleBody"), which
    # destroys word boundaries for whatever consumes the text.
    return soup.get_text(separator=" ", strip=True)
def search(query, *, num_results=3, max_chars_per_page=8000, timeout=5):
    """Run a Google search and fetch the text of each result page.

    Args:
        query: Search terms sent to Google as the ``q`` parameter.
        num_results: Value sent as Google's ``num`` parameter.
        max_chars_per_page: Upper bound on the characters kept per page.
        timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        A list of ``{"link": ..., "text": ...}`` dicts, one per result block
        that contains a link. ``text`` is ``None`` when the page could not be
        fetched.

    Raises:
        requests.exceptions.RequestException: If the Google request itself
            fails or returns an HTTP error status.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}
    all_results = []
    with requests.Session() as session:
        resp = session.get(
            url="https://www.google.com/search",
            headers=headers,
            params={"q": query, "num": num_results, "udm": 14},
            timeout=timeout,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        for result in soup.find_all("div", attrs={"class": "g"}):
            anchor = result.find("a", href=True)
            if anchor is None:
                # A result block without a link used to raise TypeError here
                # and abort the whole search; skip it instead.
                continue
            link = anchor["href"]
            try:
                # NOTE(review): verify=False disables TLS certificate checks
                # for arbitrary third-party pages. Kept so that pages with
                # broken certificates still load, but worth revisiting.
                webpage = session.get(link, headers=headers, timeout=timeout, verify=False)
                webpage.raise_for_status()
                visible_text = extract_text_from_webpage(webpage.text)[:max_chars_per_page]
            except requests.exceptions.RequestException:
                # Keep the link so callers can see it was found but unreadable.
                visible_text = None
            all_results.append({"link": link, "text": visible_text})
    return all_results
# Client that respond() queries first to decide whether a web search is
# needed. Despite its name it targets a Mistral model, not Gemma.
client_gemma = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
# Client that respond() uses to stream the final answer text.
client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
# NOTE(review): respond() binds its own local list with this name, so the
# code visible in this file never fills this module-level list.
func_caller = []
# Define the main chat function
def respond(message, history):
    """Answer a chat message, optionally grounding the reply in a web search.

    The router model (``client_gemma``) is first asked whether it wants to
    call the ``web_search`` function. If it does, the search results are added
    to the prompt; otherwise, or if the search fails, the question is answered
    without them. The answer is streamed from ``client_llama``.

    Args:
        message: The user's message, either a plain string or a dict with a
            ``"text"`` key.
        history: Earlier turns; each item is indexed as ``item[0]`` (user
            text) and ``item[1]`` (assistant text).

    Yields:
        The reply accumulated so far, once per non-empty generated token.
    """
    # The interface in this file is built with multimodal=False, which hands
    # over a plain string; indexing it with ["text"] raised TypeError on every
    # call. Dict-shaped (multimodal) messages are still accepted.
    message_text = message["text"] if isinstance(message, dict) else str(message)

    functions_metadata = [
        {"type": "function", "function": {"name": "web_search", "description": "Search query on google", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "web search query"}}, "required": ["query"]}}},
    ]

    func_caller = []
    for msg in history:
        func_caller.append({"role": "user", "content": str(msg[0])})
        func_caller.append({"role": "assistant", "content": str(msg[1])})
    func_caller.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message_text}'})

    response = client_gemma.chat_completion(func_caller, max_tokens=200)
    # Read the reply text itself instead of parsing repr(response) and
    # stripping backslashes, which mangled legitimately escaped content.
    reply_text = response.choices[0].message.content or ""
    print(f"\n{reply_text}")
    call = _parse_function_call(reply_text)

    web_context = None
    if call is not None and call.get("name") == "web_search":
        try:
            query = call["arguments"]["query"]
            gr.Info("Searching Web")
            web_results = search(query)
            gr.Info("Extracting relevant Info")
            web_context = ' '.join(
                f"Link: {res['link']}\nText: {res['text']}\n\n"
                for res in web_results
                if res['text']
            )
        except Exception as err:
            # Web search is best-effort: a malformed call or a failed request
            # must not prevent a plain answer.
            print(f"web search failed, answering without it: {err!r}")
            web_context = None

    if web_context is not None:
        # The original referenced an undefined `client_mixtral` here; the
        # resulting NameError was swallowed and the web results were never
        # used. Use the answer client that is actually defined.
        streamed_any = False
        try:
            for partial in _stream_reply(client_llama, _build_prompt(history, message_text, web_context)):
                streamed_any = True
                yield partial
            return
        except Exception as err:
            if streamed_any:
                # Part of the answer is already on screen; do not restart.
                raise
            print(f"web-grounded generation failed, retrying without web results: {err!r}")

    yield from _stream_reply(client_llama, _build_prompt(history, message_text))


def _parse_function_call(reply_text):
    """Return the JSON object embedded in *reply_text*, or None if there is none."""
    start = reply_text.find("{")
    end = reply_text.rfind("}")
    if start == -1 or end < start:
        return None
    try:
        return json.loads(reply_text[start:end + 1])
    except json.JSONDecodeError:
        return None


def _build_prompt(history, message_text, web_context=None):
    """Build the text-generation prompt, adding a web_result section when given."""
    parts = ["Web Dac uses the user agents of Mozilla, AppleWebKit, and Safari browsers for chat responses and human context mimicking."]
    for msg in history:
        parts.append(f"\nuser\n{str(msg[0])}")
        parts.append(f"\nassistant\n{str(msg[1])}")
    if web_context is None:
        parts.append(f"\nuser\n{message_text}\nassistant\n")
    else:
        parts.append(f"\nuser\n{message_text}\nweb_result\n{web_context}\nassistant\n")
    return "".join(parts)


def _stream_reply(client, prompt):
    """Stream a completion from *client*, yielding the text accumulated so far."""
    stream = client.text_generation(prompt, max_new_tokens=2000, do_sample=True, stream=True, details=True, return_full_text=False)
    output = ""
    for chunk in stream:
        if chunk.token.text != "":
            output += chunk.token.text
            yield output
# Build the chat UI around respond() and start the Gradio app.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=gr.Chatbot(show_copy_button=True, likeable=True, layout="panel"),
    description=" ",
    textbox=gr.Textbox(),
    multimodal=False,
    concurrency_limit=200,
)
# The stray "|" that followed this call was a syntax error and is removed.
demo.launch()