Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -39,58 +39,54 @@ def extract_text_from_webpage(html_content):
|
|
39 |
visible_text = soup.get_text(strip=True)
|
40 |
return visible_text
|
41 |
|
42 |
-
|
|
|
43 |
"""Performs a Google search and returns the results."""
|
44 |
escaped_term = urllib.parse.quote_plus(term)
|
45 |
start = 0
|
46 |
all_results = []
|
47 |
-
|
48 |
-
#
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
"
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
else:
|
90 |
-
all_results.append({"link": None, "text": None})
|
91 |
-
|
92 |
-
start += len(result_block) # Update starting index for next batch
|
93 |
-
|
94 |
return all_results
|
95 |
|
96 |
# Speech Recognition Model Configuration
|
@@ -104,7 +100,7 @@ tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.sp
|
|
104 |
|
105 |
# Mistral Model Configuration
|
106 |
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
107 |
-
system_instructions1 = "<s>[SYSTEM] Answer as Real
|
108 |
|
109 |
def resample(audio_fp32, sr):
|
110 |
return soxr.resample(audio_fp32, sr, sample_rate)
|
|
|
39 |
visible_text = soup.get_text(strip=True)
|
40 |
return visible_text
|
41 |
|
42 |
+
# Perform a Google search and return the results
def search(term, num_results=3, lang="en", advanced=True, timeout=5, safe="active", ssl_verify=None):
    """Perform a Google search and scrape the visible text of each result page.

    Args:
        term: Query string. Passed raw via ``params`` — requests URL-encodes it,
            so no manual ``quote_plus`` is needed.
        num_results: Target number of result entries to collect.
        lang: Google interface language (``hl`` parameter).
        advanced: Unused; kept for backward compatibility with existing callers.
        timeout: Per-request timeout in seconds (applies to the search request
            AND each result-page fetch).
        safe: Google SafeSearch setting.
        ssl_verify: Forwarded to requests as ``verify`` (None -> library default).

    Returns:
        list[dict]: one ``{"link": url_or_None, "text": text_or_None}`` per
        parsed result; ``text`` is None when the page could not be fetched.
    """
    start = 0
    all_results = []
    # Limit the number of characters from each webpage to stay under the token limit
    max_chars_per_page = 4000  # Adjust this value based on your token limit and average webpage length

    with requests.Session() as session:
        while start < num_results:
            resp = session.get(
                url="https://www.google.com/search",
                headers={"User-Agent": get_useragent()},
                params={
                    "q": term,
                    "num": num_results - start,
                    "hl": lang,
                    "start": start,
                    "safe": safe,
                },
                timeout=timeout,
                verify=ssl_verify,
            )
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "html.parser")
            result_block = soup.find_all("div", attrs={"class": "g"})
            if not result_block:
                # No parsable results on this page; advance by one so the
                # loop cannot spin forever on an empty/blocked response.
                start += 1
                continue
            for result in result_block:
                link = result.find("a", href=True)
                if link:
                    link = link["href"]
                    try:
                        # Fix: this fetch previously had no timeout/verify, so a
                        # single slow result page could hang the whole search.
                        webpage = session.get(
                            link,
                            headers={"User-Agent": get_useragent()},
                            timeout=timeout,
                            verify=ssl_verify,
                        )
                        webpage.raise_for_status()
                        visible_text = extract_text_from_webpage(webpage.text)
                        # Truncate text if it's too long
                        if len(visible_text) > max_chars_per_page:
                            visible_text = visible_text[:max_chars_per_page] + "..."
                        all_results.append({"link": link, "text": visible_text})
                    except requests.exceptions.RequestException as e:
                        print(f"Error fetching or processing {link}: {e}")
                        all_results.append({"link": link, "text": None})
                else:
                    all_results.append({"link": None, "text": None})
            start += len(result_block)  # Update starting index for next batch
    return all_results
|
91 |
|
92 |
# Speech Recognition Model Configuration
|
|
|
100 |
|
101 |
# Mistral Model Configuration
# Inference API client for the Mixtral instruct model; used for chat completions.
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# System prompt prepended to every request sent through client1. NOTE(review):
# the literal text is part of the prompt the model receives — do not reword it.
system_instructions1 = "<s>[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
|
104 |
|
105 |
def resample(audio_fp32, sr):
    """Convert float32 audio captured at `sr` Hz to the module-level `sample_rate`."""
    converted = soxr.resample(audio_fp32, sr, sample_rate)
    return converted
|