Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files
- __init__.py +5 -0
- labs.py +113 -0
- onkbtest.py +110 -0
- perplexity.py +312 -0
- utils.py +11 -0
__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Name string exposed at package level.
name: str = "perplexity"
|
2 |
+
|
3 |
+
from .utils import *
|
4 |
+
from .labs import Labs
|
5 |
+
from .perplexity import Perplexity
|
labs.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from os import listdir
|
2 |
+
from uuid import uuid4
|
3 |
+
from time import sleep, time
|
4 |
+
from threading import Thread
|
5 |
+
from json import loads, dumps
|
6 |
+
from random import getrandbits
|
7 |
+
from websocket import WebSocketApp
|
8 |
+
from requests import Session, get, post
|
9 |
+
|
10 |
+
class Labs:
    """Client for the Perplexity Labs playground socket.io endpoint.

    Constructing an instance opens an HTTP session without logging in,
    negotiates a socket.io session id over polling, starts a websocket in a
    background thread and blocks until that socket reports it is connected.
    The running conversation is kept in ``self.history`` and re-sent with
    every prompt.
    """

    # Model identifiers accepted by `chat` / `chat_sync`.
    _MODELS = (
        "codellama-34b-instruct",
        "llama-2-7b-chat",
        "llama-2-13b-chat",
        "llama-2-70b-chat",
        "mistral-7b-instruct",
        "pplx-70b-chat",
    )

    # Seconds to sleep between polls of state updated by the websocket thread.
    _POLL_INTERVAL = 0.01

    def __init__(self) -> None:
        self.history: list = []
        self.session: Session = Session()
        self.user_agent: dict = { "User-Agent": "Ask/2.2.1/334 (iOS; iPhone) isiOSOnMac/false", "X-Client-Name": "Perplexity-iOS" }
        self.session.headers.update(self.user_agent)
        self._init_session_without_login()

        self.t: str = self._get_t()
        self.sid: str = self._get_sid()

        self.queue: list = []
        self.finished: bool = True

        # Explicit check instead of `assert`: the handshake request must still
        # be sent when Python runs with -O. AssertionError is kept so existing
        # callers see the same exception type.
        if not self._ask_anonymous_user():
            raise AssertionError("failed to ask anonymous user")

        self.ws: WebSocketApp = self._init_websocket()
        # Thread.start() returns None, so keep the Thread object first and
        # start it in a separate statement.
        self.ws_thread: Thread = Thread(target=self.ws.run_forever)
        self.ws_thread.start()
        self._auth_session()

        # Block until the websocket thread has an open, connected socket.
        while not (self.ws.sock and self.ws.sock.connected):
            sleep(self._POLL_INTERVAL)

    def _init_session_without_login(self) -> None:
        """Request a random search URL, then re-apply the client headers."""
        self.session.get(url=f"https://www.perplexity.ai/search/{str(uuid4())}")
        self.session.headers.update(self.user_agent)

    def _auth_session(self) -> None:
        """Request the auth/session endpoint on the shared HTTP session."""
        self.session.get(url="https://www.perplexity.ai/api/auth/session")

    def _get_t(self) -> str:
        """Return a random 32-bit value as 8 lowercase hex digits."""
        return format(getrandbits(32), "08x")

    def _get_sid(self) -> str:
        """Fetch the socket.io session id from the polling endpoint.

        The first character of the response body is dropped before the rest
        is parsed as JSON.
        """
        response_text = self.session.get(
            url="https://labs-api.perplexity.ai/socket.io/?transport=polling&EIO=4"
        ).text
        return loads(response_text[1:])["sid"]

    def _ask_anonymous_user(self) -> bool:
        """Post the anonymous JWT packet; return True when the body is "OK"."""
        response = self.session.post(
            url=f"https://labs-api.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}&sid={self.sid}",
            data='40{"jwt":"anonymous-ask-user"}'
        ).text

        return response == "OK"

    def _get_cookies_str(self) -> str:
        """Render the session cookies as a ``k=v; k2=v2`` header value."""
        return "; ".join(
            f"{key}={value}"
            for key, value in self.session.cookies.get_dict().items()
        )

    def _ws_headers(self) -> dict:
        """Return websocket headers: the client headers plus a Cookie entry.

        A new dict is built so that ``self.user_agent`` is not mutated.
        """
        return {**self.user_agent, "Cookie": self._get_cookies_str()}

    def _handle_message(self, ws, message: str) -> None:
        """Process one websocket frame.

        ``"2"`` is answered with ``"3"``. Frames starting with ``"42"`` carry
        a JSON array whose second element is the payload: payloads without a
        ``status`` key are queued, ``completed`` payloads end the interaction
        and are recorded in the history, ``failed`` payloads end it too.
        """
        if message == "2":
            ws.send("3")
        elif message.startswith("42"):
            payload = loads(message[2:])[1]
            if "status" not in payload:
                self.queue.append(payload)
            elif payload["status"] == "completed":
                self.finished = True
                self.history.append({"role": "assistant", "content": payload["output"], "priority": 0})
            elif payload["status"] == "failed":
                self.finished = True

    def _init_websocket(self) -> "WebSocketApp":
        """Build (but do not start) the websocket for the current session id."""
        def on_open(ws: WebSocketApp) -> None:
            ws.send("2probe")
            ws.send("5")

        return WebSocketApp(
            url=f"wss://labs-api.perplexity.ai/socket.io/?EIO=4&transport=websocket&sid={self.sid}",
            header=self._ws_headers(),
            on_open=on_open,
            on_message=self._handle_message,
            on_error=lambda ws, err: print(f"websocket error: {err}")
        )

    def _c(self, prompt: str, model: str) -> None:
        """Send ``prompt`` together with the whole history to ``model``.

        Raises:
            AssertionError: if an interaction is still running or ``model``
                is not one of the supported identifiers.
        """
        if not self.finished:
            raise AssertionError("already searching")
        if model not in self._MODELS:
            raise AssertionError(f"invalid model: {model}")

        self.finished = False
        # Drop partial messages left over from a previous interaction so they
        # are not handed out as part of this one.
        self.queue = []
        self.history.append({"role": "user", "content": prompt, "priority": 0})
        self.ws.send(
            '42["perplexity_playground",{"version":"2.1","source":"default","model":"'
            + model
            + '","messages":'
            + dumps(self.history)
            + "}]"
        )

    def chat(self, prompt: str, model: str = "llama-2-7b-chat") -> dict:
        """Send ``prompt`` and yield queued payload dicts as they arrive.

        This is a generator: nothing is sent until iteration starts. It ends
        once the interaction is finished and the queue is drained.
        """
        self._c(prompt, model)

        while (not self.finished) or self.queue:
            if self.queue:
                yield self.queue.pop(0)
            else:
                # Give the websocket thread time instead of spinning.
                sleep(self._POLL_INTERVAL)

    def chat_sync(self, prompt: str, model: str = "llama-2-7b-chat") -> dict:
        """Send ``prompt``, wait for completion and return the last queued payload.

        Raises:
            IndexError: if the interaction finished without queuing anything.
        """
        self._c(prompt, model)

        while not self.finished:
            sleep(self._POLL_INTERVAL)

        return self.queue.pop(-1)

    def close(self) -> None:
        """Close the websocket."""
        self.ws.close()
|
onkbtest.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import re
|
5 |
+
import json
|
6 |
+
import requests
|
7 |
+
from io import StringIO
|
8 |
+
from perplexity import Perplexity
|
9 |
+
import codecs
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
# Configure the Streamlit page with the "wide" layout.
st.set_page_config(layout="wide")
|
14 |
+
def dataread(kk, pasteduse, perplex_use):
    """Annotate pasted variants with OncoKB and, optionally, Perplexity.

    Args:
        kk: Text sent to OncoKB as the ``tumorType`` query parameter and
            inserted into the Perplexity question.
        pasteduse: Tab-separated table text with a header row. The columns
            ``Gene_name`` and ``Protein Change`` are read.
        perplex_use: When true, a Perplexity search is run for every row.

    Returns:
        A 4-tuple of lists with one entry per annotated row: raw OncoKB
        response bodies, Perplexity answers (a list of the streamed
        messages, empty when Perplexity is off), the Perplexity questions
        (``''`` when off), and the ``gene&alteration=protein`` keys.
        All four lists are empty when ``pasteduse`` is empty.
    """
    import os  # local import: only needed for the token override below

    all_onc = []
    all_perplex = []
    all_query = []
    querygeneprotein = []

    # Nothing pasted: there is no table to read.
    if pasteduse == '':
        return all_onc, all_perplex, all_query, querygeneprotein

    data = pd.read_csv(StringIO(pasteduse), sep='\t', header=0)
    data['Gene'] = data['Gene_name']
    data['Protein Change'] = data['Protein Change'].apply(lambda x: str(x).replace('p.', ''))

    st.write('The data you input is the following:')
    st.write(data)

    # SECURITY(review): a bearer token is hard-coded in source. It is kept as
    # the fallback so existing deployments keep working, but it should be
    # rotated and supplied through ONCOKB_API_TOKEN instead.
    token = os.environ.get('ONCOKB_API_TOKEN', '64f4aa64-2509-4500-994b-1f2a38422d44')

    for index, row in data.iterrows():
        gene = str(row['Gene'])
        protein = str(row['Protein Change'])
        # Missing cells show up as the string 'nan' after str().
        if gene == 'nan' or protein == 'nan':
            continue

        # Let requests URL-encode the values, and bound the wait.
        d1 = requests.get(
            "https://www.oncokb.org/api/v1/annotate/mutations/byProteinChange",
            params={"hugoSymbol": gene, "alteration": protein, "tumorType": kk},
            headers={'Accept': 'application/json', "Authorization": 'Bearer ' + token},
            timeout=30,
        )

        if perplex_use:
            query = "what drugs are used to treat " + gene + " " + protein + " in " + kk + "?"
            perplexity = Perplexity()
            try:
                # search() is a lazy generator; consume it here so the client
                # can be closed instead of leaking one websocket per row.
                answer = list(perplexity.search(query))
            finally:
                perplexity.close()
        else:
            # Placeholders keep all four returned lists the same length.
            query = ''
            answer = []

        all_perplex.append(answer)
        all_query.append(query)
        all_onc.append(d1.content)
        querygeneprotein.append(gene + "&alteration=" + protein)

    return all_onc, all_perplex, all_query, querygeneprotein
|
47 |
+
|
48 |
+
|
49 |
+
# Input form: the tumour-type text, the pasted variant table and the
# Perplexity options are submitted together.
with st.form(key='parameters'):
    texttomatch = st.text_input('text to match', value='')
    pasteduse = st.text_area('paste text to search', value='')
    perplex_use = st.checkbox('Use Perplexity', value=False)
    abbrev_perplex = st.checkbox('Abbreviate Perplexity', value=True)
    submit_button = st.form_submit_button(label='Submit')

if submit_button and pasteduse != '':
    kk, perplexity, query, querygeneprotein = dataread(texttomatch, pasteduse, perplex_use)

    # All dictionaries are keyed by the "gene&alteration=protein" string.
    dictionary_of_json_fda = {}
    dictionary_of_json_text = {}
    dictionary_of_json_text_perplexity = {}
    dictionary_of_json_query = {}
    for i, raw in enumerate(kk):
        key = querygeneprotein[i]
        parsed = json.loads(codecs.decode(raw))  # parse each response once
        dictionary_of_json_fda[key] = parsed.get('highestFdaLevel')
        dictionary_of_json_text[key] = parsed
        # The Perplexity lists may be shorter than the OncoKB list when
        # Perplexity is switched off; fall back to empty values.
        dictionary_of_json_text_perplexity[key] = perplexity[i] if i < len(perplexity) else []
        dictionary_of_json_query[key] = query[i] if i < len(query) else ''

    # One tab per query, ordered by the string form of the FDA level.
    ord_dict = sorted(dictionary_of_json_fda.items(), key=lambda item: str(item[1]))

    if not ord_dict:
        # st.tabs needs at least one label.
        st.write('No rows with both a gene and a protein change were found.')
    else:
        tt = st.tabs([str(key) + ' ' + str(level) for key, level in ord_dict])

        for tab, (key, _level) in zip(tt, ord_dict):
            with tab:
                result = dictionary_of_json_text[key]
                # Reset per tab so values never carry over from the previous one.
                drugnames = []
                forout = None

                st.write('The query was ' + key)
                st.write('The results were:')
                if result.get('highestFdaLevel') != '':
                    st.write('Drugs in this result')
                    for d in result.get('treatments') or []:
                        for dd in d['drugs']:
                            drugnames.append(dd['drugName'])
                    for m in sorted(set(drugnames)):
                        st.write(m)
                    st.write(str(len(set(drugnames))) + ' drugs in this result')

                if perplex_use:
                    st.write('The perplexity query was ' + dictionary_of_json_query[key])

                    # Keep only the last streamed message.
                    for jj in dictionary_of_json_text_perplexity[key]:
                        forout = jj
                    st.write('answer from perplexity')
                    answer_text = forout.get("answer") if isinstance(forout, dict) else None
                    if answer_text is None:
                        st.write('No answer was received from perplexity.')
                    else:
                        st.write(answer_text)
                        for m in sorted(set(drugnames)):
                            # Plain substring test: drug names are literal
                            # text, not regular expressions.
                            if m in answer_text:
                                st.write('YES BOTH ' + m + ' is in the answer of perplexity')
                            else:
                                st.write('NO JUST ONCOKB ' + m + ' is not in the answer of perplexity')

                st.write('all oncokb results')
                st.json(result)
                if perplex_use and not abbrev_perplex:
                    st.write('All results from perplexity:')
                    st.write(forout)
|
perplexity.py
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Iterable, Dict
|
2 |
+
|
3 |
+
from os import listdir
|
4 |
+
from uuid import uuid4
|
5 |
+
from time import sleep, time
|
6 |
+
from threading import Thread
|
7 |
+
from json import loads, dumps
|
8 |
+
from random import getrandbits
|
9 |
+
from websocket import WebSocketApp
|
10 |
+
from requests import Session, get, post
|
11 |
+
|
12 |
+
class Perplexity:
    """Client for perplexity.ai over its socket.io websocket endpoint.

    Constructing an instance prepares an HTTP session (recovering saved
    cookies or logging in when ``email`` is given), negotiates a socket.io
    session id, starts a websocket in a background thread and blocks until
    that socket reports it is connected.
    """

    # Seconds to sleep between polls of state updated by the websocket thread.
    _POLL_INTERVAL = 0.01

    def __init__(self, email: str = None) -> None:
        self.session: Session = Session()
        self.user_agent: dict = { "User-Agent": "Ask/2.4.1/224 (iOS; iPhone; Version 17.1) isiOSOnMac/false", "X-Client-Name": "Perplexity-iOS" }
        self.session.headers.update(self.user_agent)

        if email and ".perplexity_session" in listdir():
            self._recover_session(email)
        else:
            self._init_session_without_login()

            if email:
                self._login(email)

        self.email: str = email
        self.t: str = self._get_t()
        self.sid: str = self._get_sid()

        # `base + n` is the numeric prefix put in front of every request.
        self.n: int = 1
        self.base: int = 420
        self.queue: list = []
        self.finished: bool = True
        self.last_uuid: str = None
        self.backend_uuid: str = None # unused because we can't yet follow-up questions
        self.frontend_session_id: str = str(uuid4())

        # Explicit check instead of `assert`: the handshake request must still
        # be sent when Python runs with -O. AssertionError is kept so existing
        # callers see the same exception type.
        if not self._ask_anonymous_user():
            raise AssertionError("failed to ask anonymous user")

        self.ws: WebSocketApp = self._init_websocket()
        # Thread.start() returns None, so keep the Thread object first and
        # start it in a separate statement.
        self.ws_thread: Thread = Thread(target=self.ws.run_forever)
        self.ws_thread.start()
        self._auth_session()

        # Block until the websocket thread has an open, connected socket.
        while not (self.ws.sock and self.ws.sock.connected):
            sleep(self._POLL_INTERVAL)

    def _recover_session(self, email: str) -> None:
        """Load saved cookies for ``email``, logging in if none are stored."""
        with open(".perplexity_session", "r") as f:
            perplexity_session: dict = loads(f.read())

        if email in perplexity_session:
            self.session.cookies.update(perplexity_session[email])
        else:
            self._login(email, perplexity_session)

    def _login(self, email: str, ps: dict = None) -> None:
        """Run the e-mail link sign-in and save the resulting cookies.

        The user is prompted on stdin for the link received by e-mail.
        ``ps`` is the already-loaded session file content, if any.
        """
        self.session.post(url="https://www.perplexity.ai/api/auth/signin-email", data={"email": email})

        email_link: str = str(input("paste the link you received by email: "))
        self.session.get(email_link)

        if ps:
            ps[email] = self.session.cookies.get_dict()
        else:
            ps = {email: self.session.cookies.get_dict()}

        with open(".perplexity_session", "w") as f:
            f.write(dumps(ps))

    def _init_session_without_login(self) -> None:
        """Request a random search URL, then re-apply the client headers."""
        self.session.get(url=f"https://www.perplexity.ai/search/{str(uuid4())}")
        self.session.headers.update(self.user_agent)

    def _auth_session(self) -> None:
        """Request the auth/session endpoint on the shared HTTP session."""
        self.session.get(url="https://www.perplexity.ai/api/auth/session")

    def _get_t(self) -> str:
        """Return a random 32-bit value as 8 lowercase hex digits."""
        return format(getrandbits(32), "08x")

    def _get_sid(self) -> str:
        """Fetch the socket.io session id from the polling endpoint.

        The first character of the response body is dropped before the rest
        is parsed as JSON.
        """
        response_text = self.session.get(
            url=f"https://www.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}"
        ).text
        return loads(response_text[1:])["sid"]

    def _ask_anonymous_user(self) -> bool:
        """Post the anonymous JWT packet; return True when the body is "OK"."""
        response = self.session.post(
            url=f"https://www.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}&sid={self.sid}",
            data='40{"jwt":"anonymous-ask-user"}'
        ).text

        return response == "OK"

    def _require_idle(self) -> None:
        """Raise AssertionError if an interaction is still running."""
        if not self.finished:
            raise AssertionError("already searching")

    def _start_interaction(self) -> None:
        """Mark an interaction as running, advance the counter, reset the queue."""
        self.finished = False

        if self.n == 9:
            self.n = 0
            self.base *= 10
        else:
            self.n += 1

        self.queue = []

    def _get_cookies_str(self) -> str:
        """Render the session cookies as a ``k=v; k2=v2`` header value."""
        return "; ".join(
            f"{key}={value}"
            for key, value in self.session.cookies.get_dict().items()
        )

    def _write_file_url(self, filename: str, file_url: str) -> None:
        """Record ``filename -> file_url`` in the ``.perplexity_files_url`` file."""
        if ".perplexity_files_url" in listdir():
            with open(".perplexity_files_url", "r") as f:
                perplexity_files_url: dict = loads(f.read())
        else:
            perplexity_files_url: dict = {}

        perplexity_files_url[filename] = file_url

        with open(".perplexity_files_url", "w") as f:
            f.write(dumps(perplexity_files_url))

    def _handle_message(self, ws, message: str) -> None:
        """Process one websocket frame.

        ``"2"`` is answered with ``"3"``. Everything else is ignored unless an
        interaction is running. ``"42"`` frames carry ``[event, content]``;
        ``"43"`` frames carry a JSON array after a numeric id.
        """
        if message == "2":
            ws.send("3")
            return
        if self.finished:
            return

        if message.startswith("42"):
            event: list = loads(message[2:])
            content: dict = event[1]
            if "mode" in content and content["mode"] == "copilot":
                content["copilot_answer"] = loads(content["text"])
            elif "mode" in content:
                content.update(loads(content["text"]))
            # Tolerate payloads that carry no "text" key.
            content.pop("text", None)
            if (not content.get("final")) or content.get("status") == "completed":
                self.queue.append(content)
            if event[0] == "query_answered":
                self.last_uuid = content["uuid"]
                self.finished = True
        elif message.startswith("43"):
            # The id after "43" is not always a single digit, so slice from
            # the start of the JSON array instead of a fixed offset.
            start = message.find("[")
            if start == -1:
                return
            ack: dict = loads(message[start:])[0]
            if ("uuid" in ack and ack["uuid"] != self.last_uuid) or "uuid" not in ack:
                self.queue.append(ack)
                self.finished = True

    def _init_websocket(self) -> "WebSocketApp":
        """Build (but do not start) the websocket for the current session id."""
        def on_open(ws: WebSocketApp) -> None:
            ws.send("2probe")
            ws.send("5")

        return WebSocketApp(
            url=f"wss://www.perplexity.ai/socket.io/?EIO=4&transport=websocket&sid={self.sid}",
            header=self.user_agent,
            cookie=self._get_cookies_str(),
            on_open=on_open,
            on_message=self._handle_message,
            on_error=lambda ws, err: print(f"websocket error: {err}")
        )

    def _s(self, query: str, mode: str = "concise", search_focus: str = "internet", attachments: list[str] = None, language: str = "en-GB", in_page: str = None, in_domain: str = None) -> None:
        """Validate the arguments and send a ``perplexity_ask`` request.

        ``in_page`` / ``in_domain``, when given, override ``search_focus``.

        Raises:
            AssertionError: if an interaction is running, or ``mode``,
                ``search_focus`` or the number of attachments is invalid.
        """
        # None stands for "no attachments" (avoids a shared mutable default).
        attachments = [] if attachments is None else attachments

        self._require_idle()
        if mode not in ("concise", "copilot"):
            raise AssertionError("invalid mode")
        if len(attachments) > 4:
            raise AssertionError("too many attachments: max 4")
        if search_focus not in ("internet", "scholar", "writing", "wolfram", "youtube", "reddit"):
            raise AssertionError("invalid search focus")

        if in_page:
            search_focus = "in_page"
        if in_domain:
            search_focus = "in_domain"

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "perplexity_ask",
            query,
            {
                "version": "2.1",
                "source": "default", # "ios"
                "frontend_session_id": self.frontend_session_id,
                "language": language,
                "timezone": "CET",
                "attachments": attachments,
                "search_focus": search_focus,
                "frontend_uuid": str(uuid4()),
                "mode": mode,
                # "use_inhouse_model": True
                "in_page": in_page,
                "in_domain": in_domain
            }
        ])

        self.ws.send(ws_message)

    def search(self, query: str, mode: str = "concise", search_focus: str = "internet", attachments: list[str] = None, language: str = "en-GB", timeout: float = 30, in_page: str = None, in_domain: str = None) -> Iterable[Dict]:
        """Send ``query`` and yield response dicts as they arrive.

        This is a generator: nothing is sent until iteration starts. If
        ``timeout`` seconds pass first, ``{"error": "timeout"}`` is yielded
        as the last item and the interaction is marked finished.
        """
        self._s(query, mode, search_focus, attachments, language, in_page, in_domain)

        start_time: float = time()
        while (not self.finished) or self.queue:
            if timeout and time() - start_time > timeout:
                self.finished = True
                # A value passed to `return` in a generator never reaches a
                # `for` loop, so the error has to be yielded.
                yield {"error": "timeout"}
                return
            if self.queue:
                yield self.queue.pop(0)
            else:
                # Give the websocket thread time instead of spinning.
                sleep(self._POLL_INTERVAL)

    def search_sync(self, query: str, mode: str = "concise", search_focus: str = "internet", attachments: list[str] = None, language: str = "en-GB", timeout: float = 30, in_page: str = None, in_domain: str = None) -> dict:
        """Send ``query``, wait for completion and return the last queued dict.

        Returns ``{"error": "timeout"}`` if ``timeout`` seconds pass first.

        Raises:
            IndexError: if the interaction finished without queuing anything.
        """
        self._s(query, mode, search_focus, attachments, language, in_page, in_domain)

        start_time: float = time()
        while not self.finished:
            if timeout and time() - start_time > timeout:
                self.finished = True
                return {"error": "timeout"}
            sleep(self._POLL_INTERVAL)

        return self.queue.pop(-1)

    def upload(self, filename: str) -> str:
        """Upload a ``.txt`` or ``.pdf`` file (local path or http URL).

        Requests an upload URL over the websocket, posts the file to it,
        records the resulting URL via ``_write_file_url`` and returns it.

        Raises:
            AssertionError: if an interaction is running, the extension is
                not ``txt``/``pdf``, or the server reports rate limiting.
        """
        self._require_idle()
        extension = filename.split(".")[-1]
        if extension not in ("txt", "pdf"):
            raise AssertionError("invalid file format")

        if filename.startswith("http"):
            file = get(filename).content
        else:
            with open(filename, "rb") as f:
                file = f.read()

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "get_upload_url",
            {
                "version": "2.1",
                "source": "default",
                "content_type": "text/plain" if extension == "txt" else "application/pdf",
            }
        ])

        self.ws.send(ws_message)

        while not self.finished or self.queue:
            if not self.queue:
                sleep(self._POLL_INTERVAL)
                continue

            upload_data = self.queue.pop(0)

            if upload_data["rate_limited"]:
                raise AssertionError("rate limited")

            fields = upload_data["fields"]
            post(
                url=upload_data["url"],
                files={
                    "acl": (None, fields["acl"]),
                    "Content-Type": (None, fields["Content-Type"]),
                    "key": (None, fields["key"]),
                    "AWSAccessKeyId": (None, fields["AWSAccessKeyId"]),
                    "x-amz-security-token": (None, fields["x-amz-security-token"]),
                    "policy": (None, fields["policy"]),
                    "signature": (None, fields["signature"]),
                    "file": (filename, file)
                }
            )

            file_url: str = upload_data["url"] + fields["key"].split("$")[0] + filename

            self._write_file_url(filename, file_url)

            return file_url

    def threads(self, query: str = None, limit: int = None) -> list[dict]:
        """Request the thread list and return the first queued response.

        ``limit`` defaults to 20; ``query`` is sent as ``search_term``.

        Raises:
            AssertionError: if not logged in or an interaction is running.
        """
        if not self.email:
            raise AssertionError("not logged in")
        self._require_idle()

        if not limit: limit = 20
        data: dict = {"version": "2.1", "source": "default", "limit": limit, "offset": 0}
        if query: data["search_term"] = query

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "list_ask_threads",
            data
        ])

        self.ws.send(ws_message)

        while not self.finished or self.queue:
            if self.queue:
                return self.queue.pop(0)
            sleep(self._POLL_INTERVAL)

    def list_autosuggest(self, query: str = "", search_focus: str = "internet") -> list[dict]:
        """Request autosuggestions and return the first queued response.

        Raises:
            AssertionError: if an interaction is running.
        """
        self._require_idle()

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "list_autosuggest",
            query,
            {
                "has_attachment": False,
                "search_focus": search_focus,
                "source": "default",
                "version": "2.1"
            }
        ])

        self.ws.send(ws_message)

        while not self.finished or self.queue:
            if self.queue:
                return self.queue.pop(0)
            sleep(self._POLL_INTERVAL)

    def close(self) -> None:
        """Close the websocket and, when logged in, save the current cookies."""
        self.ws.close()

        if self.email:
            with open(".perplexity_session", "r") as f:
                perplexity_session: dict = loads(f.read())

            perplexity_session[self.email] = self.session.cookies.get_dict()

            with open(".perplexity_session", "w") as f:
                f.write(dumps(perplexity_session))
|
utils.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Iterable, Dict
|
2 |
+
|
3 |
+
def return_just_next_token(answer: Iterable[Dict]) -> Iterable[str]:
    """Yield only the newly added text from a stream of cumulative answers.

    Each item of ``answer`` is a dict whose ``"answer"`` (checked first) or
    ``"output"`` value holds the text so far. For every such item the part
    beyond the previously seen length is yielded. Items with neither key are
    skipped.
    """
    length = 0
    for partial_answer in answer:
        for key in ("answer", "output"):
            if key in partial_answer:
                text = partial_answer[key]
                yield text[length:]
                length = len(text)
                break
|