Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- __init__.py +5 -0
- labs.py +113 -0
- onkbtest.py +110 -0
- perplexity.py +312 -0
- utils.py +11 -0
__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: str = "perplexity"
|
| 2 |
+
|
| 3 |
+
from .utils import *
|
| 4 |
+
from .labs import Labs
|
| 5 |
+
from .perplexity import Perplexity
|
labs.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from os import listdir
|
| 2 |
+
from uuid import uuid4
|
| 3 |
+
from time import sleep, time
|
| 4 |
+
from threading import Thread
|
| 5 |
+
from json import loads, dumps
|
| 6 |
+
from random import getrandbits
|
| 7 |
+
from websocket import WebSocketApp
|
| 8 |
+
from requests import Session, get, post
|
| 9 |
+
|
| 10 |
+
class Labs:
    """Client for the Perplexity Labs playground over its socket.io endpoint.

    The constructor performs the HTTP polling handshake against
    ``labs-api.perplexity.ai``, upgrades to a websocket that is serviced by a
    background thread, and blocks until the socket reports it is connected.
    Conversation turns are accumulated in ``self.history`` and re-sent with
    every prompt.
    """

    # Model identifiers accepted by `_c`.
    _MODELS = (
        "codellama-34b-instruct",
        "llama-2-7b-chat",
        "llama-2-13b-chat",
        "llama-2-70b-chat",
        "mistral-7b-instruct",
        "pplx-70b-chat",
    )

    # Seconds to sleep between polls of state updated by the websocket thread.
    _POLL_INTERVAL = 0.01

    def __init__(self) -> None:
        """Open an anonymous session and wait for the websocket to connect.

        Raises:
            AssertionError: if the anonymous handshake is not answered "OK".
            ConnectionError: if the websocket thread stops before connecting.
        """
        self.history: list = []
        self.session: Session = Session()
        self.user_agent: dict = { "User-Agent": "Ask/2.2.1/334 (iOS; iPhone) isiOSOnMac/false", "X-Client-Name": "Perplexity-iOS" }
        self.session.headers.update(self.user_agent)
        self._init_session_without_login()

        self.t: str = self._get_t()
        self.sid: str = self._get_sid()

        self.queue: list = []
        self.finished: bool = True

        # Called unconditionally: wrapped in a bare `assert` the handshake
        # request itself would be stripped when running under `python -O`.
        self._require(self._ask_anonymous_user(), "failed to ask anonymous user")
        self.ws: WebSocketApp = self._init_websocket()
        # Thread.start() returns None, so keep the Thread object first.
        self.ws_thread: Thread = Thread(target=self.ws.run_forever)
        self.ws_thread.start()
        self._auth_session()

        while True:
            sock = self.ws.sock
            if sock and sock.connected:
                break
            if not self.ws_thread.is_alive():
                # run_forever() returned without a connection; waiting
                # longer would spin forever.
                raise ConnectionError("websocket connection failed")
            sleep(self._POLL_INTERVAL)

    @staticmethod
    def _require(condition: bool, message: str = "") -> None:
        """Raise AssertionError(message) when `condition` is false.

        Kept as AssertionError for backward compatibility with the previous
        `assert` statements, but raised explicitly so the checks survive
        `python -O`.
        """
        if not condition:
            raise AssertionError(message)

    def _init_session_without_login(self) -> None:
        """Request a random search page so the session picks up its cookies."""
        self.session.get(url=f"https://www.perplexity.ai/search/{str(uuid4())}")
        self.session.headers.update(self.user_agent)

    def _auth_session(self) -> None:
        """Hit the auth/session endpoint with the current session."""
        self.session.get(url="https://www.perplexity.ai/api/auth/session")

    def _get_t(self) -> str:
        """Return a random 32-bit value as 8 lowercase hex digits."""
        return format(getrandbits(32), "08x")

    def _get_sid(self) -> str:
        """Fetch the socket.io session id from the polling handshake."""
        response = self.session.get(
            url="https://labs-api.perplexity.ai/socket.io/?transport=polling&EIO=4"
        )
        # The first character of the body precedes the JSON document.
        return loads(response.text[1:])["sid"]

    def _ask_anonymous_user(self) -> bool:
        """Send the anonymous JWT packet; True when the server replies "OK"."""
        response = self.session.post(
            url=f"https://labs-api.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}&sid={self.sid}",
            data="40{\"jwt\":\"anonymous-ask-user\"}"
        ).text

        return response == "OK"

    def _get_cookies_str(self) -> str:
        """Serialise the session cookies as a `k=v; k=v` header value."""
        return "; ".join(
            f"{key}={value}"
            for key, value in self.session.cookies.get_dict().items()
        )

    def _init_websocket(self) -> "WebSocketApp":
        """Build (but do not start) the websocket application."""

        def on_open(ws: "WebSocketApp") -> None:
            # Probe/upgrade packets sent right after the socket opens.
            ws.send("2probe")
            ws.send("5")

        def on_message(ws: "WebSocketApp", message: str) -> None:
            if message == "2":
                # Answer "2" with "3" (presumably the Engine.IO ping/pong).
                ws.send("3")
            elif message.startswith("42"):
                payload = loads(message[2:])[1]
                if "status" not in payload:
                    # Partial output: hand it to chat()/chat_sync().
                    self.queue.append(payload)
                elif payload["status"] == "completed":
                    self.finished = True
                    self.history.append({"role": "assistant", "content": payload["output"], "priority": 0})
                elif payload["status"] == "failed":
                    self.finished = True

        # Copy before adding the cookie: the original aliased
        # self.user_agent and silently mutated the shared header dict.
        headers: dict = dict(self.user_agent)
        headers["Cookie"] = self._get_cookies_str()

        return WebSocketApp(
            url=f"wss://labs-api.perplexity.ai/socket.io/?EIO=4&transport=websocket&sid={self.sid}",
            header=headers,
            on_open=on_open,
            on_message=on_message,
            on_error=lambda ws, err: print(f"websocket error: {err}")
        )

    def _c(self, prompt: str, model: str) -> None:
        """Append `prompt` to the history and send the whole conversation.

        Raises:
            AssertionError: if a request is still running or `model` is not
                one of the supported identifiers.
        """
        self._require(self.finished, "already searching")
        self._require(model in self._MODELS, "invalid model")

        self.finished = False
        # Drop leftovers of a previous request (chat_sync only consumes the
        # last item) so they cannot leak into this one.
        self.queue = []
        self.history.append({"role": "user", "content": prompt, "priority": 0})
        self.ws.send("42" + dumps([
            "perplexity_playground",
            {
                "version": "2.1",
                "source": "default",
                "model": model,
                "messages": self.history,
            },
        ]))

    def chat(self, prompt: str, model: str = "llama-2-7b-chat"):
        """Send `prompt` and yield each partial response dict as it arrives.

        This is a generator: nothing is sent until iteration starts.
        """
        self._c(prompt, model)

        while (not self.finished) or self.queue:
            if self.queue:
                yield self.queue.pop(0)
            else:
                # Sleep instead of spinning while the socket thread works.
                sleep(self._POLL_INTERVAL)

    def chat_sync(self, prompt: str, model: str = "llama-2-7b-chat") -> dict:
        """Send `prompt`, wait for completion and return the last partial.

        Returns ``{"error": "no response"}`` when the request finished
        without producing any output (the original raised IndexError here).
        """
        self._c(prompt, model)

        while not self.finished:
            sleep(self._POLL_INTERVAL)

        if not self.queue:
            return {"error": "no response"}
        final = self.queue.pop(-1)
        self.queue = []
        return final

    def close(self) -> None:
        """Close the websocket."""
        self.ws.close()
|
onkbtest.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import re
|
| 5 |
+
import json
|
| 6 |
+
import requests
|
| 7 |
+
from io import StringIO
|
| 8 |
+
from perplexity import Perplexity
|
| 9 |
+
import codecs
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Configure the Streamlit page to use the "wide" layout.
st.set_page_config(layout="wide")
|
| 14 |
+
def dataread(kk, pasteduse, perplex_use):
    """Annotate pasted variants with OncoKB and, optionally, Perplexity.

    Args:
        kk: Tumor type text; sent to OncoKB as ``tumorType`` and embedded in
            the Perplexity question.
        pasteduse: Tab-separated text with a header row. The columns
            ``Gene_name`` and ``Protein Change`` are read.
        perplex_use: When true, also ask Perplexity about every variant.

    Returns:
        A 4-tuple of lists ``(all_onc, all_perplex, all_query,
        querygeneprotein)``: raw OncoKB response bodies, Perplexity answers
        (each a list of response dicts), the Perplexity questions, and a
        ``GENE&alteration=CHANGE`` key per annotated row. ``all_perplex`` and
        ``all_query`` stay empty when `perplex_use` is false.
    """
    import os  # local import so this block does not depend on file-level changes

    all_onc = []
    all_perplex = []
    all_query = []
    querygeneprotein = []

    # Nothing pasted: the original fell through to an unbound `data`.
    if pasteduse == '':
        return all_onc, all_perplex, all_query, querygeneprotein

    data = pd.read_csv(StringIO(pasteduse), sep='\t', header=0)
    data['Gene'] = data['Gene_name']
    # str() also turns missing values into the literal 'nan' tested below.
    data['Protein Change'] = data['Protein Change'].apply(lambda x: str(x).replace('p.', ''))

    st.write('The data you input is the following:')
    st.write(data)

    # SECURITY(review): a bearer token is committed in source. Prefer the
    # ONCOKB_API_TOKEN environment variable; the literal is kept only as a
    # backward-compatible fallback and should be rotated and removed.
    token = os.environ.get('ONCOKB_API_TOKEN', '64f4aa64-2509-4500-994b-1f2a38422d44')
    headers = {'Accept': 'application/json', "Authorization": 'Bearer ' + token}

    for _, row in data.iterrows():
        gene = str(row['Gene'])
        change = str(row['Protein Change'])
        if gene == 'nan' or change == 'nan':
            continue  # skip rows missing either value

        # `params` lets requests URL-encode the values (tumor types may
        # contain spaces or '&'); the timeout stops a stalled call from
        # hanging the app.
        d1 = requests.get(
            "https://www.oncokb.org/api/v1/annotate/mutations/byProteinChange",
            params={"hugoSymbol": gene, "alteration": change, "tumorType": kk},
            headers=headers,
            timeout=30,
        )

        if perplex_use:
            query = "what drugs are used to treat " + gene + " " + change + " in " + kk + "?"
            perplexity = Perplexity()
            try:
                # search() is a lazy generator: drain it now so the
                # websocket can be closed instead of leaking one
                # connection and thread per row.
                answer = list(perplexity.search(query))
            finally:
                perplexity.close()
            all_perplex.append(answer)
            all_query.append(query)

        all_onc.append(d1.content)
        querygeneprotein.append(gene + "&alteration=" + change)

    return all_onc, all_perplex, all_query, querygeneprotein
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Input form: tumor type text, pasted variant table and Perplexity options.
with st.form(key='parameters'):
    texttomatch = st.text_input('text to match', value='')
    pasteduse = st.text_area('paste text to search', value='')
    perplex_use = st.checkbox('Use Perplexity', value=False)
    abbrev_perplex = st.checkbox('Abbreviate Perplexity', value=True)
    submit_button = st.form_submit_button(label='Submit')

# NOTE(review): the original indentation was lost; results are assumed to be
# rendered below the form rather than inside it -- confirm.
if submit_button and pasteduse != '':
    kk, perplexity, query, querygeneprotein = dataread(texttomatch, pasteduse, perplex_use)

    dictionary_of_json_fda = {}
    dictionary_of_json_text = {}
    dictionary_of_json_text_perplexity = {}
    dictionary_of_json_query = {}

    for idx, key in enumerate(querygeneprotein):
        # Parse each OncoKB body once (the original parsed it three times).
        record = json.loads(codecs.decode(kk[idx]))
        dictionary_of_json_fda[key] = record.get('highestFdaLevel')
        dictionary_of_json_text[key] = record
        # dataread() only fills these lists when Perplexity is enabled;
        # indexing them unconditionally raised IndexError with the default
        # (unchecked) setting.
        if perplex_use:
            dictionary_of_json_text_perplexity[key] = perplexity[idx]
            dictionary_of_json_query[key] = query[idx]

    # One tab per variant, ordered by the string form of its FDA level.
    ord_dict = sorted(dictionary_of_json_fda.items(), key=lambda item: str(item[1]))

    if not ord_dict:
        # st.tabs() cannot be built from an empty list.
        st.write('No rows with both a gene and a protein change were found.')
    else:
        tt = st.tabs([str(key) + ' ' + str(level) for key, level in ord_dict])

        for tab, (key, _level) in zip(tt, ord_dict):
            with tab:
                record = dictionary_of_json_text[key]
                st.write('The query was ' + key)
                st.write('The results were:')

                # Reset per tab so a previous tab's values never leak in.
                drugnames = []
                forout = None

                if record.get('highestFdaLevel') != '':
                    st.write('Drugs in this result')
                    for d in record.get('treatments') or []:
                        for dd in d['drugs']:
                            drugnames.append(dd['drugName'])
                    for m in sorted(set(drugnames)):
                        st.write(m)
                    st.write(str(len(set(drugnames))) + ' drugs in this result')

                if perplex_use:
                    st.write('The perplexity query was ' + dictionary_of_json_query[key])

                    # Keep the last streamed item. NOTE(review): assumed to
                    # be the final, complete answer -- confirm.
                    for jj in dictionary_of_json_text_perplexity[key]:
                        forout = jj

                    st.write('answer from perplexity')
                    answer_text = forout.get("answer") if isinstance(forout, dict) else None
                    if answer_text is None:
                        st.write('No answer was returned by perplexity')
                    else:
                        st.write(answer_text)
                        for m in sorted(set(drugnames)):
                            # Plain substring test: drug names are data, not
                            # regex patterns.
                            if m in answer_text:
                                st.write('YES BOTH ' + m + ' is in the answer of perplexity')
                            else:
                                st.write('NO JUST ONCOKB ' + m + ' is not in the answer of perplexity')

                st.write('all oncokb results')
                st.json(record)

                if perplex_use and not abbrev_perplex:
                    st.write('All results from perplexity:')
                    st.write(forout)
|
perplexity.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Iterable, Dict
|
| 2 |
+
|
| 3 |
+
from os import listdir
|
| 4 |
+
from uuid import uuid4
|
| 5 |
+
from time import sleep, time
|
| 6 |
+
from threading import Thread
|
| 7 |
+
from json import loads, dumps
|
| 8 |
+
from random import getrandbits
|
| 9 |
+
from websocket import WebSocketApp
|
| 10 |
+
from requests import Session, get, post
|
| 11 |
+
|
| 12 |
+
class Perplexity:
    """Client for perplexity.ai search over its socket.io endpoint.

    The constructor restores or creates an HTTP session (optionally logging
    in by e-mail link), performs the polling handshake, upgrades to a
    websocket serviced by a background thread, and blocks until the socket
    reports it is connected. Only one request may be in flight at a time;
    `self.finished` tracks that.
    """

    # Seconds to sleep between polls of state updated by the websocket thread.
    _POLL_INTERVAL = 0.01

    def __init__(self, email: str = None) -> None:
        """Open a session and wait for the websocket to connect.

        Args:
            email: Optional account e-mail. When given, cookies are restored
                from ``.perplexity_session`` if that file exists in the
                working directory, otherwise an interactive login is run.

        Raises:
            AssertionError: if the anonymous handshake is not answered "OK".
            ConnectionError: if the websocket thread stops before connecting.
        """
        self.session: Session = Session()
        self.user_agent: dict = { "User-Agent": "Ask/2.4.1/224 (iOS; iPhone; Version 17.1) isiOSOnMac/false", "X-Client-Name": "Perplexity-iOS" }
        self.session.headers.update(self.user_agent)

        if email and ".perplexity_session" in listdir():
            self._recover_session(email)
        else:
            self._init_session_without_login()

            if email:
                self._login(email)

        self.email: str = email
        self.t: str = self._get_t()
        self.sid: str = self._get_sid()

        # `base + n` forms the numeric prefix of every outgoing packet.
        self.n: int = 1
        self.base: int = 420
        self.queue: list = []
        self.finished: bool = True
        self.last_uuid: str = None
        self.backend_uuid: str = None # unused because we can't yet follow-up questions
        self.frontend_session_id: str = str(uuid4())

        # Called unconditionally: wrapped in a bare `assert` the handshake
        # request itself would be stripped when running under `python -O`.
        self._require(self._ask_anonymous_user(), "failed to ask anonymous user")
        self.ws: WebSocketApp = self._init_websocket()
        # Thread.start() returns None, so keep the Thread object first.
        self.ws_thread: Thread = Thread(target=self.ws.run_forever)
        self.ws_thread.start()
        self._auth_session()

        while True:
            sock = self.ws.sock
            if sock and sock.connected:
                break
            if not self.ws_thread.is_alive():
                # run_forever() returned without a connection; waiting
                # longer would spin forever.
                raise ConnectionError("websocket connection failed")
            sleep(self._POLL_INTERVAL)

    @staticmethod
    def _require(condition: bool, message: str = "") -> None:
        """Raise AssertionError(message) when `condition` is false.

        Kept as AssertionError for backward compatibility with the previous
        `assert` statements, but raised explicitly so the checks survive
        `python -O`.
        """
        if not condition:
            raise AssertionError(message)

    def _recover_session(self, email: str) -> None:
        """Load cookies for `email` from ``.perplexity_session`` or log in."""
        with open(".perplexity_session", "r") as f:
            perplexity_session: dict = loads(f.read())

        if email in perplexity_session:
            self.session.cookies.update(perplexity_session[email])
        else:
            self._login(email, perplexity_session)

    def _login(self, email: str, ps: dict = None) -> None:
        """Run the e-mail link login and persist the resulting cookies.

        Prompts on stdin for the link received by e-mail, then writes the
        cookies for `email` (merged into `ps` when given) to
        ``.perplexity_session``.
        """
        self.session.post(url="https://www.perplexity.ai/api/auth/signin-email", data={"email": email})

        email_link: str = str(input("paste the link you received by email: "))
        self.session.get(email_link)

        if ps is None:
            ps = {}
        ps[email] = self.session.cookies.get_dict()

        with open(".perplexity_session", "w") as f:
            f.write(dumps(ps))

    def _init_session_without_login(self) -> None:
        """Request a random search page so the session picks up its cookies."""
        self.session.get(url=f"https://www.perplexity.ai/search/{str(uuid4())}")
        self.session.headers.update(self.user_agent)

    def _auth_session(self) -> None:
        """Hit the auth/session endpoint with the current session."""
        self.session.get(url="https://www.perplexity.ai/api/auth/session")

    def _get_t(self) -> str:
        """Return a random 32-bit value as 8 lowercase hex digits."""
        return format(getrandbits(32), "08x")

    def _get_sid(self) -> str:
        """Fetch the socket.io session id from the polling handshake."""
        response = self.session.get(
            url=f"https://www.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}"
        )
        # The first character of the body precedes the JSON document.
        return loads(response.text[1:])["sid"]

    def _ask_anonymous_user(self) -> bool:
        """Send the anonymous JWT packet; True when the server replies "OK"."""
        response = self.session.post(
            url=f"https://www.perplexity.ai/socket.io/?EIO=4&transport=polling&t={self.t}&sid={self.sid}",
            data="40{\"jwt\":\"anonymous-ask-user\"}"
        ).text

        return response == "OK"

    def _start_interaction(self) -> None:
        """Mark a request as running, advance the packet id, clear the queue."""
        self.finished = False

        if self.n == 9:
            self.n = 0
            self.base *= 10
        else:
            self.n += 1

        self.queue = []

    def _get_cookies_str(self) -> str:
        """Serialise the session cookies as a `k=v; k=v` header value."""
        return "; ".join(
            f"{key}={value}"
            for key, value in self.session.cookies.get_dict().items()
        )

    def _write_file_url(self, filename: str, file_url: str) -> None:
        """Record `filename -> file_url` in ``.perplexity_files_url``."""
        if ".perplexity_files_url" in listdir():
            with open(".perplexity_files_url", "r") as f:
                perplexity_files_url: dict = loads(f.read())
        else:
            perplexity_files_url: dict = {}

        perplexity_files_url[filename] = file_url

        with open(".perplexity_files_url", "w") as f:
            f.write(dumps(perplexity_files_url))

    def _init_websocket(self) -> "WebSocketApp":
        """Build (but do not start) the websocket application."""

        def on_open(ws: "WebSocketApp") -> None:
            # Probe/upgrade packets sent right after the socket opens.
            ws.send("2probe")
            ws.send("5")

        def on_message(ws: "WebSocketApp", message: str) -> None:
            if message == "2":
                # Answer "2" with "3" (presumably the Engine.IO ping/pong).
                ws.send("3")
                return
            if self.finished:
                # No request in flight: ignore everything else.
                return

            if message.startswith("42"):
                event: list = loads(message[2:])
                content: dict = event[1]
                if "mode" in content and content["mode"] == "copilot":
                    content["copilot_answer"] = loads(content["text"])
                elif "mode" in content:
                    # "text" carries a JSON document; merge its keys.
                    content.update(loads(content["text"]))
                content.pop("text")
                if (not ("final" in content and content["final"])) or ("status" in content and content["status"] == "completed"):
                    self.queue.append(content)
                if event[0] == "query_answered":
                    self.last_uuid = content["uuid"]
                    self.finished = True
            elif message.startswith("43"):
                reply: dict = loads(message[3:])[0]
                # Skip replies that repeat the query already reported via
                # "query_answered".
                if ("uuid" in reply and reply["uuid"] != self.last_uuid) or "uuid" not in reply:
                    self.queue.append(reply)
                    self.finished = True

        return WebSocketApp(
            url=f"wss://www.perplexity.ai/socket.io/?EIO=4&transport=websocket&sid={self.sid}",
            header=self.user_agent,
            cookie=self._get_cookies_str(),
            on_open=on_open,
            on_message=on_message,
            on_error=lambda ws, err: print(f"websocket error: {err}")
        )

    def _s(self, query: str, mode: str = "concise", search_focus: str = "internet", attachments: list[str] = None, language: str = "en-GB", in_page: str = None, in_domain: str = None) -> None:
        """Validate the arguments and send a ``perplexity_ask`` packet.

        Raises:
            AssertionError: if a request is already running or an argument
                is outside its accepted values.
        """
        # None replaces the former shared mutable default `[]`.
        attachments = [] if attachments is None else attachments

        self._require(self.finished, "already searching")
        self._require(mode in ["concise", "copilot"], "invalid mode")
        self._require(len(attachments) <= 4, "too many attachments: max 4")
        self._require(search_focus in ["internet", "scholar", "writing", "wolfram", "youtube", "reddit"], "invalid search focus")

        # A page or domain restriction overrides the requested focus.
        if in_page:
            search_focus = "in_page"
        if in_domain:
            search_focus = "in_domain"

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "perplexity_ask",
            query,
            {
                "version": "2.1",
                "source": "default", # "ios"
                "frontend_session_id": self.frontend_session_id,
                "language": language,
                "timezone": "CET",
                "attachments": attachments,
                "search_focus": search_focus,
                "frontend_uuid": str(uuid4()),
                "mode": mode,
                # "use_inhouse_model": True
                "in_page": in_page,
                "in_domain": in_domain
            }
        ])

        self.ws.send(ws_message)

    def search(self, query: str, mode: str = "concise", search_focus: str = "internet", attachments: list[str] = None, language: str = "en-GB", timeout: float = 30) -> Iterable[Dict]:
        """Send `query` and yield each response dict as it arrives.

        This is a generator: nothing is sent until iteration starts, and the
        timeout clock starts then as well.

        NOTE(review): on timeout the stream simply ends. The
        ``{"error": "timeout"}`` value is a generator return value, visible
        only through ``StopIteration.value``, never to a ``for`` loop. Left
        unchanged so existing consumers keep seeing the same items.
        """
        self._s(query, mode, search_focus, attachments, language)

        start_time: float = time()
        while (not self.finished) or self.queue:
            if timeout and time() - start_time > timeout:
                self.finished = True
                return {"error": "timeout"}
            if self.queue:
                yield self.queue.pop(0)
            else:
                # Sleep instead of spinning while the socket thread works.
                sleep(self._POLL_INTERVAL)

    def search_sync(self, query: str, mode: str = "concise", search_focus: str = "internet", attachments: list[str] = None, language: str = "en-GB", timeout: float = 30) -> dict:
        """Send `query`, wait for completion and return the last response.

        Returns ``{"error": "timeout"}`` when `timeout` seconds pass first,
        and ``{"error": "no response"}`` when the request finished without
        queuing anything (the original raised IndexError here).
        """
        self._s(query, mode, search_focus, attachments, language)

        start_time: float = time()
        while not self.finished:
            if timeout and time() - start_time > timeout:
                self.finished = True
                return {"error": "timeout"}
            sleep(self._POLL_INTERVAL)

        if not self.queue:
            return {"error": "no response"}
        return self.queue.pop(-1)

    def _first_response(self):
        """Wait for the running request and return its first queued item.

        Returns None when the request finishes without queuing anything.
        """
        while (not self.finished) or self.queue:
            if self.queue:
                return self.queue.pop(0)
            sleep(self._POLL_INTERVAL)
        return None

    def upload(self, filename: str) -> str:
        """Upload a ``.txt`` or ``.pdf`` file and return its URL.

        Args:
            filename: Local path, or an ``http``-prefixed URL that is
                downloaded first.

        Raises:
            AssertionError: if a request is already running, the extension
                is not txt/pdf, no upload URL is received, or the server
                reports rate limiting.
        """
        extension = filename.split(".")[-1]
        self._require(self.finished, "already searching")
        self._require(extension in ["txt", "pdf"], "invalid file format")

        if filename.startswith("http"):
            file = get(filename).content
        else:
            with open(filename, "rb") as f:
                file = f.read()

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "get_upload_url",
            {
                "version": "2.1",
                "source": "default",
                "content_type": "text/plain" if extension == "txt" else "application/pdf",
            }
        ])

        self.ws.send(ws_message)

        # Drain the queue; the last item received is used.
        upload_data = None
        while (not self.finished) or self.queue:
            if self.queue:
                upload_data = self.queue.pop(0)
            else:
                sleep(self._POLL_INTERVAL)

        # The original hit a NameError when nothing had been received.
        self._require(upload_data is not None, "no upload url received")
        self._require(not upload_data["rate_limited"], "rate limited")

        fields = upload_data["fields"]
        post(
            url=upload_data["url"],
            files={
                "acl": (None, fields["acl"]),
                "Content-Type": (None, fields["Content-Type"]),
                "key": (None, fields["key"]),
                "AWSAccessKeyId": (None, fields["AWSAccessKeyId"]),
                "x-amz-security-token": (None, fields["x-amz-security-token"]),
                "policy": (None, fields["policy"]),
                "signature": (None, fields["signature"]),
                "file": (filename, file)
            }
        )

        # Everything before the first "$" in the key is used as the prefix.
        file_url: str = upload_data["url"] + fields["key"].split("$")[0] + filename

        self._write_file_url(filename, file_url)

        return file_url

    def threads(self, query: str = None, limit: int = None) -> list[dict]:
        """Send a ``list_ask_threads`` request and return its first reply.

        Args:
            query: Optional search term.
            limit: Maximum number of threads; 20 when falsy.

        Raises:
            AssertionError: if not logged in or a request is already running.
        """
        self._require(self.email, "not logged in")
        self._require(self.finished, "already searching")

        if not limit:
            limit = 20
        data: dict = {"version": "2.1", "source": "default", "limit": limit, "offset": 0}
        if query:
            data["search_term"] = query

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "list_ask_threads",
            data
        ])

        self.ws.send(ws_message)

        return self._first_response()

    def list_autosuggest(self, query: str = "", search_focus: str = "internet") -> list[dict]:
        """Send a ``list_autosuggest`` request and return its first reply.

        Raises:
            AssertionError: if a request is already running.
        """
        self._require(self.finished, "already searching")

        self._start_interaction()
        ws_message: str = f"{self.base + self.n}" + dumps([
            "list_autosuggest",
            query,
            {
                "has_attachment": False,
                "search_focus": search_focus,
                "source": "default",
                "version": "2.1"
            }
        ])

        self.ws.send(ws_message)

        return self._first_response()

    def close(self) -> None:
        """Close the websocket and, when logged in, persist the cookies."""
        self.ws.close()

        if self.email:
            with open(".perplexity_session", "r") as f:
                perplexity_session: dict = loads(f.read())

            perplexity_session[self.email] = self.session.cookies.get_dict()

            with open(".perplexity_session", "w") as f:
                f.write(dumps(perplexity_session))
|
utils.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Iterable, Dict
|
| 2 |
+
|
| 3 |
+
def return_just_next_token(answer: Iterable[Dict]) -> Iterable[str]:
    """Yield only the text that is new in each partial answer.

    Each item of `answer` is expected to carry the text accumulated so far
    under ``"answer"`` or, failing that, ``"output"``. For every such item
    the part beyond the previously seen length is yielded; items with
    neither key are skipped.

    Args:
        answer: Iterable of partial-response dicts.

    Yields:
        The suffix of the current text past the previously seen length
        (an empty string when nothing new was added).
    """
    length = 0
    for partial_answer in answer:
        if "answer" in partial_answer:
            text = partial_answer["answer"]
        elif "output" in partial_answer:
            text = partial_answer["output"]
        else:
            continue
        yield text[length:]
        # Remember how much has been emitted so the next item only
        # contributes its new tail.
        length = len(text)
|