Spaces:
Running
on
Zero
Running
on
Zero
Update main.py
Browse files
main.py
CHANGED
@@ -61,7 +61,7 @@ index.train(vectors)
|
|
61 |
index.add(vectors)
|
62 |
|
63 |
|
64 |
-
def preprocess(query: str, k: int) -> tuple[str,
|
65 |
"""
|
66 |
Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
|
67 |
Args:
|
@@ -78,13 +78,14 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
|
|
78 |
prompt = (
|
79 |
"You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
|
80 |
"Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
|
81 |
-
"Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX
|
82 |
-
"
|
|
|
83 |
"USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
|
84 |
"ANSWER:\n"
|
85 |
)
|
86 |
|
87 |
-
|
88 |
research_abstracts = ""
|
89 |
|
90 |
for i in range(k):
|
@@ -92,16 +93,20 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
|
|
92 |
abstract = top_five["bib_dict"].values[i]["abstract"]
|
93 |
url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
|
94 |
title = top_five["bib_dict"].values[i]["title"]
|
95 |
-
|
96 |
-
[
|
97 |
author.split(" ")[-1]
|
98 |
for author in top_five["bib_dict"]
|
99 |
.values[i]["author"]
|
100 |
.split(" and ")
|
101 |
]
|
|
|
|
|
102 |
)
|
103 |
|
|
|
|
|
104 |
research_abstracts += top_five["bibtex"].values[i] + "\n"
|
|
|
105 |
references += (
|
106 |
str(i + 1)
|
107 |
+ ". "
|
@@ -120,9 +125,9 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
|
|
120 |
|
121 |
print(prompt)
|
122 |
|
123 |
-
return prompt,
|
124 |
|
125 |
-
def postprocess(response: str, bypass_from_preprocessing:
|
126 |
"""
|
127 |
Applies a postprocessing step to the LLM's response before the user receives it
|
128 |
Args:
|
@@ -131,7 +136,10 @@ def postprocess(response: str, bypass_from_preprocessing: str) -> str:
|
|
131 |
Returns:
|
132 |
str: The postprocessed response
|
133 |
"""
|
134 |
-
|
|
|
|
|
|
|
135 |
|
136 |
|
137 |
@spaces.GPU
|
|
|
61 |
index.add(vectors)
|
62 |
|
63 |
|
64 |
+
def preprocess(query: str, k: int) -> tuple[str, dict]:
|
65 |
"""
|
66 |
Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
|
67 |
Args:
|
|
|
78 |
prompt = (
|
79 |
"You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
|
80 |
"Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
|
81 |
+
"The RESEARCH_ABSTRACTS are provided in the `.bibtex` format. Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX `\\cite` command (e.g.,: '\\cite{mccomb2015}'). "
|
82 |
+
"DO NOT list references at the end of the answer.\n\n"
|
83 |
+
"RESEARCH_ABSTRACTS:\n```bibtex\n{{ABSTRACTS_GO_HERE}}\n```\n\n"
|
84 |
"USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
|
85 |
"ANSWER:\n"
|
86 |
)
|
87 |
|
88 |
+
id_to_url = {}
|
89 |
research_abstracts = ""
|
90 |
|
91 |
for i in range(k):
|
|
|
93 |
abstract = top_five["bib_dict"].values[i]["abstract"]
|
94 |
url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
|
95 |
title = top_five["bib_dict"].values[i]["title"]
|
96 |
+
last_names = [
|
|
|
97 |
author.split(" ")[-1]
|
98 |
for author in top_five["bib_dict"]
|
99 |
.values[i]["author"]
|
100 |
.split(" and ")
|
101 |
]
|
102 |
+
authors = ", ".join(
|
103 |
+
last_names
|
104 |
)
|
105 |
|
106 |
+
first_authors_last_name = last_names[0]
|
107 |
+
|
108 |
research_abstracts += top_five["bibtex"].values[i] + "\n"
|
109 |
+
id_to_url[top_five["bib_dict"].values[i]["ID"]] = f"<a href=\"{url}\">[{first_authors_last_name} {year}]</a>"
|
110 |
references += (
|
111 |
str(i + 1)
|
112 |
+ ". "
|
|
|
125 |
|
126 |
print(prompt)
|
127 |
|
128 |
+
return prompt, id_to_url
|
129 |
|
130 |
+
def postprocess(response: str, bypass_from_preprocessing: dict) -> str:
|
131 |
"""
|
132 |
Applies a postprocessing step to the LLM's response before the user receives it
|
133 |
Args:
|
|
|
136 |
Returns:
|
137 |
str: The postprocessed response
|
138 |
"""
|
139 |
+
for key in bypass_from_preprocessing.keys():
|
140 |
+
response = response.replace("\\cite{"+key+"}", bypass_from_preprocessing[key])
|
141 |
+
|
142 |
+
return response
|
143 |
|
144 |
|
145 |
@spaces.GPU
|