Spaces:
Runtime error
Runtime error
Update main.py
Browse files
main.py
CHANGED
|
@@ -61,7 +61,7 @@ index.train(vectors)
|
|
| 61 |
index.add(vectors)
|
| 62 |
|
| 63 |
|
| 64 |
-
def preprocess(query: str, k: int) -> tuple[str, str]:
|
| 65 |
"""
|
| 66 |
Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
|
| 67 |
Args:
|
|
@@ -78,13 +78,14 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
|
|
| 78 |
prompt = (
|
| 79 |
"You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
|
| 80 |
"Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
|
| 81 |
-
"Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX
|
| 82 |
-
"
|
|
|
|
| 83 |
"USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
|
| 84 |
"ANSWER:\n"
|
| 85 |
)
|
| 86 |
|
| 87 |
-
|
| 88 |
research_abstracts = ""
|
| 89 |
|
| 90 |
for i in range(k):
|
|
@@ -92,16 +93,20 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
|
|
| 92 |
abstract = top_five["bib_dict"].values[i]["abstract"]
|
| 93 |
url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
|
| 94 |
title = top_five["bib_dict"].values[i]["title"]
|
| 95 |
-
|
| 96 |
-
[
|
| 97 |
author.split(" ")[-1]
|
| 98 |
for author in top_five["bib_dict"]
|
| 99 |
.values[i]["author"]
|
| 100 |
.split(" and ")
|
| 101 |
]
|
|
|
|
|
|
|
| 102 |
)
|
| 103 |
|
|
|
|
|
|
|
| 104 |
research_abstracts += top_five["bibtex"].values[i] + "\n"
|
|
|
|
| 105 |
references += (
|
| 106 |
str(i + 1)
|
| 107 |
+ ". "
|
|
@@ -120,9 +125,9 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
|
|
| 120 |
|
| 121 |
print(prompt)
|
| 122 |
|
| 123 |
-
return prompt,
|
| 124 |
|
| 125 |
-
def postprocess(response: str, bypass_from_preprocessing: str) -> str:
|
| 126 |
"""
|
| 127 |
Applies a postprocessing step to the LLM's response before the user receives it
|
| 128 |
Args:
|
|
@@ -131,7 +136,10 @@ def postprocess(response: str, bypass_from_preprocessing: str) -> str:
|
|
| 131 |
Returns:
|
| 132 |
str: The postprocessed response
|
| 133 |
"""
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
@spaces.GPU
|
|
|
|
| 61 |
index.add(vectors)
|
| 62 |
|
| 63 |
|
| 64 |
+
def preprocess(query: str, k: int) -> tuple[str, dict]:
|
| 65 |
"""
|
| 66 |
Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
|
| 67 |
Args:
|
|
|
|
| 78 |
prompt = (
|
| 79 |
"You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
|
| 80 |
"Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
|
| 81 |
+
"The RESEARCH_ABSTRACTS are provided in the `.bibtex` format. Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX `\\cite` command (e.g.,: '\\cite{mccomb2015}'). "
|
| 82 |
+
"DO NOT list references at the end of the answer.\n\n"
|
| 83 |
+
"RESEARCH_ABSTRACTS:\n```bibtex\n{{ABSTRACTS_GO_HERE}}\n```\n\n"
|
| 84 |
"USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
|
| 85 |
"ANSWER:\n"
|
| 86 |
)
|
| 87 |
|
| 88 |
+
id_to_url = {}
|
| 89 |
research_abstracts = ""
|
| 90 |
|
| 91 |
for i in range(k):
|
|
|
|
| 93 |
abstract = top_five["bib_dict"].values[i]["abstract"]
|
| 94 |
url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
|
| 95 |
title = top_five["bib_dict"].values[i]["title"]
|
| 96 |
+
last_names = [
|
|
|
|
| 97 |
author.split(" ")[-1]
|
| 98 |
for author in top_five["bib_dict"]
|
| 99 |
.values[i]["author"]
|
| 100 |
.split(" and ")
|
| 101 |
]
|
| 102 |
+
authors = ", ".join(
|
| 103 |
+
last_names
|
| 104 |
)
|
| 105 |
|
| 106 |
+
first_authors_last_name = last_names[0]
|
| 107 |
+
|
| 108 |
research_abstracts += top_five["bibtex"].values[i] + "\n"
|
| 109 |
+
id_to_url[top_five["bib_dict"].values[i]["ID"]] = f"<a href=\"{url}\">[{first_authors_last_name} {year}]</a>"
|
| 110 |
references += (
|
| 111 |
str(i + 1)
|
| 112 |
+ ". "
|
|
|
|
| 125 |
|
| 126 |
print(prompt)
|
| 127 |
|
| 128 |
+
return prompt, id_to_url
|
| 129 |
|
| 130 |
+
def postprocess(response: str, bypass_from_preprocessing: dict) -> str:
|
| 131 |
"""
|
| 132 |
Applies a postprocessing step to the LLM's response before the user receives it
|
| 133 |
Args:
|
|
|
|
| 136 |
Returns:
|
| 137 |
str: The postprocessed response
|
| 138 |
"""
|
| 139 |
+
for key in bypass_from_preprocessing.keys():
|
| 140 |
+
response = response.replace("\\cite{"+key+"}", bypass_from_preprocessing[key])
|
| 141 |
+
|
| 142 |
+
return response
|
| 143 |
|
| 144 |
|
| 145 |
@spaces.GPU
|