ccm committed on
Commit
8800019
·
verified ·
1 Parent(s): 51cc74d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +17 -9
main.py CHANGED
@@ -61,7 +61,7 @@ index.train(vectors)
61
  index.add(vectors)
62
 
63
 
64
- def preprocess(query: str, k: int) -> tuple[str, str]:
65
  """
66
  Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
67
  Args:
@@ -78,13 +78,14 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
78
  prompt = (
79
  "You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
80
  "Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
81
- "Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX \\cite command (e.g.,: \\cite{mccomb2015}). DO NOT list references at the end of the answer.\n\n"
82
- "RESEARCH_ABSTRACTS:\n```\n{{ABSTRACTS_GO_HERE}}\n```\n\n"
 
83
  "USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
84
  "ANSWER:\n"
85
  )
86
 
87
- references = "\n\n### References\n\n"
88
  research_abstracts = ""
89
 
90
  for i in range(k):
@@ -92,16 +93,20 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
92
  abstract = top_five["bib_dict"].values[i]["abstract"]
93
  url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
94
  title = top_five["bib_dict"].values[i]["title"]
95
- authors = ", ".join(
96
- [
97
  author.split(" ")[-1]
98
  for author in top_five["bib_dict"]
99
  .values[i]["author"]
100
  .split(" and ")
101
  ]
 
 
102
  )
103
 
 
 
104
  research_abstracts += top_five["bibtex"].values[i] + "\n"
 
105
  references += (
106
  str(i + 1)
107
  + ". "
@@ -120,9 +125,9 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
120
 
121
  print(prompt)
122
 
123
- return prompt, references
124
 
125
- def postprocess(response: str, bypass_from_preprocessing: str) -> str:
126
  """
127
  Applies a postprocessing step to the LLM's response before the user receives it
128
  Args:
@@ -131,7 +136,10 @@ def postprocess(response: str, bypass_from_preprocessing: str) -> str:
131
  Returns:
132
  str: The postprocessed response
133
  """
134
- return response + bypass_from_preprocessing
 
 
 
135
 
136
 
137
  @spaces.GPU
 
61
  index.add(vectors)
62
 
63
 
64
+ def preprocess(query: str, k: int) -> tuple[str, dict]:
65
  """
66
  Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
67
  Args:
 
78
  prompt = (
79
  "You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
80
  "Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
81
+ "The RESEARCH_ABSTRACTS are provided in the `.bibtex` format. Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX `\\cite` command (e.g.,: '\\cite{mccomb2015}'). "
82
+ "DO NOT list references at the end of the answer.\n\n"
83
+ "RESEARCH_ABSTRACTS:\n```bibtex\n{{ABSTRACTS_GO_HERE}}\n```\n\n"
84
  "USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
85
  "ANSWER:\n"
86
  )
87
 
88
+ id_to_url = {}
89
  research_abstracts = ""
90
 
91
  for i in range(k):
 
93
  abstract = top_five["bib_dict"].values[i]["abstract"]
94
  url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
95
  title = top_five["bib_dict"].values[i]["title"]
96
+ last_names = [
 
97
  author.split(" ")[-1]
98
  for author in top_five["bib_dict"]
99
  .values[i]["author"]
100
  .split(" and ")
101
  ]
102
+ authors = ", ".join(
103
+ last_names
104
  )
105
 
106
+ first_authors_last_name = last_names[0]
107
+
108
  research_abstracts += top_five["bibtex"].values[i] + "\n"
109
+ id_to_url[top_five["bib_dict"].values[i]["ID"]] = f"<a href=\"{url}\">[{first_authors_last_name} {year}]</a>"
110
  references += (
111
  str(i + 1)
112
  + ". "
 
125
 
126
  print(prompt)
127
 
128
+ return prompt, id_to_url
129
 
130
+ def postprocess(response: str, bypass_from_preprocessing: dict) -> str:
131
  """
132
  Applies a postprocessing step to the LLM's response before the user receives it
133
  Args:
 
136
  Returns:
137
  str: The postprocessed response
138
  """
139
+ for key in bypass_from_preprocessing.keys():
140
+ response = response.replace("\\cite{"+key+"}", bypass_from_preprocessing[key])
141
+
142
+ return response
143
 
144
 
145
  @spaces.GPU