Spaces:

ccm
/

chat-with-publications

Runtime error

App Files Community

ccm committed on Aug 2, 2024

Commit

8800019

verified ·

1 Parent(s): 51cc74d

Update main.py

Browse files

Files changed (1) hide show

main.py +17 -9

main.py CHANGED Viewed

@@ -61,7 +61,7 @@ index.train(vectors)
 index.add(vectors)
-def preprocess(query: str, k: int) -> tuple[str, str]:
     """
     Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
     Args:
@@ -78,13 +78,14 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
     prompt = (
         "You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
         "Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
-        "Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX \\cite command (e.g.,: \\cite{mccomb2015}). DO NOT list references at the end of the answer.\n\n"
-        "RESEARCH_ABSTRACTS:\n```\n{{ABSTRACTS_GO_HERE}}\n```\n\n"
         "USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
         "ANSWER:\n"
     )
-    references = "\n\n### References\n\n"
     research_abstracts = ""
     for i in range(k):
@@ -92,16 +93,20 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
         abstract = top_five["bib_dict"].values[i]["abstract"]
         url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
         title = top_five["bib_dict"].values[i]["title"]
-        authors = ", ".join(
-                [
                     author.split(" ")[-1]
                     for author in top_five["bib_dict"]
                     .values[i]["author"]
                     .split(" and ")
                 ]
             )
         research_abstracts += top_five["bibtex"].values[i] + "\n"
         references += (
             str(i + 1)
             + ". "
@@ -120,9 +125,9 @@ def preprocess(query: str, k: int) -> tuple[str, str]:
     print(prompt)
-    return prompt, references
-def postprocess(response: str, bypass_from_preprocessing: str) -> str:
     """
     Applies a postprocessing step to the LLM's response before the user receives it
     Args:
@@ -131,7 +136,10 @@ def postprocess(response: str, bypass_from_preprocessing: str) -> str:
     Returns:
         str: The postprocessed response
     """
-    return response + bypass_from_preprocessing
 @spaces.GPU

 index.add(vectors)
+def preprocess(query: str, k: int) -> tuple[str, dict]:
     """
     Searches the dataset for the top k most relevant papers to the query and returns a prompt and references
     Args:
     prompt = (
         "You are an AI assistant who delights in helping people learn about research from the Design Research Collective, which is a research lab at Carnegie Mellon University led by Professor Chris McComb. "
         "Your main task is to provide an ANSWER to the USER_QUERY based on the RESEARCH_ABSTRACTS. "
+        "The RESEARCH_ABSTRACTS are provided in the `.bibtex` format. Your ANSWER should contain citations to the RESEARCH_ABSTRACTS using the LaTeX `\\cite` command (e.g.,: '\\cite{mccomb2015}'). "
+        "DO NOT list references at the end of the answer.\n\n"
+        "RESEARCH_ABSTRACTS:\n```bibtex\n{{ABSTRACTS_GO_HERE}}\n```\n\n"
         "USER_GUERY:\n{{QUERY_GOES_HERE}}\n\n"
         "ANSWER:\n"
     )
+    id_to_url = {}
     research_abstracts = ""
     for i in range(k):
         abstract = top_five["bib_dict"].values[i]["abstract"]
         url = "(https://scholar.google.com/citations?view_op=view_citation&citation_for_view=" + top_five["author_pub_id"].values[i]
         title = top_five["bib_dict"].values[i]["title"]
+        last_names = [
                     author.split(" ")[-1]
                     for author in top_five["bib_dict"]
                     .values[i]["author"]
                     .split(" and ")
                 ]
+        authors = ", ".join(
+                last_names
             )
+        first_authors_last_name = last_names[0]
         research_abstracts += top_five["bibtex"].values[i] + "\n"
+        id_to_url[top_five["bib_dict"].values[i]["ID"]] = f"<a href=\"{url}\">[{first_authors_last_name} {year}]</a>"
         references += (
             str(i + 1)
             + ". "
     print(prompt)
+    return prompt, id_to_url
+def postprocess(response: str, bypass_from_preprocessing: dict) -> str:
     """
     Applies a postprocessing step to the LLM's response before the user receives it
     Args:
     Returns:
         str: The postprocessed response
     """
+    for key in bypass_from_preprocessing.keys():
+        response = response.replace("\\cite{"+key+"}", bypass_from_preprocessing[key])
+    return response
 @spaces.GPU