Spaces:

hyperdemocracy
/

hf-legisqa

Running

App Files Community

gabrielaltay committed on Mar 2, 2024

Commit

6c729fe

1 Parent(s): 385ebea

chain format

Browse files

Files changed (1) hide show

app.py +18 -42

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from collections import defaultdict
 import json
 import os
 import re
@@ -50,25 +51,25 @@ PROMPT_TEMPLATES = {
 {context}
-Question: {question}""",
     "v2": PREAMBLE
     + """ Each snippet starts with a header that includes a unique snippet number (snippet_num), a legis_id, and a title. Your response should cite particular snippets using legis_id and title. If you don't know how to respond, just tell the user.
 {context}
-Question: {question}""",
     "v3": PREAMBLE
     + """ Each excerpt starts with a header that includes a legis_id, and a title followed by one or more text snippets. When using text snippets in your response, you should cite the legis_id and title. If you don't know how to respond, just tell the user.
 {context}
-Question: {question}""",
     "v4": PREAMBLE
     + """ The excerpts are formatted as a JSON list. Each JSON object has "legis_id", "title", and "snippets" keys. If a snippet is useful in writing part of your response, then cite the "title" and "legis_id" in the response. If you don't know how to respond, just tell the user.
 {context}
-Query: {question}""",
 }
@@ -421,44 +422,19 @@ with query_tab:
             search_kwargs={"k": SS["n_ret_docs"], "filter": vs_filter},
         )
         prompt = PromptTemplate.from_template(SS["prompt_template"])
-        # # takes in a dict. adds context key with formatted docs
-        # rag_chain_from_docs = (
-        #     RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
-        #     | prompt
-        #     | llm
-        #     | StrOutputParser()
-        # )
-        # # takes in a query string.
-        # # passes to retriever and passthru
-        # # assign answer
-        # rag_chain_with_source = RunnableParallel(
-        #     {"context": retriever, "question": RunnablePassthrough()}
-        # ).assign(answer=rag_chain_from_docs)
-        # takes in a dict. adds context key with formatted docs
-        rag_chain_from_docs = (
-            RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
-            | prompt
-            | llm
-            | StrOutputParser()
         )
-        # takes in a query string.
-        # passes to retriever and passthru
-        # assign answer
-        rag_chain_with_source = RunnableParallel(
-            {"context": retriever, "question": RunnablePassthrough()}
-        ).assign(answer=rag_chain_from_docs)
-        print(rag_chain_with_source)
         with get_openai_callback() as cb:
-            SS["out"] = rag_chain_with_source.invoke(SS["query"])
             SS["cb"] = cb
     if "out" in SS:
@@ -476,12 +452,12 @@ with query_tab:
             st.warning(SS["cb"])
         with st.container(border=True):
-            doc_grps = group_docs(SS["out"]["context"])
             st.write(
                 "Retrieved Chunks (note that you may need to 'right click' on links in the expanders to follow them)"
             )
             for legis_id, doc_grp in doc_grps:
                 write_doc_grp(legis_id, doc_grp)
-# with st.expander("Debug doc format"):
-#    st.text_area("formatted docs", value=format_docs(SS["out"]["context"]), height=600)

 from collections import defaultdict
 import json
+from operator import itemgetter
 import os
 import re
 {context}
+Question: {query}""",
     "v2": PREAMBLE
     + """ Each snippet starts with a header that includes a unique snippet number (snippet_num), a legis_id, and a title. Your response should cite particular snippets using legis_id and title. If you don't know how to respond, just tell the user.
 {context}
+Question: {query}""",
     "v3": PREAMBLE
     + """ Each excerpt starts with a header that includes a legis_id, and a title followed by one or more text snippets. When using text snippets in your response, you should cite the legis_id and title. If you don't know how to respond, just tell the user.
 {context}
+Question: {query}""",
     "v4": PREAMBLE
     + """ The excerpts are formatted as a JSON list. Each JSON object has "legis_id", "title", and "snippets" keys. If a snippet is useful in writing part of your response, then cite the "title" and "legis_id" in the response. If you don't know how to respond, just tell the user.
 {context}
+Query: {query}""",
 }
             search_kwargs={"k": SS["n_ret_docs"], "filter": vs_filter},
         )
         prompt = PromptTemplate.from_template(SS["prompt_template"])
+        rag_chain = (
+            RunnableParallel(
+                {
+                    "docs": retriever,  # list of docs
+                    "query": RunnablePassthrough(),  # str
+                }
+            )
+            .assign(context=(lambda x: format_docs(x["docs"])))
+            .assign(answer=prompt | llm | StrOutputParser())
         )
         with get_openai_callback() as cb:
+            SS["out"] = rag_chain.invoke(SS["query"])
             SS["cb"] = cb
     if "out" in SS:
             st.warning(SS["cb"])
         with st.container(border=True):
+            doc_grps = group_docs(SS["out"]["docs"])
             st.write(
                 "Retrieved Chunks (note that you may need to 'right click' on links in the expanders to follow them)"
             )
             for legis_id, doc_grp in doc_grps:
                 write_doc_grp(legis_id, doc_grp)
+with st.expander("Debug"):
+    st.write(SS["out"])