Spaces: Runtime error

Marina Pliusnina committed · Commit 2217335 · Parent(s): 2b8b263

first

Browse files:
- README.md +4 -4
- app.py +128 -0
- gitignore +4 -0
- handler.py +14 -0
- input_reader.py +22 -0
- rag.py +73 -0
- rag_image.jpg +0 -0
- requirements.txt +8 -0
- utils.py +33 -0
README.md CHANGED

@@ -1,10 +1,10 @@
 ---
-title:
-emoji:
-colorFrom:
+title: Rag
+emoji: 💻
+colorFrom: indigo
 colorTo: yellow
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.14.0
 app_file: app.py
 pinned: false
 license: apache-2.0
app.py ADDED

@@ -0,0 +1,128 @@

import os
import gradio as gr
from gradio.components import Textbox, Button
# from AinaTheme import theme
from urllib.error import HTTPError

from rag import RAG
from utils import setup

setup()

# Build the RAG pipeline from environment variables (loaded by utils.setup).
rag = RAG(
    hf_token=os.getenv("HF_TOKEN"),
    embeddings_model=os.getenv("EMBEDDINGS"),
    model_name=os.getenv("MODEL"),
)


def generate(prompt):
    try:
        return rag.get_response(prompt)
    except HTTPError as err:
        if err.code == 400:
            gr.Warning(
                "The inference endpoint is only available Monday through Friday, from 08:00 to 20:00 CET."
            )
    except Exception:
        gr.Warning(
            "The inference endpoint is not available right now. Please try again later."
        )


def submit_input(input_):
    if input_.strip() == "":
        gr.Warning("Cannot run inference on an empty input")
        return None
    return generate(input_)


def change_interactive(text):
    # Keep "Clear" always clickable; enable "Submit" only for non-empty input.
    if len(text) == 0:
        return gr.update(interactive=True), gr.update(interactive=False)
    return gr.update(interactive=True), gr.update(interactive=True)


def clear():
    return (
        None,
        None,
    )


def gradio_app():
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=0.1):
                gr.Image(
                    "rag_image.jpg",
                    elem_id="flor-banner",
                    scale=1,
                    height=256,
                    width=256,
                    show_label=False,
                    show_download_button=False,
                    show_share_button=False,
                )
            with gr.Column():
                gr.Markdown(
                    """# Retrieval-Augmented Generation (experimental)

                    🔍 **Retrieval-Augmented Generation** (RAG) is an AI framework for improving the quality of LLM-generated responses
                    by grounding the model on external sources of knowledge to supplement the LLM's internal representation of
                    information. Implementing RAG in an LLM-based question answering system has two main benefits: it ensures
                    that the model has access to the most current, reliable facts, and it gives users access to the model's
                    sources, so the information can be checked for accuracy and ultimately trusted.

                    🎯 **Purpose:** The main purpose of this RAG is answering questions related to the [AI Act](https://artificialintelligenceact.eu/wp-content/uploads/2024/01/AI-Act-FullText.pdf).
                    By incorporating external knowledge sources, RAG enables the LLM to provide more informed and reliable
                    responses specifically tailored to inquiries about it.

                    ⚠️ **Limitations:** This version is for beta testing only. The content generated by these models is unsupervised
                    and might be wrong. Please bear this in mind when exploring this resource.
                    """
                )
        with gr.Row(equal_height=True):
            with gr.Column(variant="panel"):
                input_ = Textbox(
                    lines=11,
                    label="Input",
                    placeholder="e.g. What is the AI Act?",
                    # value = "Quina és la finalitat del Servei Meteorològic de Catalunya?"
                    # (Catalan: "What is the purpose of the Meteorological Service of Catalonia?")
                )

            with gr.Column(variant="panel"):
                output = Textbox(
                    lines=11, label="Output", interactive=False, show_copy_button=True
                )
        with gr.Row(variant="panel"):
            clear_btn = Button("Clear")
            submit_btn = Button("Submit", variant="primary", interactive=False)

        input_.change(
            fn=change_interactive,
            inputs=[input_],
            outputs=[clear_btn, submit_btn],
            api_name=False,
        )

        # Client-side character counter; note that no component in this app
        # sets elem_id="inputlenght", so this handler has no visible effect here.
        input_.change(
            fn=None,
            inputs=[input_],
            api_name=False,
            js="""(i, m) => {
            document.getElementById('inputlenght').textContent = i.length + ' '
            document.getElementById('inputlenght').style.color = (i.length > m) ? "#ef4444" : "";
            }""",
        )

        clear_btn.click(
            fn=clear, inputs=[], outputs=[input_, output], queue=False, api_name=False
        )
        submit_btn.click(
            fn=submit_input, inputs=[input_], outputs=[output], api_name="get-results"
        )

    demo.launch(show_api=True)


if __name__ == "__main__":
    gradio_app()
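Because the app launches with show_api=True and exposes the submit handler as api_name="get-results", the Space can also be queried programmatically. A minimal sketch with gradio_client (the Space URL below is a placeholder, not part of this commit):

from gradio_client import Client

# Hypothetical deployment URL; substitute the actual Space address.
client = Client("https://<owner>-rag.hf.space")
answer = client.predict("What is the AI Act?", api_name="/get-results")
print(answer)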
gitignore ADDED

@@ -0,0 +1,4 @@

venv
**/__pycache__
.env
vectorestore/
handler.py ADDED

@@ -0,0 +1,14 @@

import json


class ContentHandler:
    # Serializes prompts to JSON request bodies and parses JSON responses
    # from a text-generation endpoint.
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        input_str = json.dumps({'inputs': prompt, 'parameters': model_kwargs})
        return input_str.encode('utf-8')

    def transform_output(self, output) -> str:
        # `output` is a file-like response body (it is read, not raw bytes).
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]
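Nothing in this commit wires ContentHandler up, so the endpoint shape is an assumption; a small round-trip sketch, with io.BytesIO standing in for the file-like response body that transform_output expects:

import io
import json

from handler import ContentHandler

handler = ContentHandler()

# Serialize a prompt plus generation parameters into a JSON byte payload.
body = handler.transform_input("What is the AI Act?", {"max_new_tokens": 64})
assert json.loads(body) == {"inputs": "What is the AI Act?",
                            "parameters": {"max_new_tokens": 64}}

# Parse a fake endpoint response of the expected shape.
fake_response = io.BytesIO(b'[{"generated_text": "A draft EU regulation..."}]')
print(handler.transform_output(fake_response))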
input_reader.py ADDED

@@ -0,0 +1,22 @@

from typing import List

from llama_index.core.constants import DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.schema import Document
from llama_index.core import Settings


class InputReader:
    def __init__(self, input_dir: str) -> None:
        self.reader = SimpleDirectoryReader(input_dir=input_dir)

    def parse_documents(
        self,
        show_progress: bool = True,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
    ) -> List[Document]:
        # Configure global chunking settings, then load the documents.
        Settings.chunk_size = chunk_size
        Settings.chunk_overlap = chunk_overlap
        documents = self.reader.load_data(show_progress=show_progress)
        return documents
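A usage sketch for InputReader; the "data" directory name is an assumption, since this commit does not include the source documents:

from input_reader import InputReader

reader = InputReader(input_dir="data")  # hypothetical folder holding the AI Act PDF
documents = reader.parse_documents(chunk_size=512, chunk_overlap=64)
print(f"Loaded {len(documents)} documents")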
rag.py ADDED

@@ -0,0 +1,73 @@

import logging
import os

import requests

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings


class RAG:
    NO_ANSWER_MESSAGE: str = "Sorry, I couldn't answer your question."

    def __init__(self, hf_token, embeddings_model, model_name):
        self.model_name = model_name
        self.hf_token = hf_token

        # Load the vector store with the same embeddings model used to build it.
        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model, model_kwargs={'device': 'cpu'})
        self.vectore_store = FAISS.load_local("vectorestore", embeddings, allow_dangerous_deserialization=True)

        logging.info("RAG loaded!")

    def get_context(self, instruction, number_of_contexts=1):
        # Retrieve the top-k most similar chunks and concatenate their text.
        context = ""
        documentos = self.vectore_store.similarity_search_with_score(instruction, k=number_of_contexts)
        for doc in documentos:
            # Each entry is a (Document, score) tuple; keep only the text.
            context += doc[0].page_content
        return context

    def predict(self, instruction, context):
        api_key = os.getenv("HF_TOKEN")

        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        query = f"### Instruction\n{instruction}\n\n### Context\n{context}\n\n### Answer\n "

        payload = {
            "inputs": query,
            "parameters": {}
        }

        # model_name holds the inference endpoint URL.
        response = requests.post(self.model_name, headers=headers, json=payload)

        # Keep only the text that follows the final "### Answer" marker.
        return response.json()[0]["generated_text"].split("###")[-1][8:-1]

    def get_response(self, prompt: str) -> str:
        context = self.get_context(prompt)
        response = self.predict(prompt, context)
        if not response:
            return self.NO_ANSWER_MESSAGE
        return response
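A minimal sketch of driving the RAG class directly, mirroring how app.py constructs it; it assumes HF_TOKEN, EMBEDDINGS, and MODEL are set in the environment (or .env) and that a FAISS index has been saved under vectorestore/:

import os

from rag import RAG
from utils import setup

setup()  # loads .env

rag = RAG(
    hf_token=os.getenv("HF_TOKEN"),
    embeddings_model=os.getenv("EMBEDDINGS"),
    model_name=os.getenv("MODEL"),  # the inference endpoint URL
)
print(rag.get_response("What is the AI Act?"))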
rag_image.jpg ADDED

requirements.txt ADDED

@@ -0,0 +1,8 @@

gradio==4.14.0
python-dotenv==1.0.0
llama-index==0.10.14
llama-index-embeddings-huggingface==0.1.4
llama-index-llms-huggingface==0.1.3
sentence-transformers
langchain
faiss-cpu
utils.py ADDED

@@ -0,0 +1,33 @@

import logging
import warnings

from dotenv import load_dotenv

from rag import RAG

# Custom logging level used to prompt for user input in the CLI loop.
USER_INPUT = 100


def setup():
    load_dotenv()
    warnings.filterwarnings("ignore")

    logging.addLevelName(USER_INPUT, "USER_INPUT")
    logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO)


def interactive(model: RAG):
    logging.info("Write `exit` when you want to stop the model.")
    print()

    query = ""
    while query.lower() != "exit":
        logging.log(USER_INPUT, "Write the query or `exit`:")
        query = input()

        if query.lower() == "exit":
            break

        response = model.get_response(query)
        print(response, end="\n\n")
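utils.interactive is not called anywhere in this commit; a sketch of wiring it up as a local CLI session, under the same environment-variable assumptions as the RAG example above:

import os

from rag import RAG
from utils import setup, interactive

if __name__ == "__main__":
    setup()
    model = RAG(
        hf_token=os.getenv("HF_TOKEN"),
        embeddings_model=os.getenv("EMBEDDINGS"),
        model_name=os.getenv("MODEL"),
    )
    interactive(model)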