Ali-C137 committed on
Commit a7cbbd8 · verified · 1 Parent(s): 7ba5c3a

Update app_dialogue.py

Files changed (1)
  1. app_dialogue.py +70 -32
app_dialogue.py CHANGED
@@ -8,6 +8,11 @@ subprocess.run(
     shell=True,
 )

+# Install RAG dependencies
+subprocess.run(
+    "pip install langchain sentence-transformers faiss-cpu",
+    shell=True,
+)

 import copy
 import spaces
@@ -25,7 +30,6 @@ import gradio as gr
 from transformers import AutoProcessor, TextIteratorStreamer
 from transformers import Idefics2ForConditionalGeneration

-
 DEVICE = torch.device("cuda")
 MODELS = {
     "idefics2-8b-chatty": Idefics2ForConditionalGeneration.from_pretrained(
@@ -40,6 +44,66 @@ PROCESSOR = AutoProcessor.from_pretrained(
     # "Ali-C137/idefics2-8b-chatty-yalla",
 )

+# Load the custom dataset
+knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
+
+# Process the documents
+source_docs = [
+    Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
+    for doc in knowledge_base
+]
+docs_processed = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(source_docs)[:1000]
+
+# Create embeddings and vector store
+embedding_model = HuggingFaceEmbeddings("thenlper/gte-small")
+vectordb = FAISS.from_documents(documents=docs_processed, embedding=embedding_model)
+
+class RetrieverTool(Tool):
+    name = "retriever"
+    description = "Retrieves documents from the knowledge base that have the closest embeddings to the input query."
+    inputs = {
+        "query": {
+            "type": "text",
+            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
+        },
+        "source": {
+            "type": "text",
+            "description": "",
+        },
+    }
+    output_type = "text"
+
+    def __init__(self, vectordb: VectorStore, all_sources: str, **kwargs):
+        super().__init__(**kwargs)
+        self.vectordb = vectordb
+        self.inputs["source"]["description"] = (
+            f"The source of the documents to search, as a str representation of a list. Possible values in the list are: {all_sources}. If this argument is not provided, all sources will be searched."
+        )
+
+    def forward(self, query: str, source: str = None) -> str:
+        assert isinstance(query, str), "Your search query must be a string"
+
+        if source:
+            if isinstance(source, str) and "[" not in str(source):  # if the source is not representing a list
+                source = [source]
+            source = json.loads(str(source).replace("'", '"'))
+
+        docs = self.vectordb.similarity_search(query, filter=({"source": source} if source else None), k=3)
+
+        if len(docs) == 0:
+            return "No documents found with this filtering. Try removing the source filter."
+        return "Retrieved documents:\n\n" + "\n===Document===\n".join(
+            [doc.page_content for doc in docs]
+        )
+
+from transformers.agents import HfEngine, ReactJsonAgent
+
+# Initialize the LLM engine and the agent with the retriever tool
+llm_engine = HfEngine("meta-llama/Meta-Llama-3-8B-Instruct")
+all_sources = list(set([doc.metadata["source"] for doc in docs_processed]))
+retriever_tool = RetrieverTool(vectordb, all_sources)
+agent = ReactJsonAgent(tools=[retriever_tool], llm_engine=llm_engine)
+
 # Should change this section for the finetuned model
 SYSTEM_PROMPT = [
     {
@@ -92,12 +156,10 @@ EXAMPLES = [
 # BOT_AVATAR = "IDEFICS_logo.png"
 BOT_AVATAR = "YALLA_logo.png"

-
 # Chatbot utils
 def turn_is_pure_media(turn):
     return turn[1] is None

-
 def load_image_from_url(url):
     with urllib.request.urlopen(url) as response:
         image_data = response.read()
@@ -105,7 +167,6 @@ def load_image_from_url(url):
         image = Image.open(image_stream)
         return image

-
 def img_to_bytes(image_path):
     image = Image.open(image_path).convert(mode='RGB')
     buffer = io.BytesIO()
@@ -114,7 +175,6 @@ def img_to_bytes(image_path):
     image.close()
     return img_bytes

-
 def format_user_prompt_with_im_history_and_system_conditioning(
     user_prompt, chat_history
 ) -> List[Dict[str, Union[List, str]]]:
@@ -179,7 +239,6 @@ def format_user_prompt_with_im_history_and_system_conditioning(

     return resulting_messages, resulting_images

-
 def extract_images_from_msg_list(msg_list):
     all_images = []
     for msg in msg_list:
@@ -188,8 +247,6 @@ def extract_images_from_msg_list(msg_list):
                 all_images.append(c_)
     return all_images

-
-# comment this call of spaces.GPU later
 @spaces.GPU(duration=60, queue=False)
 def model_inference(
     user_prompt,
@@ -214,7 +271,6 @@ def model_inference(
     )

     # Common parameters to all decoding strategies
-    # This documentation is useful to read: https://huggingface.co/docs/transformers/main/en/generation_strategies
     generation_args = {
         "max_new_tokens": max_new_tokens,
         "repetition_penalty": repetition_penalty,
@@ -233,10 +289,7 @@ def model_inference(
         generation_args["top_p"] = top_p

     # Creating model inputs
-    (
-        resulting_text,
-        resulting_images,
-    ) = format_user_prompt_with_im_history_and_system_conditioning(
+    resulting_text, resulting_images = format_user_prompt_with_im_history_and_system_conditioning(
         user_prompt=user_prompt,
         chat_history=chat_history,
     )
@@ -249,20 +302,17 @@ def model_inference(
     inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
     generation_args.update(inputs)

-    # # The regular non streaming generation mode
-    # _ = generation_args.pop("streamer")
-    # generated_ids = MODELS[model_selector].generate(**generation_args)
-    # generated_text = PROCESSOR.batch_decode(generated_ids[:, generation_args["input_ids"].size(-1): ], skip_special_tokens=True)[0]
-    # return generated_text
+    # Use the agent to perform RAG
+    agent_output = agent.run(user_prompt["text"])
+    print("Agent output:", agent_output)

-    # The streaming generation mode
+    # Stream the generated text
     thread = Thread(
         target=MODELS[model_selector].generate,
         kwargs=generation_args,
     )
     thread.start()

-    print("Start generating")
     acc_text = ""
     for text_token in streamer:
         time.sleep(0.04)
@@ -273,7 +323,6 @@ def model_inference(
     print("Success - generated the following text:", acc_text)
     print("-----")

-
 FEATURES = datasets.Features(
     {
         "model_selector": datasets.Value("string"),
@@ -287,7 +336,6 @@ FEATURES = datasets.Features(
     }
 )

-
 # Hyper-parameters for generation
 max_new_tokens = gr.Slider(
     minimum=8,
@@ -337,23 +385,14 @@ top_p = gr.Slider(
     info="Higher values is equivalent to sampling more low-probability tokens.",
 )

-
 chatbot = gr.Chatbot(
     label="YALLA-Chatty",
     avatar_images=[None, BOT_AVATAR],
     height=450,
 )

-# with gr.Blocks(
-#     fill_height=True, # Use this below !?
-#     css=""".gradio-container .avatar-container {height: 40px width: 40px !important;} #duplicate-button {margin: auto; color: white; background: #f1a139; border-radius: 100vh; margin-top: 2px; margin-bottom: 2px;}""",
-# ) as demo:
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown("# 🇲🇦 YALLA ")
-    # gr.Markdown("In this demo you'll be able to chat with YALLA, a variant of [Idefics2-8B](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty) further fine-tuned on chat datasets, and Moroccan culture 🇲🇦")
-    # gr.Markdown("If you want to learn more about Idefics2 and its variants, you can check our [blog post](https://huggingface.co/blog/idefics2).")
-    # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-    # model selector should be set to `visbile=False` ultimately
     with gr.Row(elem_id="model_selector_row"):
         model_selector = gr.Dropdown(
             choices=MODELS.keys(),
@@ -390,7 +429,6 @@ with gr.Blocks(fill_height=True) as demo:
         fn=model_inference,
         chatbot=chatbot,
         examples=EXAMPLES,
-        # multimodal=True,
         multimodal=False,
         cache_examples=False,
         additional_inputs=[
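The new RAG block builds a LangChain/FAISS index and wraps it in a Tool for a transformers ReAct agent, but none of the hunks shown add the imports those names need (Document, RecursiveCharacterTextSplitter, HuggingFaceEmbeddings, FAISS, VectorStore, Tool, json). A minimal sketch of the imports the block appears to assume, based on the `pip install langchain sentence-transformers faiss-cpu` line added at the top of the file; the exact module paths depend on the LangChain release, so treat these as assumptions rather than part of the commit:

# Imports the new RAG block appears to rely on (not shown in the hunks above).
# Module paths are a best guess for the classic `langchain` package implied by
# the pip install line; newer releases move these into langchain_community /
# langchain_huggingface.
import json

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.vectorstores.base import VectorStore
from transformers.agents import Tool

Note also that HuggingFaceEmbeddings is normally constructed with a keyword argument, e.g. `HuggingFaceEmbeddings(model_name="thenlper/gte-small")`; the positional form in the diff may raise a TypeError on recent LangChain releases.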
 
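Inside model_inference, the commit calls `agent.run(user_prompt["text"])` and prints the result, while the streamed reply still comes from the Idefics2 generate call. A hypothetical smoke test for the retrieval layer, assuming the module-level setup above has already executed; the query string is illustrative only:

# Hypothetical smoke test for the retrieval layer added in this commit.
# It reuses the module-level objects defined above (vectordb, retriever_tool, agent).
query = "How to push a dataset to the Hugging Face Hub"

# Direct vector-store lookup, bypassing the agent.
for doc in vectordb.similarity_search(query, k=3):
    print(doc.metadata["source"], "->", doc.page_content[:80])

# The same lookup through the tool interface the agent calls.
print(retriever_tool.forward(query))

# Full ReAct loop: the agent decides when and how to call the retriever.
print(agent.run(query))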