raptor1 committed on
Commit
3a6ab29
·
verified ·
1 Parent(s): 4a90d86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -49
app.py CHANGED
@@ -1,64 +1,95 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
 
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
 
 
 
 
 
 
 
 
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
9
 
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
 
25
 
26
- messages.append({"role": "user", "content": message})
 
27
 
28
- response = ""
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
 
39
- response += token
40
- yield response
41
 
 
42
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- if __name__ == "__main__":
64
- demo.launch()
 
1
# Importing libraries.
import gdown
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration

"""The model and tokenizer must be initialized before get_answer is called;
otherwise the UI cannot show any model-generated answers."""

# Initialize the tokenizer and model.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
# Use "cpu": the app is deployed on the Hugging Face free tier (no GPU).
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base").to("cpu")

"""Loading the FAQs CSV from Google Drive into a Python dictionary."""

# Download the CSV file using gdown.
file_id = "1O4CWfDo9h7MDK5KH5fktJeSTleoew6OY"  # file ID from Google Drive
gdown.download(f"https://drive.google.com/uc?id={file_id}", "faqs.csv", quiet=False)

"""
Tried downloading directly from Drive; it did not work reliably. Could also
have used the upload feature in Colab:

    data_url = "https://drive.google.com/uc?export=download&id=1O4CWfDo9h7MDK5KH5fktJeSTleoew6OY"
    df = pd.read_csv(data_url)
"""

# Creating the dataframe from the file downloaded above.
# BUG FIX: the original read `data_url`, a name defined only inside the
# string literal above (NameError at runtime); read the downloaded file.
df = pd.read_csv("faqs.csv")

# Converting df to a list of record dicts for easy retrieval of Q/A pairs
# instead of indexing into the dataframe.
faqs = df.to_dict(orient="records")
print("Loaded FAQs:", faqs)

"""Chat bot logic:
Retrieves an answer from the FAQs dataset based on user input, then uses the
Google Flan-T5 model (a transformer-based language model) to refine it.
"""
 
42
 
43
def get_answer(question):
    """Look up the closest FAQ for *question* and refine its answer with Flan-T5.

    Returns a formatted string carrying the FAQ category plus the
    model-generated answer, or a default support message when no FAQ matches.
    """
    # Find the first FAQ whose question contains the user's query
    # (case-insensitive substring match).
    query = question.lower()
    closest_faq = next(
        (faq for faq in faqs if query in faq["Question"].lower()),
        None,
    )

    # If the query does not match any FAQ, return a default response.
    if closest_faq is None:
        return "Please contact support for further assistance. Thanks!"

    # Prompt the model with the stored FAQ answer as the only allowed context,
    # so the generated reply stays grounded in verified content rather than
    # the model's general knowledge.
    prompt = f"""
    Question: {question}
    Answer this question using ONLY the context below:
    Context: {closest_faq["Answer"]}
    """

    # Tokenize the prompt, generate up to 200 tokens on CPU, then decode the
    # output back to human-readable text (dropping special tokens).
    model_inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    generated = model.generate(**model_inputs, max_length=200)
    answer = tokenizer.decode(generated[0], skip_special_tokens=True)

    # Include the FAQ category in the response.
    return f"CATEGORY :\n {closest_faq['Category']}\nANSWER :\n {answer}"
77
 
78
+ """Using closest_faq["Answer"] in the context provides relevant, structured information that enhances the quality of responses generated by the Flan-T5 model. It ensures that answers are based on verified content rather than relying solely on general knowledge, which is vital for applications like customer support. Combining retrieval with language processing delivers precise and contextually appropriate answers.
79
 
80
+ Building a simple UI using Gradio
81
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
import gradio as gr

# Simple chat interface wired to the FAQ answering function.
question_box = gr.Textbox(label="Ask a question")
answer_box = gr.Textbox(label="Answer")

demo = gr.Interface(
    fn=get_answer,
    inputs=question_box,
    outputs=answer_box,
    title="SaaS Support Chatbot",
    examples=["How do I cancel my subscription?", "I forgot my password"],
)

# Launching the UI; share=True generates a public shareable link (useful in Colab).
demo.launch(share=True)