# app.py

# Install the required libraries (uncomment the line below if they aren't already installed)
# !pip install pandas sentence-transformers transformers datasets faiss-cpu gradio

# Import libraries
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from datasets import load_dataset
import gradio as gr

# Load the customer-support Q&A dataset from Hugging Face
support_data = load_dataset("rjac/e-commerce-customer-support-qa")

# Load FAQ data from a local CSV file directly
# Ensure this file is in the same directory or provide the full file path
faq_data = pd.read_csv("Ecommerce_FAQs.csv")

# Preprocess and Clean Data
faq_data.rename(columns={'prompt': 'Question', 'response': 'Answer'}, inplace=True)
faq_data = faq_data[['Question', 'Answer']]
support_data_df = pd.DataFrame(support_data['train'])

# Extract question-answer pairs from the conversation field
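# The conversation field is assumed to be blank-line-separated turns (e.g. a
# customer message followed by an agent reply); only the first Q&A pair is kept.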
def extract_conversation(data):
    try:
        parts = data.split("\n\n")
        question = parts[1].split(": ", 1)[1] if len(parts) > 1 else ""
        answer = parts[2].split(": ", 1)[1] if len(parts) > 2 else ""
        return pd.Series({"Question": question, "Answer": answer})
    except IndexError:
        return pd.Series({"Question": "", "Answer": ""})

# Apply extraction function
support_data_df[['Question', 'Answer']] = support_data_df['conversation'].apply(extract_conversation)

# Combine FAQ data with support data
combined_data = pd.concat([faq_data, support_data_df[['Question', 'Answer']]], ignore_index=True)

# Initialize SBERT Model
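# all-mpnet-base-v2 maps each sentence to a 768-dimensional embedding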
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Generate and Index Embeddings for Combined Data
questions = combined_data['Question'].tolist()
embeddings = model.encode(questions, convert_to_tensor=True)

# Create FAISS index
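# IndexFlatL2 performs exact (brute-force) nearest-neighbour search on L2 distance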
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings.cpu().numpy())

# Define Retrieval Function
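# Embed the incoming question and return the answer of its closest match in the index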
def retrieve_answer(question):
    question_embedding = model.encode([question], convert_to_tensor=True)
    question_embedding_np = question_embedding.cpu().numpy()
    _, closest_index = index.search(question_embedding_np, k=1)
    best_match_idx = closest_index[0][0]
    answer = combined_data.iloc[best_match_idx]['Answer']
    return answer
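
# Quick local check (optional, not used by the Gradio app); the query below is
# just an illustrative example:
#   print(retrieve_answer("How do I track my order?"))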

# Gradio Interface
def chatbot_interface(user_input):
    response = retrieve_answer(user_input)
    return f"Bot: {response}"

# Set up Gradio Chat Interface
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
    outputs="text",
    title="E-commerce Support Chatbot",
    description="Ask questions about order tracking, returns, account help, and more!"
)

# Launch the Interface
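# share=True also generates a temporary public Gradio link alongside the local URL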
iface.launch(share=True)