File size: 3,249 Bytes
2642249
0864cdf
 
2642249
 
0864cdf
 
2642249
 
0864cdf
2642249
 
 
babe37e
 
b817db2
babe37e
b817db2
 
 
 
babe37e
 
 
 
0864cdf
2642249
0864cdf
2642249
ad7c9e5
0864cdf
2642249
 
 
0864cdf
 
2642249
0864cdf
2642249
0864cdf
2642249
 
 
0864cdf
2642249
0864cdf
2642249
2d626a9
0864cdf
2642249
 
 
0864cdf
d073b51
2642249
 
 
 
 
d073b51
94e7992
2642249
94e7992
ec4c7d2
94e7992
ec4c7d2
94e7992
ec4c7d2
94e7992
 
ec4c7d2
 
 
 
 
 
94e7992
 
 
 
2642249
 
 
 
0864cdf
b817db2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import json
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from huggingface_hub import InferenceClient

# ๐Ÿ”น Load JSON Data with Colorado Food Stamp Information
# Path to the knowledge-base file; read once at import time by load_json_data().
DATA_FILE = "colorado_foodstamps.json"

def load_json_data(path=None):
    """Load the Colorado food-stamp knowledge base from a JSON file.

    Parameters
    ----------
    path : str | None
        Path of the JSON file to read. Defaults to the module-level
        ``DATA_FILE`` (resolved at call time, so the default stays
        backward-compatible).

    Returns
    -------
    dict[str, str]
        Mapping of keys to string values. A top-level JSON array is
        converted to a dict keyed by stringified index. On failure a
        single-entry ``{"error": "..."}`` dict is returned instead of
        raising, so callers always receive a dict.
    """
    if path is None:
        path = DATA_FILE
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, ValueError) as e:
        # json.JSONDecodeError subclasses ValueError, so malformed JSON
        # lands here as well; the error dict keeps the app importable.
        return {"error": f"Data loading issue: {e}"}

    # A JSON array has no keys; synthesize string indices so downstream
    # code can treat the payload uniformly as a dict.
    if isinstance(data, list):
        data = {str(i): str(entry) for i, entry in enumerate(data)}

    # Normalize every value to str so the embedding step gets plain text.
    return {key: str(value) for key, value in data.items()}

# Load the knowledge base once at import time; the FAISS index build and
# query-time embedding below both depend on this module-level state.
data = load_json_data()

# ๐Ÿ”น Initialize FAISS for Searching Relevant Answers
# Downloads/loads the embedding model at import time (network on first run).
model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")  # slower with great accuracy

def create_faiss_index(data):
    """Build an exact L2 FAISS index over the string values of *data*.

    Parameters
    ----------
    data : dict[str, str]
        Mapping whose values are the documents to index.

    Returns
    -------
    tuple
        ``(index, texts)`` — the populated ``faiss.IndexFlatL2`` and the
        documents in insertion order, so a FAISS hit index maps directly
        back to its source text.
    """
    texts = list(data.values())
    # Encode all documents in one batched call instead of one model
    # invocation per document — same vectors, far less per-call overhead.
    # faiss requires float32 input; make that explicit.
    embeddings = np.asarray(model.encode(texts), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, texts

index, texts = create_faiss_index(data)

# ๐Ÿ”น Function to Search FAISS for Relevant Answers
def search_faiss(query, top_k=1):
    """Return the indexed text closest to *query*, or a not-found message.

    Parameters
    ----------
    query : str
        Natural-language question to embed and match.
    top_k : int
        Number of neighbors to request from FAISS; only the best hit is
        returned.

    Returns
    -------
    str
        The best-matching document, or ``"No relevant information
        found."`` when FAISS reports no valid neighbor.
    """
    query_embedding = model.encode(query).reshape(1, -1)
    distances, indices = index.search(query_embedding, top_k)
    best = int(indices[0][0])
    # FAISS signals "no neighbor" with -1. The original check
    # `idx < len(texts)` let -1 through, silently returning texts[-1];
    # require a non-negative index as well.
    if 0 <= best < len(texts):
        return texts[best]
    return "No relevant information found."

# ๐Ÿ”น Hugging Face API for Additional Responses
# Remote inference endpoint; needs network access (and HF auth where the
# model is gated) at call time, not at construction.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")

def get_huggingface_response(query):
    """Stream a chat completion for *query* and return the full text.

    Parameters
    ----------
    query : str
        User message (possibly already wrapped with retrieved context).

    Returns
    -------
    str
        Concatenation of all streamed content chunks.
    """
    messages = [{"role": "system", "content": "Provide accurate food stamp information for Colorado."},
                {"role": "user", "content": query}]
    response = ""
    for message in client.chat_completion(messages, max_tokens=1024, stream=True, temperature=0.7, top_p=0.95):
        # Streaming deltas may carry content=None (e.g. role-only or final
        # chunks); the original unconditional `+=` raised TypeError there.
        chunk = message.choices[0].delta.content
        if chunk:
            response += chunk
    return response

# ๐Ÿ”น Main Chatbot Function
def chatbot_response(message, history):
    """Answer a user question, grounding it in retrieved policy text.

    Parameters
    ----------
    message : str
        The user's question.
    history : list
        Gradio chat history (unused; required by the ChatInterface API).

    Returns
    -------
    str
        The model's answer — wrapped with retrieved policy context when
        the FAISS lookup produced a hit, otherwise generated from the
        bare question.
    """
    relevant_info = search_faiss(message, top_k=1)

    # Nothing usable was retrieved: fall straight through to the model.
    if "No relevant information found." in relevant_info:
        return get_huggingface_response(message)

    # A hit was found — embed it in an instruction-rich prompt.
    user_query_with_context = f"""
        You are an expert in Colorado SNAP (food stamp) policies. The user is asking:

        **User Question:** {message}

        ### **Relevant Policy Information Retrieved (Multiple Sources)**
        {relevant_info}

        ### **Task:**
        - **Summarize all retrieved policy information** and provide a clear, concise answer.
        - **Use bullet points** for clarity.
        - **If a rule applies, state it explicitly.**
        - **If multiple sources provide different information, clarify the distinctions.**
        - **If the policy does not fully answer the question, provide general guidance and suggest relevant keywords to search.**
        """
    return get_huggingface_response(user_query_with_context)

# ๐Ÿ”น Gradio Chat Interface
# chatbot_response(message, history) matches the ChatInterface callback
# signature expected by Gradio.
demo = gr.ChatInterface(chatbot_response, textbox=gr.Textbox(placeholder="Ask about Colorado food stamps..."))

# Blocking call: starts the local web server when the module is run.
demo.launch()