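"""Amtrak Acela maintenance assistant (RAG demo).

Pipeline: download the combined manual text from S3, split it into chunks, embed the
chunks with a sentence-transformers model, store the vectors in Pinecone, and serve a
Gradio chat interface that retrieves relevant chunks and answers questions with OpenAI.
"""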
import openai
import pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import boto3
import os
from time import sleep
from dotenv import load_dotenv
import gradio as gr

# Load environment variables
load_dotenv()

# Load OpenAI and Pinecone API keys from environment variables
openai.api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# Download the combined extracted text file from S3
s3_client = boto3.client('s3',
                         aws_access_key_id=aws_access_key,
                         aws_secret_access_key=aws_secret_key,
                         region_name='us-east-1')

bucket_name = 'amtrak-superliner-ai-poc'  # Replace with your S3 bucket name
txt_file_name = 'combined_extracted_text.txt'  # Name of the text file stored in S3
local_txt_path = f'/tmp/{txt_file_name}'  # Temporary location to store the file locally

# Download the text file from S3
s3_client.download_file(bucket_name, txt_file_name, local_txt_path)

# Load the extracted text from the text file
with open(local_txt_path, 'r') as f:
    doc = f.read()

# Split the document into smaller chunks (increase chunk size as needed)
text_splitter = CharacterTextSplitter(separator='\n', chunk_size=2000, chunk_overlap=500)
docs = [Document(page_content=doc)]
split_docs = text_splitter.split_documents(docs)
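# Note: CharacterTextSplitter only breaks on the given separator ('\n'), so an individual
# chunk can exceed chunk_size when a single line is longer than 2000 characters.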

# Initialize the HuggingFace sentence-transformers embedding model (msmarco-distilbert-base-v4)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")

# Create embeddings for the document chunks
doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in split_docs]

# Initialize the Pinecone client
pc = pinecone.Pinecone(api_key=pinecone_api_key)

# Create the Pinecone index if it doesn't exist yet
index_name = "amtrak-acela-ai-demo"
embedding_dim = 768  # Output dimension of msmarco-distilbert-base-v4
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dim,
        metric="cosine",
        spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
    )
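    # A newly created serverless index can take a few seconds to become ready;
    # poll its status before upserting (uses the `sleep` imported above).
    while not pc.describe_index(index_name).status['ready']:
        sleep(1)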

# Connect to the Pinecone index
index = pc.Index(index_name)

# Upload document embeddings to Pinecone with metadata
for i, chunk in enumerate(split_docs):
    index.upsert(vectors=[(str(i), doc_embeddings[i], {'content': chunk.page_content})])
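# Note: each chunk is upserted individually for simplicity; for larger corpora,
# sending vectors in batches (e.g. 100 per upsert call) reduces round trips.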

# Set up conversation memory (Gradio's ChatInterface also passes the chat history to the handler directly)
memory = ConversationBufferMemory()

# Define a prompt template for retrieval-augmented generation (RAG)
RAG_PROMPT_TEMPLATE = '''
Here is some important context that can help inform the Human's question:

{context}

Human: {human_input}

Please provide a specific and accurate answer based on the provided context.

Assistant:
'''

PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

def get_model_response(human_input, chat_history=None):  # gr.ChatInterface calls this with (message, chat_history)
    try:
        # Step 1: Embed the user input
        query_embedding = embedding_model.embed_query(human_input)

        # Step 2: Query Pinecone using the embedding vector
        search_results = index.query(
            vector=query_embedding,
            top_k=5,
            include_metadata=True  # Ensures metadata is included in the results
        )

        # Step 3: Extract relevant context (actual document content) from the search results
        context_list = []
        for ind, result in enumerate(search_results['matches']):
            document_content = result.get('metadata', {}).get('content', 'No content found')
            context_list.append(f"Document {ind+1}: {document_content}")

        # Combine context into a string
        context_string = '\n\n'.join(context_list)

        # Step 4: Fill the RAG prompt template with the retrieved context and the question
        prompt_text = PROMPT.format(context=context_string, human_input=human_input)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt_text}
        ]

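        # Note: openai.ChatCompletion is the pre-1.0 openai-python interface; with
        # openai>=1.0 this call would need to be rewritten against the client-based API.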
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=400,
            temperature=0.7
        )

        # Extract and return the model’s output
        output_text = response['choices'][0]['message']['content'].strip()
        return output_text

    except Exception as e:
        return f"Error invoking model: {str(e)}"

# Gradio ChatInterface
gr_interface = gr.ChatInterface(
    fn=get_model_response,
    title="Amtrak Acela RMM Maintenance Assistant",
    description="Ask questions related to the RMM documents."
)

# Launch the Gradio app on Hugging Face Spaces
gr_interface.launch()