juancho72h committed
Commit 30cb161 · verified · 1 Parent(s): e20e23d

Upload 2 files

Files changed (2):
  1. app.py +98 -63
  2. requirements.txt +41 -1
app.py CHANGED
@@ -1,63 +1,98 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
+ import os
+ import pinecone
+ import openai
+ import gradio as gr
+ import torch
+ from dotenv import load_dotenv
+ from pinecone import Pinecone
+ from langchain_huggingface import HuggingFaceEmbeddings
+
+ # Detect GPU availability and set device
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Running on device: {device}")
+
+ # Suppress specific warning about clean_up_tokenization_spaces
+ import warnings
+ warnings.filterwarnings("ignore", category=FutureWarning, message="clean_up_tokenization_spaces was not set")
+
+ # Load environment variables
+ load_dotenv()
+
+ # Access Pinecone and OpenAI API keys from environment variables
+ pinecone_api_key = os.getenv("PINECONE_API_KEY")
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ index_name = "amtrak-rmm-image-text"
+
+ # Initialize Pinecone using a class-based method
+ pc = Pinecone(api_key=pinecone_api_key)
+
+ # Check if the index exists; if not, report it (the index is not created here)
+ def initialize_pinecone_index(index_name):
+     available_indexes = pc.list_indexes().names()
+     if index_name not in available_indexes:
+         print(f"Index '{index_name}' does not exist.")
+         # Create the index here if necessary for ZeroGPU usage
+     return pc.Index(index_name)
+
+ index = initialize_pinecone_index(index_name)
+
+ # Initialize HuggingFace embedding model
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")
+
+ # Function to interact with Pinecone and the OpenAI chat model
+ def get_model_response(human_input, chat_history=None):
+     try:
+         # Embed the query, moving the tensor to the GPU when available
+         query_embedding = torch.tensor(embedding_model.embed_query(human_input)).to(device)
+
+         # Convert the tensor back to a plain list so the vector is JSON-serializable for Pinecone
+         query_embedding = query_embedding.cpu().numpy().tolist()
+
+         # Query the Pinecone index for the two closest matches
+         search_results = index.query(vector=query_embedding, top_k=2, include_metadata=True)
+
+         context_list, images = [], []
+         for ind, result in enumerate(search_results['matches']):
+             document_content = result.get('metadata', {}).get('content', 'No content found')
+             image_url = result.get('metadata', {}).get('image_path', None)
+             figure_desc = result.get('metadata', {}).get('figure_description', '')
+
+             context_list.append(f"Document {ind+1}: {document_content}")
+
+             if image_url and figure_desc:
+                 images.append((figure_desc, image_url))
+
+         context_string = '\n\n'.join(context_list)
+         messages = [{"role": "system", "content": "You are a helpful assistant."},
+                     {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}]
+
+         # Generate response using OpenAI GPT-3.5 Turbo for faster responses
+         response = openai.ChatCompletion.create(
+             model="gpt-3.5-turbo",
+             messages=messages,
+             max_tokens=500,
+             temperature=0.5
+         )
+         output_text = response['choices'][0]['message']['content'].strip()
+         return output_text, images
+     except Exception as e:
+         return f"Error invoking model: {str(e)}", []
+
+ # Function to format text and images for display
+ def get_model_response_with_images(human_input, chat_history=None):
+     output_text, images = get_model_response(human_input, chat_history)
+     if images:
+         image_output = "".join([f"\n\n**{figure_desc}**\n![{figure_desc}]({image_path})" for figure_desc, image_path in images])
+         return output_text + image_output
+     return output_text
+
+ # Set up Gradio interface
+ gr_interface = gr.ChatInterface(
+     fn=get_model_response_with_images,
+     title="Maintenance Assistant",
+     description="Ask questions related to the RMMM documents."
+ )
+
+ # Ensure ZeroGPU or Hugging Face Spaces handles launching properly
+ if __name__ == "__main__":
+     gr_interface.launch()
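
Editor's note on the embedding step in the new get_model_response: HuggingFaceEmbeddings.embed_query already returns a plain Python list of floats, which the committed code wraps in a torch tensor, moves to the GPU, and immediately copies back to the CPU to make it JSON-serializable again, so the device transfer does no work for a single query. Below is a minimal sketch of the equivalent lookup without that round trip. It reuses the same model and index names as app.py; the example question is hypothetical, and the simplification is an editorial suggestion, not part of the commit.

import os
from pinecone import Pinecone
from langchain_huggingface import HuggingFaceEmbeddings

# Same objects app.py builds, assembled locally for illustration.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("amtrak-rmm-image-text")
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")

# embed_query returns list[float], which is already JSON-serializable,
# so the vector can be passed straight to Pinecone.
query_vector = embedding_model.embed_query("How do I inspect the brake assembly?")  # hypothetical query
search_results = index.query(vector=query_vector, top_k=2, include_metadata=True)
for match in search_results["matches"]:
    print(match["id"], match["score"])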
requirements.txt CHANGED
@@ -1 +1,41 @@
- huggingface_hub==0.22.2
+ # Core dependencies
+ openai==0.28
+ pinecone-client==5.0.1
+ langchain==0.3.1  # Ensure compatibility with your current LangChain version
+ langchain-huggingface==0.1.0  # Updated for HuggingFaceEmbeddings
+ langchain-community==0.3.1  # Updated LangChain community version
+ boto3==1.35.27
+ gradio==4.36.1
+ python-dotenv==1.0.1
+ sentence-transformers==3.1.1
+ transformers==4.45.0
+ poppler-utils==0.1.0
+ uvicorn==0.30.6
+ fastapi==0.112.4
+
+ # Additional dependencies for specific functionality
+ altair==5.4.1
+ authlib==1.3.2
+ cryptography==43.0.1
+ psutil==5.9.8
+ spaces==0.30.2
+ itsdangerous==2.2.0
+ pydub==0.25.1
+ matplotlib==3.9.2
+ numpy==1.26.4
+ scikit-learn==1.5.2
+ scipy==1.14.1
+ sqlalchemy==2.0.35
+ tenacity==8.5.0
+ requests==2.32.3
+ torch==2.4.0
+ pandas
+
+ # Dependency for Pinecone plugin inference
+ pinecone-plugin-inference==1.1.0
+
+ # Update langchain-text-splitters for compatibility with langchain==0.3.1
+ langchain-text-splitters==0.3.0
+
+ # Updated for langchain-huggingface embedding handling
+ langchain-huggingface==0.1.0
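
Editor's note: the openai==0.28 pin matches the pre-1.0 openai.ChatCompletion.create call in app.py, and the only secrets the app reads via load_dotenv() are PINECONE_API_KEY and OPENAI_API_KEY. A small, hypothetical pre-flight check for running the Space locally against these pins (the script name and wording are illustrative, not part of the commit):

import os
from dotenv import load_dotenv

# Load variables from a local .env file, just as app.py does at startup.
load_dotenv()

# Fail fast if either key app.py depends on is missing.
missing = [key for key in ("PINECONE_API_KEY", "OPENAI_API_KEY") if not os.getenv(key)]
if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")
print("Secrets present; start the app with: python app.py")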