CamiloVega committed
Commit f23e4af · verified · 1 Parent(s): 316a253

Update app.py

Files changed (1)
  1. app.py +127 -97
app.py CHANGED
@@ -11,9 +11,8 @@ from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain_community.llms import HuggingFacePipeline
 from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from huggingface_hub import login
-import bitsandbytes as bnb
 
 # Configure logging
 logging.basicConfig(
@@ -54,104 +53,62 @@ class RAGSystem:
 
         # Initialize embeddings
         self.initialize_embeddings()
-
+
     def initialize_embeddings(self):
-        """Initialize embedding model."""
-        try:
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-            self.embeddings = HuggingFaceEmbeddings(
-                model_name=EMBEDDING_MODEL,
-                model_kwargs={
-                    'device': device
-                },
-                encode_kwargs={
-                    'normalize_embeddings': True
-                }
-            )
-            logger.info(f"Embeddings initialized successfully on {device}")
-        except Exception as e:
-            logger.error(f"Error initializing embeddings: {str(e)}")
-            raise
+        """Initialize embedding model."""
+        try:
+            self.embeddings = HuggingFaceEmbeddings(
+                model_name=EMBEDDING_MODEL,
+                model_kwargs={'device': self.device},
+                encode_kwargs={'normalize_embeddings': True}
+            )
+            logger.info(f"Embeddings initialized successfully on {self.device}")
+        except Exception as e:
+            logger.error(f"Error initializing embeddings: {str(e)}")
+            raise
 
-    def initialize_llm(self):
-        """Initialize the language model and QA chain."""
-        try:
-            # Get Hugging Face token
-            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
-            if not hf_token:
-                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
-
-            # Login to Hugging Face
-            login(token=hf_token)
-
-            device = "cuda" if torch.cuda.is_available() else "cpu"
+    def validate_file(self, file_path: str, file_size: int) -> bool:
+        """Validate uploaded file."""
+        if file_size > self.max_file_size:
+            raise ValueError(f"File size exceeds {self.max_file_size // 1024 // 1024}MB limit")
 
-            # Configure model loading based on device
-            if device == "cuda":
-                model_config = {
-                    'torch_dtype': torch.float16,
-                    'device_map': "auto",
-                }
-            else:
-                model_config = {
-                    'device_map': "auto",
-                    'low_cpu_mem_usage': True,
-                }
-
-            # Initialize tokenizer and model
-            tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_NAME,
-                token=hf_token,
-                trust_remote_code=True
-            )
-
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                token=hf_token,
-                trust_remote_code=True,
-                **model_config
-            )
-
-            # Create pipeline
-            pipe_config = {
-                "model": model,
-                "tokenizer": tokenizer,
-                "max_new_tokens": 512,
-                "temperature": 0.1,
-                "device_map": "auto",
-            }
-
-            pipe = pipeline("text-generation", **pipe_config)
-            llm = HuggingFacePipeline(pipeline=pipe)
-
-            # Create QA chain
-            prompt_template = """
-            Context: {context}
-
-            Based on the context above, please provide a clear and concise answer to the following question.
-            If the information is not in the context, explicitly state so.
-
-            Question: {question}
-            """
-
-            PROMPT = PromptTemplate(
-                template=prompt_template,
-                input_variables=["context", "question"]
-            )
-
-            self.qa_chain = RetrievalQA.from_chain_type(
-                llm=llm,
-                chain_type="stuff",
-                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
-                return_source_documents=True,
-                chain_type_kwargs={"prompt": PROMPT}
-            )
-
-            logger.info("LLM initialized successfully")
-
-        except Exception as e:
-            logger.error(f"Error initializing LLM: {str(e)}")
-            raise
+        ext = os.path.splitext(file_path)[1].lower()
+        if ext not in self.supported_formats:
+            raise ValueError(f"Unsupported format. Supported: {', '.join(self.supported_formats)}")
+        return True
+
+    def process_file(self, file: gr.File) -> List:
+        """Process a single file and return documents."""
+        try:
+            file_path = file.name
+            file_size = os.path.getsize(file_path)
+            self.validate_file(file_path, file_size)
+
+            # Copy file to upload directory
+            filename = os.path.basename(file_path)
+            save_path = os.path.join(self.upload_folder, filename)
+            shutil.copy2(file_path, save_path)
+
+            # Load documents based on file type
+            ext = os.path.splitext(file_path)[1].lower()
+            if ext == '.pdf':
+                loader = PyPDFLoader(save_path)
+            elif ext == '.txt':
+                loader = TextLoader(save_path)
+            else:  # .docx
+                loader = Docx2txtLoader(save_path)
+
+            documents = loader.load()
+            for doc in documents:
+                doc.metadata.update({
+                    'source': filename,
+                    'type': 'uploaded'
+                })
+            return documents
+
+        except Exception as e:
+            logger.error(f"Error processing {file_path}: {str(e)}")
+            raise
 
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""
@@ -174,6 +131,79 @@ def initialize_llm(self):
             logger.error(f"Error updating vector store: {str(e)}")
             raise
 
+    def initialize_llm(self):
+        """Initialize the language model and QA chain."""
+        try:
+            # Get Hugging Face token
+            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
+            if not hf_token:
+                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
+
+            # Login to Hugging Face
+            login(token=hf_token)
+
+            # Initialize model and tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True
+            )
+
+            # Configure model loading based on device
+            model_config = {
+                'device_map': 'auto',
+                'trust_remote_code': True,
+                'token': hf_token
+            }
+
+            if self.device == "cuda":
+                model_config['torch_dtype'] = torch.float16
+            else:
+                model_config['low_cpu_mem_usage'] = True
+
+            model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_config)
+
+            # Create pipeline
+            pipe = pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                max_new_tokens=512,
+                temperature=0.1,
+                device_map="auto"
+            )
+
+            llm = HuggingFacePipeline(pipeline=pipe)
+
+            # Create prompt template
+            prompt_template = """
+            Context: {context}
+
+            Based on the context above, please provide a clear and concise answer to the following question.
+            If the information is not in the context, explicitly state so.
+
+            Question: {question}
+            """
+
+            PROMPT = PromptTemplate(
+                template=prompt_template,
+                input_variables=["context", "question"]
+            )
+
+            self.qa_chain = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": PROMPT}
+            )
+
+            logger.info("LLM initialized successfully")
+
+        except Exception as e:
+            logger.error(f"Error initializing LLM: {str(e)}")
+            raise
+
     def process_upload(self, files: List[gr.File]) -> str:
         """Process uploaded files and initialize/update the system."""
         if not files:
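Note: the new initialize_embeddings, validate_file, and process_file methods read self.device, self.max_file_size, self.supported_formats, and self.upload_folder, none of which are defined in the hunks above; they must be set in a part of RAGSystem.__init__ this diff does not touch. A minimal sketch of what that constructor plausibly contains, where every concrete value is an assumption rather than something taken from the commit:

import os
import torch

class RAGSystem:
    def __init__(self, upload_folder: str = "uploads"):
        # Assumed: the device is picked once here, since the updated methods
        # read self.device instead of recomputing it as the removed code did.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Assumed limits enforced by validate_file.
        self.max_file_size = 10 * 1024 * 1024  # 10 MB
        self.supported_formats = ['.pdf', '.txt', '.docx']
        # Assumed destination for the copies made by process_file.
        self.upload_folder = upload_folder
        os.makedirs(self.upload_folder, exist_ok=True)
        # Filled in later by initialize_embeddings, update_vector_store,
        # and initialize_llm respectively.
        self.embeddings = None
        self.vector_store = None
        self.qa_chain = None
        self.initialize_embeddings()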
 
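Because the rebuilt initialize_llm constructs RetrievalQA with return_source_documents=True, calling the chain yields both the generated answer and the retrieved chunks. A usage sketch; the rag instance and the question text are illustrative, not part of the commit:

# 'rag' stands for an initialized RAGSystem whose qa_chain has been built.
response = rag.qa_chain({"query": "What does the uploaded report conclude?"})
print(response["result"])                 # the generated answer
for doc in response["source_documents"]:  # the k=4 retrieved chunks
    print(doc.metadata.get("source"), doc.page_content[:80])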
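Finally, a hypothetical sketch of how this RAGSystem could be wired into the Gradio UI that app.py's use of gr.File implies. process_upload appears in the diff; the ask helper and all UI names below are assumptions:

import gradio as gr

rag = RAGSystem()

def ask(question: str) -> str:
    # Assumed guard: refuse queries until documents have been indexed.
    if rag.qa_chain is None:
        return "Please upload documents first."
    return rag.qa_chain({"query": question})["result"]

with gr.Blocks() as demo:
    files = gr.File(file_count="multiple", label="Documents (.pdf, .txt, .docx)")
    status = gr.Textbox(label="Status")
    # process_upload validates, copies, loads, and indexes the uploads.
    files.upload(rag.process_upload, inputs=files, outputs=status)
    question = gr.Textbox(label="Question")
    answer = gr.Textbox(label="Answer")
    question.submit(ask, inputs=question, outputs=answer)

demo.launch()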