CamiloVega committed on
Commit 316a253 · verified · 1 Parent(s): f91396d

Update app.py

Files changed (1)
  1. app.py +93 -144
app.py CHANGED
@@ -56,153 +56,102 @@ class RAGSystem:
         self.initialize_embeddings()
 
     def initialize_embeddings(self):
-        """Initialize embedding model."""
-        try:
-            self.embeddings = HuggingFaceEmbeddings(
-                model_name=EMBEDDING_MODEL,
-                model_kwargs={
-                    'device': self.device,
-                    'torch_dtype': torch.float32,
-                }
-            )
-            logger.info("Embeddings initialized successfully")
-        except Exception as e:
-            logger.error(f"Error initializing embeddings: {str(e)}")
-            raise
-
-    def initialize_llm(self):
-        """Initialize the language model and QA chain."""
-        try:
-            # Get Hugging Face token
-            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
-            if not hf_token:
-                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
-
-            # Login to Hugging Face
-            login(token=hf_token)
-
-            # Configure model loading based on device
-            if self.device == "cuda":
-                model_config = {
-                    'torch_dtype': torch.float16,
-                    'device_map': "auto",
-                }
-            else:
-                quantization_config = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_compute_dtype=torch.float32,
-                    bnb_4bit_quant_type="nf4",
-                    bnb_4bit_use_double_quant=True,
-                )
-                model_config = {
-                    'quantization_config': quantization_config,
-                    'device_map': "auto",
-                    'torch_dtype': torch.float32,
-                    'low_cpu_mem_usage': True,
-                }
-
-            # Initialize tokenizer and model
-            tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_NAME,
-                token=hf_token,
-                trust_remote_code=True
-            )
-
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                token=hf_token,
-                trust_remote_code=True,
-                **model_config
-            )
-
-            # Create pipeline
-            pipe_config = {
-                "model": model,
-                "tokenizer": tokenizer,
-                "max_new_tokens": 512,
-                "temperature": 0.1,
-                "device_map": "auto",
-                "torch_dtype": torch.float32 if self.device == "cpu" else torch.float16,
             }
-
-            if self.device == "cpu":
-                pipe_config["model"] = pipe_config["model"].to('cpu')
-
-            pipe = pipeline("text-generation", **pipe_config)
-
-            # Create QA chain
-            llm = HuggingFacePipeline(pipeline=pipe)
-
-            prompt_template = """
-            Context: {context}
-
-            Based on the context above, please provide a clear and concise answer to the following question.
-            If the information is not in the context, explicitly state so.
-
-            Question: {question}
-            """
-
-            PROMPT = PromptTemplate(
-                template=prompt_template,
-                input_variables=["context", "question"]
-            )
-
-            self.qa_chain = RetrievalQA.from_chain_type(
-                llm=llm,
-                chain_type="stuff",
-                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
-                return_source_documents=True,
-                chain_type_kwargs={"prompt": PROMPT}
-            )
-
-            logger.info("LLM initialized successfully")
-
-        except Exception as e:
-            logger.error(f"Error initializing LLM: {str(e)}")
-            raise
 
-    def validate_file(self, file_path: str, file_size: int) -> bool:
-        """Validate uploaded file."""
-        if file_size > self.max_file_size:
-            raise ValueError(f"File size exceeds {self.max_file_size // 1024 // 1024}MB limit")
 
-        ext = os.path.splitext(file_path)[1].lower()
-        if ext not in self.supported_formats:
-            raise ValueError(f"Unsupported format. Supported: {', '.join(self.supported_formats)}")
-        return True
-
-    def process_file(self, file: gr.File) -> List:
-        """Process a single file and return documents."""
-        try:
-            file_path = file.name
-            file_size = os.path.getsize(file_path)
-            self.validate_file(file_path, file_size)
-
-            # Copy file to upload directory
-            filename = os.path.basename(file_path)
-            save_path = os.path.join(self.upload_folder, filename)
-            shutil.copy2(file_path, save_path)
-
-            # Load documents based on file type
-            ext = os.path.splitext(file_path)[1].lower()
-            if ext == '.pdf':
-                loader = PyPDFLoader(save_path)
-            elif ext == '.txt':
-                loader = TextLoader(save_path)
-            else: # .docx
-                loader = Docx2txtLoader(save_path)
-
-            documents = loader.load()
-            for doc in documents:
-                doc.metadata.update({
-                    'source': filename,
-                    'type': 'uploaded'
-                })
-            return documents
-
-        except Exception as e:
-            logger.error(f"Error processing {file_path}: {str(e)}")
-            raise
 
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""

         self.initialize_embeddings()
 
     def initialize_embeddings(self):
+        """Initialize embedding model."""
+        try:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.embeddings = HuggingFaceEmbeddings(
+                model_name=EMBEDDING_MODEL,
+                model_kwargs={
+                    'device': device
+                },
+                encode_kwargs={
+                    'normalize_embeddings': True
                 }
+            )
+            logger.info(f"Embeddings initialized successfully on {device}")
+        except Exception as e:
+            logger.error(f"Error initializing embeddings: {str(e)}")
+            raise
 
+    def initialize_llm(self):
+        """Initialize the language model and QA chain."""
+        try:
+            # Get Hugging Face token
+            hf_token = os.environ.get('HUGGINGFACE_TOKEN')
+            if not hf_token:
+                raise ValueError("Please set HUGGINGFACE_TOKEN environment variable")
 
+            # Login to Hugging Face
+            login(token=hf_token)
+
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+
+            # Configure model loading based on device
+            if device == "cuda":
+                model_config = {
+                    'torch_dtype': torch.float16,
+                    'device_map': "auto",
+                }
+            else:
+                model_config = {
+                    'device_map': "auto",
+                    'low_cpu_mem_usage': True,
+                }
+
+            # Initialize tokenizer and model
+            tokenizer = AutoTokenizer.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True
+            )
+
+            model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                token=hf_token,
+                trust_remote_code=True,
+                **model_config
+            )
+
+            # Create pipeline
+            pipe_config = {
+                "model": model,
+                "tokenizer": tokenizer,
+                "max_new_tokens": 512,
+                "temperature": 0.1,
+                "device_map": "auto",
+            }
+
+            pipe = pipeline("text-generation", **pipe_config)
+            llm = HuggingFacePipeline(pipeline=pipe)
+
+            # Create QA chain
+            prompt_template = """
+            Context: {context}
+
+            Based on the context above, please provide a clear and concise answer to the following question.
+            If the information is not in the context, explicitly state so.
+
+            Question: {question}
+            """
+
+            PROMPT = PromptTemplate(
+                template=prompt_template,
+                input_variables=["context", "question"]
+            )
+
+            self.qa_chain = RetrievalQA.from_chain_type(
+                llm=llm,
+                chain_type="stuff",
+                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": PROMPT}
+            )
+
+            logger.info("LLM initialized successfully")
+
+        except Exception as e:
+            logger.error(f"Error initializing LLM: {str(e)}")
+            raise
 
     def update_vector_store(self, new_documents: List):
         """Update vector store with new documents."""