arjunanand13 committed
Commit 87ce197 · verified · parent 0cb7835

Update app.py

Files changed (1):
  1. app.py +7 -4
app.py CHANGED

@@ -15,6 +15,7 @@ import json
 import gradio as gr
 import re
 from threading import Thread
+import os
 
 class DocumentRetrievalAndGeneration:
     def __init__(self, embedding_model_name, lm_model_id, data_folder):
@@ -51,7 +52,7 @@ class DocumentRetrievalAndGeneration:
     def create_faiss_index(self):
         all_texts = [split.page_content for split in self.all_splits]
 
-        batch_size = 256
+        batch_size = 1024
         all_embeddings = []
 
         for i in range(0, len(all_texts), batch_size):
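The only functional change in this hunk raises the embedding batch size from 256 to 1024, trading higher peak memory for fewer encode calls while the index is built. A minimal sketch of the batching pattern, assuming a sentence-transformers style model with an `encode()` method and an exact L2 FAISS index; the helper name `build_faiss_index` and the commented-out model name are placeholders, not from the commit:

```python
import faiss
import numpy as np

def build_faiss_index(texts, embedding_model, batch_size=1024):
    """Embed texts in fixed-size batches, then build an exact L2 FAISS index."""
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        # One encode() call per batch: a larger batch_size means fewer calls
        # but a higher peak memory footprint while embedding.
        embeddings = embedding_model.encode(batch, show_progress_bar=False)
        all_embeddings.append(np.asarray(embeddings, dtype="float32"))
    matrix = np.vstack(all_embeddings)          # shape: (num_texts, embedding_dim)
    index = faiss.IndexFlatL2(matrix.shape[1])  # dimension taken from the embeddings
    index.add(matrix)
    return index

# Usage sketch (model name is hypothetical):
# from sentence_transformers import SentenceTransformer
# model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# index = build_faiss_index([s.page_content for s in all_splits], model)
```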
 
@@ -74,13 +75,15 @@ class DocumentRetrievalAndGeneration:
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.bfloat16
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        hf_token = os.getenv('HF_TOKEN')
+        tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.bfloat16,
             device_map="auto",
-
-            quantization_config=quantization_config
+            quantization_config=quantization_config,
+            token=hf_token
+        )
         )
         return tokenizer, model
 
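This hunk reads `HF_TOKEN` from the environment (enabled by the new `import os` in the first hunk) and passes it to both `from_pretrained` calls so gated model repositories can be downloaded; on a Hugging Face Space the token would typically be set as a repository secret. As committed, though, the hunk appears to leave a duplicated closing parenthesis after `token=hf_token`, which would raise a SyntaxError when app.py is imported. A minimal sketch of what the loader presumably intends, assuming transformers >= 4.32 for the `token=` keyword and a `load_in_4bit=True` flag in the part of the BitsAndBytesConfig the hunk does not show; the function name is a placeholder:

```python
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def load_tokenizer_and_model(model_id: str):
    """Load a causal LM in 4-bit NF4, authenticating via HF_TOKEN when present."""
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # assumption: not visible in the hunk
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    hf_token = os.getenv("HF_TOKEN")            # None falls back to cached credentials
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",                      # spread layers across available devices
        quantization_config=quantization_config,
        token=hf_token,
    )
    return tokenizer, model
```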