arjunanand13 committed
Commit 87ce197 · verified · parent 0cb7835

Update app.py

Files changed (1):
  1. app.py +7 -4
app.py CHANGED

@@ -15,6 +15,7 @@ import json
 import gradio as gr
 import re
 from threading import Thread
+import os
 
 class DocumentRetrievalAndGeneration:
     def __init__(self, embedding_model_name, lm_model_id, data_folder):
@@ -51,7 +52,7 @@ class DocumentRetrievalAndGeneration:
     def create_faiss_index(self):
         all_texts = [split.page_content for split in self.all_splits]
 
-        batch_size = 256
+        batch_size = 1024
         all_embeddings = []
 
         for i in range(0, len(all_texts), batch_size):
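The only functional change in this hunk raises the embedding batch size from 256 to 1024, trading higher peak memory for fewer encode calls while the index is built. A minimal sketch of the batching pattern, assuming a sentence-transformers style model with an `encode()` method and an exact L2 FAISS index; the helper name `build_faiss_index` and the commented-out model name are placeholders, not from the commit:

```python
import faiss
import numpy as np

def build_faiss_index(texts, embedding_model, batch_size=1024):
    """Embed texts in fixed-size batches, then build an exact L2 FAISS index."""
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        # One encode() call per batch: a larger batch_size means fewer calls
        # but a higher peak memory footprint while embedding.
        embeddings = embedding_model.encode(batch, show_progress_bar=False)
        all_embeddings.append(np.asarray(embeddings, dtype="float32"))
    matrix = np.vstack(all_embeddings)          # shape: (num_texts, embedding_dim)
    index = faiss.IndexFlatL2(matrix.shape[1])  # dimension taken from the embeddings
    index.add(matrix)
    return index

# Usage sketch (model name is hypothetical):
# from sentence_transformers import SentenceTransformer
# model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# index = build_faiss_index([s.page_content for s in all_splits], model)
```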
 
@@ -74,13 +75,15 @@ class DocumentRetrievalAndGeneration:
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.bfloat16
         )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        hf_token = os.getenv('HF_TOKEN')
+        tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.bfloat16,
             device_map="auto",
-
-            quantization_config=quantization_config
+            quantization_config=quantization_config,
+            token=hf_token
+        )
         )
         return tokenizer, model
 
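This hunk reads `HF_TOKEN` from the environment (enabled by the new `import os` in the first hunk) and passes it to both `from_pretrained` calls so gated model repositories can be downloaded; on a Hugging Face Space the token would typically be set as a repository secret. As committed, though, the hunk appears to leave a duplicated closing parenthesis after `token=hf_token`, which would raise a SyntaxError when app.py is imported. A minimal sketch of what the loader presumably intends, assuming transformers >= 4.32 for the `token=` keyword and a `load_in_4bit=True` flag in the part of the BitsAndBytesConfig the hunk does not show; the function name is a placeholder:

```python
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def load_tokenizer_and_model(model_id: str):
    """Load a causal LM in 4-bit NF4, authenticating via HF_TOKEN when present."""
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # assumption: not visible in the hunk
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    hf_token = os.getenv("HF_TOKEN")            # None falls back to cached credentials
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",                      # spread layers across available devices
        quantization_config=quantization_config,
        token=hf_token,
    )
    return tokenizer, model
```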