Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ import json
|
|
15 |
import gradio as gr
|
16 |
import re
|
17 |
from threading import Thread
|
|
|
18 |
|
19 |
class DocumentRetrievalAndGeneration:
|
20 |
def __init__(self, embedding_model_name, lm_model_id, data_folder):
|
@@ -51,7 +52,7 @@ class DocumentRetrievalAndGeneration:
|
|
51 |
def create_faiss_index(self):
|
52 |
all_texts = [split.page_content for split in self.all_splits]
|
53 |
|
54 |
-
batch_size =
|
55 |
all_embeddings = []
|
56 |
|
57 |
for i in range(0, len(all_texts), batch_size):
|
@@ -74,13 +75,15 @@ class DocumentRetrievalAndGeneration:
|
|
74 |
bnb_4bit_quant_type="nf4",
|
75 |
bnb_4bit_compute_dtype=torch.bfloat16
|
76 |
)
|
77 |
-
|
|
|
78 |
model = AutoModelForCausalLM.from_pretrained(
|
79 |
model_id,
|
80 |
torch_dtype=torch.bfloat16,
|
81 |
device_map="auto",
|
82 |
-
|
83 |
-
|
|
|
84 |
)
|
85 |
return tokenizer, model
|
86 |
|
|
|
15 |
import gradio as gr
|
16 |
import re
|
17 |
from threading import Thread
|
18 |
+
import os
|
19 |
|
20 |
class DocumentRetrievalAndGeneration:
|
21 |
def __init__(self, embedding_model_name, lm_model_id, data_folder):
|
|
|
52 |
def create_faiss_index(self):
|
53 |
all_texts = [split.page_content for split in self.all_splits]
|
54 |
|
55 |
+
batch_size = 1024
|
56 |
all_embeddings = []
|
57 |
|
58 |
for i in range(0, len(all_texts), batch_size):
|
|
|
75 |
bnb_4bit_quant_type="nf4",
|
76 |
bnb_4bit_compute_dtype=torch.bfloat16
|
77 |
)
|
78 |
+
hf_token = os.getenv('HF_TOKEN')
|
79 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
|
80 |
model = AutoModelForCausalLM.from_pretrained(
|
81 |
model_id,
|
82 |
torch_dtype=torch.bfloat16,
|
83 |
device_map="auto",
|
84 |
+
quantization_config=quantization_config,
|
85 |
+
token=hf_token
|
86 |
)
|
87 |
return tokenizer, model
|
88 |