Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,25 +19,28 @@ load_dotenv()
|
|
19 |
# Dolly-v2-3b model pipeline
|
20 |
@st.cache_resource
def load_pipeline():
    """Create the text-generation pipeline for ``databricks/dolly-v2-3b``.

    The tokenizer and the causal language model are loaded from the same
    repository id and then handed to ``pipeline`` as ready-made objects.

    Returns:
        The value returned by ``pipeline(...)`` for the ``text-generation``
        task, configured with ``return_full_text=True``.
    """
    repo_id = "databricks/dolly-v2-3b"

    tok = AutoTokenizer.from_pretrained(
        repo_id,
        padding_side="left",
        trust_remote_code=True,
    )

    lm = AutoModelForCausalLM.from_pretrained(
        repo_id,
        # bfloat16 when CUDA is available, float32 otherwise.
        torch_dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32),
        device_map="auto",
        trust_remote_code=True,
    )

    # Collect the pipeline arguments first so the call itself stays short.
    pipeline_args = {
        "task": "text-generation",
        "model": lm,
        "tokenizer": tok,
        "torch_dtype": (torch.bfloat16 if torch.cuda.is_available() else torch.float32),
        "device_map": "auto",
        "return_full_text": True,
    }
    generator = pipeline(**pipeline_args)
    return generator
|
42 |
|
43 |
# Initialize Dolly pipeline
|
|
|
19 |
# Dolly-v2-3b model pipeline
|
20 |
@st.cache_resource
def load_pipeline():
    """Build the Dolly-v2-3b text-generation pipeline.

    Loads the tokenizer and causal-LM weights for
    ``databricks/dolly-v2-3b`` and wraps them in a ``text-generation``
    pipeline. The function is decorated with ``st.cache_resource``.

    Returns:
        The object returned by ``pipeline(...)``, configured with
        ``return_full_text=True``.
    """
    model_name = "databricks/dolly-v2-3b"

    # bfloat16 when CUDA is available, float32 otherwise. Computed once so the
    # model load and the pipeline wrapper cannot disagree on the dtype.
    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        padding_side="left",
        trust_remote_code=True,
    )

    # Load model with offload folder for disk storage of weights
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=dtype,
        device_map="auto",  # Automatically map model to available devices (e.g., GPU if available)
        # NOTE(review): trust_remote_code allows code from the model repo to
        # run locally -- confirm it is actually required for this checkpoint.
        trust_remote_code=True,
        offload_folder="./offload_weights",  # Folder to store offloaded weights
    )

    # Return text-generation pipeline
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        # NOTE(review): the model is already instantiated above, so these two
        # arguments may be redundant here -- verify against the installed
        # transformers version before removing them.
        torch_dtype=dtype,
        device_map="auto",
        return_full_text=True,
    )
|
45 |
|
46 |
# Initialize Dolly pipeline
|