danishjameel003 committed on
Commit
4e3fd95
·
verified ·
1 Parent(s): c6e3167

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -19,25 +19,28 @@ load_dotenv()
19
# Dolly-v2-3b model pipeline
@st.cache_resource
def load_pipeline():
    """Load the Dolly-v2-3b tokenizer, model, and text-generation pipeline.

    Cached by Streamlit (``st.cache_resource``) so the large model is
    loaded only once per process.

    Returns:
        A ``transformers`` text-generation pipeline wrapping Dolly-v2-3b.
    """
    model_name = "databricks/dolly-v2-3b"

    # Left padding is the recommended setting for decoder-only generation models.
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # bfloat16 on GPU, float32 on CPU (NOTE: the previous comment said
        # "float16", but the code actually selects bfloat16 — comment fixed).
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",  # Automatically map model to available devices (e.g. GPU)
        trust_remote_code=True
    )

    # Load the pipeline with required configurations
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        return_full_text=True  # Required for LangChain compatibility
    )
42
 
43
  # Initialize Dolly pipeline
 
19
# Dolly-v2-3b model pipeline
@st.cache_resource
def load_pipeline():
    """Load the Dolly-v2-3b tokenizer, model, and text-generation pipeline.

    Cached by Streamlit (``st.cache_resource``) so the large model is
    loaded only once per process. Model weights that do not fit in
    memory are offloaded to disk via ``offload_folder``.

    Returns:
        A ``transformers`` text-generation pipeline wrapping Dolly-v2-3b.
    """
    model_name = "databricks/dolly-v2-3b"

    # Load tokenizer; left padding is required for decoder-only generation models.
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)

    # Load model with an offload folder for disk storage of weights
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # bfloat16 on GPU, float32 on CPU (NOTE: the previous comment said
        # "float16", but the code actually selects bfloat16 — comment fixed).
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",  # Automatically map model to available devices (e.g. GPU)
        trust_remote_code=True,
        offload_folder="./offload_weights"  # Folder to store offloaded weights
    )

    # Return text-generation pipeline
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        return_full_text=True  # Required for LangChain compatibility
    )
45
 
46
  # Initialize Dolly pipeline