Tonic committed
Commit debaa04 · 1 Parent(s): 2368a49

wrap automodel for zerogpu

Files changed (1)
  1. langchainapp.py +6 -1
langchainapp.py CHANGED
@@ -2,7 +2,7 @@
  import spaces
  from torch.nn import DataParallel
  from torch import Tensor
- from transformers import AutoTokenizer, AutoModel
+ # from transformers import AutoTokenizer, AutoModel
  from huggingface_hub import InferenceClient
  from openai import OpenAI
  from langchain_community.embeddings import HuggingFaceInstructEmbeddings
@@ -45,9 +45,14 @@ hf_token, yi_token = load_env_variables()
 
  @spaces.GPU
  def load_model():
+     # Import AutoModel within the function to avoid issues with pickling in multiprocessing
+     from transformers import AutoModel, AutoTokenizer
      tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, trust_remote_code=True)
+ 
      return AutoModel.from_pretrained(model_name, token=hf_token, trust_remote_code=True).to(device)
 
+ # Load model
+ nvidiamodel = load_model()
  # Load model
  nvidiamodel = load_model()
  # nvidiamodel.set_pooling_include_prompt(include_prompt=False)
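
For reference, a minimal standalone sketch of the pattern this commit applies: the `transformers` import is deferred into the `@spaces.GPU`-decorated function so the function stays picklable when ZeroGPU dispatches it to a separate GPU worker process. The `model_name`, `hf_token`, and `device` values below are placeholders standing in for the ones langchainapp.py defines elsewhere, not the app's actual configuration.

# Sketch of the ZeroGPU wrapping pattern from this commit; placeholder
# values stand in for the app's real configuration.
import spaces
import torch

model_name = "model-id-here"  # placeholder: the app defines its own model_name
hf_token = None               # placeholder: the app loads this from env variables
device = "cuda" if torch.cuda.is_available() else "cpu"

@spaces.GPU
def load_model():
    # Deferred import: keeps the decorated function picklable when
    # ZeroGPU runs it in a separate process (per the commit's comment).
    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(
        model_name, token=hf_token, trust_remote_code=True
    )
    # As in the commit, only the model is returned; the tokenizer is
    # created but discarded here.
    return AutoModel.from_pretrained(
        model_name, token=hf_token, trust_remote_code=True
    ).to(device)

# Module-level load, mirroring the commit's usage.
nvidiamodel = load_model()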