burtenshaw committed
Commit 348c664 · 1 Parent(s): 46c4bfa

increase max new tokens
Files changed (1):
app/app.py (+20 -13)
app/app.py CHANGED
@@ -33,8 +33,17 @@ LANGUAGES: dict[str, str] = {
 
 
 BASE_MODEL = os.getenv("MODEL", "meta-llama/Llama-3.2-11B-Vision-Instruct")
-ZERO_GPU = bool(os.getenv("ZERO_GPU", False)) or True if str(os.getenv("ZERO_GPU")).lower() == "true" else False
-TEXT_ONLY = bool(os.getenv("TEXT_ONLY", False)) or True if str(os.getenv("TEXT_ONLY")).lower() == "true" else False
+ZERO_GPU = (
+    bool(os.getenv("ZERO_GPU", False)) or True
+    if str(os.getenv("ZERO_GPU")).lower() == "true"
+    else False
+)
+TEXT_ONLY = (
+    bool(os.getenv("TEXT_ONLY", False)) or True
+    if str(os.getenv("TEXT_ONLY")).lower() == "true"
+    else False
+)
+
 
 def create_inference_client(
     model: Optional[str] = None, base_url: Optional[str] = None
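Review note: this hunk only re-wraps the two flag expressions; behaviour is unchanged. Because Python's conditional expression binds more loosely than `or`, each flag parses as `(bool(...) or True) if str(os.getenv(...)).lower() == "true" else False`, which reduces to a plain case-insensitive comparison with "true" — the `bool(os.getenv(..., False))` term can never affect the result. A minimal equivalent sketch; the `env_flag` helper is illustrative, not part of the commit:

import os

def env_flag(name: str) -> bool:
    # Same result as the committed expression: the flag is on only when the
    # variable is literally "true" in any casing; unset or any other value
    # yields False.
    return str(os.getenv(name)).lower() == "true"

ZERO_GPU = env_flag("ZERO_GPU")   # hypothetical rewrite, not the committed code
TEXT_ONLY = env_flag("TEXT_ONLY")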
@@ -50,7 +59,12 @@ def create_inference_client(
     if ZERO_GPU:
         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
         model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
-        return pipeline("text-generation", model=model, tokenizer=tokenizer)
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            model_kwargs={"max_new_tokens": 2000},
+        )
     else:
         return InferenceClient(
             token=os.getenv("HF_TOKEN"),
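Review note: this is the change the commit message refers to. One caveat: in transformers, a pipeline's `model_kwargs` are forwarded to the model's `from_pretrained`, not to `generate()`, so `max_new_tokens` placed there may not actually raise the generation budget. A hedged alternative sketch, assuming the intent is a 2000-token limit at generation time; names mirror the diff and only the parameter placement differs:

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

BASE_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct"  # as in app.py

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# max_new_tokens is a generation argument; the text-generation pipeline
# accepts it at call time and forwards it to generate().
messages = [{"role": "user", "content": "Hello"}]
out = pipe(messages, max_new_tokens=2000)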
@@ -94,12 +108,7 @@ def format_history_as_messages(history: list):
 
     if TEXT_ONLY:
         for entry in history:
-            messages.append(
-                {
-                    "role": entry["role"],
-                    "content": entry["content"]
-                }
-            )
+            messages.append({"role": entry["role"], "content": entry["content"]})
         return messages
 
     for entry in history:
@@ -185,16 +194,14 @@ def add_fake_like_data(
         language=language,
     )
 
+
 @spaces.GPU
 def call_pipeline(messages: list, language: str):
     response = CLIENT(messages)
-    print(" ### response ### ")
-    print(response)
     content = response[0]["generated_text"][-1]["content"]
-    print(" ### content ### ")
-    print(content)
     return content
 
+
 def respond(
     history: list,
     language: str,
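Review note: this hunk drops the debug prints and adds PEP 8 blank lines between top-level definitions. The indexing that remains relies on the chat-style output of the text-generation pipeline: called with a list of message dicts, it returns a list containing one dict whose "generated_text" holds the full conversation, so the last element is the assistant reply. An illustrative sketch of the shape being unpacked — the text values are made up, only the structure matters:

# Shape of `response` for chat-format input to a text-generation pipeline.
response = [
    {
        "generated_text": [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi, how can I help?"},
        ]
    }
]
content = response[0]["generated_text"][-1]["content"]  # assistant reply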
 