burtenshaw committed · 348c664
Parent(s): 46c4bfa

increase max new tokens

Files changed: app/app.py (+20, -13)

app/app.py CHANGED
@@ -33,8 +33,17 @@ LANGUAGES: dict[str, str] = {
 
 
 BASE_MODEL = os.getenv("MODEL", "meta-llama/Llama-3.2-11B-Vision-Instruct")
-ZERO_GPU =
-
+ZERO_GPU = (
+    bool(os.getenv("ZERO_GPU", False)) or True
+    if str(os.getenv("ZERO_GPU")).lower() == "true"
+    else False
+)
+TEXT_ONLY = (
+    bool(os.getenv("TEXT_ONLY", False)) or True
+    if str(os.getenv("TEXT_ONLY")).lower() == "true"
+    else False
+)
+
 
 def create_inference_client(
     model: Optional[str] = None, base_url: Optional[str] = None
@@ -50,7 +59,12 @@ def create_inference_client(
     if ZERO_GPU:
         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
         model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
-        return pipeline(
+        return pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            model_kwargs={"max_new_tokens": 2000},
+        )
     else:
         return InferenceClient(
             token=os.getenv("HF_TOKEN"),
@@ -94,12 +108,7 @@ def format_history_as_messages(history: list):
 
     if TEXT_ONLY:
         for entry in history:
-            messages.append(
-                {
-                    "role": entry["role"],
-                    "content": entry["content"]
-                }
-            )
+            messages.append({"role": entry["role"], "content": entry["content"]})
         return messages
 
     for entry in history:
@@ -185,16 +194,14 @@ def add_fake_like_data(
         language=language,
     )
 
+
 @spaces.GPU
 def call_pipeline(messages: list, language: str):
     response = CLIENT(messages)
-    print(" ### response ### ")
-    print(response)
    content = response[0]["generated_text"][-1]["content"]
-    print(" ### content ### ")
-    print(content)
     return content
 
+
 def respond(
     history: list,
     language: str,
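Note on the new flags: the added ZERO_GPU and TEXT_ONLY expressions lean on Python operator precedence. A conditional expression binds more loosely than "or", so each flag parses as (bool(...) or True) if str(...).lower() == "true" else False, which is True exactly when the environment variable is the string "true" in any casing, and False otherwise (including when it is unset). A minimal sketch of that behaviour; the env_flag helper below is an illustration, not code from this commit:

    import os

    def env_flag(name: str) -> bool:
        # Illustrative equivalent of the expressions added above:
        # "A or True if cond else False" parses as "(A or True) if cond else False",
        # so the result reduces to this string comparison.
        return str(os.getenv(name)).lower() == "true"

    os.environ["ZERO_GPU"] = "True"
    print(env_flag("ZERO_GPU"))   # True
    os.environ.pop("ZERO_GPU")
    print(env_flag("ZERO_GPU"))   # False: an unset variable reads as None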
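Note on the generation limit: the ZeroGPU branch now builds the text-generation pipeline with model_kwargs={"max_new_tokens": 2000}, and call_pipeline no longer prints debug output; it just pulls the reply out of the chat-format result. Assuming a transformers version whose text-generation pipeline accepts a list of chat messages, the returned structure looks roughly like the sketch below (the message contents are invented for illustration):

    # Shape of the object call_pipeline indexes into; contents are made up.
    response = [
        {
            "generated_text": [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi! How can I help?"},
            ]
        }
    ]
    content = response[0]["generated_text"][-1]["content"]  # newest assistant turn
    print(content)

Depending on the transformers version, the same limit can also be supplied per call, e.g. CLIENT(messages, max_new_tokens=2000), rather than at pipeline construction time.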