idan shenfeld
committed on
Commit · 590f42f
1 Parent(s): f3c54c2
local model support
app/app.py +59 -31
app/app.py
CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Optional
 import json
 
+import spaces
 import spaces
 import gradio as gr
 from feedback import save_feedback, scheduler
@@ -17,24 +18,7 @@ from pandas import DataFrame
 from transformers import pipeline, AutoTokenizer, CohereForCausalLM
 
 
-
-    "English": "You are a helpful assistant. Always respond to requests in fluent and natural English, regardless of the language used by the user.",
-    "Dutch": "Je bent een behulpzame assistent die uitsluitend in het Nederlands communiceert. Beantwoord alle vragen en verzoeken in vloeiend en natuurlijk Nederlands, ongeacht de taal waarin de gebruiker schrijft.",
-    "Italian": "Sei un assistente utile e rispondi sempre in italiano in modo naturale e fluente, indipendentemente dalla lingua utilizzata dall'utente.",
-    "Spanish": "Eres un asistente útil que siempre responde en español de manera fluida y natural, independientemente del idioma utilizado por el usuario.",
-    "French": "Tu es un assistant utile qui répond toujours en français de manière fluide et naturelle, quelle que soit la langue utilisée par l'utilisateur.",
-    "German": "Du bist ein hilfreicher Assistent, der stets auf Deutsch in einer natürlichen und fließenden Weise antwortet, unabhängig von der Sprache des Benutzers.",
-    "Portuguese": "Você é um assistente útil que sempre responde em português de forma natural e fluente, independentemente do idioma utilizado pelo usuário.",
-    "Russian": "Ты полезный помощник, который всегда отвечает на русском языке плавно и естественно, независимо от языка пользователя.",
-    "Chinese": "你是一个有用的助手,总是用流畅自然的中文回答问题,无论用户使用哪种语言。",
-    "Japanese": "あなたは役に立つアシスタントであり、常に流暢で自然な日本語で応答します。ユーザーが使用する言語に関係なく、日本語で対応してください。",
-    "Korean": "당신은 유용한 도우미이며, 항상 유창하고 자연스러운 한국어로 응답합니다. 사용자가 어떤 언어를 사용하든 한국어로 대답하세요.",
-    "Hebrew": " אתה עוזר טוב ומועיל שמדבר בעברית ועונה בעברית.",
-    "Hindi" : "आप एक मददगार सहायक हैं। उपयोगकर्ता द्वारा इस्तेमाल की गई भाषा की परवाह किए बिना हमेशा धाराप्रवाह और स्वाभाविक अंग्रेजी में अनुरोधों का जवाब दें।"
-}
-
-
-BASE_MODEL = os.getenv("MODEL", "meta-llama/Llama-3.2-11B-Vision-Instruct")
+BASE_MODEL = os.getenv("MODEL", "CohereForAI/aya-expanse-8b")
 ZERO_GPU = (
     bool(os.getenv("ZERO_GPU", False)) or True
     if str(os.getenv("ZERO_GPU")).lower() == "true"
@@ -52,6 +36,7 @@ def create_inference_client(
 ) -> InferenceClient:
     """Create an InferenceClient instance with the given model or environment settings.
     This function will run the model locally if ZERO_GPU is set to True.
+    This function will run the model locally if ZERO_GPU is set to True.
 
     Args:
         model: Optional model identifier to use. If not provided, will use environment settings.
@@ -62,11 +47,15 @@ def create_inference_client(
     if ZERO_GPU:
         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
         model = CohereForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
-        return
-        "
-
-
-
+        return {
+            "pipeline": pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                max_new_tokens=2000,
+            ),
+            "tokenizer": tokenizer
+        }
     else:
         return InferenceClient(
             token=os.getenv("HF_TOKEN"),
@@ -75,6 +64,9 @@ def create_inference_client(
         )
 
 
+CLIENT = create_inference_client()
+
+
 def load_languages() -> dict[str, str]:
     """Load languages from JSON file or persistent storage"""
     # First check if we have persistent storage available
@@ -148,7 +140,7 @@ You have been asked to participate in a research study conducted by Lingo Lab fr
 
 The purpose of this study is the collection of multilingual human feedback to improve language models. As part of this study you will interat with a language model in a langugage of your choice, and provide indication to wether its reponses are helpful or not.
 
-Your name and personal data will never be recorded. You may decline further participation, at any time, without adverse consequences.There are no foreseeable risks or discomforts for participating in this study. Note participating in the study may pose risks that are currently unforeseeable. If you have questions or concerns about the study, you can contact the researchers at
+Your name and personal data will never be recorded. You may decline further participation, at any time, without adverse consequences.There are no foreseeable risks or discomforts for participating in this study. Note participating in the study may pose risks that are currently unforeseeable. If you have questions or concerns about the study, you can contact the researchers at leshem@mit.edu. If you have any questions about your rights as a participant in this research (E-6610), feel you have been harmed, or wish to discuss other study-related concerns with someone who is not part of the research team, you can contact the M.I.T. Committee on the Use of Humans as Experimental Subjects (COUHES) by phone at (617) 253-8420, or by email at [email protected].
 
 Clicking on the next button at the bottom of this page indicates that you are at least 18 years of age and willingly agree to participate in the research voluntarily.
 """
@@ -183,6 +175,11 @@ def format_history_as_messages(history: list):
     current_role = None
     current_message_content = []
 
+    if TEXT_ONLY:
+        for entry in history:
+            messages.append({"role": entry["role"], "content": entry["content"]})
+        return messages
+
     if TEXT_ONLY:
         for entry in history:
             messages.append({"role": entry["role"], "content": entry["content"]})
@@ -274,13 +271,33 @@ def add_fake_like_data(
 
 @spaces.GPU
 def call_pipeline(messages: list, language: str):
-
-        messages
-
-
-
-
-
+    if ZERO_GPU:
+        # Format the messages using the tokenizer's chat template
+        tokenizer = CLIENT["tokenizer"]
+        formatted_prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+        )
+
+        # Call the pipeline with the formatted text
+        response = CLIENT["pipeline"](
+            formatted_prompt,
+            clean_up_tokenization_spaces=False,
+            max_length=2000,
+            return_full_text=False,
+        )
+
+        # Extract the generated content
+        content = response[0]["generated_text"]
+        return content
+    else:
+        response = CLIENT(
+            messages,
+            clean_up_tokenization_spaces=False,
+            max_length=2000,
+        )
+        content = response[0]["generated_text"][-1]["content"]
+        return content
 
 
 def respond(
@@ -293,6 +310,17 @@ def respond(
 
     Return the history with the new message"""
     messages = format_history_as_messages(history)
+    if ZERO_GPU:
+        content = call_pipeline(messages, language)
+    else:
+        response = CLIENT.chat.completions.create(
+            messages=messages,
+            max_tokens=2000,
+            stream=False,
+            seed=seed,
+            temperature=temperature,
+        )
+        content = response.choices[0].message.content
     if ZERO_GPU:
         content = call_pipeline(messages, language)
     else: