idan shenfeld committed
Commit 590f42f · 1 Parent(s): f3c54c2

local model support

Files changed (1):
  1. app/app.py +59 -31
app/app.py CHANGED

@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Optional
 import json
 
+import spaces
 import spaces
 import gradio as gr
 from feedback import save_feedback, scheduler
@@ -17,24 +18,7 @@ from pandas import DataFrame
 from transformers import pipeline, AutoTokenizer, CohereForCausalLM
 
 
-LANGUAGES: dict[str, str] = {
-    "English": "You are a helpful assistant. Always respond to requests in fluent and natural English, regardless of the language used by the user.",
-    "Dutch": "Je bent een behulpzame assistent die uitsluitend in het Nederlands communiceert. Beantwoord alle vragen en verzoeken in vloeiend en natuurlijk Nederlands, ongeacht de taal waarin de gebruiker schrijft.",
-    "Italian": "Sei un assistente utile e rispondi sempre in italiano in modo naturale e fluente, indipendentemente dalla lingua utilizzata dall'utente.",
-    "Spanish": "Eres un asistente útil que siempre responde en español de manera fluida y natural, independientemente del idioma utilizado por el usuario.",
-    "French": "Tu es un assistant utile qui répond toujours en français de manière fluide et naturelle, quelle que soit la langue utilisée par l'utilisateur.",
-    "German": "Du bist ein hilfreicher Assistent, der stets auf Deutsch in einer natürlichen und fließenden Weise antwortet, unabhängig von der Sprache des Benutzers.",
-    "Portuguese": "Você é um assistente útil que sempre responde em português de forma natural e fluente, independentemente do idioma utilizado pelo usuário.",
-    "Russian": "Ты полезный помощник, который всегда отвечает на русском языке плавно и естественно, независимо от языка пользователя.",
-    "Chinese": "你是一个有用的助手,总是用流畅自然的中文回答问题,无论用户使用哪种语言。",
-    "Japanese": "あなたは役に立つアシスタントであり、常に流暢で自然な日本語で応答します。ユーザーが使用する言語に関係なく、日本語で対応してください。",
-    "Korean": "당신은 유용한 도우미이며, 항상 유창하고 자연스러운 한국어로 응답합니다. 사용자가 어떤 언어를 사용하든 한국어로 대답하세요.",
-    "Hebrew": " אתה עוזר טוב ומועיל שמדבר בעברית ועונה בעברית.",
-    "Hindi" : "आप एक मददगार सहायक हैं। उपयोगकर्ता द्वारा इस्तेमाल की गई भाषा की परवाह किए बिना हमेशा धाराप्रवाह और स्वाभाविक अंग्रेजी में अनुरोधों का जवाब दें।"
-}
-
-
-BASE_MODEL = os.getenv("MODEL", "meta-llama/Llama-3.2-11B-Vision-Instruct")
+BASE_MODEL = os.getenv("MODEL", "CohereForAI/aya-expanse-8b")
 ZERO_GPU = (
     bool(os.getenv("ZERO_GPU", False)) or True
     if str(os.getenv("ZERO_GPU")).lower() == "true"
@@ -52,6 +36,7 @@ def create_inference_client(
 ) -> InferenceClient:
     """Create an InferenceClient instance with the given model or environment settings.
     This function will run the model locally if ZERO_GPU is set to True.
+    This function will run the model locally if ZERO_GPU is set to True.
 
     Args:
         model: Optional model identifier to use. If not provided, will use environment settings.
@@ -62,11 +47,15 @@ def create_inference_client(
     if ZERO_GPU:
         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
         model = CohereForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
-        return pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-        )
+        return {
+            "pipeline": pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                max_new_tokens=2000,
+            ),
+            "tokenizer": tokenizer
+        }
     else:
         return InferenceClient(
             token=os.getenv("HF_TOKEN"),
@@ -75,6 +64,9 @@ def create_inference_client(
         )
 
 
+CLIENT = create_inference_client()
+
+
 def load_languages() -> dict[str, str]:
     """Load languages from JSON file or persistent storage"""
     # First check if we have persistent storage available
@@ -148,7 +140,7 @@ You have been asked to participate in a research study conducted by Lingo Lab fr
 
 The purpose of this study is the collection of multilingual human feedback to improve language models. As part of this study you will interat with a language model in a langugage of your choice, and provide indication to wether its reponses are helpful or not.
 
-Your name and personal data will never be recorded. You may decline further participation, at any time, without adverse consequences.There are no foreseeable risks or discomforts for participating in this study. Note participating in the study may pose risks that are currently unforeseeable. If you have questions or concerns about the study, you can contact the researchers at XXX. If you have any questions about your rights as a participant in this research (E-6610), feel you have been harmed, or wish to discuss other study-related concerns with someone who is not part of the research team, you can contact the M.I.T. Committee on the Use of Humans as Experimental Subjects (COUHES) by phone at (617) 253-8420, or by email at [email protected].
+Your name and personal data will never be recorded. You may decline further participation, at any time, without adverse consequences.There are no foreseeable risks or discomforts for participating in this study. Note participating in the study may pose risks that are currently unforeseeable. If you have questions or concerns about the study, you can contact the researchers at leshem@mit.edu. If you have any questions about your rights as a participant in this research (E-6610), feel you have been harmed, or wish to discuss other study-related concerns with someone who is not part of the research team, you can contact the M.I.T. Committee on the Use of Humans as Experimental Subjects (COUHES) by phone at (617) 253-8420, or by email at [email protected].
 
 Clicking on the next button at the bottom of this page indicates that you are at least 18 years of age and willingly agree to participate in the research voluntarily.
 """
@@ -183,6 +175,11 @@ def format_history_as_messages(history: list):
     current_role = None
     current_message_content = []
 
+    if TEXT_ONLY:
+        for entry in history:
+            messages.append({"role": entry["role"], "content": entry["content"]})
+        return messages
+
     if TEXT_ONLY:
         for entry in history:
             messages.append({"role": entry["role"], "content": entry["content"]})
@@ -274,13 +271,33 @@ def add_fake_like_data(
 
 @spaces.GPU
 def call_pipeline(messages: list, language: str):
-    response = CLIENT(
-        messages,
-        clean_up_tokenization_spaces=False,
-        max_length=2000,
-    )
-    content = response[0]["generated_text"][-1]["content"]
-    return content
+    if ZERO_GPU:
+        # Format the messages using the tokenizer's chat template
+        tokenizer = CLIENT["tokenizer"]
+        formatted_prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+        )
+
+        # Call the pipeline with the formatted text
+        response = CLIENT["pipeline"](
+            formatted_prompt,
+            clean_up_tokenization_spaces=False,
+            max_length=2000,
+            return_full_text=False,
+        )
+
+        # Extract the generated content
+        content = response[0]["generated_text"]
+        return content
+    else:
+        response = CLIENT(
+            messages,
+            clean_up_tokenization_spaces=False,
+            max_length=2000,
+        )
+        content = response[0]["generated_text"][-1]["content"]
+        return content
 
 
 def respond(
@@ -293,6 +310,17 @@ def respond(
 
     Return the history with the new message"""
     messages = format_history_as_messages(history)
+    if ZERO_GPU:
+        content = call_pipeline(messages, language)
+    else:
+        response = CLIENT.chat.completions.create(
+            messages=messages,
+            max_tokens=2000,
+            stream=False,
+            seed=seed,
+            temperature=temperature,
+        )
+        content = response.choices[0].message.content
     if ZERO_GPU:
         content = call_pipeline(messages, language)
     else:
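For context, the local (ZERO_GPU) path introduced above changes the shape of CLIENT: create_inference_client() now returns a dict holding both the text-generation pipeline and its tokenizer, and call_pipeline() renders the chat history through the tokenizer's chat template before generating. The sketch below mirrors that flow outside the app; it is illustrative only, and the model id, the example messages, and the add_generation_prompt flag are assumptions rather than code from this commit (the commit itself loads CohereForCausalLM with load_in_8bit=True).

# Illustrative sketch of the new ZERO_GPU flow; not copied from app/app.py.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

BASE_MODEL = "CohereForAI/aya-expanse-8b"  # default MODEL in this commit

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)  # commit: CohereForCausalLM(..., load_in_8bit=True)

# Same dict shape as create_inference_client() returns when ZERO_GPU is true.
client = {
    "pipeline": pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=2000),
    "tokenizer": tokenizer,
}

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in Italian."},
]

# As in the new call_pipeline(): render the chat template to a prompt string,
# run the pipeline on it, and keep only the newly generated text.
prompt = client["tokenizer"].apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,  # assumption: the commit does not pass this flag
)
outputs = client["pipeline"](
    prompt,
    clean_up_tokenization_spaces=False,
    return_full_text=False,
)
print(outputs[0]["generated_text"])

Returning the tokenizer alongside the pipeline is what lets call_pipeline() apply the chat template without reloading anything; the remote branch keeps the original InferenceClient object untouched.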
 
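When ZERO_GPU is not set, respond() instead goes through the Hugging Face Inference API via huggingface_hub.InferenceClient, using the OpenAI-style chat.completions.create call shown in the last hunk. Below is a minimal, self-contained sketch of that branch; the token, model, seed, and temperature values are placeholders, and the InferenceClient constructor arguments are assumed from the truncated hunk above.

# Illustrative sketch of the non-ZERO_GPU branch; values are placeholders.
import os

from huggingface_hub import InferenceClient

client = InferenceClient(
    token=os.getenv("HF_TOKEN"),
    model=os.getenv("MODEL", "CohereForAI/aya-expanse-8b"),
)

messages = [{"role": "user", "content": "Say hello in Spanish."}]

# Mirrors respond(): one non-streaming chat completion with a fixed seed and temperature.
response = client.chat.completions.create(
    messages=messages,
    max_tokens=2000,
    stream=False,
    seed=42,
    temperature=0.7,
)
print(response.choices[0].message.content)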