Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -12,48 +12,12 @@ MAX_MAX_NEW_TOKENS = 512
|
|
12 |
DEFAULT_MAX_NEW_TOKENS = 512
|
13 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "2048"))
|
14 |
|
15 |
-
#Inference API Code
|
16 |
-
#client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")
|
17 |
-
|
18 |
#Transformers Code
|
19 |
if torch.cuda.is_available():
|
20 |
model_id = "Qwen/Qwen2.5-7B-Instruct"
|
21 |
#model_id = "BenBranyon/sumbot7b"
|
22 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
|
23 |
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, model_max_length=MAX_INPUT_TOKEN_LENGTH, padding="longest", language="en")
|
24 |
-
#tokenizer.use_default_system_prompt = False
|
25 |
-
|
26 |
-
#Inference API Code
|
27 |
-
def respond(
|
28 |
-
message,
|
29 |
-
history: list[tuple[str, str]],
|
30 |
-
max_tokens,
|
31 |
-
temperature,
|
32 |
-
top_p,
|
33 |
-
):
|
34 |
-
messages = [{"role": "system", "content": "You are a rap lyric generation bot with the task of representing the imagination of the artist Sumkilla, a multi-disciplinary, award-winning artist with a foundation in writing and hip-hop. You are Sumkilla's long shadow. The lyrics you generate are fueled by a passion for liberation, aiming to dismantle oppressive systems and advocate for the freedom of all people, along with the abolition of police forces. With a sophisticated understanding of the role of AI in advancing the harmony between humanity and nature, you aim to produce content that promotes awareness and human evolution, utilizing humor and a distinctive voice to connect deeply and honor humanity. Try to avoid using offensive words and slurs. Rhyme each line of your response as much as possible."}]
|
35 |
-
|
36 |
-
for val in history:
|
37 |
-
if val[0]:
|
38 |
-
messages.append({"role": "user", "content": val[0]})
|
39 |
-
if val[1]:
|
40 |
-
messages.append({"role": "assistant", "content": val[1]})
|
41 |
-
|
42 |
-
messages.append({"role": "user", "content": "Write a rap about " + message})
|
43 |
-
|
44 |
-
response = ""
|
45 |
-
|
46 |
-
for message in client.chat_completion(
|
47 |
-
messages,
|
48 |
-
max_tokens=max_tokens,
|
49 |
-
stream=True,
|
50 |
-
temperature=temperature,
|
51 |
-
top_p=top_p,
|
52 |
-
):
|
53 |
-
token = message.choices[0].delta.content
|
54 |
-
|
55 |
-
response += token
|
56 |
-
yield response
|
57 |
|
58 |
#Transformers Code
|
59 |
@spaces.GPU
|
|
|
12 |
DEFAULT_MAX_NEW_TOKENS = 512
|
13 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "2048"))
|
14 |
|
|
|
|
|
|
|
15 |
#Transformers Code
|
16 |
if torch.cuda.is_available():
|
17 |
model_id = "Qwen/Qwen2.5-7B-Instruct"
|
18 |
#model_id = "BenBranyon/sumbot7b"
|
19 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
|
20 |
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, model_max_length=MAX_INPUT_TOKEN_LENGTH, padding="longest", language="en")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
#Transformers Code
|
23 |
@spaces.GPU
|