File size: 2,342 Bytes
cc5b602
9531afd
 
 
6386510
9531afd
6386510
9531afd
 
 
e6367a7
9531afd
3eed0af
d381360
f6cebe3
 
 
 
8da4de5
9531afd
 
3eed0af
9531afd
 
 
e59867b
9531afd
61f72c5
e59867b
 
9531afd
e59867b
 
3eed0af
 
cc17248
9c6afad
 
 
 
 
27dc368
3eed0af
9531afd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51a7d9e
 
9531afd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import asyncio
import discord
from discord.ext import commands
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face model id: LG AI Research's EXAONE 3.0 7.8B instruct model.
MODEL = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
# Bot configuration is taken from the environment.
DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")
# NOTE(review): int(None) raises TypeError if DISCORD_CHANNEL_ID is unset —
# the script will crash at import time with an unhelpful message; confirm the
# deployment always sets this variable.
DISCORD_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))

# Target device for the tokenized inputs (see generate_response).
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
# Loaded once at import time; this downloads/maps a ~7.8B-parameter model.
# NOTE(review): device_map="auto" lets accelerate place layers itself, while
# inputs are moved to `device` manually — fine for a single GPU, but may
# mismatch on multi-GPU sharding; verify on the deployment hardware.
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
    ignore_mismatched_sizes=True
)

# Message-content intent is privileged and must also be enabled in the
# Discord developer portal for on_message to see message text.
intents = discord.Intents.default()
intents.message_content = True
bot = commands.Bot(command_prefix="!", intents=intents)

async def generate_response(message, history, system_prompt):
    """Generate a chat reply with the EXAONE model.

    Args:
        message: The latest user message (plain text).
        history: Iterable of (prompt, answer) pairs of earlier turns.
        system_prompt: System instruction prepended to the conversation.

    Returns:
        The model's reply text for the new user turn, stripped of
        surrounding whitespace.
    """
    conversation = [{"role": "system", "content": system_prompt}]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(device)

    def _generate():
        # Synchronous, GPU/CPU-bound Hugging Face generation call.
        with torch.no_grad():
            return model.generate(
                inputs,
                max_new_tokens=1024,
                do_sample=True,
                top_p=1.0,
                top_k=50,
                temperature=1.0,
                pad_token_id=0,
                eos_token_id=361  # presumably EXAONE's end-of-turn token — confirm against tokenizer
            )

    # model.generate blocks for many seconds; running it inline would stall
    # the Discord event loop (heartbeats, other handlers). Run it in a
    # worker thread instead.
    output = await asyncio.to_thread(_generate)

    # Decode ONLY the newly generated tokens. The previous code decoded the
    # whole sequence and split on "Assistant:", a marker the EXAONE chat
    # template never emits, so the entire prompt and history leaked into
    # the reply.
    new_tokens = output[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

@bot.event
async def on_ready():
    """Announce on stdout that the gateway connection is up."""
    connected_as = bot.user
    print(f"{connected_as} has connected to Discord!")

@bot.event
async def on_message(message):
    """Answer messages in the configured channel with a model reply.

    Ignores the bot's own messages, restricts replies to
    DISCORD_CHANNEL_ID, and forwards every message to the command
    processor so "!"-prefixed commands keep working.
    """
    if message.author == bot.user:
        return

    if message.channel.id != DISCORD_CHANNEL_ID:
        return

    response = await generate_response(message.content, [], "You are EXAONE model from LG AI Research, a helpful assistant.")

    # Discord rejects messages longer than 2000 characters; send in chunks.
    chunks = [response[i:i+2000] for i in range(0, len(response), 2000)]

    for chunk in chunks:
        await message.channel.send(chunk)

    # Overriding on_message on a commands.Bot suppresses command dispatch
    # unless the message is forwarded explicitly; without this call the
    # "!" prefix configured above is dead code.
    await bot.process_commands(message)

if __name__ == "__main__":
    import subprocess
    import sys

    # Fail fast with a clear message instead of letting bot.run(None)
    # raise deep inside discord.py.
    if not DISCORD_TOKEN:
        raise SystemExit("DISCORD_TOKEN environment variable is not set")

    # Launch the companion web server with the same interpreter that is
    # running this script — a bare "python" on PATH may be a different
    # installation, or absent entirely.
    web_process = subprocess.Popen([sys.executable, "web.py"])
    try:
        bot.run(DISCORD_TOKEN)
    finally:
        # Don't leave web.py running as an orphan when the bot exits.
        web_process.terminate()