import os
import asyncio
import subprocess

import discord
import torch
from discord.ext import commands
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
DISCORD_TOKEN = os.environ["DISCORD_TOKEN"]  # fail fast with a clear KeyError if unset
DISCORD_CHANNEL_ID = int(os.environ["DISCORD_CHANNEL_ID"])

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",       # let accelerate place the weights (GPU if available)
    trust_remote_code=True,  # EXAONE 3.0 ships custom modeling code
)

intents = discord.Intents.default()
intents.message_content = True  # required to read message text
bot = commands.Bot(command_prefix="!", intents=intents)


def generate_response(message, history, system_prompt):
    """Blocking generation pass; run off the event loop via asyncio.to_thread."""
    conversation = [{"role": "system", "content": system_prompt}]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=1.0,
            top_k=50,
            temperature=1.0,
            # Use the tokenizer's own special-token ids rather than hardcoded values.
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True).strip()


@bot.event
async def on_ready():
    print(f"{bot.user} has connected to Discord!")


@bot.event
async def on_message(message):
    if message.author == bot.user:
        return
    if message.channel.id != DISCORD_CHANNEL_ID:
        return

    # Run the blocking model call in a worker thread so the bot stays responsive
    # (generating directly in this coroutine would stall the Discord heartbeat).
    response = await asyncio.to_thread(
        generate_response,
        message.content,
        [],
        "You are EXAONE model from LG AI Research, a helpful assistant.",
    )

    # Discord caps messages at 2000 characters; send the reply in chunks.
    chunks = [response[i:i + 2000] for i in range(0, len(response), 2000)]
    for chunk in chunks:
        await message.channel.send(chunk)

    # Overriding on_message suppresses command dispatch unless we forward it,
    # so let any "!" prefix commands still be processed.
    await bot.process_commands(message)


if __name__ == "__main__":
    subprocess.Popen(["python", "web.py"])  # start the companion web server alongside the bot
    bot.run(DISCORD_TOKEN)