import os
import asyncio
import subprocess

import discord
import torch
from discord.ext import commands
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
DISCORD_TOKEN = os.environ["DISCORD_TOKEN"]  # fail fast with a clear KeyError if unset
DISCORD_CHANNEL_ID = int(os.environ["DISCORD_CHANNEL_ID"])

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",       # let accelerate place the weights (GPU if available)
    trust_remote_code=True,  # EXAONE 3.0 ships custom modeling code
)

intents = discord.Intents.default()
intents.message_content = True  # required to read message text
bot = commands.Bot(command_prefix="!", intents=intents)


def generate_response(message, history, system_prompt):
    """Blocking generation pass; run off the event loop via asyncio.to_thread."""
    conversation = [{"role": "system", "content": system_prompt}]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=1.0,
            top_k=50,
            temperature=1.0,
            # Use the tokenizer's own special-token ids rather than hardcoded values.
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True).strip()


@bot.event
async def on_ready():
    print(f"{bot.user} has connected to Discord!")


@bot.event
async def on_message(message):
    if message.author == bot.user:
        return
    if message.channel.id != DISCORD_CHANNEL_ID:
        return

    # Run the blocking model call in a worker thread so the bot stays responsive
    # (generating directly in this coroutine would stall the Discord heartbeat).
    response = await asyncio.to_thread(
        generate_response,
        message.content,
        [],
        "You are EXAONE model from LG AI Research, a helpful assistant.",
    )

    # Discord caps messages at 2000 characters; send the reply in chunks.
    chunks = [response[i:i + 2000] for i in range(0, len(response), 2000)]
    for chunk in chunks:
        await message.channel.send(chunk)

    # Overriding on_message suppresses command dispatch unless we forward it,
    # so let any "!" prefix commands still be processed.
    await bot.process_commands(message)


if __name__ == "__main__":
    subprocess.Popen(["python", "web.py"])  # start the companion web server alongside the bot
    bot.run(DISCORD_TOKEN)