from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import torch

# Hugging Face Hub identifier of the checkpoint to serve
model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"

# bitsandbytes settings: keep the weights in 4-bit form so the model needs
# less memory, while running the actual matrix math in float16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,  # nested quantization of the quantization constants
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
    load_in_4bit=True,  # store weights in 4-bit
)

# Fetch the tokenizer, then the quantized model. device_map="auto" delegates
# weight placement across the available devices to the library.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Function to generate comments
def generate_code_comments(code_snippet, *, max_input_tokens=512, max_new_tokens=512):
    """Ask the loaded model to add comments to ``code_snippet``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        code_snippet: Source code to annotate, as a string.
        max_input_tokens: Upper bound on prompt length in tokens; longer
            prompts are truncated by the tokenizer.
        max_new_tokens: Upper bound on the number of tokens generated.

    Returns:
        The text generated by the model (without the echoed prompt), or an
        empty string when ``code_snippet`` is empty or whitespace-only.
    """
    # Nothing to comment: skip an expensive generation pass.
    if not code_snippet or not code_snippet.strip():
        return ""

    prompt = f"### Code:\n{code_snippet}\n### Add meaningful comments to this code:\n"

    # Move the inputs to wherever the model actually lives rather than
    # guessing "cuda"/"cpu"; device_map="auto" decides the placement.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    ).to(model.device)

    # max_new_tokens bounds only the generated part. The previous
    # max_length=512 also counted the prompt, so a prompt truncated to 512
    # tokens left no budget for any output.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # The returned sequence starts with the prompt tokens; drop them so the
    # caller receives only the model's answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

# Web UI: a single text box in, a single text box out, backed by
# generate_code_comments.
iface = gr.Interface(
    title="AI Code Comment Generator",
    description="Enter a code snippet, and the AI will add meaningful comments.",
    fn=generate_code_comments,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
iface.launch()