"""Score a seq2seq code-generation model against a reference solution."""

import torch
# NOTE(review): torchmetrics does not appear to export a `METEOR` metric;
# if this import fails, METEOR must come from another package -- confirm.
from torchmetrics import BLEUScore, METEOR
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


class CodeEvaluator:
    """Generate code from a natural-language prompt and score it.

    Loads a tokenizer and a seq2seq model by name, moves the model to CUDA
    when available (CPU otherwise), and holds one BLEU and one METEOR metric
    object that are reused across calls to :meth:`evaluate`.
    """

    def __init__(self, model_name):
        """Load the tokenizer/model for *model_name* and create the metrics.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        self.model.to(self.device)
        self.bleu = BLEUScore()
        self.meteor = METEOR()

    def evaluate(self, nl_input: str, target_code: str, **generate_kwargs):
        """Generate code for *nl_input* and score it against *target_code*.

        Args:
            nl_input: Natural-language description of the desired code.
            target_code: Reference implementation to compare against.
            **generate_kwargs: Optional extra keyword arguments forwarded
                unchanged to ``self.model.generate`` (for example a larger
                generation length). With none given, the call is the same
                as ``generate(**inputs)``.

        Returns:
            A ``(bleu_score, meteor_score)`` tuple as returned by the two
            metric objects.
        """
        inputs = self.tokenizer(nl_input, return_tensors="pt").to(self.device)
        outputs = self.model.generate(**inputs, **generate_kwargs)
        # Only the first returned sequence is decoded and scored.
        generated_code = self.tokenizer.decode(
            outputs[0], skip_special_tokens=True
        )

        # BLEUScore takes a batch: a sequence of predictions and, for each
        # prediction, a sequence of references. Passing a bare target string
        # makes it iterate the string character by character, so the two
        # corpora end up with different sizes.
        bleu_score = self.bleu([generated_code], [[target_code]])
        # NOTE(review): METEOR's expected input format is not visible here;
        # the call is left as originally written -- confirm once the import
        # above is resolved.
        meteor_score = self.meteor(generated_code, target_code)
        return bleu_score, meteor_score


def main():
    """Run a single factorial example and print both scores."""
    model_name = "S-Dreamer/PyCodeT5"
    evaluator = CodeEvaluator(model_name)

    nl_input = "Write a Python function to calculate the factorial of a number."
    # Reference text is kept verbatim as a single whitespace-separated line.
    target_code = """ def factorial(n): if n == 0: return 1 else: return n * factorial(n-1) """

    bleu_score, meteor_score = evaluator.evaluate(nl_input, target_code)
    print(f"BLEU score: {bleu_score}")
    print(f"METEOR score: {meteor_score}")


if __name__ == "__main__":
    main()