Spaces:
Sleeping
Sleeping
| import evaluate | |
| import datasets | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
# Human-readable summary of the metric; passed to MetricInfo as its description.
_description = """Fluency Score is a metric to score an Arabic sentence based on its "Fluency". That means is it closer to the eloquent classical Arabic (1) or not (0).
Examples of the sentences can be found in the evaluation metric's model card [Here](https://huggingface.co/Baleegh/Fluency_Score).
"""


class Fluency_Score(evaluate.Metric):
    """Metric that scores texts with the ``Baleegh/Fluency_Score`` model.

    The tokenizer and the sequence-classification model are loaded in
    ``_download_and_prepare``; ``_compute`` runs the model on the given
    texts and returns its logits clipped to the range [0, 1].
    """

    def _info(self):
        """Return the metric's metadata and its input feature schema."""
        # A single input column named "texts" holding string values.
        input_features = datasets.Features(
            {
                "texts": datasets.Value("string", id="sequence"),
            }
        )
        metric_info = evaluate.MetricInfo(
            description=_description,
            citation="",
            inputs_description="",
            features=input_features,
            reference_urls=['https://huggingface.co/Baleegh/Fluency_Score'],
        )
        return metric_info

    def _download_and_prepare(self, dl_manager, device=None):
        """Load the tokenizer and model and move the model to ``device``.

        Args:
            dl_manager: Not used by this implementation.
            device: Target device for the model. When ``None``, "cuda" is
                chosen if ``torch.cuda.is_available()`` is true, otherwise
                "cpu".
        """
        if device is None:
            if torch.cuda.is_available():
                device = "cuda"
            else:
                device = "cpu"

        # Both the tokenizer and the model come from the same repository.
        repo_id = "Baleegh/Fluency_Score"
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        model = AutoModelForSequenceClassification.from_pretrained(repo_id)
        model.to(device)

        self.tokenizer = tokenizer
        self.model = model
        # Remembered so that _compute can place its inputs on the same device.
        self.device = device

    def _compute(self, texts, temperature=2):
        """Score ``texts`` with the loaded model.

        Args:
            texts: The text(s) handed to the tokenizer.
            temperature: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            A dict with the single key "classical_score" whose value is the
            model's logits tensor with every entry clipped to [0, 1].
        """
        # Inputs are truncated and padded to a fixed length of 128 tokens.
        encoded = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            padding='max_length',
            max_length=128,
        )
        encoded = encoded.to(self.device)

        # No autograd bookkeeping is needed for scoring.
        with torch.inference_mode():
            logits = self.model(**encoded).logits
            scores = logits.clip(0, 1)

        return {"classical_score": scores}