from transformers import pipeline

# TODO: move to a config file
DEFAULT_MODEL = "Hello-SimpleAI/chatgpt-detector-roberta"
MODEL_HUMAN_LABEL = {DEFAULT_MODEL: "Human"}

HUMAN = "HUMAN"
MACHINE = "MACHINE"
UNKNOWN = "UNKNOWN"
PARAPHRASE = "PARAPHRASE"
NON_PARAPHRASE = "NON_PARAPHRASE"

def detect_text_by_ai_model(
    input_text: str,
    model: str = DEFAULT_MODEL,
    max_length: int = 512,
) -> tuple:
    """
    Detect whether text is human- or machine-generated.

    Model: chatgpt_detector_roberta
    Ref: https://huggingface.co/Hello-SimpleAI/chatgpt-detector-roberta

    Returns:
        tuple: (label, confidence_score)
        where label is HUMAN or MACHINE, or UNKNOWN if inference fails.
    """
    try:
        pipe = pipeline(
            "text-classification",
            model=model,
            tokenizer=model,
            max_length=max_length,
            truncation=True,
            device_map="auto",  # place the model on a GPU automatically if one is available
        )
        # Strip HTML line breaks before classification
        input_text = input_text.replace("<br>", " ")
        result = pipe(input_text)[0]
        confidence_score = result["score"]
        if result["label"] == MODEL_HUMAN_LABEL[model]:
            label = HUMAN
        else:
            label = MACHINE
        return label, confidence_score
    except Exception as e:
        print(f"Error in Roberta model inference: {e}")
        return UNKNOWN, 0.5  # fall back to UNKNOWN with a neutral 0.5 confidence
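

# A minimal usage sketch (not part of the original file): the sample text and the
# __main__ guard below are illustrative assumptions, added only to show how
# detect_text_by_ai_model is expected to be called. Note that the function builds
# a new pipeline on every call, so the first run downloads and loads the model;
# a real application might cache the pipeline instead.
if __name__ == "__main__":
    sample = "This is a short paragraph whose authorship we want to check."
    label, score = detect_text_by_ai_model(sample)
    print(f"label={label}, confidence={score:.3f}")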