import json
import re
import unicodedata

from langflow.custom import Component
from langflow.inputs import BoolInput, MessageTextInput
from langflow.schema.message import Message
from langflow.template import Output


class JSONCleaner(Component):
    """Component that extracts a JSON object from text and repairs it.

    The outermost ``{ ... }`` span of the input is extracted, optionally
    stripped of control characters, optionally Unicode-normalized (NFC),
    optionally validated with ``json.loads``, and finally passed through
    ``json_repair.repair_json``.
    """

    icon = "braces"
    display_name = "JSON Cleaner"
    description = (
        "Cleans the messy and sometimes incorrect JSON strings produced by LLMs "
        "so that they are fully compliant with the JSON spec."
    )

    inputs = [
        MessageTextInput(
            name="json_str",
            display_name="JSON String",
            info="The JSON string to be cleaned.",
            required=True,
        ),
        BoolInput(
            name="remove_control_chars",
            display_name="Remove Control Characters",
            info="Remove control characters from the JSON string.",
            required=False,
        ),
        BoolInput(
            name="normalize_unicode",
            display_name="Normalize Unicode",
            info="Normalize Unicode characters in the JSON string.",
            required=False,
        ),
        BoolInput(
            name="validate_json",
            display_name="Validate JSON",
            info="Validate the JSON string to ensure it is well-formed.",
            required=False,
        ),
    ]

    outputs = [
        Output(display_name="Cleaned JSON String", name="output", method="clean_json"),
    ]

    def clean_json(self) -> Message:
        """Clean the input JSON string based on provided options and return the cleaned JSON string.

        Returns:
            A ``Message`` whose text is the stringified result of
            ``repair_json`` applied to the extracted (and optionally
            pre-processed) ``{ ... }`` span. The same text is stored in
            ``self.status``.

        Raises:
            ImportError: If the ``json_repair`` package is not installed.
            ValueError: If the input has no ``{`` followed (at or after it)
                by a ``}``, or if any cleaning step fails. When
                ``validate_json`` is enabled, this includes the extracted
                text not parsing with ``json.loads`` *before* repair.
        """
        # Imported lazily so the component can be loaded without the optional
        # dependency installed; the failure only surfaces when it is run.
        try:
            from json_repair import repair_json
        except ImportError as e:
            msg = "Could not import the json_repair package. Please install it with `pip install json_repair`."
            raise ImportError(msg) from e

        json_str = self.json_str
        remove_control_chars = self.remove_control_chars
        normalize_unicode = self.normalize_unicode
        validate_json = self.validate_json

        # Keep only the span from the first "{" to the last "}" so that any
        # surrounding text is discarded.
        start = json_str.find("{")
        end = json_str.rfind("}")
        # ``end < start`` catches input such as "} {", where both characters
        # exist but do not delimit a span; slicing would silently yield "".
        if start == -1 or end == -1 or end < start:
            msg = "Invalid JSON string: Missing '{' or '}'"
            raise ValueError(msg)

        try:
            json_str = json_str[start : end + 1]

            if remove_control_chars:
                json_str = self._remove_control_characters(json_str)
            if normalize_unicode:
                json_str = self._normalize_unicode(json_str)
            if validate_json:
                # NOTE: validation runs before repair, so enabling it rejects
                # input that is not already parseable by ``json.loads``.
                json_str = self._validate_json(json_str)

            cleaned_json_str = repair_json(json_str)
            result = str(cleaned_json_str)

            self.status = result
            return Message(text=result)
        except Exception as e:
            # Any failure in the pipeline is surfaced uniformly as ValueError,
            # with the original exception chained as the cause.
            msg = f"Error cleaning JSON string: {e}"
            raise ValueError(msg) from e

    def _remove_control_characters(self, s: str) -> str:
        """Remove control characters from the string.

        Strips every character in the ranges U+0000-U+001F and U+007F, which
        includes tab, line feed and carriage return.
        """
        return re.sub(r"[\x00-\x1F\x7F]", "", s)

    def _normalize_unicode(self, s: str) -> str:
        """Normalize Unicode characters in the string to NFC form."""
        return unicodedata.normalize("NFC", s)

    def _validate_json(self, s: str) -> str:
        """Validate the JSON string.

        Returns:
            ``s`` unchanged when ``json.loads`` accepts it.

        Raises:
            ValueError: If ``json.loads`` raises ``json.JSONDecodeError``.
        """
        try:
            json.loads(s)
        except json.JSONDecodeError as e:
            msg = f"Invalid JSON string: {e}"
            raise ValueError(msg) from e
        return s