Tai Truong
fix readme
d202ada
import json
import re
import unicodedata
from langflow.custom import Component
from langflow.inputs import BoolInput, MessageTextInput
from langflow.schema.message import Message
from langflow.template import Output
class JSONCleaner(Component):
icon = "braces"
display_name = "JSON Cleaner"
description = (
"Cleans the messy and sometimes incorrect JSON strings produced by LLMs "
"so that they are fully compliant with the JSON spec."
)
inputs = [
MessageTextInput(
name="json_str", display_name="JSON String", info="The JSON string to be cleaned.", required=True
),
BoolInput(
name="remove_control_chars",
display_name="Remove Control Characters",
info="Remove control characters from the JSON string.",
required=False,
),
BoolInput(
name="normalize_unicode",
display_name="Normalize Unicode",
info="Normalize Unicode characters in the JSON string.",
required=False,
),
BoolInput(
name="validate_json",
display_name="Validate JSON",
info="Validate the JSON string to ensure it is well-formed.",
required=False,
),
]
outputs = [
Output(display_name="Cleaned JSON String", name="output", method="clean_json"),
]
def clean_json(self) -> Message:
try:
from json_repair import repair_json
except ImportError as e:
msg = "Could not import the json_repair package. Please install it with `pip install json_repair`."
raise ImportError(msg) from e
"""Clean the input JSON string based on provided options and return the cleaned JSON string."""
json_str = self.json_str
remove_control_chars = self.remove_control_chars
normalize_unicode = self.normalize_unicode
validate_json = self.validate_json
start = json_str.find("{")
end = json_str.rfind("}")
if start == -1 or end == -1:
msg = "Invalid JSON string: Missing '{' or '}'"
raise ValueError(msg)
try:
json_str = json_str[start : end + 1]
if remove_control_chars:
json_str = self._remove_control_characters(json_str)
if normalize_unicode:
json_str = self._normalize_unicode(json_str)
if validate_json:
json_str = self._validate_json(json_str)
cleaned_json_str = repair_json(json_str)
result = str(cleaned_json_str)
self.status = result
return Message(text=result)
except Exception as e:
msg = f"Error cleaning JSON string: {e}"
raise ValueError(msg) from e
def _remove_control_characters(self, s: str) -> str:
"""Remove control characters from the string."""
return re.sub(r"[\x00-\x1F\x7F]", "", s)
def _normalize_unicode(self, s: str) -> str:
"""Normalize Unicode characters in the string."""
return unicodedata.normalize("NFC", s)
def _validate_json(self, s: str) -> str:
"""Validate the JSON string."""
try:
json.loads(s)
except json.JSONDecodeError as e:
msg = f"Invalid JSON string: {e}"
raise ValueError(msg) from e
return s