WebashalarForML committed on
Commit
bcc5f4e
·
verified ·
1 Parent(s): 6e27f24

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +10 -0
utils/mistral.py CHANGED
@@ -39,6 +39,16 @@ def Data_Cleaner(text):
39
  return text # Return the original text if cleaning goes wrong
40
  '''
41
  def Data_Cleaner(text):
 
 
 
 
 
 
 
 
 
 
42
  # Use a regex pattern to extract JSON if it exists within ```json and ```
43
  pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
44
  match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
 
39
  return text # Return the original text if cleaning goes wrong
40
  '''
41
  def Data_Cleaner(text):
42
+ """
43
+ Preprocess the JSON string to remove extra spaces, tabs, and newlines.
44
+ """
45
+ # Remove leading and trailing whitespace
46
+ text = text.strip()
47
+ # Remove unnecessary newlines and tabs
48
+ text = re.sub(r'\s*\n\s*', ' ', text) # Replace newlines with a space
49
+ text = re.sub(r'\s+', ' ', text) # Replace multiple spaces with a single space
50
+ text = re.sub(r'\s*([{}:,])\s*', r'\1', text) # Remove spaces around braces, colons, and commas
51
+
52
  # Use a regex pattern to extract JSON if it exists within ```json and ```
53
  pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
54
  match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines