asigalov61 committed on
Commit
6387673
·
verified ·
1 Parent(s): 32af644

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -27,6 +27,7 @@ HF_TOKEN = os.getenv('HF_TOKEN')
27
  #==========================================================================================================
28
 
29
  def format_table_data(data_string):
 
30
  # Split the string into rows based on newlines
31
  rows = data_string.strip().split("\n")
32
 
@@ -37,21 +38,29 @@ def format_table_data(data_string):
37
  # Split each row into columns based on the separator '|' and strip extra spaces
38
  columns = row.split("|")
39
  formatted_row = [cell.strip() for cell in columns]
40
-
41
- # Handle uneven rows by ensuring each row has the same number of columns
42
- max_columns = max(len(columns) for columns in formatted_data) if formatted_data else len(columns)
43
- while len(formatted_row) < max_columns:
44
- formatted_row.append("") # Add empty strings to fill the row
45
-
46
  formatted_data.append(formatted_row)
47
 
48
- # Handle case where new rows have more columns than previous rows
49
- max_columns = max(len(row) for row in formatted_data)
50
- for row in formatted_data:
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  while len(row) < max_columns:
52
  row.append("") # Add empty strings to fill the row
53
 
54
- return formatted_data
55
 
56
  #==========================================================================================================
57
 
 
27
  #==========================================================================================================
28
 
29
  def format_table_data(data_string):
30
+
31
  # Split the string into rows based on newlines
32
  rows = data_string.strip().split("\n")
33
 
 
38
  # Split each row into columns based on the separator '|' and strip extra spaces
39
  columns = row.split("|")
40
  formatted_row = [cell.strip() for cell in columns]
41
+
 
 
 
 
 
42
  formatted_data.append(formatted_row)
43
 
44
+ # Determine the minimum and maximum number of elements that a column must have
45
+ min_elements = len(formatted_data) * 0.5 # For example, at least half the rows
46
+ max_elements = len(formatted_data) * 1.5 # For example, no more than 1.5 times the number of rows
47
+
48
+ # Transpose the data to work with columns
49
+ transposed_data = list(map(list, zip(*formatted_data)))
50
+
51
+ # Filter out outlier columns
52
+ filtered_columns = [col for col in transposed_data if min_elements <= len(col) <= max_elements]
53
+
54
+ # Transpose the data back to the original format
55
+ filtered_data = list(map(list, zip(*filtered_columns)))
56
+
57
+ # Handle uneven rows by ensuring each row has the same number of columns
58
+ max_columns = max(len(row) for row in filtered_data)
59
+ for row in filtered_data:
60
  while len(row) < max_columns:
61
  row.append("") # Add empty strings to fill the row
62
 
63
+ return filtered_data
64
 
65
  #==========================================================================================================
66