Spaces:

maqiuping59
/

table_markdown

Running

App Files Files Community

maqiuping59 committed on May 26

Commit

980aa29

verified ·

1 Parent(s): 56dcd48

Update metric.py

Browse files

Files changed (1) hide show

metric.py +86 -57

metric.py CHANGED Viewed

@@ -2,6 +2,8 @@ import re
 import json
 import evaluate
 import datasets
 _DESCRIPTION = """
 Table evaluation metrics for assessing the matching degree between predicted and reference tables. It calculates the following metrics:
@@ -44,7 +46,19 @@ _CITATION = """
 """
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Accuracy(evaluate.Metric):
     def _info(self):
@@ -71,64 +85,65 @@ class Accuracy(evaluate.Metric):
         return None
-    def _table_to_dict(self,table_str):
-        result_dict = {}
         table_str = table_str.lstrip("|").rstrip("|")
         parts = table_str.split('||')
         parts = [part for part in parts if "--" not in part]
         legends = parts[0].split("|")
         rows = len(parts)
-        if rows == 2:
             nums = parts[1].split("|")
-            for i in range(len(nums)):
-                result_dict[legends[i]]=float(nums[i])
-        elif rows >=3:
-            for i in range(1,rows):
-                pre_row = parts[i]
-                pre_row = pre_row.split("|")
-                label = pre_row[0]
-                result_dict[label] = {}
-                for j in range(1,len(pre_row)):
-                    result_dict[label][legends[j-1]] = float(pre_row[j])
-        else:
-            return None
-        return result_dict
-    def _markdown_to_dict(self,markdown_str):
         table_str = self._extract_markdown_table(markdown_str)
         if table_str:
-            return self._table_to_dict(table_str)
-        else:
-            return None
-    def _calculate_table_metrics(self,pred_table, true_table):
-        true_positives = 0
-        false_positives = 0
-        false_negatives = 0
-        for key, pred_value in pred_table.items():
-            if key in true_table:
-                true_value = true_table[key]
-                if isinstance(pred_value, dict) and isinstance(true_value, dict):
-                    nested_metrics = self._calculate_table_metrics(pred_value, true_value)
-                    true_positives += nested_metrics['true_positives']
-                    false_positives += nested_metrics['false_positives']
-                    false_negatives += nested_metrics['false_negatives']
-                elif true_value == 0 and abs(pred_value) < 0.05:
-                    true_positives += 1
-                elif true_value != 0 and abs((pred_value - true_value) / true_value) < 0.05:
-                    true_positives += 1
-                else:
-                    false_positives += 1
-                    false_negatives += 1
-            else:
-                false_positives += 1
-        for key in true_table:
-            if key not in pred_table:
-                false_negatives += 1
         precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
         recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
@@ -146,22 +161,36 @@ class Accuracy(evaluate.Metric):
     def _compute(self, predictions, references):
         predictions = "".join(predictions)
         references = "".join(references)
-        return self._calculate_table_metrics(self._markdown_to_dict(predictions), self._markdown_to_dict(references))
 def main():
     accuracy_metric = Accuracy()
-    # 计算指标
-    results = accuracy_metric.compute(
         predictions=["""
-|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |
-"""],  # 预测的表格
         references=["""
-|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 1 | 6 | 7 | 5 | 9 || wage | 1 | 5 | 2 | 8 | 5 |
-"""],   # 参考的表格
     )
-    print(results)  # 输出结果
 if __name__ == '__main__':
     main()

 import json
 import evaluate
 import datasets
+from typing import Set, Tuple, List, Dict, Any
+from dataclasses import dataclass
 _DESCRIPTION = """
 Table evaluation metrics for assessing the matching degree between predicted and reference tables. It calculates the following metrics:
 """
@dataclass(frozen=True, eq=False)
class TableCell:
    """One numeric table cell, identified by an unordered set of labels.

    Two cells compare equal when their label sets are identical and their
    values differ by less than 0.05 (absolute difference).

    The hash is derived from the labels only. Hashing the value as well
    (even rounded) would give different hashes to cells that ``__eq__``
    considers equal, e.g. 1.0 and 1.01, and set intersection/difference
    would then never match them. With a label-only hash, cells sharing the
    same labels land in the same bucket and ``__eq__`` applies the tolerance.

    ``eq=False`` tells ``dataclass`` not to generate its own ``__eq__`` /
    ``__hash__``; the hand-written ones below are used instead.
    """

    labels: frozenset[str]  # frozenset: hashable and order-independent
    value: float  # numeric content of the cell

    def __eq__(self, other):
        """Return True for a TableCell with the same labels and a value within 0.05."""
        if not isinstance(other, TableCell):
            return NotImplemented
        return self.labels == other.labels and abs(self.value - other.value) < 0.05

    def __hash__(self):
        """Hash on labels only so that equal cells always share a hash."""
        return hash(self.labels)
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Accuracy(evaluate.Metric):
     def _info(self):
         return None
+    def _table_to_cell_set(self, table_str: str) -> Set[TableCell]:
+        """Convert markdown table string to a set of TableCell objects."""
+        result_set = set()
         table_str = table_str.lstrip("|").rstrip("|")
         parts = table_str.split('||')
         parts = [part for part in parts if "--" not in part]
+        if not parts:
+            return result_set
         legends = parts[0].split("|")
+        legends = [l.strip() for l in legends if l.strip()]
         rows = len(parts)
+        if rows == 2:  # Single row table - use single label
             nums = parts[1].split("|")
+            nums = [n.strip() for n in nums if n.strip()]
+            for i, num in enumerate(nums):
+                try:
+                    value = float(num)
+                    # For single row tables, use a single label
+                    cell = TableCell(frozenset([legends[i]]), value)
+                    result_set.add(cell)
+                except ValueError:
+                    continue
+        elif rows >= 3:  # Multi-row table - use label pairs
+            for i in range(1, rows):
+                row = parts[i].split("|")
+                row = [r.strip() for r in row if r.strip()]
+                if not row:
+                    continue
+                row_label = row[0]
+                for j, num in enumerate(row[1:], 1):
+                    if j >= len(legends):
+                        continue
+                    try:
+                        value = float(num)
+                        # For multi-row tables, use label pairs
+                        cell = TableCell(frozenset([row_label, legends[j-1]]), value)
+                        result_set.add(cell)
+                    except ValueError:
+                        continue
+        return result_set
+    def _markdown_to_cell_set(self, markdown_str: str) -> Set[TableCell]:
+        """Convert markdown string to a set of TableCell objects."""
         table_str = self._extract_markdown_table(markdown_str)
         if table_str:
+            return self._table_to_cell_set(table_str)
+        return set()
+    def _calculate_table_metrics(self, pred_cells: Set[TableCell], true_cells: Set[TableCell]) -> Dict[str, Any]:
+        """Calculate metrics using cell set comparison."""
+        true_positives = len(pred_cells.intersection(true_cells))
+        false_positives = len(pred_cells - true_cells)
+        false_negatives = len(true_cells - pred_cells)
         precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
         recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
     def _compute(self, predictions, references):
         predictions = "".join(predictions)
         references = "".join(references)
+        pred_cells = self._markdown_to_cell_set(predictions)
+        true_cells = self._markdown_to_cell_set(references)
+        return self._calculate_table_metrics(pred_cells, true_cells)
 def main():
     accuracy_metric = Accuracy()
+    # Test with different table formats
+    # Test 1: Single row table
+    results1 = accuracy_metric.compute(
+        predictions=["""
+|  | value1 | value2 | value3 ||--|--|--|--|| data | 1.01 | 2 | 3 |
+"""],
+        references=["""
+|  | value1 | value2 | value3 ||--|--|--|--|| data | 1 | 2 | 3 |
+"""],
+    )
+    print("Single row table test:", results1)
+    # Test 2: Multi-row table (transposed)
+    results2 = accuracy_metric.compute(
         predictions=["""
+|  | desire | wage ||--|--|--|| lobby | 5.01 | 1 || search | 8 | 5 || band | 7 | 3 || charge | 5 | 8 || chain | 9 | 5 |
+"""],
         references=["""
+|  | lobby | search | band | charge | chain ||--|--|--|--|--|--|| desire | 5.01 | 8 | 7 | 5 | 9 || wage | 1 | 5 | 3 | 8 | 5 |
+"""],
     )
+    print("Multi-row table test:", results2)
 if __name__ == '__main__':
     main()