Mattral committed on
Commit
6fd604e
·
verified ·
1 Parent(s): 6f3c1b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -61,7 +61,7 @@ def find_exact_match(df1, df2, column_name):
61
  matches = pd.merge(df1, df2, on=column_name, how='inner')
62
  return matches
63
 
64
- def find_similar_texts(df1, df2, column_name, threshold=0.3):
65
  # Find rows with similar texts in the specified column, including exact matches
66
  similar_texts = []
67
  exact_matches = []
@@ -92,7 +92,7 @@ def find_similar_texts(df1, df2, column_name, threshold=0.3):
92
  if similarity_score >= threshold:
93
  if similarity == 1: # Exact match
94
  exact_matches.append((i, j, row1[column_name], row2[column_name]))
95
- if similarity >= threshold and similarity < 1:
96
  similar_texts.append((i, j, row1[column_name], row2[column_name]))
97
 
98
  return similar_texts, exact_matches
 
61
  matches = pd.merge(df1, df2, on=column_name, how='inner')
62
  return matches
63
 
64
+ def find_similar_texts(df1, df2, column_name, threshold=0.4):
65
  # Find rows with similar texts in the specified column, including exact matches
66
  similar_texts = []
67
  exact_matches = []
 
92
  if similarity_score >= threshold:
93
  if similarity == 1: # Exact match
94
  exact_matches.append((i, j, row1[column_name], row2[column_name]))
95
+ if similarity >= threshold and similarity < 1: #remove same
96
  similar_texts.append((i, j, row1[column_name], row2[column_name]))
97
 
98
  return similar_texts, exact_matches