Mattral committed on
Commit
db70d75
·
verified ·
1 Parent(s): 11e747b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -7,6 +7,7 @@ from Levenshtein import distance as levenshtein_distance
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
 
 
10
  ms = st.session_state
11
  if "themes" not in ms:
12
  ms.themes = {"current_theme": "light",
@@ -56,13 +57,20 @@ def read_csv_or_excel(file):
56
  return pd.read_excel(file)
57
  else:
58
  raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
 
59
 
60
  def find_exact_match(df1, df2, column_name):
 
 
 
 
61
  # Find rows with exact matches in the specified column
62
  matches = pd.merge(df1, df2, on=column_name, how='inner')
63
  return matches
64
 
65
 
 
 
66
  def find_similar_texts(df1, df2, column_name, threshold=0.3):
67
  # Find rows with similar texts in the specified column, excluding exact matches
68
  similar_texts = []
@@ -108,6 +116,7 @@ def plot_correlation(df, column):
108
  return plt.gcf() # Return the matplotlib figure
109
 
110
  st.set_option('deprecation.showPyplotGlobalUse', False)
 
111
  def plot_correlation_matrix(df):
112
  # Filter for numeric columns, if the DataFrame has non-numeric columns
113
  numeric_df = df.select_dtypes(include=['number'])
@@ -160,6 +169,8 @@ def main():
160
  # Display exact matches
161
  st.header("Exact Matches Compare")
162
  for match in exact_matches:
 
 
163
  st.write(f"Row {match[0]} in warehouse item stocks is exactly the same as Row {match[1]} in industry item stocks:")
164
  st.write(f"Warehouse: {match[2]}")
165
  st.write(f"Industry: {match[3]}")
@@ -169,6 +180,9 @@ def main():
169
  # Display similar texts
170
  st.header("Similar (but Not Same) Texts")
171
  for text_pair in similar_texts:
 
 
 
172
  st.write(f"Row {text_pair[0]} in warehouse item stocks is similar to Row {text_pair[1]} in industry item stocks:")
173
  st.write(f"Warehouse: {text_pair[2]}")
174
  st.write(f"Industry: {text_pair[3]}")
@@ -202,4 +216,4 @@ def main():
202
  plot_correlation_matrix(industry_df)
203
 
204
  if __name__ == "__main__":
205
- main()
 
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
 
10
+
11
  ms = st.session_state
12
  if "themes" not in ms:
13
  ms.themes = {"current_theme": "light",
 
57
  return pd.read_excel(file)
58
  else:
59
  raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
60
+
61
 
62
  def find_exact_match(df1, df2, column_name):
63
+ # Ensure the column for merging has the same data type
64
+ df1[column_name] = df1[column_name].astype(str).str.strip()
65
+ df2[column_name] = df2[column_name].astype(str).str.strip()
66
+
67
  # Find rows with exact matches in the specified column
68
  matches = pd.merge(df1, df2, on=column_name, how='inner')
69
  return matches
70
 
71
 
72
+
73
+
74
  def find_similar_texts(df1, df2, column_name, threshold=0.3):
75
  # Find rows with similar texts in the specified column, excluding exact matches
76
  similar_texts = []
 
116
  return plt.gcf() # Return the matplotlib figure
117
 
118
  st.set_option('deprecation.showPyplotGlobalUse', False)
119
+
120
  def plot_correlation_matrix(df):
121
  # Filter for numeric columns, if the DataFrame has non-numeric columns
122
  numeric_df = df.select_dtypes(include=['number'])
 
169
  # Display exact matches
170
  st.header("Exact Matches Compare")
171
  for match in exact_matches:
172
+ warehouse_index = text_pair[0] + 2
173
+ industry_index = text_pair[1] + 2
174
  st.write(f"Row {match[0]} in warehouse item stocks is exactly the same as Row {match[1]} in industry item stocks:")
175
  st.write(f"Warehouse: {match[2]}")
176
  st.write(f"Industry: {match[3]}")
 
180
  # Display similar texts
181
  st.header("Similar (but Not Same) Texts")
182
  for text_pair in similar_texts:
183
+ warehouse_index = text_pair[0] + 2
184
+ industry_index = text_pair[1] + 2
185
+
186
  st.write(f"Row {text_pair[0]} in warehouse item stocks is similar to Row {text_pair[1]} in industry item stocks:")
187
  st.write(f"Warehouse: {text_pair[2]}")
188
  st.write(f"Industry: {text_pair[3]}")
 
216
  plot_correlation_matrix(industry_df)
217
 
218
  if __name__ == "__main__":
219
+ main()