"""Streamlit app that compares two item-stock tables.

The user uploads a warehouse file and an industry file (CSV or Excel), picks
one column from each, and the app reports rows whose values match exactly and
rows whose values are textually similar.
"""

from difflib import SequenceMatcher

import pandas as pd


def read_csv_or_excel(file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: A file-like object exposing a ``name`` attribute; the extension
            of ``name`` selects the parser.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the extension is not ``.csv``, ``.xlsx`` or ``.xls``.
    """
    # Compare case-insensitively so names such as "STOCK.CSV" are accepted.
    lowered_name = file.name.lower()
    if lowered_name.endswith(".csv"):
        return pd.read_csv(file)
    if lowered_name.endswith((".xlsx", ".xls")):
        return pd.read_excel(file)
    raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")


def find_exact_matches(df1, df2, column_name, right_column_name=None):
    """Inner-join two frames on rows whose key values are equal.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        column_name: Key column in ``df1`` (and in ``df2`` when
            ``right_column_name`` is omitted).
        right_column_name: Key column in ``df2``. Defaults to ``column_name``.

    Returns:
        A DataFrame with one row per matching pair of rows.
    """
    if right_column_name is None or right_column_name == column_name:
        return pd.merge(df1, df2, on=column_name, how="inner")
    # Differently named key columns need left_on/right_on; both are kept in
    # the result.
    return pd.merge(
        df1,
        df2,
        left_on=column_name,
        right_on=right_column_name,
        how="inner",
    )


def find_similar_texts(df1, df2, column_name, threshold=0.8, right_column_name=None):
    """Find pairs of rows whose key values are textually similar.

    Every value of the left column is compared with every value of the right
    column using ``difflib.SequenceMatcher``; values are converted with
    ``str()`` for the comparison only.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        column_name: Column of ``df1`` to compare (and of ``df2`` when
            ``right_column_name`` is omitted).
        threshold: Minimum similarity ratio for a pair to be reported.
        right_column_name: Column of ``df2`` to compare. Defaults to
            ``column_name``.

    Returns:
        A list of ``(left_index, right_index, left_value, right_value)``
        tuples, ordered by left row and then right row.
    """
    if right_column_name is None:
        right_column_name = column_name

    # Iterate the columns directly rather than via iterrows(): iterrows()
    # builds one Series per row and may upcast values (e.g. int -> float),
    # which would change their string form.
    # Right-hand entries are materialised once, not once per left row.
    right_entries = [
        (right_index, right_value, str(right_value))
        for right_index, right_value in df2[right_column_name].items()
    ]

    similar_texts = []
    for left_index, left_value in df1[column_name].items():
        left_text = str(left_value)
        for right_index, right_value, right_text in right_entries:
            similarity = SequenceMatcher(None, left_text, right_text).ratio()
            if similarity >= threshold:
                similar_texts.append(
                    (left_index, right_index, left_value, right_value)
                )
    return similar_texts


def main():
    """Render the comparison UI and display the results."""
    # Imported here so the comparison helpers above remain importable (and
    # testable) without the UI dependency installed.
    import streamlit as st

    st.title("Item Comparison App")

    # Upload files
    st.header("Upload Files")
    warehouse_file = st.file_uploader("Upload Warehouse Item Stocks (CSV or Excel)")
    industry_file = st.file_uploader("Upload Industry Item Stocks (CSV or Excel)")

    # Nothing to compare until both files have been provided.
    if warehouse_file is None or industry_file is None:
        return

    # Read files; report an unsupported format in the page instead of
    # surfacing a traceback.
    try:
        warehouse_df = read_csv_or_excel(warehouse_file)
        industry_df = read_csv_or_excel(industry_file)
    except ValueError as exc:
        st.error(str(exc))
        return

    # Select columns using dropdowns
    st.header("Select Columns")
    warehouse_column = st.selectbox(
        "Choose column from warehouse item stocks:",
        warehouse_df.columns.tolist(),
    )
    industry_column = st.selectbox(
        "Choose column from industry item stocks:",
        industry_df.columns.tolist(),
    )

    # Compare the warehouse column against the column chosen for the
    # industry file (the two may have different names).
    exact_matches = find_exact_matches(
        warehouse_df, industry_df, warehouse_column, industry_column
    )
    similar_texts = find_similar_texts(
        warehouse_df,
        industry_df,
        warehouse_column,
        right_column_name=industry_column,
    )

    # Display results
    st.header("Exact Matches")
    st.write(exact_matches)

    st.header("Similar Texts")
    for warehouse_row, industry_row, warehouse_value, industry_value in similar_texts:
        st.write(f"Row {warehouse_row} in warehouse item stocks is similar to Row {industry_row} in industry item stocks:")
        st.write(f"Warehouse: {warehouse_value}")
        st.write(f"Industry: {industry_value}")
        st.write("")


if __name__ == "__main__":
    main()