File size: 5,509 Bytes
05f81dc
 
 
9bb02cd
 
42ac9eb
 
6cc33f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05f81dc
 
 
 
 
 
 
 
 
 
 
 
 
 
e64bd99
2ead8af
1afc11d
05f81dc
9bb02cd
 
 
b2d7e3e
9bb02cd
 
 
 
 
 
 
 
 
 
 
 
 
 
42ac9eb
 
 
 
 
 
9bb02cd
05f81dc
 
e64bd99
05f81dc
 
 
 
 
 
 
 
 
 
 
 
 
 
745476b
 
2ead8af
05f81dc
 
 
 
 
 
 
 
 
 
57940bb
05f81dc
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import streamlit as st
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from Levenshtein import distance as levenshtein_distance


# Per-session theme registry used by the toggle button.
#
# NOTE: the "light" / "dark" entries are keyed by the value of
# "current_theme" at the moment the button is clicked: each entry holds the
# options that get applied on that click and the icon shown on the button
# while that entry is current. That is why the "light" entry carries
# theme.base == "dark" and the moon icon.
ms = st.session_state
if "themes" not in ms:
    ms.themes = {
        "current_theme": "light",
        # Set to False by the toggle callback to request one extra rerun.
        "refreshed": True,

        "light": {
            "theme.base": "dark",
            "theme.backgroundColor": "black",
            "theme.primaryColor": "#c98bdb",
            "theme.secondaryBackgroundColor": "#5591f5",
            "theme.textColor": "white",
            "button_face": "🌜",
        },

        "dark": {
            "theme.base": "light",
            "theme.backgroundColor": "white",
            "theme.primaryColor": "#5591f5",
            "theme.secondaryBackgroundColor": "#82E1D7",
            "theme.textColor": "#0a1464",
            "button_face": "🌞",
        },
    }
  

def ChangeTheme():
    """Button callback: apply the active entry's theme options and flip the toggle.

    Reads the entry selected by ``ms.themes["current_theme"]``, pushes every
    key starting with ``"theme"`` into Streamlit's config, marks the page as
    needing a refresh, and then switches ``current_theme`` to the other value.
    """
    themes = ms.themes
    active = themes["current_theme"]

    # Anything other than "light" falls back to the "dark" entry.
    options = themes["light" if active == "light" else "dark"]
    for option_name in options:
        if option_name.startswith("theme"):
            st._config.set_option(option_name, options[option_name])

    # Picked up by the module-level check that triggers a rerun.
    themes["refreshed"] = False

    # Only the two known names are swapped; any other value is left as-is.
    swapped = {"dark": "light", "light": "dark"}
    if active in swapped:
        themes["current_theme"] = swapped[active]


# Show the icon stored on the entry that matches the current theme; any
# value other than "light" uses the "dark" entry.
btn_face = ms.themes["light" if ms.themes["current_theme"] == "light" else "dark"]["button_face"]
st.button(btn_face, on_click=ChangeTheme)

# The toggle callback clears "refreshed"; rerun once so the newly set theme
# options are picked up, and set the flag first so the rerun does not loop.
if not ms.themes["refreshed"]:
    ms.themes["refreshed"] = True
    st.rerun()


def read_csv_or_excel(file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The format is chosen from the file name's extension, compared
    case-insensitively so names such as ``DATA.CSV`` are accepted.

    Args:
        file: File-like object exposing a ``name`` attribute.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the extension is not ``.csv``, ``.xlsx`` or ``.xls``.
    """
    filename = file.name.lower()
    if filename.endswith(".csv"):
        return pd.read_csv(file)
    if filename.endswith((".xlsx", ".xls")):
        return pd.read_excel(file)
    raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")

def find_exact_matches(df1, df2, column_name):
    """Return the inner join of ``df1`` and ``df2`` on ``column_name``.

    Only rows whose ``column_name`` value occurs in both frames are kept.
    """
    return df1.merge(df2, how="inner", on=column_name)


def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.5):
    """Find pairs of similar, but not identical, texts across two frames.

    A pair is reported when both its TF-IDF cosine similarity and its
    normalised Levenshtein similarity (``1 - distance / longer_length``) are
    at least ``threshold``. Values are compared in their string form.

    Rows whose text already appears in ``exact_matches[column_name]`` are
    skipped. The exclusion is done by value because the index of a merged
    frame is a fresh range that does not correspond to the row labels of
    ``df1`` or ``df2``.

    Args:
        df1: First frame; must contain ``column_name``.
        df2: Second frame; must contain ``column_name``.
        column_name: Column holding the texts to compare in both frames.
        exact_matches: Frame of exact matches (as produced by merging the
            two frames on ``column_name``).
        threshold: Minimum similarity, applied to both measures.

    Returns:
        A list of ``(df1_label, df2_label, df1_value, df2_value)`` tuples,
        ordered by ``df1`` row and then ``df2`` row.
    """
    values1 = df1[column_name].tolist()
    values2 = df2[column_name].tolist()
    texts1 = df1[column_name].astype(str).tolist()
    texts2 = df2[column_name].astype(str).tolist()
    labels1 = df1.index.tolist()
    labels2 = df2.index.tolist()

    # Texts that already have an exact counterpart are not "similar" candidates.
    if column_name in exact_matches.columns:
        exact_values = set(exact_matches[column_name].astype(str))
    else:
        exact_values = set()

    # Work with positions, not index labels, so any index type is supported.
    candidates1 = [pos for pos, text in enumerate(texts1) if text not in exact_values]
    candidates2 = [pos for pos, text in enumerate(texts2) if text not in exact_values]
    if not candidates1 or not candidates2:
        return []

    # Fit on both corpora together so the IDF weights are shared.
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(texts1 + texts2)
    except ValueError:
        # Raised when no text yields a token (empty vocabulary); without
        # tokens there is no TF-IDF similarity to report.
        return []

    # Only the df1-vs-df2 block is needed, not the full square matrix.
    split = len(texts1)
    similarity_matrix = cosine_similarity(tfidf_matrix[:split], tfidf_matrix[split:])

    similar_texts = []
    for pos1 in candidates1:
        text1 = texts1[pos1]
        for pos2 in candidates2:
            text2 = texts2[pos2]
            # Identical strings are exact matches; comparing the strings is
            # reliable, unlike testing a floating-point cosine against 1.
            if text1 == text2:
                continue
            if similarity_matrix[pos1, pos2] < threshold:
                continue
            # The texts differ, so at least one is non-empty and the
            # denominator is never zero.
            max_length = max(len(text1), len(text2))
            similarity_score = 1 - levenshtein_distance(text1, text2) / max_length
            if similarity_score >= threshold:
                similar_texts.append(
                    (labels1[pos1], labels2[pos2], values1[pos1], values2[pos2])
                )

    return similar_texts


def main():
    """Render the comparison page: upload two files, pick columns, show matches.

    Once both files are uploaded, the user selects one column from each. The
    industry column is renamed to the warehouse column's name so that both
    frames can be compared on a single shared column, then exact matches and
    similar texts are displayed.
    """
    st.title("Item Comparison App")

    # Upload files
    st.header("Upload Files")
    warehouse_file = st.file_uploader("Upload Warehouse Item Stocks (CSV or Excel)")
    industry_file = st.file_uploader("Upload Industry Item Stocks (CSV or Excel)")

    # Nothing to compare until both uploads are present.
    if warehouse_file is None or industry_file is None:
        return

    # Read files; report unsupported or unparsable input instead of crashing.
    try:
        warehouse_df = read_csv_or_excel(warehouse_file)
        industry_df = read_csv_or_excel(industry_file)
    except ValueError as exc:
        st.error(str(exc))
        return

    # Select columns using dropdowns
    st.header("Select Columns")
    warehouse_column = st.selectbox(
        "Choose column from warehouse item stocks:", warehouse_df.columns.tolist()
    )
    industry_column = st.selectbox(
        "Choose column from industry item stocks:", industry_df.columns.tolist()
    )

    # The comparison helpers take one column name for both frames, so expose
    # the chosen industry column under the warehouse column's name. If the
    # industry frame has a different column already using that name, move it
    # aside to avoid duplicate column labels.
    column_renames = {industry_column: warehouse_column}
    if industry_column != warehouse_column and warehouse_column in industry_df.columns:
        column_renames[warehouse_column] = f"{warehouse_column} (industry)"
    industry_aligned = industry_df.rename(columns=column_renames)

    # Find exact matches
    exact_matches = find_exact_matches(warehouse_df, industry_aligned, warehouse_column)

    # Find similar texts
    similar_texts = find_similar_texts(
        warehouse_df, industry_aligned, warehouse_column, exact_matches
    )

    # Display results
    st.header("Exact Matches")
    st.write(exact_matches)

    st.header("Similar Texts")
    for warehouse_row, industry_row, warehouse_text, industry_text in similar_texts:
        st.write(f"Row {warehouse_row} in warehouse item stocks is similar to Row {industry_row} in industry item stocks:")
        st.write(f"Warehouse: {warehouse_text}")
        st.write(f"Industry: {industry_text}")
        st.write("")

# Render the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()