Spaces:
Sleeping
Sleeping
File size: 5,509 Bytes
05f81dc 9bb02cd 42ac9eb 6cc33f4 05f81dc e64bd99 2ead8af 1afc11d 05f81dc 9bb02cd b2d7e3e 9bb02cd 42ac9eb 9bb02cd 05f81dc e64bd99 05f81dc 745476b 2ead8af 05f81dc 57940bb 05f81dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import pandas as pd
import streamlit as st
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from Levenshtein import distance as levenshtein_distance
# Short alias for Streamlit's per-session state store.
ms = st.session_state

# Seed the theme bookkeeping once per session.
if "themes" not in ms:
    ms.themes = {
        # Name of the theme state the app is currently in.
        "current_theme": "light",
        # Cleared by ChangeTheme and restored after the follow-up rerun.
        "refreshed": True,
        # Options ChangeTheme applies while "current_theme" is "light",
        # i.e. when toggling away from light (hence the dark base).
        "light": {
            "theme.base": "dark",
            "theme.backgroundColor": "black",
            "theme.primaryColor": "#c98bdb",
            "theme.secondaryBackgroundColor": "#5591f5",
            "theme.textColor": "white",
            # NOTE(review): both button faces are the same "π" glyph; this
            # may be a mis-encoded icon. Confirm the intended characters.
            "button_face": "π",
        },
        # Options ChangeTheme applies while "current_theme" is "dark".
        "dark": {
            "theme.base": "light",
            "theme.backgroundColor": "white",
            "theme.primaryColor": "#5591f5",
            "theme.secondaryBackgroundColor": "#82E1D7",
            "theme.textColor": "#0a1464",
            "button_face": "π",
        },
    }
def ChangeTheme():
    """Apply the option set stored for the current theme and flip the theme name.

    Every entry of the selected option set whose key starts with "theme" is
    written through ``st._config.set_option``. The "refreshed" flag is then
    cleared and "current_theme" is switched between "light" and "dark"; any
    other stored value is left untouched.
    """
    themes = ms.themes
    active = themes["current_theme"]

    # Anything other than "light" falls back to the "dark" option set.
    option_set = themes["light"] if active == "light" else themes["dark"]
    for option_name, option_value in option_set.items():
        if option_name.startswith("theme"):
            st._config.set_option(option_name, option_value)

    themes["refreshed"] = False

    # Only the two known theme names are swapped.
    next_theme = {"dark": "light", "light": "dark"}.get(active)
    if next_theme is not None:
        themes["current_theme"] = next_theme
# Render the theme toggle; its label is the button face stored for the
# current theme (anything other than "light" uses the "dark" entry).
face_key = "light" if ms.themes["current_theme"] == "light" else "dark"
btn_face = ms.themes[face_key]["button_face"]
st.button(btn_face, on_click=ChangeTheme)

# ChangeTheme clears "refreshed"; restore the flag first so that the extra
# rerun below happens only once per toggle.
# NOTE(review): the rerun presumably lets the new theme options take
# effect - confirm against Streamlit's config behaviour.
if not ms.themes["refreshed"]:
    ms.themes["refreshed"] = True
    st.rerun()
def read_csv_or_excel(file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: File-like object with a ``name`` attribute; the extension of
            that name selects the parser.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the name does not end in ``.csv``, ``.xlsx`` or
            ``.xls`` (compared case-insensitively).
    """
    # Lower-case the name so "DATA.CSV" or "Report.XLSX" are accepted too.
    filename = file.name.lower()
    if filename.endswith(".csv"):
        return pd.read_csv(file)
    if filename.endswith((".xlsx", ".xls")):
        return pd.read_excel(file)
    raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
def find_exact_matches(df1, df2, column_name):
    """Return the inner join of ``df1`` and ``df2`` on ``column_name``.

    Only rows whose ``column_name`` value occurs in both frames are kept.
    """
    return df1.merge(df2, on=column_name, how="inner")
def find_similar_texts(df1, df2, column_name, exact_matches, threshold=0.5):
    """Find near-duplicate texts between two frames, skipping exact matches.

    A pair is reported when the TF-IDF cosine similarity of the two texts is
    at least ``threshold`` (and below 1) and the normalised Levenshtein
    similarity ``1 - distance / max_length`` is also at least ``threshold``.

    Args:
        df1: First frame; must contain ``column_name``.
        df2: Second frame; must contain ``column_name``.
        column_name: Column whose values are compared (as strings).
        exact_matches: Frame of exact matches, e.g. the inner merge of
            ``df1`` and ``df2`` on ``column_name``. Rows whose text appears
            in its ``column_name`` column are excluded from the comparison.
        threshold: Minimum score, applied to both similarity measures.

    Returns:
        A list of ``(index1, index2, text1, text2)`` tuples, where the
        indices are the row labels in ``df1`` and ``df2`` and the texts are
        the string forms of the compared cells.
    """
    # Compare string forms throughout so numeric or missing cells cannot
    # break len() or the Levenshtein call.
    texts1 = df1[column_name].astype(str).tolist()
    texts2 = df2[column_name].astype(str).tolist()
    if not texts1 or not texts2:
        return []

    # Exclude by value: the merged frame's own index does not correspond to
    # row labels of either input frame.
    if column_name in exact_matches.columns:
        matched_values = set(exact_matches[column_name].astype(str))
    else:
        matched_values = set()

    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(texts1 + texts2)
    except ValueError:
        # Raised when no text yields a token (empty vocabulary); in that
        # case there is nothing to compare.
        return []

    # Only the df1-vs-df2 block is needed, not the full square matrix.
    split = len(texts1)
    similarity_matrix = cosine_similarity(tfidf_matrix[:split], tfidf_matrix[split:])

    similar_texts = []
    # Index the matrix by position; report the frames' own row labels.
    for pos1, (label1, text1) in enumerate(zip(df1.index, texts1)):
        if text1 in matched_values:
            continue
        for pos2, (label2, text2) in enumerate(zip(df2.index, texts2)):
            if text2 in matched_values or text1 == text2:
                continue
            similarity = similarity_matrix[pos1, pos2]
            if not (threshold <= similarity < 1):
                continue
            max_length = max(len(text1), len(text2))
            if max_length == 0:
                continue  # two empty strings: avoid dividing by zero
            similarity_score = 1 - levenshtein_distance(text1, text2) / max_length
            if similarity_score >= threshold:
                similar_texts.append((label1, label2, text1, text2))
    return similar_texts
def main():
    """Render the comparison page: upload two files, pick columns, show matches.

    Once both files are uploaded, the user chooses one column from each.
    The chosen columns are compared for exact matches and for similar texts,
    and both results are written to the page.
    """
    st.title("Item Comparison App")

    # Upload files
    st.header("Upload Files")
    accepted_types = ["csv", "xlsx", "xls"]
    warehouse_file = st.file_uploader(
        "Upload Warehouse Item Stocks (CSV or Excel)", type=accepted_types
    )
    industry_file = st.file_uploader(
        "Upload Industry Item Stocks (CSV or Excel)", type=accepted_types
    )
    if warehouse_file is None or industry_file is None:
        return

    # Read files; report an unreadable or unsupported file on the page
    # instead of letting the exception crash the app.
    try:
        warehouse_df = read_csv_or_excel(warehouse_file)
        industry_df = read_csv_or_excel(industry_file)
    except ValueError as exc:
        st.error(str(exc))
        return

    # Select columns using dropdowns
    st.header("Select Columns")
    warehouse_column = st.selectbox(
        "Choose column from warehouse item stocks:", warehouse_df.columns.tolist()
    )
    industry_column = st.selectbox(
        "Choose column from industry item stocks:", industry_df.columns.tolist()
    )

    # The helpers compare a single column name across both frames, so expose
    # the chosen industry column under the warehouse column's name. An
    # unrelated industry column that already has that name is moved aside.
    if industry_column != warehouse_column:
        industry_df = industry_df.rename(
            columns={
                warehouse_column: f"{warehouse_column}_industry",
                industry_column: warehouse_column,
            }
        )

    # Find exact matches, then similar texts among the remaining rows
    exact_matches = find_exact_matches(warehouse_df, industry_df, warehouse_column)
    similar_texts = find_similar_texts(
        warehouse_df, industry_df, warehouse_column, exact_matches
    )

    # Display results
    st.header("Exact Matches")
    st.write(exact_matches)
    st.header("Similar Texts")
    for warehouse_row, industry_row, warehouse_text, industry_text in similar_texts:
        st.write(f"Row {warehouse_row} in warehouse item stocks is similar to Row {industry_row} in industry item stocks:")
        st.write(f"Warehouse: {warehouse_text}")
        st.write(f"Industry: {industry_text}")
        st.write("")
# Call main() only when this file's __name__ is "__main__".
if __name__ == "__main__":
    main()
|