test-excel / app.py
Chiragkumar Savani
Fixes for name changes in columns for various excel files
42c790d
raw
history blame
7.26 kB
import pandas as pd
from pandas.tseries.offsets import MonthEnd
import gradio as gr
import random
# Function to generate a random light color
def generate_random_light_color():
min_brightness = 0.7
while True:
r, g, b = [random.randint(128, 255) for _ in range(3)]
brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255
if brightness >= min_brightness:
return '#{:02x}{:02x}{:02x}'.format(*(r, g, b))
# Function to set the background color of a specific cell
def set_cell_color(styles_df, index, column, hex_color):
styles_df.at[index, column] = f'background-color: {hex_color}'
return styles_df
# Function to calculate the threshold
def calculate_threshold(value, is_high_price=True):
if 0 <= value <= 200:
return 0.20
elif 201 <= value <= 500:
return 0.70
elif 501 <= value <= 1000:
return 1.0
elif 1001 <= value <= 2000:
return 2.0
elif 2001 <= value <= 3000:
return 3.0
elif 3001 <= value <= 4000:
return 4.0
elif 4001 <= value <= 5000:
return 5.0
else:
return 5.0
def last_thursday(dt):
# Get the last day of the month
last_day_of_month = dt + MonthEnd(0)
# Calculate how many days to subtract to get the last Thursday
offset = (last_day_of_month.weekday() - 3) % 7
return last_day_of_month - pd.Timedelta(days=offset)
def check_condition_passed(df, column_name, max_index, output_value, is_high = True):
if is_high:
filtered_df = df[(df.index < max_index) & (df[column_name] > output_value)]
else:
filtered_df = df[(df.index < max_index) & (df[column_name] < output_value)]
if not filtered_df.empty:
return True
else:
return False
def get_output_value(value1, value2, is_high=False):
if is_high:
return max(int(value1), int(value2)) + 1
else:
return min(int(value1), int(value2)) - 1
# Function to read CSV and generate Excel with modifications
def process_csv(file):
df = pd.read_csv(file)
df.columns = df.columns.str.strip() # Remove trailing spaces from column names
HIGH_NAME = "HIGH PRICE"
if HIGH_NAME not in df.columns:
HIGH_NAME = "HIGH"
LOW_NAME = "LOW PRICE"
if LOW_NAME not in df.columns:
LOW_NAME = "LOW"
DATE_NAME = "DATE"
if DATE_NAME not in df.columns:
DATE_NAME = "Date"
# Add three empty columns between LOW PRICE and CLOSE PRICE
low_price_index = df.columns.get_loc(LOW_NAME)
df.insert(low_price_index + 1, 'HIGH Result', '')
df.insert(low_price_index + 2, 'LOW Result', '')
df.insert(low_price_index + 3, 'Empty Column', '')
# Convert DATE to datetime
df[DATE_NAME] = pd.to_datetime(df[DATE_NAME], format='%d-%b-%Y')
# Detect the last Thursday of each month and insert an empty row after it
df['Last_Thursday'] = df[DATE_NAME].apply(last_thursday)
indices_to_insert = []
for i in range(len(df)):
if df.loc[i, DATE_NAME] == df.loc[i, 'Last_Thursday']:
indices_to_insert.append(i)
df['Separator'] = ''
# Insert empty rows and update the Last_Thursday column
for idx in reversed(indices_to_insert):
# Insert an empty row
df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)
price_columns = [HIGH_NAME, LOW_NAME]
df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')
# Calculate global thresholds for HIGH PRICE and LOW PRICE columns
high_price_threshold = calculate_threshold(df[HIGH_NAME].max(), is_high_price=True)
low_price_threshold = calculate_threshold(df[LOW_NAME].min(), is_high_price=False)
# Process HIGH PRICE and LOW PRICE columns
def process_column(df, style_df, column_name, result_column_name, threshold):
element_used = [False] * len(df[column_name])
# for last_thurday_date, group in df.groupby('Last_Thursday', sort=False):
grouped_df = df.groupby((df['Separator'] == 'Separator').cumsum())
for group_name, group in grouped_df:
group = group[group['Separator'] != 'Separator']
rows = group.index.tolist()
print(rows)
for i in range(len(rows) - 1, -1, -1):
if not element_used[rows[i]]:
for j in range(i - 1, -1, -1):
diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
if diff < threshold and not element_used[rows[j]]:
output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower())
# print(f"i {rows[i]} j {rows[j]} {column_name}")
# print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
df.at[rows[j], result_column_name] = output_value
element_used[rows[i]] = True
element_used[rows[j]] = True
color = generate_random_light_color()
style_df = set_cell_color(style_df, index=rows[i], column=column_name, hex_color=color)
style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)
# check if there is higher or lower value, if yes, then colorize it
response = check_condition_passed(df, column_name, rows[j], output_value, 'high' in column_name.lower())
if response:
style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
break
# Create a dictionary to map column names to Excel letters
column_letter_map = {v: k for k, v in enumerate(df.columns, start=1)}
# Save to an Excel file and get the workbook
style_df = pd.DataFrame('', index=df.index, columns=df.columns)
output_file = file.replace(".csv", "_processed.xlsx")
process_column(df, style_df, HIGH_NAME, 'HIGH Result', high_price_threshold)
process_column(df, style_df, LOW_NAME, 'LOW Result', low_price_threshold)
# add an empty row before the new month
df[DATE_NAME] = df[DATE_NAME].dt.strftime('%d-%b-%Y')
# df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')
styled_df = df.style.apply(lambda _: style_df, axis=None)
styled_df.to_excel(output_file, engine='openpyxl', index=False)
return output_file
# Gradio Interface
def gradio_interface(file):
return process_csv(file)
# Gradio app interface
iface = gr.Interface(
fn=gradio_interface,
inputs=gr.File(label="Upload CSV File (.csv)", file_count="single"),
outputs=gr.File(label="Download Processed Excel File"),
title="CSV to Excel Processor with Cell Highlighting",
description="Upload a CSV file with stock data, and download a processed Excel file with highlighted cells."
)
if __name__ == "__main__":
iface.launch()