Spaces:

cs70
/

test-excel

Sleeping

test-excel / app.py

Chiragkumar Savani

Fixes for name changes in columns for various excel files

42c790d 10 months ago

7.26 kB

	import pandas as pd
	from pandas.tseries.offsets import MonthEnd
	import gradio as gr
	import random

	# Function to generate a random light color
	def generate_random_light_color():
	min_brightness = 0.7
	while True:
	r, g, b = [random.randint(128, 255) for _ in range(3)]
	brightness = (r * 0.299 + g * 0.587 + b * 0.114) / 255
	if brightness >= min_brightness:
	return '#{:02x}{:02x}{:02x}'.format(*(r, g, b))


	# Function to set the background color of a specific cell
	def set_cell_color(styles_df, index, column, hex_color):
	styles_df.at[index, column] = f'background-color: {hex_color}'
	return styles_df

	# Function to calculate the threshold
	def calculate_threshold(value, is_high_price=True):
	if 0 <= value <= 200:
	return 0.20
	elif 201 <= value <= 500:
	return 0.70
	elif 501 <= value <= 1000:
	return 1.0
	elif 1001 <= value <= 2000:
	return 2.0
	elif 2001 <= value <= 3000:
	return 3.0
	elif 3001 <= value <= 4000:
	return 4.0
	elif 4001 <= value <= 5000:
	return 5.0
	else:
	return 5.0

	def last_thursday(dt):
	# Get the last day of the month
	last_day_of_month = dt + MonthEnd(0)
	# Calculate how many days to subtract to get the last Thursday
	offset = (last_day_of_month.weekday() - 3) % 7
	return last_day_of_month - pd.Timedelta(days=offset)


	def check_condition_passed(df, column_name, max_index, output_value, is_high = True):
	if is_high:
	filtered_df = df[(df.index < max_index) & (df[column_name] > output_value)]
	else:
	filtered_df = df[(df.index < max_index) & (df[column_name] < output_value)]

	if not filtered_df.empty:
	return True
	else:
	return False

	def get_output_value(value1, value2, is_high=False):
	if is_high:
	return max(int(value1), int(value2)) + 1
	else:
	return min(int(value1), int(value2)) - 1


	# Function to read CSV and generate Excel with modifications
	def process_csv(file):
	df = pd.read_csv(file)
	df.columns = df.columns.str.strip() # Remove trailing spaces from column names
	HIGH_NAME = "HIGH PRICE"
	if HIGH_NAME not in df.columns:
	HIGH_NAME = "HIGH"
	LOW_NAME = "LOW PRICE"
	if LOW_NAME not in df.columns:
	LOW_NAME = "LOW"

	DATE_NAME = "DATE"
	if DATE_NAME not in df.columns:
	DATE_NAME = "Date"

	# Add three empty columns between LOW PRICE and CLOSE PRICE
	low_price_index = df.columns.get_loc(LOW_NAME)
	df.insert(low_price_index + 1, 'HIGH Result', '')
	df.insert(low_price_index + 2, 'LOW Result', '')
	df.insert(low_price_index + 3, 'Empty Column', '')

	# Convert DATE to datetime
	df[DATE_NAME] = pd.to_datetime(df[DATE_NAME], format='%d-%b-%Y')

	# Detect the last Thursday of each month and insert an empty row after it
	df['Last_Thursday'] = df[DATE_NAME].apply(last_thursday)

	indices_to_insert = []

	for i in range(len(df)):
	if df.loc[i, DATE_NAME] == df.loc[i, 'Last_Thursday']:
	indices_to_insert.append(i)
	df['Separator'] = ''

	# Insert empty rows and update the Last_Thursday column
	for idx in reversed(indices_to_insert):
	# Insert an empty row
	df = pd.concat([df.iloc[:idx], pd.DataFrame([{'Separator': 'Separator'}]), df.iloc[idx:]]).reset_index(drop=True)

	price_columns = [HIGH_NAME, LOW_NAME]
	df[price_columns] = df[price_columns].replace({',': ''}, regex=True).apply(pd.to_numeric, errors='coerce')

	# Calculate global thresholds for HIGH PRICE and LOW PRICE columns
	high_price_threshold = calculate_threshold(df[HIGH_NAME].max(), is_high_price=True)
	low_price_threshold = calculate_threshold(df[LOW_NAME].min(), is_high_price=False)

	# Process HIGH PRICE and LOW PRICE columns
	def process_column(df, style_df, column_name, result_column_name, threshold):

	element_used = [False] * len(df[column_name])
	# for last_thurday_date, group in df.groupby('Last_Thursday', sort=False):
	grouped_df = df.groupby((df['Separator'] == 'Separator').cumsum())
	for group_name, group in grouped_df:
	group = group[group['Separator'] != 'Separator']
	rows = group.index.tolist()
	print(rows)
	for i in range(len(rows) - 1, -1, -1):
	if not element_used[rows[i]]:
	for j in range(i - 1, -1, -1):
	diff = abs(df.loc[rows[i], column_name] - df.loc[rows[j], column_name])
	if diff < threshold and not element_used[rows[j]]:
	output_value = get_output_value(df.loc[rows[i], column_name], df.loc[rows[j], column_name], 'high' in column_name.lower())
	# print(f"i {rows[i]} j {rows[j]} {column_name}")
	# print(f"{df.loc[rows[i], column_name]} {df.loc[rows[j], column_name]} diff {diff}, threshold: {threshold}, output value {output_value}")
	df.at[rows[j], result_column_name] = output_value

	element_used[rows[i]] = True
	element_used[rows[j]] = True
	color = generate_random_light_color()
	style_df = set_cell_color(style_df, index=rows[i], column=column_name, hex_color=color)
	style_df = set_cell_color(style_df, index=rows[j], column=column_name, hex_color=color)

	# check if there is higher or lower value, if yes, then colorize it
	response = check_condition_passed(df, column_name, rows[j], output_value, 'high' in column_name.lower())
	if response:
	style_df = set_cell_color(style_df, index=rows[j], column=result_column_name, hex_color=color)
	break

	# Create a dictionary to map column names to Excel letters
	column_letter_map = {v: k for k, v in enumerate(df.columns, start=1)}

	# Save to an Excel file and get the workbook
	style_df = pd.DataFrame('', index=df.index, columns=df.columns)
	output_file = file.replace(".csv", "_processed.xlsx")

	process_column(df, style_df, HIGH_NAME, 'HIGH Result', high_price_threshold)
	process_column(df, style_df, LOW_NAME, 'LOW Result', low_price_threshold)

	# add an empty row before the new month
	df[DATE_NAME] = df[DATE_NAME].dt.strftime('%d-%b-%Y')
	# df['Last_Thursday'] = df['Last_Thursday'].dt.strftime('%d-%b-%Y')

	styled_df = df.style.apply(lambda _: style_df, axis=None)

	styled_df.to_excel(output_file, engine='openpyxl', index=False)

	return output_file

	# Gradio Interface
	def gradio_interface(file):
	return process_csv(file)

	# Gradio app interface
	iface = gr.Interface(
	fn=gradio_interface,
	inputs=gr.File(label="Upload CSV File (.csv)", file_count="single"),
	outputs=gr.File(label="Download Processed Excel File"),
	title="CSV to Excel Processor with Cell Highlighting",
	description="Upload a CSV file with stock data, and download a processed Excel file with highlighted cells."
	)

	if __name__ == "__main__":
	iface.launch()