File size: 10,473 Bytes
89fb4b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import gradio as gr
import pandas as pd
import tempfile
import os
import requests
from openpyxl import load_workbook
from openpyxl.styles import Alignment

def auto_correct_names(series, threshold=90):
    try:
        from fuzzywuzzy import process, fuzz
    except ImportError:
        return series  # Fallback if fuzzywuzzy is not installed
    unique_names = series.dropna().unique()
    name_mapping = {}
    for name in unique_names:
        matches = process.extractBests(
            name, unique_names, 
            scorer=fuzz.token_sort_ratio, 
            score_cutoff=threshold
        )
        if matches:
            best_match = max(matches, key=lambda x: (x[1], list(series).count(x[0])))
            name_mapping[name] = best_match[0]
    return series.replace(name_mapping)

def adjust_excel_formatting(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    for col in ws.columns:
        max_length = 0
        col_letter = col[0].column_letter
        for cell in col:
            if cell.value:
                max_length = max(max_length, len(str(cell.value)))
            cell.alignment = Alignment(wrap_text=True)
        ws.column_dimensions[col_letter].width = max_length + 2
    wb.save(file_path)

def process_file_a_to_b(input_file, output_file):
    try:
        # Read the Excel file, skipping the first row (OVERNIGHT/MORNING header)
        input_df = pd.read_excel(input_file, header=1)
        
        # Get the date columns (all columns except the first one which contains model names)
        date_columns = input_df.columns[1:].tolist()
        
        # Melt the dataframe to long format
        df_long = input_df.melt(
            id_vars=[input_df.columns[0]],  # First column (Model names)
            var_name='DATE',
            value_name='CHATTER'
        )
        
        # Clean up the data
        df_long = df_long[df_long['CHATTER'].notna()]  # Remove empty cells
        df_long = df_long[df_long['CHATTER'] != '']    # Remove empty strings
        df_long = df_long[df_long['CHATTER'] != 'OFF'] # Remove 'OFF' entries
        
        # Group by chatter and date, collect all models
        grouped = df_long.groupby(['CHATTER', 'DATE'])[input_df.columns[0]].apply(
            lambda x: ', '.join(sorted(x))
        ).reset_index()
        
        # Pivot to get chatters as rows and dates as columns
        pivoted = grouped.pivot(
            index='CHATTER',
            columns='DATE',
            values=input_df.columns[0]
        )
        
        # Reorder columns to match original date order
        pivoted = pivoted[date_columns]
        
        # Define the expected order of chatters
        expected_chatters = [
            'VELJKO2', 'VELJKO3', 'MARKO', 'GODDARD', 'ALEKSANDER', 'FEELIP', 
            'DENIS', 'TOME', 'MILA', 'VELJKO', 'DAMJAN', 'DULE', 'CONRAD', 
            'ALEXANDER', 'VEJKO3'
        ]
        
        # Reindex with expected order, keeping any additional chatters at the end
        final_df = pivoted.reindex(expected_chatters + [x for x in pivoted.index if x not in expected_chatters])
        
        # Fill empty cells with 'OFF'
        final_df = final_df.fillna('OFF')
        
        # Reset index and rename the index column to 'CHATTER'
        final_df = final_df.reset_index()
        final_df = final_df.rename(columns={'index': 'CHATTER'})
        
        # Try to save to Excel, with error handling
        try:
            final_df.to_excel(output_file, index=False)
            print(f"Saved Format B to {output_file}")
        except PermissionError:
            # If file is in use, try a different name
            base, ext = os.path.splitext(output_file)
            new_output = f"{base}_new{ext}"
            final_df.to_excel(new_output, index=False)
            print(f"Original file was in use. Saved Format B to {new_output}")
    except Exception as e:
        print(f"Error processing file: {str(e)}")
        raise

def process_file_b_to_a(input_file, output_file):
    try:
        # Read the Excel file
        input_df = pd.read_excel(input_file, header=0)
        
        # Get the date columns (all columns except the first one which contains chatter names)
        date_columns = input_df.columns[1:].tolist()
        
        # Melt the dataframe to long format
        df_long = input_df.melt(
            id_vars=[input_df.columns[0]],  # First column (Chatter names)
            var_name='DATE',
            value_name='MODEL'
        )
        
        # Clean up the data
        df_long = df_long[df_long['MODEL'].notna()]  # Remove empty cells
        df_long = df_long[df_long['MODEL'] != '']    # Remove empty strings
        df_long = df_long[df_long['MODEL'] != 'OFF'] # Remove 'OFF' entries
        
        # Split comma-separated models into separate rows
        df_long['MODEL'] = df_long['MODEL'].str.split(', ')
        df_long = df_long.explode('MODEL')
        
        # Group by model and date, collect all chatters
        grouped = df_long.groupby(['MODEL', 'DATE'])[input_df.columns[0]].apply(
            lambda x: ', '.join(sorted(x))
        ).reset_index()
        
        # Pivot to get models as rows and dates as columns
        pivoted = grouped.pivot(
            index='MODEL',
            columns='DATE',
            values=input_df.columns[0]
        )
        
        # Reorder columns to match original date order
        pivoted = pivoted[date_columns]
        
        # Sort models by frequency
        model_order = grouped['MODEL'].value_counts().index.tolist()
        final_df = pivoted.reindex(model_order)
        
        # Fill empty cells with 'OFF'
        final_df = final_df.fillna('OFF')
        
        # Reset index to make MODEL a column
        final_df = final_df.reset_index()
        
        # Try to save to Excel, with error handling
        try:
            final_df.to_excel(output_file, index=False)
            print(f"Saved Format A to {output_file}")
        except PermissionError:
            # If file is in use, try a different name
            base, ext = os.path.splitext(output_file)
            new_output = f"{base}_new{ext}"
            final_df.to_excel(new_output, index=False)
            print(f"Original file was in use. Saved Format A to {new_output}")
    except Exception as e:
        print(f"Error processing file: {str(e)}")
        raise

def save_and_format_excel(df, original_filename):
    name_part, ext_part = os.path.splitext(os.path.basename(original_filename))
    processed_filename = f"{name_part}_processed{ext_part}"
    temp_file_path = os.path.join(tempfile.gettempdir(), processed_filename)
    df.to_excel(temp_file_path, index=False, sheet_name='Schedule')
    adjust_excel_formatting(temp_file_path)
    return temp_file_path

def get_local_file_from_path_or_url(path_or_url):
    if path_or_url.startswith('http://') or path_or_url.startswith('https://'):
        # Download the file
        response = requests.get(path_or_url, stream=True)
        response.raise_for_status()
        suffix = os.path.splitext(path_or_url)[-1] or '.xlsx'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            for chunk in response.iter_content(chunk_size=8192):
                tmp_file.write(chunk)
            return tmp_file.name
    else:
        return path_or_url

def process_file(input_file, direction):
    try:
        if direction == "Format A β†’ Format B":
            df = process_file_a_to_b(input_file, direction)
        else:
            df = process_file_b_to_a(input_file, direction)
        temp_file_path = save_and_format_excel(df, input_file)
        return df, temp_file_path
    except Exception as e:
        error_df = pd.DataFrame({"Error": [f"⚠️ {str(e)}"]})
        return error_df, None

def download_file(out_path):
    return out_path

with gr.Blocks(title="Schedule Converter") as demo:
    gr.Markdown("# πŸ“… Schedule Converter")
    gr.Markdown("Upload your schedule Excel file, select conversion direction, and download the result.")
    with gr.Row():
        input_file = gr.File(label="Upload Schedule File", type="filepath")
        direction = gr.Dropdown([
            "Format A β†’ Format B",
            "Format B β†’ Format A"
        ], value="Format A β†’ Format B", label="Conversion Direction")
    with gr.Row():
        process_btn = gr.Button("Process File", variant="primary")
        reset_btn = gr.Button("Upload New File")
    output_table = gr.Dataframe(label="Preview", wrap=True)
    download_button = gr.Button("Download Processed File", visible=False)
    temp_file_path = gr.State(value=None)
    def reset_components():
        return [None, pd.DataFrame(), None, gr.update(visible=False)]
    def process_and_show(file, direction):
        df, out_path = process_file(file, direction)
        if out_path:
            return df, out_path, gr.update(visible=True)
        return df, None, gr.update(visible=False)
    process_btn.click(
        process_and_show,
        inputs=[input_file, direction],
        outputs=[output_table, temp_file_path, download_button]
    )
    reset_btn.click(
        reset_components,
        outputs=[input_file, output_table, temp_file_path, download_button]
    )
    download_button.click(
        download_file,
        inputs=temp_file_path,
        outputs=gr.File(label="Processed Schedule")
    )
if __name__ == "__main__":
    print("Enter the path or URL to your Excel file:")
    file_path_or_url = input().strip()
    print("Enter output file path (e.g., D:/work/formatter/schedule_a.xlsx):")
    output_path = input().strip()
    print("Select conversion direction:")
    print("1: Format A β†’ Format B")
    print("2: Format B β†’ Format A")
    direction_input = input().strip()
    direction = "Format A β†’ Format B" if direction_input == "1" else "Format B β†’ Format A"
    
    try:
        local_file = get_local_file_from_path_or_url(file_path_or_url)
        if direction == "Format A β†’ Format B":
            process_file_a_to_b(local_file, output_path)
        else:
            process_file_b_to_a(local_file, output_path)
    except Exception as e:
        print(f"Error: {str(e)}")
        print("Please make sure:")
        print("1. The input file exists and is not open in Excel")
        print("2. The output file is not open in Excel")
        print("3. You have write permissions in the output directory")
    demo.launch()