JPLTedCas committed on
Commit
6d6e4bc
·
1 Parent(s): 26d858d

Upload 2 files

Browse files
Files changed (2) hide show
  1. MarkStreamlit.py +505 -0
  2. RemoveHTMLtags.py +34 -0
MarkStreamlit.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
# Product export: parse the CSV only once a file has actually been uploaded.
# `df` is left undefined until then; the call at the bottom of the script
# checks both uploaders before using it.
uploaded_file = st.file_uploader("Choose product file", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='utf8')

# Inventory export: same pattern, producing `df2`.
uploaded_file2 = st.file_uploader("Choose inventory file", type="csv")
if uploaded_file2 is not None:
    df2 = pd.read_csv(uploaded_file2, encoding='utf8')
20
+
21
def ConvertCitrus(df, df2,
                  output_dir='C:/Users/15572890/Desktop/I+D/MarksCsvConversion/Validation2'):
    """Convert a Shopify product export into a Citrus Lime data file.

    The product export is addressed mostly by 0-based column position, so
    the incoming column order matters. The result is written to
    ``OCCHIO-Cycle-Data-File_st.xlsx`` and ``OCCHIO-Cycle-Data-File_st.csv``
    inside ``output_dir``; snapshots of the raw inputs and of the
    HTML-stripped product export are written there as well.

    Args:
        df: Product export. Read by position (columns 1, 2, 3, 5, 6, 9, 11,
            13, 15, 20, 24, 25+) and by name (``'Variant SKU'``,
            ``'Image Src'``). Assumes the default 0..n-1 row index produced
            by ``pd.read_csv``. Column 2 is modified in place (HTML removed,
            cast to string).
        df2: Inventory export with ``'SKU'``, ``'COO'`` and ``'HS Code'``
            columns. Not modified.
        output_dir: Directory that receives every output file. Defaults to
            the path that was previously hard-coded.

    Returns:
        None. The result is only written to disk (returning the frame would
        make Streamlit render it at the call site).
    """
    import RemoveHTMLtags as RHT
    from babel.numbers import format_currency

    def out_path(file_name):
        # Plain concatenation keeps the default paths identical to the old ones.
        return output_dir + '/' + file_name

    # Snapshot both inputs exactly as received.
    df.to_excel(out_path('products_export_1.xlsx'), index=False)
    df2.to_excel(out_path('inventory_export_1.xlsx'), index=False)

    # Column C "Body (HTML)": strip the markup in place. str() means missing
    # bodies become the text 'nan' (unchanged from the previous behaviour).
    for pos in range(len(df.index)):
        df.iloc[pos, 2] = RHT.remove_tags(str(df.iloc[pos, 2]))
    df.to_excel(out_path('products_export_1-nohtml.xlsx'))
    df.iloc[:, 2] = pd.Series(df.iloc[:, 2], dtype="string")

    df_copy = df.copy(deep=True)

    def rename_positions(names_by_pos):
        # Rename columns by 0-based position, leaving every other label as is.
        labels = list(df_copy.columns)
        for pos, label in names_by_pos.items():
            labels[pos] = label
        df_copy.columns = labels

    # Shopify source position -> Citrus Lime target position.
    column_moves = {
        13: 0,   # Option3 Value        -> Style Number
        20: 5,   # Variant Price        -> Unit MSRP
        11: 7,   # Option2 Value        -> Colour
        24: 2,   # Variant Barcode      -> Vendor SKU
        9: 8,    # Option1 Value        -> Size 1
        3: 10,   # Vendor               -> Brand
        15: 30,  # Variant Grams        -> Weight
        5: 31,   # Custom Product Type  -> Short Description
        2: 32,   # Body (HTML)          -> Long Description
    }
    for source_pos, target_pos in column_moves.items():
        df_copy.iloc[:, target_pos] = df.iloc[:, source_pos].copy(deep=True)

    # Unit Cost is derived from Variant Price (column 20), which may have been
    # read as text.
    # NOTE(review): the original specification comment gives the divisor as
    # 1.6, but the code has always used 1.96 - confirm which one is intended.
    df_copy.iloc[:, 20] = df_copy.iloc[:, 20].astype(float)
    df_copy.iloc[:, 4] = ((df_copy.iloc[:, 20] / 1.2) / 1.96) * 0.96
    for money_pos in (4, 5):
        df_copy.iloc[:, money_pos] = df_copy.iloc[:, money_pos].apply(
            lambda x: format_currency(x, currency="GBP", locale="en_GB"))

    # Shopify prefixes barcodes with an apostrophe; remove it.
    df_copy.iloc[:, 2] = df_copy.iloc[:, 2].astype(str).str.replace("'", "")

    # Derive Gender (column 12) from the tags held in column 6 before that
    # column is blanked. A " ladys" tag overrides the mens/womens result.
    # NOTE(review): rows without any of these tags keep whatever the export
    # had in column 12 - confirm that is intended.
    for pos in range(len(df_copy.index)):
        tags = str(df_copy.iloc[pos, 6])
        has_mens = " mens" in tags
        has_womens = " womens" in tags
        if has_mens and has_womens:
            df_copy.iloc[pos, 12] = "Unisex"
        elif has_mens:
            df_copy.iloc[pos, 12] = "Mens"
        elif has_womens:
            df_copy.iloc[pos, 12] = "Womens"
        if " ladys" in tags:
            df_copy.iloc[pos, 12] = "Ladys"
    df_copy.iloc[:, 6] = ""

    # NOTE(review): the header listing in the original comments reads
    # "Year or Season" and "Other Barcode"; the labels below are kept exactly
    # as the code had them - confirm against the Citrus Lime template.
    rename_positions({
        0: 'Style Number',
        1: 'Product Name',
        2: 'Vendor SKU',
        3: 'UPC/EAN',
        4: 'Unit Cost',
        5: 'Unit MSRP',
        6: 'Colour Code (Simple Colour)',
        7: 'Colour',
        8: 'Size 1',
        9: 'Size 2',
        10: 'Brand',
        11: 'Year of Season',
        12: 'Gender',
        13: 'Manufacturer Part Code',
        14: 'Other Bar Code',
        15: 'VAT',
        16: 'Pack Qty',
        17: 'Stock Count',
        18: 'Price Band 1',
        19: 'Price Band 2',
        20: 'IE VAT',
        21: 'Unit Cost in Euros',
        22: 'MSRP in Euros',
        23: 'Commodity Codes',
        24: 'Country of Origin',
        30: 'Weight',
        31: 'Short Description',
        32: 'Long Description',
        33: 'Video Link',
    })

    # Columns with no Shopify counterpart are emitted empty.
    for blank_pos in (9, 11, 13, 14, 16, 18, 19, 20, 21, 22, 33):
        df_copy.iloc[:, blank_pos] = ""
    df_copy.iloc[:, 15] = "20"
    # The barcode goes into both "Vendor SKU" and "UPC/EAN".
    df_copy.iloc[:, 3] = df_copy.iloc[:, 2]

    # Bring COO / HS Code over from the inventory export, aligned on SKU.
    # reindex() returns a new frame (the result used to be discarded, so the
    # rows were never aligned) and refuses duplicate labels, hence the
    # keep-first de-duplication beforehand.
    inventory = df2.set_index('SKU')
    inventory = inventory[~inventory.index.duplicated(keep='first')]
    inventory = inventory.reindex(df['Variant SKU'])
    df_copy['Commodity Codes'] = inventory['HS Code'].values
    df_copy['Country of Origin'] = inventory['COO'].values

    # Append the dummy columns, then label positions 34-57 with the
    # tech-spec headers and empty them.
    for extra in range(49, 58):
        df_copy[str(extra)] = ''
    tech_names = ['Tech Specs', 'Size Chart', 'Geometry Chart', 'Frame', 'Rear Shock', 'Fork', 'Headset', 'Stem', 'Handlebar', 'Bar Tape / Grip', 'Brakes Levers', 'Brake Calipers', 'Tyres', 'Wheels', 'Front Derailleur', 'Rear Derailleur', 'Shift Levers', 'Chain', 'Cassette', 'Chainset', 'Bottom Bracket', 'Pedals', 'Saddle', 'Seatpost']
    rename_positions({34 + offset: name for offset, name in enumerate(tech_names)})
    df_copy.iloc[:, 34:58] = ''

    # Shopify spreads a product's images down the rows that follow it in the
    # single "Image Src" column. Rows with a Product Name start a product;
    # the rows after it without a name belong to that product.
    names = df_copy['Product Name'].tolist()
    sources = df_copy['Image Src'].tolist()
    groups = []  # (row position of the product row, its image links)
    for pos, (name, src) in enumerate(zip(names, sources)):
        if pd.notna(name):
            groups.append((pos, [src]))
        elif groups and "http" in str(src):
            # Includes the very last row, which the old loop bound skipped.
            groups[-1][1].append(src)
    max_images = max((len(links) for _, links in groups), default=0)

    # Image columns start at position 25, one column per image.
    # NOTE(review): more than five images per product would spill into
    # "Weight" (position 30) and beyond.
    for j in range(max_images):
        df_copy.iloc[:, 25 + j] = ''
    for pos, links in groups:
        for j, link in enumerate(links):
            df_copy.iloc[pos, 25 + j] = link
    rename_positions({25 + j: 'Image' + str(j + 1) for j in range(max_images)})

    # Size-variant rows (no Product Name) inherit the images and the
    # descriptive fields of the product row above them. Values are carried as
    # str(), so a missing value on the product row is copied as the text 'nan'
    # (unchanged from the previous behaviour).
    if max_images > 0:
        carried_positions = (1, 10, 12, 31, 32)
        cached_images = [None] * max_images
        cached_fields = [None] * len(carried_positions)
        for pos in range(len(df_copy.index)):
            if pd.notna(df_copy.iloc[pos, 1]):
                cached_images = [str(df_copy.iloc[pos, 25 + j]) for j in range(max_images)]
                cached_fields = [str(df_copy.iloc[pos, c]) for c in carried_positions]
            else:
                for j, image in enumerate(cached_images):
                    df_copy.iloc[pos, 25 + j] = image
                for c, value in zip(carried_positions, cached_fields):
                    df_copy.iloc[pos, c] = value

    df_copy.to_excel(out_path('OCCHIO-Cycle-Data-File_st.xlsx'), index=False)
    # utf_8_sig writes a BOM at the start of the CSV.
    df_copy.to_csv(out_path('OCCHIO-Cycle-Data-File_st.csv'), index=False, encoding='utf_8_sig')
501
+
502
+
503
# Run the conversion only when both uploaders have returned a file, since
# `df` and `df2` are defined only in that case.
if uploaded_file is not None and uploaded_file2 is not None:
    ConvertCitrus(df, df2)
505
+
RemoveHTMLtags.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Module
2
+ from bs4 import BeautifulSoup
3
+
4
+ # HTML Document
5
# Sample markup used only by the demo at the bottom of this module.
HTML_DOC = """
<html>
<head>
<title> Geeksforgeeks </title>
<style>.call {background-color:black;} </style>
<script>getit</script>
</head>
<body>
is a
<div>Computer Science portal.</div>
</body>
</html>
"""


def remove_tags(html):
    """Return the visible text of *html* with all markup removed.

    ``<style>`` and ``<script>`` elements are dropped together with their
    contents; the remaining text fragments are stripped of surrounding
    whitespace and joined with single spaces.
    """
    # parse html content
    soup = BeautifulSoup(html, "html.parser")

    # Remove the elements whose content is not meant to be read as text.
    for data in soup(['style', 'script']):
        data.decompose()

    # return data by retrieving the tag content
    return ' '.join(soup.stripped_strings)


# Print the extracted sample only when run as a script, so that importing
# this module (as MarkStreamlit.py does) has no output side effect.
if __name__ == "__main__":
    print(remove_tags(HTML_DOC))
+ print(remove_tags(HTML_DOC))