Section: Merge multiple Supplier Data Files
All code in one block
#!/usr/bin/env python
# coding: utf-8

# # Section: Merge multiple Supplier Data Files
#
# Merges every supplier CSV feed found in one folder into a single CSV file,
# then reads the merged file back for a quick visual check.

# In[1]:

# if there is a need to merge multiple files -- use this block
import os
import glob
import pandas as pd

# supplier data files/feeds are kept here
data_folder = 'data-supplier-2019-04-14/supplier-raw-data/'
# NOTE: every relative path below is resolved against this folder
os.chdir(data_folder)

# In[6]:

# show all data feed file name
# file extension for supplier data file
extension = 'csv'
# glob returns matches in an OS-dependent order; sort them so the merged
# file has the same row order on every run
all_filenames = sorted(glob.glob('*.{}'.format(extension)))
all_filenames

# In[7]:

# total number of rows combined all data files/feeds
row_total_count = 0
for f in all_filenames:
    df_s = pd.read_csv(f)
    # per-file (rows, columns) shape followed by the file name
    print(df_s.shape, f)
    row_total_count += df_s.shape[0]
row_total_count  # print(row_total_count)

# In[8]:

# combine all files in the list
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
# the row count shown here should equal row_total_count from the cell above
combined_csv.shape

# In[10]:

# export combined data to a csv file
# (written one level above the raw-data folder; 'utf-8-sig' prepends a BOM)
combined_csv.to_csv(
    "../all_supplier_products_2019_04_14.csv",
    index=False,
    encoding='utf-8-sig',
)

# In[13]:

# read csv data file and show data on the screen
df = pd.read_csv('../all_supplier_products_2019_04_14.csv')
df.head()
The following is the same code as displayed in the Jupyter Notebook, cell by cell. The output of each cell is also shown.
In [1]:
# if there is a need to merge multiple files -- use this block
import os
import glob
import pandas as pd

# supplier data files/feeds are kept here
data_folder = 'data-supplier-2019-04-14/supplier-raw-data/'
# NOTE: changes the working directory of the whole kernel, so every relative
# path used afterwards is resolved against data_folder
os.chdir(data_folder)
In [6]:
# show all data feed file name
# file extension for supplier data file
extension = 'csv'
# glob returns matches in an OS-dependent order; sort them so the file list
# (and anything built from it) is the same on every run
all_filenames = sorted(glob.glob('*.{}'.format(extension)))
all_filenames
Out[6]:
['data_feeds_5e95c25a1f7f6.csv',
'data_feeds_5e95c2962d471.csv',
'data_feeds_5e95c2d255409.csv',
'data_feeds_5e95c30e63423.csv',
'data_feeds_5e95c38646478.csv',
'data_feeds_5e95c5dd76370.csv']
In [7]:
# total number of rows combined all data files/feeds
row_total_count = 0
for f in all_filenames:
    df_s = pd.read_csv(f)
    # per-file (rows, columns) shape followed by the file name
    print(df_s.shape, f)
    row_total_count += df_s.shape[0]
row_total_count  # print(row_total_count)

# Printed output (abridged):
# (8058, 40) data_feeds_5e95c25a1f7f6.csv
# (7, 40) data_feeds_5e95c2962d471.csv
# (1, 40) data_feeds_5e95c2d255409.csv
# ...
# (1072, 40) data_feeds_5e95c565d6e30.csv
# (4833, 40) data_feeds_5e95c5dd76370.csv
Out[7]:
55690
In [8]:
# combine all files in the list
# each file is read into its own DataFrame and the frames are stacked row-wise
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
# (rows, columns) of the merged frame
combined_csv.shape
Out[8]:
(55690, 40)
In [10]:
# export combined data to a csv file
# - the "../" path is relative to the current working directory
# - index=False leaves the DataFrame index out of the file
# - 'utf-8-sig' writes UTF-8 with a leading BOM (presumably so spreadsheet
#   tools detect the encoding -- confirm if this matters downstream)
combined_csv.to_csv(
    "../all_supplier_products_2019_04_14.csv",
    index=False,
    encoding='utf-8-sig',
)
In [13]:
# read the combined csv data file back and show its first rows on the screen
df = pd.read_csv('../all_supplier_products_2019_04_14.csv')
df.head()
Out[13]:
| | Product ID | Model Code | Full Product Name | Short Product Name | Product URL | Category Name | Category URL | Subcategory Name | Subcategory URL | Date Product Was Launched | … | Related Products | Related Accessories | Weight Kg | Height mm | Width mm | Depth mm | Video link | Retail Price | Stock status | Date Back |
| 0 | 107890 | POU_0850GV7Y | Pull Rope Fitness Exercises Resistance Bands L… | Pull Rope Fitness
***. ***. ***
Note: Older short-notes from this site are posted on Medium: https://medium.com/@SayedAhmedCanada
*** . *** *** . *** . *** . ***
Sayed Ahmed
BSc. Eng. in Comp. Sc. & Eng. (BUET)
MSc. in Comp. Sc. (U of Manitoba, Canada)
MSc. in Data Science and Analytics (Ryerson University, Canada)
Linkedin: https://ca.linkedin.com/in/sayedjustetc
Blog: http://Bangla.SaLearningSchool.com, http://SitesTree.com
Training Courses: http://Training.SitesTree.com
8112223 Canada Inc/Justetc: http://JustEtc.net
Facebook Groups/Forums to discuss (Q & A):
https://www.facebook.com/banglasalearningschool
https://www.facebook.com/justetcsocial
Get access to courses on Big Data, Data Science, AI, Cloud, Linux, System Administration, Web Development, and other related topics. You can also create your own courses to sell to others. http://sitestree.com/training/
Build Ecommerce Software and Systems
Build Ecommerce Software and Systems
WRITTEN BY

