I found these tips for embedding Jupyter Notebooks hosted on GitHub in these great posts.
Master Post: https://www.andrewchallis.co.uk/portfolio/php-nbconvert-a-wordpress-plugin-for-jupyter-notebooks/
Supporting post: https://www.eg.bucknell.edu/~brk009/notebook-on-wp/
Installation Steps:
- Install WP Pusher
- Install the GitHub plugin with WP Pusher
- https://github.com/ghandic/nbconvert
- Add the additional CSS code
- Insert a shortcode like the one below.
- Celebrate
Personal Activity Tracking Data Analysis by Ivaylo Pavlov¶
THIS IS TEXT TO SEE IF COLAB PICKS IT UP
In [1]:
#Generic Imports and Display Settings
import numpy as np, pandas as pd, matplotlib.pyplot as plt, matplotlib.patches as mpatches, warnings
import seaborn as sns, scipy.stats as ss, matplotlib.mlab as mlab
warnings.filterwarnings('ignore')
%pylab inline
plt.rc("savefig", dpi=200)
matplotlib.style.use('ggplot')
pd.set_option('display.max_colwidth',80)
Import and clean the daily data and transform the heart rate intraday data for use¶
In [2]:
#Load the daily, intraday and sleep exports from their CSV files
raw_daily_data, raw_intraday, raw_sleep_data = (
    pd.read_csv(csv_path)
    for csv_path in ("Health Data-daily.csv", "Health Data-intraday.csv", "Sleep Analysis.csv")
)
#Extract only the heart rate from the intraday data and delete the rest, add a day-only column
# (.loc replaces the .ix indexer, which no longer exists in current pandas)
raw_intraday_data = raw_intraday.loc[:, ['Heart Rate (count/min)']].copy()
raw_intraday_data.index = pd.to_datetime(raw_intraday['Start'])
# Keep only the samples recorded after 11-Mar-2016 18:00
raw_intraday_data = raw_intraday_data.loc[raw_intraday_data.index > '11-Mar-2016 18:00']
# Zero readings are turned into NaN and those rows are dropped
raw_intraday_data = raw_intraday_data.replace(0, np.nan).dropna(axis=0)
raw_intraday_data['Date'] = raw_intraday_data.index.date
hr_intraday = raw_intraday_data
#Extract only the steps from the intraday data and delete the rest, add day, hour and weekday columns
# (.loc with a list of labels replaces the removed .ix indexer; a tuple would be read as a MultiIndex key)
raw_intraday_data2 = raw_intraday.loc[:, ['Start','Finish','Steps (count)']].copy()
raw_intraday_data2.index = pd.to_datetime(raw_intraday_data2['Start'])
# No date cut-off is applied to the steps data
# Zero step counts are turned into NaN and those rows are dropped
raw_intraday_data2 = raw_intraday_data2.replace(0, np.nan).dropna(axis=0)
raw_intraday_data2['Date'] = raw_intraday_data2.index.date
raw_intraday_data2['Hour'] = raw_intraday_data2.index.time
# Weekday labels are prefixed with the weekday number so they sort Monday to Sunday
days = {0:'0 Mon',1:'1 Tue',2:'2 Weds',3:'3 Thurs',4:'4 Fri',5:'5 Sat',6:'6 Sun'}
raw_intraday_data2['Weekday'] = raw_intraday_data2.index.dayofweek
raw_intraday_data2['Weekday'] = raw_intraday_data2['Weekday'].map(days)
steps_intraday = raw_intraday_data2
#Extract only the heart rate from the intraday data and delete the rest, add day, hour and weekday columns
# (.loc with a list of labels replaces the removed .ix indexer; a tuple would be read as a MultiIndex key)
raw_intraday_data3 = raw_intraday.loc[:, ['Start','Finish','Heart Rate (count/min)']].copy()
raw_intraday_data3.index = pd.to_datetime(raw_intraday_data3['Start'])
# Keep only the samples recorded after 11-Mar-2016 18:00
raw_intraday_data3 = raw_intraday_data3.loc[raw_intraday_data3.index > '11-Mar-2016 18:00']
# Zero readings are turned into NaN and those rows are dropped
raw_intraday_data3 = raw_intraday_data3.replace(0, np.nan).dropna(axis=0)
raw_intraday_data3['Date'] = raw_intraday_data3.index.date
raw_intraday_data3['Hour'] = raw_intraday_data3.index.time
# Weekday labels are prefixed with the weekday number so they sort Monday to Sunday
days = {0:'0 Mon',1:'1 Tue',2:'2 Weds',3:'3 Thurs',4:'4 Fri',5:'5 Sat',6:'6 Sun'}
raw_intraday_data3['Weekday'] = raw_intraday_data3.index.dayofweek
raw_intraday_data3['Weekday'] = raw_intraday_data3['Weekday'].map(days)
hr_intraday2 = raw_intraday_data3
#Clean up the sleep data, aggregate it by day and add a weekday column
# (.loc replaces the removed .ix indexer; .copy() so the index can be reassigned without touching raw_sleep_data)
raw_sleep_data2 = raw_sleep_data.loc[:, ["In bed Finish","Minutes in bed"]].copy()
# Index each sleep record by the calendar date on which the time in bed finished
raw_sleep_data2.index = pd.to_datetime(raw_sleep_data2["In bed Finish"])
raw_sleep_data2.index = raw_sleep_data2.index.date
raw_sleep_data2 = raw_sleep_data2.drop("In bed Finish", axis=1)
# Sum all records that share the same date
raw_sleep_data2 = raw_sleep_data2.groupby(raw_sleep_data2.index).agg('sum')
raw_sleep_data2['Hours in bed'] = raw_sleep_data2['Minutes in bed']/60
raw_sleep_data2 = raw_sleep_data2.drop("Minutes in bed", axis=1)
raw_sleep_data2['Weekday'] = pd.DatetimeIndex(raw_sleep_data2.index).dayofweek
# Weekday labels are prefixed with the weekday number so they sort Monday to Sunday
days = {0:'0 Mon',1:'1 Tue',2:'2 Weds',3:'3 Thurs',4:'4 Fri',5:'5 Sat',6:'6 Sun'}
raw_sleep_data2['Weekday'] = raw_sleep_data2['Weekday'].map(days)
In [3]:
#Bar chart of the hours in bed for every recorded day
raw_sleep_data2.plot(kind='bar', color='g', figsize=(10,2))
Out[3]:
In [4]:
# Bar chart of the mean hours in bed for each weekday
raw_sleep_data2.groupby("Weekday").mean().plot(kind='bar', color='purple', figsize=(5,2))
Out[4]:
Delete all the zero columns, set table index and delete the useless columns and add Total Calories and Weekday columns to DataSet¶
In [5]:
# Keep only the columns that contain at least one non-zero value
# (.loc replaces the .ix indexer, which no longer exists in current pandas)
has_nonzero = (raw_daily_data != 0).any(axis=0)
data = raw_daily_data.loc[:, has_nonzero]
# Index the table by the start timestamp of each day and drop the raw timestamp columns
data = data.set_index(pd.DatetimeIndex(data['Start']))
data = data.drop(['Start','Finish'],axis=1)
data['Total Calories (kcal)'] = data['Active Calories (kcal)'] + data['Resting Calories (kcal)']
data['Weekday'] = data.index.dayofweek
# Weekday labels are prefixed with the weekday number so they sort Monday to Sunday
days = {0:'0 Mon',1:'1 Tue',2:'2 Weds',3:'3 Thurs',4:'4 Fri',5:'5 Sat',6:'6 Sun'}
data['Weekday'] = data['Weekday'].map(days)
In [6]:
#Report the row counts of the daily and intraday tables, then preview the last 7 daily rows
print ("ROWS OF DATA / DAILY: {} / INTRADAY: {}".format(raw_daily_data.shape[0], hr_intraday.shape[0]))
print ("")
data.tail(7)
Out[6]:
Clean up WEIGHT data, Fill previous values for the Weight-related data and remove starting NA rows (Weight, BMI, Body Fat %, Lean Body Mass)¶
In [7]:
# NOTE: this cell modifies `data` in place; running it a second time scales the body fat column again
# (.loc replaces the .ix indexer, which no longer exists in current pandas)
weight_columns = ['Weight (kg)','Lean Body Mass (kg)','Body Fat Percentage (%)','Body Mass Index (count)']
data.loc[:,'Body Fat Percentage (%)'] *= 100 #Scale Body Fat Percentage (%) by 100
# Replace zero entries in every weight-related column with the previous value in that column
for weight_column in weight_columns:
    data.loc[:,weight_column] = data.loc[:,weight_column].replace(to_replace=0, method='ffill')
# Keep the rows with a finite weight, only the weight-related columns, and only dates after 2016-03-28
data_for_weight = data[np.isfinite(data['Weight (kg)'])]
data_for_weight = data_for_weight.loc[:,weight_columns]
data_for_weight = data_for_weight.loc[data_for_weight.index>'2016-03-28']
print ("Rows remaining with Weight data: " + str(len(data_for_weight)))
Clean up STEPS, FLIGHTS CLIMBED, DISTANCE data, Setting all days with less than 550 steps or less than 0.400 km as NaN (haven't worn the tracker or battery died)¶
In [8]:
print ("Rows before clean up: " + str(len(data)))
# NOTE: data_for_steps is the same object as `data` at this point, so the NaN masking below
# is also written into `data` and is seen by every later cell that reads `data`
data_for_steps = data
# (.loc replaces the .ix indexer, which no longer exists in current pandas)
data_for_steps.loc[data_for_steps['Steps (count)']<550,'Steps (count)'] = np.nan
data_for_steps.loc[data_for_steps['Distance (km)']<0.400,'Distance (km)'] = np.nan
# From here on data_for_steps is a filtered copy: only rows with finite steps and distance remain
data_for_steps = data_for_steps[np.isfinite(data_for_steps['Steps (count)'])]
data_for_steps = data_for_steps[np.isfinite(data_for_steps['Distance (km)'])]
print ("Rows lost after clean up: " + " " + str(len(data)-len(data_for_steps.index)))
print ("Rows after clean up: " + " " + str(len(data_for_steps.index)))
data_for_steps.tail()
Out[8]:
Clean up CALORIES, WEEKDAY, HEART RATE (daily) data¶
In [9]:
# NOTE: data_for_cal is the same object as `data` at this point, so the masking below also modifies `data`
data_for_cal = data
calorie_columns = ['Total Calories (kcal)','Active Calories (kcal)','Resting Calories (kcal)']
# Blank out all three calorie columns on days whose total is below 0.1 kcal
# (.loc replaces the .ix indexer, which no longer exists in current pandas)
data_for_cal.loc[data_for_cal['Total Calories (kcal)']<0.1,calorie_columns] = np.nan
# Keep the rows with a finite calorie total that fall after 2016-03-11
data_for_cal = data[np.isfinite(data['Total Calories (kcal)'])]
data_for_cal = data_for_cal.loc[data_for_cal.index>'2016-03-11']
# Drop every column named in the list below from the calorie table
data_for_cal2 = data_for_cal.drop(['Body Fat Percentage (%)','Body Mass Index (count)','Lean Body Mass (kg)','Weight (kg)','Distance (km)','Flights Climbed (count)','Steps (count)','Weekday','Total Calories (kcal)','Heart Rate (count/min)'],axis=1)
# Per-weekday analysis uses everything except the weight-related columns
data_for_weekday = data.drop(['Body Fat Percentage (%)','Body Mass Index (count)','Lean Body Mass (kg)','Weight (kg)'],axis=1)
# Daily heart rate: keep only the days with a positive reading
data_for_hr = data[["Heart Rate (count/min)","Weekday"]].copy()
data_for_hr = data_for_hr.loc[data_for_hr["Heart Rate (count/min)"]>0]
In [10]:
#Figure 1: histograms of steps, distance and flights climbed, stacked in three rows
# (plain column access replaces the .ix indexer, which no longer exists in current pandas)
plt.figure(1,figsize=(18,7))
plt.subplot(311)
plt.title('Frequency Charts')
plt.legend(handles=[mpatches.Patch(color='green', label='Steps')])
plt.hist(data_for_steps['Steps (count)'], bins=90, color='g')
plt.xlim(0,data_for_steps['Steps (count)'].max())
plt.subplot(312)
plt.legend(handles=[mpatches.Patch(color='orange', label='Kilometers')])
plt.hist(data_for_steps['Distance (km)'], bins=90, color='orange')
plt.xlim(0,data_for_steps['Distance (km)'].max())
plt.subplot(313)
plt.legend(handles=[mpatches.Patch(color='royalblue', label='Flights Climbed')])
plt.hist(data_for_steps['Flights Climbed (count)'], bins=60, color='royalblue')
plt.xlim(0,data_for_steps['Flights Climbed (count)'].max())
#Figure 2: histograms of total calories and weight, side by side
plt.figure(2,figsize=(16,3))
plt.subplot(121)
plt.legend(handles=[mpatches.Patch(color='red', label='Total Calories (kcal)')])
plt.hist(data_for_cal['Total Calories (kcal)'], bins=60, color='red')
plt.xlim(0,data_for_cal['Total Calories (kcal)'].max())
plt.subplot(122)
plt.legend(handles=[mpatches.Patch(color='salmon', label='Weight (kg)')])
plt.hist(data_for_weight['Weight (kg)'], bins=70, color='salmon')
# The weight axis spans the observed range rather than starting at zero
plt.xlim(data_for_weight['Weight (kg)'].min(),data_for_weight['Weight (kg)'].max())
plt.tight_layout()
plt.show()
In [11]:
#Daily steps over time with median, mean and 20-day moving average
# (plain column access replaces the .ix indexer, which no longer exists in current pandas)
steps_series = data_for_steps['Steps (count)']
medianval = np.round(steps_series.median(),1)
avgval = np.round(steps_series.mean(),1)
maxval = np.round(steps_series.max(),1)
# One y tick every 1500 steps
minor_ticks = np.arange(0, maxval+1, 1500, dtype=int)
minor_labels = minor_ticks
ax1 = steps_series.plot(color='black',figsize=(11, 3),linewidth=1.0)
ax1.set_ylim(0,maxval)
ax1.set_ylabel('Steps')
ax1.set_yticks(minor_ticks)
ax1.set_yticklabels(minor_labels)
ax1.yaxis.tick_right()
ax1.set_xticks(data_for_steps.index, minor=True)
#Add the horizontal and vertical lines
ax1.axhline(y=medianval, linewidth=1, color='y')
ax1.axhline(y=avgval, linewidth=1, color='g')
ax1.axvline(x='2016-03-11', linewidth=1, color='r')
ax1.axvline(x='2016-11-25', linewidth=1, color='r')
# Positional argument: the `b` keyword of grid() was renamed in newer matplotlib releases
ax1.grid(False)
#Rolling 20 Day MA
ma = steps_series.rolling(20).mean()
ax1.plot(ma)
pd.DataFrame(steps_series.describe()).transpose()
Out[11]:
In [12]:
#Daily flights climbed over time with median, mean and 20-day moving average
# (plain column access replaces the .ix indexer, which no longer exists in current pandas)
flights_series = data_for_steps['Flights Climbed (count)']
medianval2 = np.round(flights_series.median(),1)
avgval2 = np.round(flights_series.mean(),1)
maxval2 = np.round(flights_series.max(),1)
# One y tick every 3 flights
minor_ticks_stairs = np.arange(0, maxval2+2, 3, dtype=int)
minor_labels_stairs = minor_ticks_stairs
ax2 = flights_series.plot(color='royalblue',figsize=(11, 3),linewidth=1.0)
ax2.set_ylim(-1,maxval2)
ax2.set_ylabel('Flights Stairs')
ax2.set_yticks(minor_ticks_stairs)
ax2.set_yticklabels(minor_labels_stairs)
ax2.yaxis.tick_right()
#Add the horizontal and vertical lines
ax2.axhline(y=medianval2, linewidth=1, color='y')
ax2.axhline(y=avgval2, linewidth=1, color='g')
ax2.axvline(x='2016-03-11', linewidth=1, color='r')
ax2.axvline(x='2016-11-25', linewidth=1, color='r')
# Positional argument: the `b` keyword of grid() was renamed in newer matplotlib releases
ax2.grid(False)
#Rolling 20 Day MA
ma2 = flights_series.rolling(20).mean()
ax2.plot(ma2,linewidth=1.0)
pd.DataFrame(flights_series.describe()).transpose()
Out[12]:
In [13]:
#Daily distance over time with median, mean and 20-day moving average
# (plain column access replaces the .ix indexer, which no longer exists in current pandas)
distance_series = data_for_steps['Distance (km)']
medianval3 = np.round(distance_series.median(),1)
avgval3 = np.round(distance_series.mean(),1)
maxval3 = np.round(distance_series.max(),1)
# One y tick every 3 km
minor_ticks_km = np.arange(0, maxval3+1, 3, dtype=int)
minor_labels_km = minor_ticks_km
ax6 = distance_series.plot(color='orange',figsize=(11, 3),linewidth=1.0)
# The upper limit is the unrounded maximum
ax6.set_ylim(-1,distance_series.max())
ax6.set_ylabel('Distance (km)')
ax6.set_yticks(minor_ticks_km)
ax6.set_yticklabels(minor_labels_km)
ax6.yaxis.tick_right()
#Add the horizontal and vertical lines
ax6.axhline(y=medianval3, linewidth=1, color='y')
ax6.axhline(y=avgval3, linewidth=1, color='g')
ax6.axvline(x='2016-03-11', linewidth=1, color='r')
ax6.axvline(x='2016-11-25', linewidth=1, color='r')
# Positional argument: the `b` keyword of grid() was renamed in newer matplotlib releases
ax6.grid(False)
#Rolling 20 Day MA
ma2 = distance_series.rolling(20).mean()
ax6.plot(ma2,linewidth=1.0)
pd.DataFrame(distance_series.describe()).transpose()
Out[13]:
In [14]:
#Scatter chart Steps vs Stairs; point colour follows distance and point size follows total calories
# (plain column access replaces the .ix indexer, which no longer exists in current pandas)
plt.figure(1,figsize=(15,7))
x = data_for_steps['Steps (count)']
y = data_for_steps['Flights Climbed (count)']
plt.xlabel('Steps (count)')
plt.ylabel('Flights of Stairs')
plt.xlim(-5,x.max()+200)
# Booleans instead of 'off'/'on': current matplotlib treats any non-empty string as True
plt.tick_params(axis='y', which='both', labelleft=False, labelright=True)
plt.ylim(-2,y.max()+2)
# Positional argument: the `b` keyword of grid() was renamed in newer matplotlib releases
plt.grid(False)
# Reference lines at the mean flights climbed (avgval2) and mean steps (avgval) computed in earlier cells
plt.axhline(y=avgval2, linewidth=1, color='y')
plt.axvline(x=avgval, linewidth=1, color='r')
# The colormap is given by name so the cell does not depend on a `cm` name being in the namespace
plt.scatter(x, y, alpha=0.7,c=data_for_steps['Distance (km)'], s=data_for_steps['Total Calories (kcal)']/10, cmap='brg')
plt.show()
Most active day ever by number of climbed stairs¶
In [15]:
# Row(s) with the maximum number of flights climbed (.loc replaces the removed .ix indexer)
data_for_steps.loc[data_for_steps['Flights Climbed (count)']==data_for_steps['Flights Climbed (count)'].max()]
Out[15]:
Most active day ever by number of steps¶
In [16]:
# Row(s) with the maximum number of steps (.loc replaces the removed .ix indexer)
data_for_steps.loc[data_for_steps['Steps (count)']==data_for_steps['Steps (count)'].max()]
Out[16]:
Most active day ever by distance done¶
In [17]:
# Row(s) with the maximum distance (.loc replaces the removed .ix indexer)
data_for_steps.loc[data_for_steps['Distance (km)']==data_for_steps['Distance (km)'].max()]
Out[17]:
Weight, Lean Body Mass and Body Fat % Analysis¶
In [18]:
# Weight and lean body mass over time on one shared right-hand axis
# y ticks every 1 kg, from just below the lowest lean body mass to just above the highest weight
minor_ticks_weight = np.arange(data_for_weight['Lean Body Mass (kg)'].min()-1, data_for_weight['Weight (kg)'].max()+1, 1, dtype=int)
minor_labels_weight = minor_ticks_weight
ax9 = data_for_weight['Weight (kg)'].plot(secondary_y=True,figsize=(11, 3));
ax9 = data_for_weight['Lean Body Mass (kg)'].plot(secondary_y=True,figsize=(11, 3));
ax9.legend(loc=1, bbox_to_anchor=(0.5, 0.1), ncol=2)
# Positional argument: the `b` keyword of grid() was renamed in newer matplotlib releases
ax9.grid(False)
ax9.yaxis.tick_right()
ax9.set_ylim(data_for_weight['Lean Body Mass (kg)'].min()-1,data_for_weight['Weight (kg)'].max()+1)
ax9.set_yticks(minor_ticks_weight)
ax9.set_yticklabels(minor_labels_weight)
# Show the most recent weight record
data_for_weight.tail(1)
Out[18]:
In [19]:
#Mean, median, minimum, maximum and standard deviation of every column, per weekday
weekday_stats = data_for_weekday.groupby('Weekday').agg(['mean','median','min','max','std'])
weekday_stats.T
Out[19]:
Heart Rate Data Analysis¶
In [20]:
#Box Plot for Heart Rate Data: one box per date, whiskers given as whis=[0,100]
plt.figure(figsize=(32,7))
ax5 = sns.boxplot(x="Date", y="Heart Rate (count/min)", data=hr_intraday, whis=[0,100])
# Pad the y range by 5 bpm on either side of the observed extremes
ax5.set_ylim(hr_intraday['Heart Rate (count/min)'].min()-5,hr_intraday['Heart Rate (count/min)'].max()+5)
ax5.set_xticklabels(hr_intraday['Date'].unique())
# Positional argument: the `b` keyword of grid() was renamed in newer matplotlib releases
ax5.grid(False)
ax5.yaxis.tick_right()
# The x axis is hidden, so the date labels set above are not drawn
ax5.axes.get_xaxis().set_visible(False)
labels = ax5.get_xticklabels()
plt.setp(labels, rotation=45, fontsize=8)
ax5.plot()
Out[20]:
In [21]:
#Cumulative Charts
#Cumulative steps done
#Cumulative km walked
#Cumulative dataset
cumulative = data[['Steps (count)','Distance (km)','Flights Climbed (count)']].copy()
# Running totals; NaN days are skipped by cumsum and then filled with the previous total
cumulative = cumulative.cumsum(axis=0, skipna=True)
# .ffill() is the direct equivalent of the deprecated fillna(method='ffill')
cumulative = cumulative.ffill()
# Steps on the left axis, distance on a secondary right axis
cumulative['Steps (count)'].plot(figsize=(11,3))
cumulative['Distance (km)'].plot(secondary_y=True, style='g')
Out[21]:
In [22]:
#Matrix of histograms: steps per day, one panel per (year, month)
matrixdata = data_for_steps[['Steps (count)','Distance (km)','Flights Climbed (count)','Weekday']].copy()
matrixdata['Month']=pd.to_datetime(matrixdata.index).month
matrixdata['Year']=pd.to_datetime(matrixdata.index).year
# `height` is the current name of the FacetGrid argument formerly called `size`
g = sns.FacetGrid(matrixdata, row="Year", col="Month", height=4, margin_titles=True)
g.map(plt.hist, "Steps (count)", bins=12)
Out[22]:
In [23]:
# Matrix of histograms: flights climbed per day, one panel per (year, month)
# `height` is the current name of the FacetGrid argument formerly called `size`
g = sns.FacetGrid(matrixdata, row="Year", col="Month", height=4, margin_titles=True)
g.map(plt.hist, "Flights Climbed (count)", bins=12, color="purple")
Out[23]:
In [24]:
# Matrix of second-order regression plots of distance against steps, one panel per (year, month)
# `height` is the current name of the FacetGrid argument formerly called `size`
g = sns.FacetGrid(matrixdata, row="Year", col="Month", height=4, margin_titles=True)
g.map(sns.regplot, "Steps (count)", "Distance (km)", order=2)
Out[24]:
In [25]:
# Display the table that feeds the facet grids (the whole frame, not a head/tail preview)
matrixdata
Out[25]: