From adafdc83773bc4dbd0b14d0acfb1237e9cb7ec4d Mon Sep 17 00:00:00 2001 From: Niki Chong Date: Wed, 23 Oct 2019 17:10:29 +0100 Subject: [PATCH] Added fitbit compiler script --- fitbit_data_complier/README.md | 14 + fitbit_data_complier/cleaning.ipynb | 1468 +++++++++++++++++++++++++++ 2 files changed, 1482 insertions(+) create mode 100644 fitbit_data_complier/README.md create mode 100644 fitbit_data_complier/cleaning.ipynb diff --git a/fitbit_data_complier/README.md b/fitbit_data_complier/README.md new file mode 100644 index 0000000..604862a --- /dev/null +++ b/fitbit_data_complier/README.md @@ -0,0 +1,14 @@ +# Fitbit Data Compiler + +## About +This script compiles all the personal files that has been collected by your fitbit. To get your own data, follow this link [here](https://help.fitbit.com/articles/en_US/Help_article/1133). + +The script is very basic, and just provides the avenue for people with fitbits to compile all the files into a single dataframe. + +## Why is this useful? +This is useful for those who own a fitbit, or have a fitbit API and would like to perform analysis and play around with their own data. + +## What do I need? +In order to run this script, you would need to download [Anaconda](https://www.anaconda.com/) and run Jupyter notebook on it. + +Alternatively, you could also upload the script along with your fitbit data and run it on Google Colab - completely on the cloud. \ No newline at end of file diff --git a/fitbit_data_complier/cleaning.ipynb b/fitbit_data_complier/cleaning.ipynb new file mode 100644 index 0000000..831ef75 --- /dev/null +++ b/fitbit_data_complier/cleaning.ipynb @@ -0,0 +1,1468 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "7FhZpGJqZBhr" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "import matplotlib.mlab as mlab" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SqIFi2EL2mco" + }, + "source": [ + "# In directory" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "-XCICwcG6F7-", + "outputId": "c2e456b1-59a8-494c-d28d-722ba3a411c5" + }, + "outputs": [], + "source": [ + "#os.listdir('json')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "PBC4v-GG2rAk" + }, + "source": [ + "# Heart data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "QD_DpYaUXxTU" + }, + "outputs": [], + "source": [ + "inDirHeart = [i for i in os.listdir('json') if i.startswith('heart_rate')]\n", + "\n", + "dfHeart = pd.DataFrame()\n", + "\n", + "for i in inDirHeart: \n", + " data = pd.read_json('./json/' + i)\n", + " dfHeart = dfHeart.append(data, ignore_index=True)\n", + "\n", + "bpm = []\n", + "days = []\n", + "for n in range(len(dfHeart.index)):\n", + " val = dfHeart.value[n]['bpm']\n", + " bpm.append(val)\n", + " \n", + " day = dfHeart.dateTime[n].dayofweek\n", + " days.append(day)\n", + "\n", + "dfHeart['bpm'] = bpm\n", + "dfHeart['day'] = days\n", + "\n", + "date = []\n", + "for i in range(len(dfHeart.index)):\n", + " val = dfHeart.loc[i].dateTime.date()\n", + " date.append(val)\n", + "\n", + "dfHeart['date'] = date\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "XjLpMS5lWM_V", + "outputId": "8e399812-416a-400a-e039-57c5d795078e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateTimevaluebpmdaydate
2985672019-08-20 00:00:09{'bpm': 67, 'confidence': 2}6712019-08-20
2985682019-08-20 00:00:24{'bpm': 67, 'confidence': 2}6712019-08-20
2985692019-08-20 00:00:29{'bpm': 68, 'confidence': 2}6812019-08-20
2985702019-08-20 00:00:34{'bpm': 69, 'confidence': 2}6912019-08-20
2985712019-08-20 00:00:49{'bpm': 69, 'confidence': 2}6912019-08-20
\n", + "
" + ], + "text/plain": [ + " dateTime value bpm day date\n", + "298567 2019-08-20 00:00:09 {'bpm': 67, 'confidence': 2} 67 1 2019-08-20\n", + "298568 2019-08-20 00:00:24 {'bpm': 67, 'confidence': 2} 67 1 2019-08-20\n", + "298569 2019-08-20 00:00:29 {'bpm': 68, 'confidence': 2} 68 1 2019-08-20\n", + "298570 2019-08-20 00:00:34 {'bpm': 69, 'confidence': 2} 69 1 2019-08-20\n", + "298571 2019-08-20 00:00:49 {'bpm': 69, 'confidence': 2} 69 1 2019-08-20" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfHeart.head()\n", + "\n", + "dfHeart.groupby('date').agg('mean')\n", + "\n", + "dfHeart.date = pd.to_datetime(dfHeart.date)\n", + "\n", + "dfHeart.loc[dfHeart.date == '2019-08-20'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.time(23, 0, 44)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfHeart.loc[5].dateTime.time()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 366 + }, + "colab_type": "code", + "id": "x8Zqir2VUIDH", + "outputId": "40f9ddf9-b489-4f5c-9b68-0d1ed632e561" + }, + "outputs": [], + "source": [ + "dfHeart['hour'] = dfHeart.dateTime.dt.hour" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gibkD9yc2igO" + }, + "source": [ + "# Sleep data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "FF1p_FuZO1xO", + "outputId": "fbd97044-2d4c-444c-c3f1-1fa62b604efd" + }, + "outputs": [], + "source": [ + "inDirSleep = [i for i in os.listdir('json') if i.startswith('sleep')]\n", + "\n", + "dfSleep = pd.DataFrame()\n", + "\n", + "for i in inDirSleep: \n", + " data = pd.read_json('./json/' + i)\n", + " dfSleep = dfSleep.append(data, ignore_index=True)\n", + "\n", + "dfSleep['date'] = pd.to_datetime(dfSleep.dateOfSleep)\n", + "\n", + "daysSleep = []\n", + "\n", + "for n in range(len(dfSleep.index)):\n", + " \n", + " day = dfSleep.date[n].dayofweek\n", + " daysSleep.append(day)\n", + " \n", + "dfSleep['day2'] = daysSleep\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "BO6W_q2aqFtg" + }, + "source": [ + "Why startTime instead of minutesAsleep? Erratic sleep patterns.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 33 + }, + "colab_type": "code", + "id": "ziKND6_Gow10", + "outputId": "02900e10-e9b4-4bd7-b967-1cbaeab973ea" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "66" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfSleep.startTime = pd.to_datetime(dfSleep.startTime)\n", + "\n", + "nap = []\n", + "for n in range(len(dfSleep.index)):\n", + " if dfSleep.startTime[n].hour > 7 and dfSleep.startTime[n].hour < 21 and dfSleep.minutesAsleep[n] <= 300 :\n", + " val = 1\n", + " else:\n", + " val = 0\n", + " nap.append(val)\n", + " \n", + "dfSleep['nap'] = nap\n", + "nap.count(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "hBvr0ZaGm2ab" + }, + "outputs": [], + "source": [ + "lengthOfNap = []\n", + "for t in range(len(dfSleep.index)):\n", + " if dfSleep.nap[t] == 1:\n", + " length = dfSleep.minutesAsleep[t]\n", + " else:\n", + " length = 0\n", + " lengthOfNap.append(length)\n", + " \n", + "dfSleep['lengthOfNap'] = lengthOfNap\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "v-EugeC2i__J" + }, + "outputs": [], + "source": [ + "dfSleep = dfSleep.drop_duplicates(['logId'], keep='last')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateOfSleepdurationefficiencyendTimeinfoCodelevelslogIdminutesAfterWakeupminutesAsleepminutesAwakeminutesToFallAsleepstartTimetimeInBedtypedateday2naplengthOfNap
02019-08-2533660000932019-08-25T12:41:00.0001{'summary': {'restless': {'count': 9, 'minutes...2360634116945164102019-08-25 03:19:30561classic2019-08-25600
12019-08-246540000972019-08-24T20:47:30.0002{'summary': {'restless': {'count': 2, 'minutes...236063411680106302019-08-24 18:58:30109classic2019-08-2451106
22019-08-2430480000932019-08-24T09:35:30.0000{'summary': {'deep': {'count': 5, 'minutes': 1...2360634116614367202019-08-24 01:07:30508stages2019-08-24500
32019-08-235040000922019-08-23T18:59:30.0002{'summary': {'restless': {'count': 3, 'minutes...23606341165077702019-08-23 17:35:0084classic2019-08-234177
42019-08-2318180000922019-08-23T07:38:30.0000{'summary': {'deep': {'count': 4, 'minutes': 4...2357918539422584502019-08-23 02:35:00303stages2019-08-23400
\n", + "
" + ], + "text/plain": [ + " dateOfSleep duration efficiency endTime infoCode \\\n", + "0 2019-08-25 33660000 93 2019-08-25T12:41:00.000 1 \n", + "1 2019-08-24 6540000 97 2019-08-24T20:47:30.000 2 \n", + "2 2019-08-24 30480000 93 2019-08-24T09:35:30.000 0 \n", + "3 2019-08-23 5040000 92 2019-08-23T18:59:30.000 2 \n", + "4 2019-08-23 18180000 92 2019-08-23T07:38:30.000 0 \n", + "\n", + " levels logId \\\n", + "0 {'summary': {'restless': {'count': 9, 'minutes... 23606341169 \n", + "1 {'summary': {'restless': {'count': 2, 'minutes... 23606341168 \n", + "2 {'summary': {'deep': {'count': 5, 'minutes': 1... 23606341166 \n", + "3 {'summary': {'restless': {'count': 3, 'minutes... 23606341165 \n", + "4 {'summary': {'deep': {'count': 4, 'minutes': 4... 23579185394 \n", + "\n", + " minutesAfterWakeup minutesAsleep minutesAwake minutesToFallAsleep \\\n", + "0 4 516 41 0 \n", + "1 0 106 3 0 \n", + "2 1 436 72 0 \n", + "3 0 77 7 0 \n", + "4 2 258 45 0 \n", + "\n", + " startTime timeInBed type date day2 nap lengthOfNap \n", + "0 2019-08-25 03:19:30 561 classic 2019-08-25 6 0 0 \n", + "1 2019-08-24 18:58:30 109 classic 2019-08-24 5 1 106 \n", + "2 2019-08-24 01:07:30 508 stages 2019-08-24 5 0 0 \n", + "3 2019-08-23 17:35:00 84 classic 2019-08-23 4 1 77 \n", + "4 2019-08-23 02:35:00 303 stages 2019-08-23 4 0 0 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfSleep.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uqNo3B6u5xxK" + }, + "source": [ + "# Steps data" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66 + }, + "colab_type": "code", + "id": "62YEuabrml_q", + "outputId": "f8339cef-3ffb-4076-f240-65fdcb27bde2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['steps-2019-08-20.json', 'steps-2019-06-21.json', 'steps-2019-07-21.json']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inDirSteps = [i for i in os.listdir('json') if i.startswith('steps')]\n", + "inDirSteps" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ZpalR5AR4xSs" + }, + "outputs": [], + "source": [ + "dfSteps = pd.DataFrame()\n", + "\n", + "for i in inDirSteps: \n", + " data = pd.read_json('./json/' + i)\n", + " dfSteps = dfSteps.append(data, ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "J-yK8gLW49xR" + }, + "outputs": [], + "source": [ + "date = []\n", + "for i in range(len(dfSteps.index)):\n", + " val = dfSteps.loc[i].dateTime.date()\n", + " date.append(val)\n", + "\n", + "dfSteps['date'] = date\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "OYpSeyRc5Cdn" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
date
2019-06-22414
2019-06-233378
2019-06-2416385
2019-06-2520452
2019-06-264299
\n", + "
" + ], + "text/plain": [ + " value\n", + "date \n", + "2019-06-22 414\n", + "2019-06-23 3378\n", + "2019-06-24 16385\n", + "2019-06-25 20452\n", + "2019-06-26 4299" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfSteps.groupby('date').agg('sum').head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 147 + }, + "colab_type": "code", + "id": "W443Idn55lLA", + "outputId": "e23d2f09-6b6c-4f58-a467-c7303e4a2f02" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 35328 entries, 0 to 35327\n", + "Data columns (total 3 columns):\n", + "dateTime 35328 non-null datetime64[ns]\n", + "value 35328 non-null int64\n", + "date 35328 non-null object\n", + "dtypes: datetime64[ns](1), int64(1), object(1)\n", + "memory usage: 828.1+ KB\n" + ] + } + ], + "source": [ + "dfSteps.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UIg6k6t5yTSN" + }, + "source": [ + "# Calories data" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66 + }, + "colab_type": "code", + "id": "01TxSlJ9yNcP", + "outputId": "fcddc59e-0ef6-4a72-e300-b3f99149a45d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['calories-2019-08-20.json',\n", + " 'calories-2019-06-21.json',\n", + " 'calories-2019-07-21.json']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inDirCalories = [i for i in os.listdir('json') if i.startswith('calories')]\n", + "inDirCalories" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "K1UzM2RTybHm", + "outputId": "588d9dc9-a2b2-4a8c-8f64-b45f3bd5e6c5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateTimevaluedate
02019-08-20 00:00:000.952019-08-20
12019-08-20 00:01:000.952019-08-20
22019-08-20 00:02:000.952019-08-20
32019-08-20 00:03:000.952019-08-20
42019-08-20 00:04:000.952019-08-20
\n", + "
" + ], + "text/plain": [ + " dateTime value date\n", + "0 2019-08-20 00:00:00 0.95 2019-08-20\n", + "1 2019-08-20 00:01:00 0.95 2019-08-20\n", + "2 2019-08-20 00:02:00 0.95 2019-08-20\n", + "3 2019-08-20 00:03:00 0.95 2019-08-20\n", + "4 2019-08-20 00:04:00 0.95 2019-08-20" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfCalories = pd.DataFrame()\n", + "\n", + "for i in inDirCalories: \n", + " data = pd.read_json('./json/' + i)\n", + " dfCalories = dfCalories.append(data, ignore_index=True)\n", + "\n", + "date = []\n", + "for i in range(len(dfCalories.index)):\n", + " val = dfCalories.loc[i].dateTime.date()\n", + " date.append(val)\n", + "\n", + "dfCalories['date'] = date\n", + "\n", + " \n", + "dfCalories.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "innRsMyG6DVn" + }, + "source": [ + "# Active minutes" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66 + }, + "colab_type": "code", + "id": "AEQgAkV36G8v", + "outputId": "cd32176c-88db-4947-e7cf-edf8b6064347" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['very_active_minutes-2019-06-21.json',\n", + " 'very_active_minutes-2019-07-21.json',\n", + " 'very_active_minutes-2019-08-20.json']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inDirActive = [i for i in os.listdir('json') if i.startswith('very')]\n", + "inDirActive" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "jTM2Grp86SI0", + "outputId": "94098a9e-20ef-40d7-8060-f395782215be" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateTimevalue
02019-06-210
12019-06-220
22019-06-2320
32019-06-2466
42019-06-2586
\n", + "
" + ], + "text/plain": [ + " dateTime value\n", + "0 2019-06-21 0\n", + "1 2019-06-22 0\n", + "2 2019-06-23 20\n", + "3 2019-06-24 66\n", + "4 2019-06-25 86" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfActive = pd.DataFrame()\n", + "\n", + "for i in inDirActive: \n", + " data = pd.read_json('./json/' + i)\n", + " dfActive = dfActive.append(data, ignore_index=True)\n", + "\n", + "dfActive.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "eC5luQ8O65h0" + }, + "source": [ + "# Moderately active minutes" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66 + }, + "colab_type": "code", + "id": "I9DNPwQR69y7", + "outputId": "7a33c7d9-86a5-4ba5-e0bc-a0b8aa5f4a05" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['moderately_active_minutes-2019-07-21.json',\n", + " 'moderately_active_minutes-2019-06-21.json',\n", + " 'moderately_active_minutes-2019-08-20.json']" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inDirModerate = [i for i in os.listdir('json') if i.startswith('moderately')]\n", + "inDirModerate" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "83Hd97dA7KNH", + "outputId": "a3b1162d-7537-4ff3-94c5-bdcb8bbe7dd1" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateTimevalue
02019-07-210
12019-07-220
22019-07-230
32019-07-2430
42019-07-250
\n", + "
" + ], + "text/plain": [ + " dateTime value\n", + "0 2019-07-21 0\n", + "1 2019-07-22 0\n", + "2 2019-07-23 0\n", + "3 2019-07-24 30\n", + "4 2019-07-25 0" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfModerate = pd.DataFrame()\n", + "\n", + "for i in inDirModerate: \n", + " data = pd.read_json('./json/' + i)\n", + " dfModerate = dfModerate.append(data, ignore_index=True)\n", + "\n", + "dfModerate.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "5S-3vUgZ7Sie" + }, + "source": [ + "# Sedentary minutes" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66 + }, + "colab_type": "code", + "id": "HDYJqy8_7W_m", + "outputId": "d59306ba-355a-47b8-d83e-a03884918996" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sedentary_minutes-2019-07-21.json',\n", + " 'sedentary_minutes-2019-06-21.json',\n", + " 'sedentary_minutes-2019-08-20.json']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inDirSedentary = [i for i in os.listdir('json') if i.startswith('sedentary')]\n", + "inDirSedentary" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "cjq_k7H17cXQ", + "outputId": "791fa687-e10a-49cd-8194-fd6c666410de" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateTimevalue
02019-07-21698
12019-07-22679
22019-07-23871
32019-07-24980
42019-07-25678
\n", + "
" + ], + "text/plain": [ + " dateTime value\n", + "0 2019-07-21 698\n", + "1 2019-07-22 679\n", + "2 2019-07-23 871\n", + "3 2019-07-24 980\n", + "4 2019-07-25 678" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfSedentary = pd.DataFrame()\n", + "\n", + "for i in inDirSedentary: \n", + " data = pd.read_json('./json/' + i)\n", + " dfSedentary = dfSedentary.append(data, ignore_index=True)\n", + "\n", + "dfSedentary.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "sOylZ2Kq595d" + }, + "source": [ + "# Master Dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "of23rtIz6AYG" + }, + "outputs": [], + "source": [ + "df = pd.DataFrame()\n", + "\n", + "df['averageBpm'] = dfHeart.groupby('date').agg('mean').bpm\n", + "df['stepsTaken'] = dfSteps.groupby('date').agg('sum').value\n", + "df['caloriesBurnt'] = dfCalories.groupby('date').agg('sum').value\n", + "df['nap'] = dfSleep.groupby('date').agg('sum').nap\n", + "df['lengthOfNap'] = dfSleep.groupby('date').agg('sum').lengthOfNap\n", + "df['minutesAsleep'] = dfSleep.groupby('date').agg('sum').minutesAsleep\n", + "df['nightSleep'] = df.minutesAsleep - df.lengthOfNap\n", + "df['activeMinutes'] = dfActive.groupby('dateTime').agg('sum').value\n", + "df['moderatelyActiveMinutes'] = dfModerate.groupby('dateTime').agg('sum').value\n", + "#df['sedentaryMinutes'] = dfSedentary.groupby('dateTime').agg('sum').value\n", + "#df['date'] = df.index\n", + "\n", + "df.drop(df.head(1).index, inplace=True)\n", + "df.drop(df.tail(1).index, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 220 + }, + "colab_type": "code", + "id": "04uZziWlkXYa", + "outputId": "fa5aa488-f554-4070-a375-62781d21d65b" + }, + "outputs": [], + "source": [ + "listDays = []\n", + "for n in range(len(df.index)):\n", + " \n", + " day = df.index[n].dayofweek\n", + " listDays.append(day)\n", + "\n", + "df['day'] = listDays" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "didNap = []\n", + "noNap = []\n", + "for t in range(len(df.index)):\n", + " if df.nap[t] > 0:\n", + " didNap.append(1)\n", + " noNap.append(0)\n", + " else:\n", + " didNap.append(0)\n", + " noNap.append(1)\n", + "\n", + "df['didNap'] = didNap\n", + "df['noNap'] = noNap" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "DatetimeIndex: 64 entries, 2019-06-23 to 2019-08-25\n", + "Data columns (total 12 columns):\n", + "averageBpm 64 non-null float64\n", + "stepsTaken 64 non-null int64\n", + "caloriesBurnt 64 non-null float64\n", + "nap 64 non-null float64\n", + "lengthOfNap 64 non-null float64\n", + "minutesAsleep 64 non-null float64\n", + "nightSleep 64 non-null float64\n", + "activeMinutes 64 non-null int64\n", + "moderatelyActiveMinutes 64 non-null int64\n", + "day 64 non-null int64\n", + "didNap 64 non-null int64\n", + "noNap 64 non-null int64\n", + "dtypes: float64(6), int64(6)\n", + "memory usage: 9.0 KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "02-_jIm3sWYN" + }, + "outputs": [], + "source": [ + "df_csv = df.to_csv('master_df.csv', header=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "fitbit.ipynb", + "provenance": [], + "version": "0.3.2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}