From 67fd5093c5e2aebc357f677ee63d6fa66310267d Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Fri, 18 Mar 2016 16:17:18 -0700 Subject: [PATCH] BigQuery samples. --- bigquery/README.md | 33 +++++ bigquery/dataset_size.js | 159 +++++++++++++++++++++++ bigquery/getting_started.js | 79 +++++++++++ bigquery/load_data_from_csv.js | 134 +++++++++++++++++++ bigquery/package.json | 18 +++ bigquery/resources/data.csv | 1 + bigquery/resources/data.json | 1 + bigquery/resources/schema.json | 1 + bigquery/resources/streamrows.json | 7 + package.json | 6 +- pubsub/subscription.js | 1 - test/bigquery/dataset_size.test.js | 30 +++++ test/bigquery/getting_started.test.js | 28 ++++ test/bigquery/load_data_from_csv.test.js | 47 +++++++ 14 files changed, 542 insertions(+), 3 deletions(-) create mode 100644 bigquery/README.md create mode 100644 bigquery/dataset_size.js create mode 100644 bigquery/getting_started.js create mode 100644 bigquery/load_data_from_csv.js create mode 100644 bigquery/package.json create mode 100644 bigquery/resources/data.csv create mode 100644 bigquery/resources/data.json create mode 100644 bigquery/resources/schema.json create mode 100644 bigquery/resources/streamrows.json create mode 100644 test/bigquery/dataset_size.test.js create mode 100644 test/bigquery/getting_started.test.js create mode 100644 test/bigquery/load_data_from_csv.test.js diff --git a/bigquery/README.md b/bigquery/README.md new file mode 100644 index 00000000000..e263d062ac6 --- /dev/null +++ b/bigquery/README.md @@ -0,0 +1,33 @@ +## BigQuery Samples + +These samples require two environment variables to be set: + +- `GOOGLE_APPLICATION_CREDENTIALS` - Path to a service account file. You can +download one from your Google project's "permissions" page. +- `GCLOUD_PROJECT` - Id of your Google project. 
+
+## Run the samples
+
+Install dependencies:
+
+    npm install
+
+### getting_started.js
+
+    npm run getting_started
+
+### dataset_size.js
+
+Usage: `npm run dataset_size -- <projectId> <datasetId>`
+
+Example:
+
+    npm run dataset_size -- bigquery-public-data hacker_news
+
+### load_data_from_csv.js
+
+Usage: `npm run load_data_from_csv -- <pathToCsvFile> <datasetId> <tableName>`
+
+Example:
+
+    npm run load_data_from_csv -- data.csv my-dataset my-table
diff --git a/bigquery/dataset_size.js b/bigquery/dataset_size.js
new file mode 100644
index 00000000000..96c8580acd6
--- /dev/null
+++ b/bigquery/dataset_size.js
@@ -0,0 +1,159 @@
+// Copyright 2016, Google, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+var async = require('async');
+
+// [START auth]
+// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT
+// environment variables to run this sample
+var projectId = process.env.GCLOUD_PROJECT;
+
+// Initialize gcloud
+var gcloud = require('gcloud')({
+  projectId: projectId
+});
+
+// Get a reference to the bigquery component
+var bigquery = gcloud.bigquery();
+// [END auth]
+
+// not going to use this bigquery instance
+bigquery = undefined;
+
+// [START list_tables]
+/**
+ * Retrieve all tables for the specified dataset.
+ *
+ * @param {Object} bigquery gcloud-node bigquery client.
+ * @param {string} datasetId Dataset of the tables to retrieve.
+ * @param {string} [pageToken] Page to retrieve.
+ * @param {Function} callback Callback function.
+ */ +function getAllTablesExample(bigquery, datasetId, pageToken, callback) { + if (typeof pageToken === 'function') { + callback = pageToken; + pageToken = undefined; + } + var dataset = bigquery.dataset(datasetId); + var options = {}; + if (pageToken) { + options.pageToken = pageToken; + } + + // Grab paginated tables + dataset.getTables(options, function (err, tables, nextQuery) { + // Quit on error + if (err) { + return callback(err); + } + + // There is another page of tables + if (nextQuery) { + // Grab the remaining pages of tables recursively + return getAllTablesExample( + datasetId, + nextQuery.token, + function (err, _tables) { + if (err) { + return callback(err); + } + callback(null, tables.concat(_tables)); + } + ); + } + // Last page of tables + return callback(null, tables); + }); +} +// [END list_tables] + +// [START get_size] +/** + * Retrieve the size of the specified dataset. + * + * @param {string} projectId The project, .e.g. "bigquery-public-data" + * @param {string} datasetId The dataset, e.g. "hacker_news" + * @param {Function} callback Callback function. 
+ */ +function getSizeExample(projectId, datasetId, callback) { + if (!projectId) { + return callback(new Error('projectId is required!')); + } + if (!datasetId) { + return callback(new Error('datasetId is require!')); + } + + var gcloud = require('gcloud')({ + projectId: projectId || process.env.GCLOUD_PROJECT + }); + var bigquery = gcloud.bigquery(); + + // Fetch all tables in the dataset + getAllTablesExample(bigquery, datasetId, function (err, tables) { + return async.parallel(tables.map(function (table) { + return function (cb) { + // Fetch more detailed info for each table + table.get(function (err, tableInfo) { + if (err) { + return cb(err); + } + // Return numBytes converted to Megabytes + var numBytes = tableInfo.metadata.numBytes; + return cb(null, (parseInt(numBytes, 10) / 1000) / 1000); + }); + }; + }), function (err, sizes) { + if (err) { + return callback(err); + } + var sum = sizes.reduce(function (cur, prev) { + return cur + prev; + }, 0); + return callback(null, sum); + }); + }); +} +// [END get_size] + +// Run the examples +exports.main = function (projectId, datasetId, cb) { + getSizeExample(projectId, datasetId, function (err, sum) { + if (err) { + return cb(err); + } + var size = 'MB'; + if (sum > 1000) { + sum = sum / 1000; + size = 'GB'; + } + if (sum > 1000) { + sum = sum / 1000; + size = 'TB'; + } + cb(null, '' + sum.toPrecision(5) + ' ' + size); + }); +}; + +if (module === require.main) { + var args = process.argv.slice(2); + if (args.length !== 2) { + throw new Error('Usage: node dataset_size.js '); + } + exports.main( + args[0], + args[1], + console.log + ); +} diff --git a/bigquery/getting_started.js b/bigquery/getting_started.js new file mode 100644 index 00000000000..45eddeca352 --- /dev/null +++ b/bigquery/getting_started.js @@ -0,0 +1,79 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// [START complete] +'use strict'; + +// [START auth] +// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT +// environment variables to run this sample +var projectId = process.env.GCLOUD_PROJECT; + +// Initialize gcloud +var gcloud = require('gcloud')({ + projectId: projectId +}); + +// Get a reference to the bigquery component +var bigquery = gcloud.bigquery(); +// [END auth] + +// [START print] +function printExample(rows) { + console.log('Query Results:'); + rows.forEach(function (row) { + var str = ''; + for (var key in row) { + if (str) { + str += '\t'; + } + str += key + ': ' + row[key]; + } + console.log(str); + }); +} +// [END print] + +// [START query] +/** + * Run an example query. + * + * @param {Function} callback Callback function. + */ +function queryExample(callback) { + var query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words\n' + + 'FROM [publicdata:samples.shakespeare];'; + + bigquery.query(query, function(err, rows) { + if (err) { + return callback(err); + } + + printExample(rows); + callback(null, rows); + }); +} +// [END query] + +// [END complete] + +// Run the examples +exports.main = function (cb) { + queryExample(cb); +}; + +if (module === require.main) { + exports.main( + console.log + ); +} diff --git a/bigquery/load_data_from_csv.js b/bigquery/load_data_from_csv.js new file mode 100644 index 00000000000..40ac777a968 --- /dev/null +++ b/bigquery/load_data_from_csv.js @@ -0,0 +1,134 @@ +// Copyright 2016, Google, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// [START complete]
+'use strict';
+
+var fs = require('fs');
+
+// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT
+// environment variables to run this sample
+var projectId = process.env.GCLOUD_PROJECT;
+
+// Initialize gcloud
+var gcloud = require('gcloud')({
+  projectId: projectId
+});
+
+// Get a reference to the bigquery component
+var bigquery = gcloud.bigquery();
+
+/**
+ * Wait for the provided job to complete, polling its metadata every
+ * 5 seconds until it leaves the RUNNING/PENDING states or the timeout
+ * is exceeded.
+ *
+ * @param {Object} job The job to watch.
+ * @param {number} timeout Maximum time to wait (milliseconds).
+ * @param {number} timeWaited Time already spent waiting (milliseconds);
+ *     pass 0 on the initial call.
+ * @param {Function} callback Callback function.
+ */
+function pollJobUntilDone(job, timeout, timeWaited, callback) {
+  job.getMetadata(function (err, metadata) {
+    if (err) {
+      return callback(err);
+    }
+    if (timeWaited > timeout) {
+      return callback(new Error('Timed out waiting for job to complete'));
+    }
+    if (metadata.status && (metadata.status.state === 'RUNNING' ||
+      metadata.status.state === 'PENDING')) {
+      // Job is still in flight — poll again in 5 seconds
+      setTimeout(function () {
+        console.log('working...');
+        pollJobUntilDone(job, timeout, timeWaited + 5000, callback);
+      }, 5000);
+    } else {
+      callback(null, metadata);
+    }
+  });
+}
+
+/**
+ * Load a csv file into a BigQuery table.
+ *
+ * @param {string} pathToCsvFile Path to csv file to load.
+ * @param {string} datasetId The dataset.
+ * @param {string} tableName The table.
+ * @param {Function} callback Callback function.
+ */ +function loadDataFromCsvExample(pathToCsvFile, datasetId, tableName, callback) { + + if (!pathToCsvFile || typeof pathToCsvFile !== 'string') { + return callback(new Error('pathToCsvFile is required!')); + } + if (!datasetId || typeof pathToCsvFile !== 'string') { + return callback(new Error('datasetId is require!')); + } + if (!tableName || typeof pathToCsvFile !== 'string') { + return callback(new Error('tableName is require!')); + } + + var dataset = bigquery.dataset(datasetId); + var table = dataset.table(tableName); + + var options = { + skipLeadingRows: 0 + }; + + fs.createReadStream(pathToCsvFile) + .pipe(table.createWriteStream(options)) + .on('complete', function (job) { + // Wait up to 20 seconds for job to complete + pollJobUntilDone(job, 60000, 0, function (err, metadata) { + if (err) { + return callback(err); + } + console.log('job completed', metadata); + callback(null, metadata); + }); + }); +} +// [END complete] + +exports.createTable = function (datasetId, tableName, callback) { + var dataset = bigquery.dataset(datasetId); + var pathToSchemaFile = __dirname + '/resources/schema.json'; + fs.readFile(pathToSchemaFile, { encoding: 'utf8' }, function (err, file) { + if (err) { + return callback(err); + } + var schema = JSON.parse(file); + var columns = schema.map(function (column) { + return column.name + ':' + column.type; + }); + dataset.createTable(tableName, { schema: columns.join(',') }, callback); + }); +}; + +exports.deleteTable = function (datasetId, tableName, callback) { + var dataset = bigquery.dataset(datasetId); + var table = dataset.table(tableName); + table.delete(callback); +}; + +// Run the examples +exports.main = function (pathToCsvFile, datasetId, tableName, cb) { + loadDataFromCsvExample(pathToCsvFile, datasetId, tableName, cb); +}; + +if (module === require.main) { + var args = process.argv.slice(2); + exports.main( + args[0], + args[1], + args[2], + console.log + ); +} diff --git a/bigquery/package.json 
b/bigquery/package.json new file mode 100644 index 00000000000..470c572e8a3 --- /dev/null +++ b/bigquery/package.json @@ -0,0 +1,18 @@ +{ + "name": "nodejs-docs-samples-bigquery", + "description": "Node.js samples for Google BigQuery.", + "version": "0.0.1", + "private": true, + "license": "Apache Version 2.0", + "author": "Google Inc.", + "engines": { + "node": ">=0.10.x" + }, + "scripts": { + "dataset_size": "node dataset_size.js" + }, + "dependencies": { + "async": "^1.5.2", + "gcloud": "^0.29.0" + } +} diff --git a/bigquery/resources/data.csv b/bigquery/resources/data.csv new file mode 100644 index 00000000000..20e1a5d6cbf --- /dev/null +++ b/bigquery/resources/data.csv @@ -0,0 +1 @@ +Gandalf, 2000, 140.0, 1 \ No newline at end of file diff --git a/bigquery/resources/data.json b/bigquery/resources/data.json new file mode 100644 index 00000000000..cecbbf15826 --- /dev/null +++ b/bigquery/resources/data.json @@ -0,0 +1 @@ +{"Name":"Gandalf","Age":2000,"Weight":140.0,"IsMagic":true} \ No newline at end of file diff --git a/bigquery/resources/schema.json b/bigquery/resources/schema.json new file mode 100644 index 00000000000..68876fb6044 --- /dev/null +++ b/bigquery/resources/schema.json @@ -0,0 +1 @@ +[{"type":"STRING","name":"Name"},{"type":"INTEGER","name":"Age"},{"type":"FLOAT","name":"Weight"},{"type":"BOOLEAN","name":"IsMagic"}] \ No newline at end of file diff --git a/bigquery/resources/streamrows.json b/bigquery/resources/streamrows.json new file mode 100644 index 00000000000..51b111272d6 --- /dev/null +++ b/bigquery/resources/streamrows.json @@ -0,0 +1,7 @@ +[ + {"Name":"test","Age":0,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":1,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":2,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":3,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":0,"Weight":100.0,"IsMagic":false} +] \ No newline at end of file diff --git a/package.json b/package.json index d178f3bd254..f49f56d7689 100644 --- 
a/package.json +++ b/package.json @@ -26,10 +26,12 @@ "scripts": { "jshint": "jshint --exclude-path=.jshintignore .", "deps_appengine": "ava --match='*: dependencies should install*'", - "ava": "npm run deps_appengine && ava --match='!*: dependencies should install*'", + "ava": "ava --match='!*: dependencies should install*'", + "ava:deps": "npm run deps_appengine && npm run ava", "cover": "npm run deps_appengine && nyc ava --match='!*: dependencies should install*'", "report": "nyc report --reporter=text-lcov | ./node_modules/.bin/coveralls", "report-html": "nyc report --reporter=html", + "deps_bigquery": "cd bigquery; npm i; cd ../", "deps_datastore": "cd datastore; npm i; cd ../", "deps_pubsub": "cd pubsub; npm i; cd ../", "deps_monitoring": "cd monitoring; npm i; cd ../", @@ -39,7 +41,7 @@ "deps_functions": "cd functions/uuid; npm i; cd ../..", "deps_sendgrid": "cd computeengine/sendgrid; npm i; cd ../..", "pretest_geddy": "cd appengine/geddy; npm i geddy; GEDDY_SECRET=config/secrets.json; [[ -f $GEDDY_SECRET ]] || echo '{}' > $GEDDY_SECRET && node node_modules/.bin/geddy gen secret; cd ../..;", - "pretest": "npm run deps_datastore; npm run deps_monitoring; npm run deps_storage; npm run deps_pubsub; npm run deps_prediction; npm run deps_logging; npm run deps_functions; npm run deps_sendgrid; npm run pretest_geddy", + "pretest": "npm run deps_bigquery; npm run deps_datastore; npm run deps_monitoring; npm run deps_storage; npm run deps_pubsub; npm run deps_prediction; npm run deps_logging; npm run deps_functions; npm run deps_sendgrid; npm run pretest_geddy", "test": "npm run jshint && npm run cover" }, "ava": { diff --git a/pubsub/subscription.js b/pubsub/subscription.js index e63654859e2..f0f31e0c322 100644 --- a/pubsub/subscription.js +++ b/pubsub/subscription.js @@ -20,7 +20,6 @@ var async = require('async'); // environment variables to run this sample var projectId = process.env.GCLOUD_PROJECT; -// [START require] // Initialize gcloud var gcloud = 
require('gcloud')({ projectId: projectId diff --git a/test/bigquery/dataset_size.test.js b/test/bigquery/dataset_size.test.js new file mode 100644 index 00000000000..9e2c3ef5e18 --- /dev/null +++ b/test/bigquery/dataset_size.test.js @@ -0,0 +1,30 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +var test = require('ava'); +var datasetSizeExample = require('../../bigquery/dataset_size'); + +test.cb('should return the size of a dataset', function (t) { + datasetSizeExample.main( + 'bigquery-public-data', + 'hacker_news', + function (err, size) { + t.ifError(err); + t.is(typeof size, 'string'); + t.ok(size.indexOf(' GB') === size.length - 3); + t.end(); + } + ); +}); diff --git a/test/bigquery/getting_started.test.js b/test/bigquery/getting_started.test.js new file mode 100644 index 00000000000..48387b2c07a --- /dev/null +++ b/test/bigquery/getting_started.test.js @@ -0,0 +1,28 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +var test = require('ava'); +var gettingStartedExample = require('../../bigquery/getting_started'); + +test.cb('should run a query', function (t) { + gettingStartedExample.main( + function (err, rows) { + t.ifError(err); + t.ok(Array.isArray(rows)); + t.is(rows.length, 10); + t.end(); + } + ); +}); diff --git a/test/bigquery/load_data_from_csv.test.js b/test/bigquery/load_data_from_csv.test.js new file mode 100644 index 00000000000..c0a755f9b99 --- /dev/null +++ b/test/bigquery/load_data_from_csv.test.js @@ -0,0 +1,47 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +var async = require('async'); +var test = require('ava'); +var loadDataFromCsvExample = require('../../bigquery/load_data_from_csv'); + +test.cb('should load data from a csv file', function (t) { + var pathToCsvFile = __dirname + '/../../bigquery/resources/data.csv'; + var datasetId = 'nodejs_docs_samples'; + var tableName = 'test_' + new Date().getTime(); + + async.series([ + function (cb) { + loadDataFromCsvExample.createTable(datasetId, tableName, cb); + }, + function (cb) { + loadDataFromCsvExample.main(pathToCsvFile, datasetId, tableName, cb); + }, + function (cb) { + loadDataFromCsvExample.deleteTable(datasetId, tableName, cb); + } + ], function (err, results) { + if (err) { + loadDataFromCsvExample.deleteTable(datasetId, tableName, function () { + t.end(err); + }); + } else { + t.ifError(err); + // metadata + t.is(results[1].status.state, 'DONE'); + t.end(); + } + }); +});