From 67fd5093c5e2aebc357f677ee63d6fa66310267d Mon Sep 17 00:00:00 2001 From: Jason Dobry Date: Fri, 18 Mar 2016 16:17:18 -0700 Subject: [PATCH] BigQuery samples. --- bigquery/README.md | 33 +++++ bigquery/dataset_size.js | 159 +++++++++++++++++++++++ bigquery/getting_started.js | 79 +++++++++++ bigquery/load_data_from_csv.js | 134 +++++++++++++++++++ bigquery/package.json | 18 +++ bigquery/resources/data.csv | 1 + bigquery/resources/data.json | 1 + bigquery/resources/schema.json | 1 + bigquery/resources/streamrows.json | 7 + package.json | 6 +- pubsub/subscription.js | 1 - test/bigquery/dataset_size.test.js | 30 +++++ test/bigquery/getting_started.test.js | 28 ++++ test/bigquery/load_data_from_csv.test.js | 47 +++++++ 14 files changed, 542 insertions(+), 3 deletions(-) create mode 100644 bigquery/README.md create mode 100644 bigquery/dataset_size.js create mode 100644 bigquery/getting_started.js create mode 100644 bigquery/load_data_from_csv.js create mode 100644 bigquery/package.json create mode 100644 bigquery/resources/data.csv create mode 100644 bigquery/resources/data.json create mode 100644 bigquery/resources/schema.json create mode 100644 bigquery/resources/streamrows.json create mode 100644 test/bigquery/dataset_size.test.js create mode 100644 test/bigquery/getting_started.test.js create mode 100644 test/bigquery/load_data_from_csv.test.js diff --git a/bigquery/README.md b/bigquery/README.md new file mode 100644 index 00000000000..e263d062ac6 --- /dev/null +++ b/bigquery/README.md @@ -0,0 +1,33 @@ +## BigQuery Samples + +These samples require two environment variables to be set: + +- `GOOGLE_APPLICATION_CREDENTIALS` - Path to a service account file. You can +download one from your Google project's "permissions" page. +- `GCLOUD_PROJECT` - Id of your Google project. 
+
+## Run the samples
+
+Install dependencies:
+
+    npm install
+
+### getting_started.js
+
+    npm run getting_started
+
+### dataset_size.js
+
+Usage: `npm run dataset_size -- <projectId> <datasetId>`
+
+Example:
+
+    npm run dataset_size -- bigquery-public-data hacker_news
+
+### load_data_from_csv.js
+
+Usage: `npm run load_data_from_csv -- <pathToCsvFile> <datasetId> <tableName>`
+
+Example:
+
+    npm run load_data_from_csv -- data.csv my-dataset my-table
diff --git a/bigquery/dataset_size.js b/bigquery/dataset_size.js
new file mode 100644
index 00000000000..96c8580acd6
--- /dev/null
+++ b/bigquery/dataset_size.js
@@ -0,0 +1,159 @@
+// Copyright 2016, Google, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+var async = require('async');
+
+// [START auth]
+// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT
+// environment variables to run this sample
+var projectId = process.env.GCLOUD_PROJECT;
+
+// Initialize gcloud
+var gcloud = require('gcloud')({
+  projectId: projectId
+});
+
+// Get a reference to the bigquery component
+var bigquery = gcloud.bigquery();
+// [END auth]
+
+// not going to use this bigquery instance
+bigquery = undefined;
+
+// [START list_tables]
+/**
+ * Retrieve all tables for the specified dataset.
+ *
+ * @param {Object} bigquery gcloud-node bigquery client.
+ * @param {string} datasetId Dataset of the tables to retrieve.
+ * @param {string} [pageToken] Page to retrieve.
+ * @param {Function} callback Callback function.
+ */ +function getAllTablesExample(bigquery, datasetId, pageToken, callback) { + if (typeof pageToken === 'function') { + callback = pageToken; + pageToken = undefined; + } + var dataset = bigquery.dataset(datasetId); + var options = {}; + if (pageToken) { + options.pageToken = pageToken; + } + + // Grab paginated tables + dataset.getTables(options, function (err, tables, nextQuery) { + // Quit on error + if (err) { + return callback(err); + } + + // There is another page of tables + if (nextQuery) { + // Grab the remaining pages of tables recursively + return getAllTablesExample( + datasetId, + nextQuery.token, + function (err, _tables) { + if (err) { + return callback(err); + } + callback(null, tables.concat(_tables)); + } + ); + } + // Last page of tables + return callback(null, tables); + }); +} +// [END list_tables] + +// [START get_size] +/** + * Retrieve the size of the specified dataset. + * + * @param {string} projectId The project, .e.g. "bigquery-public-data" + * @param {string} datasetId The dataset, e.g. "hacker_news" + * @param {Function} callback Callback function. 
+ */ +function getSizeExample(projectId, datasetId, callback) { + if (!projectId) { + return callback(new Error('projectId is required!')); + } + if (!datasetId) { + return callback(new Error('datasetId is require!')); + } + + var gcloud = require('gcloud')({ + projectId: projectId || process.env.GCLOUD_PROJECT + }); + var bigquery = gcloud.bigquery(); + + // Fetch all tables in the dataset + getAllTablesExample(bigquery, datasetId, function (err, tables) { + return async.parallel(tables.map(function (table) { + return function (cb) { + // Fetch more detailed info for each table + table.get(function (err, tableInfo) { + if (err) { + return cb(err); + } + // Return numBytes converted to Megabytes + var numBytes = tableInfo.metadata.numBytes; + return cb(null, (parseInt(numBytes, 10) / 1000) / 1000); + }); + }; + }), function (err, sizes) { + if (err) { + return callback(err); + } + var sum = sizes.reduce(function (cur, prev) { + return cur + prev; + }, 0); + return callback(null, sum); + }); + }); +} +// [END get_size] + +// Run the examples +exports.main = function (projectId, datasetId, cb) { + getSizeExample(projectId, datasetId, function (err, sum) { + if (err) { + return cb(err); + } + var size = 'MB'; + if (sum > 1000) { + sum = sum / 1000; + size = 'GB'; + } + if (sum > 1000) { + sum = sum / 1000; + size = 'TB'; + } + cb(null, '' + sum.toPrecision(5) + ' ' + size); + }); +}; + +if (module === require.main) { + var args = process.argv.slice(2); + if (args.length !== 2) { + throw new Error('Usage: node dataset_size.js '); + } + exports.main( + args[0], + args[1], + console.log + ); +} diff --git a/bigquery/getting_started.js b/bigquery/getting_started.js new file mode 100644 index 00000000000..45eddeca352 --- /dev/null +++ b/bigquery/getting_started.js @@ -0,0 +1,79 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// [START complete] +'use strict'; + +// [START auth] +// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT +// environment variables to run this sample +var projectId = process.env.GCLOUD_PROJECT; + +// Initialize gcloud +var gcloud = require('gcloud')({ + projectId: projectId +}); + +// Get a reference to the bigquery component +var bigquery = gcloud.bigquery(); +// [END auth] + +// [START print] +function printExample(rows) { + console.log('Query Results:'); + rows.forEach(function (row) { + var str = ''; + for (var key in row) { + if (str) { + str += '\t'; + } + str += key + ': ' + row[key]; + } + console.log(str); + }); +} +// [END print] + +// [START query] +/** + * Run an example query. + * + * @param {Function} callback Callback function. + */ +function queryExample(callback) { + var query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words\n' + + 'FROM [publicdata:samples.shakespeare];'; + + bigquery.query(query, function(err, rows) { + if (err) { + return callback(err); + } + + printExample(rows); + callback(null, rows); + }); +} +// [END query] + +// [END complete] + +// Run the examples +exports.main = function (cb) { + queryExample(cb); +}; + +if (module === require.main) { + exports.main( + console.log + ); +} diff --git a/bigquery/load_data_from_csv.js b/bigquery/load_data_from_csv.js new file mode 100644 index 00000000000..40ac777a968 --- /dev/null +++ b/bigquery/load_data_from_csv.js @@ -0,0 +1,134 @@ +// Copyright 2016, Google, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// [START complete]
+'use strict';
+
+var fs = require('fs');
+
+// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT
+// environment variables to run this sample
+var projectId = process.env.GCLOUD_PROJECT;
+
+// Initialize gcloud
+var gcloud = require('gcloud')({
+  projectId: projectId
+});
+
+// Get a reference to the bigquery component
+var bigquery = gcloud.bigquery();
+
+/**
+ * Wait for the provided job to complete, polling its metadata every
+ * 5 seconds until it leaves the RUNNING/PENDING states or the timeout
+ * is exceeded.
+ *
+ * @param {Object} job The job to watch.
+ * @param {number} timeout Maximum time to wait (milliseconds).
+ * @param {number} timeWaited Time already spent waiting (milliseconds);
+ *     pass 0 on the initial call.
+ * @param {Function} callback Callback function.
+ */
+function pollJobUntilDone(job, timeout, timeWaited, callback) {
+  job.getMetadata(function (err, metadata) {
+    if (err) {
+      return callback(err);
+    }
+    if (timeWaited > timeout) {
+      return callback(new Error('Timed out waiting for job to complete'));
+    }
+    if (metadata.status && (metadata.status.state === 'RUNNING' ||
+      metadata.status.state === 'PENDING')) {
+      // Job is still in flight — poll again in 5 seconds
+      setTimeout(function () {
+        console.log('working...');
+        pollJobUntilDone(job, timeout, timeWaited + 5000, callback);
+      }, 5000);
+    } else {
+      callback(null, metadata);
+    }
+  });
+}
+
+/**
+ * Load a csv file into a BigQuery table.
+ *
+ * @param {string} pathToCsvFile Path to csv file to load.
+ * @param {string} datasetId The dataset.
+ * @param {string} tableName The table.
+ * @param {Function} callback Callback function.
+ */ +function loadDataFromCsvExample(pathToCsvFile, datasetId, tableName, callback) { + + if (!pathToCsvFile || typeof pathToCsvFile !== 'string') { + return callback(new Error('pathToCsvFile is required!')); + } + if (!datasetId || typeof pathToCsvFile !== 'string') { + return callback(new Error('datasetId is require!')); + } + if (!tableName || typeof pathToCsvFile !== 'string') { + return callback(new Error('tableName is require!')); + } + + var dataset = bigquery.dataset(datasetId); + var table = dataset.table(tableName); + + var options = { + skipLeadingRows: 0 + }; + + fs.createReadStream(pathToCsvFile) + .pipe(table.createWriteStream(options)) + .on('complete', function (job) { + // Wait up to 20 seconds for job to complete + pollJobUntilDone(job, 60000, 0, function (err, metadata) { + if (err) { + return callback(err); + } + console.log('job completed', metadata); + callback(null, metadata); + }); + }); +} +// [END complete] + +exports.createTable = function (datasetId, tableName, callback) { + var dataset = bigquery.dataset(datasetId); + var pathToSchemaFile = __dirname + '/resources/schema.json'; + fs.readFile(pathToSchemaFile, { encoding: 'utf8' }, function (err, file) { + if (err) { + return callback(err); + } + var schema = JSON.parse(file); + var columns = schema.map(function (column) { + return column.name + ':' + column.type; + }); + dataset.createTable(tableName, { schema: columns.join(',') }, callback); + }); +}; + +exports.deleteTable = function (datasetId, tableName, callback) { + var dataset = bigquery.dataset(datasetId); + var table = dataset.table(tableName); + table.delete(callback); +}; + +// Run the examples +exports.main = function (pathToCsvFile, datasetId, tableName, cb) { + loadDataFromCsvExample(pathToCsvFile, datasetId, tableName, cb); +}; + +if (module === require.main) { + var args = process.argv.slice(2); + exports.main( + args[0], + args[1], + args[2], + console.log + ); +} diff --git a/bigquery/package.json 
b/bigquery/package.json new file mode 100644 index 00000000000..470c572e8a3 --- /dev/null +++ b/bigquery/package.json @@ -0,0 +1,18 @@ +{ + "name": "nodejs-docs-samples-bigquery", + "description": "Node.js samples for Google BigQuery.", + "version": "0.0.1", + "private": true, + "license": "Apache Version 2.0", + "author": "Google Inc.", + "engines": { + "node": ">=0.10.x" + }, + "scripts": { + "dataset_size": "node dataset_size.js" + }, + "dependencies": { + "async": "^1.5.2", + "gcloud": "^0.29.0" + } +} diff --git a/bigquery/resources/data.csv b/bigquery/resources/data.csv new file mode 100644 index 00000000000..20e1a5d6cbf --- /dev/null +++ b/bigquery/resources/data.csv @@ -0,0 +1 @@ +Gandalf, 2000, 140.0, 1 \ No newline at end of file diff --git a/bigquery/resources/data.json b/bigquery/resources/data.json new file mode 100644 index 00000000000..cecbbf15826 --- /dev/null +++ b/bigquery/resources/data.json @@ -0,0 +1 @@ +{"Name":"Gandalf","Age":2000,"Weight":140.0,"IsMagic":true} \ No newline at end of file diff --git a/bigquery/resources/schema.json b/bigquery/resources/schema.json new file mode 100644 index 00000000000..68876fb6044 --- /dev/null +++ b/bigquery/resources/schema.json @@ -0,0 +1 @@ +[{"type":"STRING","name":"Name"},{"type":"INTEGER","name":"Age"},{"type":"FLOAT","name":"Weight"},{"type":"BOOLEAN","name":"IsMagic"}] \ No newline at end of file diff --git a/bigquery/resources/streamrows.json b/bigquery/resources/streamrows.json new file mode 100644 index 00000000000..51b111272d6 --- /dev/null +++ b/bigquery/resources/streamrows.json @@ -0,0 +1,7 @@ +[ + {"Name":"test","Age":0,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":1,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":2,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":3,"Weight":100.0,"IsMagic":false}, + {"Name":"test","Age":0,"Weight":100.0,"IsMagic":false} +] \ No newline at end of file diff --git a/package.json b/package.json index d178f3bd254..f49f56d7689 100644 --- 
a/package.json +++ b/package.json @@ -26,10 +26,12 @@ "scripts": { "jshint": "jshint --exclude-path=.jshintignore .", "deps_appengine": "ava --match='*: dependencies should install*'", - "ava": "npm run deps_appengine && ava --match='!*: dependencies should install*'", + "ava": "ava --match='!*: dependencies should install*'", + "ava:deps": "npm run deps_appengine && npm run ava", "cover": "npm run deps_appengine && nyc ava --match='!*: dependencies should install*'", "report": "nyc report --reporter=text-lcov | ./node_modules/.bin/coveralls", "report-html": "nyc report --reporter=html", + "deps_bigquery": "cd bigquery; npm i; cd ../", "deps_datastore": "cd datastore; npm i; cd ../", "deps_pubsub": "cd pubsub; npm i; cd ../", "deps_monitoring": "cd monitoring; npm i; cd ../", @@ -39,7 +41,7 @@ "deps_functions": "cd functions/uuid; npm i; cd ../..", "deps_sendgrid": "cd computeengine/sendgrid; npm i; cd ../..", "pretest_geddy": "cd appengine/geddy; npm i geddy; GEDDY_SECRET=config/secrets.json; [[ -f $GEDDY_SECRET ]] || echo '{}' > $GEDDY_SECRET && node node_modules/.bin/geddy gen secret; cd ../..;", - "pretest": "npm run deps_datastore; npm run deps_monitoring; npm run deps_storage; npm run deps_pubsub; npm run deps_prediction; npm run deps_logging; npm run deps_functions; npm run deps_sendgrid; npm run pretest_geddy", + "pretest": "npm run deps_bigquery; npm run deps_datastore; npm run deps_monitoring; npm run deps_storage; npm run deps_pubsub; npm run deps_prediction; npm run deps_logging; npm run deps_functions; npm run deps_sendgrid; npm run pretest_geddy", "test": "npm run jshint && npm run cover" }, "ava": { diff --git a/pubsub/subscription.js b/pubsub/subscription.js index e63654859e2..f0f31e0c322 100644 --- a/pubsub/subscription.js +++ b/pubsub/subscription.js @@ -20,7 +20,6 @@ var async = require('async'); // environment variables to run this sample var projectId = process.env.GCLOUD_PROJECT; -// [START require] // Initialize gcloud var gcloud = 
require('gcloud')({ projectId: projectId diff --git a/test/bigquery/dataset_size.test.js b/test/bigquery/dataset_size.test.js new file mode 100644 index 00000000000..9e2c3ef5e18 --- /dev/null +++ b/test/bigquery/dataset_size.test.js @@ -0,0 +1,30 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +var test = require('ava'); +var datasetSizeExample = require('../../bigquery/dataset_size'); + +test.cb('should return the size of a dataset', function (t) { + datasetSizeExample.main( + 'bigquery-public-data', + 'hacker_news', + function (err, size) { + t.ifError(err); + t.is(typeof size, 'string'); + t.ok(size.indexOf(' GB') === size.length - 3); + t.end(); + } + ); +}); diff --git a/test/bigquery/getting_started.test.js b/test/bigquery/getting_started.test.js new file mode 100644 index 00000000000..48387b2c07a --- /dev/null +++ b/test/bigquery/getting_started.test.js @@ -0,0 +1,28 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +var test = require('ava'); +var gettingStartedExample = require('../../bigquery/getting_started'); + +test.cb('should run a query', function (t) { + gettingStartedExample.main( + function (err, rows) { + t.ifError(err); + t.ok(Array.isArray(rows)); + t.is(rows.length, 10); + t.end(); + } + ); +}); diff --git a/test/bigquery/load_data_from_csv.test.js b/test/bigquery/load_data_from_csv.test.js new file mode 100644 index 00000000000..c0a755f9b99 --- /dev/null +++ b/test/bigquery/load_data_from_csv.test.js @@ -0,0 +1,47 @@ +// Copyright 2016, Google, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +var async = require('async'); +var test = require('ava'); +var loadDataFromCsvExample = require('../../bigquery/load_data_from_csv'); + +test.cb('should load data from a csv file', function (t) { + var pathToCsvFile = __dirname + '/../../bigquery/resources/data.csv'; + var datasetId = 'nodejs_docs_samples'; + var tableName = 'test_' + new Date().getTime(); + + async.series([ + function (cb) { + loadDataFromCsvExample.createTable(datasetId, tableName, cb); + }, + function (cb) { + loadDataFromCsvExample.main(pathToCsvFile, datasetId, tableName, cb); + }, + function (cb) { + loadDataFromCsvExample.deleteTable(datasetId, tableName, cb); + } + ], function (err, results) { + if (err) { + loadDataFromCsvExample.deleteTable(datasetId, tableName, function () { + t.end(err); + }); + } else { + t.ifError(err); + // metadata + t.is(results[1].status.state, 'DONE'); + t.end(); + } + }); +});