Skip to content

Commit

Permalink
Created a sample code to demo how to scan a pdf file (#48)
Browse files Browse the repository at this point in the history
* Created a sample code to demo how to scan a pdf file

* Applied prettier

* Made changes per Ace’s comments

* Made changes related to PR comments

* Made changes based on Ace’s comments.
  • Loading branch information
happyhuman authored Apr 3, 2018
1 parent 95d2e92 commit b5d4480
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 0 deletions.
98 changes: 98 additions & 0 deletions vision/samples/detect.v1p2beta1.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Copyright 2018, Google, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
 * Requests asynchronous document text detection for a PDF file stored in a
 * bucket and logs the destination URI reported by the finished operation.
 *
 * The source URI is `gs://<bucketName>/<fileName>`; the output destination is
 * the same URI with `.json` appended. Errors from either the initial request
 * or the long-running operation are logged, not thrown.
 *
 * @param {string} bucketName Bucket where the file resides.
 * @param {string} fileName Path to the PDF file within the bucket.
 */
function detectPdfText(bucketName, fileName) {
  // [START vision_async_detect_document_ocr]

  // Imports the Google Cloud client libraries
  const vision = require('@google-cloud/vision').v1p2beta1;

  // Creates a client
  const client = new vision.ImageAnnotatorClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // Bucket where the file resides
  // const bucketName = 'my-bucket';
  // Path to PDF file within bucket
  // const fileName = 'path/to/document.pdf';

  const gcsSourceUri = `gs://${bucketName}/${fileName}`;
  const gcsDestinationUri = `gs://${bucketName}/${fileName}.json`;

  const inputConfig = {
    // Supported mime_types are: 'application/pdf' and 'image/tiff'
    mimeType: 'application/pdf',
    gcsSource: {
      uri: gcsSourceUri,
    },
  };
  const outputConfig = {
    gcsDestination: {
      uri: gcsDestinationUri,
    },
  };
  const features = [{type: 'DOCUMENT_TEXT_DETECTION'}];
  const request = {
    requests: [
      {
        inputConfig,
        features,
        outputConfig,
      },
    ],
  };

  client
    .asyncBatchAnnotateFiles(request)
    .then(results => {
      console.log(results);
      const operation = results[0];
      // Get a Promise representation of the final result of the job.
      // Returning it keeps the chain flat, so one catch handles both steps.
      return operation.promise();
    })
    .then(filesResponse => {
      const destinationUri =
        filesResponse[0].responses[0].outputConfig.gcsDestination.uri;
      console.log(`Json saved to: ${destinationUri}`);
    })
    .catch(error => {
      console.log(error);
    });
  // [END vision_async_detect_document_ocr]
}

// Command-line interface: the `pdf` command forwards its two positional
// arguments to detectPdfText.
const runPdfCommand = ({bucketName, fileName}) =>
  detectPdfText(bucketName, fileName);

require(`yargs`) // eslint-disable-line
  .demand(1)
  .command(
    `pdf <bucketName> <fileName>`,
    `Extracts full text from a pdf file`,
    {},
    runPdfCommand
  )
  .example(`node $0 pdf my-bucket my-pdf.pdf`)
  .wrap(120)
  .recommendCommands()
  .epilogue(`For more information, see https://cloud.google.com/vision/docs`)
  .help()
  .strict().argv;
Binary file added vision/samples/resources/pdf-ocr.pdf
Binary file not shown.
56 changes: 56 additions & 0 deletions vision/samples/system-test/detect.v1p2beta1.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright 2017, Google, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

const path = require(`path`);
const storage = require(`@google-cloud/storage`)();
const test = require(`ava`);
const tools = require(`@google-cloud/nodejs-repo-tools`);
const uuid = require(`uuid`);

// Bucket name carries a random UUID suffix generated on each run.
const bucketName = `nodejs-docs-samples-test-${uuid.v4()}`;
// Sample command under test, run from the directory above this test file.
const cmd = `node detect.v1p2beta1.js`;
const cwd = path.join(__dirname, `..`);

// Fixture files: each entry pairs the file name with its absolute local path
// under ../resources.
const files = [`pdf-ocr.pdf`].map(name => ({
  name,
  localPath: path.resolve(path.join(__dirname, `../resources/${name}`)),
}));

// Setup: run the credentials check once, then create the test bucket and
// upload every fixture file to it.
test.before(tools.checkCredentials);
test.before(async () => {
  const [bucket] = await storage.createBucket(bucketName);
  await Promise.all(files.map(file => bucket.upload(file.localPath)));
});

// Teardown: always empty and delete the test bucket, even if tests failed.
test.after.always(async () => {
  const bucket = storage.bucket(bucketName);
  await bucket.deleteFiles({force: true});
  await bucket.deleteFiles({force: true}); // Try a second time...
  await bucket.delete();
});

// Runs the `pdf` command against the uploaded fixture and checks that the
// logged destination URI is the input file name with `.json` appended.
test(`should extract text from pdf file`, async t => {
  const output = await tools.runAsync(
    `${cmd} pdf ${bucketName} ${files[0].name}`,
    cwd
  );
  t.true(output.includes(`${files[0].name}.json`));
});

0 comments on commit b5d4480

Please sign in to comment.