Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

functions/ocr: Node 8 + system tests upgrade #1388

Merged
merged 2 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .kokoro/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,12 @@ export SENDGRID_API_KEY=$(cat $KOKORO_GFILE_DIR/secrets-sendgrid-api-key.txt)
export FUNCTIONS_TOPIC=integration-tests-instance
export FUNCTIONS_BUCKET=$GCLOUD_PROJECT
export OUTPUT_BUCKET=$FUNCTIONS_BUCKET

# functions/translate
export SUPPORTED_LANGUAGE_CODES="en,es"
export TRANSLATE_TOPIC=$FUNCTIONS_TOPIC
export RESULT_TOPIC=$FUNCTIONS_TOPIC
export RESULT_BUCKET=$FUNCTIONS_BUCKET

# Configure IoT variables
export NODEJS_IOT_EC_PUBLIC_KEY=${KOKORO_GFILE_DIR}/ec_public.pem
Expand Down
275 changes: 126 additions & 149 deletions functions/ocr/app/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2016, Google, Inc.
* Copyright 2019, Google LLC.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand All @@ -16,7 +16,12 @@
'use strict';

// [START functions_ocr_setup]
const config = require('./config.json');
const config = require('nconf')
.env()
.file('./config.json')
.defaults({
TO_LANG: ['en', 'es'],
});

// Get a reference to the Pub/Sub component
const {PubSub} = require('@google-cloud/pubsub');
Expand All @@ -43,14 +48,12 @@ const {Buffer} = require('safe-buffer');
* @param {string} topicName Name of the topic on which to publish.
* @param {object} data The message data to publish.
*/
function publishResult(topicName, data) {
const publishResult = async (topicName, data) => {
const dataBuffer = Buffer.from(JSON.stringify(data));

return pubsub
.topic(topicName)
.get({autoCreate: true})
.then(([topic]) => topic.publish(dataBuffer));
}
const [topic] = await pubsub.topic(topicName).get({autoCreate: true});
topic.publish(dataBuffer);
};
// [END functions_ocr_publish]

// [START functions_ocr_detect]
Expand All @@ -61,42 +64,38 @@ function publishResult(topicName, data) {
* @param {string} filename Cloud Storage file name.
* @returns {Promise}
*/
function detectText(bucketName, filename) {
let text;

const detectText = async (bucketName, filename) => {
console.log(`Looking for text in image ${filename}`);
return vision
.textDetection(`gs://${bucketName}/${filename}`)
.then(([detections]) => {
const annotation = detections.textAnnotations[0];
text = annotation ? annotation.description : '';
console.log(`Extracted text from image (${text.length} chars)`);
return translate.detect(text);
})
.then(([detection]) => {
if (Array.isArray(detection)) {
detection = detection[0];
}
console.log(`Detected language "${detection.language}" for ${filename}`);

// Submit a message to the bus for each language we're going to translate to
const tasks = config.TO_LANG.map(lang => {
let topicName = config.TRANSLATE_TOPIC;
if (detection.language === lang) {
topicName = config.RESULT_TOPIC;
}
const messageData = {
text: text,
filename: filename,
lang: lang,
};

return publishResult(topicName, messageData);
});

return Promise.all(tasks);
});
}
const [textDetections] = await vision.textDetection(
`gs://${bucketName}/${filename}`
);
const [annotation] = textDetections.textAnnotations;
const text = annotation ? annotation.description : '';
console.log(`Extracted text from image:`, text);

let [translateDetection] = await translate.detect(text);
if (Array.isArray(translateDetection)) {
[translateDetection] = translateDetection;
}
console.log(
`Detected language "${translateDetection.language}" for ${filename}`
);

// Submit a message to the bus for each language we're going to translate to
const topicName = config.get('TRANSLATE_TOPIC');

const tasks = config.get('TO_LANG').map(lang => {
const messageData = {
text: text,
filename: filename,
lang: lang,
};

return publishResult(topicName, messageData);
});

return Promise.all(tasks);
};
// [END functions_ocr_detect]

// [START functions_ocr_rename]
Expand All @@ -107,9 +106,9 @@ function detectText(bucketName, filename) {
* @param {string} lang Language to append.
* @returns {string} The new filename.
*/
function renameImageForSave(filename, lang) {
const renameImageForSave = (filename, lang) => {
return `${filename}_to_${lang}.txt`;
}
};
// [END functions_ocr_rename]

// [START functions_ocr_process]
Expand All @@ -118,35 +117,24 @@ function renameImageForSave(filename, lang) {
* a file is uploaded to the Cloud Storage bucket you created
* for uploading images.
*
* @param {object} event.data (Node 6) A Google Cloud Storage File object.
* @param {object} event (Node 8+) A Google Cloud Storage File object.
* @param {object} event A Google Cloud Storage File object.
*/
exports.processImage = event => {
const file = event.data || event;

return Promise.resolve()
.then(() => {
if (file.resourceState === 'not_exists') {
// This was a deletion event, we don't want to process this
return;
}

if (!file.bucket) {
throw new Error(
'Bucket not provided. Make sure you have a "bucket" property in your request'
);
}
if (!file.name) {
throw new Error(
'Filename not provided. Make sure you have a "name" property in your request'
);
}

return detectText(file.bucket, file.name);
})
.then(() => {
console.log(`File ${file.name} processed.`);
});
exports.processImage = async event => {
const {bucket, name} = event;

if (!bucket) {
throw new Error(
'Bucket not provided. Make sure you have a "bucket" property in your request'
);
}
if (!name) {
throw new Error(
'Filename not provided. Make sure you have a "name" property in your request'
);
}

await detectText(bucket, name);
console.log(`File ${name} processed.`);
};
// [END functions_ocr_process]

Expand All @@ -157,49 +145,44 @@ exports.processImage = event => {
* by the TRANSLATE_TOPIC configuration value (set via an environment
* variable or the config.json file). The
* function translates text using the Google Translate API.
*
* @param {object} event.data (Node 6) The Cloud Pub/Sub Message object.
* @param {object} event (Node 8+) The Cloud Pub/Sub Message object.
* @param {object} event The Cloud Pub/Sub Message object.
* @param {string} event.data The "data" property of the Cloud Pub/Sub
* Message. This property will be a base64-encoded string that you must decode.
*/
exports.translateText = event => {
const pubsubData = event.data.data || event.data;
exports.translateText = async event => {
const pubsubData = event.data;
const jsonStr = Buffer.from(pubsubData, 'base64').toString();
const payload = JSON.parse(jsonStr);

return Promise.resolve()
.then(() => {
if (!payload.text) {
throw new Error(
'Text not provided. Make sure you have a "text" property in your request'
);
}
if (!payload.filename) {
throw new Error(
'Filename not provided. Make sure you have a "filename" property in your request'
);
}
if (!payload.lang) {
throw new Error(
'Language not provided. Make sure you have a "lang" property in your request'
);
}

console.log(`Translating text into ${payload.lang}`);
return translate.translate(payload.text, payload.lang);
})
.then(([translation]) => {
const messageData = {
text: translation,
filename: payload.filename,
lang: payload.lang,
};

return publishResult(config.RESULT_TOPIC, messageData);
})
.then(() => {
console.log(`Text translated to ${payload.lang}`);
});
const {text, filename, lang} = JSON.parse(jsonStr);

if (!text) {
throw new Error(
'Text not provided. Make sure you have a "text" property in your request'
);
}
if (!filename) {
throw new Error(
'Filename not provided. Make sure you have a "filename" property in your request'
);
}
if (!lang) {
throw new Error(
'Language not provided. Make sure you have a "lang" property in your request'
);
}

console.log(`Translating text into ${lang}`);
const [translation] = await translate.translate(text, lang);

console.log(`Translated text:`, translation);

const messageData = {
text: translation,
filename: filename,
lang: lang,
};

await publishResult(config.get('RESULT_TOPIC'), messageData);
console.log(`Text translated to ${lang}`);
};
// [END functions_ocr_translate]

Expand All @@ -210,46 +193,40 @@ exports.translateText = event => {
* by the RESULT_TOPIC configuration value (set via an environment
* variable or the config.json file). The
* function saves the data packet to a file in GCS.
*
* @param {object} event.data (Node 6) The Cloud Pub/Sub Message object.
* @param {object} event (Node 8+) The Cloud Pub/Sub Message object.
* @param {object} event The Cloud Pub/Sub Message object.
* @param {string} event.data The "data" property of the Cloud Pub/Sub
* Message. This property will be a base64-encoded string that you must decode.
*/
exports.saveResult = event => {
const pubsubData = event.data.data || event.data;
exports.saveResult = async event => {
const pubsubData = event.data;
const jsonStr = Buffer.from(pubsubData, 'base64').toString();
const payload = JSON.parse(jsonStr);

return Promise.resolve()
.then(() => {
if (!payload.text) {
throw new Error(
'Text not provided. Make sure you have a "text" property in your request'
);
}
if (!payload.filename) {
throw new Error(
'Filename not provided. Make sure you have a "filename" property in your request'
);
}
if (!payload.lang) {
throw new Error(
'Language not provided. Make sure you have a "lang" property in your request'
);
}

console.log(`Received request to save file ${payload.filename}`);

const bucketName = config.RESULT_BUCKET;
const filename = renameImageForSave(payload.filename, payload.lang);
const file = storage.bucket(bucketName).file(filename);

console.log(`Saving result to ${filename} in bucket ${bucketName}`);

return file.save(payload.text);
})
.then(() => {
console.log(`File saved.`);
});
const {text, filename, lang} = JSON.parse(jsonStr);

if (!text) {
throw new Error(
'Text not provided. Make sure you have a "text" property in your request'
);
}
if (!filename) {
throw new Error(
'Filename not provided. Make sure you have a "filename" property in your request'
);
}
if (!lang) {
throw new Error(
'Language not provided. Make sure you have a "lang" property in your request'
);
}

console.log(`Received request to save file ${filename}`);

const bucketName = config.get('RESULT_BUCKET');
const newFilename = renameImageForSave(filename, lang);
const file = storage.bucket(bucketName).file(newFilename);

console.log(`Saving result to ${newFilename} in bucket ${bucketName}`);

await file.save(text);
console.log(`File saved.`);
};
// [END functions_ocr_save]
15 changes: 10 additions & 5 deletions functions/ocr/app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,21 @@
"@google-cloud/storage": "^2.3.3",
"@google-cloud/translate": "^3.0.0",
"@google-cloud/vision": "^0.25.0",
"nconf": "^0.10.0",
"safe-buffer": "^5.1.2"
},
"devDependencies": {
"@google-cloud/nodejs-repo-tools": "^3.3.0",
"mocha": "^6.0.0",
"proxyquire": "^2.1.0",
"sinon": "^7.2.7"
"@google-cloud/nodejs-repo-tools": "^3.3.0",
"mocha": "^6.0.0"
},
"cloud-repo-tools": {
"requiresKeyFile": true,
"requiresProjectId": true
"requiresProjectId": true,
"requiredEnvVars": [
"FUNCTIONS_BUCKET",
"OUTPUT_BUCKET",
"TRANSLATE_TOPIC",
"RESULT_TOPIC"
]
}
}
Loading