Skip to content

Commit 877ef00

Browse files
nirupa-kumar and Ace Nassri
authored and
Ace Nassri
committed
Speech GA – auto_punctuation & model selection (#179)
* Speech-GA : speech_transcribe_auto_punctuation speech_transcribe_enhanced_model speech_transcribe_model_selection * Speech-GA : Fixing review issues.
1 parent d6d43d9 commit 877ef00

File tree

4 files changed

+330
-329
lines changed

4 files changed

+330
-329
lines changed

speech/recognize.js

+289-1
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,237 @@ function streamingMicRecognize(encoding, sampleRateHertz, languageCode) {
453453
// [END speech_transcribe_streaming_mic]
454454
}
455455

456-
require(`yargs`)
456+
// Synchronously transcribes a local audio file using an explicitly chosen
// recognition model (e.g. phone_call, video, default). Prints the
// transcription to stdout; errors are logged, not thrown.
function syncRecognizeModelSelection(
  filename,
  model,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_model_selection]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const model = 'Model to use, e.g. phone_call, video, default';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  // Build the full request in one literal; property shorthand works because
  // the parameter names match the API field names. The audio payload is the
  // file content, base64-encoded as the sync API requires.
  const request = {
    config: {
      encoding,
      sampleRateHertz,
      languageCode,
      model,
    },
    audio: {
      content: fs.readFileSync(filename).toString('base64'),
    },
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(([response]) => {
      // Join the top alternative of every result into one transcript.
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_model_selection]
}
514+
515+
// Synchronously transcribes an audio file stored in Google Cloud Storage
// using an explicitly chosen recognition model. Prints the transcription
// to stdout; errors are logged, not thrown.
function syncRecognizeModelSelectionGCS(
  gcsUri,
  model,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_model_selection_gcs]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const gcsUri = 'gs://my-bucket/audio.raw';
  // const model = 'Model to use, e.g. phone_call, video, default';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  // The audio is referenced by URI rather than inlined; property shorthand
  // works because the parameter names match the API field names.
  const request = {
    config: {
      encoding,
      sampleRateHertz,
      languageCode,
      model,
    },
    audio: {
      uri: gcsUri,
    },
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(([response]) => {
      // Join the top alternative of every result into one transcript.
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_model_selection_gcs]
}
572+
573+
// Synchronously transcribes a local audio file with automatic punctuation
// enabled. Prints the transcription to stdout; errors are logged, not thrown.
//
// FIX: the original accepted `sampleRateHertz` (and the CLI passes
// `opts.sampleRateHertz` to it) but never placed it in `config`, so the
// -r flag was silently ignored. It is now forwarded to the API.
function syncRecognizeWithAutoPunctuation(
  filename,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_auto_punctuation]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    enableAutomaticPunctuation: true,
  };
  const audio = {
    // The sync API takes the audio inline, base64-encoded.
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      // Join the top alternative of every result into one transcript.
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_auto_punctuation]
}
628+
629+
// Synchronously transcribes a local audio file using the enhanced
// 'phone_call' model (useEnhanced: true). Prints each result's top
// transcript to stdout; errors are logged, not thrown.
//
// FIX: the original accepted `sampleRateHertz` (and the CLI passes
// `opts.sampleRateHertz` to it) but never placed it in `config`, so the
// -r flag was silently ignored. It is now forwarded to the API.
function syncRecognizeWithEnhancedModel(
  filename,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_enhanced_model]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    // Enhanced models must be requested explicitly and paired with a
    // model that has an enhanced variant; 'phone_call' is one.
    useEnhanced: true,
    model: 'phone_call',
  };
  const audio = {
    // The sync API takes the audio inline, base64-encoded.
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      // Print the top alternative of each result on its own line.
      response.results.forEach(result => {
        const alternative = result.alternatives[0];
        console.log(alternative.transcript);
      });
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_enhanced_model]
}
685+
686+
require(`yargs`) // eslint-disable-line
457687
.demand(1)
458688
.command(
459689
`sync <filename>`,
@@ -550,6 +780,56 @@ require(`yargs`)
550780
opts.languageCode
551781
)
552782
)
783+
.command(
784+
`sync-model <filename> <model>`,
785+
`Detects speech in a local audio file using provided model.`,
786+
{},
787+
opts =>
788+
syncRecognizeModelSelection(
789+
opts.filename,
790+
opts.model,
791+
opts.encoding,
792+
opts.sampleRateHertz,
793+
opts.languageCode
794+
)
795+
)
796+
.command(
797+
`sync-model-gcs <gcsUri> <model>`,
798+
`Detects speech in an audio file located in a Google Cloud Storage bucket using provided model.`,
799+
{},
800+
opts =>
801+
syncRecognizeModelSelectionGCS(
802+
opts.gcsUri,
803+
opts.model,
804+
opts.encoding,
805+
opts.sampleRateHertz,
806+
opts.languageCode
807+
)
808+
)
809+
.command(
810+
`sync-auto-punctuation <filename>`,
811+
`Detects speech in a local audio file with auto punctuation.`,
812+
{},
813+
opts =>
814+
syncRecognizeWithAutoPunctuation(
815+
opts.filename,
816+
opts.encoding,
817+
opts.sampleRateHertz,
818+
opts.languageCode
819+
)
820+
)
821+
.command(
822+
`sync-enhanced-model <filename>`,
823+
`Detects speech in a local audio file using an enhanced model.`,
824+
{},
825+
opts =>
826+
syncRecognizeWithEnhancedModel(
827+
opts.filename,
828+
opts.encoding,
829+
opts.sampleRateHertz,
830+
opts.languageCode
831+
)
832+
)
553833
.options({
554834
encoding: {
555835
alias: 'e',
@@ -577,6 +857,14 @@ require(`yargs`)
577857
.example(`node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000`)
578858
.example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`)
579859
.example(`node $0 listen`)
860+
.example(
861+
`node $0 sync-model ./resources/Google_Gnome.wav video -e LINEAR16 -r 16000`
862+
)
863+
.example(
864+
`node $0 sync-model-gcs gs://gcs-test-data/Google_Gnome.wav phone_call -e LINEAR16 -r 16000`
865+
)
866+
.example(`node $0 sync-auto-punctuation ./resources/commercial_mono.wav`)
867+
.example(`node $0 sync-enhanced-model ./resources/commercial_mono.wav`)
580868
.wrap(120)
581869
.recommendCommands()
582870
.epilogue(`For more information, see https://cloud.google.com/speech/docs`)

0 commit comments

Comments
 (0)