Skip to content

Commit d1721c3

Browse files
bradmiroAce Nassri
authored and
Ace Nassri
committed
feat(samples): bump auto-punctuation sample to v1 and move diarization to v1 (#463)
* bumped diarization and autocapitalization samples to v1 * Remove extra committed file * fixed linting error
1 parent 48d06d2 commit d1721c3

File tree

2 files changed

+75
-2
lines changed

2 files changed

+75
-2
lines changed

speech/recognize.js

+70-2
Original file line numberDiff line numberDiff line change
@@ -544,12 +544,12 @@ async function syncRecognizeWithAutoPunctuation(
544544
languageCode
545545
) {
546546
// [START speech_transcribe_auto_punctuation]
547-
// Imports the Google Cloud client library for Beta API
547+
// Imports the Google Cloud client library
548548
/**
549549
* TODO(developer): Update client library import to use new
550550
* version of API when desired features become available
551551
*/
552-
const speech = require('@google-cloud/speech').v1p1beta1;
552+
const speech = require('@google-cloud/speech');
553553
const fs = require('fs');
554554

555555
// Creates a client
@@ -712,6 +712,62 @@ async function syncRecognizeWithMultiChannelGCS(gcsUri) {
712712
// [END speech_transcribe_multichannel_gcs]
713713
}
714714

715+
/**
 * Transcribes a local audio file with speaker diarization enabled and prints
 * the transcription followed by every recognized word with its speaker tag.
 *
 * The audio is sent as LINEAR16 at 8000 Hz, `en-US`, using the `phone_call`
 * model and a `maxSpeakerCount` of 2.
 *
 * @param {string} fileName Local path to the audio file to transcribe.
 * @returns {Promise<void>} Resolves once the results have been logged.
 */
async function speechTranscribeDiarization(fileName) {
  // [START speech_transcribe_diarization]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Creates a client
  const client = new speech.SpeechClient();

  // Set config for Diarization
  const diarizationConfig = {
    enableSpeakerDiarization: true,
    maxSpeakerCount: 2,
  };

  const config = {
    encoding: `LINEAR16`,
    sampleRateHertz: 8000,
    languageCode: `en-US`,
    diarizationConfig: diarizationConfig,
    model: `phone_call`,
  };

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';

  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  console.log(`Speaker Diarization:`);
  // Note: The transcript within each result is separate and sequential per result.
  // However, the words list within an alternative includes all the words
  // from all the results thus far. Thus, to get all the words with speaker
  // tags, you only have to take the words list from the last result:
  const result = response.results[response.results.length - 1];
  if (!result) {
    // Nothing was recognized (e.g. silent audio), so there are no words to
    // list; return instead of dereferencing an undefined result.
    return;
  }
  const wordsInfo = result.alternatives[0].words;
  wordsInfo.forEach(a =>
    console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
  );
  // [END speech_transcribe_diarization]
}
770+
715771
require(`yargs`) // eslint-disable-line
716772
.demand(1)
717773
.command(
@@ -883,6 +939,12 @@ require(`yargs`) // eslint-disable-line
883939
opts.languageCode
884940
)
885941
)
942+
.command(
943+
`Diarization`,
944+
`Isolate distinct speakers in an audio file`,
945+
{},
946+
opts => speechTranscribeDiarization(opts.speechFile)
947+
)
886948
.options({
887949
encoding: {
888950
alias: 'e',
@@ -905,6 +967,12 @@ require(`yargs`) // eslint-disable-line
905967
requiresArg: true,
906968
type: 'string',
907969
},
970+
speechFile: {
971+
alias: 'f',
972+
global: true,
973+
requiresArg: false,
974+
type: 'string',
975+
},
908976
})
909977
.example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
910978
.example(`node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000`)

speech/system-test/recognize.test.js

+5
Original file line numberDiff line numberDiff line change
@@ -132,4 +132,9 @@ describe('Recognize', () => {
132132
);
133133
assert.match(output, /Channel Tag: 2/);
134134
});
135+
136+
it('should run speech diarization on a local file', async () => {
137+
const output = execSync(`${cmd} Diarization -f ${filepath2}`);
138+
assert.match(output, /speakerTag:/);
139+
});
135140
});

0 commit comments

Comments
 (0)