@@ -544,12 +544,12 @@ async function syncRecognizeWithAutoPunctuation(
544
544
languageCode
545
545
) {
546
546
// [START speech_transcribe_auto_punctuation]
547
- // Imports the Google Cloud client library for Beta API
547
+ // Imports the Google Cloud client library for API
548
548
/**
549
549
* TODO(developer): Update client library import to use new
550
550
* version of API when desired features become available
551
551
*/
552
- const speech = require ( '@google-cloud/speech' ) . v1p1beta1 ;
552
+ const speech = require ( '@google-cloud/speech' ) ;
553
553
const fs = require ( 'fs' ) ;
554
554
555
555
// Creates a client
@@ -712,6 +712,62 @@ async function syncRecognizeWithMultiChannelGCS(gcsUri) {
712
712
// [END speech_transcribe_multichannel_gcs]
713
713
}
714
714
715
/**
 * Transcribes a local audio file and labels each recognized word with a
 * speaker tag (speaker diarization).
 *
 * @param {string} fileName Path to a local LINEAR16 audio file sampled at
 *     8000 Hz (e.g. a phone-call recording).
 * @returns {Promise<void>} Resolves after the transcription and per-word
 *     speaker tags have been logged to the console.
 */
async function speechTranscribeDiarization(fileName) {
  // [START speech_transcribe_diarization]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Creates a client
  const client = new speech.SpeechClient();

  // Set config for Diarization
  const diarizationConfig = {
    enableSpeakerDiarization: true,
    maxSpeakerCount: 2,
  };

  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 8000,
    languageCode: 'en-US',
    diarizationConfig,
    model: 'phone_call',
  };

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';

  // The API accepts inline audio as a base64-encoded string.
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  };

  const request = {
    config,
    audio,
  };

  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  console.log(`Speaker Diarization:`);
  // Note: The transcript within each result is separate and sequential per result.
  // However, the words list within an alternative includes all the words
  // from all the results thus far. Thus, to get all the words with speaker
  // tags, you only have to take the words list from the last result:
  const result = response.results[response.results.length - 1];
  const wordsInfo = result.alternatives[0].words;
  wordsInfo.forEach(a =>
    console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
  );
  // [END speech_transcribe_diarization]
}
770
+
715
771
require ( `yargs` ) // eslint-disable-line
716
772
. demand ( 1 )
717
773
. command (
@@ -883,6 +939,12 @@ require(`yargs`) // eslint-disable-line
883
939
opts . languageCode
884
940
)
885
941
)
942
+ . command (
943
+ `Diarization` ,
944
+ `Isolate distinct speakers in an audio file` ,
945
+ { } ,
946
+ opts => speechTranscribeDiarization ( opts . speechFile )
947
+ )
886
948
. options ( {
887
949
encoding : {
888
950
alias : 'e' ,
@@ -905,6 +967,12 @@ require(`yargs`) // eslint-disable-line
905
967
requiresArg : true ,
906
968
type : 'string' ,
907
969
} ,
970
+ speechFile : {
971
+ alias : 'f' ,
972
+ global : true ,
973
+ requiresArg : false ,
974
+ type : 'string' ,
975
+ } ,
908
976
} )
909
977
. example ( `node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000` )
910
978
. example ( `node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000` )
0 commit comments