1
+ import gevent .monkey
2
+ gevent .monkey .patch_all ()
3
+ import base64
1
4
from email .mime .multipart import MIMEMultipart
2
5
from email .message import Message
3
6
import json
4
7
import struct
8
+ import os
5
9
6
- from flask import Flask , request , Response
7
- from google .cloud import speech
8
-
10
+ import requests
11
+ from flask import Flask , request , Response , abort
9
12
10
13
app = Flask (__name__ )
11
14
15
+ AUTH_URL = "https://auth.rebble.io"
16
+ API_KEY = os .environ ['SPEECH_API_KEY' ]
17
+
12
18
13
19
# We know gunicorn does this, but it doesn't *say* it does this, so we must signal it manually.
14
20
@app .before_request
@@ -18,7 +24,6 @@ def handle_chunking():
18
24
19
25
def parse_chunks (stream ):
20
26
boundary = b'--' + request .headers ['content-type' ].split (';' )[1 ].split ('=' )[1 ].encode ('utf-8' ).strip () # super lazy/brittle parsing.
21
- print ("Boundary: " + boundary .decode ('utf-8' ))
22
27
this_frame = b''
23
28
while True :
24
29
content = stream .read (4096 )
@@ -28,52 +33,52 @@ def parse_chunks(stream):
28
33
frame = this_frame [:end ]
29
34
if frame != b'' :
30
35
header , content = frame .split (b'\r \n \r \n ' , 1 )
31
- print (content )
32
36
yield content [:- 2 ]
33
37
this_frame = this_frame [end + len (boundary ):]
34
38
if content == b'' :
35
39
print ("End of input." )
36
40
break
37
41
38
42
39
- def parse_data ():
40
- boundary = b'--' + request .headers ['content-type' ].split (';' )[1 ].split ('=' )[1 ].encode ('utf-8' ).strip () # super lazy/brittle parsing.
41
- parts = request .data .split (boundary )
42
- for part in parts :
43
- if part == b'' :
44
- continue
45
- yield part .split (b'\r \n \r \n ' , 1 )[1 ][:- 2 ]
46
-
47
-
48
43
@app .route ('/NmspServlet/' , methods = ["POST" ])
49
44
def recognise ():
50
-
51
- client = speech .SpeechClient ()
52
45
stream = request .stream
46
+
47
+ access_token , part1 , part2 = request .host .split ('.' , 1 )[0 ].split ('-' , 3 )
48
+ lang = f"{ part1 } -{ part2 .upper ()} "
49
+
50
+ auth_req = requests .get (f"{ AUTH_URL } /api/v1/me/token" , headers = {'Authorization' : f"Bearer { access_token } " })
51
+ if not auth_req .ok :
52
+ abort (401 )
53
+
53
54
chunks = iter (list (parse_chunks (stream )))
54
55
content = next (chunks ).decode ('utf-8' )
55
- print (content )
56
-
57
- config = speech .types .RecognitionConfig (
58
- encoding = 'SPEEX_WITH_HEADER_BYTE' ,
59
- language_code = 'en-US' ,
60
- sample_rate_hertz = 16000 ,
61
- )
62
- print ('beginning request' )
63
- responses = client .streaming_recognize (
64
- config = speech .types .StreamingRecognitionConfig (config = config ),
65
- requests = (
66
- speech .types .StreamingRecognizeRequest (audio_content = struct .pack ('B' , len (x )) + x )
67
- for x in chunks ))
68
- print ('finished request' )
56
+
57
+ body = {
58
+ 'config' : {
59
+ 'encoding' : 'SPEEX_WITH_HEADER_BYTE' ,
60
+ 'language_code' : lang ,
61
+ 'sample_rate_hertz' : 16000 ,
62
+ 'max_alternatives' : 1 ,
63
+ # 'metadata': {
64
+ # 'interaction_type': 'DICTATION',
65
+ # 'microphone_distance': 'NEARFIELD',
66
+ # },
67
+ },
68
+ 'audio' : {
69
+ 'content' : base64 .b64encode (b'' .join ((struct .pack ('B' , len (x )) + x for x in chunks ))).decode ('utf-8' ),
70
+ },
71
+ }
72
+ result = requests .post (f'https://speech.googleapis.com/v1/speech:recognize?key={ API_KEY } ' , json = body )
73
+ result .raise_for_status ()
74
+
69
75
words = []
70
- for response in responses :
71
- if response .results :
72
- for result in response .results :
73
- words .extend ({
74
- 'word' : x ,
75
- 'confidence' : result .alternatives [0 ].confidence
76
- } for x in result .alternatives [0 ].transcript .split (' ' ))
76
+ if 'results' in result .json ():
77
+ for result in result .json ()['results' ]:
78
+ words .extend ({
79
+ 'word' : x ,
80
+ 'confidence' : result ['alternatives' ][0 ]['confidence' ]
81
+ } for x in result ['alternatives' ][0 ]['transcript' ].split (' ' ))
77
82
78
83
# Now for some reason we also need to give back a mime/multipart message...
79
84
parts = MIMEMultipart ()
@@ -83,6 +88,7 @@ def recognise():
83
88
if len (words ) > 0 :
84
89
response_part .add_header ('Content-Disposition' , 'form-data; name="QueryResult"' )
85
90
words [0 ]['word' ] += '\\ *no-space-before'
91
+ words [0 ]['word' ] = words [0 ]['word' ][0 ].upper () + words [0 ]['word' ][1 :]
86
92
response_part .set_payload (json .dumps ({
87
93
'words' : [words ],
88
94
}))
@@ -96,10 +102,8 @@ def recognise():
96
102
"Prompt" : "Sorry, speech not recognized. Please try again."
97
103
}))
98
104
parts .attach (response_part )
99
- print (parts .as_string ())
100
105
101
106
response = Response (parts .as_string ().split ("\n " , 3 )[3 ])
102
107
response .headers ['Content-Type' ] = f'multipart/form-data; boundary={ parts .get_boundary ()} '
103
- response .headers ['Connection' ] = 'close'
104
108
return response
105
109
0 commit comments