@@ -5,11 +5,25 @@ import (
5
5
"os"
6
6
"os/exec"
7
7
"path/filepath"
8
+ "time"
8
9
9
10
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
10
11
wav "github.com/go-audio/wav"
11
12
)
12
13
14
// Segment is one transcribed span of audio in a JSON-serialisable form.
//
// The exported field is spelled Id (not ID) to keep the existing API stable.
type Segment struct {
	// Id is the segment number; Transcript fills it from the whisper
	// segment's Num field.
	Id int `json:"id"`
	// Start is the segment's start time as reported by whisper. As a
	// time.Duration it is encoded in JSON as an integer count of nanoseconds.
	Start time.Duration `json:"start"`
	// End is the segment's end time as reported by whisper, encoded in JSON
	// the same way as Start.
	End time.Duration `json:"end"`
	// Text is the text recognised for this segment.
	Text string `json:"text"`
	// Tokens holds the Id of every token in the segment, in order.
	Tokens []int `json:"tokens"`
}
21
+
22
+ type Result struct {
23
+ Segments []Segment `json:"segments"`
24
+ Text string `json:"text"`
25
+ }
26
+
13
27
func sh (c string ) (string , error ) {
14
28
cmd := exec .Command ("/bin/sh" , "-c" , c )
15
29
cmd .Env = os .Environ ()
@@ -28,40 +42,41 @@ func audioToWav(src, dst string) error {
28
42
return nil
29
43
}
30
44
31
- func Transcript (model whisper.Model , audiopath , language string , threads uint ) (string , error ) {
45
+ func Transcript (model whisper.Model , audiopath , language string , threads uint ) (Result , error ) {
46
+ res := Result {}
32
47
33
48
dir , err := os .MkdirTemp ("" , "whisper" )
34
49
if err != nil {
35
- return "" , err
50
+ return res , err
36
51
}
37
52
defer os .RemoveAll (dir )
38
53
39
54
convertedPath := filepath .Join (dir , "converted.wav" )
40
55
41
56
if err := audioToWav (audiopath , convertedPath ); err != nil {
42
- return "" , err
57
+ return res , err
43
58
}
44
59
45
60
// Open samples
46
61
fh , err := os .Open (convertedPath )
47
62
if err != nil {
48
- return "" , err
63
+ return res , err
49
64
}
50
65
defer fh .Close ()
51
66
52
67
// Read samples
53
68
d := wav .NewDecoder (fh )
54
69
buf , err := d .FullPCMBuffer ()
55
70
if err != nil {
56
- return "" , err
71
+ return res , err
57
72
}
58
73
59
74
data := buf .AsFloat32Buffer ().Data
60
75
61
76
// Process samples
62
77
context , err := model .NewContext ()
63
78
if err != nil {
64
- return "" , err
79
+ return res , err
65
80
66
81
}
67
82
@@ -74,17 +89,25 @@ func Transcript(model whisper.Model, audiopath, language string, threads uint) (
74
89
}
75
90
76
91
if err := context .Process (data , nil , nil ); err != nil {
77
- return "" , err
92
+ return res , err
78
93
}
79
94
80
- text := ""
81
95
for {
82
- segment , err := context .NextSegment ()
96
+ s , err := context .NextSegment ()
83
97
if err != nil {
84
98
break
85
99
}
86
- text += segment .Text
100
+
101
+ var tokens []int
102
+ for _ , t := range (s .Tokens ) {
103
+ tokens = append (tokens , t .Id )
104
+ }
105
+
106
+ segment := Segment {Id : s .Num , Text : s .Text , Start :s .Start , End : s .End , Tokens : tokens }
107
+ res .Segments = append (res .Segments , segment )
108
+
109
+ res .Text += s .Text
87
110
}
88
111
89
- return text , nil
112
+ return res , nil
90
113
}
0 commit comments