@@ -21,12 +21,15 @@ import (
21
21
"errors"
22
22
"fmt"
23
23
"io"
24
+ "io/ioutil"
24
25
"math/rand"
25
26
"net/url"
26
27
"os"
27
28
"path/filepath"
29
+ "regexp"
28
30
goruntime "runtime"
29
31
"sort"
32
+ "strconv"
30
33
"strings"
31
34
"sync"
32
35
"time"
@@ -127,6 +130,40 @@ func (s *startSpec) getTargetID(podStatus *kubecontainer.PodStatus) (*kubecontai
127
130
return & targetStatus .ID , nil
128
131
}
129
132
133
// restartCountLogFileRegex matches file names of the form
// "{restartCount}.log", optionally followed by a dot-separated suffix
// (for example "3.log.gz"). It is anchored at both ends and the dot is
// escaped so that unrelated names which merely contain digits followed by
// "log" (e.g. "prefix-5.log" or "7xlog") are not mistaken for container logs.
var restartCountLogFileRegex = regexp.MustCompile(`^(\d+)\.log(\..*)?$`)

// calcRestartCountByLogDir derives the next restart count for a container
// from the log files present in its log directory.
//
// It scans the regular entries of path for names of the form
// "{restartCount}.log[.suffix]" and returns the highest restartCount found
// plus one. It returns 0 when the directory does not exist, is empty, or
// holds no matching files.
//
// A missing directory is not an error. Any other failure to read the
// directory is returned together with a count of 0.
func calcRestartCountByLogDir(path string) (int, error) {
	// Read the directory directly instead of calling os.Stat first: this
	// avoids a stat-then-read race and lets us tell "does not exist" apart
	// from real failures such as permission errors.
	files, err := ioutil.ReadDir(path)
	if err != nil {
		if os.IsNotExist(err) {
			return 0, nil
		}
		return 0, err
	}

	restartCount := 0
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		matches := restartCountLogFileRegex.FindStringSubmatch(file.Name())
		if matches == nil {
			continue
		}
		count, err := strconv.Atoi(matches[1])
		if err != nil {
			// The numeric part does not fit in an int, so this is not a
			// plausible restart count. Skip the file rather than abort
			// the scan of the remaining log files.
			continue
		}
		// The next restart count is one past the highest one seen.
		if next := count + 1; next > restartCount {
			restartCount = next
		}
	}
	return restartCount, nil
}
166
+
130
167
// startContainer starts a container and, on error, returns a message indicating why it failed.
131
168
// It starts the container through the following steps:
132
169
// * pull the image
@@ -150,6 +187,22 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
150
187
containerStatus := podStatus .FindContainerStatusByName (container .Name )
151
188
if containerStatus != nil {
152
189
restartCount = containerStatus .RestartCount + 1
190
+ } else {
191
+ // The container runtime keeps state on container statuses and
192
+ // what the container restart count is. When nodes are rebooted
193
+ // some container runtimes clear their state which causes the
194
+ // restartCount to be reset to 0. This causes the logfile to
195
+ // start at 0.log, which either overwrites or appends to the
196
+ // already existing log.
197
+ //
198
+ // We are checking to see if the log directory exists, and find
199
+ // the latest restartCount by checking the log name -
200
+ // {restartCount}.log - and adding 1 to it.
201
+ logDir := BuildContainerLogsDirectory (pod .Namespace , pod .Name , pod .UID , container .Name )
202
+ restartCount , err = calcRestartCountByLogDir (logDir )
203
+ if err != nil {
204
+ klog .InfoS ("Log directory exists but could not calculate restartCount" , "logDir" , logDir , "err" , err )
205
+ }
153
206
}
154
207
155
208
target , err := spec .getTargetID (podStatus )
0 commit comments