Skip to content

Commit 086a698

Browse files
saswatamcode, GiedriusS, MichaHoffmann, and thibaultmg
authored
Cut patch release v0.35.1 (thanos-io#7394)
* compact: recover from panics (thanos-io#7318) For thanos-io#6775, it would be useful to know the exact block IDs to aid debugging. Signed-off-by: Giedrius Statkevičius <[email protected]> * Sidecar: wait for prometheus on startup (thanos-io#7323) Signed-off-by: Michael Hoffmann <[email protected]> * Receive: fix serverAsClient.Series goroutines leak (thanos-io#6948) * fix serverAsClient goroutines leak Signed-off-by: Thibault Mange <[email protected]> * fix lint Signed-off-by: Thibault Mange <[email protected]> * update changelog Signed-off-by: Thibault Mange <[email protected]> * delete invalid comment Signed-off-by: Thibault Mange <[email protected]> * remove temp dev test Signed-off-by: Thibault Mange <[email protected]> * remove timer channel drain Signed-off-by: Thibault Mange <[email protected]> --------- Signed-off-by: Thibault Mange <[email protected]> * Receive: fix stats (thanos-io#7373) If we account stats for remote write and local writes we will count them twice since the remote write will be counted locally again by the remote receiver instance. Signed-off-by: Michael Hoffmann <[email protected]> * *: Ensure objstore flag values are masked & disable debug/pprof/cmdline (thanos-io#7382) * *: Ensure objstore flag values are masked & disable debug/pprof/cmdline Signed-off-by: Saswata Mukherjee <[email protected]> * small fix Signed-off-by: Saswata Mukherjee <[email protected]> --------- Signed-off-by: Saswata Mukherjee <[email protected]> * Query: dont pass query hints to avoid triggering pushdown (thanos-io#7392) If we have a new querier it will create query hints even without the pushdown feature being present anymore. Old sidecars will then trigger query pushdown which leads to broken max,min,max_over_time and min_over_time. 
Signed-off-by: Michael Hoffmann <[email protected]> * Cut patch release v0.35.1 Signed-off-by: Saswata Mukherjee <[email protected]> --------- Signed-off-by: Giedrius Statkevičius <[email protected]> Signed-off-by: Michael Hoffmann <[email protected]> Signed-off-by: Thibault Mange <[email protected]> Signed-off-by: Saswata Mukherjee <[email protected]> Co-authored-by: Giedrius Statkevičius <[email protected]> Co-authored-by: Michael Hoffmann <[email protected]> Co-authored-by: Thibault Mange <[email protected]>
1 parent d9a0efa commit 086a698

File tree

12 files changed

+178
-177
lines changed

12 files changed

+178
-177
lines changed

Diff for: CHANGELOG.md

+17
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,23 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
1818

1919
### Removed
2020

21+
## [v0.35.1](https://github.com/thanos-io/thanos/tree/release-0.35) - 28.05.2024
22+
23+
### Fixed
24+
25+
- [#7323](https://github.com/thanos-io/thanos/pull/7323) Sidecar: wait for prometheus on startup
26+
- [#6948](https://github.com/thanos-io/thanos/pull/6948) Receive: fix goroutines leak during series requests to thanos store api.
27+
- [#7382](https://github.com/thanos-io/thanos/pull/7382) *: Ensure objstore flag values are masked & disable debug/pprof/cmdline
28+
- [#7392](https://github.com/thanos-io/thanos/pull/7392) Query: fix broken min, max for pre 0.34.1 sidecars
29+
- [#7373](https://github.com/thanos-io/thanos/pull/7373) Receive: Fix stats for remote write
30+
- [#7318](https://github.com/thanos-io/thanos/pull/7318) Compactor: Recover from panic to log block ID
31+
32+
### Added
33+
34+
### Changed
35+
36+
### Removed
37+
2138
## [v0.35.0](https://github.com/thanos-io/thanos/tree/release-0.35) - 02.05.2024
2239

2340
### Fixed

Diff for: VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.35.0
1+
0.35.1

Diff for: cmd/thanos/main.go

+5
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ func getFlagsMap(flags []*kingpin.FlagModel) map[string]string {
214214
if boilerplateFlags.GetFlag(f.Name) != nil {
215215
continue
216216
}
217+
// Mask inline objstore flag which can have credentials.
218+
if f.Name == "objstore.config" || f.Name == "objstore.config-file" {
219+
flagsMap[f.Name] = "<REDACTED>"
220+
continue
221+
}
217222
flagsMap[f.Name] = f.Value.String()
218223
}
219224

Diff for: cmd/thanos/sidecar.go

+61-38
Original file line numberDiff line numberDiff line change
@@ -172,64 +172,87 @@ func runSidecar(
172172
Help: "Boolean indicator whether the sidecar can reach its Prometheus peer.",
173173
})
174174

175-
ctx, cancel := context.WithCancel(context.Background())
176-
g.Add(func() error {
177-
// Only check Prometheus's flags when upload is enabled.
178-
if uploads {
179-
// Check prometheus's flags to ensure same sidecar flags.
180-
if err := validatePrometheus(ctx, m.client, logger, conf.shipper.ignoreBlockSize, m); err != nil {
181-
return errors.Wrap(err, "validate Prometheus flags")
182-
}
183-
}
175+
ctx := context.Background()
176+
// Only check Prometheus's flags when upload is enabled.
177+
if uploads {
178+
// Check prometheus's flags to ensure same sidecar flags.
179+
// We retry infinitely until we validated prometheus flags
180+
err := runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
181+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
182+
defer iterCancel()
184183

185-
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus.
186-
err := runutil.Retry(2*time.Second, ctx.Done(), func() error {
187-
if err := m.BuildVersion(ctx); err != nil {
184+
if err := validatePrometheus(iterCtx, m.client, logger, conf.shipper.ignoreBlockSize, m); err != nil {
188185
level.Warn(logger).Log(
189-
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying",
186+
"msg", "failed to validate prometheus flags. Is Prometheus running? Retrying",
190187
"err", err,
191188
)
192189
return err
193190
}
194191

195192
level.Info(logger).Log(
196-
"msg", "successfully loaded prometheus version",
193+
"msg", "successfully validated prometheus flags",
197194
)
198195
return nil
199196
})
200197
if err != nil {
201-
return errors.Wrap(err, "failed to get prometheus version")
198+
return errors.Wrap(err, "failed to validate prometheus flags")
202199
}
200+
}
203201

204-
// Blocking query of external labels before joining as a Source Peer into gossip.
205-
// We retry infinitely until we reach and fetch labels from our Prometheus.
206-
err = runutil.Retry(2*time.Second, ctx.Done(), func() error {
207-
if err := m.UpdateLabels(ctx); err != nil {
208-
level.Warn(logger).Log(
209-
"msg", "failed to fetch initial external labels. Is Prometheus running? Retrying",
210-
"err", err,
211-
)
212-
promUp.Set(0)
213-
statusProber.NotReady(err)
214-
return err
215-
}
202+
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus.
203+
err := runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
204+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
205+
defer iterCancel()
216206

217-
level.Info(logger).Log(
218-
"msg", "successfully loaded prometheus external labels",
219-
"external_labels", m.Labels().String(),
207+
if err := m.BuildVersion(iterCtx); err != nil {
208+
level.Warn(logger).Log(
209+
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying",
210+
"err", err,
220211
)
221-
promUp.Set(1)
222-
statusProber.Ready()
223-
return nil
224-
})
225-
if err != nil {
226-
return errors.Wrap(err, "initial external labels query")
212+
return err
227213
}
228214

229-
if len(m.Labels()) == 0 {
230-
return errors.New("no external labels configured on Prometheus server, uniquely identifying external labels must be configured; see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
215+
level.Info(logger).Log(
216+
"msg", "successfully loaded prometheus version",
217+
)
218+
return nil
219+
})
220+
if err != nil {
221+
return errors.Wrap(err, "failed to get prometheus version")
222+
}
223+
224+
// Blocking query of external labels before joining as a Source Peer into gossip.
225+
// We retry infinitely until we reach and fetch labels from our Prometheus.
226+
err = runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
227+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
228+
defer iterCancel()
229+
230+
if err := m.UpdateLabels(iterCtx); err != nil {
231+
level.Warn(logger).Log(
232+
"msg", "failed to fetch initial external labels. Is Prometheus running? Retrying",
233+
"err", err,
234+
)
235+
return err
231236
}
232237

238+
level.Info(logger).Log(
239+
"msg", "successfully loaded prometheus external labels",
240+
"external_labels", m.Labels().String(),
241+
)
242+
return nil
243+
})
244+
if err != nil {
245+
return errors.Wrap(err, "initial external labels query")
246+
}
247+
248+
if len(m.Labels()) == 0 {
249+
return errors.New("no external labels configured on Prometheus server, uniquely identifying external labels must be configured; see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
250+
}
251+
promUp.Set(1)
252+
statusProber.Ready()
253+
254+
ctx, cancel := context.WithCancel(context.Background())
255+
g.Add(func() error {
233256
// Periodically query the Prometheus config. We use this as a heartbeat as well as for updating
234257
// the external labels we apply.
235258
return runutil.Repeat(conf.prometheus.getConfigInterval, ctx.Done(), func() error {

Diff for: pkg/compact/compact.go

+16
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"os"
1111
"path/filepath"
1212
"sort"
13+
"strings"
1314
"sync"
1415
"time"
1516

@@ -871,6 +872,21 @@ func (cg *Group) Compact(ctx context.Context, dir string, planner Planner, comp
871872
return false, ulid.ULID{}, errors.Wrap(err, "create compaction group dir")
872873
}
873874

875+
defer func() {
876+
if p := recover(); p != nil {
877+
var sb strings.Builder
878+
879+
cgIDs := cg.IDs()
880+
for i, blid := range cgIDs {
881+
_, _ = sb.WriteString(blid.String())
882+
if i < len(cgIDs)-1 {
883+
_, _ = sb.WriteString(",")
884+
}
885+
}
886+
rerr = fmt.Errorf("paniced while compacting %s: %v", sb.String(), p)
887+
}
888+
}()
889+
874890
errChan := make(chan error, 1)
875891
err := tracing.DoInSpanWithErr(ctx, "compaction_group", func(ctx context.Context) (err error) {
876892
shouldRerun, compID, err = cg.compact(ctx, subDir, planner, comp, blockDeletableChecker, compactionLifecycleCallback, errChan)

Diff for: pkg/query/querier.go

-15
Original file line numberDiff line numberDiff line change
@@ -241,20 +241,6 @@ func aggrsFromFunc(f string) []storepb.Aggr {
241241
return []storepb.Aggr{storepb.Aggr_COUNT, storepb.Aggr_SUM}
242242
}
243243

244-
func storeHintsFromPromHints(hints *storage.SelectHints) *storepb.QueryHints {
245-
return &storepb.QueryHints{
246-
StepMillis: hints.Step,
247-
Func: &storepb.Func{
248-
Name: hints.Func,
249-
},
250-
Grouping: &storepb.Grouping{
251-
By: hints.By,
252-
Labels: hints.Grouping,
253-
},
254-
Range: &storepb.Range{Millis: hints.Range},
255-
}
256-
}
257-
258244
func (q *querier) Select(ctx context.Context, _ bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet {
259245
if hints == nil {
260246
hints = &storage.SelectHints{
@@ -351,7 +337,6 @@ func (q *querier) selectFn(ctx context.Context, hints *storage.SelectHints, ms .
351337
ShardInfo: q.shardInfo,
352338
PartialResponseStrategy: q.partialResponseStrategy,
353339
SkipChunks: q.skipChunks,
354-
QueryHints: storeHintsFromPromHints(hints),
355340
}
356341
if q.isDedupEnabled() {
357342
// Soft ask to sort without replica labels and push them at the end of labelset.

Diff for: pkg/receive/handler.go

+16-19
Original file line numberDiff line numberDiff line change
@@ -681,35 +681,32 @@ type remoteWriteParams struct {
681681
alreadyReplicated bool
682682
}
683683

684-
func (h *Handler) gatherWriteStats(writes ...map[endpointReplica]map[string]trackedSeries) tenantRequestStats {
684+
func (h *Handler) gatherWriteStats(localWrites map[endpointReplica]map[string]trackedSeries) tenantRequestStats {
685685
var stats tenantRequestStats = make(tenantRequestStats)
686686

687-
for _, write := range writes {
688-
for er := range write {
689-
for tenant, series := range write[er] {
690-
samples := 0
687+
for er := range localWrites {
688+
for tenant, series := range localWrites[er] {
689+
samples := 0
691690

692-
for _, ts := range series.timeSeries {
693-
samples += len(ts.Samples)
694-
}
691+
for _, ts := range series.timeSeries {
692+
samples += len(ts.Samples)
693+
}
695694

696-
if st, ok := stats[tenant]; ok {
697-
st.timeseries += len(series.timeSeries)
698-
st.totalSamples += samples
695+
if st, ok := stats[tenant]; ok {
696+
st.timeseries += len(series.timeSeries)
697+
st.totalSamples += samples
699698

700-
stats[tenant] = st
701-
} else {
702-
stats[tenant] = requestStats{
703-
timeseries: len(series.timeSeries),
704-
totalSamples: samples,
705-
}
699+
stats[tenant] = st
700+
} else {
701+
stats[tenant] = requestStats{
702+
timeseries: len(series.timeSeries),
703+
totalSamples: samples,
706704
}
707705
}
708706
}
709707
}
710708

711709
return stats
712-
713710
}
714711

715712
func (h *Handler) fanoutForward(ctx context.Context, params remoteWriteParams) (tenantRequestStats, error) {
@@ -739,7 +736,7 @@ func (h *Handler) fanoutForward(ctx context.Context, params remoteWriteParams) (
739736
return stats, err
740737
}
741738

742-
stats = h.gatherWriteStats(localWrites, remoteWrites)
739+
stats = h.gatherWriteStats(localWrites)
743740

744741
// Prepare a buffered channel to receive the responses from the local and remote writes. Remote writes will all go
745742
// asynchronously and with this capacity we will never block on writing to the channel.

Diff for: pkg/server/http/http.go

-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ func (s *Server) Handle(pattern string, handler http.Handler) {
117117

118118
func registerProfiler(mux *http.ServeMux) {
119119
mux.HandleFunc("/debug/pprof/", pprof.Index)
120-
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
121120
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
122121
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
123122
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)

Diff for: pkg/store/bucket.go

-2
Original file line numberDiff line numberDiff line change
@@ -1571,7 +1571,6 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, seriesSrv storepb.Store
15711571
var resp respSet
15721572
if s.sortingStrategy == sortingStrategyStore {
15731573
resp = newEagerRespSet(
1574-
srv.Context(),
15751574
span,
15761575
10*time.Minute,
15771576
blk.meta.ULID.String(),
@@ -1585,7 +1584,6 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, seriesSrv storepb.Store
15851584
)
15861585
} else {
15871586
resp = newLazyRespSet(
1588-
srv.Context(),
15891587
span,
15901588
10*time.Minute,
15911589
blk.meta.ULID.String(),

Diff for: pkg/store/prometheus.go

+1-13
Original file line numberDiff line numberDiff line change
@@ -163,19 +163,7 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, seriesSrv storepb.Sto
163163
// Don't ask for more than available time. This includes potential `minTime` flag limit.
164164
availableMinTime, _ := p.timestamps()
165165
if r.MinTime < availableMinTime {
166-
// Align min time with the step to avoid missing data when it gets retrieved by the upper layer's PromQL engine.
167-
// This also is necessary when Sidecar uploads a block and then availableMinTime
168-
// becomes a fixed timestamp.
169-
if r.QueryHints != nil && r.QueryHints.StepMillis != 0 {
170-
diff := availableMinTime - r.MinTime
171-
r.MinTime += (diff / r.QueryHints.StepMillis) * r.QueryHints.StepMillis
172-
// Add one more to strictly fit within --min-time -> infinity.
173-
if r.MinTime != availableMinTime {
174-
r.MinTime += r.QueryHints.StepMillis
175-
}
176-
} else {
177-
r.MinTime = availableMinTime
178-
}
166+
r.MinTime = availableMinTime
179167
}
180168

181169
extLsetToRemove := map[string]struct{}{}

Comments (0)