Skip to content

Commit 1ff6882

Browse files
chleroyWim Van Sebroeck
authored and
Wim Van Sebroeck
committed
watchdog: core: make sure the watchdog_worker is not deferred
commit 4cd13c2 ("softirq: Let ksoftirqd do its job") has the effect of deferring timer handling in case of high CPU load, hence delaying the delayed work although the worker is running with high realtime priority. As hrtimers are not managed by softirqs, this patch replaces the delayed work by a plain work and uses an hrtimer to schedule that work. Signed-off-by: Christophe Leroy <[email protected]> Reviewed-by: Guenter Roeck <[email protected]> Signed-off-by: Guenter Roeck <[email protected]> Signed-off-by: Wim Van Sebroeck <[email protected]>
1 parent 1d2e5eb commit 1ff6882

File tree

1 file changed

+52
-34
lines changed

1 file changed

+52
-34
lines changed

drivers/watchdog/watchdog_dev.c

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@
3636
#include <linux/errno.h> /* For the -ENODEV/... values */
3737
#include <linux/fs.h> /* For file operations */
3838
#include <linux/init.h> /* For __init/__exit/... */
39-
#include <linux/jiffies.h> /* For timeout functions */
39+
#include <linux/hrtimer.h> /* For hrtimers */
4040
#include <linux/kernel.h> /* For printk/panic/... */
4141
#include <linux/kref.h> /* For data references */
42-
#include <linux/kthread.h> /* For kthread_delayed_work */
42+
#include <linux/kthread.h> /* For kthread_work */
4343
#include <linux/miscdevice.h> /* For handling misc devices */
4444
#include <linux/module.h> /* For module stuff/... */
4545
#include <linux/mutex.h> /* For mutexes */
@@ -67,9 +67,10 @@ struct watchdog_core_data {
6767
struct cdev cdev;
6868
struct watchdog_device *wdd;
6969
struct mutex lock;
70-
unsigned long last_keepalive;
71-
unsigned long last_hw_keepalive;
72-
struct kthread_delayed_work work;
70+
ktime_t last_keepalive;
71+
ktime_t last_hw_keepalive;
72+
struct hrtimer timer;
73+
struct kthread_work work;
7374
unsigned long status; /* Internal status bits */
7475
#define _WDOG_DEV_OPEN 0 /* Opened ? */
7576
#define _WDOG_ALLOW_RELEASE 1 /* Did we receive the magic char ? */
@@ -109,18 +110,19 @@ static inline bool watchdog_need_worker(struct watchdog_device *wdd)
109110
(t && !watchdog_active(wdd) && watchdog_hw_running(wdd));
110111
}
111112

112-
static long watchdog_next_keepalive(struct watchdog_device *wdd)
113+
static ktime_t watchdog_next_keepalive(struct watchdog_device *wdd)
113114
{
114115
struct watchdog_core_data *wd_data = wdd->wd_data;
115116
unsigned int timeout_ms = wdd->timeout * 1000;
116-
unsigned long keepalive_interval;
117-
unsigned long last_heartbeat;
118-
unsigned long virt_timeout;
117+
ktime_t keepalive_interval;
118+
ktime_t last_heartbeat, latest_heartbeat;
119+
ktime_t virt_timeout;
119120
unsigned int hw_heartbeat_ms;
120121

121-
virt_timeout = wd_data->last_keepalive + msecs_to_jiffies(timeout_ms);
122+
virt_timeout = ktime_add(wd_data->last_keepalive,
123+
ms_to_ktime(timeout_ms));
122124
hw_heartbeat_ms = min_not_zero(timeout_ms, wdd->max_hw_heartbeat_ms);
123-
keepalive_interval = msecs_to_jiffies(hw_heartbeat_ms / 2);
125+
keepalive_interval = ms_to_ktime(hw_heartbeat_ms / 2);
124126

125127
if (!watchdog_active(wdd))
126128
return keepalive_interval;
@@ -130,39 +132,45 @@ static long watchdog_next_keepalive(struct watchdog_device *wdd)
130132
* after the most recent ping from userspace, the last
131133
* worker ping has to come in hw_heartbeat_ms before this timeout.
132134
*/
133-
last_heartbeat = virt_timeout - msecs_to_jiffies(hw_heartbeat_ms);
134-
return min_t(long, last_heartbeat - jiffies, keepalive_interval);
135+
last_heartbeat = ktime_sub(virt_timeout, ms_to_ktime(hw_heartbeat_ms));
136+
latest_heartbeat = ktime_sub(last_heartbeat, ktime_get());
137+
if (ktime_before(latest_heartbeat, keepalive_interval))
138+
return latest_heartbeat;
139+
return keepalive_interval;
135140
}
136141

137142
static inline void watchdog_update_worker(struct watchdog_device *wdd)
138143
{
139144
struct watchdog_core_data *wd_data = wdd->wd_data;
140145

141146
if (watchdog_need_worker(wdd)) {
142-
long t = watchdog_next_keepalive(wdd);
147+
ktime_t t = watchdog_next_keepalive(wdd);
143148

144149
if (t > 0)
145-
kthread_mod_delayed_work(watchdog_kworker,
146-
&wd_data->work, t);
150+
hrtimer_start(&wd_data->timer, t, HRTIMER_MODE_REL);
147151
} else {
148-
kthread_cancel_delayed_work_sync(&wd_data->work);
152+
hrtimer_cancel(&wd_data->timer);
149153
}
150154
}
151155

152156
static int __watchdog_ping(struct watchdog_device *wdd)
153157
{
154158
struct watchdog_core_data *wd_data = wdd->wd_data;
155-
unsigned long earliest_keepalive = wd_data->last_hw_keepalive +
156-
msecs_to_jiffies(wdd->min_hw_heartbeat_ms);
159+
ktime_t earliest_keepalive, now;
157160
int err;
158161

159-
if (time_is_after_jiffies(earliest_keepalive)) {
160-
kthread_mod_delayed_work(watchdog_kworker, &wd_data->work,
161-
earliest_keepalive - jiffies);
162+
earliest_keepalive = ktime_add(wd_data->last_hw_keepalive,
163+
ms_to_ktime(wdd->min_hw_heartbeat_ms));
164+
now = ktime_get();
165+
166+
if (ktime_after(earliest_keepalive, now)) {
167+
hrtimer_start(&wd_data->timer,
168+
ktime_sub(earliest_keepalive, now),
169+
HRTIMER_MODE_REL);
162170
return 0;
163171
}
164172

165-
wd_data->last_hw_keepalive = jiffies;
173+
wd_data->last_hw_keepalive = now;
166174

167175
if (wdd->ops->ping)
168176
err = wdd->ops->ping(wdd); /* ping the watchdog */
@@ -195,7 +203,7 @@ static int watchdog_ping(struct watchdog_device *wdd)
195203

196204
set_bit(_WDOG_KEEPALIVE, &wd_data->status);
197205

198-
wd_data->last_keepalive = jiffies;
206+
wd_data->last_keepalive = ktime_get();
199207
return __watchdog_ping(wdd);
200208
}
201209

@@ -210,16 +218,24 @@ static void watchdog_ping_work(struct kthread_work *work)
210218
{
211219
struct watchdog_core_data *wd_data;
212220

213-
wd_data = container_of(container_of(work, struct kthread_delayed_work,
214-
work),
215-
struct watchdog_core_data, work);
221+
wd_data = container_of(work, struct watchdog_core_data, work);
216222

217223
mutex_lock(&wd_data->lock);
218224
if (watchdog_worker_should_ping(wd_data))
219225
__watchdog_ping(wd_data->wdd);
220226
mutex_unlock(&wd_data->lock);
221227
}
222228

229+
static enum hrtimer_restart watchdog_timer_expired(struct hrtimer *timer)
230+
{
231+
struct watchdog_core_data *wd_data;
232+
233+
wd_data = container_of(timer, struct watchdog_core_data, timer);
234+
235+
kthread_queue_work(watchdog_kworker, &wd_data->work);
236+
return HRTIMER_NORESTART;
237+
}
238+
223239
/*
224240
* watchdog_start: wrapper to start the watchdog.
225241
* @wdd: the watchdog device to start
@@ -234,15 +250,15 @@ static void watchdog_ping_work(struct kthread_work *work)
234250
static int watchdog_start(struct watchdog_device *wdd)
235251
{
236252
struct watchdog_core_data *wd_data = wdd->wd_data;
237-
unsigned long started_at;
253+
ktime_t started_at;
238254
int err;
239255

240256
if (watchdog_active(wdd))
241257
return 0;
242258

243259
set_bit(_WDOG_KEEPALIVE, &wd_data->status);
244260

245-
started_at = jiffies;
261+
started_at = ktime_get();
246262
if (watchdog_hw_running(wdd) && wdd->ops->ping)
247263
err = wdd->ops->ping(wdd);
248264
else
@@ -928,7 +944,9 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
928944
if (IS_ERR_OR_NULL(watchdog_kworker))
929945
return -ENODEV;
930946

931-
kthread_init_delayed_work(&wd_data->work, watchdog_ping_work);
947+
kthread_init_work(&wd_data->work, watchdog_ping_work);
948+
hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
949+
wd_data->timer.function = watchdog_timer_expired;
932950

933951
if (wdd->id == 0) {
934952
old_wd_data = wd_data;
@@ -964,7 +982,7 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
964982
}
965983

966984
/* Record time of most recent heartbeat as 'just before now'. */
967-
wd_data->last_hw_keepalive = jiffies - 1;
985+
wd_data->last_hw_keepalive = ktime_sub(ktime_get(), 1);
968986

969987
/*
970988
* If the watchdog is running, prevent its driver from being unloaded,
@@ -974,8 +992,7 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
974992
__module_get(wdd->ops->owner);
975993
kref_get(&wd_data->kref);
976994
if (handle_boot_enabled)
977-
kthread_queue_delayed_work(watchdog_kworker,
978-
&wd_data->work, 0);
995+
hrtimer_start(&wd_data->timer, 0, HRTIMER_MODE_REL);
979996
else
980997
pr_info("watchdog%d running and kernel based pre-userspace handler disabled\n",
981998
wdd->id);
@@ -1012,7 +1029,8 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
10121029
watchdog_stop(wdd);
10131030
}
10141031

1015-
kthread_cancel_delayed_work_sync(&wd_data->work);
1032+
hrtimer_cancel(&wd_data->timer);
1033+
kthread_cancel_work_sync(&wd_data->work);
10161034

10171035
kref_put(&wd_data->kref, watchdog_core_data_release);
10181036
}

0 commit comments

Comments
 (0)