diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17cb0761ff65..123bc16ad3d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1121,6 +1121,13 @@ struct task_struct {
 	size_t				sas_ss_size;
 	unsigned int			sas_ss_flags;
 
+	/*
+	 * Number of signals received by an RT task between scheduling ticks.
+	 * Used to throttle an RT task when too many signals (e.g., from POSIX
+	 * timers) are sent to it, which can otherwise cause an RCU stall.
+	 */
+	atomic_t			rt_signals_recv_count; /* used outside of the rq lock */
+
 	struct callback_head		*task_works;
 
 #ifdef CONFIG_AUDIT
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d44efa0d0611..9def826bd35f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4779,6 +4779,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 		p->policy = SCHED_NORMAL;
 		p->static_prio = NICE_TO_PRIO(0);
 		p->rt_priority = 0;
+		atomic_set(&p->rt_signals_recv_count, 0);
 	} else if (PRIO_TO_NICE(p->static_prio) < 0)
 		p->static_prio = NICE_TO_PRIO(0);
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..9b22d67d1746 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -24,6 +24,15 @@ int sysctl_sched_rt_period = 1000000;
  */
int sysctl_sched_rt_runtime = 950000;
 
+/*
+ * To avoid an RCU stall caused by an RT task receiving a large number of
+ * signals (e.g., from POSIX timers), the task needs to be throttled.
+ * When the number of signals received by an RT task during one scheduling
+ * tick period exceeds this threshold, the task is throttled.
+ * The value of 100 has not been thoroughly tested and may need adjustment.
+ */
+#define RT_RECV_SIGNAL_THROTTLE_THRESHOLD	100
+
 #ifdef CONFIG_SYSCTL
 static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
@@ -951,7 +960,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	return rt_task_of(rt_se)->prio;
 }
 
-static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
+static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq, int rt_signal_recv)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
 
@@ -966,7 +975,15 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (runtime == RUNTIME_INF)
 		return 0;
 
-	if (rt_rq->rt_time > runtime) {
+	/*
+	 * When a large number of signals are sent to this task (e.g., by POSIX
+	 * timers), the measured delta time deviates significantly from real
+	 * time due to the overhead of handling signals. For RT tasks, this can
+	 * cause an RCU stall. To avoid this, also throttle the task when the
+	 * number of signals received exceeds a certain threshold.
+	 */
+	if (rt_rq->rt_time > runtime ||
+	    rt_signal_recv >= RT_RECV_SIGNAL_THROTTLE_THRESHOLD) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		/*
@@ -1021,7 +1038,9 @@ static void update_curr_rt(struct rq *rq)
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
 			raw_spin_lock(&rt_rq->rt_runtime_lock);
 			rt_rq->rt_time += delta_exec;
-			exceeded = sched_rt_runtime_exceeded(rt_rq);
+			exceeded = sched_rt_runtime_exceeded(
+				rt_rq,
+				atomic_read(&curr->rt_signals_recv_count));
 			if (exceeded)
 				resched_curr(rq);
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
@@ -1029,6 +1048,7 @@ static void update_curr_rt(struct rq *rq)
 			do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
 		}
 	}
+	atomic_set(&curr->rt_signals_recv_count, 0);
 }
 
 static void
diff --git a/kernel/signal.c b/kernel/signal.c
index bdca529f0f7b..d58e0ba9336c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -629,6 +629,15 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask,
 	bool resched_timer = false;
 	int signr;
 
+	/*
+	 * To prevent an RCU stall caused by an RT task receiving too many
+	 * signals, count all signals regardless of their type.
+	 * Based on this counter, the RT scheduler decides whether the task
+	 * should be throttled.
+	 */
+	if (tsk->policy == SCHED_FIFO || tsk->policy == SCHED_RR)
+		atomic_inc(&tsk->rt_signals_recv_count);
+
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
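
For reference, a minimal userspace sketch of the scenario this patch targets: a SCHED_FIFO task flooded with POSIX timer signals, so that dequeue_signal() bumps rt_signals_recv_count past RT_RECV_SIGNAL_THROTTLE_THRESHOLD between two scheduling ticks. The program, the 5us period, and the use of SIGRTMIN are illustrative assumptions, not part of the patch; whether a given period actually crosses the threshold depends on HZ and timer resolution.

/*
 * Hypothetical reproducer sketch (not part of the patch). Build with:
 *   gcc -O2 repro.c -o repro -lrt
 * and run as root so sched_setscheduler(SCHED_FIFO) succeeds.
 */
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>

static volatile sig_atomic_t hits;

static void handler(int sig)
{
	/* Each delivery is dequeued via dequeue_signal() in the kernel. */
	(void)sig;
	hits++;
}

int main(void)
{
	struct sched_param sp = { .sched_priority = 50 };
	struct sigevent sev = { 0 };
	struct itimerspec its = { 0 };
	struct sigaction sa = { 0 };
	timer_t timer;

	/* Make this task RT so tsk->policy == SCHED_FIFO in dequeue_signal(). */
	if (sched_setscheduler(0, SCHED_FIFO, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}

	sa.sa_handler = handler;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGRTMIN, &sa, NULL);

	/* POSIX timer that signals this process on every expiry. */
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIGRTMIN;
	if (timer_create(CLOCK_MONOTONIC, &sev, &timer)) {
		perror("timer_create");
		return 1;
	}

	/*
	 * 5us period: roughly 200 expirations per millisecond, i.e. well above
	 * 100 signals per tick at HZ=1000 (assuming high-resolution timers;
	 * the achievable delivery rate is hardware-dependent).
	 */
	its.it_value.tv_nsec = 5000;
	its.it_interval.tv_nsec = 5000;
	timer_settime(timer, 0, &its, NULL);

	/* Spin as an RT CPU hog; without the patch this can stall RCU. */
	for (;;)
		;
}

With the patch applied, the tick that observes the per-task signal counter at or above the threshold throttles the task through the existing RT bandwidth path instead of letting the signal overhead keep it on the CPU indefinitely.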