Linux内核支持实时线程和非实时线程同时工作,并且可以给实时线程分配适当的执行比例。当实时线程的运行时间超过设置的比例时,调度器将不再调度实时线程,这样即便实时线程不主动放弃CPU,CPU的占用率也不会达到100%,从而保证其它线程总有执行时间。
具体实验可以参考下面这篇博客:
Linux实时调度策略(SCHED_RR)和CFS(SCHED_OTHER)之间的区别
下面我们分析一下它的内核实现机制。从上篇博客我们知道,关键的变量是rt_throttled。系统在调度器初始化阶段会为RT线程分配默认的CPU带宽。
而初始化bandwidth依赖的两个关键变量,均来自/proc/sys/kernel下的两个节点配置(sched_rt_period_us和sched_rt_runtime_us):
我们尝试调试一下它的变化逻辑,加入下面的补丁(注意:下面的diff是反向生成的,以“-”开头的行才是实际加入的调试代码):
diff --git a/linux-5.4.138/kernel/sched/rt.c b/linux-5.4.138/kernel/sched/rt.c
index 1e102783d..2dffb8762 100644
--- a/linux-5.4.138/kernel/sched/rt.c
+++ b/linux-5.4.138/kernel/sched/rt.c
@@ -763,10 +763,6 @@ static void __disable_runtime(struct rq *rq)
* runtime - in which case borrowing doesn't make sense.
*/
rt_rq->rt_runtime = RUNTIME_INF;
- //if(strcmp("a.out", current->comm) == 0)
- {
- printk("%s line %d, throttled is %d, set to zero, comm %s.rq_rq = %p\n", __func__, __LINE__, rt_rq->rt_throttled, current->comm,rt_rq);
- }
rt_rq->rt_throttled = 0;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
raw_spin_unlock(&rt_b->rt_runtime_lock);
@@ -794,10 +790,6 @@ static void __enable_runtime(struct rq *rq)
raw_spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_runtime = rt_b->rt_runtime;
rt_rq->rt_time = 0;
- //if(strcmp("a.out", current->comm) == 0)
- {
- printk("%s line %d, throttled is %d, set to zero, comm %s.rq_rq = %p\n", __func__, __LINE__, rt_rq->rt_throttled, current->comm,rt_rq);
- }
rt_rq->rt_throttled = 0;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
raw_spin_unlock(&rt_b->rt_runtime_lock);
@@ -868,11 +860,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
runtime = rt_rq->rt_runtime;
rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
- //if(strcmp("a.out", current->comm) == 0)
- {
- printk("%s line %d, throttled is %d, set to zero, comm %s.rq_rq = %p\n", __func__, __LINE__, rt_rq->rt_throttled, current->comm,rt_rq);
- }
-
rt_rq->rt_throttled = 0;
enqueue = 1;
@@ -943,9 +930,8 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
* but accrue some time due to boosting.
*/
if (likely(rt_b->rt_runtime)) {
- printk_deferred_once("sched: RT throttling activated\n");
- printk("%s line %d, throttled set to one, comm %s.pree is %d.rq_rq = %p\n", __func__, __LINE__, current->comm, rt_rq->rt_throttled, rt_rq);
rt_rq->rt_throttled = 1;
+ printk_deferred_once("sched: RT throttling activated\n");
} else {
/*
* In case we did anyway, make it go away,
@@ -1586,20 +1572,12 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
- static unsigned long counter = 0;
struct task_struct *p;
WARN_ON_ONCE(prev || rf);
if (!sched_rt_runnable(rq))
- {
- counter ++;
- if(counter % 10 == 0)
- {
- printk("%s line %d. comm %s, pick next is null, throttled.\n", __func__, __LINE__, prev->comm);
- }
return NULL;
- }
p = _pick_next_task_rt(rq);
set_next_task_rt(rq, p, true);
修改内核代码kernel/sched/rt.c,增加调试信息,然后重新编译内核并启动。
编写用户态测试用例,基于POSIX线程接口创建实时线程:
#include <string.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
/*
 * Thread body for the RT-throttling experiment.
 *
 * A scratch pthread attribute object is used only to query and print the
 * inherit-sched mode, the scheduling policy (SCHED_RR) and the priority
 * range of that policy. The calling thread itself is then switched to
 * SCHED_RR with priority 1 through pthread_setschedparam(), after which it
 * spins forever without yielding the CPU.
 *
 * arg is unused. The function never returns; the trailing return statement
 * only satisfies the void * signature.
 */
void *child_thread(void *arg)
{
    int inherit = 0;
    int policy = 0;
    int max_priority = 0;
    int min_priority = 0;
    int rc;
    pthread_attr_t attr;
    struct sched_param param = { .sched_priority = 0 };
    struct sched_param sp = { .sched_priority = 0 };

    (void)arg;

    pthread_attr_init(&attr);

    pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
    pthread_attr_getinheritsched(&attr, &inherit);
    if (inherit == PTHREAD_EXPLICIT_SCHED) {
        printf("Inheritsched:PTHREAD_EXPLICIT_SCHED\n");
    }
    if (inherit == PTHREAD_INHERIT_SCHED) {
        printf("Inheritsched:PTHREAD_INHERIT_SCHED\n");
    }

    pthread_attr_setschedpolicy(&attr, SCHED_RR);
    pthread_attr_getschedpolicy(&attr, &policy);
    if (policy == SCHED_FIFO) {
        printf("Schedpolicy:SCHED_FIFO\n");
    }
    if (policy == SCHED_RR) {
        printf("Schedpolicy:SCHED_RR\n");
    }
    if (policy == SCHED_OTHER) {
        printf("Schedpolicy:SCHED_OTHER\n");
    }

    max_priority = sched_get_priority_max(policy);
    min_priority = sched_get_priority_min(policy);
    printf("Maxpriority:%d\n", max_priority);
    printf("Minpriority:%d\n", min_priority);

    /* Only recorded in attr; attr is never used to create a thread. */
    param.sched_priority = max_priority;
    pthread_attr_setschedparam(&attr, &param);

    /* Actually set the sched params for the current thread. */
    sp.sched_priority = 1;
    rc = pthread_setschedparam(pthread_self(), policy, &sp);
    if (rc == 0) {
        printf("IO Thread #%lu using high-priority scheduler!",
               (unsigned long)pthread_self());
    } else {
        /* Without this the thread would silently keep its old policy. */
        fprintf(stderr, "pthread_setschedparam failed: %s\n", strerror(rc));
    }
    /* Prints the value stored in attr, not the priority applied above. */
    printf("sched_priority:%d\n", param.sched_priority);

    /* attr is no longer needed; release it before the endless loop. */
    pthread_attr_destroy(&attr);

    /* Busy loop: never gives up the CPU voluntarily. */
    while (1) {
    }

    return NULL;
}
/*
 * Entry point of the test program: starts child_thread with default
 * attributes and waits for it with pthread_join().
 *
 * Returns 1 if the thread could not be created, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    pthread_t child_thread_id;
    int rc;

    (void)argc;
    (void)argv;

    rc = pthread_create(&child_thread_id, NULL, child_thread, NULL);
    if (rc != 0) {
        /* Joining an uninitialized pthread_t would be undefined behavior. */
        fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
        return 1;
    }

    pthread_join(child_thread_id, NULL);
    return 0;
}
编译生成a.out.
修改sched_rt_runtime_us的值,将RT线程的占比从95%调整为30%。
root@caozilong-Vostro-3268:/proc/sys/kernel#
root@caozilong-Vostro-3268:/proc/sys/kernel# cat sched_rt_runtime_us
950000
root@caozilong-Vostro-3268:/proc/sys/kernel# cat sched_rt_period_us
1000000
root@caozilong-Vostro-3268:/proc/sys/kernel# echo 300000 > sched_rt_runtime_us
root@caozilong-Vostro-3268:/proc/sys/kernel# cat sched_rt_runtime_us
300000
root@caozilong-Vostro-3268:/proc/sys/kernel# cat sched_rt_period_us
1000000
root@caozilong-Vostro-3268:/proc/sys/kernel#
PC为4核,所以将a.out运行四次,可见CPU占用率稳定在30%。另外,系统中共有8个a.out线程在运行,这是因为每个测试程序包含两个线程:一个主线程和一个实时线程。四个CPU上同时存在四个最高优先级的实时线程,它们共同瓜分了30%的CPU占用率。
dmesg调试输出(其中pick_next_task_rt一行的comm显示为(efault),是因为该函数被调用时prev为NULL,补丁中打印的prev->comm不是有效的字符串指针):
[ 152.226547] pick_next_task_rt line 1599. comm (efault), pick next is null, throttled.
[ 152.543163] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 152.543167] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 152.543170] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 152.543172] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 153.143066] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 153.143069] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000009dc5f730
[ 153.144062] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000b2e03ff9
[ 153.144064] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000007cc53a4d
[ 153.543199] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 153.543203] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 153.543205] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 153.543208] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 154.143035] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000009dc5f730
[ 154.144034] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000b2e03ff9
[ 154.144037] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000007cc53a4d
[ 154.144040] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 154.149441] pick_next_task_rt line 1599. comm (efault), pick next is null, throttled.
[ 154.543167] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 154.543172] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 154.543175] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 154.543177] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 155.143008] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 155.143013] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000007cc53a4d
[ 155.143017] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000009dc5f730
[ 155.144005] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000b2e03ff9
[ 155.161285] pick_next_task_rt line 1599. comm (efault), pick next is null, throttled.
[ 155.543133] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 155.543136] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 155.543137] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 155.543139] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 156.142976] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000009dc5f730
[ 156.143977] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000007cc53a4d
[ 156.143980] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 156.144977] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000b2e03ff9
[ 156.543105] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 156.543110] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 156.543112] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 156.543115] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 157.142952] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000007cc53a4d
[ 157.142956] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000009dc5f730
[ 157.142957] pick_next_task_rt line 1599. comm (efault), pick next is null, throttled.
[ 157.142963] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 157.143961] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000b2e03ff9
[ 157.146095] pick_next_task_rt line 1599. comm (efault), pick next is null, throttled.
[ 157.543078] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 157.543082] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 157.543085] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 157.543087] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 158.142923] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000009dc5f730
[ 158.143920] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 000000007cc53a4d
[ 158.143923] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000b2e03ff9
[ 158.143925] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 158.543050] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000d110e197
[ 158.543054] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000009dc5f730
[ 158.543057] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 000000007cc53a4d
[ 158.543059] do_sched_rt_period_timer line 873, throttled is 1, set to zero, comm swapper/1.rq_rq = 00000000b2e03ff9
[ 159.142897] sched_rt_runtime_exceeded line 947, throttled set to one, comm a.out.pree is 0.rq_rq = 00000000d110e197
[ 159.142902] sched_rt_runtime_exceeded line 947, throttl