diff -drupN a/include/trace/events/sched.h b/include/trace/events/sched.h
--- a/include/trace/events/sched.h	2018-08-06 17:23:04.000000000 +0300
+++ b/include/trace/events/sched.h	2022-06-12 05:28:14.000000000 +0300
@@ -219,7 +219,7 @@ DECLARE_EVENT_CLASS(sched_process_templa
 DEFINE_EVENT(sched_process_template, sched_process_free,
 	     TP_PROTO(struct task_struct *p),
 	     TP_ARGS(p));
-	     
+
 
 /*
  * Tracepoint for a task exiting:
@@ -374,6 +374,30 @@ DEFINE_EVENT(sched_stat_template, sched_
 	     TP_ARGS(tsk, delay));
 
 /*
+ * Tracepoint for recording the cause of uninterruptible sleep.
+ */
+TRACE_EVENT(sched_blocked_reason,
+
+	TP_PROTO(struct task_struct *tsk),
+
+	TP_ARGS(tsk),
+
+	TP_STRUCT__entry(
+		__field( pid_t, pid )
+		__field( void*, caller )
+		__field( bool, io_wait )
+	),
+
+	TP_fast_assign(
+		__entry->pid = tsk->pid;
+		__entry->caller = (void *)get_wchan(tsk);
+		__entry->io_wait = tsk->in_iowait;
+	),
+
+	TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller)
+);
+
+/*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
  */
@@ -562,6 +586,520 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
 
 	TP_printk("cpu=%d", __entry->cpu)
 );
+
+TRACE_EVENT(sched_contrib_scale_f,
+
+	TP_PROTO(int cpu, unsigned long freq_scale_factor,
+		 unsigned long cpu_scale_factor),
+
+	TP_ARGS(cpu, freq_scale_factor, cpu_scale_factor),
+
+	TP_STRUCT__entry(
+		__field(int, cpu)
+		__field(unsigned long, freq_scale_factor)
+		__field(unsigned long, cpu_scale_factor)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->freq_scale_factor = freq_scale_factor;
+		__entry->cpu_scale_factor = cpu_scale_factor;
+	),
+
+	TP_printk("cpu=%d freq_scale_factor=%lu cpu_scale_factor=%lu",
+		  __entry->cpu, __entry->freq_scale_factor,
+		  __entry->cpu_scale_factor)
+);
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int walt_ravg_window;
+extern bool walt_disabled;
+#endif
+
+/*
+ * Tracepoint for accounting sched averages for tasks.
+ */
+TRACE_EVENT(sched_load_avg_task,
+
+	TP_PROTO(struct task_struct *tsk, struct sched_avg *avg, void *_ravg),
+
+	TP_ARGS(tsk, avg, _ravg),
+
+	TP_STRUCT__entry(
+		__array( char, comm, TASK_COMM_LEN )
+		__field( pid_t, pid )
+		__field( int, cpu )
+		__field( unsigned long, load_avg )
+		__field( unsigned long, util_avg )
+		__field( unsigned long, util_avg_pelt )
+		__field( u32, util_avg_walt )
+		__field( u64, load_sum )
+		__field( u32, util_sum )
+		__field( u32, period_contrib )
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid = tsk->pid;
+		__entry->cpu = task_cpu(tsk);
+		__entry->load_avg = avg->load_avg;
+		__entry->util_avg = avg->util_avg;
+		__entry->load_sum = avg->load_sum;
+		__entry->util_sum = avg->util_sum;
+		__entry->period_contrib = avg->period_contrib;
+		__entry->util_avg_pelt = avg->util_avg;
+		__entry->util_avg_walt = 0;
+#ifdef CONFIG_SCHED_WALT
+		__entry->util_avg_walt = ((struct ravg *)_ravg)->demand /
+					 (walt_ravg_window >> SCHED_CAPACITY_SHIFT);
+		if (!walt_disabled && sysctl_sched_use_walt_task_util)
+			__entry->util_avg = __entry->util_avg_walt;
+#endif
+	),
+	TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu "
+			"util_avg_pelt=%lu util_avg_walt=%u load_sum=%llu"
+			" util_sum=%u period_contrib=%u",
+		  __entry->comm,
+		  __entry->pid,
+		  __entry->cpu,
+		  __entry->load_avg,
+		  __entry->util_avg,
+		  __entry->util_avg_pelt,
+		  __entry->util_avg_walt,
+		  (u64)__entry->load_sum,
+		  (u32)__entry->util_sum,
+		  (u32)__entry->period_contrib)
+);
+
+/*
+ * Tracepoint for accounting sched averages for cpus.
+ */
+TRACE_EVENT(sched_load_avg_cpu,
+
+	TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
+
+	TP_ARGS(cpu, cfs_rq),
+
+	TP_STRUCT__entry(
+		__field( int, cpu )
+		__field( unsigned long, load_avg )
+		__field( unsigned long, util_avg )
+		__field( unsigned long, util_avg_pelt )
+		__field( u32, util_avg_walt )
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->load_avg = cfs_rq->avg.load_avg;
+		__entry->util_avg = cfs_rq->avg.util_avg;
+		__entry->util_avg_pelt = cfs_rq->avg.util_avg;
+		__entry->util_avg_walt = 0;
+#ifdef CONFIG_SCHED_WALT
+		__entry->util_avg_walt = div64_ul(cpu_rq(cpu)->prev_runnable_sum,
+						  walt_ravg_window >> SCHED_CAPACITY_SHIFT);
+		if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+			__entry->util_avg = __entry->util_avg_walt;
+#endif
+	),
+
+	TP_printk("cpu=%d load_avg=%lu util_avg=%lu "
+		  "util_avg_pelt=%lu util_avg_walt=%u",
+		  __entry->cpu, __entry->load_avg, __entry->util_avg,
+		  __entry->util_avg_pelt, __entry->util_avg_walt)
+);
+
+/*
+ * Tracepoint for sched_tune_config settings
+ */
+TRACE_EVENT(sched_tune_config,
+
+	TP_PROTO(int boost),
+
+	TP_ARGS(boost),
+
+	TP_STRUCT__entry(
+		__field( int, boost )
+	),
+
+	TP_fast_assign(
+		__entry->boost = boost;
+	),
+
+	TP_printk("boost=%d ", __entry->boost)
+);
+
+/*
+ * Tracepoint for accounting CPU boosted utilization
+ */
+TRACE_EVENT(sched_boost_cpu,
+
+	TP_PROTO(int cpu, unsigned long util, long margin),
+
+	TP_ARGS(cpu, util, margin),
+
+	TP_STRUCT__entry(
+		__field( int, cpu )
+		__field( unsigned long, util )
+		__field( long, margin )
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->util = util;
+		__entry->margin = margin;
+	),
+
+	TP_printk("cpu=%d util=%lu margin=%ld",
+		  __entry->cpu,
+		  __entry->util,
+		  __entry->margin)
+);
+
+/*
+ * Tracepoint for schedtune_tasks_update
+ */
+TRACE_EVENT(sched_tune_tasks_update,
+
+	TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx,
+		 int boost, int max_boost),
+
+	TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost),
+
+	TP_STRUCT__entry(
+		__array( char, comm, TASK_COMM_LEN )
+		__field( pid_t, pid )
+		__field( int, cpu )
+		__field( int, tasks )
+		__field( int, idx )
+		__field( int, boost )
+		__field( int, max_boost )
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid = tsk->pid;
+		__entry->cpu = cpu;
+		__entry->tasks = tasks;
+		__entry->idx = idx;
+		__entry->boost = boost;
+		__entry->max_boost = max_boost;
+	),
+
+	TP_printk("pid=%d comm=%s "
+			"cpu=%d tasks=%d idx=%d boost=%d max_boost=%d",
+		__entry->pid, __entry->comm,
+		__entry->cpu, __entry->tasks, __entry->idx,
+		__entry->boost, __entry->max_boost)
+);
+
+/*
+ * Tracepoint for schedtune_boostgroup_update
+ */
+TRACE_EVENT(sched_tune_boostgroup_update,
+
+	TP_PROTO(int cpu, int variation, int max_boost),
+
+	TP_ARGS(cpu, variation, max_boost),
+
+	TP_STRUCT__entry(
+		__field( int, cpu )
+		__field( int, variation )
+		__field( int, max_boost )
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->variation = variation;
+		__entry->max_boost = max_boost;
+	),
+
+	TP_printk("cpu=%d variation=%d max_boost=%d",
+		  __entry->cpu, __entry->variation, __entry->max_boost)
+);
+
+/*
+ * Tracepoint for accounting task boosted utilization
+ */
+TRACE_EVENT(sched_boost_task,
+
+	TP_PROTO(struct task_struct *tsk, unsigned long util, long margin),
+
+	TP_ARGS(tsk, util, margin),
+
+	TP_STRUCT__entry(
+		__array( char, comm, TASK_COMM_LEN )
+		__field( pid_t, pid )
+		__field( unsigned long, util )
+		__field( long, margin )
+
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid = tsk->pid;
+		__entry->util = util;
+		__entry->margin = margin;
+	),
+
+	TP_printk("comm=%s pid=%d util=%lu margin=%ld",
+		  __entry->comm, __entry->pid,
+		  __entry->util,
+		  __entry->margin)
+);
+
+/*
+ * Tracepoint for find_best_target
+ */
+TRACE_EVENT(sched_find_best_target,
+
+	TP_PROTO(struct task_struct *tsk, bool prefer_idle,
+		 unsigned long min_util, int start_cpu,
+		 int best_idle, int best_active, int target),
+
+	TP_ARGS(tsk, prefer_idle, min_util, start_cpu,
+		best_idle, best_active, target),
+
+	TP_STRUCT__entry(
+		__array( char, comm, TASK_COMM_LEN )
+		__field( pid_t, pid )
+		__field( unsigned long, min_util )
+		__field( bool, prefer_idle )
+		__field( int, start_cpu )
+		__field( int, best_idle )
+		__field( int, best_active )
+		__field( int, target )
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid = tsk->pid;
+		__entry->min_util = min_util;
+		__entry->prefer_idle = prefer_idle;
+		__entry->start_cpu = start_cpu;
+		__entry->best_idle = best_idle;
+		__entry->best_active = best_active;
+		__entry->target = target;
+	),
+
+	TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d "
+		  "best_idle=%d best_active=%d target=%d",
+		__entry->pid, __entry->comm,
+		__entry->prefer_idle, __entry->start_cpu,
+		__entry->best_idle, __entry->best_active,
+		__entry->target)
+);
+
+/*
+ * Tracepoint for schedtune_tasks_update
+ */
+TRACE_EVENT(sched_tune_filter,
+
+	TP_PROTO(int nrg_delta, int cap_delta,
+		 int nrg_gain, int cap_gain,
+		 int payoff, int region),
+
+	TP_ARGS(nrg_delta, cap_delta, nrg_gain, cap_gain, payoff, region),
+
+	TP_STRUCT__entry(
+		__field( int, nrg_delta )
+		__field( int, cap_delta )
+		__field( int, nrg_gain )
+		__field( int, cap_gain )
+		__field( int, payoff )
+		__field( int, region )
+	),
+
+	TP_fast_assign(
+		__entry->nrg_delta = nrg_delta;
+		__entry->cap_delta = cap_delta;
+		__entry->nrg_gain = nrg_gain;
+		__entry->cap_gain = cap_gain;
+		__entry->payoff = payoff;
+		__entry->region = region;
+	),
+
+	TP_printk("nrg_delta=%d cap_delta=%d nrg_gain=%d cap_gain=%d payoff=%d region=%d",
+		__entry->nrg_delta, __entry->cap_delta,
+		__entry->nrg_gain, __entry->cap_gain,
+		__entry->payoff, __entry->region)
+);
+
+/*
+ * Tracepoint for system overutilized flag
+ */
+TRACE_EVENT(sched_overutilized,
+
+	TP_PROTO(bool overutilized),
+
+	TP_ARGS(overutilized),
+
+	TP_STRUCT__entry(
+		__field( bool, overutilized )
+	),
+
+	TP_fast_assign(
+		__entry->overutilized = overutilized;
+	),
+
+	TP_printk("overutilized=%d",
+		__entry->overutilized ? 1 : 0)
+);
+#ifdef CONFIG_SCHED_WALT
+struct rq;
+
+TRACE_EVENT(walt_update_task_ravg,
+
+	TP_PROTO(struct task_struct *p, struct rq *rq, int evt,
+		 u64 wallclock, u64 irqtime),
+
+	TP_ARGS(p, rq, evt, wallclock, irqtime),
+
+	TP_STRUCT__entry(
+		__array( char, comm, TASK_COMM_LEN )
+		__field( pid_t, pid )
+		__field( pid_t, cur_pid )
+		__field( u64, wallclock )
+		__field( u64, mark_start )
+		__field( u64, delta_m )
+		__field( u64, win_start )
+		__field( u64, delta )
+		__field( u64, irqtime )
+		__field( int, evt )
+		__field( unsigned int, demand )
+		__field( unsigned int, sum )
+		__field( int, cpu )
+		__field( u64, cs )
+		__field( u64, ps )
+		__field( unsigned long, util )
+		__field( u32, curr_window )
+		__field( u32, prev_window )
+		__field( u64, nt_cs )
+		__field( u64, nt_ps )
+		__field( u32, active_windows )
+	),
+
+	TP_fast_assign(
+		__entry->wallclock = wallclock;
+		__entry->win_start = rq->window_start;
+		__entry->delta = (wallclock - rq->window_start);
+		__entry->evt = evt;
+		__entry->cpu = rq->cpu;
+		__entry->cur_pid = rq->curr->pid;
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid = p->pid;
+		__entry->mark_start = p->ravg.mark_start;
+		__entry->delta_m = (wallclock - p->ravg.mark_start);
+		__entry->demand = p->ravg.demand;
+		__entry->sum = p->ravg.sum;
+		__entry->irqtime = irqtime;
+		__entry->cs = rq->curr_runnable_sum;
+		__entry->ps = rq->prev_runnable_sum;
+		__entry->util = rq->prev_runnable_sum << SCHED_CAPACITY_SHIFT;
+		do_div(__entry->util, walt_ravg_window);
+		__entry->curr_window = p->ravg.curr_window;
+		__entry->prev_window = p->ravg.prev_window;
+		__entry->nt_cs = rq->nt_curr_runnable_sum;
+		__entry->nt_ps = rq->nt_prev_runnable_sum;
+		__entry->active_windows = p->ravg.active_windows;
+	),
+
+	TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
+		" cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u"
+		, __entry->wallclock, __entry->win_start, __entry->delta,
+		__entry->evt, __entry->cpu, __entry->cur_pid,
+		__entry->pid, __entry->comm, __entry->mark_start,
+		__entry->delta_m, __entry->demand,
+		__entry->sum, __entry->irqtime,
+		__entry->cs, __entry->ps, __entry->util,
+		__entry->curr_window, __entry->prev_window,
+		__entry->active_windows
+		)
+);
+
+TRACE_EVENT(walt_update_history,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
+		 int evt),
+
+	TP_ARGS(rq, p, runtime, samples, evt),
+
+	TP_STRUCT__entry(
+		__array( char, comm, TASK_COMM_LEN )
+		__field( pid_t, pid )
+		__field( unsigned int, runtime )
+		__field( int, samples )
+		__field( int, evt )
+		__field( u64, demand )
+		__field( u64, walt_avg )
+		__field( unsigned int, pelt_avg )
+		__array( u32, hist, RAVG_HIST_SIZE_MAX )
+		__field( int, cpu )
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid = p->pid;
+		__entry->runtime = runtime;
+		__entry->samples = samples;
+		__entry->evt = evt;
+		__entry->demand = p->ravg.demand;
+		__entry->walt_avg = (__entry->demand << SCHED_CAPACITY_SHIFT);
+		__entry->walt_avg = div_u64(__entry->walt_avg,
+					    walt_ravg_window);
+		__entry->pelt_avg = p->se.avg.util_avg;
+		memcpy(__entry->hist, p->ravg.sum_history,
+		       RAVG_HIST_SIZE_MAX * sizeof(u32));
+		__entry->cpu = rq->cpu;
+	),
+
+	TP_printk("%d (%s): runtime %u samples %d event %d demand %llu"
+		" walt %llu pelt %u (hist: %u %u %u %u %u) cpu %d",
+		__entry->pid, __entry->comm,
+		__entry->runtime, __entry->samples, __entry->evt,
+		__entry->demand,
+		__entry->walt_avg,
+		__entry->pelt_avg,
+		__entry->hist[0], __entry->hist[1],
+		__entry->hist[2], __entry->hist[3],
+		__entry->hist[4], __entry->cpu)
+);
+
+TRACE_EVENT(walt_migration_update_sum,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p),
+
+	TP_ARGS(rq, p),
+
+	TP_STRUCT__entry(
+		__field( int, cpu )
+		__field( int, pid )
+		__field( u64, cs )
+		__field( u64, ps )
+		__field( s64, nt_cs )
+		__field( s64, nt_ps )
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu_of(rq);
+		__entry->cs = rq->curr_runnable_sum;
+		__entry->ps = rq->prev_runnable_sum;
+		__entry->nt_cs = (s64)rq->nt_curr_runnable_sum;
+		__entry->nt_ps = (s64)rq->nt_prev_runnable_sum;
+		__entry->pid = p->pid;
+	),
+
+	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
+		  __entry->cpu, __entry->cs, __entry->ps,
+		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
+);
+#endif /* CONFIG_SCHED_WALT */
+
+#endif /* CONFIG_SMP */
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
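
Note on the WALT-related fields above: sched_load_avg_task, sched_load_avg_cpu, walt_update_task_ravg and walt_update_history all convert a windowed quantity (a task's demand or a runqueue's runnable sum) to the 0..1024 capacity scale as (value << SCHED_CAPACITY_SHIFT) / walt_ravg_window. The stand-alone C sketch below only illustrates that arithmetic and is not part of the patch; SCHED_CAPACITY_SHIFT is 10 in the kernel, while the 20 ms window and 5 ms demand are made-up example values.

/* walt_scale_example.c - hypothetical user-space illustration only.
 * Shows the scaling used by the tracepoints above:
 *   util = (demand << SCHED_CAPACITY_SHIFT) / walt_ravg_window
 */
#include <stdio.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT	10	/* capacity scale is 1 << 10 = 1024 */

int main(void)
{
	uint64_t walt_ravg_window = 20000000;	/* assumed 20 ms window, in ns */
	uint64_t demand = 5000000;		/* example: task ran 5 ms of the window */

	/* Same conversion the tracepoints apply to demand/runnable sums */
	uint64_t util = (demand << SCHED_CAPACITY_SHIFT) / walt_ravg_window;

	printf("demand=%llu ns window=%llu ns util=%llu/1024\n",
	       (unsigned long long)demand,
	       (unsigned long long)walt_ravg_window,
	       (unsigned long long)util);
	return 0;
}

With these numbers util comes out to 256, a quarter of the capacity scale, which is the kind of value sched_load_avg_task reports in util_avg_walt (and in util_avg when sysctl_sched_use_walt_task_util is set and WALT is not disabled). Once the patch is applied and the kernel is built with the corresponding config options, the new events appear under the sched system in tracefs like any other scheduler tracepoint.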