Merge tag 'v3.18' into p/abusse/merge_upgrade
[projects/modsched/linux.git] kernel/sched/cfs/cputime.c
index 9994791..8394b1e 100644
@@ -142,7 +142,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);
 
-       index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
+       index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 
        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);
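
Note on this hunk and the next: mainline replaced the TASK_NICE() macro with the task_nice() inline helper, and the merge simply follows that rename; the classification of positive-nice tasks into CPUTIME_NICE is unchanged. A minimal sketch of the mapping the helper performs, assuming the usual MAX_RT_PRIO/DEFAULT_PRIO values (the real helper lives in include/linux/sched.h and reads p->static_prio); the DEMO_/demo_ names are hypothetical:

/* Sketch only: models how a static priority maps to a nice value. */
#define DEMO_MAX_RT_PRIO        100
#define DEMO_DEFAULT_PRIO       (DEMO_MAX_RT_PRIO + 20)         /* 120 */

static inline int demo_task_nice(int static_prio)
{
        return static_prio - DEMO_DEFAULT_PRIO;         /* -20 .. +19 */
}
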
@@ -169,7 +169,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
        p->gtime += cputime;
 
        /* Add guest time to cpustat. */
-       if (TASK_NICE(p) > 0) {
+       if (task_nice(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
@@ -258,16 +258,22 @@ static __always_inline bool steal_account_process_tick(void)
 {
 #ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
-               u64 steal, st = 0;
+               u64 steal;
+               cputime_t steal_ct;
 
                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;
 
-               st = steal_ticks(steal);
-               this_rq()->prev_steal_time += st * TICK_NSEC;
+               /*
+                * cputime_t may be less precise than nsecs (e.g. if it's
+                * based on jiffies). Let's cast the result to cputime
+                * granularity and account the remainder on the next rounds.
+                */
+               steal_ct = nsecs_to_cputime(steal);
+               this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);
 
-               account_steal_time(st);
-               return st;
+               account_steal_time(steal_ct);
+               return steal_ct;
        }
 #endif
        return false;
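
The steal-time hunk above changes how the nanosecond delta from paravirt_steal_clock() is consumed: the delta is converted to cputime_t (which may be coarser, e.g. jiffy-sized), only the portion that was actually accounted is added to prev_steal_time, and the sub-granularity remainder stays in the delta for the next tick. A minimal user-space model of that remainder carrying, assuming a jiffies-based cputime_t at HZ=1000; all demo_* names are hypothetical, not kernel API:

#include <stdint.h>
#include <stdio.h>

#define DEMO_HZ         1000ULL
#define NSEC_PER_SEC    1000000000ULL
#define DEMO_TICK_NSEC  (NSEC_PER_SEC / DEMO_HZ)        /* 1 jiffy = 1 ms */

static uint64_t demo_prev_steal_time;   /* nanoseconds already accounted */

/* Returns the number of whole ticks newly charged as steal time. */
static uint64_t demo_account_steal(uint64_t steal_clock_ns)
{
        uint64_t steal = steal_clock_ns - demo_prev_steal_time;
        uint64_t ticks = steal / DEMO_TICK_NSEC;        /* ~ nsecs_to_cputime() */

        /* Only consume what was accounted; the sub-tick remainder stays
         * in the delta and is picked up on the next round. */
        demo_prev_steal_time += ticks * DEMO_TICK_NSEC;
        return ticks;
}

int main(void)
{
        /* 1.7 ms of steal so far: 1 tick charged, 0.7 ms carried over. */
        printf("%llu\n", (unsigned long long)demo_account_steal(1700000));
        /* 2.1 ms total: the carried 0.7 ms plus the new 0.4 ms crosses
         * another tick boundary, so one more tick is charged. */
        printf("%llu\n", (unsigned long long)demo_account_steal(2100000));
        return 0;
}
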
@@ -282,24 +288,29 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
        struct signal_struct *sig = tsk->signal;
        cputime_t utime, stime;
        struct task_struct *t;
-
-       times->utime = sig->utime;
-       times->stime = sig->stime;
-       times->sum_exec_runtime = sig->sum_sched_runtime;
+       unsigned int seq, nextseq;
+       unsigned long flags;
 
        rcu_read_lock();
-       /* make sure we can trust tsk->thread_group list */
-       if (!likely(pid_alive(tsk)))
-               goto out;
-
-       t = tsk;
+       /* Attempt a lockless read on the first round. */
+       nextseq = 0;
        do {
-               task_cputime(t, &utime, &stime);
-               times->utime += utime;
-               times->stime += stime;
-               times->sum_exec_runtime += task_sched_runtime(t);
-       } while_each_thread(tsk, t);
-out:
+               seq = nextseq;
+               flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+               times->utime = sig->utime;
+               times->stime = sig->stime;
+               times->sum_exec_runtime = sig->sum_sched_runtime;
+
+               for_each_thread(tsk, t) {
+                       task_cputime(t, &utime, &stime);
+                       times->utime += utime;
+                       times->stime += stime;
+                       times->sum_exec_runtime += task_sched_runtime(t);
+               }
+               /* If lockless access failed, take the lock. */
+               nextseq = 1;
+       } while (need_seqretry(&sig->stats_lock, seq));
+       done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
        rcu_read_unlock();
 }
 
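
The rewritten thread_group_cputime() above walks the threads with for_each_thread() under RCU and validates the sums against sig->stats_lock: the first pass is a lockless seqcount read, and only if that pass raced with a writer is the lock actually taken (nextseq = 1 forces the locked variant on retry). Below is a hedged user-space sketch of that "lockless first, then locked" shape, using generic pthread/C11 names (demo_*) rather than the kernel's read_seqbegin_or_lock_irqsave()/need_seqretry()/done_seqretry_irqrestore() API; a real seqlock also needs the appropriate memory barriers, which this sketch glosses over. The matching writer side is sketched at the end of this diff.

#include <pthread.h>
#include <stdatomic.h>

struct demo_stats {
        pthread_mutex_t lock;           /* taken by writers (and retry readers) */
        atomic_uint seq;                /* even: stable, odd: writer in progress */
        unsigned long long utime, stime;
};

static void demo_read(struct demo_stats *s,
                      unsigned long long *ut, unsigned long long *st)
{
        unsigned int seq;
        int locked = 0;                 /* first round is lockless */

        for (;;) {
                if (!locked) {
                        /* Optimistic round: wait out an in-flight writer,
                         * then snapshot the sequence. */
                        while ((seq = atomic_load(&s->seq)) & 1)
                                ;
                } else {
                        /* Retry round: take the lock so the data cannot
                         * change underneath us. */
                        pthread_mutex_lock(&s->lock);
                }

                *ut = s->utime;
                *st = s->stime;

                if (locked || atomic_load(&s->seq) == seq)
                        break;          /* snapshot was consistent */
                locked = 1;             /* lockless round raced: escalate */
        }

        if (locked)
                pthread_mutex_unlock(&s->lock);
}
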
@@ -326,50 +337,50 @@ out:
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-                                               struct rq *rq)
+                                        struct rq *rq, int ticks)
 {
-       cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+       cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
+       u64 cputime = (__force u64) cputime_one_jiffy;
        u64 *cpustat = kcpustat_this_cpu->cpustat;
 
        if (steal_account_process_tick())
                return;
 
+       cputime *= ticks;
+       scaled *= ticks;
+
        if (irqtime_account_hi_update()) {
-               cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
+               cpustat[CPUTIME_IRQ] += cputime;
        } else if (irqtime_account_si_update()) {
-               cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
+               cpustat[CPUTIME_SOFTIRQ] += cputime;
        } else if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time do not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
-               __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-                                       CPUTIME_SOFTIRQ);
+               __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
-               account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+               account_user_time(p, cputime, scaled);
        } else if (p == rq->idle) {
-               account_idle_time(cputime_one_jiffy);
+               account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
-               account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+               account_guest_time(p, cputime, scaled);
        } else {
-               __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-                                       CPUTIME_SYSTEM);
+               __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
        }
 }
 
 static void irqtime_account_idle_ticks(int ticks)
 {
-       int i;
        struct rq *rq = this_rq();
 
-       for (i = 0; i < ticks; i++)
-               irqtime_account_process_tick(current, 0, rq);
+       irqtime_account_process_tick(current, 0, rq, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 static inline void irqtime_account_idle_ticks(int ticks) {}
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-                                               struct rq *rq) {}
+                                               struct rq *rq, int nr_ticks) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 /*
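
The irqtime hunk above teaches irqtime_account_process_tick() to account a batch of ticks at once: the per-jiffy cputime and its scaled value are multiplied by ticks, which lets irqtime_account_idle_ticks() replace its per-tick loop with a single call (useful when NO_HZ idle skips many ticks); the call-site hunk below passes 1 for an ordinary tick. One consequence is that the whole batch is classified into a single bucket rather than re-classified per tick. A tiny runnable sketch of the arithmetic equivalence, with hypothetical demo_* names:

#include <assert.h>
#include <stdint.h>

static uint64_t demo_account_loop(uint64_t one_jiffy, int ticks)
{
        uint64_t total = 0;
        int i;

        for (i = 0; i < ticks; i++)             /* old idle-ticks loop */
                total += one_jiffy;
        return total;
}

static uint64_t demo_account_batched(uint64_t one_jiffy, int ticks)
{
        return one_jiffy * (uint64_t)ticks;     /* cputime *= ticks */
}

int main(void)
{
        assert(demo_account_loop(1000000, 37) == demo_account_batched(1000000, 37));
        return 0;
}
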
@@ -458,7 +469,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
                return;
 
        if (sched_clock_irqtime) {
-               irqtime_account_process_tick(p, user_tick, rq);
+               irqtime_account_process_tick(p, user_tick, rq, 1);
                return;
        }
 
@@ -543,6 +554,23 @@ drop_precision:
        return (__force cputime_t) scaled;
 }
 
+/*
+ * Atomically advance counter to the new value. Interrupts, vcpu
+ * scheduling, and scaling inaccuracies can cause cputime_advance
+ * to be occasionally called with a new value smaller than counter.
+ * Let's enforce atomicity.
+ *
+ * Normally a caller will only go through this loop once, or not
+ * at all in case a previous caller updated counter the same jiffy.
+ */
+static void cputime_advance(cputime_t *counter, cputime_t new)
+{
+       cputime_t old;
+
+       while (new > (old = ACCESS_ONCE(*counter)))
+               cmpxchg_cputime(counter, old, new);
+}
+
 /*
  * Adjust tick based cputime random precision against scheduler
  * runtime accounting.
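
cputime_advance(), added above, replaces the plain max() clamps in cputime_adjust() (next hunk) with a compare-and-swap loop: with max(), two updaters racing on prev->utime/prev->stime could each pass the check and the later, smaller write could clobber the earlier, larger one, letting the reported times go backwards; the cmpxchg loop only ever moves the counter forward. Below is a hedged C11-atomics analogue, assuming the counter fits in a 64-bit atomic (the kernel version uses ACCESS_ONCE() plus cmpxchg_cputime() on cputime_t); demo_advance() is a hypothetical name:

#include <stdatomic.h>
#include <stdint.h>

static void demo_advance(_Atomic uint64_t *counter, uint64_t new)
{
        uint64_t old = atomic_load(counter);

        /* Only ever move forward.  A failed CAS reloads 'old'; stop as
         * soon as another updater has already advanced past 'new'. */
        while (new > old &&
               !atomic_compare_exchange_weak(counter, &old, new))
                ;
}
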
@@ -588,13 +616,8 @@ static void cputime_adjust(struct task_cputime *curr,
                utime = rtime - stime;
        }
 
-       /*
-        * If the tick based count grows faster than the scheduler one,
-        * the result of the scaling may go backward.
-        * Let's enforce monotonicity.
-        */
-       prev->stime = max(prev->stime, stime);
-       prev->utime = max(prev->utime, utime);
+       cputime_advance(&prev->stime, stime);
+       cputime_advance(&prev->utime, utime);
 
 out:
        *ut = prev->utime;
@@ -611,9 +634,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
-/*
- * Must be called with siglock held.
- */
 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
        struct task_cputime cputime;
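
The dropped "Must be called with siglock held" comment reflects the thread_group_cputime() rewrite earlier in this diff: group times are now read under RCU and sig->stats_lock, so thread_group_cputime_adjusted() callers no longer need to hold siglock just for this. Continuing the earlier demo_stats sketch, the writer side looks roughly like this: updates happen under the same lock with the sequence bumped around them, which is what lets readers drop any external locking requirement (demo_* names remain hypothetical, not kernel code):

static void demo_write(struct demo_stats *s,
                       unsigned long long dut, unsigned long long dst)
{
        pthread_mutex_lock(&s->lock);
        atomic_fetch_add(&s->seq, 1);   /* odd: lockless readers will retry */
        s->utime += dut;
        s->stime += dst;
        atomic_fetch_add(&s->seq, 1);   /* even: data stable again */
        pthread_mutex_unlock(&s->lock);
}
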