Merge tag 'v4.1' into p/abusse/merge_upgrade
projects/modsched/linux.git: kernel/sched/cfs/core.c
index 13fdac5..f08622d 100644
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
 {
        s64 delta;
 
-       if (rq->skip_clock_update > 0)
+       lockdep_assert_held(&rq->lock);
+
+       if (rq->clock_skip_update & RQCF_ACT_SKIP)
                return;
 
        delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
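The old skip_clock_update int becomes a small flag word: check_preempt_curr() further down only *requests* a skip via rq_clock_skip_update(), __schedule() promotes that request to an active skip with the `clock_skip_update <<= 1` in a later hunk, and only the active bit short-circuits update_rq_clock(). The self-contained sketch below mirrors the mainline RQCF_REQ_SKIP/RQCF_ACT_SKIP layout (the real definitions live in kernel/sched/sched.h); it is an illustration, not the kernel code.

#include <assert.h>
#include <stdbool.h>

/* Mirrors the mainline flag layout: request in bit 0, active in bit 1. */
enum { RQCF_REQ_SKIP = 0x01, RQCF_ACT_SKIP = 0x02 };

struct rq_sketch { unsigned int clock_skip_update; };

/* check_preempt_curr() path: merely request that the next clock
 * update be skipped. */
static void rq_clock_skip_update_sketch(struct rq_sketch *rq, bool skip)
{
        if (skip)
                rq->clock_skip_update |= RQCF_REQ_SKIP;
        else
                rq->clock_skip_update &= ~RQCF_REQ_SKIP;
}

/* update_rq_clock() path: only an *active* skip returns early. */
static bool clock_update_skipped(const struct rq_sketch *rq)
{
        return rq->clock_skip_update & RQCF_ACT_SKIP;
}

int main(void)
{
        struct rq_sketch rq = { 0 };

        rq_clock_skip_update_sketch(&rq, true);  /* request only */
        assert(!clock_update_skipped(&rq));

        rq.clock_skip_update <<= 1;              /* __schedule(): promote REQ to ACT */
        assert(clock_update_skipped(&rq));

        rq.clock_skip_update = 0;                /* cleared once the next task is picked */
        assert(!clock_update_skipped(&rq));
        return 0;
}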
@@ -304,65 +306,8 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-       __acquires(rq->lock)
-{
-       struct rq *rq;
-
-       lockdep_assert_held(&p->pi_lock);
-
-       for (;;) {
-               rq = task_rq(p);
-               raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-                       return rq;
-               raw_spin_unlock(&rq->lock);
-
-               while (unlikely(task_on_rq_migrating(p)))
-                       cpu_relax();
-       }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
-       __acquires(p->pi_lock)
-       __acquires(rq->lock)
-{
-       struct rq *rq;
-
-       for (;;) {
-               raw_spin_lock_irqsave(&p->pi_lock, *flags);
-               rq = task_rq(p);
-               raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-                       return rq;
-               raw_spin_unlock(&rq->lock);
-               raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-               while (unlikely(task_on_rq_migrating(p)))
-                       cpu_relax();
-       }
-}
-
-static void __task_rq_unlock(struct rq *rq)
-       __releases(rq->lock)
-{
-       raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
-       __releases(rq->lock)
-       __releases(p->pi_lock)
-{
-       raw_spin_unlock(&rq->lock);
-       raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-}
+/* cpus with isolated domains */
+cpumask_var_t cpu_isolated_map;
 
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
@@ -490,6 +435,11 @@ static __init void init_hrtick(void)
  */
 void hrtick_start(struct rq *rq, u64 delay)
 {
+       /*
+        * Don't schedule slices shorter than 10000ns, that just
+        * doesn't make sense. Rely on vruntime for fairness.
+        */
+       delay = max_t(u64, delay, 10000LL);
        __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
                        HRTIMER_MODE_REL_PINNED, 0);
 }
@@ -742,6 +692,23 @@ static inline bool got_nohz_idle_kick(void)
 #ifdef CONFIG_NO_HZ_FULL
 bool sched_can_stop_tick(void)
 {
+       /*
+        * FIFO realtime policy runs the highest priority task. Other runnable
+        * tasks are of a lower priority. The scheduler tick does nothing.
+        */
+       if (current->policy == SCHED_FIFO)
+               return true;
+
+       /*
+        * Round-robin realtime tasks time slice with other tasks at the same
+        * realtime priority. Is this task the only one at this priority?
+        */
+       if (current->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &current->rt;
+
+               return rt_se->run_list.prev == rt_se->run_list.next;
+       }
+
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
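The SCHED_RR test above leans on the layout of the kernel's circular list_head: rt_se->run_list hangs off the RR queue for its priority, and a node whose prev and next pointers coincide is either an empty head or the only entry on that list, i.e. there is nobody to round-robin with and the tick can stop. Below, a minimal user-space sketch of that check (a hand-rolled list stand-in, not <linux/list.h>).

#include <assert.h>
#include <stdbool.h>

/* Minimal stand-in for the kernel's circular struct list_head. */
struct list_node { struct list_node *next, *prev; };

static void list_init(struct list_node *head)
{
        head->next = head->prev = head;
}

static void list_add_tail(struct list_node *node, struct list_node *head)
{
        node->prev = head->prev;
        node->next = head;
        head->prev->next = node;
        head->prev = node;
}

/* The check used above: prev == next means the entity is alone on its
 * priority queue, so no RR peer needs the tick for time slicing. */
static bool only_entry(const struct list_node *node)
{
        return node->prev == node->next;
}

int main(void)
{
        struct list_node queue, a, b;

        list_init(&queue);
        list_add_tail(&a, &queue);
        assert(only_entry(&a));         /* sole RR task at this priority */

        list_add_tail(&b, &queue);
        assert(!only_entry(&a));        /* a peer exists: keep ticking */
        return 0;
}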
@@ -1046,7 +1013,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
         * this case, we can save a useless back to back clock update.
         */
        if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-               rq->skip_clock_update = 1;
+               rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1082,7 +1049,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
-               perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+               perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
        }
 
        __set_task_cpu(p, new_cpu);
@@ -1842,6 +1809,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        p->se.prev_sum_exec_runtime     = 0;
        p->se.nr_migrations             = 0;
        p->se.vruntime                  = 0;
+#ifdef CONFIG_SMP
+       p->se.avg.decay_count           = 0;
+#endif
        INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
@@ -2761,6 +2731,10 @@ again:
  *          - explicit schedule() call
  *          - return from syscall or exception to user-space
  *          - return from interrupt-handler to user-space
+ *
+ * WARNING: all callers must re-check need_resched() afterward and reschedule
+ * accordingly in case an event triggered the need for rescheduling (such as
+ * an interrupt waking up a task) while preemption was disabled in __schedule().
  */
 //void print_rb_nodes(struct rq *rq) {
 //     struct task_struct *p;
@@ -2782,7 +2756,6 @@ static void __sched __schedule(void)
        struct rq *rq;
        int i, cpu;
 
-need_resched:
        preempt_disable();
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
@@ -2802,6 +2775,8 @@ need_resched:
        smp_mb__before_spinlock();
        raw_spin_lock_irq(&rq->lock);
 
+       rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+
        switch_count = &prev->nivcsw;
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
                if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2826,13 +2801,13 @@ need_resched:
                switch_count = &prev->nvcsw;
        }
 
-       if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
+       if (task_on_rq_queued(prev))
                update_rq_clock(rq);
 
        next = pick_next_task(rq, prev);
        clear_tsk_need_resched(prev);
        clear_preempt_need_resched();
-       rq->skip_clock_update = 0;
+       rq->clock_skip_update = 0;
 
        if (likely(prev != next)) {
                rq->nr_switches++;
@@ -2847,8 +2822,6 @@ need_resched:
        post_schedule(rq);
 
        sched_preempt_enable_no_resched();
-       if (need_resched())
-               goto need_resched;
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -2868,7 +2841,9 @@ asmlinkage __visible void __sched schedule(void)
        struct task_struct *tsk = current;
 
        sched_submit_work(tsk);
-       __schedule();
+       do {
+               __schedule();
+       } while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
 
@@ -2882,7 +2857,7 @@ asmlinkage __visible void __sched schedule_user(void)
         * we find a better solution.
         *
         * NB: There are buggy callers of this function.  Ideally we
-        * should warn if prev_state != IN_USER, but that will trigger
+        * should warn if prev_state != CONTEXT_USER, but that will trigger
         * too frequently to make sense yet.
         */
        enum ctx_state prev_state = exception_enter();
@@ -2903,6 +2878,21 @@ void __sched schedule_preempt_disabled(void)
        preempt_disable();
 }
 
+static void __sched notrace preempt_schedule_common(void)
+{
+       do {
+               __preempt_count_add(PREEMPT_ACTIVE);
+               __schedule();
+               __preempt_count_sub(PREEMPT_ACTIVE);
+
+               /*
+                * Check again in case we missed a preemption opportunity
+                * between schedule and now.
+                */
+               barrier();
+       } while (need_resched());
+}
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
@@ -2918,17 +2908,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
        if (likely(!preemptible()))
                return;
 
-       do {
-               __preempt_count_add(PREEMPT_ACTIVE);
-               __schedule();
-               __preempt_count_sub(PREEMPT_ACTIVE);
-
-               /*
-                * Check again in case we missed a preemption opportunity
-                * between schedule and now.
-                */
-               barrier();
-       } while (need_resched());
+       preempt_schedule_common();
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
@@ -3093,6 +3073,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
+               if (rt_prio(oldprio))
+                       p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
 
@@ -3337,15 +3319,18 @@ static void __setscheduler_params(struct task_struct *p,
 
 /* Actually do priority change: must hold pi & rq lock. */
 static void __setscheduler(struct rq *rq, struct task_struct *p,
-                          const struct sched_attr *attr)
+                          const struct sched_attr *attr, bool keep_boost)
 {
        __setscheduler_params(p, attr);
 
        /*
-        * If we get here, there was no pi waiters boosting the
-        * task. It is safe to use the normal prio.
+        * Keep a potential priority boosting if called from
+        * sched_setscheduler().
         */
-       p->prio = normal_prio(p);
+       if (keep_boost)
+               p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+       else
+               p->prio = normal_prio(p);
 
        if (dl_prio(p->prio))
                p->sched_class = &dl_sched_class;
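__setscheduler() now takes keep_boost so that the sched_setscheduler() path preserves an active PI boost, while normalize_task() further down passes false and forces the plain normal priority. rt_mutex_get_effective_prio() picks the stronger of the top PI waiter's priority and the freshly computed normal priority (lower number means higher priority); the same helper feeds the fast path in __sched_setscheduler() below, where an unchanged effective priority means only the parameters are stored. The sketch here is a hedged stand-in for that selection, not the kernel API.

#include <assert.h>

/* Stand-in for rt_mutex_get_effective_prio(): with the kernel's
 * "lower value == higher priority" convention, keep whichever of the
 * top PI waiter's priority and the requested priority is stronger. */
static int effective_prio_sketch(int top_waiter_prio, int newprio)
{
        return top_waiter_prio < newprio ? top_waiter_prio : newprio;
}

int main(void)
{
        /* Boosted by a waiter at prio 10, user asks for prio 50: the
         * boost survives until the task deboosts itself. */
        assert(effective_prio_sketch(10, 50) == 10);

        /* Requested priority already stronger than any waiter. */
        assert(effective_prio_sketch(50, 10) == 10);
        return 0;
}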
@@ -3424,6 +3409,20 @@ static bool check_same_owner(struct task_struct *p)
        return match;
 }
 
+static bool dl_param_changed(struct task_struct *p,
+               const struct sched_attr *attr)
+{
+       struct sched_dl_entity *dl_se = &p->dl;
+
+       if (dl_se->dl_runtime != attr->sched_runtime ||
+               dl_se->dl_deadline != attr->sched_deadline ||
+               dl_se->dl_period != attr->sched_period ||
+               dl_se->flags != attr->sched_flags)
+               return true;
+
+       return false;
+}
+
 static int __sched_setscheduler(struct task_struct *p,
                                const struct sched_attr *attr,
                                bool user)
@@ -3431,7 +3430,7 @@ static int __sched_setscheduler(struct task_struct *p,
        int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
                      MAX_RT_PRIO - 1 - attr->sched_priority;
        int retval, oldprio, oldpolicy = -1, queued, running;
-       int policy = attr->sched_policy;
+       int new_effective_prio, policy = attr->sched_policy;
        unsigned long flags;
        const struct sched_class *prev_class;
        struct rq *rq;
@@ -3552,7 +3551,7 @@ recheck:
                        goto change;
                if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
                        goto change;
-               if (dl_policy(policy))
+               if (dl_policy(policy) && dl_param_changed(p, attr))
                        goto change;
 
                p->sched_reset_on_fork = reset_on_fork;
@@ -3613,15 +3612,14 @@ change:
        oldprio = p->prio;
 
        /*
-        * Special case for priority boosted tasks.
-        *
-        * If the new priority is lower or equal (user space view)
-        * than the current (boosted) priority, we just store the new
+        * Take priority boosted tasks into account. If the new
+        * effective priority is unchanged, we just store the new
         * normal parameters and do not touch the scheduler class and
         * the runqueue. This will be done when the task deboost
         * itself.
         */
-       if (rt_mutex_check_prio(p, newprio)) {
+       new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+       if (new_effective_prio == oldprio) {
                __setscheduler_params(p, attr);
                task_rq_unlock(rq, p, &flags);
                return 0;
@@ -3635,7 +3633,7 @@ change:
                put_prev_task(rq, p);
 
        prev_class = p->sched_class;
-       __setscheduler(rq, p, attr);
+       __setscheduler(rq, p, attr, true);
 
        if (running)
                p->sched_class->set_curr_task(rq);
@@ -4244,17 +4242,10 @@ SYSCALL_DEFINE0(sched_yield)
        return 0;
 }
 
-static void __cond_resched(void)
-{
-       __preempt_count_add(PREEMPT_ACTIVE);
-       __schedule();
-       __preempt_count_sub(PREEMPT_ACTIVE);
-}
-
 int __sched _cond_resched(void)
 {
        if (should_resched()) {
-               __cond_resched();
+               preempt_schedule_common();
                return 1;
        }
        return 0;
@@ -4279,7 +4270,7 @@ int __cond_resched_lock(spinlock_t *lock)
        if (spin_needbreak(lock) || resched) {
                spin_unlock(lock);
                if (resched)
-                       __cond_resched();
+                       preempt_schedule_common();
                else
                        cpu_relax();
                ret = 1;
@@ -4295,7 +4286,7 @@ int __sched __cond_resched_softirq(void)
 
        if (should_resched()) {
                local_bh_enable();
-               __cond_resched();
+               preempt_schedule_common();
                local_bh_disable();
                return 1;
        }
@@ -4410,36 +4401,26 @@ EXPORT_SYMBOL_GPL(yield_to);
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
  */
-void __sched io_schedule(void)
-{
-       struct rq *rq = raw_rq();
-
-       delayacct_blkio_start();
-       atomic_inc(&rq->nr_iowait);
-       blk_flush_plug(current);
-       current->in_iowait = 1;
-       schedule();
-       current->in_iowait = 0;
-       atomic_dec(&rq->nr_iowait);
-       delayacct_blkio_end();
-}
-EXPORT_SYMBOL(io_schedule);
-
 long __sched io_schedule_timeout(long timeout)
 {
-       struct rq *rq = raw_rq();
+       int old_iowait = current->in_iowait;
+       struct rq *rq;
        long ret;
 
+       current->in_iowait = 1;
+       blk_schedule_flush_plug(current);
+
        delayacct_blkio_start();
+       rq = raw_rq();
        atomic_inc(&rq->nr_iowait);
-       blk_flush_plug(current);
-       current->in_iowait = 1;
        ret = schedule_timeout(timeout);
-       current->in_iowait = 0;
+       current->in_iowait = old_iowait;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
+
        return ret;
 }
+EXPORT_SYMBOL(io_schedule_timeout);
 
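Note the save/restore of current->in_iowait: instead of unconditionally clearing the flag on the way out, the function puts back whatever value it found, so a caller that was already accounted as iowait (for example when this path is reached again through plug flushing) is not un-marked by the inner sleep. A trivial sketch of the idiom, with hypothetical names:

#include <assert.h>

struct task_sketch { int in_iowait; };

/* Save/restore rather than set/clear: an inner sleep must not clobber
 * an outer context that already marked itself as waiting on IO. */
static void sleep_in_iowait(struct task_sketch *t)
{
        int old_iowait = t->in_iowait;

        t->in_iowait = 1;
        /* ... block here ... */
        t->in_iowait = old_iowait;
}

int main(void)
{
        struct task_sketch t = { .in_iowait = 1 };  /* outer iowait section */

        sleep_in_iowait(&t);
        assert(t.in_iowait == 1);   /* outer accounting preserved */

        t.in_iowait = 0;
        sleep_in_iowait(&t);
        assert(t.in_iowait == 0);
        return 0;
}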
 /**
  * sys_sched_get_priority_max - return maximum RT priority.
@@ -4550,9 +4531,10 @@ void sched_show_task(struct task_struct *p)
 {
        unsigned long free = 0;
        int ppid;
-       unsigned state;
+       unsigned long state = p->state;
 
-       state = p->state ? __ffs(p->state) + 1 : 0;
+       if (state)
+               state = __ffs(state) + 1;
        printk(KERN_INFO "%-15.15s %c", p->comm,
                state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
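p->state is a bitmask, and the letter lookup above turns it into an index by taking the lowest set bit plus one, with 0 (TASK_RUNNING) staying at index 0. The sketch below reproduces that mapping in user space with POSIX ffs(), whose 1-based result matches `__ffs(state) + 1` exactly; the letter table here is made up, the kernel's real one is stat_nam.

#include <assert.h>
#include <strings.h>   /* ffs() */

/* Hypothetical letter table; index 0 is "running", then one letter
 * per state bit, mirroring how stat_nam is indexed above. */
static const char stat_nam_sketch[] = "RSDTtXZ";

static char state_char(unsigned long state)
{
        /* ffs() is 1-based and returns 0 for 0, which is exactly the
         * kernel's "state ? __ffs(state) + 1 : 0". */
        unsigned int idx = (unsigned int)ffs((int)state);

        return idx < sizeof(stat_nam_sketch) - 1 ? stat_nam_sketch[idx] : '?';
}

int main(void)
{
        assert(state_char(0) == 'R');           /* running */
        assert(state_char(1UL << 0) == 'S');    /* lowest bit -> index 1 */
        assert(state_char(1UL << 1) == 'D');    /* next bit   -> index 2 */
        assert(state_char(1UL << 10) == '?');   /* beyond the table */
        return 0;
}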
@@ -4785,7 +4767,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
 
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
-       if (p->sched_class && p->sched_class->set_cpus_allowed)
+       if (p->sched_class->set_cpus_allowed)
                p->sched_class->set_cpus_allowed(p, new_mask);
 
        cpumask_copy(&p->cpus_allowed, new_mask);
@@ -5376,36 +5358,13 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
-       unsigned long flags;
-       long cpu = (long)hcpu;
-       struct dl_bw *dl_b;
-
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
-               set_cpu_active(cpu, false);
-
-               /* explicitly allow suspend */
-               if (!(action & CPU_TASKS_FROZEN)) {
-                       bool overflow;
-                       int cpus;
-
-                       rcu_read_lock_sched();
-                       dl_b = dl_bw_of(cpu);
-
-                       raw_spin_lock_irqsave(&dl_b->lock, flags);
-                       cpus = dl_bw_cpus(cpu);
-                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
-                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-                       rcu_read_unlock_sched();
-
-                       if (overflow)
-                               return notifier_from_errno(-EBUSY);
-               }
+               set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
        }
-
-       return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5453,9 +5412,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                  struct cpumask *groupmask)
 {
        struct sched_group *group = sd->groups;
-       char str[256];
 
-       cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
        cpumask_clear(groupmask);
 
        printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -5468,7 +5425,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                return -1;
        }
 
-       printk(KERN_CONT "span %s level %s\n", str, sd->name);
+       printk(KERN_CONT "span %*pbl level %s\n",
+              cpumask_pr_args(sched_domain_span(sd)), sd->name);
 
        if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -5487,17 +5445,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
 
-               /*
-                * Even though we initialize ->capacity to something semi-sane,
-                * we leave capacity_orig unset. This allows us to detect if
-                * domain iteration is still funny without causing /0 traps.
-                */
-               if (!group->sgc->capacity_orig) {
-                       printk(KERN_CONT "\n");
-                       printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-                       break;
-               }
-
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@ -5513,9 +5460,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
                cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
-               cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
-
-               printk(KERN_CONT " %s", str);
+               printk(KERN_CONT " %*pbl",
+                      cpumask_pr_args(sched_group_cpus(group)));
                if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
                        printk(KERN_CONT " (cpu_capacity = %d)",
                                group->sgc->capacity);
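These debug hunks drop the 256-byte scratch buffer and let printk format the cpumask directly: %*pbl is the kernel's bitmap-list extension, and cpumask_pr_args() supplies the width (nr_cpu_ids) and bit array it expects, producing the usual cpulist text such as "0-3,8". The user-space sketch below only reproduces that range-compressed output format; it is not the printk implementation.

#include <stdbool.h>
#include <stdio.h>

/* Print a bitmap in cpulist form ("0-3,8"), the same text %*pbl
 * emits in the kernel log. */
static void print_cpulist(const bool *mask, int nbits)
{
        bool first = true;

        for (int i = 0; i < nbits; i++) {
                if (!mask[i])
                        continue;

                int j = i;
                while (j + 1 < nbits && mask[j + 1])
                        j++;

                printf("%s%d", first ? "" : ",", i);
                if (j > i)
                        printf("-%d", j);
                first = false;
                i = j;
        }
        putchar('\n');
}

int main(void)
{
        /* CPUs 0-3 and 8 set, as a sched_domain span might be. */
        bool span[16] = { [0] = true, [1] = true, [2] = true, [3] = true,
                          [8] = true };

        print_cpulist(span, 16);        /* prints "0-3,8" */
        return 0;
}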
@@ -5871,9 +5817,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
        update_top_cache_domain(cpu);
 }
 
-/* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
-
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
@@ -5982,7 +5925,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-               sg->sgc->capacity_orig = sg->sgc->capacity;
 
                /*
                 * Make sure the first group of this domain contains the
@@ -6293,6 +6235,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
         */
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
+               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -7058,7 +7001,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                 */
 
        case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
@@ -7070,8 +7012,26 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
+       unsigned long flags;
+       long cpu = (long)hcpu;
+       struct dl_bw *dl_b;
+       bool overflow;
+       int cpus;
+
        switch (action) {
        case CPU_DOWN_PREPARE:
+               rcu_read_lock_sched();
+               dl_b = dl_bw_of(cpu);
+
+               raw_spin_lock_irqsave(&dl_b->lock, flags);
+               cpus = dl_bw_cpus(cpu);
+               overflow = __dl_overflow(dl_b, cpus, 0, 0);
+               raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+               rcu_read_unlock_sched();
+
+               if (overflow)
+                       return notifier_from_errno(-EBUSY);
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
@@ -7216,8 +7176,8 @@ void __init sched_init(void)
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
-               init_rt_rq(&rq->rt, rq);
-               init_dl_rq(&rq->dl, rq);
+               init_rt_rq(&rq->rt);
+               init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7257,7 +7217,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+               rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -7294,6 +7254,11 @@ void __init sched_init(void)
        atomic_inc(&init_mm.mm_count);
        enter_lazy_tlb(&init_mm, current);
 
+       /*
+        * During early bootup we pretend to be a normal task:
+        */
+       current->sched_class = &fair_sched_class;
+
        /*
         * Make us the idle thread. Technically, schedule() should not be
         * called from this thread, however somewhere below it might be,
@@ -7304,11 +7269,6 @@ void __init sched_init(void)
 
        calc_load_update = jiffies + LOAD_FREQ;
 
-       /*
-        * During early bootup we pretend to be a normal task:
-        */
-       current->sched_class = &fair_sched_class;
-
 #ifdef CONFIG_SMP
        zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
        /* May be allocated at isolcpus cmdline parse time */
@@ -7369,6 +7329,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
                        in_atomic(), irqs_disabled(),
                        current->pid, current->comm);
 
+       if (task_stack_end_corrupted(current))
+               printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
        debug_show_held_locks(current);
        if (irqs_disabled())
                print_irqtrace_events(current);
@@ -7397,7 +7360,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
        queued = task_on_rq_queued(p);
        if (queued)
                dequeue_task(rq, p, 0);
-       __setscheduler(rq, p, &attr);
+       __setscheduler(rq, p, &attr, false);
        if (queued) {
                enqueue_task(rq, p, 0);
                resched_curr(rq);
@@ -7632,6 +7595,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
        struct task_struct *g, *p;
 
+       /*
+        * Autogroups do not have RT tasks; see autogroup_create().
+        */
+       if (task_group_is_autogroup(tg))
+               return 0;
+
        for_each_process_thread(g, p) {
                if (rt_task(p) && task_group(p) == tg)
                        return 1;
@@ -7724,6 +7693,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
 {
        int i, err = 0;
 
+       /*
+        * Disallowing the root group RT runtime is BAD, it would disallow the
+        * kernel creating (and or operating) RT threads.
+        */
+       if (tg == &root_task_group && rt_runtime == 0)
+               return -EINVAL;
+
+       /* No period doesn't make any sense. */
+       if (rt_period == 0)
+               return -EINVAL;
+
        mutex_lock(&rt_constraints_mutex);
        read_lock(&tasklist_lock);
        err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7780,9 +7760,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
        rt_period = (u64)rt_period_us * NSEC_PER_USEC;
        rt_runtime = tg->rt_bandwidth.rt_runtime;
 
-       if (rt_period == 0)
-               return -EINVAL;
-
        return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
@@ -7839,7 +7816,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@ -7940,11 +7917,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
                if (ret)
                        goto undo;
 
-               ret = sched_rt_global_constraints();
+               ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
 
-               ret = sched_dl_global_constraints();
+               ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;