Merge tag 'v3.14' into p/abusse/merge_upgrade
[projects/modsched/linux.git] / kernel/sched/cfs/rt.c
index 127a2c4..1999021 100644
@@ -246,8 +246,10 @@ static inline void rt_set_overload(struct rq *rq)
         * if we should look at the mask. It would be a shame
         * if we looked at the mask, but the mask was not
         * updated yet.
+        *
+        * Matched by the barrier in pull_rt_task().
         */
-       wmb();
+       smp_wmb();
        atomic_inc(&rq->rd->rto_count);
 }
 
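Note on the barrier change above: the smp_wmb() in rt_set_overload() pairs with the smp_rmb() added to pull_rt_task() further down in this diff. A minimal sketch of the publish/observe pattern, condensed from the surrounding functions (the reader-side body is elided and purely illustrative):

	/* Writer side, cf. rt_set_overload(): make the mask bit visible
	 * before bumping the counter that advertises it. */
	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);

	/* Reader side, cf. rt_overloaded()/pull_rt_task(): observe the
	 * counter first; the mask is then guaranteed to be up to date. */
	if (atomic_read(&this_rq->rd->rto_count)) {
		smp_rmb();
		/* ... scan this_rq->rd->rto_mask ... */
	}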
@@ -399,20 +401,6 @@ static inline struct task_group *next_task_group(struct task_group *tg)
                (iter = next_task_group(iter)) &&                       \
                (rt_rq = iter->rt_rq[cpu_of(rq)]);)
 
-static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
-{
-       list_add_rcu(&rt_rq->leaf_rt_rq_list,
-                       &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
-}
-
-static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
-{
-       list_del_rcu(&rt_rq->leaf_rt_rq_list);
-}
-
-#define for_each_leaf_rt_rq(rt_rq, rq) \
-       list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
-
 #define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = rt_se->parent)
 
@@ -472,7 +460,7 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 #ifdef CONFIG_SMP
 static inline const struct cpumask *sched_rt_period_mask(void)
 {
-       return cpu_rq(smp_processor_id())->rd->span;
+       return this_rq()->rd->span;
 }
 #else
 static inline const struct cpumask *sched_rt_period_mask(void)
@@ -509,17 +497,6 @@ typedef struct rt_rq *rt_rq_iter_t;
 #define for_each_rt_rq(rt_rq, iter, rq) \
        for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
 
-static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
-{
-}
-
-static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
-{
-}
-
-#define for_each_leaf_rt_rq(rt_rq, rq) \
-       for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
-
 #define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = NULL)
 
@@ -561,6 +538,14 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
+{
+       struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+       return (hrtimer_active(&rt_b->rt_period_timer) ||
+               rt_rq->rt_time < rt_b->rt_runtime);
+}
+
 #ifdef CONFIG_SMP
 /*
  * We ran out of runtime, see if we can borrow some from our neighbours.
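The new sched_rt_bandwidth_account() helper reports whether rt_time is still worth accumulating: either the period timer is active and will decay it, or the queue is still under its runtime budget. A hedged sketch of the kind of call site it is intended for (the actual caller lives outside this file; the locking and variable names here are illustrative only):

	raw_spin_lock(&rt_rq->rt_runtime_lock);
	/* Only keep charging rt_time while the period timer will
	 * eventually decay it, or while we are under the budget;
	 * otherwise rt_time could grow without bound. */
	if (sched_rt_bandwidth_account(rt_rq))
		rt_rq->rt_time += delta_exec;
	raw_spin_unlock(&rt_rq->rt_runtime_lock);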
@@ -699,15 +684,6 @@ balanced:
        }
 }
 
-static void disable_runtime(struct rq *rq)
-{
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-       __disable_runtime(rq);
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
 static void __enable_runtime(struct rq *rq)
 {
        rt_rq_iter_t iter;
@@ -732,37 +708,6 @@ static void __enable_runtime(struct rq *rq)
        }
 }
 
-static void enable_runtime(struct rq *rq)
-{
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-       __enable_runtime(rq);
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-       int cpu = (int)(long)hcpu;
-
-       switch (action) {
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               disable_runtime(cpu_rq(cpu));
-               return NOTIFY_OK;
-
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               enable_runtime(cpu_rq(cpu));
-               return NOTIFY_OK;
-
-       default:
-               return NOTIFY_DONE;
-       }
-}
-
 static int balance_runtime(struct rt_rq *rt_rq)
 {
        int more = 0;
@@ -926,7 +871,7 @@ static void update_curr_rt(struct rq *rq)
        if (curr->sched_class != &rt_sched_class)
                return;
 
-       delta_exec = rq->clock_task - curr->se.exec_start;
+       delta_exec = rq_clock_task(rq) - curr->se.exec_start;
        if (unlikely((s64)delta_exec <= 0))
                return;
 
@@ -936,7 +881,7 @@ static void update_curr_rt(struct rq *rq)
        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);
 
-       curr->se.exec_start = rq->clock_task;
+       curr->se.exec_start = rq_clock_task(rq);
        cpuacct_charge(curr, delta_exec);
 
        sched_rt_avg_update(rq, delta_exec);
@@ -964,6 +909,13 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
        if (rq->online && prio < prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
@@ -973,6 +925,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Change rq's cpupri only if rt_rq is the top queue.
+        */
+       if (&rq->rt != rt_rq)
+               return;
+#endif
        if (rq->online && rt_rq->highest_prio.curr != prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
@@ -1106,9 +1065,6 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
        if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                return;
 
-       if (!rt_rq->rt_nr_running)
-               list_add_leaf_rt_rq(rt_rq);
-
        if (head)
                list_add(&rt_se->run_list, queue);
        else
@@ -1128,8 +1084,6 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
                __clear_bit(rt_se_prio(rt_se), array->bitmap);
 
        dec_rt_tasks(rt_se, rt_rq);
-       if (!rt_rq->rt_nr_running)
-               list_del_leaf_rt_rq(rt_rq);
 }
 
 /*
@@ -1239,13 +1193,10 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
        struct task_struct *curr;
        struct rq *rq;
-       int cpu;
-
-       cpu = task_cpu(p);
 
        if (p->nr_cpus_allowed == 1)
                goto out;
@@ -1283,8 +1234,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
         */
        if (curr && unlikely(rt_task(curr)) &&
            (curr->nr_cpus_allowed < 2 ||
-            curr->prio <= p->prio) &&
-           (p->nr_cpus_allowed > 1)) {
+            curr->prio <= p->prio)) {
                int target = find_lowest_rq(p);
 
                if (target != -1)
@@ -1385,7 +1335,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
        } while (rt_rq);
 
        p = rt_task_of(rt_se);
-       p->se.exec_start = rq->clock_task;
+       p->se.exec_start = rq_clock_task(rq);
 
        return p;
 }
@@ -1434,42 +1384,24 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
        return 0;
 }
 
-/* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
+/*
+ * Return the highest-priority pushable task on this rq that is
+ * suitable to run on the given CPU, or NULL if there is none.
+ */
+static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 {
-       struct task_struct *next = NULL;
-       struct sched_rt_entity *rt_se;
-       struct rt_prio_array *array;
-       struct rt_rq *rt_rq;
-       int idx;
-
-       for_each_leaf_rt_rq(rt_rq, rq) {
-               array = &rt_rq->active;
-               idx = sched_find_first_bit(array->bitmap);
-next_idx:
-               if (idx >= MAX_RT_PRIO)
-                       continue;
-               if (next && next->prio <= idx)
-                       continue;
-               list_for_each_entry(rt_se, array->queue + idx, run_list) {
-                       struct task_struct *p;
+       struct plist_head *head = &rq->rt.pushable_tasks;
+       struct task_struct *p;
 
-                       if (!rt_entity_is_task(rt_se))
-                               continue;
+       if (!has_pushable_tasks(rq))
+               return NULL;
 
-                       p = rt_task_of(rt_se);
-                       if (pick_rt_task(rq, p, cpu)) {
-                               next = p;
-                               break;
-                       }
-               }
-               if (!next) {
-                       idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-                       goto next_idx;
-               }
+       plist_for_each_entry(p, head, pushable_tasks) {
+               if (pick_rt_task(rq, p, cpu))
+                       return p;
        }
 
-       return next;
+       return NULL;
 }
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
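pick_highest_pushable_task() relies on rq->rt.pushable_tasks being a plist keyed by task priority: plist keeps entries sorted by ascending key, and a lower ->prio value means a higher RT priority, so the loop returns the best eligible task it meets first. A sketch of how entries typically land on that list (illustrative, not the exact enqueue helper from this file):

	/* Key the node with the task's priority, then insert; plist_add()
	 * places it in ascending-key (i.e. descending RT priority) order. */
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);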
@@ -1718,6 +1650,12 @@ static int pull_rt_task(struct rq *this_rq)
        if (likely(!rt_overloaded(this_rq)))
                return 0;
 
+       /*
+        * Match the barrier in rt_set_overload(); this guarantees that if we
+        * see the overload flag set we must also see the rto_mask bit.
+        */
+       smp_rmb();
+
        for_each_cpu(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;
@@ -1743,12 +1681,10 @@ static int pull_rt_task(struct rq *this_rq)
                double_lock_balance(this_rq, src_rq);
 
                /*
-                * Are there still pullable RT tasks?
+                * We can only pull a task that is pushable on its
+                * source rq; nothing else is eligible.
                 */
-               if (src_rq->rt.rt_nr_running <= 1)
-                       goto skip;
-
-               p = pick_next_highest_task_rt(src_rq, this_cpu);
+               p = pick_highest_pushable_task(src_rq, this_cpu);
 
                /*
                 * Do we have an RT task that preempts
@@ -1810,7 +1746,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
            !test_tsk_need_resched(rq->curr) &&
            has_pushable_tasks(rq) &&
            p->nr_cpus_allowed > 1 &&
-           rt_task(rq->curr) &&
+           (dl_task(rq->curr) || rt_task(rq->curr)) &&
            (rq->curr->nr_cpus_allowed < 2 ||
             rq->curr->prio <= p->prio))
                push_rt_tasks(rq);
@@ -2021,8 +1957,8 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
        p->rt.time_slice = sched_rr_timeslice;
 
        /*
-        * Requeue to the end of queue if we (and all of our ancestors) are the
-        * only element on the queue
+        * Requeue to the end of the queue if we (and all of our ancestors)
+        * are not the only element on the queue.
         */
        for_each_sched_rt_entity(rt_se) {
                if (rt_se->run_list.prev != rt_se->run_list.next) {
@@ -2037,7 +1973,7 @@ static void set_curr_task_rt(struct rq *rq)
 {
        struct task_struct *p = rq->curr;
 
-       p->se.exec_start = rq->clock_task;
+       p->se.exec_start = rq_clock_task(rq);
 
        /* The running task is never eligible for pushing */
        dequeue_pushable_task(rq, p);