Merge tag 'v4.1' into p/abusse/merge_upgrade
[projects/modsched/linux.git] / kernel / sched / cfs / sched.h
index 9a2a45c..e0e1299 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
+#include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
 
@@ -362,8 +363,14 @@ struct cfs_rq {
         * Under CFS, load is tracked on a per-entity basis and aggregated up.
         * This allows for the description of both thread and group usage (in
         * the FAIR_GROUP_SCHED case).
+        * runnable_load_avg is the sum of the load_avg_contrib of the
+        * sched_entities on the rq.
+        * blocked_load_avg is similar to runnable_load_avg except that its
+        * the blocked sched_entities on the rq.
+        * utilization_load_avg is the sum of the average running time of the
+        * sched_entities on the rq.
         */
-       unsigned long runnable_load_avg, blocked_load_avg;
+       unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg;
        atomic64_t decay_counter;
        u64 last_decay;
        atomic_long_t removed_load;
@@ -418,6 +425,11 @@ static inline int rt_bandwidth_enabled(void)
        return sysctl_sched_rt_runtime >= 0;
 }
 
+/* RT IPI pull logic requires IRQ_WORK */
+#ifdef CONFIG_IRQ_WORK
+# define HAVE_RT_PUSH_IPI
+#endif
+
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
        struct rt_prio_array active;
@@ -435,7 +447,13 @@ struct rt_rq {
        unsigned long rt_nr_total;
        int overloaded;
        struct plist_head pushable_tasks;
+#ifdef HAVE_RT_PUSH_IPI
+       int push_flags;
+       int push_cpu;
+       struct irq_work push_work;
+       raw_spinlock_t push_lock;
 #endif
+#endif /* CONFIG_SMP */
        int rt_queued;
 
        int rt_throttled;
@@ -558,8 +576,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ_FULL
        unsigned long last_sched_tick;
 #endif
-       int skip_clock_update;
-
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
        unsigned long nr_load_updates;
@@ -588,6 +604,7 @@ struct rq {
        unsigned long next_balance;
        struct mm_struct *prev_mm;
 
+       unsigned int clock_skip_update;
        u64 clock;
        u64 clock_task;
 
@@ -598,6 +615,7 @@ struct rq {
        struct sched_domain *sd;
 
        unsigned long cpu_capacity;
+       unsigned long cpu_capacity_orig;
 
        unsigned char idle_balance;
        /* For active balancing */
@@ -687,16 +705,35 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
 #define raw_rq()               raw_cpu_ptr(&runqueues)
 
+static inline u64 __rq_clock_broken(struct rq *rq)
+{
+       return ACCESS_ONCE(rq->clock);
+}
+
 static inline u64 rq_clock(struct rq *rq)
 {
+       lockdep_assert_held(&rq->lock);
        return rq->clock;
 }
 
 static inline u64 rq_clock_task(struct rq *rq)
 {
+       lockdep_assert_held(&rq->lock);
        return rq->clock_task;
 }
 
+#define RQCF_REQ_SKIP  0x01
+#define RQCF_ACT_SKIP  0x02
+
+static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+{
+       lockdep_assert_held(&rq->lock);
+       if (skip)
+               rq->clock_skip_update |= RQCF_REQ_SKIP;
+       else
+               rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
        NUMA_DIRECT,
@@ -789,7 +826,7 @@ struct sched_group_capacity {
         * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
         * for a single CPU.
         */
-       unsigned int capacity, capacity_orig;
+       unsigned int capacity;
        unsigned long next_update;
        int imbalance; /* XXX unrelated to capacity but shared group state */
        /*
@@ -1350,9 +1387,18 @@ static inline int hrtick_enabled(struct rq *rq)
 
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+       return SCHED_CAPACITY_SCALE;
+}
+#endif
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
-       rq->rt_avg += rt_delta;
+       rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
        sched_avg_update(rq);
 }
 #else
@@ -1362,6 +1408,82 @@ static inline void sched_avg_update(struct rq *rq) { }
 
 extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
 
+/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+static inline struct rq *__task_rq_lock(struct task_struct *p)
+       __acquires(rq->lock)
+{
+       struct rq *rq;
+
+       lockdep_assert_held(&p->pi_lock);
+
+       for (;;) {
+               rq = task_rq(p);
+               raw_spin_lock(&rq->lock);
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+                       return rq;
+               raw_spin_unlock(&rq->lock);
+
+               while (unlikely(task_on_rq_migrating(p)))
+                       cpu_relax();
+       }
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+       __acquires(p->pi_lock)
+       __acquires(rq->lock)
+{
+       struct rq *rq;
+
+       for (;;) {
+               raw_spin_lock_irqsave(&p->pi_lock, *flags);
+               rq = task_rq(p);
+               raw_spin_lock(&rq->lock);
+               /*
+                *      move_queued_task()              task_rq_lock()
+                *
+                *      ACQUIRE (rq->lock)
+                *      [S] ->on_rq = MIGRATING         [L] rq = task_rq()
+                *      WMB (__set_task_cpu())          ACQUIRE (rq->lock);
+                *      [S] ->cpu = new_cpu             [L] task_rq()
+                *                                      [L] ->on_rq
+                *      RELEASE (rq->lock)
+                *
+                * If we observe the old cpu in task_rq_lock, the acquire of
+                * the old rq->lock will fully serialize against the stores.
+                *
+                * If we observe the new cpu in task_rq_lock, the acquire will
+                * pair with the WMB to ensure we must then also see migrating.
+                */
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+                       return rq;
+               raw_spin_unlock(&rq->lock);
+               raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+               while (unlikely(task_on_rq_migrating(p)))
+                       cpu_relax();
+       }
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+       __releases(rq->lock)
+{
+       raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+       __releases(rq->lock)
+       __releases(p->pi_lock)
+{
+       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+}
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PREEMPT
 
@@ -1549,8 +1671,8 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
 extern void print_dl_stats(struct seq_file *m, int cpu);
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
-extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq);
+extern void init_rt_rq(struct rt_rq *rt_rq);
+extern void init_dl_rq(struct dl_rq *dl_rq);
 
 extern void cfs_bandwidth_usage_inc(void);
 extern void cfs_bandwidth_usage_dec(void);