Modifications for Linux v4.4.
author    Anselm Busse <anselm.busse@tu-berlin.de>
Mon, 18 Jan 2016 21:17:44 +0000 (22:17 +0100)
committer Anselm Busse <anselm.busse@tu-berlin.de>
Sat, 23 Jan 2016 20:15:56 +0000 (21:15 +0100)
framework/include/os/linux.h
framework/os/linux/linux_sched.c
framework/os/linux/os_sched.c

framework/include/os/linux.h
index 2eb4129..3850be4 100644
@@ -489,7 +489,8 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 #endif
 }
 
-void context_switch(struct rq *rq, struct task_struct *prev,
+inline struct rq *
+context_switch(struct rq *rq, struct task_struct *prev,
               struct task_struct *next);
 
 
framework/os/linux/linux_sched.c
index d9f10f6..d63ab8b 100644
@@ -803,14 +803,13 @@ void resched_task(struct task_struct *p)
 
 void resched_cpu(int cpu)
 {
-       BUG();
-/*     struct rq *rq = cpu_rq(cpu);
+       struct rq *rq = cpu_rq(cpu);
        unsigned long flags;
 
        if (!raw_spin_trylock_irqsave(&rq->lock, flags))
                return;
        resched_task(cpu_curr(cpu));
-       raw_spin_unlock_irqrestore(&rq->lock, flags);*/
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 
@@ -833,6 +832,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
                    struct task_struct *next)
 {
        prepare_lock_switch(rq, next);
+       prepare_arch_switch(next);
 }
 
 /**
@@ -851,12 +851,29 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static void finish_task_switch(struct rq *rq, struct task_struct *prev)
+static struct rq *finish_task_switch(struct task_struct *prev)
        __releases(rq->lock)
 {
+       struct rq *rq = this_rq();
        struct mm_struct *mm = rq->prev_mm;
        long prev_state;
 
+       /*
+        * The previous task will have left us with a preempt_count of 2
+        * because it left us after:
+        *
+        *      schedule()
+        *        preempt_disable();                    // 1
+        *        __schedule()
+        *          raw_spin_lock_irq(&rq->lock)        // 2
+        *
+        * Also, see FORK_PREEMPT_COUNT.
+        */
+       if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
+                     "corrupted preempt_count: %s/%d/0x%x\n",
+                     current->comm, current->pid, preempt_count()))
+               preempt_count_set(FORK_PREEMPT_COUNT);
+
        rq->prev_mm = NULL;
 
        /*
@@ -885,6 +902,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
                kprobe_flush_task(prev);
                put_task_struct(prev);
        }
+       
+       return rq;
 }
 
 #ifdef CONFIG_SMP
@@ -916,24 +935,24 @@ static inline void post_schedule(struct rq *rq)
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
  */
-asmlinkage void schedule_tail(struct task_struct *prev)
+asmlinkage __visible void schedule_tail(struct task_struct *prev)
        __releases(rq->lock)
 {
-       struct rq *rq = this_rq();
-
-       finish_task_switch(rq, prev);
+       struct rq *rq;
 
        /*
-        * FIXME: do we need to worry about rq being invalidated by the
-        * task_switch?
+        * New tasks start with FORK_PREEMPT_COUNT, see there and
+        * finish_task_switch() for details.
+        *
+        * finish_task_switch() will drop rq->lock() and lower preempt_count
+        * and the preempt_enable() will end up enabling preemption (on
+        * PREEMPT_COUNT kernels).
         */
-       post_schedule(rq);
-       arch_local_irq_enable();
 
-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
-       /* In this case, finish_task_switch does not reenable preemption */
+       rq = finish_task_switch(prev);
+       //balance_callback(rq);
        preempt_enable();
-#endif
+
        if (current->set_child_tid)
                put_user(task_pid_vnr(current), current->set_child_tid);
 }
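
The WARN_ONCE added to finish_task_switch() and the comment above both lean on the fork-side half of the mainline FORK_PREEMPT_COUNT contract. For reference, roughly as it appears in mainline v4.4 <linux/sched.h> (not part of this patch; the framework's sched_fork() path is assumed to seed the child the same way):

	/* Reference sketch, mainline v4.4, abridged. */
	#define FORK_PREEMPT_COUNT	(2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)

	#define init_task_preempt_count(p) do { \
		task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
	} while (0)

	/*
	 * sched_fork() applies this to every child, so a new task enters
	 * schedule_tail() looking exactly like a task coming back from
	 * __schedule() with rq->lock held: finish_task_switch() drops the
	 * lock (count 2 -> 1) and the final preempt_enable() (1 -> 0)
	 * leaves the child running with preemption enabled.
	 */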
@@ -1117,10 +1136,38 @@ void update_cpu_load_nohz(void)
  * kernel/sched/core.c:1207
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
+//static inline
+//int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+//{
+//     int cpu = task_cpu(p);
+//
+//     /*
+//      * In order not to call set_task_cpu() on a blocking task we need
+//      * to rely on ttwu() to place the task on a valid ->cpus_allowed
+//      * cpu.
+//      *
+//      * Since this is common to all placement strategies, this lives here.
+//      *
+//      * [ this allows ->select_task() to simply return task_cpu(p) and
+//      *   not worry about this generic constraint ]
+//      */
+//     if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
+//                  !cpu_online(cpu)))
+//             cpu = cpumask_first(tsk_cpus_allowed(p)); //select_fallback_rq(task_cpu(p), p);
+//
+//     return cpu;
+//}
+/*
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ */
 static inline
-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
-       int cpu = task_cpu(p);
+       lockdep_assert_held(&p->pi_lock);
+
+       //if (p->nr_cpus_allowed > 1)
+               //cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+       cpu = task_cpu(p);
 
        /*
         * In order not to call set_task_cpu() on a blocking task we need
@@ -1132,6 +1179,10 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
         * [ this allows ->select_task() to simply return task_cpu(p) and
         *   not worry about this generic constraint ]
         */
+       //if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
+       //           !cpu_online(cpu)))
+       //      cpu = select_fallback_rq(task_cpu(p), p);
+
        if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
                     !cpu_online(cpu)))
                cpu = cpumask_first(tsk_cpus_allowed(p)); //select_fallback_rq(task_cpu(p), p);
@@ -1139,6 +1190,9 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
        return cpu;
 }
 
+
+
+
 /*
  * kernel/sched/core.c:736
  */
@@ -1152,13 +1206,53 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
        }
 }
 
+static void
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
+{
+#ifdef CONFIG_SCHEDSTATS
+//     struct rq *rq = this_rq();
+//
+//#ifdef CONFIG_SMP
+//     int this_cpu = smp_processor_id();
+//
+//     if (cpu == this_cpu) {
+//             schedstat_inc(rq, ttwu_local);
+//             schedstat_inc(p, se.statistics.nr_wakeups_local);
+//     } else {
+//             struct sched_domain *sd;
+//
+//             schedstat_inc(p, se.statistics.nr_wakeups_remote);
+//             rcu_read_lock();
+//             for_each_domain(this_cpu, sd) {
+//                     if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
+//                             schedstat_inc(sd, ttwu_wake_remote);
+//                             break;
+//                     }
+//             }
+//             rcu_read_unlock();
+//     }
+//
+//     if (wake_flags & WF_MIGRATED)
+//             schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+//
+//#endif /* CONFIG_SMP */
+//
+//     schedstat_inc(rq, ttwu_count);
+//     schedstat_inc(p, se.statistics.nr_wakeups);
+//
+//     if (wake_flags & WF_SYNC)
+//             schedstat_inc(p, se.statistics.nr_wakeups_sync);
+//
+#endif /* CONFIG_SCHEDSTATS */
+}
+
 /*
  * kernel/sched/core.c:1275
  */
-static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
+static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
 {
        activate_task(rq, p, en_flags);
-//     p->on_rq = 1;
+       //p->on_rq = TASK_ON_RQ_QUEUED;
 
        /* if a worker is waking up, notify workqueue */
        if (p->flags & PF_WQ_WORKER)
@@ -1241,7 +1335,8 @@ void wake_up_new_task(struct task_struct *p)
        struct rq *rq;
 
        raw_spin_lock_irqsave(&p->pi_lock, flags);
-
+       /* Initialize new task's runnable average */
+       //init_entity_runnable_average(&p->se);
 #ifdef CONFIG_SMP
        /*
         * Fork balancing, do it here and not earlier because:
@@ -1314,18 +1409,51 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        cpu = task_cpu(p);
 
        if(((p->fw_task.state == FW_READY)||(p->fw_task.state == FW_RUNNING)) && ttwu_remote(p,wake_flags))
-               goto out;
+               goto stat;
 
 
 
 #ifdef CONFIG_SMP
        /*
-        * Pairs with the smp_wmb() in finish_lock_switch().
+        * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+        * possible to, falsely, observe p->on_cpu == 0.
+        *
+        * One must be running (->on_cpu == 1) in order to remove oneself
+        * from the runqueue.
+        *
+        *  [S] ->on_cpu = 1;   [L] ->on_rq
+        *      UNLOCK rq->lock
+        *                      RMB
+        *      LOCK   rq->lock
+        *  [S] ->on_rq = 0;    [L] ->on_cpu
+        *
+        * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+        * from the consecutive calls to schedule(); the first switching to our
+        * task, the second putting it to sleep.
         */
        smp_rmb();
+
+       /*
+        * If the owning (remote) cpu is still in the middle of schedule() with
+        * this task as prev, wait until its done referencing the task.
+        */
+       while (p->on_cpu)
+               cpu_relax();
+       /*
+        * Combined with the control dependency above, we have an effective
+        * smp_load_acquire() without the need for full barriers.
+        *
+        * Pairs with the smp_store_release() in finish_lock_switch().
+        *
+        * This ensures that tasks getting woken will be fully ordered against
+        * their previous state and preserve Program Order.
+        */
+       smp_rmb();
+
+       p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
 
-       cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+       cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
        if (task_cpu(p) != cpu) {
                wake_flags |= WF_MIGRATED;
                set_task_cpu(p, cpu);
@@ -1333,8 +1461,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 #endif /* CONFIG_SMP */
 
        ttwu_queue(p, cpu);
+stat:
+       ttwu_stat(p, cpu, wake_flags);
 out:
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
        return success;
 }
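
The ordering comments added above pair with the release store of ->on_cpu at the tail of a context switch. For reference, the store side, abridged from mainline v4.4 kernel/sched/sched.h (the patch assumes the framework keeps this helper unchanged):

	/* Reference sketch, abridged; not part of the patch. */
	static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
	{
	#ifdef CONFIG_SMP
		/*
		 * Once ->on_cpu reads 0, the waker spinning in try_to_wake_up()
		 * may proceed; smp_store_release() guarantees it then also sees
		 * every store the previous __schedule() made to the task.
		 */
		smp_store_release(&prev->on_cpu, 0);
	#endif
		raw_spin_unlock_irq(&rq->lock);
	}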
 
@@ -1380,7 +1511,7 @@ out:
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-void
+inline struct rq *
 context_switch(struct rq *rq, struct task_struct *prev,
               struct task_struct *next)
 {
@@ -1409,9 +1540,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
         * of the scheduler it's an obvious special-case), so we
         * do an early lockdep release here:
         */
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+       lockdep_unpin_lock(&rq->lock);
        spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
-#endif
 
        /* Here we just switch the register state and the stack. */
        switch_to(prev, next, prev);
@@ -1422,7 +1552,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
         * CPUs since it called schedule(), thus the 'rq' on its stack
         * frame will be invalid.
         */
-       finish_task_switch(this_rq(), prev);
+       return finish_task_switch(prev);
 
 }
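
context_switch() now returns the runqueue that finish_task_switch() recomputed, because after switch_to() the task may resume on another CPU and the rq the caller kept on its stack is stale. A minimal caller-side sketch of the intended pattern, following the mainline v4.4 __schedule() shape (the function name is illustrative; balance_callback() is still commented out in this framework):

	/* Sketch only, not part of the patch. */
	static void switch_or_unlock_sketch(struct rq *rq, struct task_struct *prev,
					    struct task_struct *next)
	{
		if (prev != next) {
			rq->curr = next;
			/* drops rq->lock and hands back the rq we run on now */
			rq = context_switch(rq, prev, next);
		} else {
			lockdep_unpin_lock(&rq->lock);
			raw_spin_unlock_irq(&rq->lock);
		}
		/* mainline would call balance_callback(rq) here */
	}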
 
@@ -1485,24 +1615,46 @@ void rq_list_print(void)
  *          - return from syscall or exception to user-space
  *          - return from interrupt-handler to user-space
  */
-static void /*__sched*/ __schedule(void)
+static void /*__sched*/ __schedule(bool preempt)
 {
        struct task_struct *prev;//, *next, *fw_next;
        struct rq *rq;
        int cpu;
 
-       preempt_disable();
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
        rcu_note_context_switch();
        prev = rq->curr;
 
+       /*
+        * do_exit() calls schedule() with preemption disabled as an exception;
+        * however we must fix that up, otherwise the next task will see an
+        * inconsistent (higher) preempt count.
+        *
+        * It also avoids the below schedule_debug() test from complaining
+        * about this.
+        */
+       if (unlikely(prev->state == TASK_DEAD))
+               preempt_enable_no_resched_notrace();
+       
+//     schedule_debug(prev);
+       
+//     if (sched_feat(HRTICK))
+//             hrtick_clear(rq);
+       
+       /*
+        * Make sure that signal_pending_state()->signal_pending() below
+        * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+        * done by the caller to avoid the race with signal_wake_up().
+        */
+       smp_mb__before_spinlock();
        raw_spin_lock_irq(&rq->lock);
+       lockdep_pin_lock(&rq->lock);
 
 //     if(cpu == 1)
 //             printk("___!!!___\t%s\t acquired lock %p in schedule. Interrupts are %s\n", prev->comm, &rq->lock, arch_irqs_disabled() ? "off" : "on");
 
-       if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
+       if (!preempt && prev->state) {
                if (unlikely(signal_pending_state(prev->state, prev))) {
                        prev->state = TASK_RUNNING;
                }
@@ -1542,16 +1694,37 @@ static void /*__sched*/ __schedule(void)
 
 }
 
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+       if (!tsk->state || tsk_is_pi_blocked(tsk))
+                       return;
+       /*
+        * If we are going to sleep and we have plugged IO queued,
+        * make sure to submit it to avoid deadlocks.
+        */
+       if (blk_needs_flush_plug(tsk))
+                       blk_schedule_flush_plug(tsk);
+}
+
+
 /*
  * kernel/sched/core.c:2966
  */
-asmlinkage void __sched schedule(void)
+asmlinkage __visible void __sched schedule(void)
 {
-       __schedule();
+       struct task_struct *tsk = current;
+       
+       sched_submit_work(tsk);
+       do {
+               preempt_disable();
+               __schedule(false);
+               sched_preempt_enable_no_resched();
+       } while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
 
 
+
 /**
  * kernel/sched/core.c:1536
  * wake_up_process - Wake up a specific process
@@ -1566,7 +1739,6 @@ EXPORT_SYMBOL(schedule);
  */
 int wake_up_process(struct task_struct *p)
 {
-       WARN_ON(task_is_stopped_or_traced(p));
        return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
@@ -1628,6 +1800,21 @@ void __sched schedule_preempt_disabled(void)
        preempt_disable();
 }
 
+static void __sched notrace preempt_schedule_common(void)
+{
+       do {
+               preempt_disable_notrace();
+               __schedule(true);
+               preempt_enable_no_resched_notrace();
+               
+               /*
+                * Check again in case we missed a preemption opportunity
+                * between schedule and now.
+                */
+       } while (need_resched());
+}
+
+
 /*
  * kernel/sched/core.c:6858
  */
@@ -1650,17 +1837,10 @@ int __cond_resched_lock(spinlock_t *lock)
        return 0;
 }
 
-static void __cond_resched(void)
-{
-       __preempt_count_add(PREEMPT_ACTIVE);
-       __schedule();
-       __preempt_count_sub(PREEMPT_ACTIVE);
-}
-
 int __sched _cond_resched(void)
 {
        if (should_resched(0)) {
-               __cond_resched();
+               preempt_schedule_common();
                return 1;
        }
        return 0;
@@ -1681,10 +1861,8 @@ EXPORT_SYMBOL(default_wake_function);
  */
 int wake_up_state(struct task_struct *p, unsigned int state)
 {
-       WARN_ON(task_is_stopped_or_traced(p));
        return try_to_wake_up(p, state, 0);
 }
-EXPORT_SYMBOL(wake_up_process);
 
 /**
  * kernel/sched/core.c:4389
@@ -1986,10 +2164,22 @@ static struct notifier_block migration_notifier = {
 static int sched_cpu_active(struct notifier_block *nfb,
                                      unsigned long action, void *hcpu)
 {
+       int cpu = (long)hcpu;
+
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_STARTING:
+               return NOTIFY_OK;
+       case CPU_ONLINE:
+               /*
+                * At this point a starting CPU has marked itself as online via
+                * set_cpu_online(). But it might not yet have marked itself
+                * as active, which is essential from here on.
+                */
+               set_cpu_active(cpu, true);
+               stop_machine_unpark(cpu);
+               return NOTIFY_OK;               
        case CPU_DOWN_FAILED:
-               set_cpu_active((long)hcpu, true);
+               set_cpu_active(cpu, true);
                return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
@@ -2085,16 +2275,20 @@ void sched_ttwu_pending(void)
        struct rq *rq = this_rq();
        struct llist_node *llist = llist_del_all(&rq->wake_list);
        struct task_struct *p;
+       unsigned long flags;
 
-       raw_spin_lock(&rq->lock);
-
+       if (!llist)
+               return;
+       
+       raw_spin_lock_irqsave(&rq->lock, flags);
+       
        while (llist) {
                p = llist_entry(llist, struct task_struct, wake_entry);
                llist = llist_next(llist);
                ttwu_do_activate(rq, p, 0);
        }
 
-       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
 void scheduler_ipi(void)
@@ -2275,7 +2469,7 @@ void sched_exec(void)
        int dest_cpu;
 
        raw_spin_lock_irqsave(&p->pi_lock, flags);
-       dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
+       dest_cpu = select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
        if (dest_cpu == smp_processor_id())
                goto unlock;
 
framework/os/linux/os_sched.c
index ea36d5a..a997663 100644
@@ -28,6 +28,7 @@ void os_dispatch(struct fw_task *task)
        }
        rq->curr = next;
        if(next == prev){
+               lockdep_unpin_lock(&rq->lock);
                raw_spin_unlock(&rq->lock);
                return;
        }
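
The unpin added here closes the loop with the lockdep_pin_lock() that __schedule() now performs right after taking rq->lock: any path that releases the lock early, like this prev == next shortcut, must unpin first or lockdep reports a pinned lock being dropped. A compressed sketch of the discipline, with an illustrative function name:

	/* Sketch only, not part of the patch. */
	static void rq_pin_discipline_sketch(struct rq *rq)
	{
		raw_spin_lock_irq(&rq->lock);
		lockdep_pin_lock(&rq->lock);	/* __schedule() pins right after locking */

		/* ... pick the next task, possibly decide not to switch ... */

		lockdep_unpin_lock(&rq->lock);	/* context_switch()/os_dispatch() unpin */
		raw_spin_unlock_irq(&rq->lock);	/* only then may the lock be released */
	}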