SMP is booting up but gets stuck in userland. Maybe some process did not come up on the...
author    Jens Krieg <jkrieg@mailbox.tu-berlin.de>
Thu, 15 Aug 2013 19:08:14 +0000 (21:08 +0200)
committer Jens Krieg <jkrieg@mailbox.tu-berlin.de>
Thu, 15 Aug 2013 19:08:14 +0000 (21:08 +0200)
kernel/kthread.c
kernel/sched.new/core.c
kernel/sched.new/sched.h
kernel/sched/core.c

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8021955..bda7fa4 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -264,6 +264,10 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
                WARN_ON(1);
                return;
        }
+
+//     if (cpu == 1)
+//             printk("Tsk(%d) bound to CPU 1\n", p->pid);
+
        /* It's safe because the task is inactive. */
        do_set_cpus_allowed(p, cpumask_of(cpu));
        p->flags |= PF_THREAD_BOUND;
diff --git a/kernel/sched.new/core.c b/kernel/sched.new/core.c
index 8ecf0fa..3bc89f6 100644
--- a/kernel/sched.new/core.c
+++ b/kernel/sched.new/core.c
@@ -8,21 +8,6 @@
  *
  */
 
-/**
- * TODO:  2)        Input from register are not printed on the screen although corresponding
- *                  interrupts are called. After the serial interrupt a thread (0xffff880007054cc0)
- *                  is activated, which is probably responsible for the output. In our case that thread
- *                  is already running, which should be not a problem. At the end a tty function (n_tty_write)
- *                  must be called, which is not, in order to print the input.
- *
- *        3)        While boot up we encounter a kernel stack overflow, which seems to come randomly. Linux
- *                  stack overflow checker gives us a warning but its to late to back trace.
- *                  Maybe we should fix this one first in order to be sure that there are no side effects produced
- *                  by this "error".
- *
- *        4)        We need to strip down clock.c since we use the default linux implementation.
- */
-
 #include <linux/init.h>
 #include <asm/mmu_context.h>
 #include <linux/completion.h>
 #include <linux/export.h>
 #include <linux/context_tracking.h>
 #include <linux/kthread.h>
+#include <linux/init_task.h>
 
 #include <asm/switch_to.h>
+#include <asm/tlb.h>
 #include <linux/cgroup.h>
 #include "sched.h"
 #include "../workqueue_internal.h"
+#include "../smpboot.h"
 
 
 //
@@ -86,6 +74,25 @@ int sysctl_sched_rt_runtime = 950000;
  */
 struct task_group root_task_group;
 
+/*
+ * /kernel/sched/core.c:6582
+ * Special case: If a kmalloc of a doms_cur partition (array of
+ * cpumask) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask fallback_doms.
+ */
+static cpumask_var_t fallback_doms;
+
+/*
+ * /kernel/sched/core.c:5682
+ * cpus with isolated domains
+ */
+static cpumask_var_t cpu_isolated_map;
+
+/*
+ * /kernel/sched/core.c:5323
+ */
+DEFINE_PER_CPU(int, sd_llc_id);
+
 /*
  * /kernel/sched/core.c:2623
  * unknown
@@ -102,6 +109,7 @@ __read_mostly int scheduler_running;
 /*
  * kernel/sched/core.c:113
  */
+DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
 
@@ -151,11 +159,11 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
        }
 }
 
-//static void __task_rq_unlock(struct rq *rq)
-//     __releases(rq->lock)
-//{
-//     raw_spin_unlock(&rq->lock);
-//}
+static void __task_rq_unlock(struct rq *rq)
+       __releases(rq->lock)
+{
+       raw_spin_unlock(&rq->lock);
+}
 
 static inline void
 task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
@@ -194,6 +202,14 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
 void sched_init(void)
 {
        int i;
+       unsigned long alloc_size = 0, ptr;
+
+#ifdef CONFIG_CPUMASK_OFFSTACK
+       alloc_size += num_possible_cpus() * cpumask_size();
+#endif
+       if (alloc_size) {
+               ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
+       }
 
        // TODO: SMP
 //#ifdef CONFIG_SMP
@@ -207,10 +223,24 @@ void sched_init(void)
                struct rq *rq;
 
                rq = cpu_rq(i);
-
+               raw_spin_lock_init(&rq->lock);
+               rq->nr_running = 0;
                INIT_LIST_HEAD(&rq->rq_list);
 
-//#ifdef CONFIG_SMP
+//             rq->calc_load_active = 0;
+//             rq->calc_load_update = jiffies + LOAD_FREQ;
+
+//             init_cfs_rq(&rq->cfs);
+//             init_rt_rq(&rq->rt, rq);
+
+//             rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
+
+//             for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
+//                     rq->cpu_load[j] = 0;
+
+//             rq->last_load_update_tick = jiffies;
+
+#ifdef CONFIG_SMP
 //             rq->sd = NULL;
 //             rq->rd = NULL;
 ////           rq->cpu_power = SCHED_POWER_SCALE;
@@ -218,8 +248,8 @@ void sched_init(void)
 ////           rq->active_balance = 0;
 ////           rq->next_balance = jiffies;
 //             rq->push_cpu = 0;
-//             rq->cpu = i;
-//             rq->online = 0;
+               rq->cpu = i;
+               rq->online = 0;
 ////           rq->idle_stamp = 0;
 ////           rq->avg_idle = 2*sysctl_sched_migration_cost;
 //
@@ -229,26 +259,7 @@ void sched_init(void)
 //#ifdef CONFIG_NO_HZ
 //             rq->nohz_flags = 0;
 //#endif
-//#endif
-
-               raw_spin_lock_init(&rq->lock);
-               rq->nr_running = 0;
-
-//             rq->calc_load_active = 0;
-//             rq->calc_load_update = jiffies + LOAD_FREQ;
-
-//             init_cfs_rq(&rq->cfs);
-//             init_rt_rq(&rq->rt, rq);
-
-//             rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
-
-//             for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
-//                     rq->cpu_load[j] = 0;
-
-//             rq->last_load_update_tick = jiffies;
-
-               // TODO: SMP
-
+#endif
 //             init_rq_hrtick(rq);
                atomic_set(&rq->nr_iowait, 0);
        }
@@ -276,8 +287,9 @@ void sched_init(void)
         */
 //     current->sched_class = &fair_sched_class;
 
-       // TODO: SMP
-
+#ifdef CONFIG_SMP
+       idle_thread_set_boot_cpu();
+#endif
 //     init_sched_fair_class();
 
        scheduler_running = 1;
@@ -319,38 +331,22 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 EXPORT_SYMBOL(__might_sleep);
 #endif
 
-/**
- * /kernel/sched/core.c:4674
- * init_idle - set up an idle thread for a given CPU
- * @idle: task in question
- * @cpu: cpu the idle task belongs to
+/*
+ * kernel/sched/core.c:1560
+ * Perform scheduler related setup for a newly forked process p.
+ * p is forked by current.
  *
- * NOTE: this function does not set the idle thread's NEED_RESCHED
- * flag, to make booting more robust.
+ * __sched_fork() is basic setup used by init_idle() too:
  */
-void __cpuinit init_idle(struct task_struct *idle, int cpu)
+static void __sched_fork(struct task_struct *p)
 {
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-
-       sched_fork(idle);
-       idle->state = TASK_RUNNING;
-       idle->se.exec_start = sched_clock();
-
-       // TODO: SMP
-
-       rq->curr = rq->idle = idle;
-
-       // TODO: SMP
+       p->on_rq                                        = 0;
 
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-
-       /* Set the preempt count _outside_ the spinlocks! */
-       task_thread_info(idle)->preempt_count = 0;
-
-       // TODO: SMP
+       p->se.on_rq                                     = 0;
+       p->se.exec_start                        = 0;
+       p->se.sum_exec_runtime          = 0;
+       p->se.prev_sum_exec_runtime     = 0;
+       p->se.vruntime                          = 0;
 }
 
 /*
@@ -359,17 +355,10 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
  */
 void sched_fork(struct task_struct *p)
 {
-       get_cpu();
-
-       // TODO: SMP, see __sched_fork
-
-       p->on_rq                        = 0;
+       unsigned long flags;
+       int cpu = get_cpu();
 
-       p->se.on_rq                     = 0;
-       p->se.exec_start                = 0;
-       p->se.sum_exec_runtime          = 0;
-       p->se.prev_sum_exec_runtime     = 0;
-       p->se.vruntime                  = 0;
+       __sched_fork(p);
 
        /*
         * We mark the process as running here. This guarantees that
@@ -383,6 +372,13 @@ void sched_fork(struct task_struct *p)
         */
        p->prio = current->normal_prio;
 
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
+       set_task_cpu(p, cpu);
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+#if defined(CONFIG_SMP)
+       p->on_cpu = 0;
+#endif
 #ifdef CONFIG_PREEMPT_COUNT
        /* Want to start with kernel preemption disabled. */
        task_thread_info(p)->preempt_count = 1;
@@ -391,6 +387,55 @@ void sched_fork(struct task_struct *p)
        put_cpu();
 }
 
+/**
+ * /kernel/sched/core.c:4674
+ * init_idle - set up an idle thread for a given CPU
+ * @idle: task in question
+ * @cpu: cpu the idle task belongs to
+ *
+ * NOTE: this function does not set the idle thread's NEED_RESCHED
+ * flag, to make booting more robust.
+ */
+void __cpuinit init_idle(struct task_struct *idle, int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&rq->lock, flags);
+
+       __sched_fork(idle);
+       idle->state = TASK_RUNNING;
+       idle->se.exec_start = sched_clock();
+
+       do_set_cpus_allowed(idle, cpumask_of(cpu));
+       /*
+        * We're having a chicken and egg problem, even though we are
+        * holding rq->lock, the cpu isn't yet set to this cpu so the
+        * lockdep check in task_group() will fail.
+        *
+        * Similar case to sched_fork(). / Alternatively we could
+        * use task_rq_lock() here and obtain the other rq->lock.
+        *
+        * Silence PROVE_RCU
+        */
+       rcu_read_lock();
+       __set_task_cpu(idle, cpu);
+       rcu_read_unlock();
+
+       rq->curr = rq->idle = idle;
+#if defined(CONFIG_SMP)
+       idle->on_cpu = 1;
+#endif
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+       /* Set the preempt count _outside_ the spinlocks! */
+       task_thread_info(idle)->preempt_count = 0;
+
+#if defined(CONFIG_SMP)
+       sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
+}
+
 /*
  * /kernel/sched/cputime.c:436
  * Account multiple ticks of idle time.
@@ -487,11 +532,7 @@ int can_nice(const struct task_struct *p, const int nice)
  */
 struct task_struct *idle_task(int cpu)
 {
-       printk("\ntask_struct");
-
-       // TODO: SMP, return the CPU
-
-       return 0;
+       return cpu_rq(cpu)->idle;
 }
 
 /**
@@ -501,11 +542,20 @@ struct task_struct *idle_task(int cpu)
  */
 int idle_cpu(int cpu)
 {
-       printk("\nidle_cpu");
+       struct rq *rq = cpu_rq(cpu);
 
-       // TODO: SMP
+       if (rq->curr != rq->idle)
+               return 0;
 
-       return 0;
+       if (rq->nr_running)
+               return 0;
+
+#ifdef CONFIG_SMP
+       if (!llist_empty(&rq->wake_list))
+               return 0;
+#endif
+
+       return 1;
 }
 
 /*
@@ -640,18 +690,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
        return 0;
 }
 
-/**
- * kernel/sched/core.c:6850
- */
-void sched_init_smp(void)
-{
-       //printk("\nsched_init_smp");
-
-       // TODO: SMP
-
-       return;
-}
-
 /**
  * kernel/sched/core.c:7571
  */
@@ -729,10 +767,49 @@ void sched_show_task(struct task_struct *p)
  */
 void resched_task(struct task_struct *p)
 {
+       int cpu;
+
        assert_raw_spin_locked(&task_rq(p)->lock);
+
+       if (test_tsk_need_resched(p))
+               return;
+
        set_tsk_need_resched(p);
 
-       // TODO: SMP
+       cpu = task_cpu(p);
+       if (cpu == smp_processor_id())
+               return;
+
+       /* NEED_RESCHED must be visible before we test polling */
+       smp_mb();
+       if (!tsk_is_polling(p))
+               smp_send_reschedule(cpu);
+}
+
+/**
+ * kernel/sched/core.c:1806
+ * prepare_task_switch - prepare to switch tasks
+ * @rq: the runqueue preparing to switch
+ * @prev: the current task that is being switched out
+ * @next: the task we are going to switch to.
+ *
+ * This is called with the rq lock held and interrupts off. It must
+ * be paired with a subsequent finish_task_switch after the context
+ * switch.
+ *
+ * prepare_task_switch sets up locking and calls architecture specific
+ * hooks.
+ */
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+                   struct task_struct *next)
+{
+//     trace_sched_switch(prev, next);
+//     sched_info_switch(prev, next);
+//     perf_event_task_sched_out(prev, next);
+//     fire_sched_out_preempt_notifiers(prev, next);
+       prepare_lock_switch(rq, next);
+//     prepare_arch_switch(next);
 }
 
 /**
@@ -774,24 +851,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
        vtime_task_switch(prev);
 //     finish_arch_switch(prev);
 //     perf_event_task_sched_in(prev, current);
-//     finish_lock_switch(rq, prev);
-
-       // TODO: SMP, in finish_lock_switch
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-       /* this is a valid case when another task releases the spinlock */
-       rq->lock.owner = current;
-#endif
-       /*
-        * If we are tracking spinlock dependencies then we have to
-        * fix up the runqueue lock - which gets 'carried over' from
-        * prev into current:
-        */
-       spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
-       raw_spin_unlock_irq(&rq->lock);
-
-//     finish_arch_post_lock_switch();
+       finish_lock_switch(rq, prev);
+       finish_arch_post_lock_switch();
 
 //     fire_sched_in_preempt_notifiers(current);
        if (mm)
@@ -937,7 +998,6 @@ void update_rq_clock(struct rq *rq)
        update_rq_clock_task(rq, delta);
 }
 
-
 /*
  * kernel/sched/core.c:2684
  * This function gets called by the timer code, with HZ frequency.
@@ -978,7 +1038,7 @@ void scheduler_tick(void)
 
        raw_spin_unlock(&rq->lock);
 
-       // TODO: SMP
+       // TODO: SMP for load balancing
 }
 
 /*
@@ -987,7 +1047,7 @@ void scheduler_tick(void)
  */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
-       //printk("\ntask_delta_exec");
+       printk("\ntask_delta_exec");
 
        // TODO: SMP
 
@@ -1047,6 +1107,173 @@ void update_cpu_load_nohz(void)
        return;
 }
 
+
+/*
+ * kernel/sched/core.c:1207
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ */
+static inline
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+{
+       int cpu = task_cpu(p);
+//     int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+
+       /*
+        * In order not to call set_task_cpu() on a blocking task we need
+        * to rely on ttwu() to place the task on a valid ->cpus_allowed
+        * cpu.
+        *
+        * Since this is common to all placement strategies, this lives here.
+        *
+        * [ this allows ->select_task() to simply return task_cpu(p) and
+        *   not worry about this generic constraint ]
+        */
+       if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) ||
+                    !cpu_online(cpu)))
+               cpu = cpumask_first(tsk_cpus_allowed(p)); //select_fallback_rq(task_cpu(p), p);
+
+       return cpu;
+}
+
+/*
+ * kernel/sched/core.c:736
+ */
+void activate_task(struct rq *rq, struct task_struct *p, int flags)
+{
+       if (task_contributes_to_load(p))
+               rq->nr_uninterruptible--;
+
+//     enqueue_task(rq, p, flags);
+       list_add(&p->rq_tasks, &rq->rq_list);
+}
+
+/*
+ * kernel/sched/core.c:744
+ */
+void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
+{
+       if (task_contributes_to_load(p))
+               rq->nr_uninterruptible++;
+
+//     dequeue_task(rq, p, flags);
+       list_del(&p->rq_tasks);
+}
+
+/*
+ * kernel/sched/core.c:1275
+ */
+static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
+{
+       activate_task(rq, p, en_flags);
+       p->on_rq = 1;
+
+       /* if a worker is waking up, notify workqueue */
+       if (p->flags & PF_WQ_WORKER)
+               wq_worker_waking_up(p, cpu_of(rq));
+}
+
+/*
+ * kernel/sched/core.c:909
+ */
+void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+//     const struct sched_class *class;
+//
+//     if (p->sched_class == rq->curr->sched_class) {
+//             rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+//     } else {
+//             for_each_class(class) {
+//                     if (class == rq->curr->sched_class)
+//                             break;
+//                     if (class == p->sched_class) {
+//                             resched_task(rq->curr);
+//                             break;
+//                     }
+//             }
+//     }
+       if (rq->curr == rq->idle)
+               resched_task(rq->curr);
+
+       /*
+        * A queue event has occurred, and we're going to schedule.  In
+        * this case, we can save a useless back to back clock update.
+        */
+       if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
+               rq->skip_clock_update = 1;
+}
+
+/*
+ * kernel/sched/core:1289
+ * Mark the task runnable and perform wakeup-preemption.
+ */
+static void
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+//     trace_sched_wakeup(p, true);
+       check_preempt_curr(rq, p, wake_flags);
+
+       p->state = TASK_RUNNING;
+//#ifdef CONFIG_SMP
+//     if (p->sched_class->task_woken)
+//             p->sched_class->task_woken(rq, p);
+//
+//     if (rq->idle_stamp) {
+//             u64 delta = rq->clock - rq->idle_stamp;
+//             u64 max = 2*sysctl_sched_migration_cost;
+//
+//             if (delta > max)
+//                     rq->avg_idle = max;
+//             else
+//                     update_avg(&rq->avg_idle, delta);
+//             rq->idle_stamp = 0;
+//     }
+//#endif
+}
+
+/*
+ * kernel/sched/core.c:1313
+ */
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+#ifdef CONFIG_SMP
+       if (p->sched_contributes_to_load)
+               rq->nr_uninterruptible--;
+#endif
+
+       ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
+       ttwu_do_wakeup(rq, p, wake_flags);
+}
+
+/*
+ * kernel/sched/core.c:1394
+ */
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
+{
+       if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
+               smp_send_reschedule(cpu);
+}
+
+/*
+ * kernel/sched/core.c:1406
+ */
+static void ttwu_queue(struct task_struct *p, int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+#if defined(CONFIG_SMP)
+       if (/*sched_feat(TTWU_QUEUE) && */!cpus_share_cache(smp_processor_id(), cpu)) {
+               sched_clock_cpu(cpu); /* sync clocks x-cpu */
+               ttwu_queue_remote(p, cpu);
+               return;
+       }
+#endif
+
+       raw_spin_lock(&rq->lock);
+       ttwu_do_activate(rq, p, 0);
+       raw_spin_unlock(&rq->lock);
+}
+
 /*
  * kernel/sched/core.c:1703
  * wake_up_new_task - wake up a newly created task for the first time.
@@ -1055,33 +1282,62 @@ void update_cpu_load_nohz(void)
  * that must be done for every newly created context, then puts the task
  * on the runqueue and wakes it.
  */
-void wake_up_new_task(struct task_struct *tsk)
+void wake_up_new_task(struct task_struct *p)
 {
        unsigned long flags;
-       struct rq *rq = cpu_rq(0);
+       struct rq *rq;
+//     int cpu = 255;
 
-       raw_spin_lock_irqsave(&tsk->pi_lock, flags);
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
 
-       // TODO: SMP
+#ifdef CONFIG_SMP
+       /*
+        * Fork balancing, do it here and not earlier because:
+        *  - cpus_allowed can change in the fork path
+        *  - any previously selected cpu might disappear through hotplug
+        */
+       set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
+//     printk("new thread\n");
+//     for_each_cpu(cpu, &(p->cpus_allowed)){
+//             printk("Asked for CPU #%d\n", cpu);
+//     }
 
-       raw_spin_lock(&rq->lock);
+#endif
 
-//     if (list_empty(&rq->rq_list)) {
-//             printk("Liste leer (wake_up_new_task) -> Task: %p\n", (void*)tsk);
-//     }
-       list_add(&tsk->rq_tasks, &rq->rq_list);
-       tsk->on_rq = 1;
+       rq = __task_rq_lock(p);
+       activate_task(rq, p, 0);
+       p->on_rq = 1;
+//     trace_sched_wakeup_new(p, true);
+       check_preempt_curr(rq, p, WF_FORK);
+//#ifdef CONFIG_SMP
+//     if (p->sched_class->task_woken)
+//             p->sched_class->task_woken(rq, p);
+//#endif
+       task_rq_unlock(rq, p, &flags);
+}
 
-       if (rq->curr == rq->idle)
-               resched_task(rq->curr);
+/*
+ * kernel/sched/core:1330
+ * Called in case the task @p isn't fully descheduled from its runqueue,
+ * in this case we must do a remote wakeup. Its a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING, since
+ * the task is still ->on_rq.
+ */
+static int ttwu_remote(struct task_struct *p, int wake_flags)
+{
+       struct rq *rq;
+       int ret = 0;
 
-       // TODO: SMP
+       rq = __task_rq_lock(p);
+       if (p->on_rq) {
+               ttwu_do_wakeup(rq, p, wake_flags);
+               ret = 1;
+       }
+       __task_rq_unlock(rq);
 
-       raw_spin_unlock(&rq->lock);
-       raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
+       return ret;
 }
 
-
 /**
  * kernel/sched/core.c:1439
  * try_to_wake_up - wake up a thread
@@ -1103,9 +1359,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
        unsigned long flags;
        int cpu, success = 0;
-       int ret = 0;
-
-       struct rq *rq = cpu_rq(0);
 
        smp_wmb();
        raw_spin_lock_irqsave(&p->pi_lock, flags);
@@ -1115,36 +1368,38 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
 
-       raw_spin_lock(&rq->lock);
-       assert_raw_spin_locked(&rq->lock);
-
-       if (p->on_rq) {
-               if (rq->curr == rq->idle)
-                       resched_task(rq->curr);
-               ret = 1;
-       }
-
-       if (ret)
+       if (p->on_rq && ttwu_remote(p, wake_flags))
                goto stat;
 
-       // TODO: SMP
+#ifdef CONFIG_SMP
+       /*
+        * If the owning (remote) cpu is still in the middle of schedule() with
+        * this task as prev, wait until its done referencing the task.
+        */
+       while (p->on_cpu)
+               cpu_relax();
+       /*
+        * Pairs with the smp_wmb() in finish_lock_switch().
+        */
+       smp_rmb();
 
-       p->on_rq = 1;
-       p->state = TASK_RUNNING;
+//     p->sched_contributes_to_load = !!task_contributes_to_load(p);
+       p->state = TASK_WAKING;
 
-//     if (list_empty(&rq->rq_list)) {
-//             printk("Liste leer (try_to_wake_up) -> Task: %p\n", (void*)p);
-//     }
-       list_add(&p->rq_tasks, &rq->rq_list);
+//     if (p->sched_class->task_waking)
+//             p->sched_class->task_waking(p);
 
-       if (rq->curr == rq->idle)
-               resched_task(rq->curr);
+       // TODO: simply not using select_task_rq :)
+       cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+       if (task_cpu(p) != cpu) {
+               wake_flags |= WF_MIGRATED;
+               set_task_cpu(p, cpu);
+       }
+#endif /* CONFIG_SMP */
 
-       /* if a worker is waking up, notify workqueue */
-       if (p->flags & PF_WQ_WORKER)
-               wq_worker_waking_up(p, 0);
+       ttwu_queue(p, cpu);
 stat:
-       raw_spin_unlock(&rq->lock);
+//     raw_spin_unlock(&rq->lock);
 out:
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -1170,8 +1425,6 @@ static void try_to_wake_up_local(struct task_struct *p)
 
        lockdep_assert_held(&rq->lock);
 
-       assert_raw_spin_locked(&rq->lock);
-
        if (!raw_spin_trylock(&p->pi_lock)) {
                raw_spin_unlock(&rq->lock);
                raw_spin_lock(&p->pi_lock);
@@ -1181,24 +1434,11 @@ static void try_to_wake_up_local(struct task_struct *p)
        if (!(p->state & TASK_NORMAL))
                goto out;
 
-       p->on_rq = 1;
-       p->state = TASK_RUNNING;
-
-       if (!&rq->lock) {
-               printk("Lock not set!!!!!\n");
-       }
-
-       list_add(&p->rq_tasks, &rq->rq_list);
-
-       /* if a worker is waking up, notify workqueue */
-       if (p->flags & PF_WQ_WORKER)
-               wq_worker_waking_up(p, cpu_of(rq));
-
-       if (rq->curr == rq->idle)
-               resched_task(rq->curr);
-
-       // TODO: SMP, used in original but just for statistics
+       if (!p->on_rq)
+               ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
+       ttwu_do_wakeup(rq, p, 0);
+//     ttwu_stat(p, smp_processor_id(), 0);
 out:
        raw_spin_unlock(&p->pi_lock);
 }
@@ -1214,7 +1454,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 {
        struct mm_struct *mm, *oldmm;
 
-//     prepare_task_switch(rq, prev, next);
+       prepare_task_switch(rq, prev, next);
 
        mm = next->mm;
        oldmm = prev->active_mm;
@@ -1299,6 +1539,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
  *          - return from syscall or exception to user-space
  *          - return from interrupt-handler to user-space
  */
+volatile struct rq *rq1, *rq2;
 volatile struct task_struct test_tsk;
 static void __sched __schedule(void)
 {
@@ -1314,6 +1555,14 @@ need_resched:
        rcu_note_context_switch(cpu);
        prev = rq->curr;
 
+
+       if (cpu == 0)
+               rq1 = rq;
+       else if (cpu == 1)
+               rq2 = rq;
+       else
+               printk("WTF!!!\n");
+
 //     schedule_debug(prev);
 
 //     if (sched_feat(HRTICK))
@@ -1354,32 +1603,36 @@ need_resched:
 //             idle_balance(cpu, rq);
 
 //     put_prev_task(rq, prev);
-//     next = pick_next_task(rq);
-
-       if (prev != rq->idle && prev->on_rq)
+       if ((prev != rq->idle) && prev->on_rq) {
                list_add_tail(&prev->rq_tasks, &rq->rq_list);
+       }
 
        /*      In case the only runnable task gets deactivated, we need to schedule
         *      the idle tasks.
         */
-       if(!list_empty(&rq->rq_list)) {
+//     next = pick_next_task(rq);
+       if (!list_empty(&rq->rq_list)) {
                assert_raw_spin_locked(&rq->lock);
                next = list_first_entry(&rq->rq_list, struct task_struct, rq_tasks);
                list_del(&next->rq_tasks);
-//             list_add_tail(&next->rq_tasks, &rq->rq_list);
        }
        else {
                next = rq->idle;
-//             printk("Liste leer (schedule)\n");
        }
        next->se.exec_start = rq->clock_task;
 
 
        clear_tsk_need_resched(prev);
-//     rq->skip_clock_update = 0;
+       rq->skip_clock_update = 0;
 
-//     if (next->pid == 3)
-//             printk("task(3) scheduled\n");
+
+//     if ((next->pid == 12) ||
+//             (next->pid == 13) ||
+//             (next->pid == 14))
+//             printk("tolle wurst!");
+//
+//     if (task_cpu(next) == 1)
+//             printk("Task (%i) on CPU 1\n", next->pid);
 
        if (likely(prev != next)) {
                rq->nr_switches++;
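
The pick-next logic added in this hunk replaces pick_next_task() with a plain FIFO policy: a still-runnable prev is appended to the tail of rq->rq_list, the next task is popped from the head, and the idle task runs when the list is empty. Below is a minimal user-space sketch of that policy; the task and runqueue types are illustrative stand-ins, not the kernel structures.

/* Minimal sketch of the FIFO pick-next policy used above. */
#include <stdio.h>
#include <stddef.h>

struct task {
        int pid;
        struct task *next;              /* a singly linked FIFO is enough here */
};

struct runqueue {
        struct task *head, *tail;       /* rq->rq_list analogue */
        struct task idle;               /* rq->idle analogue */
};

static void rq_enqueue(struct runqueue *rq, struct task *t)
{
        t->next = NULL;
        if (rq->tail)
                rq->tail->next = t;
        else
                rq->head = t;
        rq->tail = t;
}

/* prev goes to the tail if still runnable, next comes from the head,
 * idle is picked when the list is empty. */
static struct task *pick_next(struct runqueue *rq, struct task *prev, int prev_runnable)
{
        struct task *next;

        if (prev != &rq->idle && prev_runnable)
                rq_enqueue(rq, prev);

        if (rq->head) {
                next = rq->head;
                rq->head = next->next;
                if (!rq->head)
                        rq->tail = NULL;
        } else {
                next = &rq->idle;
        }
        return next;
}

int main(void)
{
        struct runqueue rq = { .idle = { .pid = 0 } };
        struct task a = { .pid = 1 }, b = { .pid = 2 };
        struct task *cur = &rq.idle;

        rq_enqueue(&rq, &a);
        rq_enqueue(&rq, &b);

        for (int i = 0; i < 5; i++) {
                cur = pick_next(&rq, cur, 1);
                printf("scheduled pid %d\n", cur->pid);
        }
        return 0;
}

With two runnable tasks this prints them round-robin (1, 2, 1, 2, ...), which is exactly the behaviour the list_add_tail()/list_first_entry() pair above implements per runqueue.
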
@@ -1971,120 +2224,189 @@ EXPORT_SYMBOL(wait_for_completion_timeout);
  */
 #ifdef CONFIG_SMP
 
-///*
-// * kernel/sched/core.c:4822
-// * Move (not current) task off this cpu, onto dest cpu. We're doing
-// * this because either it can't run here any more (set_cpus_allowed()
-// * away from this CPU, or CPU going down), or because we're
-// * attempting to rebalance this task on exec (sched_exec).
-// *
-// * So we race with normal scheduler movements, but that's OK, as long
-// * as the task is no longer on this CPU.
-// *
-// * Returns non-zero if task was successfully migrated.
-// */
-//static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
-//{
-//     struct rq *rq_dest, *rq_src;
-//     int ret = 0;
-//
-//     if (unlikely(!cpu_active(dest_cpu)))
-//             return ret;
-//
-//     rq_src = cpu_rq(src_cpu);
-//     rq_dest = cpu_rq(dest_cpu);
-//
-//     raw_spin_lock(&p->pi_lock);
-//     double_rq_lock(rq_src, rq_dest);
-//     /* Already moved. */
-//     if (task_cpu(p) != src_cpu)
-//             goto done;
-//     /* Affinity changed (again). */
-//     if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
-//             goto fail;
-//
-//     /*
-//      * If we're not on a rq, the next wake-up will ensure we're
-//      * placed properly.
-//      */
-//     if (p->on_rq) {
-////           dequeue_task(rq_src, p, 0);
-////           list_del(&next->rq_tasks);
-////           set_task_cpu(p, dest_cpu);
-////           enqueue_task(rq_dest, p, 0);
-////           check_preempt_curr(rq_dest, p, 0);
-//     }
-//done:
-//     ret = 1;
-//fail:
-//     double_rq_unlock(rq_src, rq_dest);
-//     raw_spin_unlock(&p->pi_lock);
-//     return ret;
-//}
+struct migration_arg {
+       struct task_struct *task;
+       int dest_cpu;
+};
+
+/*
+ * kernel/sched/core.c:4822
+ * Move (not current) task off this cpu, onto dest cpu. We're doing
+ * this because either it can't run here any more (set_cpus_allowed()
+ * away from this CPU, or CPU going down), or because we're
+ * attempting to rebalance this task on exec (sched_exec).
+ *
+ * So we race with normal scheduler movements, but that's OK, as long
+ * as the task is no longer on this CPU.
+ *
+ * Returns non-zero if task was successfully migrated.
+ */
+static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+{
+       struct rq *rq_dest, *rq_src;
+       int ret = 0;
+
+       if (unlikely(!cpu_active(dest_cpu)))
+               return ret;
+
+       rq_src = cpu_rq(src_cpu);
+       rq_dest = cpu_rq(dest_cpu);
+
+       raw_spin_lock(&p->pi_lock);
+       double_rq_lock(rq_src, rq_dest);
+       /* Already moved. */
+       if (task_cpu(p) != src_cpu)
+               goto done;
+       /* Affinity changed (again). */
+       if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+               goto fail;
+
+       /*
+        * If we're not on a rq, the next wake-up will ensure we're
+        * placed properly.
+        */
+       if (p->on_rq) {
+//             dequeue_task(rq_src, p, 0);
+               list_del(&p->rq_tasks);
+               // TODO: maybe not necessary since both rq locks are held
+               p->on_rq = 0;
+               set_task_cpu(p, dest_cpu);
+//             enqueue_task(rq_dest, p, 0);
+               list_add(&p->rq_tasks, &rq_dest->rq_list);
+               // TODO: maybe not necessary since both rq locks are held
+               p->on_rq = 1;
+//             check_preempt_curr(rq_dest, p, 0);
+               if (rq_dest->curr == rq_dest->idle)
+                       resched_task(rq_dest->curr);
+       }
+done:
+       ret = 1;
+fail:
+       double_rq_unlock(rq_src, rq_dest);
+       raw_spin_unlock(&p->pi_lock);
+       return ret;
+}
+
+/*
+ * kernel/sched/core:4865
+ * migration_cpu_stop - this will be executed by a highprio stopper thread
+ * and performs thread migration by bumping thread off CPU then
+ * 'pushing' onto another runqueue.
+ */
+static int migration_cpu_stop(void *data)
+{
+       struct migration_arg *arg = data;
+
+       /*
+        * The original target cpu might have gone down and we might
+        * be on another cpu but it doesn't matter.
+        */
+       local_irq_disable();
+       __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
+       local_irq_enable();
+       return 0;
+}
 
 void do_set_cpus_allowed(struct task_struct *p,
                               const struct cpumask *new_mask)
 {
 //     if (p->sched_class && p->sched_class->set_cpus_allowed)
 //             p->sched_class->set_cpus_allowed(p, new_mask);
-//
-//     cpumask_copy(&p->cpus_allowed, new_mask);
-//     p->nr_cpus_allowed = cpumask_weight(new_mask);
+
+       cpumask_copy(&p->cpus_allowed, new_mask);
+       p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
-int set_cpus_allowed_ptr(struct task_struct *p,
-                               const struct cpumask *new_mask)
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
-//     unsigned long flags;
-//     struct rq *rq;
-//     unsigned int dest_cpu;
-//     int ret = 0;
-//
-//     rq = task_rq_lock(p, &flags);
-//
-//     if (cpumask_equal(&p->cpus_allowed, new_mask))
-//             goto out;
-//
-//     if (!cpumask_intersects(new_mask, cpu_active_mask)) {
-//             ret = -EINVAL;
-//             goto out;
-//     }
-//
-//     if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
-//             ret = -EINVAL;
-//             goto out;
-//     }
-//
-//     do_set_cpus_allowed(p, new_mask);
-//
-//     /* Can the task run on the task's current CPU? If so, we're done */
-//     if (cpumask_test_cpu(task_cpu(p), new_mask))
-//             goto out;
-//
-//     dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-//     if (p->on_rq) {
-//             /* Need help from migration thread: drop lock and wait. */
-//             task_rq_unlock(rq, p, &flags);
-//             stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
-//
-//             local_irq_disable();
-//             __migrate_task(p, raw_smp_processor_id(), dest_cpu);
-//             local_irq_enable();
-//
-//             tlb_migrate_finish(p->mm);
-//             return 0;
-//     }
-//out:
-//     task_rq_unlock(rq, p, &flags);
-//
-//     return ret;
-       return 0;
+       unsigned long flags;
+       struct rq *rq;
+       unsigned int dest_cpu;
+       int ret = 0;
+
+       rq = task_rq_lock(p, &flags);
+
+       if (cpumask_equal(&p->cpus_allowed, new_mask))
+               goto out;
+
+       if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       do_set_cpus_allowed(p, new_mask);
+
+       /* Can the task run on the task's current CPU? If so, we're done */
+       if (cpumask_test_cpu(task_cpu(p), new_mask))
+               goto out;
+
+       dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+       if (p->on_rq) {
+               struct migration_arg arg = { p, dest_cpu };
+               /* Need help from migration thread: drop lock and wait. */
+               task_rq_unlock(rq, p, &flags);
+               stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+               tlb_migrate_finish(p->mm);
+               return 0;
+       }
+out:
+       task_rq_unlock(rq, p, &flags);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
+static void sched_ttwu_pending(void)
+{
+       struct rq *rq = this_rq();
+       struct llist_node *llist = llist_del_all(&rq->wake_list);
+       struct task_struct *p;
+
+       raw_spin_lock(&rq->lock);
+
+       while (llist) {
+               p = llist_entry(llist, struct task_struct, wake_entry);
+               llist = llist_next(llist);
+               ttwu_do_activate(rq, p, 0);
+       }
+
+       raw_spin_unlock(&rq->lock);
+}
+
 void scheduler_ipi(void)
 {
+       if (llist_empty(&this_rq()->wake_list)) // && !got_nohz_idle_kick())
+               return;
+
+       /*
+        * Not all reschedule IPI handlers call irq_enter/irq_exit, since
+        * traditionally all their work was done from the interrupt return
+        * path. Now that we actually do some work, we need to make sure
+        * we do call them.
+        *
+        * Some archs already do call them, luckily irq_enter/exit nest
+        * properly.
+        *
+        * Arguably we should visit all archs and update all handlers,
+        * however a fair share of IPIs are still resched only so this would
+        * somewhat pessimize the simple resched case.
+        */
+       irq_enter();
+       sched_ttwu_pending();
 
+       /*
+        * Check if someone kicked us for doing the nohz idle load balance.
+        */
+       if (unlikely(/*got_nohz_idle_kick() && */!need_resched())) {
+//             this_rq()->idle_balance = 1;
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+       }
+       irq_exit();
 }
 
 /*
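
ttwu_queue_remote(), sched_ttwu_pending() and scheduler_ipi() above hand a wakeup to another CPU by pushing the task onto that CPU's lock-free wake_list (the kernel's llist) and sending a reschedule IPI; the receiving CPU later detaches the whole list in one step and drains it under its own rq->lock. The following user-space analogue of the llist_add()/llist_del_all() pattern is only a sketch with C11 atomics; the names and the single-thread demo are illustrative, not the kernel API.

/* Sketch of the llist_add / llist_del_all pattern used for remote wakeups. */
#include <stdatomic.h>
#include <stdio.h>
#include <stddef.h>

struct wake_node {
        struct wake_node *next;
        int pid;                        /* stands in for the task */
};

struct wake_list {
        _Atomic(struct wake_node *) first;
};

/* llist_add() analogue: lock-free push; returns 1 if the list was empty
 * (the kernel uses that to decide whether a reschedule IPI is needed). */
static int wake_list_add(struct wake_list *l, struct wake_node *n)
{
        struct wake_node *old = atomic_load(&l->first);
        do {
                n->next = old;
        } while (!atomic_compare_exchange_weak(&l->first, &old, n));
        return old == NULL;
}

/* llist_del_all() analogue: the "target CPU" takes the whole list at once
 * and then walks it privately (in the kernel: under its rq->lock). */
static struct wake_node *wake_list_del_all(struct wake_list *l)
{
        return atomic_exchange(&l->first, NULL);
}

int main(void)
{
        struct wake_list wl = { .first = NULL };
        struct wake_node a = { .pid = 11 }, b = { .pid = 12 };

        if (wake_list_add(&wl, &a))
                printf("list was empty: would send reschedule IPI\n");
        wake_list_add(&wl, &b);

        for (struct wake_node *n = wake_list_del_all(&wl); n; n = n->next)
                printf("waking pid %d\n", n->pid);
        return 0;
}
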
@@ -2196,29 +2518,69 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
        return ncsw;
 }
 
-void kick_process(struct task_struct *tsk)
+/***
+ * kernel/sched/core:1116
+ * kick_process - kick a running thread to enter/exit the kernel
+ * @p: the to-be-kicked thread
+ *
+ * Cause a process which is running on another CPU to enter
+ * kernel-mode, without any delay. (to get signals handled.)
+ *
+ * NOTE: this function doesn't have to take the runqueue lock,
+ * because all it wants to ensure is that the remote task enters
+ * the kernel. If the IPI races and the task has been migrated
+ * to another CPU then no harm is done and the purpose has been
+ * achieved as well.
+ */
+void kick_process(struct task_struct *p)
 {
+       int cpu;
 
+       preempt_disable();
+       cpu = task_cpu(p);
+       if ((cpu != smp_processor_id()) && task_curr(p))
+               smp_send_reschedule(cpu);
+       preempt_enable();
 }
+EXPORT_SYMBOL_GPL(kick_process);
 
 void sched_set_stop_task(int cpu, struct task_struct *stop)
 {
-
+       printk("\nsched_set_stop_task");
 }
 
 bool completion_done(struct completion *x)
 {
+       printk("\ncompletion_done");
+
        return 0;
 }
 
+/*
+ * kernel/sched/core:2605
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache footprint.
+ */
 void sched_exec(void)
 {
+       struct task_struct *p = current;
+       unsigned long flags;
+       int dest_cpu;
 
-}
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
+       dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
+       if (dest_cpu == smp_processor_id())
+               goto unlock;
 
-bool cpus_share_cache(int this_cpu, int that_cpu)
-{
-       return 0;
+       if (likely(cpu_active(dest_cpu))) {
+               struct migration_arg arg = { p, dest_cpu };
+
+               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+               return;
+       }
+unlock:
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 }
 
 ///*
@@ -2264,7 +2626,110 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
 //
 //     atomic_set(&def_root_domain.refcount, 1);
 //}
+
+void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+{
+#ifdef CONFIG_SCHED_DEBUG
+       /*
+        * We should never call set_task_cpu() on a blocked task,
+        * ttwu() will sort out the placement.
+        */
+       WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
+                       !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+
+#ifdef CONFIG_LOCKDEP
+       /*
+        * The caller should hold either p->pi_lock or rq->lock, when changing
+        * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
+        *
+        * sched_move_task() holds both and thus holding either pins the cgroup,
+        * see task_group().
+        *
+        * Furthermore, all task_rq users should acquire both locks, see
+        * task_rq_lock().
+        */
+       WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+                                     lockdep_is_held(&task_rq(p)->lock)));
 #endif
+#endif
+
+       // TODO: SMP, needs to be implemented when migration/load balancing is used
+//     trace_sched_migrate_task(p, new_cpu);
+//
+//     if (task_cpu(p) != new_cpu) {
+//             struct task_migration_notifier tmn;
+//
+//             if (p->sched_class->migrate_task_rq)
+//                     p->sched_class->migrate_task_rq(p, new_cpu);
+//             p->se.nr_migrations++;
+//             perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+//
+//             tmn.task = p;
+//             tmn.from_cpu = task_cpu(p);
+//             tmn.to_cpu = new_cpu;
+//
+//             atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
+//     }
+
+       __set_task_cpu(p, new_cpu);
+}
+
+/**
+ * kernel/sched/core.c:6820
+ */
+void __init sched_init_smp(void)
+{
+       cpumask_var_t non_isolated_cpus;
+
+       alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
+       alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+
+//     sched_init_numa();
+
+       get_online_cpus();
+       mutex_lock(&sched_domains_mutex);
+//     init_sched_domains(cpu_active_mask);
+       cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+       if (cpumask_empty(non_isolated_cpus))
+               cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
+       mutex_unlock(&sched_domains_mutex);
+       put_online_cpus();
+
+//     hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
+//     hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+//     hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
+
+       /* RT runtime code needs to handle some hotplug events */
+//     hotcpu_notifier(update_runtime, 0);
+
+//     init_hrtick();
+
+       /* Move init over to a non-isolated CPU */
+       if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
+               BUG();
+//     sched_init_granularity();
+       free_cpumask_var(non_isolated_cpus);
+
+//     init_sched_rt_class();
+}
+
+bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+       return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
+}
+
+#else
+
+/**
+ * kernel/sched/core.c:6856
+ */
+void sched_init_smp(void)
+{
+       //printk("\nsched_init_smp");
+
+       return;
+}
+#endif /* CONFIG_SMP */
 
 
 
diff --git a/kernel/sched.new/sched.h b/kernel/sched.new/sched.h
index 07dd7d6..105adad 100644
--- a/kernel/sched.new/sched.h
+++ b/kernel/sched.new/sched.h
@@ -1,6 +1,8 @@
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/rt.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
 
 #include <linux/list.h>
 
@@ -18,6 +20,14 @@ struct rq {
 
        struct list_head rq_list;
 
+       /*
+        * This is part of a global counter where only the total sum
+        * over all CPUs matters. A task can increase this counter on
+        * one CPU and if it got migrated afterwards it may decrease
+        * it on another CPU. Always updated under the runqueue lock:
+        */
+       unsigned long nr_uninterruptible;
+
        struct task_struct *curr, *idle, *stop;
 
        struct mm_struct *prev_mm;
@@ -33,7 +43,9 @@ struct rq {
 
        /* cpu of this runqueue: */
        int cpu;
+       int online;
 
+       struct llist_head wake_list;
 
 #endif
 
@@ -57,6 +69,132 @@ DECLARE_PER_CPU(struct rq, runqueues);
 #define raw_rq()               (&__raw_get_cpu_var(runqueues))
 
 
+#ifdef CONFIG_SMP
+
+/*
+ * kernel/sched/sched.h:1105
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+       __acquires(rq1->lock)
+       __acquires(rq2->lock)
+{
+       BUG_ON(!irqs_disabled());
+       if (rq1 == rq2) {
+               raw_spin_lock(&rq1->lock);
+               __acquire(rq2->lock);   /* Fake it out ;) */
+       } else {
+               if (rq1 < rq2) {
+                       raw_spin_lock(&rq1->lock);
+                       raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+               } else {
+                       raw_spin_lock(&rq2->lock);
+                       raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+               }
+       }
+}
+
+/*
+ * kernel/sched/sched.h:1130
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+       __releases(rq1->lock)
+       __releases(rq2->lock)
+{
+       raw_spin_unlock(&rq1->lock);
+       if (rq1 != rq2)
+               raw_spin_unlock(&rq2->lock);
+       else
+               __release(rq2->lock);
+}
+
+#endif
+
+#ifndef prepare_arch_switch
+# define prepare_arch_switch(next)     do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)      do { } while (0)
+#endif
+#ifndef finish_arch_post_lock_switch
+# define finish_arch_post_lock_switch()        do { } while (0)
+#endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+       /*
+        * We can optimise this out completely for !SMP, because the
+        * SMP rebalancing from interrupt is the only thing that cares
+        * here.
+        */
+       next->on_cpu = 1;
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+       /*
+        * After ->on_cpu is cleared, the task can be moved to a different CPU.
+        * We must ensure this doesn't happen until the switch is completely
+        * finished.
+        */
+       smp_wmb();
+       prev->on_cpu = 0;
+#endif
+#ifdef CONFIG_DEBUG_SPINLOCK
+       /* this is a valid case when another task releases the spinlock */
+       rq->lock.owner = current;
+#endif
+       /*
+        * If we are tracking spinlock dependencies then we have to
+        * fix up the runqueue lock - which gets 'carried over' from
+        * prev into current:
+        */
+       spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+       raw_spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+       /*
+        * We can optimise this out completely for !SMP, because the
+        * SMP rebalancing from interrupt is the only thing that cares
+        * here.
+        */
+       next->on_cpu = 1;
+#endif
+       raw_spin_unlock(&rq->lock);
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+       /*
+        * After ->on_cpu is cleared, the task can be moved to a different CPU.
+        * We must ensure this doesn't happen until the switch is completely
+        * finished.
+        */
+       smp_wmb();
+       prev->on_cpu = 0;
+#endif
+       local_irq_enable();
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+
 void account_idle_ticks(unsigned long ticks);
 
 void account_process_tick(struct task_struct *p, int user_tick);
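
double_rq_lock() above prevents ABBA deadlock by always taking the lower-addressed runqueue lock first, so two CPUs locking the same pair in opposite order can never each hold one lock while waiting for the other. A small pthread sketch of the same ordering rule follows; it uses ordinary mutexes and a toy runqueue type instead of the kernel's raw spinlocks.

/* Sketch of the address-ordered locking rule used by double_rq_lock(). */
#include <pthread.h>
#include <stdio.h>

struct runqueue {
        pthread_mutex_t lock;
        int nr_running;
};

/* Always lock the lower-addressed runqueue first so that concurrent
 * callers locking the same pair never deadlock. */
static void double_rq_lock(struct runqueue *rq1, struct runqueue *rq2)
{
        if (rq1 == rq2) {
                pthread_mutex_lock(&rq1->lock);
        } else if (rq1 < rq2) {
                pthread_mutex_lock(&rq1->lock);
                pthread_mutex_lock(&rq2->lock);
        } else {
                pthread_mutex_lock(&rq2->lock);
                pthread_mutex_lock(&rq1->lock);
        }
}

static void double_rq_unlock(struct runqueue *rq1, struct runqueue *rq2)
{
        pthread_mutex_unlock(&rq1->lock);
        if (rq1 != rq2)
                pthread_mutex_unlock(&rq2->lock);
}

int main(void)
{
        struct runqueue a = { PTHREAD_MUTEX_INITIALIZER, 1 };
        struct runqueue b = { PTHREAD_MUTEX_INITIALIZER, 0 };

        /* "Migrate" one task from a to b with both queues locked,
         * as __migrate_task() does under double_rq_lock(). */
        double_rq_lock(&a, &b);
        a.nr_running--;
        b.nr_running++;
        double_rq_unlock(&a, &b);

        printf("a=%d b=%d\n", a.nr_running, b.nr_running);
        return 0;
}
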
@@ -193,3 +331,16 @@ static inline int task_running(struct rq *rq, struct task_struct *p)
        return task_current(rq, p);
 #endif
 }
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+       /*
+        * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+        * successfully executed on another CPU. We must ensure that updates of
+        * per-task data have been completed by this moment.
+        */
+       smp_wmb();
+       task_thread_info(p)->cpu = cpu;
+#endif
+}
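
__set_task_cpu() above completes all per-task updates before the smp_wmb() and only then publishes the new ->cpu, so a remote CPU that observes the new value also observes the earlier updates. The same publish-after-write ordering can be sketched with C11 release/acquire operations; everything below (the names, the spinning reader) is illustrative and not the kernel's barrier API.

/* Sketch of the publication ordering behind __set_task_cpu():
 * write the payload first, then release-store the index other CPUs
 * key off; readers acquire-load the index before touching the payload. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static int per_task_data;               /* updated before the "cpu" is published */
static atomic_int task_cpu = -1;        /* stands in for task_thread_info(p)->cpu */

static void *writer(void *arg)
{
        (void)arg;
        per_task_data = 42;                                             /* per-task updates */
        atomic_store_explicit(&task_cpu, 1, memory_order_release);     /* barrier + publish */
        return NULL;
}

static void *reader(void *arg)
{
        (void)arg;
        /* Spin until the new cpu value is visible ... */
        while (atomic_load_explicit(&task_cpu, memory_order_acquire) != 1)
                ;
        /* ... at which point the earlier write is guaranteed visible too. */
        printf("per_task_data = %d\n", per_task_data);
        return NULL;
}

int main(void)
{
        pthread_t w, r;

        pthread_create(&r, NULL, reader, NULL);
        pthread_create(&w, NULL, writer, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
}
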
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1184dbc..634e887 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2878,6 +2878,7 @@ pick_next_task(struct rq *rq)
  *          - return from syscall or exception to user-space
  *          - return from interrupt-handler to user-space
  */
+volatile struct rq *rq1, *rq2;
 static void __sched __schedule(void)
 {
        struct task_struct *prev, *next;
@@ -2892,6 +2893,13 @@ need_resched:
        rcu_note_context_switch(cpu);
        prev = rq->curr;
 
+       if (cpu == 0)
+               rq1 = rq;
+       else if (cpu == 1)
+               rq2 = rq;
+       else
+               printk("WTF!!!\n");
+
        schedule_debug(prev);
 
        if (sched_feat(HRTICK))
@@ -2933,6 +2941,14 @@ need_resched:
        clear_tsk_need_resched(prev);
        rq->skip_clock_update = 0;
 
+       if ((next->pid == 12) ||
+               (next->pid == 13) ||
+               (next->pid == 14))
+               printk("tolle wurst!");
+
+       if (task_cpu(next) == 1)
+               printk("Task (%i) on CPU 1\n", next->pid);
+
        if (likely(prev != next)) {
                rq->nr_switches++;
                rq->curr = next;
@@ -2955,6 +2971,7 @@ need_resched:
        sched_preempt_enable_no_resched();
        if (need_resched())
                goto need_resched;
+
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)