Merge tag 'v3.15' into p/abusse/merge_upgrade
[projects/modsched/linux.git] / kernel / sched / idle.c
1 /*
2  * Generic entry point for the idle threads
3  */
4 #include <linux/sched.h>
5 #include <linux/cpu.h>
6 #include <linux/cpuidle.h>
7 #include <linux/tick.h>
8 #include <linux/mm.h>
9 #include <linux/stackprotector.h>
10
11 #include <asm/tlb.h>
12
13 #include <trace/events/power.h>
14
15 static int __read_mostly cpu_idle_force_poll;
16
17 void cpu_idle_poll_ctrl(bool enable)
18 {
19         if (enable) {
20                 cpu_idle_force_poll++;
21         } else {
22                 cpu_idle_force_poll--;
23                 WARN_ON_ONCE(cpu_idle_force_poll < 0);
24         }
25 }
26
27 #ifdef CONFIG_GENERIC_IDLE_POLL_SETUP
28 static int __init cpu_idle_poll_setup(char *__unused)
29 {
30         cpu_idle_force_poll = 1;
31         return 1;
32 }
33 __setup("nohlt", cpu_idle_poll_setup);
34
35 static int __init cpu_idle_nopoll_setup(char *__unused)
36 {
37         cpu_idle_force_poll = 0;
38         return 1;
39 }
40 __setup("hlt", cpu_idle_nopoll_setup);
41 #endif
42
43 static inline int cpu_idle_poll(void)
44 {
45         rcu_idle_enter();
46         trace_cpu_idle_rcuidle(0, smp_processor_id());
47         local_irq_enable();
48         while (!tif_need_resched())
49                 cpu_relax();
50         trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
51         rcu_idle_exit();
52         return 1;
53 }
54
55 /* Weak implementations for optional arch specific functions */
56 void __weak arch_cpu_idle_prepare(void) { }
57 void __weak arch_cpu_idle_enter(void) { }
58 void __weak arch_cpu_idle_exit(void) { }
59 void __weak arch_cpu_idle_dead(void) { }
60 void __weak arch_cpu_idle(void)
61 {
62         cpu_idle_force_poll = 1;
63         local_irq_enable();
64 }
65
66 /**
67  * cpuidle_idle_call - the main idle function
68  *
69  * NOTE: no locks or semaphores should be used here
70  * return non-zero on failure
71  */
72 static int cpuidle_idle_call(void)
73 {
74         struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
75         struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
76         int next_state, entered_state, ret;
77         bool broadcast;
78
79         /*
80          * Check if the idle task must be rescheduled. If it is the
81          * case, exit the function after re-enabling the local irq and
82          * set again the polling flag
83          */
84         if (current_clr_polling_and_test()) {
85                 local_irq_enable();
86                 __current_set_polling();
87                 return 0;
88         }
89
90         /*
91          * During the idle period, stop measuring the disabled irqs
92          * critical sections latencies
93          */
94         stop_critical_timings();
95
96         /*
97          * Tell the RCU framework we are entering an idle section,
98          * so no more rcu read side critical sections and one more
99          * step to the grace period
100          */
101         rcu_idle_enter();
102
103         /*
104          * Check if the cpuidle framework is ready, otherwise fallback
105          * to the default arch specific idle method
106          */
107         ret = cpuidle_enabled(drv, dev);
108
109         if (!ret) {
110                 /*
111                  * Ask the governor to choose an idle state it thinks
112                  * it is convenient to go to. There is *always* a
113                  * convenient idle state
114                  */
115                 next_state = cpuidle_select(drv, dev);
116
117                 /*
118                  * The idle task must be scheduled, it is pointless to
119                  * go to idle, just update no idle residency and get
120                  * out of this function
121                  */
122                 if (current_clr_polling_and_test()) {
123                         dev->last_residency = 0;
124                         entered_state = next_state;
125                         local_irq_enable();
126                 } else {
127                         broadcast = !!(drv->states[next_state].flags &
128                                        CPUIDLE_FLAG_TIMER_STOP);
129
130                         if (broadcast)
131                                 /*
132                                  * Tell the time framework to switch
133                                  * to a broadcast timer because our
134                                  * local timer will be shutdown. If a
135                                  * local timer is used from another
136                                  * cpu as a broadcast timer, this call
137                                  * may fail if it is not available
138                                  */
139                                 ret = clockevents_notify(
140                                         CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
141                                         &dev->cpu);
142
143                         if (!ret) {
144                                 trace_cpu_idle_rcuidle(next_state, dev->cpu);
145
146                                 /*
147                                  * Enter the idle state previously
148                                  * returned by the governor
149                                  * decision. This function will block
150                                  * until an interrupt occurs and will
151                                  * take care of re-enabling the local
152                                  * interrupts
153                                  */
154                                 entered_state = cpuidle_enter(drv, dev,
155                                                               next_state);
156
157                                 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT,
158                                                        dev->cpu);
159
160                                 if (broadcast)
161                                         clockevents_notify(
162                                                 CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
163                                                 &dev->cpu);
164
165                                 /*
166                                  * Give the governor an opportunity to reflect on the
167                                  * outcome
168                                  */
169                                 cpuidle_reflect(dev, entered_state);
170                         }
171                 }
172         }
173
174         /*
175          * We can't use the cpuidle framework, let's use the default
176          * idle routine
177          */
178         if (ret)
179                 arch_cpu_idle();
180
181         __current_set_polling();
182
183         /*
184          * It is up to the idle functions to enable back the local
185          * interrupt
186          */
187         if (WARN_ON_ONCE(irqs_disabled()))
188                 local_irq_enable();
189
190         rcu_idle_exit();
191         start_critical_timings();
192
193         return 0;
194 }
195
196 /*
197  * Generic idle loop implementation
198  */
199 static void cpu_idle_loop(void)
200 {
201         while (1) {
202                 tick_nohz_idle_enter();
203
204                 while (!need_resched()) {
205                         check_pgt_cache();
206                         rmb();
207
208                         if (cpu_is_offline(smp_processor_id()))
209                                 arch_cpu_idle_dead();
210
211                         local_irq_disable();
212                         arch_cpu_idle_enter();
213
214                         /*
215                          * In poll mode we reenable interrupts and spin.
216                          *
217                          * Also if we detected in the wakeup from idle
218                          * path that the tick broadcast device expired
219                          * for us, we don't want to go deep idle as we
220                          * know that the IPI is going to arrive right
221                          * away
222                          */
223                         if (cpu_idle_force_poll || tick_check_broadcast_expired())
224                                 cpu_idle_poll();
225                         else
226                                 cpuidle_idle_call();
227
228                         arch_cpu_idle_exit();
229                 }
230
231                 /*
232                  * Since we fell out of the loop above, we know
233                  * TIF_NEED_RESCHED must be set, propagate it into
234                  * PREEMPT_NEED_RESCHED.
235                  *
236                  * This is required because for polling idle loops we will
237                  * not have had an IPI to fold the state for us.
238                  */
239                 preempt_set_need_resched();
240                 tick_nohz_idle_exit();
241                 schedule_preempt_disabled();
242         }
243 }
244
245 void cpu_startup_entry(enum cpuhp_state state)
246 {
247         /*
248          * This #ifdef needs to die, but it's too late in the cycle to
249          * make this generic (arm and sh have never invoked the canary
250          * init for the non boot cpus!). Will be fixed in 3.11
251          */
252 #ifdef CONFIG_X86
253         /*
254          * If we're the non-boot CPU, nothing set the stack canary up
255          * for us. The boot CPU already has it initialized but no harm
256          * in doing it again. This is a good place for updating it, as
257          * we wont ever return from this function (so the invalid
258          * canaries already on the stack wont ever trigger).
259          */
260         boot_init_stack_canary();
261 #endif
262         __current_set_polling();
263         arch_cpu_idle_prepare();
264         cpu_idle_loop();
265 }