49e13e1f8fe6a5e481edb3026ae20360918986c3
[projects/modsched/linux.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/bitmap.h>
27 #include <linux/signal.h>
28 #include <linux/printk.h>
29 #include <linux/proc_fs.h>
30 #include <linux/security.h>
31 #include <linux/ctype.h>
32 #include <linux/kmemcheck.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/kexec.h>
66
67 #include <asm/uaccess.h>
68 #include <asm/processor.h>
69
70 #ifdef CONFIG_X86
71 #include <asm/nmi.h>
72 #include <asm/stacktrace.h>
73 #include <asm/io.h>
74 #endif
75 #ifdef CONFIG_SPARC
76 #include <asm/setup.h>
77 #endif
78 #ifdef CONFIG_BSD_PROCESS_ACCT
79 #include <linux/acct.h>
80 #endif
81 #ifdef CONFIG_RT_MUTEXES
82 #include <linux/rtmutex.h>
83 #endif
84 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
85 #include <linux/lockdep.h>
86 #endif
87 #ifdef CONFIG_CHR_DEV_SG
88 #include <scsi/sg.h>
89 #endif
90
91 #ifdef CONFIG_LOCKUP_DETECTOR
92 #include <linux/nmi.h>
93 #endif
94
95
96 #if defined(CONFIG_SYSCTL)
97
/*
 * Variables that are not declared in any header file, so this file
 * declares them itself.
 */
extern int max_threads;
extern int suid_dumpable;
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min;
extern int sysctl_nr_open_max;

#ifdef CONFIG_COREDUMP
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif /* CONFIG_COREDUMP */

#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif /* !CONFIG_MMU */

#ifdef CONFIG_BLOCK
extern int blk_iopoll_enabled;
#endif /* CONFIG_BLOCK */
118
119 /* Constants used for minimum and  maximum */
120 #ifdef CONFIG_LOCKUP_DETECTOR
121 static int sixty = 60;
122 #endif
123
124 static int __maybe_unused neg_one = -1;
125
126 static int zero;
127 static int __maybe_unused one = 1;
128 static int __maybe_unused two = 2;
129 static int __maybe_unused three = 3;
130 static unsigned long one_ul = 1;
131 static int one_hundred = 100;
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000;
134 #endif
135
136 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
137 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
138
139 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
140 static int maxolduid = 65535;
141 static int minolduid;
142 static int min_percpu_pagelist_fract = 8;
143
144 static int ngroups_max = NGROUPS_MAX;
145 static const int cap_last_cap = CAP_LAST_CAP;
146
147 #ifdef CONFIG_INOTIFY_USER
148 #include <linux/inotify.h>
149 #endif
150 #ifdef CONFIG_SPARC
151 #endif
152
/* Externs that are only declared for particular architectures/configurations. */
#if defined(CONFIG_SPARC64)
extern int sysctl_tsb_ratio;		/* data of the "tsb-ratio" entry */
#endif

#if defined(__hppa__)
extern int pwrsw_enabled;		/* data of the "soft-power" entry */
#endif

#if defined(CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW)
extern int unaligned_enabled;		/* data of the "unaligned-trap" entry */
#endif

#if defined(CONFIG_IA64)
extern int unaligned_dump_stack;
#endif

#if defined(CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN)
extern int no_unaligned_warning;
#endif
172
173 #ifdef CONFIG_PROC_SYSCTL
174 static int proc_do_cad_pid(struct ctl_table *table, int write,
175                   void __user *buffer, size_t *lenp, loff_t *ppos);
176 static int proc_taint(struct ctl_table *table, int write,
177                                void __user *buffer, size_t *lenp, loff_t *ppos);
178 #endif
179
180 #ifdef CONFIG_PRINTK
181 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
182                                 void __user *buffer, size_t *lenp, loff_t *ppos);
183 #endif
184
185 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
186                 void __user *buffer, size_t *lenp, loff_t *ppos);
187 #ifdef CONFIG_COREDUMP
188 static int proc_dostring_coredump(struct ctl_table *table, int write,
189                 void __user *buffer, size_t *lenp, loff_t *ppos);
190 #endif
191
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: the sysrq code uses its own private copy. */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * sysrq_sysctl_handler - proc handler of the "sysrq" table entry
 * @table:  the sysctl table entry (its .data is &__sysrq_enabled)
 * @write:  non-zero when the entry is being written
 * @buffer: user buffer, passed through to proc_dointvec()
 * @lenp:   length of @buffer, passed through to proc_dointvec()
 * @ppos:   file position, passed through to proc_dointvec()
 *
 * Lets proc_dointvec() do the actual read or write.  When that succeeds
 * and the access was a write, the value now held in __sysrq_enabled is
 * handed to sysrq_toggle_support().
 *
 * Returns the error from proc_dointvec() if it failed, 0 otherwise.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp,
				loff_t *ppos)
{
	int error;

	error = proc_dointvec(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	/* Reads leave __sysrq_enabled alone, so only writes are forwarded. */
	if (write)
		sysrq_toggle_support(__sysrq_enabled);

	return 0;
}

#endif /* CONFIG_MAGIC_SYSRQ */
213
214 static struct ctl_table kern_table[];
215 static struct ctl_table vm_table[];
216 static struct ctl_table fs_table[];
217 static struct ctl_table debug_table[];
218 static struct ctl_table dev_table[];
219 extern struct ctl_table random_table[];
220 #ifdef CONFIG_EPOLL
221 extern struct ctl_table epoll_table[];
222 #endif
223
224 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
225 int sysctl_legacy_va_layout;
226 #endif
227
228 /* The default sysctl tables: */
229
230 static struct ctl_table sysctl_base_table[] = {
231         {
232                 .procname       = "kernel",
233                 .mode           = 0555,
234                 .child          = kern_table,
235         },
236         {
237                 .procname       = "vm",
238                 .mode           = 0555,
239                 .child          = vm_table,
240         },
241         {
242                 .procname       = "fs",
243                 .mode           = 0555,
244                 .child          = fs_table,
245         },
246         {
247                 .procname       = "debug",
248                 .mode           = 0555,
249                 .child          = debug_table,
250         },
251         {
252                 .procname       = "dev",
253                 .mode           = 0555,
254                 .child          = dev_table,
255         },
256         { }
257 };
258
#ifdef CONFIG_SCHED_DEBUG
/* Limits for "sched_min_granularity_ns" and "sched_latency_ns". */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */

/* Limits for "sched_wakeup_granularity_ns". */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */

#ifdef CONFIG_SMP
/* Limits for "sched_tunable_scaling". */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif /* CONFIG_COMPACTION */
274
275 static struct ctl_table kern_table[] = {
276         {
277                 .procname       = "sched_child_runs_first",
278                 .data           = &sysctl_sched_child_runs_first,
279                 .maxlen         = sizeof(unsigned int),
280                 .mode           = 0644,
281                 .proc_handler   = proc_dointvec,
282         },
283 #ifdef CONFIG_SCHED_DEBUG
284         {
285                 .procname       = "sched_min_granularity_ns",
286                 .data           = &sysctl_sched_min_granularity,
287                 .maxlen         = sizeof(unsigned int),
288                 .mode           = 0644,
289                 .proc_handler   = sched_proc_update_handler,
290                 .extra1         = &min_sched_granularity_ns,
291                 .extra2         = &max_sched_granularity_ns,
292         },
293         {
294                 .procname       = "sched_latency_ns",
295                 .data           = &sysctl_sched_latency,
296                 .maxlen         = sizeof(unsigned int),
297                 .mode           = 0644,
298                 .proc_handler   = sched_proc_update_handler,
299                 .extra1         = &min_sched_granularity_ns,
300                 .extra2         = &max_sched_granularity_ns,
301         },
302         {
303                 .procname       = "sched_wakeup_granularity_ns",
304                 .data           = &sysctl_sched_wakeup_granularity,
305                 .maxlen         = sizeof(unsigned int),
306                 .mode           = 0644,
307                 .proc_handler   = sched_proc_update_handler,
308                 .extra1         = &min_wakeup_granularity_ns,
309                 .extra2         = &max_wakeup_granularity_ns,
310         },
311 #ifdef CONFIG_SMP
312         {
313                 .procname       = "sched_tunable_scaling",
314                 .data           = &sysctl_sched_tunable_scaling,
315                 .maxlen         = sizeof(enum sched_tunable_scaling),
316                 .mode           = 0644,
317                 .proc_handler   = sched_proc_update_handler,
318                 .extra1         = &min_sched_tunable_scaling,
319                 .extra2         = &max_sched_tunable_scaling,
320         },
321         {
322                 .procname       = "sched_migration_cost_ns",
323                 .data           = &sysctl_sched_migration_cost,
324                 .maxlen         = sizeof(unsigned int),
325                 .mode           = 0644,
326                 .proc_handler   = proc_dointvec,
327         },
328         {
329                 .procname       = "sched_nr_migrate",
330                 .data           = &sysctl_sched_nr_migrate,
331                 .maxlen         = sizeof(unsigned int),
332                 .mode           = 0644,
333                 .proc_handler   = proc_dointvec,
334         },
335         {
336                 .procname       = "sched_time_avg_ms",
337                 .data           = &sysctl_sched_time_avg,
338                 .maxlen         = sizeof(unsigned int),
339                 .mode           = 0644,
340                 .proc_handler   = proc_dointvec,
341         },
342         {
343                 .procname       = "sched_shares_window_ns",
344                 .data           = &sysctl_sched_shares_window,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = proc_dointvec,
348         },
349         {
350                 .procname       = "timer_migration",
351                 .data           = &sysctl_timer_migration,
352                 .maxlen         = sizeof(unsigned int),
353                 .mode           = 0644,
354                 .proc_handler   = proc_dointvec_minmax,
355                 .extra1         = &zero,
356                 .extra2         = &one,
357         },
358 #endif /* CONFIG_SMP */
359 #ifdef CONFIG_NUMA_BALANCING
360         {
361                 .procname       = "numa_balancing_scan_delay_ms",
362                 .data           = &sysctl_numa_balancing_scan_delay,
363                 .maxlen         = sizeof(unsigned int),
364                 .mode           = 0644,
365                 .proc_handler   = proc_dointvec,
366         },
367         {
368                 .procname       = "numa_balancing_scan_period_min_ms",
369                 .data           = &sysctl_numa_balancing_scan_period_min,
370                 .maxlen         = sizeof(unsigned int),
371                 .mode           = 0644,
372                 .proc_handler   = proc_dointvec,
373         },
374         {
375                 .procname       = "numa_balancing_scan_period_max_ms",
376                 .data           = &sysctl_numa_balancing_scan_period_max,
377                 .maxlen         = sizeof(unsigned int),
378                 .mode           = 0644,
379                 .proc_handler   = proc_dointvec,
380         },
381         {
382                 .procname       = "numa_balancing_scan_size_mb",
383                 .data           = &sysctl_numa_balancing_scan_size,
384                 .maxlen         = sizeof(unsigned int),
385                 .mode           = 0644,
386                 .proc_handler   = proc_dointvec,
387         },
388         {
389                 .procname       = "numa_balancing_migrate_deferred",
390                 .data           = &sysctl_numa_balancing_migrate_deferred,
391                 .maxlen         = sizeof(unsigned int),
392                 .mode           = 0644,
393                 .proc_handler   = proc_dointvec,
394         },
395         {
396                 .procname       = "numa_balancing",
397                 .data           = NULL, /* filled in by handler */
398                 .maxlen         = sizeof(unsigned int),
399                 .mode           = 0644,
400                 .proc_handler   = sysctl_numa_balancing,
401                 .extra1         = &zero,
402                 .extra2         = &one,
403         },
404 #endif /* CONFIG_NUMA_BALANCING */
405 #endif /* CONFIG_SCHED_DEBUG */
406         {
407                 .procname       = "sched_rt_period_us",
408                 .data           = &sysctl_sched_rt_period,
409                 .maxlen         = sizeof(unsigned int),
410                 .mode           = 0644,
411                 .proc_handler   = sched_rt_handler,
412         },
413         {
414                 .procname       = "sched_rt_runtime_us",
415                 .data           = &sysctl_sched_rt_runtime,
416                 .maxlen         = sizeof(int),
417                 .mode           = 0644,
418                 .proc_handler   = sched_rt_handler,
419         },
420         {
421                 .procname       = "sched_rr_timeslice_ms",
422                 .data           = &sched_rr_timeslice,
423                 .maxlen         = sizeof(int),
424                 .mode           = 0644,
425                 .proc_handler   = sched_rr_handler,
426         },
427 #ifdef CONFIG_SCHED_AUTOGROUP
428         {
429                 .procname       = "sched_autogroup_enabled",
430                 .data           = &sysctl_sched_autogroup_enabled,
431                 .maxlen         = sizeof(unsigned int),
432                 .mode           = 0644,
433                 .proc_handler   = proc_dointvec_minmax,
434                 .extra1         = &zero,
435                 .extra2         = &one,
436         },
437 #endif
438 #ifdef CONFIG_CFS_BANDWIDTH
439         {
440                 .procname       = "sched_cfs_bandwidth_slice_us",
441                 .data           = &sysctl_sched_cfs_bandwidth_slice,
442                 .maxlen         = sizeof(unsigned int),
443                 .mode           = 0644,
444                 .proc_handler   = proc_dointvec_minmax,
445                 .extra1         = &one,
446         },
447 #endif
448 #ifdef CONFIG_PROVE_LOCKING
449         {
450                 .procname       = "prove_locking",
451                 .data           = &prove_locking,
452                 .maxlen         = sizeof(int),
453                 .mode           = 0644,
454                 .proc_handler   = proc_dointvec,
455         },
456 #endif
457 #ifdef CONFIG_LOCK_STAT
458         {
459                 .procname       = "lock_stat",
460                 .data           = &lock_stat,
461                 .maxlen         = sizeof(int),
462                 .mode           = 0644,
463                 .proc_handler   = proc_dointvec,
464         },
465 #endif
466         {
467                 .procname       = "panic",
468                 .data           = &panic_timeout,
469                 .maxlen         = sizeof(int),
470                 .mode           = 0644,
471                 .proc_handler   = proc_dointvec,
472         },
473 #ifdef CONFIG_COREDUMP
474         {
475                 .procname       = "core_uses_pid",
476                 .data           = &core_uses_pid,
477                 .maxlen         = sizeof(int),
478                 .mode           = 0644,
479                 .proc_handler   = proc_dointvec,
480         },
481         {
482                 .procname       = "core_pattern",
483                 .data           = core_pattern,
484                 .maxlen         = CORENAME_MAX_SIZE,
485                 .mode           = 0644,
486                 .proc_handler   = proc_dostring_coredump,
487         },
488         {
489                 .procname       = "core_pipe_limit",
490                 .data           = &core_pipe_limit,
491                 .maxlen         = sizeof(unsigned int),
492                 .mode           = 0644,
493                 .proc_handler   = proc_dointvec,
494         },
495 #endif
496 #ifdef CONFIG_PROC_SYSCTL
497         {
498                 .procname       = "tainted",
499                 .maxlen         = sizeof(long),
500                 .mode           = 0644,
501                 .proc_handler   = proc_taint,
502         },
503 #endif
504 #ifdef CONFIG_LATENCYTOP
505         {
506                 .procname       = "latencytop",
507                 .data           = &latencytop_enabled,
508                 .maxlen         = sizeof(int),
509                 .mode           = 0644,
510                 .proc_handler   = proc_dointvec,
511         },
512 #endif
513 #ifdef CONFIG_BLK_DEV_INITRD
514         {
515                 .procname       = "real-root-dev",
516                 .data           = &real_root_dev,
517                 .maxlen         = sizeof(int),
518                 .mode           = 0644,
519                 .proc_handler   = proc_dointvec,
520         },
521 #endif
522         {
523                 .procname       = "print-fatal-signals",
524                 .data           = &print_fatal_signals,
525                 .maxlen         = sizeof(int),
526                 .mode           = 0644,
527                 .proc_handler   = proc_dointvec,
528         },
529 #ifdef CONFIG_SPARC
530         {
531                 .procname       = "reboot-cmd",
532                 .data           = reboot_command,
533                 .maxlen         = 256,
534                 .mode           = 0644,
535                 .proc_handler   = proc_dostring,
536         },
537         {
538                 .procname       = "stop-a",
539                 .data           = &stop_a_enabled,
540                 .maxlen         = sizeof (int),
541                 .mode           = 0644,
542                 .proc_handler   = proc_dointvec,
543         },
544         {
545                 .procname       = "scons-poweroff",
546                 .data           = &scons_pwroff,
547                 .maxlen         = sizeof (int),
548                 .mode           = 0644,
549                 .proc_handler   = proc_dointvec,
550         },
551 #endif
552 #ifdef CONFIG_SPARC64
553         {
554                 .procname       = "tsb-ratio",
555                 .data           = &sysctl_tsb_ratio,
556                 .maxlen         = sizeof (int),
557                 .mode           = 0644,
558                 .proc_handler   = proc_dointvec,
559         },
560 #endif
561 #ifdef __hppa__
562         {
563                 .procname       = "soft-power",
564                 .data           = &pwrsw_enabled,
565                 .maxlen         = sizeof (int),
566                 .mode           = 0644,
567                 .proc_handler   = proc_dointvec,
568         },
569 #endif
570 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
571         {
572                 .procname       = "unaligned-trap",
573                 .data           = &unaligned_enabled,
574                 .maxlen         = sizeof (int),
575                 .mode           = 0644,
576                 .proc_handler   = proc_dointvec,
577         },
578 #endif
579         {
580                 .procname       = "ctrl-alt-del",
581                 .data           = &C_A_D,
582                 .maxlen         = sizeof(int),
583                 .mode           = 0644,
584                 .proc_handler   = proc_dointvec,
585         },
586 #ifdef CONFIG_FUNCTION_TRACER
587         {
588                 .procname       = "ftrace_enabled",
589                 .data           = &ftrace_enabled,
590                 .maxlen         = sizeof(int),
591                 .mode           = 0644,
592                 .proc_handler   = ftrace_enable_sysctl,
593         },
594 #endif
595 #ifdef CONFIG_STACK_TRACER
596         {
597                 .procname       = "stack_tracer_enabled",
598                 .data           = &stack_tracer_enabled,
599                 .maxlen         = sizeof(int),
600                 .mode           = 0644,
601                 .proc_handler   = stack_trace_sysctl,
602         },
603 #endif
604 #ifdef CONFIG_TRACING
605         {
606                 .procname       = "ftrace_dump_on_oops",
607                 .data           = &ftrace_dump_on_oops,
608                 .maxlen         = sizeof(int),
609                 .mode           = 0644,
610                 .proc_handler   = proc_dointvec,
611         },
612         {
613                 .procname       = "traceoff_on_warning",
614                 .data           = &__disable_trace_on_warning,
615                 .maxlen         = sizeof(__disable_trace_on_warning),
616                 .mode           = 0644,
617                 .proc_handler   = proc_dointvec,
618         },
619 #endif
620 #ifdef CONFIG_KEXEC
621         {
622                 .procname       = "kexec_load_disabled",
623                 .data           = &kexec_load_disabled,
624                 .maxlen         = sizeof(int),
625                 .mode           = 0644,
626                 /* only handle a transition from default "0" to "1" */
627                 .proc_handler   = proc_dointvec_minmax,
628                 .extra1         = &one,
629                 .extra2         = &one,
630         },
631 #endif
632 #ifdef CONFIG_MODULES
633         {
634                 .procname       = "modprobe",
635                 .data           = &modprobe_path,
636                 .maxlen         = KMOD_PATH_LEN,
637                 .mode           = 0644,
638                 .proc_handler   = proc_dostring,
639         },
640         {
641                 .procname       = "modules_disabled",
642                 .data           = &modules_disabled,
643                 .maxlen         = sizeof(int),
644                 .mode           = 0644,
645                 /* only handle a transition from default "0" to "1" */
646                 .proc_handler   = proc_dointvec_minmax,
647                 .extra1         = &one,
648                 .extra2         = &one,
649         },
650 #endif
651
652         {
653                 .procname       = "hotplug",
654                 .data           = &uevent_helper,
655                 .maxlen         = UEVENT_HELPER_PATH_LEN,
656                 .mode           = 0644,
657                 .proc_handler   = proc_dostring,
658         },
659
660 #ifdef CONFIG_CHR_DEV_SG
661         {
662                 .procname       = "sg-big-buff",
663                 .data           = &sg_big_buff,
664                 .maxlen         = sizeof (int),
665                 .mode           = 0444,
666                 .proc_handler   = proc_dointvec,
667         },
668 #endif
669 #ifdef CONFIG_BSD_PROCESS_ACCT
670         {
671                 .procname       = "acct",
672                 .data           = &acct_parm,
673                 .maxlen         = 3*sizeof(int),
674                 .mode           = 0644,
675                 .proc_handler   = proc_dointvec,
676         },
677 #endif
678 #ifdef CONFIG_MAGIC_SYSRQ
679         {
680                 .procname       = "sysrq",
681                 .data           = &__sysrq_enabled,
682                 .maxlen         = sizeof (int),
683                 .mode           = 0644,
684                 .proc_handler   = sysrq_sysctl_handler,
685         },
686 #endif
687 #ifdef CONFIG_PROC_SYSCTL
688         {
689                 .procname       = "cad_pid",
690                 .data           = NULL,
691                 .maxlen         = sizeof (int),
692                 .mode           = 0600,
693                 .proc_handler   = proc_do_cad_pid,
694         },
695 #endif
696         {
697                 .procname       = "threads-max",
698                 .data           = &max_threads,
699                 .maxlen         = sizeof(int),
700                 .mode           = 0644,
701                 .proc_handler   = proc_dointvec,
702         },
703         {
704                 .procname       = "random",
705                 .mode           = 0555,
706                 .child          = random_table,
707         },
708         {
709                 .procname       = "usermodehelper",
710                 .mode           = 0555,
711                 .child          = usermodehelper_table,
712         },
713         {
714                 .procname       = "overflowuid",
715                 .data           = &overflowuid,
716                 .maxlen         = sizeof(int),
717                 .mode           = 0644,
718                 .proc_handler   = proc_dointvec_minmax,
719                 .extra1         = &minolduid,
720                 .extra2         = &maxolduid,
721         },
722         {
723                 .procname       = "overflowgid",
724                 .data           = &overflowgid,
725                 .maxlen         = sizeof(int),
726                 .mode           = 0644,
727                 .proc_handler   = proc_dointvec_minmax,
728                 .extra1         = &minolduid,
729                 .extra2         = &maxolduid,
730         },
731 #ifdef CONFIG_S390
732 #ifdef CONFIG_MATHEMU
733         {
734                 .procname       = "ieee_emulation_warnings",
735                 .data           = &sysctl_ieee_emulation_warnings,
736                 .maxlen         = sizeof(int),
737                 .mode           = 0644,
738                 .proc_handler   = proc_dointvec,
739         },
740 #endif
741         {
742                 .procname       = "userprocess_debug",
743                 .data           = &show_unhandled_signals,
744                 .maxlen         = sizeof(int),
745                 .mode           = 0644,
746                 .proc_handler   = proc_dointvec,
747         },
748 #endif
749         {
750                 .procname       = "pid_max",
751                 .data           = &pid_max,
752                 .maxlen         = sizeof (int),
753                 .mode           = 0644,
754                 .proc_handler   = proc_dointvec_minmax,
755                 .extra1         = &pid_max_min,
756                 .extra2         = &pid_max_max,
757         },
758         {
759                 .procname       = "panic_on_oops",
760                 .data           = &panic_on_oops,
761                 .maxlen         = sizeof(int),
762                 .mode           = 0644,
763                 .proc_handler   = proc_dointvec,
764         },
765 #if defined CONFIG_PRINTK
766         {
767                 .procname       = "printk",
768                 .data           = &console_loglevel,
769                 .maxlen         = 4*sizeof(int),
770                 .mode           = 0644,
771                 .proc_handler   = proc_dointvec,
772         },
773         {
774                 .procname       = "printk_ratelimit",
775                 .data           = &printk_ratelimit_state.interval,
776                 .maxlen         = sizeof(int),
777                 .mode           = 0644,
778                 .proc_handler   = proc_dointvec_jiffies,
779         },
780         {
781                 .procname       = "printk_ratelimit_burst",
782                 .data           = &printk_ratelimit_state.burst,
783                 .maxlen         = sizeof(int),
784                 .mode           = 0644,
785                 .proc_handler   = proc_dointvec,
786         },
787         {
788                 .procname       = "printk_delay",
789                 .data           = &printk_delay_msec,
790                 .maxlen         = sizeof(int),
791                 .mode           = 0644,
792                 .proc_handler   = proc_dointvec_minmax,
793                 .extra1         = &zero,
794                 .extra2         = &ten_thousand,
795         },
796         {
797                 .procname       = "dmesg_restrict",
798                 .data           = &dmesg_restrict,
799                 .maxlen         = sizeof(int),
800                 .mode           = 0644,
801                 .proc_handler   = proc_dointvec_minmax_sysadmin,
802                 .extra1         = &zero,
803                 .extra2         = &one,
804         },
805         {
806                 .procname       = "kptr_restrict",
807                 .data           = &kptr_restrict,
808                 .maxlen         = sizeof(int),
809                 .mode           = 0644,
810                 .proc_handler   = proc_dointvec_minmax_sysadmin,
811                 .extra1         = &zero,
812                 .extra2         = &two,
813         },
814 #endif
815         {
816                 .procname       = "ngroups_max",
817                 .data           = &ngroups_max,
818                 .maxlen         = sizeof (int),
819                 .mode           = 0444,
820                 .proc_handler   = proc_dointvec,
821         },
822         {
823                 .procname       = "cap_last_cap",
824                 .data           = (void *)&cap_last_cap,
825                 .maxlen         = sizeof(int),
826                 .mode           = 0444,
827                 .proc_handler   = proc_dointvec,
828         },
829 #if defined(CONFIG_LOCKUP_DETECTOR)
830         {
831                 .procname       = "watchdog",
832                 .data           = &watchdog_user_enabled,
833                 .maxlen         = sizeof (int),
834                 .mode           = 0644,
835                 .proc_handler   = proc_dowatchdog,
836                 .extra1         = &zero,
837                 .extra2         = &one,
838         },
839         {
840                 .procname       = "watchdog_thresh",
841                 .data           = &watchdog_thresh,
842                 .maxlen         = sizeof(int),
843                 .mode           = 0644,
844                 .proc_handler   = proc_dowatchdog,
845                 .extra1         = &zero,
846                 .extra2         = &sixty,
847         },
848         {
849                 .procname       = "softlockup_panic",
850                 .data           = &softlockup_panic,
851                 .maxlen         = sizeof(int),
852                 .mode           = 0644,
853                 .proc_handler   = proc_dointvec_minmax,
854                 .extra1         = &zero,
855                 .extra2         = &one,
856         },
857         {
858                 .procname       = "nmi_watchdog",
859                 .data           = &watchdog_user_enabled,
860                 .maxlen         = sizeof (int),
861                 .mode           = 0644,
862                 .proc_handler   = proc_dowatchdog,
863                 .extra1         = &zero,
864                 .extra2         = &one,
865         },
866 #endif
867 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
868         {
869                 .procname       = "unknown_nmi_panic",
870                 .data           = &unknown_nmi_panic,
871                 .maxlen         = sizeof (int),
872                 .mode           = 0644,
873                 .proc_handler   = proc_dointvec,
874         },
875 #endif
876 #if defined(CONFIG_X86)
877         {
878                 .procname       = "panic_on_unrecovered_nmi",
879                 .data           = &panic_on_unrecovered_nmi,
880                 .maxlen         = sizeof(int),
881                 .mode           = 0644,
882                 .proc_handler   = proc_dointvec,
883         },
884         {
885                 .procname       = "panic_on_io_nmi",
886                 .data           = &panic_on_io_nmi,
887                 .maxlen         = sizeof(int),
888                 .mode           = 0644,
889                 .proc_handler   = proc_dointvec,
890         },
891 #ifdef CONFIG_DEBUG_STACKOVERFLOW
892         {
893                 .procname       = "panic_on_stackoverflow",
894                 .data           = &sysctl_panic_on_stackoverflow,
895                 .maxlen         = sizeof(int),
896                 .mode           = 0644,
897                 .proc_handler   = proc_dointvec,
898         },
899 #endif
900         {
901                 .procname       = "bootloader_type",
902                 .data           = &bootloader_type,
903                 .maxlen         = sizeof (int),
904                 .mode           = 0444,
905                 .proc_handler   = proc_dointvec,
906         },
907         {
908                 .procname       = "bootloader_version",
909                 .data           = &bootloader_version,
910                 .maxlen         = sizeof (int),
911                 .mode           = 0444,
912                 .proc_handler   = proc_dointvec,
913         },
914         {
915                 .procname       = "kstack_depth_to_print",
916                 .data           = &kstack_depth_to_print,
917                 .maxlen         = sizeof(int),
918                 .mode           = 0644,
919                 .proc_handler   = proc_dointvec,
920         },
921         {
922                 .procname       = "io_delay_type",
923                 .data           = &io_delay_type,
924                 .maxlen         = sizeof(int),
925                 .mode           = 0644,
926                 .proc_handler   = proc_dointvec,
927         },
928 #endif
929 #if defined(CONFIG_MMU)
930         {
931                 .procname       = "randomize_va_space",
932                 .data           = &randomize_va_space,
933                 .maxlen         = sizeof(int),
934                 .mode           = 0644,
935                 .proc_handler   = proc_dointvec,
936         },
937 #endif
938 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
939         {
940                 .procname       = "spin_retry",
941                 .data           = &spin_retry,
942                 .maxlen         = sizeof (int),
943                 .mode           = 0644,
944                 .proc_handler   = proc_dointvec,
945         },
946 #endif
947 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
948         {
949                 .procname       = "acpi_video_flags",
950                 .data           = &acpi_realmode_flags,
951                 .maxlen         = sizeof (unsigned long),
952                 .mode           = 0644,
953                 .proc_handler   = proc_doulongvec_minmax,
954         },
955 #endif
956 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
957         {
958                 .procname       = "ignore-unaligned-usertrap",
959                 .data           = &no_unaligned_warning,
960                 .maxlen         = sizeof (int),
961                 .mode           = 0644,
962                 .proc_handler   = proc_dointvec,
963         },
964 #endif
965 #ifdef CONFIG_IA64
966         {
967                 .procname       = "unaligned-dump-stack",
968                 .data           = &unaligned_dump_stack,
969                 .maxlen         = sizeof (int),
970                 .mode           = 0644,
971                 .proc_handler   = proc_dointvec,
972         },
973 #endif
974 #ifdef CONFIG_DETECT_HUNG_TASK
975         {
976                 .procname       = "hung_task_panic",
977                 .data           = &sysctl_hung_task_panic,
978                 .maxlen         = sizeof(int),
979                 .mode           = 0644,
980                 .proc_handler   = proc_dointvec_minmax,
981                 .extra1         = &zero,
982                 .extra2         = &one,
983         },
984         {
985                 .procname       = "hung_task_check_count",
986                 .data           = &sysctl_hung_task_check_count,
987                 .maxlen         = sizeof(int),
988                 .mode           = 0644,
989                 .proc_handler   = proc_dointvec_minmax,
990                 .extra1         = &zero,
991         },
992         {
993                 .procname       = "hung_task_timeout_secs",
994                 .data           = &sysctl_hung_task_timeout_secs,
995                 .maxlen         = sizeof(unsigned long),
996                 .mode           = 0644,
997                 .proc_handler   = proc_dohung_task_timeout_secs,
998         },
999         {
1000                 .procname       = "hung_task_warnings",
1001                 .data           = &sysctl_hung_task_warnings,
1002                 .maxlen         = sizeof(int),
1003                 .mode           = 0644,
1004                 .proc_handler   = proc_dointvec_minmax,
1005                 .extra1         = &neg_one,
1006         },
1007 #endif
1008 #ifdef CONFIG_COMPAT
1009         {
1010                 .procname       = "compat-log",
1011                 .data           = &compat_log,
1012                 .maxlen         = sizeof (int),
1013                 .mode           = 0644,
1014                 .proc_handler   = proc_dointvec,
1015         },
1016 #endif
1017 #ifdef CONFIG_RT_MUTEXES
1018         {
1019                 .procname       = "max_lock_depth",
1020                 .data           = &max_lock_depth,
1021                 .maxlen         = sizeof(int),
1022                 .mode           = 0644,
1023                 .proc_handler   = proc_dointvec,
1024         },
1025 #endif
1026         {
1027                 .procname       = "poweroff_cmd",
1028                 .data           = &poweroff_cmd,
1029                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1030                 .mode           = 0644,
1031                 .proc_handler   = proc_dostring,
1032         },
1033 #ifdef CONFIG_KEYS
1034         {
1035                 .procname       = "keys",
1036                 .mode           = 0555,
1037                 .child          = key_sysctls,
1038         },
1039 #endif
1040 #ifdef CONFIG_RCU_TORTURE_TEST
1041         {
1042                 .procname       = "rcutorture_runnable",
1043                 .data           = &rcutorture_runnable,
1044                 .maxlen         = sizeof(int),
1045                 .mode           = 0644,
1046                 .proc_handler   = proc_dointvec,
1047         },
1048 #endif
1049 #ifdef CONFIG_PERF_EVENTS
1050         /*
1051          * User-space scripts rely on the existence of this file
1052          * as a feature check for perf_events being enabled.
1053          *
1054          * So it's an ABI, do not remove!
1055          */
1056         {
1057                 .procname       = "perf_event_paranoid",
1058                 .data           = &sysctl_perf_event_paranoid,
1059                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1060                 .mode           = 0644,
1061                 .proc_handler   = proc_dointvec,
1062         },
1063         {
1064                 .procname       = "perf_event_mlock_kb",
1065                 .data           = &sysctl_perf_event_mlock,
1066                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1067                 .mode           = 0644,
1068                 .proc_handler   = proc_dointvec,
1069         },
1070         {
1071                 .procname       = "perf_event_max_sample_rate",
1072                 .data           = &sysctl_perf_event_sample_rate,
1073                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1074                 .mode           = 0644,
1075                 .proc_handler   = perf_proc_update_handler,
1076                 .extra1         = &one,
1077         },
1078         {
1079                 .procname       = "perf_cpu_time_max_percent",
1080                 .data           = &sysctl_perf_cpu_time_max_percent,
1081                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1082                 .mode           = 0644,
1083                 .proc_handler   = perf_cpu_time_max_percent_handler,
1084                 .extra1         = &zero,
1085                 .extra2         = &one_hundred,
1086         },
1087 #endif
1088 #ifdef CONFIG_KMEMCHECK
1089         {
1090                 .procname       = "kmemcheck",
1091                 .data           = &kmemcheck_enabled,
1092                 .maxlen         = sizeof(int),
1093                 .mode           = 0644,
1094                 .proc_handler   = proc_dointvec,
1095         },
1096 #endif
1097 #ifdef CONFIG_BLOCK
1098         {
1099                 .procname       = "blk_iopoll",
1100                 .data           = &blk_iopoll_enabled,
1101                 .maxlen         = sizeof(int),
1102                 .mode           = 0644,
1103                 .proc_handler   = proc_dointvec,
1104         },
1105 #endif
1106         { }
1107 };
1108
1109 static struct ctl_table vm_table[] = {
1110         {
1111                 .procname       = "overcommit_memory",
1112                 .data           = &sysctl_overcommit_memory,
1113                 .maxlen         = sizeof(sysctl_overcommit_memory),
1114                 .mode           = 0644,
1115                 .proc_handler   = proc_dointvec_minmax,
1116                 .extra1         = &zero,
1117                 .extra2         = &two,
1118         },
1119         {
1120                 .procname       = "panic_on_oom",
1121                 .data           = &sysctl_panic_on_oom,
1122                 .maxlen         = sizeof(sysctl_panic_on_oom),
1123                 .mode           = 0644,
1124                 .proc_handler   = proc_dointvec_minmax,
1125                 .extra1         = &zero,
1126                 .extra2         = &two,
1127         },
1128         {
1129                 .procname       = "oom_kill_allocating_task",
1130                 .data           = &sysctl_oom_kill_allocating_task,
1131                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1132                 .mode           = 0644,
1133                 .proc_handler   = proc_dointvec,
1134         },
1135         {
1136                 .procname       = "oom_dump_tasks",
1137                 .data           = &sysctl_oom_dump_tasks,
1138                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1139                 .mode           = 0644,
1140                 .proc_handler   = proc_dointvec,
1141         },
1142         {
1143                 .procname       = "overcommit_ratio",
1144                 .data           = &sysctl_overcommit_ratio,
1145                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1146                 .mode           = 0644,
1147                 .proc_handler   = overcommit_ratio_handler,
1148         },
1149         {
1150                 .procname       = "overcommit_kbytes",
1151                 .data           = &sysctl_overcommit_kbytes,
1152                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1153                 .mode           = 0644,
1154                 .proc_handler   = overcommit_kbytes_handler,
1155         },
1156         {
1157                 .procname       = "page-cluster", 
1158                 .data           = &page_cluster,
1159                 .maxlen         = sizeof(int),
1160                 .mode           = 0644,
1161                 .proc_handler   = proc_dointvec_minmax,
1162                 .extra1         = &zero,
1163         },
1164         {
1165                 .procname       = "dirty_background_ratio",
1166                 .data           = &dirty_background_ratio,
1167                 .maxlen         = sizeof(dirty_background_ratio),
1168                 .mode           = 0644,
1169                 .proc_handler   = dirty_background_ratio_handler,
1170                 .extra1         = &zero,
1171                 .extra2         = &one_hundred,
1172         },
1173         {
1174                 .procname       = "dirty_background_bytes",
1175                 .data           = &dirty_background_bytes,
1176                 .maxlen         = sizeof(dirty_background_bytes),
1177                 .mode           = 0644,
1178                 .proc_handler   = dirty_background_bytes_handler,
1179                 .extra1         = &one_ul,
1180         },
1181         {
1182                 .procname       = "dirty_ratio",
1183                 .data           = &vm_dirty_ratio,
1184                 .maxlen         = sizeof(vm_dirty_ratio),
1185                 .mode           = 0644,
1186                 .proc_handler   = dirty_ratio_handler,
1187                 .extra1         = &zero,
1188                 .extra2         = &one_hundred,
1189         },
1190         {
1191                 .procname       = "dirty_bytes",
1192                 .data           = &vm_dirty_bytes,
1193                 .maxlen         = sizeof(vm_dirty_bytes),
1194                 .mode           = 0644,
1195                 .proc_handler   = dirty_bytes_handler,
1196                 .extra1         = &dirty_bytes_min,
1197         },
1198         {
1199                 .procname       = "dirty_writeback_centisecs",
1200                 .data           = &dirty_writeback_interval,
1201                 .maxlen         = sizeof(dirty_writeback_interval),
1202                 .mode           = 0644,
1203                 .proc_handler   = dirty_writeback_centisecs_handler,
1204         },
1205         {
1206                 .procname       = "dirty_expire_centisecs",
1207                 .data           = &dirty_expire_interval,
1208                 .maxlen         = sizeof(dirty_expire_interval),
1209                 .mode           = 0644,
1210                 .proc_handler   = proc_dointvec_minmax,
1211                 .extra1         = &zero,
1212         },
1213         {
1214                 .procname       = "nr_pdflush_threads",
1215                 .mode           = 0444 /* read-only */,
1216                 .proc_handler   = pdflush_proc_obsolete,
1217         },
1218         {
1219                 .procname       = "swappiness",
1220                 .data           = &vm_swappiness,
1221                 .maxlen         = sizeof(vm_swappiness),
1222                 .mode           = 0644,
1223                 .proc_handler   = proc_dointvec_minmax,
1224                 .extra1         = &zero,
1225                 .extra2         = &one_hundred,
1226         },
1227 #ifdef CONFIG_HUGETLB_PAGE
1228         {
1229                 .procname       = "nr_hugepages",
1230                 .data           = NULL,
1231                 .maxlen         = sizeof(unsigned long),
1232                 .mode           = 0644,
1233                 .proc_handler   = hugetlb_sysctl_handler,
1234                 .extra1         = (void *)&hugetlb_zero,
1235                 .extra2         = (void *)&hugetlb_infinity,
1236         },
1237 #ifdef CONFIG_NUMA
1238         {
1239                 .procname       = "nr_hugepages_mempolicy",
1240                 .data           = NULL,
1241                 .maxlen         = sizeof(unsigned long),
1242                 .mode           = 0644,
1243                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1244                 .extra1         = (void *)&hugetlb_zero,
1245                 .extra2         = (void *)&hugetlb_infinity,
1246         },
1247 #endif
1248          {
1249                 .procname       = "hugetlb_shm_group",
1250                 .data           = &sysctl_hugetlb_shm_group,
1251                 .maxlen         = sizeof(gid_t),
1252                 .mode           = 0644,
1253                 .proc_handler   = proc_dointvec,
1254          },
1255          {
1256                 .procname       = "hugepages_treat_as_movable",
1257                 .data           = &hugepages_treat_as_movable,
1258                 .maxlen         = sizeof(int),
1259                 .mode           = 0644,
1260                 .proc_handler   = proc_dointvec,
1261         },
1262         {
1263                 .procname       = "nr_overcommit_hugepages",
1264                 .data           = NULL,
1265                 .maxlen         = sizeof(unsigned long),
1266                 .mode           = 0644,
1267                 .proc_handler   = hugetlb_overcommit_handler,
1268                 .extra1         = (void *)&hugetlb_zero,
1269                 .extra2         = (void *)&hugetlb_infinity,
1270         },
1271 #endif
1272         {
1273                 .procname       = "lowmem_reserve_ratio",
1274                 .data           = &sysctl_lowmem_reserve_ratio,
1275                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1276                 .mode           = 0644,
1277                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1278         },
1279         {
1280                 .procname       = "drop_caches",
1281                 .data           = &sysctl_drop_caches,
1282                 .maxlen         = sizeof(int),
1283                 .mode           = 0644,
1284                 .proc_handler   = drop_caches_sysctl_handler,
1285                 .extra1         = &one,
1286                 .extra2         = &three,
1287         },
1288 #ifdef CONFIG_COMPACTION
1289         {
1290                 .procname       = "compact_memory",
1291                 .data           = &sysctl_compact_memory,
1292                 .maxlen         = sizeof(int),
1293                 .mode           = 0200,
1294                 .proc_handler   = sysctl_compaction_handler,
1295         },
1296         {
1297                 .procname       = "extfrag_threshold",
1298                 .data           = &sysctl_extfrag_threshold,
1299                 .maxlen         = sizeof(int),
1300                 .mode           = 0644,
1301                 .proc_handler   = sysctl_extfrag_handler,
1302                 .extra1         = &min_extfrag_threshold,
1303                 .extra2         = &max_extfrag_threshold,
1304         },
1305
1306 #endif /* CONFIG_COMPACTION */
1307         {
1308                 .procname       = "min_free_kbytes",
1309                 .data           = &min_free_kbytes,
1310                 .maxlen         = sizeof(min_free_kbytes),
1311                 .mode           = 0644,
1312                 .proc_handler   = min_free_kbytes_sysctl_handler,
1313                 .extra1         = &zero,
1314         },
1315         {
1316                 .procname       = "percpu_pagelist_fraction",
1317                 .data           = &percpu_pagelist_fraction,
1318                 .maxlen         = sizeof(percpu_pagelist_fraction),
1319                 .mode           = 0644,
1320                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1321                 .extra1         = &min_percpu_pagelist_fract,
1322         },
1323 #ifdef CONFIG_MMU
1324         {
1325                 .procname       = "max_map_count",
1326                 .data           = &sysctl_max_map_count,
1327                 .maxlen         = sizeof(sysctl_max_map_count),
1328                 .mode           = 0644,
1329                 .proc_handler   = proc_dointvec_minmax,
1330                 .extra1         = &zero,
1331         },
1332 #else
1333         {
1334                 .procname       = "nr_trim_pages",
1335                 .data           = &sysctl_nr_trim_pages,
1336                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1337                 .mode           = 0644,
1338                 .proc_handler   = proc_dointvec_minmax,
1339                 .extra1         = &zero,
1340         },
1341 #endif
1342         {
1343                 .procname       = "laptop_mode",
1344                 .data           = &laptop_mode,
1345                 .maxlen         = sizeof(laptop_mode),
1346                 .mode           = 0644,
1347                 .proc_handler   = proc_dointvec_jiffies,
1348         },
1349         {
1350                 .procname       = "block_dump",
1351                 .data           = &block_dump,
1352                 .maxlen         = sizeof(block_dump),
1353                 .mode           = 0644,
1354                 .proc_handler   = proc_dointvec,
1355                 .extra1         = &zero,
1356         },
1357         {
1358                 .procname       = "vfs_cache_pressure",
1359                 .data           = &sysctl_vfs_cache_pressure,
1360                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1361                 .mode           = 0644,
1362                 .proc_handler   = proc_dointvec,
1363                 .extra1         = &zero,
1364         },
1365 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1366         {
1367                 .procname       = "legacy_va_layout",
1368                 .data           = &sysctl_legacy_va_layout,
1369                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1370                 .mode           = 0644,
1371                 .proc_handler   = proc_dointvec,
1372                 .extra1         = &zero,
1373         },
1374 #endif
1375 #ifdef CONFIG_NUMA
1376         {
1377                 .procname       = "zone_reclaim_mode",
1378                 .data           = &zone_reclaim_mode,
1379                 .maxlen         = sizeof(zone_reclaim_mode),
1380                 .mode           = 0644,
1381                 .proc_handler   = proc_dointvec,
1382                 .extra1         = &zero,
1383         },
1384         {
1385                 .procname       = "min_unmapped_ratio",
1386                 .data           = &sysctl_min_unmapped_ratio,
1387                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1388                 .mode           = 0644,
1389                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1390                 .extra1         = &zero,
1391                 .extra2         = &one_hundred,
1392         },
1393         {
1394                 .procname       = "min_slab_ratio",
1395                 .data           = &sysctl_min_slab_ratio,
1396                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1397                 .mode           = 0644,
1398                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1399                 .extra1         = &zero,
1400                 .extra2         = &one_hundred,
1401         },
1402 #endif
1403 #ifdef CONFIG_SMP
1404         {
1405                 .procname       = "stat_interval",
1406                 .data           = &sysctl_stat_interval,
1407                 .maxlen         = sizeof(sysctl_stat_interval),
1408                 .mode           = 0644,
1409                 .proc_handler   = proc_dointvec_jiffies,
1410         },
1411 #endif
1412 #ifdef CONFIG_MMU
1413         {
1414                 .procname       = "mmap_min_addr",
1415                 .data           = &dac_mmap_min_addr,
1416                 .maxlen         = sizeof(unsigned long),
1417                 .mode           = 0644,
1418                 .proc_handler   = mmap_min_addr_handler,
1419         },
1420 #endif
1421 #ifdef CONFIG_NUMA
1422         {
1423                 .procname       = "numa_zonelist_order",
1424                 .data           = &numa_zonelist_order,
1425                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1426                 .mode           = 0644,
1427                 .proc_handler   = numa_zonelist_order_handler,
1428         },
1429 #endif
1430 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1431    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1432         {
1433                 .procname       = "vdso_enabled",
1434                 .data           = &vdso_enabled,
1435                 .maxlen         = sizeof(vdso_enabled),
1436                 .mode           = 0644,
1437                 .proc_handler   = proc_dointvec,
1438                 .extra1         = &zero,
1439         },
1440 #endif
1441 #ifdef CONFIG_HIGHMEM
1442         {
1443                 .procname       = "highmem_is_dirtyable",
1444                 .data           = &vm_highmem_is_dirtyable,
1445                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1446                 .mode           = 0644,
1447                 .proc_handler   = proc_dointvec_minmax,
1448                 .extra1         = &zero,
1449                 .extra2         = &one,
1450         },
1451 #endif
1452         {
1453                 .procname       = "scan_unevictable_pages",
1454                 .data           = &scan_unevictable_pages,
1455                 .maxlen         = sizeof(scan_unevictable_pages),
1456                 .mode           = 0644,
1457                 .proc_handler   = scan_unevictable_handler,
1458         },
1459 #ifdef CONFIG_MEMORY_FAILURE
1460         {
1461                 .procname       = "memory_failure_early_kill",
1462                 .data           = &sysctl_memory_failure_early_kill,
1463                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1464                 .mode           = 0644,
1465                 .proc_handler   = proc_dointvec_minmax,
1466                 .extra1         = &zero,
1467                 .extra2         = &one,
1468         },
1469         {
1470                 .procname       = "memory_failure_recovery",
1471                 .data           = &sysctl_memory_failure_recovery,
1472                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1473                 .mode           = 0644,
1474                 .proc_handler   = proc_dointvec_minmax,
1475                 .extra1         = &zero,
1476                 .extra2         = &one,
1477         },
1478 #endif
1479         {
1480                 .procname       = "user_reserve_kbytes",
1481                 .data           = &sysctl_user_reserve_kbytes,
1482                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1483                 .mode           = 0644,
1484                 .proc_handler   = proc_doulongvec_minmax,
1485         },
1486         {
1487                 .procname       = "admin_reserve_kbytes",
1488                 .data           = &sysctl_admin_reserve_kbytes,
1489                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1490                 .mode           = 0644,
1491                 .proc_handler   = proc_doulongvec_minmax,
1492         },
1493         { }
1494 };
1495
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * binfmt_misc_table - sysctl table with no entries of its own.
 *
 * Only the all-zero terminating entry is present.  It is compiled in
 * when binfmt_misc support is either built in or built as a module.
 */
static struct ctl_table binfmt_misc_table[] = {
        { }     /* terminating entry */
};
#endif /* CONFIG_BINFMT_MISC || CONFIG_BINFMT_MISC_MODULE */
1501
1502 static struct ctl_table fs_table[] = {
1503         {
1504                 .procname       = "inode-nr",
1505                 .data           = &inodes_stat,
1506                 .maxlen         = 2*sizeof(long),
1507                 .mode           = 0444,
1508                 .proc_handler   = proc_nr_inodes,
1509         },
1510         {
1511                 .procname       = "inode-state",
1512                 .data           = &inodes_stat,
1513                 .maxlen         = 7*sizeof(long),
1514                 .mode           = 0444,
1515                 .proc_handler   = proc_nr_inodes,
1516         },
1517         {
1518                 .procname       = "file-nr",
1519                 .data           = &files_stat,
1520                 .maxlen         = sizeof(files_stat),
1521                 .mode           = 0444,
1522                 .proc_handler   = proc_nr_files,
1523         },
1524         {
1525                 .procname       = "file-max",
1526                 .data           = &files_stat.max_files,
1527                 .maxlen         = sizeof(files_stat.max_files),
1528                 .mode           = 0644,
1529                 .proc_handler   = proc_doulongvec_minmax,
1530         },
1531         {
1532                 .procname       = "nr_open",
1533                 .data           = &sysctl_nr_open,
1534                 .maxlen         = sizeof(int),
1535                 .mode           = 0644,
1536                 .proc_handler   = proc_dointvec_minmax,
1537                 .extra1         = &sysctl_nr_open_min,
1538                 .extra2         = &sysctl_nr_open_max,
1539         },
1540         {
1541                 .procname       = "dentry-state",
1542                 .data           = &dentry_stat,
1543                 .maxlen         = 6*sizeof(long),
1544                 .mode           = 0444,
1545                 .proc_handler   = proc_nr_dentry,
1546         },
1547         {
1548                 .procname       = "overflowuid",
1549                 .data           = &fs_overflowuid,
1550                 .maxlen         = sizeof(int),
1551                 .mode           = 0644,
1552                 .proc_handler   = proc_dointvec_minmax,
1553                 .extra1         = &minolduid,
1554                 .extra2         = &maxolduid,
1555         },
1556         {
1557                 .procname       = "overflowgid",
1558                 .data           = &fs_overflowgid,
1559                 .maxlen         = sizeof(int),
1560                 .mode           = 0644,
1561                 .proc_handler   = proc_dointvec_minmax,
1562                 .extra1         = &minolduid,
1563                 .extra2         = &maxolduid,
1564         },
1565 #ifdef CONFIG_FILE_LOCKING
1566         {
1567                 .procname       = "leases-enable",
1568                 .data           = &leases_enable,
1569                 .maxlen         = sizeof(int),
1570                 .mode           = 0644,
1571                 .proc_handler   = proc_dointvec,
1572         },
1573 #endif
1574 #ifdef CONFIG_DNOTIFY
1575         {
1576                 .procname       = "dir-notify-enable",
1577                 .data           = &dir_notify_enable,
1578                 .maxlen         = sizeof(int),
1579                 .mode           = 0644,
1580                 .proc_handler   = proc_dointvec,
1581         },
1582 #endif
1583 #ifdef CONFIG_MMU
1584 #ifdef CONFIG_FILE_LOCKING
1585         {
1586                 .procname       = "lease-break-time",
1587                 .data           = &lease_break_time,
1588                 .maxlen         = sizeof(int),
1589                 .mode           = 0644,
1590                 .proc_handler   = proc_dointvec,
1591         },
1592 #endif
1593 #ifdef CONFIG_AIO
1594         {
1595                 .procname       = "aio-nr",
1596                 .data           = &aio_nr,
1597                 .maxlen         = sizeof(aio_nr),
1598                 .mode           = 0444,
1599                 .proc_handler   = proc_doulongvec_minmax,
1600         },
1601         {
1602                 .procname       = "aio-max-nr",
1603                 .data           = &aio_max_nr,
1604                 .maxlen         = sizeof(aio_max_nr),
1605                 .mode           = 0644,
1606                 .proc_handler   = proc_doulongvec_minmax,
1607         },
1608 #endif /* CONFIG_AIO */
1609 #ifdef CONFIG_INOTIFY_USER
1610         {
1611                 .procname       = "inotify",
1612                 .mode           = 0555,
1613                 .child          = inotify_table,
1614         },
1615 #endif  
1616 #ifdef CONFIG_EPOLL
1617         {
1618                 .procname       = "epoll",
1619                 .mode           = 0555,
1620                 .child          = epoll_table,
1621         },
1622 #endif
1623 #endif
1624         {
1625                 .procname       = "protected_symlinks",
1626                 .data           = &sysctl_protected_symlinks,
1627                 .maxlen         = sizeof(int),
1628                 .mode           = 0600,
1629                 .proc_handler   = proc_dointvec_minmax,
1630                 .extra1         = &zero,
1631                 .extra2         = &one,
1632         },
1633         {
1634                 .procname       = "protected_hardlinks",
1635                 .data           = &sysctl_protected_hardlinks,
1636                 .maxlen         = sizeof(int),
1637                 .mode           = 0600,
1638                 .proc_handler   = proc_dointvec_minmax,
1639                 .extra1         = &zero,
1640                 .extra2         = &one,
1641         },
1642         {
1643                 .procname       = "suid_dumpable",
1644                 .data           = &suid_dumpable,
1645                 .maxlen         = sizeof(int),
1646                 .mode           = 0644,
1647                 .proc_handler   = proc_dointvec_minmax_coredump,
1648                 .extra1         = &zero,
1649                 .extra2         = &two,
1650         },
1651 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1652         {
1653                 .procname       = "binfmt_misc",
1654                 .mode           = 0555,
1655                 .child          = binfmt_misc_table,
1656         },
1657 #endif
1658         {
1659                 .procname       = "pipe-max-size",
1660                 .data           = &pipe_max_size,
1661                 .maxlen         = sizeof(int),
1662                 .mode           = 0644,
1663                 .proc_handler   = &pipe_proc_fn,
1664                 .extra1         = &pipe_min_size,
1665         },
1666         { }
1667 };
1668
1669 static struct ctl_table debug_table[] = {
1670 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1671         {
1672                 .procname       = "exception-trace",
1673                 .data           = &show_unhandled_signals,
1674                 .maxlen         = sizeof(int),
1675                 .mode           = 0644,
1676                 .proc_handler   = proc_dointvec
1677         },
1678 #endif
1679 #if defined(CONFIG_OPTPROBES)
1680         {
1681                 .procname       = "kprobes-optimization",
1682                 .data           = &sysctl_kprobes_optimization,
1683                 .maxlen         = sizeof(int),
1684                 .mode           = 0644,
1685                 .proc_handler   = proc_kprobes_optimization_handler,
1686                 .extra1         = &zero,
1687                 .extra2         = &one,
1688         },
1689 #endif
1690         { }
1691 };
1692
1693 static struct ctl_table dev_table[] = {
1694         { }
1695 };
1696
1697 int __init sysctl_init(void)
1698 {
1699         struct ctl_table_header *hdr;
1700
1701         hdr = register_sysctl_table(sysctl_base_table);
1702         kmemleak_not_leak(hdr);
1703         return 0;
1704 }
1705
1706 #endif /* CONFIG_SYSCTL */
1707
1708 /*
1709  * /proc/sys support
1710  */
1711
1712 #ifdef CONFIG_PROC_SYSCTL
1713
1714 static int _proc_do_string(void* data, int maxlen, int write,
1715                            void __user *buffer,
1716                            size_t *lenp, loff_t *ppos)
1717 {
1718         size_t len;
1719         char __user *p;
1720         char c;
1721
1722         if (!data || !maxlen || !*lenp) {
1723                 *lenp = 0;
1724                 return 0;
1725         }
1726
1727         if (write) {
1728                 len = 0;
1729                 p = buffer;
1730                 while (len < *lenp) {
1731                         if (get_user(c, p++))
1732                                 return -EFAULT;
1733                         if (c == 0 || c == '\n')
1734                                 break;
1735                         len++;
1736                 }
1737                 if (len >= maxlen)
1738                         len = maxlen-1;
1739                 if(copy_from_user(data, buffer, len))
1740                         return -EFAULT;
1741                 ((char *) data)[len] = 0;
1742                 *ppos += *lenp;
1743         } else {
1744                 len = strlen(data);
1745                 if (len > maxlen)
1746                         len = maxlen;
1747
1748                 if (*ppos > len) {
1749                         *lenp = 0;
1750                         return 0;
1751                 }
1752
1753                 data += *ppos;
1754                 len  -= *ppos;
1755
1756                 if (len > *lenp)
1757                         len = *lenp;
1758                 if (len)
1759                         if(copy_to_user(buffer, data, len))
1760                                 return -EFAULT;
1761                 if (len < *lenp) {
1762                         if(put_user('\n', ((char __user *) buffer) + len))
1763                                 return -EFAULT;
1764                         len++;
1765                 }
1766                 *lenp = len;
1767                 *ppos += len;
1768         }
1769         return 0;
1770 }
1771
1772 /**
1773  * proc_dostring - read a string sysctl
1774  * @table: the sysctl table
1775  * @write: %TRUE if this is a write to the sysctl file
1776  * @buffer: the user buffer
1777  * @lenp: the size of the user buffer
1778  * @ppos: file position
1779  *
1780  * Reads/writes a string from/to the user buffer. If the kernel
1781  * buffer provided is not large enough to hold the string, the
1782  * string is truncated. The copied string is %NULL-terminated.
1783  * If the string is being read by the user process, it is copied
1784  * and a newline '\n' is added. It is truncated if the buffer is
1785  * not large enough.
1786  *
1787  * Returns 0 on success.
1788  */
1789 int proc_dostring(struct ctl_table *table, int write,
1790                   void __user *buffer, size_t *lenp, loff_t *ppos)
1791 {
1792         return _proc_do_string(table->data, table->maxlen, write,
1793                                buffer, lenp, ppos);
1794 }
1795
/*
 * Advance *@buf to the position returned by skip_spaces() and report how
 * many characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
        char *start = *buf;

        *buf = skip_spaces(start);

        return *buf - start;
}
1804
/*
 * Step over a leading run of the character @v: while *@size is non-zero
 * and **@buf equals @v, advance *@buf by one and shrink *@size by one.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        while (*size && **buf == v) {
                --*size;
                ++*buf;
        }
}
1814
1815 #define TMPBUFLEN 22
1816 /**
1817  * proc_get_long - reads an ASCII formatted integer from a user buffer
1818  *
1819  * @buf: a kernel buffer
1820  * @size: size of the kernel buffer
1821  * @val: this is where the number will be stored
1822  * @neg: set to %TRUE if number is negative
1823  * @perm_tr: a vector which contains the allowed trailers
1824  * @perm_tr_len: size of the perm_tr vector
1825  * @tr: pointer to store the trailer character
1826  *
1827  * In case of success %0 is returned and @buf and @size are updated with
1828  * the amount of bytes read. If @tr is non-NULL and a trailing
1829  * character exists (size is non-zero after returning from this
1830  * function), @tr is updated with the trailing character.
1831  */
1832 static int proc_get_long(char **buf, size_t *size,
1833                           unsigned long *val, bool *neg,
1834                           const char *perm_tr, unsigned perm_tr_len, char *tr)
1835 {
1836         int len;
1837         char *p, tmp[TMPBUFLEN];
1838
1839         if (!*size)
1840                 return -EINVAL;
1841
1842         len = *size;
1843         if (len > TMPBUFLEN - 1)
1844                 len = TMPBUFLEN - 1;
1845
1846         memcpy(tmp, *buf, len);
1847
1848         tmp[len] = 0;
1849         p = tmp;
1850         if (*p == '-' && *size > 1) {
1851                 *neg = true;
1852                 p++;
1853         } else
1854                 *neg = false;
1855         if (!isdigit(*p))
1856                 return -EINVAL;
1857
1858         *val = simple_strtoul(p, &p, 0);
1859
1860         len = p - tmp;
1861
1862         /* We don't know if the next char is whitespace thus we may accept
1863          * invalid integers (e.g. 1234...a) or two integers instead of one
1864          * (e.g. 123...1). So lets not allow such large numbers. */
1865         if (len == TMPBUFLEN - 1)
1866                 return -EINVAL;
1867
1868         if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
1869                 return -EINVAL;
1870
1871         if (tr && (len < *size))
1872                 *tr = *p;
1873
1874         *buf += len;
1875         *size -= len;
1876
1877         return 0;
1878 }
1879
1880 /**
1881  * proc_put_long - converts an integer to a decimal ASCII formatted string
1882  *
1883  * @buf: the user buffer
1884  * @size: the size of the user buffer
1885  * @val: the integer to be converted
1886  * @neg: sign of the number, %TRUE for negative
1887  *
1888  * In case of success %0 is returned and @buf and @size are updated with
1889  * the amount of bytes written.
1890  */
1891 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
1892                           bool neg)
1893 {
1894         int len;
1895         char tmp[TMPBUFLEN], *p = tmp;
1896
1897         sprintf(p, "%s%lu", neg ? "-" : "", val);
1898         len = strlen(tmp);
1899         if (len > *size)
1900                 len = *size;
1901         if (copy_to_user(*buf, tmp, len))
1902                 return -EFAULT;
1903         *size -= len;
1904         *buf += len;
1905         return 0;
1906 }
1907 #undef TMPBUFLEN
1908
1909 static int proc_put_char(void __user **buf, size_t *size, char c)
1910 {
1911         if (*size) {
1912                 char __user **buffer = (char __user **)buf;
1913                 if (put_user(c, *buffer))
1914                         return -EFAULT;
1915                 (*size)--, (*buffer)++;
1916                 *buf = *buffer;
1917         }
1918         return 0;
1919 }
1920
/*
 * Default conversion between the sign/magnitude pair used by the parser
 * (@negp, @lvalp) and the int stored at @valp.
 *
 * Write (@write non-zero): stores the signed value into *@valp. A
 * magnitude that does not fit in an int is rejected with -EINVAL instead
 * of being silently truncated, and *@valp is left untouched.
 *
 * Read (@write zero): splits *@valp into *@negp and *@lvalp.
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (write) {
                if (*negp) {
                        /* INT_MIN has magnitude INT_MAX + 1. */
                        if (*lvalp > (unsigned long) INT_MAX + 1)
                                return -EINVAL;
                        *valp = -*lvalp;
                } else {
                        if (*lvalp > (unsigned long) INT_MAX)
                                return -EINVAL;
                        *valp = *lvalp;
                }
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = true;
                        /* Negate as unsigned so INT_MIN does not overflow. */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = false;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
1939
/* Separator characters handed to proc_get_long() as the permitted trailers. */
static const char proc_wspace_sep[] = {
        ' ',
        '\t',
        '\n',
};
1941
1942 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
1943                   int write, void __user *buffer,
1944                   size_t *lenp, loff_t *ppos,
1945                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
1946                               int write, void *data),
1947                   void *data)
1948 {
1949         int *i, vleft, first = 1, err = 0;
1950         unsigned long page = 0;
1951         size_t left;
1952         char *kbuf;
1953         
1954         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
1955                 *lenp = 0;
1956                 return 0;
1957         }
1958         
1959         i = (int *) tbl_data;
1960         vleft = table->maxlen / sizeof(*i);
1961         left = *lenp;
1962
1963         if (!conv)
1964                 conv = do_proc_dointvec_conv;
1965
1966         if (write) {
1967                 if (left > PAGE_SIZE - 1)
1968                         left = PAGE_SIZE - 1;
1969                 page = __get_free_page(GFP_TEMPORARY);
1970                 kbuf = (char *) page;
1971                 if (!kbuf)
1972                         return -ENOMEM;
1973                 if (copy_from_user(kbuf, buffer, left)) {
1974                         err = -EFAULT;
1975                         goto free;
1976                 }
1977                 kbuf[left] = 0;
1978         }
1979
1980         for (; left && vleft--; i++, first=0) {
1981                 unsigned long lval;
1982                 bool neg;
1983
1984                 if (write) {
1985                         left -= proc_skip_spaces(&kbuf);
1986
1987                         if (!left)
1988                                 break;
1989                         err = proc_get_long(&kbuf, &left, &lval, &neg,
1990                                              proc_wspace_sep,
1991                                              sizeof(proc_wspace_sep), NULL);
1992                         if (err)
1993                                 break;
1994                         if (conv(&neg, &lval, i, 1, data)) {
1995                                 err = -EINVAL;
1996                                 break;
1997                         }
1998                 } else {
1999                         if (conv(&neg, &lval, i, 0, data)) {
2000                                 err = -EINVAL;
2001                                 break;
2002                         }
2003                         if (!first)
2004                                 err = proc_put_char(&buffer, &left, '\t');
2005                         if (err)
2006                                 break;
2007                         err = proc_put_long(&buffer, &left, lval, neg);
2008                         if (err)
2009                                 break;
2010                 }
2011         }
2012
2013         if (!write && !first && left && !err)
2014                 err = proc_put_char(&buffer, &left, '\n');
2015         if (write && !err && left)
2016                 left -= proc_skip_spaces(&kbuf);
2017 free:
2018         if (write) {
2019                 free_page(page);
2020                 if (first)
2021                         return err ? : -EINVAL;
2022         }
2023         *lenp -= left;
2024         *ppos += *lenp;
2025         return err;
2026 }
2027
2028 static int do_proc_dointvec(struct ctl_table *table, int write,
2029                   void __user *buffer, size_t *lenp, loff_t *ppos,
2030                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2031                               int write, void *data),
2032                   void *data)
2033 {
2034         return __do_proc_dointvec(table->data, table, write,
2035                         buffer, lenp, ppos, conv, data);
2036 }
2037
2038 /**
2039  * proc_dointvec - read a vector of integers
2040  * @table: the sysctl table
2041  * @write: %TRUE if this is a write to the sysctl file
2042  * @buffer: the user buffer
2043  * @lenp: the size of the user buffer
2044  * @ppos: file position
2045  *
2046  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2047  * values from/to the user buffer, treated as an ASCII string. 
2048  *
2049  * Returns 0 on success.
2050  */
2051 int proc_dointvec(struct ctl_table *table, int write,
2052                      void __user *buffer, size_t *lenp, loff_t *ppos)
2053 {
2054     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2055                             NULL,NULL);
2056 }
2057
2058 /*
2059  * Taint values can only be increased
2060  * This means we can safely use a temporary.
2061  */
2062 static int proc_taint(struct ctl_table *table, int write,
2063                                void __user *buffer, size_t *lenp, loff_t *ppos)
2064 {
2065         struct ctl_table t;
2066         unsigned long tmptaint = get_taint();
2067         int err;
2068
2069         if (write && !capable(CAP_SYS_ADMIN))
2070                 return -EPERM;
2071
2072         t = *table;
2073         t.data = &tmptaint;
2074         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2075         if (err < 0)
2076                 return err;
2077
2078         if (write) {
2079                 /*
2080                  * Poor man's atomic or. Not worth adding a primitive
2081                  * to everyone's atomic.h for this
2082                  */
2083                 int i;
2084                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2085                         if ((tmptaint >> i) & 1)
2086                                 add_taint(i, LOCKDEP_STILL_OK);
2087                 }
2088         }
2089
2090         return err;
2091 }
2092
#ifdef CONFIG_PRINTK
/*
 * Same as proc_dointvec_minmax(), except that a write is refused with
 * -EPERM unless the caller has CAP_SYS_ADMIN. Reads are not restricted.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        if (!write || capable(CAP_SYS_ADMIN))
                return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        return -EPERM;
}
#endif
2103
/*
 * Bounds handed to do_proc_dointvec_minmax_conv() through its @data
 * argument. A NULL pointer means that side of the range is unchecked.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Range-checked variant of the integer conversion callback.
 *
 * Write (@write non-zero): the sign/magnitude pair (@negp, @lvalp) is
 * first checked to fit in an int, so that an oversized number cannot wrap
 * around into the permitted window, and is then compared with the
 * optional bounds in @data. On any violation -EINVAL is returned and
 * *@valp is left untouched; otherwise the value is stored in *@valp.
 *
 * Read (@write zero): splits *@valp into *@negp and *@lvalp; the bounds
 * are not consulted.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;

        if (write) {
                int val;

                if (*negp) {
                        /* INT_MIN has magnitude INT_MAX + 1. */
                        if (*lvalp > (unsigned long) INT_MAX + 1)
                                return -EINVAL;
                        val = -*lvalp;
                } else {
                        if (*lvalp > (unsigned long) INT_MAX)
                                return -EINVAL;
                        val = *lvalp;
                }

                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = true;
                        /* Negate as unsigned so INT_MIN does not overflow. */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = false;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2132
2133 /**
2134  * proc_dointvec_minmax - read a vector of integers with min/max values
2135  * @table: the sysctl table
2136  * @write: %TRUE if this is a write to the sysctl file
2137  * @buffer: the user buffer
2138  * @lenp: the size of the user buffer
2139  * @ppos: file position
2140  *
2141  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2142  * values from/to the user buffer, treated as an ASCII string.
2143  *
2144  * This routine will ensure the values are within the range specified by
2145  * table->extra1 (min) and table->extra2 (max).
2146  *
2147  * Returns 0 on success.
2148  */
2149 int proc_dointvec_minmax(struct ctl_table *table, int write,
2150                   void __user *buffer, size_t *lenp, loff_t *ppos)
2151 {
2152         struct do_proc_dointvec_minmax_conv_param param = {
2153                 .min = (int *) table->extra1,
2154                 .max = (int *) table->extra2,
2155         };
2156         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2157                                 do_proc_dointvec_minmax_conv, &param);
2158 }
2159
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'. Compiles to nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING "Unsafe core_pattern used with "
                "suid_dumpable=2. Pipe handler or fully qualified "
                "core dump path required.\n");
#endif
}
2171
2172 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2173                 void __user *buffer, size_t *lenp, loff_t *ppos)
2174 {
2175         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2176         if (!error)
2177                 validate_coredump_safety();
2178         return error;
2179 }
2180
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, when it succeeds, by a re-check of the core
 * dump settings via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int error;

        error = proc_dostring(table, write, buffer, lenp, ppos);
        if (error)
                return error;

        validate_coredump_safety();
        return 0;
}
#endif
2191
2192 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2193                                      void __user *buffer,
2194                                      size_t *lenp, loff_t *ppos,
2195                                      unsigned long convmul,
2196                                      unsigned long convdiv)
2197 {
2198         unsigned long *i, *min, *max;
2199         int vleft, first = 1, err = 0;
2200         unsigned long page = 0;
2201         size_t left;
2202         char *kbuf;
2203
2204         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2205                 *lenp = 0;
2206                 return 0;
2207         }
2208
2209         i = (unsigned long *) data;
2210         min = (unsigned long *) table->extra1;
2211         max = (unsigned long *) table->extra2;
2212         vleft = table->maxlen / sizeof(unsigned long);
2213         left = *lenp;
2214
2215         if (write) {
2216                 if (left > PAGE_SIZE - 1)
2217                         left = PAGE_SIZE - 1;
2218                 page = __get_free_page(GFP_TEMPORARY);
2219                 kbuf = (char *) page;
2220                 if (!kbuf)
2221                         return -ENOMEM;
2222                 if (copy_from_user(kbuf, buffer, left)) {
2223                         err = -EFAULT;
2224                         goto free;
2225                 }
2226                 kbuf[left] = 0;
2227         }
2228
2229         for (; left && vleft--; i++, first = 0) {
2230                 unsigned long val;
2231
2232                 if (write) {
2233                         bool neg;
2234
2235                         left -= proc_skip_spaces(&kbuf);
2236
2237                         err = proc_get_long(&kbuf, &left, &val, &neg,
2238                                              proc_wspace_sep,
2239                                              sizeof(proc_wspace_sep), NULL);
2240                         if (err)
2241                                 break;
2242                         if (neg)
2243                                 continue;
2244                         if ((min && val < *min) || (max && val > *max))
2245                                 continue;
2246                         *i = val;
2247                 } else {
2248                         val = convdiv * (*i) / convmul;
2249                         if (!first) {
2250                                 err = proc_put_char(&buffer, &left, '\t');
2251                                 if (err)
2252                                         break;
2253                         }
2254                         err = proc_put_long(&buffer, &left, val, false);
2255                         if (err)
2256                                 break;
2257                 }
2258         }
2259
2260         if (!write && !first && left && !err)
2261                 err = proc_put_char(&buffer, &left, '\n');
2262         if (write && !err)
2263                 left -= proc_skip_spaces(&kbuf);
2264 free:
2265         if (write) {
2266                 free_page(page);
2267                 if (first)
2268                         return err ? : -EINVAL;
2269         }
2270         *lenp -= left;
2271         *ppos += *lenp;
2272         return err;
2273 }
2274
2275 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2276                                      void __user *buffer,
2277                                      size_t *lenp, loff_t *ppos,
2278                                      unsigned long convmul,
2279                                      unsigned long convdiv)
2280 {
2281         return __do_proc_doulongvec_minmax(table->data, table, write,
2282                         buffer, lenp, ppos, convmul, convdiv);
2283 }
2284
2285 /**
2286  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2287  * @table: the sysctl table
2288  * @write: %TRUE if this is a write to the sysctl file
2289  * @buffer: the user buffer
2290  * @lenp: the size of the user buffer
2291  * @ppos: file position
2292  *
2293  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2294  * values from/to the user buffer, treated as an ASCII string.
2295  *
2296  * This routine will ensure the values are within the range specified by
2297  * table->extra1 (min) and table->extra2 (max).
2298  *
2299  * Returns 0 on success.
2300  */
2301 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2302                            void __user *buffer, size_t *lenp, loff_t *ppos)
2303 {
2304     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2305 }
2306
2307 /**
2308  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2309  * @table: the sysctl table
2310  * @write: %TRUE if this is a write to the sysctl file
2311  * @buffer: the user buffer
2312  * @lenp: the size of the user buffer
2313  * @ppos: file position
2314  *
2315  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2316  * values from/to the user buffer, treated as an ASCII string. The values
2317  * are treated as milliseconds, and converted to jiffies when they are stored.
2318  *
2319  * This routine will ensure the values are within the range specified by
2320  * table->extra1 (min) and table->extra2 (max).
2321  *
2322  * Returns 0 on success.
2323  */
2324 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2325                                       void __user *buffer,
2326                                       size_t *lenp, loff_t *ppos)
2327 {
2328     return do_proc_doulongvec_minmax(table, write, buffer,
2329                                      lenp, ppos, HZ, 1000l);
2330 }
2331
2332
2333 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2334                                          int *valp,
2335                                          int write, void *data)
2336 {
2337         if (write) {
2338                 if (*lvalp > LONG_MAX / HZ)
2339                         return 1;
2340                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2341         } else {
2342                 int val = *valp;
2343                 unsigned long lval;
2344                 if (val < 0) {
2345                         *negp = true;
2346                         lval = (unsigned long)-val;
2347                 } else {
2348                         *negp = false;
2349                         lval = (unsigned long)val;
2350                 }
2351                 *lvalp = lval / HZ;
2352         }
2353         return 0;
2354 }
2355
2356 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2357                                                 int *valp,
2358                                                 int write, void *data)
2359 {
2360         if (write) {
2361                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2362                         return 1;
2363                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2364         } else {
2365                 int val = *valp;
2366                 unsigned long lval;
2367                 if (val < 0) {
2368                         *negp = true;
2369                         lval = (unsigned long)-val;
2370                 } else {
2371                         *negp = false;
2372                         lval = (unsigned long)val;
2373                 }
2374                 *lvalp = jiffies_to_clock_t(lval);
2375         }
2376         return 0;
2377 }
2378
/*
 * Conversion callback for values that user space sees in milliseconds
 * while the table holds jiffies.
 *
 * Write: the signed value is converted with msecs_to_jiffies(); a result
 * above INT_MAX is rejected by returning 1, otherwise it is stored in
 * *@valp.
 *
 * Read: *@valp is split into sign and magnitude and the magnitude is
 * converted with jiffies_to_msecs().
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        unsigned long magnitude;
        int stored;

        if (write) {
                unsigned long ticks;

                ticks = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
                if (ticks > INT_MAX)
                        return 1;
                *valp = (int)ticks;
                return 0;
        }

        stored = *valp;
        if (stored < 0) {
                magnitude = (unsigned long)-stored;
                *negp = true;
        } else {
                magnitude = (unsigned long)stored;
                *negp = false;
        }
        *lvalp = jiffies_to_msecs(magnitude);
        return 0;
}
2404 /**
2405  * proc_dointvec_jiffies - read a vector of integers as seconds
2406  * @table: the sysctl table
2407  * @write: %TRUE if this is a write to the sysctl file
2408  * @buffer: the user buffer
2409  * @lenp: the size of the user buffer
2410  * @ppos: file position
2411  *
2412  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2413  * values from/to the user buffer, treated as an ASCII string. 
2414  * The values read are assumed to be in seconds, and are converted into
2415  * jiffies.
2416  *
2417  * Returns 0 on success.
2418  */
2419 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2420                           void __user *buffer, size_t *lenp, loff_t *ppos)
2421 {
2422     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2423                             do_proc_dointvec_jiffies_conv,NULL);
2424 }
2425
2426 /**
2427  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2428  * @table: the sysctl table
2429  * @write: %TRUE if this is a write to the sysctl file
2430  * @buffer: the user buffer
2431  * @lenp: the size of the user buffer
2432  * @ppos: pointer to the file position
2433  *
2434  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2435  * values from/to the user buffer, treated as an ASCII string. 
2436  * The values read are assumed to be in 1/USER_HZ seconds, and 
2437  * are converted into jiffies.
2438  *
2439  * Returns 0 on success.
2440  */
2441 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2442                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2443 {
2444     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2445                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2446 }
2447
2448 /**
2449  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2450  * @table: the sysctl table
2451  * @write: %TRUE if this is a write to the sysctl file
2452  * @buffer: the user buffer
2453  * @lenp: the size of the user buffer
2454  * @ppos: file position
2455  * @ppos: the current position in the file
2456  *
2457  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2458  * values from/to the user buffer, treated as an ASCII string. 
2459  * The values read are assumed to be in 1/1000 seconds, and 
2460  * are converted into jiffies.
2461  *
2462  * Returns 0 on success.
2463  */
2464 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2465                              void __user *buffer, size_t *lenp, loff_t *ppos)
2466 {
2467         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2468                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2469 }
2470
2471 static int proc_do_cad_pid(struct ctl_table *table, int write,
2472                            void __user *buffer, size_t *lenp, loff_t *ppos)
2473 {
2474         struct pid *new_pid;
2475         pid_t tmp;
2476         int r;
2477
2478         tmp = pid_vnr(cad_pid);
2479
2480         r = __do_proc_dointvec(&tmp, table, write, buffer,
2481                                lenp, ppos, NULL, NULL);
2482         if (r || !write)
2483                 return r;
2484
2485         new_pid = find_get_pid(tmp);
2486         if (!new_pid)
2487                 return -ESRCH;
2488
2489         put_pid(xchg(&cad_pid, new_pid));
2490         return 0;
2491 }
2492
2493 /**
2494  * proc_do_large_bitmap - read/write from/to a large bitmap
2495  * @table: the sysctl table
2496  * @write: %TRUE if this is a write to the sysctl file
2497  * @buffer: the user buffer
2498  * @lenp: the size of the user buffer
2499  * @ppos: file position
2500  *
2501  * The bitmap is stored at table->data and the bitmap length (in bits)
2502  * in table->maxlen.
2503  *
2504  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2505  * large bitmaps may be represented in a compact manner. Writing into
2506  * the file will clear the bitmap then update it with the given input.
2507  *
2508  * Returns 0 on success.
2509  */
2510 int proc_do_large_bitmap(struct ctl_table *table, int write,
2511                          void __user *buffer, size_t *lenp, loff_t *ppos)
2512 {
2513         int err = 0;
2514         bool first = 1;
2515         size_t left = *lenp;
2516         unsigned long bitmap_len = table->maxlen;
2517         unsigned long *bitmap = (unsigned long *) table->data;
2518         unsigned long *tmp_bitmap = NULL;
2519         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2520
2521         if (!bitmap_len || !left || (*ppos && !write)) {
2522                 *lenp = 0;
2523                 return 0;
2524         }
2525
2526         if (write) {
2527                 unsigned long page = 0;
2528                 char *kbuf;
2529
2530                 if (left > PAGE_SIZE - 1)
2531                         left = PAGE_SIZE - 1;
2532
2533                 page = __get_free_page(GFP_TEMPORARY);
2534                 kbuf = (char *) page;
2535                 if (!kbuf)
2536                         return -ENOMEM;
2537                 if (copy_from_user(kbuf, buffer, left)) {
2538                         free_page(page);
2539                         return -EFAULT;
2540                 }
2541                 kbuf[left] = 0;
2542
2543                 tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2544                                      GFP_KERNEL);
2545                 if (!tmp_bitmap) {
2546                         free_page(page);
2547                         return -ENOMEM;
2548                 }
2549                 proc_skip_char(&kbuf, &left, '\n');
2550                 while (!err && left) {
2551                         unsigned long val_a, val_b;
2552                         bool neg;
2553
2554                         err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2555                                              sizeof(tr_a), &c);
2556                         if (err)
2557                                 break;
2558                         if (val_a >= bitmap_len || neg) {
2559                                 err = -EINVAL;
2560                                 break;
2561                         }
2562
2563                         val_b = val_a;
2564                         if (left) {
2565                                 kbuf++;
2566                                 left--;
2567                         }
2568
2569                         if (c == '-') {
2570                                 err = proc_get_long(&kbuf, &left, &val_b,
2571                                                      &neg, tr_b, sizeof(tr_b),
2572                                                      &c);
2573                                 if (err)
2574                                         break;
2575                                 if (val_b >= bitmap_len || neg ||
2576                                     val_a > val_b) {
2577                                         err = -EINVAL;
2578                                         break;
2579                                 }
2580                                 if (left) {
2581                                         kbuf++;
2582                                         left--;
2583                                 }
2584                         }
2585
2586                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2587                         first = 0;
2588                         proc_skip_char(&kbuf, &left, '\n');
2589                 }
2590                 free_page(page);
2591         } else {
2592                 unsigned long bit_a, bit_b = 0;
2593
2594                 while (left) {
2595                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2596                         if (bit_a >= bitmap_len)
2597                                 break;
2598                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
2599                                                    bit_a + 1) - 1;
2600
2601                         if (!first) {
2602                                 err = proc_put_char(&buffer, &left, ',');
2603                                 if (err)
2604                                         break;
2605                         }
2606                         err = proc_put_long(&buffer, &left, bit_a, false);
2607                         if (err)
2608                                 break;
2609                         if (bit_a != bit_b) {
2610                                 err = proc_put_char(&buffer, &left, '-');
2611                                 if (err)
2612                                         break;
2613                                 err = proc_put_long(&buffer, &left, bit_b, false);
2614                                 if (err)
2615                                         break;
2616                         }
2617
2618                         first = 0; bit_b++;
2619                 }
2620                 if (!err)
2621                         err = proc_put_char(&buffer, &left, '\n');
2622         }
2623
2624         if (!err) {
2625                 if (write) {
2626                         if (*ppos)
2627                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2628                         else
2629                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2630                 }
2631                 kfree(tmp_bitmap);
2632                 *lenp -= left;
2633                 *ppos += *lenp;
2634                 return 0;
2635         } else {
2636                 kfree(tmp_bitmap);
2637                 return err;
2638         }
2639 }
2640
2641 #else /* CONFIG_PROC_SYSCTL */
2642
2643 int proc_dostring(struct ctl_table *table, int write,
2644                   void __user *buffer, size_t *lenp, loff_t *ppos)
2645 {
2646         return -ENOSYS;
2647 }
2648
2649 int proc_dointvec(struct ctl_table *table, int write,
2650                   void __user *buffer, size_t *lenp, loff_t *ppos)
2651 {
2652         return -ENOSYS;
2653 }
2654
2655 int proc_dointvec_minmax(struct ctl_table *table, int write,
2656                     void __user *buffer, size_t *lenp, loff_t *ppos)
2657 {
2658         return -ENOSYS;
2659 }
2660
2661 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2662                     void __user *buffer, size_t *lenp, loff_t *ppos)
2663 {
2664         return -ENOSYS;
2665 }
2666
2667 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2668                     void __user *buffer, size_t *lenp, loff_t *ppos)
2669 {
2670         return -ENOSYS;
2671 }
2672
2673 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2674                              void __user *buffer, size_t *lenp, loff_t *ppos)
2675 {
2676         return -ENOSYS;
2677 }
2678
2679 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2680                     void __user *buffer, size_t *lenp, loff_t *ppos)
2681 {
2682         return -ENOSYS;
2683 }
2684
2685 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2686                                       void __user *buffer,
2687                                       size_t *lenp, loff_t *ppos)
2688 {
2689     return -ENOSYS;
2690 }
2691
2692
2693 #endif /* CONFIG_PROC_SYSCTL */
2694
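/*
 * Each handler exported below is defined twice, once in each branch of
 * the CONFIG_PROC_SYSCTL conditional above.  Rather than repeating the
 * export after every definition, all of them are grouped here; an
 * exception to the usual placement, granted :-)
 */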
2699 EXPORT_SYMBOL(proc_dointvec);
2700 EXPORT_SYMBOL(proc_dointvec_jiffies);
2701 EXPORT_SYMBOL(proc_dointvec_minmax);
2702 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2703 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2704 EXPORT_SYMBOL(proc_dostring);
2705 EXPORT_SYMBOL(proc_doulongvec_minmax);
2706 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);