1  /**
2  * Implements task migration and maintains coherent 
3  * address spaces across CPU cores.
4  *
5  * David G. Katz
6  */
7
8 #include <linux/mcomm.h> // IPC
9 #include <linux/kthread.h>
10 #include <linux/export.h>
11 #include <linux/delay.h>
12 #include <linux/smp.h>
13 #include <linux/sched.h>
14 #include <linux/threads.h> // NR_CPUS
15 #include <linux/kmod.h>
16 #include <linux/path.h>
17 #include <linux/mount.h>
18 #include <linux/fs.h>
19 #include <linux/fs_struct.h>
20 #include <linux/file.h>
21 #include <linux/fdtable.h>
22 #include <linux/slab.h>
23 #include <linux/process_server.h>
24 #include <linux/mm.h>
25 #include <linux/io.h> // ioremap
26 #include <linux/mman.h> // MAP_ANONYMOUS
27 #include <linux/pcn_kmsg.h> // Messaging
28 #include <linux/pcn_perf.h> // performance measurement
29 #include <linux/string.h>
30
31 #include <linux/popcorn_cpuinfo.h>
32
33 #include <asm/pgtable.h>
34 #include <asm/atomic.h>
35 #include <asm/tlbflush.h>
36 #include <asm/cacheflush.h>
37 #include <asm/uaccess.h> // USER_DS
38 #include <asm/prctl.h> // prctl
39 #include <asm/proto.h> // do_arch_prctl
40 #include <asm/msr.h> // wrmsr_safe
41 #include <asm/mmu_context.h>
42 #include <asm/processor.h> // load_cr3
43
44 unsigned long get_percpu_old_rsp(void);
45
46 #include <linux/futex.h>
47 #define  NSIG 32
48
49 #include <linux/signal.h>
50 #include <linux/fcntl.h>
51 #include "futex_remote.h"
52 /**
53  * General purpose configuration
54  */
55
56 // Flag indicating whether or not to migrate the entire virtual
57 // memory space when a migration occurs.
58 #define COPY_WHOLE_VM_WITH_MIGRATION 1
59
60 // Flag indicating whether or not to migrate file-backed executable
61 // pages when a fault occurs accessing executable memory.  When this
62 // flag is 1, those pages will be migrated.  When it is 0, the local
63 // file-system will be consulted instead.
64 #define MIGRATE_EXECUTABLE_PAGES_ON_DEMAND 1
65
66 // The maximum number of contiguous physically mapped regions to
67 // migrate in response to a mapping query.
68 #define MAX_MAPPINGS 1
69
70 extern long sys_topen(const char __user * filename, int flags, int mode, int fd);
71 /**
72  * Use the preprocessor to turn off printk.
73  */
74 #define PROCESS_SERVER_VERBOSE 0
75 #if PROCESS_SERVER_VERBOSE
76 #define PSPRINTK(...) printk(__VA_ARGS__)
77 #else
78 #define PSPRINTK(...) ;
79 #endif
80
81 #define PROCESS_SERVER_INSTRUMENT_LOCK 0
82 #if PROCESS_SERVER_VERBOSE && PROCESS_SERVER_INSTRUMENT_LOCK
83 #define PS_SPIN_LOCK(x) PSPRINTK("Acquiring spin lock in %s at line %d\n",__func__,__LINE__); \
84                        spin_lock(x); \
85                        PSPRINTK("Done acquiring spin lock in %s at line %d\n",__func__,__LINE__)
86 #define PS_SPIN_UNLOCK(x) PSPRINTK("Releasing spin lock in %s at line %d\n",__func__,__LINE__); \
87                           spin_unlock(x); \
88                           PSPRINTK("Done releasing spin lock in %s at line %d\n",__func__,__LINE__)
89 #define PS_DOWN_READ(x) PSPRINTK("Acquiring read lock in %s at line %d\n",__func__,__LINE__); \
90                         down_read(x); \
91                         PSPRINTK("Done acquiring read lock in %s at line %d\n",__func__,__LINE__)
92 #define PS_UP_READ(x) PSPRINTK("Releasing read lock in %s at line %d\n",__func__,__LINE__); \
93                       up_read(x); \
94                       PSPRINTK("Done releasing read lock in %s at line %d\n",__func__,__LINE__)
95 #define PS_DOWN_WRITE(x) PSPRINTK("Acquiring write lock in %s at line %d\n",__func__,__LINE__); \
96                          down_write(x); \
97                          PSPRINTK("Done acquiring write lock in %s at line %d\n",__func__,__LINE__)
98 #define PS_UP_WRITE(x) PSPRINTK("Releasing write lock in %s at line %d\n",__func__,__LINE__); \
99                        up_write(x); \
100                        PSPRINTK("Done releasing write lock in %s at line %d\n",__func__,__LINE__)
101
102
103 #else
104 #define PS_SPIN_LOCK(x) spin_lock(x)
105 #define PS_SPIN_UNLOCK(x) spin_unlock(x)
106 #define PS_DOWN_READ(x) down_read(x)
107 #define PS_UP_READ(x) up_read(x)
108 #define PS_DOWN_WRITE(x) down_write(x)
109 #define PS_UP_WRITE(x) up_write(x)
110 #endif
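
/*
 * The PS_* wrappers above are drop-in replacements for the raw locking calls,
 * so verbose builds log every acquire/release site.  A minimal usage sketch
 * (assuming a caller that owns a struct mm_struct *mm, as in the fault
 * handling paths later in this file):
 *
 *   PS_DOWN_READ(&mm->mmap_sem);
 *   // ... inspect the address space ...
 *   PS_UP_READ(&mm->mmap_sem);
 */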
111
112 /**
113  * Library data type definitions
114  */
115 #define PROCESS_SERVER_DATA_TYPE_TEST 0
116 #define PROCESS_SERVER_VMA_DATA_TYPE 1
117 #define PROCESS_SERVER_PTE_DATA_TYPE 2
118 #define PROCESS_SERVER_CLONE_DATA_TYPE 3
119 #define PROCESS_SERVER_MAPPING_REQUEST_DATA_TYPE 4
120 #define PROCESS_SERVER_MUNMAP_REQUEST_DATA_TYPE 5
121 #define PROCESS_SERVER_MM_DATA_TYPE 6
122 #define PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE 7
123 #define PROCESS_SERVER_MPROTECT_DATA_TYPE 8
124
125 /**
126  * Useful macros
127  */
128 #define DO_UNTIL_SUCCESS(x) while(x != 0){}
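
/*
 * DO_UNTIL_SUCCESS busy-retries its argument until it returns zero.  It is
 * intended for kernel message sends that can fail transiently, e.g. (as in
 * the commented-out deconstruction walk further below):
 *
 *   DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&pte_xfer));
 */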
129
130 /**
131  * Perf
132  */
133 #ifdef CONFIG_POPCORN_PERF
134 #define PERF_INIT() perf_init()
135 #define PERF_MEASURE_START(x) perf_measure_start(x)
136 #define PERF_MEASURE_STOP(x,y,z)  perf_measure_stop(x,y,z)
137
138 pcn_perf_context_t perf_count_remote_thread_members;
139 pcn_perf_context_t perf_process_back_migration;
140 pcn_perf_context_t perf_process_mapping_request;
141 pcn_perf_context_t perf_process_mapping_request_search_active_mm;
142 pcn_perf_context_t perf_process_mapping_request_search_saved_mm;
143 pcn_perf_context_t perf_process_mapping_request_do_lookup;
144 pcn_perf_context_t perf_process_mapping_request_transmit;
145 pcn_perf_context_t perf_process_mapping_response;
146 pcn_perf_context_t perf_process_tgroup_closed_item;
147 pcn_perf_context_t perf_process_exit_item;
148 pcn_perf_context_t perf_process_mprotect_item;
149 pcn_perf_context_t perf_process_munmap_request;
150 pcn_perf_context_t perf_process_munmap_response;
151 pcn_perf_context_t perf_process_server_try_handle_mm_fault;
152 pcn_perf_context_t perf_process_server_import_address_space;
153 pcn_perf_context_t perf_process_server_do_exit;
154 pcn_perf_context_t perf_process_server_do_munmap;
155 pcn_perf_context_t perf_process_server_do_migration;
156 pcn_perf_context_t perf_process_server_do_mprotect;
157 pcn_perf_context_t perf_process_server_notify_delegated_subprocess_starting;
158 pcn_perf_context_t perf_handle_thread_group_exit_notification;
159 pcn_perf_context_t perf_handle_remote_thread_count_response;
160 pcn_perf_context_t perf_handle_remote_thread_count_request;
161 pcn_perf_context_t perf_handle_munmap_response;
162 pcn_perf_context_t perf_handle_munmap_request;
163 pcn_perf_context_t perf_handle_mapping_response;
164 pcn_perf_context_t perf_handle_mapping_request;
165 pcn_perf_context_t perf_handle_pte_transfer;
166 pcn_perf_context_t perf_handle_vma_transfer;
167 pcn_perf_context_t perf_handle_exiting_process_notification;
168 pcn_perf_context_t perf_handle_process_pairing_request;
169 pcn_perf_context_t perf_handle_clone_request;
170 pcn_perf_context_t perf_handle_mprotect_response;
171 pcn_perf_context_t perf_handle_mprotect_request;
172
173 /**
174  * Register a perf context for each of the instrumented code paths above.
175  */
176 static void perf_init(void) {
177    perf_init_context(&perf_count_remote_thread_members,
178            "count_remote_thread_members");
179    perf_init_context(&perf_process_back_migration,
180            "process_back_migration");
181    perf_init_context(&perf_process_mapping_request,
182            "process_mapping_request");
183    perf_init_context(&perf_process_mapping_request_search_active_mm,
184            "process_mapping_request_search_active_mm");
185    perf_init_context(&perf_process_mapping_request_search_saved_mm,
186            "process_mapping_request_search_saved_mm");
187    perf_init_context(&perf_process_mapping_request_do_lookup,
188            "process_mapping_request_do_lookup");
189    perf_init_context(&perf_process_mapping_request_transmit,
190            "process_mapping_request_transmit");
191    perf_init_context(&perf_process_mapping_response,
192            "process_mapping_response");
193    perf_init_context(&perf_process_tgroup_closed_item,
194            "process_tgroup_closed_item");
195    perf_init_context(&perf_process_exit_item,
196            "process_exit_item");
197    perf_init_context(&perf_process_mprotect_item,
198            "process_mprotect_item");
199    perf_init_context(&perf_process_munmap_request,
200            "process_munmap_request");
201    perf_init_context(&perf_process_munmap_response,
202            "process_munmap_response");
203    perf_init_context(&perf_process_server_try_handle_mm_fault,
204            "process_server_try_handle_mm_fault");
205    perf_init_context(&perf_process_server_import_address_space,
206            "process_server_import_address_space");
207    perf_init_context(&perf_process_server_do_exit,
208            "process_server_do_exit");
209    perf_init_context(&perf_process_server_do_munmap,
210            "process_server_do_munmap");
211    perf_init_context(&perf_process_server_do_migration,
212            "process_server_do_migration");
213    perf_init_context(&perf_process_server_do_mprotect,
214            "process_server_do_mprotect");
215    perf_init_context(&perf_process_server_notify_delegated_subprocess_starting,
216            "process_server_notify_delegated_subprocess_starting");
217    perf_init_context(&perf_handle_thread_group_exit_notification,
218            "handle_thread_group_exit_notification");
219    perf_init_context(&perf_handle_remote_thread_count_response,
220            "handle_remote_thread_count_response");
221    perf_init_context(&perf_handle_remote_thread_count_request,
222            "handle_remote_thread_count_request");
223    perf_init_context(&perf_handle_munmap_response,
224            "handle_munmap_response");
225    perf_init_context(&perf_handle_munmap_request,
226            "handle_munmap_request");
227    perf_init_context(&perf_handle_mapping_response,
228            "handle_mapping_response");
229    perf_init_context(&perf_handle_mapping_request,
230            "handle_mapping_request");
231    perf_init_context(&perf_handle_pte_transfer,
232            "handle_pte_transfer");
233    perf_init_context(&perf_handle_vma_transfer,
234            "handle_vma_transfer");
235    perf_init_context(&perf_handle_exiting_process_notification,
236            "handle_exiting_process_notification");
237    perf_init_context(&perf_handle_process_pairing_request,
238            "handle_process_pairing_request");
239    perf_init_context(&perf_handle_clone_request,
240            "handle_clone_request");
241    perf_init_context(&perf_handle_mprotect_request,
242            "handle_mprotect_request");
243    perf_init_context(&perf_handle_mprotect_response,
244            "handle_mprotect_response");
245
246 }
247 #else /* CONFIG_POPCORN_PERF */
248 #define PERF_INIT() 
249 #define PERF_MEASURE_START(x) -1
250 #define PERF_MEASURE_STOP(x, y, z)
251 #endif
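
/*
 * Intended perf-macro usage, shown only as a sketch (it assumes the token
 * returned by PERF_MEASURE_START() is handed back to PERF_MEASURE_STOP()
 * together with a short note string; with CONFIG_POPCORN_PERF off both calls
 * compile away):
 *
 *   int perf = PERF_MEASURE_START(&perf_handle_mapping_request);
 *   // ... work being measured ...
 *   PERF_MEASURE_STOP(&perf_handle_mapping_request, "done", perf);
 */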
252
253
254 static DECLARE_WAIT_QUEUE_HEAD(countq);
255
256 /**
257  * Library
258  */
259
260 #define POPCORN_MAX_PATH 512
261
262 /**
263  * Some piping for linking data entries
264  * and identifying data entry types.
265  */
266 typedef struct _data_header {
267     struct _data_header* next;
268     struct _data_header* prev;
269     int data_type;
270 } data_header_t;
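
/*
 * Every tracked record below embeds data_header_t as its first member, so a
 * generic list head (e.g. _saved_mm_head) can be walked through the header's
 * next/prev links and then cast back to the concrete type.  A sketch of the
 * pattern used throughout this file:
 *
 *   data_header_t* curr = _saved_mm_head;
 *   while(curr) {
 *       mm_data_t* entry = (mm_data_t*)curr; // header is the first field
 *       // ... inspect entry ...
 *       curr = curr->next;
 *   }
 */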
271
272 /**
273  * Hold data about a pte to vma mapping.
274  */
275 typedef struct _pte_data {
276     data_header_t header;
277     int vma_id;
278     int clone_request_id;
279     int cpu;
280     unsigned long vaddr_start;
281     unsigned long paddr_start;
282     size_t sz;
283 } pte_data_t;
284
285 /**
286  * Hold data about a vma to process
287  * mapping.
288  */
289 typedef struct _vma_data {
290     data_header_t header;
291     spinlock_t lock;
292     unsigned long start;
293     unsigned long end;
294     int clone_request_id;
295     int cpu;
296     unsigned long flags;
297     int vma_id;
298     pgprot_t prot;
299     unsigned long pgoff;
300     pte_data_t* pte_list;
301     int mmapping_in_progress;
302     char path[256];
303 } vma_data_t;
304
305 typedef struct _contiguous_physical_mapping {
306     unsigned char present;
307     unsigned long vaddr;
308     unsigned long paddr;
309     size_t sz;
310 } contiguous_physical_mapping_t;
311
312 /**
313  * Saved state of a migrating task: registers, thread/segment state, VMA list, signal state, and scheduling parameters.
314  */
315 typedef struct _clone_data {
316     data_header_t header;
317     spinlock_t lock;
318     int clone_request_id;
319     int requesting_cpu;
320     char exe_path[512];
321     unsigned long clone_flags;
322     unsigned long stack_start;
323     unsigned long stack_ptr;
324     unsigned long env_start;
325     unsigned long env_end;
326     unsigned long arg_start;
327     unsigned long arg_end;
328     unsigned long heap_start;
329     unsigned long heap_end;
330     unsigned long data_start;
331     unsigned long data_end;
332     struct pt_regs regs;
333     int placeholder_pid;
334     int placeholder_tgid;
335     int placeholder_cpu;
336     unsigned long thread_fs;
337     unsigned long thread_gs;
338     unsigned long thread_sp0;
339     unsigned long thread_sp;
340     unsigned long thread_usersp;
341     unsigned short thread_es;
342     unsigned short thread_ds;
343     unsigned short thread_fsindex;
344     unsigned short thread_gsindex;
345     int tgroup_home_cpu;
346     int tgroup_home_id;
347     int t_home_cpu;
348     int t_home_id;
349     int prio, static_prio, normal_prio; //from sched.c
350     unsigned int rt_priority; //from sched.c
351     int sched_class; //from sched.c but here we are using SCHED_NORMAL, SCHED_FIFO, etc.
352     unsigned long previous_cpus;
353     vma_data_t* vma_list;
354     vma_data_t* pending_vma_list;
355     /*mklinux_akshay*/int origin_pid;
356     sigset_t remote_blocked, remote_real_blocked;
357     sigset_t remote_saved_sigmask;
358     struct sigpending remote_pending;
359     unsigned long sas_ss_sp;
360     size_t sas_ss_size;
361     struct k_sigaction action[_NSIG];
362 } clone_data_t;
363
364 /**
365  * Bookkeeping for an outstanding mapping request and the responses collected for it.
366  */
367 typedef struct _mapping_request_data {
368     data_header_t header;
369     int tgroup_home_cpu;
370     int tgroup_home_id;
371     int requester_pid;
372     unsigned long address;
373     unsigned long vaddr_start;
374     unsigned long vaddr_size;
375     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
376     pgprot_t prot;
377     unsigned long vm_flags;
378     unsigned char present;
379     unsigned char complete;
380     unsigned char from_saved_mm;
381     int responses;
382     int expected_responses;
383     unsigned long pgoff;
384     spinlock_t lock;
385     char path[512];
386 } mapping_request_data_t;
387
388 /**
389  * Bookkeeping for an outstanding distributed munmap request.
390  */
391 typedef struct _munmap_request_data {
392     data_header_t header;
393     int tgroup_home_cpu;
394     int tgroup_home_id;
395     int requester_pid;
396     unsigned long vaddr_start;
397     unsigned long vaddr_size;
398     int responses;
399     int expected_responses;
400     spinlock_t lock;
401 } munmap_request_data_t;
402
403 /**
404  * Bookkeeping for an outstanding remote thread-count request.
405  */
406 typedef struct _remote_thread_count_request_data {
407     data_header_t header;
408     int tgroup_home_cpu;
409     int tgroup_home_id;
410     int requester_pid;
411     int responses;
412     int expected_responses;
413     int count;
414     spinlock_t lock;
415 } remote_thread_count_request_data_t;
416
417 /**
418  * Associates a saved mm_struct with its home thread group.
419  */
420 typedef struct _mm_data {
421     data_header_t header;
422     int tgroup_home_cpu;
423     int tgroup_home_id;
424     struct mm_struct* mm;
425 } mm_data_t;
426
427 typedef struct _mprotect_data {
428     data_header_t header;
429     int tgroup_home_cpu;
430     int tgroup_home_id;
431     int requester_pid;
432     unsigned long start;
433     int responses;
434     int expected_responses;
435     spinlock_t lock;
436 } mprotect_data_t;
437
438 /**
439  * This message is sent to a remote cpu in order to 
440  * ask it to spin up a process on behalf of the
441  * requesting cpu.  Some of these fields may go
442  * away in the near future.
443  */
444 typedef struct _clone_request {
445     struct pcn_kmsg_hdr header;
446     int clone_request_id;
447     unsigned long clone_flags;
448     unsigned long stack_start;
449     unsigned long stack_ptr;
450     unsigned long env_start;
451     unsigned long env_end;
452     unsigned long arg_start;
453     unsigned long arg_end;
454     unsigned long heap_start;
455     unsigned long heap_end;
456     unsigned long data_start;
457     unsigned long data_end;
458     struct pt_regs regs;
459     char exe_path[512];
460     int placeholder_pid;
461     int placeholder_tgid;
462     unsigned long thread_fs;
463     unsigned long thread_gs;
464     unsigned long thread_sp0;
465     unsigned long thread_sp;
466     unsigned long thread_usersp;
467     unsigned short thread_es;
468     unsigned short thread_ds;
469     unsigned short thread_fsindex;
470     unsigned short thread_gsindex;
471     int tgroup_home_cpu;
472     int tgroup_home_id;
473     int t_home_cpu;
474     int t_home_id;
475     int prio, static_prio, normal_prio; //from sched.c
476     unsigned int rt_priority; //from sched.c
477     int sched_class; //from sched.c but here we are using SCHED_NORMAL, SCHED_FIFO, etc.
478     /*mklinux_akshay*/int origin_pid;
479     sigset_t remote_blocked, remote_real_blocked;
480     sigset_t remote_saved_sigmask;
481     struct sigpending remote_pending;
482     unsigned long sas_ss_sp;
483     size_t sas_ss_size;
484     struct k_sigaction action[_NSIG];
485     unsigned long previous_cpus;
486 } clone_request_t;
487
488 /**
489  * This message is sent in response to a clone request.
490  * Its purpose is to notify the requesting cpu that
491  * the specified pid is executing on behalf of the
492  * requesting cpu.
493  */
494 typedef struct _create_process_pairing {
495     struct pcn_kmsg_hdr header;
496     int your_pid; // PID on the cpu receiving this pairing request
497     int my_pid;   // PID on the cpu transmitting this pairing request
498 } create_process_pairing_t;
499
500 /**
501  * This message informs the remote cpu of delegated
502  * process death.  This occurs whether the process
503  * is a placeholder or a delegate locally.
504  */
505 struct _exiting_process {
506     struct pcn_kmsg_hdr header;
507     int t_home_cpu;             // 4
508     int t_home_id;              // 4
509     int my_pid;                 // 4
510     int is_last_tgroup_member;  // 4+
511                                 // ---
512                                 // 16 -> 44 bytes of padding needed
513     char pad[44];
514 } __attribute__((packed)) __attribute__((aligned(64)));  
515 typedef struct _exiting_process exiting_process_t;
516
517 /**
518  * This message informs the remote cpu that an entire thread group has exited.
519  */
520 struct _exiting_group {
521     struct pcn_kmsg_hdr header;
522     int tgroup_home_cpu;        // 4
523     int tgroup_home_id;         // 4
524                                 // ---
525                                 // 8 -> 52 bytes of padding needed
526     char pad[52];
527 } __attribute__((packed)) __attribute__((aligned(64)));
528 typedef struct _exiting_group exiting_group_t;
529
530 /**
531  * Inform remote cpu of a vma to process mapping.
532  */
533 typedef struct _vma_transfer {
534     struct pcn_kmsg_hdr header;
535     int vma_id;
536     int clone_request_id;
537     unsigned long start;
538     unsigned long end;
539     pgprot_t prot;
540     unsigned long flags;
541     unsigned long pgoff;
542     char path[256];
543 } vma_transfer_t;
544
545 /**
546  * Inform remote cpu of a pte to vma mapping.
547  */
548 struct _pte_transfer {
549     struct pcn_kmsg_hdr header;
550     int vma_id;                  //  4
551     int clone_request_id;        //  4
552     unsigned long vaddr_start;   //  8
553     unsigned long paddr_start;   //  8
554     size_t sz;                   //  4 +
555                                  //  ---
556                                  //  28 -> 32 bytes of padding needed
557     char pad[32];
558 } __attribute__((packed)) __attribute__((aligned(64)));
559
560 typedef struct _pte_transfer pte_transfer_t;
561
562 /**
563  * This message asks a remote cpu for the mapping that covers the given address.
564  */
565 struct _mapping_request {
566     struct pcn_kmsg_hdr header;
567     int tgroup_home_cpu;        // 4
568     int tgroup_home_id;         // 4
569     int requester_pid;          // 4
570     unsigned long address;      // 8
571                                 // ---
572                                 // 20 -> 40 bytes of padding needed
573     char pad[40];
574
575 } __attribute__((packed)) __attribute__((aligned(64)));
576
577 typedef struct _mapping_request mapping_request_t;
578
579 /*
580  * type = PCN_KMSG_TYPE_PROC_SRV_THREAD_GROUP_EXITED_NOTIFICATION
581  */
582 struct _thread_group_exited_notification {
583     struct pcn_kmsg_hdr header;
584     int tgroup_home_cpu;        // 4
585     int tgroup_home_id;         // 4
586                                 // ---
587                                 // 8 -> 52 bytes of padding needed
588     char pad[52];
589 } __attribute__((packed)) __attribute__((aligned(64)));
590 typedef struct _thread_group_exited_notification thread_group_exited_notification_t;
591
592
593 /**
594  * Response to a mapping request, carrying the region bounds, physical mappings, protections, and backing file path (if any).
595  */
596 struct _mapping_response {
597     struct pcn_kmsg_hdr header;
598     int tgroup_home_cpu;        
599     int tgroup_home_id; 
600     int requester_pid;
601     unsigned char present;      
602     unsigned char from_saved_mm;
603     unsigned long address;      
604     unsigned long vaddr_start;
605     unsigned long vaddr_size;
606     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
607     pgprot_t prot;              
608     unsigned long vm_flags;     
609     unsigned long pgoff;
610     char path[512]; // saved for last so we can cut
611                     // off data when possible.
612 };
613 typedef struct _mapping_response mapping_response_t;
614
615 /**
616  * This is a hack to eliminate the overhead of sending
617  * an entire mapping_response_t when there is no mapping.
618  * The overhead is due to the size of the message, which
619  * requires the _long pcn_kmsg variant to be used.
620  */
621 struct _nonpresent_mapping_response {
622     struct pcn_kmsg_hdr header;
623     int tgroup_home_cpu;        // 4
624     int tgroup_home_id;         // 4
625     int requester_pid;            // 4
626     unsigned long address;      // 8
627                                 // ---
628                                 // 20 -> 40 bytes of padding needed
629     char pad[40];
630
631 } __attribute__((packed)) __attribute__((aligned(64)));
632 typedef struct _nonpresent_mapping_response nonpresent_mapping_response_t;
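
/*
 * Sketch of how these messages are sent, based on the pcn_kmsg usage visible
 * in the commented-out pte_transfer code below and on the note above about
 * the _long variant; the exact type constant and _long send helper named here
 * are assumptions.  Small, 64-byte padded messages go through pcn_kmsg_send(),
 * while oversized ones such as mapping_response_t need the _long variant:
 *
 *   response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
 *   response.header.prio = PCN_KMSG_PRIO_NORMAL;
 *   DO_UNTIL_SUCCESS(pcn_kmsg_send_long(dst_cpu,
 *                        (struct pcn_kmsg_long_message *)&response,
 *                        sizeof(response)));
 */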
633
634 /**
635  * This message asks a remote cpu to unmap the given virtual address range.
636  */
637 struct _munmap_request {
638     struct pcn_kmsg_hdr header;
639     int tgroup_home_cpu;         // 4
640     int tgroup_home_id;          // 4
641     int requester_pid;           // 4
642     unsigned long vaddr_start;   // 8
643     unsigned long vaddr_size;    // 8
644                                  // ---
645                                  // 28 -> 32 bytes of padding needed
646     char pad[32];
647 } __attribute__((packed)) __attribute__((aligned(64)));
648 typedef struct _munmap_request munmap_request_t;
649
650 /**
651  * Acknowledges that the requested range has been unmapped on the responding cpu.
652  */
653 struct _munmap_response {
654     struct pcn_kmsg_hdr header;
655     int tgroup_home_cpu;        // 4
656     int tgroup_home_id;         // 4
657     int requester_pid;          // 4
658     unsigned long vaddr_start;  // 8
659     unsigned long vaddr_size;   // 8+
660                                 // ---
661                                 // 28 -> 32 bytes of padding needed
662     char pad[32];
663 } __attribute__((packed)) __attribute__((aligned(64)));
664 typedef struct _munmap_response munmap_response_t;
665
666 /**
667  * This message asks a remote cpu how many threads of the given group it is running.
668  */
669 struct _remote_thread_count_request {
670     struct pcn_kmsg_hdr header;
671     int tgroup_home_cpu;        // 4
672     int tgroup_home_id;         // 4
673     int requester_pid;          // 4
674                                 // ---
675                                 // 12 -> 48 bytes of padding needed
676     char pad[48];
677 } __attribute__((packed)) __attribute__((aligned(64)));
678 typedef struct _remote_thread_count_request remote_thread_count_request_t;
679
680 /**
681  * Carries the responding cpu's thread count back to the requester.
682  */
683 struct _remote_thread_count_response {
684     struct pcn_kmsg_hdr header;
685     int tgroup_home_cpu;        // 4
686     int tgroup_home_id;         // 4
687     int requester_pid;        // 4
688     int count;                  // 4
689                                 // ---
690                                 // 16 -> 44 bytes of padding needed
691     char pad[44];
692 } __attribute__((packed)) __attribute__((aligned(64)));
693 typedef struct _remote_thread_count_response remote_thread_count_response_t;
694
695 /**
696  * This message asks a remote cpu to apply an mprotect to the given range.
697  */
698 struct _mprotect_request {
699     struct pcn_kmsg_hdr header; 
700     int tgroup_home_cpu;        // 4
701     int tgroup_home_id;         // 4
702     int requester_pid;          // 4
703     unsigned long start;        // 8
704     size_t len;                 // 4
705     unsigned long prot;         // 8
706                                 // ---
707                                 // 32 -> 28 bytes of padding needed
708     char pad[28];
709 } __attribute__((packed)) __attribute__((aligned(64)));
710 typedef struct _mprotect_request mprotect_request_t;
711
712 /**
713  * Acknowledges that the mprotect request has been processed.
714  */
715 struct _mprotect_response {
716     struct pcn_kmsg_hdr header;
717     int tgroup_home_cpu;        // 4
718     int tgroup_home_id;         // 4
719     int requester_pid;          // 4
720     unsigned long start;        // 8
721                                 // ---
722                                 // 20 -> 40 bytes of padding needed
723     char pad[40];
724 } __attribute__((packed)) __attribute__((aligned(64)));
725 typedef struct _mprotect_response mprotect_response_t;
726
727 /**
728  * This message carries a thread's register and segment state back to its home cpu during back-migration.
729  */
730 typedef struct _back_migration {
731     struct pcn_kmsg_hdr header;
732     int tgroup_home_cpu;
733     int tgroup_home_id;
734     int t_home_cpu;
735     int t_home_id;
736     unsigned long previous_cpus;
737     struct pt_regs regs;
738     unsigned long thread_fs;
739     unsigned long thread_gs;
740     unsigned long thread_usersp;
741     unsigned short thread_es;
742     unsigned short thread_ds;
743     unsigned short thread_fsindex;
744     unsigned short thread_gsindex;
745 } back_migration_t;
746
747 /**
748  * Parameters handed (via walk->private) to the address-space deconstruction page walk.
749  */
750 typedef struct _deconstruction_data {
751     int clone_request_id;
752     int vma_id;
753     int dst_cpu;
754 } deconstruction_data_t;
755
756 /**
757  * Work item carrying the parameters of an exiting-process notification.
758  */
759 typedef struct {
760     struct work_struct work;
761     struct task_struct *task;
762     pid_t pid;
763     int t_home_cpu;
764     int t_home_id;
765     int is_last_tgroup_member;
766     struct pt_regs regs;
767     unsigned long thread_fs;
768     unsigned long thread_gs;
769     unsigned long thread_sp0;
770     unsigned long thread_sp;
771     unsigned long thread_usersp;
772     unsigned short thread_es;
773     unsigned short thread_ds;
774     unsigned short thread_fsindex;
775     unsigned short thread_gsindex;
776 } exit_work_t;
777
778 /**
779  * Work item carrying the parameters of a thread-group exit notification.
780  */
781 typedef struct {
782     struct work_struct work;
783     int tgroup_home_cpu;
784     int tgroup_home_id;
785 } group_exit_work_t;
786
787 /**
788  * Work item carrying the parameters of a mapping request.
789  */
790 typedef struct {
791     struct work_struct work;
792     int tgroup_home_cpu;
793     int tgroup_home_id;
794     int requester_pid;
795     unsigned long address;
796     int from_cpu;
797 } mapping_request_work_t;
798
799 /**
800  * Work item carrying the contents of a mapping response.
801  */
802 typedef struct {
803     struct work_struct work;
804     int tgroup_home_cpu;
805     int tgroup_home_id;
806     int requester_pid;
807     unsigned char from_saved_mm;
808     unsigned long address;      
809     unsigned char present;      
810     unsigned long vaddr_mapping;
811     unsigned long vaddr_start;
812     unsigned long vaddr_size;
813     unsigned long paddr_mapping;
814     size_t paddr_mapping_sz;
815     pgprot_t prot;              
816     unsigned long vm_flags;     
817     char path[512];
818     unsigned long pgoff;
819     int from_cpu;
820 } mapping_response_work_t;
821
822 /**
823  * Work item carrying the contents of a not-present mapping response.
824  */
825 typedef struct {
826     struct work_struct work;
827     int tgroup_home_cpu;
828     int tgroup_home_id;
829     int requester_pid;
830     unsigned long address;
831     int from_cpu;
832 } nonpresent_mapping_response_work_t;
833
834 /**
835  * Work item signalling that a remote thread group has closed.
836  */
837 typedef struct {
838     struct work_struct work;
839     int tgroup_home_cpu;
840     int tgroup_home_id;
841 } tgroup_closed_work_t;
842
843 /**
844  * Work item carrying the parameters of a munmap request.
845  */
846 typedef struct {
847     struct work_struct work;
848     int tgroup_home_cpu;
849     int tgroup_home_id;
850     int requester_pid;
851     unsigned long vaddr_start;
852     unsigned long vaddr_size;
853     int from_cpu;
854 } munmap_request_work_t;
855
856 /**
857  * Work item carrying the contents of a munmap response.
858  */
859 typedef struct {
860     struct work_struct work;
861     int tgroup_home_cpu;
862     int tgroup_home_id;
863     int requester_pid;
864     unsigned long vaddr_start;
865     unsigned long vaddr_size;
866 } munmap_response_work_t;
867
868 /**
869  * Work item carrying the parameters of an mprotect request.
870  */
871 typedef struct {
872     struct work_struct work;
873     int tgroup_home_cpu;
874     int tgroup_home_id;
875     int requester_pid;
876     unsigned long start;
877     size_t len;
878     unsigned long prot;
879     int from_cpu;
880 } mprotect_work_t;
881
882 /**
883  * Work item carrying the parameters of a remote thread-count request.
884  */
885 typedef struct {
886     struct work_struct work;
887     int tgroup_home_cpu;
888     int tgroup_home_id;
889     int requester_pid;
890     int from_cpu;
891 } remote_thread_count_request_work_t;
892
893 /**
894  * Work item carrying the parameters of a back-migration request.
895  */
896 typedef struct {
897     struct work_struct work;
898     int tgroup_home_cpu;
899     int tgroup_home_id;
900     int t_home_cpu;
901     int t_home_id;
902     unsigned long previous_cpus;
903     struct pt_regs regs;
904     unsigned long thread_fs;
905     unsigned long thread_gs;
906     unsigned long thread_usersp;
907     unsigned short thread_es;
908     unsigned short thread_ds;
909     unsigned short thread_fsindex;
910     unsigned short thread_gsindex;
911 } back_migration_work_t;
912
913
914 /**
915  * Prototypes
916  */
917 static int handle_clone_request(struct pcn_kmsg_message* msg);
918 long process_server_clone(unsigned long clone_flags,
919                           unsigned long stack_start,                                                                                                                   
920                           struct pt_regs *regs,
921                           unsigned long stack_size,
922                           struct task_struct* task);
923 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start);
924 static clone_data_t* find_clone_data(int cpu, int clone_request_id);
925 static void dump_mm(struct mm_struct* mm);
926 static void dump_task(struct task_struct* task,struct pt_regs* regs,unsigned long stack_ptr);
927 static void dump_thread(struct thread_struct* thread);
928 static void dump_regs(struct pt_regs* regs);
929 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr); 
930
931 /**
932  * Prototypes from parts of the kernel that I modified or made available to external
933  * modules.
934  */
935 // I removed the 'static' modifier in mm/memory.c for do_wp_page so I could use it 
936 // here.
937 int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
938                unsigned long address, pte_t *page_table, pmd_t *pmd,
939                spinlock_t *ptl, pte_t orig_pte);
940 int do_mprotect(struct task_struct* task, unsigned long start, size_t len, unsigned long prot, int do_remote);
941
942 /**
943  * Module variables
944  */
945 static int _vma_id = 0;
946 static int _clone_request_id = 0;
947 static int _cpu = -1;
948 static unsigned long long perf_a, perf_b, perf_c, perf_d, perf_e;
949 data_header_t* _saved_mm_head = NULL;             // Saved MM list
950 DEFINE_SPINLOCK(_saved_mm_head_lock);             // Lock for _saved_mm_head
951 data_header_t* _mapping_request_data_head = NULL; // Mapping request data head
952 DEFINE_SPINLOCK(_mapping_request_data_head_lock);  // Lock for above
953 data_header_t* _count_remote_tmembers_data_head = NULL;
954 DEFINE_SPINLOCK(_count_remote_tmembers_data_head_lock);
955 data_header_t* _munmap_data_head = NULL;
956 DEFINE_SPINLOCK(_munmap_data_head_lock);
957 data_header_t* _mprotect_data_head = NULL;
958 DEFINE_SPINLOCK(_mprotect_data_head_lock);
959 data_header_t* _data_head = NULL;                 // General purpose data store
960 DEFINE_SPINLOCK(_data_head_lock);                 // Lock for _data_head
961 DEFINE_SPINLOCK(_vma_id_lock);                    // Lock for _vma_id
962 DEFINE_SPINLOCK(_clone_request_id_lock);          // Lock for _clone_request_id
963 struct rw_semaphore _import_sem;
964 DEFINE_SPINLOCK(_remap_lock);
965
966
967 // Work Queues
968 static struct workqueue_struct *clone_wq;
969 static struct workqueue_struct *exit_wq;
970 static struct workqueue_struct *mapping_wq;
971
972 /**
973  * General helper functions and debugging tools
974  */
975
976 /**
977  * @brief Return true if the given address lies in user space (below PAGE_OFFSET).
978  */
979 static bool __user_addr(unsigned long x) {
980     return (x < PAGE_OFFSET);
981 }
982
983 // TODO: cpu_has_known_tgroup_mm must be reworked, i.e. the map should be shared by the threads rather than keeping one copy per thread, which is both anti-scaling and redundant.
984 /**
985  * @brief Check whether the given cpu (or the cluster it belongs to) is already known to hold this thread group's mm.
986  */
987 static int cpu_has_known_tgroup_mm(int cpu)
988 {
989 #ifdef SUPPORT_FOR_CLUSTERING
990     struct list_head *iter;
991     _remote_cpu_info_list_t *objPtr;
992     struct cpumask *pcpum =0;
993     int cpuid =-1;
994     if (cpumask_test_cpu(cpu, cpu_present_mask))
995         return 1;
996     list_for_each(iter, &rlist_head) {
997         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
998         cpuid = objPtr->_data._processor;
999         pcpum = &(objPtr->_data._cpumask);
1000         if (cpumask_test_cpu(cpu, pcpum)) {
1001             if ( bitmap_intersects(cpumask_bits(pcpum),
1002                                    &(current->known_cpu_with_tgroup_mm),
1003                                    (sizeof(unsigned long) *8)) ) {
1004                 return 1;
1005             }
1006             return 0;
1007         }
1008     }
1009     printk(KERN_ERR"%s: ERROR the input cpu (%d) is not included in any known cpu cluster\n",
1010                 __func__, cpu);
1011     return 0;
1012 #else
1013     if(test_bit(cpu,&current->known_cpu_with_tgroup_mm)) {
1014         return 1;
1015     }
1016     return 0;
1017 #endif
1018 }
1019
1020 /**
1021  * @brief Mark, for every thread in the given task's group, that the specified cpu knows about the thread group's mm.
1022  */
1023 static void set_cpu_has_known_tgroup_mm(struct task_struct *task,int cpu) {
1024     struct task_struct *me = task;
1025     struct task_struct *t = me;
1026     do {
1027         set_bit(cpu,&t->known_cpu_with_tgroup_mm);
1028     } while_each_thread(me, t);
1029 }
1030
1031 /**
1032  * @brief find_vma does not always return the correct vm_area_struct*.
1033  * If it fails to find a vma for the specified address, it instead
1034  * returns the closest one in the rb list.  This function looks
1035  * for this failure, and returns NULL in this error condition.
1036  * Otherwise, it returns a pointer to the struct vm_area_struct
1037  * containing the specified address.
1038  */
1039 static struct vm_area_struct* find_vma_checked(struct mm_struct* mm, unsigned long address) {
1040     struct vm_area_struct* vma = find_vma(mm,address&PAGE_MASK);
1041     if( vma == NULL ||
1042         (vma->vm_start > (address & PAGE_MASK)) ||
1043         (vma->vm_end <= address) ) {
1044         
1045         vma = NULL;
1046     }
1047
1048     return vma;
1049 }
1050
1051 /**
1052  * Note, mm->mmap_sem must already be held!
1053  */
1054 /*static int is_mapped(struct mm_struct* mm, unsigned vaddr) {
1055     pte_t* pte = NULL;
1056     pmd_t* pmd = NULL;
1057     pud_t* pud = NULL;
1058     pgd_t* pgd = NULL;
1059     int ret = 0;
1060
1061     pgd = pgd_offset(mm, vaddr);
1062     if(pgd_present(*pgd)) {
1063         pud = pud_offset(pgd,vaddr); 
1064         if(pud_present(*pud)) {
1065             pmd = pmd_offset(pud,vaddr);
1066             if(pmd_present(*pmd)) {
1067                 pte = pte_offset_map(pmd,vaddr);
1068                 if(pte && !pte_none(*pte)) {
1069                     // It exists!
1070                     ret = 1;
1071                 }
1072             }
1073         }
1074     }
1075     return ret;
1076
1077 }*/
1078 /* Antonio's Version
1079 static int is_mapped(struct mm_struct* mm, unsigned vaddr)
1080 {
1081     pte_t* pte = NULL;
1082     pmd_t* pmd = NULL;                                                             
1083     pud_t* pud = NULL;                                                             
1084     pgd_t* pgd = NULL; 
1085
1086     pgd = pgd_offset(mm, vaddr);                                                   
1087     if (pgd && !pgd_none(*pgd) && likely(!pgd_bad(*pgd)) && pgd_present(*pgd)) {
1088       pud = pud_offset(pgd,vaddr);                                               
1089       if (pud && !pud_none(*pud) && likely(!pud_bad(*pud)) && pud_present(*pud)) {
1090         pmd = pmd_offset(pud,vaddr);
1091         if(pmd && !pmd_none(*pmd) && likely(!pmd_bad(*pmd)) && pmd_present(*pmd)) {
          pte = pte_offset_map(pmd,vaddr);
1092           if(pte && !pte_none(*pte) && pte_present(*pte)) { 
1093                    // It exists!                                                  
1094                     return 1;
1095           }                                                                  
1096         }                                                                      
1097       }                                                                          
1098     }
1099     return 0;
}
1100 */
1101
1102 /**
1103  * @brief Find the mm_struct for a given distributed thread.  
1104  * If one does not exist, then return NULL.
1105  */
1106 static struct mm_struct* find_thread_mm(
1107         int tgroup_home_cpu, 
1108         int tgroup_home_id, 
1109         mm_data_t **used_saved_mm,
1110         struct task_struct** task_out)
1111 {
1112
1113     struct task_struct *task, *g;
1114     struct mm_struct * mm = NULL;
1115     data_header_t* data_curr;
1116     mm_data_t* mm_data;
1117     unsigned long lockflags;
1118
1119     *used_saved_mm = NULL;
1120     *task_out = NULL;
1121
1122     // First, look through all active processes.
1123     do_each_thread(g,task) {
1124         if(task->tgroup_home_cpu == tgroup_home_cpu &&
1125            task->tgroup_home_id  == tgroup_home_id) {
1126             mm = task->mm;
1127             *task_out = task;
1128             *used_saved_mm = NULL;
1129             goto out;
1130         }
1131     } while_each_thread(g,task);
1132
1133     // Failing that, look through saved mm's.
1134     spin_lock_irqsave(&_saved_mm_head_lock,lockflags);
1135     data_curr = _saved_mm_head;
1136     while(data_curr) {
1137
1138         mm_data = (mm_data_t*)data_curr;
1139     
1140         if((mm_data->tgroup_home_cpu == tgroup_home_cpu) &&
1141            (mm_data->tgroup_home_id  == tgroup_home_id)) {
1142             mm = mm_data->mm;
1143             *used_saved_mm = mm_data;
1144             break;
1145         }
1146
1147         data_curr = data_curr->next;
1148
1149     } // while
1150
1151     spin_unlock_irqrestore(&_saved_mm_head_lock,lockflags);
1152
1153
1154 out:
1155     return mm;
1156 }
1157
1158 /**
1159  * @brief A best effort at making a page writable
1160  * @return void
1161  */
1162 static void mk_page_writable(struct mm_struct* mm,
1163                              struct vm_area_struct* vma,
1164                              unsigned long vaddr) {
1165     spinlock_t* ptl;
1166     pte_t *ptep, pte, entry;
1167      
1168     // Grab the pte, and lock it     
1169     ptep = get_locked_pte(mm, vaddr, &ptl);
1170     if (!ptep)
1171         goto out;
1172
1173     // grab the contents of the pte pointer
1174     pte = *ptep;
1175     
1176     if(pte_none(*ptep)) {
1177         pte_unmap_unlock(ptep, ptl);
1178         goto out;
1179     }
1180
1181     arch_enter_lazy_mmu_mode();
1182
1183     // Make the content copy writable and dirty, then
1184     // write it back into the page tables.
1185     entry = pte_mkwrite(pte_mkdirty(pte));
1186     set_pte_at(mm, vaddr, ptep, entry);
1187
1188     update_mmu_cache(vma, vaddr, ptep);
1189
1190     arch_leave_lazy_mmu_mode();
1191
1192     // Unlock the pte
1193     pte_unmap_unlock(ptep, ptl);
1194 out:
1195     return;
1196 }
1197
1198 /**
1199  * @brief Check to see if a given page is writable.
1200  * @return non-zero if writable, 0 if not writable or lookup failed, -1 if the pte is not present
1201  */
1202 static int is_page_writable(struct mm_struct* mm,
1203                             struct vm_area_struct* vma,
1204                             unsigned long addr) {
1205     spinlock_t* ptl;
1206     pte_t *ptep, pte;
1207     int ret = 0;
1208
1209     ptep = get_locked_pte(mm,addr,&ptl);
1210     if(!ptep)
1211         goto out;
1212
1213     pte = *ptep;
1214     
1215     if(pte_none(*ptep)) {
1216         pte_unmap_unlock(ptep, ptl);
1217         ret = -1;
1218         goto out;
1219     }
1220
1221     ret = pte_write(pte);
1222
1223     pte_unmap_unlock(ptep, ptl);
1224
1225 out:
1226     return ret;
1227 }
1228
1229 /**
1230  * @brief Get the clone data associated with the current task.
1231  * @return clone_data_t* or NULL if not present
1232  */
1233 static clone_data_t* get_current_clone_data(void) {
1234     clone_data_t* ret = NULL;
1235
1236     if(!current->clone_data) {
1237         // Do costly lookup
1238         ret = find_clone_data(current->prev_cpu,
1239                                  current->clone_request_id);
1240         // Store it for easy access next time.
1241         current->clone_data = ret;
1242     } else {
1243         ret = (clone_data_t*)current->clone_data;
1244     }
1245
1246     return ret;
1247 }
1248
1249
1250 /**
1251  * @brief Page walk has encountered a pte while deconstructing
1252  * the client side processes address space.  Transfer it.
1253  */
1254 /*static int deconstruction_page_walk_pte_entry_callback(pte_t *pte, 
1255         unsigned long start, unsigned long end, struct mm_walk *walk) {
1256
1257     deconstruction_data_t* decon_data = (deconstruction_data_t*)walk->private;
1258     int vma_id = decon_data->vma_id;
1259     int dst_cpu = decon_data->dst_cpu;
1260     int clone_request_id = decon_data->clone_request_id;
1261     pte_transfer_t pte_xfer;
1262
1263     if(NULL == pte || !pte_present(*pte)) {
1264         return 0;
1265     }
1266
1267     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1268     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1269     pte_xfer.paddr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1270     // NOTE: Found the above pte to paddr conversion here -
1271     // http://wbsun.blogspot.com/2010/12/convert-userspace-virtual-address-to.html
1272     pte_xfer.vaddr = start;
1273     pte_xfer.vma_id = vma_id;
1274     pte_xfer.clone_request_id = clone_request_id;
1275     pte_xfer.pfn = pte_pfn(*pte);
1276     PSPRINTK("Sending PTE\n"); 
1277     DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&pte_xfer));
1278
1279     return 0;
1280 }*/
1281
1282 /**
1283  * @brief Callback used when walking a memory map.  It looks to see
1284  * if the page is present.  If present, it resolves the given
1285  * address.
1286  * @return always returns 0
1287  */
1288 static int vm_search_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, unsigned long end, struct mm_walk *walk) {
1289  
1290     unsigned long* resolved_addr = (unsigned long*)walk->private;
1291
1292     if (pte == NULL || pte_none(*pte) || !pte_present(*pte)) {
1293         return 0;
1294     }
1295
1296     // Store the resolved address in the address
1297     // pointed to by the private field of the walk
1298     // structure.  This is checked by the caller
1299     // of the walk function when the walk is complete.
1300     *resolved_addr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1301     return 0;
1302 }
1303
1304 /**
1305  * @brief Retrieve the physical address of the specified virtual address.
1306  * @return -1 indicates failure.  Otherwise, 0 is returned.
1307  */
1308 static int get_physical_address(struct mm_struct* mm, 
1309                                 unsigned long vaddr,
1310                                 unsigned long* paddr) {
1311     unsigned long resolved = 0;
1312     struct mm_walk walk = {
1313         .pte_entry = vm_search_page_walk_pte_entry_callback,
1314         .private = &(resolved),
1315         .mm = mm
1316     };
1317
1318     // Walk the page tables.  The walk handler modifies the
1319     // resolved variable if it finds the address.
1320     walk_page_range(vaddr & PAGE_MASK, (vaddr & PAGE_MASK) + PAGE_SIZE, &walk);
1321     if(resolved == 0) {
1322         return -1;
1323     }
1324
1325     // Set the output
1326     *paddr = resolved;
1327
1328     return 0;
1329 }
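
/*
 * A minimal usage sketch (assumption: the caller holds mm->mmap_sem around
 * the lookup, since walk_page_range inspects live page tables):
 *
 *   unsigned long paddr;
 *   PS_DOWN_READ(&mm->mmap_sem);
 *   if(get_physical_address(mm, vaddr, &paddr) == 0)
 *       PSPRINTK("vaddr %lx -> paddr %lx\n", vaddr, paddr);
 *   PS_UP_READ(&mm->mmap_sem);
 */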
1330
1331 /**
1332  * Check to see if the specified virtual address has a 
1333  * corresponding physical address mapped to it.
1334  * @return 0 = no mapping, 1 = mapping present
1335  */
1336 static int is_vaddr_mapped(struct mm_struct* mm, unsigned long vaddr) {
1337     unsigned long resolved = 0;
1338     struct mm_walk walk = {
1339         .pte_entry = vm_search_page_walk_pte_entry_callback,
1340         .private = &(resolved),
1341         .mm = mm
1342     };
1343
1344     // Walk the page tables.  The walk handler will set the
1345     // resolved variable if it finds the mapping.  
1346     walk_page_range(vaddr & PAGE_MASK, ( vaddr & PAGE_MASK ) + PAGE_SIZE, &walk);
1347     if(resolved != 0) {
1348         return 1;
1349     }
1350     return 0;
1351 }
1352
1353 /**
1354  *  @brief Find the bounds of a physically consecutive mapped region.
1355  *  The region must be contained within the specified VMA.
1356  *
1357  *  Hypothetical page table mappings for a given VMA:
1358  *
1359  *  *********************************
1360  *  *    Vaddr      *   Paddr       *
1361  *  *********************************
1362  *  * 0x10000000    * 0x12341000    *
1363  *  *********************************
1364  *  * 0x10001000    * 0x12342000    *
1365  *  *********************************
1366  *  * 0x10002000    * 0x12343000    *
1367  *  *********************************
1368  *  * 0x10003000    * 0x43214000    *
1369  *  *********************************
1370  *  
1371  *  This function, given a vaddr of 0x10001xxx (which maps to paddr 0x12342xxx), will return:
1372  *  *vaddr_mapping_start = 0x10000000
1373  *  *paddr_mapping_start = 0x12341000
1374  *  *paddr_mapping_sz    = 0x3000
1375  *
1376  *  Notice 0x10003000 and above is not included in the returned region, as
1377  *  its paddr is not consecutive with the previous mappings.
1378  *
1379  */
1380 int find_consecutive_physically_mapped_region(struct mm_struct* mm,
1381                                               struct vm_area_struct* vma,
1382                                               unsigned long vaddr,
1383                                               unsigned long* vaddr_mapping_start,
1384                                               unsigned long* paddr_mapping_start,
1385                                               size_t* paddr_mapping_sz)
1386 {
1387     unsigned long paddr_curr = 0l;
1388     unsigned long vaddr_curr = vaddr;
1389     unsigned long vaddr_next = vaddr;
1390     unsigned long paddr_next = 0l;
1391     unsigned long paddr_start = 0l;
1392     size_t sz = 0;
1393
1394     
1395     // Initializes paddr_curr
1396     if(get_physical_address(mm,vaddr_curr,&paddr_curr) < 0) {
1397         return -1;
1398     }
1399     paddr_start = paddr_curr;
1400     *vaddr_mapping_start = vaddr_curr;
1401     *paddr_mapping_start = paddr_curr;
1402     
1403     sz = PAGE_SIZE;
1404
1405     // seek up in memory
1406     // This stretches (sz) only while leaving
1407     // vaddr and paddr the same
1408     while(1) {
1409         vaddr_next += PAGE_SIZE;
1410         
1411         // don't go past the end of the vma
1412         if(vaddr_next >= vma->vm_end) {
1413             break;
1414         }
1415
1416         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1417             break;
1418         }
1419
1420         if(paddr_next == paddr_curr + PAGE_SIZE) {
1421             sz += PAGE_SIZE;
1422             paddr_curr = paddr_next;
1423         } else {
1424             break;
1425         }
1426     }
1427
1428     // seek down in memory
1429     // This stretches sz, and also moves the vaddr and paddr starting points
1430     vaddr_curr = vaddr;
1431     paddr_curr = paddr_start; 
1432     vaddr_next = vaddr_curr;
1433     while(1) {
1434         vaddr_next -= PAGE_SIZE;
1435
1436         // don't go past the start of the vma
1437         if(vaddr_next < vma->vm_start) {
1438             break;
1439         }
1440
1441         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1442             break;
1443         }
1444
1445         if(paddr_next == (paddr_curr - PAGE_SIZE)) {
1446             vaddr_curr = vaddr_next;
1447             paddr_curr = paddr_next;
1448             sz += PAGE_SIZE;
1449         } else {
1450             break;
1451         }
1452     }
1453    
1454     *vaddr_mapping_start = vaddr_curr;
1455     *paddr_mapping_start = paddr_curr;
1456     *paddr_mapping_sz = sz;
1457
1458     PSPRINTK("%s: found consecutive area - vaddr{%lx}, paddr{%lx}, sz{%zu}\n",
1459                 __func__,
1460                 *vaddr_mapping_start,
1461                 *paddr_mapping_start,
1462                 *paddr_mapping_sz);
1463
1464     return 0;
1465 }
1466
1467 /**
1468  * @brief Find the preceding physically consecutive region.  This is a region
1469  * that starts BEFORE the specified vaddr.  The region must be contained 
1470  * within the specified VMA.
1471  */
1472 int find_prev_consecutive_physically_mapped_region(struct mm_struct* mm,
1473                                               struct vm_area_struct* vma,
1474                                               unsigned long vaddr,
1475                                               unsigned long* vaddr_mapping_start,
1476                                               unsigned long* paddr_mapping_start,
1477                                               size_t* paddr_mapping_sz) {
1478     unsigned long curr_vaddr_mapping_start;
1479     unsigned long curr_paddr_mapping_start;
1480     unsigned long curr_paddr_mapping_sz;
1481     unsigned long curr_vaddr = vaddr;
1482     int ret = -1;
1483
1484     if(curr_vaddr < vma->vm_start) return -1;
1485
1486     do {
1487         int res = find_consecutive_physically_mapped_region(mm,
1488                                                      vma,
1489                                                      curr_vaddr,
1490                                                      &curr_vaddr_mapping_start,
1491                                                      &curr_paddr_mapping_start,
1492                                                      &curr_paddr_mapping_sz);
1493         if(0 == res) {
1494
1495             // this is a match, we can store off results and exit
1496             ret = 0;
1497             *vaddr_mapping_start = curr_vaddr_mapping_start;
1498             *paddr_mapping_start = curr_paddr_mapping_start;
1499             *paddr_mapping_sz    = curr_paddr_mapping_sz;
1500             break;
1501         }
1502
1503         curr_vaddr -= PAGE_SIZE;
1504     } while (curr_vaddr >= vma->vm_start);
1505
1506     return ret;
1507
1508 }
1509 /**
1510  * @brief Find the next physically consecutive region.  This is a region
1511  * that starts AFTER the specified vaddr.  The region must be contained
1512  * within the specified VMA.
1513  */
1514 int find_next_consecutive_physically_mapped_region(struct mm_struct* mm,
1515                                               struct vm_area_struct* vma,
1516                                               unsigned long vaddr,
1517                                               unsigned long* vaddr_mapping_start,
1518                                               unsigned long* paddr_mapping_start,
1519                                               size_t* paddr_mapping_sz) {
1520     unsigned long curr_vaddr_mapping_start;
1521     unsigned long curr_paddr_mapping_start;
1522     unsigned long curr_paddr_mapping_sz;
1523     unsigned long curr_vaddr = vaddr;
1524     int ret = -1;
1525
1526     if(curr_vaddr >= vma->vm_end) return -1;
1527
1528     do {
1529         int res = find_consecutive_physically_mapped_region(mm,
1530                                                      vma,
1531                                                      curr_vaddr,
1532                                                      &curr_vaddr_mapping_start,
1533                                                      &curr_paddr_mapping_start,
1534                                                      &curr_paddr_mapping_sz);
1535         if(0 == res) {
1536
1537             // this is a match, we can store off results and exit
1538             ret = 0;
1539             *vaddr_mapping_start = curr_vaddr_mapping_start;
1540             *paddr_mapping_start = curr_paddr_mapping_start;
1541             *paddr_mapping_sz    = curr_paddr_mapping_sz;
1542             break;
1543         }
1544
1545         curr_vaddr += PAGE_SIZE;
1546     } while (curr_vaddr < vma->vm_end);
1547
1548     return ret;
1549
1550 }
1551
1552 /**
1553  *  @brief Fill the array with as many physically consecutive regions
1554  *  as are present and will fit (specified by arr_sz).
1555  */
1556 int fill_physical_mapping_array(struct mm_struct* mm,
1557         struct vm_area_struct* vma,
1558         unsigned long address,
1559         contiguous_physical_mapping_t* mappings, 
1560         int arr_sz) {
1561     int i;
1562     unsigned long next_vaddr = address & PAGE_MASK;
1563     int ret = -1;
1564     unsigned long smallest_in_first_round = next_vaddr;
1565
1566     PSPRINTK("%s: entered\n",__func__);
1567
1568     for(i = 0; i < arr_sz; i++) 
1569         mappings[i].present = 0;
1570
1571     for(i = 0; i < arr_sz && next_vaddr < vma->vm_end; i++) {
1572         int valid_mapping = find_next_consecutive_physically_mapped_region(mm,
1573                                             vma,
1574                                             next_vaddr,
1575                                             &mappings[i].vaddr,
1576                                             &mappings[i].paddr,
1577                                             &mappings[i].sz);
1578
1579
1580         if(valid_mapping == 0) {
1581             PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
1582             if(address >= mappings[i].vaddr && 
1583                     address < mappings[i].vaddr + mappings[i].sz)
1584                 ret = 0;
1585
1586             if(mappings[i].vaddr < smallest_in_first_round)
1587                 smallest_in_first_round = mappings[i].vaddr;
1588
1589             mappings[i].present = 1;
1590             next_vaddr = mappings[i].vaddr + mappings[i].sz;
1591
1592         } else {
1593             PSPRINTK("%s: up search ended in failure, resuming down search\n",
1594                     __func__);
1595             mappings[i].present = 0;
1596             mappings[i].vaddr = 0;
1597             mappings[i].paddr = 0;
1598             mappings[i].sz = 0;
1599             break;
1600         }
1601     }
1602
1603     // If we have room left, go in the opposite direction
1604     if(i <= arr_sz -1) {
1605         next_vaddr = smallest_in_first_round - PAGE_SIZE;
1606         for(;i < arr_sz && next_vaddr >= vma->vm_start; i++) {
1607             int valid_mapping = find_prev_consecutive_physically_mapped_region(mm,
1608                                             vma,
1609                                             next_vaddr,
1610                                             &mappings[i].vaddr,
1611                                             &mappings[i].paddr,
1612                                             &mappings[i].sz);
1613             if(valid_mapping == 0) {
1614                 PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
1615                 mappings[i].present = 1;
1616                 next_vaddr = mappings[i].vaddr - PAGE_SIZE;
1617             } else {
1618                 mappings[i].present = 0;
1619                 mappings[i].vaddr = 0;
1620                 mappings[i].paddr = 0;
1621                 mappings[i].sz = 0;
1622                 break;
1623             }
1624         }
1625     }
1626
1627     // Trim any entries that extend beyond the boundaries of the vma
1628     for(i = 0; i < arr_sz; i++) {
1629         if(mappings[i].present) {
1630             if(mappings[i].vaddr < vma->vm_start) {
1631                 unsigned long sz_diff = vma->vm_start - mappings[i].vaddr;
1632                 PSPRINTK("Trimming mapping, since it starts too low in memory\n");
1633                 if(mappings[i].sz > sz_diff) {
1634                     mappings[i].sz -= sz_diff;
1635                     mappings[i].vaddr = vma->vm_start;
1636                 } else {
1637                     mappings[i].present = 0;
1638                     mappings[i].vaddr = 0;
1639                     mappings[i].paddr = 0;
1640                     mappings[i].sz = 0;
1641                 }
1642             }
1643
1644             if(mappings[i].vaddr + mappings[i].sz >= vma->vm_end) {
1645                 unsigned long sz_diff = mappings[i].vaddr + 
1646                                         mappings[i].sz - 
1647                                         vma->vm_end;
1648                 PSPRINTK("Trimming mapping, since it ends too high in memory\n");
1649                 if(mappings[i].sz > sz_diff) {
1650                     mappings[i].sz -= sz_diff;
1651                 } else {
1652                     mappings[i].present = 0;
1653                     mappings[i].vaddr = 0;
1654                     mappings[i].paddr = 0;
1655                     mappings[i].sz = 0;
1656                 }
1657             }
1658         }
1659     }
1660
1661     // Target address was never covered: clear all results so the caller does not consume a partial answer
1662     if(ret == -1) {
1663         PSPRINTK("%s: zeroing out responses, due to an error\n",__func__);
1664         for(i = 0; i < arr_sz; i++)
1665             mappings[i].present = 0;
1666     }
1667
1668     PSPRINTK("%s: exiting\n",__func__);
1669
1670     return ret;
1671 }
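/*
 * Usage sketch (mirrors process_mapping_request() below; assumes the caller
 * holds mm->mmap_sem and has already resolved 'vma' for 'address'):
 *
 *   contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
 *   if (0 == fill_physical_mapping_array(mm, vma, address,
 *                                        &mappings[0], MAX_MAPPINGS)) {
 *       // Each entry with .present == 1 describes one physically
 *       // contiguous run; one of them covers 'address'.
 *   } else {
 *       // No run covering 'address' was found; all entries are cleared.
 *   }
 */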
1672
1673 /**
1674  * @brief Call remap_pfn_range on the parts of the specified virtual-physical
1675  * region that are not already mapped.
1676  * @precondition mm->mmap_sem must already be held by caller.
1677  */
1678 int remap_pfn_range_remaining(struct mm_struct* mm,
1679                                   struct vm_area_struct* vma,
1680                                   unsigned long vaddr_start,
1681                                   unsigned long paddr_start,
1682                                   size_t sz,
1683                                   pgprot_t prot,
1684                                   int make_writable) {
1685     unsigned long vaddr_curr;
1686     unsigned long paddr_curr = paddr_start;
1687     int ret = 0, val;
1688     int err;
1689
1690     PSPRINTK("%s: entered vaddr_start{%lx}, paddr_start{%lx}, sz{%zx}\n",
1691             __func__,
1692             vaddr_start,
1693             paddr_start,
1694             sz);
1695
1696     for(vaddr_curr = vaddr_start; 
1697         vaddr_curr < vaddr_start + sz; 
1698         vaddr_curr += PAGE_SIZE) {
1699         if( !(val = is_vaddr_mapped(mm,vaddr_curr)) ) {
1700             //PSPRINTK("%s: mapping vaddr{%lx} paddr{%lx}\n",__func__,vaddr_curr,paddr_curr);
1701             // not mapped - map it
1702             err = remap_pfn_range(vma,
1703                                   vaddr_curr,
1704                                   paddr_curr >> PAGE_SHIFT,
1705                                   PAGE_SIZE,
1706                                   prot);
1707             if(err == 0) {
1708                 if(make_writable && vma->vm_flags & VM_WRITE) {
1709                     mk_page_writable(mm, vma, vaddr_curr);
1710                 }
1711             } else {
1712                 printk(KERN_ALERT"%s: ERROR mapping %lx to %lx with err{%d}\n",
1713                             __func__, vaddr_curr, paddr_curr, err);
1714             }
1715
1716             if( err != 0 ) ret = err;
1717         }
1718         else
1719             PSPRINTK("%s: is_vaddr_mapped %d, start:%lx end:%lx\n",
1720                     __func__, val, vma->vm_start, vma->vm_end);
1721
1722         paddr_curr += PAGE_SIZE;
1723     }
1724
1725     PSPRINTK("%s: exiting\n",__func__);
1726
1727     return ret;
1728 }
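/*
 * Usage sketch, with a hypothetical variable 'mapping' of type
 * contiguous_physical_mapping_t (mm->mmap_sem must already be held, per the
 * precondition above):
 *
 *   remap_pfn_range_remaining(mm, vma,
 *                             mapping.vaddr, mapping.paddr, mapping.sz,
 *                             vma->vm_page_prot, 1);  // 1 == make_writable
 *
 * Pages in the range that are already mapped are left alone, so re-applying
 * the same transfer is harmless.
 */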
1729
1730
1731 /**
1732  * @brief Map, but only in areas that do not currently have mappings.
1733  * This should extend vmas that are adjacent as necessary.
1734  * NOTE: current->enable_do_mmap_pgoff_hook must be disabled
1735  *       by client code before calling this.
1736  * NOTE: mm->mmap_sem must already be held by client code.
1737  * NOTE: entries in the per-mm list of vm_area_structs are
1738  *       ordered by starting address.  This is helpful, because
1739  *       I can exit my check early sometimes.
1740  */
1741 #define FORCE_NODEBUG
1742 #ifndef FORCE_NODEBUG
1743 #define DBGPSPRINTK(...) { if (dbg ==1) printk(KERN_ALERT __VA_ARGS__); }
1744 #else
1745 #define DBGPSPRINTK(...) ;
1746 #endif
1747 unsigned long do_mmap_remaining(struct file *file, unsigned long addr,
1748                                 unsigned long len, unsigned long prot,
1749                                 unsigned long flags, unsigned long pgoff, int dbg) {
1750     unsigned long ret = addr;
1751     unsigned long start = addr;
1752     unsigned long local_end = start;
1753     unsigned long end = addr + len;
1754     struct vm_area_struct* curr;
1755     unsigned long error;
1756
1757     // go through ALL vma's, looking for interference with this space.
1758     curr = current->mm->mmap;
1759     DBGPSPRINTK("%s: processing {%lx,%lx}\n",__func__,addr,len);
1760
1761     while(1) {
1762
1763         if(start >= end) goto done;
1764
1765         // We've reached the end of the list
1766         else if(curr == NULL) {
1767             // map through the end
1768             DBGPSPRINTK("%s: curr == NULL - mapping {%lx,%lx}\n",
1769                     __func__,start,end-start);
1770             error=do_mmap(file, start, end - start, prot, flags, pgoff); 
1771             if (error != start)
1772                 printk(KERN_ALERT"%s_1: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1773             goto done;
1774         }
1775
1776         // the VMA is fully above the region of interest
1777         else if(end <= curr->vm_start) {
1778                 // mmap through local_end
1779             DBGPSPRINTK("%s: VMA is fully above the region of interest - mapping {%lx,%lx}\n",
1780                     __func__,start,end-start);
1781             error=do_mmap(file, start, end - start, prot, flags, pgoff);
1782             if (error != start)
1783                 printk(KERN_ALERT"%s_2: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1784             goto done;
1785         }
1786
1787         // the VMA fully encompasses the region of interest
1788         else if(start >= curr->vm_start && end <= curr->vm_end) {
1789             // nothing to do
1790             DBGPSPRINTK("%s: VMA fully encompases the region of interest\n",__func__);
1791             goto done;
1792         }
1793
1794         // the VMA is fully below the region of interest
1795         else if(curr->vm_end <= start) {
1796             // move on to the next one
1797             DBGPSPRINTK("%s: VMA is fully below region of interest\n",__func__);
1798         }
1799
1800         // the VMA includes the start of the region of interest 
1801         // but not the end
1802         else if (start >= curr->vm_start && 
1803                  start < curr->vm_end &&
1804                  end > curr->vm_end) {
1805             // advance start (no mapping to do) 
1806             start = curr->vm_end;
1807             local_end = start;
1808             DBGPSPRINTK("%s: VMA includes start but not end\n",__func__);
1809         }
1810
1811         // the VMA includes the end of the region of interest
1812         // but not the start
1813         else if(start < curr->vm_start && 
1814                 end <= curr->vm_end &&
1815                 end > curr->vm_start) {
1816             local_end = curr->vm_start;
1817             
1818             // mmap through local_end
1819             DBGPSPRINTK("%s: VMA includes end but not start - mapping {%lx,%lx}\n",
1820                     __func__,start, local_end - start);
1821             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
1822             if (error != start)
1823                 printk(KERN_ALERT"%s_3: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1824
1825             // Then we're done
1826             goto done;
1827         }
1828
1829         // the VMA is fully within the region of interest
1830         else if(start <= curr->vm_start && end >= curr->vm_end) {
1831             // advance local end
1832             local_end = curr->vm_start;
1833
1834             // map the difference
1835             DBGPSPRINTK("%s: VMS is fully within the region of interest - mapping {%lx,%lx}\n",
1836                     __func__,start, local_end - start);
1837             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
1838             if (error != start)
1839                 printk(KERN_ALERT"%s_4: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1840
1841             // Then advance to the end of this vma
1842             start = curr->vm_end;
1843             local_end = start;
1844         }
1845
1846         curr = curr->vm_next;
1847
1848     }
1849
1850 done:
1851     
1852     DBGPSPRINTK("%s: exiting ret:%lx\n",__func__, ret);
1853     return ret;
1854 }
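/*
 * Overview of the case analysis in do_mmap_remaining() above, where
 * [start,end) is the remaining region of interest and 'curr' is the VMA
 * examined on each pass:
 *
 *   curr == NULL                                -> map [start,end), done
 *   end <= curr->vm_start                       -> map [start,end), done
 *   curr->vm_start <= start, end <= curr->vm_end-> fully covered, nothing to map
 *   curr->vm_end <= start                       -> VMA is below, try next VMA
 *   curr covers start only                      -> start = curr->vm_end, continue
 *   curr covers end only                        -> map [start, curr->vm_start), done
 *   curr fully inside [start,end)               -> map [start, curr->vm_start),
 *                                                  start = curr->vm_end, continue
 */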
1855
1856 static void send_pte(unsigned long paddr_start,
1857         unsigned long vaddr_start, 
1858         size_t sz, 
1859         int dst,
1860         int vma_id,
1861         int clone_request_id) {
1862
1863     pte_transfer_t pte_xfer;
1864     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1865     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1866     pte_xfer.paddr_start = paddr_start;
1867     pte_xfer.vaddr_start = vaddr_start;
1868     pte_xfer.sz = sz;
1869     pte_xfer.clone_request_id = clone_request_id;
1870     pte_xfer.vma_id = vma_id;
1871     pcn_kmsg_send(dst, (struct pcn_kmsg_message *)&pte_xfer);
1872 }
1873
1874 static void send_vma(struct mm_struct* mm,
1875         struct vm_area_struct* vma, 
1876         int dst,
1877         int clone_request_id) {
1878     char lpath[256];
1879     char *plpath;
1880     vma_transfer_t* vma_xfer = kmalloc(sizeof(vma_transfer_t),GFP_KERNEL);
         if(!vma_xfer)
             return;
1881     vma_xfer->header.type = PCN_KMSG_TYPE_PROC_SRV_VMA_TRANSFER;
1882     vma_xfer->header.prio = PCN_KMSG_PRIO_NORMAL;
1883     
1884     if(vma->vm_file == NULL) {
1885         vma_xfer->path[0] = '\0';
1886     } else {
1887         plpath = d_path(&vma->vm_file->f_path,
1888                 lpath,256);
1889         strcpy(vma_xfer->path,plpath);
1890     }
1891
1892     //
1893     // Transfer the vma
1894     //
1895     PS_SPIN_LOCK(&_vma_id_lock);
1896     vma_xfer->vma_id = _vma_id++;
1897     PS_SPIN_UNLOCK(&_vma_id_lock);
1898     vma_xfer->start = vma->vm_start;
1899     vma_xfer->end = vma->vm_end;
1900     vma_xfer->prot = vma->vm_page_prot;
1901     vma_xfer->clone_request_id = clone_request_id;
1902     vma_xfer->flags = vma->vm_flags;
1903     vma_xfer->pgoff = vma->vm_pgoff;
1904     pcn_kmsg_send_long(dst, 
1905                         (struct pcn_kmsg_long_message*)vma_xfer, 
1906                         sizeof(vma_transfer_t) - sizeof(vma_xfer->header));
1907
1908     // Send all physical information too
1909     {
1910     unsigned long curr = vma->vm_start;
1911     unsigned long vaddr_resolved = -1;
1912     unsigned long paddr_resolved = -1;
1913     size_t sz_resolved = 0;
1914     
1915     while(curr < vma->vm_end) {
1916         if(-1 == find_next_consecutive_physically_mapped_region(mm,
1917                     vma,
1918                     curr,
1919                     &vaddr_resolved,
1920                     &paddr_resolved,
1921                     &sz_resolved)) {
1922             // None more, exit
1923             break;
1924         } else {
1925             // send the pte
1926             send_pte(paddr_resolved,
1927                      vaddr_resolved,
1928                      sz_resolved,
1929                      dst,
1930                      vma_xfer->vma_id,
1931                      vma_xfer->clone_request_id
1932                      );
1933
1934             // move to the next
1935             curr = vaddr_resolved + sz_resolved;
1936         }
1937     }
1938
1939     }
1940
1941
1942     kfree(vma_xfer);
1943 }
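/*
 * Message sequence produced by send_vma()/send_pte() above: one
 * PCN_KMSG_TYPE_PROC_SRV_VMA_TRANSFER message per VMA, followed by zero or
 * more PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER messages, one for each physically
 * contiguous run returned by find_next_consecutive_physically_mapped_region().
 * Both message types carry the same (clone_request_id, vma_id) pair, which is
 * presumably what lets the receiver associate each PTE run with its VMA.
 */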
1944
1945 /**
1946  * @brief Display a mapping request data entry.
1947  */
1948 static void dump_mapping_request_data(mapping_request_data_t* data) {
1949     int i;
1950     PSPRINTK("mapping request data dump:\n");
1951     PSPRINTK("address{%lx}, vaddr_start{%lx}, vaddr_sz{%lx}\n",
1952                     data->address, data->vaddr_start, data->vaddr_size);
1953     for(i = 0; i < MAX_MAPPINGS; i++) {
1954         PSPRINTK("mapping %d - vaddr{%lx}, paddr{%lx}, sz{%lx}\n",
1955                 i,data->mappings[i].vaddr,data->mappings[i].paddr,data->mappings[i].sz);
1956     }
1957     PSPRINTK("present{%d}, complete{%d}, from_saved_mm{%d}\n",
1958             data->present, data->complete, data->from_saved_mm);
1959     PSPRINTK("responses{%d}, expected_responses{%d}\n",
1960             data->responses, data->expected_responses);
1961 }
1962
1963 /**
1964  * @brief Display relevant task information.
1965  */
1966 void dump_task(struct task_struct* task, struct pt_regs* regs, unsigned long stack_ptr) {
1967 #if PROCESS_SERVER_VERBOSE
1968     if (!task) return;
1969
1970     PSPRINTK("DUMP TASK\n");
1971     PSPRINTK("PID: %d\n",task->pid);
1972     PSPRINTK("State: %lx\n",task->state);
1973     PSPRINTK("Flags: %x\n",task->flags);
1974     PSPRINTK("Prio{%d},Static_Prio{%d},Normal_Prio{%d}\n",
1975             task->prio,task->static_prio,task->normal_prio);
1976     PSPRINTK("Represents_remote{%d}\n",task->represents_remote);
1977     PSPRINTK("Executing_for_remote{%d}\n",task->executing_for_remote);
1978     PSPRINTK("prev_pid{%d}\n",task->prev_pid);
1979     PSPRINTK("next_pid{%d}\n",task->next_pid);
1980     PSPRINTK("prev_cpu{%d}\n",task->prev_cpu);
1981     PSPRINTK("next_cpu{%d}\n",task->next_cpu);
1982     PSPRINTK("Clone_request_id{%d}\n",task->clone_request_id);
1983     dump_regs(regs);
1984     dump_thread(&task->thread);
1985     //dump_mm(task->mm);
1986     dump_stk(&task->thread,stack_ptr);
1987     PSPRINTK("TASK DUMP COMPLETE\n");
1988 #endif
1989 }
1990
1991 /**
1992  * @brief Display a task's stack information.
1993  */
1994 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr) {
1995     if(!thread) return;
1996     PSPRINTK("DUMP STACK\n");
1997     if(thread->sp) {
1998         PSPRINTK("sp = %lx\n",thread->sp);
1999     }
2000     if(thread->usersp) {
2001         PSPRINTK("usersp = %lx\n",thread->usersp);
2002     }
2003     if(stack_ptr) {
2004         PSPRINTK("stack_ptr = %lx\n",stack_ptr);
2005     }
2006     PSPRINTK("STACK DUMP COMPLETE\n");
2007 }
2008
2009 /**
2010  * @brief Display a task's register contents.
2011  */
2012 static void dump_regs(struct pt_regs* regs) {
2013     unsigned long fs, gs;
2014     PSPRINTK("DUMP REGS\n");
2015     if(NULL != regs) {
2016         PSPRINTK("r15{%lx}\n",regs->r15);   
2017         PSPRINTK("r14{%lx}\n",regs->r14);
2018         PSPRINTK("r13{%lx}\n",regs->r13);
2019         PSPRINTK("r12{%lx}\n",regs->r12);
2020         PSPRINTK("r11{%lx}\n",regs->r11);
2021         PSPRINTK("r10{%lx}\n",regs->r10);
2022         PSPRINTK("r9{%lx}\n",regs->r9);
2023         PSPRINTK("r8{%lx}\n",regs->r8);
2024         PSPRINTK("bp{%lx}\n",regs->bp);
2025         PSPRINTK("bx{%lx}\n",regs->bx);
2026         PSPRINTK("ax{%lx}\n",regs->ax);
2027         PSPRINTK("cx{%lx}\n",regs->cx);
2028         PSPRINTK("dx{%lx}\n",regs->dx);
2029         PSPRINTK("di{%lx}\n",regs->di);
2030         PSPRINTK("orig_ax{%lx}\n",regs->orig_ax);
2031         PSPRINTK("ip{%lx}\n",regs->ip);
2032         PSPRINTK("cs{%lx}\n",regs->cs);
2033         PSPRINTK("flags{%lx}\n",regs->flags);
2034         PSPRINTK("sp{%lx}\n",regs->sp);
2035         PSPRINTK("ss{%lx}\n",regs->ss);
2036     }
2037     rdmsrl(MSR_FS_BASE, fs);
2038     rdmsrl(MSR_GS_BASE, gs);
2039     PSPRINTK("fs{%lx}\n",fs);
2040     PSPRINTK("gs{%lx}\n",gs);
2041     PSPRINTK("REGS DUMP COMPLETE\n");
2042 }
2043
2044 /**
2045  * @brief Display a task's thread information.
2046  */
2047 static void dump_thread(struct thread_struct* thread) {
2048     PSPRINTK("DUMP THREAD\n");
2049     PSPRINTK("sp0{%lx}, sp{%lx}\n",thread->sp0,thread->sp);
2050     PSPRINTK("usersp{%lx}\n",thread->usersp);
2051     PSPRINTK("es{%x}\n",thread->es);
2052     PSPRINTK("ds{%x}\n",thread->ds);
2053     PSPRINTK("fsindex{%x}\n",thread->fsindex);
2054     PSPRINTK("gsindex{%x}\n",thread->gsindex);
2055     PSPRINTK("gs{%lx}\n",thread->gs);
2056     PSPRINTK("THREAD DUMP COMPLETE\n");
2057 }
2058
2059 /**
2060  * @brief Display a pte_data_t data structure.
2061  */
2062 static void dump_pte_data(pte_data_t* p) {
2063     PSPRINTK("PTE_DATA\n");
2064     PSPRINTK("vma_id{%x}\n",p->vma_id);
2065     PSPRINTK("clone_request_id{%x}\n",p->clone_request_id);
2066     PSPRINTK("cpu{%x}\n",p->cpu);
2067     PSPRINTK("vaddr_start{%lx}\n",p->vaddr_start);
2068     PSPRINTK("paddr_start{%lx}\n",p->paddr_start);
2069     PSPRINTK("sz{%d}\n",p->sz);
2070 }
2071
2072 /**
2073  * @brief Display a vma_data_t data structure.
2074  */
2075 static void dump_vma_data(vma_data_t* v) {
2076     pte_data_t* p;
2077     PSPRINTK("VMA_DATA\n");
2078     PSPRINTK("start{%lx}\n",v->start);
2079     PSPRINTK("end{%lx}\n",v->end);
2080     PSPRINTK("clone_request_id{%x}\n",v->clone_request_id);
2081     PSPRINTK("cpu{%x}\n",v->cpu);
2082     PSPRINTK("flags{%lx}\n",v->flags);
2083     PSPRINTK("vma_id{%x}\n",v->vma_id);
2084     PSPRINTK("path{%s}\n",v->path);
2085
2086     p = v->pte_list;
2087     while(p) {
2088         dump_pte_data(p);
2089         p = (pte_data_t*)p->header.next;
2090     }
2091 }
2092
2093 /**
2094  * @brief Display a clone_data_t.
2095  */
2096 static void dump_clone_data(clone_data_t* r) {
2097     vma_data_t* v;
2098     PSPRINTK("CLONE REQUEST\n");
2099     PSPRINTK("clone_request_id{%x}\n",r->clone_request_id);
2100     PSPRINTK("clone_flags{%lx}\n",r->clone_flags);
2101     PSPRINTK("stack_start{%lx}\n",r->stack_start);
2102     PSPRINTK("stack_ptr{%lx}\n",r->stack_ptr);
2103     PSPRINTK("env_start{%lx}\n",r->env_start);
2104     PSPRINTK("env_end{%lx}\n",r->env_end);
2105     PSPRINTK("arg_start{%lx}\n",r->arg_start);
2106     PSPRINTK("arg_end{%lx}\n",r->arg_end);
2107     PSPRINTK("heap_start{%lx}\n",r->heap_start);
2108     PSPRINTK("heap_end{%lx}\n",r->heap_end);
2109     PSPRINTK("data_start{%lx}\n",r->data_start);
2110     PSPRINTK("data_end{%lx}\n",r->data_end);
2111     dump_regs(&r->regs);
2112     PSPRINTK("placeholder_pid{%x}\n",r->placeholder_pid);
2113     PSPRINTK("placeholder_tgid{%x}\n",r->placeholder_tgid);
2114     PSPRINTK("thread_fs{%lx}\n",r->thread_fs);
2115     PSPRINTK("thread_gs{%lx}\n",r->thread_gs);
2116     PSPRINTK("thread_sp0{%lx}\n",r->thread_sp0);
2117     PSPRINTK("thread_sp{%lx}\n",r->thread_sp);
2118     PSPRINTK("thread_usersp{%lx}\n",r->thread_usersp);
2119
2120     v = r->vma_list;
2121     while(v) {
2122         dump_vma_data(v);
2123         v = (vma_data_t*)v->header.next;
2124     }
2125 }
2126
2127 /**
2128  * @brief Find a thread count data entry.
2129  * @return Either a thread count request data entry, or NULL if one does 
2130  * not exist that satisfies the parameter requirements.
2131  */
2132 static remote_thread_count_request_data_t* find_remote_thread_count_data(int cpu, 
2133         int id, int requester_pid) {
2134
2135     data_header_t* curr = NULL;
2136     remote_thread_count_request_data_t* request = NULL;
2137     remote_thread_count_request_data_t* ret = NULL;
2138     unsigned long lockflags;
2139
2140     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2141
2142     curr = _count_remote_tmembers_data_head;
2143     while(curr) {
2144         request = (remote_thread_count_request_data_t*)curr;
2145         if(request->tgroup_home_cpu == cpu &&
2146            request->tgroup_home_id == id &&
2147            request->requester_pid == requester_pid) {
2148             ret = request;
2149             break;
2150         }
2151         curr = curr->next;
2152     }
2153
2154     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2155
2156     return ret;
2157 }
2158
2159 /**
2160  * @brief Finds a munmap request data entry.
2161  * @return Either a munmap request data entry, or NULL if one is not
2162  * found that satisfies the parameter requirements.
2163  */
2164 static munmap_request_data_t* find_munmap_request_data(int cpu, int id, 
2165         int requester_pid, unsigned long address) {
2166
2167     data_header_t* curr = NULL;
2168     munmap_request_data_t* request = NULL;
2169     munmap_request_data_t* ret = NULL;
2170     PS_SPIN_LOCK(&_munmap_data_head_lock);
2171     
2172     curr = _munmap_data_head;
2173     while(curr) {
2174         request = (munmap_request_data_t*)curr;
2175         if(request->tgroup_home_cpu == cpu && 
2176                 request->tgroup_home_id == id &&
2177                 request->requester_pid == requester_pid &&
2178                 request->vaddr_start == address) {
2179             ret = request;
2180             break;
2181         }
2182         curr = curr->next;
2183     }
2184
2185     PS_SPIN_UNLOCK(&_munmap_data_head_lock);
2186
2187     return ret;
2188
2189 }
2190
2191 /**
2192  * @brief Finds an mprotect request data entry.
2193  * @return Either an mprotect request data entry, or NULL if one is
2194  * not found that satisfies the parameter requirements.
2195  */
2196 static mprotect_data_t* find_mprotect_request_data(int cpu, int id, 
2197         int requester_pid, unsigned long start) {
2198
2199     data_header_t* curr = NULL;
2200     mprotect_data_t* request = NULL;
2201     mprotect_data_t* ret = NULL;
2202     PS_SPIN_LOCK(&_mprotect_data_head_lock);
2203     
2204     curr = _mprotect_data_head;
2205     while(curr) {
2206         request = (mprotect_data_t*)curr;
2207         if(request->tgroup_home_cpu == cpu && 
2208                 request->tgroup_home_id == id &&
2209                 request->requester_pid == requester_pid &&
2210                 request->start == start) {
2211             ret = request;
2212             break;
2213         }
2214         curr = curr->next;
2215     }
2216
2217     PS_SPIN_UNLOCK(&_mprotect_data_head_lock);
2218
2219     return ret;
2220
2221 }
2222
2223 /**
2224  * @brief Finds a mapping request data entry.
2225  * @return Either a mapping request data entry, or NULL if an entry
2226  * is not found that satisfies the parameter requirements.
2227  */
2228 static mapping_request_data_t* find_mapping_request_data(int cpu, int id, 
2229         int pid, unsigned long address) {
2230
2231     data_header_t* curr = NULL;
2232     mapping_request_data_t* request = NULL;
2233     mapping_request_data_t* ret = NULL;
2234     
2235     curr = _mapping_request_data_head;
2236     while(curr) {
2237         request = (mapping_request_data_t*)curr;
2238         if(request->tgroup_home_cpu == cpu && 
2239                 request->tgroup_home_id == id &&
2240                 request->requester_pid == pid &&
2241                 request->address == address) {
2242             ret = request;
2243             break;
2244         }
2245         curr = curr->next;
2246     }
2247
2248
2249     return ret;
2250 }
2251
2252 /**
2253  * @brief Finds a clone data entry.
2254  * @return Either a clone entry or NULL if one is not found
2255  * that satisfies the parameter requirements.
2256  */
2257 static clone_data_t* find_clone_data(int cpu, int clone_request_id) {
2258     data_header_t* curr = NULL;
2259     clone_data_t* clone = NULL;
2260     clone_data_t* ret = NULL;
2261     PS_SPIN_LOCK(&_data_head_lock);
2262     
2263     curr = _data_head;
2264     while(curr) {
2265         if(curr->data_type == PROCESS_SERVER_CLONE_DATA_TYPE) {
2266             clone = (clone_data_t*)curr;
2267             if(clone->placeholder_cpu == cpu && clone->clone_request_id == clone_request_id) {
2268                 ret = clone;
2269                 break;
2270             }
2271         }
2272         curr = curr->next;
2273     }
2274
2275     PS_SPIN_UNLOCK(&_data_head_lock);
2276
2277     return ret;
2278 }
2279
2280 /**
2281  * @brief Destroys the specified clone data.  It also destroys lists
2282  * that are nested within it.
2283  */
2284 static void destroy_clone_data(clone_data_t* data) {
2285     vma_data_t* vma_data;
2286     pte_data_t* pte_data;
2287     vma_data = data->vma_list;
2288     while(vma_data) {
2289         
2290         // Destroy this VMA's PTE's
2291         pte_data = vma_data->pte_list;
2292         while(pte_data) {
2293
2294             // Remove pte from list
2295             vma_data->pte_list = (pte_data_t*)pte_data->header.next;
2296             if(vma_data->pte_list) {
2297                 vma_data->pte_list->header.prev = NULL;
2298             }
2299
2300             // Destroy pte
2301             kfree(pte_data);
2302
2303             // Next is the new list head
2304             pte_data = vma_data->pte_list;
2305         }
2306         
2307         // Remove vma from list
2308         data->vma_list = (vma_data_t*)vma_data->header.next;
2309         if(data->vma_list) {
2310             data->vma_list->header.prev = NULL;
2311         }
2312
2313         // Destroy vma
2314         kfree(vma_data);
2315
2316         // Next is the new list head
2317         vma_data = data->vma_list;
2318     }
2319
2320     // Destroy clone data
2321     kfree(data);
2322 }
2323
2324 #if 0
2325 /**
2326  * @brief Finds a vma_data_t entry.
2327  */
2328 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start) {
2329
2330     vma_data_t* curr = clone_data->vma_list;
2331     vma_data_t* ret = NULL;
2332
2333     while(curr) {
2334         
2335         if(curr->start == addr_start) {
2336             ret = curr;
2337             break;
2338         }
2339
2340         curr = (vma_data_t*)curr->header.next;
2341     }
2342
2343     return ret;
2344 }
2345 #endif
2346
2347 /**
2348  * @brief Callback for page walk that displays the contents of the walk.
2349  */
2350 static int dump_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, 
2351         unsigned long end, struct mm_walk *walk) {
2352
2353     int nx;
2354     int rw;
2355     int user;
2356     int pwt;
2357     int pcd;
2358     int accessed;
2359     int dirty;
2360
2361     if(NULL == pte || !pte_present(*pte)) {
2362         return 0;
2363     }
2364
2365     nx       = pte_flags(*pte) & _PAGE_NX       ? 1 : 0;
2366     rw       = pte_flags(*pte) & _PAGE_RW       ? 1 : 0;
2367     user     = pte_flags(*pte) & _PAGE_USER     ? 1 : 0;
2368     pwt      = pte_flags(*pte) & _PAGE_PWT      ? 1 : 0;
2369     pcd      = pte_flags(*pte) & _PAGE_PCD      ? 1 : 0;
2370     accessed = pte_flags(*pte) & _PAGE_ACCESSED ? 1 : 0;
2371     dirty    = pte_flags(*pte) & _PAGE_DIRTY    ? 1 : 0;
2372
2373     PSPRINTK("pte_entry start{%lx}, end{%lx}, phy{%lx}\n",
2374             start,
2375             end,
2376             (unsigned long)(pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1)));
2377
2378     PSPRINTK("\tnx{%d}, ",nx);
2379     PSPRINTK("rw{%d}, ",rw);
2380     PSPRINTK("user{%d}, ",user);
2381     PSPRINTK("pwt{%d}, ",pwt);
2382     PSPRINTK("pcd{%d}, ",pcd);
2383     PSPRINTK("accessed{%d}, ",accessed);
2384     PSPRINTK("dirty{%d}\n",dirty);
2385
2386     return 0;
2387 }
2388
2389 /**
2390  * @brief Displays relevant data within a mm.
2391  */
2392 static void dump_mm(struct mm_struct* mm)
2393 {
2394     struct vm_area_struct * curr;
         char buf[256];   // scratch buffer for d_path()
2395     struct mm_walk walk = {
2396         .pte_entry = dump_page_walk_pte_entry_callback,
2397         .mm = mm,
2398         .private = NULL
2399         };
2400
2401     if(NULL == mm) {
2402         PSPRINTK("MM IS NULL!\n");
2403         return;
2404     }
2405
2406     PS_DOWN_READ(&mm->mmap_sem);
2407
2408     curr = mm->mmap;
2409
2410     PSPRINTK("MM DUMP\n");
2411     PSPRINTK("Stack Growth{%lx}\n",mm->stack_vm);
2412     PSPRINTK("Code{%lx - %lx}\n",mm->start_code,mm->end_code);
2413     PSPRINTK("Brk{%lx - %lx}\n",mm->start_brk,mm->brk);
2414     PSPRINTK("Stack{%lx}\n",mm->start_stack);
2415     PSPRINTK("Arg{%lx - %lx}\n",mm->arg_start,mm->arg_end);
2416     PSPRINTK("Env{%lx - %lx}\n",mm->env_start,mm->env_end);
2417
2418     while(curr) {
2419         if(!curr->vm_file) {
2420             PSPRINTK("Anonymous VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, flags{%lx}\n",
2421                     curr->vm_start, 
2422                     curr->vm_end,
2423                     curr->vm_pgoff,
2424                     curr->vm_flags);
2425             // walk    
2426             walk_page_range(curr->vm_start,curr->vm_end,&walk);
2427         } else {
2428             PSPRINTK("Page VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, path{%s}, flags{%lx}\n",
2429                     curr->vm_start,
2430                     curr->vm_end,
2431                     curr->vm_pgoff,
2432                     d_path(&curr->vm_file->f_path,buf, 256),
2433                     curr->vm_flags);
2434             walk_page_range(curr->vm_start,curr->vm_end,&walk);
2435         }
2436         curr = curr->vm_next;
2437     }
2438
2439     PS_UP_READ(&mm->mmap_sem);
2440 }
2441
2442 /**
2443  * Data library
2444  */
2445
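/*
 * All records handled by these helpers embed a data_header_t as their first
 * member, roughly like the sketch below (field layout assumed from the casts
 * used in find_clone_data() and the other find_* helpers):
 *
 *   typedef struct {
 *       data_header_t header;   // must be first so the casts below are valid
 *       // ...type-specific payload...
 *   } some_data_t;
 *
 * A data_header_t* pulled off a list can therefore be cast directly to the
 * concrete record type, and records of different types can share one list,
 * distinguished by header.data_type (see dump_data_list()).
 */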
2446 /**
2447  * @brief Add data entry.
2448  */
2449 static void add_data_entry_to(void* entry, spinlock_t* lock, data_header_t** head) {
2450     data_header_t* hdr = (data_header_t*)entry;
2451     data_header_t* curr = NULL;
2452
2453     if(!entry) {
2454         return;
2455     }
2456
2457     // Always clear out the link information
2458     hdr->next = NULL;
2459     hdr->prev = NULL;
2460
2461     PS_SPIN_LOCK(lock);
2462     
2463     if (!*head) {
2464         *head = hdr;
2465         hdr->next = NULL;
2466         hdr->prev = NULL;
2467     } else {
2468         curr = *head;
2469         while(curr->next != NULL) {
2470             if(curr == entry) {
                     // Already in the list - release the lock before returning
                     PS_SPIN_UNLOCK(lock);
2471                 return;
2472             }
2473             curr = curr->next;
2474         }
2475         // Now curr should be the last entry.
2476         // Append the new entry to curr.
2477         curr->next = hdr;
2478         hdr->next = NULL;
2479         hdr->prev = curr;
2480     }
2481
2482     PS_SPIN_UNLOCK(lock);
2483 }
2484
2485 /**
2486  * @brief Remove a data entry
2487  * @prerequisite Requires user to hold lock
2488  */
2489 static void remove_data_entry_from(void* entry, data_header_t** head) {
2490     data_header_t* hdr = entry;
2491
2492     if(!entry) {
2493         return;
2494     }
2495
2496     if(*head == hdr) {
2497         *head = hdr->next;
2498     }
2499
2500     if(hdr->next) {
2501         hdr->next->prev = hdr->prev;
2502     }
2503
2504     if(hdr->prev) {
2505         hdr->prev->next = hdr->next;
2506     }
2507
2508     hdr->prev = NULL;
2509     hdr->next = NULL;
2510
2511 }
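/*
 * Typical call pattern (as used in count_remote_thread_members() below) -
 * the caller owns the lock protecting the list being modified:
 *
 *   spin_lock_irqsave(&_count_remote_tmembers_data_head_lock, flags);
 *   remove_data_entry_from(data, &_count_remote_tmembers_data_head);
 *   spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock, flags);
 *   kfree(data);
 */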
2512
2513 /**
2514  * @brief Add data entry
2515  */
2516 static void add_data_entry(void* entry) {
2517     data_header_t* hdr = (data_header_t*)entry;
2518     data_header_t* curr = NULL;
2519     unsigned long lockflags;
2520
2521     if(!entry) {
2522         return;
2523     }
2524
2525     // Always clear out the link information
2526     hdr->next = NULL;
2527     hdr->prev = NULL;
2528
2529     spin_lock_irqsave(&_data_head_lock,lockflags);
2530     
2531     if (!_data_head) {
2532         _data_head = hdr;
2533         hdr->next = NULL;
2534         hdr->prev = NULL;
2535     } else {
2536         curr = _data_head;
2537         while(curr->next != NULL) {
2538             if(curr == entry) {
                     // Already in the list - restore irqs and drop the lock before returning
                     spin_unlock_irqrestore(&_data_head_lock,lockflags);
2539                 return;
2540             }
2541             curr = curr->next;
2542         }
2543         // Now curr should be the last entry.
2544         // Append the new entry to curr.
2545         curr->next = hdr;
2546         hdr->next = NULL;
2547         hdr->prev = curr;
2548     }
2549
2550     spin_unlock_irqrestore(&_data_head_lock,lockflags);
2551 }
2552
2553 /**
2554  * @brief Remove a data entry.
2555  * @prerequisite Requires user to hold _data_head_lock.
2556  */
2557 static void remove_data_entry(void* entry) {
2558     data_header_t* hdr = entry;
2559
2560     if(!entry) {
2561         return;
2562     }
2563
2564     if(_data_head == hdr) {
2565         _data_head = hdr->next;
2566     }
2567
2568     if(hdr->next) {
2569         hdr->next->prev = hdr->prev;
2570     }
2571
2572     if(hdr->prev) {
2573         hdr->prev->next = hdr->next;
2574     }
2575
2576     hdr->prev = NULL;
2577     hdr->next = NULL;
2578
2579 }
2580
2581 /**
2582  * @brief Print information about the list.
2583  */
2584 static void dump_data_list(void) {
2585     data_header_t* curr = NULL;
2586     pte_data_t* pte_data = NULL;
2587     vma_data_t* vma_data = NULL;
2588     clone_data_t* clone_data = NULL;
2589
2590     PS_SPIN_LOCK(&_data_head_lock);
2591
2592     curr = _data_head;
2593
2594     PSPRINTK("DATA LIST:\n");
2595     while(curr) {
2596         switch(curr->data_type) {
2597         case PROCESS_SERVER_VMA_DATA_TYPE:
2598             vma_data = (vma_data_t*)curr;
2599             PSPRINTK("VMA DATA: start{%lx}, end{%lx}, crid{%d}, vmaid{%d}, cpu{%d}, pgoff{%lx}\n",
2600                     vma_data->start,
2601                     vma_data->end,
2602                     vma_data->clone_request_id,
2603                     vma_data->vma_id, 
2604                     vma_data->cpu, 
2605                     vma_data->pgoff);
2606             break;
2607         case PROCESS_SERVER_PTE_DATA_TYPE:
2608             pte_data = (pte_data_t*)curr;
2609             PSPRINTK("PTE DATA: vaddr_start{%lx}, paddr_start{%lx}, sz{%d}, vmaid{%d}, cpu{%d}\n",
2610                     pte_data->vaddr_start,
2611                     pte_data->paddr_start,
2612                     pte_data->sz,
2613                     pte_data->vma_id,
2614                     pte_data->cpu);
2615             break;
2616         case PROCESS_SERVER_CLONE_DATA_TYPE:
2617             clone_data = (clone_data_t*)curr;
2618             PSPRINTK("CLONE DATA: flags{%lx}, stack_start{%lx}, heap_start{%lx}, heap_end{%lx}, ip{%lx}, crid{%d}\n",
2619                     clone_data->clone_flags,
2620                     clone_data->stack_start,
2621                     clone_data->heap_start,
2622                     clone_data->heap_end,
2623                     clone_data->regs.ip,
2624                     clone_data->clone_request_id);
2625             break;
2626         default:
2627             break;
2628         }
2629         curr = curr->next;
2630     }
2631
2632     PS_SPIN_UNLOCK(&_data_head_lock);
2633 }
2634
2635 /**
2636  * @brief Counts remote thread group members.
2637  * @return The number of remote thread group members in the
2638  * specified distributed thread group.
2639  * <MEASURE perf_count_remote_thread_members>
2640  */
2641 static int count_remote_thread_members(int exclude_t_home_cpu,
2642                                        int exclude_t_home_id) {
2643
2644     int tgroup_home_cpu = current->tgroup_home_cpu;
2645     int tgroup_home_id  = current->tgroup_home_id;
2646     remote_thread_count_request_data_t* data;
2647     remote_thread_count_request_t request;
2648     int i;
2649     int s;
2650     int ret = -1;
2651     int perf = -1;
2652     unsigned long lockflags;
2653
2654     perf = PERF_MEASURE_START(&perf_count_remote_thread_members);
2655
2656     PSPRINTK("%s: entered\n",__func__);
2657
2658     data = kmalloc(sizeof(remote_thread_count_request_data_t),GFP_KERNEL);
2659     if(!data) goto exit;
2660
2661     data->header.data_type = PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE;
2662     data->responses = 0;
2663     data->expected_responses = 0;
2664     data->tgroup_home_cpu = tgroup_home_cpu;
2665     data->tgroup_home_id = tgroup_home_id;
2666     data->requester_pid = current->pid;
2667     data->count = 0;
2668     spin_lock_init(&data->lock);
2669
2670     add_data_entry_to(data,
2671                       &_count_remote_tmembers_data_head_lock,
2672                       &_count_remote_tmembers_data_head);
2673
2674     request.header.type = PCN_KMSG_TYPE_PROC_SRV_THREAD_COUNT_REQUEST;
2675     request.header.prio = PCN_KMSG_PRIO_NORMAL;
2676     request.tgroup_home_cpu = current->tgroup_home_cpu; //TODO why not tgroup_home_cpu?!?!
2677     request.tgroup_home_id  = current->tgroup_home_id; //TODO why not tgroup_home_id?!?!
2678     request.requester_pid = data->requester_pid;
2679
2680 #ifndef SUPPORT_FOR_CLUSTERING
2681     for(i = 0; i < NR_CPUS; i++) {
2682         // Skip the current cpu
2683         if(i == _cpu) continue;
2684 #else
2685     // the list does not include the current processor group descriptor (TODO)
2686     struct list_head *iter;
2687     _remote_cpu_info_list_t *objPtr;
2688     list_for_each(iter, &rlist_head) {
2689         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
2690         i = objPtr->_data._processor;
2691 #endif
2692         // Send the request to this cpu.
2693         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
2694         if(!s) {
2695             // A successful send operation, increase the number
2696             // of expected responses.
2697             data->expected_responses++;
2698         }
2699     }
2700
2701     PSPRINTK("%s: waiting on %d responses\n",__func__,data->expected_responses);
2702
2703     // Wait for all cpus to respond.
2704     while(data->expected_responses != data->responses) {
2705         schedule();
2706     }
2707
2708     // OK, all responses are in, we can proceed.
2709     ret = data->count;
2710
2711     PSPRINTK("%s: found a total of %d remote threads in group\n",__func__,
2712             data->count);
2713
2714     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2715     remove_data_entry_from(data,
2716                            &_count_remote_tmembers_data_head);
2717     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2718
2719     kfree(data);
2720
2721 exit:
2722     PERF_MEASURE_STOP(&perf_count_remote_thread_members," ",perf);
2723     return ret;
2724 }
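/*
 * Summary of the handshake above: the thread-count request carrying
 * (tgroup_home_cpu, tgroup_home_id, requester_pid) is sent to every other
 * kernel; each remote answer is routed back to the waiting entry via
 * find_remote_thread_count_data() using the same triple, and the requester
 * spins in schedule() until responses == expected_responses before reading
 * data->count.
 */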
2725
2726 /**
2727  * @brief Counts the number of local thread group members for the specified
2728  * distributed thread group.
2729  */
2730 static int count_local_thread_members(int tgroup_home_cpu, 
2731         int tgroup_home_id, int exclude_pid) {
2732
2733     struct task_struct *task, *g;
2734     int count = 0;
2735     PSPRINTK("%s: entered\n",__func__);
2736     do_each_thread(g,task) {
2737         if(task->tgroup_home_id == tgroup_home_id &&
2738            task->tgroup_home_cpu == tgroup_home_cpu &&
2739            task->t_home_cpu == _cpu &&
2740            task->pid != exclude_pid &&
2741            task->exit_state != EXIT_ZOMBIE &&
2742            task->exit_state != EXIT_DEAD &&
2743            !(task->flags & PF_EXITING)) {
2744
2745                 count++;
2746             
2747         }
2748     } while_each_thread(g,task);
2749     PSPRINTK("%s: exited\n",__func__);
2750
2751     return count;
2752
2753 }
2754
2755 /**
2756  * @brief Counts the number of local and remote thread group members for the
2757  * thread group in which the "current" task resides.
2758  * @return The number of threads.
2759  */
2760 static int count_thread_members (void)
2761 {
2762      
2763     int count = 0;
2764     PSPRINTK("%s: entered\n",__func__);
2765     count += count_local_thread_members(current->tgroup_home_cpu, current->tgroup_home_id,current->pid);
2766     count += count_remote_thread_members(current->tgroup_home_cpu, current->tgroup_home_id);
2767     PSPRINTK("%s: exited\n",__func__);
2768     return count;
2769 }
2770
2771
2772 /*
2773  * @brief Process notification of a thread group closing.
2774  * This function will wait for any locally executing thread group
2775  * members to exit.  It will then clean up all local resources
2776  * dedicated to the thread group that has exited.
2777  *
2778  * <MEASURE perf_process_tgroup_closed_item>
2779  */
2780
2781 void process_tgroup_closed_item(struct work_struct* work) {
2782
2783     tgroup_closed_work_t* w = (tgroup_closed_work_t*) work;
2784     data_header_t *curr;
2785     mm_data_t* mm_data;
2786     struct task_struct *g, *task;
2787     unsigned char tgroup_closed = 0;
2788     int perf = -1;
2789     mm_data_t* to_remove = NULL;
2790
2791     perf = PERF_MEASURE_START(&perf_process_tgroup_closed_item);
2792
2793     PSPRINTK("%s: entered\n",__func__);
2794     PSPRINTK("%s: received group exit notification\n",__func__);
2795
2796     PSPRINTK("%s: waiting for all members of this distributed thread group to finish\n",__func__);
2797     while(!tgroup_closed) {
2798         unsigned char pass = 0;
2799         do_each_thread(g,task) {
2800             if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
2801                task->tgroup_home_id  == w->tgroup_home_id) {
2802                 
2803                 // there are still living tasks within this distributed thread group
2804                 // wait a bit
2805                 schedule();
2806                 pass = 1;
2807             }
2808
2809         } while_each_thread(g,task);
2810         if(!pass) {
2811             tgroup_closed = 1;
2812         } else {
2813             PSPRINTK("%s: waiting for tgroup close out\n",__func__);
2814         }
2815     }
2816
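    /*
     * Saved mm's are released one at a time: locate one under the spinlock,
     * unlink it, drop the lock, then mmput()/kfree() it outside the lock
     * (mmput() may sleep), and jump back to look for the next one.  That is
     * what the loop:/found: labels below implement.
     */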
2817 loop:
2818     spin_lock(&_saved_mm_head_lock);
2819     // Remove all saved mm's for this thread group.
2820     curr = _saved_mm_head;
2821     while(curr) {
2822         mm_data = (mm_data_t*)curr;
2823         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
2824            mm_data->tgroup_home_id  == w->tgroup_home_id) {
2825             remove_data_entry_from(curr,&_saved_mm_head);
2826             to_remove = mm_data;
2827             goto found;
2828         }
2829         curr = curr->next;
2830     }
2831 found:
2832     spin_unlock(&_saved_mm_head_lock);
2833
2834     if(to_remove != NULL) {
2835         PSPRINTK("%s: removing a mm from cpu{%d} id{%d}\n",
2836                 __func__,
2837                 w->tgroup_home_cpu,
2838                 w->tgroup_home_id);
2839         
2840         BUG_ON(to_remove->mm == NULL);
2841         mmput(to_remove->mm);
2842         kfree(to_remove);
2843         to_remove = NULL;
2844         goto loop;
2845     }
2846
2847     kfree(work);
2848
2849     PERF_MEASURE_STOP(&perf_process_tgroup_closed_item," ",perf);
2850 }
2851
2852 /**
2853  * @brief Determine if the specified vma can have COW mappings.
2854  * @return 1 = yes, 0 = no.
2855  */
2856 static int is_maybe_cow(struct vm_area_struct* vma) {
2857     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
2858         // Not a cow vma
2859         return 0;
2860     }
2861
2862     if(!(vma->vm_flags & VM_WRITE)) {
2863         return 0;
2864     }
2865
2866     return 1;
2867 }
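/*
 * Example of the flag test above: a private writable mapping (e.g. anonymous
 * heap or stack memory) has VM_MAYWRITE and VM_WRITE set with VM_SHARED
 * clear, so (vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE and the
 * function returns 1.  A MAP_SHARED mapping fails the first test, and a
 * read-only private mapping fails the VM_WRITE test.
 */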
2868
2869 /**
2870  * @brief Break the COW page that contains "address", iff that page
2871  * is a COW page.
2872  * @return 1 = handled, 0 = not handled.
2873  * @prerequisite Caller must grab mm->mmap_sem
2874  */
2875 static int break_cow(struct mm_struct *mm, struct vm_area_struct* vma, unsigned long address) {
2876     pgd_t *pgd = NULL;
2877     pud_t *pud = NULL;
2878     pmd_t *pmd = NULL;
2879     pte_t *ptep = NULL;
2880     pte_t pte;
2881     spinlock_t* ptl;
2882
2883     //PSPRINTK("%s: entered\n",__func__);
2884
2885     // if it's not a cow mapping, return.
2886     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
2887         goto not_handled;
2888     }
2889
2890     // if it's not writable in vm_flags, return.
2891     if(!(vma->vm_flags & VM_WRITE)) {
2892         goto not_handled;
2893     }
2894
2895     pgd = pgd_offset(mm, address);
2896     if(!pgd_present(*pgd)) {
2897         goto not_handled_unlock;
2898     }
2899
2900     pud = pud_offset(pgd,address);
2901     if(!pud_present(*pud)) {
2902         goto not_handled_unlock;
2903     }
2904
2905     pmd = pmd_offset(pud,address);
2906     if(!pmd_present(*pmd)) {
2907         goto not_handled_unlock;
2908     }
2909
2910     ptep = pte_offset_map(pmd,address);
2911     if(!ptep || !pte_present(*ptep) || pte_none(*ptep)) {
2912         pte_unmap(ptep);
2913         goto not_handled_unlock;
2914     }
2915
2916     pte = *ptep;
2917
2918     if(pte_write(pte)) {
2919         goto not_handled_unlock;
2920     }
2921     
2922     // break the cow!
2923     ptl = pte_lockptr(mm,pmd);
2924     PS_SPIN_LOCK(ptl);
2925    
2926     PSPRINTK("%s: proceeding\n",__func__);
2927     do_wp_page(mm,vma,address,ptep,pmd,ptl,pte);
2928
2929
2930     // NOTE:
2931     // Do not call pte_unmap_unlock(ptep,ptl), since do_wp_page does that!
2932     
2933     goto handled;
2934
2935 not_handled_unlock:
2936 not_handled:
2937     return 0;
2938 handled:
2939     return 1;
2940 }
2941
2942 /**
2943  * @brief Process a request made by a remote CPU for a mapping.  This function
2944  * will search for mm's for the specified distributed thread group, and if found,
2945  * will search that mm for entries that contain the address that was asked for.
2946  * Prefetch is implemented here as well: the response carries not only the
2947  * page that was asked for, but the entire physically contiguous run of
2948  * virtual-to-physical mappings that the specified address lives in, plus
2949  * any other contiguous runs that fit in the response
2950  * (see fill_physical_mapping_array).
2951  *
2952  * <MEASURED perf_process_mapping_request>
2953  */
2954 void process_mapping_request(struct work_struct* work)
2955 {
2956     mapping_request_work_t* w = (mapping_request_work_t*) work;
2957     mapping_response_t* response;
2958     data_header_t* data_curr = NULL;
2959     mm_data_t* mm_data = NULL;
2960     
2961     struct task_struct* task = NULL;
2962     struct task_struct* g;
2963     struct vm_area_struct* vma = NULL;
2964     struct mm_struct* mm = NULL;
2965     
2966     unsigned long address = w->address;
2967     unsigned long resolved = 0;
2968     struct mm_walk walk = {
2969         .pte_entry = vm_search_page_walk_pte_entry_callback,
2970         .private = &(resolved)
2971     };
2972     char *plpath = NULL, *lpath = NULL;
2973     int used_saved_mm = 0, found_vma = 1, found_pte = 1; 
2974     int i;
2975
2976 #ifdef CONFIG_POPCORN_PERF    
2977     // for perf 
2978     int perf = PERF_MEASURE_START(&perf_process_mapping_request);
2979 #endif /* CONFIG_POPCORN_PERF */    
2980
2981     PSPRINTK("received mapping request from{%d} address{%lx}, cpu{%d}, id{%d}\n",
2982             w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
2983
2984     // First, search through existing processes
2985     do_each_thread(g,task) {
2986         if((task->tgroup_home_cpu == w->tgroup_home_cpu) &&
2987            (task->tgroup_home_id  == w->tgroup_home_id )) {
2988             PSPRINTK("mapping request found common thread group here\n");
2989             mm = task->mm;
2990
2991             // Take note of the fact that an mm exists on the remote kernel
2992             set_cpu_has_known_tgroup_mm(task, w->from_cpu);
2993
2994             goto task_mm_search_exit;
2995         }
2996     } while_each_thread(g,task);
2997 task_mm_search_exit:
2998
2999     // Failing the process search, look through saved mm's.
3000     if(!mm) {
3001         PS_SPIN_LOCK(&_saved_mm_head_lock);
3002         data_curr = _saved_mm_head;
3003         while(data_curr) {
3004
3005             mm_data = (mm_data_t*)data_curr;
3006             
3007             if((mm_data->tgroup_home_cpu == w->tgroup_home_cpu) &&
3008                (mm_data->tgroup_home_id  == w->tgroup_home_id)) {
3009                 PSPRINTK("%s: Using saved mm to resolve mapping\n",__func__);
3010                 mm = mm_data->mm;
3011                 used_saved_mm = 1;
3012                 break;
3013             }
3014
3015             data_curr = data_curr->next;
3016         } // while
3017         PS_SPIN_UNLOCK(&_saved_mm_head_lock);
3018     }
3019     
3020     response = kmalloc(sizeof(mapping_response_t), GFP_ATOMIC); //TODO convert to alloc_cache
3021     if (!response) {
3022       printk(KERN_ALERT"can not kmalloc mapping_response_t area from{%d} address{%lx} cpu{%d} id{%d}\n",
3023               w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
3024       goto err_work;
3025     }
3026     lpath = kmalloc(POPCORN_MAX_PATH, GFP_ATOMIC); //TODO convert to alloc_cache
3027     if (!lpath) {
3028       printk(KERN_ALERT"can not kmalloc lpath area from{%d} address{%lx} cpu{%d} id{%d}\n",
3029               w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
3030       goto err_response;
3031     }
3032     
3033     // OK, if mm was found, look up the mapping.
3034     if (mm) {
3035
3036         // The purpose of this code block is to determine
3037         // if we need to use a read or write lock, and to safely
3038         // implement whichever lock type we decide we need.  We
3039         // prefer to use read locks, since then we can service
3040         // more than one mapping request at the same time.  However,
3041         // if we are going to do any cow break operations, we 
3042         // must lock for write.
3043         int can_be_cow = 0;
3044         int first = 1;
3045 changed_can_be_cow:
3046         if(can_be_cow)
3047             PS_DOWN_WRITE(&mm->mmap_sem);
3048         else 
3049             PS_DOWN_READ(&mm->mmap_sem);
3050         vma = find_vma_checked(mm, address);
3051         if(vma && first) {
3052             first = 0;
3053             if(is_maybe_cow(vma)) {
3054                 can_be_cow = 1;
3055                 PS_UP_READ(&mm->mmap_sem);
3056                 goto changed_can_be_cow;
3057             }
3058         }
3059
3060         walk.mm = mm;
3061         walk_page_range(address & PAGE_MASK, 
3062                 (address & PAGE_MASK) + PAGE_SIZE, &walk);
3063
3064         if (vma && resolved != 0) {
3065             PSPRINTK("mapping found! %lx for vaddr %lx\n",resolved,
3066                     address & PAGE_MASK);
3067             /*
3068              * Find regions of consecutive physical memory
3069              * in this vma, including the faulting address
3070              * if possible.
3071              */
3072             {
3073             // Break all cows in this vma
3074             if (can_be_cow) {
3075                 unsigned long cow_addr;
3076                 for(cow_addr = vma->vm_start; cow_addr < vma->vm_end; cow_addr += PAGE_SIZE) {
3077                     break_cow(mm, vma, cow_addr);
3078                 }
3079                 // We no longer need a write lock after the break_cow process
3080                 // is complete, so downgrade the lock to a read lock.
3081                 downgrade_write(&mm->mmap_sem);
3082             } // if (can_be_cow
3083
3084             // Now grab all the mappings that we can stuff into the response.
3085             if (0 != fill_physical_mapping_array(mm, vma, address,
3086                                                 &(response->mappings[0]),
3087                                                 MAX_MAPPINGS)) {
3088                 // If the fill process fails, clear out all
3089                 // results.  Otherwise, we might trick the
3090                 // receiving cpu into thinking the target
3091                 // mapping was found when it was not.
3092                 for(i = 0; i < MAX_MAPPINGS; i++) {
3093                     response->mappings[i].present = 0;
3094                     response->mappings[i].vaddr = 0;
3095                     response->mappings[i].paddr = 0;
3096                     response->mappings[i].sz = 0;
3097                 }   
3098             } // if (0 != fill_physical_mapping_array
3099             }
3100
3101             response->header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3102             response->header.prio = PCN_KMSG_PRIO_NORMAL;
3103             response->tgroup_home_cpu = w->tgroup_home_cpu;
3104             response->tgroup_home_id = w->tgroup_home_id;
3105             response->requester_pid = w->requester_pid;
3106             response->address = address;
3107             response->present = 1;
3108             response->vaddr_start = vma->vm_start;
3109             response->vaddr_size = vma->vm_end - vma->vm_start;
3110             response->prot = vma->vm_page_prot;
3111             response->vm_flags = vma->vm_flags;
3112             if(vma->vm_file == NULL) {
3113                 response->path[0] = '\0';
3114             } else {    
3115                 plpath = d_path(&vma->vm_file->f_path,lpath,POPCORN_MAX_PATH);
3116                 strcpy(response->path,plpath);
3117                 response->pgoff = vma->vm_pgoff;
3118             }
3119
3120             // We modified this lock to be read-mode above so now
3121             // we can do a read-unlock instead of a write-unlock
3122             PS_UP_READ(&mm->mmap_sem);
3123        
3124         } else { // (vma && resolved != 0) 
3125
3126             if(can_be_cow)
3127                 PS_UP_WRITE(&mm->mmap_sem);
3128             else
3129                 PS_UP_READ(&mm->mmap_sem);
3130             // Zero out mappings
3131             for(i = 0; i < MAX_MAPPINGS; i++) {
3132                 response->mappings[i].present = 0;
3133                 response->mappings[i].vaddr = 0;
3134                 response->mappings[i].paddr = 0;
3135                 response->mappings[i].sz = 0;
3136             }
3137         } // !(vma && resolved != 0) 
3138     }
3139
3140     // Not found, respond accordingly
3141     if (resolved == 0) {
3142         found_vma = 0;
3143         found_pte = 0;
3144         //PSPRINTK("Mapping not found\n");
3145         response->header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3146         response->header.prio = PCN_KMSG_PRIO_NORMAL;
3147         response->tgroup_home_cpu = w->tgroup_home_cpu;
3148         response->tgroup_home_id = w->tgroup_home_id;
3149         response->requester_pid = w->requester_pid;
3150         response->address = address;
3151         response->present = 0;
3152         response->vaddr_start = 0;
3153         response->vaddr_size = 0;
3154         response->path[0] = '\0';
3155
3156         // Handle case where vma was present but no pte.
3157         if (vma) {
3158             //PSPRINTK("But vma present\n");
3159             found_vma = 1;
3160             response->present = 1;
3161             response->vaddr_start = vma->vm_start;
3162             response->vaddr_size = vma->vm_end - vma->vm_start;
3163             response->prot = vma->vm_page_prot;
3164             response->vm_flags = vma->vm_flags;
3165              if(vma->vm_file == NULL) {
3166                  response->path[0] = '\0';
3167              } else {    
3168                  plpath = d_path(&vma->vm_file->f_path,lpath,POPCORN_MAX_PATH);
3169                  strcpy(response->path,plpath);
3170                  response->pgoff = vma->vm_pgoff;
3171              }
3172         }
3173     }
3174
3175     // Send response
3176     if(response->present) {
3177         DO_UNTIL_SUCCESS(pcn_kmsg_send_long(w->from_cpu,
3178                             (struct pcn_kmsg_long_message*)(response),
3179                             sizeof(mapping_response_t) - 
3180                             sizeof(struct pcn_kmsg_hdr) -   //
3181                             sizeof(response->path) +         // Chop off the end of the path
3182                             strlen(response->path) + 1));    // variable to save bandwidth.
3183     } else {
3184         // Optimization: the mapping is not present, so send a short
3185         // fixed-size message instead of the costly _long send path.
3186         nonpresent_mapping_response_t nonpresent_response;
3187         nonpresent_response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE_NONPRESENT;
3188         nonpresent_response.header.prio = PCN_KMSG_PRIO_NORMAL;
3189         nonpresent_response.tgroup_home_cpu = w->tgroup_home_cpu;
3190         nonpresent_response.tgroup_home_id  = w->tgroup_home_id;
3191         nonpresent_response.requester_pid = w->requester_pid;
3192         nonpresent_response.address = w->address;
3193         DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)(&nonpresent_response)));
3194     }
3195
3196     kfree(lpath);
3197 err_response:
3198     kfree(response);
3199 err_work:
3200     kfree(work);
3201
3202 #ifdef CONFIG_POPCORN_PERF    
3203     if(used_saved_mm && found_vma && found_pte) {
3204         PERF_MEASURE_STOP(&perf_process_mapping_request,
3205                 "Saved MM + VMA + PTE",
3206                 perf);
3207     } else if (used_saved_mm && found_vma && !found_pte) {
3208         PERF_MEASURE_STOP(&perf_process_mapping_request,
3209                 "Saved MM + VMA + no PTE",
3210                 perf);
3211     } else if (used_saved_mm && !found_vma) {
3212         PERF_MEASURE_STOP(&perf_process_mapping_request,
3213                 "Saved MM + no VMA",
3214                 perf);
3215     } else if (!used_saved_mm && found_vma && found_pte) {
3216         PERF_MEASURE_STOP(&perf_process_mapping_request,
3217                 "VMA + PTE",
3218                 perf);
3219     } else if (!used_saved_mm && found_vma && !found_pte) {
3220         PERF_MEASURE_STOP(&perf_process_mapping_request,
3221                 "VMA + no PTE",
3222                 perf);
3223     } else if (!used_saved_mm && !found_vma) {
3224         PERF_MEASURE_STOP(&perf_process_mapping_request,
3225                 "no VMA",
3226                 perf);
3227     } else {
3228         PERF_MEASURE_STOP(&perf_process_mapping_request,"ERR",perf);
3229     }
3230 #endif /* CONFIG_POPCORN_PERF */    
3231
3232     return;
3233 }
3234
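     // Free-standing counters, presumably for ad-hoc performance timing; they
     // are not used by the handlers in this section.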
3235 unsigned long long perf_aa, perf_bb, perf_cc, perf_dd, perf_ee;
3236
3237 /**
3238  * @brief Process notification that a task has exited.  This function
3239  * sets the "return disposition" of the task, then wakes the task.
3240  * In this case, the "return disposition" specifies that the task
3241  * is exiting.  When the task resumes execution, it consults its
3242  * return disposition and acts accordingly, invoking do_exit().
3243  *
3244  * <MEASURE perf_process_exit_item>
3245  */
3246 void process_exit_item(struct work_struct* work) {
3247     exit_work_t* w = (exit_work_t*) work;
3248     pid_t pid = w->pid;
3249     struct task_struct *task = w->task;
3250
3251     int perf = PERF_MEASURE_START(&perf_process_exit_item);
3252
3253     if(unlikely(!task)) {
3254         printk("%s: ERROR - empty task\n",__func__);
3255         kfree(work);
3256         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3257         return;
3258     }
3259
3260     if(unlikely(task->pid != pid)) {
3261         printk("%s: ERROR - wrong task picked\n",__func__);
3262         kfree(work);
3263         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3264         return;
3265     }
3266     
3267     PSPRINTK("%s: process to kill %ld\n", __func__, (long)pid);
3268     PSPRINTK("%s: found task to kill, killing\n", __func__);
3269     PSPRINTK("%s: killing task - is_last_tgroup_member{%d}\n",
3270             __func__,
3271             w->is_last_tgroup_member);
3272
3273     // Now we're executing locally, so update our records
3274     //if(task->t_home_cpu == _cpu && task->t_home_id == task->pid)
3275     //    task->represents_remote = 0;
3276
3277     // Set the return disposition
3278     task->return_disposition = RETURN_DISPOSITION_EXIT;
3279
3280     wake_up_process(task);
3281
3282     kfree(work);
3283
3284     PERF_MEASURE_STOP(&perf_process_exit_item," ",perf);
3285 }
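
     /*
      * For context: a minimal sketch of the consumer side of the return
      * disposition set in process_exit_item() above.  When the task is woken
      * and resumes in the migration path, it consults its return disposition
      * and exits if told to.  The helper name below is illustrative only, not
      * the actual symbol used elsewhere in this file.
      *
      *   static void handle_return_disposition(struct task_struct *tsk)
      *   {
      *       switch (tsk->return_disposition) {
      *       case RETURN_DISPOSITION_EXIT:
      *           do_exit(tsk->exit_code);   // does not return
      *       default:
      *           break;                     // resume normal execution
      *       }
      *   }
      */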
3286
3287 /**
3288  * @brief Process a group exit request.  This function
3289  * issues SIGKILL to all locally executing members of the specified
3290  * distributed thread group.  Only tasks that are actively
3291  * executing on this CPU will receive the SIGKILL.  Shadow tasks
3292  * will not be sent SIGKILL.  Group exit requests are sent to
3293  * all CPUs, so for shadow tasks, another CPU will issue the
3294  * SIGKILL.  When that occurs, the normal exit process will be
3295  * initiated for that task, and eventually, all of its shadow
3296  * tasks will be killed.
3297  */
3298 void process_group_exit_item(struct work_struct* work) {
3299     group_exit_work_t* w = (group_exit_work_t*) work;
3300     struct task_struct *task = NULL;
3301     struct task_struct *g;
3302     unsigned long flags;
3303
3304     //int perf = PERF_MEASURE_START(&perf_process_group_exit_item);
3305     PSPRINTK("%s: entered\n",__func__);
3306     PSPRINTK("exit group target id{%d}, cpu{%d}\n",
3307             w->tgroup_home_id, w->tgroup_home_cpu);
3308
3309     do_each_thread(g,task) {
3310         if(task->tgroup_home_id == w->tgroup_home_id &&
3311            task->tgroup_home_cpu == w->tgroup_home_cpu) {
3312             
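                 // Before killing an active group member, walk its robust futex
                 // list so waiters on robust futexes it holds are notified
                 // (OWNER_DIED) rather than blocking forever.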
3313             if (!task->represents_remote) { // similar to zap_other_threads
3314                 exit_robust_list(task);
3315                 task->robust_list = NULL;
3316                 // active, send sigkill
3317                 lock_task_sighand(task, &flags);
3318
3319                 task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
3320                 sigaddset(&task->pending.signal, SIGKILL);
3321                 signal_wake_up(task, 1);
3322                 clear_ti_thread_flag(task, _TIF_USER_RETURN_NOTIFY);
3323
3324                 unlock_task_sighand(task, &flags);
3325
3326             }
3327
3328             // If it is a shadow task, it will eventually
3329             // get killed when its corresponding active task
3330             // is killed.
3331
3332         }
3333     } while_each_thread(g,task);
3334     
3335     kfree(work);
3336
3337     PSPRINTK("%s: exiting\n",__func__);
3338     //PERF_MEASURE_STOP(&perf_process_group_exit_item," ",perf);
3339
3340 }
3341
3342
3343 /**
3344  * @brief Process request to unmap a region of memory from a distributed
3345  * thread group.  Look for local thread group members and carry out the
3346  * requested action.
3347  *
3348  * <MEASURE perf_process_munmap_request>
3349  */
3350 void process_munmap_request(struct work_struct* work) {
3351     munmap_request_work_t* w = (munmap_request_work_t*)work;
3352     munmap_response_t response;
3353     struct task_struct *task, *g;
3354     data_header_t *curr = NULL;
3355     mm_data_t* mm_data = NULL;
3356     mm_data_t* to_munmap = NULL;
3357     struct mm_struct* mm_to_munmap = NULL;
3358
3359     int perf = PERF_MEASURE_START(&perf_process_munmap_request);
3360
3361     PSPRINTK("%s: entered\n",__func__);
3362
3363     // munmap the specified region in the specified thread group
3364     read_lock(&tasklist_lock);
3365     do_each_thread(g,task) {
3366
3367         // Look for the thread group
3368         if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
3369            task->tgroup_home_id  == w->tgroup_home_id &&
3370            !(task->flags & PF_EXITING)) {
3371
3372             // Take note of the fact that an mm exists on the remote kernel
3373             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
3374             
3375             if (task->mm) {
3376                 mm_to_munmap = task->mm;
3377             }
3378             else
3379                 printk("%s: task has no mm\n", __func__);
3380
3381             goto done; 
3382         }
3383     } while_each_thread(g,task);
3384 done:
3385     read_unlock(&tasklist_lock);
3386
3387     if(mm_to_munmap) {
3388         PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
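             // Suppress distributed munmap propagation while servicing this
             // remote request so the unmap is not broadcast back to the
             // other kernels.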
3389         current->enable_distributed_munmap = 0;
3390         do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
3391         current->enable_distributed_munmap = 1;
3392         PS_UP_WRITE(&mm_to_munmap->mmap_sem);
3393     }
3394     else
3395         printk("%s: no local thread group member with an mm was found; task %p task->mm %p\n",
3396                  __func__, task, (task ? task->mm : NULL));
3397
3398     // munmap the specified region in any saved mm's as well.
3399     // This keeps old mappings saved in the mm of dead thread
3400     // group members from being resolved accidentally after
3401     // being munmap()ped, as that would cause security/coherency
3402     // problems.
3403     PS_SPIN_LOCK(&_saved_mm_head_lock);
3404     curr = _saved_mm_head;
3405     while(curr) {
3406         mm_data = (mm_data_t*)curr;
3407         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
3408            mm_data->tgroup_home_id  == w->tgroup_home_id) {
3409            
3410             to_munmap = mm_data;
3411             goto found;
3412
3413         }
3414         curr = curr->next;
3415     }
3416 found:
3417     PS_SPIN_UNLOCK(&_saved_mm_head_lock);
3418
3419     if (to_munmap && to_munmap->mm) {
3420         PS_DOWN_WRITE(&to_munmap->mm->mmap_sem);
3421         current->enable_distributed_munmap = 0;
3422         do_munmap(to_munmap->mm, w->vaddr_start, w->vaddr_size);
3423         current->enable_distributed_munmap = 1;
3424         if (to_munmap && to_munmap->mm)
3425                 PS_UP_WRITE(&to_munmap->mm->mmap_sem);
3426         else
3427                 printk(KERN_ALERT"%s: ERROR2: to_munmap %p mm %p\n",
3428                                  __func__, to_munmap, to_munmap?to_munmap->mm:0);
3429     }
3430     else if (to_munmap) // A missing saved mm is fine, but a saved entry with a NULL mm is an error
3431         printk(KERN_ALERT"%s: ERROR1: to_munmap %p mm %p\n",
3432                          __func__, to_munmap, to_munmap?to_munmap->mm:0);
3433
3434     // Construct response
3435     response.header.type = PCN_KMSG_TYPE_PROC_SRV_MUNMAP_RESPONSE;
3436     response.header.prio = PCN_KMSG_PRIO_NORMAL;
3437     response.tgroup_home_cpu = w->tgroup_home_cpu;
3438     response.tgroup_home_id = w->tgroup_home_id;
3439     response.requester_pid = w->requester_pid;
3440     response.vaddr_start = w->vaddr_start;
3441     response.vaddr_size = w->vaddr_size;
3442     
3443     // Send response
3444     DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,
3445                         (struct pcn_kmsg_message*)(&response)));
3446
3447     kfree(work);
3448     
3449     PERF_MEASURE_STOP(&perf_process_munmap_request," ",perf);
3450 }
3451
3452 /**
3453  * @brief Process request to change protection of a region of memory in
3454  * a distributed thread group.  Look for local thread group members and
3455  * carry out the requested action.
3456  *
3457  * <MEASURE perf_process_mprotect_item>
3458  */
3459 void process_mprotect_item(struct work_struct* work) {
3460     mprotect_response_t response;
3461     mprotect_work_t* w = (mprotect_work_t*)work;
3462     int tgroup_home_cpu = w->tgroup_home_cpu;
3463     int tgroup_home_id  = w->tgroup_home_id;
3464     unsigned long start = w->start;
3465     size_t len = w->len;
3466     struct task_struct* task, *g;
3467     data_header_t* curr = NULL;
3468     mm_data_t* mm_data = NULL;
3469     mm_data_t* to_munmap = NULL;
3470     struct mm_struct *mm_to_munmap = NULL;
3471
3472     int perf = PERF_MEASURE_START(&perf_process_mprotect_item);
3473     
3474     // Find the task
3475     read_lock(&tasklist_lock);
3476     do_each_thread(g,task) {
3477
3478         // Look for the thread group
3479         if (task->tgroup_home_cpu == tgroup_home_cpu &&
3480             task->tgroup_home_id  == tgroup_home_id &&
3481             !(task->flags & PF_EXITING)) {
3482
3483             // Take note of the fact that an mm exists on the remote kernel
3484             set_cpu_has_known_tgroup_mm(task,w->from_cpu);