1  /**
2  * Implements task migration and maintains coherent 
3  * address spaces across CPU cores.
4  *
5  * David G. Katz
6  */
7
8 #include <linux/mcomm.h> // IPC
9 #include <linux/kthread.h>
10 #include <linux/export.h>
11 #include <linux/delay.h>
12 #include <linux/smp.h>
13 #include <linux/sched.h>
14 #include <linux/threads.h> // NR_CPUS
15 #include <linux/kmod.h>
16 #include <linux/path.h>
17 #include <linux/mount.h>
18 #include <linux/fs.h>
19 #include <linux/fs_struct.h>
20 #include <linux/file.h>
21 #include <linux/fdtable.h>
22 #include <linux/slab.h>
23 #include <linux/process_server.h>
24 #include <linux/mm.h>
25 #include <linux/io.h> // ioremap
26 #include <linux/mman.h> // MAP_ANONYMOUS
27 #include <linux/pcn_kmsg.h> // Messaging
28 #include <linux/pcn_perf.h> // performance measurement
29 #include <linux/string.h>
30
31 #include <linux/popcorn.h>
32
33 #include <asm/pgtable.h>
34 #include <asm/atomic.h>
35 #include <asm/tlbflush.h>
36 #include <asm/cacheflush.h>
37 #include <asm/uaccess.h> // USER_DS
38 #include <asm/prctl.h> // prctl
39 #include <asm/proto.h> // do_arch_prctl
40 #include <asm/msr.h> // wrmsr_safe
41 #include <asm/mmu_context.h>
42 #include <asm/processor.h> // load_cr3
43
44 unsigned long get_percpu_old_rsp(void);
45
46 /**
47  * General purpose configuration
48  */
49
50 // Flag indicating whether or not to migrate the entire virtual
51 // memory space when a migration occurs.
52 #define COPY_WHOLE_VM_WITH_MIGRATION 1
53
54 // Flag indicating whether or not to migrate file-backed executable
55 // pages when a fault occurs accessing executable memory.  When this
56 // flag is 1, those pages will be migrated.  When it is 0, the local
57 // file-system will be consulted instead.
58 #define MIGRATE_EXECUTABLE_PAGES_ON_DEMAND 1
59
60 // The maximum number of contiguous physically mapped regions to
61 // migrate in response to a mapping query.
62 #define MAX_MAPPINGS 1
63
64 /**
65  * Use the preprocessor to turn off printk.
66  */
67 #define PROCESS_SERVER_VERBOSE 0
68 #if PROCESS_SERVER_VERBOSE
69 #define PSPRINTK(...) printk(__VA_ARGS__)
70 #else
71 #define PSPRINTK(...) ;
72 #endif
73
74 #define PROCESS_SERVER_INSTRUMENT_LOCK 0
75 #if PROCESS_SERVER_VERBOSE && PROCESS_SERVER_INSTRUMENT_LOCK
76 #define PS_SPIN_LOCK(x) do { PSPRINTK("Acquiring spin lock in %s at line %d\n",__func__,__LINE__); \
77                              spin_lock(x); \
78                              PSPRINTK("Done acquiring spin lock in %s at line %d\n",__func__,__LINE__); } while (0)
79 #define PS_SPIN_UNLOCK(x) do { PSPRINTK("Releasing spin lock in %s at line %d\n",__func__,__LINE__); \
80                                spin_unlock(x); \
81                                PSPRINTK("Done releasing spin lock in %s at line %d\n",__func__,__LINE__); } while (0)
82 #define PS_DOWN_READ(x) do { PSPRINTK("Acquiring read lock in %s at line %d\n",__func__,__LINE__); \
83                              down_read(x); \
84                              PSPRINTK("Done acquiring read lock in %s at line %d\n",__func__,__LINE__); } while (0)
85 #define PS_UP_READ(x) do { PSPRINTK("Releasing read lock in %s at line %d\n",__func__,__LINE__); \
86                            up_read(x); \
87                            PSPRINTK("Done releasing read lock in %s at line %d\n",__func__,__LINE__); } while (0)
88 #define PS_DOWN_WRITE(x) do { PSPRINTK("Acquiring write lock in %s at line %d\n",__func__,__LINE__); \
89                               down_write(x); \
90                               PSPRINTK("Done acquiring write lock in %s at line %d\n",__func__,__LINE__); } while (0)
91 #define PS_UP_WRITE(x) do { PSPRINTK("Releasing write lock in %s at line %d\n",__func__,__LINE__); \
92                             up_write(x); \
93                             PSPRINTK("Done releasing write lock in %s at line %d\n",__func__,__LINE__); } while (0)
94
95
96 #else
97 #define PS_SPIN_LOCK(x) spin_lock(x)
98 #define PS_SPIN_UNLOCK(x) spin_unlock(x)
99 #define PS_DOWN_READ(x) down_read(x)
100 #define PS_UP_READ(x) up_read(x)
101 #define PS_DOWN_WRITE(x) down_write(x)
102 #define PS_UP_WRITE(x) up_write(x)
103 #endif
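/*
 * Usage sketch (illustrative only): the PS_* wrappers are drop-in
 * replacements for the raw locking primitives, so call sites stay the same
 * whether or not lock instrumentation is compiled in, e.g.
 *
 *   PS_DOWN_READ(&mm->mmap_sem);
 *   ... inspect the address space ...
 *   PS_UP_READ(&mm->mmap_sem);
 *
 * With PROCESS_SERVER_VERBOSE and PROCESS_SERVER_INSTRUMENT_LOCK both set
 * to 1, each call additionally logs the acquire/release through PSPRINTK.
 */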
104
105 /**
106  * Library data type definitions
107  */
108 #define PROCESS_SERVER_DATA_TYPE_TEST 0
109 #define PROCESS_SERVER_VMA_DATA_TYPE 1
110 #define PROCESS_SERVER_PTE_DATA_TYPE 2
111 #define PROCESS_SERVER_CLONE_DATA_TYPE 3
112 #define PROCESS_SERVER_MAPPING_REQUEST_DATA_TYPE 4
113 #define PROCESS_SERVER_MUNMAP_REQUEST_DATA_TYPE 5
114 #define PROCESS_SERVER_MM_DATA_TYPE 6
115 #define PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE 7
116 #define PROCESS_SERVER_MPROTECT_DATA_TYPE 8
117
118 /**
119  * Useful macros
120  */
121 #define DO_UNTIL_SUCCESS(x) while((x) != 0){}
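/*
 * Example (illustrative; dst_cpu and msg are placeholders): retry a kernel
 * message send until the transport accepts it,
 *
 *   DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&msg));
 *
 * which re-evaluates the send expression on every iteration and spins until
 * it returns 0.
 */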
122
123 /**
124  * Perf
125  */
126 #ifdef CONFIG_POPCORN_PERF
127 #define PERF_INIT() perf_init()
128 #define PERF_MEASURE_START(x) perf_measure_start(x)
129 #define PERF_MEASURE_STOP(x,y,z)  perf_measure_stop(x,y,z)
130
131 pcn_perf_context_t perf_count_remote_thread_members;
132 pcn_perf_context_t perf_process_back_migration;
133 pcn_perf_context_t perf_process_mapping_request;
134 pcn_perf_context_t perf_process_mapping_request_search_active_mm;
135 pcn_perf_context_t perf_process_mapping_request_search_saved_mm;
136 pcn_perf_context_t perf_process_mapping_request_do_lookup;
137 pcn_perf_context_t perf_process_mapping_request_transmit;
138 pcn_perf_context_t perf_process_mapping_response;
139 pcn_perf_context_t perf_process_tgroup_closed_item;
140 pcn_perf_context_t perf_process_exit_item;
141 pcn_perf_context_t perf_process_mprotect_item;
142 pcn_perf_context_t perf_process_munmap_request;
143 pcn_perf_context_t perf_process_munmap_response;
144 pcn_perf_context_t perf_process_server_try_handle_mm_fault;
145 pcn_perf_context_t perf_process_server_import_address_space;
146 pcn_perf_context_t perf_process_server_do_exit;
147 pcn_perf_context_t perf_process_server_do_munmap;
148 pcn_perf_context_t perf_process_server_do_migration;
149 pcn_perf_context_t perf_process_server_do_mprotect;
150 pcn_perf_context_t perf_process_server_notify_delegated_subprocess_starting;
151 pcn_perf_context_t perf_handle_thread_group_exit_notification;
152 pcn_perf_context_t perf_handle_remote_thread_count_response;
153 pcn_perf_context_t perf_handle_remote_thread_count_request;
154 pcn_perf_context_t perf_handle_munmap_response;
155 pcn_perf_context_t perf_handle_munmap_request;
156 pcn_perf_context_t perf_handle_mapping_response;
157 pcn_perf_context_t perf_handle_mapping_request;
158 pcn_perf_context_t perf_handle_pte_transfer;
159 pcn_perf_context_t perf_handle_vma_transfer;
160 pcn_perf_context_t perf_handle_exiting_process_notification;
161 pcn_perf_context_t perf_handle_process_pairing_request;
162 pcn_perf_context_t perf_handle_clone_request;
163 pcn_perf_context_t perf_handle_mprotect_response;
164 pcn_perf_context_t perf_handle_mprotect_request;
165
166 /**
167  * Initialize all of the perf measurement contexts declared above.
168  */
169 static void perf_init(void) {
170    perf_init_context(&perf_count_remote_thread_members,
171            "count_remote_thread_members");
172    perf_init_context(&perf_process_back_migration,
173            "process_back_migration");
174    perf_init_context(&perf_process_mapping_request,
175            "process_mapping_request");
176    perf_init_context(&perf_process_mapping_request_search_active_mm,
177            "process_mapping_request_search_active_mm");
178    perf_init_context(&perf_process_mapping_request_search_saved_mm,
179            "process_mapping_request_search_saved_mm");
180    perf_init_context(&perf_process_mapping_request_do_lookup,
181            "process_mapping_request_do_lookup");
182    perf_init_context(&perf_process_mapping_request_transmit,
183            "process_mapping_request_transmit");
184    perf_init_context(&perf_process_mapping_response,
185            "process_mapping_response");
186    perf_init_context(&perf_process_tgroup_closed_item,
187            "process_tgroup_closed_item");
188    perf_init_context(&perf_process_exit_item,
189            "process_exit_item");
190    perf_init_context(&perf_process_mprotect_item,
191            "process_mprotect_item");
192    perf_init_context(&perf_process_munmap_request,
193            "process_munmap_request");
194    perf_init_context(&perf_process_munmap_response,
195            "process_munmap_response");
196    perf_init_context(&perf_process_server_try_handle_mm_fault,
197            "process_server_try_handle_mm_fault");
198    perf_init_context(&perf_process_server_import_address_space,
199            "process_server_import_address_space");
200    perf_init_context(&perf_process_server_do_exit,
201            "process_server_do_exit");
202    perf_init_context(&perf_process_server_do_munmap,
203            "process_server_do_munmap");
204    perf_init_context(&perf_process_server_do_migration,
205            "process_server_do_migration");
206    perf_init_context(&perf_process_server_do_mprotect,
207            "process_server_do_mprotect");
208    perf_init_context(&perf_process_server_notify_delegated_subprocess_starting,
209            "process_server_notify_delegated_subprocess_starting");
210    perf_init_context(&perf_handle_thread_group_exit_notification,
211            "handle_thread_group_exit_notification");
212    perf_init_context(&perf_handle_remote_thread_count_response,
213            "handle_remote_thread_count_response");
214    perf_init_context(&perf_handle_remote_thread_count_request,
215            "handle_remote_thread_count_request");
216    perf_init_context(&perf_handle_munmap_response,
217            "handle_munmap_response");
218    perf_init_context(&perf_handle_munmap_request,
219            "handle_munmap_request");
220    perf_init_context(&perf_handle_mapping_response,
221            "handle_mapping_response");
222    perf_init_context(&perf_handle_mapping_request,
223            "handle_mapping_request");
224    perf_init_context(&perf_handle_pte_transfer,
225            "handle_pte_transfer");
226    perf_init_context(&perf_handle_vma_transfer,
227            "handle_vma_transfer");
228    perf_init_context(&perf_handle_exiting_process_notification,
229            "handle_exiting_process_notification");
230    perf_init_context(&perf_handle_process_pairing_request,
231            "handle_process_pairing_request");
232    perf_init_context(&perf_handle_clone_request,
233            "handle_clone_request");
234    perf_init_context(&perf_handle_mprotect_request,
235            "handle_mprotect_request");
236    perf_init_context(&perf_handle_mprotect_response,
237            "handle_mprotect_response");
238
239 }
240 #else /* CONFIG_POPCORN_PERF */
241 #define PERF_INIT() 
242 #define PERF_MEASURE_START(x) -1
243 #define PERF_MEASURE_STOP(x, y, z)
244 #endif /* !CONFIG_POPCORN_PERF */
245
246 /**
247  * Library
248  */
249
250 #define POPCORN_MAX_PATH 512
251
252 /**
253  * Some piping for linking data entries
254  * and identifying data entry types.
255  */
256 typedef struct _data_header {
257     struct _data_header* next;
258     struct _data_header* prev;
259     int data_type;
260 } data_header_t;
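/*
 * Sketch of how the header is used (illustrative; mm_data_t and the
 * _saved_mm_head list appear further down in this file): every record type
 * below embeds data_header_t as its first member, so records can sit on a
 * generic doubly linked list and be recovered with a cast, e.g.
 *
 *   data_header_t* curr = _saved_mm_head;
 *   while(curr) {
 *       mm_data_t* entry = (mm_data_t*)curr; // header is the first member
 *       ...
 *       curr = curr->next;
 *   }
 */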
261
262 /**
263  * Hold data about a pte to vma mapping.
264  */
265 typedef struct _pte_data {
266     data_header_t header;
267     int vma_id;
268     int clone_request_id;
269     int cpu;
270     unsigned long vaddr_start;
271     unsigned long paddr_start;
272     size_t sz;
273 } pte_data_t;
274
275 /**
276  * Hold data about a vma to process
277  * mapping.
278  */
279 typedef struct _vma_data {
280     data_header_t header;
281     spinlock_t lock;
282     unsigned long start;
283     unsigned long end;
284     int clone_request_id;
285     int cpu;
286     unsigned long flags;
287     int vma_id;
288     pgprot_t prot;
289     unsigned long pgoff;
290     pte_data_t* pte_list;
291     int mmapping_in_progress;
292     char path[256];
293 } vma_data_t;
294
295 typedef struct _contiguous_physical_mapping {
296     unsigned char present;
297     unsigned long vaddr;
298     unsigned long paddr;
299     size_t sz;
300 } contiguous_physical_mapping_t;
301
302 /**
303  * Holds everything needed to reconstruct a migrated task locally from a clone request.
304  */
305 typedef struct _clone_data {
306     data_header_t header;
307     spinlock_t lock;
308     int clone_request_id;
309     int requesting_cpu;
310     char exe_path[512];
311     unsigned long clone_flags;
312     unsigned long stack_start;
313     unsigned long stack_ptr;
314     unsigned long env_start;
315     unsigned long env_end;
316     unsigned long arg_start;
317     unsigned long arg_end;
318     unsigned long heap_start;
319     unsigned long heap_end;
320     unsigned long data_start;
321     unsigned long data_end;
322     struct pt_regs regs;
323     int placeholder_pid;
324     int placeholder_tgid;
325     int placeholder_cpu;
326     unsigned long thread_fs;
327     unsigned long thread_gs;
328     unsigned long thread_sp0;
329     unsigned long thread_sp;
330     unsigned long thread_usersp;
331     unsigned short thread_es;
332     unsigned short thread_ds;
333     unsigned short thread_fsindex;
334     unsigned short thread_gsindex;
335     int tgroup_home_cpu;
336     int tgroup_home_id;
337     int t_home_cpu;
338     int t_home_id;
339     int prio, static_prio, normal_prio; //from sched.c
340     unsigned int rt_priority; //from sched.c
341     int sched_class; //from sched.c, but here it holds the scheduling policy (SCHED_NORMAL, SCHED_FIFO, etc.)
342     unsigned long previous_cpus;
343     vma_data_t* vma_list;
344     vma_data_t* pending_vma_list;
345 } clone_data_t;
346
347 /**
348  * Tracks an outstanding mapping query and accumulates the responses to it.
349  */
350 typedef struct _mapping_request_data {
351     data_header_t header;
352     int tgroup_home_cpu;
353     int tgroup_home_id;
354     int requester_pid;
355     unsigned long address;
356     unsigned long vaddr_start;
357     unsigned long vaddr_size;
358     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
359     pgprot_t prot;
360     unsigned long vm_flags;
361     unsigned char present;
362     unsigned char complete;
363     unsigned char from_saved_mm;
364     int responses;
365     int expected_responses;
366     unsigned long pgoff;
367     spinlock_t lock;
368     char path[512];
369 } mapping_request_data_t;
370
371 /**
372  * Tracks an outstanding distributed munmap request.
373  */
374 typedef struct _munmap_request_data {
375     data_header_t header;
376     int tgroup_home_cpu;
377     int tgroup_home_id;
378     int requester_pid;
379     unsigned long vaddr_start;
380     unsigned long vaddr_size;
381     int responses;
382     int expected_responses;
383     spinlock_t lock;
384 } munmap_request_data_t;
385
386 /**
387  * Tracks an outstanding remote thread-count query.
388  */
389 typedef struct _remote_thread_count_request_data {
390     data_header_t header;
391     int tgroup_home_cpu;
392     int tgroup_home_id;
393     int requester_pid;
394     int responses;
395     int expected_responses;
396     int count;
397     spinlock_t lock;
398 } remote_thread_count_request_data_t;
399
400 /**
401  * Associates a saved mm_struct with its distributed thread group.
402  */
403 typedef struct _mm_data {
404     data_header_t header;
405     int tgroup_home_cpu;
406     int tgroup_home_id;
407     struct mm_struct* mm;
408 } mm_data_t;
409
410 typedef struct _mprotect_data {
411     data_header_t header;
412     int tgroup_home_cpu;
413     int tgroup_home_id;
414     int requester_pid;
415     unsigned long start;
416     int responses;
417     int expected_responses;
418     spinlock_t lock;
419 } mprotect_data_t;
420
421 /**
422  * This message is sent to a remote cpu in order to 
423  * ask it to spin up a process on behalf of the
424  * requesting cpu.  Some of these fields may go
425  * away in the near future.
426  */
427 typedef struct _clone_request {
428     struct pcn_kmsg_hdr header;
429     int clone_request_id;
430     unsigned long clone_flags;
431     unsigned long stack_start;
432     unsigned long stack_ptr;
433     unsigned long env_start;
434     unsigned long env_end;
435     unsigned long arg_start;
436     unsigned long arg_end;
437     unsigned long heap_start;
438     unsigned long heap_end;
439     unsigned long data_start;
440     unsigned long data_end;
441     struct pt_regs regs;
442     char exe_path[512];
443     int placeholder_pid;
444     int placeholder_tgid;
445     unsigned long thread_fs;
446     unsigned long thread_gs;
447     unsigned long thread_sp0;
448     unsigned long thread_sp;
449     unsigned long thread_usersp;
450     unsigned short thread_es;
451     unsigned short thread_ds;
452     unsigned short thread_fsindex;
453     unsigned short thread_gsindex;
454     int tgroup_home_cpu;
455     int tgroup_home_id;
456     int t_home_cpu;
457     int t_home_id;
458     int prio, static_prio, normal_prio; //from sched.c
459     unsigned int rt_priority; //from sched.c
460     int sched_class; //from sched.c, but here it holds the scheduling policy (SCHED_NORMAL, SCHED_FIFO, etc.)
461     unsigned long previous_cpus;
462 } clone_request_t;
463
464 /**
465  * This message is sent in response to a clone request.
466  * Its purpose is to notify the requesting cpu that
467  * the specified pid is executing on behalf of the
468  * requesting cpu.
469  */
470 typedef struct _create_process_pairing {
471     struct pcn_kmsg_hdr header;
472     int your_pid; // PID of the task on the cpu receiving this pairing request
473     int my_pid;   // PID of the task on the cpu transmitting this pairing request
474 } create_process_pairing_t;
475
476 /**
477  * This message informs the remote cpu of delegated
478  * process death.  This occurs whether the process
479  * is a placeholder or a delegate locally.
480  */
481 struct _exiting_process {
482     struct pcn_kmsg_hdr header;
483     int t_home_cpu;             // 4
484     int t_home_id;              // 4
485     int my_pid;                 // 4
486     int is_last_tgroup_member;  // 4+
487                                 // ---
488                                 // 16 -> 44 bytes of padding needed
489     char pad[44];
490 } __attribute__((packed)) __attribute__((aligned(64)));  
491 typedef struct _exiting_process exiting_process_t;
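/*
 * The pad[] sizes in these message structs assume the pcn_kmsg header plus
 * payload fills exactly one 64-byte cache line.  A compile-time check along
 * these lines (not in the original code, and assuming that 64-byte message
 * size) placed in any init function would catch a miscounted pad:
 *
 *   BUILD_BUG_ON(sizeof(exiting_process_t) != 64);
 */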
492
493 /**
494  *
495  */
496 struct _exiting_group {
497     struct pcn_kmsg_hdr header;
498     int tgroup_home_cpu;        // 4
499     int tgroup_home_id;         // 4
500                                 // ---
501                                 // 8 -> 52 bytes of padding needed
502     char pad[52];
503 } __attribute__((packed)) __attribute__((aligned(64)));
504 typedef struct _exiting_group exiting_group_t;
505
506 /**
507  * Inform remote cpu of a vma to process mapping.
508  */
509 typedef struct _vma_transfer {
510     struct pcn_kmsg_hdr header;
511     int vma_id;
512     int clone_request_id;
513     unsigned long start;
514     unsigned long end;
515     pgprot_t prot;
516     unsigned long flags;
517     unsigned long pgoff;
518     char path[256];
519 } vma_transfer_t;
520
521 /**
522  * Inform remote cpu of a pte to vma mapping.
523  */
524 struct _pte_transfer {
525     struct pcn_kmsg_hdr header;
526     int vma_id;                  //  4
527     int clone_request_id;        //  4
528     unsigned long vaddr_start;   //  8
529     unsigned long paddr_start;   //  8
530     size_t sz;                   //  4 +
531                                  //  ---
532                                  //  28 -> 32 bytes of padding needed
533     char pad[32];
534 } __attribute__((packed)) __attribute__((aligned(64)));
535
536 typedef struct _pte_transfer pte_transfer_t;
537
538 /**
539  * Ask a remote cpu for the mapping that covers the given address.
540  */
541 struct _mapping_request {
542     struct pcn_kmsg_hdr header;
543     int tgroup_home_cpu;        // 4
544     int tgroup_home_id;         // 4
545     int requester_pid;          // 4
546     unsigned long address;      // 8
547                                 // ---
548                                 // 20 -> 40 bytes of padding needed
549     char pad[40];
550
551 } __attribute__((packed)) __attribute__((aligned(64)));
552
553 typedef struct _mapping_request mapping_request_t;
554
555 /*
556  * type = PCN_KMSG_TYPE_PROC_SRV_THREAD_GROUP_EXITED_NOTIFICATION
557  */
558 struct _thread_group_exited_notification {
559     struct pcn_kmsg_hdr header;
560     int tgroup_home_cpu;        // 4
561     int tgroup_home_id;         // 4
562                                 // ---
563                                 // 8 -> 52 bytes of padding needed
564     char pad[52];
565 } __attribute__((packed)) __attribute__((aligned(64)));
566 typedef struct _thread_group_exited_notification thread_group_exited_notification_t;
567
568
569 /**
570  * Response to a mapping request, carrying any mappings that were resolved.
571  */
572 struct _mapping_response {
573     struct pcn_kmsg_hdr header;
574     int tgroup_home_cpu;        
575     int tgroup_home_id; 
576     int requester_pid;
577     unsigned char present;      
578     unsigned char from_saved_mm;
579     unsigned long address;      
580     unsigned long vaddr_start;
581     unsigned long vaddr_size;
582     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
583     pgprot_t prot;              
584     unsigned long vm_flags;     
585     unsigned long pgoff;
586     char path[512]; // kept last so the message can be
587                     // truncated when possible.
588 };
589 typedef struct _mapping_response mapping_response_t;
590
591 /**
592  * This is a hack to eliminate the overhead of sending
593  * an entire mapping_response_t when there is no mapping.
594  * The overhead is due to the size of the message, which
595  * requires the _long pcn_kmsg variant to be used.
596  */
597 struct _nonpresent_mapping_response {
598     struct pcn_kmsg_hdr header;
599     int tgroup_home_cpu;        // 4
600     int tgroup_home_id;         // 4
601     int requester_pid;            // 4
602     unsigned long address;      // 8
603                                 // ---
604                                 // 20 -> 40 bytes of padding needed
605     char pad[40];
606
607 } __attribute__((packed)) __attribute__((aligned(64)));
608 typedef struct _nonpresent_mapping_response nonpresent_mapping_response_t;
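/*
 * Sketch of the intended fast path (illustrative; the message type constant,
 * destination, and surrounding variables are placeholders): when the lookup
 * finds nothing, this small fixed-size reply can go through the ordinary
 * pcn_kmsg_send() path instead of the _long variant a full
 * mapping_response_t would need:
 *
 *   nonpresent_mapping_response_t r;
 *   r.header.type = ...;             // the nonpresent-mapping response type
 *   r.header.prio = PCN_KMSG_PRIO_NORMAL;
 *   r.tgroup_home_cpu = tgroup_home_cpu;
 *   r.tgroup_home_id  = tgroup_home_id;
 *   r.requester_pid   = requester_pid;
 *   r.address         = address;
 *   pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&r);
 */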
609
610 /**
611  * Ask a remote cpu to unmap the given region.
612  */
613 struct _munmap_request {
614     struct pcn_kmsg_hdr header;
615     int tgroup_home_cpu;         // 4
616     int tgroup_home_id;          // 4
617     int requester_pid;           // 4
618     unsigned long vaddr_start;   // 8
619     unsigned long vaddr_size;    // 8
620                                  // ---
621                                  // 28 -> 32 bytes of padding needed
622     char pad[32];
623 } __attribute__((packed)) __attribute__((aligned(64)));
624 typedef struct _munmap_request munmap_request_t;
625
626 /**
627  * Acknowledge a munmap request.
628  */
629 struct _munmap_response {
630     struct pcn_kmsg_hdr header;
631     int tgroup_home_cpu;        // 4
632     int tgroup_home_id;         // 4
633     int requester_pid;          // 4
634     unsigned long vaddr_start;  // 8
635     unsigned long vaddr_size;   // 8+
636                                 // ---
637                                 // 28 -> 32 bytes of padding needed
638     char pad[32];
639 } __attribute__((packed)) __attribute__((aligned(64)));
640 typedef struct _munmap_response munmap_response_t;
641
642 /**
643  * Ask a remote cpu how many threads of the given thread group it is hosting.
644  */
645 struct _remote_thread_count_request {
646     struct pcn_kmsg_hdr header;
647     int tgroup_home_cpu;        // 4
648     int tgroup_home_id;         // 4
649     int requester_pid;          // 4
650                                 // ---
651                                 // 12 -> 48 bytes of padding needed
652     char pad[48];
653 } __attribute__((packed)) __attribute__((aligned(64)));
654 typedef struct _remote_thread_count_request remote_thread_count_request_t;
655
656 /**
657  * Reply carrying the remote thread count.
658  */
659 struct _remote_thread_count_response {
660     struct pcn_kmsg_hdr header;
661     int tgroup_home_cpu;        // 4
662     int tgroup_home_id;         // 4
663     int requester_pid;        // 4
664     int count;                  // 4
665                                 // ---
666                                 // 16 -> 44 bytes of padding needed
667     char pad[44];
668 } __attribute__((packed)) __attribute__((aligned(64)));
669 typedef struct _remote_thread_count_response remote_thread_count_response_t;
670
671 /**
672  * Ask a remote cpu to apply an mprotect to the given region.
673  */
674 struct _mprotect_request {
675     struct pcn_kmsg_hdr header; 
676     int tgroup_home_cpu;        // 4
677     int tgroup_home_id;         // 4
678     int requester_pid;          // 4
679     unsigned long start;        // 8
680     size_t len;                 // 4
681     unsigned long prot;         // 8
682                                 // ---
683                                 // 32 -> 28 bytes of padding needed
684     char pad[28];
685 } __attribute__((packed)) __attribute__((aligned(64)));
686 typedef struct _mprotect_request mprotect_request_t;
687
688 /**
689  * Acknowledge an mprotect request.
690  */
691 struct _mprotect_response {
692     struct pcn_kmsg_hdr header;
693     int tgroup_home_cpu;        // 4
694     int tgroup_home_id;         // 4
695     int requester_pid;          // 4
696     unsigned long start;        // 8
697                                 // ---
698                                 // 20 -> 40 bytes of padding needed
699     char pad[40];
700 } __attribute__((packed)) __attribute__((aligned(64)));
701 typedef struct _mprotect_response mprotect_response_t;
702
703 /**
704  * Carry the register and thread state of a task migrating back to its home cpu.
705  */
706 typedef struct _back_migration {
707     struct pcn_kmsg_hdr header;
708     int tgroup_home_cpu;
709     int tgroup_home_id;
710     int t_home_cpu;
711     int t_home_id;
712     unsigned long previous_cpus;
713     struct pt_regs regs;
714     unsigned long thread_fs;
715     unsigned long thread_gs;
716     unsigned long thread_usersp;
717     unsigned short thread_es;
718     unsigned short thread_ds;
719     unsigned short thread_fsindex;
720     unsigned short thread_gsindex;
721 } back_migration_t;
722
723 /**
724  *
725  */
726 typedef struct _deconstruction_data {
727     int clone_request_id;
728     int vma_id;
729     int dst_cpu;
730 } deconstruction_data_t;
731
732 /**
733  *
734  */
735 typedef struct {
736     struct work_struct work;
737     struct task_struct *task;
738     pid_t pid;
739     int t_home_cpu;
740     int t_home_id;
741     int is_last_tgroup_member;
742     struct pt_regs regs;
743     unsigned long thread_fs;
744     unsigned long thread_gs;
745     unsigned long thread_sp0;
746     unsigned long thread_sp;
747     unsigned long thread_usersp;
748     unsigned short thread_es;
749     unsigned short thread_ds;
750     unsigned short thread_fsindex;
751     unsigned short thread_gsindex;
752 } exit_work_t;
753
754 /**
755  *
756  */
757 typedef struct {
758     struct work_struct work;
759     int tgroup_home_cpu;
760     int tgroup_home_id;
761 } group_exit_work_t;
762
763 /**
764  *
765  */
766 typedef struct {
767     struct work_struct work;
768     int tgroup_home_cpu;
769     int tgroup_home_id;
770     int requester_pid;
771     unsigned long address;
772     int from_cpu;
773 } mapping_request_work_t;
774
775 /**
776  *
777  */
778 typedef struct {
779     struct work_struct work;
780     int tgroup_home_cpu;
781     int tgroup_home_id;
782     int requester_pid;
783     unsigned char from_saved_mm;
784     unsigned long address;      
785     unsigned char present;      
786     unsigned long vaddr_mapping;
787     unsigned long vaddr_start;
788     unsigned long vaddr_size;
789     unsigned long paddr_mapping;
790     size_t paddr_mapping_sz;
791     pgprot_t prot;              
792     unsigned long vm_flags;     
793     char path[512];
794     unsigned long pgoff;
795     int from_cpu;
796 } mapping_response_work_t;
797
798 /**
799  *
800  */
801 typedef struct {
802     struct work_struct work;
803     int tgroup_home_cpu;
804     int tgroup_home_id;
805     int requester_pid;
806     unsigned long address;
807     int from_cpu;
808 } nonpresent_mapping_response_work_t;
809
810 /**
811  *
812  */
813 typedef struct {
814     struct work_struct work;
815     int tgroup_home_cpu;
816     int tgroup_home_id;
817 } tgroup_closed_work_t;
818
819 /**
820  *
821  */
822 typedef struct {
823     struct work_struct work;
824     int tgroup_home_cpu;
825     int tgroup_home_id;
826     int requester_pid;
827     unsigned long vaddr_start;
828     unsigned long vaddr_size;
829     int from_cpu;
830 } munmap_request_work_t;
831
832 /**
833  *
834  */
835 typedef struct {
836     struct work_struct work;
837     int tgroup_home_cpu;
838     int tgroup_home_id;
839     int requester_pid;
840     unsigned long vaddr_start;
841     unsigned long vaddr_size;
842 } munmap_response_work_t;
843
844 /**
845  * 
846  */
847 typedef struct {
848     struct work_struct work;
849     int tgroup_home_cpu;
850     int tgroup_home_id;
851     int requester_pid;
852     unsigned long start;
853     size_t len;
854     unsigned long prot;
855     int from_cpu;
856 } mprotect_work_t;
857
858 /**
859  *
860  */
861 typedef struct {
862     struct work_struct work;
863     int tgroup_home_cpu;
864     int tgroup_home_id;
865     int requester_pid;
866     int from_cpu;
867 } remote_thread_count_request_work_t;
868
869 /**
870  *
871  */
872 typedef struct {
873     struct work_struct work;
874     int tgroup_home_cpu;
875     int tgroup_home_id;
876     int t_home_cpu;
877     int t_home_id;
878     unsigned long previous_cpus;
879     struct pt_regs regs;
880     unsigned long thread_fs;
881     unsigned long thread_gs;
882     unsigned long thread_usersp;
883     unsigned short thread_es;
884     unsigned short thread_ds;
885     unsigned short thread_fsindex;
886     unsigned short thread_gsindex;
887 } back_migration_work_t;
888
889
890 /**
891  * Prototypes
892  */
893 static int handle_clone_request(struct pcn_kmsg_message* msg);
894 long process_server_clone(unsigned long clone_flags,
895                           unsigned long stack_start,                                                                                                                   
896                           struct pt_regs *regs,
897                           unsigned long stack_size,
898                           struct task_struct* task);
899 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start);
900 static clone_data_t* find_clone_data(int cpu, int clone_request_id);
901 static void dump_mm(struct mm_struct* mm);
902 static void dump_task(struct task_struct* task,struct pt_regs* regs,unsigned long stack_ptr);
903 static void dump_thread(struct thread_struct* thread);
904 static void dump_regs(struct pt_regs* regs);
905 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr); 
906
907 /**
908  * Prototypes from parts of the kernel that I modified or made available to external
909  * modules.
910  */
911 // I removed the 'static' modifier in mm/memory.c for do_wp_page so I could use it 
912 // here.
913 int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
914                unsigned long address, pte_t *page_table, pmd_t *pmd,
915                spinlock_t *ptl, pte_t orig_pte);
916 int do_mprotect(struct task_struct* task, unsigned long start, size_t len, unsigned long prot, int do_remote);
917
918 /**
919  * Module variables
920  */
921 static int _vma_id = 0;
922 static int _clone_request_id = 0;
923 static int _cpu = -1;
924 static unsigned long long perf_a, perf_b, perf_c, perf_d, perf_e;
925 data_header_t* _saved_mm_head = NULL;             // Saved MM list
926 DEFINE_SPINLOCK(_saved_mm_head_lock);             // Lock for _saved_mm_head
927 data_header_t* _mapping_request_data_head = NULL; // Mapping request data head
928 DEFINE_SPINLOCK(_mapping_request_data_head_lock);  // Lock for above
929 data_header_t* _count_remote_tmembers_data_head = NULL;
930 DEFINE_SPINLOCK(_count_remote_tmembers_data_head_lock);
931 data_header_t* _munmap_data_head = NULL;
932 DEFINE_SPINLOCK(_munmap_data_head_lock);
933 data_header_t* _mprotect_data_head = NULL;
934 DEFINE_SPINLOCK(_mprotect_data_head_lock);
935 data_header_t* _data_head = NULL;                 // General purpose data store
936 DEFINE_SPINLOCK(_data_head_lock);                 // Lock for _data_head
937 DEFINE_SPINLOCK(_vma_id_lock);                    // Lock for _vma_id
938 DEFINE_SPINLOCK(_clone_request_id_lock);          // Lock for _clone_request_id
939 struct rw_semaphore _import_sem;
940 DEFINE_SPINLOCK(_remap_lock);
941
942
943 // Work Queues
944 static struct workqueue_struct *clone_wq;
945 static struct workqueue_struct *exit_wq;
946 static struct workqueue_struct *mapping_wq;
947
948 /**
949  * General helper functions and debugging tools
950  */
951
952 /**
953  * @brief Return true if the given address lies in user space.
954  */
955 static bool __user_addr (unsigned long x ) {
956     return (x < PAGE_OFFSET);   
957 }
958
959 // TODO: cpu_has_known_tgroup_mm must be reworked, i.e. the map should be shared by (pointed to from) the threads rather than copied per thread; per-thread copies hurt scaling and duplicate information.
960 /**
961  * @brief Return nonzero if the given cpu is already known to have a copy of the current thread group's mm.
962  */
963 static int cpu_has_known_tgroup_mm(int cpu)
964 {
965 #ifdef SUPPORT_FOR_CLUSTERING
966     struct list_head *iter;
967     _remote_cpu_info_list_t *objPtr;
968     struct cpumask *pcpum =0;
969     int cpuid =-1;
970     if (cpumask_test_cpu(cpu, cpu_present_mask))
971         return 1;
972     list_for_each(iter, &rlist_head) {
973         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
974         cpuid = objPtr->_data._processor;
975         pcpum = &(objPtr->_data._cpumask);
976         if (cpumask_test_cpu(cpu, pcpum)) {
977             if ( bitmap_intersects(cpumask_bits(pcpum),
978                                    &(current->known_cpu_with_tgroup_mm),
979                                    (sizeof(unsigned long) *8)) ) {
980                 return 1;
981             }
982             return 0;
983         }
984     }
985     printk(KERN_ERR"%s: ERROR the input cpu (%d) is not included in any known cpu cluster\n",
986                 __func__, cpu);
987     return 0;
988 #else
989     if(test_bit(cpu,&current->known_cpu_with_tgroup_mm)) {
990         return 1;
991     }
992     return 0;
993 #endif
994 }
995
996 /**
997  * @brief Record, for every thread in the group, that the given cpu now knows about the group's mm.
998  */
999 static void set_cpu_has_known_tgroup_mm(struct task_struct *task,int cpu) {
1000     struct task_struct *me = task;
1001     struct task_struct *t = me;
1002     do {
1003         set_bit(cpu,&t->known_cpu_with_tgroup_mm);
1004     } while_each_thread(me, t);
1005 }
1006
1007 /**
1008  * @brief find_vma does not always return the correct vm_area_struct*.
1009  * If it fails to find a vma for the specified address, it instead
1010  * returns the closest one in the rb list.  This function looks
1011  * for this failure, and returns NULL in this error condition.
1012  * Otherwise, it returns a pointer to the struct vm_area_struct
1013  * containing the specified address.
1014  */
1015 static struct vm_area_struct* find_vma_checked(struct mm_struct* mm, unsigned long address) {
1016     struct vm_area_struct* vma = find_vma(mm,address&PAGE_MASK);
1017     if( vma == NULL ||
1018         (vma->vm_start > (address & PAGE_MASK)) ||
1019         (vma->vm_end <= address) ) {
1020         
1021         vma = NULL;
1022     }
1023
1024     return vma;
1025 }
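/*
 * Illustration (hypothetical layout): with VMAs covering [0x1000,0x2000) and
 * [0x5000,0x6000), find_vma(mm, 0x3000) returns the [0x5000,0x6000) vma even
 * though it does not contain 0x3000, because find_vma returns the first vma
 * whose vm_end lies above the address.  find_vma_checked() detects that case
 * and returns NULL instead.
 */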
1026
1027 /**
1028  * Note, mm->mmap_sem must already be held!
1029  */
1030 /*static int is_mapped(struct mm_struct* mm, unsigned vaddr) {
1031     pte_t* pte = NULL;
1032     pmd_t* pmd = NULL;
1033     pud_t* pud = NULL;
1034     pgd_t* pgd = NULL;
1035     int ret = 0;
1036
1037     pgd = pgd_offset(mm, vaddr);
1038     if(pgd_present(*pgd)) {
1039         pud = pud_offset(pgd,vaddr); 
1040         if(pud_present(*pud)) {
1041             pmd = pmd_offset(pud,vaddr);
1042             if(pmd_present(*pmd)) {
1043                 pte = pte_offset_map(pmd,vaddr);
1044                 if(pte && !pte_none(*pte)) {
1045                     // It exists!
1046                     ret = 1;
1047                 }
1048             }
1049         }
1050     }
1051     return ret;
1052
1053 }*/
1054 /* Antonio's Version
1055 static int is_mapped(struct mm_struct* mm, unsigned vaddr)
1056 {
1057     pte_t* pte = NULL;
1058     pmd_t* pmd = NULL;
1059     pud_t* pud = NULL;
1060     pgd_t* pgd = NULL;
1061     pgd = pgd_offset(mm, vaddr);
1062     if (pgd && !pgd_none(*pgd) && likely(!pgd_bad(*pgd)) && pgd_present(*pgd)) {
1063       pud = pud_offset(pgd,vaddr);
1064       if (pud && !pud_none(*pud) && likely(!pud_bad(*pud)) && pud_present(*pud)) {
1065         pmd = pmd_offset(pud,vaddr);
1066         if (pmd && !pmd_none(*pmd) && likely(!pmd_bad(*pmd)) && pmd_present(*pmd)) {
1067           pte = pte_offset_map(pmd,vaddr);
1068           if (pte && !pte_none(*pte) && pte_present(*pte)) {
1069             return 1; // It exists!
1070           }
1071         }
1072       }
1073     }
1074     return 0;
1075 }
1076 */
1077
1078 /**
1079  * @brief Find the mm_struct for a given distributed thread.  
1080  * If one does not exist, then return NULL.
1081  */
1082 static struct mm_struct* find_thread_mm(
1083         int tgroup_home_cpu, 
1084         int tgroup_home_id, 
1085         mm_data_t **used_saved_mm,
1086         struct task_struct** task_out)
1087 {
1088
1089     struct task_struct *task, *g;
1090     struct mm_struct * mm = NULL;
1091     data_header_t* data_curr;
1092     mm_data_t* mm_data;
1093     unsigned long lockflags;
1094
1095     *used_saved_mm = NULL;
1096     *task_out = NULL;
1097
1098     // First, look through all active processes.
1099     do_each_thread(g,task) {
1100         if(task->tgroup_home_cpu == tgroup_home_cpu &&
1101            task->tgroup_home_id  == tgroup_home_id) {
1102             mm = task->mm;
1103             *task_out = task;
1104             *used_saved_mm = NULL;
1105             goto out;
1106         }
1107     } while_each_thread(g,task);
1108
1109     // Failing that, look through saved mm's.
1110     spin_lock_irqsave(&_saved_mm_head_lock,lockflags);
1111     data_curr = _saved_mm_head;
1112     while(data_curr) {
1113
1114         mm_data = (mm_data_t*)data_curr;
1115     
1116         if((mm_data->tgroup_home_cpu == tgroup_home_cpu) &&
1117            (mm_data->tgroup_home_id  == tgroup_home_id)) {
1118             mm = mm_data->mm;
1119             *used_saved_mm = mm_data;
1120             break;
1121         }
1122
1123         data_curr = data_curr->next;
1124
1125     } // while
1126
1127     spin_unlock_irqrestore(&_saved_mm_head_lock,lockflags);
1128
1129
1130 out:
1131     return mm;
1132 }
1133
1134 /**
1135  * @brief A best effort at making a page writable
1136  * @return void
1137  */
1138 static void mk_page_writable(struct mm_struct* mm,
1139                              struct vm_area_struct* vma,
1140                              unsigned long vaddr) {
1141     spinlock_t* ptl;
1142     pte_t *ptep, pte, entry;
1143      
1144     // Grab the pte, and lock it     
1145     ptep = get_locked_pte(mm, vaddr, &ptl);
1146     if (!ptep)
1147         goto out;
1148
1149     // grab the contents of the pte pointer
1150     pte = *ptep;
1151     
1152     if(pte_none(*ptep)) {
1153         pte_unmap_unlock(ptep, ptl);
1154         goto out;
1155     }
1156
1157     arch_enter_lazy_mmu_mode();
1158
1159     // Make the content copy writable and dirty, then
1160     // write it back into the page tables.
1161     entry = pte_mkwrite(pte_mkdirty(pte));
1162     set_pte_at(mm, vaddr, ptep, entry);
1163
1164     update_mmu_cache(vma, vaddr, ptep);
1165
1166     arch_leave_lazy_mmu_mode();
1167
1168     // Unlock the pte
1169     pte_unmap_unlock(ptep, ptl);
1170 out:
1171     return;
1172 }
1173
1174 /**
1175  * @brief Check to see if a given page is writable.
1176  * @return 0 if not writable or error, not zero otherwise
1177  */
1178 static int is_page_writable(struct mm_struct* mm,
1179                             struct vm_area_struct* vma,
1180                             unsigned long addr) {
1181     spinlock_t* ptl;
1182     pte_t *ptep, pte;
1183     int ret = 0;
1184
1185     ptep = get_locked_pte(mm,addr,&ptl);
1186     if(!ptep)
1187         goto out;
1188
1189     pte = *ptep;
1190     
1191     if(pte_none(*ptep)) {
1192         pte_unmap_unlock(ptep, ptl);
1193         ret = -1;
1194         goto out;
1195     }
1196
1197     ret = pte_write(pte);
1198
1199     pte_unmap_unlock(ptep, ptl);
1200
1201 out:
1202     return ret;
1203 }
1204
1205 /**
1206  * @brief Get the clone data associated with the current task.
1207  * @return clone_data_t* or NULL if not present
1208  */
1209 static clone_data_t* get_current_clone_data(void) {
1210     clone_data_t* ret = NULL;
1211
1212     if(!current->clone_data) {
1213         // Do costly lookup
1214         ret = find_clone_data(current->prev_cpu,
1215                                  current->clone_request_id);
1216         // Store it for easy access next time.
1217         current->clone_data = ret;
1218     } else {
1219         ret = (clone_data_t*)current->clone_data;
1220     }
1221
1222     return ret;
1223 }
1224
1225
1226 /**
1227  * @brief Page walk has encountered a pte while deconstructing
1228  * the client-side process's address space.  Transfer it.
1229  */
1230 /*static int deconstruction_page_walk_pte_entry_callback(pte_t *pte, 
1231         unsigned long start, unsigned long end, struct mm_walk *walk) {
1232
1233     deconstruction_data_t* decon_data = (deconstruction_data_t*)walk->private;
1234     int vma_id = decon_data->vma_id;
1235     int dst_cpu = decon_data->dst_cpu;
1236     int clone_request_id = decon_data->clone_request_id;
1237     pte_transfer_t pte_xfer;
1238
1239     if(NULL == pte || !pte_present(*pte)) {
1240         return 0;
1241     }
1242
1243     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1244     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1245     pte_xfer.paddr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1246     // NOTE: Found the above pte to paddr conversion here -
1247     // http://wbsun.blogspot.com/2010/12/convert-userspace-virtual-address-to.html
1248     pte_xfer.vaddr = start;
1249     pte_xfer.vma_id = vma_id;
1250     pte_xfer.clone_request_id = clone_request_id;
1251     pte_xfer.pfn = pte_pfn(*pte);
1252     PSPRINTK("Sending PTE\n"); 
1253     DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&pte_xfer));
1254
1255     return 0;
1256 }*/
1257
1258 /**
1259  * @brief Callback used when walking a memory map.  It looks to see
1260  * if the page is present.  If present, it resolves the given
1261  * address.
1262  * @return always returns 0
1263  */
1264 static int vm_search_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, unsigned long end, struct mm_walk *walk) {
1265  
1266     unsigned long* resolved_addr = (unsigned long*)walk->private;
1267
1268     if (pte == NULL || pte_none(*pte) || !pte_present(*pte)) {
1269         return 0;
1270     }
1271
1272     // Store the resolved address in the address
1273     // pointed to by the private field of the walk
1274     // structure.  This is checked by the caller
1275     // of the walk function when the walk is complete.
1276     *resolved_addr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1277     return 0;
1278 }
1279
1280 /**
1281  * @brief Retrieve the physical address of the specified virtual address.
1282  * @return -1 indicates failure.  Otherwise, 0 is returned.
1283  */
1284 static int get_physical_address(struct mm_struct* mm, 
1285                                 unsigned long vaddr,
1286                                 unsigned long* paddr) {
1287     unsigned long resolved = 0;
1288     struct mm_walk walk = {
1289         .pte_entry = vm_search_page_walk_pte_entry_callback,
1290         .private = &(resolved),
1291         .mm = mm
1292     };
1293
1294     // Walk the page tables.  The walk handler modifies the
1295     // resolved variable if it finds the address.
1296     walk_page_range(vaddr & PAGE_MASK, (vaddr & PAGE_MASK) + PAGE_SIZE, &walk);
1297     if(resolved == 0) {
1298         return -1;
1299     }
1300
1301     // Set the output
1302     *paddr = resolved;
1303
1304     return 0;
1305 }
1306
1307 /**
1308  * Check to see if the specified virtual address has a 
1309  * corresponding physical address mapped to it.
1310  * @return 0 = no mapping, 1 = mapping present
1311  */
1312 static int is_vaddr_mapped(struct mm_struct* mm, unsigned long vaddr) {
1313     unsigned long resolved = 0;
1314     struct mm_walk walk = {
1315         .pte_entry = vm_search_page_walk_pte_entry_callback,
1316         .private = &(resolved),
1317         .mm = mm
1318     };
1319
1320     // Walk the page tables.  The walk handler will set the
1321     // resolved variable if it finds the mapping.  
1322     walk_page_range(vaddr & PAGE_MASK, ( vaddr & PAGE_MASK ) + PAGE_SIZE, &walk);
1323     if(resolved != 0) {
1324         return 1;
1325     }
1326     return 0;
1327 }
1328
1329 /**
1330  *  @brief Find the bounds of a physically consecutive mapped region.
1331  *  The region must be contained within the specified VMA.
1332  *
1333  *  Hypothetical page table mappings for a given VMA:
1334  *
1335  *  *********************************
1336  *  *    Vaddr      *   Paddr       *
1337  *  *********************************
1338  *  * 0x10000000    * 0x12341000    *
1339  *  *********************************
1340  *  * 0x10001000    * 0x12342000    *
1341  *  *********************************
1342  *  * 0x10002000    * 0x12343000    *
1343  *  *********************************
1344  *  * 0x10003000    * 0x43214000    *
1345  *  *********************************
1346  *  
1347  *  This function, given a vaddr of 0x10001xxx (which maps to paddr 0x12342xxx), will return:
1348  *  *vaddr_mapping_start = 0x10000000
1349  *  *paddr_mapping_start = 0x12341000
1350  *  *paddr_mapping_sz    = 0x3000
1351  *
1352  *  Notice 0x10003000 and above is not included in the returned region, as
1353  *  its paddr is not consecutive with the previous mappings.
1354  *
1355  */
1356 int find_consecutive_physically_mapped_region(struct mm_struct* mm,
1357                                               struct vm_area_struct* vma,
1358                                               unsigned long vaddr,
1359                                               unsigned long* vaddr_mapping_start,
1360                                               unsigned long* paddr_mapping_start,
1361                                               size_t* paddr_mapping_sz)
1362 {
1363     unsigned long paddr_curr = 0l;
1364     unsigned long vaddr_curr = vaddr;
1365     unsigned long vaddr_next = vaddr;
1366     unsigned long paddr_next = 0l;
1367     unsigned long paddr_start = 0l;
1368     size_t sz = 0;
1369
1370     
1371     // Initializes paddr_curr
1372     if(get_physical_address(mm,vaddr_curr,&paddr_curr) < 0) {
1373         return -1;
1374     }
1375     paddr_start = paddr_curr;
1376     *vaddr_mapping_start = vaddr_curr;
1377     *paddr_mapping_start = paddr_curr;
1378     
1379     sz = PAGE_SIZE;
1380
1381     // seek up in memory
1382     // This stretches (sz) only, leaving
1383     // the vaddr and paddr starting points the same
1384     while(1) {
1385         vaddr_next += PAGE_SIZE;
1386         
1387         // don't go past the end of the vma
1388         if(vaddr_next >= vma->vm_end) {
1389             break;
1390         }
1391
1392         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1393             break;
1394         }
1395
1396         if(paddr_next == paddr_curr + PAGE_SIZE) {
1397             sz += PAGE_SIZE;
1398             paddr_curr = paddr_next;
1399         } else {
1400             break;
1401         }
1402     }
1403
1404     // seek down in memory
1405     // This stretches sz and also moves the vaddr and paddr starting points down
1406     vaddr_curr = vaddr;
1407     paddr_curr = paddr_start; 
1408     vaddr_next = vaddr_curr;
1409     while(1) {
1410         vaddr_next -= PAGE_SIZE;
1411
1412         // don't go past the start of the vma
1413         if(vaddr_next < vma->vm_start) {
1414             break;
1415         }
1416
1417         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1418             break;
1419         }
1420
1421         if(paddr_next == (paddr_curr - PAGE_SIZE)) {
1422             vaddr_curr = vaddr_next;
1423             paddr_curr = paddr_next;
1424             sz += PAGE_SIZE;
1425         } else {
1426             break;
1427         }
1428     }
1429    
1430     *vaddr_mapping_start = vaddr_curr;
1431     *paddr_mapping_start = paddr_curr;
1432     *paddr_mapping_sz = sz;
1433
1434     PSPRINTK("%s: found consecutive area- vaddr{%lx}, paddr{%lx}, sz{%zu}\n",
1435                 __func__,
1436                 *vaddr_mapping_start,
1437                 *paddr_mapping_start,
1438                 *paddr_mapping_sz);
1439
1440     return 0;
1441 }
1442
1443 /**
1444  * @brief Find the preceding physically consecutive region.  This is a region
1445  * that starts BEFORE the specified vaddr.  The region must be contained 
1446  * within the specified VMA.
1447  */
1448 int find_prev_consecutive_physically_mapped_region(struct mm_struct* mm,
1449                                               struct vm_area_struct* vma,
1450                                               unsigned long vaddr,
1451                                               unsigned long* vaddr_mapping_start,
1452                                               unsigned long* paddr_mapping_start,
1453                                               size_t* paddr_mapping_sz) {
1454     unsigned long curr_vaddr_mapping_start;
1455     unsigned long curr_paddr_mapping_start;
1456     unsigned long curr_paddr_mapping_sz;
1457     unsigned long curr_vaddr = vaddr;
1458     int ret = -1;
1459
1460     if(curr_vaddr < vma->vm_start) return -1;
1461
1462     do {
1463         int res = find_consecutive_physically_mapped_region(mm,
1464                                                      vma,
1465                                                      curr_vaddr,
1466                                                      &curr_vaddr_mapping_start,
1467                                                      &curr_paddr_mapping_start,
1468                                                      &curr_paddr_mapping_sz);
1469         if(0 == res) {
1470
1471             // this is a match, we can store off results and exit
1472             ret = 0;
1473             *vaddr_mapping_start = curr_vaddr_mapping_start;
1474             *paddr_mapping_start = curr_paddr_mapping_start;
1475             *paddr_mapping_sz    = curr_paddr_mapping_sz;
1476             break;
1477         }
1478
1479         curr_vaddr -= PAGE_SIZE;
1480     } while (curr_vaddr >= vma->vm_start);
1481
1482     return ret;
1483
1484 }
1485 /**
1486  * @brief Find the next physically consecutive region.  This is a region
1487  * that starts AFTER the specified vaddr.  The region must be contained
1488  * within the specified VMA.
1489  */
1490 int find_next_consecutive_physically_mapped_region(struct mm_struct* mm,
1491                                               struct vm_area_struct* vma,
1492                                               unsigned long vaddr,
1493                                               unsigned long* vaddr_mapping_start,
1494                                               unsigned long* paddr_mapping_start,
1495                                               size_t* paddr_mapping_sz) {
1496     unsigned long curr_vaddr_mapping_start;
1497     unsigned long curr_paddr_mapping_start;
1498     unsigned long curr_paddr_mapping_sz;
1499     unsigned long curr_vaddr = vaddr;
1500     int ret = -1;
1501
1502     if(curr_vaddr >= vma->vm_end) return -1;
1503
1504     do {
1505         int res = find_consecutive_physically_mapped_region(mm,
1506                                                      vma,
1507                                                      curr_vaddr,
1508                                                      &curr_vaddr_mapping_start,
1509                                                      &curr_paddr_mapping_start,
1510                                                      &curr_paddr_mapping_sz);
1511         if(0 == res) {
1512
1513             // this is a match, we can store off results and exit
1514             ret = 0;
1515             *vaddr_mapping_start = curr_vaddr_mapping_start;
1516             *paddr_mapping_start = curr_paddr_mapping_start;
1517             *paddr_mapping_sz    = curr_paddr_mapping_sz;
1518             break;
1519         }
1520
1521         curr_vaddr += PAGE_SIZE;
1522     } while (curr_vaddr < vma->vm_end);
1523
1524     return ret;
1525
1526 }
1527
1528 /**
1529  *  @brief Fill the array with as many physically consecutive regions
1530  *  as are present and will fit (specified by arr_sz).
1531  */
1532 int fill_physical_mapping_array(struct mm_struct* mm,
1533         struct vm_area_struct* vma,
1534         unsigned long address,
1535         contiguous_physical_mapping_t* mappings, 
1536         int arr_sz) {
1537     int i;
1538     unsigned long next_vaddr = address & PAGE_MASK;
1539     int ret = -1;
1540     unsigned long smallest_in_first_round = next_vaddr;
1541
1542     PSPRINTK("%s: entered\n",__func__);
1543
1544     for(i = 0; i < arr_sz; i++) 
1545         mappings[i].present = 0;
1546
1547     for(i = 0; i < arr_sz && next_vaddr < vma->vm_end; i++) {
1548         int valid_mapping = find_next_consecutive_physically_mapped_region(mm,
1549                                             vma,
1550                                             next_vaddr,
1551                                             &mappings[i].vaddr,
1552                                             &mappings[i].paddr,
1553                                             &mappings[i].sz);
1554
1555
1556         if(valid_mapping == 0) {
1557             PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
1558             if(address >= mappings[i].vaddr && 
1559                     address < mappings[i].vaddr + mappings[i].sz)
1560                 ret = 0;
1561
1562             if(mappings[i].vaddr < smallest_in_first_round)
1563                 smallest_in_first_round = mappings[i].vaddr;
1564
1565             mappings[i].present = 1;
1566             next_vaddr = mappings[i].vaddr + mappings[i].sz;
1567
1568         } else {
1569             PSPRINTK("%s: up search ended in failure, resuming down search\n",
1570                     __func__);
1571             mappings[i].present = 0;
1572             mappings[i].vaddr = 0;
1573             mappings[i].paddr = 0;
1574             mappings[i].sz = 0;
1575             break;
1576         }
1577     }
1578
1579     // If we have room left, go in the opposite direction
1580     if(i <= arr_sz -1) {
1581         next_vaddr = smallest_in_first_round - PAGE_SIZE;
1582         for(;i < arr_sz && next_vaddr >= vma->vm_start; i++) {
1583             int valid_mapping = find_prev_consecutive_physically_mapped_region(mm,
1584                                             vma,
1585                                             next_vaddr,
1586                                             &mappings[i].vaddr,
1587                                             &mappings[i].paddr,
1588                                             &mappings[i].sz);
1589             if(valid_mapping == 0) {
1590                 PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
1591                 mappings[i].present = 1;
1592                 next_vaddr = mappings[i].vaddr - PAGE_SIZE;
1593             } else {
1594                 mappings[i].present = 0;
1595                 mappings[i].vaddr = 0;
1596                 mappings[i].paddr = 0;
1597                 mappings[i].sz = 0;
1598                 break;
1599             }
1600         }
1601     }
1602
1603     // Trim any entries that extend beyond the boundaries of the vma
1604     for(i = 0; i < MAX_MAPPINGS; i++) {
1605         if(mappings[i].present) {
1606             if(mappings[i].vaddr < vma->vm_start) {
1607                 unsigned long sz_diff = vma->vm_start - mappings[i].vaddr;
1608                 PSPRINTK("Trimming mapping, since it starts too low in memory\n");
1609                 if(mappings[i].sz > sz_diff) {
1610                     mappings[i].sz -= sz_diff;
1611                     mappings[i].vaddr = vma->vm_start;
1612                 } else {
1613                     mappings[i].present = 0;
1614                     mappings[i].vaddr = 0;
1615                     mappings[i].paddr = 0;
1616                     mappings[i].sz = 0;
1617                 }
1618             }
1619
1620             if(mappings[i].vaddr + mappings[i].sz >= vma->vm_end) {
1621                 unsigned long sz_diff = mappings[i].vaddr + 
1622                                         mappings[i].sz - 
1623                                         vma->vm_end;
1624                 PSPRINTK("Trimming mapping, since it ends too high in memory\n");
1625                 if(mappings[i].sz > sz_diff) {
1626                     mappings[i].sz -= sz_diff;
1627                 } else {
1628                     mappings[i].present = 0;
1629                     mappings[i].vaddr = 0;
1630                     mappings[i].paddr = 0;
1631                     mappings[i].sz = 0;
1632                 }
1633             }
1634         }
1635     }
1636
1637     // Clear out what we just did
1638     if(ret == -1) {
1639         PSPRINTK("%s: zeroing out responses, due to an error\n",__func__);
1640         for(i = 0; i < arr_sz; i++)
1641             mappings[i].present = 0;
1642     }
1643
1644     PSPRINTK("%s: exiting\n",__func__);
1645
1646     return ret;
1647 }
1648
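/*
 * Illustrative sketch (not compiled in): how a caller is expected to use
 * fill_physical_mapping_array() to populate the prefetch array of a
 * mapping response, mirroring process_mapping_request() further below.
 * Assumes mm->mmap_sem is already held and that "response" points to a
 * mapping_response_t whose mappings[] array has MAX_MAPPINGS slots.
 */
#if 0
{
    int i;
    if (0 != fill_physical_mapping_array(mm, vma, address,
                                         &response->mappings[0],
                                         MAX_MAPPINGS)) {
        // On failure, report no mappings rather than partial results.
        for (i = 0; i < MAX_MAPPINGS; i++)
            response->mappings[i].present = 0;
    }
}
#endif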
1649 /**
1650  * @brief Call remap_pfn_range on the parts of the specified virtual-physical
1651  * region that are not already mapped.
1652  * @precondition mm->mmap_sem must already be held by caller.
1653  */
1654 int remap_pfn_range_remaining(struct mm_struct* mm,
1655                                   struct vm_area_struct* vma,
1656                                   unsigned long vaddr_start,
1657                                   unsigned long paddr_start,
1658                                   size_t sz,
1659                                   pgprot_t prot,
1660                                   int make_writable) {
1661     unsigned long vaddr_curr;
1662     unsigned long paddr_curr = paddr_start;
1663     int ret = 0, val;
1664     int err;
1665
1666     PSPRINTK("%s: entered vaddr_start{%lx}, paddr_start{%lx}, sz{%x}\n",
1667             __func__,
1668             vaddr_start,
1669             paddr_start,
1670             sz);
1671
1672     for(vaddr_curr = vaddr_start; 
1673         vaddr_curr < vaddr_start + sz; 
1674         vaddr_curr += PAGE_SIZE) {
1675         if( !(val = is_vaddr_mapped(mm,vaddr_curr)) ) {
1676             //PSPRINTK("%s: mapping vaddr{%lx} paddr{%lx}\n",__func__,vaddr_curr,paddr_curr);
1677             // not mapped - map it
1678             err = remap_pfn_range(vma,
1679                                   vaddr_curr,
1680                                   paddr_curr >> PAGE_SHIFT,
1681                                   PAGE_SIZE,
1682                                   prot);
1683             if(err == 0) {
1684                 if(make_writable && vma->vm_flags & VM_WRITE) {
1685                     mk_page_writable(mm, vma, vaddr_curr);
1686                 }
1687             } else {
1688                 printk(KERN_ALERT"%s: ERROR mapping %lx to %lx with err{%d}\n",
1689                             __func__, vaddr_curr, paddr_curr, err);
1690             }
1691
1692             if( err != 0 ) ret = err;
1693         }
1694         else
1695             PSPRINTK("%s: is_vaddr_mapped %d, start:%lx end:%lx\n",
1696                     __func__, val, vma->vm_start, vma->vm_end);
1697
1698         paddr_curr += PAGE_SIZE;
1699     }
1700
1701     PSPRINTK("%s: exiting\n",__func__);
1702
1703     return ret;
1704 }
1705
1706
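/*
 * Illustrative sketch (not compiled in): mapping a contiguous physical
 * region received from a remote kernel into the local address space.
 * "pte_xfer" and "vma" are placeholders for caller-provided state; per
 * the precondition above, mm->mmap_sem is taken for write around the call.
 */
#if 0
{
    int err;
    PS_DOWN_WRITE(&mm->mmap_sem);
    err = remap_pfn_range_remaining(mm, vma,
                                    pte_xfer->vaddr_start,
                                    pte_xfer->paddr_start,
                                    pte_xfer->sz,
                                    vma->vm_page_prot,
                                    1 /* make_writable */);
    PS_UP_WRITE(&mm->mmap_sem);
    if (err)
        printk(KERN_ALERT"%s: remap failed with err{%d}\n", __func__, err);
}
#endif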
1707 /**
1708  * @brief Map, but only in areas that do not currently have mappings.
1709  * This should extend vmas that are adjacent as necessary.
1710  * NOTE: current->enable_do_mmap_pgoff_hook must be disabled
1711  *       by client code before calling this.
1712  * NOTE: mm->mmap_sem must already be held by client code.
1713  * NOTE: entries in the per-mm list of vm_area_structs are
1714  *       ordered by starting address.  This allows the scan
1715  *       below to exit early in some cases.
1716  */
1717 #define FORCE_NODEBUG
1718 #ifndef FORCE_NODEBUG
1719 #define DBGPSPRINTK(...) { if (dbg ==1) printk(KERN_ALERT __VA_ARGS__); }
1720 #else
1721 #define DBGPSPRINTK(...) ;
1722 #endif
1723 unsigned long do_mmap_remaining(struct file *file, unsigned long addr,
1724                                 unsigned long len, unsigned long prot,
1725                                 unsigned long flags, unsigned long pgoff, int dbg) {
1726     unsigned long ret = addr;
1727     unsigned long start = addr;
1728     unsigned long local_end = start;
1729     unsigned long end = addr + len;
1730     struct vm_area_struct* curr;
1731     unsigned long error;
1732
1733     // go through ALL vma's, looking for interference with this space.
1734     curr = current->mm->mmap;
1735     DBGPSPRINTK("%s: processing {%lx,%lx}\n",__func__,addr,len);
1736
1737     while(1) {
1738
1739         if(start >= end) goto done;
1740
1741         // We've reached the end of the list
1742         else if(curr == NULL) {
1743             // map through the end
1744             DBGPSPRINTK("%s: curr == NULL - mapping {%lx,%lx}\n",
1745                     __func__,start,end-start);
1746             error=do_mmap(file, start, end - start, prot, flags, pgoff); 
1747             if (error != start)
1748                 printk(KERN_ALERT"%s_1: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1749             goto done;
1750         }
1751
1752         // the VMA is fully above the region of interest
1753         else if(end <= curr->vm_start) {
1754             // map the rest of the region (start through end)
1755             DBGPSPRINTK("%s: VMA is fully above the region of interest - mapping {%lx,%lx}\n",
1756                     __func__,start,end-start);
1757             error=do_mmap(file, start, end - start, prot, flags, pgoff);
1758             if (error != start)
1759                 printk(KERN_ALERT"%s_2: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1760             goto done;
1761         }
1762
1763         // the VMA fully encompasses the region of interest
1764         else if(start >= curr->vm_start && end <= curr->vm_end) {
1765             // nothing to do
1766             DBGPSPRINTK("%s: VMA fully encompasses the region of interest\n",__func__);
1767             goto done;
1768         }
1769
1770         // the VMA is fully below the region of interest
1771         else if(curr->vm_end <= start) {
1772             // move on to the next one
1773             DBGPSPRINTK("%s: VMA is fully below region of interest\n",__func__);
1774         }
1775
1776         // the VMA includes the start of the region of interest 
1777         // but not the end
1778         else if (start >= curr->vm_start && 
1779                  start < curr->vm_end &&
1780                  end > curr->vm_end) {
1781             // advance start (no mapping to do) 
1782             start = curr->vm_end;
1783             local_end = start;
1784             DBGPSPRINTK("%s: VMA includes start but not end\n",__func__);
1785         }
1786
1787         // the VMA includes the end of the region of interest
1788         // but not the start
1789         else if(start < curr->vm_start && 
1790                 end <= curr->vm_end &&
1791                 end > curr->vm_start) {
1792             local_end = curr->vm_start;
1793             
1794             // mmap through local_end
1795             DBGPSPRINTK("%s: VMA includes end but not start - mapping {%lx,%lx}\n",
1796                     __func__,start, local_end - start);
1797             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
1798             if (error != start)
1799                 printk(KERN_ALERT"%s_3: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1800
1801             // Then we're done
1802             goto done;
1803         }
1804
1805         // the VMA is fully within the region of interest
1806         else if(start <= curr->vm_start && end >= curr->vm_end) {
1807             // advance local end
1808             local_end = curr->vm_start;
1809
1810             // map the difference
1811             DBGPSPRINTK("%s: VMA is fully within the region of interest - mapping {%lx,%lx}\n",
1812                     __func__,start, local_end - start);
1813             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
1814             if (error != start)
1815                 printk(KERN_ALERT"%s_4: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1816
1817             // Then advance to the end of this vma
1818             start = curr->vm_end;
1819             local_end = start;
1820         }
1821
1822         curr = curr->vm_next;
1823
1824     }
1825
1826 done:
1827     
1828     DBGPSPRINTK("%s: exiting ret:%lx\n",__func__, ret);
1829     return ret;
1830 }
1831
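/*
 * Illustrative sketch (not compiled in): invoking do_mmap_remaining() per
 * the NOTEs above - the do_mmap_pgoff hook is disabled and mmap_sem is
 * held across the call.  "vma_start"/"vma_end" and the PROT_ and MAP_
 * flag values are placeholders; an anonymous private mapping is assumed.
 */
#if 0
{
    unsigned long addr;
    current->enable_do_mmap_pgoff_hook = 0;
    PS_DOWN_WRITE(&current->mm->mmap_sem);
    addr = do_mmap_remaining(NULL, vma_start, vma_end - vma_start,
                             PROT_READ | PROT_WRITE,
                             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE,
                             0 /* pgoff */, 0 /* dbg */);
    PS_UP_WRITE(&current->mm->mmap_sem);
    current->enable_do_mmap_pgoff_hook = 1;
}
#endif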
1832 static void send_pte(unsigned long paddr_start,
1833         unsigned long vaddr_start, 
1834         size_t sz, 
1835         int dst,
1836         int vma_id,
1837         int clone_request_id) {
1838
1839     pte_transfer_t pte_xfer;
1840     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1841     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1842     pte_xfer.paddr_start = paddr_start;
1843     pte_xfer.vaddr_start = vaddr_start;
1844     pte_xfer.sz = sz;
1845     pte_xfer.clone_request_id = clone_request_id;
1846     pte_xfer.vma_id = vma_id;
1847     pcn_kmsg_send(dst, (struct pcn_kmsg_message *)&pte_xfer);
1848 }
1849
1850 static void send_vma(struct mm_struct* mm,
1851         struct vm_area_struct* vma, 
1852         int dst,
1853         int clone_request_id) {
1854     char lpath[256];
1855     char *plpath;
1856     vma_transfer_t* vma_xfer = kmalloc(sizeof(vma_transfer_t),GFP_KERNEL);
1857     vma_xfer->header.type = PCN_KMSG_TYPE_PROC_SRV_VMA_TRANSFER;  
1858     vma_xfer->header.prio = PCN_KMSG_PRIO_NORMAL;
1859     
1860     if(vma->vm_file == NULL) {
1861         vma_xfer->path[0] = '\0';
1862     } else {
1863         plpath = d_path(&vma->vm_file->f_path,
1864                 lpath,256);
1865         strcpy(vma_xfer->path,plpath);
1866     }
1867
1868     //
1869     // Transfer the vma
1870     //
1871     PS_SPIN_LOCK(&_vma_id_lock);
1872     vma_xfer->vma_id = _vma_id++;
1873     PS_SPIN_UNLOCK(&_vma_id_lock);
1874     vma_xfer->start = vma->vm_start;
1875     vma_xfer->end = vma->vm_end;
1876     vma_xfer->prot = vma->vm_page_prot;
1877     vma_xfer->clone_request_id = clone_request_id;
1878     vma_xfer->flags = vma->vm_flags;
1879     vma_xfer->pgoff = vma->vm_pgoff;
1880     pcn_kmsg_send_long(dst, 
1881                         (struct pcn_kmsg_long_message*)vma_xfer, 
1882                         sizeof(vma_transfer_t) - sizeof(vma_xfer->header));
1883
1884     // Send all physical information too
1885     {
1886     unsigned long curr = vma->vm_start;
1887     unsigned long vaddr_resolved = -1;
1888     unsigned long paddr_resolved = -1;
1889     size_t sz_resolved = 0;
1890     
1891     while(curr < vma->vm_end) {
1892         if(-1 == find_next_consecutive_physically_mapped_region(mm,
1893                     vma,
1894                     curr,
1895                     &vaddr_resolved,
1896                     &paddr_resolved,
1897                     &sz_resolved)) {
1898             // None more, exit
1899             break;
1900         } else {
1901             // send the pte
1902             send_pte(paddr_resolved,
1903                      vaddr_resolved,
1904                      sz_resolved,
1905                      dst,
1906                      vma_xfer->vma_id,
1907                      vma_xfer->clone_request_id
1908                      );
1909
1910             // move to the next
1911             curr = vaddr_resolved + sz_resolved;
1912         }
1913     }
1914
1915     }
1916
1917
1918     kfree(vma_xfer);
1919 }
1920
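/*
 * Illustrative sketch (not compiled in): pushing an entire address space
 * to another kernel one vma (plus its physical mappings) at a time, which
 * is the behaviour implied by COPY_WHOLE_VM_WITH_MIGRATION.  "dst_cpu" and
 * "clone_request_id" are placeholders supplied by the migration path.
 */
#if 0
{
    struct vm_area_struct* curr;
    PS_DOWN_READ(&mm->mmap_sem);
    for (curr = mm->mmap; curr != NULL; curr = curr->vm_next)
        send_vma(mm, curr, dst_cpu, clone_request_id);
    PS_UP_READ(&mm->mmap_sem);
}
#endif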
1921 /**
1922  * @brief Display a mapping request data entry.
1923  */
1924 static void dump_mapping_request_data(mapping_request_data_t* data) {
1925     int i;
1926     PSPRINTK("mapping request data dump:\n");
1927     PSPRINTK("address{%lx}, vaddr_start{%lx}, vaddr_sz{%lx}\n",
1928                     data->address, data->vaddr_start, data->vaddr_size);
1929     for(i = 0; i < MAX_MAPPINGS; i++) {
1930         PSPRINTK("mapping %d - vaddr{%lx}, paddr{%lx}, sz{%lx}\n",
1931                 i,data->mappings[i].vaddr,data->mappings[i].paddr,data->mappings[i].sz);
1932     }
1933     PSPRINTK("present{%d}, complete{%d}, from_saved_mm{%d}\n",
1934             data->present, data->complete, data->from_saved_mm);
1935     PSPRINTK("responses{%d}, expected_responses{%d}\n",
1936             data->responses, data->expected_responses);
1937 }
1938
1939 /**
1940  * @brief Display relevant task information.
1941  */
1942 void dump_task(struct task_struct* task, struct pt_regs* regs, unsigned long stack_ptr) {
1943 #if PROCESS_SERVER_VERBOSE
1944     if (!task) return;
1945
1946     PSPRINTK("DUMP TASK\n");
1947     PSPRINTK("PID: %d\n",task->pid);
1948     PSPRINTK("State: %lx\n",task->state);
1949     PSPRINTK("Flags: %x\n",task->flags);
1950     PSPRINTK("Prio{%d},Static_Prio{%d},Normal_Prio{%d}\n",
1951             task->prio,task->static_prio,task->normal_prio);
1952     PSPRINTK("Represents_remote{%d}\n",task->represents_remote);
1953     PSPRINTK("Executing_for_remote{%d}\n",task->executing_for_remote);
1954     PSPRINTK("prev_pid{%d}\n",task->prev_pid);
1955     PSPRINTK("next_pid{%d}\n",task->next_pid);
1956     PSPRINTK("prev_cpu{%d}\n",task->prev_cpu);
1957     PSPRINTK("next_cpu{%d}\n",task->next_cpu);
1958     PSPRINTK("Clone_request_id{%d}\n",task->clone_request_id);
1959     dump_regs(regs);
1960     dump_thread(&task->thread);
1961     //dump_mm(task->mm);
1962     dump_stk(&task->thread,stack_ptr);
1963     PSPRINTK("TASK DUMP COMPLETE\n");
1964 #endif
1965 }
1966
1967 /**
1968  * @brief Display a task's stack information.
1969  */
1970 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr) {
1971     if(!thread) return;
1972     PSPRINTK("DUMP STACK\n");
1973     if(thread->sp) {
1974         PSPRINTK("sp = %lx\n",thread->sp);
1975     }
1976     if(thread->usersp) {
1977         PSPRINTK("usersp = %lx\n",thread->usersp);
1978     }
1979     if(stack_ptr) {
1980         PSPRINTK("stack_ptr = %lx\n",stack_ptr);
1981     }
1982     PSPRINTK("STACK DUMP COMPLETE\n");
1983 }
1984
1985 /**
1986  * @brief Display a task's register contents.
1987  */
1988 static void dump_regs(struct pt_regs* regs) {
1989     unsigned long fs, gs;
1990     PSPRINTK("DUMP REGS\n");
1991     if(NULL != regs) {
1992         PSPRINTK("r15{%lx}\n",regs->r15);   
1993         PSPRINTK("r14{%lx}\n",regs->r14);
1994         PSPRINTK("r13{%lx}\n",regs->r13);
1995         PSPRINTK("r12{%lx}\n",regs->r12);
1996         PSPRINTK("r11{%lx}\n",regs->r11);
1997         PSPRINTK("r10{%lx}\n",regs->r10);
1998         PSPRINTK("r9{%lx}\n",regs->r9);
1999         PSPRINTK("r8{%lx}\n",regs->r8);
2000         PSPRINTK("bp{%lx}\n",regs->bp);
2001         PSPRINTK("bx{%lx}\n",regs->bx);
2002         PSPRINTK("ax{%lx}\n",regs->ax);
2003         PSPRINTK("cx{%lx}\n",regs->cx);
2004         PSPRINTK("dx{%lx}\n",regs->dx);
2005         PSPRINTK("di{%lx}\n",regs->di);
2006         PSPRINTK("orig_ax{%lx}\n",regs->orig_ax);
2007         PSPRINTK("ip{%lx}\n",regs->ip);
2008         PSPRINTK("cs{%lx}\n",regs->cs);
2009         PSPRINTK("flags{%lx}\n",regs->flags);
2010         PSPRINTK("sp{%lx}\n",regs->sp);
2011         PSPRINTK("ss{%lx}\n",regs->ss);
2012     }
2013     rdmsrl(MSR_FS_BASE, fs);
2014     rdmsrl(MSR_GS_BASE, gs);
2015     PSPRINTK("fs{%lx}\n",fs);
2016     PSPRINTK("gs{%lx}\n",gs);
2017     PSPRINTK("REGS DUMP COMPLETE\n");
2018 }
2019
2020 /**
2021  * @brief Display a task's thread information.
2022  */
2023 static void dump_thread(struct thread_struct* thread) {
2024     PSPRINTK("DUMP THREAD\n");
2025     PSPRINTK("sp0{%lx}, sp{%lx}\n",thread->sp0,thread->sp);
2026     PSPRINTK("usersp{%lx}\n",thread->usersp);
2027     PSPRINTK("es{%x}\n",thread->es);
2028     PSPRINTK("ds{%x}\n",thread->ds);
2029     PSPRINTK("fsindex{%x}\n",thread->fsindex);
2030     PSPRINTK("gsindex{%x}\n",thread->gsindex);
2031     PSPRINTK("gs{%lx}\n",thread->gs);
2032     PSPRINTK("THREAD DUMP COMPLETE\n");
2033 }
2034
2035 /**
2036  * @brief Display a pte_data_t data structure.
2037  */
2038 static void dump_pte_data(pte_data_t* p) {
2039     PSPRINTK("PTE_DATA\n");
2040     PSPRINTK("vma_id{%x}\n",p->vma_id);
2041     PSPRINTK("clone_request_id{%x}\n",p->clone_request_id);
2042     PSPRINTK("cpu{%x}\n",p->cpu);
2043     PSPRINTK("vaddr_start{%lx}\n",p->vaddr_start);
2044     PSPRINTK("paddr_start{%lx}\n",p->paddr_start);
2045     PSPRINTK("sz{%d}\n",p->sz);
2046 }
2047
2048 /**
2049  * @brief Display a vma_data_t data structure.
2050  */
2051 static void dump_vma_data(vma_data_t* v) {
2052     pte_data_t* p;
2053     PSPRINTK("VMA_DATA\n");
2054     PSPRINTK("start{%lx}\n",v->start);
2055     PSPRINTK("end{%lx}\n",v->end);
2056     PSPRINTK("clone_request_id{%x}\n",v->clone_request_id);
2057     PSPRINTK("cpu{%x}\n",v->cpu);
2058     PSPRINTK("flags{%lx}\n",v->flags);
2059     PSPRINTK("vma_id{%x}\n",v->vma_id);
2060     PSPRINTK("path{%s}\n",v->path);
2061
2062     p = v->pte_list;
2063     while(p) {
2064         dump_pte_data(p);
2065         p = (pte_data_t*)p->header.next;
2066     }
2067 }
2068
2069 /**
2070  * @brief Display a clone_data_t.
2071  */
2072 static void dump_clone_data(clone_data_t* r) {
2073     vma_data_t* v;
2074     PSPRINTK("CLONE REQUEST\n");
2075     PSPRINTK("clone_request_id{%x}\n",r->clone_request_id);
2076     PSPRINTK("clone_flags{%lx}\n",r->clone_flags);
2077     PSPRINTK("stack_start{%lx}\n",r->stack_start);
2078     PSPRINTK("stack_ptr{%lx}\n",r->stack_ptr);
2079     PSPRINTK("env_start{%lx}\n",r->env_start);
2080     PSPRINTK("env_end{%lx}\n",r->env_end);
2081     PSPRINTK("arg_start{%lx}\n",r->arg_start);
2082     PSPRINTK("arg_end{%lx}\n",r->arg_end);
2083     PSPRINTK("heap_start{%lx}\n",r->heap_start);
2084     PSPRINTK("heap_end{%lx}\n",r->heap_end);
2085     PSPRINTK("data_start{%lx}\n",r->data_start);
2086     PSPRINTK("data_end{%lx}\n",r->data_end);
2087     dump_regs(&r->regs);
2088     PSPRINTK("placeholder_pid{%x}\n",r->placeholder_pid);
2089     PSPRINTK("placeholder_tgid{%x}\n",r->placeholder_tgid);
2090     PSPRINTK("thread_fs{%lx}\n",r->thread_fs);
2091     PSPRINTK("thread_gs{%lx}\n",r->thread_gs);
2092     PSPRINTK("thread_sp0{%lx}\n",r->thread_sp0);
2093     PSPRINTK("thread_sp{%lx}\n",r->thread_sp);
2094     PSPRINTK("thread_usersp{%lx}\n",r->thread_usersp);
2095
2096     v = r->vma_list;
2097     while(v) {
2098         dump_vma_data(v);
2099         v = (vma_data_t*)v->header.next;
2100     }
2101 }
2102
2103 /**
2104  * @brief Find a thread count data entry.
2105  * @return Either a thread count request data entry, or NULL if one does 
2106  * not exist that satisfies the parameter requirements.
2107  */
2108 static remote_thread_count_request_data_t* find_remote_thread_count_data(int cpu, 
2109         int id, int requester_pid) {
2110
2111     data_header_t* curr = NULL;
2112     remote_thread_count_request_data_t* request = NULL;
2113     remote_thread_count_request_data_t* ret = NULL;
2114     unsigned long lockflags;
2115
2116     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2117
2118     curr = _count_remote_tmembers_data_head;
2119     while(curr) {
2120         request = (remote_thread_count_request_data_t*)curr;
2121         if(request->tgroup_home_cpu == cpu &&
2122            request->tgroup_home_id == id &&
2123            request->requester_pid == requester_pid) {
2124             ret = request;
2125             break;
2126         }
2127         curr = curr->next;
2128     }
2129
2130     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2131
2132     return ret;
2133 }
2134
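/*
 * Illustrative sketch (not compiled in): how a thread-count response
 * handler might consume this lookup - locate the pending request entry,
 * fold in the remote count, and bump the response counter so the waiter
 * in count_remote_thread_members() can proceed.  The "msg" fields are
 * placeholders for the actual response message layout.
 */
#if 0
{
    remote_thread_count_request_data_t* data =
        find_remote_thread_count_data(msg->tgroup_home_cpu,
                                      msg->tgroup_home_id,
                                      msg->requester_pid);
    if (data) {
        unsigned long flags;
        spin_lock_irqsave(&data->lock, flags);
        data->count += msg->count;
        data->responses++;
        spin_unlock_irqrestore(&data->lock, flags);
    }
}
#endif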
2135 /**
2136  * @brief Finds a munmap request data entry.
2137  * @return Either a munmap request data entry, or NULL if one is not
2138  * found that satisfies the parameter requirements.
2139  */
2140 static munmap_request_data_t* find_munmap_request_data(int cpu, int id, 
2141         int requester_pid, unsigned long address) {
2142
2143     data_header_t* curr = NULL;
2144     munmap_request_data_t* request = NULL;
2145     munmap_request_data_t* ret = NULL;
2146     PS_SPIN_LOCK(&_munmap_data_head_lock);
2147     
2148     curr = _munmap_data_head;
2149     while(curr) {
2150         request = (munmap_request_data_t*)curr;
2151         if(request->tgroup_home_cpu == cpu && 
2152                 request->tgroup_home_id == id &&
2153                 request->requester_pid == requester_pid &&
2154                 request->vaddr_start == address) {
2155             ret = request;
2156             break;
2157         }
2158         curr = curr->next;
2159     }
2160
2161     PS_SPIN_UNLOCK(&_munmap_data_head_lock);
2162
2163     return ret;
2164
2165 }
2166
2167 /**
2168  * @brief Finds an mprotect request data entry.
2169  * @return Either an mprotect request data entry, or NULL if one is
2170  * not found that satisfies the parameter requirements.
2171  */
2172 static mprotect_data_t* find_mprotect_request_data(int cpu, int id, 
2173         int requester_pid, unsigned long start) {
2174
2175     data_header_t* curr = NULL;
2176     mprotect_data_t* request = NULL;
2177     mprotect_data_t* ret = NULL;
2178     PS_SPIN_LOCK(&_mprotect_data_head_lock);
2179     
2180     curr = _mprotect_data_head;
2181     while(curr) {
2182         request = (mprotect_data_t*)curr;
2183         if(request->tgroup_home_cpu == cpu && 
2184                 request->tgroup_home_id == id &&
2185                 request->requester_pid == requester_pid &&
2186                 request->start == start) {
2187             ret = request;
2188             break;
2189         }
2190         curr = curr->next;
2191     }
2192
2193     PS_SPIN_UNLOCK(&_mprotect_data_head_lock);
2194
2195     return ret;
2196
2197 }
2198
2199 /**
2200  * @brief Finds a mapping request data entry.
2201  * @return Either a mapping request data entry, or NULL if an entry
2202  * is not found that satisfies the parameter requirements.
2203  */
2204 static mapping_request_data_t* find_mapping_request_data(int cpu, int id, 
2205         int pid, unsigned long address) {
2206
2207     data_header_t* curr = NULL;
2208     mapping_request_data_t* request = NULL;
2209     mapping_request_data_t* ret = NULL;
2210     
2211     curr = _mapping_request_data_head;
2212     while(curr) {
2213         request = (mapping_request_data_t*)curr;
2214         if(request->tgroup_home_cpu == cpu && 
2215                 request->tgroup_home_id == id &&
2216                 request->requester_pid == pid &&
2217                 request->address == address) {
2218             ret = request;
2219             break;
2220         }
2221         curr = curr->next;
2222     }
2223
2224
2225     return ret;
2226 }
2227
2228 /**
2229  * @brief Finds a clone data entry.
2230  * @return Either a clone entry or NULL if one is not found
2231  * that satisfies the parameter requirements.
2232  */
2233 static clone_data_t* find_clone_data(int cpu, int clone_request_id) {
2234     data_header_t* curr = NULL;
2235     clone_data_t* clone = NULL;
2236     clone_data_t* ret = NULL;
2237     PS_SPIN_LOCK(&_data_head_lock);
2238     
2239     curr = _data_head;
2240     while(curr) {
2241         if(curr->data_type == PROCESS_SERVER_CLONE_DATA_TYPE) {
2242             clone = (clone_data_t*)curr;
2243             if(clone->placeholder_cpu == cpu && clone->clone_request_id == clone_request_id) {
2244                 ret = clone;
2245                 break;
2246             }
2247         }
2248         curr = curr->next;
2249     }
2250
2251     PS_SPIN_UNLOCK(&_data_head_lock);
2252
2253     return ret;
2254 }
2255
2256 /**
2257  * @brief Destroys the specified clone data.  It also destroys lists
2258  * that are nested within it.
2259  */
2260 static void destroy_clone_data(clone_data_t* data) {
2261     vma_data_t* vma_data;
2262     pte_data_t* pte_data;
2263     vma_data = data->vma_list;
2264     while(vma_data) {
2265         
2266         // Destroy this VMA's PTE's
2267         pte_data = vma_data->pte_list;
2268         while(pte_data) {
2269
2270             // Remove pte from list
2271             vma_data->pte_list = (pte_data_t*)pte_data->header.next;
2272             if(vma_data->pte_list) {
2273                 vma_data->pte_list->header.prev = NULL;
2274             }
2275
2276             // Destroy pte
2277             kfree(pte_data);
2278
2279             // Next is the new list head
2280             pte_data = vma_data->pte_list;
2281         }
2282         
2283         // Remove vma from list
2284         data->vma_list = (vma_data_t*)vma_data->header.next;
2285         if(data->vma_list) {
2286             data->vma_list->header.prev = NULL;
2287         }
2288
2289         // Destroy vma
2290         kfree(vma_data);
2291
2292         // Next is the new list head
2293         vma_data = data->vma_list;
2294     }
2295
2296     // Destroy clone data
2297     kfree(data);
2298 }
2299
2300 #if 0
2301 /**
2302  * @brief Finds a vma_data_t entry.
2303  */
2304 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start) {
2305
2306     vma_data_t* curr = clone_data->vma_list;
2307     vma_data_t* ret = NULL;
2308
2309     while(curr) {
2310         
2311         if(curr->start == addr_start) {
2312             ret = curr;
2313             break;
2314         }
2315
2316         curr = (vma_data_t*)curr->header.next;
2317     }
2318
2319     return ret;
2320 }
2321 #endif
2322
2323 /**
2324  * @brief Callback for page walk that displays the contents of the walk.
2325  */
2326 static int dump_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, 
2327         unsigned long end, struct mm_walk *walk) {
2328
2329     int nx;
2330     int rw;
2331     int user;
2332     int pwt;
2333     int pcd;
2334     int accessed;
2335     int dirty;
2336
2337     if(NULL == pte || !pte_present(*pte)) {
2338         return 0;
2339     }
2340
2341     nx       = pte_flags(*pte) & _PAGE_NX       ? 1 : 0;
2342     rw       = pte_flags(*pte) & _PAGE_RW       ? 1 : 0;
2343     user     = pte_flags(*pte) & _PAGE_USER     ? 1 : 0;
2344     pwt      = pte_flags(*pte) & _PAGE_PWT      ? 1 : 0;
2345     pcd      = pte_flags(*pte) & _PAGE_PCD      ? 1 : 0;
2346     accessed = pte_flags(*pte) & _PAGE_ACCESSED ? 1 : 0;
2347     dirty    = pte_flags(*pte) & _PAGE_DIRTY    ? 1 : 0;
2348
2349     PSPRINTK("pte_entry start{%lx}, end{%lx}, phy{%lx}\n",
2350             start,
2351             end,
2352             (unsigned long)(pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1)));
2353
2354     PSPRINTK("\tnx{%d}, ",nx);
2355     PSPRINTK("rw{%d}, ",rw);
2356     PSPRINTK("user{%d}, ",user);
2357     PSPRINTK("pwt{%d}, ",pwt);
2358     PSPRINTK("pcd{%d}, ",pcd);
2359     PSPRINTK("accessed{%d}, ",accessed);
2360     PSPRINTK("dirty{%d}\n",dirty);
2361
2362     return 0;
2363 }
2364
2365 /**
2366  * @brief Displays relevant data within a mm.
2367  */
2368 static void dump_mm(struct mm_struct* mm)
2369 {
2370     struct vm_area_struct * curr;
2371     struct mm_walk walk = {
2372         .pte_entry = dump_page_walk_pte_entry_callback,
2373         .mm = mm,
2374         .private = NULL
2375         };
2376
2377     if(NULL == mm) {
2378         PSPRINTK("MM IS NULL!\n");
2379         return;
2380     }
2381
2382     PS_DOWN_READ(&mm->mmap_sem);
2383
2384     curr = mm->mmap;
2385
2386     PSPRINTK("MM DUMP\n");
2387     PSPRINTK("Stack Growth{%lx}\n",mm->stack_vm);
2388     PSPRINTK("Code{%lx - %lx}\n",mm->start_code,mm->end_code);
2389     PSPRINTK("Brk{%lx - %lx}\n",mm->start_brk,mm->brk);
2390     PSPRINTK("Stack{%lx}\n",mm->start_stack);
2391     PSPRINTK("Arg{%lx - %lx}\n",mm->arg_start,mm->arg_end);
2392     PSPRINTK("Env{%lx - %lx}\n",mm->env_start,mm->env_end);
2393
2394     while(curr) {
2395         if(!curr->vm_file) {
2396             PSPRINTK("Anonymous VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, flags{%lx}\n",
2397                     curr->vm_start, 
2398                     curr->vm_end,
2399                     curr->vm_pgoff,
2400                     curr->vm_flags);
2401             // walk    
2402             walk_page_range(curr->vm_start,curr->vm_end,&walk);
2403         } else {
2404             PSPRINTK("Page VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, path{%s}, flags{%lx}\n",
2405                     curr->vm_start,
2406                     curr->vm_end,
2407                     curr->vm_pgoff,
2408                     d_path(&curr->vm_file->f_path,buf, 256),
2409                     curr->vm_flags);
2410             walk_page_range(curr->vm_start,curr->vm_end,&walk);
2411         }
2412         curr = curr->vm_next;
2413     }
2414
2415     PS_UP_READ(&mm->mmap_sem);
2416 }
2417
2418 /**
2419  * Data library
2420  */
2421
2422 /**
2423  * @brief Add data entry.
2424  */
2425 static void add_data_entry_to(void* entry, spinlock_t* lock, data_header_t** head) {
2426     data_header_t* hdr = (data_header_t*)entry;
2427     data_header_t* curr = NULL;
2428
2429     if(!entry) {
2430         return;
2431     }
2432
2433     // Always clear out the link information
2434     hdr->next = NULL;
2435     hdr->prev = NULL;
2436
2437     PS_SPIN_LOCK(lock);
2438     
2439     if (!*head) {
2440         *head = hdr;
2441         hdr->next = NULL;
2442         hdr->prev = NULL;
2443     } else {
2444         curr = *head;
2445         while(curr->next != NULL) {
2446             if(curr == entry) {
2447                 { PS_SPIN_UNLOCK(lock); return; } // Already in the list; drop the lock first.
2448             }
2449             curr = curr->next;
2450         }
2451         // Now curr should be the last entry.
2452         // Append the new entry to curr.
2453         curr->next = hdr;
2454         hdr->next = NULL;
2455         hdr->prev = curr;
2456     }
2457
2458     PS_SPIN_UNLOCK(lock);
2459 }
2460
2461 /**
2462  * @brief Remove a data entry
2463  * @prerequisite Requires user to hold lock
2464  */
2465 static void remove_data_entry_from(void* entry, data_header_t** head) {
2466     data_header_t* hdr = entry;
2467
2468     if(!entry) {
2469         return;
2470     }
2471
2472     if(*head == hdr) {
2473         *head = hdr->next;
2474     }
2475
2476     if(hdr->next) {
2477         hdr->next->prev = hdr->prev;
2478     }
2479
2480     if(hdr->prev) {
2481         hdr->prev->next = hdr->next;
2482     }
2483
2484     hdr->prev = NULL;
2485     hdr->next = NULL;
2486
2487 }
2488
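/*
 * Illustrative sketch (not compiled in): the add/remove pairing used by
 * this data library.  Insertion takes the list lock internally; removal
 * does not, so the caller wraps remove_data_entry_from() in the same lock,
 * exactly as count_remote_thread_members() does further below.
 */
#if 0
{
    unsigned long lockflags;
    add_data_entry_to(data,
                      &_count_remote_tmembers_data_head_lock,
                      &_count_remote_tmembers_data_head);
    /* ... wait for the responses to arrive ... */
    spin_lock_irqsave(&_count_remote_tmembers_data_head_lock, lockflags);
    remove_data_entry_from(data, &_count_remote_tmembers_data_head);
    spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock, lockflags);
    kfree(data);
}
#endif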
2489 /**
2490  * @brief Add data entry
2491  */
2492 static void add_data_entry(void* entry) {
2493     data_header_t* hdr = (data_header_t*)entry;
2494     data_header_t* curr = NULL;
2495     unsigned long lockflags;
2496
2497     if(!entry) {
2498         return;
2499     }
2500
2501     // Always clear out the link information
2502     hdr->next = NULL;
2503     hdr->prev = NULL;
2504
2505     spin_lock_irqsave(&_data_head_lock,lockflags);
2506     
2507     if (!_data_head) {
2508         _data_head = hdr;
2509         hdr->next = NULL;
2510         hdr->prev = NULL;
2511     } else {
2512         curr = _data_head;
2513         while(curr->next != NULL) {
2514             if(curr == entry) {
2515                 return;// It's already in the list!
2516                 { spin_unlock_irqrestore(&_data_head_lock,lockflags); return; } // Already in the list; drop the lock first.
2517             curr = curr->next;
2518         }
2519         // Now curr should be the last entry.
2520         // Append the new entry to curr.
2521         curr->next = hdr;
2522         hdr->next = NULL;
2523         hdr->prev = curr;
2524     }
2525
2526     spin_unlock_irqrestore(&_data_head_lock,lockflags);
2527 }
2528
2529 /**
2530  * @brief Remove a data entry.
2531  * @prerequisite Requires user to hold _data_head_lock.
2532  */
2533 static void remove_data_entry(void* entry) {
2534     data_header_t* hdr = entry;
2535
2536     if(!entry) {
2537         return;
2538     }
2539
2540     if(_data_head == hdr) {
2541         _data_head = hdr->next;
2542     }
2543
2544     if(hdr->next) {
2545         hdr->next->prev = hdr->prev;
2546     }
2547
2548     if(hdr->prev) {
2549         hdr->prev->next = hdr->next;
2550     }
2551
2552     hdr->prev = NULL;
2553     hdr->next = NULL;
2554
2555 }
2556
2557 /**
2558  * @brief Print information about the list.
2559  */
2560 static void dump_data_list(void) {
2561     data_header_t* curr = NULL;
2562     pte_data_t* pte_data = NULL;
2563     vma_data_t* vma_data = NULL;
2564     clone_data_t* clone_data = NULL;
2565
2566     PS_SPIN_LOCK(&_data_head_lock);
2567
2568     curr = _data_head;
2569
2570     PSPRINTK("DATA LIST:\n");
2571     while(curr) {
2572         switch(curr->data_type) {
2573         case PROCESS_SERVER_VMA_DATA_TYPE:
2574             vma_data = (vma_data_t*)curr;
2575             PSPRINTK("VMA DATA: start{%lx}, end{%lx}, crid{%d}, vmaid{%d}, cpu{%d}, pgoff{%lx}\n",
2576                     vma_data->start,
2577                     vma_data->end,
2578                     vma_data->clone_request_id,
2579                     vma_data->vma_id, 
2580                     vma_data->cpu, 
2581                     vma_data->pgoff);
2582             break;
2583         case PROCESS_SERVER_PTE_DATA_TYPE:
2584             pte_data = (pte_data_t*)curr;
2585             PSPRINTK("PTE DATA: vaddr_start{%lx}, paddr_start{%lx}, sz{%d}, vmaid{%d}, cpu{%d}\n",
2586                     pte_data->vaddr_start,
2587                     pte_data->paddr_start,
2588                     pte_data->sz,
2589                     pte_data->vma_id,
2590                     pte_data->cpu);
2591             break;
2592         case PROCESS_SERVER_CLONE_DATA_TYPE:
2593             clone_data = (clone_data_t*)curr;
2594             PSPRINTK("CLONE DATA: flags{%lx}, stack_start{%lx}, heap_start{%lx}, heap_end{%lx}, ip{%lx}, crid{%d}\n",
2595                     clone_data->clone_flags,
2596                     clone_data->stack_start,
2597                     clone_data->heap_start,
2598                     clone_data->heap_end,
2599                     clone_data->regs.ip,
2600                     clone_data->clone_request_id);
2601             break;
2602         default:
2603             break;
2604         }
2605         curr = curr->next;
2606     }
2607
2608     PS_SPIN_UNLOCK(&_data_head_lock);
2609 }
2610
2611 /**
2612  * @brief Counts remote thread group members.
2613  * @return The number of remote thread group members in the
2614  * specified distributed thread group.
2615  * <MEASURE perf_count_remote_thread_members>
2616  */
2617 static int count_remote_thread_members(int exclude_t_home_cpu,
2618                                        int exclude_t_home_id) {
2619
2620     int tgroup_home_cpu = current->tgroup_home_cpu;
2621     int tgroup_home_id  = current->tgroup_home_id;
2622     remote_thread_count_request_data_t* data;
2623     remote_thread_count_request_t request;
2624     int i;
2625     int s;
2626     int ret = -1;
2627     int perf = -1;
2628     unsigned long lockflags;
2629
2630     perf = PERF_MEASURE_START(&perf_count_remote_thread_members);
2631
2632     PSPRINTK("%s: entered\n",__func__);
2633
2634     data = kmalloc(sizeof(remote_thread_count_request_data_t),GFP_KERNEL);
2635     if(!data) goto exit;
2636
2637     data->header.data_type = PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE;
2638     data->responses = 0;
2639     data->expected_responses = 0;
2640     data->tgroup_home_cpu = tgroup_home_cpu;
2641     data->tgroup_home_id = tgroup_home_id;
2642     data->requester_pid = current->pid;
2643     data->count = 0;
2644     spin_lock_init(&data->lock);
2645
2646     add_data_entry_to(data,
2647                       &_count_remote_tmembers_data_head_lock,
2648                       &_count_remote_tmembers_data_head);
2649
2650     request.header.type = PCN_KMSG_TYPE_PROC_SRV_THREAD_COUNT_REQUEST;
2651     request.header.prio = PCN_KMSG_PRIO_NORMAL;
2652     request.tgroup_home_cpu = current->tgroup_home_cpu; //TODO why not tgroup_home_cpu?!?!
2653     request.tgroup_home_id  = current->tgroup_home_id; //TODO why not tgroup_home_id?!?!
2654     request.requester_pid = data->requester_pid;
2655
2656 #ifndef SUPPORT_FOR_CLUSTERING
2657     for(i = 0; i < NR_CPUS; i++) {
2658         // Skip the current cpu
2659         if(i == _cpu) continue;
2660 #else
2661     // the list does not include the current processor group descriptor (TODO)
2662     struct list_head *iter;
2663     _remote_cpu_info_list_t *objPtr;
2664     list_for_each(iter, &rlist_head) {
2665         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
2666         i = objPtr->_data._processor;
2667 #endif
2668         // Send the request to this cpu.
2669         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
2670         if(!s) {
2671             // A successful send operation, increase the number
2672             // of expected responses.
2673             data->expected_responses++;
2674         }
2675     }
2676
2677     PSPRINTK("%s: waiting on %d responses\n",__func__,data->expected_responses);
2678
2679     // Wait for all cpus to respond.
2680     while(data->expected_responses != data->responses) {
2681         schedule();
2682     }
2683
2684     // OK, all responses are in, we can proceed.
2685     ret = data->count;
2686
2687     PSPRINTK("%s: found a total of %d remote threads in group\n",__func__,
2688             data->count);
2689
2690     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2691     remove_data_entry_from(data,
2692                            &_count_remote_tmembers_data_head);
2693     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2694
2695     kfree(data);
2696
2697 exit:
2698     PERF_MEASURE_STOP(&perf_count_remote_thread_members," ",perf);
2699     return ret;
2700 }
2701
2702 /**
2703  * @brief Counts the number of local thread group members for the specified
2704  * distributed thread group.
2705  */
2706 static int count_local_thread_members(int tgroup_home_cpu, 
2707         int tgroup_home_id, int exclude_pid) {
2708
2709     struct task_struct *task, *g;
2710     int count = 0;
2711     PSPRINTK("%s: entered\n",__func__);
2712     do_each_thread(g,task) {
2713         if(task->tgroup_home_id == tgroup_home_id &&
2714            task->tgroup_home_cpu == tgroup_home_cpu &&
2715            task->t_home_cpu == _cpu &&
2716            task->pid != exclude_pid &&
2717            task->exit_state != EXIT_ZOMBIE &&
2718            task->exit_state != EXIT_DEAD &&
2719            !(task->flags & PF_EXITING)) {
2720
2721                 count++;
2722             
2723         }
2724     } while_each_thread(g,task);
2725     PSPRINTK("%s: exited\n",__func__);
2726
2727     return count;
2728
2729 }
2730
2731 /**
2732  * @brief Counts the number of local and remote thread group members for the
2733  * thread group in which the "current" task resides.
2734  * @return The number of threads.
2735  */
2736 static int count_thread_members (void)
2737 {
2738      
2739     int count = 0;
2740     PSPRINTK("%s: entered\n",__func__);
2741     count += count_local_thread_members(current->tgroup_home_cpu, current->tgroup_home_id,current->pid);
2742     count += count_remote_thread_members(current->tgroup_home_cpu, current->tgroup_home_id);
2743     PSPRINTK("%s: exited\n",__func__);
2744     return count;
2745 }
2746
2747
2748 /**
2749  * @brief Process notification of a thread group closing.
2750  * This function will wait for any locally executing thread group
2751  * members to exit.  It will then clean up all local resources
2752  * dedicated to the thread group that has exited.
2753  *
2754  * <MEASURE perf_process_tgroup_closed_item>
2755  */
2756
2757 void process_tgroup_closed_item(struct work_struct* work) {
2758
2759     tgroup_closed_work_t* w = (tgroup_closed_work_t*) work;
2760     data_header_t *curr;
2761     mm_data_t* mm_data;
2762     struct task_struct *g, *task;
2763     unsigned char tgroup_closed = 0;
2764     int perf = -1;
2765     mm_data_t* to_remove = NULL;
2766
2767     perf = PERF_MEASURE_START(&perf_process_tgroup_closed_item);
2768
2769     PSPRINTK("%s: entered\n",__func__);
2770     PSPRINTK("%s: received group exit notification\n",__func__);
2771
2772     PSPRINTK("%s: waiting for all members of this distributed thread group to finish\n",__func__);
2773     while(!tgroup_closed) {
2774         unsigned char pass = 0;
2775         do_each_thread(g,task) {
2776             if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
2777                task->tgroup_home_id  == w->tgroup_home_id) {
2778                 
2779                 // there are still living tasks within this distributed thread group
2780                 // wait a bit
2781                 schedule();
2782                 pass = 1;
2783             }
2784
2785         } while_each_thread(g,task);
2786         if(!pass) {
2787             tgroup_closed = 1;
2788         } else {
2789             PSPRINTK("%s: waiting for tgroup close out\n",__func__);
2790         }
2791     }
2792
2793 loop:
2794     spin_lock(&_saved_mm_head_lock);
2795     // Remove all saved mm's for this thread group.
2796     curr = _saved_mm_head;
2797     while(curr) {
2798         mm_data = (mm_data_t*)curr;
2799         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
2800            mm_data->tgroup_home_id  == w->tgroup_home_id) {
2801             remove_data_entry_from(curr,&_saved_mm_head);
2802             to_remove = mm_data;
2803             goto found;
2804         }
2805         curr = curr->next;
2806     }
2807 found:
2808     spin_unlock(&_saved_mm_head_lock);
2809
2810     if(to_remove != NULL) {
2811         PSPRINTK("%s: removing a mm from cpu{%d} id{%d}\n",
2812                 __func__,
2813                 w->tgroup_home_cpu,
2814                 w->tgroup_home_id);
2815         
2816         BUG_ON(to_remove->mm == NULL);
2817         mmput(to_remove->mm);
2818         kfree(to_remove);
2819         to_remove = NULL;
2820         goto loop;
2821     }
2822
2823     kfree(work);
2824
2825     PERF_MEASURE_STOP(&perf_process_tgroup_closed_item," ",perf);
2826 }
2827
2828 /**
2829  * @brief Determine if the specified vma can have COW mappings.
2830  * @return 1 = yes, 0 = no.
2831  */
2832 static int is_maybe_cow(struct vm_area_struct* vma) {
2833     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
2834         // Not a cow vma
2835         return 0;
2836     }
2837
2838     if(!(vma->vm_flags & VM_WRITE)) {
2839         return 0;
2840     }
2841
2842     return 1;
2843 }
2844
2845 /**
2846  * @brief Break the COW page that contains "address", iff that page
2847  * is a COW page.
2848  * @return 1 = handled, 0 = not handled.
2849  * @prerequisite Caller must grab mm->mmap_sem
2850  */
2851 static int break_cow(struct mm_struct *mm, struct vm_area_struct* vma, unsigned long address) {
2852     pgd_t *pgd = NULL;
2853     pud_t *pud = NULL;
2854     pmd_t *pmd = NULL;
2855     pte_t *ptep = NULL;
2856     pte_t pte;
2857     spinlock_t* ptl;
2858
2859     //PSPRINTK("%s: entered\n",__func__);
2860
2861     // if it's not a cow mapping, return.
2862     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
2863         goto not_handled;
2864     }
2865
2866     // if it's not writable in vm_flags, return.
2867     if(!(vma->vm_flags & VM_WRITE)) {
2868         goto not_handled;
2869     }
2870
2871     pgd = pgd_offset(mm, address);
2872     if(!pgd_present(*pgd)) {
2873         goto not_handled_unlock;
2874     }
2875
2876     pud = pud_offset(pgd,address);
2877     if(!pud_present(*pud)) {
2878         goto not_handled_unlock;
2879     }
2880
2881     pmd = pmd_offset(pud,address);
2882     if(!pmd_present(*pmd)) {
2883         goto not_handled_unlock;
2884     }
2885
2886     ptep = pte_offset_map(pmd,address);
2887     if(!ptep || !pte_present(*ptep) || pte_none(*ptep)) {
2888         pte_unmap(ptep);
2889         goto not_handled_unlock;
2890     }
2891
2892     pte = *ptep;
2893
2894     if(pte_write(pte)) {
2895         goto not_handled_unlock;
2896     }
2897     
2898     // break the cow!
2899     ptl = pte_lockptr(mm,pmd);
2900     PS_SPIN_LOCK(ptl);
2901    
2902     PSPRINTK("%s: proceeding\n",__func__);
2903     do_wp_page(mm,vma,address,ptep,pmd,ptl,pte);
2904
2905
2906     // NOTE:
2907     // Do not call pte_unmap_unlock(ptep,ptl), since do_wp_page does that!
2908     
2909     goto handled;
2910
2911 not_handled_unlock:
2912 not_handled:
2913     return 0;
2914 handled:
2915     return 1;
2916 }
2917
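/*
 * Illustrative sketch (not compiled in): breaking every COW page in a vma
 * before exporting its physical mappings, as process_mapping_request()
 * does below.  mm->mmap_sem must be held for write across the loop.
 */
#if 0
{
    unsigned long cow_addr;
    if (is_maybe_cow(vma)) {
        for (cow_addr = vma->vm_start; cow_addr < vma->vm_end;
             cow_addr += PAGE_SIZE)
            break_cow(mm, vma, cow_addr);
    }
}
#endif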
2918 /**
2919  * @brief Process a request made by a remote CPU for a mapping.  This function
2920  * will search for mm's for the specified distributed thread group, and if found,
2921  * will search that mm for entries that contain the address that was asked for.
2922  * Prefetch is implemented in this function: not only is the requested page
2923  * communicated, but so is the entire contiguous virtual-to-physical range
2924  * that contains the specified address.  Other contiguous regions may also
2925  * be communicated if they exist.
2927  *
2928  * <MEASURED perf_process_mapping_request>
2929  */
2930 void process_mapping_request(struct work_struct* work)
2931 {
2932     mapping_request_work_t* w = (mapping_request_work_t*) work;
2933     mapping_response_t* response;
2934     data_header_t* data_curr = NULL;
2935     mm_data_t* mm_data = NULL;
2936     
2937     struct task_struct* task = NULL;
2938     struct task_struct* g;
2939     struct vm_area_struct* vma = NULL;
2940     struct mm_struct* mm = NULL;
2941     
2942     unsigned long address = w->address;
2943     unsigned long resolved = 0;
2944     struct mm_walk walk = {
2945         .pte_entry = vm_search_page_walk_pte_entry_callback,
2946         .private = &(resolved)
2947     };
2948     char *plpath = NULL, *lpath = NULL;
2949     int used_saved_mm = 0, found_vma = 1, found_pte = 1; 
2950     int i;
2951
2952 #ifdef CONFIG_POPCORN_PERF    
2953     // for perf 
2954     int perf = PERF_MEASURE_START(&perf_process_mapping_request);
2955 #endif /* CONFIG_POPCORN_PERF */    
2956
2957     PSPRINTK("received mapping request from{%d} address{%lx}, cpu{%d}, id{%d}\n",
2958             w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
2959
2960     // First, search through existing processes
2961     do_each_thread(g,task) {
2962         if((task->tgroup_home_cpu == w->tgroup_home_cpu) &&
2963            (task->tgroup_home_id  == w->tgroup_home_id )) {
2964             PSPRINTK("mapping request found common thread group here\n");
2965             mm = task->mm;
2966
2967             // Take note of the fact that an mm exists on the remote kernel
2968             set_cpu_has_known_tgroup_mm(task, w->from_cpu);
2969
2970             goto task_mm_search_exit;
2971         }
2972     } while_each_thread(g,task);
2973 task_mm_search_exit:
2974
2975     // Failing the process search, look through saved mm's.
2976     if(!mm) {
2977         PS_SPIN_LOCK(&_saved_mm_head_lock);
2978         data_curr = _saved_mm_head;
2979         while(data_curr) {
2980
2981             mm_data = (mm_data_t*)data_curr;
2982             
2983             if((mm_data->tgroup_home_cpu == w->tgroup_home_cpu) &&
2984                (mm_data->tgroup_home_id  == w->tgroup_home_id)) {
2985                 PSPRINTK("%s: Using saved mm to resolve mapping\n",__func__);
2986                 mm = mm_data->mm;
2987                 used_saved_mm = 1;
2988                 break;
2989             }
2990
2991             data_curr = data_curr->next;
2992         } // while
2993         PS_SPIN_UNLOCK(&_saved_mm_head_lock);
2994     }
2995     
2996     response = kmalloc(sizeof(mapping_response_t), GFP_ATOMIC); //TODO convert to alloc_cache
2997     if (!response) {
2998       printk(KERN_ALERT"can not kmalloc mapping_response_t area from{%d} address{%lx} cpu{%d} id{%d}\n",
2999               w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
3000       goto err_work;
3001     }
3002     lpath = kmalloc(POPCORN_MAX_PATH, GFP_ATOMIC); //TODO convert to alloc_cache
3003     if (!lpath) {
3004       printk(KERN_ALERT"can not kmalloc lpath area from{%d} address{%lx} cpu{%d} id{%d}\n",
3005               w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
3006       goto err_response;
3007     }
3008     
3009     // OK, if mm was found, look up the mapping.
3010     if (mm) {
3011
3012         // The purpose of this code block is to determine
3013         // whether we need a read or a write lock, and to safely take
3014         // whichever lock type we decide on.  We prefer read locks,
3015         // since they let us service more than one mapping request at
3016         // the same time.  However, if we are going to do any COW-break
3017         // operations, we must lock for write.
3019         int can_be_cow = 0;
3020         int first = 1;
3021 changed_can_be_cow:
3022         if(can_be_cow)
3023             PS_DOWN_WRITE(&mm->mmap_sem);
3024         else 
3025             PS_DOWN_READ(&mm->mmap_sem);
3026         vma = find_vma_checked(mm, address);
3027         if(vma && first) {
3028             first = 0;
3029             if(is_maybe_cow(vma)) {
3030                 can_be_cow = 1;
3031                 PS_UP_READ(&mm->mmap_sem);
3032                 goto changed_can_be_cow;
3033             }
3034         }
3035
3036         walk.mm = mm;
3037         walk_page_range(address & PAGE_MASK, 
3038                 (address & PAGE_MASK) + PAGE_SIZE, &walk);
3039
3040         if (vma && resolved != 0) {
3041             PSPRINTK("mapping found! %lx for vaddr %lx\n",resolved,
3042                     address & PAGE_MASK);
3043             /*
3044              * Find regions of consecutive physical memory
3045              * in this vma, including the faulting address
3046              * if possible.
3047              */
3048             {
3049             // Break all cows in this vma
3050             if (can_be_cow) {
3051                 unsigned long cow_addr;
3052                 for(cow_addr = vma->vm_start; cow_addr < vma->vm_end; cow_addr += PAGE_SIZE) {
3053                     break_cow(mm, vma, cow_addr);
3054                 }
3055                 // We no longer need a write lock after the break_cow process
3056                 // is complete, so downgrade the lock to a read lock.
3057                 downgrade_write(&mm->mmap_sem);
3058             } // if (can_be_cow
3059
3060             // Now grab all the mappings that we can stuff into the response.
3061             if (0 != fill_physical_mapping_array(mm, vma, address,
3062                                                 &(response->mappings[0]),
3063                                                 MAX_MAPPINGS)) {
3064                 // If the fill process fails, clear out all
3065                 // results.  Otherwise, we might trick the
3066                 // receiving cpu into thinking the target
3067                 // mapping was found when it was not.
3068                 for(i = 0; i < MAX_MAPPINGS; i++) {
3069                     response->mappings[i].present = 0;
3070                     response->mappings[i].vaddr = 0;
3071                     response->mappings[i].paddr = 0;
3072                     response->mappings[i].sz = 0;
3073                 }   
3074             } // if (0 != fill_physical_mapping_array
3075             }
3076
3077             response->header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3078             response->header.prio = PCN_KMSG_PRIO_NORMAL;
3079             response->tgroup_home_cpu = w->tgroup_home_cpu;
3080             response->tgroup_home_id = w->tgroup_home_id;
3081             response->requester_pid = w->requester_pid;
3082             response->address = address;
3083             response->present = 1;
3084             response->vaddr_start = vma->vm_start;
3085             response->vaddr_size = vma->vm_end - vma->vm_start;
3086             response->prot = vma->vm_page_prot;
3087             response->vm_flags = vma->vm_flags;
3088             if(vma->vm_file == NULL) {
3089                 response->path[0] = '\0';
3090             } else {    
3091                 plpath = d_path(&vma->vm_file->f_path,lpath,512);
3092                 strcpy(response->path,plpath);
3093                 response->pgoff = vma->vm_pgoff;
3094             }
3095
3096             // We modified this lock to be read-mode above so now
3097             // we can do a read-unlock instead of a write-unlock
3098             PS_UP_READ(&mm->mmap_sem);
3099        
3100         } else { // (vma && resolved != 0) 
3101
3102             if(can_be_cow)
3103                 PS_UP_WRITE(&mm->mmap_sem);
3104             else
3105                 PS_UP_READ(&mm->mmap_sem);
3106             // Zero out mappings
3107             for(i = 0; i < MAX_MAPPINGS; i++) {
3108                 response->mappings[i].present = 0;
3109                 response->mappings[i].vaddr = 0;
3110                 response->mappings[i].paddr = 0;
3111                 response->mappings[i].sz = 0;
3112             }
3113         } // !(vma && resolved != 0) 
3114     }
3115
3116     // Not found, respond accordingly
3117     if (resolved == 0) {
3118         found_vma = 0;
3119         found_pte = 0;
3120         //PSPRINTK("Mapping not found\n");
3121         response->header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3122         response->header.prio = PCN_KMSG_PRIO_NORMAL;
3123         response->tgroup_home_cpu = w->tgroup_home_cpu;
3124         response->tgroup_home_id = w->tgroup_home_id;
3125         response->requester_pid = w->requester_pid;
3126         response->address = address;
3127         response->present = 0;
3128         response->vaddr_start = 0;
3129         response->vaddr_size = 0;
3130         response->path[0] = '\0';
3131
3132         // Handle case where vma was present but no pte.
3133         if (vma) {
3134             //PSPRINTK("But vma present\n");
3135             found_vma = 1;
3136             response->present = 1;
3137             response->vaddr_start = vma->vm_start;
3138             response->vaddr_size = vma->vm_end - vma->vm_start;
3139             response->prot = vma->vm_page_prot;
3140             response->vm_flags = vma->vm_flags;
3141              if(vma->vm_file == NULL) {
3142                  response->path[0] = '\0';
3143              } else {    
3144                  plpath = d_path(&vma->vm_file->f_path,lpath,512);
3145                  strcpy(response->path,plpath);
3146                  response->pgoff = vma->vm_pgoff;
3147              }
3148         }
3149     }
3150
3151     // Send response
3152     if(response->present) {
3153         DO_UNTIL_SUCCESS(pcn_kmsg_send_long(w->from_cpu,
3154                             (struct pcn_kmsg_long_message*)(response),
3155                             sizeof(mapping_response_t) - 
3156                             sizeof(struct pcn_kmsg_hdr) -   //
3157                             sizeof(response->path) +         // Chop off the end of the path
3158                             strlen(response->path) + 1));    // variable to save bandwidth.
3159     } else {
3160         // This is an optimization to get rid of the _long send 
3161         // which is a time sink.
3162         nonpresent_mapping_response_t nonpresent_response;
3163         nonpresent_response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE_NONPRESENT;
3164         nonpresent_response.header.prio = PCN_KMSG_PRIO_NORMAL;
3165         nonpresent_response.tgroup_home_cpu = w->tgroup_home_cpu;
3166         nonpresent_response.tgroup_home_id  = w->tgroup_home_id;
3167         nonpresent_response.requester_pid = w->requester_pid;
3168         nonpresent_response.address = w->address;
3169         DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)(&nonpresent_response)));
3170     }
3171
3172     kfree(lpath);
3173 err_response:
3174     kfree(response);
3175 err_work:
3176     kfree(work);
3177
3178 #ifdef CONFIG_POPCORN_PERF    
3179     if(used_saved_mm && found_vma && found_pte) {
3180         PERF_MEASURE_STOP(&perf_process_mapping_request,
3181                 "Saved MM + VMA + PTE",
3182                 perf);
3183     } else if (used_saved_mm && found_vma && !found_pte) {
3184         PERF_MEASURE_STOP(&perf_process_mapping_request,
3185                 "Saved MM + VMA + no PTE",
3186                 perf);
3187     } else if (used_saved_mm && !found_vma) {
3188         PERF_MEASURE_STOP(&perf_process_mapping_request,
3189                 "Saved MM + no VMA",
3190                 perf);
3191     } else if (!used_saved_mm && found_vma && found_pte) {
3192         PERF_MEASURE_STOP(&perf_process_mapping_request,
3193                 "VMA + PTE",
3194                 perf);
3195     } else if (!used_saved_mm && found_vma && !found_pte) {
3196         PERF_MEASURE_STOP(&perf_process_mapping_request,
3197                 "VMA + no PTE",
3198                 perf);
3199     } else if (!used_saved_mm && !found_vma) {
3200         PERF_MEASURE_STOP(&perf_process_mapping_request,
3201                 "no VMA",
3202                 perf);
3203     } else {
3204         PERF_MEASURE_STOP(&perf_process_mapping_request,"ERR",perf);
3205     }
3206 #endif /* CONFIG_POPCORN_PERF */    
3207
3208     return;
3209 }
3210
3211 unsigned long long perf_aa, perf_bb, perf_cc, perf_dd, perf_ee;
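/* Scratch timestamp globals, apparently kept for ad-hoc profiling of the
 * handlers below; they are separate from the pcn_perf counters used above. */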
3212
3213 /**
3214  * @brief Process notification that a task has exited.  This function
3215  * sets the "return disposition" of the task, then wakes the task.
3216  * In this case, the "return disposition" specifies that the task
3217  * is exiting.  When the task resumes execution, it consults its
3218  * return disposition and acts accordingly - and invokes do_exit.
3219  *
3220  * <MEASURE perf_process_exit_item>
3221  */
3222 void process_exit_item(struct work_struct* work) {
3223     exit_work_t* w = (exit_work_t*) work;
3224     pid_t pid = w->pid;
3225     struct task_struct *task = w->task;
3226
3227     int perf = PERF_MEASURE_START(&perf_process_exit_item);
3228
3229     if(unlikely(!task)) {
3230         printk("%s: ERROR - NULL task\n",__func__);
3231         kfree(work);
3232         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3233         return;
3234     }
3235
3236     if(unlikely(task->pid != pid)) {
3237         printk("%s: ERROR - wrong task picked\n",__func__);
3238         kfree(work);
3239         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3240         return;
3241     }
3242     
3243     PSPRINTK("%s: process to kill %ld\n", __func__, (long)pid);
3244     PSPRINTK("%s: found task to kill, killing\n", __func__);
3245     PSPRINTK("%s: killing task - is_last_tgroup_member{%d}\n",
3246             __func__,
3247             w->is_last_tgroup_member);
3248
3249     // Now we're executing locally, so update our records
3250     //if(task->t_home_cpu == _cpu && task->t_home_id == task->pid)
3251     //    task->represents_remote = 0;
3252
3253     // Set the return disposition
3254     task->return_disposition = RETURN_DISPOSITION_EXIT;
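    // The woken task is expected to notice this flag on its way back from
    // migration and invoke do_exit() itself; roughly (exit code illustrative):
    //
    //     if (current->return_disposition == RETURN_DISPOSITION_EXIT)
    //         do_exit(0);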
3255
3256     wake_up_process(task);
3257
3258     kfree(work);
3259
3260     PERF_MEASURE_STOP(&perf_process_exit_item," ",perf);
3261 }
3262
3263 /**
3264  * @brief Process a group exit request.  This function
3265  * issues SIGKILL to all locally executing members of the specified
3266  * distributed thread group.  Only tasks that are actively
3267  * executing on this CPU will receive the SIGKILL.  Shadow tasks
3268  * will not be sent SIGKILL.  Group exit requests are sent to
3269  * all CPUs, so for shadow tasks, another CPU will issue the
3270  * SIGKILL.  When that occurs, the normal exit process will be
3271  * initiated for that task, and eventually, all of its shadow
3272  * tasks will be killed.
3273  */
3274 void process_group_exit_item(struct work_struct* work) {
3275     group_exit_work_t* w = (group_exit_work_t*) work;
3276     struct task_struct *task = NULL;
3277     struct task_struct *g;
3278
3279     //int perf = PERF_MEASURE_START(&perf_process_group_exit_item);
3280     PSPRINTK("%s: entered\n",__func__);
3281     PSPRINTK("exit group target id{%d}, cpu{%d}\n",
3282             w->tgroup_home_id, w->tgroup_home_cpu);
3283     read_lock(&tasklist_lock); // walk the tasklist under tasklist_lock, as the other handlers in this file do
3284     do_each_thread(g,task) {
3285         if(task->tgroup_home_id == w->tgroup_home_id &&
3286            task->tgroup_home_cpu == w->tgroup_home_cpu) {
3287             
3288             if(!task->represents_remote) {
3289                 // active, send sigkill
3290                 PSPRINTK("Issuing SIGKILL to pid %d\n",task->pid);
3291                 kill_pid(task_pid(task), SIGKILL, 1);
3292             }
3293
3294             // If it is a shadow task, it will eventually
3295             // get killed when its corresponding active task
3296             // is killed.
3297
3298         }
3299     } while_each_thread(g,task);
3300     read_unlock(&tasklist_lock);
3301     kfree(work);
3302
3303     PSPRINTK("%s: exiting\n",__func__);
3304     //PERF_MEASURE_STOP(&perf_process_group_exit_item," ",perf);
3305
3306 }
3307
3308
3309 /**
3310  * @brief Process request to unmap a region of memory from a distributed
3311  * thread group.  Look for local thread group members and carry out the
3312  * requested action.
3313  *
3314  * <MEASURE perf_process_munmap_request>
3315  */
3316 void process_munmap_request(struct work_struct* work) {
3317     munmap_request_work_t* w = (munmap_request_work_t*)work;
3318     munmap_response_t response;
3319     struct task_struct *task, *g;
3320     data_header_t *curr = NULL;
3321     mm_data_t* mm_data = NULL;
3322     mm_data_t* to_munmap = NULL;
3323     struct mm_struct* mm_to_munmap = NULL;
3324
3325     int perf = PERF_MEASURE_START(&perf_process_munmap_request);
3326
3327     PSPRINTK("%s: entered\n",__func__);
3328
3329     // munmap the specified region in the specified thread group
3330     read_lock(&tasklist_lock);
3331     do_each_thread(g,task) {
3332
3333         // Look for the thread group
3334         if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
3335            task->tgroup_home_id  == w->tgroup_home_id &&
3336            !(task->flags & PF_EXITING)) {
3337
3338             // Take note of the fact that an mm exists on the remote kernel
3339             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
3340             
3341             if (task->mm) {
3342                 mm_to_munmap = task->mm;
3343             }
3344             else
3345                 printk("%s: task %d has no mm\n", __func__, task->pid);
3346
3347             goto done; 
3348         }
3349     } while_each_thread(g,task);
3350 done:
3351     read_unlock(&tasklist_lock);
3352
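    // The mm pointer is only recorded while tasklist_lock (a spinning rwlock)
    // is held; the munmap itself happens here, after unlocking, because taking
    // mmap_sem can sleep. Clearing enable_distributed_munmap around the call
    // presumably keeps this remotely-requested unmap from being broadcast back
    // out to the other kernels.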
3353     if(mm_to_munmap) {
3354         PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
3355         current->enable_distributed_munmap = 0;
3356         do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
3357         current->enable_distributed_munmap = 1;
3358         PS_UP_WRITE(&mm_to_munmap->mmap_sem);
3359     }
3360     else
3361         printk("%s: no matching task with a valid mm for cpu{%d} id{%d}\n",
3362                  __func__, w->tgroup_home_cpu, w->tgroup_home_id);
3363
3364     // munmap the specified region in any saved mm's as well.
3365     // This keeps old mappings saved in the mm of dead thread
3366     // group members from being resolved accidentally after
3367     // being munmap()ped, as that would cause security/coherency
3368     // problems.
3369     PS_SPIN_LOCK(&_saved_mm_head_lock);
3370     curr = _saved_mm_head;
3371     while(curr) {
3372         mm_data = (mm_data_t*)curr;
3373         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
3374            mm_data->tgroup_home_id  == w->tgroup_home_id) {
3375            
3376             to_munmap = mm_data;
3377             goto found;
3378
3379         }
3380         curr = curr->next;
3381     }
3382 found:
3383     PS_SPIN_UNLOCK(&_saved_mm_head_lock);
3384
3385     if (to_munmap && to_munmap->mm) {
3386         PS_DOWN_WRITE(&to_munmap->mm->mmap_sem);
3387         current->enable_distributed_munmap = 0;
3388         do_munmap(to_munmap->mm, w->vaddr_start, w->vaddr_size);
3389         current->enable_distributed_munmap = 1;
3390         // Both pointers are local and were verified above, so the write
3391         // lock can be released unconditionally.
3392         PS_UP_WRITE(&to_munmap->mm->mmap_sem);
3395     }
3396     else if (to_munmap) // It is OK for to_munmap to be null, but not to_munmap->mm
3397         printk(KERN_ALERT"%s: ERROR1: to_munmap %p mm %p\n",
3398                          __func__, to_munmap, to_munmap?to_munmap->mm:0);
3399
3400     // Construct response
3401     response.header.type = PCN_KMSG_TYPE_PROC_SRV_MUNMAP_RESPONSE;
3402     response.header.prio = PCN_KMSG_PRIO_NORMAL;
3403     response.tgroup_home_cpu = w->tgroup_home_cpu;
3404     response.tgroup_home_id = w->tgroup_home_id;
3405     response.requester_pid = w->requester_pid;
3406     response.vaddr_start = w->vaddr_start;
3407     response.vaddr_size = w->vaddr_size;
3408     
3409     // Send response
3410     DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,
3411                         (struct pcn_kmsg_message*)(&response)));
3412
3413     kfree(work);
3414     
3415     PERF_MEASURE_STOP(&perf_process_munmap_request," ",perf);
3416 }
3417
3418 /**
3419  * @brief Process request to change protection of a region of memory in
3420  * a distributed thread group.  Look for local thread group members and
3421  * carry out the requested action.
3422  *
3423  * <MEASURE perf_process_mprotect_item>
3424  */
3425 void process_mprotect_item(struct work_struct* work) {
3426     mprotect_response_t response;
3427     mprotect_work_t* w = (mprotect_work_t*)work;
3428     int tgroup_home_cpu = w->tgroup_home_cpu;
3429     int tgroup_home_id  = w->tgroup_home_id;
3430     unsigned long start = w->start;
3431     size_t len = w->len;
3432     struct task_struct* task, *g;
3433     data_header_t* curr = NULL;
3434     mm_data_t* mm_data = NULL;
3435     mm_data_t* to_munmap = NULL;
3436     struct mm_struct *mm_to_munmap = NULL;
3437
3438     int perf = PERF_MEASURE_START(&perf_process_mprotect_item);
3439     
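    // The protection change is not applied locally; the affected range is
    // simply munmap()ped so that, presumably, the next fault re-fetches the
    // mapping (with its updated protections) through the usual mapping-request
    // path, mirroring process_munmap_request above.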
3440     // Find the task
3441     read_lock(&tasklist_lock);
3442     do_each_thread(g,task) {
3443
3444         // Look for the thread group
3445         if (task->tgroup_home_cpu == tgroup_home_cpu &&
3446             task->tgroup_home_id  == tgroup_home_id &&
3447             !(task->flags & PF_EXITING)) {
3448
3449             // Take note of the fact that an mm exists on the remote kernel
3450             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
3451
3452             if(task->mm) {
3453                 mm_to_munmap = task->mm;
3454             }
3455             else
3456                 printk("%s: task %d has no mm\n",__func__,task->pid);
3457             
3458             goto done;
3459         }
3460     } while_each_thread(g,task);
3461 done:
3462     read_unlock(&tasklist_lock);
3463
3464     if(mm_to_munmap) {
3465         PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
3466         current->enable_distributed_munmap = 0;
3467         do_munmap(mm_to_munmap, start, len);
3468         current->enable_distributed_munmap = 1;
3469         PS_UP_WRITE(&mm_to_munmap->mmap_sem);
3470     }
3471
3472     // munmap the specified region in any saved mm's as well.
3473     // This keeps old mappings saved in the mm of dead thread
3474     // group members from being resolved accidentally after
3475     // being munmap()ped, as that would cause security/coherency
3476     // problems.
3477     PS_SPIN_LOCK(&_saved_mm_head_lock);
3478     curr = _saved_mm_head;
3479     while(curr) {
3480         mm_data = (mm_data_t*)curr;
3481         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
3482            mm_data->tgroup_home_id  == w->tgroup_home_id) {
3483            
3484             to_munmap = mm_data;
3485             goto found;