Preparing to commit with clustering version
[projects/modsched/linux.git] / kernel / process_server.c
1 /**
2  * Implements task migration and maintains coherent 
3  * address spaces across CPU cores.
4  *
5  * David G. Katz
6  */
7
8 #include <linux/mcomm.h> // IPC
9 #include <linux/kthread.h>
10 #include <linux/export.h>
11 #include <linux/delay.h>
12 #include <linux/smp.h>
13 #include <linux/sched.h>
14 #include <linux/threads.h> // NR_CPUS
15 #include <linux/kmod.h>
16 #include <linux/path.h>
17 #include <linux/mount.h>
18 #include <linux/fs.h>
19 #include <linux/fs_struct.h>
20 #include <linux/file.h>
21 #include <linux/fdtable.h>
22 #include <linux/slab.h>
23 #include <linux/process_server.h>
24 #include <linux/mm.h>
25 #include <linux/io.h> // ioremap
26 #include <linux/mman.h> // MAP_ANONYMOUS
27 #include <linux/pcn_kmsg.h> // Messaging
28 #include <linux/pcn_perf.h> // performance measurement
29 #include <linux/string.h>
30
31 #include <linux/popcorn_cpuinfo.h>
32
33 #include <asm/pgtable.h>
34 #include <asm/atomic.h>
35 #include <asm/tlbflush.h>
36 #include <asm/cacheflush.h>
37 #include <asm/uaccess.h> // USER_DS
38 #include <asm/prctl.h> // prctl
39 #include <asm/proto.h> // do_arch_prctl
40 #include <asm/msr.h> // wrmsr_safe
41 #include <asm/mmu_context.h>
42 #include <asm/processor.h> // load_cr3
43
44 unsigned long get_percpu_old_rsp(void);
45
46 #include <linux/futex.h>
47 #define  NSIG 32
48
49 #include <linux/signal.h>
50 #include <linux/fcntl.h>
51 #include "futex_remote.h"
52 /**
53  * General purpose configuration
54  */
55
56 // Flag indicating whether or not to migrate the entire virtual
57 // memory space when a migration occurs.  
58 #define COPY_WHOLE_VM_WITH_MIGRATION 1
59
60 // Flag indicating whether or not to migrate file-backed executable
61 // pages when a fault occurs accessing executable memory.  When this
62 // flag is 1, those pages will be migrated.  When it is 0, the local
63 // file-system will be consulted instead.
64 #define MIGRATE_EXECUTABLE_PAGES_ON_DEMAND 1
65
66 // The maximum number of contiguous physically mapped regions to
67 // migrate in response to a mapping query.
68 #define MAX_MAPPINGS 1
69
70 extern int sys_topen(const char __user * filename, int flags, int mode, int fd);
71 /**
72  * Use the preprocessor to turn off printk.
73  */
74 #define PROCESS_SERVER_VERBOSE 0
75 #if PROCESS_SERVER_VERBOSE
76 #define PSPRINTK(...) printk(__VA_ARGS__)
77 #else
78 #define PSPRINTK(...) ;
79 #endif
80
81 #define PROCESS_SERVER_INSTRUMENT_LOCK 0
82 #if PROCESS_SERVER_VERBOSE && PROCESS_SERVER_INSTRUMENT_LOCK
83 #define PS_SPIN_LOCK(x) PSPRINTK("Acquiring spin lock in %s at line %d\n",__func__,__LINE__); \
84                        spin_lock(x); \
85                        PSPRINTK("Done acquiring spin lock in %s at line %d\n",__func__,__LINE__)
86 #define PS_SPIN_UNLOCK(x) PSPRINTK("Releasing spin lock in %s at line %d\n",__func__,__LINE__); \
87                           spin_unlock(x); \
88                           PSPRINTK("Done releasing spin lock in %s at line %d\n",__func__,__LINE__)
89 #define PS_DOWN_READ(x) PSPRINTK("Acquiring read lock in %s at line %d\n",__func__,__LINE__); \
90                         down_read(x); \
91                         PSPRINTK("Done acquiring read lock in %s at line %d\n",__func__,__LINE__)
92 #define PS_UP_READ(x) PSPRINTK("Releasing read lock in %s at line %d\n",__func__,__LINE__); \
93                       up_read(x); \
94                       PSPRINTK("Done releasing read lock in %s at line %d\n",__func__,__LINE__)
95 #define PS_DOWN_WRITE(x) PSPRINTK("Acquiring write lock in %s at line %d\n",__func__,__LINE__); \
96                          down_write(x); \
97                          PSPRINTK("Done acquiring write lock in %s at line %d\n",__func__,__LINE__)
98 #define PS_UP_WRITE(x) PSPRINTK("Releasing write lock in %s at line %d\n",__func__,__LINE__); \
99                        up_write(x); \
100                        PSPRINTK("Done releasing write lock in %s at line %d\n",__func__,__LINE__)
101
102
103 #else
104 #define PS_SPIN_LOCK(x) spin_lock(x)
105 #define PS_SPIN_UNLOCK(x) spin_unlock(x)
106 #define PS_DOWN_READ(x) down_read(x)
107 #define PS_UP_READ(x) up_read(x)
108 #define PS_DOWN_WRITE(x) down_write(x)
109 #define PS_UP_WRITE(x) up_write(x)
110 #endif
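/*
 * Illustrative usage of the lock wrappers above (a hedged sketch; the mm
 * pointer and surrounding work are hypothetical, not code from this file).
 * The wrappers expand to the plain locking calls unless both verbosity and
 * lock instrumentation are enabled:
 *
 *     PS_DOWN_WRITE(&mm->mmap_sem);
 *     ... modify the address space ...
 *     PS_UP_WRITE(&mm->mmap_sem);
 */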
111
112 /**
113  * Library data type definitions
114  */
115 #define PROCESS_SERVER_DATA_TYPE_TEST 0
116 #define PROCESS_SERVER_VMA_DATA_TYPE 1
117 #define PROCESS_SERVER_PTE_DATA_TYPE 2
118 #define PROCESS_SERVER_CLONE_DATA_TYPE 3
119 #define PROCESS_SERVER_MAPPING_REQUEST_DATA_TYPE 4
120 #define PROCESS_SERVER_MUNMAP_REQUEST_DATA_TYPE 5
121 #define PROCESS_SERVER_MM_DATA_TYPE 6
122 #define PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE 7
123 #define PROCESS_SERVER_MPROTECT_DATA_TYPE 8
124
125 /**
126  * Useful macros
127  */
128 #define DO_UNTIL_SUCCESS(x) while((x) != 0){}
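/*
 * Illustrative use of DO_UNTIL_SUCCESS (a sketch only, mirroring how it is
 * used later in this file to retry message sends).  The argument is
 * re-evaluated on every iteration, so it must be an expression whose side
 * effect is the operation being retried:
 *
 *     DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu,
 *                                    (struct pcn_kmsg_message*)&msg));
 */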
129
130 /**
131  * Perf
132  */
133 #define MEASURE_PERF 1
134 #if MEASURE_PERF
135 #define PERF_INIT() perf_init()
136 #define PERF_MEASURE_START(x) perf_measure_start(x)
137 #define PERF_MEASURE_STOP(x,y,z)  perf_measure_stop(x,y,z)
138
139 pcn_perf_context_t perf_count_remote_thread_members;
140 pcn_perf_context_t perf_process_back_migration;
141 pcn_perf_context_t perf_process_mapping_request;
142 pcn_perf_context_t perf_process_mapping_request_search_active_mm;
143 pcn_perf_context_t perf_process_mapping_request_search_saved_mm;
144 pcn_perf_context_t perf_process_mapping_request_do_lookup;
145 pcn_perf_context_t perf_process_mapping_request_transmit;
146 pcn_perf_context_t perf_process_mapping_response;
147 pcn_perf_context_t perf_process_tgroup_closed_item;
148 pcn_perf_context_t perf_process_exit_item;
149 pcn_perf_context_t perf_process_mprotect_item;
150 pcn_perf_context_t perf_process_munmap_request;
151 pcn_perf_context_t perf_process_munmap_response;
152 pcn_perf_context_t perf_process_server_try_handle_mm_fault;
153 pcn_perf_context_t perf_process_server_import_address_space;
154 pcn_perf_context_t perf_process_server_do_exit;
155 pcn_perf_context_t perf_process_server_do_munmap;
156 pcn_perf_context_t perf_process_server_do_migration;
157 pcn_perf_context_t perf_process_server_do_mprotect;
158 pcn_perf_context_t perf_process_server_notify_delegated_subprocess_starting;
159 pcn_perf_context_t perf_handle_thread_group_exit_notification;
160 pcn_perf_context_t perf_handle_remote_thread_count_response;
161 pcn_perf_context_t perf_handle_remote_thread_count_request;
162 pcn_perf_context_t perf_handle_munmap_response;
163 pcn_perf_context_t perf_handle_munmap_request;
164 pcn_perf_context_t perf_handle_mapping_response;
165 pcn_perf_context_t perf_handle_mapping_request;
166 pcn_perf_context_t perf_handle_pte_transfer;
167 pcn_perf_context_t perf_handle_vma_transfer;
168 pcn_perf_context_t perf_handle_exiting_process_notification;
169 pcn_perf_context_t perf_handle_process_pairing_request;
170 pcn_perf_context_t perf_handle_clone_request;
171 pcn_perf_context_t perf_handle_mprotect_response;
172 pcn_perf_context_t perf_handle_mprotect_request;
173
174 /**
175  *
176  */
177 static void perf_init(void) {
178    perf_init_context(&perf_count_remote_thread_members,
179            "count_remote_thread_members");
180    perf_init_context(&perf_process_back_migration,
181            "process_back_migration");
182    perf_init_context(&perf_process_mapping_request,
183            "process_mapping_request");
184    perf_init_context(&perf_process_mapping_request_search_active_mm,
185            "process_mapping_request_search_active_mm");
186    perf_init_context(&perf_process_mapping_request_search_saved_mm,
187            "process_mapping_request_search_saved_mm");
188    perf_init_context(&perf_process_mapping_request_do_lookup,
189            "process_mapping_request_do_lookup");
190    perf_init_context(&perf_process_mapping_request_transmit,
191            "process_mapping_request_transmit");
192    perf_init_context(&perf_process_mapping_response,
193            "process_mapping_response");
194    perf_init_context(&perf_process_tgroup_closed_item,
195            "process_tgroup_closed_item");
196    perf_init_context(&perf_process_exit_item,
197            "process_exit_item");
198    perf_init_context(&perf_process_mprotect_item,
199            "process_mprotect_item");
200    perf_init_context(&perf_process_munmap_request,
201            "process_munmap_request");
202    perf_init_context(&perf_process_munmap_response,
203            "process_munmap_response");
204    perf_init_context(&perf_process_server_try_handle_mm_fault,
205            "process_server_try_handle_mm_fault");
206    perf_init_context(&perf_process_server_import_address_space,
207            "process_server_import_address_space");
208    perf_init_context(&perf_process_server_do_exit,
209            "process_server_do_exit");
210    perf_init_context(&perf_process_server_do_munmap,
211            "process_server_do_munmap");
212    perf_init_context(&perf_process_server_do_migration,
213            "process_server_do_migration");
214    perf_init_context(&perf_process_server_do_mprotect,
215            "process_server_do_mprotect");
216    perf_init_context(&perf_process_server_notify_delegated_subprocess_starting,
217            "process_server_notify_delegated_subprocess_starting");
218    perf_init_context(&perf_handle_thread_group_exit_notification,
219            "handle_thread_group_exit_notification");
220    perf_init_context(&perf_handle_remote_thread_count_response,
221            "handle_remote_thread_count_response");
222    perf_init_context(&perf_handle_remote_thread_count_request,
223            "handle_remote_thread_count_request");
224    perf_init_context(&perf_handle_munmap_response,
225            "handle_munmap_response");
226    perf_init_context(&perf_handle_munmap_request,
227            "handle_munmap_request");
228    perf_init_context(&perf_handle_mapping_response,
229            "handle_mapping_response");
230    perf_init_context(&perf_handle_mapping_request,
231            "handle_mapping_request");
232    perf_init_context(&perf_handle_pte_transfer,
233            "handle_pte_transfer");
234    perf_init_context(&perf_handle_vma_transfer,
235            "handle_vma_transfer");
236    perf_init_context(&perf_handle_exiting_process_notification,
237            "handle_exiting_process_notification");
238    perf_init_context(&perf_handle_process_pairing_request,
239            "handle_process_pairing_request");
240    perf_init_context(&perf_handle_clone_request,
241            "handle_clone_request");
242    perf_init_context(&perf_handle_mprotect_request,
243            "handle_mprotect_request");
244    perf_init_context(&perf_handle_mprotect_response,
245            "handle_mprotect_response");
246
247 }
248
249 #else
250 #define PERF_INIT() 
251 #define PERF_MEASURE_START(x) -1
252 #define PERF_MEASURE_STOP(x, y, z)
253 #endif
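/*
 * Hedged sketch of how the perf macros above pair up (inferred from the
 * macro shapes and pcn_perf.h, not shown in this file; the handler name and
 * note string are placeholders).  perf_measure_start() presumably returns a
 * token that is handed back to perf_measure_stop():
 *
 *     int perf = PERF_MEASURE_START(&perf_handle_mapping_request);
 *     ... handle the request ...
 *     PERF_MEASURE_STOP(&perf_handle_mapping_request, "done", perf);
 *
 * With MEASURE_PERF disabled, START evaluates to -1 and STOP to nothing.
 */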
254
255
256 static DECLARE_WAIT_QUEUE_HEAD(countq);
257
258 /**
259  * Constants
260  */
261 #define RETURN_DISPOSITION_EXIT 0
262 #define RETURN_DISPOSITION_MIGRATE 1
263
264 /**
265  * Library
266  */
267
268 /**
269  * Some piping for linking data entries
270  * and identifying data entry types.
271  */
272 typedef struct _data_header {
273     struct _data_header* next;
274     struct _data_header* prev;
275     int data_type;
276 } data_header_t;
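/*
 * Illustrative sketch (not a helper defined in this file) of how a
 * data_header_t entry is pushed onto one of the list heads declared later
 * (e.g. _saved_mm_head), under the matching spinlock.  'entry' is a
 * hypothetical mm_data_t* whose first member is the header:
 *
 *     unsigned long flags;
 *     spin_lock_irqsave(&_saved_mm_head_lock, flags);
 *     entry->header.next = _saved_mm_head;
 *     entry->header.prev = NULL;
 *     if (_saved_mm_head)
 *         _saved_mm_head->prev = &entry->header;
 *     _saved_mm_head = &entry->header;
 *     spin_unlock_irqrestore(&_saved_mm_head_lock, flags);
 */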
277
278 /**
279  * Hold data about a pte to vma mapping.
280  */
281 typedef struct _pte_data {
282     data_header_t header;
283     int vma_id;
284     int clone_request_id;
285     int cpu;
286     unsigned long vaddr_start;
287     unsigned long paddr_start;
288     size_t sz;
289 } pte_data_t;
290
291 /**
292  * Hold data about a vma to process
293  * mapping.
294  */
295 typedef struct _vma_data {
296     data_header_t header;
297     spinlock_t lock;
298     unsigned long start;
299     unsigned long end;
300     int clone_request_id;
301     int cpu;
302     unsigned long flags;
303     int vma_id;
304     pgprot_t prot;
305     unsigned long pgoff;
306     pte_data_t* pte_list;
307     int mmapping_in_progress;
308     char path[256];
309 } vma_data_t;
310
311 typedef struct _contiguous_physical_mapping {
312     unsigned char present;
313     unsigned long vaddr;
314     unsigned long paddr;
315     size_t sz;
316 } contiguous_physical_mapping_t;
317
318 /**
319  *
320  */
321 typedef struct _clone_data {
322     data_header_t header;
323     spinlock_t lock;
324     int clone_request_id;
325     int requesting_cpu;
326     char exe_path[512];
327     unsigned long clone_flags;
328     unsigned long stack_start;
329     unsigned long stack_ptr;
330     unsigned long env_start;
331     unsigned long env_end;
332     unsigned long arg_start;
333     unsigned long arg_end;
334     unsigned long heap_start;
335     unsigned long heap_end;
336     unsigned long data_start;
337     unsigned long data_end;
338     struct pt_regs regs;
339     int placeholder_pid;
340     int placeholder_tgid;
341     int placeholder_cpu;
342     unsigned long thread_fs;
343     unsigned long thread_gs;
344     unsigned long thread_sp0;
345     unsigned long thread_sp;
346     unsigned long thread_usersp;
347     unsigned short thread_es;
348     unsigned short thread_ds;
349     unsigned short thread_fsindex;
350     unsigned short thread_gsindex;
351     int tgroup_home_cpu;
352     int tgroup_home_id;
353     int t_home_cpu;
354     int t_home_id;
355     int prio, static_prio, normal_prio; //from sched.c
356     unsigned int rt_priority; //from sched.c
357     int sched_class; //from sched.c but here we are using SCHED_NORMAL, SCHED_FIFO, etc.
358     unsigned long previous_cpus;
359     vma_data_t* vma_list;
360     vma_data_t* pending_vma_list;
361     /*mklinux_akshay*/int origin_pid;
362     sigset_t remote_blocked, remote_real_blocked;
363     sigset_t remote_saved_sigmask;
364     struct sigpending remote_pending;
365     unsigned long sas_ss_sp;
366     size_t sas_ss_size;
367     struct k_sigaction action[_NSIG];
368 } clone_data_t;
369
370 /**
371  * 
372  */
373 typedef struct _mapping_request_data {
374     data_header_t header;
375     int tgroup_home_cpu;
376     int tgroup_home_id;
377     int requester_pid;
378     unsigned long address;
379     unsigned long vaddr_start;
380     unsigned long vaddr_size;
381     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
382     pgprot_t prot;
383     unsigned long vm_flags;
384     unsigned char present;
385     unsigned char complete;
386     unsigned char from_saved_mm;
387     int responses;
388     int expected_responses;
389     unsigned long pgoff;
390     spinlock_t lock;
391     char path[512];
392 } mapping_request_data_t;
393
394 /**
395  *
396  */
397 typedef struct _munmap_request_data {
398     data_header_t header;
399     int tgroup_home_cpu;
400     int tgroup_home_id;
401     int requester_pid;
402     unsigned long vaddr_start;
403     unsigned long vaddr_size;
404     int responses;
405     int expected_responses;
406     spinlock_t lock;
407 } munmap_request_data_t;
408
409 /**
410  *
411  */
412 typedef struct _remote_thread_count_request_data {
413     data_header_t header;
414     int tgroup_home_cpu;
415     int tgroup_home_id;
416     int requester_pid;
417     int responses;
418     int expected_responses;
419     int count;
420     spinlock_t lock;
421 } remote_thread_count_request_data_t;
422
423 /**
424  *
425  */
426 typedef struct _mm_data {
427     data_header_t header;
428     int tgroup_home_cpu;
429     int tgroup_home_id;
430     struct mm_struct* mm;
431 } mm_data_t;
432
433 typedef struct _mprotect_data {
434     data_header_t header;
435     int tgroup_home_cpu;
436     int tgroup_home_id;
437     int requester_pid;
438     unsigned long start;
439     int responses;
440     int expected_responses;
441     spinlock_t lock;
442 } mprotect_data_t;
443
444 /**
445  * This message is sent to a remote cpu in order to 
446  * ask it to spin up a process on behalf of the
447  * requesting cpu.  Some of these fields may go
448  * away in the near future.
449  */
450 typedef struct _clone_request {
451     struct pcn_kmsg_hdr header;
452     int clone_request_id;
453     unsigned long clone_flags;
454     unsigned long stack_start;
455     unsigned long stack_ptr;
456     unsigned long env_start;
457     unsigned long env_end;
458     unsigned long arg_start;
459     unsigned long arg_end;
460     unsigned long heap_start;
461     unsigned long heap_end;
462     unsigned long data_start;
463     unsigned long data_end;
464     struct pt_regs regs;
465     char exe_path[512];
466     int placeholder_pid;
467     int placeholder_tgid;
468     unsigned long thread_fs;
469     unsigned long thread_gs;
470     unsigned long thread_sp0;
471     unsigned long thread_sp;
472     unsigned long thread_usersp;
473     unsigned short thread_es;
474     unsigned short thread_ds;
475     unsigned short thread_fsindex;
476     unsigned short thread_gsindex;
477     int tgroup_home_cpu;
478     int tgroup_home_id;
479     int t_home_cpu;
480     int t_home_id;
481     int prio, static_prio, normal_prio; //from sched.c
482     unsigned int rt_priority; //from sched.c
483     int sched_class; //from sched.c but here we are using SCHED_NORMAL, SCHED_FIFO, etc.
484     /*mklinux_akshay*/int origin_pid;
485     sigset_t remote_blocked, remote_real_blocked;
486     sigset_t remote_saved_sigmask;
487     struct sigpending remote_pending;
488     unsigned long sas_ss_sp;
489     size_t sas_ss_size;
490     struct k_sigaction action[_NSIG];
491     unsigned long previous_cpus;
492 } clone_request_t;
493
494 /**
495  * This message is sent in response to a clone request.
496  * Its purpose is to notify the requesting cpu that
497  * the specified pid is executing on behalf of the
498  * requesting cpu.
499  */
500 typedef struct _create_process_pairing {
501     struct pcn_kmsg_hdr header;
502     int your_pid; // PID on the cpu receiving this pairing request
503     int my_pid;   // PID on the cpu transmitting this pairing request
504 } create_process_pairing_t;
505
506 /**
507  * This message informs the remote cpu of delegated
508  * process death.  This occurs whether the process
509  * is a placeholder or a delegate locally.
510  */
511 struct _exiting_process {
512     struct pcn_kmsg_hdr header;
513     int t_home_cpu;             // 4
514     int t_home_id;              // 4
515     int my_pid;                 // 4
516     int is_last_tgroup_member;  // 4+
517                                 // ---
518                                 // 16 -> 44 bytes of padding needed
519     char pad[44];
520 } __attribute__((packed)) __attribute__((aligned(64)));  
521 typedef struct _exiting_process exiting_process_t;
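/*
 * The pad[] sizes in these message structs are computed by hand against a
 * 64-byte budget (the arithmetic in the comments assumes a 4-byte
 * pcn_kmsg_hdr).  A hedged sketch of how that could be asserted at build
 * time, from inside any init function (BUILD_BUG_ON cannot sit at file
 * scope):
 *
 *     BUILD_BUG_ON(sizeof(exiting_process_t) != 64);
 *
 * Because of __attribute__((aligned(64))), an overflow would round the
 * struct up to 128 bytes and trip the assertion.
 */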
522
523 /**
524  *
525  */
526 struct _exiting_group {
527     struct pcn_kmsg_hdr header;
528     int tgroup_home_cpu;        // 4
529     int tgroup_home_id;         // 4
530                                 // ---
531                                 // 8 -> 52 bytes of padding needed
532     char pad[52];
533 } __attribute__((packed)) __attribute__((aligned(64)));
534 typedef struct _exiting_group exiting_group_t;
535
536 /**
537  * Inform remote cpu of a vma to process mapping.
538  */
539 typedef struct _vma_transfer {
540     struct pcn_kmsg_hdr header;
541     int vma_id;
542     int clone_request_id;
543     unsigned long start;
544     unsigned long end;
545     pgprot_t prot;
546     unsigned long flags;
547     unsigned long pgoff;
548     char path[256];
549 } vma_transfer_t;
550
551 /**
552  * Inform remote cpu of a pte to vma mapping.
553  */
554 struct _pte_transfer {
555     struct pcn_kmsg_hdr header;
556     int vma_id;                  //  4
557     int clone_request_id;        //  4
558     unsigned long vaddr_start;   //  8
559     unsigned long paddr_start;   //  8
560     size_t sz;                   //  4 +
561                                  //  ---
562                                  //  28 -> 32 bytes of padding needed
563     char pad[32];
564 } __attribute__((packed)) __attribute__((aligned(64)));
565
566 typedef struct _pte_transfer pte_transfer_t;
567
568 /**
569  *
570  */
571 struct _mapping_request {
572     struct pcn_kmsg_hdr header;
573     int tgroup_home_cpu;        // 4
574     int tgroup_home_id;         // 4
575     int requester_pid;          // 4
576     unsigned long address;      // 8
577                                 // ---
578                                 // 20 -> 40 bytes of padding needed
579     char pad[40];
580
581 } __attribute__((packed)) __attribute__((aligned(64)));
582
583 typedef struct _mapping_request mapping_request_t;
584
585 /*
586  * type = PCN_KMSG_TYPE_PROC_SRV_THREAD_GROUP_EXITED_NOTIFICATION
587  */
588 struct _thread_group_exited_notification {
589     struct pcn_kmsg_hdr header;
590     int tgroup_home_cpu;        // 4
591     int tgroup_home_id;         // 4
592                                 // ---
593                                 // 8 -> 52 bytes of padding needed
594     char pad[52];
595 } __attribute__((packed)) __attribute__((aligned(64)));
596 typedef struct _thread_group_exited_notification thread_group_exited_notification_t;
597
598
599 /**
600  *
601  */
602 struct _mapping_response {
603     struct pcn_kmsg_hdr header;
604     int tgroup_home_cpu;        
605     int tgroup_home_id; 
606     int requester_pid;
607     unsigned char present;      
608     unsigned char from_saved_mm;
609     unsigned long address;      
610     unsigned long vaddr_start;
611     unsigned long vaddr_size;
612     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
613     pgprot_t prot;              
614     unsigned long vm_flags;     
615     unsigned long pgoff;
616     char path[512]; // saved for last so we can cut
617                     // off data when possible.
618 };
619 typedef struct _mapping_response mapping_response_t;
620
621 /**
622  * This is a hack to eliminate the overhead of sending
623  * an entire mapping_response_t when there is no mapping.
624  * The overhead is due to the size of the message, which
625  * requires the _long pcn_kmsg variant to be used.
626  */
627 struct _nonpresent_mapping_response {
628     struct pcn_kmsg_hdr header;
629     int tgroup_home_cpu;        // 4
630     int tgroup_home_id;         // 4
631     int requester_pid;            // 4
632     unsigned long address;      // 8
633                                 // ---
634                                 // 20 -> 40 bytes of padding needed
635     char pad[40];
636
637 } __attribute__((packed)) __attribute__((aligned(64)));
638 typedef struct _nonpresent_mapping_response nonpresent_mapping_response_t;
639
640 /**
641  *
642  */
643 struct _munmap_request {
644     struct pcn_kmsg_hdr header;
645     int tgroup_home_cpu;         // 4
646     int tgroup_home_id;          // 4
647     int requester_pid;           // 4
648     unsigned long vaddr_start;   // 8
649     unsigned long vaddr_size;    // 8
650                                  // ---
651                                  // 28 -> 32 bytes of padding needed
652     char pad[32];
653 } __attribute__((packed)) __attribute__((aligned(64)));
654 typedef struct _munmap_request munmap_request_t;
655
656 /**
657  *
658  */
659 struct _munmap_response {
660     struct pcn_kmsg_hdr header;
661     int tgroup_home_cpu;        // 4
662     int tgroup_home_id;         // 4
663     int requester_pid;          // 4
664     unsigned long vaddr_start;  // 8
665     unsigned long vaddr_size;   // 8+
666                                 // ---
667                                 // 28 -> 32 bytes of padding needed
668     char pad[32];
669 } __attribute__((packed)) __attribute__((aligned(64)));
670 typedef struct _munmap_response munmap_response_t;
671
672 /**
673  *
674  */
675 struct _remote_thread_count_request {
676     struct pcn_kmsg_hdr header;
677     int tgroup_home_cpu;        // 4
678     int tgroup_home_id;         // 4
679     int requester_pid;          // 4
680                                 // ---
681                                 // 12 -> 48 bytes of padding needed
682     char pad[48];
683 } __attribute__((packed)) __attribute__((aligned(64)));
684 typedef struct _remote_thread_count_request remote_thread_count_request_t;
685
686 /**
687  *
688  */
689 struct _remote_thread_count_response {
690     struct pcn_kmsg_hdr header;
691     int tgroup_home_cpu;        // 4
692     int tgroup_home_id;         // 4
693     int requester_pid;        // 4
694     int count;                  // 4
695                                 // ---
696                                 // 16 -> 44 bytes of padding needed
697     char pad[44];
698 } __attribute__((packed)) __attribute__((aligned(64)));
699 typedef struct _remote_thread_count_response remote_thread_count_response_t;
700
701 /**
702  *
703  */
704 struct _mprotect_request {
705     struct pcn_kmsg_hdr header; 
706     int tgroup_home_cpu;        // 4
707     int tgroup_home_id;         // 4
708     int requester_pid;          // 4
709     unsigned long start;        // 8
710     size_t len;                 // 4
711     unsigned long prot;         // 8
712                                 // ---
713                                 // 32 -> 28 bytes of padding needed
714     char pad[28];
715 } __attribute__((packed)) __attribute__((aligned(64)));
716 typedef struct _mprotect_request mprotect_request_t;
717
718 /**
719  *
720  */
721 struct _mprotect_response {
722     struct pcn_kmsg_hdr header;
723     int tgroup_home_cpu;        // 4
724     int tgroup_home_id;         // 4
725     int requester_pid;          // 4
726     unsigned long start;        // 8
727                                 // ---
728                                 // 20 -> 40 bytes of padding needed
729     char pad[40];
730 } __attribute__((packed)) __attribute__((aligned(64)));
731 typedef struct _mprotect_response mprotect_response_t;
732
733 /**
734  *
735  */
736 typedef struct _back_migration {
737     struct pcn_kmsg_hdr header;
738     int tgroup_home_cpu;
739     int tgroup_home_id;
740     int t_home_cpu;
741     int t_home_id;
742     unsigned long previous_cpus;
743     struct pt_regs regs;
744     unsigned long thread_fs;
745     unsigned long thread_gs;
746     unsigned long thread_usersp;
747     unsigned short thread_es;
748     unsigned short thread_ds;
749     unsigned short thread_fsindex;
750     unsigned short thread_gsindex;
751 } back_migration_t;
752
753 /**
754  *
755  */
756 typedef struct _deconstruction_data {
757     int clone_request_id;
758     int vma_id;
759     int dst_cpu;
760 } deconstruction_data_t;
761
762 /**
763  *
764  */
765 typedef struct {
766     struct work_struct work;
767     struct task_struct *task;
768     pid_t pid;
769     int t_home_cpu;
770     int t_home_id;
771     int is_last_tgroup_member;
772     struct pt_regs regs;
773     unsigned long thread_fs;
774     unsigned long thread_gs;
775     unsigned long thread_sp0;
776     unsigned long thread_sp;
777     unsigned long thread_usersp;
778     unsigned short thread_es;
779     unsigned short thread_ds;
780     unsigned short thread_fsindex;
781     unsigned short thread_gsindex;
782 } exit_work_t;
783
784 /**
785  *
786  */
787 typedef struct {
788     struct work_struct work;
789     int tgroup_home_cpu;
790     int tgroup_home_id;
791 } group_exit_work_t;
792
793 /**
794  *
795  */
796 typedef struct {
797     struct work_struct work;
798     int tgroup_home_cpu;
799     int tgroup_home_id;
800     int requester_pid;
801     unsigned long address;
802     int from_cpu;
803 } mapping_request_work_t;
804
805 /**
806  *
807  */
808 typedef struct {
809     struct work_struct work;
810     int tgroup_home_cpu;
811     int tgroup_home_id;
812     int requester_pid;
813     unsigned char from_saved_mm;
814     unsigned long address;      
815     unsigned char present;      
816     unsigned long vaddr_mapping;
817     unsigned long vaddr_start;
818     unsigned long vaddr_size;
819     unsigned long paddr_mapping;
820     size_t paddr_mapping_sz;
821     pgprot_t prot;              
822     unsigned long vm_flags;     
823     char path[512];
824     unsigned long pgoff;
825     int from_cpu;
826 } mapping_response_work_t;
827
828 /**
829  *
830  */
831 typedef struct {
832     struct work_struct work;
833     int tgroup_home_cpu;
834     int tgroup_home_id;
835     int requester_pid;
836     unsigned long address;
837     int from_cpu;
838 } nonpresent_mapping_response_work_t;
839
840 /**
841  *
842  */
843 typedef struct {
844     struct work_struct work;
845     int tgroup_home_cpu;
846     int tgroup_home_id;
847 } tgroup_closed_work_t;
848
849 /**
850  *
851  */
852 typedef struct {
853     struct work_struct work;
854     int tgroup_home_cpu;
855     int tgroup_home_id;
856     int requester_pid;
857     unsigned long vaddr_start;
858     unsigned long vaddr_size;
859     int from_cpu;
860 } munmap_request_work_t;
861
862 /**
863  *
864  */
865 typedef struct {
866     struct work_struct work;
867     int tgroup_home_cpu;
868     int tgroup_home_id;
869     int requester_pid;
870     unsigned long vaddr_start;
871     unsigned long vaddr_size;
872 } munmap_response_work_t;
873
874 /**
875  * 
876  */
877 typedef struct {
878     struct work_struct work;
879     int tgroup_home_cpu;
880     int tgroup_home_id;
881     int requester_pid;
882     unsigned long start;
883     size_t len;
884     unsigned long prot;
885     int from_cpu;
886 } mprotect_work_t;
887
888 /**
889  *
890  */
891 typedef struct {
892     struct work_struct work;
893     int tgroup_home_cpu;
894     int tgroup_home_id;
895     int requester_pid;
896     int from_cpu;
897 } remote_thread_count_request_work_t;
898
899 /**
900  *
901  */
902 typedef struct {
903     struct work_struct work;
904     int tgroup_home_cpu;
905     int tgroup_home_id;
906     int t_home_cpu;
907     int t_home_id;
908     unsigned long previous_cpus;
909     struct pt_regs regs;
910     unsigned long thread_fs;
911     unsigned long thread_gs;
912     unsigned long thread_usersp;
913     unsigned short thread_es;
914     unsigned short thread_ds;
915     unsigned short thread_fsindex;
916     unsigned short thread_gsindex;
917 } back_migration_work_t;
918
919
920 /**
921  * Prototypes
922  */
923 static int handle_clone_request(struct pcn_kmsg_message* msg);
924 long process_server_clone(unsigned long clone_flags,
925                           unsigned long stack_start,                                                                                                                   
926                           struct pt_regs *regs,
927                           unsigned long stack_size,
928                           struct task_struct* task);
929 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start);
930 static clone_data_t* find_clone_data(int cpu, int clone_request_id);
931 static void dump_mm(struct mm_struct* mm);
932 static void dump_task(struct task_struct* task,struct pt_regs* regs,unsigned long stack_ptr);
933 static void dump_thread(struct thread_struct* thread);
934 static void dump_regs(struct pt_regs* regs);
935 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr); 
936
937 /**
938  * Prototypes from parts of the kernel that I modified or made available to external
939  * modules.
940  */
941 // I removed the 'static' modifier in mm/memory.c for do_wp_page so I could use it 
942 // here.
943 int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
944                unsigned long address, pte_t *page_table, pmd_t *pmd,
945                spinlock_t *ptl, pte_t orig_pte);
946 int do_mprotect(struct task_struct* task, unsigned long start, size_t len, unsigned long prot, int do_remote);
947
948 /**
949  * Module variables
950  */
951 static int _vma_id = 0;
952 static int _clone_request_id = 0;
953 static int _cpu = -1;
954 static unsigned long long perf_a, perf_b, perf_c, perf_d, perf_e;
955 data_header_t* _saved_mm_head = NULL;             // Saved MM list
956 DEFINE_SPINLOCK(_saved_mm_head_lock);             // Lock for _saved_mm_head
957 data_header_t* _mapping_request_data_head = NULL; // Mapping request data head
958 DEFINE_SPINLOCK(_mapping_request_data_head_lock);  // Lock for above
959 data_header_t* _count_remote_tmembers_data_head = NULL;
960 DEFINE_SPINLOCK(_count_remote_tmembers_data_head_lock);
961 data_header_t* _munmap_data_head = NULL;
962 DEFINE_SPINLOCK(_munmap_data_head_lock);
963 data_header_t* _mprotect_data_head = NULL;
964 DEFINE_SPINLOCK(_mprotect_data_head_lock);
965 data_header_t* _data_head = NULL;                 // General purpose data store
966 DEFINE_SPINLOCK(_data_head_lock);                 // Lock for _data_head
967 DEFINE_SPINLOCK(_vma_id_lock);                    // Lock for _vma_id
968 DEFINE_SPINLOCK(_clone_request_id_lock);          // Lock for _clone_request_id
969 struct rw_semaphore _import_sem;
970 DEFINE_SPINLOCK(_remap_lock);
971
972
973 // Work Queues
974 static struct workqueue_struct *clone_wq;
975 static struct workqueue_struct *exit_wq;
976 static struct workqueue_struct *mapping_wq;
977
978 /**
979  * General helper functions and debugging tools
980  */
981
982 /**
983  * TODO
984  */
985 static bool __user_addr (unsigned long x ) {
986     return (x < PAGE_OFFSET);   
987 }
988
989 // TODO: cpu_has_known_tgroup_mm must be reworked, i.e. the map must be shared and pointed to by the threads, NOT kept as one copy per thread -- that is anti-scaling and redundant information
990 /**
991  *
992  */
993 static int cpu_has_known_tgroup_mm(int cpu)
994 {
995 #ifdef SUPPORT_FOR_CLUSTERING
996     struct list_head *iter;
997     _remote_cpu_info_list_t *objPtr;
998     struct cpumask *pcpum = NULL;
999     int cpuid = -1;
1000     extern struct list_head rlist_head;
1001     if (cpumask_test_cpu(cpu, cpu_present_mask))
1002         return 1;
1003     list_for_each(iter, &rlist_head) {
1004         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
1005         cpuid = objPtr->_data._processor;
1006         pcpum = &(objPtr->_data._cpumask);
1007         if (cpumask_test_cpu(cpu, pcpum)) {
1008             if ( bitmap_intersects(cpumask_bits(pcpum),
1009                                    &(current->known_cpu_with_tgroup_mm),
1010                                    (sizeof(unsigned long) *8)) ) {
1011                 return 1;
1012             }
1013             return 0;
1014         }
1015     }
1016     printk(KERN_ERR"%s: ERROR the input cpu (%d) is not included in any known cpu cluster\n",
1017                 __func__, cpu);
1018     return 0;
1019 #else
1020     if(test_bit(cpu,&current->known_cpu_with_tgroup_mm)) {
1021         return 1;
1022     }
1023     return 0;
1024 #endif
1025 }
1026
1027 /**
1028  *
1029  */
1030 static void set_cpu_has_known_tgroup_mm(struct task_struct *task,int cpu) {
1031     struct task_struct *me = task;
1032     struct task_struct *t = me;
1033     do {
1034         set_bit(cpu,&t->known_cpu_with_tgroup_mm);
1035     } while_each_thread(me, t);
1036 }
1037
1038 /**
1039  * @brief find_vma does not always return the correct vm_area_struct*.
1040  * If it fails to find a vma for the specified address, it instead
1041  * returns the closest one in the rb list.  This function looks
1042  * for this failure, and returns NULL in this error condition.
1043  * Otherwise, it returns a pointer to the struct vm_area_struct
1044  * containing the specified address.
1045  */
1046 static struct vm_area_struct* find_vma_checked(struct mm_struct* mm, unsigned long address) {
1047     struct vm_area_struct* vma = find_vma(mm,address&PAGE_MASK);
1048     if( vma == NULL ||
1049         (vma->vm_start > (address & PAGE_MASK)) ||
1050         (vma->vm_end <= address) ) {
1051         
1052         vma = NULL;
1053     }
1054
1055     return vma;
1056 }
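/*
 * Illustrative caller pattern for find_vma_checked() (a sketch; the error
 * path shown is hypothetical).  mmap_sem must be held around find_vma, and
 * a NULL return means no VMA actually contains the address:
 *
 *     PS_DOWN_READ(&mm->mmap_sem);
 *     vma = find_vma_checked(mm, address);
 *     if (vma == NULL) {
 *         PS_UP_READ(&mm->mmap_sem);
 *         return -EFAULT; // hypothetical error path
 *     }
 *     ...
 *     PS_UP_READ(&mm->mmap_sem);
 */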
1057
1058 /**
1059  * Note, mm->mmap_sem must already be held!
1060  */
1061 /*static int is_mapped(struct mm_struct* mm, unsigned vaddr) {
1062     pte_t* pte = NULL;
1063     pmd_t* pmd = NULL;
1064     pud_t* pud = NULL;
1065     pgd_t* pgd = NULL;
1066     int ret = 0;
1067
1068     pgd = pgd_offset(mm, vaddr);
1069     if(!pgd_none(*pgd) && pgd_present(*pgd)) {
1070         pud = pud_offset(pgd,vaddr); 
1071         if(pud_present(*pud)) {
1072             pmd = pmd_offset(pud,vaddr);
1073             if(pmd_present(*pmd)) {
1074                 pte = pte_offset_map(pmd,vaddr);
1075                 if(pte && !pte_none(*pte)) {
1076                     // It exists!
1077                     ret = 1;
1078                 }
1079             }
1080         }
1081     }
1082     return ret;
1083
1084 }*/
1085 /* Antonio's Version
1086 static int is_mapped(struct mm_struct* mm, unsigned vaddr)
1087 {
1088     pte_t* pte = NULL;
1089     pmd_t* pmd = NULL;
1090     pud_t* pud = NULL;
1091     pgd_t* pgd = NULL;
1092     pgd = pgd_offset(mm, vaddr);
1093     if (pgd && !pgd_none(*pgd) && likely(!pgd_bad(*pgd)) && pgd_present(*pgd)) {
1094         pud = pud_offset(pgd, vaddr);
1095         if (pud && !pud_none(*pud) && likely(!pud_bad(*pud)) && pud_present(*pud)) {
1096             pmd = pmd_offset(pud, vaddr);
1097             if (pmd && !pmd_none(*pmd) && likely(!pmd_bad(*pmd)) && pmd_present(*pmd)) {
1098                 pte = pte_offset_map(pmd, vaddr);
1099                 if (pte && !pte_none(*pte) && pte_present(*pte)) {
1100                     return 1; // It exists!
1101                 }
1102             }
1103         }
1104     }
1105     return 0;
1106 }
1107 */
1108
1109 /**
1110  * @brief Find the mm_struct for a given distributed thread.  
1111  * If one does not exist, then return NULL.
1112  */
1113 static struct mm_struct* find_thread_mm(
1114         int tgroup_home_cpu, 
1115         int tgroup_home_id, 
1116         mm_data_t **used_saved_mm,
1117         struct task_struct** task_out)
1118 {
1119
1120     struct task_struct *task, *g;
1121     struct mm_struct * mm = NULL;
1122     data_header_t* data_curr;
1123     mm_data_t* mm_data;
1124     unsigned long lockflags;
1125
1126     *used_saved_mm = NULL;
1127     *task_out = NULL;
1128
1129     // First, look through all active processes.
1130     do_each_thread(g,task) {
1131         if(task->tgroup_home_cpu == tgroup_home_cpu &&
1132            task->tgroup_home_id  == tgroup_home_id) {
1133             mm = task->mm;
1134             *task_out = task;
1135             *used_saved_mm = NULL;
1136             goto out;
1137         }
1138     } while_each_thread(g,task);
1139
1140     // Failing that, look through saved mm's.
1141     spin_lock_irqsave(&_saved_mm_head_lock,lockflags);
1142     data_curr = _saved_mm_head;
1143     while(data_curr) {
1144
1145         mm_data = (mm_data_t*)data_curr;
1146     
1147         if((mm_data->tgroup_home_cpu == tgroup_home_cpu) &&
1148            (mm_data->tgroup_home_id  == tgroup_home_id)) {
1149             mm = mm_data->mm;
1150             *used_saved_mm = mm_data;
1151             break;
1152         }
1153
1154         data_curr = data_curr->next;
1155
1156     } // while
1157
1158     spin_unlock_irqrestore(&_saved_mm_head_lock,lockflags);
1159
1160
1161 out:
1162     return mm;
1163 }
1164
1165 /**
1166  * @brief A best effort at making a page writable
1167  * @return void
1168  */
1169 static void mk_page_writable(struct mm_struct* mm,
1170                              struct vm_area_struct* vma,
1171                              unsigned long vaddr) {
1172     spinlock_t* ptl;
1173     pte_t *ptep, pte, entry;
1174      
1175     // Grab the pte, and lock it     
1176     ptep = get_locked_pte(mm, vaddr, &ptl);
1177     if (!ptep)
1178         goto out;
1179
1180     // grab the contents of the pte pointer
1181     pte = *ptep;
1182     
1183     if(pte_none(*ptep)) {
1184         pte_unmap_unlock(ptep, ptl);
1185         goto out;
1186     }
1187
1188     arch_enter_lazy_mmu_mode();
1189
1190     // Make the content copy writable and dirty, then
1191     // write it back into the page tables.
1192     entry = pte_mkwrite(pte_mkdirty(pte));
1193     set_pte_at(mm, vaddr, ptep, entry);
1194
1195     update_mmu_cache(vma, vaddr, ptep);
1196
1197     arch_leave_lazy_mmu_mode();
1198
1199     // Unlock the pte
1200     pte_unmap_unlock(ptep, ptl);
1201 out:
1202     return;
1203 }
1204
1205 /**
1206  * @brief Check to see if a given page is writable.
1207  * @return 0 if not writable or error, not zero otherwise
1208  */
1209 static int is_page_writable(struct mm_struct* mm,
1210                             struct vm_area_struct* vma,
1211                             unsigned long addr) {
1212     spinlock_t* ptl;
1213     pte_t *ptep, pte;
1214     int ret = 0;
1215
1216     ptep = get_locked_pte(mm,addr,&ptl);
1217     if(!ptep)
1218         goto out;
1219
1220     pte = *ptep;
1221     
1222     if(pte_none(*ptep)) {
1223         pte_unmap_unlock(ptep, ptl);
1224         ret = -1;
1225         goto out;
1226     }
1227
1228     ret = pte_write(pte);
1229
1230     pte_unmap_unlock(ptep, ptl);
1231
1232 out:
1233     return ret;
1234 }
1235
1236 /**
1237  * @brief Get the clone data associated with the current task.
1238  * @return clone_data_t* or NULL if not present
1239  */
1240 static clone_data_t* get_current_clone_data(void) {
1241     clone_data_t* ret = NULL;
1242
1243     if(!current->clone_data) {
1244         // Do costly lookup
1245         ret = find_clone_data(current->prev_cpu,
1246                                  current->clone_request_id);
1247         // Store it for easy access next time.
1248         current->clone_data = ret;
1249     } else {
1250         ret = (clone_data_t*)current->clone_data;
1251     }
1252
1253     return ret;
1254 }
1255
1256
1257 /**
1258  * @brief Page walk has encountered a pte while deconstructing
1259  * the client side processes address space.  Transfer it.
1260  */
1261 /*static int deconstruction_page_walk_pte_entry_callback(pte_t *pte, 
1262         unsigned long start, unsigned long end, struct mm_walk *walk) {
1263
1264     deconstruction_data_t* decon_data = (deconstruction_data_t*)walk->private;
1265     int vma_id = decon_data->vma_id;
1266     int dst_cpu = decon_data->dst_cpu;
1267     int clone_request_id = decon_data->clone_request_id;
1268     pte_transfer_t pte_xfer;
1269
1270     if(NULL == pte || !pte_present(*pte)) {
1271         return 0;
1272     }
1273
1274     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1275     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1276     pte_xfer.paddr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1277     // NOTE: Found the above pte to paddr conversion here -
1278     // http://wbsun.blogspot.com/2010/12/convert-userspace-virtual-address-to.html
1279     pte_xfer.vaddr = start;
1280     pte_xfer.vma_id = vma_id;
1281     pte_xfer.clone_request_id = clone_request_id;
1282     pte_xfer.pfn = pte_pfn(*pte);
1283     PSPRINTK("Sending PTE\n"); 
1284     DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&pte_xfer));
1285
1286     return 0;
1287 }*/
1288
1289 /**
1290  * @brief Callback used when walking a memory map.  It looks to see
1291  * if the page is present.  If present, it resolves the given
1292  * address.
1293  * @return always returns 0
1294  */
1295 static int vm_search_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, unsigned long end, struct mm_walk *walk) {
1296  
1297     unsigned long* resolved_addr = (unsigned long*)walk->private;
1298
1299     if (pte == NULL || pte_none(*pte) || !pte_present(*pte)) {
1300         return 0;
1301     }
1302
1303     // Store the resolved address in the address
1304     // pointed to by the private field of the walk
1305     // structure.  This is checked by the caller
1306     // of the walk function when the walk is complete.
1307     *resolved_addr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1308     return 0;
1309 }
1310
1311 /**
1312  * @brief Retrieve the physical address of the specified virtual address.
1313  * @return -1 indicates failure.  Otherwise, 0 is returned.
1314  */
1315 static int get_physical_address(struct mm_struct* mm, 
1316                                 unsigned long vaddr,
1317                                 unsigned long* paddr) {
1318     unsigned long resolved = 0;
1319     struct mm_walk walk = {
1320         .pte_entry = vm_search_page_walk_pte_entry_callback,
1321         .private = &(resolved),
1322         .mm = mm
1323     };
1324
1325     // Walk the page tables.  The walk handler modifies the
1326     // resolved variable if it finds the address.
1327     walk_page_range(vaddr & PAGE_MASK, (vaddr & PAGE_MASK) + PAGE_SIZE, &walk);
1328     if(resolved == 0) {
1329         return -1;
1330     }
1331
1332     // Set the output
1333     *paddr = resolved;
1334
1335     return 0;
1336 }
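/*
 * Hedged usage sketch for get_physical_address() (the variables are
 * hypothetical): resolve the physical address backing a user virtual
 * address, if one is currently mapped.
 *
 *     unsigned long paddr;
 *     if (get_physical_address(mm, vaddr, &paddr) == 0)
 *         PSPRINTK("vaddr %lx -> paddr %lx\n", vaddr, paddr);
 *     else
 *         PSPRINTK("vaddr %lx is not mapped\n", vaddr);
 *
 * Note the helper treats a resolved address of 0 as "not mapped", so it
 * cannot distinguish a real mapping at physical address 0.
 */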
1337
1338 /**
1339  * Check to see if the specified virtual address has a 
1340  * corresponding physical address mapped to it.
1341  * @return 0 = no mapping, 1 = mapping present
1342  */
1343 static int is_vaddr_mapped(struct mm_struct* mm, unsigned long vaddr) {
1344     unsigned long resolved = 0;
1345     struct mm_walk walk = {
1346         .pte_entry = vm_search_page_walk_pte_entry_callback,
1347         .private = &(resolved),
1348         .mm = mm
1349     };
1350
1351     // Walk the page tables.  The walk handler will set the
1352     // resolved variable if it finds the mapping.  
1353     walk_page_range(vaddr & PAGE_MASK, ( vaddr & PAGE_MASK ) + PAGE_SIZE, &walk);
1354     if(resolved != 0) {
1355         return 1;
1356     }
1357     return 0;
1358 }
1359
1360 /**
1361  *  @brief Find the bounds of a physically consecutive mapped region.
1362  *  The region must be contained within the specified VMA.
1363  *
1364  *  Hypothetical page table mappings for a given VMA:
1365  *
1366  *  *********************************
1367  *  *    Vaddr      *   Paddr       *
1368  *  *********************************
1369  *  * 0x10000000    * 0x12341000    *
1370  *  *********************************
1371  *  * 0x10001000    * 0x12342000    *
1372  *  *********************************
1373  *  * 0x10002000    * 0x12343000    *
1374  *  *********************************
1375  *  * 0x10003000    * 0x43214000    *
1376  *  *********************************
1377  *  
1378  *  This function, given a vaddr of 12342xxx will return:
1379  *  *vaddr_mapping_start = 0x10000000
1380  *  *paddr_mapping_start = 0x12341000
1381  *  *paddr_mapping_sz    = 0x3000
1382  *
1383  *  Notice 0x10003000 and above is not included in the returned region, as
1384  *  its paddr is not consecutive with the previous mappings.
1385  *
1386  */
1387 int find_consecutive_physically_mapped_region(struct mm_struct* mm,
1388                                               struct vm_area_struct* vma,
1389                                               unsigned long vaddr,
1390                                               unsigned long* vaddr_mapping_start,
1391                                               unsigned long* paddr_mapping_start,
1392                                               size_t* paddr_mapping_sz) {
1393     unsigned long paddr_curr = 0;
1394     unsigned long vaddr_curr = vaddr;
1395     unsigned long vaddr_next = vaddr;
1396     unsigned long paddr_next = 0;
1397     unsigned long paddr_start = 0;
1398     size_t sz = 0;
1399
1400     
1401     // Initializes paddr_curr
1402     if(get_physical_address(mm,vaddr_curr,&paddr_curr) < 0) {
1403         return -1;
1404     }
1405     paddr_start = paddr_curr;
1406     *vaddr_mapping_start = vaddr_curr;
1407     *paddr_mapping_start = paddr_curr;
1408     
1409     sz = PAGE_SIZE;
1410
1411     // Seek up in memory.
1412     // This stretches (sz) only, while leaving
1413     // the starting vaddr and paddr the same.
1414     while(1) {
1415         vaddr_next += PAGE_SIZE;
1416         
1417         // don't go past the end of the vma
1418         if(vaddr_next >= vma->vm_end) {
1419             break;
1420         }
1421
1422         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1423             break;
1424         }
1425
1426         if(paddr_next == paddr_curr + PAGE_SIZE) {
1427             sz += PAGE_SIZE;
1428             paddr_curr = paddr_next;
1429         } else {
1430             break;
1431         }
1432     }
1433
1434     // Seek down in memory.
1435     // This stretches sz, and moves the starting vaddr and paddr down.
1436     vaddr_curr = vaddr;
1437     paddr_curr = paddr_start; 
1438     vaddr_next = vaddr_curr;
1439     while(1) {
1440         vaddr_next -= PAGE_SIZE;
1441
1442         // don't go past the start of the vma
1443         if(vaddr_next < vma->vm_start) {
1444             break;
1445         }
1446
1447         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1448             break;
1449         }
1450
1451         if(paddr_next == (paddr_curr - PAGE_SIZE)) {
1452             vaddr_curr = vaddr_next;
1453             paddr_curr = paddr_next;
1454             sz += PAGE_SIZE;
1455         } else {
1456             break;
1457         }
1458     }
1459    
1460     *vaddr_mapping_start = vaddr_curr;
1461     *paddr_mapping_start = paddr_curr;
1462     *paddr_mapping_sz = sz;
1463
1464     PSPRINTK("%s: found consecutive area - vaddr{%lx}, paddr{%lx}, sz{%zu}\n",
1465                 __func__,
1466                 *vaddr_mapping_start,
1467                 *paddr_mapping_start,
1468                 *paddr_mapping_sz);
1469
1470     return 0;
1471 }
1472
1473 /**
1474  * @brief Find the preceding physically consecutive region.  This is a region
1475  * that starts BEFORE the specified vaddr.  The region must be contained 
1476  * within the specified VMA.
1477  */
1478 int find_prev_consecutive_physically_mapped_region(struct mm_struct* mm,
1479                                               struct vm_area_struct* vma,
1480                                               unsigned long vaddr,
1481                                               unsigned long* vaddr_mapping_start,
1482                                               unsigned long* paddr_mapping_start,
1483                                               size_t* paddr_mapping_sz) {
1484     unsigned long curr_vaddr_mapping_start;
1485     unsigned long curr_paddr_mapping_start;
1486     unsigned long curr_paddr_mapping_sz;
1487     unsigned long curr_vaddr = vaddr;
1488     int ret = -1;
1489
1490     if(curr_vaddr < vma->vm_start) return -1;
1491
1492     do {
1493         int res = find_consecutive_physically_mapped_region(mm,
1494                                                      vma,
1495                                                      curr_vaddr,
1496                                                      &curr_vaddr_mapping_start,
1497                                                      &curr_paddr_mapping_start,
1498                                                      &curr_paddr_mapping_sz);
1499         if(0 == res) {
1500
1501             // this is a match, we can store off results and exit
1502             ret = 0;
1503             *vaddr_mapping_start = curr_vaddr_mapping_start;
1504             *paddr_mapping_start = curr_paddr_mapping_start;
1505             *paddr_mapping_sz    = curr_paddr_mapping_sz;
1506             break;
1507         }
1508
1509         curr_vaddr -= PAGE_SIZE;
1510     } while (curr_vaddr >= vma->vm_start);
1511
1512     return ret;
1513
1514 }
1515 /**
1516  * @brief Find the next physically consecutive region.  This is a region
1517  * that starts AFTER the specified vaddr.  The region must be contained
1518  * within the specified VMA.
1519  */
1520 int find_next_consecutive_physically_mapped_region(struct mm_struct* mm,
1521                                               struct vm_area_struct* vma,
1522                                               unsigned long vaddr,
1523                                               unsigned long* vaddr_mapping_start,
1524                                               unsigned long* paddr_mapping_start,
1525                                               size_t* paddr_mapping_sz) {
1526     unsigned long curr_vaddr_mapping_start;
1527     unsigned long curr_paddr_mapping_start;
1528     unsigned long curr_paddr_mapping_sz;
1529     unsigned long curr_vaddr = vaddr;
1530     int ret = -1;
1531
1532     if(curr_vaddr >= vma->vm_end) return -1;
1533
1534     do {
1535         int res = find_consecutive_physically_mapped_region(mm,
1536                                                      vma,
1537                                                      curr_vaddr,
1538                                                      &curr_vaddr_mapping_start,
1539                                                      &curr_paddr_mapping_start,
1540                                                      &curr_paddr_mapping_sz);
1541         if(0 == res) {
1542
1543             // this is a match, we can store off results and exit
1544             ret = 0;
1545             *vaddr_mapping_start = curr_vaddr_mapping_start;
1546             *paddr_mapping_start = curr_paddr_mapping_start;
1547             *paddr_mapping_sz    = curr_paddr_mapping_sz;
1548             break;
1549         }
1550
1551         curr_vaddr += PAGE_SIZE;
1552     } while (curr_vaddr < vma->vm_end);
1553
1554     return ret;
1555
1556 }
1557
1558 /**
1559  *  @brief Fill the array with as many physically consecutive regions
1560  *  as are present and will fit (specified by arr_sz).
1561  */
1562 int fill_physical_mapping_array(struct mm_struct* mm,
1563         struct vm_area_struct* vma,
1564         unsigned long address,
1565         contiguous_physical_mapping_t* mappings, 
1566         int arr_sz) {
1567     int i;
1568     unsigned long next_vaddr = address & PAGE_MASK;
1569     int ret = -1;
1570     unsigned long smallest_in_first_round = next_vaddr;
1571
1572     PSPRINTK("%s: entered\n",__func__);
1573
1574     for(i = 0; i < arr_sz; i++) 
1575         mappings[i].present = 0;
1576
1577     for(i = 0; i < arr_sz && next_vaddr < vma->vm_end; i++) {
1578         int valid_mapping = find_next_consecutive_physically_mapped_region(mm,
1579                                             vma,
1580                                             next_vaddr,
1581                                             &mappings[i].vaddr,
1582                                             &mappings[i].paddr,
1583                                             &mappings[i].sz);
1584
1585
1586         if(valid_mapping == 0) {
1587             PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
1588             if(address >= mappings[i].vaddr && 
1589                     address < mappings[i].vaddr + mappings[i].sz)
1590                 ret = 0;
1591
1592             if(mappings[i].vaddr < smallest_in_first_round)
1593                 smallest_in_first_round = mappings[i].vaddr;
1594
1595             mappings[i].present = 1;
1596             next_vaddr = mappings[i].vaddr + mappings[i].sz;
1597
1598         } else {
1599             PSPRINTK("%s: up search ended in failure, resuming down search\n",
1600                     __func__);
1601             mappings[i].present = 0;
1602             mappings[i].vaddr = 0;
1603             mappings[i].paddr = 0;
1604             mappings[i].sz = 0;
1605             break;
1606         }
1607     }
1608
1609     // If we have room left, go in the opposite direction
1610     if(i <= arr_sz -1) {
1611         next_vaddr = smallest_in_first_round - PAGE_SIZE;
1612         for(;i < arr_sz && next_vaddr >= vma->vm_start; i++) {
1613             int valid_mapping = find_prev_consecutive_physically_mapped_region(mm,
1614                                             vma,
1615                                             next_vaddr,
1616                                             &mappings[i].vaddr,
1617                                             &mappings[i].paddr,
1618                                             &mappings[i].sz);
1619             if(valid_mapping == 0) {
1620                 PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
1621                 mappings[i].present = 1;
1622                 next_vaddr = mappings[i].vaddr - PAGE_SIZE;
1623             } else {
1624                 mappings[i].present = 0;
1625                 mappings[i].vaddr = 0;
1626                 mappings[i].paddr = 0;
1627                 mappings[i].sz = 0;
1628                 break;
1629             }
1630         }
1631     }
1632
1633     // Trim any entries that extend beyond the boundaries of the vma
1634     for(i = 0; i < MAX_MAPPINGS; i++) {
1635         if(mappings[i].present) {
1636             if(mappings[i].vaddr < vma->vm_start) {
1637                 unsigned long sz_diff = vma->vm_start - mappings[i].vaddr;
1638                 PSPRINTK("Trimming mapping, since it starts too low in memory\n");
1639                 if(mappings[i].sz > sz_diff) {
1640                     mappings[i].sz -= sz_diff;
1641                     mappings[i].vaddr = vma->vm_start;
1642                 } else {
1643                     mappings[i].present = 0;
1644                     mappings[i].vaddr = 0;
1645                     mappings[i].paddr = 0;
1646                     mappings[i].sz = 0;
1647                 }
1648             }
1649
1650             if(mappings[i].vaddr + mappings[i].sz >= vma->vm_end) {
1651                 unsigned long sz_diff = mappings[i].vaddr + 
1652                                         mappings[i].sz - 
1653                                         vma->vm_end;
1654                 PSPRINTK("Trimming mapping, since it ends too high in memory\n");
1655                 if(mappings[i].sz > sz_diff) {
1656                     mappings[i].sz -= sz_diff;
1657                 } else {
1658                     mappings[i].present = 0;
1659                     mappings[i].vaddr = 0;
1660                     mappings[i].paddr = 0;
1661                     mappings[i].sz = 0;
1662                 }
1663             }
1664         }
1665     }
1666
1667     // Clear out what we just did
1668     if(ret == -1) {
1669         PSPRINTK("%s: zeroing out responses, due to an error\n",__func__);
1670         for(i = 0; i < arr_sz; i++)
1671             mappings[i].present = 0;
1672     }
1673
1674     PSPRINTK("%s: exiting\n",__func__);
1675
1676     return ret;
1677 }
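/*
 * Usage sketch (mirrors the mapping-request path later in this file; the
 * variable names are illustrative): the caller supplies an array of
 * MAX_MAPPINGS slots and treats a non-zero return as "no usable mapping".
 *
 *   contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
 *   if(0 == fill_physical_mapping_array(mm, vma, fault_addr,
 *                                       mappings, MAX_MAPPINGS)) {
 *       // each mappings[i] with .present set describes one physically
 *       // contiguous run of pages within this vma
 *   }
 */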
1678
1679 /**
1680  * @brief Call remap_pfn_range on the parts of the specified virtual-physical
1681  * region that are not already mapped.
1682  * @precondition mm->mmap_sem must already be held by caller.
1683  */
1684 int remap_pfn_range_remaining(struct mm_struct* mm,
1685                                   struct vm_area_struct* vma,
1686                                   unsigned long vaddr_start,
1687                                   unsigned long paddr_start,
1688                                   size_t sz,
1689                                   pgprot_t prot,
1690                                   int make_writable) {
1691     unsigned long vaddr_curr;
1692     unsigned long paddr_curr = paddr_start;
1693     int ret = 0, val;
1694     int err;
1695
1696     PSPRINTK("%s: entered vaddr_start{%lx}, paddr_start{%lx}, sz{%x}\n",
1697             __func__,
1698             vaddr_start,
1699             paddr_start,
1700             sz);
1701
1702     for(vaddr_curr = vaddr_start; 
1703         vaddr_curr < vaddr_start + sz; 
1704         vaddr_curr += PAGE_SIZE) {
1705         if( !(val = is_vaddr_mapped(mm,vaddr_curr)) ) {
1706             //PSPRINTK("%s: mapping vaddr{%lx} paddr{%lx}\n",__func__,vaddr_curr,paddr_curr);
1707             // not mapped - map it
1708             err = remap_pfn_range(vma,
1709                                   vaddr_curr,
1710                                   paddr_curr >> PAGE_SHIFT,
1711                                   PAGE_SIZE,
1712                                   prot);
1713             if(err == 0) {
1714                 if(make_writable && vma->vm_flags & VM_WRITE) {
1715                     mk_page_writable(mm, vma, vaddr_curr);
1716                 }
1717             } else {
1718                 printk(KERN_ALERT"%s: ERROR mapping %lx to %lx with err{%d}\n",
1719                             __func__, vaddr_curr, paddr_curr, err);
1720             }
1721
1722             if( err != 0 ) ret = err;
1723         }
1724         else
1725             PSPRINTK("%s: is_vaddr_mapped %d, star:%lx end:%lx\n",
1726                     __func__, val, vma->vm_start, vma->vm_end);
1727
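        // Advance the physical cursor unconditionally so it stays in lockstep
        // with the virtual cursor, even for pages that were already mapped and
        // therefore skipped above.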
1728         paddr_curr += PAGE_SIZE;
1729     }
1730
1731     PSPRINTK("%s: exiting\n",__func__);
1732
1733     return ret;
1734 }
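/*
 * Example (a sketch only; whether the real receive path passes exactly these
 * fields is an assumption): map one received physically contiguous run,
 * leaving pages that are already present untouched and marking the rest
 * writable when the vma allows it.
 *
 *   remap_pfn_range_remaining(mm, vma,
 *                             pte_xfer->vaddr_start,
 *                             pte_xfer->paddr_start,
 *                             pte_xfer->sz,
 *                             vma->vm_page_prot,
 *                             1);
 */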
1735
1736
1737 /**
1738  * @brief Map, but only in areas that do not currently have mappings.
1739  * This should extend vmas that are adjacent as necessary.
1740  * NOTE: current->enable_do_mmap_pgoff_hook must be disabled
1741  *       by client code before calling this.
1742  * NOTE: mm->mmap_sem must already be held by client code.
1743  * NOTE: entries in the per-mm list of vm_area_structs are
1744  *       ordered by starting address.  This is helpful, because
1745  *       the search can sometimes exit early.
1746  */
1747 #define FORCE_NODEBUG
1748 #ifndef FORCE_NODEBUG
1749 #define DBGPSPRINTK(...) { if (dbg ==1) printk(KERN_ALERT __VA_ARGS__); }
1750 #else
1751 #define DBGPSPRINTK(...) ;
1752 #endif
1753 unsigned long do_mmap_remaining(struct file *file, unsigned long addr,
1754                                 unsigned long len, unsigned long prot,
1755                                 unsigned long flags, unsigned long pgoff, int dbg) {
1756     unsigned long ret = addr;
1757     unsigned long start = addr;
1758     unsigned long local_end = start;
1759     unsigned long end = addr + len;
1760     struct vm_area_struct* curr;
1761     unsigned long error;
1762
1763     // go through ALL vma's, looking for interference with this space.
1764     curr = current->mm->mmap;
1765     DBGPSPRINTK("%s: processing {%lx,%lx}\n",__func__,addr,len);
1766
1767     while(1) {
1768
1769         if(start >= end) goto done;
1770
1771         // We've reached the end of the list
1772         else if(curr == NULL) {
1773             // map through the end
1774             DBGPSPRINTK("%s: curr == NULL - mapping {%lx,%lx}\n",
1775                     __func__,start,end-start);
1776             error=do_mmap(file, start, end - start, prot, flags, pgoff); 
1777             if (error != start)
1778                 printk(KERN_ALERT"%s_1: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1779             goto done;
1780         }
1781
1782         // the VMA is fully above the region of interest
1783         else if(end <= curr->vm_start) {
1784                 // mmap through local_end
1785             DBGPSPRINTK("%s: VMA is fully above the region of interest - mapping {%lx,%lx}\n",
1786                     __func__,start,end-start);
1787             error=do_mmap(file, start, end - start, prot, flags, pgoff);
1788             if (error != start)
1789                 printk(KERN_ALERT"%s_2: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1790             goto done;
1791         }
1792
1793         // the VMA fully encompasses the region of interest
1794         else if(start >= curr->vm_start && end <= curr->vm_end) {
1795             // nothing to do
1796             DBGPSPRINTK("%s: VMA fully encompasses the region of interest\n",__func__);
1797             goto done;
1798         }
1799
1800         // the VMA is fully below the region of interest
1801         else if(curr->vm_end <= start) {
1802             // move on to the next one
1803             DBGPSPRINTK("%s: VMA is fully below region of interest\n",__func__);
1804         }
1805
1806         // the VMA includes the start of the region of interest 
1807         // but not the end
1808         else if (start >= curr->vm_start && 
1809                  start < curr->vm_end &&
1810                  end > curr->vm_end) {
1811             // advance start (no mapping to do) 
1812             start = curr->vm_end;
1813             local_end = start;
1814             DBGPSPRINTK("%s: VMA includes start but not end\n",__func__);
1815         }
1816
1817         // the VMA includes the end of the region of interest
1818         // but not the start
1819         else if(start < curr->vm_start && 
1820                 end <= curr->vm_end &&
1821                 end > curr->vm_start) {
1822             local_end = curr->vm_start;
1823             
1824             // mmap through local_end
1825             DBGPSPRINTK("%s: VMA includes end but not start - mapping {%lx,%lx}\n",
1826                     __func__,start, local_end - start);
1827             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
1828             if (error != start)
1829                 printk(KERN_ALERT"%s_3: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1830
1831             // Then we're done
1832             goto done;
1833         }
1834
1835         // the VMA is fully within the region of interest
1836         else if(start <= curr->vm_start && end >= curr->vm_end) {
1837             // advance local end
1838             local_end = curr->vm_start;
1839
1840             // map the difference
1841             DBGPSPRINTK("%s: VMS is fully within the region of interest - mapping {%lx,%lx}\n",
1842                     __func__,start, local_end - start);
1843             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
1844             if (error != start)
1845                 printk(KERN_ALERT"%s_4: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
1846
1847             // Then advance to the end of this vma
1848             start = curr->vm_end;
1849             local_end = start;
1850         }
1851
1852         curr = curr->vm_next;
1853
1854     }
1855
1856 done:
1857     
1858     DBGPSPRINTK("%s: exiting start:%lx\n",__func__, error);
1859     return ret;
1860 }
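/*
 * Example (addresses are illustrative only, and assume no other VMA overlaps
 * the range): if [addr, addr+len) straddles an existing VMA,
 * do_mmap_remaining() issues one do_mmap() for the gap below the VMA and one
 * for the gap above it, leaving the already-mapped range alone.
 *
 *   existing VMA: [0x7f0000002000, 0x7f0000004000)
 *   call:         do_mmap_remaining(NULL, 0x7f0000001000, 0x4000, prot, flags, 0, 0);
 *   new mappings: [0x7f0000001000, 0x7f0000002000) and [0x7f0000004000, 0x7f0000005000)
 */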
1861
1862 static void send_pte(unsigned long paddr_start,
1863         unsigned long vaddr_start, 
1864         size_t sz, 
1865         int dst,
1866         int vma_id,
1867         int clone_request_id) {
1868
1869     pte_transfer_t pte_xfer;
1870     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1871     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1872     pte_xfer.paddr_start = paddr_start;
1873     pte_xfer.vaddr_start = vaddr_start;
1874     pte_xfer.sz = sz;
1875     pte_xfer.clone_request_id = clone_request_id;
1876     pte_xfer.vma_id = vma_id;
1877     pcn_kmsg_send(dst, (struct pcn_kmsg_message *)&pte_xfer);
1878 }
1879
1880 static void send_vma(struct mm_struct* mm,
1881         struct vm_area_struct* vma, 
1882         int dst,
1883         int clone_request_id) {
1884     char lpath[256];
1885     char *plpath;
1886     vma_transfer_t* vma_xfer = kmalloc(sizeof(vma_transfer_t),GFP_KERNEL);
    if(!vma_xfer) return; // kmalloc can fail; bail out rather than dereference NULL
1887     vma_xfer->header.type = PCN_KMSG_TYPE_PROC_SRV_VMA_TRANSFER;  
1888     vma_xfer->header.prio = PCN_KMSG_PRIO_NORMAL;
1889     
1890     if(vma->vm_file == NULL) {
1891         vma_xfer->path[0] = '\0';
1892     } else {
1893         plpath = d_path(&vma->vm_file->f_path,
1894                 lpath,256);
1895         strcpy(vma_xfer->path,plpath);
1896     }
1897
1898     //
1899     // Transfer the vma
1900     //
1901     PS_SPIN_LOCK(&_vma_id_lock);
1902     vma_xfer->vma_id = _vma_id++;
1903     PS_SPIN_UNLOCK(&_vma_id_lock);
1904     vma_xfer->start = vma->vm_start;
1905     vma_xfer->end = vma->vm_end;
1906     vma_xfer->prot = vma->vm_page_prot;
1907     vma_xfer->clone_request_id = clone_request_id;
1908     vma_xfer->flags = vma->vm_flags;
1909     vma_xfer->pgoff = vma->vm_pgoff;
1910     pcn_kmsg_send_long(dst, 
1911                         (struct pcn_kmsg_long_message*)vma_xfer, 
1912                         sizeof(vma_transfer_t) - sizeof(vma_xfer->header));
1913
1914     // Send all physical information too
1915     {
1916     unsigned long curr = vma->vm_start;
1917     unsigned long vaddr_resolved = -1;
1918     unsigned long paddr_resolved = -1;
1919     size_t sz_resolved = 0;
1920     
1921     while(curr < vma->vm_end) {
1922         if(-1 == find_next_consecutive_physically_mapped_region(mm,
1923                     vma,
1924                     curr,
1925                     &vaddr_resolved,
1926                     &paddr_resolved,
1927                     &sz_resolved)) {
1928             // None more, exit
1929             break;
1930         } else {
1931             // send the pte
1932             send_pte(paddr_resolved,
1933                      vaddr_resolved,
1934                      sz_resolved,
1935                      dst,
1936                      vma_xfer->vma_id,
1937                      vma_xfer->clone_request_id
1938                      );
1939
1940             // move to the next
1941             curr = vaddr_resolved + sz_resolved;
1942         }
1943     }
1944
1945     }
1946
1947
1948     kfree(vma_xfer);
1949 }
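/*
 * Protocol recap (as implemented above): each migrated VMA is announced with a
 * single vma_transfer_t message, followed by one pte_transfer_t per physically
 * contiguous run found in that VMA; the vma_id and clone_request_id fields tie
 * the PTE messages back to the VMA they belong to.
 */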
1950
1951 /**
1952  * @brief Display a mapping request data entry.
1953  */
1954 static void dump_mapping_request_data(mapping_request_data_t* data) {
1955     int i;
1956     PSPRINTK("mapping request data dump:\n");
1957     PSPRINTK("address{%lx}, vaddr_start{%lx}, vaddr_sz{%lx}\n",
1958                     data->address, data->vaddr_start, data->vaddr_size);
1959     for(i = 0; i < MAX_MAPPINGS; i++) {
1960         PSPRINTK("mapping %d - vaddr{%lx}, paddr{%lx}, sz{%lx}\n",
1961                 i,data->mappings[i].vaddr,data->mappings[i].paddr,data->mappings[i].sz);
1962     }
1963     PSPRINTK("present{%d}, complete{%d}, from_saved_mm{%d}\n",
1964             data->present, data->complete, data->from_saved_mm);
1965     PSPRINTK("responses{%d}, expected_responses{%d}\n",
1966             data->responses, data->expected_responses);
1967 }
1968
1969 /**
1970  * @brief Display relevant task information.
1971  */
1972 void dump_task(struct task_struct* task, struct pt_regs* regs, unsigned long stack_ptr) {
1973 #if PROCESS_SERVER_VERBOSE
1974     if (!task) return;
1975
1976     PSPRINTK("DUMP TASK\n");
1977     PSPRINTK("PID: %d\n",task->pid);
1978     PSPRINTK("State: %lx\n",task->state);
1979     PSPRINTK("Flags: %x\n",task->flags);
1980     PSPRINTK("Prio{%d},Static_Prio{%d},Normal_Prio{%d}\n",
1981             task->prio,task->static_prio,task->normal_prio);
1982     PSPRINTK("Represents_remote{%d}\n",task->represents_remote);
1983     PSPRINTK("Executing_for_remote{%d}\n",task->executing_for_remote);
1984     PSPRINTK("prev_pid{%d}\n",task->prev_pid);
1985     PSPRINTK("next_pid{%d}\n",task->next_pid);
1986     PSPRINTK("prev_cpu{%d}\n",task->prev_cpu);
1987     PSPRINTK("next_cpu{%d}\n",task->next_cpu);
1988     PSPRINTK("Clone_request_id{%d}\n",task->clone_request_id);
1989     dump_regs(regs);
1990     dump_thread(&task->thread);
1991     //dump_mm(task->mm);
1992     dump_stk(&task->thread,stack_ptr);
1993     PSPRINTK("TASK DUMP COMPLETE\n");
1994 #endif
1995 }
1996
1997 /**
1998  * @brief Display a task's stack information.
1999  */
2000 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr) {
2001     if(!thread) return;
2002     PSPRINTK("DUMP STACK\n");
2003     if(thread->sp) {
2004         PSPRINTK("sp = %lx\n",thread->sp);
2005     }
2006     if(thread->usersp) {
2007         PSPRINTK("usersp = %lx\n",thread->usersp);
2008     }
2009     if(stack_ptr) {
2010         PSPRINTK("stack_ptr = %lx\n",stack_ptr);
2011     }
2012     PSPRINTK("STACK DUMP COMPLETE\n");
2013 }
2014
2015 /**
2016  * @brief Display a task's register contents.
2017  */
2018 static void dump_regs(struct pt_regs* regs) {
2019     unsigned long fs, gs;
2020     PSPRINTK("DUMP REGS\n");
2021     if(NULL != regs) {
2022         PSPRINTK("r15{%lx}\n",regs->r15);   
2023         PSPRINTK("r14{%lx}\n",regs->r14);
2024         PSPRINTK("r13{%lx}\n",regs->r13);
2025         PSPRINTK("r12{%lx}\n",regs->r12);
2026         PSPRINTK("r11{%lx}\n",regs->r11);
2027         PSPRINTK("r10{%lx}\n",regs->r10);
2028         PSPRINTK("r9{%lx}\n",regs->r9);
2029         PSPRINTK("r8{%lx}\n",regs->r8);
2030         PSPRINTK("bp{%lx}\n",regs->bp);
2031         PSPRINTK("bx{%lx}\n",regs->bx);
2032         PSPRINTK("ax{%lx}\n",regs->ax);
2033         PSPRINTK("cx{%lx}\n",regs->cx);
2034         PSPRINTK("dx{%lx}\n",regs->dx);
2035         PSPRINTK("di{%lx}\n",regs->di);
2036         PSPRINTK("orig_ax{%lx}\n",regs->orig_ax);
2037         PSPRINTK("ip{%lx}\n",regs->ip);
2038         PSPRINTK("cs{%lx}\n",regs->cs);
2039         PSPRINTK("flags{%lx}\n",regs->flags);
2040         PSPRINTK("sp{%lx}\n",regs->sp);
2041         PSPRINTK("ss{%lx}\n",regs->ss);
2042     }
2043     rdmsrl(MSR_FS_BASE, fs);
2044     rdmsrl(MSR_GS_BASE, gs);
2045     PSPRINTK("fs{%lx}\n",fs);
2046     PSPRINTK("gs{%lx}\n",gs);
2047     PSPRINTK("REGS DUMP COMPLETE\n");
2048 }
2049
2050 /**
2051  * @brief Display a task's thread information.
2052  */
2053 static void dump_thread(struct thread_struct* thread) {
2054     PSPRINTK("DUMP THREAD\n");
2055     PSPRINTK("sp0{%lx}, sp{%lx}\n",thread->sp0,thread->sp);
2056     PSPRINTK("usersp{%lx}\n",thread->usersp);
2057     PSPRINTK("es{%x}\n",thread->es);
2058     PSPRINTK("ds{%x}\n",thread->ds);
2059     PSPRINTK("fsindex{%x}\n",thread->fsindex);
2060     PSPRINTK("gsindex{%x}\n",thread->gsindex);
2061     PSPRINTK("gs{%lx}\n",thread->gs);
2062     PSPRINTK("THREAD DUMP COMPLETE\n");
2063 }
2064
2065 /**
2066  * @brief Display a pte_data_t data structure.
2067  */
2068 static void dump_pte_data(pte_data_t* p) {
2069     PSPRINTK("PTE_DATA\n");
2070     PSPRINTK("vma_id{%x}\n",p->vma_id);
2071     PSPRINTK("clone_request_id{%x}\n",p->clone_request_id);
2072     PSPRINTK("cpu{%x}\n",p->cpu);
2073     PSPRINTK("vaddr_start{%lx}\n",p->vaddr_start);
2074     PSPRINTK("paddr_start{%lx}\n",p->paddr_start);
2075     PSPRINTK("sz{%d}\n",p->sz);
2076 }
2077
2078 /**
2079  * @brief Display a vma_data_t data structure.
2080  */
2081 static void dump_vma_data(vma_data_t* v) {
2082     pte_data_t* p;
2083     PSPRINTK("VMA_DATA\n");
2084     PSPRINTK("start{%lx}\n",v->start);
2085     PSPRINTK("end{%lx}\n",v->end);
2086     PSPRINTK("clone_request_id{%x}\n",v->clone_request_id);
2087     PSPRINTK("cpu{%x}\n",v->cpu);
2088     PSPRINTK("flags{%lx}\n",v->flags);
2089     PSPRINTK("vma_id{%x}\n",v->vma_id);
2090     PSPRINTK("path{%s}\n",v->path);
2091
2092     p = v->pte_list;
2093     while(p) {
2094         dump_pte_data(p);
2095         p = (pte_data_t*)p->header.next;
2096     }
2097 }
2098
2099 /**
2100  * @brief Display a clone_data_t.
2101  */
2102 static void dump_clone_data(clone_data_t* r) {
2103     vma_data_t* v;
2104     PSPRINTK("CLONE REQUEST\n");
2105     PSPRINTK("clone_request_id{%x}\n",r->clone_request_id);
2106     PSPRINTK("clone_flags{%lx}\n",r->clone_flags);
2107     PSPRINTK("stack_start{%lx}\n",r->stack_start);
2108     PSPRINTK("stack_ptr{%lx}\n",r->stack_ptr);
2109     PSPRINTK("env_start{%lx}\n",r->env_start);
2110     PSPRINTK("env_end{%lx}\n",r->env_end);
2111     PSPRINTK("arg_start{%lx}\n",r->arg_start);
2112     PSPRINTK("arg_end{%lx}\n",r->arg_end);
2113     PSPRINTK("heap_start{%lx}\n",r->heap_start);
2114     PSPRINTK("heap_end{%lx}\n",r->heap_end);
2115     PSPRINTK("data_start{%lx}\n",r->data_start);
2116     PSPRINTK("data_end{%lx}\n",r->data_end);
2117     dump_regs(&r->regs);
2118     PSPRINTK("placeholder_pid{%x}\n",r->placeholder_pid);
2119     PSPRINTK("placeholder_tgid{%x}\n",r->placeholder_tgid);
2120     PSPRINTK("thread_fs{%lx}\n",r->thread_fs);
2121     PSPRINTK("thread_gs{%lx}\n",r->thread_gs);
2122     PSPRINTK("thread_sp0{%lx}\n",r->thread_sp0);
2123     PSPRINTK("thread_sp{%lx}\n",r->thread_sp);
2124     PSPRINTK("thread_usersp{%lx}\n",r->thread_usersp);
2125
2126     v = r->vma_list;
2127     while(v) {
2128         dump_vma_data(v);
2129         v = (vma_data_t*)v->header.next;
2130     }
2131 }
2132
2133 /**
2134  * @brief Find a thread count data entry.
2135  * @return Either a thread count request data entry, or NULL if one does 
2136  * not exist that satisfies the parameter requirements.
2137  */
2138 static remote_thread_count_request_data_t* find_remote_thread_count_data(int cpu, 
2139         int id, int requester_pid) {
2140
2141     data_header_t* curr = NULL;
2142     remote_thread_count_request_data_t* request = NULL;
2143     remote_thread_count_request_data_t* ret = NULL;
2144     unsigned long lockflags;
2145
2146     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2147
2148     curr = _count_remote_tmembers_data_head;
2149     while(curr) {
2150         request = (remote_thread_count_request_data_t*)curr;
2151         if(request->tgroup_home_cpu == cpu &&
2152            request->tgroup_home_id == id &&
2153            request->requester_pid == requester_pid) {
2154             ret = request;
2155             break;
2156         }
2157         curr = curr->next;
2158     }
2159
2160     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2161
2162     return ret;
2163 }
2164
2165 /**
2166  * @brief Finds a munmap request data entry.
2167  * @return Either a munmap request data entry, or NULL if one is not
2168  * found that satisfies the parameter requirements.
2169  */
2170 static munmap_request_data_t* find_munmap_request_data(int cpu, int id, 
2171         int requester_pid, unsigned long address) {
2172
2173     data_header_t* curr = NULL;
2174     munmap_request_data_t* request = NULL;
2175     munmap_request_data_t* ret = NULL;
2176     PS_SPIN_LOCK(&_munmap_data_head_lock);
2177     
2178     curr = _munmap_data_head;
2179     while(curr) {
2180         request = (munmap_request_data_t*)curr;
2181         if(request->tgroup_home_cpu == cpu && 
2182                 request->tgroup_home_id == id &&
2183                 request->requester_pid == requester_pid &&
2184                 request->vaddr_start == address) {
2185             ret = request;
2186             break;
2187         }
2188         curr = curr->next;
2189     }
2190
2191     PS_SPIN_UNLOCK(&_munmap_data_head_lock);
2192
2193     return ret;
2194
2195 }
2196
2197 /**
2198  * @brief Finds an mprotect request data entry.
2199  * @return Either an mprotect request data entry, or NULL if one is
2200  * not found that satisfies the parameter requirements.
2201  */
2202 static mprotect_data_t* find_mprotect_request_data(int cpu, int id, 
2203         int requester_pid, unsigned long start) {
2204
2205     data_header_t* curr = NULL;
2206     mprotect_data_t* request = NULL;
2207     mprotect_data_t* ret = NULL;
2208     PS_SPIN_LOCK(&_mprotect_data_head_lock);
2209     
2210     curr = _mprotect_data_head;
2211     while(curr) {
2212         request = (mprotect_data_t*)curr;
2213         if(request->tgroup_home_cpu == cpu && 
2214                 request->tgroup_home_id == id &&
2215                 request->requester_pid == requester_pid &&
2216                 request->start == start) {
2217             ret = request;
2218             break;
2219         }
2220         curr = curr->next;
2221     }
2222
2223     PS_SPIN_UNLOCK(&_mprotect_data_head_lock);
2224
2225     return ret;
2226
2227 }
2228
2229 /**
2230  * @brief Finds a mapping request data entry.
2231  * @return Either a mapping request data entry, or NULL if an entry
2232  * is not found that satisfies the parameter requirements.
2233  */
2234 static mapping_request_data_t* find_mapping_request_data(int cpu, int id, 
2235         int pid, unsigned long address) {
2236
2237     data_header_t* curr = NULL;
2238     mapping_request_data_t* request = NULL;
2239     mapping_request_data_t* ret = NULL;
2240     
2241     curr = _mapping_request_data_head;
2242     while(curr) {
2243         request = (mapping_request_data_t*)curr;
2244         if(request->tgroup_home_cpu == cpu && 
2245                 request->tgroup_home_id == id &&
2246                 request->requester_pid == pid &&
2247                 request->address == address) {
2248             ret = request;
2249             break;
2250         }
2251         curr = curr->next;
2252     }
2253
2254
2255     return ret;
2256 }
2257
2258 /**
2259  * @brief Finds a clone data entry.
2260  * @return Either a clone entry or NULL if one is not found
2261  * that satisfies the parameter requirements.
2262  */
2263 static clone_data_t* find_clone_data(int cpu, int clone_request_id) {
2264     data_header_t* curr = NULL;
2265     clone_data_t* clone = NULL;
2266     clone_data_t* ret = NULL;
2267     PS_SPIN_LOCK(&_data_head_lock);
2268     
2269     curr = _data_head;
2270     while(curr) {
2271         if(curr->data_type == PROCESS_SERVER_CLONE_DATA_TYPE) {
2272             clone = (clone_data_t*)curr;
2273             if(clone->placeholder_cpu == cpu && clone->clone_request_id == clone_request_id) {
2274                 ret = clone;
2275                 break;
2276             }
2277         }
2278         curr = curr->next;
2279     }
2280
2281     PS_SPIN_UNLOCK(&_data_head_lock);
2282
2283     return ret;
2284 }
2285
2286 /**
2287  * @brief Destroys the specified clone data.  It also destroys lists
2288  * that are nested within it.
2289  */
2290 static void destroy_clone_data(clone_data_t* data) {
2291     vma_data_t* vma_data;
2292     pte_data_t* pte_data;
2293     vma_data = data->vma_list;
2294     while(vma_data) {
2295         
2296         // Destroy this VMA's PTE's
2297         pte_data = vma_data->pte_list;
2298         while(pte_data) {
2299
2300             // Remove pte from list
2301             vma_data->pte_list = (pte_data_t*)pte_data->header.next;
2302             if(vma_data->pte_list) {
2303                 vma_data->pte_list->header.prev = NULL;
2304             }
2305
2306             // Destroy pte
2307             kfree(pte_data);
2308
2309             // Next is the new list head
2310             pte_data = vma_data->pte_list;
2311         }
2312         
2313         // Remove vma from list
2314         data->vma_list = (vma_data_t*)vma_data->header.next;
2315         if(data->vma_list) {
2316             data->vma_list->header.prev = NULL;
2317         }
2318
2319         // Destroy vma
2320         kfree(vma_data);
2321
2322         // Next is the new list head
2323         vma_data = data->vma_list;
2324     }
2325
2326     // Destroy clone data
2327     kfree(data);
2328 }
2329
2330 /**
2331  * @brief Finds a vma_data_t entry.
2332  */
2333 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start) {
2334
2335     vma_data_t* curr = clone_data->vma_list;
2336     vma_data_t* ret = NULL;
2337
2338     while(curr) {
2339         
2340         if(curr->start == addr_start) {
2341             ret = curr;
2342             break;
2343         }
2344
2345         curr = (vma_data_t*)curr->header.next;
2346     }
2347
2348     return ret;
2349 }
2350
2351 /**
2352  * @brief Callback for page walk that displays the contents of the walk.
2353  */
2354 static int dump_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, 
2355         unsigned long end, struct mm_walk *walk) {
2356
2357     int nx;
2358     int rw;
2359     int user;
2360     int pwt;
2361     int pcd;
2362     int accessed;
2363     int dirty;
2364
2365     if(NULL == pte || !pte_present(*pte)) {
2366         return 0;
2367     }
2368
2369     nx       = pte_flags(*pte) & _PAGE_NX       ? 1 : 0;
2370     rw       = pte_flags(*pte) & _PAGE_RW       ? 1 : 0;
2371     user     = pte_flags(*pte) & _PAGE_USER     ? 1 : 0;
2372     pwt      = pte_flags(*pte) & _PAGE_PWT      ? 1 : 0;
2373     pcd      = pte_flags(*pte) & _PAGE_PCD      ? 1 : 0;
2374     accessed = pte_flags(*pte) & _PAGE_ACCESSED ? 1 : 0;
2375     dirty    = pte_flags(*pte) & _PAGE_DIRTY    ? 1 : 0;
2376
2377     PSPRINTK("pte_entry start{%lx}, end{%lx}, phy{%lx}\n",
2378             start,
2379             end,
2380             (unsigned long)(pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1)));
2381
2382     PSPRINTK("\tnx{%d}, ",nx);
2383     PSPRINTK("rw{%d}, ",rw);
2384     PSPRINTK("user{%d}, ",user);
2385     PSPRINTK("pwt{%d}, ",pwt);
2386     PSPRINTK("pcd{%d}, ",pcd);
2387     PSPRINTK("accessed{%d}, ",accessed);
2388     PSPRINTK("dirty{%d}\n",dirty);
2389
2390     return 0;
2391 }
2392
2393 /**
2394  * @brief Displays relevant data within a mm.
2395  */
2396 static void dump_mm(struct mm_struct* mm) {
2397     struct vm_area_struct * curr;
2398     char buf[256];
2399     struct mm_walk walk = {
2400         .pte_entry = dump_page_walk_pte_entry_callback,
2401         .mm = mm,
2402         .private = NULL
2403         };
2404
2405     if(NULL == mm) {
2406         PSPRINTK("MM IS NULL!\n");
2407         return;
2408     }
2409
2410     PS_DOWN_READ(&mm->mmap_sem);
2411
2412     curr = mm->mmap;
2413
2414     PSPRINTK("MM DUMP\n");
2415     PSPRINTK("Stack Growth{%lx}\n",mm->stack_vm);
2416     PSPRINTK("Code{%lx - %lx}\n",mm->start_code,mm->end_code);
2417     PSPRINTK("Brk{%lx - %lx}\n",mm->start_brk,mm->brk);
2418     PSPRINTK("Stack{%lx}\n",mm->start_stack);
2419     PSPRINTK("Arg{%lx - %lx}\n",mm->arg_start,mm->arg_end);
2420     PSPRINTK("Env{%lx - %lx}\n",mm->env_start,mm->env_end);
2421
2422     while(curr) {
2423         if(!curr->vm_file) {
2424             PSPRINTK("Anonymous VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, flags{%lx}\n",
2425                     curr->vm_start, 
2426                     curr->vm_end,
2427                     curr->vm_pgoff,
2428                     curr->vm_flags);
2429             // walk    
2430             walk_page_range(curr->vm_start,curr->vm_end,&walk);
2431         } else {
2432             PSPRINTK("Page VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, path{%s}, flags{%lx}\n",
2433                     curr->vm_start,
2434                     curr->vm_end,
2435                     curr->vm_pgoff,
2436                     d_path(&curr->vm_file->f_path,buf, 256),
2437                     curr->vm_flags);
2438             walk_page_range(curr->vm_start,curr->vm_end,&walk);
2439         }
2440         curr = curr->vm_next;
2441     }
2442
2443     PS_UP_READ(&mm->mmap_sem);
2444 }
2445
2446 /**
2447  * Data library
2448  */
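/*
 * The helpers below implement a small intrusive, doubly linked list keyed off
 * the data_header_t embedded at the start of each record (next/prev pointers
 * plus a data_type tag).  add_data_entry()/remove_data_entry() operate on the
 * global _data_head list; add_data_entry_to()/remove_data_entry_from() take
 * the head pointer and lock explicitly so the same code can manage the other
 * per-purpose lists in this file (saved mm's, thread-count, munmap, mprotect
 * and mapping request records).
 */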
2449
2450 /**
2451  * @brief Add data entry.
2452  */
2453 static void add_data_entry_to(void* entry, spinlock_t* lock, data_header_t** head) {
2454     data_header_t* hdr = (data_header_t*)entry;
2455     data_header_t* curr = NULL;
2456
2457     if(!entry) {
2458         return;
2459     }
2460
2461     // Always clear out the link information
2462     hdr->next = NULL;
2463     hdr->prev = NULL;
2464
2465     PS_SPIN_LOCK(lock);
2466     
2467     if (!*head) {
2468         *head = hdr;
2469         hdr->next = NULL;
2470         hdr->prev = NULL;
2471     } else {
2472         curr = *head;
2473         while(curr->next != NULL) {
2474             if(curr == entry) {
2475                 PS_SPIN_UNLOCK(lock);
                return; // It's already in the list - drop the lock before returning.
2476             }
2477             curr = curr->next;
2478         }
2479         // Now curr should be the last entry.
2480         // Append the new entry to curr.
2481         curr->next = hdr;
2482         hdr->next = NULL;
2483         hdr->prev = curr;
2484     }
2485
2486     PS_SPIN_UNLOCK(lock);
2487 }
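/*
 * Usage sketch (the record type and names here are purely illustrative): any
 * record whose first member is a data_header_t can be queued this way, since
 * the list code casts directly between the record and its header.
 *
 *   my_record_t* r = kmalloc(sizeof(*r), GFP_KERNEL);
 *   if(r) {
 *       r->header.data_type = MY_RECORD_TYPE;
 *       add_data_entry_to(r, &_my_list_lock, &_my_list_head);
 *   }
 */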
2488
2489 /**
2490  * @brief Remove a data entry
2491  * @prerequisite Requires user to hold lock
2492  */
2493 static void remove_data_entry_from(void* entry, data_header_t** head) {
2494     data_header_t* hdr = entry;
2495
2496     if(!entry) {
2497         return;
2498     }
2499
2500     if(*head == hdr) {
2501         *head = hdr->next;
2502     }
2503
2504     if(hdr->next) {
2505         hdr->next->prev = hdr->prev;
2506     }
2507
2508     if(hdr->prev) {
2509         hdr->prev->next = hdr->next;
2510     }
2511
2512     hdr->prev = NULL;
2513     hdr->next = NULL;
2514
2515 }
2516
2517 /**
2518  * @brief Add data entry
2519  */
2520 static void add_data_entry(void* entry) {
2521     data_header_t* hdr = (data_header_t*)entry;
2522     data_header_t* curr = NULL;
2523     unsigned long lockflags;
2524
2525     if(!entry) {
2526         return;
2527     }
2528
2529     // Always clear out the link information
2530     hdr->next = NULL;
2531     hdr->prev = NULL;
2532
2533     spin_lock_irqsave(&_data_head_lock,lockflags);
2534     
2535     if (!_data_head) {
2536         _data_head = hdr;
2537         hdr->next = NULL;
2538         hdr->prev = NULL;
2539     } else {
2540         curr = _data_head;
2541         while(curr->next != NULL) {
2542             if(curr == entry) {
2543                 spin_unlock_irqrestore(&_data_head_lock,lockflags);
                return; // It's already in the list - restore IRQ state before returning.
2544             }
2545             curr = curr->next;
2546         }
2547         // Now curr should be the last entry.
2548         // Append the new entry to curr.
2549         curr->next = hdr;
2550         hdr->next = NULL;
2551         hdr->prev = curr;
2552     }
2553
2554     spin_unlock_irqrestore(&_data_head_lock,lockflags);
2555 }
2556
2557 /**
2558  * @brief Remove a data entry.
2559  * @prerequisite Requires user to hold _data_head_lock.
2560  */
2561 static void remove_data_entry(void* entry) {
2562     data_header_t* hdr = entry;
2563
2564     if(!entry) {
2565         return;
2566     }
2567
2568     if(_data_head == hdr) {
2569         _data_head = hdr->next;
2570     }
2571
2572     if(hdr->next) {
2573         hdr->next->prev = hdr->prev;
2574     }
2575
2576     if(hdr->prev) {
2577         hdr->prev->next = hdr->next;
2578     }
2579
2580     hdr->prev = NULL;
2581     hdr->next = NULL;
2582
2583 }
2584
2585 /**
2586  * @brief Print information about the list.
2587  */
2588 static void dump_data_list(void) {
2589     data_header_t* curr = NULL;
2590     pte_data_t* pte_data = NULL;
2591     vma_data_t* vma_data = NULL;
2592     clone_data_t* clone_data = NULL;
2593
2594     PS_SPIN_LOCK(&_data_head_lock);
2595
2596     curr = _data_head;
2597
2598     PSPRINTK("DATA LIST:\n");
2599     while(curr) {
2600         switch(curr->data_type) {
2601         case PROCESS_SERVER_VMA_DATA_TYPE:
2602             vma_data = (vma_data_t*)curr;
2603             PSPRINTK("VMA DATA: start{%lx}, end{%lx}, crid{%d}, vmaid{%d}, cpu{%d}, pgoff{%lx}\n",
2604                     vma_data->start,
2605                     vma_data->end,
2606                     vma_data->clone_request_id,
2607                     vma_data->vma_id, 
2608                     vma_data->cpu, 
2609                     vma_data->pgoff);
2610             break;
2611         case PROCESS_SERVER_PTE_DATA_TYPE:
2612             pte_data = (pte_data_t*)curr;
2613             PSPRINTK("PTE DATA: vaddr_start{%lx}, paddr_start{%lx}, sz{%d}, vmaid{%d}, cpu{%d}\n",
2614                     pte_data->vaddr_start,
2615                     pte_data->paddr_start,
2616                     pte_data->sz,
2617                     pte_data->vma_id,
2618                     pte_data->cpu);
2619             break;
2620         case PROCESS_SERVER_CLONE_DATA_TYPE:
2621             clone_data = (clone_data_t*)curr;
2622             PSPRINTK("CLONE DATA: flags{%lx}, stack_start{%lx}, heap_start{%lx}, heap_end{%lx}, ip{%lx}, crid{%d}\n",
2623                     clone_data->clone_flags,
2624                     clone_data->stack_start,
2625                     clone_data->heap_start,
2626                     clone_data->heap_end,
2627                     clone_data->regs.ip,
2628                     clone_data->clone_request_id);
2629             break;
2630         default:
2631             break;
2632         }
2633         curr = curr->next;
2634     }
2635
2636     PS_SPIN_UNLOCK(&_data_head_lock);
2637 }
2638
2639 /**
2640  * @brief Counts remote thread group members.
2641  * @return The number of remote thread group members in the
2642  * specified distributed thread group.
2643  * <MEASURE perf_count_remote_thread_members>
2644  */
2645 static int count_remote_thread_members(int exclude_t_home_cpu,
2646                                        int exclude_t_home_id) {
2647
2648     int tgroup_home_cpu = current->tgroup_home_cpu;
2649     int tgroup_home_id  = current->tgroup_home_id;
2650     remote_thread_count_request_data_t* data;
2651     remote_thread_count_request_t request;
2652     int i;
2653     int s;
2654     int ret = -1;
2655     int perf = -1;
2656     unsigned long lockflags;
2657
2658     perf = PERF_MEASURE_START(&perf_count_remote_thread_members);
2659
2660     PSPRINTK("%s: entered\n",__func__);
2661
2662     data = kmalloc(sizeof(remote_thread_count_request_data_t),GFP_KERNEL);
2663     if(!data) goto exit;
2664
2665     data->header.data_type = PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE;
2666     data->responses = 0;
2667     data->expected_responses = 0;
2668     data->tgroup_home_cpu = tgroup_home_cpu;
2669     data->tgroup_home_id = tgroup_home_id;
2670     data->requester_pid = current->pid;
2671     data->count = 0;
2672     spin_lock_init(&data->lock);
2673
2674     add_data_entry_to(data,
2675                       &_count_remote_tmembers_data_head_lock,
2676                       &_count_remote_tmembers_data_head);
2677
2678     request.header.type = PCN_KMSG_TYPE_PROC_SRV_THREAD_COUNT_REQUEST;
2679     request.header.prio = PCN_KMSG_PRIO_NORMAL;
2680     request.tgroup_home_cpu = current->tgroup_home_cpu; //TODO why not tgroup_home_cpu?!?!
2681     request.tgroup_home_id  = current->tgroup_home_id; //TODO why not tgroup_home_id?!?!
2682     request.requester_pid = data->requester_pid;
2683
2684 #ifndef SUPPORT_FOR_CLUSTERING
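    /*
     * Broadcast the request: without SUPPORT_FOR_CLUSTERING every CPU id
     * except the local one is tried, otherwise only the kernels recorded in
     * the remote-CPU info list; both variants fall through to the shared loop
     * body below, which counts each successful send as an expected response.
     */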
2685     for(i = 0; i < NR_CPUS; i++) {
2686         // Skip the current cpu
2687         if(i == _cpu) continue;
2688 #else
2689     // the list does not include the current processor group descriptor (TODO)
2690     struct list_head *iter;
2691     _remote_cpu_info_list_t *objPtr;
2692     extern struct list_head rlist_head;
2693     list_for_each(iter, &rlist_head) {
2694         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
2695         i = objPtr->_data._processor;
2696 #endif
2697         // Send the request to this cpu.
2698         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
2699         if(!s) {
2700             // A successful send operation, increase the number
2701             // of expected responses.
2702             data->expected_responses++;
2703         }
2704     }
2705
2706     PSPRINTK("%s: waiting on %d responses\n",__func__,data->expected_responses);
2707
2708     // Wait for all cpus to respond.
2709     while(data->expected_responses != data->responses) {
2710         schedule();
2711     }
2712
2713     // OK, all responses are in, we can proceed.
2714     ret = data->count;
2715
2716     PSPRINTK("%s: found a total of %d remote threads in group\n",__func__,
2717             data->count);
2718
2719     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2720     remove_data_entry_from(data,
2721                            &_count_remote_tmembers_data_head);
2722     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2723
2724     kfree(data);
2725
2726 exit:
2727     PERF_MEASURE_STOP(&perf_count_remote_thread_members," ",perf);
2728     return ret;
2729 }
2730
2731 /**
2732  * @brief Counts the number of local thread group members for the specified
2733  * distributed thread group.
2734  */
2735 static int count_local_thread_members(int tgroup_home_cpu, 
2736         int tgroup_home_id, int exclude_pid) {
2737
2738     struct task_struct *task, *g;
2739     int count = 0;
2740     PSPRINTK("%s: entered\n",__func__);
2741     do_each_thread(g,task) {
2742         if(task->tgroup_home_id == tgroup_home_id &&
2743            task->tgroup_home_cpu == tgroup_home_cpu &&
2744            task->t_home_cpu == _cpu &&
2745            task->pid != exclude_pid &&
2746            task->exit_state != EXIT_ZOMBIE &&
2747            task->exit_state != EXIT_DEAD &&
2748            !(task->flags & PF_EXITING)) {
2749
2750                 count++;
2751             
2752         }
2753     } while_each_thread(g,task);
2754     PSPRINTK("%s: exited\n",__func__);
2755
2756     return count;
2757
2758 }
2759
2760 /**
2761  * @brief Counts the number of local and remote thread group members for the
2762  * thread group in which the "current" task resides.
2763  * @return The number of threads.
2764  */
2765 static int count_thread_members(void) {
2766      
2767     int count = 0;
2768     PSPRINTK("%s: entered\n",__func__);
2769     count += count_local_thread_members(current->tgroup_home_cpu, current->tgroup_home_id,current->pid);
2770     count += count_remote_thread_members(current->tgroup_home_cpu, current->tgroup_home_id);
2771     PSPRINTK("%s: exited\n",__func__);
2772     return count;
2773 }
2774
2775
2776 /**
2777  * @brief Process notification of a thread group closing.
2778  * This function will wait for any locally executing thread group
2779  * members to exit.  It will then clean up all local resources
2780  * dedicated to the thread group that has exited.
2781  *
2782  * <MEASURE perf_process_tgroup_closed_item>
2783  */
2784
2785 void process_tgroup_closed_item(struct work_struct* work) {
2786
2787     tgroup_closed_work_t* w = (tgroup_closed_work_t*) work;
2788     data_header_t *curr, *next;
2789     mm_data_t* mm_data;
2790     struct task_struct *g, *task;
2791     unsigned char tgroup_closed = 0;
2792     int perf = -1;
2793     mm_data_t* to_remove = NULL;
2794
2795     perf = PERF_MEASURE_START(&perf_process_tgroup_closed_item);
2796
2797     PSPRINTK("%s: entered\n",__func__);
2798     PSPRINTK("%s: received group exit notification\n",__func__);
2799
2800     PSPRINTK("%s: waiting for all members of this distributed thread group to finish\n",__func__);
2801     while(!tgroup_closed) {
2802         unsigned char pass = 0;
2803         do_each_thread(g,task) {
2804             if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
2805                task->tgroup_home_id  == w->tgroup_home_id) {
2806                 
2807                 // there are still living tasks within this distributed thread group
2808                 // wait a bit
2809                 schedule();
2810                 pass = 1;
2811             }
2812
2813         } while_each_thread(g,task);
2814         if(!pass) {
2815             tgroup_closed = 1;
2816         } else {
2817             PSPRINTK("%s: waiting for tgroup close out\n",__func__);
2818         }
2819     }
2820
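    /*
     * Release saved mm's one at a time: the list lock is dropped before
     * calling mmput() (which may sleep), and the scan then restarts from the
     * head via the loop label below.
     */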
2821 loop:
2822     spin_lock(&_saved_mm_head_lock);
2823     // Remove all saved mm's for this thread group.
2824     curr = _saved_mm_head;
2825     while(curr) {
2826         mm_data = (mm_data_t*)curr;
2827         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
2828            mm_data->tgroup_home_id  == w->tgroup_home_id) {
2829             remove_data_entry_from(curr,&_saved_mm_head);
2830             to_remove = mm_data;
2831             goto found;
2832         }
2833         curr = curr->next;
2834     }
2835 found:
2836     spin_unlock(&_saved_mm_head_lock);
2837
2838     if(to_remove != NULL) {
2839         PSPRINTK("%s: removing a mm from cpu{%d} id{%d}\n",
2840                 __func__,
2841                 w->tgroup_home_cpu,
2842                 w->tgroup_home_id);
2843         
2844         BUG_ON(to_remove->mm == NULL);
2845         mmput(to_remove->mm);
2846         kfree(to_remove);
2847         to_remove = NULL;
2848         goto loop;
2849     }
2850
2851     kfree(work);
2852
2853     PERF_MEASURE_STOP(&perf_process_tgroup_closed_item," ",perf);
2854 }
2855
2856 /**
2857  * @brief Determine if the specified vma can have COW mappings.
2858  * @return 1 = yes, 0 = no.
2859  */
2860 static int is_maybe_cow(struct vm_area_struct* vma) {
2861     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
2862         // Not a cow vma
2863         return 0;
2864     }
2865
2866     if(!(vma->vm_flags & VM_WRITE)) {
2867         return 0;
2868     }
2869
2870     return 1;
2871 }
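/*
 * The flag test above follows the usual kernel idiom: a mapping is a COW
 * candidate when it is private (VM_SHARED clear) yet allowed to become
 * writable (VM_MAYWRITE set), and the vma is actually writable (VM_WRITE set).
 */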
2872
2873 /**
2874  * @brief Break the COW page that contains "address", iff that page
2875  * is a COW page.
2876  * @return 1 = handled, 0 = not handled.
2877  * @prerequisite Caller must grab mm->mmap_sem
2878  */
2879 static int break_cow(struct mm_struct *mm, struct vm_area_struct* vma, unsigned long address) {
2880     pgd_t *pgd = NULL;
2881     pud_t *pud = NULL;
2882     pmd_t *pmd = NULL;
2883     pte_t *ptep = NULL;
2884     pte_t pte;
2885     spinlock_t* ptl;
2886
2887     //PSPRINTK("%s: entered\n",__func__);
2888
2889     // if it's not a cow mapping, return.
2890     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
2891         goto not_handled;
2892     }
2893
2894     // if it's not writable in vm_flags, return.
2895     if(!(vma->vm_flags & VM_WRITE)) {
2896         goto not_handled;
2897     }
2898
2899     pgd = pgd_offset(mm, address);
2900     if(!pgd_present(*pgd)) {
2901         goto not_handled_unlock;
2902     }
2903
2904     pud = pud_offset(pgd,address);
2905     if(!pud_present(*pud)) {
2906         goto not_handled_unlock;
2907     }
2908
2909     pmd = pmd_offset(pud,address);
2910     if(!pmd_present(*pmd)) {
2911         goto not_handled_unlock;
2912     }
2913
2914     ptep = pte_offset_map(pmd,address);
2915     if(!ptep || !pte_present(*ptep) || pte_none(*ptep)) {
2916         pte_unmap(ptep);
2917         goto not_handled_unlock;
2918     }
2919
2920     pte = *ptep;
2921
2922     if(pte_write(pte)) {
2923         pte_unmap(ptep);
        goto not_handled_unlock; // already writable: nothing to break, but drop the pte mapping first
2924     }
2925     
2926     // break the cow!
2927     ptl = pte_lockptr(mm,pmd);
2928     PS_SPIN_LOCK(ptl);
2929    
2930     PSPRINTK("%s: proceeding\n",__func__);
2931     do_wp_page(mm,vma,address,ptep,pmd,ptl,pte);
2932
2933
2934     // NOTE:
2935     // Do not call pte_unmap_unlock(ptep,ptl), since do_wp_page does that!
2936     
2937     goto handled;
2938
2939 not_handled_unlock:
2940 not_handled:
2941     return 0;
2942 handled:
2943     return 1;
2944 }
2945
2946 /**
2947  * @brief Process a request made by a remote CPU for a mapping.  This function
2948  * will search for mm's for the specified distributed thread group, and if found,
2949  * will search that mm for entries that contain the address that was asked for.
2950  * Prefetch is implemented in this function, so not only will the page that
2951  * is asked for be communicated, but the entire contiguous range of virtual to
2952  * physical addresses that the specified address lives in will be communicated.
2953  * Other contiguous regions may also be communicated if they exist.  This is
2954  * prefetch.
2955  *
2956  * <MEASURED perf_process_mapping_request>
2957  */
2958 void process_mapping_request(struct work_struct* work) {
2959     mapping_request_work_t* w = (mapping_request_work_t*) work;
2960     mapping_response_t response;
2961     data_header_t* data_curr = NULL;
2962     mm_data_t* mm_data = NULL;
2963     struct task_struct* task = NULL;
2964     struct task_struct* g;
2965     struct vm_area_struct* vma = NULL;
2966     struct mm_struct* mm = NULL;
2967     unsigned long address = w->address;
2968     unsigned long resolved = 0;
2969     struct mm_walk walk = {
2970         .pte_entry = vm_search_page_walk_pte_entry_callback,
2971         .private = &(resolved)
2972     };
2973     char* plpath = NULL;
2974     char lpath[512];
2975     int i;
2976     
2977     // for perf
2978     int used_saved_mm = 0;
2979     int found_vma = 1;
2980     int found_pte = 1;
2981     
2982     // Perf start
2983     int perf = PERF_MEASURE_START(&perf_process_mapping_request);
2984
2985     //PSPRINTK("%s: entered\n",__func__);
2986     PSPRINTK("received mapping request from {%d} address{%lx}, cpu{%d}, id{%d}\n",
2987             w->from_cpu,
2988             w->address,
2989             w->tgroup_home_cpu,
2990             w->tgroup_home_id);
2991
2992     // First, search through existing processes
2993     do_each_thread(g,task) {
2994         if((task->tgroup_home_cpu == w->tgroup_home_cpu) &&
2995            (task->tgroup_home_id  == w->tgroup_home_id )) {
2996             //PSPRINTK("mapping request found common thread group here\n");
2997             mm = task->mm;
2998
2999             // Take note of the fact that an mm exists on the remote kernel
3000             set_cpu_has_known_tgroup_mm(task, w->from_cpu);
3001
3002             goto task_mm_search_exit;
3003         }
3004     } while_each_thread(g,task);
3005 task_mm_search_exit:
3006
3007     // Failing the process search, look through saved mm's.
3008     if(!mm) {
3009         PS_SPIN_LOCK(&_saved_mm_head_lock);
3010         data_curr = _saved_mm_head;
3011         while(data_curr) {
3012
3013             mm_data = (mm_data_t*)data_curr;
3014             
3015             if((mm_data->tgroup_home_cpu == w->tgroup_home_cpu) &&
3016                (mm_data->tgroup_home_id  == w->tgroup_home_id)) {
3017                 PSPRINTK("%s: Using saved mm to resolve mapping\n",__func__);
3018                 mm = mm_data->mm;
3019                 used_saved_mm = 1;
3020                 break;
3021             }
3022
3023             data_curr = data_curr->next;
3024
3025         } // while
3026
3027         PS_SPIN_UNLOCK(&_saved_mm_head_lock);
3028     }
3029     
3030     // OK, if mm was found, look up the mapping.
3031     if(mm) {
3032
3033         // The purpose of this code block is to determine
3034         // if we need to use a read or write lock, and safely
3035         // implement whatever lock type we decided we needed.  We
3036         // prefer to use read locks, since then we can service
3037         // more than one mapping request at the same time.  However,
3038         // if we are going to do any cow break operations, we 
3039         // must lock for write.
3040         int can_be_cow = 0;
3041         int first = 1;
3042 changed_can_be_cow:
3043         if(can_be_cow)
3044             PS_DOWN_WRITE(&mm->mmap_sem);
3045         else 
3046             PS_DOWN_READ(&mm->mmap_sem);
3047         vma = find_vma_checked(mm, address);
3048         if(vma && first) {
3049             first = 0;
3050             if(is_maybe_cow(vma)) {
3051                 can_be_cow = 1;
3052                 PS_UP_READ(&mm->mmap_sem);
3053                 goto changed_can_be_cow;
3054             }
3055         }
3056
3057         walk.mm = mm;
3058         walk_page_range(address & PAGE_MASK, 
3059                 (address & PAGE_MASK) + PAGE_SIZE, &walk);
3060
3061         if(vma && resolved != 0) {
3062
3063             PSPRINTK("mapping found! %lx for vaddr %lx\n",resolved,
3064                     address & PAGE_MASK);
3065
3066             /*
3067              * Find regions of consecutive physical memory
3068              * in this vma, including the faulting address
3069              * if possible.
3070              */
3071             {
3072             // Break all cows in this vma
3073             if(can_be_cow) {
3074                 unsigned long cow_addr;
3075                 for(cow_addr = vma->vm_start; cow_addr < vma->vm_end; cow_addr += PAGE_SIZE) {
3076                     break_cow(mm, vma, cow_addr);
3077                 }
3078                 // We no longer need a write lock after the break_cow process
3079                 // is complete, so downgrade the lock to a read lock.
3080                 downgrade_write(&mm->mmap_sem);
3081             }
3082
3083
3084             // Now grab all the mappings that we can stuff into the response.
3085             if(0 != fill_physical_mapping_array(mm, 
3086                                                 vma,
3087                                                 address,
3088                                                 response.mappings,
3089                                                 MAX_MAPPINGS)) {
3090                 // If the fill process fails, clear out all
3091                 // results.  Otherwise, we might trick the
3092                 // receiving cpu into thinking the target
3093                 // mapping was found when it was not.
3094                 for(i = 0; i < MAX_MAPPINGS; i++) {
3095                     response.mappings[i].present = 0;
3096                     response.mappings[i].vaddr = 0;
3097                     response.mappings[i].paddr = 0;
3098                     response.mappings[i].sz = 0;
3099                 }
3100                     
3101             }
3102
3103             }
3104
3105             response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3106             response.header.prio = PCN_KMSG_PRIO_NORMAL;
3107             response.tgroup_home_cpu = w->tgroup_home_cpu;
3108             response.tgroup_home_id = w->tgroup_home_id;
3109             response.requester_pid = w->requester_pid;
3110             response.address = address;
3111             response.present = 1;
3112             response.vaddr_start = vma->vm_start;
3113             response.vaddr_size = vma->vm_end - vma->vm_start;
3114             response.prot = vma->vm_page_prot;
3115             response.vm_flags = vma->vm_flags;
3116             if(vma->vm_file == NULL) {
3117                 response.path[0] = '\0';
3118             } else {    
3119                 plpath = d_path(&vma->vm_file->f_path,lpath,512);
3120                 strcpy(response.path,plpath);
3121                 response.pgoff = vma->vm_pgoff;
3122             }
3123
3124             // We modified this lock to be read-mode above so now
3125             // we can do a read-unlock instead of a write-unlock
3126             PS_UP_READ(&mm->mmap_sem);
3127        
3128         } else {
3129
3130             if(can_be_cow)
3131                 PS_UP_WRITE(&mm->mmap_sem);
3132             else
3133                 PS_UP_READ(&mm->mmap_sem);
3134             // Zero out mappings
3135             for(i = 0; i < MAX_MAPPINGS; i++) {
3136                 response.mappings[i].present = 0;
3137                 response.mappings[i].vaddr = 0;
3138                 response.mappings[i].paddr = 0;
3139                 response.mappings[i].sz = 0;
3140             }
3141
3142         }
3143         
3144
3145     }
3146
3147     // Not found, respond accordingly
3148     if(resolved == 0) {
3149         found_vma = 0;
3150         found_pte = 0;
3151         //PSPRINTK("Mapping not found\n");
3152         response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3153         response.header.prio = PCN_KMSG_PRIO_NORMAL;
3154         response.tgroup_home_cpu = w->tgroup_home_cpu;
3155         response.tgroup_home_id = w->tgroup_home_id;
3156         response.requester_pid = w->requester_pid;
3157         response.address = address;
3158         response.present = 0;
3159         response.vaddr_start = 0;
3160         response.vaddr_size = 0;
3161         response.path[0] = '\0';
3162
3163         // Handle case where vma was present but no pte.
3164         if(vma) {
3165             //PSPRINTK("But vma present\n");
3166             found_vma = 1;
3167             response.present = 1;
3168             response.vaddr_start = vma->vm_start;
3169             response.vaddr_size = vma->vm_end - vma->vm_start;
3170             response.prot = vma->vm_page_prot;
3171             response.vm_flags = vma->vm_flags;
3172              if(vma->vm_file == NULL) {
3173                  response.path[0] = '\0';
3174              } else {    
3175                  plpath = d_path(&vma->vm_file->f_path,lpath,512);
3176                  strcpy(response.path,plpath);
3177                  response.pgoff = vma->vm_pgoff;
3178              }
3179         }
3180     }
3181
3182     // Send response
3183     if(response.present) {
3184         DO_UNTIL_SUCCESS(pcn_kmsg_send_long(w->from_cpu,
3185                             (struct pcn_kmsg_long_message*)(&response),
3186                             sizeof(mapping_response_t) - 
3187                             sizeof(struct pcn_kmsg_hdr) -   //
3188                             sizeof(response.path) +         // Chop off the end of the path
3189                             strlen(response.path) + 1));    // variable to save bandwidth.
3190     } else {
3191         // This is an optimization to get rid of the _long send 
3192         // which is a time sink.
3193         nonpresent_mapping_response_t nonpresent_response;
3194         nonpresent_response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE_NONPRESENT;
3195         nonpresent_response.header.prio = PCN_KMSG_PRIO_NORMAL;
3196         nonpresent_response.tgroup_home_cpu = w->tgroup_home_cpu;
3197         nonpresent_response.tgroup_home_id  = w->tgroup_home_id;
3198         nonpresent_response.requester_pid = w->requester_pid;
3199         nonpresent_response.address = w->address;
3200         DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)(&nonpresent_response)));
3201
3202     }
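    /*
     * Both sends above are wrapped in DO_UNTIL_SUCCESS, which retries the
     * messaging call until it succeeds.  A minimal sketch of what such a
     * retry macro might look like (the real definition lives elsewhere in
     * this file and may differ):
     *
     *   #define DO_UNTIL_SUCCESS(expr) do {} while ((expr) != 0)
     */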
3203
3204     kfree(work);
3205
3206     // Perf stop
3207     if(used_saved_mm && found_vma && found_pte) {
3208         PERF_MEASURE_STOP(&perf_process_mapping_request,
3209                 "Saved MM + VMA + PTE",
3210                 perf);
3211     } else if (used_saved_mm && found_vma && !found_pte) {
3212         PERF_MEASURE_STOP(&perf_process_mapping_request,
3213                 "Saved MM + VMA + no PTE",
3214                 perf);
3215     } else if (used_saved_mm && !found_vma) {
3216         PERF_MEASURE_STOP(&perf_process_mapping_request,
3217                 "Saved MM + no VMA",
3218                 perf);
3219     } else if (!used_saved_mm && found_vma && found_pte) {
3220         PERF_MEASURE_STOP(&perf_process_mapping_request,
3221                 "VMA + PTE",
3222                 perf);
3223     } else if (!used_saved_mm && found_vma && !found_pte) {
3224         PERF_MEASURE_STOP(&perf_process_mapping_request,
3225                 "VMA + no PTE",
3226                 perf);
3227     } else if (!used_saved_mm && !found_vma) {
3228         PERF_MEASURE_STOP(&perf_process_mapping_request,
3229                 "no VMA",
3230                 perf);
3231     } else {
3232         PERF_MEASURE_STOP(&perf_process_mapping_request,"ERR",perf);
3233     }
3234
3235     return;
3236 }
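/*
 * For reference, the length passed to pcn_kmsg_send_long in the handler above
 * trims the unused tail of the fixed-size path buffer.  A minimal sketch of
 * the same arithmetic, assuming path is the last member of mapping_response_t
 * and that the size argument excludes the message header:
 *
 *   size_t len = sizeof(mapping_response_t)
 *              - sizeof(struct pcn_kmsg_hdr)    // header handled by the messaging layer
 *              - sizeof(response.path)          // drop the whole path buffer...
 *              + strlen(response.path) + 1;     // ...then add back only the bytes in use
 *
 * This keeps large-message traffic proportional to the actual path length.
 */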
3237
3238 unsigned long long perf_aa, perf_bb, perf_cc, perf_dd, perf_ee;
3239
3240 /**
3241  * @brief Process notification that a task has exited.  This function
3242  * sets the "return disposition" of the task, then wakes the task.
3243  * In this case, the "return disposition" specifies that the task
3244  * is exiting.  When the task resumes execution, it consults its
3245  * return disposition and acts accordingly, invoking do_exit().
3246  *
3247  * <MEASURE perf_process_exit_item>
3248  */
3249 void process_exit_item(struct work_struct* work) {
3250     exit_work_t* w = (exit_work_t*) work;
3251     pid_t pid = w->pid;
3252     struct task_struct *task = w->task;
3253
3254     int perf = PERF_MEASURE_START(&perf_process_exit_item);
3255
3256     if(unlikely(!task)) {
3257         printk("%s: ERROR - empty task\n",__func__);
3258         kfree(work);
3259         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3260         return;
3261     }
3262
3263     if(unlikely(task->pid != pid)) {
3264         printk("%s: ERROR - wrong task picked\n",__func__);
3265         kfree(work);
3266         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3267         return;
3268     }
3269     
3270     PSPRINTK("%s: process to kill %ld\n", __func__, (long)pid);
3271     PSPRINTK("%s: found task to kill, killing\n", __func__);
3272     PSPRINTK("%s: killing task - is_last_tgroup_member{%d}\n",
3273             __func__,
3274             w->is_last_tgroup_member);
3275
3276     // Now we're executing locally, so update our records
3277     //if(task->t_home_cpu == _cpu && task->t_home_id == task->pid)
3278     //    task->represents_remote = 0;
3279
3280     // Set the return disposition
3281     task->return_disposition = RETURN_DISPOSITION_EXIT;
3282
3283     wake_up_process(task);
3284
3285     kfree(work);
3286
3287     PERF_MEASURE_STOP(&perf_process_exit_item," ",perf);
3288 }
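/*
 * A minimal sketch of the consumer side of the return-disposition protocol
 * described above.  The resume path of the woken task is assumed to contain
 * a check of this shape (the function name here is purely illustrative):
 *
 *   void handle_return_disposition(void)
 *   {
 *       if (current->return_disposition == RETURN_DISPOSITION_EXIT)
 *           do_exit(0);   // does not return
 *       // otherwise continue normal execution
 *   }
 */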
3289
3290 /**
3291  * @brief Process a group exit request.  This function
3292  * issues SIGKILL to all locally executing members of the specified
3293  * distributed thread group.  Only tasks that are actively
3294  * executing on this CPU will receive the SIGKILL.  Shadow tasks
3295  * will not be sent SIGKILL.  Group exit requests are sent to
3296  * all CPUs, so for shadow tasks, another CPU will issue the
3297  * SIGKILL.  When that occurs, the normal exit process will be
3298  * initiated for that task, and eventually, all of its shadow
3299  * tasks will be killed.
3300  */
3301 void process_group_exit_item(struct work_struct* work) {
3302     group_exit_work_t* w = (group_exit_work_t*) work;
3303     struct task_struct *task = NULL;
3304     struct task_struct *g;
3305     unsigned long flags;
3306
3307     //int perf = PERF_MEASURE_START(&perf_process_group_exit_item);
3308     PSPRINTK("%s: entered\n",__func__);
3309     PSPRINTK("exit group target id{%d}, cpu{%d}\n",
3310             w->tgroup_home_id, w->tgroup_home_cpu);
3311
3312     do_each_thread(g,task) {
3313         if(task->tgroup_home_id == w->tgroup_home_id &&
3314            task->tgroup_home_cpu == w->tgroup_home_cpu) {
3315             
3316             if (!task->represents_remote) { // similar to zap_other_threads
3317                 exit_robust_list(task);
3318                 task->robust_list = NULL;
3319                 // Active task: send SIGKILL
3320                 lock_task_sighand(task, &flags);
3321
3322                 task_clear_jobctl_pending(task, JOBCTL_PENDING_MASK);
3323                 sigaddset(&task->pending.signal, SIGKILL);
3324                 signal_wake_up(task, 1);
3325                 clear_ti_thread_flag(task, _TIF_USER_RETURN_NOTIFY);
3326
3327                 unlock_task_sighand(task, &flags);
3328
3329             }
3330
3331             // If it is a shadow task, it will eventually
3332             // get killed when its corresponding active task
3333             // is killed.
3334
3335         }
3336     } while_each_thread(g,task);
3337     
3338     kfree(work);
3339
3340     PSPRINTK("%s: exiting\n",__func__);
3341     //PERF_MEASURE_STOP(&perf_process_group_exit_item," ",perf);
3342
3343 }
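/*
 * The per-task decision in the loop above boils down to the following,
 * shown as a simplified sketch (send_sig stands in for the
 * zap_other_threads-style sequence actually used):
 *
 *   if (task->tgroup_home_id  == w->tgroup_home_id &&
 *       task->tgroup_home_cpu == w->tgroup_home_cpu &&
 *       !task->represents_remote)
 *           send_sig(SIGKILL, task, 1);
 *
 * Shadow tasks (represents_remote != 0) are deliberately skipped; they are
 * torn down later as part of their active counterpart's exit path.
 */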
3344
3345
3346 /**
3347  * @brief Process request to unmap a region of memory from a distributed
3348  * thread group.  Look for local thread group members and carry out the
3349  * requested action.
3350  *
3351  * <MEASURE perf_process_munmap_request>
3352  */
3353 void process_munmap_request(struct work_struct* work) {
3354     munmap_request_work_t* w = (munmap_request_work_t*)work;
3355     munmap_response_t response;
3356     struct task_struct *task, *g;
3357     data_header_t *curr = NULL;
3358     mm_data_t* mm_data = NULL;
3359     mm_data_t* to_munmap = NULL;
3360     struct mm_struct * mm_to_munmap = NULL;
3361
3362     int perf = PERF_MEASURE_START(&perf_process_munmap_request);
3363
3364     PSPRINTK("%s: entered\n",__func__);
3365
3366     // munmap the specified region in the specified thread group
3367     read_lock(&tasklist_lock);
3368     do_each_thread(g,task) {
3369
3370         // Look for the thread group
3371         if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
3372            task->tgroup_home_id  == w->tgroup_home_id &&
3373            !(task->flags & PF_EXITING)) {
3374
3375             // Thread group has been found; record its mm so the region
3376             // can be unmapped once the tasklist lock is released.
3377             if (task->mm) {
3378                 mm_to_munmap = task->mm;
3379             } else {
3380                 printk("%s: task has no mm\n", __func__);
3381             }
3382
3383             // TODO: check whether this makes sense
3384             // Take note of the fact that an mm exists on the remote kernel
3385             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
3386
3387             goto done; // thread grouping - threads all share a common mm.
3388
3389         }
3390     } while_each_thread(g,task);
3391 done:
3392     read_unlock(&tasklist_lock);
3393
3394     if(mm_to_munmap) {
3395         PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
3396         current->enable_distributed_munmap = 0;
3397         do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
3398         current->enable_distributed_munmap = 1;
3399         PS_UP_WRITE(&mm_to_munmap->mmap_sem);
3400     }
3401     // munmap the specified region in any saved mm's as well.
3402     // This keeps old mappings saved in the mm of dead thread
3403     // group members from being resolved accidentally after
3404     // being munmap()ped, as that would cause security/coherency
3405     // problems.
3406     PS_SPIN_LOCK(&_saved_mm_head_lock);
3407     curr = _saved_mm_head;
3408     while(curr) {
3409         mm_data = (mm_data_t*)curr;
3410         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
3411            mm_data->tgroup_home_id  == w->tgroup_home_id) {
3412            
3413             to_munmap = mm_data;
3414             goto found;
3415
3416         }
3417         curr = curr->next;
3418     }
3419 found:
3420     PS_SPIN_UNLOCK(&_saved_mm_head_lock);
3421
3422     if (to_munmap && to_munmap->mm) {
3423         PS_DOWN_WRITE(&to_munmap->mm->mmap_sem);
3424         current->enable_distributed_munmap = 0;
3425         do_munmap(to_munmap->mm, w->vaddr_start, w->vaddr_size);
3426         current->enable_distributed_munmap = 1;
3427         PS_UP_WRITE(&to_munmap->mm->mmap_sem);
3431     }
3432     else if (to_munmap) // to_munmap may legitimately be NULL, but a saved entry must have an mm
3433         printk(KERN_ALERT"%s: ERROR: to_munmap %p has a NULL mm\n", __func__, to_munmap);
3434
3435     // Construct response
3436     response.header.type = PCN_KMSG_TYPE_PROC_SRV_MUNMAP_RESPONSE;
3437     response.header.prio = PCN_KMSG_PRIO_NORMAL;
3438     response.tgroup_home_cpu = w->tgroup_home_cpu;
3439     response.tgroup_home_id = w->tgroup_home_id;
3440     response.requester_pid = w->requester_pid;
3441     response.vaddr_start = w->vaddr_start;
3442     response.vaddr_size = w->vaddr_size;
3443     
3444     // Send response
3445     DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,
3446                         (struct pcn_kmsg_message*)(&response)));
3447
3448     kfree(work);
3449     
3450     PERF_MEASURE_STOP(&perf_process_munmap_request," ",perf);
3451 }
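/*
 * The enable_distributed_munmap toggling above acts as a re-entrancy guard:
 * this handler is already servicing a remote munmap, so its local do_munmap
 * calls must not be broadcast back to the other kernels.  A minimal sketch of
 * how such a guard is assumed to be consulted on the local munmap path (the
 * hook name is illustrative only):
 *
 *   static int notify_remote_munmap(unsigned long start, unsigned long len)
 *   {
 *       if (!current->enable_distributed_munmap)
 *           return 0;    // suppress the distributed broadcast
 *       // ...otherwise send munmap_request messages to the other kernels...
 *       return 0;
 *   }
 */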
3452
3453 /**
3454  * @brief Process request to change protection of a region of memory in
3455  * a distributed thread group.  Look for local thread group members and
3456  * carry out the requested action.
3457  *
3458  * <MEASURE perf_process_mprotect_item>
3459  */
3460 void process_mprotect_item(struct work_struct* work) {
3461     mprotect_response_t response;
3462     mprotect_work_t* w = (mprotect_work_t*)work;
3463     int tgroup_home_cpu = w->tgroup_home_cpu;
3464     int tgroup_home_id  = w->tgroup_home_id;
3465     unsigned long start = w->start;
3466     size_t len = w->len;
3467     unsigned long prot = w->prot;
3468     struct task_struct* task, *g;
3469     data_header_t* curr = NULL;
3470     mm_data_t* mm_data = NULL;
3471     mm_data_t* to_munmap = NULL;
3472     struct mm_struct* mm_to_munmap = NULL;
3473
3474     int perf = PERF_MEASURE_START(&perf_process_mprotect_item);
3475     
3476     // Find the task
3477     read_lock(&tasklist_lock);
3478     do_each_thread(g,task) {
3479 //      task_lock(task); // TODO: consider using this
3480         if (task->tgroup_home_cpu == tgroup_home_cpu &&
3481             task->tgroup_home_id  == tgroup_home_id &&
3482             !(task->flags & PF_EXITING)) {
3483            /* 
3484             if (task->mm)
3485                 // do_mprotect
3486                 do_mprotect(task, start, len, prot,0);
3487 //              task_unlock(task); //TODO consider to use this