[projects/modsched/linux.git] / kernel / process_server.c
1 /*
2  * Implements task migration and maintains coherent 
3  * address spaces across CPU cores.
4  *
5  * David G. Katz
6  */
7
8 #include <linux/mcomm.h> // IPC
9 #include <linux/kthread.h>
10 #include <linux/export.h>
11 #include <linux/delay.h>
12 #include <linux/smp.h>
13 #include <linux/sched.h>
14 #include <linux/threads.h> // NR_CPUS
15 #include <linux/kmod.h>
16 #include <linux/path.h>
17 #include <linux/mount.h>
18 #include <linux/fs.h>
19 #include <linux/fs_struct.h>
20 #include <linux/file.h>
21 #include <linux/fdtable.h>
22 #include <linux/slab.h>
23 #include <linux/process_server.h>
24 #include <linux/mm.h>
25 #include <linux/io.h> // ioremap
26 #include <linux/mman.h> // MAP_ANONYMOUS
27 #include <linux/pcn_kmsg.h> // Messaging
28 #include <linux/pcn_perf.h> // performance measurement
29 #include <linux/string.h>
30 #include <linux/unistd.h>
31 #include <linux/tsacct_kern.h>
32 #include <linux/popcorn.h>
33 #include <linux/syscalls.h>
34 #include <linux/kernel.h>
35 #include <linux/proc_fs.h>
36
37 #include <asm/pgtable.h>
38 #include <asm/atomic.h>
39 #include <asm/tlbflush.h>
40 #include <asm/cacheflush.h>
41 #include <asm/uaccess.h> // USER_DS
42 #include <asm/prctl.h> // prctl
43 #include <asm/proto.h> // do_arch_prctl
44 #include <asm/msr.h> // wrmsr_safe
45 #include <asm/mmu_context.h>
46 #include <asm/processor.h> // load_cr3
47
48 unsigned long get_percpu_old_rsp(void);
49
50 /**
51  * General purpose configuration
52  */
53
54 // Flag indicating whether or not to migrate the entire virtual 
55 // memory space when a migration occurs.  
56 #define COPY_WHOLE_VM_WITH_MIGRATION 0
57
58 // Flag indicating whether or not to migrate file-backed executable
59 // pages when a fault occurs accessing executable memory.  When this
60 // flag is 1, those pages will be migrated.  When it is 0, the local
61 // file-system will be consulted instead.
62 #define MIGRATE_EXECUTABLE_PAGES_ON_DEMAND 1
63
64 // The maximum number of contiguous physically mapped regions to 
65 // migrate in response to a mapping query.
66 #define MAX_MAPPINGS 1
67
68 // Whether or not to expose a proc entry that we can publish
69 // information to.
70 //#undef PROCESS_SERVER_HOST_PROC_ENTRY
71 #define PROCESS_SERVER_HOST_PROC_ENTRY
72
73 /**
74  * Use the preprocessor to turn off printk.
75  */
76 #define PROCESS_SERVER_VERBOSE 0
77 #if PROCESS_SERVER_VERBOSE
78 #define PSPRINTK(...) printk(__VA_ARGS__)
79 #else
80 #define PSPRINTK(...) ;
81 #endif
82
83 #define PROCESS_SERVER_INSTRUMENT_LOCK 0
84 #if PROCESS_SERVER_VERBOSE && PROCESS_SERVER_INSTRUMENT_LOCK
85 #define PS_SPIN_LOCK(x) PSPRINTK("Acquiring spin lock in %s at line %d\n",__func__,__LINE__); \
86                        spin_lock(x); \
87                        PSPRINTK("Done acquiring spin lock in %s at line %d\n",__func__,__LINE__)
88 #define PS_SPIN_UNLOCK(x) PSPRINTK("Releasing spin lock in %s at line %d\n",__func__,__LINE__); \
89                           spin_unlock(x); \
90                           PSPRINTK("Done releasing spin lock in %s at line %d\n",__func__,__LINE__)
91 #define PS_DOWN_READ(x) PSPRINTK("Acquiring read lock in %s at line %d\n",__func__,__LINE__); \
92                         down_read(x); \
93                         PSPRINTK("Done acquiring read lock in %s at line %d\n",__func__,__LINE__)
94 #define PS_UP_READ(x) PSPRINTK("Releasing read lock in %s at line %d\n",__func__,__LINE__); \
95                       up_read(x); \
96                       PSPRINTK("Done releasing read lock in %s at line %d\n",__func__,__LINE__)
97 #define PS_DOWN_WRITE(x) PSPRINTK("Acquiring write lock in %s at line %d\n",__func__,__LINE__); \
98                          down_write(x); \
99                          PSPRINTK("Done acquiring write lock in %s at line %d\n",__func__,__LINE__)
100 #define PS_UP_WRITE(x) PSPRINTK("Releasing write lock in %s at line %d\n",__func__,__LINE__); \
101                        up_write(x); \
102                        PSPRINTK("Done releasing write lock in %s at line %d\n",__func__,__LINE__)
103
104
105 #else
106 #define PS_SPIN_LOCK(x) spin_lock(x)
107 #define PS_SPIN_UNLOCK(x) spin_unlock(x)
108 #define PS_DOWN_READ(x) down_read(x)
109 #define PS_UP_READ(x) up_read(x)
110 #define PS_DOWN_WRITE(x) down_write(x)
111 #define PS_UP_WRITE(x) up_write(x)
112 #endif
113
114 /**
115  * Library data type definitions
116  */
117 #define PROCESS_SERVER_DATA_TYPE_TEST 0
118 #define PROCESS_SERVER_VMA_DATA_TYPE 1
119 #define PROCESS_SERVER_PTE_DATA_TYPE 2
120 #define PROCESS_SERVER_CLONE_DATA_TYPE 3
121 #define PROCESS_SERVER_MAPPING_REQUEST_DATA_TYPE 4
122 #define PROCESS_SERVER_MUNMAP_REQUEST_DATA_TYPE 5
123 #define PROCESS_SERVER_MM_DATA_TYPE 6
124 #define PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE 7
125 #define PROCESS_SERVER_MPROTECT_DATA_TYPE 8
126 #define PROCESS_SERVER_LAMPORT_BARRIER_DATA_TYPE 9
127 #define PROCESS_SERVER_STATS_DATA_TYPE 10
128
129 /**
130  * Useful macros
131  */
132 #define DO_UNTIL_SUCCESS(x) while((x) != 0){}
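/*
 * Illustrative use of DO_UNTIL_SUCCESS (a sketch, not part of the original
 * code): the expression is simply re-evaluated until it returns 0, e.g. to
 * retry a kernel-message send until it succeeds:
 *
 *     DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&pte_xfer));
 *
 * Note that the macro re-evaluates its argument on every iteration, so the
 * argument must be the call itself, not a cached return value.
 */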
133
134 /**
135  * Perf
136  */
137 #define MEASURE_PERF 0
138 #if MEASURE_PERF
139 #define PERF_INIT() perf_init()
140 #define PERF_MEASURE_START(x) perf_measure_start(x)
141 #define PERF_MEASURE_STOP(x,y,z)  perf_measure_stop(x,y,z)
142
143 pcn_perf_context_t perf_count_remote_thread_members;
144 pcn_perf_context_t perf_process_back_migration;
145 pcn_perf_context_t perf_process_mapping_request;
146 pcn_perf_context_t perf_process_mapping_request_search_active_mm;
147 pcn_perf_context_t perf_process_mapping_request_search_saved_mm;
148 pcn_perf_context_t perf_process_mapping_request_do_lookup;
149 pcn_perf_context_t perf_process_mapping_request_transmit;
150 pcn_perf_context_t perf_process_mapping_response;
151 pcn_perf_context_t perf_process_tgroup_closed_item;
152 pcn_perf_context_t perf_process_exit_item;
153 pcn_perf_context_t perf_process_mprotect_item;
154 pcn_perf_context_t perf_process_munmap_request;
155 pcn_perf_context_t perf_process_munmap_response;
156 pcn_perf_context_t perf_process_server_try_handle_mm_fault;
157 pcn_perf_context_t perf_process_server_import_address_space;
158 pcn_perf_context_t perf_process_server_do_exit;
159 pcn_perf_context_t perf_process_server_do_munmap;
160 pcn_perf_context_t perf_process_server_do_migration;
161 pcn_perf_context_t perf_process_server_do_mprotect;
162 pcn_perf_context_t perf_process_server_notify_delegated_subprocess_starting;
163 pcn_perf_context_t perf_handle_thread_group_exit_notification;
164 pcn_perf_context_t perf_handle_remote_thread_count_response;
165 pcn_perf_context_t perf_handle_remote_thread_count_request;
166 pcn_perf_context_t perf_handle_munmap_response;
167 pcn_perf_context_t perf_handle_munmap_request;
168 pcn_perf_context_t perf_handle_mapping_response;
169 pcn_perf_context_t perf_handle_mapping_request;
170 pcn_perf_context_t perf_handle_pte_transfer;
171 pcn_perf_context_t perf_handle_vma_transfer;
172 pcn_perf_context_t perf_handle_exiting_process_notification;
173 pcn_perf_context_t perf_handle_process_pairing_request;
174 pcn_perf_context_t perf_handle_clone_request;
175 pcn_perf_context_t perf_handle_mprotect_response;
176 pcn_perf_context_t perf_handle_mprotect_request;
177
178 /**
179  * Initialize the perf measurement contexts for the instrumented paths below.
180  */
181 static void perf_init(void) {
182    perf_init_context(&perf_count_remote_thread_members,
183            "count_remote_thread_members");
184    perf_init_context(&perf_process_back_migration,
185            "process_back_migration");
186    perf_init_context(&perf_process_mapping_request,
187            "process_mapping_request");
188    perf_init_context(&perf_process_mapping_request_search_active_mm,
189            "process_mapping_request_search_active_mm");
190    perf_init_context(&perf_process_mapping_request_search_saved_mm,
191            "process_mapping_request_search_saved_mm");
192    perf_init_context(&perf_process_mapping_request_do_lookup,
193            "process_mapping_request_do_lookup");
194    perf_init_context(&perf_process_mapping_request_transmit,
195            "process_mapping_request_transmit");
196    perf_init_context(&perf_process_mapping_response,
197            "process_mapping_response");
198    perf_init_context(&perf_process_tgroup_closed_item,
199            "process_tgroup_closed_item");
200    perf_init_context(&perf_process_exit_item,
201            "process_exit_item");
202    perf_init_context(&perf_process_mprotect_item,
203            "process_mprotect_item");
204    perf_init_context(&perf_process_munmap_request,
205            "process_munmap_request");
206    perf_init_context(&perf_process_munmap_response,
207            "process_munmap_response");
208    perf_init_context(&perf_process_server_try_handle_mm_fault,
209            "process_server_try_handle_mm_fault");
210    perf_init_context(&perf_process_server_import_address_space,
211            "process_server_import_address_space");
212    perf_init_context(&perf_process_server_do_exit,
213            "process_server_do_exit");
214    perf_init_context(&perf_process_server_do_munmap,
215            "process_server_do_munmap");
216    perf_init_context(&perf_process_server_do_migration,
217            "process_server_do_migration");
218    perf_init_context(&perf_process_server_do_mprotect,
219            "process_server_do_mprotect");
220    perf_init_context(&perf_process_server_notify_delegated_subprocess_starting,
221            "process_server_notify_delegated_subprocess_starting");
222    perf_init_context(&perf_handle_thread_group_exit_notification,
223            "handle_thread_group_exit_notification");
224    perf_init_context(&perf_handle_remote_thread_count_response,
225            "handle_remote_thread_count_response");
226    perf_init_context(&perf_handle_remote_thread_count_request,
227            "handle_remote_thread_count_request");
228    perf_init_context(&perf_handle_munmap_response,
229            "handle_munmap_response");
230    perf_init_context(&perf_handle_munmap_request,
231            "handle_munmap_request");
232    perf_init_context(&perf_handle_mapping_response,
233            "handle_mapping_response");
234    perf_init_context(&perf_handle_mapping_request,
235            "handle_mapping_request");
236    perf_init_context(&perf_handle_pte_transfer,
237            "handle_pte_transfer");
238    perf_init_context(&perf_handle_vma_transfer,
239            "handle_vma_transfer");
240    perf_init_context(&perf_handle_exiting_process_notification,
241            "handle_exiting_process_notification");
242    perf_init_context(&perf_handle_process_pairing_request,
243            "handle_process_pairing_request");
244    perf_init_context(&perf_handle_clone_request,
245            "handle_clone_request");
246    perf_init_context(&perf_handle_mprotect_request,
247            "handle_mprotect_request");
248    perf_init_context(&perf_handle_mprotect_response,
249            "handle_mprotect_response");
250
251 }
252
253 #else
254 #define PERF_INIT() 
255 #define PERF_MEASURE_START(x) -1
256 #define PERF_MEASURE_STOP(x, y, z)
257 #endif
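/*
 * Sketch of how these hooks are meant to be used (an assumption based on the
 * macro shapes above, not code taken from this file): a handler opens a
 * measurement on entry and closes it on exit, e.g.
 *
 *     int perf = PERF_MEASURE_START(&perf_handle_mapping_request);
 *     ...handle the request...
 *     PERF_MEASURE_STOP(&perf_handle_mapping_request, "done", perf);
 *
 * With MEASURE_PERF set to 0 the macros compile away and START yields -1.
 */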
258
259 /**
260  * Enums
261  */
262 typedef enum _lamport_barrier_state {
263     LAMPORT_ENTRY_OWNED,
264     LAMPORT_ENTRY_OFF_LIMITS,
265     LAMPORT_ENTRY_CONTENDED
266 } lamport_barrier_state_t;
267
268
269 /**
270  * Library
271  */
272
273 /**
274  * Some piping for linking data entries
275  * and identifying data entry types.
276  */
277 typedef struct _data_header {
278     struct _data_header* next;
279     struct _data_header* prev;
280     int data_type;
281 } data_header_t;
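/*
 * Usage sketch (an assumption, not code from this file): each tracked record
 * embeds data_header_t as its first member, so the *_head lists declared
 * further below can be manipulated generically, e.g. pushing an entry while
 * holding the matching spinlock:
 *
 *     entry->header.next = _data_head;
 *     entry->header.prev = NULL;
 *     if (_data_head)
 *         _data_head->prev = (data_header_t *)entry;
 *     _data_head = (data_header_t *)entry;
 */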
282
283 /**
284  * Hold data about a pte to vma mapping.
285  */
286 typedef struct _pte_data {
287     data_header_t header;
288     int vma_id;
289     int clone_request_id;
290     int cpu;
291     unsigned long vaddr_start;
292     unsigned long paddr_start;
293     size_t sz;
294 } pte_data_t;
295
296 /**
297  * Hold data about a vma to process
298  * mapping.
299  */
300 typedef struct _vma_data {
301     data_header_t header;
302     spinlock_t lock;
303     unsigned long start;
304     unsigned long end;
305     int clone_request_id;
306     int cpu;
307     unsigned long flags;
308     int vma_id;
309     pgprot_t prot;
310     unsigned long pgoff;
311     pte_data_t* pte_list;
312     int mmapping_in_progress;
313     char path[256];
314 } vma_data_t;
315
316 typedef struct _contiguous_physical_mapping {
317     unsigned char present;
318     unsigned long vaddr;
319     unsigned long paddr;
320     size_t sz;
321 } contiguous_physical_mapping_t;
322
323 /**
324  * Local record of a clone request received from a remote cpu.
325  */
326 typedef struct _clone_data {
327     data_header_t header;
328     spinlock_t lock;
329     int clone_request_id;
330     int requesting_cpu;
331     char exe_path[512];
332     unsigned long clone_flags;
333     unsigned long stack_start;
334     unsigned long stack_ptr;
335     unsigned long env_start;
336     unsigned long env_end;
337     unsigned long arg_start;
338     unsigned long arg_end;
339     unsigned long heap_start;
340     unsigned long heap_end;
341     unsigned long data_start;
342     unsigned long data_end;
343     struct pt_regs regs;
344     int placeholder_pid;
345     int placeholder_tgid;
346     int placeholder_cpu;
347     unsigned long thread_fs;
348     unsigned long thread_gs;
349     unsigned long thread_sp0;
350     unsigned long thread_sp;
351     unsigned long thread_usersp;
352     unsigned short thread_es;
353     unsigned short thread_ds;
354     unsigned short thread_fsindex;
355     unsigned short thread_gsindex;
356     unsigned long def_flags;
357     unsigned int personality;
358     int tgroup_home_cpu;
359     int tgroup_home_id;
360     int t_home_cpu;
361     int t_home_id;
362     int prio, static_prio, normal_prio; //from sched.c
363     unsigned int rt_priority; //from sched.c
364     int sched_class; //from sched.c but here we are using SCHED_NORMAL, SCHED_FIFO, etc.
365     unsigned long previous_cpus;
366     vma_data_t* vma_list;
367     vma_data_t* pending_vma_list;
368 } clone_data_t;
369
370 /**
371  * Track an in-flight distributed mapping query and the responses received.
372  */
373 typedef struct _mapping_request_data {
374     data_header_t header;
375     int tgroup_home_cpu;
376     int tgroup_home_id;
377     int requester_pid;
378     unsigned long address;
379     unsigned long vaddr_start;
380     unsigned long vaddr_size;
381     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
382     pgprot_t prot;
383     unsigned long vm_flags;
384     unsigned char present;
385     unsigned char complete;
386     unsigned char from_saved_mm;
387     int responses;
388     int expected_responses;
389     unsigned long pgoff;
390     spinlock_t lock;
391     char path[512];
392     struct semaphore wait_sem;
393 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
394     unsigned long long wait_time_concluded;
395 #endif
396 } mapping_request_data_t;
397
398 /**
399  * Track an in-flight distributed munmap request and its responses.
400  */
401 typedef struct _munmap_request_data {
402     data_header_t header;
403     int tgroup_home_cpu;
404     int tgroup_home_id;
405     int requester_pid;
406     unsigned long vaddr_start;
407     unsigned long vaddr_size;
408     int responses;
409     int expected_responses;
410     spinlock_t lock;
411 } munmap_request_data_t;
412
413 /**
414  * Track an in-flight remote thread-count query and its responses.
415  */
416 typedef struct _remote_thread_count_request_data {
417     data_header_t header;
418     int tgroup_home_cpu;
419     int tgroup_home_id;
420     int requester_pid;
421     int responses;
422     int expected_responses;
423     int count;
424     spinlock_t lock;
425 } remote_thread_count_request_data_t;
426
427 /**
428  * Associate a saved mm_struct with its distributed thread group.
429  */
430 typedef struct _mm_data {
431     data_header_t header;
432     int tgroup_home_cpu;
433     int tgroup_home_id;
434     struct mm_struct* mm;
435 } mm_data_t;
436
437 typedef struct _mprotect_data {
438     data_header_t header;
439     int tgroup_home_cpu;
440     int tgroup_home_id;
441     int requester_pid;
442     unsigned long start;
443     int responses;
444     int expected_responses;
445     spinlock_t lock;
446 } mprotect_data_t;
447
448 typedef struct _get_counter_phys_data {
449     data_header_t header;
450     int response_received;
451     unsigned long resp;
452 } get_counter_phys_data_t;
453
454 typedef struct _lamport_barrier_entry {
455     data_header_t header;
456     unsigned long long timestamp;
457     int responses;
458     int expected_responses;
459     int allow_responses;
460     int cpu;
461 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
462     unsigned long long lock_acquired;
463     unsigned long long lock_released;
464 #endif
465 } lamport_barrier_entry_t;
466
467 typedef struct _lamport_barrier_queue {
468     data_header_t header;
469     int tgroup_home_cpu;
470     int tgroup_home_id;
471     unsigned long address;
472     unsigned long long active_timestamp;
473     lamport_barrier_entry_t* queue;
474 } lamport_barrier_queue_t;
475
476 /**
477  * This message is sent to a remote cpu in order to 
478  * ask it to spin up a process on behalf of the
479  * requesting cpu.  Some of these fields may go
480  * away in the near future.
481  */
482 typedef struct _clone_request {
483     struct pcn_kmsg_hdr header;
484     int clone_request_id;
485     unsigned long clone_flags;
486     unsigned long stack_start;
487     unsigned long stack_ptr;
488     unsigned long env_start;
489     unsigned long env_end;
490     unsigned long arg_start;
491     unsigned long arg_end;
492     unsigned long heap_start;
493     unsigned long heap_end;
494     unsigned long data_start;
495     unsigned long data_end;
496     struct pt_regs regs;
497     char exe_path[512];
498     int placeholder_pid;
499     int placeholder_tgid;
500     unsigned long thread_fs;
501     unsigned long thread_gs;
502     unsigned long thread_sp0;
503     unsigned long thread_sp;
504     unsigned long thread_usersp;
505     unsigned short thread_es;
506     unsigned short thread_ds;
507     unsigned short thread_fsindex;
508     unsigned short thread_gsindex;
509     unsigned long def_flags;
510     unsigned int personality;
511     int tgroup_home_cpu;
512     int tgroup_home_id;
513     int t_home_cpu;
514     int t_home_id;
515     int prio, static_prio, normal_prio; //from sched.c
516     unsigned int rt_priority; //from sched.c
517     int sched_class; //from sched.c but here we are using SCHED_NORMAL, SCHED_FIFO, etc.
518     unsigned long previous_cpus;
519 } clone_request_t;
520
521 /**
522  * This message is sent in response to a clone request.
523  * Its purpose is to notify the requesting cpu that
524  * the specified pid is executing on behalf of the
525  * requesting cpu.
526  */
527 typedef struct _create_process_pairing {
528     struct pcn_kmsg_hdr header;
529     int your_pid; // PID of the task on the cpu receiving this pairing request
530     int my_pid;   // PID of the task on the cpu transmitting this pairing request
531 } create_process_pairing_t;
532
533 /**
534  * This message informs the remote cpu of delegated
535  * process death.  It is sent whether the local process
536  * is a placeholder or a delegate.
537  */
538 struct _exiting_process {
539     struct pcn_kmsg_hdr header;
540     int t_home_cpu;             // 4
541     int t_home_id;              // 4
542     int my_pid;                 // 4
543     int is_last_tgroup_member;  // 4+
544                                 // ---
545                                 // 16 -> 44 bytes of padding needed
546     char pad[44];
547 } __attribute__((packed)) __attribute__((aligned(64)));  
548 typedef struct _exiting_process exiting_process_t;
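/*
 * Layout note on exiting_process_t above and the similar messages below:
 * each one is packed and aligned(64), and the trailing pad[] is sized from
 * the byte counts in the field comments so that header + fields + pad fill
 * a single 64-byte unit, which appears to be what the fixed-size pcn_kmsg
 * send path expects.  Larger payloads (e.g. mapping_response_t) instead go
 * through the "long" message variant mentioned further below.
 */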
549
550 /**
551  *
552  */
553 struct _exiting_group {
554     struct pcn_kmsg_hdr header;
555     int tgroup_home_cpu;        // 4
556     int tgroup_home_id;         // 4
557                                 // ---
558                                 // 8 -> 52 bytes of padding needed
559     char pad[52];
560 } __attribute__((packed)) __attribute__((aligned(64)));
561 typedef struct _exiting_group exiting_group_t;
562
563 /**
564  * Inform remote cpu of a vma to process mapping.
565  */
566 typedef struct _vma_transfer {
567     struct pcn_kmsg_hdr header;
568     int vma_id;
569     int clone_request_id;
570     unsigned long start;
571     unsigned long end;
572     pgprot_t prot;
573     unsigned long flags;
574     unsigned long pgoff;
575     char path[256];
576 } vma_transfer_t;
577
578 /**
579  * Inform remote cpu of a pte to vma mapping.
580  */
581 struct _pte_transfer {
582     struct pcn_kmsg_hdr header;
583     int vma_id;                  //  4
584     int clone_request_id;        //  4
585     unsigned long vaddr_start;   //  8
586     unsigned long paddr_start;   //  8
587     size_t sz;                   //  4 +
588                                  //  ---
589                                  //  28 -> 32 bytes of padding needed
590     char pad[32];
591 } __attribute__((packed)) __attribute__((aligned(64)));
592
593 typedef struct _pte_transfer pte_transfer_t;
594
595 /**
596  *
597  */
598 struct _mapping_request {
599     struct pcn_kmsg_hdr header;
600     int tgroup_home_cpu;        // 4
601     int tgroup_home_id;         // 4
602     int requester_pid;          // 4
603     unsigned long address;      // 8
604     char need_vma;              // 1
605                                 // ---
606                                 // 21 -> 39 bytes of padding needed
607 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
608     unsigned long long send_time;
609     char pad[31];
610 #else
611     char pad[39];
612 #endif
613
614 } __attribute__((packed)) __attribute__((aligned(64)));
615
616 typedef struct _mapping_request mapping_request_t;
617
618 /*
619  * type = PCN_KMSG_TYPE_PROC_SRV_THREAD_GROUP_EXITED_NOTIFICATION
620  */
621 struct _thread_group_exited_notification {
622     struct pcn_kmsg_hdr header;
623     int tgroup_home_cpu;        // 4
624     int tgroup_home_id;         // 4
625                                 // ---
626                                 // 8 -> 52 bytes of padding needed
627     char pad[52];
628 } __attribute__((packed)) __attribute__((aligned(64)));
629 typedef struct _thread_group_exited_notification thread_group_exited_notification_t;
630
631
632 /**
633  *
634  */
635 struct _mapping_response {
636     struct pcn_kmsg_hdr header;
637     int tgroup_home_cpu;                                    // 4 
638     int tgroup_home_id;                                     // 4
639     int requester_pid;                                      // 4
640     unsigned char present;                                  // 1
641     unsigned char from_saved_mm;                            // 1
642     unsigned long address;                                  // 8
643     unsigned long vaddr_start;                              // 8
644     unsigned long vaddr_size;
645     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
646     pgprot_t prot;              
647     unsigned long vm_flags;     
648     unsigned long pgoff;
649 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
650     unsigned long long send_time;
651 #endif
652     char path[512]; // kept last so the message can be
653                     // truncated when the path is short.
654 };
655 typedef struct _mapping_response mapping_response_t;
656
657 /**
658  * This is a hack to eliminate the overhead of sending
659  * an entire mapping_response_t when there is no mapping.
660  * The overhead is due to the size of the message, which
661  * requires the _long pcn_kmsg variant to be used.
662  */
663 struct _nonpresent_mapping_response {
664     struct pcn_kmsg_hdr header;
665     int tgroup_home_cpu;            // 4
666     int tgroup_home_id;             // 4
667     int requester_pid;              // 4
668     unsigned long address;          // 8
669 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
670     unsigned long long send_time;   // 8
671                                     // ---
672                                     // 28 -> 32 bytes of padding needed
673     char pad[32];
674 #else
675     char pad[40];
676 #endif
677
678 } __attribute__((packed)) __attribute__((aligned(64)));
679 typedef struct _nonpresent_mapping_response nonpresent_mapping_response_t;
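/*
 * Illustrative responder sketch (an assumption, not code from this file):
 * because this reply fits in a fixed-size message, a miss can be answered
 * with the cheap send path rather than the long variant required by a full
 * mapping_response_t.  The message-type constant is defined elsewhere, so it
 * is left symbolic here:
 *
 *     nonpresent_mapping_response_t resp;
 *     resp.header.type = ...;              // nonpresent-response type constant
 *     resp.header.prio = PCN_KMSG_PRIO_NORMAL;
 *     resp.tgroup_home_cpu = tgroup_home_cpu;
 *     resp.tgroup_home_id  = tgroup_home_id;
 *     resp.requester_pid   = requester_pid;
 *     resp.address         = address;
 *     pcn_kmsg_send(from_cpu, (struct pcn_kmsg_message *)&resp);
 */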
680
681 /**
682  *
683  */
684 struct _munmap_request {
685     struct pcn_kmsg_hdr header;
686     int tgroup_home_cpu;         // 4
687     int tgroup_home_id;          // 4
688     int requester_pid;           // 4
689     unsigned long vaddr_start;   // 8
690     unsigned long vaddr_size;    // 8
691                                  // ---
692                                  // 28 -> 32 bytes of padding needed
693     char pad[32];
694 } __attribute__((packed)) __attribute__((aligned(64)));
695 typedef struct _munmap_request munmap_request_t;
696
697 /**
698  *
699  */
700 struct _munmap_response {
701     struct pcn_kmsg_hdr header;
702     int tgroup_home_cpu;        // 4
703     int tgroup_home_id;         // 4
704     int requester_pid;          // 4
705     unsigned long vaddr_start;  // 8
706     unsigned long vaddr_size;   // 8+
707                                 // ---
708                                 // 28 -> 32 bytes of padding needed
709     char pad[32];
710 } __attribute__((packed)) __attribute__((aligned(64)));
711 typedef struct _munmap_response munmap_response_t;
712
713 /**
714  *
715  */
716 struct _remote_thread_count_request {
717     struct pcn_kmsg_hdr header;
718     int tgroup_home_cpu;        // 4
719     int tgroup_home_id;         // 4
720     int requester_pid;          // 4
721                                 // ---
722                                 // 12 -> 48 bytes of padding needed
723     char pad[48];
724 } __attribute__((packed)) __attribute__((aligned(64)));
725 typedef struct _remote_thread_count_request remote_thread_count_request_t;
726
727 /**
728  *
729  */
730 struct _remote_thread_count_response {
731     struct pcn_kmsg_hdr header;
732     int tgroup_home_cpu;        // 4
733     int tgroup_home_id;         // 4
734     int requester_pid;        // 4
735     int count;                  // 4
736                                 // ---
737                                 // 16 -> 44 bytes of padding needed
738     char pad[44];
739 } __attribute__((packed)) __attribute__((aligned(64)));
740 typedef struct _remote_thread_count_response remote_thread_count_response_t;
741
742 /**
743  *
744  */
745 struct _mprotect_request {
746     struct pcn_kmsg_hdr header; 
747     int tgroup_home_cpu;        // 4
748     int tgroup_home_id;         // 4
749     int requester_pid;          // 4
750     unsigned long start;        // 8
751     size_t len;                 // 4
752     unsigned long prot;         // 8
753                                 // ---
754                                 // 32 -> 28 bytes of padding needed
755     char pad[28];
756 } __attribute__((packed)) __attribute__((aligned(64)));
757 typedef struct _mprotect_request mprotect_request_t;
758
759 /**
760  *
761  */
762 struct _mprotect_response {
763     struct pcn_kmsg_hdr header;
764     int tgroup_home_cpu;        // 4
765     int tgroup_home_id;         // 4
766     int requester_pid;          // 4
767     unsigned long start;        // 8
768                                 // ---
769                                 // 20 -> 40 bytes of padding needed
770     char pad[40];
771 } __attribute__((packed)) __attribute__((aligned(64)));
772 typedef struct _mprotect_response mprotect_response_t;
773
774 /**
775  *
776  */
777 typedef struct _back_migration {
778     struct pcn_kmsg_hdr header;
779     int tgroup_home_cpu;
780     int tgroup_home_id;
781     int t_home_cpu;
782     int t_home_id;
783     unsigned long previous_cpus;
784     struct pt_regs regs;
785     unsigned long thread_fs;
786     unsigned long thread_gs;
787     unsigned long thread_usersp;
788     unsigned short thread_es;
789     unsigned short thread_ds;
790     unsigned short thread_fsindex;
791     unsigned short thread_gsindex;
792 } back_migration_t;
793
794 /**
795  *
796  */
797 struct _lamport_barrier_request{
798     struct pcn_kmsg_hdr header;
799     int tgroup_home_cpu;            // 4
800     int tgroup_home_id;             // 4
801     unsigned long address;          // 8
802     unsigned long long timestamp;   // 16
803                                     // ---
804                                     // 32 -> 32 bytes of padding needed
805     char pad[32];
806 } __attribute__((packed)) __attribute__((aligned(64)));
807 typedef struct _lamport_barrier_request lamport_barrier_request_t;
808
809 /**
810  *
811  */
812 struct _lamport_barrier_request_range {
813     struct pcn_kmsg_hdr header;
814     int tgroup_home_cpu;            // 4
815     int tgroup_home_id;             // 4
816     unsigned long address;          // 8
817     size_t sz;                      // 4
818     unsigned long long timestamp;   // 16
819                                     // ---
820                                     // 36 -> 28 bytes of padding needed
821     char pad[28];
822 } __attribute__((packed)) __attribute__((aligned(64)));
823 typedef struct _lamport_barrier_request_range lamport_barrier_request_range_t;
824
825 /**
826  *
827  */
828 struct _lamport_barrier_response {
829     struct pcn_kmsg_hdr header;
830     int tgroup_home_cpu;            // 4
831     int tgroup_home_id;             // 4
832     unsigned long address;          // 8
833     unsigned long long timestamp;   // 16
834                                     // ---
835                                     // 32 -> 28 bytes of padding needed
836     char pad[28];
837 } __attribute__((packed)) __attribute__((aligned(64)));
838 typedef struct _lamport_barrier_response lamport_barrier_response_t;
839
840 /**
841  *
842  */
843 struct _lamport_barrier_response_range {
844     struct pcn_kmsg_hdr header;
845     int tgroup_home_cpu;            // 4
846     int tgroup_home_id;             // 4
847     unsigned long address;          // 8
848     size_t sz;                      // 4
849     unsigned long long timestamp;   // 16
850                                     // ---
851                                     // 36 -> 24 bytes of padding needed
852     char pad[24];
853 } __attribute__((packed)) __attribute__((aligned(64)));
854 typedef struct _lamport_barrier_response_range lamport_barrier_response_range_t;
855
856 /**
857  *
858  */
859 struct _lamport_barrier_release {
860     struct pcn_kmsg_hdr header;
861     int tgroup_home_cpu;            // 4
862     int tgroup_home_id;             // 4
863     unsigned long address;          // 8
864     unsigned long long timestamp;   //16
865                                     // ---
866                                     // 32 -> 28 bytes of padding needed
867     char pad[28];
868 } __attribute__((packed)) __attribute__((aligned(64)));
869 typedef struct _lamport_barrier_release lamport_barrier_release_t;
870
871 /**
872  *
873  */
874 struct _lamport_barrier_release_range {
875     struct pcn_kmsg_hdr header;
876     int tgroup_home_cpu;            // 4
877     int tgroup_home_id;             // 4
878     unsigned long address;          // 8
879     size_t sz;                      // 4
880     unsigned long long timestamp;   // 16
881                                     // ---
882                                     // 36 -> 24 bytes of padding needed
883     char pad[24];
884 } __attribute__((packed)) __attribute__((aligned(64)));
885 typedef struct _lamport_barrier_release_range lamport_barrier_release_range_t;
886
887 /**
888  *
889  */
890 struct _get_counter_phys_request {
891     struct pcn_kmsg_hdr header;
892     char pad[60];
893 } __attribute__((packed)) __attribute__((aligned(64)));
894 typedef struct _get_counter_phys_request get_counter_phys_request_t;
895
896 /**
897  *
898  */
899 struct _get_counter_phys_response {
900     struct pcn_kmsg_hdr header;
901     unsigned long resp;
902     char pad[58];
903 } __attribute__((packed)) __attribute__((aligned(64)));
904 typedef struct _get_counter_phys_response get_counter_phys_response_t;
905
906
907 /**
908  *
909  */
910 typedef struct _deconstruction_data {
911     int clone_request_id;
912     int vma_id;
913     int dst_cpu;
914 } deconstruction_data_t;
915
916 /**
917  *
918  */
919 typedef struct {
920     struct work_struct work;
921     struct task_struct *task;
922     pid_t pid;
923     int t_home_cpu;
924     int t_home_id;
925     int is_last_tgroup_member;
926     struct pt_regs regs;
927     unsigned long thread_fs;
928     unsigned long thread_gs;
929     unsigned long thread_sp0;
930     unsigned long thread_sp;
931     unsigned long thread_usersp;
932     unsigned short thread_es;
933     unsigned short thread_ds;
934     unsigned short thread_fsindex;
935     unsigned short thread_gsindex;
936 } exit_work_t;
937
938 /**
939  *
940  */
941 typedef struct {
942     struct work_struct work;
943     clone_data_t* data;
944 } import_task_work_t;
945
946 /**
947  *
948  */
949 typedef struct {
950     struct work_struct work;
951     int tgroup_home_cpu;
952     int tgroup_home_id;
953 } group_exit_work_t;
954
955 /**
956  *
957  */
958 typedef struct {
959     struct work_struct work;
960     int tgroup_home_cpu;
961     int tgroup_home_id;
962     int requester_pid;
963     unsigned long address;
964     char need_vma;
965     int from_cpu;
966 } mapping_request_work_t;
967
968 /**
969  *
970  */
971 typedef struct {
972     struct work_struct work;
973     int tgroup_home_cpu;
974     int tgroup_home_id;
975     int requester_pid;
976     unsigned char from_saved_mm;
977     unsigned long address;      
978     unsigned char present;      
979     unsigned long vaddr_mapping;
980     unsigned long vaddr_start;
981     unsigned long vaddr_size;
982     unsigned long paddr_mapping;
983     size_t paddr_mapping_sz;
984     pgprot_t prot;              
985     unsigned long vm_flags;     
986     char path[512];
987     unsigned long pgoff;
988     int from_cpu;
989 } mapping_response_work_t;
990
991 /**
992  *
993  */
994 typedef struct {
995     struct work_struct work;
996     int tgroup_home_cpu;
997     int tgroup_home_id;
998     int requester_pid;
999     unsigned long address;
1000     int from_cpu;
1001 } nonpresent_mapping_response_work_t;
1002
1003 /**
1004  *
1005  */
1006 typedef struct {
1007     struct work_struct work;
1008     int tgroup_home_cpu;
1009     int tgroup_home_id;
1010 } tgroup_closed_work_t;
1011
1012 /**
1013  *
1014  */
1015 typedef struct {
1016     struct work_struct work;
1017     int tgroup_home_cpu;
1018     int tgroup_home_id;
1019     int requester_pid;
1020     unsigned long vaddr_start;
1021     unsigned long vaddr_size;
1022     int from_cpu;
1023 } munmap_request_work_t;
1024
1025 /**
1026  *
1027  */
1028 typedef struct {
1029     struct work_struct work;
1030     int tgroup_home_cpu;
1031     int tgroup_home_id;
1032     int requester_pid;
1033     unsigned long vaddr_start;
1034     unsigned long vaddr_size;
1035 } munmap_response_work_t;
1036
1037 /**
1038  * 
1039  */
1040 typedef struct {
1041     struct work_struct work;
1042     int tgroup_home_cpu;
1043     int tgroup_home_id;
1044     int requester_pid;
1045     unsigned long start;
1046     size_t len;
1047     unsigned long prot;
1048     int from_cpu;
1049 } mprotect_work_t;
1050
1051 /**
1052  *
1053  */
1054 typedef struct {
1055     struct work_struct work;
1056     int tgroup_home_cpu;
1057     int tgroup_home_id;
1058     int requester_pid;
1059     int from_cpu;
1060 } remote_thread_count_request_work_t;
1061
1062 /**
1063  *
1064  */
1065 typedef struct {
1066     struct work_struct work;
1067     int tgroup_home_cpu;
1068     int tgroup_home_id;
1069     int t_home_cpu;
1070     int t_home_id;
1071     unsigned long previous_cpus;
1072     struct pt_regs regs;
1073     unsigned long thread_fs;
1074     unsigned long thread_gs;
1075     unsigned long thread_usersp;
1076     unsigned short thread_es;
1077     unsigned short thread_ds;
1078     unsigned short thread_fsindex;
1079     unsigned short thread_gsindex;
1080 } back_migration_work_t;
1081
1082 /**
1083  *
1084  */
1085 typedef struct {
1086     struct work_struct work;
1087     int tgroup_home_cpu;
1088     int tgroup_home_id;
1089     int from_cpu;
1090     unsigned long address;
1091     unsigned long long timestamp;
1092 } lamport_barrier_request_work_t;
1093
1094 /**
1095  *
1096  */
1097 typedef struct {
1098     struct work_struct work;
1099     int tgroup_home_cpu;
1100     int tgroup_home_id;
1101     int from_cpu;
1102     unsigned long address;
1103     unsigned long long timestamp;
1104 } lamport_barrier_response_work_t;
1105
1106 /**
1107  * 
1108  */
1109 typedef struct {
1110     struct work_struct work;
1111     int tgroup_home_cpu;
1112     int tgroup_home_id;
1113     int from_cpu;
1114     unsigned long address;
1115     unsigned long long timestamp;
1116 } lamport_barrier_release_work_t;
1117
1118 /**
1119  *
1120  */
1121 typedef struct {
1122     struct work_struct work;
1123     int tgroup_home_cpu;
1124     int tgroup_home_id;
1125     int from_cpu;
1126     unsigned long address;
1127     size_t sz;
1128     unsigned long long timestamp;
1129 } lamport_barrier_request_range_work_t;
1130
1131 /**
1132  *
1133  */
1134 typedef struct {
1135     struct work_struct work;
1136     int tgroup_home_cpu;
1137     int tgroup_home_id;
1138     int from_cpu;
1139     unsigned long address;
1140     size_t sz;
1141     unsigned long long timestamp;
1142 } lamport_barrier_response_range_work_t;
1143
1144 /**
1145  * 
1146  */
1147 typedef struct {
1148     struct work_struct work;
1149     int tgroup_home_cpu;
1150     int tgroup_home_id;
1151     int from_cpu;
1152     unsigned long address;
1153     size_t sz;
1154     unsigned long long timestamp;
1155 } lamport_barrier_release_range_work_t;
1156
1157 /**
1158  * Prototypes
1159  */
1160 static void process_import_task(struct work_struct* work);
1161 static int handle_clone_request(struct pcn_kmsg_message* msg);
1162 long process_server_clone(unsigned long clone_flags,
1163                           unsigned long stack_start,                                                                                                                   
1164                           struct pt_regs *regs,
1165                           unsigned long stack_size,
1166                           struct task_struct* task);
1167 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start);
1168 static clone_data_t* find_clone_data(int cpu, int clone_request_id);
1169 static void dump_mm(struct mm_struct* mm);
1170 static void dump_task(struct task_struct* task,struct pt_regs* regs,unsigned long stack_ptr);
1171 static void dump_thread(struct thread_struct* thread);
1172 static void dump_regs(struct pt_regs* regs);
1173 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr); 
1174
1175 /**
1176  * Prototypes from parts of the kernel that I modified or made available to external
1177  * modules.
1178  */
1179 // I removed the 'static' modifier in mm/memory.c for do_wp_page so I could use it 
1180 // here.
1181 int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1182                unsigned long address, pte_t *page_table, pmd_t *pmd,
1183                spinlock_t *ptl, pte_t orig_pte);
1184 int do_mprotect(struct task_struct* task, struct mm_struct* mm, unsigned long start, size_t len, unsigned long prot, int do_remote);
1185 #ifndef PROCESS_SERVER_USE_KMOD
1186 extern int exec_mmap(struct mm_struct* mm);
1187 extern void start_remote_thread(struct pt_regs* regs);
1188 extern void flush_old_files(struct files_struct * files);
1189 #endif
1190 static unsigned long get_next_ts_value(void);
1191
1192 /**
1193  * Module variables
1194  */
1195 static int _vma_id = 0;
1196 static int _clone_request_id = 0;
1197 static int _cpu = -1;
1198 static unsigned long long perf_a, perf_b, perf_c, perf_d, perf_e;
1199 data_header_t* _saved_mm_head = NULL;             // Saved MM list
1200 DEFINE_SPINLOCK(_saved_mm_head_lock);             // Lock for _saved_mm_head
1201 data_header_t* _mapping_request_data_head = NULL; // Mapping request data head
1202 DEFINE_SPINLOCK(_mapping_request_data_head_lock);  // Lock for above
1203 data_header_t* _count_remote_tmembers_data_head = NULL;
1204 DEFINE_SPINLOCK(_count_remote_tmembers_data_head_lock);
1205 data_header_t* _munmap_data_head = NULL;
1206 DEFINE_SPINLOCK(_munmap_data_head_lock);
1207 data_header_t* _mprotect_data_head = NULL;
1208 DEFINE_SPINLOCK(_mprotect_data_head_lock);
1209 data_header_t* _data_head = NULL;                 // General purpose data store
1210 DEFINE_SPINLOCK(_data_head_lock);                 // Lock for _data_head
1211 DEFINE_SPINLOCK(_vma_id_lock);                    // Lock for _vma_id
1212 DEFINE_SPINLOCK(_clone_request_id_lock);          // Lock for _clone_request_id
1213 struct rw_semaphore _import_sem;
1214 DEFINE_SPINLOCK(_remap_lock);
1215 data_header_t* _lamport_barrier_queue_head = NULL;
1216 DEFINE_SPINLOCK(_lamport_barrier_queue_lock);
1217 unsigned long* ts_counter = NULL;
1218 get_counter_phys_data_t* get_counter_phys_data = NULL;
1219
1220 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
1221 struct proc_dir_entry *_proc_entry = NULL;
1222 static void proc_track_data(int entry, unsigned long long time);//proto
1223 static void proc_data_init(void);
1224 typedef struct _proc_data {
1225     int count;
1226     unsigned long long total;
1227     unsigned long long min;
1228     unsigned long long max;
1229     char name[256];
1230 } proc_data_t;
1231 typedef enum _proc_data_index{
1232     PS_PROC_DATA_MAPPING_WAIT_TIME=0,
1233     PS_PROC_DATA_MAPPING_POST_WAIT_TIME_RESUME,
1234     PS_PROC_DATA_MAPPING_REQUEST_SEND_TIME,
1235     PS_PROC_DATA_MAPPING_RESPONSE_SEND_TIME,
1236     PS_PROC_DATA_MAPPING_REQUEST_DELIVERY_TIME,
1237     PS_PROC_DATA_MAPPING_RESPONSE_DELIVERY_TIME,
1238     PS_PROC_DATA_MAPPING_REQUEST_PROCESSING_TIME,
1239     PS_PROC_DATA_BREAK_COW_TIME,
1240     PS_PROC_DATA_FAULT_PROCESSING_TIME,
1241     PS_PROC_DATA_ADJUSTED_PERMISSIONS,
1242     PS_PROC_DATA_NEWVMA_ANONYMOUS_PTE,
1243     PS_PROC_DATA_NEWVMA_ANONYMOUS_NOPTE,
1244     PS_PROC_DATA_NEWVMA_FILEBACKED_PTE,
1245     PS_PROC_DATA_NEWVMA_FILEBACKED_NOPTE,
1246     PS_PROC_DATA_OLDVMA_ANONYMOUS_PTE,
1247     PS_PROC_DATA_OLDVMA_ANONYMOUS_NOPTE,
1248     PS_PROC_DATA_OLDVMA_FILEBACKED_PTE,
1249     PS_PROC_DATA_OLDVMA_FILEBACKED_NOPTE,
1250     PS_PROC_DATA_MUNMAP_PROCESSING_TIME,
1251     PS_PROC_DATA_MUNMAP_REQUEST_PROCESSING_TIME,
1252     PS_PROC_DATA_MPROTECT_PROCESSING_TIME,
1253     PS_PROC_DATA_MPROTECT_REQUEST_PROCESSING_TIME,
1254     PS_PROC_DATA_EXIT_PROCESSING_TIME,
1255     PS_PROC_DATA_EXIT_NOTIFICATION_PROCESSING_TIME,
1256     PS_PROC_DATA_GROUP_EXIT_PROCESSING_TIME,
1257     PS_PROC_DATA_GROUP_EXIT_NOTIFICATION_PROCESSING_TIME,
1258     PS_PROC_DATA_IMPORT_TASK_TIME,
1259     PS_PROC_DATA_COUNT_REMOTE_THREADS_PROCESSING_TIME,
1260     PS_PROC_DATA_MK_PAGE_WRITABLE,
1261     PS_PROC_DATA_WAITING_FOR_LAMPORT_LOCK,
1262     PS_PROC_DATA_LAMPORT_LOCK_HELD,
1263     PS_PROC_DATA_MAX
1264 } proc_data_index_t;
1265 proc_data_t _proc_data[NR_CPUS][PS_PROC_DATA_MAX];
1266
1267 typedef struct proc_xfer {
1268     unsigned long long total;
1269     int count;
1270     unsigned long long min;
1271     unsigned long long max;
1272 } proc_xfer_t;
1273
1274 struct _stats_clear {
1275     struct pcn_kmsg_hdr header;
1276     char pad[60];
1277 } __attribute__((packed)) __attribute__((aligned(64)));
1278 typedef struct _stats_clear stats_clear_t;
1279
1280 struct _stats_query {
1281     struct pcn_kmsg_hdr header;
1282     pid_t pid;
1283     char pad[56];
1284 } __attribute__((packed)) __attribute__((aligned(64)));
1285 typedef struct _stats_query stats_query_t;
1286
1287 struct _stats_response {
1288     struct pcn_kmsg_hdr header;
1289     pid_t pid;
1290     proc_xfer_t data[PS_PROC_DATA_MAX];
1291 } __attribute__((packed)) __attribute__((aligned(64))); 
1292 typedef struct _stats_response stats_response_t;
1293
1294 typedef struct _stats_query_data {
1295     data_header_t header;
1296     int expected_responses;
1297     int responses;
1298     pid_t pid;
1299 } stats_query_data_t;
1300
1301 typedef struct {
1302     struct work_struct work;
1303     int pid;
1304     int from_cpu;
1305 } stats_query_work_t;
1306
1307 #define PS_PROC_DATA_TRACK(x,y) proc_track_data(x,y)
1308 #define PS_PROC_DATA_INIT() proc_data_init()
1309
1310 #else
1311 #define PS_PROC_DATA_TRACK(x,y)
1312 #define PS_PROC_DATA_INIT()
1313 #endif
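/*
 * Typical instrumentation pattern for these hooks, restated from
 * mk_page_writable() further below:
 *
 *     unsigned long long start = native_read_tsc();
 *     ...do the work...
 *     PS_PROC_DATA_TRACK(PS_PROC_DATA_MK_PAGE_WRITABLE, native_read_tsc() - start);
 *
 * When PROCESS_SERVER_HOST_PROC_ENTRY is not defined both macros expand to
 * nothing, so the TSC reads themselves are also kept inside the #ifdef.
 */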
1314
1315 // Work Queues
1316 static struct workqueue_struct *clone_wq;
1317 static struct workqueue_struct *exit_wq;
1318 static struct workqueue_struct *mapping_wq;
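/*
 * Sketch of the message-handler -> workqueue flow (an assumption based on
 * the *_work_t types above; the worker function name is hypothetical): a
 * pcn_kmsg handler copies the message into a work item and defers the heavy
 * lifting to one of these queues:
 *
 *     mapping_request_work_t *w = kmalloc(sizeof(*w), GFP_ATOMIC);
 *     if (w) {
 *         INIT_WORK(&w->work, process_mapping_request);
 *         w->tgroup_home_cpu = msg->tgroup_home_cpu;
 *         ...copy the remaining fields from the message...
 *         queue_work(mapping_wq, &w->work);
 *     }
 */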
1319
1320 /**
1321  * General helper functions and debugging tools
1322  */
1323
1324 /**
1325  * Return true if the given address lies in userspace (below PAGE_OFFSET).
1326  */
1327 static bool __user_addr (unsigned long x ) {
1328     return (x < PAGE_OFFSET);   
1329 }
1330
1331 // TODO cpu_has_known_tgroup_mm must be reworked: the map should be shared (pointed to) by all threads rather than copied per thread; per-thread copies are redundant and hurt scaling.
1332 /**
1333  *
1334  */
1335 static int cpu_has_known_tgroup_mm(int cpu)
1336 {
1337 #ifdef SUPPORT_FOR_CLUSTERING
1338     struct list_head *iter;
1339     _remote_cpu_info_list_t *objPtr;
1340     struct cpumask *pcpum =0;
1341     int cpuid =-1;
1342 extern struct list_head rlist_head;
1343     if (cpumask_test_cpu(cpu, cpu_present_mask))
1344         return 1;
1345     list_for_each(iter, &rlist_head) {
1346         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
1347         cpuid = objPtr->_data._processor;
1348         pcpum = &(objPtr->_data._cpumask);
1349         if (cpumask_test_cpu(cpu, pcpum)) {
1350             if ( bitmap_intersects(cpumask_bits(pcpum),
1351                                    &(current->known_cpu_with_tgroup_mm),
1352                                    (sizeof(unsigned long) *8)) ) {
1353                 return 1;
1354             }
1355             return 0;
1356         }
1357     }
1358     printk(KERN_ERR"%s: ERROR the input cpu (%d) is not included in any known cpu cluster\n",
1359                 __func__, cpu);
1360     return 0;
1361 #else
1362     if(test_bit(cpu,&current->known_cpu_with_tgroup_mm)) {
1363         return 1;
1364     }
1365     return 0;
1366 #endif
1367 }
1368
1369 /**
1370  * Record, for every thread in the group, that @cpu already knows this thread group's mm.
1371  */
1372 static void set_cpu_has_known_tgroup_mm(struct task_struct *task,int cpu) {
1373     struct task_struct *me = task;
1374     struct task_struct *t = me;
1375     do {
1376         set_bit(cpu,&t->known_cpu_with_tgroup_mm);
1377     } while_each_thread(me, t);
1378 }
1379
1380 /**
1381  * @brief find_vma does not always return the correct vm_area_struct*.
1382  * If it fails to find a vma for the specified address, it instead
1383  * returns the closest one in the rb list.  This function looks
1384  * for this failure, and returns NULL in this error condition.
1385  * Otherwise, it returns a pointer to the struct vm_area_struct
1386  * containing the specified address.
1387  */
1388 static struct vm_area_struct* find_vma_checked(struct mm_struct* mm, unsigned long address) {
1389     struct vm_area_struct* vma = find_vma(mm,address&PAGE_MASK);
1390     if( vma == NULL ||
1391         (vma->vm_start > (address & PAGE_MASK)) ||
1392         (vma->vm_end <= address) ) {
1393         
1394         vma = NULL;
1395     }
1396
1397     return vma;
1398 }
1399
1400 /**
1401  * Note, mm->mmap_sem must already be held!
1402  */
1403 /*static int is_mapped(struct mm_struct* mm, unsigned vaddr) {
1404     pte_t* pte = NULL;
1405     pmd_t* pmd = NULL;
1406     pud_t* pud = NULL;
1407     pgd_t* pgd = NULL;
1408     int ret = 0;
1409
1410     pgd = pgd_offset(mm, vaddr);
1411     if(pgd_present(*pgd) && pgd_present(*pgd)) {
1412         pud = pud_offset(pgd,vaddr); 
1413         if(pud_present(*pud)) {
1414             pmd = pmd_offset(pud,vaddr);
1415             if(pmd_present(*pmd)) {
1416                 pte = pte_offset_map(pmd,vaddr);
1417                 if(pte && !pte_none(*pte)) {
1418                     // It exists!
1419                     ret = 1;
1420                 }
1421             }
1422         }
1423     }
1424     return ret;
1425
1426 }*/
1427 // Antonio's Version
1428 static int is_mapped(struct mm_struct* mm, unsigned long vaddr)
1429 {
1430     pte_t* pte = NULL;
1431     pmd_t* pmd = NULL;                                                             
1432     pud_t* pud = NULL;                                                             
1433     pgd_t* pgd = NULL; 
1434
1435     pgd = pgd_offset(mm, vaddr);                                                   
1436     if (pgd && !pgd_none(*pgd) && likely(!pgd_bad(*pgd)) && pgd_present(*pgd)) {
1437         pud = pud_offset(pgd,vaddr);                                               
1438         if (pud && !pud_none(*pud) && likely(!pud_bad(*pud)) && pud_present(*pud)) {
1439
1440             pmd = pmd_offset(pud,vaddr);
1441             if(pmd && !pmd_none(*pmd) && likely(!pmd_bad(*pmd)) && pmd_present(*pmd)) {             
1442                 pte = pte_offset_map(pmd,vaddr);                                   
1443                 if(pte && !pte_none(*pte) && pte_present(*pte)) { 
1444                    // It exists!                                                  
1445                     return 1;
1446                 }                                                                  
1447             }                                                                      
1448         }                                                                          
1449     }
1450     return 0;
1451 }
1452
1453
1454 /**
1455  * @brief Find the mm_struct for a given distributed thread.  
1456  * If one does not exist, then return NULL.
1457  */
1458 static struct mm_struct* find_thread_mm(
1459         int tgroup_home_cpu, 
1460         int tgroup_home_id, 
1461         mm_data_t **used_saved_mm,
1462         struct task_struct** task_out)
1463 {
1464
1465     struct task_struct *task, *g;
1466     struct mm_struct * mm = NULL;
1467     data_header_t* data_curr;
1468     mm_data_t* mm_data;
1469     unsigned long lockflags;
1470
1471     *used_saved_mm = NULL;
1472     *task_out = NULL;
1473
1474     // First, look through all active processes.
1475     read_lock(&tasklist_lock);
1476     do_each_thread(g,task) {
1477         if(task->tgroup_home_cpu == tgroup_home_cpu &&
1478            task->tgroup_home_id  == tgroup_home_id) {
1479             mm = task->mm;
1480             *task_out = task;
1481             *used_saved_mm = NULL;
1482             read_unlock(&tasklist_lock);
1483             goto out;
1484         }
1485     } while_each_thread(g,task);
1486     read_unlock(&tasklist_lock);
1487
1488     // Failing that, look through saved mm's.
1489     spin_lock_irqsave(&_saved_mm_head_lock,lockflags);
1490     data_curr = _saved_mm_head;
1491     while(data_curr) {
1492
1493         mm_data = (mm_data_t*)data_curr;
1494     
1495         if((mm_data->tgroup_home_cpu == tgroup_home_cpu) &&
1496            (mm_data->tgroup_home_id  == tgroup_home_id)) {
1497             mm = mm_data->mm;
1498             *used_saved_mm = mm_data;
1499             break;
1500         }
1501
1502         data_curr = data_curr->next;
1503
1504     } // while
1505
1506     spin_unlock_irqrestore(&_saved_mm_head_lock,lockflags);
1507
1508
1509 out:
1510     return mm;
1511 }
1512
1513
1514
1515 /**
1516  * @brief A best effort at making a page writable
1517  * @return void
1518  */
1519 static void mk_page_writable(struct mm_struct* mm,
1520                              struct vm_area_struct* vma,
1521                              unsigned long vaddr) {
1522 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
1523     unsigned long long end_time;
1524     unsigned long long total_time;
1525     unsigned long long start_time = native_read_tsc();
1526 #endif
1527     spinlock_t* ptl;
1528     pte_t *ptep, pte, entry;
1529      
1530     // Grab the pte, and lock it     
1531     ptep = get_locked_pte(mm, vaddr, &ptl);
1532     if (!ptep)
1533         goto out;
1534
1535     // grab the contents of the pte pointer
1536     pte = *ptep;
1537     
1538     if(pte_none(*ptep)) {
1539         pte_unmap_unlock(ptep, ptl);
1540         goto out;
1541     }
1542
1543     arch_enter_lazy_mmu_mode();
1544
1545     // Make the content copy writable and dirty, then
1546     // write it back into the page tables.
1547     entry = pte_mkwrite(pte_mkdirty(pte));
1548     set_pte_at(mm, vaddr, ptep, entry);
1549
1550     update_mmu_cache(vma, vaddr, ptep);
1551
1552     arch_leave_lazy_mmu_mode();
1553
1554     // Unlock the pte
1555     pte_unmap_unlock(ptep, ptl);
1556 out:
1557 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
1558     end_time = native_read_tsc();
1559     total_time = end_time - start_time;
1560     PS_PROC_DATA_TRACK(PS_PROC_DATA_MK_PAGE_WRITABLE,total_time);
1561 #endif
1562     return;
1563 }
1564
1565 /**
1566  *
1567  */
1568 static void mk_page_writable_lookupvma(struct mm_struct*mm,
1569                              unsigned long addr) {
1570     struct vm_area_struct* curr = mm->mmap;
1571     while(curr) {
1572         if(curr->vm_start <= addr && curr->vm_end > addr) {
1573             mk_page_writable(mm,curr,addr);
1574             break;
1575         }
1576         curr = curr->vm_next;
1577     }
1578 }
1579
1580 /**
1581  * @brief Check to see if a given page is writable.
1582  * @return nonzero if writable, 0 if not writable (or if the pte cannot be locked), -1 if the pte is empty
1583  */
1584 static int is_page_writable(struct mm_struct* mm,
1585                             struct vm_area_struct* vma,
1586                             unsigned long addr) {
1587     spinlock_t* ptl;
1588     pte_t *ptep, pte;
1589     int ret = 0;
1590
1591     ptep = get_locked_pte(mm,addr,&ptl);
1592     if(!ptep)
1593         goto out;
1594
1595     pte = *ptep;
1596     
1597     if(pte_none(*ptep)) {
1598         pte_unmap_unlock(ptep, ptl);
1599         ret = -1;
1600         goto out;
1601     }
1602
1603     ret = pte_write(pte);
1604
1605     pte_unmap_unlock(ptep, ptl);
1606
1607 out:
1608     return ret;
1609 }
1610
1611 /**
1612  * @brief Get the clone data associated with the current task.
1613  * @return clone_data_t* or NULL if not present
1614  */
1615 static clone_data_t* get_current_clone_data(void) {
1616     clone_data_t* ret = NULL;
1617
1618     if(!current->clone_data) {
1619         // Do costly lookup
1620         ret = find_clone_data(current->prev_cpu,
1621                                  current->clone_request_id);
1622         // Store it for easy access next time.
1623         current->clone_data = ret;
1624     } else {
1625         ret = (clone_data_t*)current->clone_data;
1626     }
1627
1628     return ret;
1629 }
1630
1631
1632 /**
1633  * @brief Page walk has encountered a pte while deconstructing
1634  * the client side processes address space.  Transfer it.
1635  */
1636 /*static int deconstruction_page_walk_pte_entry_callback(pte_t *pte, 
1637         unsigned long start, unsigned long end, struct mm_walk *walk) {
1638
1639     deconstruction_data_t* decon_data = (deconstruction_data_t*)walk->private;
1640     int vma_id = decon_data->vma_id;
1641     int dst_cpu = decon_data->dst_cpu;
1642     int clone_request_id = decon_data->clone_request_id;
1643     pte_transfer_t pte_xfer;
1644
1645     if(NULL == pte || !pte_present(*pte)) {
1646         return 0;
1647     }
1648
1649     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
1650     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
1651     pte_xfer.paddr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1652     // NOTE: Found the above pte to paddr conversion here -
1653     // http://wbsun.blogspot.com/2010/12/convert-userspace-virtual-address-to.html
1654     pte_xfer.vaddr = start;
1655     pte_xfer.vma_id = vma_id;
1656     pte_xfer.clone_request_id = clone_request_id;
1657     pte_xfer.pfn = pte_pfn(*pte);
1658     PSPRINTK("Sending PTE\n"); 
1659     DO_UNTIL_SUCCESS(pcn_kmsg_send(dst_cpu, (struct pcn_kmsg_message *)&pte_xfer));
1660
1661     return 0;
1662 }*/
1663
1664 /**
1665  * @brief Callback used when walking a memory map.  It looks to see
1666  * if the page is present.  If present, it resolves the given
1667  * address.
1668  * @return always returns 0
1669  */
1670 static int vm_search_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, unsigned long end, struct mm_walk *walk) {
1671  
1672     unsigned long* resolved_addr = (unsigned long*)walk->private;
1673
1674     if (pte == NULL || pte_none(*pte) || !pte_present(*pte)) {
1675         *resolved_addr = 0;
1676         return 0;
1677     }
1678
1679     // Store the resolved address in the address
1680     // pointed to by the private field of the walk
1681     // structure.  This is checked by the caller
1682     // of the walk function when the walk is complete.
1683     *resolved_addr = (pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1));
1684     return 0;
1685 }
1686
1687 /**
1688  * @brief Retrieve the physical address of the specified virtual address.
1689  * @return -1 indicates failure.  Otherwise, 0 is returned.
1690  */
1691 static int get_physical_address(struct mm_struct* mm, 
1692                                 unsigned long vaddr,
1693                                 unsigned long* paddr) {
1694     unsigned long resolved = 0;
1695     struct mm_walk walk = {
1696         .pte_entry = vm_search_page_walk_pte_entry_callback,
1697         .private = &(resolved),
1698         .mm = mm
1699     };
1700
1701     // Walk the page tables.  The walk handler modifies the
1702     // resolved variable if it finds the address.
1703     walk_page_range(vaddr & PAGE_MASK, (vaddr & PAGE_MASK) + PAGE_SIZE, &walk);
1704     if(resolved == 0) {
1705         return -1;
1706     }
1707
1708     // Set the output
1709     *paddr = resolved;
1710
1711     return 0;
1712 }
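
/*
 * Illustrative usage (a sketch, not part of the build; assumes the caller
 * already holds mm->mmap_sem, since the walk inspects the page tables):
 *
 *     unsigned long paddr;
 *     if(get_physical_address(mm, vaddr, &paddr) == 0)
 *         PSPRINTK("vaddr %lx is backed by paddr %lx\n", vaddr, paddr);
 *     else
 *         PSPRINTK("vaddr %lx is not currently mapped\n", vaddr);
 */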
1713
1714 /**
1715  * Check to see if the specified virtual address has a 
1716  * corresponding physical address mapped to it.
1717  * @return 0 = no mapping, 1 = mapping present
1718  */
1719 static int is_vaddr_mapped(struct mm_struct* mm, unsigned long vaddr) {
1720     unsigned long resolved = 0;
1721     struct mm_walk walk = {
1722         .pte_entry = vm_search_page_walk_pte_entry_callback,
1723         .private = &(resolved),
1724         .mm = mm
1725     };
1726
1727     // Walk the page tables.  The walk handler will set the
1728     // resolved variable if it finds the mapping.  
1729     walk_page_range(vaddr & PAGE_MASK, ( vaddr & PAGE_MASK ) + PAGE_SIZE, &walk);
1730     if(resolved != 0) {
1731         return 1;
1732     }
1733     return 0;
1734 }
1735
1736 /**
1737  * @brief Determine if the specified vma can contain COW mappings.
1738  * @return 1 = yes, 0 = no.
1739  */
1740 static int is_maybe_cow(struct vm_area_struct* vma) {
1741     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
1742         // Not a cow vma
1743         return 0;
1744     }
1745
1746     if(!(vma->vm_flags & VM_WRITE)) {
1747         return 0;
1748     }
1749
1750     return 1;
1751 }
1752
1753 /**
1754  * @brief Break the COW page that contains "address", iff that page
1755  * is a COW page.
1756  * @return 1 = handled, 0 = not handled.
1757  * @prerequisite Caller must grab mm->mmap_sem
1758  */
1759 static int break_cow(struct mm_struct *mm, struct vm_area_struct* vma, unsigned long address) {
1760     pgd_t *pgd = NULL;
1761     pud_t *pud = NULL;
1762     pmd_t *pmd = NULL;
1763     pte_t *ptep = NULL;
1764     pte_t pte;
1765     spinlock_t* ptl;
1766
1767 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
1768     unsigned long long end_time = 0;
1769     unsigned long long total_time = 0;
1770     unsigned long long start_time = native_read_tsc();
1771 #endif
1772     //PSPRINTK("%s: entered\n",__func__);
1773
1774     // if it's not a cow mapping, return.
1775     if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
1776         goto not_handled;
1777     }
1778
1779     // if it's not writable in vm_flags, return.
1780     if(!(vma->vm_flags & VM_WRITE)) {
1781         goto not_handled;
1782     }
1783
1784     pgd = pgd_offset(mm, address);
1785     if(!pgd_present(*pgd)) {
1786         goto not_handled_unlock;
1787     }
1788
1789     pud = pud_offset(pgd,address);
1790     if(!pud_present(*pud)) {
1791         goto not_handled_unlock;
1792     }
1793
1794     pmd = pmd_offset(pud,address);
1795     if(!pmd_present(*pmd)) {
1796         goto not_handled_unlock;
1797     }
1798
1799     ptep = pte_offset_map(pmd,address);
1800     if(!ptep || !pte_present(*ptep) || pte_none(*ptep)) {
1801         pte_unmap(ptep);
1802         goto not_handled_unlock;
1803     }
1804
1805     pte = *ptep;
1806
1807     if(pte_write(pte)) {
1808         goto not_handled_unlock;
1809     }
1810     
1811     // break the cow!
1812     ptl = pte_lockptr(mm,pmd);
1813     PS_SPIN_LOCK(ptl);
1814    
1815     PSPRINTK("%s: proceeding on address %lx\n",__func__,address);
1816     do_wp_page(mm,vma,address,ptep,pmd,ptl,pte);
1817
1818
1819     // NOTE:
1820     // Do not call pte_unmap_unlock(ptep,ptl), since do_wp_page does that!
1821     
1822     goto handled;
1823
1824 not_handled_unlock:
1825 not_handled:
1826 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
1827     end_time = native_read_tsc();
1828     total_time = end_time - start_time;
1829     PS_PROC_DATA_TRACK(PS_PROC_DATA_BREAK_COW_TIME,total_time);
1830 #endif
1831     return 0;
1832 handled:
1833 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
1834     end_time = native_read_tsc();
1835     total_time = end_time - start_time;
1836     PS_PROC_DATA_TRACK(PS_PROC_DATA_BREAK_COW_TIME,total_time);
1837 #endif
1838     return 1;
1839 }
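
/*
 * Illustrative usage (a sketch, not part of the build).  Per the
 * prerequisite above, the caller must hold mm->mmap_sem; break_cow()
 * re-checks the COW conditions itself, so a typical call site only needs
 * to filter on is_maybe_cow():
 *
 *     if(is_maybe_cow(vma))
 *         break_cow(mm, vma, addr);  // force a private copy of the page
 */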
1840
1841 /**
1842  *  @brief Find the bounds of a physically consecutive mapped region.
1843  *  The region must be contained within the specified VMA.
1844  *
1845  *  Hypothetical page table mappings for a given VMA:
1846  *
1847  *  *********************************
1848  *  *    Vaddr      *   Paddr       *
1849  *  *********************************
1850  *  * 0x10000000    * 0x12341000    *
1851  *  *********************************
1852  *  * 0x10001000    * 0x12342000    *
1853  *  *********************************
1854  *  * 0x10002000    * 0x12343000    *
1855  *  *********************************
1856  *  * 0x10003000    * 0x43214000    *
1857  *  *********************************
1858  *  
1859  *  This function, given a vaddr of 0x10001xxx (mapped to paddr 0x12342xxx), will return:
1860  *  *vaddr_mapping_start = 0x10000000
1861  *  *paddr_mapping_start = 0x12341000
1862  *  *paddr_mapping_sz    = 0x3000
1863  *
1864  *  Notice 0x10003000 and above is not included in the returned region, as
1865  *  its paddr is not consecutive with the previous mappings.
1866  *
1867  */
1868 int find_consecutive_physically_mapped_region(struct mm_struct* mm,
1869                                               struct vm_area_struct* vma,
1870                                               unsigned long vaddr,
1871                                               unsigned long* vaddr_mapping_start,
1872                                               unsigned long* paddr_mapping_start,
1873                                               size_t* paddr_mapping_sz,
1874                                               int br_cow) {
1875     unsigned long paddr_curr = 0;
1876     unsigned long vaddr_curr = vaddr;
1877     unsigned long vaddr_next = vaddr;
1878     unsigned long paddr_next = 0;
1879     unsigned long paddr_start = 0;
1880     size_t sz = 0;
1881
1882     
1883     // Initializes paddr_curr
1884     if(br_cow) {
1885         break_cow(mm,vma,vaddr_curr);
1886     }
1887     if(get_physical_address(mm,vaddr_curr,&paddr_curr) < 0) {
1888         return -1;
1889     }
1890     paddr_start = paddr_curr;
1891     *vaddr_mapping_start = vaddr_curr;
1892     *paddr_mapping_start = paddr_curr;
1893     
1894     sz = PAGE_SIZE;
1895
1896     // seek up in memory
1897     // This stretches (sz) only, leaving
1898     // the starting vaddr and paddr the same
1899     while(1) {
1900         vaddr_next += PAGE_SIZE;
1901         
1902         // don't go past the end of the vma
1903         if(vaddr_next >= vma->vm_end) {
1904             break;
1905         }
1906
1907         if(br_cow) {
1908             break_cow(mm,vma,vaddr_next);
1909         }
1910
1911         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1912             break;
1913         }
1914
1915         if(paddr_next == paddr_curr + PAGE_SIZE) {
1916             sz += PAGE_SIZE;
1917             paddr_curr = paddr_next;
1918         } else {
1919             break;
1920         }
1921     }
1922
1923     // seek down in memory
1924     // This stretches sz, and the paddr and vaddr's
1925     vaddr_curr = vaddr;
1926     paddr_curr = paddr_start; 
1927     vaddr_next = vaddr_curr;
1928     while(1) {
1929         vaddr_next -= PAGE_SIZE;
1930
1931         // don't go past the start of the vma
1932         if(vaddr_next < vma->vm_start) {
1933             break;
1934         }
1935
1936         if(br_cow) {
1937             break_cow(mm,vma,vaddr_next);
1938         }
1939
1940         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
1941             break;
1942         }
1943
1944         if(paddr_next == (paddr_curr - PAGE_SIZE)) {
1945             vaddr_curr = vaddr_next;
1946             paddr_curr = paddr_next;
1947             sz += PAGE_SIZE;
1948         } else {
1949             break;
1950         }
1951     }
1952    
1953     *vaddr_mapping_start = vaddr_curr;
1954     *paddr_mapping_start = paddr_curr;
1955     *paddr_mapping_sz = sz;
1956
1957     PSPRINTK("%s: found consecutive area- vaddr{%lx}, paddr{%lx}, sz{%zu}\n",
1958                 __func__,
1959                 *vaddr_mapping_start,
1960                 *paddr_mapping_start,
1961                 *paddr_mapping_sz);
1962
1963     return 0;
1964 }
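
/*
 * Illustrative usage, reusing the hypothetical table in the comment above
 * (a sketch, not part of the build; mm->mmap_sem must be held and "vma"
 * must cover the address):
 *
 *     unsigned long vstart, pstart;
 *     size_t sz;
 *     if(!find_consecutive_physically_mapped_region(mm, vma, 0x10001000,
 *                                                   &vstart, &pstart, &sz, 0)) {
 *         // vstart == 0x10000000, pstart == 0x12341000, sz == 0x3000
 *     }
 */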
1965
1966 /**
1967  * @brief Find the preceding physically consecutive region.  This is a region
1968  * that starts BEFORE the specified vaddr.  The region must be contained 
1969  * within the specified VMA.
1970  */
1971 int find_prev_consecutive_physically_mapped_region(struct mm_struct* mm,
1972                                               struct vm_area_struct* vma,
1973                                               unsigned long vaddr,
1974                                               unsigned long* vaddr_mapping_start,
1975                                               unsigned long* paddr_mapping_start,
1976                                               size_t* paddr_mapping_sz,
1977                                               int break_cow) {
1978     unsigned long curr_vaddr_mapping_start;
1979     unsigned long curr_paddr_mapping_start;
1980     unsigned long curr_paddr_mapping_sz;
1981     unsigned long curr_vaddr = vaddr;
1982     int ret = -1;
1983
1984     if(curr_vaddr < vma->vm_start) return -1;
1985
1986     do {
1987         int res = find_consecutive_physically_mapped_region(mm,
1988                                                      vma,
1989                                                      curr_vaddr,
1990                                                      &curr_vaddr_mapping_start,
1991                                                      &curr_paddr_mapping_start,
1992                                                      &curr_paddr_mapping_sz,
1993                                                      break_cow);
1994         if(0 == res) {
1995
1996             // this is a match, we can store off results and exit
1997             ret = 0;
1998             *vaddr_mapping_start = curr_vaddr_mapping_start;
1999             *paddr_mapping_start = curr_paddr_mapping_start;
2000             *paddr_mapping_sz    = curr_paddr_mapping_sz;
2001             break;
2002         }
2003
2004         curr_vaddr -= PAGE_SIZE;
2005     } while (curr_vaddr >= vma->vm_start);
2006
2007     return ret;
2008
2009 }
2010 /**
2011  * @brief Find the next physically consecutive region.  This is a region
2012  * that starts AFTER the specified vaddr.  The region must be contained
2013  * within the specified VMA.
2014  */
2015 int find_next_consecutive_physically_mapped_region(struct mm_struct* mm,
2016                                               struct vm_area_struct* vma,
2017                                               unsigned long vaddr,
2018                                               unsigned long* vaddr_mapping_start,
2019                                               unsigned long* paddr_mapping_start,
2020                                               size_t* paddr_mapping_sz,
2021                                               int break_cow) {
2022     unsigned long curr_vaddr_mapping_start;
2023     unsigned long curr_paddr_mapping_start;
2024     unsigned long curr_paddr_mapping_sz;
2025     unsigned long curr_vaddr = vaddr;
2026     int ret = -1;
2027
2028     if(curr_vaddr >= vma->vm_end) return -1;
2029
2030     do {
2031         int res = find_consecutive_physically_mapped_region(mm,
2032                                                      vma,
2033                                                      curr_vaddr,
2034                                                      &curr_vaddr_mapping_start,
2035                                                      &curr_paddr_mapping_start,
2036                                                      &curr_paddr_mapping_sz,
2037                                                      break_cow);
2038         if(0 == res) {
2039
2040             // this is a match, we can store off results and exit
2041             ret = 0;
2042             *vaddr_mapping_start = curr_vaddr_mapping_start;
2043             *paddr_mapping_start = curr_paddr_mapping_start;
2044             *paddr_mapping_sz    = curr_paddr_mapping_sz;
2045             break;
2046         }
2047
2048         curr_vaddr += PAGE_SIZE;
2049     } while (curr_vaddr < vma->vm_end);
2050
2051     return ret;
2052
2053 }
2054
2055 /**
2056  *  @brief Fill the array with as many physically consecutive regions
2057  *  as are present and will fit (specified by arr_sz).
2058  */
2059 int fill_physical_mapping_array(struct mm_struct* mm,
2060         struct vm_area_struct* vma,
2061         unsigned long address,
2062         contiguous_physical_mapping_t* mappings, 
2063         int arr_sz,
2064         int break_cow) {
2065     int i;
2066     unsigned long next_vaddr = address & PAGE_MASK;
2067     int ret = -1;
2068     unsigned long smallest_in_first_round = next_vaddr;
2069
2070     PSPRINTK("%s: entered\n",__func__);
2071
2072     for(i = 0; i < arr_sz; i++) 
2073         mappings[i].present = 0;
2074
2075     for(i = 0; i < arr_sz && next_vaddr < vma->vm_end; i++) {
2076         int valid_mapping = find_next_consecutive_physically_mapped_region(mm,
2077                                             vma,
2078                                             next_vaddr,
2079                                             &mappings[i].vaddr,
2080                                             &mappings[i].paddr,
2081                                             &mappings[i].sz,
2082                                             break_cow);
2083
2084
2085         if(valid_mapping == 0) {
2086             PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
2087             if(address >= mappings[i].vaddr && 
2088                     address < mappings[i].vaddr + mappings[i].sz)
2089                 ret = 0;
2090
2091             if(mappings[i].vaddr < smallest_in_first_round)
2092                 smallest_in_first_round = mappings[i].vaddr;
2093
2094             mappings[i].present = 1;
2095             next_vaddr = mappings[i].vaddr + mappings[i].sz;
2096
2097         } else {
2098             PSPRINTK("%s: up search ended in failure, resuming down search\n",
2099                     __func__);
2100             mappings[i].present = 0;
2101             mappings[i].vaddr = 0;
2102             mappings[i].paddr = 0;
2103             mappings[i].sz = 0;
2104             break;
2105         }
2106     }
2107
2108     // If we have room left, go in the opposite direction
2109     if(i <= arr_sz -1) {
2110         next_vaddr = smallest_in_first_round - PAGE_SIZE;
2111         for(;i < arr_sz && next_vaddr >= vma->vm_start; i++) {
2112             int valid_mapping = find_prev_consecutive_physically_mapped_region(mm,
2113                                             vma,
2114                                             next_vaddr,
2115                                             &mappings[i].vaddr,
2116                                             &mappings[i].paddr,
2117                                             &mappings[i].sz,
2118                                             break_cow);
2119             if(valid_mapping == 0) {
2120                 PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
2121                 mappings[i].present = 1;
2122                 next_vaddr = mappings[i].vaddr - PAGE_SIZE;
2123             } else {
2124                 mappings[i].present = 0;
2125                 mappings[i].vaddr = 0;
2126                 mappings[i].paddr = 0;
2127                 mappings[i].sz = 0;
2128                 break;
2129             }
2130         }
2131     }
2132
2133     // Trim any entries that extend beyond the boundaries of the vma
2134     for(i = 0; i < MAX_MAPPINGS; i++) {
2135         if(mappings[i].present) {
2136             if(mappings[i].vaddr < vma->vm_start) {
2137                 unsigned long sz_diff = vma->vm_start - mappings[i].vaddr;
2138                 PSPRINTK("Trimming mapping, since it starts too low in memory\n");
2139                 if(mappings[i].sz > sz_diff) {
2140                     mappings[i].sz -= sz_diff;
2141                     mappings[i].vaddr = vma->vm_start;
2142                 } else {
2143                     mappings[i].present = 0;
2144                     mappings[i].vaddr = 0;
2145                     mappings[i].paddr = 0;
2146                     mappings[i].sz = 0;
2147                 }
2148             }
2149
2150             if(mappings[i].vaddr + mappings[i].sz >= vma->vm_end) {
2151                 unsigned long sz_diff = mappings[i].vaddr + 
2152                                         mappings[i].sz - 
2153                                         vma->vm_end;
2154                 PSPRINTK("Trimming mapping, since it ends too high in memory\n");
2155                 if(mappings[i].sz > sz_diff) {
2156                     mappings[i].sz -= sz_diff;
2157                 } else {
2158                     mappings[i].present = 0;
2159                     mappings[i].vaddr = 0;
2160                     mappings[i].paddr = 0;
2161                     mappings[i].sz = 0;
2162                 }
2163             }
2164         }
2165     }
2166
2167     // Clear out what we just did
2168     if(ret == -1) {
2169         PSPRINTK("%s: zeroing out responses, due to an error\n",__func__);
2170         for(i = 0; i < arr_sz; i++)
2171             mappings[i].present = 0;
2172     }
2173
2174     PSPRINTK("%s: exiting\n",__func__);
2175
2176     return ret;
2177 }
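
/*
 * Illustrative usage (a sketch, not part of the build).  The array is sized
 * by the caller; MAX_MAPPINGS is the size used elsewhere in this file:
 *
 *     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
 *     if(!fill_physical_mapping_array(mm, vma, address, mappings,
 *                                     MAX_MAPPINGS, 0)) {
 *         // mappings[i].present marks the valid slots, and one of them
 *         // is guaranteed to cover "address".
 *     }
 */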
2178
2179 /**
2180  * @brief Call remap_pfn_range on the parts of the specified virtual-physical
2181  * region that are not already mapped.
2182  * @precondition mm->mmap_sem must already be held by caller.
2183  */
2184 int remap_pfn_range_remaining(struct mm_struct* mm,
2185                                   struct vm_area_struct* vma,
2186                                   unsigned long vaddr_start,
2187                                   unsigned long paddr_start,
2188                                   size_t sz,
2189                                   pgprot_t prot,
2190                                   int make_writable) {
2191     unsigned long vaddr_curr;
2192     unsigned long paddr_curr = paddr_start;
2193     int ret = 0, val;
2194     int err;
2195
2196     PSPRINTK("%s: entered vaddr_start{%lx}, paddr_start{%lx}, sz{%zx}\n",
2197             __func__,
2198             vaddr_start,
2199             paddr_start,
2200             sz);
2201
2202     for(vaddr_curr = vaddr_start; 
2203         vaddr_curr < vaddr_start + sz; 
2204         vaddr_curr += PAGE_SIZE) {
2206         if( !(val = is_vaddr_mapped(mm,vaddr_curr)) ) {
2207             //PSPRINTK("%s: mapping vaddr{%lx} paddr{%lx}\n",__func__,vaddr_curr,paddr_curr);
2208             // not mapped - map it
2209             err = remap_pfn_range(vma,
2210                                   vaddr_curr,
2211                                   paddr_curr >> PAGE_SHIFT,
2212                                   PAGE_SIZE,
2213                                   prot);
2214             if(err == 0) {
2215                 PSPRINTK("%s: successfully mapped vaddr{%lx} to paddr{%lx}\n",
2216                             __func__,vaddr_curr,paddr_curr);
2217                 if(make_writable && vma->vm_flags & VM_WRITE) {
2218                     mk_page_writable(mm, vma, vaddr_curr);
2219                 }
2220             } else {
2221                 printk(KERN_ALERT"%s: ERROR mapping %lx to %lx with err{%d}\n",
2222                             __func__, vaddr_curr, paddr_curr, err);
2223             }
2224
2225             if( err != 0 ) ret = err;
2226         } else {
2227                 PSPRINTK("%s: is_vaddr_mapped %d, start:%lx end:%lx\n",
2228                         __func__, val, vma->vm_start, vma->vm_end);
2229         }
2230
2231         paddr_curr += PAGE_SIZE;
2232     }
2233
2234     PSPRINTK("%s: exiting\n",__func__);
2235
2236     return ret;
2237 }
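
/*
 * Illustrative usage (a sketch, not part of the build; the precondition
 * above applies, and vaddr_start/paddr_start/sz describe a region such as
 * one returned by find_consecutive_physically_mapped_region()):
 *
 *     remap_pfn_range_remaining(mm, vma, vaddr_start, paddr_start, sz,
 *                               vma->vm_page_prot, 1);
 */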
2238
2239
2240 /**
2241  * @brief Map, but only in areas that do not currently have mappings.
2242  * This should extend vmas that are adjacent as necessary.
2243  * NOTE: current->enable_do_mmap_pgoff_hook must be disabled
2244  *       by client code before calling this.
2245  * NOTE: mm->mmap_sem must already be held by client code.
2246  * NOTE: entries in the per-mm list of vm_area_structs are
2247  *       ordered by starting address.  This is helpful, because
2248  *       I can exit my check early sometimes.
2249  */
2250 #define FORCE_NODEBUG
2251 #ifndef FORCE_NODEBUG
2252 #define DBGPSPRINTK(...) { if (dbg ==1) printk(KERN_ALERT __VA_ARGS__); }
2253 #else
2254 #define DBGPSPRINTK(...) ;
2255 #endif
2256 unsigned long do_mmap_remaining(struct file *file, unsigned long addr,
2257                                 unsigned long len, unsigned long prot,
2258                                 unsigned long flags, unsigned long pgoff, int dbg) {
2259     unsigned long ret = addr;
2260     unsigned long start = addr;
2261     unsigned long local_end = start;
2262     unsigned long end = addr + len;
2263     struct vm_area_struct* curr;
2264     unsigned long error = 0;
2265
2266     // go through ALL vma's, looking for interference with this space.
2267     curr = current->mm->mmap;
2268     DBGPSPRINTK("%s: processing {%lx,%lx}\n",__func__,addr,len);
2269
2270     while(1) {
2271
2272         if(start >= end) goto done;
2273
2274         // We've reached the end of the list
2275         else if(curr == NULL) {
2276             // map through the end
2277             DBGPSPRINTK("%s: curr == NULL - mapping {%lx,%lx}\n",
2278                     __func__,start,end-start);
2279             error=do_mmap(file, start, end - start, prot, flags, pgoff); 
2280             if (error != start)
2281                 printk(KERN_ALERT"%s_1: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
2282             goto done;
2283         }
2284
2285         // the VMA is fully above the region of interest
2286         else if(end <= curr->vm_start) {
2287                 // mmap through local_end
2288             DBGPSPRINTK("%s: VMA is fully above the region of interest - mapping {%lx,%lx}\n",
2289                     __func__,start,end-start);
2290             error=do_mmap(file, start, end - start, prot, flags, pgoff);
2291             if (error != start)
2292                 printk(KERN_ALERT"%s_2: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
2293             goto done;
2294         }
2295
2296         // the VMA fully encompasses the region of interest
2297         else if(start >= curr->vm_start && end <= curr->vm_end) {
2298             // nothing to do
2299             DBGPSPRINTK("%s: VMA fully encompasses the region of interest\n",__func__);
2300             goto done;
2301         }
2302
2303         // the VMA is fully below the region of interest
2304         else if(curr->vm_end <= start) {
2305             // move on to the next one
2306             DBGPSPRINTK("%s: VMA is fully below region of interest\n",__func__);
2307         }
2308
2309         // the VMA includes the start of the region of interest 
2310         // but not the end
2311         else if (start >= curr->vm_start && 
2312                  start < curr->vm_end &&
2313                  end > curr->vm_end) {
2314             // advance start (no mapping to do) 
2315             start = curr->vm_end;
2316             local_end = start;
2317             DBGPSPRINTK("%s: VMA includes start but not end\n",__func__);
2318         }
2319
2320         // the VMA includes the end of the region of interest
2321         // but not the start
2322         else if(start < curr->vm_start && 
2323                 end <= curr->vm_end &&
2324                 end > curr->vm_start) {
2325             local_end = curr->vm_start;
2326             
2327             // mmap through local_end
2328             DBGPSPRINTK("%s: VMA includes end but not start - mapping {%lx,%lx}\n",
2329                     __func__,start, local_end - start);
2330             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
2331             if (error != start)
2332                 printk(KERN_ALERT"%s_3: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
2333
2334             // Then we're done
2335             goto done;
2336         }
2337
2338         // the VMA is fully within the region of interest
2339         else if(start <= curr->vm_start && end >= curr->vm_end) {
2340             // advance local end
2341             local_end = curr->vm_start;
2342
2343             // map the difference
2344             DBGPSPRINTK("%s: VMA is fully within the region of interest - mapping {%lx,%lx}\n",
2345                     __func__,start, local_end - start);
2346             error=do_mmap(file, start, local_end - start, prot, flags, pgoff);
2347             if (error != start)
2348                 printk(KERN_ALERT"%s_4: ERROR %lx start: %lx end %lx\n", __func__, error, start, end);
2349
2350             // Then advance to the end of this vma
2351             start = curr->vm_end;
2352             local_end = start;
2353         }
2354
2355         curr = curr->vm_next;
2356
2357     }
2358
2359 done:
2360     
2361     DBGPSPRINTK("%s: exiting, last do_mmap result:%lx\n",__func__, error);
2362     return ret;
2363 }
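
/*
 * Illustrative usage (a sketch, not part of the build; region_start and
 * region_len stand in for the caller's range, and the flag combination is
 * only an example).  Per the notes above, the caller must disable
 * current->enable_do_mmap_pgoff_hook and hold mm->mmap_sem:
 *
 *     do_mmap_remaining(NULL, region_start, region_len,
 *                       PROT_READ | PROT_WRITE,
 *                       MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE,
 *                       0, 0);
 */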
2364
2365 static void send_pte(unsigned long paddr_start,
2366         unsigned long vaddr_start, 
2367         size_t sz, 
2368         int dst,
2369         int vma_id,
2370         int clone_request_id) {
2371
2372     pte_transfer_t pte_xfer;
2373     pte_xfer.header.type = PCN_KMSG_TYPE_PROC_SRV_PTE_TRANSFER;
2374     pte_xfer.header.prio = PCN_KMSG_PRIO_NORMAL;
2375     pte_xfer.paddr_start = paddr_start;
2376     pte_xfer.vaddr_start = vaddr_start;
2377     pte_xfer.sz = sz;
2378     pte_xfer.clone_request_id = clone_request_id;
2379     pte_xfer.vma_id = vma_id;
2380     pcn_kmsg_send(dst, (struct pcn_kmsg_message *)&pte_xfer);
2381 }
2382
2383 static void send_vma(struct mm_struct* mm,
2384         struct vm_area_struct* vma, 
2385         int dst,
2386         int clone_request_id) {
2387     char lpath[256];
2388     char *plpath;
2389     vma_transfer_t* vma_xfer = kmalloc(sizeof(vma_transfer_t),GFP_KERNEL);
         if(!vma_xfer)
             return;
2390     vma_xfer->header.type = PCN_KMSG_TYPE_PROC_SRV_VMA_TRANSFER;  
2391     vma_xfer->header.prio = PCN_KMSG_PRIO_NORMAL;
2392     
2393     if(vma->vm_file == NULL) {
2394         vma_xfer->path[0] = '\0';
2395     } else {
2396         plpath = d_path(&vma->vm_file->f_path,
2397                 lpath,256);
2398         strcpy(vma_xfer->path,plpath);
2399     }
2400
2401     //
2402     // Transfer the vma
2403     //
2404     PS_SPIN_LOCK(&_vma_id_lock);
2405     vma_xfer->vma_id = _vma_id++;
2406     PS_SPIN_UNLOCK(&_vma_id_lock);
2407     vma_xfer->start = vma->vm_start;
2408     vma_xfer->end = vma->vm_end;
2409     vma_xfer->prot = vma->vm_page_prot;
2410     vma_xfer->clone_request_id = clone_request_id;
2411     vma_xfer->flags = vma->vm_flags;
2412     vma_xfer->pgoff = vma->vm_pgoff;
2413     pcn_kmsg_send_long(dst, 
2414                         (struct pcn_kmsg_long_message*)vma_xfer, 
2415                         sizeof(vma_transfer_t) - sizeof(vma_xfer->header));
2416
2417     // Send all physical information too
2418     {
2419     unsigned long curr = vma->vm_start;
2420     unsigned long vaddr_resolved = -1;
2421     unsigned long paddr_resolved = -1;
2422     size_t sz_resolved = 0;
2423     
2424     while(curr < vma->vm_end) {
2425         if(-1 == find_next_consecutive_physically_mapped_region(mm,
2426                     vma,
2427                     curr,
2428                     &vaddr_resolved,
2429                     &paddr_resolved,
2430                     &sz_resolved,
2431                     0)) {
2432             // None more, exit
2433             break;
2434         } else {
2435             // send the pte
2436             send_pte(paddr_resolved,
2437                      vaddr_resolved,
2438                      sz_resolved,
2439                      dst,
2440                      vma_xfer->vma_id,
2441                      vma_xfer->clone_request_id
2442                      );
2443
2444             // move to the next
2445             curr = vaddr_resolved + sz_resolved;
2446         }
2447     }
2448
2449     }
2450
2451
2452     kfree(vma_xfer);
2453 }
2454
2455 /**
2456  * @brief Display a mapping request data entry.
2457  */
2458 static void dump_mapping_request_data(mapping_request_data_t* data) {
2459     int i;
2460     PSPRINTK("mapping request data dump:\n");
2461     PSPRINTK("address{%lx}, vaddr_start{%lx}, vaddr_sz{%lx}\n",
2462                     data->address, data->vaddr_start, data->vaddr_size);
2463     for(i = 0; i < MAX_MAPPINGS; i++) {
2464         PSPRINTK("mapping %d - vaddr{%lx}, paddr{%lx}, sz{%lx}\n",
2465                 i,data->mappings[i].vaddr,data->mappings[i].paddr,data->mappings[i].sz);
2466     }
2467     PSPRINTK("present{%d}, complete{%d}, from_saved_mm{%d}\n",
2468             data->present, data->complete, data->from_saved_mm);
2469     PSPRINTK("responses{%d}, expected_responses{%d}\n",
2470             data->responses, data->expected_responses);
2471 }
2472
2473 /**
2474  * @brief Display relevant task information.
2475  */
2476 void dump_task(struct task_struct* task, struct pt_regs* regs, unsigned long stack_ptr) {
2477 #if PROCESS_SERVER_VERBOSE
2478     if (!task) return;
2479
2480     PSPRINTK("DUMP TASK\n");
2481     PSPRINTK("PID: %d\n",task->pid);
2482     PSPRINTK("State: %lx\n",task->state);
2483     PSPRINTK("Flags: %x\n",task->flags);
2484     PSPRINTK("Prio{%d},Static_Prio{%d},Normal_Prio{%d}\n",
2485             task->prio,task->static_prio,task->normal_prio);
2486     PSPRINTK("Represents_remote{%d}\n",task->represents_remote);
2487     PSPRINTK("Executing_for_remote{%d}\n",task->executing_for_remote);
2488     PSPRINTK("prev_pid{%d}\n",task->prev_pid);
2489     PSPRINTK("next_pid{%d}\n",task->next_pid);
2490     PSPRINTK("prev_cpu{%d}\n",task->prev_cpu);
2491     PSPRINTK("next_cpu{%d}\n",task->next_cpu);
2492     PSPRINTK("Clone_request_id{%d}\n",task->clone_request_id);
2493     dump_regs(regs);
2494     dump_thread(&task->thread);
2495     //dump_mm(task->mm);
2496     dump_stk(&task->thread,stack_ptr);
2497     PSPRINTK("TASK DUMP COMPLETE\n");
2498 #endif
2499 }
2500
2501 /**
2502  * @brief Display a task's stack information.
2503  */
2504 static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr) {
2505     if(!thread) return;
2506     PSPRINTK("DUMP STACK\n");
2507     if(thread->sp) {
2508         PSPRINTK("sp = %lx\n",thread->sp);
2509     }
2510     if(thread->usersp) {
2511         PSPRINTK("usersp = %lx\n",thread->usersp);
2512     }
2513     if(stack_ptr) {
2514         PSPRINTK("stack_ptr = %lx\n",stack_ptr);
2515     }
2516     PSPRINTK("STACK DUMP COMPLETE\n");
2517 }
2518
2519 /**
2520  * @brief Display a task's register contents.
2521  */
2522 static void dump_regs(struct pt_regs* regs) {
2523     unsigned long fs, gs;
2524     PSPRINTK("DUMP REGS\n");
2525     if(NULL != regs) {
2526         PSPRINTK("r15{%lx}\n",regs->r15);   
2527         PSPRINTK("r14{%lx}\n",regs->r14);
2528         PSPRINTK("r13{%lx}\n",regs->r13);
2529         PSPRINTK("r12{%lx}\n",regs->r12);
2530         PSPRINTK("r11{%lx}\n",regs->r11);
2531         PSPRINTK("r10{%lx}\n",regs->r10);
2532         PSPRINTK("r9{%lx}\n",regs->r9);
2533         PSPRINTK("r8{%lx}\n",regs->r8);
2534         PSPRINTK("bp{%lx}\n",regs->bp);
2535         PSPRINTK("bx{%lx}\n",regs->bx);
2536         PSPRINTK("ax{%lx}\n",regs->ax);
2537         PSPRINTK("cx{%lx}\n",regs->cx);
2538         PSPRINTK("dx{%lx}\n",regs->dx);
2539         PSPRINTK("di{%lx}\n",regs->di);
2540         PSPRINTK("orig_ax{%lx}\n",regs->orig_ax);
2541         PSPRINTK("ip{%lx}\n",regs->ip);
2542         PSPRINTK("cs{%lx}\n",regs->cs);
2543         PSPRINTK("flags{%lx}\n",regs->flags);
2544         PSPRINTK("sp{%lx}\n",regs->sp);
2545         PSPRINTK("ss{%lx}\n",regs->ss);
2546     }
2547     rdmsrl(MSR_FS_BASE, fs);
2548     rdmsrl(MSR_GS_BASE, gs);
2549     PSPRINTK("fs{%lx}\n",fs);
2550     PSPRINTK("gs{%lx}\n",gs);
2551     PSPRINTK("REGS DUMP COMPLETE\n");
2552 }
2553
2554 /**
2555  * @brief Display a task's thread information.
2556  */
2557 static void dump_thread(struct thread_struct* thread) {
2558     PSPRINTK("DUMP THREAD\n");
2559     PSPRINTK("sp0{%lx}, sp{%lx}\n",thread->sp0,thread->sp);
2560     PSPRINTK("usersp{%lx}\n",thread->usersp);
2561     PSPRINTK("es{%x}\n",thread->es);
2562     PSPRINTK("ds{%x}\n",thread->ds);
2563     PSPRINTK("fsindex{%x}\n",thread->fsindex);
2564     PSPRINTK("gsindex{%x}\n",thread->gsindex);
2565     PSPRINTK("gs{%lx}\n",thread->gs);
2566     PSPRINTK("THREAD DUMP COMPLETE\n");
2567 }
2568
2569 /**
2570  * @brief Display a pte_data_t data structure.
2571  */
2572 static void dump_pte_data(pte_data_t* p) {
2573     PSPRINTK("PTE_DATA\n");
2574     PSPRINTK("vma_id{%x}\n",p->vma_id);
2575     PSPRINTK("clone_request_id{%x}\n",p->clone_request_id);
2576     PSPRINTK("cpu{%x}\n",p->cpu);
2577     PSPRINTK("vaddr_start{%lx}\n",p->vaddr_start);
2578     PSPRINTK("paddr_start{%lx}\n",p->paddr_start);
2579     PSPRINTK("sz{%d}\n",p->sz);
2580 }
2581
2582 /**
2583  * @brief Display a vma_data_t data structure.
2584  */
2585 static void dump_vma_data(vma_data_t* v) {
2586     pte_data_t* p;
2587     PSPRINTK("VMA_DATA\n");
2588     PSPRINTK("start{%lx}\n",v->start);
2589     PSPRINTK("end{%lx}\n",v->end);
2590     PSPRINTK("clone_request_id{%x}\n",v->clone_request_id);
2591     PSPRINTK("cpu{%x}\n",v->cpu);
2592     PSPRINTK("flags{%lx}\n",v->flags);
2593     PSPRINTK("vma_id{%x}\n",v->vma_id);
2594     PSPRINTK("path{%s}\n",v->path);
2595
2596     p = v->pte_list;
2597     while(p) {
2598         dump_pte_data(p);
2599         p = (pte_data_t*)p->header.next;
2600     }
2601 }
2602
2603 /**
2604  * @brief Display a clone_data_t.
2605  */
2606 static void dump_clone_data(clone_data_t* r) {
2607     vma_data_t* v;
2608     PSPRINTK("CLONE REQUEST\n");
2609     PSPRINTK("clone_request_id{%x}\n",r->clone_request_id);
2610     PSPRINTK("clone_flags{%lx}\n",r->clone_flags);
2611     PSPRINTK("stack_start{%lx}\n",r->stack_start);
2612     PSPRINTK("stack_ptr{%lx}\n",r->stack_ptr);
2613     PSPRINTK("env_start{%lx}\n",r->env_start);
2614     PSPRINTK("env_end{%lx}\n",r->env_end);
2615     PSPRINTK("arg_start{%lx}\n",r->arg_start);
2616     PSPRINTK("arg_end{%lx}\n",r->arg_end);
2617     PSPRINTK("heap_start{%lx}\n",r->heap_start);
2618     PSPRINTK("heap_end{%lx}\n",r->heap_end);
2619     PSPRINTK("data_start{%lx}\n",r->data_start);
2620     PSPRINTK("data_end{%lx}\n",r->data_end);
2621     dump_regs(&r->regs);
2622     PSPRINTK("placeholder_pid{%x}\n",r->placeholder_pid);
2623     PSPRINTK("placeholder_tgid{%x}\n",r->placeholder_tgid);
2624     PSPRINTK("thread_fs{%lx}\n",r->thread_fs);
2625     PSPRINTK("thread_gs{%lx}\n",r->thread_gs);
2626     PSPRINTK("thread_sp0{%lx}\n",r->thread_sp0);
2627     PSPRINTK("thread_sp{%lx}\n",r->thread_sp);
2628     PSPRINTK("thread_usersp{%lx}\n",r->thread_usersp);
2629
2630     v = r->vma_list;
2631     while(v) {
2632         dump_vma_data(v);
2633         v = (vma_data_t*)v->header.next;
2634     }
2635 }
2636
2637 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
2638 /**
2639  * @brief Finds a stats_query data entry.
2640  * @return Either a stats entry or NULL if one is not found
2641  * that satisfies the parameter requirements.
2642  */
2643 static stats_query_data_t* find_stats_query_data(pid_t pid) {
2644     data_header_t* curr = NULL;
2645     stats_query_data_t* query = NULL;
2646     stats_query_data_t* ret = NULL;
2647     PS_SPIN_LOCK(&_data_head_lock);
2648     
2649     curr = _data_head;
2650     while(curr) {
2651         if(curr->data_type == PROCESS_SERVER_STATS_DATA_TYPE) {
2652             query = (stats_query_data_t*)curr;
2653             if(query->pid == pid) {
2654                 ret = query;
2655                 break;
2656             }
2657         }
2658         curr = curr->next;
2659     }
2660
2661     PS_SPIN_UNLOCK(&_data_head_lock);
2662
2663     return ret;
2664 }
2665 #endif
2666
2667 /**
2668  * Queue lock must already be held.
2669  */
2670 static void add_fault_entry_to_queue(lamport_barrier_entry_t* entry,
2671                                      lamport_barrier_queue_t* queue)
2672 {
2673     lamport_barrier_entry_t* curr = queue->queue;
2674     lamport_barrier_entry_t* last = NULL;
2675
2676     entry->header.next = NULL;
2677     entry->header.prev = NULL;
2678
2679     // Take care of the "empty" scenario first because it's easy.
2680     if(!queue->queue) {
2681         queue->queue = entry;
2682         return;
2683     }
2684
2685     // Next take care of the scenario where we have to replace
2686     // the first entry
2687     if(queue->queue->timestamp > entry->timestamp) {
2688         queue->queue->header.prev = (data_header_t*)entry;
2689         entry->header.next = (data_header_t*)queue->queue;
2690         queue->queue = entry;
2691         return;
2692     }
2693
2694     // Now we have to iterate, but we know that we don't
2695     // have to change the value of queue->queue.
2696     while(curr) {
2697         if(curr->timestamp > entry->timestamp) {
2698             curr->header.prev->next = (data_header_t*)entry;
2699             entry->header.prev = curr->header.prev;
2700             curr->header.prev = (data_header_t*)entry;
2701             entry->header.next = (data_header_t*)curr;
2702             return;
2703         }
2704         last = curr;
2705         curr = (lamport_barrier_entry_t*)curr->header.next;
2706     }
2707
2708     // It must be the last entry then
2709     if(last) {
2710         last->header.next = (data_header_t*)entry;
2711         entry->header.prev = (data_header_t*)last;
2712     }
2713
2714 }
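
/*
 * Illustrative usage (a sketch, not part of the build; the queue lock must
 * already be held, as noted above, and "ts" stands in for whatever Lamport
 * timestamp the caller has chosen):
 *
 *     entry->timestamp = ts;
 *     add_fault_entry_to_queue(entry, queue);
 *     // queue->queue now points at the entry with the smallest timestamp.
 */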
2715
2716
2717 /**
2718  * @brief Find a fault barrier data entry.
2719  * @return Either a data entry, or NULL if one does 
2720  * not exist that satisfies the parameter requirements.
2721  */
2722 static lamport_barrier_queue_t* find_lamport_barrier_queue(int tgroup_home_cpu, 
2723         int tgroup_home_id, unsigned long address) {
2724
2725     data_header_t* curr = NULL;
2726     lamport_barrier_queue_t* entry = NULL;
2727     lamport_barrier_queue_t* ret = NULL;
2728
2729     curr = (data_header_t*)_lamport_barrier_queue_head;
2730     while(curr) {
2731         entry = (lamport_barrier_queue_t*)curr;
2732         if(entry->tgroup_home_cpu == tgroup_home_cpu &&
2733            entry->tgroup_home_id == tgroup_home_id &&
2734            entry->address == address) {
2735             ret = entry;
2736             break;
2737         }
2738         curr = curr->next;
2739     }
2740
2741     return ret;
2742 }
2743
2744 static lamport_barrier_entry_t* find_lamport_barrier_entry(int cpu,
2745         int tgroup_home_cpu,
2746         int tgroup_home_id, 
2747         unsigned long address)
2748 {
2749     lamport_barrier_entry_t* curr = NULL;
2750     lamport_barrier_entry_t* ret = NULL;
2751     lamport_barrier_queue_t* queue = find_lamport_barrier_queue(
2752                                         tgroup_home_cpu,
2753                                         tgroup_home_id,
2754                                         address);
2755     if(!queue) {
2756         goto exit;
2757     }
2758
2759     curr = queue->queue;
2760     while(curr) {
2761         if(curr->cpu == cpu) {
2762             ret = curr;
2763             goto exit;
2764         }
2765         curr = (lamport_barrier_entry_t*)curr->header.next;
2766     }
2767 exit:
2768     return ret;
2769 }
2770
2771 /**
2772  * @brief Find a thread count data entry.
2773  * @return Either a thread count request data entry, or NULL if one does 
2774  * not exist that satisfies the parameter requirements.
2775  */
2776 static remote_thread_count_request_data_t* find_remote_thread_count_data(int cpu, 
2777         int id, int requester_pid) {
2778
2779     data_header_t* curr = NULL;
2780     remote_thread_count_request_data_t* request = NULL;
2781     remote_thread_count_request_data_t* ret = NULL;
2782     unsigned long lockflags;
2783
2784     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
2785
2786     curr = _count_remote_tmembers_data_head;
2787     while(curr) {
2788         request = (remote_thread_count_request_data_t*)curr;
2789         if(request->tgroup_home_cpu == cpu &&
2790            request->tgroup_home_id == id &&
2791            request->requester_pid == requester_pid) {
2792             ret = request;
2793             break;
2794         }
2795         curr = curr->next;
2796     }
2797
2798     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
2799
2800     return ret;
2801 }
2802
2803 /**
2804  * @brief Finds a munmap request data entry.
2805  * @return Either a munmap request data entry, or NULL if one is not
2806  * found that satisfies the parameter requirements.
2807  */
2808 static munmap_request_data_t* find_munmap_request_data(int cpu, int id, 
2809         int requester_pid, unsigned long address) {
2810
2811     data_header_t* curr = NULL;
2812     munmap_request_data_t* request = NULL;
2813     munmap_request_data_t* ret = NULL;
2814     PS_SPIN_LOCK(&_munmap_data_head_lock);
2815     
2816     curr = _munmap_data_head;
2817     while(curr) {
2818         request = (munmap_request_data_t*)curr;
2819         if(request->tgroup_home_cpu == cpu && 
2820                 request->tgroup_home_id == id &&
2821                 request->requester_pid == requester_pid &&
2822                 request->vaddr_start == address) {
2823             ret = request;
2824             break;
2825         }
2826         curr = curr->next;
2827     }
2828
2829     PS_SPIN_UNLOCK(&_munmap_data_head_lock);
2830
2831     return ret;
2832
2833 }
2834
2835 /**
2836  * @brief Finds an mprotect request data entry.
2837  * @return Either an mprotect request data entry, or NULL if one is
2838  * not found that satisfies the parameter requirements.
2839  */
2840 static mprotect_data_t* find_mprotect_request_data(int cpu, int id, 
2841         int requester_pid, unsigned long start) {
2842
2843     data_header_t* curr = NULL;
2844     mprotect_data_t* request = NULL;
2845     mprotect_data_t* ret = NULL;
2846     PS_SPIN_LOCK(&_mprotect_data_head_lock);
2847     
2848     curr = _mprotect_data_head;
2849     while(curr) {
2850         request = (mprotect_data_t*)curr;
2851         if(request->tgroup_home_cpu == cpu && 
2852                 request->tgroup_home_id == id &&
2853                 request->requester_pid == requester_pid &&
2854                 request->start == start) {
2855             ret = request;
2856             break;
2857         }
2858         curr = curr->next;
2859     }
2860
2861     PS_SPIN_UNLOCK(&_mprotect_data_head_lock);
2862
2863     return ret;
2864
2865 }
2866
2867 /**
2868  * @brief Finds a mapping request data entry.
2869  * @return Either a mapping request data entry, or NULL if an entry
2870  * is not found that satisfies the parameter requirements.
2871  */
2872 static mapping_request_data_t* find_mapping_request_data(int cpu, int id, 
2873         int pid, unsigned long address) {
2874
2875     data_header_t* curr = NULL;
2876     mapping_request_data_t* request = NULL;
2877     mapping_request_data_t* ret = NULL;
2878     
2879     curr = _mapping_request_data_head;
2880     while(curr) {
2881         request = (mapping_request_data_t*)curr;
2882         if(request->tgroup_home_cpu == cpu && 
2883                 request->tgroup_home_id == id &&
2884                 request->requester_pid == pid &&
2885                 request->address == address) {
2886             ret = request;
2887             break;
2888         }
2889         curr = curr->next;
2890     }
2891
2892
2893     return ret;
2894 }
2895
2896 /**
2897  * @brief Finds a clone data entry.
2898  * @return Either a clone entry or NULL if one is not found
2899  * that satisfies the parameter requirements.
2900  */
2901 static clone_data_t* find_clone_data(int cpu, int clone_request_id) {
2902     data_header_t* curr = NULL;
2903     clone_data_t* clone = NULL;
2904     clone_data_t* ret = NULL;
2905     PS_SPIN_LOCK(&_data_head_lock);
2906     
2907     curr = _data_head;
2908     while(curr) {
2909         if(curr->data_type == PROCESS_SERVER_CLONE_DATA_TYPE) {
2910             clone = (clone_data_t*)curr;
2911             if(clone->placeholder_cpu == cpu && clone->clone_request_id == clone_request_id) {
2912                 ret = clone;
2913                 break;
2914             }
2915         }
2916         curr = curr->next;
2917     }
2918
2919     PS_SPIN_UNLOCK(&_data_head_lock);
2920
2921     return ret;
2922 }
2923
2924 /**
2925  * @brief Destroys the specified clone data.  It also destroys lists
2926  * that are nested within it.
2927  */
2928 static void destroy_clone_data(clone_data_t* data) {
2929     vma_data_t* vma_data;
2930     pte_data_t* pte_data;
2931     vma_data = data->vma_list;
2932     while(vma_data) {
2933         
2934         // Destroy this VMA's PTE's
2935         pte_data = vma_data->pte_list;
2936         while(pte_data) {
2937
2938             // Remove pte from list
2939             vma_data->pte_list = (pte_data_t*)pte_data->header.next;
2940             if(vma_data->pte_list) {
2941                 vma_data->pte_list->header.prev = NULL;
2942             }
2943
2944             // Destroy pte
2945             kfree(pte_data);
2946
2947             // Next is the new list head
2948             pte_data = vma_data->pte_list;
2949         }
2950         
2951         // Remove vma from list
2952         data->vma_list = (vma_data_t*)vma_data->header.next;
2953         if(data->vma_list) {
2954             data->vma_list->header.prev = NULL;
2955         }
2956
2957         // Destroy vma
2958         kfree(vma_data);
2959
2960         // Next is the new list head
2961         vma_data = data->vma_list;
2962     }
2963
2964     // Destroy clone data
2965     kfree(data);
2966 }
2967
2968 /**
2969  * @brief Finds a vma_data_t entry.
2970  */
2971 static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_start) {
2972
2973     vma_data_t* curr = clone_data->vma_list;
2974     vma_data_t* ret = NULL;
2975
2976     while(curr) {
2977         
2978         if(curr->start == addr_start) {
2979             ret = curr;
2980             break;
2981         }
2982
2983         curr = (vma_data_t*)curr->header.next;
2984     }
2985
2986     return ret;
2987 }
2988
2989 /**
2990  * @brief Callback for page walk that displays the contents of the walk.
2991  */
2992 static int dump_page_walk_pte_entry_callback(pte_t *pte, unsigned long start, 
2993         unsigned long end, struct mm_walk *walk) {
2994
2995     int nx;
2996     int rw;
2997     int user;
2998     int pwt;
2999     int pcd;
3000     int accessed;
3001     int dirty;
3002
3003     if(NULL == pte || !pte_present(*pte)) {                                                                                                                             
3004         return 0;
3005     }
3006
3007     nx       = pte_flags(*pte) & _PAGE_NX       ? 1 : 0;
3008     rw       = pte_flags(*pte) & _PAGE_RW       ? 1 : 0;
3009     user     = pte_flags(*pte) & _PAGE_USER     ? 1 : 0;
3010     pwt      = pte_flags(*pte) & _PAGE_PWT      ? 1 : 0;
3011     pcd      = pte_flags(*pte) & _PAGE_PCD      ? 1 : 0;
3012     accessed = pte_flags(*pte) & _PAGE_ACCESSED ? 1 : 0;
3013     dirty    = pte_flags(*pte) & _PAGE_DIRTY    ? 1 : 0;
3014
3015     PSPRINTK("pte_entry start{%lx}, end{%lx}, phy{%lx}\n",
3016             start,
3017             end,
3018             (unsigned long)(pte_val(*pte) & PHYSICAL_PAGE_MASK) | (start & (PAGE_SIZE-1)));
3019
3020     PSPRINTK("\tnx{%d}, ",nx);
3021     PSPRINTK("rw{%d}, ",rw);
3022     PSPRINTK("user{%d}, ",user);
3023     PSPRINTK("pwt{%d}, ",pwt);
3024     PSPRINTK("pcd{%d}, ",pcd);
3025     PSPRINTK("accessed{%d}, ",accessed);
3026     PSPRINTK("dirty{%d}\n",dirty);
3027
3028     return 0;
3029 }
3030
3031 /**
3032  * @brief Displays relevant data within a mm.
3033  */
3034 static void dump_mm(struct mm_struct* mm) {
3035     struct vm_area_struct * curr;
3036     struct mm_walk walk = {
3037         .pte_entry = dump_page_walk_pte_entry_callback,
3038         .mm = mm,
3039         .private = NULL
3040         };
3041     char buf[256];
3042
3043     if(NULL == mm) {
3044         PSPRINTK("MM IS NULL!\n");
3045         return;
3046     }
3047
3048     PS_DOWN_READ(&mm->mmap_sem);
3049
3050     curr = mm->mmap;
3051
3052     PSPRINTK("MM DUMP\n");
3053     PSPRINTK("Stack Growth{%lx}\n",mm->stack_vm);
3054     PSPRINTK("Code{%lx - %lx}\n",mm->start_code,mm->end_code);
3055     PSPRINTK("Brk{%lx - %lx}\n",mm->start_brk,mm->brk);
3056     PSPRINTK("Stack{%lx}\n",mm->start_stack);
3057     PSPRINTK("Arg{%lx - %lx}\n",mm->arg_start,mm->arg_end);
3058     PSPRINTK("Env{%lx - %lx}\n",mm->env_start,mm->env_end);
3059
3060     while(curr) {
3061         if(!curr->vm_file) {
3062             PSPRINTK("Anonymous VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, flags{%lx}\n",
3063                     curr->vm_start, 
3064                     curr->vm_end,
3065                     curr->vm_pgoff,
3066                     curr->vm_flags);
3067             // walk    
3068             walk_page_range(curr->vm_start,curr->vm_end,&walk);
3069         } else {
3070             PSPRINTK("Page VM Entry: start{%lx}, end{%lx}, pgoff{%lx}, path{%s}, flags{%lx}\n",
3071                     curr->vm_start,
3072                     curr->vm_end,
3073                     curr->vm_pgoff,
3074                     d_path(&curr->vm_file->f_path,buf, 256),
3075                     curr->vm_flags);
3076             walk_page_range(curr->vm_start,curr->vm_end,&walk);
3077         }
3078         curr = curr->vm_next;
3079     }
3080
3081     PS_UP_READ(&mm->mmap_sem);
3082 }
3083
3084 /**
3085  * Data library
3086  */
3087
3088 /**
3089  * @brief Add data entry.
3090  */
3091 static void add_data_entry_to(void* entry, spinlock_t* lock, data_header_t** head) {
3092     data_header_t* hdr = (data_header_t*)entry;
3093     data_header_t* curr = NULL;
3094
3095     if(!entry) {
3096         return;
3097     }
3098
3099     // Always clear out the link information
3100     hdr->next = NULL;
3101     hdr->prev = NULL;
3102
3103     if(lock)PS_SPIN_LOCK(lock);
3104     
3105     if (!*head) {
3106         *head = hdr;
3107         hdr->next = NULL;
3108         hdr->prev = NULL;
3109     } else {
3110         curr = *head;
3111         while(curr->next != NULL) {
3112             if(curr == entry) {
                     // It's already in the list!  Drop the lock before returning.
                     if(lock)PS_SPIN_UNLOCK(lock);
3113                 return;
3114             }
3115             curr = curr->next;
3116         }
3117         // Now curr should be the last entry.
3118         // Append the new entry to curr.
3119         curr->next = hdr;
3120         hdr->next = NULL;
3121         hdr->prev = curr;
3122     }
3123
3124     if(lock)PS_SPIN_UNLOCK(lock);
3125 }
3126
3127 /**
3128  * @brief Remove a data entry
3129  * @prerequisite Requires user to hold lock
3130  */
3131 static void remove_data_entry_from(void* entry, data_header_t** head) {
3132     data_header_t* hdr = entry;
3133
3134     if(!entry) {
3135         return;
3136     }
3137
3138     if(*head == hdr) {
3139         *head = hdr->next;
3140     }
3141
3142     if(hdr->next) {
3143         hdr->next->prev = hdr->prev;
3144     }
3145
3146     if(hdr->prev) {
3147         hdr->prev->next = hdr->next;
3148     }
3149
3150     hdr->prev = NULL;
3151     hdr->next = NULL;
3152
3153 }
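
/*
 * Illustrative pairing of add_data_entry_to()/remove_data_entry_from(),
 * mirroring how count_remote_thread_members() below uses the remote thread
 * count list (a sketch, not part of the build):
 *
 *     add_data_entry_to(data,
 *                       &_count_remote_tmembers_data_head_lock,
 *                       &_count_remote_tmembers_data_head);
 *     // ... wait for responses ...
 *     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock, lockflags);
 *     remove_data_entry_from(data, &_count_remote_tmembers_data_head);
 *     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock, lockflags);
 */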
3154
3155 /**
3156  * @brief Add data entry
3157  */
3158 static void add_data_entry(void* entry) {
3159     data_header_t* hdr = (data_header_t*)entry;
3160     data_header_t* curr = NULL;
3161     unsigned long lockflags;
3162
3163     if(!entry) {
3164         return;
3165     }
3166
3167     // Always clear out the link information
3168     hdr->next = NULL;
3169     hdr->prev = NULL;
3170
3171     spin_lock_irqsave(&_data_head_lock,lockflags);
3172     
3173     if (!_data_head) {
3174         _data_head = hdr;
3175         hdr->next = NULL;
3176         hdr->prev = NULL;
3177     } else {
3178         curr = _data_head;
3179         while(curr->next != NULL) {
3180             if(curr == entry) {
                     // It's already in the list!  Restore interrupts and drop
                     // the lock before returning.
                     spin_unlock_irqrestore(&_data_head_lock,lockflags);
3181                 return;
3182             }
3183             curr = curr->next;
3184         }
3185         // Now curr should be the last entry.
3186         // Append the new entry to curr.
3187         curr->next = hdr;
3188         hdr->next = NULL;
3189         hdr->prev = curr;
3190     }
3191
3192     spin_unlock_irqrestore(&_data_head_lock,lockflags);
3193 }
3194
3195 /**
3196  * @brief Remove a data entry.
3197  * @prerequisite Requires user to hold _data_head_lock.
3198  */
3199 static void remove_data_entry(void* entry) {
3200     data_header_t* hdr = entry;
3201
3202     if(!entry) {
3203         return;
3204     }
3205
3206     if(_data_head == hdr) {
3207         _data_head = hdr->next;
3208     }
3209
3210     if(hdr->next) {
3211         hdr->next->prev = hdr->prev;
3212     }
3213
3214     if(hdr->prev) {
3215         hdr->prev->next = hdr->next;
3216     }
3217
3218     hdr->prev = NULL;
3219     hdr->next = NULL;
3220
3221 }
3222
3223 /**
3224  * @brief Print information about the list.
3225  */
3226 static void dump_data_list(void) {
3227     data_header_t* curr = NULL;
3228     pte_data_t* pte_data = NULL;
3229     vma_data_t* vma_data = NULL;
3230     clone_data_t* clone_data = NULL;
3231
3232     PS_SPIN_LOCK(&_data_head_lock);
3233
3234     curr = _data_head;
3235
3236     PSPRINTK("DATA LIST:\n");
3237     while(curr) {
3238         switch(curr->data_type) {
3239         case PROCESS_SERVER_VMA_DATA_TYPE:
3240             vma_data = (vma_data_t*)curr;
3241             PSPRINTK("VMA DATA: start{%lx}, end{%lx}, crid{%d}, vmaid{%d}, cpu{%d}, pgoff{%lx}\n",
3242                     vma_data->start,
3243                     vma_data->end,
3244                     vma_data->clone_request_id,
3245                     vma_data->vma_id, 
3246                     vma_data->cpu, 
3247                     vma_data->pgoff);
3248             break;
3249         case PROCESS_SERVER_PTE_DATA_TYPE:
3250             pte_data = (pte_data_t*)curr;
3251             PSPRINTK("PTE DATA: vaddr_start{%lx}, paddr_start{%lx}, sz{%d}, vmaid{%d}, cpu{%d}\n",
3252                     pte_data->vaddr_start,
3253                     pte_data->paddr_start,
3254                     pte_data->sz,
3255                     pte_data->vma_id,
3256                     pte_data->cpu);
3257             break;
3258         case PROCESS_SERVER_CLONE_DATA_TYPE:
3259             clone_data = (clone_data_t*)curr;
3260             PSPRINTK("CLONE DATA: flags{%lx}, stack_start{%lx}, heap_start{%lx}, heap_end{%lx}, ip{%lx}, crid{%d}\n",
3261                     clone_data->clone_flags,
3262                     clone_data->stack_start,
3263                     clone_data->heap_start,
3264                     clone_data->heap_end,
3265                     clone_data->regs.ip,
3266                     clone_data->clone_request_id);
3267             break;
3268         default:
3269             break;
3270         }
3271         curr = curr->next;
3272     }
3273
3274     PS_SPIN_UNLOCK(&_data_head_lock);
3275 }
3276
3277 /**
3278  * @brief Counts remote thread group members.
3279  * @return The number of remote thread group members in the
3280  * specified distributed thread group.
3281  * <MEASURE perf_count_remote_thread_members>
3282  */
3283 static int count_remote_thread_members(int exclude_t_home_cpu,
3284                                        int exclude_t_home_id) {
3285
3286     int tgroup_home_cpu = current->tgroup_home_cpu;
3287     int tgroup_home_id  = current->tgroup_home_id;
3288     remote_thread_count_request_data_t* data;
3289     remote_thread_count_request_t request;
3290     int i;
3291     int s;
3292     int ret = -1;
3293     int perf = -1;
3294     unsigned long lockflags;
3295 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3296     unsigned long long end_time;
3297     unsigned long long total_time;
3298     unsigned long long start_time = native_read_tsc();
3299 #endif
3300
3301     perf = PERF_MEASURE_START(&perf_count_remote_thread_members);
3302
3303     PSPRINTK("%s: entered\n",__func__);
3304
3305     data = kmalloc(sizeof(remote_thread_count_request_data_t),GFP_KERNEL);
3306     if(!data) goto exit;
3307
3308     data->header.data_type = PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE;
3309     data->responses = 0;
3310     data->expected_responses = 0;
3311     data->tgroup_home_cpu = tgroup_home_cpu;
3312     data->tgroup_home_id = tgroup_home_id;
3313     data->requester_pid = current->pid;
3314     data->count = 0;
3315     spin_lock_init(&data->lock);
3316
3317     add_data_entry_to(data,
3318                       &_count_remote_tmembers_data_head_lock,
3319                       &_count_remote_tmembers_data_head);
3320
3321     request.header.type = PCN_KMSG_TYPE_PROC_SRV_THREAD_COUNT_REQUEST;
3322     request.header.prio = PCN_KMSG_PRIO_NORMAL;
3323     request.tgroup_home_cpu = current->tgroup_home_cpu; //TODO why not tgroup_home_cpu?!?!
3324     request.tgroup_home_id  = current->tgroup_home_id; //TODO why not tgroup_home_id?!?!
3325     request.requester_pid = data->requester_pid;
3326
3327 #ifndef SUPPORT_FOR_CLUSTERING
3328     for(i = 0; i < NR_CPUS; i++) {
3329         // Skip the current cpu
3330         if(i == _cpu) continue;
3331 #else
3332     // the list does not include the current processor group descriptor (TODO)
3333     struct list_head *iter;
3334     _remote_cpu_info_list_t *objPtr;
3335     extern struct list_head rlist_head;
3336     list_for_each(iter, &rlist_head) {
3337         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
3338         i = objPtr->_data._processor;
3339 #endif
3340         // Send the request to this cpu.
3341         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
3342         if(!s) {
3343             // On a successful send, increase the number
3344             // of expected responses.
3345             data->expected_responses++;
3346         }
3347     }
3348
3349     PSPRINTK("%s: waiting on %d responses\n",__func__,data->expected_responses);
3350
3351     // Wait for all cpus to respond.
3352     while(data->expected_responses != data->responses) {
3353         schedule();
3354     }
3355
3356     // OK, all responses are in, we can proceed.
3357     ret = data->count;
3358
3359     PSPRINTK("%s: found a total of %d remote threads in group\n",__func__,
3360             data->count);
3361
3362     spin_lock_irqsave(&_count_remote_tmembers_data_head_lock,lockflags);
3363     remove_data_entry_from(data,
3364                            &_count_remote_tmembers_data_head);
3365     spin_unlock_irqrestore(&_count_remote_tmembers_data_head_lock,lockflags);
3366
3367     kfree(data);
3368
3369 exit:
3370
3371 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3372     end_time = native_read_tsc();
3373     total_time = end_time - start_time;
3374     PS_PROC_DATA_TRACK(PS_PROC_DATA_COUNT_REMOTE_THREADS_PROCESSING_TIME,total_time);
3375 #endif
3376
3377     PERF_MEASURE_STOP(&perf_count_remote_thread_members," ",perf);
3378     return ret;
3379 }
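
/*
 * Summary of the counting protocol above (informational sketch): a
 * THREAD_COUNT_REQUEST is sent to every other kernel, and each successful
 * pcn_kmsg_send() increments expected_responses.  The waiting loop then
 * spins (yielding via schedule()) until responses == expected_responses;
 * responses and data->count are presumably filled in by the corresponding
 * thread-count response handler elsewhere in this file.
 */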
3380
3381 /**
3382  * @brief Counts the number of local thread group members for the specified
3383  * distributed thread group.
3384  */
3385 static int count_local_thread_members(int tgroup_home_cpu, 
3386         int tgroup_home_id, int exclude_pid) {
3387
3388     struct task_struct *task, *g;
3389     int count = 0;
3390     PSPRINTK("%s: entered\n",__func__);
3391     read_lock(&tasklist_lock);
3392     do_each_thread(g,task) {
3393         if(task->tgroup_home_id == tgroup_home_id &&
3394            task->tgroup_home_cpu == tgroup_home_cpu &&
3395            task->t_home_cpu == _cpu &&
3396            task->pid != exclude_pid &&
3397            task->exit_state != EXIT_ZOMBIE &&
3398            task->exit_state != EXIT_DEAD &&
3399            !(task->flags & PF_EXITING)) {
3400
3401                 count++;
3402             
3403         }
3404     } while_each_thread(g,task);
3405     read_unlock(&tasklist_lock);
3406     PSPRINTK("%s: exited\n",__func__);
3407
3408     return count;
3409
3410 }
3411
3412 /**
3413  * @brief Counts the number of local and remote thread group members for the
3414  * thread group in which the "current" task resides.
3415  * @return The number of threads, excluding the calling thread itself.
3416  */
3417 static int count_thread_members(void) {
3418      
3419     int count = 0;
3420     PSPRINTK("%s: entered\n",__func__);
3421     count += count_local_thread_members(current->tgroup_home_cpu, current->tgroup_home_id,current->pid);
3422     count += count_remote_thread_members(current->tgroup_home_cpu, current->tgroup_home_id);
3423     PSPRINTK("%s: exited\n",__func__);
3424     return count;
3425 }
3426
3427
3428 /**
3429  * @brief Process notification of a thread group closing.
3430  * This function will wait for any locally executing thread group
3431  * members to exit.  It will then clean up all local resources
3432  * dedicated to the thread group that has exited.
3433  *
3434  * <MEASURE perf_process_tgroup_closed_item>
3435  */
3436
3437 void process_tgroup_closed_item(struct work_struct* work) {
3438
3439     tgroup_closed_work_t* w = (tgroup_closed_work_t*) work;
3440     data_header_t *curr;
3441     mm_data_t* mm_data = NULL;
3442     struct task_struct *g, *task;
3443     unsigned char tgroup_closed = 0;
3444     int perf = -1;
3445     mm_data_t* to_remove = NULL;
3446
3447     perf = PERF_MEASURE_START(&perf_process_tgroup_closed_item);
3448
3449     PSPRINTK("%s: entered\n",__func__);
3450     PSPRINTK("%s: received group exit notification\n",__func__);
3451
3452     PSPRINTK("%s: waiting for all members of this distributed thread group to finish\n",__func__);
3453     while(!tgroup_closed) {
3454         unsigned char pass = 0;
3455         read_lock(&tasklist_lock);
3456         do_each_thread(g,task) {
3457             if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
3458                task->tgroup_home_id  == w->tgroup_home_id) {
3459                 // there are still living tasks within this distributed thread group
3460                 // wait a bit
3461                 pass = 1;
3462                 goto pass_complete;
3463             }
3464         } while_each_thread(g,task);
3465 pass_complete:
3466         read_unlock(&tasklist_lock);
3467         if(!pass) {
3468             tgroup_closed = 1;
3469         } else {
3470             PSPRINTK("%s: waiting for tgroup close out\n",__func__);
3471             schedule();
3472         }
3473     }
3474
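    // Saved mm's are torn down one at a time: each entry is unlinked while
    // _saved_mm_head_lock is held, but mmput() may sleep, so the lock is
    // dropped before the mm is released and the scan is restarted.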
3475 loop:
3476     spin_lock(&_saved_mm_head_lock);
3477     // Remove all saved mm's for this thread group.
3478     curr = _saved_mm_head;
3479     while(curr) {
3480         mm_data = (mm_data_t*)curr;
3481         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
3482            mm_data->tgroup_home_id  == w->tgroup_home_id) {
3483             remove_data_entry_from(curr,&_saved_mm_head);
3484             to_remove = mm_data;
3485             goto found;
3486         }
3487         curr = curr->next;
3488     }
3489 found:
3490     spin_unlock(&_saved_mm_head_lock);
3491
3492     if(to_remove != NULL) {
3493         PSPRINTK("%s: removing a mm from cpu{%d} id{%d}\n",
3494                 __func__,
3495                 w->tgroup_home_cpu,
3496                 w->tgroup_home_id);
3497         
3498         BUG_ON(to_remove->mm == NULL);
3499         mmput(to_remove->mm);
3500         kfree(to_remove);
3501         to_remove = NULL;
3502         goto loop;
3503     }
3504
3505     kfree(work);
3506
3507     PERF_MEASURE_STOP(&perf_process_tgroup_closed_item," ",perf);
3508 }
3509
3510
3511 /**
3512  * @brief Process a request made by a remote CPU for a mapping.  This function
3513  * searches for an mm belonging to the specified distributed thread group and,
3514  * if one is found, searches that mm for a mapping that contains the requested
3515  * address.  Prefetch is implemented in this function: not only is the
3516  * requested page communicated, but also the entire contiguous range of
3517  * virtual-to-physical mappings in which the specified address lives.
3518  * Other contiguous regions may be communicated as well if they exist;
3519  * this constitutes the prefetch.
3520  *
3521  * <MEASURE perf_process_mapping_request>
3522  */
3523 void process_mapping_request(struct work_struct* work) {
3524     mapping_request_work_t* w = (mapping_request_work_t*) work;
3525     mapping_response_t response;
3526     data_header_t* data_curr = NULL;
3527     mm_data_t* mm_data = NULL;
3528     struct task_struct* task = NULL;
3529     struct task_struct* g;
3530     struct vm_area_struct* vma = NULL;
3531     struct mm_struct* mm = NULL;
3532     unsigned long address = w->address;
3533     unsigned long resolved = 0;
3534     struct mm_walk walk = {
3535         .pte_entry = vm_search_page_walk_pte_entry_callback,
3536         .private = &(resolved)
3537     };
3538     char* plpath = NULL;
3539     char lpath[512];
3540     int i;
3541     
3542     // for perf
3543     int used_saved_mm = 0;
3544     int found_vma = 1;
3545     int found_pte = 1;
3546 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3547     unsigned long long mapping_response_send_time_start = 0;
3548     unsigned long long mapping_response_send_time_end = 0;
3549     unsigned long long mapping_request_processing_time_start = native_read_tsc();
3550     unsigned long long mapping_request_processing_time_end = 0;
3551 #endif
3552     
3553     // Perf start
3554     int perf = PERF_MEASURE_START(&perf_process_mapping_request);
3555
3556     current->enable_distributed_munmap = 0;
3557     current->enable_do_mmap_pgoff_hook = 0;
3558
3559     //PSPRINTK("%s: entered\n",__func__);
3560     PSPRINTK("received mapping request from {%d} address{%lx}, cpu{%d}, id{%d}\n",
3561             w->from_cpu,
3562             w->address,
3563             w->tgroup_home_cpu,
3564             w->tgroup_home_id);
3565
3566     // First, search through existing processes
3567     read_lock(&tasklist_lock);
3568     do_each_thread(g,task) {
3569         if((task->tgroup_home_cpu == w->tgroup_home_cpu) &&
3570            (task->tgroup_home_id  == w->tgroup_home_id )) {
3571             //PSPRINTK("mapping request found common thread group here\n");
3572             mm = task->mm;
3573
3574             // Take note of the fact that an mm exists on the remote kernel
3575             set_cpu_has_known_tgroup_mm(task, w->from_cpu);
3576
3577             goto task_mm_search_exit;
3578         }
3579     } while_each_thread(g,task);
3580 task_mm_search_exit:
3581     read_unlock(&tasklist_lock);
3582
3583     // Failing the process search, look through saved mm's.
3584     if(!mm) {
3585         PS_SPIN_LOCK(&_saved_mm_head_lock);
3586         data_curr = _saved_mm_head;
3587         while(data_curr) {
3588
3589             mm_data = (mm_data_t*)data_curr;
3590             
3591             if((mm_data->tgroup_home_cpu == w->tgroup_home_cpu) &&
3592                (mm_data->tgroup_home_id  == w->tgroup_home_id)) {
3593                 PSPRINTK("%s: Using saved mm to resolve mapping\n",__func__);
3594                 mm = mm_data->mm;
3595                 used_saved_mm = 1;
3596                 break;
3597             }
3598
3599             data_curr = data_curr->next;
3600
3601         } // while
3602
3603         PS_SPIN_UNLOCK(&_saved_mm_head_lock);
3604     }
3605     
3606     // OK, if mm was found, look up the mapping.
3607     if(mm) {
3608
3609         // The purpose of this code block is to determine
3610         // whether we need a read or a write lock, and to safely
3611         // take whichever lock type we decide we need.  We
3612         // prefer read locks, since they let us service
3613         // more than one mapping request at the same time.  However,
3614         // if we are going to do any COW break operations, we
3615         // must lock for write.
3616         int can_be_cow = 0;
3617         int first = 1;
3618 changed_can_be_cow:
3619         if(can_be_cow)
3620             PS_DOWN_WRITE(&mm->mmap_sem);
3621         else 
3622             PS_DOWN_READ(&mm->mmap_sem);
3623         vma = find_vma_checked(mm, address);
3624         if(vma && first) {
3625             first = 0;
3626             if(is_maybe_cow(vma)) {
3627                 can_be_cow = 1;
3628                 PS_UP_READ(&mm->mmap_sem);
3629                 goto changed_can_be_cow;
3630             }
3631         }
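             // From here on, mmap_sem is held for write if a COW break may
             // be needed (can_be_cow), otherwise it is held for read.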
3632
3633         walk.mm = mm;
3634         walk_page_range(address & PAGE_MASK, 
3635                 (address & PAGE_MASK) + PAGE_SIZE, &walk);
3636
3637         if(vma && resolved != 0) {
3638
3639             PSPRINTK("mapping found! %lx for vaddr %lx\n",resolved,
3640                     address & PAGE_MASK);
3641
3642             /*
3643              * Find regions of consecutive physical memory
3644              * in this vma, including the faulting address
3645              * if possible.
3646              */
3647             {
3648
3649             // Now grab all the mappings that we can stuff into the response.
3650             if(0 != fill_physical_mapping_array(mm, 
3651                                                 vma,
3652                                                 address,
3653                                                 &response.mappings, 
3654                                                 MAX_MAPPINGS,
3655                                                 can_be_cow)) {
3656                 // If the fill process fails, clear out all
3657                 // results.  Otherwise, we might trick the
3658                 // receiving cpu into thinking the target
3659                 // mapping was found when it was not.
3660                 for(i = 0; i < MAX_MAPPINGS; i++) {
3661                     response.mappings[i].present = 0;
3662                     response.mappings[i].vaddr = 0;
3663                     response.mappings[i].paddr = 0;
3664                     response.mappings[i].sz = 0;
3665                 }
3666                     
3667             }
3668
3669             if(can_be_cow) {
3670                 downgrade_write(&mm->mmap_sem);
3671             }
3672
3673             }
3674
3675             response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3676             response.header.prio = PCN_KMSG_PRIO_NORMAL;
3677             response.tgroup_home_cpu = w->tgroup_home_cpu;
3678             response.tgroup_home_id = w->tgroup_home_id;
3679             response.requester_pid = w->requester_pid;
3680             response.address = address;
3681             response.present = 1;
3682             response.vaddr_start = vma->vm_start;
3683             response.vaddr_size = vma->vm_end - vma->vm_start;
3684             response.prot = vma->vm_page_prot;
3685             response.vm_flags = vma->vm_flags;
3686             if(vma->vm_file == NULL || !w->need_vma) {
3687                 response.path[0] = '\0';
3688             } else {    
3689          
3690                 plpath = d_path(&vma->vm_file->f_path,lpath,512);
3691                 strcpy(response.path,plpath);
3692                 response.pgoff = vma->vm_pgoff;
3693             }
3694
3695             // The lock is in read mode at this point (taken as a read
3696             // lock, or downgraded from write above), so read-unlock it.
3697             PS_UP_READ(&mm->mmap_sem);
3698        
3699         } else {
3700
3701             if(can_be_cow)
3702                 PS_UP_WRITE(&mm->mmap_sem);
3703             else
3704                 PS_UP_READ(&mm->mmap_sem);
3705             // Zero out mappings
3706             for(i = 0; i < MAX_MAPPINGS; i++) {
3707                 response.mappings[i].present = 0;
3708                 response.mappings[i].vaddr = 0;
3709                 response.mappings[i].paddr = 0;
3710                 response.mappings[i].sz = 0;
3711             }
3712
3713         }
3714         
3715
3716     }
3717
3718     // Not found, respond accordingly
3719     if(resolved == 0) {
3720         found_vma = 0;
3721         found_pte = 0;
3722         //PSPRINTK("Mapping not found\n");
3723         response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
3724         response.header.prio = PCN_KMSG_PRIO_NORMAL;
3725         response.tgroup_home_cpu = w->tgroup_home_cpu;
3726         response.tgroup_home_id = w->tgroup_home_id;
3727         response.requester_pid = w->requester_pid;
3728         response.address = address;
3729         response.present = 0;
3730         response.vaddr_start = 0;
3731         response.vaddr_size = 0;
3732         response.path[0] = '\0';
3733
3734         // Handle case where vma was present but no pte.
3735         // Optimization, if no pte, and it is specified not to
3736         // send the path, we can instead report that the mapping
3737         // was not found at all.  This will result in sending a 
3738         // nonpresent_mapping_response_t, which is much smaller
3739         // than a mapping_response_t.
3740         if(vma && w->need_vma) {
3741             //PSPRINTK("But vma present\n");
3742             found_vma = 1;
3743             response.present = 1;
3744             response.vaddr_start = vma->vm_start;
3745             response.vaddr_size = vma->vm_end - vma->vm_start;
3746             response.prot = vma->vm_page_prot;
3747             response.vm_flags = vma->vm_flags;
3748              if(vma->vm_file == NULL || !w->need_vma) {
3749                  response.path[0] = '\0';
3750              } else {    
3751                  plpath = d_path(&vma->vm_file->f_path,lpath,512);
3752                  strcpy(response.path,plpath);
3753                  response.pgoff = vma->vm_pgoff;
3754              }
3755         }
3756     }
3757
3758     // Send response
3759     if(response.present) {
3760 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3761         mapping_response_send_time_start = native_read_tsc();
3762         response.send_time = mapping_response_send_time_start;
3763 #endif
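        // The length passed below trims the path buffer to the bytes
        // actually used (strlen(path) + 1), so only the meaningful portion
        // of the large mapping_response_t is put on the wire.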
3764         DO_UNTIL_SUCCESS(pcn_kmsg_send_long(w->from_cpu,
3765                             (struct pcn_kmsg_long_message*)(&response),
3766                             sizeof(mapping_response_t) - 
3767                             sizeof(struct pcn_kmsg_hdr) -   //
3768                             sizeof(response.path) +         // Chop off the end of the path
3769                             strlen(response.path) + 1));    // variable to save bandwidth.
3770 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3771         mapping_response_send_time_end = native_read_tsc();
3772 #endif
3773     } else {
3774         // This is an optimization to get rid of the _long send 
3775         // which is a time sink.
3776         nonpresent_mapping_response_t nonpresent_response;
3777         nonpresent_response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE_NONPRESENT;
3778         nonpresent_response.header.prio = PCN_KMSG_PRIO_NORMAL;
3779         nonpresent_response.tgroup_home_cpu = w->tgroup_home_cpu;
3780         nonpresent_response.tgroup_home_id  = w->tgroup_home_id;
3781         nonpresent_response.requester_pid = w->requester_pid;
3782         nonpresent_response.address = w->address;
3783 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3784         mapping_response_send_time_start = native_read_tsc();
3785         nonpresent_response.send_time = mapping_response_send_time_start;
3786 #endif
3787
3788         DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)(&nonpresent_response)));
3789
3790 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3791         mapping_response_send_time_end = native_read_tsc();
3792 #endif
3793
3794     }
3795     
3796     // proc
3797 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3798     PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_RESPONSE_SEND_TIME,
3799             mapping_response_send_time_end - mapping_response_send_time_start);
3800 #endif
3801
3802     kfree(work);
3803
3804     // Perf stop
3805     if(used_saved_mm && found_vma && found_pte) {
3806         PERF_MEASURE_STOP(&perf_process_mapping_request,
3807                 "Saved MM + VMA + PTE",
3808                 perf);
3809     } else if (used_saved_mm && found_vma && !found_pte) {
3810         PERF_MEASURE_STOP(&perf_process_mapping_request,
3811                 "Saved MM + VMA + no PTE",
3812                 perf);
3813     } else if (used_saved_mm && !found_vma) {
3814         PERF_MEASURE_STOP(&perf_process_mapping_request,
3815                 "Saved MM + no VMA",
3816                 perf);
3817     } else if (!used_saved_mm && found_vma && found_pte) {
3818         PERF_MEASURE_STOP(&perf_process_mapping_request,
3819                 "VMA + PTE",
3820                 perf);
3821     } else if (!used_saved_mm && found_vma && !found_pte) {
3822         PERF_MEASURE_STOP(&perf_process_mapping_request,
3823                 "VMA + no PTE",
3824                 perf);
3825     } else if (!used_saved_mm && !found_vma) {
3826         PERF_MEASURE_STOP(&perf_process_mapping_request,
3827                 "no VMA",
3828                 perf);
3829     } else {
3830         PERF_MEASURE_STOP(&perf_process_mapping_request,"ERR",perf);
3831     }
3832
3833     current->enable_distributed_munmap = 1;
3834     current->enable_do_mmap_pgoff_hook = 1;
3835
3836 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3837     {
3838     unsigned long long mapping_request_processing_time;
3839     mapping_request_processing_time_end = native_read_tsc();
3840     mapping_request_processing_time = mapping_request_processing_time_end - 
3841                                         mapping_request_processing_time_start;
3842     PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_REQUEST_PROCESSING_TIME,
3843             mapping_request_processing_time);
3844     }
3845 #endif
3846
3847     return;
3848 }
3849
3850 unsigned long long perf_aa, perf_bb, perf_cc, perf_dd, perf_ee;
3851
3852 /**
3853  * @brief Process notification that a task has exited.  This function
3854  * sets the "return disposition" of the task, then wakes the task.
3855  * In this case, the "return disposition" specifies that the task
3856  * is exiting.  When the task resumes execution, it consults its
3857  * return disposition and acts accordingly - and invokes do_exit.
3858  *
3859  * <MEASURE perf_process_exit_item>
3860  */
3861 void process_exit_item(struct work_struct* work) {
3862     exit_work_t* w = (exit_work_t*) work;
3863     pid_t pid = w->pid;
3864     struct task_struct *task = w->task;
3865
3866     int perf = PERF_MEASURE_START(&perf_process_exit_item);
3867 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3868     unsigned long long end_time;
3869     unsigned long long total_time;
3870     unsigned long long start_time = native_read_tsc();
3871 #endif
3872
3873     if(unlikely(!task)) {
3874         printk("%s: ERROR - empty task\n",__func__);
3875         kfree(work);
3876         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3877         return;
3878     }
3879
3880     if(unlikely(task->pid != pid)) {
3881         printk("%s: ERROR - wrong task picked\n",__func__);
3882         kfree(work);
3883         PERF_MEASURE_STOP(&perf_process_exit_item,"ERROR",perf);
3884         return;
3885     }
3886     
3887     PSPRINTK("%s: process to kill %ld\n", __func__, (long)pid);
3888     PSPRINTK("%s: found task to kill, killing\n", __func__);
3889     PSPRINTK("%s: killing task - is_last_tgroup_member{%d}\n",
3890             __func__,
3891             w->is_last_tgroup_member);
3892
3893     // Now we're executing locally, so update our records
3894     //if(task->t_home_cpu == _cpu && task->t_home_id == task->pid)
3895     //    task->represents_remote = 0;
3896
3897     // Set the return disposition
3898     task->return_disposition = RETURN_DISPOSITION_EXIT;
3899
3900     wake_up_process(task);
3901
3902     kfree(work);
3903
3904     PERF_MEASURE_STOP(&perf_process_exit_item," ",perf);
3905
3906 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3907     end_time = native_read_tsc();
3908     total_time = end_time - start_time;
3909     PS_PROC_DATA_TRACK(PS_PROC_DATA_EXIT_NOTIFICATION_PROCESSING_TIME,total_time);
3910 #endif
3911 }
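
/*
 * Note: the woken task is expected to inspect task->return_disposition when
 * it resumes (presumably in the migration return path elsewhere in this
 * file) and, seeing RETURN_DISPOSITION_EXIT, to invoke do_exit() itself.
 */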
3912
3913 /**
3914  * @brief Process a group exit request.  This function
3915  * issues SIGKILL to all locally executing members of the specified
3916  * distributed thread group.  Only tasks that are actively
3917  * executing on this CPU will receive the SIGKILL.  Shadow tasks
3918  * will not be sent SIGKILL.  Group exit requests are sent to
3919  * all CPUs, so for shadow tasks, another CPU will issue the
3920  * SIGKILL.  When that occurs, the normal exit process will be
3921  * initiated for that task, and eventually, all of its shadow
3922  * tasks will be killed.
3923  */
3924 void process_group_exit_item(struct work_struct* work) {
3925     group_exit_work_t* w = (group_exit_work_t*) work;
3926     struct task_struct *task = NULL;
3927     struct task_struct *g;
3928 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3929     unsigned long long end_time;
3930     unsigned long long total_time;
3931     unsigned long long start_time = native_read_tsc();
3932 #endif
3933
3934     //int perf = PERF_MEASURE_START(&perf_process_group_exit_item);
3935     PSPRINTK("%s: entered\n",__func__);
3936     PSPRINTK("exit group target id{%d}, cpu{%d}\n",
3937             w->tgroup_home_id, w->tgroup_home_cpu);
3938
     read_lock(&tasklist_lock);
3939     do_each_thread(g,task) {
3940         if(task->tgroup_home_id == w->tgroup_home_id &&
3941            task->tgroup_home_cpu == w->tgroup_home_cpu) {
3942             
3943             if(!task->represents_remote) {
3944                 // active, send sigkill
3945                 PSPRINTK("Issuing SIGKILL to pid %d\n",task->pid);
3946                 kill_pid(task_pid(task), SIGKILL, 1);
3947             }
3948
3949             // If it is a shadow task, it will eventually
3950             // get killed when its corresponding active task
3951             // is killed.
3952
3953         }
3954     } while_each_thread(g,task);
     read_unlock(&tasklist_lock);
3955     
3956     kfree(work);
3957
3958     PSPRINTK("%s: exiting\n",__func__);
3959     //PERF_MEASURE_STOP(&perf_process_group_exit_item," ",perf);
3960
3961 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3962     end_time = native_read_tsc();
3963     total_time = end_time - start_time;
3964     PS_PROC_DATA_TRACK(PS_PROC_DATA_GROUP_EXIT_NOTIFICATION_PROCESSING_TIME,total_time);
3965 #endif
3966
3967 }
3968
3969
3970 /**
3971  * @brief Process request to unmap a region of memory from a distributed
3972  * thread group.  Look for local thread group members and carry out the
3973  * requested action.
3974  *
3975  * <MEASURE perf_process_munmap_request>
3976  */
3977 void process_munmap_request(struct work_struct* work) {
3978     munmap_request_work_t* w = (munmap_request_work_t*)work;
3979     munmap_response_t response;
3980     struct task_struct *task, *g;
3981     data_header_t *curr = NULL;
3982     mm_data_t* mm_data = NULL;
3983     mm_data_t* to_munmap = NULL;
3984     struct mm_struct* mm_to_munmap = NULL;
3985 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
3986     unsigned long long end_time;
3987     unsigned long long total_time;
3988     unsigned long long start_time = native_read_tsc();
3989 #endif
3990     int perf = PERF_MEASURE_START(&perf_process_munmap_request);
3991
3992     PSPRINTK("%s: entered\n",__func__);
3993
3994     current->enable_distributed_munmap = 0;
3995     current->enable_do_mmap_pgoff_hook = 0;
3996
3997     // munmap the specified region in the specified thread group
3998     read_lock(&tasklist_lock);
3999     do_each_thread(g,task) {
4000
4001         // Look for the thread group
4002         if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
4003            task->tgroup_home_id  == w->tgroup_home_id &&
4004            !(task->flags & PF_EXITING)) {
4005
4006             // Take note of the fact that an mm exists on the remote kernel
4007             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
4008             
4009             if (task->mm) {
4010                 mm_to_munmap = task->mm;
4011             }
4012             else
4013                 printk("%s: task has no mm\n", __func__);
4014
4015             goto done; 
4016         }
4017     } while_each_thread(g,task);
4018 done:
4019     read_unlock(&tasklist_lock);
4020
4021     if(mm_to_munmap) {
4022         PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
4023         do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
4024         PS_UP_WRITE(&mm_to_munmap->mmap_sem);
4025     }
4026
4027
4028     // munmap the specified region in any saved mm's as well.
4029     // This keeps old mappings saved in the mm of dead thread
4030     // group members from being resolved accidentally after
4031     // being munmap()ped, as that would cause security/coherency
4032     // problems.
4033     PS_SPIN_LOCK(&_saved_mm_head_lock);
4034     curr = _saved_mm_head;
4035     while(curr) {
4036         mm_data = (mm_data_t*)curr;
4037         if(mm_data->tgroup_home_cpu == w->tgroup_home_cpu &&
4038            mm_data->tgroup_home_id  == w->tgroup_home_id) {
4039            
4040             to_munmap = mm_data;
4041             goto found;
4042
4043         }
4044         curr = curr->next;
4045     }
4046 found:
4047     PS_SPIN_UNLOCK(&_saved_mm_head_lock);
4048
4049     // to_munmap may legitimately be NULL (no saved mm for this group),
4050     // but a saved entry with a NULL mm indicates a bug.
4051     if (to_munmap && to_munmap->mm) {
4052         PS_DOWN_WRITE(&to_munmap->mm->mmap_sem);
4053         do_munmap(to_munmap->mm, w->vaddr_start, w->vaddr_size);
4054         PS_UP_WRITE(&to_munmap->mm->mmap_sem);
4055     }
4056     else if (to_munmap)
4057         printk(KERN_ALERT"%s: ERROR: saved mm entry %p has NULL mm\n",
4058                __func__, to_munmap);
4059
4060     // Construct response
4061     response.header.type = PCN_KMSG_TYPE_PROC_SRV_MUNMAP_RESPONSE;
4062     response.header.prio = PCN_KMSG_PRIO_NORMAL;
4063     response.tgroup_home_cpu = w->tgroup_home_cpu;
4064     response.tgroup_home_id = w->tgroup_home_id;
4065     response.requester_pid = w->requester_pid;
4066     response.vaddr_start = w->vaddr_start;
4067     response.vaddr_size = w->vaddr_size;
4068     
4069     // Send response
4070     DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,
4071                         (struct pcn_kmsg_message*)(&response)));
4072
4073     current->enable_distributed_munmap = 1;
4074     current->enable_do_mmap_pgoff_hook = 1;
4075     
4076     kfree(work);
4077     
4078 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
4079     end_time = native_read_tsc();
4080     total_time = end_time - start_time;
4081     PS_PROC_DATA_TRACK(PS_PROC_DATA_MUNMAP_REQUEST_PROCESSING_TIME,total_time);
4082 #endif
4083
4084     PERF_MEASURE_STOP(&perf_process_munmap_request," ",perf);
4085 }
4086
4087 /**
4088  * @brief Process request to change protection of a region of memory in
4089  * a distributed thread group.  Look for local thread group members and
4090  * carry out the requested action.
4091  *
4092  * <MEASURE perf_process_mprotect_item>
4093  */
4094 void process_mprotect_item(struct work_struct* work) {
4095     mprotect_response_t response;
4096     mprotect_work_t* w = (mprotect_work_t*)work;
4097     int tgroup_home_cpu = w->tgroup_home_cpu;
4098     int tgroup_home_id  = w->tgroup_home_id;
4099     unsigned long start = w->start;
4100     size_t len = w->len;
4101     unsigned long prot = w->prot;
4102     struct task_struct* task, *g;
4103     data_header_t* curr = NULL;
4104     mm_data_t* mm_data = NULL;
4105     mm_data_t* to_munmap = NULL;
4106     struct mm_struct *mm_to_munmap = NULL;
4107
4108     int perf = PERF_MEASURE_START(&perf_process_mprotect_item);
4109
4110 #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
4111     unsigned long long end_time;
4112     unsigned long long total_time;
4113     unsigned long long start_time = native_read_tsc();
4114 #endif
4115    
4116     current->enable_distributed_munmap = 0;
4117     current->enable_do_mmap_pgoff_hook = 0;
4118
4119     // Find the task
4120     read_lock(&tasklist_lock);
4121     do_each_thread(g,task) {
4122
4123         // Look for the thread group
4124         if (task->tgroup_home_cpu == tgroup_home_cpu &&
4125             task->tgroup_home_id  == tgroup_home_id &&
4126             !(task->flags & PF_EXITING)) {