Preparing to commit with clustering version
author Akshay Giridhar <akshay87@vt.edu>
Tue, 24 Jun 2014 18:51:20 +0000 (14:51 -0400)
committer Akshay Giridhar <akshay87@vt.edu>
Tue, 24 Jun 2014 18:51:20 +0000 (14:51 -0400)
19 files changed:
arch/x86/include/asm/futex.h
arch/x86/kernel/entry_64.S
arch/x86/kernel/process_64.c
arch/x86/mm/fault.c
include/linux/kmod.h
include/linux/pcn_kmsg.h
include/linux/process_server.h
include/linux/syscalls.h
include/popcorn/global_spinlock.h
kernel/exit.c
kernel/futex.c
kernel/futex_remote.c
kernel/futex_remote.h
kernel/global_spinlock.c
kernel/kmod.c
kernel/process_server.c
mm/mmap.c
mm/mprotect.c
mm/mremap.c

diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index d09bb03..c9b290b 100644
@@ -43,7 +43,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
        int cmp = (encoded_op >> 24) & 15;
        int oparg = (encoded_op << 8) >> 20;
        int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret, tem;
+       int oldval = 0, ret = 0, tem;
+
 
        if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
                oparg = 1 << oparg;
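The decoding at the top of futex_atomic_op_inuser() unpacks four fields from one 32-bit word: cmp in bits 24-27, oparg in bits 12-23 and cmparg in bits 0-11, with the shift-left/shift-right pairs sign-extending the two 12-bit arguments. A minimal user-space sketch of just that decoding (not part of the commit; the sample op word and the reliance on GCC's arithmetic right shift are assumptions):

    #include <stdio.h>

    int main(void)
    {
        int encoded_op = (2 << 24) | (8 << 12) | 0; /* hypothetical: cmp=2, oparg=8, cmparg=0 */
        int cmp    = (encoded_op >> 24) & 15;
        int oparg  = (encoded_op << 8) >> 20;   /* arithmetic shift sign-extends the 12-bit field */
        int cmparg = (encoded_op << 20) >> 20;

        /* When FUTEX_OP_OPARG_SHIFT is set in the top nibble, the kernel
         * additionally rewrites oparg as (1 << oparg), as in the hunk above. */
        printf("cmp=%d oparg=%d cmparg=%d\n", cmp, oparg, cmparg);
        return 0;
    }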
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8c4f6b8..dd1f182 100644
@@ -56,7 +56,6 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 
-
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
 #define AUDIT_ARCH_X86_64      (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
@@ -1206,22 +1205,6 @@ ENTRY(kernel_execve)
        CFI_ENDPROC
 END(kernel_execve)
 
-ENTRY(kernel_import_task)
-       CFI_STARTPROC
-       FAKE_STACK_FRAME $0
-       SAVE_ALL
-       movq %rsp,%rcx
-       call sys_process_server_import_task
-       movq %rax, RAX(%rsp)
-       RESTORE_REST
-       testq %rax,%rax
-       je int_ret_from_sys_call
-       RESTORE_ARGS
-       UNFAKE_STACK_FRAME
-       ret
-       CFI_ENDPROC
-END(kernel_import_task)
-
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
        CFI_STARTPROC
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e993649..55f5087 100644
@@ -367,52 +367,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
                            __USER_CS, __USER_DS, 0);
 }
 
-static bool __user_addr (unsigned long x ) {
-    return (x < PAGE_OFFSET);
-}
-void start_remote_thread(struct pt_regs *regs){
-    unsigned int fsindex, gsindex;
-    unsigned short es,ds;
-
-    savesegment(fs, fsindex);
-    if ( !(current->thread.fs) || !(__user_addr(current->thread.fs)) ) {
-        printk(KERN_ERR "%s: ERROR corrupted fs base address %lu\n", __func__, current->thread.fs);
-    }
-
-    if (unlikely(fsindex | current->thread.fsindex))
-        loadsegment(fs, current->thread.fsindex);
-    else
-        loadsegment(fs, 0);
-    if (current->thread.fs)
-        checking_wrmsrl(MSR_FS_BASE, current->thread.fs);
-
-    savesegment(gs, gsindex); //read the gs register in gsindex variable
-    if ( !(current->thread.gs) && !(__user_addr(current->thread.gs)) ) {
-        printk(KERN_ERR "%s: ERROR corrupted gs base address %lu\n", __func__, current->thread.gs);
-    }
-
-    if (unlikely(gsindex | current->thread.gsindex))
-        load_gs_index(current->thread.gsindex);
-    else
-        load_gs_index(0);
-    if (current->thread.gs)
-        checking_wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
-
-    savesegment(es, es);
-    if (unlikely(es | current->thread.es))
-        loadsegment(es, current->thread.es);
-
-    savesegment(ds, ds);
-    if (unlikely(ds | current->thread.ds))
-        loadsegment(ds, current->thread.ds);
-
-    percpu_write(old_rsp, current->thread.usersp);
-
-    regs->sp = current->thread.usersp;
-    free_thread_xstate(current);
-
-}
-
 #ifdef CONFIG_IA32_EMULATION
 void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
 {
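The deleted start_remote_thread() restored a migrated thread's segment state on the destination kernel. Its recurring `if (unlikely(live | saved))` tests are a branch-free way of asking whether either selector is non-zero; a small user-space sketch of that one idiom (stand-in values, not the removed kernel code):

    #include <stdio.h>

    /* (a | b) is non-zero iff a != 0 or b != 0, so one test decides
     * whether the segment register needs to be reloaded at all. */
    static void maybe_reload(unsigned int live_sel, unsigned int saved_sel)
    {
        if (live_sel | saved_sel)
            printf("reload selector to %#x\n", saved_sel);
        else
            printf("both selectors zero, skip the reload\n");
    }

    int main(void)
    {
        maybe_reload(0, 0);     /* common fast path: nothing to do */
        maybe_reload(0x33, 0);  /* live selector set: must reload */
        return 0;
    }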
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 97fff65..e0aa941 100644
@@ -1091,23 +1091,18 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        }
 
     vma = find_vma(mm, address);
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-    process_server_acquire_heavy_lock();
-#else
-    process_server_acquire_page_lock(address);
-#endif
        if (unlikely(!vma)) {
         // Multikernel - see if another member of the thread group has mapped
         // this vma
-        if(process_server_pull_remote_mappings(mm,NULL,address,flags,&vma,error_code)) {
-            goto ret;
+        if(process_server_try_handle_mm_fault(mm,NULL,address,flags,&vma,error_code)) {
+            return;
         }
                if(!vma) {
             bad_area(regs, error_code, address);
-                   goto ret;
+                   return;
         }
-       } else if(process_server_pull_remote_mappings(mm,vma,address,flags,&vma,error_code)) {
-        goto ret;
+       } else if(process_server_try_handle_mm_fault(mm,vma,address,flags,&vma,error_code)) {
+        return;
     }
 
        /*
@@ -1130,7 +1125,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
                if ((error_code & PF_USER) == 0 &&
                    !search_exception_tables(regs->ip)) {
                        bad_area_nosemaphore(regs, error_code, address);
-                       goto ret;
+                       return;
                }
 retry:
                down_read(&mm->mmap_sem);
@@ -1143,11 +1138,26 @@ retry:
                might_sleep();
        }
 
+       /*vma = find_vma(mm, address);
+       if (unlikely(!vma)) {
+        // Multikernel - see if another member of the thread group has mapped
+        // this vma
+        if(process_server_try_handle_mm_fault(mm,NULL,address,flags,&vma)) {
+            return;
+        }
+               if(!vma) {
+            bad_area(regs, error_code, address);
+                   return;
+        }
+       } else if(process_server_try_handle_mm_fault(mm,vma,address,flags,&vma)) {
+        return;
+    }*/
+
        if (likely(vma->vm_start <= address))
                goto good_area;
        if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
                bad_area(regs, error_code, address);
-               goto ret;
+               return;
        }
        if (error_code & PF_USER) {
                /*
@@ -1158,12 +1168,12 @@ retry:
                 */
                if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
                        bad_area(regs, error_code, address);
-                       goto ret;
+                       return;
                }
        }
        if (unlikely(expand_stack(vma, address))) {
                bad_area(regs, error_code, address);
-               goto ret;
+               return;
        }
 
        /*
@@ -1173,7 +1183,7 @@ retry:
 good_area:
        if (unlikely(access_error(error_code, vma))) {
                bad_area_access_error(regs, error_code, address);
-               goto ret;
+               return;
        }
 
        /*
@@ -1185,7 +1195,7 @@ good_area:
 
        if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
                if (mm_fault_error(regs, error_code, address, fault))
-                       goto ret;
+                       return;
        }
 
        /*
@@ -1214,12 +1224,4 @@ good_area:
        check_v8086_mode(regs, address, tsk);
 
        up_read(&mm->mmap_sem);
-
-ret:
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-    process_server_release_heavy_lock();
-#else
-    process_server_release_page_lock(address);
-#endif
-    return;
 }
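With the per-page distributed lock gone, every exit path that used to funnel through the `ret:` label can return directly. The surviving contract of the hook (renamed to process_server_try_handle_mm_fault()) is: return non-zero when the fault was satisfied by pulling the mapping from a remote kernel, zero to let the normal VMA checks continue. A user-space mock of that contract (stand-in types and bodies, not the kernel implementation):

    #include <stdio.h>

    struct vma { int mapped; };

    /* Mock: non-zero means "fault satisfied remotely", zero means
     * "fall through to the local VMA checks". */
    static int try_handle_mm_fault(struct vma *vma, unsigned long addr,
                                   struct vma **vma_out)
    {
        if (vma && vma->mapped)
            return 0;       /* a usable local mapping exists */
        *vma_out = NULL;    /* the real hook may install a pulled VMA here */
        return 1;
    }

    static void do_fault(struct vma *vma, unsigned long addr)
    {
        struct vma *out = vma;

        if (try_handle_mm_fault(vma, addr, &out)) {
            printf("fault at %#lx handled remotely\n", addr);
            return;         /* direct return: no lock left to release */
        }
        printf("fault at %#lx handled locally\n", addr);
    }

    int main(void)
    {
        struct vma local = { 1 };

        do_fault(&local, 0x1000);
        do_fault(NULL, 0x2000);
        return 0;
    }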
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index f935012..bdace9b 100644
@@ -28,7 +28,6 @@
 
 /*mklinux_akshay*/
 #include <linux/signal.h>
-#include <linux/process_server.h>
 
 #define KMOD_PATH_LEN 256
 
@@ -73,7 +72,6 @@ struct subprocess_info {
     /**
      * multikernel
      */
-#ifdef PROCESS_SERVER_USE_KMOD
     int delegated; // Is this subprocess a delegated worker 
                    // working on behalf of another cpu?
     pid_t remote_pid;
@@ -85,7 +83,6 @@ struct subprocess_info {
     pid_t origin_pid;
     /*mklinux_akshay*/
 
-#endif
 };
 
 /* Allocate a subprocess_info structure */
diff --git a/include/linux/pcn_kmsg.h b/include/linux/pcn_kmsg.h
index 5eb6c9f..a973c90 100644
@@ -18,6 +18,7 @@
 /* BOOKKEEPING */
 
 #define POPCORN_MAX_MCAST_CHANNELS 32
+#define LG_SEQNUM_SIZE 7
 
 struct pcn_kmsg_mcast_wininfo {
        volatile unsigned char lock;
@@ -112,17 +113,6 @@ enum pcn_kmsg_type {
        PCN_KMSG_TYPE_PROC_SRV_MUNMAP_REQUEST,
        PCN_KMSG_TYPE_PROC_SRV_MUNMAP_RESPONSE,
     PCN_KMSG_TYPE_PROC_SRV_BACK_MIGRATION,
-    PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_REQUEST,
-    PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_REQUEST_RANGE,
-    PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RESPONSE,
-    PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RESPONSE_RANGE,
-    PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RELEASE,
-    PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RELEASE_RANGE,
-    PCN_KMSG_TYPE_PROC_SRV_GET_COUNTER_PHYS_REQUEST,
-    PCN_KMSG_TYPE_PROC_SRV_GET_COUNTER_PHYS_RESPONSE,
-    PCN_KMSG_TYPE_PROC_SRV_STATS_CLEAR,
-    PCN_KMSG_TYPE_PROC_SRV_STATS_QUERY,
-    PCN_KMSG_TYPE_PROC_SRV_STATS_RESPONSE,
        PCN_KMSG_TYPE_PCN_PERF_START_MESSAGE,
        PCN_KMSG_TYPE_PCN_PERF_END_MESSAGE,
        PCN_KMSG_TYPE_PCN_PERF_CONTEXT_MESSAGE,
@@ -142,37 +132,24 @@ enum pcn_kmsg_prio {
        PCN_KMSG_PRIO_NORMAL
 };
 
-#define __READY_SIZE 1
-#define LG_SEQNUM_SIZE  (8 - __READY_SIZE)
-
 /* Message header */
 struct pcn_kmsg_hdr {
        unsigned int from_cpu   :8; // b0
-       
+
        enum pcn_kmsg_type type :8; // b1
-       
+
        enum pcn_kmsg_prio prio :5; // b2
        unsigned int is_lg_msg  :1;
        unsigned int lg_start   :1;
        unsigned int lg_end     :1;
+       unsigned long long_number;              // b3 .. b10
 
-       unsigned long long_number; // b3 .. b10
-       
-       unsigned int lg_seqnum  :LG_SEQNUM_SIZE; // b11
-       unsigned int __ready    :__READY_SIZE;
+       unsigned int lg_seqnum  :LG_SEQNUM_SIZE;// b11
+       //volatile unsigned int ready   :1;
+       //volatile unsigned int ready   :1;
 }__attribute__((packed));
 
-//#if ( &((struct pcn_kmsg_hdr*)0)->ready != 12 )
-//# error "ready is not the last byte of the struct"
-//#endif
-
-// TODO cache size can be retrieved by the compiler, put it here
-#define CACHE_LINE_SIZE 128
 //#define PCN_KMSG_PAYLOAD_SIZE 60
-#define PCN_KMSG_PAYLOAD_SIZE (CACHE_LINE_SIZE - sizeof(struct pcn_kmsg_hdr))
-
-#define MAX_CHUNKS ((1 << LG_SEQNUM_SIZE) -1)
-#define PCN_KMSG_LONG_PAYLOAD_SIZE (MAX_CHUNKS*PCN_KMSG_PAYLOAD_SIZE)
+#define PCN_KMSG_PAYLOAD_SIZE (64-sizeof(struct pcn_kmsg_hdr))
 
 /* The actual messages.  The expectation is that developers will create their
    own message structs with the payload replaced with their own fields, and then
@@ -185,19 +162,12 @@ struct pcn_kmsg_hdr {
 struct pcn_kmsg_message {
        struct pcn_kmsg_hdr hdr;
        unsigned char payload[PCN_KMSG_PAYLOAD_SIZE];
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
-
-struct pcn_kmsg_reverse_message {
-       unsigned char payload[PCN_KMSG_PAYLOAD_SIZE];
-       struct pcn_kmsg_hdr hdr;
-       volatile unsigned long last_ticket;
-       volatile unsigned char ready;
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
+}__attribute__((packed)) __attribute__((aligned(64)));
 
 /* Struct for sending long messages (>60 bytes payload) */
 struct pcn_kmsg_long_message {
        struct pcn_kmsg_hdr hdr;
-       unsigned char payload[PCN_KMSG_LONG_PAYLOAD_SIZE];
+       unsigned char payload[512];
 }__attribute__((packed));
 
 /* List entry to copy message into and pass around in receiving kernel */
@@ -207,6 +177,13 @@ struct pcn_kmsg_container {
 }__attribute__((packed));
 
 
+struct pcn_kmsg_reverse_message {
+       unsigned char payload[PCN_KMSG_PAYLOAD_SIZE];
+       struct pcn_kmsg_hdr hdr;
+       volatile unsigned char ready;
+       volatile unsigned long last_ticket;
+}__attribute__((packed)) __attribute__((aligned(64)));
+
 
 /* TYPES OF MESSAGES */
 
@@ -216,13 +193,13 @@ struct pcn_kmsg_checkin_message {
        unsigned long window_phys_addr;
        unsigned char cpu_to_add;
        char pad[51];
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
+}__attribute__((packed)) __attribute__((aligned(64)));
 
 
 
 /* WINDOW / BUFFERING */
 
-#define PCN_KMSG_RBUF_SIZE 256
+#define PCN_KMSG_RBUF_SIZE 64
 
 struct pcn_kmsg_window {
        volatile unsigned long head;
@@ -281,7 +258,7 @@ struct pcn_kmsg_mcast_message {
        unsigned int num_members;
        unsigned long window_phys_addr;
        char pad[28];
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
+}__attribute__((packed)) __attribute__((aligned(64)));
 
 struct pcn_kmsg_mcast_window {
        volatile unsigned long head;
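The payload size is now derived from a 64-byte cache line minus the packed header: bytes 0-2 hold the bitfields, bytes 3-10 hold long_number, and byte 11 holds the 7-bit lg_seqnum, so the header is 12 bytes and the payload 52. A user-space mirror that checks this (assuming GCC packed-bitfield layout on an LP64 x86_64 target; not part of the commit):

    #include <stdio.h>

    #define LG_SEQNUM_SIZE 7

    /* Mirror of struct pcn_kmsg_hdr above (enums modeled as unsigned int). */
    struct hdr {
        unsigned int from_cpu  : 8;
        unsigned int type      : 8;
        unsigned int prio      : 5;
        unsigned int is_lg_msg : 1;
        unsigned int lg_start  : 1;
        unsigned int lg_end    : 1;
        unsigned long long_number;
        unsigned int lg_seqnum : LG_SEQNUM_SIZE;
    } __attribute__((packed));

    int main(void)
    {
        printf("sizeof(hdr)  = %zu\n", sizeof(struct hdr));      /* 12 */
        printf("payload size = %zu\n", 64 - sizeof(struct hdr)); /* 52 */
        return 0;
    }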
diff --git a/include/linux/process_server.h b/include/linux/process_server.h
index 0589f4c..b06b6e8 100644
@@ -8,11 +8,6 @@
 
 #ifndef _PROCESS_SERVER_H
 #define _PROCESS_SERVER_H
-
-
-/**
- * Constants
- */
 #define RETURN_DISPOSITION_NONE 0
 #define RETURN_DISPOSITION_EXIT 1
 #define RETURN_DISPOSITION_MIGRATE 2
 //#define SUPPORT_FOR_CLUSTERING
 #undef SUPPORT_FOR_CLUSTERING
 
-//#define PROCESS_SERVER_USE_KMOD
-#undef PROCESS_SERVER_USE_KMOD
-
-#define PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-//#undef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-
-//#define PROCESS_SERVER_USE_HEAVY_LOCK
-#undef PROCESS_SERVER_USE_HEAVY_LOCK
-
 /*
  * Migration hook.
  */
@@ -42,11 +28,7 @@ void process_server_do_return_disposition(void);
  * Utilities for other modules to hook
  * into the process server.
  */
-#ifdef PROCESS_SERVER_USE_KMOD
 int process_server_import_address_space(unsigned long* ip, unsigned long *sp, struct pt_regs* regs);
-#else
-// long sys_process_server_import_task(void* info, struct pt_regs* regs) 
-#endif
 int process_server_notify_delegated_subprocess_starting(pid_t pid, pid_t remote_pid, int remote_cpu);
 int process_server_do_exit(void);
 int process_server_do_group_exit(void);
@@ -54,11 +36,12 @@ int process_server_notify_mmap(struct file *file, unsigned long addr,
                                 unsigned long len, unsigned long prot,
                                 unsigned long flags, unsigned long pgoff);
 int process_server_notify_munmap(struct mm_struct *mm, unsigned long start, size_t len);
-int process_server_pull_remote_mappings(struct mm_struct *mm, struct vm_area_struct *vma,
+int process_server_try_handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                 unsigned long address, unsigned int flags,
                                 struct vm_area_struct **vma_out,
                                 unsigned long error_code);
 int process_server_do_munmap(struct mm_struct* mm, 
+                                struct vm_area_struct *vma,
                                 unsigned long start, 
                                 unsigned long len);
 void process_server_do_mprotect(struct task_struct* task,
@@ -69,11 +52,4 @@ int process_server_dup_task(struct task_struct* orig, struct task_struct* task);
 unsigned long process_server_do_mmap_pgoff(struct file *file, unsigned long addr,
                                            unsigned long len, unsigned long prot,
                                            unsigned long flags, unsigned long pgoff);
-int process_server_acquire_page_lock(unsigned long address);
-int process_server_acquire_page_lock_range(unsigned long address, size_t sz);
-int process_server_acquire_heavy_lock(void);
-void process_server_release_page_lock(unsigned long address);
-void process_server_release_page_lock_range(unsigned long address, size_t sz);
-void process_server_release_heavy_lock(void);
-
 #endif // _PROCESS_SERVER_H
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c8cb414..80b26ad 100644
@@ -830,8 +830,6 @@ asmlinkage long sys_syncfs(int fd);
 
 int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
 
-int kernel_import_task(void* info);
-
 
 asmlinkage long sys_perf_event_open(
                struct perf_event_attr __user *attr_uptr,
diff --git a/include/popcorn/global_spinlock.h b/include/popcorn/global_spinlock.h
index 698c2eb..5c5af3d 100644
@@ -14,7 +14,8 @@
 
 #define NORMAL_Q_PRIORITY 100
 
-
+#define LOCK_STAT 
+//#undef LOCK_STAT
 #define sp_hashfn(uaddr, pid)      \
          hash_long((unsigned long)uaddr + (unsigned long)pid, _SPIN_HASHBITS)
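sp_hashfn() folds the futex user address and the pid into one value and hashes it down to a bucket index. A user-space sketch (not part of the commit; hash_long() is modeled by the equivalent 64-bit golden-ratio multiply used by 3.x kernels, and _SPIN_HASHBITS, defined elsewhere in the tree, is assumed to be 8 here):

    #include <stdio.h>

    #define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
    #define _SPIN_HASHBITS 8   /* assumption: real value defined elsewhere */

    static unsigned long hash_long(unsigned long val, unsigned int bits)
    {
        return (val * GOLDEN_RATIO_PRIME_64) >> (64 - bits); /* LP64 assumed */
    }

    #define sp_hashfn(uaddr, pid) \
        hash_long((unsigned long)(uaddr) + (unsigned long)(pid), _SPIN_HASHBITS)

    int main(void)
    {
        unsigned long uaddr = 0x7f32a000; /* hypothetical futex address */
        printf("bucket = %lu\n", sp_hashfn(uaddr, 1234));
        return 0;
    }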
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 9609276..dc4dde1 100644
@@ -992,7 +992,14 @@ NORET_TYPE void do_exit(long code)
      * Multikernel
      */
     process_server_do_exit();
-
+#ifdef FUTEX_STAT
+    if (current->tgroup_distributed && current->pid == current->tgroup_home_id) {
+        print_wait_perf();
+        print_wake_perf();
+        print_wakeop_perf();
+        print_requeue_perf();
+    }
+#endif
        /*
         * tsk->flags are checked in the futex code to protect against
         * an exiting task cleaning up the robust pi futexes.
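The guard above prints the accumulated futex statistics once per distributed thread group, when the thread whose pid equals tgroup_home_id exits, and only when FUTEX_STAT is compiled in (futex_remote.h currently #undefs it). The counters follow a simple TSC-delta pattern; a user-space sketch of that pattern (x86 only; __rdtsc() stands in for the kernel's native_read_tsc()):

    #include <stdio.h>
    #include <x86intrin.h>

    static unsigned long long _wait; /* accumulated cycles */
    static unsigned int _wait_cnt;   /* number of operations */

    static void timed_op(void)
    {
        unsigned long long aa = __rdtsc();
        /* ... the futex wait path would run here ... */
        _wait += __rdtsc() - aa;
        _wait_cnt++;
    }

    int main(void)
    {
        int i;

        for (i = 0; i < 3; i++)
            timed_op();
        printf("counter{%u} wait time {%llu}\n", _wait_cnt, _wait);
        return 0;
    }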
diff --git a/kernel/futex.c b/kernel/futex.c
index 756a5bb..ab4f3fe 100644
 #include "futex_remote.h"
 #include <popcorn/global_spinlock.h>
 
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <linux/mmu_context.h>
+#include <linux/string.h>
 
-#define FUTEX_VERBOSE 1
+#define FUTEX_VERBOSE 1 
 #if FUTEX_VERBOSE
 #define FPRINTK(...) printk(__VA_ARGS__)
 #else
@@ -90,6 +94,12 @@ int __read_mostly futex_cmpxchg_enabled;
 #define FLAGS_HAS_TIMEOUT      0x04
 
 
+static unsigned long long _wait=0,_wake=0,_wakeop=0,_requeue=0;
+static unsigned int _wait_cnt=0,_wake_cnt=0,_wakeop_cnt=0,_requeue_cnt=0;
+static unsigned int _wait_err=0,_wake_err=0,_wakeop_err=0,_requeue_err=0;
+static unsigned long long perf_aa,perf_bb,perf_cc;
+
+
 //static
 const struct futex_q futex_q_init = {
        /* list gets initialized in queue_me()*/
@@ -978,6 +988,7 @@ __acquires(&value->_sp)
        int ret;
        u32 dval;
        int localticket_value;
+       int x = 0, y = 0;
 
        struct spin_key sk;
        __spin_key_init(&sk);
@@ -1014,7 +1025,8 @@ __acquires(&value->_sp)
        smp_mb();
 
        if(ret){
-               FPRINTK(KERN_ALERT "%s: check if there is wake up {%d} - {%d} \n",__func__,rq_ptr->wake_st,ret);
+               y  = get_user(x,uaddr);
+               //printk(KERN_ALERT "%s: uadrr{%lx} ti{%lx} check if there is wake up {%d} - {%d} {%d} {%d} \n",__func__,uaddr,localticket_value,rq_ptr->wake_st,ret,x,y);
                if(rq_ptr->wake_st == 1) //no need to queue it.
                {
                        ret = 0;
@@ -1033,6 +1045,39 @@ __acquires(&value->_sp)
        return ret;
 }
 
+int
+get_futex_key_tsk(u32 __user *uaddr, int fshared, union futex_key *key, int rw, struct task_struct * _tsk)
+{
+       unsigned long address = (unsigned long)uaddr;
+       struct mm_struct *mm = _tsk->mm;
+       struct task_struct *tsk = _tsk;
+       int pid=tsk->pid;
+       struct page *page, *page_head;
+       int err, ro = 0;
+       /*
+        * The futex address must be "naturally" aligned.
+        */
+       key->both.offset = address % PAGE_SIZE;
+       if (unlikely((address % sizeof(u32)) != 0))
+               return -EINVAL;
+       address -= key->both.offset;
+       /*
+        * PROCESS_PRIVATE futexes are fast.
+        * As the mm cannot disappear under us and the 'key' only needs
+        * virtual address, we dont even have to find the underlying vma.
+        * Note : We do have to check 'uaddr' is a valid user address,
+        *        but access_ok() should be faster than find_vma()
+        */
+       if (!fshared) {
+               if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
+                       return -EFAULT;
+               key->private.mm = mm;
+               key->private.address = address;
+               get_futex_key_refs(key);
+               return 0;
+       }
+       /*
+        * Shared futexes are not resolved by this helper; fail loudly
+        * instead of falling off the end of the function.
+        */
+       return -EFAULT;
+}
+
 static inline int global_queue_wake_lock(union futex_key *key,u32 __user * uaddr, unsigned int flags, int nr_wake,
                u32 bitset, int rflag, unsigned int fn_flags, unsigned long uaddr2, int nr_requeue, int cmpval)
 __acquires(&value->_sp)
@@ -1083,8 +1128,16 @@ __acquires(&value->_sp)
  */
 //static
 int
-futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset,unsigned int fn_flags)
+futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset,unsigned int fn_flags,struct task_struct *_tsk)
 {
+#ifdef FUTEX_STAT
+       unsigned long long wake_aa=0,wake_bb=0;
+       if(!_tsk && current->tgroup_distributed){
+               _wake_cnt++;
+               wake_aa = native_read_tsc();
+       }
+#endif
+
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        struct plist_head *head;
@@ -1099,19 +1152,19 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset,unsign
 
        fn_flags |= FLAGS_WAKECALL;
 
-//     printPTE(uaddr);
+       //printPTE(uaddr);
        if (!bitset)
                return -EINVAL;
 
-       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
+       ret = (_tsk == NULL) ? get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ) :
+                get_futex_key_tsk(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ, _tsk);
 
        if (unlikely(ret != 0))
                goto out;
 
 cont:
-       if(current->mm){
        hb = hash_futex(&key);
-       if(!(flags & FLAGS_SHARED) && current->tgroup_distributed  && !(fn_flags & FLAGS_REMOTECALL) ){
+       if( !_tsk && !(flags & FLAGS_SHARED) && current->tgroup_distributed  && !(fn_flags & FLAGS_REMOTECALL) ){
                g_errno= global_queue_wake_lock(&key,uaddr, flags & FLAGS_SHARED, nr_wake, bitset,
                                 0, fn_flags, 0,0,0);
                FPRINTK(KERN_ALERT " %s: err {%d}\n",__func__,g_errno);
@@ -1159,12 +1212,167 @@ cont:
        spin_unlock(&hb->lock);
        put_futex_key(&key);
        }
-       }
+       
 out:
+#ifdef FUTEX_STAT
+       if(!_tsk && current->tgroup_distributed){
+               wake_bb = native_read_tsc();
+               _wake += wake_bb - wake_aa;
+       }
+#endif
        FPRINTK(KERN_ALERT "%s: exit {%d}\n",__func__,current->pid);
        return ret;
 }
 
+
+struct vm_area_struct *getVMAfromUaddr_t(unsigned long uaddr, struct task_struct *t)
+{
+       unsigned long address = (unsigned long)uaddr;
+       unsigned long offset = address % PAGE_SIZE;
+
+       if (unlikely((address % sizeof(u32)) != 0))
+               return NULL;
+       address -= offset;
+       return find_extend_vma(t->mm, address);
+}
+
+static void dumpPTE(pte_t *ptep)
+{
+       int nx, rw, user, pwt, pcd, accessed, dirty, global;
+       unsigned long pfn;
+       pte_t pte = *ptep;
+
+       printk(KERN_ALERT "cpu{%d} pte val: 0x%lx\n",
+              smp_processor_id(), pte_val(pte));
+       pfn = pte_pfn(pte);
+       printk(KERN_ALERT "cpu{%d} pte pfn: 0x%lx\n", smp_processor_id(), pfn);
+
+       nx       = pte_flags(pte) & _PAGE_NX       ? 1 : 0;
+       rw       = pte_flags(pte) & _PAGE_RW       ? 1 : 0;
+       user     = pte_flags(pte) & _PAGE_USER     ? 1 : 0;
+       pwt      = pte_flags(pte) & _PAGE_PWT      ? 1 : 0;
+       pcd      = pte_flags(pte) & _PAGE_PCD      ? 1 : 0;
+       accessed = pte_flags(pte) & _PAGE_ACCESSED ? 1 : 0;
+       dirty    = pte_flags(pte) & _PAGE_DIRTY    ? 1 : 0;
+       global   = pte_flags(pte) & _PAGE_GLOBAL   ? 1 : 0;
+
+       printk("\tnx{%d} rw{%d} user{%d} pwt{%d} pcd{%d} accessed{%d} dirty{%d} "
+              "present{%d} global{%d} special{%d}\n",
+              nx, rw, user, pwt, pcd, accessed, dirty,
+              pte_present(pte), global, pte_special(pte));
+}
+
+
+void dump_pgtable(unsigned long address)
+{
+       pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+       pgd_t *pgd = base + pgd_index(address);
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       if (!pgd || !pgd_present(*pgd))
+               goto bad;
+       printk(KERN_ALERT "PGD %lx flags{%lx} ", pgd_val(*pgd), pgd_flags(*pgd));
+
+       pud = pud_offset(pgd, address);
+       if (!pud || !pud_present(*pud))
+               goto bad;
+       printk(KERN_ALERT "PUD %lx flags{%lx} ", pud_val(*pud), pud_flags(*pud));
+       if (pud_large(*pud))
+               goto out;
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd || !pmd_present(*pmd))
+               goto bad;
+       printk(KERN_ALERT "PMD %lx flags{%lx} ", pmd_val(*pmd), pmd_flags(*pmd));
+       if (pmd_large(*pmd))
+               goto out;
+
+       pte = pte_offset_kernel(pmd, address);
+       if (!pte || !pte_present(*pte))
+               goto bad;
+       printk(KERN_ALERT "PTE %lx", pte_val(*pte));
+out:
+       printk(KERN_ALERT "\n");
+       return;
+bad:
+       printk(KERN_ALERT "BAD\n");
+}
+
+pte_t *do_page_wlk(unsigned long address, struct task_struct *t)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *ptep;
+       struct mm_struct *_m = t->mm;
+
+       /* Caller is expected to serialize against _m->mmap_sem if needed. */
+       pgd = pgd_offset(_m, address);
+       if (!pgd_present(*pgd))
+               goto exit;
+       printk(KERN_ALERT "PGD %lx flags{%lx} ", pgd_val(*pgd), pgd_flags(*pgd));
+
+       pud = pud_offset(pgd, address);
+       if (!pud_present(*pud))
+               goto exit;
+       printk(KERN_ALERT "PUD %lx flags{%lx} ", pud_val(*pud), pud_flags(*pud));
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd))
+               goto exit;
+       printk(KERN_ALERT "PMD %lx flags{%lx} ", pmd_val(*pmd), pmd_flags(*pmd));
+
+       ptep = pte_offset_map(pmd, address);
+       if (!ptep || !pte_present(*ptep))
+               goto exit;
+       return ptep;
+exit:
+       return NULL;
+}
+
+
+void find_page(unsigned long uaddr, struct task_struct *t)
+{
+       pte_t *pt = do_page_wlk(uaddr, t);
+       struct vm_area_struct *_v;
+       struct page *pg;
+
+       printk(KERN_ALERT "%s: dump PTE with normal page walk using mm\n", __func__);
+       if (pt)
+               dumpPTE(pt);
+       printk(KERN_ALERT "%s: dump PTE with CR3\n", __func__);
+       dump_pgtable(uaddr);
+
+       _v = getVMAfromUaddr_t(uaddr, t);
+       if (!pt || !_v) {
+               printk(KERN_ALERT "%s: pg not so good news\n", __func__);
+               return;
+       }
+       pg = vm_normal_page(_v, uaddr, *pt);
+       if (!pg)
+               printk(KERN_ALERT "%s: pg not so good news\n", __func__);
+       else {
+               dump_page(pg);
+               printk(KERN_ALERT "%s: pg present vm{%lx} end{%lx} flags{%lx} pageprot{%lx}\n",
+                      __func__, _v->vm_start, _v->vm_end, _v->vm_flags,
+                      pgprot_val(_v->vm_page_prot));
+       }
+}
 /*
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
@@ -1173,23 +1381,38 @@ out:
 int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op,unsigned int fn_flags,struct task_struct * or_task)
 {
+
+#ifdef FUTEX_STAT
+       unsigned long long wakeop_aa=0,wakeop_bb=0;
+       if(!or_task && current->tgroup_distributed){
+               _wakeop_cnt++;
+               wakeop_aa = native_read_tsc();
+       }
+#endif
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head;
        struct futex_q *this, *next;
        int ret, op_ret;
+       struct page *page;
        unsigned long bp = stack_frame(current,NULL);
        int g_errno=0;
+       int x=0;
+       struct mm_struct *act=NULL,*old=NULL;
 
        fn_flags |= FLAGS_WAKEOPCALL;
        FPRINTK(KERN_ALERT " FUTEX_WAKE_OP: entry{%pB} pid {%d} comm{%s} uaddr1{%lx} uaddr2{%lx}  op(%d} \n",(void*) &bp,current->pid,current->comm,uaddr1,uaddr2,op);
 retry:
-       ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
+       ret = (or_task == NULL) ? get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ) :
+               get_futex_key_tsk(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ, or_task);
+       
 
        if (unlikely(ret != 0))
                goto out;
 
-       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+       ret = (or_task == NULL) ? get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE) :
+               get_futex_key_tsk(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_READ, or_task);
+
 
        if (unlikely(ret != 0))
                goto out_put_key1;
@@ -1198,18 +1421,33 @@ retry:
        hb2 = hash_futex(&key2);
 
 retry_private:
-       if(current->tgroup_distributed  && !(fn_flags & FLAGS_REMOTECALL) && !(flags & FLAGS_SHARED)){
-               g_errno= global_queue_wake_lock(&key1,uaddr1, flags & FLAGS_SHARED, nr_wake, 1,
+       
+       /*if((strcmp("cond",current->comm) == 0 ) || (strcmp("mut",current->comm) == 0) || (strcmp("bar",current->comm))){
+       printk(KERN_ALERT"%s: distriuted {%d} cpu{%d} pid{%d} uaddr{%lx} or{%d} ordiswsd{%d}\n",__func__,current->tgroup_distributed,smp_processor_id(),current->pid,uaddr1,(or_task) ? or_task->pid : 0, (or_task) ? or_task->tgroup_distributed : 0);
+       }*/
+       if(or_task){
+               use_mm(or_task->mm);
+       }
+
+       if( !or_task && current->tgroup_distributed  && !(fn_flags & FLAGS_REMOTECALL) && !(flags & FLAGS_SHARED)){
+               //struct vm_area_struct  *_v = getVMAfromUaddr(uaddr2);
+               //printk(KERN_ALERT "%s:comm{%s} start{%lx} end{%lx} vmastart{%lx} vmaend{%lx} vmaflag{%lx}\n",__func__,current->comm,current->mm->mmap->vm_start, current->mm->mmap->vm_end, _v->vm_start,_v->vm_end,_v->vm_flags);
+               //find_page(uaddr2,current);
+               g_errno= global_queue_wake_lock(&key1,uaddr1, flags & FLAGS_SHARED, nr_wake, 1,
                                 0, fn_flags,uaddr2,nr_wake2,op);
                ret = g_errno;
                FPRINTK(KERN_ALERT " %s: err {%d}\n",__func__,g_errno);
+#ifdef FUTEX_STAT
+               _wakeop_err++;
+#endif
                goto out;
        }
        else
        {
+               
+       //printk(KERN_ALERT"%s:  \n",__func__);
        double_lock_hb(hb1, hb2);
-       op_ret = futex_atomic_op_inuser(op, uaddr2);
-       FPRINTK(KERN_ALERT "op_ret{%d} \n",op_ret);
+       op_ret = futex_atomic_op_inuser(op, (u32 __user *)uaddr2);
        if (unlikely(op_ret < 0)) {
 
                double_unlock_hb(hb1, hb2);
@@ -1225,9 +1463,22 @@ retry_private:
                        ret = op_ret;
                        goto out_put_keys;
                }
+               if((fn_flags & FLAGS_REMOTECALL) && or_task && op_ret == -EFAULT){
+                 flush_cache_mm(or_task->mm);
+               }
 
                ret = ((fn_flags & FLAGS_REMOTECALL) && or_task)? fault_in_user_writeable_task(uaddr2,or_task):fault_in_user_writeable(uaddr2);
-               FPRINTK(KERN_ALERT "%s: faultinuaddr2 {%d} tsk{%d} comm{%s} \n",__func__,ret,or_task->pid,or_task->comm);
+               
+               if(or_task){
+               //struct vm_area_struct *_v = getVMAfromUaddr_t(uaddr2,or_task);
+               //printk(KERN_ALERT "%s: faultinuaddr2 op{%d} ret{%d} valu{%d} tsk{%d} comm{%s} start{%lx} end{%lx} vmstart{%lx} vmend{%lx} vmflag{%lx}\n",__func__,op,ret,x,or_task->pid,or_task->comm,or_task->mm->mmap->vm_start, or_task->mm->mmap->vm_end, (_v) ? _v->vm_start : 0,(_v) ? _v->vm_end : 0, (_v) ?_v->vm_flags : 0);
+               //find_page(uaddr2,or_task);
+               }
+
+               if((fn_flags & FLAGS_REMOTECALL) && or_task && op_ret == -EFAULT){
+                 flush_tlb_page(or_task->mm->mmap, uaddr2);
+                 unuse_mm(or_task->mm);
+               }
                if (ret)
                        goto out_put_keys;
 
@@ -1239,10 +1490,13 @@ retry_private:
                goto retry;
        }
 
+       if((fn_flags & FLAGS_REMOTECALL) && or_task){
+       unuse_mm(or_task->mm);
+       }
        if((fn_flags & FLAGS_REMOTECALL)){
        fn_flags  = 0;
        fn_flags |=FLAGS_WAKEOPCALL;//FLAGS_ORIGINCALL
-                                                                       }
+       }
 
 
 
@@ -1304,6 +1558,13 @@ out_put_keys:
 out_put_key1:
        put_futex_key(&key1);
 out:
+
+#ifdef FUTEX_STAT
+       if(!or_task && current->tgroup_distributed){
+               wakeop_bb = native_read_tsc();
+               _wakeop += wakeop_bb - wakeop_aa;
+       }
+#endif
        return ret;
 }
 
@@ -1458,14 +1719,23 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 //static
 int futex_requeue(u32 __user *uaddr1, unsigned int flags,
                         u32 __user *uaddr2, int nr_wake, int nr_requeue,
-                        u32 *cmpval, int requeue_pi,unsigned int fn_flags)
+                        u32 *cmpval, int requeue_pi,unsigned int fn_flags, struct task_struct * re_task)
 {
+
+#ifdef FUTEX_STAT
+       unsigned long long requeue_aa=0,requeue_bb=0;
+       if(!re_task && current->tgroup_distributed){
+               _requeue_cnt++;
+               requeue_aa = native_read_tsc();
+       }
+#endif
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        int drop_count = 0, task_count = 0, ret;
        struct futex_pi_state *pi_state = NULL;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head1;
        struct futex_q *this, *next;
+       struct page *pages;
        u32 curval2;
        int requeued=0;
        int g_errno=0;
@@ -1505,13 +1775,15 @@ retry:
                pi_state = NULL;
        }
 
-       ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
+       ret = (re_task == NULL) ? get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ) :
+               get_futex_key_tsk(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ, re_task);
 
        if (unlikely(ret != 0))
                goto out;
 
-       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
-                           requeue_pi ? VERIFY_WRITE : VERIFY_READ);
+       ret = (re_task == NULL) ? get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, requeue_pi ? VERIFY_WRITE : VERIFY_READ) :
+               get_futex_key_tsk(uaddr2, flags & FLAGS_SHARED, &key2,  requeue_pi ? VERIFY_WRITE : VERIFY_READ, re_task);
+       
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -1524,12 +1796,18 @@ retry_private:
 
     FPRINTK(KERN_ALERT " %s: spinlock  futex_requeue uaddr2{%lx} \n",__func__,uaddr2);
 
+    if(re_task){
+           use_mm(re_task->mm);
+    }
 
-    if(current->tgroup_distributed  && !(fn_flags & FLAGS_REMOTECALL) && !(flags & FLAGS_SHARED)){
+    if( !re_task && current->tgroup_distributed  && !(fn_flags & FLAGS_REMOTECALL) && !(flags & FLAGS_SHARED)){
                g_errno= global_queue_wake_lock(&key1,uaddr1, flags & FLAGS_SHARED, nr_wake, 1,
                                 0, fn_flags,uaddr2,nr_requeue,(int)*cmpval);
                FPRINTK(KERN_ALERT " %s: err {%d}\n",__func__,g_errno);
                ret = g_errno;
+#ifdef FUTEX_STAT
+               _requeue_err++;
+#endif
                goto out;
     }
     else
@@ -1544,13 +1822,20 @@ retry_private:
                if (unlikely(ret)) {
                        double_unlock_hb(hb1, hb2);
 
+                       //if(re_task && ret == -EFAULT)
+                       //      get_user_pages_fast_mm(re_task->mm, key1.private.address, 1, 1, pages);
+
                        ret = get_user(curval, uaddr1);
                        if (ret)
                                goto out_put_keys;
+                        
+                       if(re_task && ret == -EFAULT)
+                               unuse_mm(re_task->mm);
+
 
                        if (!(flags & FLAGS_SHARED))
                                goto retry_private;
-
+          
                        put_futex_key(&key2);
                        put_futex_key(&key1);
                        goto retry;
@@ -1610,9 +1895,10 @@ retry_private:
                }
        }
 
-       if((fn_flags & FLAGS_REMOTECALL)){
+       if((fn_flags & FLAGS_REMOTECALL) && re_task){
                fn_flags  = 0;
                fn_flags |=FLAGS_REQCALL;//FLAGS_ORIGINCALL
+               unuse_mm(re_task->mm);
        }
 
        head1 = &hb1->chain;
@@ -1716,6 +2002,13 @@ out_put_key1:
 out:
        if (pi_state != NULL)
                free_pi_state(pi_state);
+
+#ifdef FUTEX_STAT
+       if(!re_task && current->tgroup_distributed){
+               requeue_bb = native_read_tsc();
+               _requeue += requeue_bb - requeue_aa;
+       }
+#endif
        return ret ? ret : task_count;
 }
 
@@ -2116,14 +2409,23 @@ retry:
                return ret;
 
 retry_private:
-       FPRINTK(KERN_ALERT " %s: spinlock  futex_wait_setup shared{%d} \n",__func__,(flags & FLAGS_SHARED));
+       //printk(KERN_ALERT " %s: spinlock  futex_wait_setup shared{%d} \n",__func__,(flags & FLAGS_SHARED));
 
        if(current->tgroup_distributed  && !(fn_flag & FLAGS_REMOTECALL) && !(flags & FLAGS_SHARED)){
+#ifdef FUTEX_STAT
+                perf_bb = native_read_tsc();
+#endif
                g_errno = global_queue_wait_lock(q, uaddr, *hb, fn_flag, val,
                                flags & FLAGS_SHARED, VERIFY_READ, bitset);
+#ifdef FUTEX_STAT
+               perf_cc = native_read_tsc();
+#endif
                FPRINTK(KERN_ALERT " %s: spinlock  futex_wait_setup err {%d}\n",__func__,g_errno);
                if (g_errno) {  //error due to val change
-                           ret = g_errno;
+#ifdef FUTEX_STAT
+                       _wait_err++;
+#endif
+                           ret = g_errno;
                            if( ret == -EFAULT)
                            {
                                 FPRINTK(KERN_ALERT" client side efault fix up {%d} \n",fault_in_user_writeable(uaddr));
@@ -2247,6 +2549,14 @@ static void dump_regs(struct pt_regs* regs) {
 int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
                      ktime_t *abs_time, u32 bitset, unsigned int fn_flag)
 {
+
+#ifdef FUTEX_STAT
+        unsigned long long wait_aa,wait_bb,wait_cc;
+       if(current->tgroup_distributed){
+       wait_aa = native_read_tsc();
+       _wait_cnt++;
+       }
+#endif
        struct hrtimer_sleeper timeout, *to = NULL;
        struct restart_block *restart;
        struct futex_hash_bucket *hb;
@@ -2286,6 +2596,9 @@ retry:
         * Prepare to wait on uaddr. On success, holds hb lock and increments
         * q.key refs.
         */
+        /*if((strcmp("cond",current->comm) == 0) || (strcmp("bar",current->comm) == 0)){
+       printk(KERN_ALERT"%s: distributed{%d} cpu{%d} pid {%d} uaddr{%d} \n",__func__,current->tgroup_distributed,smp_processor_id(),current->pid,uaddr);
+       }*/
        ret = futex_wait_setup(uaddr, val, flags, &q, &hb,fn_flag,bitset);
        
        if (ret)
@@ -2332,11 +2645,72 @@ out:
                hrtimer_cancel(&to->timer);
                destroy_hrtimer_on_stack(&to->timer);
        }
+#ifdef FUTEX_STAT
+       if(current->tgroup_distributed){
+       wait_bb = native_read_tsc();
+       _wait += wait_bb - wait_aa ;
+       }
+#endif
        FPRINTK(KERN_DEBUG " %s:exit {%d}\n",__func__,current->pid);
        return ret;
 }
+void print_wait_perf(void)
+{
+       printk(KERN_ALERT "%s: cpu{%d} pid{%d} tgid{%d} counter{%u} errors{%u} wait time {%llu}\n",
+              __func__, smp_processor_id(), current->pid, current->tgroup_home_id,
+              _wait_cnt, _wait_err, _wait);
+       _wait_err = 0;
+       _wait = 0;
+       _wait_cnt = 0;
+}
+
+void print_wake_perf(void)
+{
+       printk(KERN_ALERT "%s: cpu{%d} pid{%d} tgid{%d} counter{%u} errors{%u} wake time {%llu}\n",
+              __func__, smp_processor_id(), current->pid, current->tgroup_home_id,
+              _wake_cnt, _wake_err, _wake);
+       _wake_err = 0;
+       _wake = 0;
+       _wake_cnt = 0;
+}
 
 
+void print_wakeop_perf(void)
+{
+       printk(KERN_ALERT "%s: cpu{%d} pid{%d} tgid{%d} counter{%u} errors{%u} wakeop time {%llu}\n",
+              __func__, smp_processor_id(), current->pid, current->tgroup_home_id,
+              _wakeop_cnt, _wakeop_err, _wakeop);
+       _wakeop_err = 0;
+       _wakeop = 0;
+       _wakeop_cnt = 0;
+}
+
+void print_requeue_perf(void)
+{
+       printk(KERN_ALERT "%s: cpu{%d} pid{%d} tgid{%d} counter{%u} errors{%u} requeue time {%llu}\n",
+              __func__, smp_processor_id(), current->pid, current->tgroup_home_id,
+              _requeue_cnt, _requeue_err, _requeue);
+       _requeue_err = 0;
+       _requeue = 0;
+       _requeue_cnt = 0;
+}
+
 static long futex_wait_restart(struct restart_block *restart)
 {
        u32 __user *uaddr = restart->futex.uaddr;
@@ -2929,7 +3303,7 @@ retry:
                 * PI futexes happens in exit_pi_state():
                 */
                if (!pi && (uval & FUTEX_WAITERS))
-                       futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY,FLAGS_SYSCALL);//modified
+                       futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY,FLAGS_SYSCALL, NULL);//modified
        }
        return 0;
 }
@@ -3058,16 +3432,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
        case FUTEX_WAKE:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAKE_BITSET:
-               ret = futex_wake(uaddr, flags, val, val3,fn_flags);
+               ret = futex_wake(uaddr, flags, val, val3,fn_flags, NULL);
                break;
        case FUTEX_REQUEUE:
-               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0,fn_flags);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0,fn_flags, NULL);
                break;
        case FUTEX_CMP_REQUEUE:
-               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0,fn_flags);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0,fn_flags, NULL);
                break;
        case FUTEX_WAKE_OP:
-               ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3,fn_flags,NULL);
+               ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3,fn_flags, NULL);
                break;
        case FUTEX_LOCK_PI:
                ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
@@ -3084,7 +3458,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                                            uaddr2);
                break;
        case FUTEX_CMP_REQUEUE_PI:
-               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1,fn_flags);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1,fn_flags, NULL);
                break;
        default:
                ret = -ENOSYS;
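Running through all of the futex.c changes is one dispatch idiom: the new trailing task_struct parameter selects whose address space a futex key is resolved against. NULL means current (the normal syscall path, as in the do_futex() cases above); non-NULL means the origin task a remote kernel is acting for, handled by get_futex_key_tsk(). A user-space mock of the idiom (stand-in types and bodies, not kernel code):

    #include <stdio.h>

    struct task { int pid; };

    static int get_key_current(unsigned long uaddr)
    {
        printf("key for current, uaddr=%#lx\n", uaddr);
        return 0;
    }

    static int get_key_tsk(unsigned long uaddr, struct task *t)
    {
        printf("key for task %d, uaddr=%#lx\n", t->pid, uaddr);
        return 0;
    }

    static int wake(unsigned long uaddr, struct task *tsk)
    {
        /* mirrors: ret = (_tsk == NULL) ? get_futex_key(...)
         *                               : get_futex_key_tsk(..., _tsk); */
        return tsk == NULL ? get_key_current(uaddr) : get_key_tsk(uaddr, tsk);
    }

    int main(void)
    {
        struct task origin = { 42 };

        wake(0x7000, NULL);     /* local syscall path */
        wake(0x7000, &origin);  /* remote delegation path */
        return 0;
    }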
diff --git a/kernel/futex_remote.c b/kernel/futex_remote.c
index a35fc7e..5c002f4 100644
 #include <popcorn/remote_pfn.h>
 #include <popcorn/pid.h>
 #include <asm/page_types.h>
+#include <linux/mmu_context.h>
 
 #include "futex_remote.h"
 #define ENOTINKRN 999
 #define MODULE "GRQ-"
 #include <popcorn/global_spinlock.h>
 
-#define FUTEX_REMOTE_VERBOSE 1 
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#define FUTEX_REMOTE_VERBOSE 0 
 #if FUTEX_REMOTE_VERBOSE
 #define FRPRINTK(...) printk(__VA_ARGS__)
 #else
@@ -233,23 +237,10 @@ void wake_futex_global(struct futex_q *q) {
        put_task_struct(p);
 }
 
-struct task_struct* gettask(pid_t origin_pid, pid_t tghid) {
+struct task_struct* gettask(pid_t tghid) {
        struct task_struct *tsk = NULL;
        struct task_struct *g, *task = NULL;
 
-       tsk = pid_task(find_vpid(origin_pid), PIDTYPE_PID);
-       if (tsk) {
-               FRPRINTK(KERN_ALERT "origin id exists \n");
-       } else {
-               do_each_thread(g, task)
-               {
-                       if (task->pid == origin_pid) {
-                               tsk = task;
-                               goto mm_exit;
-                       }
-               }
-               while_each_thread(g, task);
-       }
        tsk = pid_task(find_vpid(tghid), PIDTYPE_PID);
        if (tsk) {
                FRPRINTK(KERN_ALERT "tghid id exists \n");
@@ -263,7 +254,8 @@ struct task_struct* gettask(pid_t origin_pid, pid_t tghid) {
                }
                while_each_thread(g, task);
        }
-       mm_exit: return tsk;
+mm_exit: 
+       return tsk;
 }
 
 int global_futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake,
@@ -283,22 +275,23 @@ int global_futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake,
        struct spin_key sk;
        _spin_key_init(&sk);
 
-       tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
-       if (tsk) {
-               cmm = current->mm;
-               current->mm = tsk->mm;
+       tsk = gettask(pid);
+       //printk(KERN_ALERT"%s: rem pid{%d} tsk{%d}\n",__func__,pid,(!tsk) ? 0 : 1);
+
+       if(!tsk){       
+               goto out;
        }
+
        getKey((uaddr2 == 0) ? (unsigned long)uaddr : (unsigned long) uaddr2, &sk,(!tsk)?current->tgroup_home_id:tsk->tgroup_home_id);
+       
        _spin_value *value = hashspinkey(&sk);
        _local_rq_t * l= find_request_by_pid(pid, &value->_lrq_head);
        
        FRPRINTK(KERN_ALERT "%s: set wake up \n",__func__);
 
-       ret = get_futex_key((uaddr2 == 0) ?uaddr : (u32 __user*) uaddr2,
-                       ((flags & FLAGS_DESTROY == 256) ?
-                                       (0 & FLAGS_SHARED) : (flags & FLAGS_SHARED)), &key, VERIFY_READ);
+       ret = get_futex_key_tsk((uaddr2 == 0) ? uaddr : (u32 __user*) uaddr2,(flags & FLAGS_SHARED), &key, VERIFY_READ, tsk);
 
-       FRPRINTK(KERN_ALERT "%s: after get key ptr {%p} mm{%p} \n",__func__,key.both.ptr,current->mm);
+       FRPRINTK(KERN_ALERT "%s: after get key ptr {%p} mm{%p} \n",__func__,key.both.ptr,tsk->mm);
 
        hb = hash_futex(&key);
        spin_lock(&hb->lock);
@@ -307,7 +300,7 @@ int global_futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake,
        plist_for_each_entry_safe(this, next, head, list)
        {
                temp = this->task;
-               if (temp /*&& is_kernel_addr(temp)*/) {
+               if (temp) {
                        if (temp->tgroup_distributed == 1
                                        && temp->tgroup_home_id == tsk->tgroup_home_id
                                        && temp->pid == tsk->pid) {
@@ -324,15 +317,9 @@ int global_futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake,
        }
        spin_unlock(&hb->lock);
        put_futex_key(&key);
+       
 out:
 
-       if (cmm == NULL) {
-               FRPRINTK(KERN_ALERT"%s:cmm NULL\n",__func__);
-               current->mm = cmm;
-       } else {
-               FRPRINTK(KERN_ALERT"%s:current {%s}\n",current->comm,__func__);
-               current->mm = NULL;
-       }
 
        FRPRINTK(KERN_ALERT "%s:exit \n",__func__);
 
@@ -358,7 +345,8 @@ int global_futex_wait(unsigned long uaddr, unsigned int flags, u32 val,
        struct task_struct *tsk = origin;
        struct task_struct *rem_struct = NULL;
        struct futex_q *q = (struct futex_q *) kmalloc(sizeof(struct futex_q),
-                       GFP_ATOMIC); //futex_q_init;
+                       GFP_ATOMIC);
+       struct page *pages;
        
        q->key = FUTEX_KEY_INIT;
        q->bitset = FUTEX_BITSET_MATCH_ANY;
@@ -375,18 +363,16 @@ int global_futex_wait(unsigned long uaddr, unsigned int flags, u32 val,
 
        //start wait setup
 retry:
-       
-       ret = get_futex_key((u32 __user *)uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
+       ret = get_futex_key_tsk((u32 __user *)uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ, tsk);
        FRPRINTK(KERN_ALERT "%s: pid origin {%s} _cpu{%d} uaddr{%lx} uval{%d} ret{%d} \n ",__func__,tsk->comm,smp_processor_id(),uaddr,val,ret);
        if (unlikely(ret != 0))
           return ret;
 
-       //set private.mm to origin tasks mm
-       if(tsk)
-               q->key.private.mm = tsk->mm;
 
 retry_private:
        //queue_lock
+       use_mm(tsk->mm);
+
        hb = hash_futex(&q->key);
        q->lock_ptr = &hb->lock;
        spin_lock(&hb->lock);
@@ -398,10 +384,9 @@ fault:
                        FRPRINTK(KERN_ALERT "%s:after spin unlock ret{%d} uval{%lx}\n ",__func__,ret,uval);
 
                        if(ret == -EFAULT){
-                               if((ret = fix_user_page((u32 __user *)uaddr,tsk)) ==  0){
-                                       printk(KERN_ALERT"%s:ret {%d} \n",__func__,ret);
-                               }
-
+                               flush_cache_mm(tsk->mm);
+                               ret = fix_user_page((u32 __user *)uaddr,tsk);                           
+                               flush_tlb_mm(tsk->mm);
                        }
 
                        ret = get_user(uval, (u32 __user *)uaddr);
@@ -411,6 +396,8 @@ fault:
                                goto out;
                        }
 
+                       unuse_mm(tsk->mm);
+
                        if (!(flags & FLAGS_SHARED))
                                goto retry_private;
 
@@ -453,6 +440,7 @@ out:
        if(ret){
                put_futex_key(&q->key);
        }
+       unuse_mm(tsk->mm);
 
        FRPRINTK(KERN_ALERT "%s: hb {%p} key: word {%lx} offset{%d} ptr{%p} mm{%p}\n ",__func__,
                        hb,q->key.both.word,q->key.both.offset,q->key.both.ptr,q->key.private.mm);
@@ -496,52 +484,23 @@ void global_worher_fn(struct work_struct* work) {
                        FRPRINTK(KERN_ALERT"%s:wake--current msg pid{%d} msg->ticket{%d} \n", __func__,msg->pid,msg->ticket);
 
                        if (msg->rflag == 0 || (msg->fn_flag & FLAGS_ORIGINCALL)) {
-                                       if (current->mm != NULL) {
-                                               null_flag = 1;
-                                               cmm = (current->mm);
-                                       }
-
-                                       tsk = pid_task(find_vpid(msg->tghid), PIDTYPE_PID);
-                                       if (tsk) {
-                                               current->mm = tsk->mm;
-                                               FRPRINTK(KERN_ALERT "tghid exist cmm{%d}  cmm{%p} comm{%s} mm{%p}\n",(cmm!=NULL)?1:0,cmm,current->comm,current->mm);
-                                       } else {
-                                               do_each_thread(g, task){
-                                                       if (task->pid == msg->tghid) {
-                                                               current->mm = task->mm;
-                                                               FRPRINTK(KERN_ALERT "tghid-> mm struct found comm{%s} cmm{%d} mm{%d} \n",task->comm,(cmm!=NULL)?1:0, (current->mm!=NULL)?1:0);
-                                                               goto mm_exit;
-                                                       }
-                                               }
-                                               while_each_thread(g, task);
-                                       }
-mm_exit:
-                                       FRPRINTK(KERN_ALERT "%s: before wake cmm{%d}  mm{%d} cmm{%p}  mm{%p} msg->fn_flag{%u}\n",__func__,(cmm!=NULL)?1:0,(current->mm!=NULL)?1:0,cmm,current->mm,msg->fn_flag);
+
+                                       tsk = gettask(msg->tghid);
 
                                        msg->fn_flag |= FLAGS_REMOTECALL;
 
                                        if (msg->fn_flag & FLAGS_WAKECALL)
                                                ret = futex_wake(msg->uaddr, msg->flags, msg->nr_wake, msg->bitset,
-                                                               msg->fn_flag);
+                                                               msg->fn_flag,tsk);
 
                                        else if (msg->fn_flag & FLAGS_REQCALL)
                                                ret = futex_requeue(msg->uaddr, msg->flags, (unsigned long)  (msg->uaddr2 & ((1600*PAGE_SIZE)-1)), msg->nr_wake,
-                                                               msg->nr_wake2, &(msg->cmpval),0, msg->fn_flag);
+                                                               msg->nr_wake2, &(msg->cmpval),0, msg->fn_flag,tsk);
 
                                        else if (msg->fn_flag & FLAGS_WAKEOPCALL)
-                                               ret = futex_wake_op((u32 __user*)msg->uaddr, msg->flags,(u32 __user*)msg->uaddr2, msg->nr_wake,
+                                               ret = futex_wake_op((u32 __user*)msg->uaddr, msg->flags,(u32 __user*)(msg->uaddr2 & ((1600*PAGE_SIZE)-1)), msg->nr_wake,
                                                                msg->nr_wake2, msg->cmpval, msg->fn_flag,tsk);
 
-                                       if (cmm != NULL && null_flag) {
-                                               FRPRINTK(KERN_ALERT "assign the original mm struct back for task {%d}\n",current->pid);
-                                               current->mm = cmm;
-                                       } else if (cmm == NULL && !null_flag) {
-                                               FRPRINTK(KERN_ALERT "assign the null mm struct back {%d} \n",current->pid);
-                                               current->mm = NULL;
-                                       } else {
-                                               FRPRINTK(KERN_ALERT "whatever {%d} \n",current->pid);
-                                               current->mm = NULL;
-                                       }
                                }
                                FRPRINTK(KERN_ALERT "%s:after setting mm to NULL\n",__func__);
 
@@ -567,7 +526,7 @@ mm_exit:
 
                        FRPRINTK(KERN_ALERT"%s:wait --current msg pid{%d} msg->ticket{%d} \n", __func__,msg->pid,msg->ticket);
 
-                       tsk = gettask(msg->tghid, msg->tghid);
+                       tsk = gettask(msg->tghid);
                        if (msg->fn_flags & FLAGS_ORIGINCALL) {
                                msg->fn_flags |= FLAGS_REMOTECALL;
                                ret = global_futex_wait(msg->uaddr, msg->flags, msg->val, 0, 0, msg->pid, tsk,
@@ -625,7 +584,6 @@ static int handle_remote_futex_wake_response(struct pcn_kmsg_message* inc_msg) {
        _local_rq_t *ptr = set_err_request(msg->request_id,msg->errno, &value->_lrq_head);
        // smp_wmb();
        FRPRINTK(KERN_ALERT"%s: errno{%d} p->tgp(%d} \n",__func__,msg->errno,p->tgroup_home_id);
-//     wake_up_interruptible(&ptr->_wq);
 
        put_task_struct(p);
 
@@ -784,7 +742,6 @@ static int handle_remote_futex_key_response(struct pcn_kmsg_message* inc_msg) {
 
        _local_rq_t *ptr = set_err_request(msg->request_id,msg->errno, &value->_lrq_head);
        
-//     wake_up_interruptible(&ptr->_wq);
 
        put_task_struct(p);
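The worker-side changes above replace the old hand-rolled swapping of current->mm (saved into cmm and restored afterwards) with use_mm()/unuse_mm() around each delegated operation. What matters is the bracketing: every path that borrows the origin task's mm, including the -EFAULT retry paths, must release it before returning. A user-space mock of that discipline (stand-in types and printfs, not the kernel API):

    #include <stdio.h>

    struct mm { const char *owner; };

    static void use_mm_mock(struct mm *mm)   { printf("borrow mm of %s\n", mm->owner); }
    static void unuse_mm_mock(struct mm *mm) { printf("return mm of %s\n", mm->owner); }

    static int wake_on_behalf(struct mm *origin_mm, int will_fault)
    {
        use_mm_mock(origin_mm);
        if (will_fault) {            /* e.g. futex_atomic_op_inuser() hit -EFAULT */
            unuse_mm_mock(origin_mm);
            return -1;               /* error path still releases the mm */
        }
        unuse_mm_mock(origin_mm);
        return 0;
    }

    int main(void)
    {
        struct mm origin = { "origin task" };

        wake_on_behalf(&origin, 0);
        wake_on_behalf(&origin, 1);
        return 0;
    }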
 
diff --git a/kernel/futex_remote.h b/kernel/futex_remote.h
index 32c0e8d..68ef913 100644
@@ -100,10 +100,10 @@ int remote_futex_wakeup(u32 __user  *uaddr,unsigned int flags, int nr_wake, u32
 extern struct futex_hash_bucket futex_queues[1<<_FUTEX_HASHBITS];
 extern void get_futex_key_refs(union futex_key *key);
 extern int
-futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset,unsigned int fn_flags);
+futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset,unsigned int fn_flags,struct task_struct *tsk);
 extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
                         u32 __user *uaddr2, int nr_wake, int nr_requeue,
-                        u32 *cmpval, int requeue_pi,unsigned int fn_flags);
+                        u32 *cmpval, int requeue_pi,unsigned int fn_flags, struct task_struct *tsk);
 extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op,unsigned int fn_flags,struct task_struct *tsk);
 extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
@@ -112,6 +112,10 @@ extern const struct futex_q futex_q_init ;
 extern struct futex_hash_bucket *hash_futex(union futex_key *key);
 extern int
 get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw);
+
+extern int 
+get_futex_key_tsk(u32 __user *uaddr, int fshared, union futex_key *key, int rw, struct task_struct *tsk);
+
 extern int match_futex(union futex_key *key1, union futex_key *key2);
 extern void wake_futex(struct futex_q *q);
 extern void put_futex_key(union futex_key *key);
@@ -149,8 +153,12 @@ struct kernel_robust_list_head {
 #define FLAGS_ORIGINCALL       32
 
 #define FLAGS_MAX      FLAGS_SYSCALL+FLAGS_REMOTECALL+FLAGS_ORIGINCALL+FLAGS_WAKECALL+FLAGS_REQCALL+FLAGS_WAKEOPCALL
-
+//#define FUTEX_STAT
+#undef FUTEX_STAT
 extern struct vm_area_struct * getVMAfromUaddr(unsigned long uaddr);
+void print_wait_perf(void);
 
-
+void print_wake_perf(void);
+void print_wakeop_perf(void);
+void print_requeue_perf(void);
 #endif /* FUTEX_REMOTE_H_ */
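
A note on the new knobs: FUTEX_STAT is forced off by the #undef just above, and the four print_*_perf() hooks are what a caller would use to dump the counters. A minimal sketch of how one hook could be backed when the knob is on; the counter name is hypothetical, only the prototype comes from this header:

    #ifdef FUTEX_STAT
    static atomic64_t futex_wait_calls = ATOMIC64_INIT(0);

    int print_wait_perf(void)
    {
            printk(KERN_INFO "remote futex wait: %lld calls\n",
                   (long long)atomic64_read(&futex_wait_calls));
            return 0;
    }
    #else
    int print_wait_perf(void) { return 0; }
    #endif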
index dc77292..0163e29 100644 (file)
@@ -233,7 +233,7 @@ __releases(&value->_sp)
                wake_req->flags = _data->flags;
 
                wake_req->ticket = localticket_value;//GET_TOKEN; //set the request has no ticket
-               printk(KERN_ALERT"%s: wake uaddr2{%lx} data{%lx} \n",__func__,wake_req->uaddr2,_data->uaddr2);
+//             printk(KERN_ALERT"%s: wake uaddr2{%lx} data{%lx} \n",__func__,wake_req->uaddr2,_data->uaddr2);
        }
 
 
index 9be4d89..c1ad7aa 100644 (file)
@@ -188,7 +188,6 @@ static int ____call_usermodehelper(void *data)
      * Multikernel
      * Handle delegation case
      */
-#ifdef PROCESS_SERVER_USE_KMOD
     if (sub_info->delegated) {
 
         // Copy identity information to current task.
@@ -206,7 +205,6 @@ static int ____call_usermodehelper(void *data)
         // Notify of PID/PID pairing.
         process_server_notify_delegated_subprocess_starting(current->pid,sub_info->remote_pid,sub_info->remote_cpu);
     } 
-#endif
 
        retval = kernel_execve(sub_info->path,
                               (const char *const *)sub_info->argv,
@@ -403,9 +401,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
        sub_info->path = path;
        sub_info->argv = argv;
        sub_info->envp = envp;
-#ifdef PROCESS_SERVER_USE_KMOD
     sub_info->delegated = 0;  // multikernel
-#endif
   out:
        return sub_info;
 }
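
With the PROCESS_SERVER_USE_KMOD guards gone, delegated is unconditionally part of subprocess_info and defaults to 0, so delegation stays opt-in per call. A hedged caller-side sketch; remote_pid and remote_cpu mirror the fields the handler above copies, and the setup/exec pair is the stock kmod API:

    struct subprocess_info *info;

    info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
    if (info) {
            info->delegated = 1;            /* take the multikernel branch */
            info->remote_pid = remote_pid;  /* PID/CPU pairing to report */
            info->remote_cpu = remote_cpu;
            call_usermodehelper_exec(info, UMH_WAIT_EXEC);
    }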
index c1f5d68..067bb30 100644 (file)
@@ -1,4 +1,4 @@
-/*
+/**
  * Implements task migration and maintains coherent 
  * address spaces across CPU cores.
  *
 #include <linux/pcn_kmsg.h> // Messaging
 #include <linux/pcn_perf.h> // performance measurement
 #include <linux/string.h>
-#include <linux/unistd.h>
-#include <linux/tsacct_kern.h>
+
 #include <linux/popcorn_cpuinfo.h>
-#include <linux/syscalls.h>
-#include <linux/kernel.h>
-#include <linux/proc_fs.h>
 
 #include <asm/pgtable.h>
 #include <asm/atomic.h>
@@ -59,7 +55,7 @@ unsigned long get_percpu_old_rsp(void);
 
 // Flag indicating whether or not to migrate the entire virtual 
 // memory space when a migration occurs.  
-#define COPY_WHOLE_VM_WITH_MIGRATION 0
+#define COPY_WHOLE_VM_WITH_MIGRATION 1
 
 // Flag indicating whether or not to migrate file-backed executable
 // pages when a fault occurs accessing executable memory.  When this
@@ -72,11 +68,6 @@ unsigned long get_percpu_old_rsp(void);
 #define MAX_MAPPINGS 1
 
 extern int sys_topen(const char __user * filename, int flags, int mode, int fd);
-// Whether or not to expose a proc entry that we can publish
-// information to.
-//#undef PROCESS_SERVER_HOST_PROC_ENTRY
-#define PROCESS_SERVER_HOST_PROC_ENTRY
-
 /**
  * Use the preprocessor to turn off printk.
  */
@@ -130,8 +121,6 @@ extern sys_topen(const char __user * filename, int flags, int mode, int fd);
 #define PROCESS_SERVER_MM_DATA_TYPE 6
 #define PROCESS_SERVER_THREAD_COUNT_REQUEST_DATA_TYPE 7
 #define PROCESS_SERVER_MPROTECT_DATA_TYPE 8
-#define PROCESS_SERVER_LAMPORT_BARRIER_DATA_TYPE 9
-#define PROCESS_SERVER_STATS_DATA_TYPE 10
 
 /**
  * Useful macros
@@ -141,7 +130,7 @@ extern sys_topen(const char __user * filename, int flags, int mode, int fd);
 /**
  * Perf
  */
-#define MEASURE_PERF 0
+#define MEASURE_PERF 1
 #if MEASURE_PERF
 #define PERF_INIT() perf_init()
 #define PERF_MEASURE_START(x) perf_measure_start(x)
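
Flipping MEASURE_PERF to 1 routes these macros to the pcn_perf hooks. The #else branch falls outside this hunk; presumably it compiles the macros away, along these lines (a sketch, since the real definitions are not shown; PERF_MEASURE_START must still yield a value because callers assign its result):

    #else
    #define PERF_INIT()
    #define PERF_MEASURE_START(x) (-1)
    #define PERF_MEASURE_STOP(x, y, z)
    #endif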
@@ -267,14 +256,10 @@ static void perf_init(void) {
 static DECLARE_WAIT_QUEUE_HEAD( countq);
 
 /**
- * Enums
+ * Constants
  */
-typedef enum _lamport_barrier_state {
-    LAMPORT_ENTRY_OWNED,
-    LAMPORT_ENTRY_OFF_LIMITS,
-    LAMPORT_ENTRY_CONTENDED
-} lamport_barrier_state_t;
-
+#define RETURN_DISPOSITION_EXIT 0
+#define RETURN_DISPOSITION_MIGRATE 1
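
The two RETURN_DISPOSITION constants replace the removed Lamport enum and tell a returning thread whether to die locally or migrate back. A sketch of the intended consumer, assuming the disposition is stashed on the task; get_return_disposition() and process_server_do_migration() are hypothetical names, and t_home_cpu is assumed to be mirrored onto the task as it is in clone_data:

    switch (get_return_disposition(current)) {
    case RETURN_DISPOSITION_MIGRATE:
            /* hypothetical helper: go back to the home kernel */
            process_server_do_migration(current, current->t_home_cpu);
            break;
    case RETURN_DISPOSITION_EXIT:
    default:
            do_exit(0);
    }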
 
 /**
  * Library
@@ -363,8 +348,6 @@ typedef struct _clone_data {
     unsigned short thread_ds;
     unsigned short thread_fsindex;
     unsigned short thread_gsindex;
-    unsigned long def_flags;
-    unsigned int personality;
     int tgroup_home_cpu;
     int tgroup_home_id;
     int t_home_cpu;
@@ -406,10 +389,6 @@ typedef struct _mapping_request_data {
     unsigned long pgoff;
     spinlock_t lock;
     char path[512];
-    struct semaphore wait_sem;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long wait_time_concluded;
-#endif
 } mapping_request_data_t;
 
 /**
@@ -462,34 +441,6 @@ typedef struct _mprotect_data {
     spinlock_t lock;
 } mprotect_data_t;
 
-typedef struct _get_counter_phys_data {
-    data_header_t header;
-    int response_received;
-    unsigned long resp;
-} get_counter_phys_data_t;
-
-typedef struct _lamport_barrier_entry {
-    data_header_t header;
-    unsigned long long timestamp;
-    int responses;
-    int expected_responses;
-    int allow_responses;
-    int cpu;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long lock_acquired;
-    unsigned long long lock_released;
-#endif
-} lamport_barrier_entry_t;
-
-typedef struct _lamport_barrier_queue {
-    data_header_t header;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    unsigned long address;
-    unsigned long long active_timestamp;
-    lamport_barrier_entry_t* queue;
-} lamport_barrier_queue_t;
-
 /**
  * This message is sent to a remote cpu in order to 
  * ask it to spin up a process on behalf of the
@@ -523,8 +474,6 @@ typedef struct _clone_request {
     unsigned short thread_ds;
     unsigned short thread_fsindex;
     unsigned short thread_gsindex;
-    unsigned long def_flags;
-    unsigned int personality;
     int tgroup_home_cpu;
     int tgroup_home_id;
     int t_home_cpu;
@@ -625,15 +574,9 @@ struct _mapping_request {
     int tgroup_home_id;         // 4
     int requester_pid;          // 4
     unsigned long address;      // 8
-    char need_vma;              // 1
                                 // ---
-                                // 21 -> 39 bytes of padding needed
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long send_time;
-    char pad[31];
-#else
-    char pad[39];
-#endif
+                                // 20 -> 40 bytes of padding needed
+    char pad[40];
 
 } __attribute__((packed)) __attribute__((aligned(64)));
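
The reworked padding comment keeps the request at exactly one cache line: 4+4+4+8 = 20 bytes of payload plus 40 of pad, with what is presumably a 4-byte pcn_kmsg_hdr making 64. A compile-time guard for that arithmetic, as a sketch (it would live inside any init function):

    /* sketch: fails the build if the layout comment drifts from reality */
    BUILD_BUG_ON(sizeof(struct _mapping_request) != 64);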
 
@@ -658,21 +601,18 @@ typedef struct _thread_group_exited_notification thread_group_exited_notificatio
  */
 struct _mapping_response {
     struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;                                    // 4 
-    int tgroup_home_id;                                     // 4
-    int requester_pid;                                      // 4
-    unsigned char present;                                  // 1
-    unsigned char from_saved_mm;                            // 1
-    unsigned long address;                                  // 8
-    unsigned long vaddr_start;                              // 8
+    int tgroup_home_cpu;
+    int tgroup_home_id;
+    int requester_pid;
+    unsigned char present;
+    unsigned char from_saved_mm;
+    unsigned long address;
+    unsigned long vaddr_start;
     unsigned long vaddr_size;
     contiguous_physical_mapping_t mappings[MAX_MAPPINGS];
     pgprot_t prot;              
     unsigned long vm_flags;     
     unsigned long pgoff;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long send_time;
-#endif
     char path[512]; // save to last so we can cut
                     // off data when possible.
 };
@@ -686,18 +626,13 @@ typedef struct _mapping_response mapping_response_t;
  */
 struct _nonpresent_mapping_response {
     struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    int requester_pid;              // 4
-    unsigned long address;          // 8
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long send_time;   // 8
-                                    // ---
-                                    // 28 -> 32 bytes of padding needed
-    char pad[32];
-#else
+    int tgroup_home_cpu;        // 4
+    int tgroup_home_id;         // 4
+    int requester_pid;          // 4
+    unsigned long address;      // 8
+                                // ---
+                                // 20 -> 40 bytes of padding needed
     char pad[40];
-#endif
 
 } __attribute__((packed)) __attribute__((aligned(64)));
 typedef struct _nonpresent_mapping_response nonpresent_mapping_response_t;
@@ -815,119 +750,6 @@ typedef struct _back_migration {
     unsigned short thread_gsindex;
 } back_migration_t;
 
-/**
- *
- */
-struct _lamport_barrier_request{
-    struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    unsigned long address;          // 8
-    unsigned long long timestamp;   // 16
-                                    // ---
-                                    // 32 -> 32 bytes of padding needed
-    char pad[32];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _lamport_barrier_request lamport_barrier_request_t;
-
-/**
- *
- */
-struct _lamport_barrier_request_range {
-    struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    unsigned long address;          // 8
-    size_t sz;                      // 4
-    unsigned long long timestamp;   // 16
-                                    // ---
-                                    // 36 -> 28 bytes of padding needed
-    char pad[28];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _lamport_barrier_request_range lamport_barrier_request_range_t;
-
-/**
- *
- */
-struct _lamport_barrier_response {
-    struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    unsigned long address;          // 8
-    unsigned long long timestamp;   // 16
-                                    // ---
-                                    // 32 -> 28 bytes of padding needed
-    char pad[28];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _lamport_barrier_response lamport_barrier_response_t;
-
-/**
- *
- */
-struct _lamport_barrier_response_range {
-    struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    unsigned long address;          // 8
-    size_t sz;                      // 4
-    unsigned long long timestamp;   // 16
-                                    // ---
-                                    // 36 -> 24 bytes of padding needed
-    char pad[24];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _lamport_barrier_response_range lamport_barrier_response_range_t;
-
-/**
- *
- */
-struct _lamport_barrier_release {
-    struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    unsigned long address;          // 8
-    unsigned long long timestamp;   //16
-                                    // ---
-                                    // 32 -> 28 bytes of padding needed
-    char pad[28];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _lamport_barrier_release lamport_barrier_release_t;
-
-/**
- *
- */
-struct _lamport_barrier_release_range {
-    struct pcn_kmsg_hdr header;
-    int tgroup_home_cpu;            // 4
-    int tgroup_home_id;             // 4
-    unsigned long address;          // 8
-    size_t sz;                      // 4
-    unsigned long long timestamp;   // 16
-                                    // ---
-                                    // 36 -> 24 bytes of padding needed
-    char pad[24];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _lamport_barrier_release_range lamport_barrier_release_range_t;
-
-/**
- *
- */
-struct _get_counter_phys_request {
-    struct pcn_kmsg_hdr header;
-    char pad[60];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _get_counter_phys_request get_counter_phys_request_t;
-
-/**
- *
- */
-struct _get_counter_phys_response {
-    struct pcn_kmsg_hdr header;
-    unsigned long resp;
-    char pad[58];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _get_counter_phys_response get_counter_phys_response_t;
-
-
 /**
  *
  */
@@ -959,14 +781,6 @@ typedef struct {
     unsigned short thread_gsindex;
 } exit_work_t;
 
-/**
- *
- */
-typedef struct {
-    struct work_struct work;
-    clone_data_t* data;
-} import_task_work_t;
-
 /**
  *
  */
@@ -985,7 +799,6 @@ typedef struct {
     int tgroup_home_id;
     int requester_pid;
     unsigned long address;
-    char need_vma;
     int from_cpu;
 } mapping_request_work_t;
 
@@ -1103,85 +916,10 @@ typedef struct {
     unsigned short thread_gsindex;
 } back_migration_work_t;
 
-/**
- *
- */
-typedef struct {
-    struct work_struct work;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    int from_cpu;
-    unsigned long address;
-    unsigned long long timestamp;
-} lamport_barrier_request_work_t;
-
-/**
- *
- */
-typedef struct {
-    struct work_struct work;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    int from_cpu;
-    unsigned long address;
-    unsigned long long timestamp;
-} lamport_barrier_response_work_t;
-
-/**
- * 
- */
-typedef struct {
-    struct work_struct work;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    int from_cpu;
-    unsigned long address;
-    unsigned long long timestamp
-} lamport_barrier_release_work_t;
-
-/**
- *
- */
-typedef struct {
-    struct work_struct work;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    int from_cpu;
-    unsigned long address;
-    size_t sz;
-    unsigned long long timestamp;
-} lamport_barrier_request_range_work_t;
-
-/**
- *
- */
-typedef struct {
-    struct work_struct work;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    int from_cpu;
-    unsigned long address;
-    size_t sz;
-    unsigned long long timestamp;
-} lamport_barrier_response_range_work_t;
-
-/**
- * 
- */
-typedef struct {
-    struct work_struct work;
-    int tgroup_home_cpu;
-    int tgroup_home_id;
-    int from_cpu;
-    unsigned long address;
-    size_t sz;
-    unsigned long long timestamp
-} lamport_barrier_release_range_work_t;
 
 /**
  * Prototypes
  */
-static void process_import_task(struct work_struct* work);
 static int handle_clone_request(struct pcn_kmsg_message* msg);
 long process_server_clone(unsigned long clone_flags,
                           unsigned long stack_start,                                                                                                                   
@@ -1205,13 +943,7 @@ static void dump_stk(struct thread_struct* thread, unsigned long stack_ptr);
 int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
                spinlock_t *ptl, pte_t orig_pte);
-int do_mprotect(struct task_struct* task, struct mm_struct* mm, unsigned long start, size_t len, unsigned long prot, int do_remote);
-#ifndef PROCESS_SERVER_USE_KMOD
-extern int exec_mmap(struct mm_struct* mm);
-extern void start_remote_thread(struct pt_regs* regs);
-extern void flush_old_files(struct files_struct * files);
-#endif
-static unsigned long get_next_ts_value(void);
+int do_mprotect(struct task_struct* task, unsigned long start, size_t len, unsigned long prot, int do_remote);
 
 /**
  * Module variables
@@ -1236,105 +968,7 @@ DEFINE_SPINLOCK(_vma_id_lock);                    // Lock for _vma_id
 DEFINE_SPINLOCK(_clone_request_id_lock);          // Lock for _clone_request_id
 struct rw_semaphore _import_sem;
 DEFINE_SPINLOCK(_remap_lock);
-data_header_t* _lamport_barrier_queue_head = NULL;
-DEFINE_SPINLOCK(_lamport_barrier_queue_lock);
-unsigned long* ts_counter = NULL;
-get_counter_phys_data_t* get_counter_phys_data = NULL;
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-struct proc_dir_entry *_proc_entry = NULL;
-static void proc_track_data(int entry, unsigned long long time);//proto
-static void proc_data_init();
-typedef struct _proc_data {
-    int count;
-    unsigned long long total;
-    unsigned long long min;
-    unsigned long long max;
-    char name[256];
-} proc_data_t;
-typedef enum _proc_data_index{
-    PS_PROC_DATA_MAPPING_WAIT_TIME=0,
-    PS_PROC_DATA_MAPPING_POST_WAIT_TIME_RESUME,
-    PS_PROC_DATA_MAPPING_REQUEST_SEND_TIME,
-    PS_PROC_DATA_MAPPING_RESPONSE_SEND_TIME,
-    PS_PROC_DATA_MAPPING_REQUEST_DELIVERY_TIME,
-    PS_PROC_DATA_MAPPING_RESPONSE_DELIVERY_TIME,
-    PS_PROC_DATA_MAPPING_REQUEST_PROCESSING_TIME,
-    PS_PROC_DATA_BREAK_COW_TIME,
-    PS_PROC_DATA_FAULT_PROCESSING_TIME,
-    PS_PROC_DATA_ADJUSTED_PERMISSIONS,
-    PS_PROC_DATA_NEWVMA_ANONYMOUS_PTE,
-    PS_PROC_DATA_NEWVMA_ANONYMOUS_NOPTE,
-    PS_PROC_DATA_NEWVMA_FILEBACKED_PTE,
-    PS_PROC_DATA_NEWVMA_FILEBACKED_NOPTE,
-    PS_PROC_DATA_OLDVMA_ANONYMOUS_PTE,
-    PS_PROC_DATA_OLDVMA_ANONYMOUS_NOPTE,
-    PS_PROC_DATA_OLDVMA_FILEBACKED_PTE,
-    PS_PROC_DATA_OLDVMA_FILEBACKED_NOPTE,
-    PS_PROC_DATA_MUNMAP_PROCESSING_TIME,
-    PS_PROC_DATA_MUNMAP_REQUEST_PROCESSING_TIME,
-    PS_PROC_DATA_MPROTECT_PROCESSING_TIME,
-    PS_PROC_DATA_MPROTECT_REQUEST_PROCESSING_TIME,
-    PS_PROC_DATA_EXIT_PROCESSING_TIME,
-    PS_PROC_DATA_EXIT_NOTIFICATION_PROCESSING_TIME,
-    PS_PROC_DATA_GROUP_EXIT_PROCESSING_TIME,
-    PS_PROC_DATA_GROUP_EXIT_NOTIFICATION_PROCESSING_TIME,
-    PS_PROC_DATA_IMPORT_TASK_TIME,
-    PS_PROC_DATA_COUNT_REMOTE_THREADS_PROCESSING_TIME,
-    PS_PROC_DATA_MK_PAGE_WRITABLE,
-    PS_PROC_DATA_WAITING_FOR_LAMPORT_LOCK,
-    PS_PROC_DATA_LAMPORT_LOCK_HELD,
-    PS_PROC_DATA_MAX
-} proc_data_index_t;
-proc_data_t _proc_data[NR_CPUS][PS_PROC_DATA_MAX];
-
-typedef struct proc_xfer {
-    unsigned long long total;
-    int count;
-    unsigned long long min;
-    unsigned long long max;
-} proc_xfer_t;
-
-struct _stats_clear {
-    struct pcn_kmsg_hdr header;
-    char pad[60];
-} __attribute__((packed)) __attribute__((aligned(64)));;
-typedef struct _stats_clear stats_clear_t;
-
-struct _stats_query {
-    struct pcn_kmsg_hdr header;
-    pid_t pid;
-    char pad[56];
-} __attribute__((packed)) __attribute__((aligned(64)));
-typedef struct _stats_query stats_query_t;
-
-struct _stats_response {
-    struct pcn_kmsg_hdr header;
-    pid_t pid;
-    proc_xfer_t data[PS_PROC_DATA_MAX];
-} __attribute__((packed)) __attribute__((aligned(64))); 
-typedef struct _stats_response stats_response_t;
-
-typedef struct _stats_query_data {
-    data_header_t header;
-    int expected_responses;
-    int responses;
-    pid_t pid;
-} stats_query_data_t;
-
-typedef struct {
-    struct work_struct work;
-    int pid;
-    int from_cpu;
-} stats_query_work_t;
-
-#define PS_PROC_DATA_TRACK(x,y) proc_track_data(x,y)
-#define PS_PROC_DATA_INIT() proc_data_init()
 
-#else
-#define PS_PROC_DATA_TRACK(x,y)
-#define PS_PROC_DATA_INIT()
-#endif
 
 // Work Queues
 static struct workqueue_struct *clone_wq;
@@ -1358,7 +992,7 @@ static bool __user_addr (unsigned long x ) {
  */
 static int cpu_has_known_tgroup_mm(int cpu)
 {
-/*#ifdef SUPPORT_FOR_CLUSTERING
+#ifdef SUPPORT_FOR_CLUSTERING
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
     struct cpumask *pcpum =0;
@@ -1382,12 +1016,12 @@ extern struct list_head rlist_head;
     printk(KERN_ERR"%s: ERROR the input cpu (%d) is not included in any known cpu cluster\n",
                __func__, cpu);
     return 0;
-#else*/
+#else
     if(test_bit(cpu,&current->known_cpu_with_tgroup_mm)) {
         return 1;
     }
     return 0;
-//#endif
+#endif
 }
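
Re-enabling SUPPORT_FOR_CLUSTERING swaps the per-task bit test for a walk over the remote-cpu list. The same walk recurs later in this patch (see count_remote_thread_members); in isolation, with the names this file already uses:

    struct list_head *iter;
    _remote_cpu_info_list_t *objPtr;
    int i;
    extern struct list_head rlist_head;

    list_for_each(iter, &rlist_head) {
            objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
            i = objPtr->_data._processor;
            /* ... test membership of, or send a message to, cpu i ... */
    }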
 
 /**
@@ -1448,7 +1082,7 @@ static struct vm_area_struct* find_vma_checked(struct mm_struct* mm, unsigned lo
     return ret;
 
 }*/
-// Antonio's Version
+/* Antonio's Version
 static int is_mapped(struct mm_struct* mm, unsigned vaddr)
 {
     pte_t* pte = NULL;
@@ -1458,22 +1092,19 @@ static int is_mapped(struct mm_struct* mm, unsigned vaddr)
 
     pgd = pgd_offset(mm, vaddr);                                                   
     if (pgd && !pgd_none(*pgd) && likely(!pgd_bad(*pgd)) && pgd_present(*pgd)) {
-        pud = pud_offset(pgd,vaddr);                                               
-        if (pud && !pud_none(*pud) && likely(!pud_bad(*pud)) && pud_present(*pud)) {
-
-            pmd = pmd_offset(pud,vaddr);
-            if(pmd && !pmd_none(*pmd) && likely(!pmd_bad(*pmd)) && pmd_present(*pmd)) {             
-                pte = pte_offset_map(pmd,vaddr);                                   
-                if(pte && !pte_none(*pte) && pte_present(*pte)) { 
+        pud = pud_offset(pgd,vaddr);
+        if (pud && !pud_none(*pud) && likely(!pud_bad(*pud)) && pud_present(*pud)) {
+            pmd = pmd_offset(pud,vaddr);
+            if(pmd && !pmd_none(*pmd) && likely(!pmd_bad(*pmd)) && pmd_present(*pmd)) {
+                pte = pte_offset_map(pmd,vaddr);
+                if(pte && !pte_none(*pte) && pte_present(*pte)) {
                    // It exists!                                                  
                     return 1;
-                }                                                                  
-            }                                                                      
-        }                                                                          
+                }
+            }
+        }
     }
-    return 0;
-}
-
+    return 0;
+}
+*/
 
 /**
  * @brief Find the mm_struct for a given distributed thread.  
@@ -1496,18 +1127,15 @@ static struct mm_struct* find_thread_mm(
     *task_out = NULL;
 
     // First, look through all active processes.
-    read_lock(&tasklist_lock);
     do_each_thread(g,task) {
         if(task->tgroup_home_cpu == tgroup_home_cpu &&
            task->tgroup_home_id  == tgroup_home_id) {
             mm = task->mm;
             *task_out = task;
             *used_saved_mm = NULL;
-            read_unlock(&tasklist_lock);
             goto out;
         }
     } while_each_thread(g,task);
-    read_unlock(&tasklist_lock);
 
     // Failing that, look through saved mm's.
     spin_lock_irqsave(&_saved_mm_head_lock,lockflags);
@@ -1534,8 +1162,6 @@ out:
     return mm;
 }
 
-
-
 /**
  * @brief A best effort at making a page writable
  * @return void
@@ -1543,11 +1169,6 @@ out:
 static void mk_page_writable(struct mm_struct* mm,
                              struct vm_area_struct* vma,
                              unsigned long vaddr) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-#endif
     spinlock_t* ptl;
     pte_t *ptep, pte, entry;
      
@@ -1578,29 +1199,9 @@ static void mk_page_writable(struct mm_struct* mm,
     // Unlock the pte
     pte_unmap_unlock(pte, ptl);
 out:
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MK_PAGE_WRITABLE,total_time);
-#endif
     return;
 }
 
-/**
- *
- */
-static void mk_page_writable_lookupvma(struct mm_struct*mm,
-                             unsigned long addr) {
-    struct vm_area_struct* curr = mm->mmap;
-    while(curr) {
-        if(curr->vm_start <= addr && curr->vm_end > addr) {
-            mk_page_writable(mm,curr,addr);
-            break;
-        }
-        curr = curr->vm_next;
-    }
-}
-
 /**
  * @brief Check to see if a given page is writable.
  * @return 0 if not writable or error, not zero otherwise
@@ -1696,7 +1297,6 @@ static int vm_search_page_walk_pte_entry_callback(pte_t *pte, unsigned long star
     unsigned long* resolved_addr = (unsigned long*)walk->private;
 
     if (pte == NULL || pte_none(*pte) || !pte_present(*pte)) {
-        *resolved_addr = 0;
         return 0;
     }
 
@@ -1758,158 +1358,49 @@ static int is_vaddr_mapped(struct mm_struct* mm, unsigned long vaddr) {
 }
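
The helper documented next walks runs of physically contiguous pages. A usage sketch, assuming (as the callers in this file do) a 0 return on success and that mmap_sem is already held:

    unsigned long vstart, pstart;
    size_t psz;

    if (!find_consecutive_physically_mapped_region(mm, vma, addr,
                                                   &vstart, &pstart, &psz)) {
            /* [vstart, vstart + psz) is backed contiguously from pstart */
            PSPRINTK("run: v %lx -> p %lx, %zu bytes\n", vstart, pstart, psz);
    }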
 
 /**
- * @brief Determine if the specified vma can have cow mapings.
- * @return 1 = yes, 0 = no.
+ *  @brief Find the bounds of a physically consecutive mapped region.
+ *  The region must be contained within the specified VMA.
+ *
+ *  Hypothetical page table mappings for a given VMA:
+ *
+ *  *********************************
+ *  *    Vaddr      *   Paddr       *
+ *  *********************************
+ *  * 0x10000000    * 0x12341000    *
+ *  *********************************
+ *  * 0x10001000    * 0x12342000    *
+ *  *********************************
+ *  * 0x10002000    * 0x12343000    *
+ *  *********************************
+ *  * 0x10003000    * 0x43214000    *
+ *  *********************************
+ *  
+ *  This function, given a vaddr mapped to paddr 0x12342xxx (0x10001xxx here), will return:
+ *  *vaddr_mapping_start = 0x10000000
+ *  *paddr_mapping_start = 0x12341000
+ *  *paddr_mapping_sz    = 0x3000
+ *
+ *  Notice 0x10003000 and above is not included in the returned region, as
+ *  its paddr is not consecutive with the previous mappings.
+ *
+ */
+int find_consecutive_physically_mapped_region(struct mm_struct* mm,
+                                              struct vm_area_struct* vma,
+                                              unsigned long vaddr,
+                                              unsigned long* vaddr_mapping_start,
+                                              unsigned long* paddr_mapping_start,
+                                              size_t* paddr_mapping_sz) {
+    unsigned long paddr_curr = 0;
+    unsigned long vaddr_curr = vaddr;
+    unsigned long vaddr_next = vaddr;
+    unsigned long paddr_next = 0;
+    unsigned long paddr_start = 0;
+    size_t sz = 0;
 
-static int is_maybe_cow(struct vm_area_struct* vma) {
-    if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
-        // Not a cow vma
-        return 0;
-    }
-
-    if(!(vma->vm_flags & VM_WRITE)) {
-        return 0;
-    }
-
-    return 1;
-}*/
-
-/**
- * @brief Break the COW page that contains "address", iff that page
- * is a COW page.
- * @return 1 = handled, 0 = not handled.
- * @prerequisite Caller must grab mm->mmap_sem
- */
-static int break_cow(struct mm_struct *mm, struct vm_area_struct* vma, unsigned long address) {
-    pgd_t *pgd = NULL;
-    pud_t *pud = NULL;
-    pmd_t *pmd = NULL;
-    pte_t *ptep = NULL;
-    pte_t pte;
-    spinlock_t* ptl;
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time = 0;
-    unsigned long long total_time = 0;
-    unsigned long long start_time = native_read_tsc();
-#endif
-    //PSPRINTK("%s: entered\n",__func__);
-
-    // if it's not a cow mapping, return.
-    if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
-        goto not_handled;
-    }
-
-    // if it's not writable in vm_flags, return.
-    if(!(vma->vm_flags & VM_WRITE)) {
-        goto not_handled;
-    }
-
-    pgd = pgd_offset(mm, address);
-    if(!pgd_present(*pgd)) {
-        goto not_handled_unlock;
-    }
-
-    pud = pud_offset(pgd,address);
-    if(!pud_present(*pud)) {
-        goto not_handled_unlock;
-    }
-
-    pmd = pmd_offset(pud,address);
-    if(!pmd_present(*pmd)) {
-        goto not_handled_unlock;
-    }
-
-    ptep = pte_offset_map(pmd,address);
-    if(!ptep || !pte_present(*ptep) || pte_none(*ptep)) {
-        pte_unmap(ptep);
-        goto not_handled_unlock;
-    }
-
-    pte = *ptep;
-
-    if(pte_write(pte)) {
-        goto not_handled_unlock;
-    }
-    
-    // break the cow!
-    ptl = pte_lockptr(mm,pmd);
-    PS_SPIN_LOCK(ptl);
-   
-    PSPRINTK("%s: proceeding on address %lx\n",__func__,address);
-    do_wp_page(mm,vma,address,ptep,pmd,ptl,pte);
-
-
-    // NOTE:
-    // Do not call pte_unmap_unlock(ptep,ptl), since do_wp_page does that!
-    
-    goto handled;
-
-not_handled_unlock:
-not_handled:
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_BREAK_COW_TIME,total_time);
-#endif
-    return 0;
-handled:
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_BREAK_COW_TIME,total_time);
-#endif
-    return 1;
-}
-
-/**
- *  @brief Find the bounds of a physically consecutive mapped region.
- *  The region must be contained within the specified VMA.
- *
- *  Hypothetical page table mappings for a given VMA:
- *
- *  *********************************
- *  *    Vaddr      *   Paddr       *
- *  *********************************
- *  * 0x10000000    * 0x12341000    *
- *  *********************************
- *  * 0x10001000    * 0x12342000    *
- *  *********************************
- *  * 0x10002000    * 0x12343000    *
- *  *********************************
- *  * 0x10003000    * 0x43214000    *
- *  *********************************
- *  
- *  This function, given a vaddr of 12342xxx will return:
- *  *vaddr_mapping_start = 0x10000000
- *  *paddr_mapping_start = 0x12341000
- *  *paddr_mapping_sz    = 0x3000
- *
- *  Notice 0x10003000 and above is not included in the returned region, as
- *  its paddr is not consecutive with the previous mappings.
- *
- */
-int find_consecutive_physically_mapped_region(struct mm_struct* mm,
-                                              struct vm_area_struct* vma,
-                                              unsigned long vaddr,
-                                              unsigned long* vaddr_mapping_start,
-                                              unsigned long* paddr_mapping_start,
-                                              size_t* paddr_mapping_sz,
-                                              int br_cow) {
-    unsigned long paddr_curr = NULL;
-    unsigned long vaddr_curr = vaddr;
-    unsigned long vaddr_next = vaddr;
-    unsigned long paddr_next = NULL;
-    unsigned long paddr_start = NULL;
-    size_t sz = 0;
-
-    
-    // Initializes paddr_curr
-    if(br_cow) {
-        break_cow(mm,vma,vaddr_curr);
-    }
-    if(get_physical_address(mm,vaddr_curr,&paddr_curr) < 0) {
-        return -1;
+    
+    // Initializes paddr_curr
+    if(get_physical_address(mm,vaddr_curr,&paddr_curr) < 0) {
+        return -1;
     }
     paddr_start = paddr_curr;
     *vaddr_mapping_start = vaddr_curr;
@@ -1928,10 +1419,6 @@ int find_consecutive_physically_mapped_region(struct mm_struct* mm,
             break;
         }
 
-        if(br_cow) {
-            break_cow(mm,vma,vaddr_next);
-        }
-
         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
             break;
         }
@@ -1957,10 +1444,6 @@ int find_consecutive_physically_mapped_region(struct mm_struct* mm,
             break;
         }
 
-        if(br_cow) {
-            break_cow(mm,vma,vaddr_next);
-        }
-
         if(get_physical_address(mm,vaddr_next,&paddr_next) < 0) {
             break;
         }
@@ -1997,8 +1480,7 @@ int find_prev_consecutive_physically_mapped_region(struct mm_struct* mm,
                                               unsigned long vaddr,
                                               unsigned long* vaddr_mapping_start,
                                               unsigned long* paddr_mapping_start,
-                                              size_t* paddr_mapping_sz,
-                                              int break_cow) {
+                                              size_t* paddr_mapping_sz) {
     unsigned long curr_vaddr_mapping_start;
     unsigned long curr_paddr_mapping_start;
     unsigned long curr_paddr_mapping_sz;
@@ -2013,8 +1495,7 @@ int find_prev_consecutive_physically_mapped_region(struct mm_struct* mm,
                                                      curr_vaddr,
                                                      &curr_vaddr_mapping_start,
                                                      &curr_paddr_mapping_start,
-                                                     &curr_paddr_mapping_sz,
-                                                     break_cow);
+                                                     &curr_paddr_mapping_sz);
         if(0 == res) {
 
             // this is a match, we can store off results and exit
@@ -2041,8 +1522,7 @@ int find_next_consecutive_physically_mapped_region(struct mm_struct* mm,
                                               unsigned long vaddr,
                                               unsigned long* vaddr_mapping_start,
                                               unsigned long* paddr_mapping_start,
-                                              size_t* paddr_mapping_sz,
-                                              int break_cow) {
+                                              size_t* paddr_mapping_sz) {
     unsigned long curr_vaddr_mapping_start;
     unsigned long curr_paddr_mapping_start;
     unsigned long curr_paddr_mapping_sz;
@@ -2057,8 +1537,7 @@ int find_next_consecutive_physically_mapped_region(struct mm_struct* mm,
                                                      curr_vaddr,
                                                      &curr_vaddr_mapping_start,
                                                      &curr_paddr_mapping_start,
-                                                     &curr_paddr_mapping_sz,
-                                                     break_cow);
+                                                     &curr_paddr_mapping_sz);
         if(0 == res) {
 
             // this is a match, we can store off results and exit
@@ -2084,8 +1563,7 @@ int fill_physical_mapping_array(struct mm_struct* mm,
         struct vm_area_struct* vma,
         unsigned long address,
         contiguous_physical_mapping_t* mappings, 
-        int arr_sz,
-        int break_cow) {
+        int arr_sz) {
     int i;
     unsigned long next_vaddr = address & PAGE_MASK;
     int ret = -1;
@@ -2102,8 +1580,7 @@ int fill_physical_mapping_array(struct mm_struct* mm,
                                             next_vaddr,
                                             &mappings[i].vaddr,
                                             &mappings[i].paddr,
-                                            &mappings[i].sz,
-                                            break_cow);
+                                            &mappings[i].sz);
 
 
         if(valid_mapping == 0) {
@@ -2138,8 +1615,7 @@ int fill_physical_mapping_array(struct mm_struct* mm,
                                             next_vaddr,
                                             &mappings[i].vaddr,
                                             &mappings[i].paddr,
-                                            &mappings[i].sz,
-                                            break_cow);
+                                            &mappings[i].sz);
             if(valid_mapping == 0) {
                 PSPRINTK("%s: supplying a mapping in slot %d\n",__func__,i);
                 mappings[i].present = 1;
@@ -2226,8 +1702,7 @@ int remap_pfn_range_remaining(struct mm_struct* mm,
     for(vaddr_curr = vaddr_start; 
         vaddr_curr < vaddr_start + sz; 
         vaddr_curr += PAGE_SIZE) {
-        //if( !(val = is_vaddr_mapped(mm,vaddr_curr)) ) {
-        if(!is_vaddr_mapped(mm,vaddr_curr)) {
+        if( !(val = is_vaddr_mapped(mm,vaddr_curr)) ) {
             //PSPRINTK("%s: mapping vaddr{%lx} paddr{%lx}\n",__func__,vaddr_curr,paddr_curr);
             // not mapped - map it
             err = remap_pfn_range(vma,
@@ -2236,8 +1711,6 @@ int remap_pfn_range_remaining(struct mm_struct* mm,
                                   PAGE_SIZE,
                                   prot);
             if(err == 0) {
-                PSPRINTK("%s: succesfully mapped vaddr{%lx} to paddr{%lx}\n",
-                            __func__,vaddr_curr,paddr_curr);
                 if(make_writable && vma->vm_flags & VM_WRITE) {
                     mk_page_writable(mm, vma, vaddr_curr);
                 }
@@ -2247,10 +1720,10 @@ int remap_pfn_range_remaining(struct mm_struct* mm,
             }
 
             if( err != 0 ) ret = err;
-        } else {
-               PSPRINTK("%s: is_vaddr_mapped %d, star:%lx end:%lx\n",
-                       __func__, val, vma->vm_start, vma->vm_end);
         }
+        else
+            PSPRINTK("%s: is_vaddr_mapped %d, start:%lx end:%lx\n",
+                     __func__, val, vma->vm_start, vma->vm_end);
 
         paddr_curr += PAGE_SIZE;
     }
@@ -2451,8 +1924,7 @@ static void send_vma(struct mm_struct* mm,
                     curr,
                     &vaddr_resolved,
                     &paddr_resolved,
-                    &sz_resolved,
-                    0)) {
+                    &sz_resolved)) {
             // None more, exit
             break;
         } else {
@@ -2658,140 +2130,6 @@ static void dump_clone_data(clone_data_t* r) {
     }
 }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-/**
- * @brief Finds a stats_query data entry.
- * @return Either a stats entry or NULL if one is not found
- * that satisfies the parameter requirements.
- */
-static stats_query_data_t* find_stats_query_data(pid_t pid) {
-    data_header_t* curr = NULL;
-    stats_query_data_t* query = NULL;
-    stats_query_data_t* ret = NULL;
-    PS_SPIN_LOCK(&_data_head_lock);
-    
-    curr = _data_head;
-    while(curr) {
-        if(curr->data_type == PROCESS_SERVER_STATS_DATA_TYPE) {
-            query = (stats_query_data_t*)curr;
-            if(query->pid == pid) {
-                ret = query;
-                break;
-            }
-        }
-        curr = curr->next;
-    }
-
-    PS_SPIN_UNLOCK(&_data_head_lock);
-
-    return ret;
-}
-#endif
-
-/**
- * Queue lock must already be held.
- */
-static void add_fault_entry_to_queue(lamport_barrier_entry_t* entry,
-                                     lamport_barrier_queue_t* queue)
-{
-    lamport_barrier_entry_t* curr = queue->queue;
-    lamport_barrier_entry_t* last = NULL;
-
-    entry->header.next = NULL;
-    entry->header.prev = NULL;
-
-    // Take care of the "empty" scenario first because it's easy.
-    if(!queue->queue) {
-        queue->queue = entry;
-        return;
-    }
-
-    // Next take care of the scenario where we have to replace
-    // the first entry
-    if(queue->queue->timestamp > entry->timestamp) {
-        queue->queue->header.prev = (data_header_t*)entry;
-        entry->header.next = (data_header_t*)queue->queue;
-        queue->queue = entry;
-        return;
-    }
-
-    // Now we have to iterate, but we know that we don't
-    // have to change the value of queue->queue.
-    while(curr) {
-        if(curr->timestamp > entry->timestamp) {
-            curr->header.prev->next = (data_header_t*)entry;
-            entry->header.prev = curr->header.prev;
-            curr->header.prev = (data_header_t*)entry;
-            entry->header.next = (data_header_t*)curr;
-            return;
-        }
-        last = curr;
-        curr = (lamport_barrier_entry_t*)curr->header.next;
-    }
-
-    // It must be the last entry then
-    if(last) {
-        last->header.next = (data_header_t*)entry;
-        entry->header.prev = (data_header_t*)last;
-    }
-
-}
-
-
-/**
- * @brief Find a fault barrier data entry.
- * @return Either a data entry, or NULL if one does 
- * not exist that satisfies the parameter requirements.
- */
-static lamport_barrier_queue_t* find_lamport_barrier_queue(int tgroup_home_cpu, 
-        int tgroup_home_id, unsigned long address) {
-
-    data_header_t* curr = NULL;
-    lamport_barrier_queue_t* entry = NULL;
-    lamport_barrier_queue_t* ret = NULL;
-
-    curr = (data_header_t*)_lamport_barrier_queue_head;
-    while(curr) {
-        entry = (lamport_barrier_queue_t*)curr;
-        if(entry->tgroup_home_cpu == tgroup_home_cpu &&
-           entry->tgroup_home_id == tgroup_home_id &&
-           entry->address == address) {
-            ret = entry;
-            break;
-        }
-        curr = curr->next;
-    }
-
-    return ret;
-}
-
-static lamport_barrier_entry_t* find_lamport_barrier_entry(int cpu,
-        int tgroup_home_cpu,
-        int tgroup_home_id, 
-        unsigned long address)
-{
-    lamport_barrier_queue_t* queue = find_lamport_barrier_queue(
-                                        tgroup_home_cpu,
-                                        tgroup_home_id,
-                                        address);
-    if(!queue) {
-        goto exit;
-    }
-
-    lamport_barrier_entry_t* curr = NULL;
-    lamport_barrier_entry_t* ret = NULL;
-    curr = queue->queue;
-    while(curr) {
-        if(curr->cpu == cpu) {
-            ret = curr;
-            goto exit;
-        }
-        curr = curr->header.next;
-    }
-exit:
-    return ret;
-}
-
 /**
  * @brief Find a thread count data entry.
  * @return Either a thread count request data entry, or NULL if one does 
@@ -3057,12 +2395,12 @@ static int dump_page_walk_pte_entry_callback(pte_t *pte, unsigned long start,
  */
 static void dump_mm(struct mm_struct* mm) {
     struct vm_area_struct * curr;
+    char buf[256];
     struct mm_walk walk = {
         .pte_entry = dump_page_walk_pte_entry_callback,
         .mm = mm,
         .private = NULL
         };
-    char buf[256];
 
     if(NULL == mm) {
         PSPRINTK("MM IS NULL!\n");
@@ -3124,7 +2462,7 @@ static void add_data_entry_to(void* entry, spinlock_t* lock, data_header_t** hea
     hdr->next = NULL;
     hdr->prev = NULL;
 
-    if(lock)PS_SPIN_LOCK(lock);
+    PS_SPIN_LOCK(lock);
     
     if (!*head) {
         *head = hdr;
@@ -3145,7 +2483,7 @@ static void add_data_entry_to(void* entry, spinlock_t* lock, data_header_t** hea
         hdr->prev = curr;
     }
 
-    if(lock)PS_SPIN_UNLOCK(lock);
+    PS_SPIN_UNLOCK(lock);
 }
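
Dropping the if(lock) guards makes the spinlock argument mandatory: a NULL lock now dereferences instead of silently skipping synchronization, so every call site must supply a real lock. A sketch of a conforming call; _saved_mm_head_lock exists in this file, the matching _saved_mm_head list head is an assumption:

    /* every caller must now hand in a real lock */
    add_data_entry_to(mm_data, &_saved_mm_head_lock, &_saved_mm_head);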
 
 /**
@@ -3316,11 +2654,6 @@ static int count_remote_thread_members(int exclude_t_home_cpu,
     int ret = -1;
     int perf = -1;
     unsigned long lockflags;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-#endif
 
     perf = PERF_MEASURE_START(&perf_count_remote_thread_members);
 
@@ -3352,14 +2685,14 @@ static int count_remote_thread_members(int exclude_t_home_cpu,
     for(i = 0; i < NR_CPUS; i++) {
         // Skip the current cpu
         if(i == _cpu) continue;
-/*#else
+#else
     // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
-    extern struct list_head rlist_head;
+    extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;*/
+        i = objPtr->_data._processor;
 #endif
         // Send the request to this cpu.
         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
@@ -3391,13 +2724,6 @@ static int count_remote_thread_members(int exclude_t_home_cpu,
     kfree(data);
 
 exit:
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_COUNT_REMOTE_THREADS_PROCESSING_TIME,total_time);
-#endif
-
     PERF_MEASURE_STOP(&perf_count_remote_thread_members," ",perf);
     return ret;
 }
@@ -3412,7 +2738,6 @@ static int count_local_thread_members(int tgroup_home_cpu,
     struct task_struct *task, *g;
     int count = 0;
     PSPRINTK("%s: entered\n",__func__);
-    read_lock(&tasklist_lock);
     do_each_thread(g,task) {
         if(task->tgroup_home_id == tgroup_home_id &&
            task->tgroup_home_cpu == tgroup_home_cpu &&
@@ -3426,7 +2751,6 @@ static int count_local_thread_members(int tgroup_home_cpu,
             
         }
     } while_each_thread(g,task);
-    read_unlock(&tasklist_lock);
     PSPRINTK("%s: exited\n",__func__);
 
     return count;
@@ -3461,8 +2785,8 @@ static int count_thread_members() {
 void process_tgroup_closed_item(struct work_struct* work) {
 
     tgroup_closed_work_t* w = (tgroup_closed_work_t*) work;
-    data_header_t *curr;
-    mm_data_t* mm_data = NULL;
+    data_header_t *curr, *next;
+    mm_data_t* mm_data;
     struct task_struct *g, *task;
     unsigned char tgroup_closed = 0;
     int perf = -1;
@@ -3476,23 +2800,21 @@ void process_tgroup_closed_item(struct work_struct* work) {
     PSPRINTK("%s: waiting for all members of this distributed thread group to finish\n",__func__);
     while(!tgroup_closed) {
         unsigned char pass = 0;
-        read_lock(&tasklist_lock);
         do_each_thread(g,task) {
             if(task->tgroup_home_cpu == w->tgroup_home_cpu &&
                task->tgroup_home_id  == w->tgroup_home_id) {
+                
                 // there are still living tasks within this distributed thread group
                 // wait a bit
+                schedule();
                 pass = 1;
-                goto pass_complete;
             }
+
         } while_each_thread(g,task);
-pass_complete:
-        read_unlock(&tasklist_lock);
         if(!pass) {
             tgroup_closed = 1;
         } else {
             PSPRINTK("%s: waiting for tgroup close out\n",__func__);
-            schedule();
         }
     }
 
@@ -3549,54 +2871,118 @@ static int is_maybe_cow(struct vm_area_struct* vma) {
 }
 
 /**
- * @brief Process a request made by a remote CPU for a mapping.  This function
- * will search for mm's for the specified distributed thread group, and if found,
- * will search that mm for entries that contain the address that was asked for.
- * Prefetch is implemented in this function, so not only will the page that
- * is asked for be communicated, but the entire contiguous range of virtual to
- * physical addresses that the specified address lives in will be communicated.
- * Other contiguous regions may also be communicated if they exist.  This is
- * prefetch.
- *
- * <MEASURED perf_process_mapping_request>
+ * @brief Break the COW page that contains "address", iff that page
+ * is a COW page.
+ * @return 1 = handled, 0 = not handled.
+ * @prerequisite Caller must grab mm->mmap_sem
  */
-void process_mapping_request(struct work_struct* work) {
-    mapping_request_work_t* w = (mapping_request_work_t*) work;
-    mapping_response_t response;
-    data_header_t* data_curr = NULL;
-    mm_data_t* mm_data = NULL;
-    struct task_struct* task = NULL;
-    struct task_struct* g;
-    struct vm_area_struct* vma = NULL;
-    struct mm_struct* mm = NULL;
-    unsigned long address = w->address;
-    unsigned long resolved = 0;
-    struct mm_walk walk = {
-        .pte_entry = vm_search_page_walk_pte_entry_callback,
-        .private = &(resolved)
-    };
-    char* plpath = NULL;
-    char lpath[512];
-    int i;
-    
-    // for perf
-    int used_saved_mm = 0;
-    int found_vma = 1;
-    int found_pte = 1;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long mapping_response_send_time_start = 0;
-    unsigned long long mapping_response_send_time_end = 0;
-    unsigned long long mapping_request_processing_time_start = native_read_tsc();
-    unsigned long long mapping_request_processing_time_end = 0;
-#endif
-    
-    // Perf start
-    int perf = PERF_MEASURE_START(&perf_process_mapping_request);
-
-    current->enable_distributed_munmap = 0;
-    current->enable_do_mmap_pgoff_hook = 0;
-
-    //PSPRINTK("%s: entered\n",__func__);
+static int break_cow(struct mm_struct *mm, struct vm_area_struct* vma, unsigned long address) {
+    pgd_t *pgd = NULL;
+    pud_t *pud = NULL;
+    pmd_t *pmd = NULL;
+    pte_t *ptep = NULL;
+    pte_t pte;
+    spinlock_t* ptl;
+
+    //PSPRINTK("%s: entered\n",__func__);
+
+    // if it's not a cow mapping, return.
+    if((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE) {
+        goto not_handled;
+    }
+
+    // if it's not writable in vm_flags, return.
+    if(!(vma->vm_flags & VM_WRITE)) {
+        goto not_handled;
+    }
+
+    pgd = pgd_offset(mm, address);
+    if(!pgd_present(*pgd)) {
+        goto not_handled_unlock;
+    }
+
+    pud = pud_offset(pgd,address);
+    if(!pud_present(*pud)) {
+        goto not_handled_unlock;
+    }
+
+    pmd = pmd_offset(pud,address);
+    if(!pmd_present(*pmd)) {
+        goto not_handled_unlock;
+    }
+
+    ptep = pte_offset_map(pmd,address);
+    if(!ptep || !pte_present(*ptep) || pte_none(*ptep)) {
+        pte_unmap(ptep);
+        goto not_handled_unlock;
+    }
+
+    pte = *ptep;
+
+    if(pte_write(pte)) {
+        goto not_handled_unlock;
+    }
+    
+    // break the cow!
+    ptl = pte_lockptr(mm,pmd);
+    PS_SPIN_LOCK(ptl);
+   
+    PSPRINTK("%s: proceeding\n",__func__);
+    do_wp_page(mm,vma,address,ptep,pmd,ptl,pte);
+
+
+    // NOTE:
+    // Do not call pte_unmap_unlock(ptep,ptl), since do_wp_page does that!
+    
+    goto handled;
+
+not_handled_unlock:
+not_handled:
+    return 0;
+handled:
+    return 1;
+}
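
break_cow() ends in do_wp_page() and so, per the @prerequisite above, must run under mmap_sem. The write-then-downgrade pattern that process_mapping_request() adopts below, reduced to a skeleton (mm, vma and addr assumed in scope):

    down_write(&mm->mmap_sem);
    for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
            break_cow(mm, vma, addr);
    /* the write side is only needed while faulting in private copies */
    downgrade_write(&mm->mmap_sem);
    /* ... read-side work, e.g. filling the mapping array ... */
    up_read(&mm->mmap_sem);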
+
+/**
+ * @brief Process a request made by a remote CPU for a mapping.  This function
+ * will search for mm's for the specified distributed thread group, and if found,
+ * will search that mm for entries that contain the address that was asked for.
+ * Prefetch is implemented in this function, so not only will the page that
+ * is asked for be communicated, but the entire contiguous range of virtual to
+ * physical addresses that the specified address lives in will be communicated.
+ * Other contiguous regions may also be communicated if they exist.  This is
+ * prefetch.
+ *
+ * <MEASURED perf_process_mapping_request>
+ */
+void process_mapping_request(struct work_struct* work) {
+    mapping_request_work_t* w = (mapping_request_work_t*) work;
+    mapping_response_t response;
+    data_header_t* data_curr = NULL;
+    mm_data_t* mm_data = NULL;
+    struct task_struct* task = NULL;
+    struct task_struct* g;
+    struct vm_area_struct* vma = NULL;
+    struct mm_struct* mm = NULL;
+    unsigned long address = w->address;
+    unsigned long resolved = 0;
+    struct mm_walk walk = {
+        .pte_entry = vm_search_page_walk_pte_entry_callback,
+        .private = &(resolved)
+    };
+    char* plpath = NULL;
+    char lpath[512];
+    int i;
+    
+    // for perf
+    int used_saved_mm = 0;
+    int found_vma = 1;
+    int found_pte = 1;
+    
+    // Perf start
+    int perf = PERF_MEASURE_START(&perf_process_mapping_request);
+
+    //PSPRINTK("%s: entered\n",__func__);
     PSPRINTK("received mapping request from {%d} address{%lx}, cpu{%d}, id{%d}\n",
             w->from_cpu,
             w->address,
@@ -3604,7 +2990,6 @@ void process_mapping_request(struct work_struct* work) {
             w->tgroup_home_id);
 
     // First, search through existing processes
-    read_lock(&tasklist_lock);
     do_each_thread(g,task) {
         if((task->tgroup_home_cpu == w->tgroup_home_cpu) &&
            (task->tgroup_home_id  == w->tgroup_home_id )) {
@@ -3618,7 +3003,6 @@ void process_mapping_request(struct work_struct* work) {
         }
     } while_each_thread(g,task);
 task_mm_search_exit:
-    read_unlock(&tasklist_lock);
 
     // Failing the process search, look through saved mm's.
     if(!mm) {
@@ -3685,14 +3069,24 @@ changed_can_be_cow:
              * if possible.
              */
             {
+            // Break all cows in this vma
+            if(can_be_cow) {
+                unsigned long cow_addr;
+                for(cow_addr = vma->vm_start; cow_addr < vma->vm_end; cow_addr += PAGE_SIZE) {
+                    break_cow(mm, vma, cow_addr);
+                }
+                // We no longer need a write lock after the break_cow process
+                // is complete, so downgrade the lock to a read lock.
+                downgrade_write(&mm->mmap_sem);
+            }
+
 
             // Now grab all the mappings that we can stuff into the response.
             if(0 != fill_physical_mapping_array(mm, 
                                                 vma,
                                                 address,
                                                 &response.mappings, 
-                                                MAX_MAPPINGS,
-                                                can_be_cow)) {
+                                                MAX_MAPPINGS)) {
                 // If the fill process fails, clear out all
                 // results.  Otherwise, we might trick the
                 // receiving cpu into thinking the target
@@ -3706,10 +3100,6 @@ changed_can_be_cow:
                     
             }
 
-            if(can_be_cow) {
-                downgrade_write(&mm->mmap_sem);
-            }
-
             }
 
             response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
@@ -3723,10 +3113,9 @@ changed_can_be_cow:
             response.vaddr_size = vma->vm_end - vma->vm_start;
             response.prot = vma->vm_page_prot;
             response.vm_flags = vma->vm_flags;
-            if(vma->vm_file == NULL || !w->need_vma) {
+            if(vma->vm_file == NULL) {
                 response.path[0] = '\0';
             } else {    
-         
                 plpath = d_path(&vma->vm_file->f_path,lpath,512);
                 strcpy(response.path,plpath);
                 response.pgoff = vma->vm_pgoff;
@@ -3772,12 +3161,7 @@ changed_can_be_cow:
         response.path[0] = '\0';
 
         // Handle case where vma was present but no pte.
-        // Optimization, if no pte, and it is specified not to
-        // send the path, we can instead report that the mapping
-        // was not found at all.  This will result in sending a 
-        // nonpresent_mapping_response_t, which is much smaller
-        // than a mapping_response_t.
-        if(vma && w->need_vma) {
+        if(vma) {
             //PSPRINTK("But vma present\n");
             found_vma = 1;
             response.present = 1;
@@ -3785,7 +3169,7 @@ changed_can_be_cow:
             response.vaddr_size = vma->vm_end - vma->vm_start;
             response.prot = vma->vm_page_prot;
             response.vm_flags = vma->vm_flags;
-             if(vma->vm_file == NULL || !w->need_vma) {
+             if(vma->vm_file == NULL) {
                  response.path[0] = '\0';
              } else {    
                  plpath = d_path(&vma->vm_file->f_path,lpath,512);
@@ -3797,19 +3181,12 @@ changed_can_be_cow:
 
     // Send response
     if(response.present) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        mapping_response_send_time_start = native_read_tsc();
-        response.send_time = mapping_response_send_time_start;
-#endif
         DO_UNTIL_SUCCESS(pcn_kmsg_send_long(w->from_cpu,
                             (struct pcn_kmsg_long_message*)(&response),
                             sizeof(mapping_response_t) - 
                             sizeof(struct pcn_kmsg_hdr) -   //
                             sizeof(response.path) +         // Chop off the end of the path
                             strlen(response.path) + 1));    // variable to save bandwidth.
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        mapping_response_send_time_end = native_read_tsc();
-#endif
     } else {
         // This is an optimization to get rid of the _long send 
         // which is a time sink.
@@ -3820,24 +3197,9 @@ changed_can_be_cow:
         nonpresent_response.tgroup_home_id  = w->tgroup_home_id;
         nonpresent_response.requester_pid = w->requester_pid;
         nonpresent_response.address = w->address;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        mapping_response_send_time_start = native_read_tsc();
-        nonpresent_response.send_time = mapping_response_send_time_start;
-#endif
-
         DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)(&nonpresent_response)));
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        mapping_response_send_time_end = native_read_tsc();
-#endif
-
     }
-    
-    // proc
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_RESPONSE_SEND_TIME,
-            mapping_response_send_time_end - mapping_response_send_time_start);
-#endif
 
     kfree(work);
 
@@ -3870,20 +3232,6 @@ changed_can_be_cow:
         PERF_MEASURE_STOP(&perf_process_mapping_request,"ERR",perf);
     }
 
-    current->enable_distributed_munmap = 1;
-    current->enable_do_mmap_pgoff_hook = 1;
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    {
-    unsigned long long mapping_request_processing_time;
-    mapping_request_processing_time_end = native_read_tsc();
-    mapping_request_processing_time = mapping_request_processing_time_end - 
-                                        mapping_request_processing_time_start;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_REQUEST_PROCESSING_TIME,
-            mapping_request_processing_time);
-    }
-#endif
-
     return;
 }
 
@@ -3904,11 +3252,6 @@ void process_exit_item(struct work_struct* work) {
     struct task_struct *task = w->task;
 
     int perf = PERF_MEASURE_START(&perf_process_exit_item);
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-#endif
 
     if(unlikely(!task)) {
         printk("%s: ERROR - empty task\n",__func__);
@@ -3942,12 +3285,6 @@ void process_exit_item(struct work_struct* work) {
     kfree(work);
 
     PERF_MEASURE_STOP(&perf_process_exit_item," ",perf);
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_EXIT_NOTIFICATION_PROCESSING_TIME,total_time);
-#endif
 }
 
 /**
@@ -3966,11 +3303,6 @@ void process_group_exit_item(struct work_struct* work) {
     struct task_struct *task = NULL;
     struct task_struct *g;
     unsigned long flags;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-#endif
 
     //int perf = PERF_MEASURE_START(&perf_process_group_exit_item);
     PSPRINTK("%s: entered\n",__func__);
@@ -4008,12 +3340,6 @@ void process_group_exit_item(struct work_struct* work) {
     PSPRINTK("%s: exiting\n",__func__);
     //PERF_MEASURE_STOP(&perf_process_group_exit_item," ",perf);
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_GROUP_EXIT_NOTIFICATION_PROCESSING_TIME,total_time);
-#endif
-
 }
 
 
@@ -4031,19 +3357,12 @@ void process_munmap_request(struct work_struct* work) {
     data_header_t *curr = NULL;
     mm_data_t* mm_data = NULL;
     mm_data_t* to_munmap = NULL;
-    struct mm_struct* mm_to_munmap = NULL;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-#endif
+    struct mm_struct *mm_to_munmap = NULL;
+
     int perf = PERF_MEASURE_START(&perf_process_munmap_request);
 
     PSPRINTK("%s: entered\n",__func__);
 
-    current->enable_distributed_munmap = 0;
-    current->enable_do_mmap_pgoff_hook = 0;
-
     // munmap the specified region in the specified thread group
     read_lock(&tasklist_lock);
     do_each_thread(g,task) {
@@ -4053,28 +3372,32 @@ void process_munmap_request(struct work_struct* work) {
            task->tgroup_home_id  == w->tgroup_home_id &&
            !(task->flags & PF_EXITING)) {
 
+            // Thread group found; munmap the region from this task's mm.
+            if (task && task->mm) {
+                mm_to_munmap = task->mm;
+            } else {
+                printk("%s: ERROR - task has no mm\n", __func__);
+            }
+
+            // TODO: check whether this still makes sense here.
             // Take note of the fact that an mm exists on the remote kernel
             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
-            
-            if (task->mm) {
-                mm_to_munmap = task->mm;
-            }
-            else
-                printk("%s: pirla\n", __func__);
 
-            goto done; 
+            goto done; // all threads in the group share a common mm.
+
         }
     } while_each_thread(g,task);
 done:
     read_unlock(&tasklist_lock);
 
-    if(mm_to_munmap) {
-        PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
-        do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
-        PS_UP_WRITE(&mm_to_munmap->mmap_sem);
-    }
-
-
+    if (mm_to_munmap) {
+        PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
+        current->enable_distributed_munmap = 0;
+        do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
+        current->enable_distributed_munmap = 1;
+        PS_UP_WRITE(&mm_to_munmap->mmap_sem);
+    }
     // munmap the specified region in any saved mm's as well.
     // This keeps old mappings saved in the mm of dead thread
     // group members from being resolved accidentally after
@@ -4098,7 +3421,9 @@ found:
 
     if (to_munmap && to_munmap->mm) {
         PS_DOWN_WRITE(&to_munmap->mm->mmap_sem);
+        current->enable_distributed_munmap = 0;
         do_munmap(to_munmap->mm, w->vaddr_start, w->vaddr_size);
+        current->enable_distributed_munmap = 1;
         if (to_munmap && to_munmap->mm)
             PS_UP_WRITE(&to_munmap->mm->mmap_sem);
         else
@@ -4120,17 +3445,8 @@ found:
     DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,
                         (struct pcn_kmsg_message*)(&response)));
 
-    current->enable_distributed_munmap = 1;
-    current->enable_do_mmap_pgoff_hook = 1;
-    
     kfree(work);
     
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MUNMAP_REQUEST_PROCESSING_TIME,total_time);
-#endif
-
     PERF_MEASURE_STOP(&perf_process_munmap_request," ",perf);
 }
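
The handler above wraps every local do_munmap in the same guard; a minimal sketch of that pattern, assuming the enable_distributed_munmap flag and PS_* lock wrappers from this patch (the helper name is hypothetical):

    static void apply_remote_munmap(struct mm_struct *mm,
                                    unsigned long start, unsigned long len)
    {
        PS_DOWN_WRITE(&mm->mmap_sem);
        current->enable_distributed_munmap = 0; /* don't re-broadcast */
        do_munmap(mm, start, len);
        current->enable_distributed_munmap = 1;
        PS_UP_WRITE(&mm->mmap_sem);
    }

Clearing the flag matters because do_munmap on this path runs on behalf of a remote kernel; without the guard, process_server_do_munmap would broadcast the unmap again and the kernels would ping-pong requests.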
 
@@ -4153,24 +3469,14 @@ void process_mprotect_item(struct work_struct* work) {
     data_header_t* curr = NULL;
     mm_data_t* mm_data = NULL;
     mm_data_t* to_munmap = NULL;
-    struct mm_struct *mm_to_munmap = NULL;
+    struct mm_struct *mm_to_munmap = NULL;
 
     int perf = PERF_MEASURE_START(&perf_process_mprotect_item);
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-#endif
-   
-    current->enable_distributed_munmap = 0;
-    current->enable_do_mmap_pgoff_hook = 0;
-
+    
     // Find the task
     read_lock(&tasklist_lock);
     do_each_thread(g,task) {
-
-        // Look for the thread group
+//     task_lock(task); // TODO: consider using this
         if (task->tgroup_home_cpu == tgroup_home_cpu &&
             task->tgroup_home_id  == tgroup_home_id &&
             !(task->flags & PF_EXITING)) {
@@ -4193,22 +3499,22 @@ void process_mprotect_item(struct work_struct* work) {
            // Take note of the fact that an mm exists on the remote kernel
             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
 
-            if(task->mm) {
-                mm_to_munmap = task->mm;
-            }
-            else
-                printk("%s: pirla\n",__func__);
-            
+            // then stop searching
             goto done;
         }
+//     task_unlock(task); // TODO: consider using this
     } while_each_thread(g,task);
 done:
     read_unlock(&tasklist_lock);
 
-    if(mm_to_munmap) {
-        do_mprotect(task,mm_to_munmap,start,len,prot,0);
-        goto early_exit;
-    }
+    if (mm_to_munmap) {
+        PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
+        current->enable_distributed_munmap = 0;
+        do_munmap(mm_to_munmap, start, len);
+        current->enable_distributed_munmap = 1;
+        PS_UP_WRITE(&mm_to_munmap->mmap_sem);
+    }
+
 
     // munmap the specified region in any saved mm's as well.
     // This keeps old mappings saved in the mm of dead thread
@@ -4232,10 +3538,14 @@ found:
     PS_SPIN_UNLOCK(&_saved_mm_head_lock);
 
     if(to_munmap != NULL) {
-        do_mprotect(NULL,to_munmap->mm,start,len,prot,0);
+        PS_DOWN_WRITE(&to_munmap->mm->mmap_sem);
+        current->enable_distributed_munmap = 0;
+        do_munmap(to_munmap->mm, start, len);
+        current->enable_distributed_munmap = 1;
+        PS_UP_WRITE(&to_munmap->mm->mmap_sem);
     }
 
-early_exit: 
+    
     // Construct response
     response.header.type = PCN_KMSG_TYPE_PROC_SRV_MPROTECT_RESPONSE;
     response.header.prio = PCN_KMSG_PRIO_NORMAL;
@@ -4248,18 +3558,9 @@ early_exit:
     DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,
                         (struct pcn_kmsg_message*)(&response)));
 
-    current->enable_distributed_munmap = 0;
-    current->enable_do_mmap_pgoff_hook = 0;
-    
     kfree(work);
 
     PERF_MEASURE_STOP(&perf_process_mprotect_item," ",perf);
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MPROTECT_REQUEST_PROCESSING_TIME,total_time);
-#endif
 }
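
Both handlers above finish by walking the saved-mm list under _saved_mm_head_lock; a hedged sketch of that lookup (the head name _saved_mm_head and the next pointer on data_header_t are assumptions inferred from usage in this file):

    static mm_data_t *find_saved_mm(int tgroup_home_cpu, int tgroup_home_id)
    {
        data_header_t *curr;
        mm_data_t *found = NULL;

        PS_SPIN_LOCK(&_saved_mm_head_lock);
        for (curr = _saved_mm_head; curr; curr = curr->next) {
            mm_data_t *mm_data = (mm_data_t *)curr;
            if (mm_data->tgroup_home_cpu == tgroup_home_cpu &&
                mm_data->tgroup_home_id  == tgroup_home_id) {
                found = mm_data;
                break;
            }
        }
        PS_SPIN_UNLOCK(&_saved_mm_head_lock);
        return found;
    }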
 
 /**
@@ -4317,7 +3618,6 @@ void process_back_migration(struct work_struct* work) {
     PSPRINTK("%s\n",__func__);
 
     // Find the task
-    read_lock(&tasklist_lock);
     do_each_thread(g,task) {
         if(task->tgroup_home_id  == w->tgroup_home_id &&
            task->tgroup_home_cpu == w->tgroup_home_cpu &&
@@ -4328,7 +3628,6 @@ void process_back_migration(struct work_struct* work) {
         }
     } while_each_thread(g,task);
 search_exit:
-    read_unlock(&tasklist_lock);
     if(!found) {
         goto exit;
     }
@@ -4363,239 +3662,6 @@ exit:
     PERF_MEASURE_STOP(&perf_process_back_migration," ",perf);
 }
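
Most handlers in this file locate a distributed thread-group member with the same traversal; a sketch of the idiom, with the tgroup_home_* fields from this patch (read_lock is the conventional guard for do_each_thread, kept here even though some hunks above drop it):

    static struct task_struct *find_tgroup_member(int home_cpu, int home_id)
    {
        struct task_struct *g, *task, *found = NULL;

        read_lock(&tasklist_lock);
        do_each_thread(g, task) {
            if (task->tgroup_home_cpu == home_cpu &&
                task->tgroup_home_id  == home_id &&
                !(task->flags & PF_EXITING)) {
                found = task;
                goto out; /* all group members share one mm */
            }
        } while_each_thread(g, task);
    out:
        read_unlock(&tasklist_lock);
        return found;
    }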
 
-/**
- * _lamport_barrier_queue_lock must already be held.
- */
-void register_lamport_barrier_request(int tgroup_home_cpu,
-                                      int tgroup_home_id,
-                                      unsigned long address,
-                                      unsigned long long timestamp,
-                                      int from_cpu) {
-    lamport_barrier_entry_t* entry = kmalloc(sizeof(lamport_barrier_entry_t),GFP_ATOMIC);
-    lamport_barrier_queue_t* queue = NULL;
-    entry->timestamp = timestamp;
-    entry->responses = 0;
-    entry->expected_responses = 0;
-    entry->allow_responses = 0;
-    entry->cpu = from_cpu;
-
-    // Find queue, if it exists
-    queue = find_lamport_barrier_queue(tgroup_home_cpu,
-                                       tgroup_home_id,
-                                       address);
-
-    // If we cannot find one, make one
-    if(!queue) {
-        queue = kmalloc(sizeof(lamport_barrier_queue_t),GFP_ATOMIC);
-        queue->tgroup_home_cpu = tgroup_home_cpu;
-        queue->tgroup_home_id  = tgroup_home_id;
-        queue->address = address;
-        queue->queue = NULL;
-        queue->active_timestamp = 0;
-        add_data_entry_to(queue,NULL,&_lamport_barrier_queue_head);
-    }
-
-    // Add entry to queue
-    add_fault_entry_to_queue(entry,queue);
-}
-
-/**
- *
- */
-void process_lamport_barrier_request(struct work_struct* work) {
-    lamport_barrier_request_work_t* w = (lamport_barrier_request_work_t*)work;
-    lamport_barrier_response_t* response = NULL;
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    register_lamport_barrier_request(w->tgroup_home_cpu,
-                                     w->tgroup_home_id,
-                                     w->address,
-                                     w->timestamp,
-                                     w->from_cpu);
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    // Reply
-    response = kmalloc(sizeof(lamport_barrier_response_t),GFP_KERNEL);
-    response->header.type = PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RESPONSE;
-    response->header.prio = PCN_KMSG_PRIO_NORMAL;
-    response->tgroup_home_cpu = w->tgroup_home_cpu;
-    response->tgroup_home_id  = w->tgroup_home_id;
-    response->address = w->address;
-    response->timestamp = w->timestamp;
-    pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)response);
-    kfree(response);
-    
-    kfree(work);
-}
-
-/**
- *
- */
-void process_lamport_barrier_request_range(struct work_struct* work) {
-    lamport_barrier_request_range_work_t* w = (lamport_barrier_request_range_work_t*)work;
-    lamport_barrier_response_range_t* response = NULL;
-    int i;
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    for(i = 0; i < (w->sz / PAGE_SIZE); i++) {
-        register_lamport_barrier_request(w->tgroup_home_cpu,
-                                         w->tgroup_home_id,
-                                         w->address + (i*PAGE_SIZE),
-                                         w->timestamp,
-                                         w->from_cpu);
-    }
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    // Reply
-    response = kmalloc(sizeof(lamport_barrier_response_range_t),GFP_KERNEL);
-    response->header.type = PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RESPONSE_RANGE;
-    response->header.prio = PCN_KMSG_PRIO_NORMAL;
-    response->tgroup_home_cpu = w->tgroup_home_cpu;
-    response->tgroup_home_id  = w->tgroup_home_id;
-    response->address = w->address;
-    response->sz = w->sz;
-    response->timestamp = w->timestamp;
-    pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)response);
-    kfree(response);
-    
-    kfree(work);
-}
-/**
- * _lamport_barrier_queue_lock must already be held.
- */
-void register_lamport_barrier_response(int tgroup_home_cpu,
-                                       int tgroup_home_id,
-                                       unsigned long address,
-                                       unsigned long long timestamp) {
-    lamport_barrier_queue_t* queue = NULL;
-    lamport_barrier_entry_t* curr = NULL;
-
-    queue = find_lamport_barrier_queue(tgroup_home_cpu,
-                                       tgroup_home_id,
-                                       address);
-
-    BUG_ON(!queue);
-
-    if(queue) {
-        curr = queue->queue;
-        while(curr) {
-            if(curr->cpu == _cpu &&
-               curr->timestamp == timestamp) {
-                curr->responses++;
-                goto accounted_for;
-            }
-            curr = curr->header.next;
-        }
-    }
-accounted_for:
-    return;
-}
-
-/**
- *
- */
-void process_lamport_barrier_response(struct work_struct* work) {
-    lamport_barrier_response_work_t* w = (lamport_barrier_response_work_t*)work;
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    register_lamport_barrier_response(w->tgroup_home_cpu,
-                                      w->tgroup_home_id,
-                                      w->address,
-                                      w->timestamp);
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    kfree(work);
-}
-
-/**
- *
- */
-void process_lamport_barrier_response_range(struct work_struct* work) {
-    lamport_barrier_response_range_work_t* w = (lamport_barrier_response_range_work_t*)work;
-    int i;
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    for(i = 0; i < (w->sz / PAGE_SIZE); i++) {
-        register_lamport_barrier_response(w->tgroup_home_cpu,
-                                          w->tgroup_home_id,
-                                          w->address + (i*PAGE_SIZE),
-                                          w->timestamp);
-    }
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    kfree(work);
-}
-/**
- * _lamport_barrier_queue_lock must already be held.
- */
-int register_lamport_barrier_release(int tgroup_home_cpu,
-                                      int tgroup_home_id,
-                                      unsigned long address,
-                                      unsigned long long timestamp,
-                                      int from_cpu) {
-    lamport_barrier_queue_t* queue = NULL;
-    lamport_barrier_entry_t* curr = NULL;
-    queue = find_lamport_barrier_queue(tgroup_home_cpu,
-                                       tgroup_home_id,
-                                       address);
-
-    if(queue) {
-        // find the specific entry
-        curr = queue->queue;
-        while(curr) {
-            if(curr->cpu == from_cpu &&
-               curr->timestamp == timestamp) {
-                remove_data_entry_from(curr,(data_header_t**)&queue->queue);
-                kfree(curr);
-                break;
-            }
-            curr = curr->header.next;
-        }
-        if(!queue->queue) {
-            remove_data_entry_from(queue,&_lamport_barrier_queue_head);
-            kfree(queue);
-        }
-    }
-}
-
-/**
- *
- */
-void process_lamport_barrier_release(struct work_struct* work) {
-    lamport_barrier_release_work_t* w = (lamport_barrier_release_work_t*)work;
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    register_lamport_barrier_release(w->tgroup_home_cpu,
-                                     w->tgroup_home_id,
-                                     w->address,
-                                     w->timestamp,
-                                     w->from_cpu);
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    kfree(work);
-}
-
-/**
- *
- */
-void process_lamport_barrier_release_range(struct work_struct* work) {
-    lamport_barrier_release_range_work_t* w = (lamport_barrier_release_range_work_t*)work;
-    int i;
-    int page_count = w->sz / PAGE_SIZE;
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    for(i = 0; i < page_count; i++) {
-        register_lamport_barrier_release(w->tgroup_home_cpu,
-                                         w->tgroup_home_id,
-                                         w->address + (i * PAGE_SIZE),
-                                         w->timestamp,
-                                         w->from_cpu);
-    }
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    kfree(work);
-}
-
 /**
  * Message handlers
  */
@@ -4853,9 +3919,6 @@ static int handle_nonpresent_mapping_response(struct pcn_kmsg_message* inc_msg)
     nonpresent_mapping_response_t* msg = (nonpresent_mapping_response_t*)inc_msg;
     mapping_request_data_t* data;
     unsigned long lockflags1,lockflags2;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long received_time = native_read_tsc();
-#endif
 
     //PSPRINTK("%s: entered\n",__func__);
 
@@ -4872,24 +3935,11 @@ static int handle_nonpresent_mapping_response(struct pcn_kmsg_message* inc_msg)
         goto exit;
     }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_RESPONSE_DELIVERY_TIME,
-                        received_time - msg->send_time);
-#endif
-
     PSPRINTK("Nonpresent mapping response received for %lx from %d\n",
             msg->address,
             msg->header.from_cpu);
 
     spin_lock_irqsave(&data->lock,lockflags1);
-
- #ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    if (!data->wait_time_concluded && (data->responses+1) == data->expected_responses) {
-        data->wait_time_concluded = native_read_tsc();
-    }
-    mb();
-#endif
-
     data->responses++;
     spin_unlock_irqrestore(&data->lock,lockflags1);
 exit:
@@ -4915,9 +3965,6 @@ static int handle_mapping_response(struct pcn_kmsg_message* inc_msg) {
     unsigned char data_paddr_present = 0;
     unsigned char response_paddr_present = 0;
     int i = 0;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long received_time = native_read_tsc();
-#endif
 
     PSPRINTK("%s: entered\n",__func__);
 
@@ -4939,10 +3986,6 @@ static int handle_mapping_response(struct pcn_kmsg_message* inc_msg) {
     if(data == NULL) {
         goto out_err;
     }
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_RESPONSE_DELIVERY_TIME,
-                        received_time - msg->send_time);
-#endif
 
     spin_lock_irqsave(&data->lock,lockflags);
 
@@ -5076,26 +4119,18 @@ static int handle_mapping_response(struct pcn_kmsg_message* inc_msg) {
 
 
 out:
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    if (!data->wait_time_concluded && ((data->responses+1) == data->expected_responses) || data->complete) {
-        data->wait_time_concluded = native_read_tsc();
-    }
-    mb();
-#endif
     // Account for this cpu's response.
     data->responses++;
 
     PSPRINTK("After changing data\n");
     dump_mapping_request_data(data);
-
-
+    
     spin_unlock_irqrestore(&data->lock,lockflags);
 
 out_err:
 
     spin_unlock_irqrestore(&_mapping_request_data_head_lock,lockflags2);
-
+    
     pcn_kmsg_free_msg(inc_msg);
 
     return 0;
@@ -5109,11 +4144,6 @@ out_err:
 static int handle_mapping_request(struct pcn_kmsg_message* inc_msg) {
     mapping_request_t* msg = (mapping_request_t*)inc_msg;
     mapping_request_work_t* work;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long receive_time = native_read_tsc();
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_REQUEST_DELIVERY_TIME,
-                        receive_time - msg->send_time);
-#endif
 
     int perf = PERF_MEASURE_START(&perf_handle_mapping_request);
 
@@ -5124,7 +4154,6 @@ static int handle_mapping_request(struct pcn_kmsg_message* inc_msg) {
         work->tgroup_home_id  = msg->tgroup_home_id;
         work->address = msg->address;
         work->requester_pid = msg->requester_pid;
-        work->need_vma = msg->need_vma;
         work->from_cpu = msg->header.from_cpu;
         queue_work(mapping_wq, (struct work_struct*)work);
     }
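
handle_mapping_request above is the template every message handler in this file follows: copy what you need out of the message into a work item, queue it, and free the message so the kmsg layer is never blocked. A hedged sketch of that shape (the example_* names are hypothetical):

    static int handle_example_msg(struct pcn_kmsg_message *inc_msg)
    {
        example_msg_t *msg = (example_msg_t *)inc_msg;
        example_work_t *work = kmalloc(sizeof(*work), GFP_ATOMIC);

        if (work) {
            INIT_WORK((struct work_struct *)work, process_example_item);
            work->from_cpu = msg->header.from_cpu; /* copy, don't keep msg */
            queue_work(mapping_wq, (struct work_struct *)work);
        }
        pcn_kmsg_free_msg(inc_msg); /* handler owns the message */
        return 0;
    }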
@@ -5277,15 +4306,13 @@ static int handle_exiting_process_notification(struct pcn_kmsg_message* inc_msg)
 
     PSPRINTK("%s: cpu: %d msg: (pid: %d from_cpu: %d [%d])\n", 
           __func__, smp_processor_id(), msg->my_pid,  inc_msg->hdr.from_cpu, msg->header.from_cpu);
-   
-    read_lock(&tasklist_lock);
+    
     do_each_thread(g,task) {
         if(task->t_home_id == msg->t_home_id &&
            task->t_home_cpu == msg->t_home_cpu) {
 
             PSPRINTK("kmkprocsrv: killing local task pid{%d}\n",task->pid);
 
-            read_unlock(&tasklist_lock);
 
             // Now we're executing locally, so update our records
             // Should I be doing this here, or in the bottom-half handler?
@@ -5306,7 +4333,7 @@ static int handle_exiting_process_notification(struct pcn_kmsg_message* inc_msg)
             goto done; // No need to continue;
         }
     } while_each_thread(g,task);
-    read_unlock(&tasklist_lock);
+
 done:
 
     pcn_kmsg_free_msg(inc_msg);
@@ -5368,7 +4395,6 @@ static int handle_process_pairing_request(struct pcn_kmsg_message* inc_msg) {
      * Once that task is found, do the bookkeeping necessary to remember
      * the remote cpu and pid information.
      */
-    read_lock(&tasklist_lock);
     do_each_thread(g,task) {
 
         if(task->pid == msg->your_pid && task->represents_remote ) {
@@ -5386,7 +4412,6 @@ static int handle_process_pairing_request(struct pcn_kmsg_message* inc_msg) {
     } while_each_thread(g,task);
 
 done:
-    read_unlock(&tasklist_lock);
 
     pcn_kmsg_free_msg(inc_msg);
 
@@ -5402,23 +4427,21 @@ done:
 static int handle_clone_request(struct pcn_kmsg_message* inc_msg) {
     clone_request_t* request = (clone_request_t*)inc_msg;
     unsigned int source_cpu = request->header.from_cpu;
-    clone_data_t* clone_data = NULL;
-    data_header_t* curr = NULL;
-    data_header_t* next = NULL;
-    vma_data_t* vma = NULL;
+    clone_data_t* clone_data;
+    data_header_t* curr;
+    data_header_t* next;
+    vma_data_t* vma;
     unsigned long lockflags;
 
     int perf = PERF_MEASURE_START(&perf_handle_clone_request);
 
-    perf_cc = native_read_tsc();
-
+    perf_cc = native_read_tsc();
     PSPRINTK("%s: entered\n",__func__);
     
     /*
      * Remember this request
      */
     clone_data = kmalloc(sizeof(clone_data_t),GFP_ATOMIC);
-    
     clone_data->header.data_type = PROCESS_SERVER_CLONE_DATA_TYPE;
 
     clone_data->clone_request_id = request->clone_request_id;
@@ -5448,8 +4471,6 @@ static int handle_clone_request(struct pcn_kmsg_message* inc_msg) {
     clone_data->thread_ds = request->thread_ds;
     clone_data->thread_fsindex = request->thread_fsindex;
     clone_data->thread_gsindex = request->thread_gsindex;
-    clone_data->def_flags = request->def_flags;
-    clone_data->personality = request->personality;
     clone_data->vma_list = NULL;
     clone_data->tgroup_home_cpu = request->tgroup_home_cpu;
     clone_data->tgroup_home_id = request->tgroup_home_id;
@@ -5479,7 +4500,6 @@ static int handle_clone_request(struct pcn_kmsg_message* inc_msg) {
     /*
      * Pull in vma data
      */
-#if COPY_WHOLE_VM_WITH_MIGRATION 
     spin_lock_irqsave(&_data_head_lock,lockflags);
 
     curr = _data_head;
@@ -5509,12 +4529,12 @@ static int handle_clone_request(struct pcn_kmsg_message* inc_msg) {
     }
 
     spin_unlock_irqrestore(&_data_head_lock,lockflags);
-#endif
 
-    perf_dd = native_read_tsc();
+    add_data_entry(clone_data);
+
+    perf_dd = native_read_tsc();
 
     {
-#ifdef PROCESS_SERVER_USE_KMOD
     struct subprocess_info* sub_info;
     char* argv[] = {clone_data->exe_path,NULL};
     static char *envp[] = { 
@@ -5522,10 +4542,7 @@ static int handle_clone_request(struct pcn_kmsg_message* inc_msg) {
         "TERM=linux",
         "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL
     };
-    
-    add_data_entry(clone_data);
-    
-    perf_aa = native_read_tsc();
+    perf_aa = native_read_tsc();
     sub_info = call_usermodehelper_setup( clone_data->exe_path /*argv[0]*/, 
             argv, envp, 
             GFP_ATOMIC );
@@ -5552,23 +4569,11 @@ static int handle_clone_request(struct pcn_kmsg_message* inc_msg) {
      * Spin up the new process.
      */
     call_usermodehelper_exec(sub_info, UMH_NO_WAIT);
-    perf_bb = native_read_tsc();
-#else
-    import_task_work_t* work;
-    work = kmalloc(sizeof(import_task_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_import_task );
-        work->data = clone_data;
-        queue_work(clone_wq, (struct work_struct*)work);
-
-    }
-#endif
+    perf_bb = native_read_tsc();
     }
 
     pcn_kmsg_free_msg(inc_msg);
-
-    perf_ee = native_read_tsc();
-
+    perf_ee = native_read_tsc();
     PERF_MEASURE_STOP(&perf_handle_clone_request," ",perf);
     return 0;
 }
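
The clone path above re-launches the migrated program through the usermode-helper API; a trimmed sketch of that sequence (error handling reduced to the allocation check; only calls visible in the hunk are used):

    static int spawn_delegate(clone_data_t *clone_data)
    {
        char *argv[] = { clone_data->exe_path, NULL };
        static char *envp[] = {
            "TERM=linux", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL
        };
        struct subprocess_info *sub_info;

        sub_info = call_usermodehelper_setup(clone_data->exe_path,
                                             argv, envp, GFP_ATOMIC);
        if (!sub_info)
            return -ENOMEM;

        /* UMH_NO_WAIT: return immediately; the spawned process pulls
         * in the saved clone_data when it starts executing. */
        return call_usermodehelper_exec(sub_info, UMH_NO_WAIT);
    }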
@@ -5604,157 +4609,9 @@ static int handle_back_migration(struct pcn_kmsg_message* inc_msg) {
     return 0;
 }
 
-static int handle_lamport_barrier_request(struct pcn_kmsg_message* inc_msg) {
-    lamport_barrier_request_t* msg = (lamport_barrier_request_t*)inc_msg;
-    lamport_barrier_request_work_t* work;
-
-    work = kmalloc(sizeof(lamport_barrier_request_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_lamport_barrier_request);
-        work->tgroup_home_cpu = msg->tgroup_home_cpu;
-        work->tgroup_home_id  = msg->tgroup_home_id;
-        work->from_cpu = msg->header.from_cpu;
-        work->address = msg->address;
-        work->timestamp = msg->timestamp;
-        queue_work(clone_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-
-    return 0;
-}
-
-static int handle_lamport_barrier_response(struct pcn_kmsg_message* inc_msg) {
-    lamport_barrier_response_t* msg = (lamport_barrier_response_t*)inc_msg;
-    lamport_barrier_response_work_t* work;
-
-    work = kmalloc(sizeof(lamport_barrier_response_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_lamport_barrier_response);
-        work->tgroup_home_cpu = msg->tgroup_home_cpu;
-        work->tgroup_home_id  = msg->tgroup_home_id;
-        work->from_cpu = msg->header.from_cpu;
-        work->address = msg->address;
-        work->timestamp = msg->timestamp;
-        queue_work(clone_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-
-    return 0;
-}
-
-static int handle_lamport_barrier_release(struct pcn_kmsg_message* inc_msg) {
-    lamport_barrier_release_t* msg = (lamport_barrier_release_t*)inc_msg;
-    lamport_barrier_release_work_t* work;
-
-    work = kmalloc(sizeof(lamport_barrier_release_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_lamport_barrier_release);
-        work->tgroup_home_cpu = msg->tgroup_home_cpu;
-        work->tgroup_home_id  = msg->tgroup_home_id;
-        work->from_cpu = msg->header.from_cpu;
-        work->address = msg->address;
-        work->timestamp = msg->timestamp;
-        queue_work(clone_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-
-    return 0;
-}
-
-static int handle_lamport_barrier_request_range(struct pcn_kmsg_message* inc_msg) {
-    lamport_barrier_request_range_t* msg = (lamport_barrier_request_range_t*)inc_msg;
-    lamport_barrier_request_range_work_t* work;
-
-    work = kmalloc(sizeof(lamport_barrier_request_range_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_lamport_barrier_request_range);
-        work->tgroup_home_cpu = msg->tgroup_home_cpu;
-        work->tgroup_home_id  = msg->tgroup_home_id;
-        work->from_cpu = msg->header.from_cpu;
-        work->address = msg->address;
-        work->sz = msg->sz;
-        work->timestamp = msg->timestamp;
-        queue_work(clone_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-
-    return 0;
-}
-
-static int handle_lamport_barrier_response_range(struct pcn_kmsg_message* inc_msg) {
-    lamport_barrier_response_range_t* msg = (lamport_barrier_response_range_t*)inc_msg;
-    lamport_barrier_response_range_work_t* work;
-
-    work = kmalloc(sizeof(lamport_barrier_response_range_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_lamport_barrier_response_range);
-        work->tgroup_home_cpu = msg->tgroup_home_cpu;
-        work->tgroup_home_id  = msg->tgroup_home_id;
-        work->from_cpu = msg->header.from_cpu;
-        work->address = msg->address;
-        work->sz = msg->sz;
-        work->timestamp = msg->timestamp;
-        queue_work(clone_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-
-    return 0;
-}
-
-static int handle_lamport_barrier_release_range(struct pcn_kmsg_message* inc_msg) {
-    lamport_barrier_release_range_t* msg = (lamport_barrier_release_range_t*)inc_msg;
-    lamport_barrier_release_range_work_t* work;
-
-    work = kmalloc(sizeof(lamport_barrier_release_range_work_t),GFP_ATOMIC);
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_lamport_barrier_release_range);
-        work->tgroup_home_cpu = msg->tgroup_home_cpu;
-        work->tgroup_home_id  = msg->tgroup_home_id;
-        work->from_cpu = msg->header.from_cpu;
-        work->address = msg->address;
-        work->sz = msg->sz;
-        work->timestamp = msg->timestamp;
-        queue_work(clone_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-
-    return 0;
-}
-
-/**
- *
- */
-static int handle_get_counter_phys_request(struct pcn_kmsg_message* inc_msg) {
-    get_counter_phys_response_t resp;
-    resp.header.type = PCN_KMSG_TYPE_PROC_SRV_GET_COUNTER_PHYS_RESPONSE;
-    resp.header.prio = PCN_KMSG_PRIO_NORMAL;
-    resp.resp = virt_to_phys(ts_counter);
-    pcn_kmsg_send(inc_msg->hdr.from_cpu,(struct pcn_kmsg_message*)&resp);
-    pcn_kmsg_free_msg(inc_msg);
-    return 0;
-}
 
-/**
- *
- */
-static int handle_get_counter_phys_response(struct pcn_kmsg_message* inc_msg) {
-    get_counter_phys_response_t* msg = (get_counter_phys_response_t*)inc_msg;
 
-    if(get_counter_phys_data) {
-        get_counter_phys_data->resp = msg->resp;
-        get_counter_phys_data->response_received = 1;
-    }
 
-    pcn_kmsg_free_msg(inc_msg);
-    
-    return 0;
-}
 
 /**
  *
@@ -5773,51 +4630,32 @@ int process_server_import_address_space(unsigned long* ip,
         unsigned long* sp, 
         struct pt_regs* regs) {
     clone_data_t* clone_data = NULL;
-    struct file* f = NULL;
-#ifdef PROCESS_SERVER_USE_KMOD
-    struct vm_area_struct* vma = NULL;
+    struct file* f;
+    struct vm_area_struct* vma;
     int munmap_ret = 0;
-#endif
     struct mm_struct* thread_mm = NULL;
     struct task_struct* thread_task = NULL;
     mm_data_t* used_saved_mm = NULL;
     int perf = -1;
-#ifndef PROCESS_SERVER_USE_KMOD
-    struct cred* new_cred = NULL;
-#endif
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    int do_time_measurement = 0;
-    unsigned long long start_time = native_read_tsc();
-#endif
 
     perf_a = native_read_tsc();
     
     PSPRINTK("import address space\n");
     
     // Verify that we're a delegated task // deadlock.
-#ifdef PROCESS_SERVER_USE_KMOD
     if (!current->executing_for_remote) {
         PSPRINTK("ERROR - not executing for remote\n");
         return -1;
     }
-#endif
 
     perf = PERF_MEASURE_START(&perf_process_server_import_address_space);
 
-    clone_data = current->clone_data;
-    if(!clone_data)
-        clone_data = find_clone_data(current->prev_cpu,current->clone_request_id);
+    clone_data = find_clone_data(current->prev_cpu,current->clone_request_id);
     if(!clone_data) {
         PERF_MEASURE_STOP(&perf_process_server_import_address_space,"Clone data missing, early exit",perf);
         return -1;
     }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    do_time_measurement = 1;
-#endif
-
     perf_b = native_read_tsc();    
     
     // Search for existing thread members to share an mm with.
@@ -5843,32 +4681,15 @@ int process_server_import_address_space(unsigned long* ip,
                                                         // been updated by the
                                                         // sending cpu.
                                                         //
-    current->executing_for_remote = 1;
     current->tgroup_distributed = 1;
     current->t_distributed = 1;
 
-#ifndef PROCESS_SERVER_USE_KMOD
-    spin_lock_irq(&current->sighand->siglock);
-    flush_signal_handlers(current,1);
-    spin_unlock_irq(&current->sighand->siglock);
-
-    set_cpus_allowed_ptr(current,cpu_all_mask);
-
-    set_user_nice(current,0);
-
-    new_cred = prepare_kernel_cred(current);
-    new_cred->cap_bset = CAP_FULL_SET;
-    new_cred->cap_inheritable = CAP_FULL_SET;
-    commit_creds(new_cred);
-#endif
-
     PSPRINTK("%s: previous_cpus{%lx}\n",__func__,current->previous_cpus);
     PSPRINTK("%s: t_home_cpu{%d}\n",__func__,current->t_home_cpu);
     PSPRINTK("%s: t_home_id{%d}\n",__func__,current->t_home_id);
   
     if(!thread_mm) {
-       
-#ifdef PROCESS_SERVER_USE_KMOD
+        
         PS_DOWN_WRITE(&current->mm->mmap_sem);
 
         // Gut existing mappings
@@ -5885,42 +4706,14 @@ int process_server_import_address_space(unsigned long* ip,
         flush_tlb_mm(current->mm);
         flush_cache_mm(current->mm);
         PS_UP_WRITE(&current->mm->mmap_sem);
+        
         // import exe_file
         f = filp_open(clone_data->exe_path,O_RDONLY | O_LARGEFILE, 0);
-        if(!IS_ERR(f)) {
+        if (!IS_ERR(f)) {
             get_file(f);
             current->mm->exe_file = f;
             filp_close(f,NULL);
-        } else {
-            printk("%s: Error opening file %s\n",__func__,clone_data->exe_path);
-        }
-       
-#else
-        struct mm_struct* mm = mm_alloc();
-        if(mm) {
-            init_new_context(current,mm);
-
-            // import exe_file
-            f = filp_open(clone_data->exe_path,O_RDONLY | O_LARGEFILE , 0);
-            if(!IS_ERR(f)) {
-                //get_file(f);
-                //mm->exe_file = f;
-                set_mm_exe_file(mm,f);
-                filp_close(f,NULL);
-            } else {
-                printk("%s: Error opening executable file\n",__func__);
-            }
-            mm->task_size = TASK_SIZE;
-            mm->token_priority = 0;
-            mm->last_interval = 0;
-
-            arch_pick_mmap_layout(mm);
-
-            atomic_inc(&mm->mm_users);
-            exec_mmap(mm);
         }
-#endif
 
         perf_c = native_read_tsc();    
 
@@ -5929,12 +4722,12 @@ int process_server_import_address_space(unsigned long* ip,
         {
         struct vm_area_struct* vma_out = NULL;
         // fetch stack
-        process_server_pull_remote_mappings(current->mm,
-                                            NULL,
-                                            clone_data->stack_start,
-                                            NULL,
-                                            &vma_out,
-                                            NULL);
+        process_server_try_handle_mm_fault(current->mm,
+                                           NULL,
+                                           clone_data->stack_start,
+                                           NULL,
+                                           &vma_out,
+                                           NULL);
 
         }
 #else // Copying address space with migration
@@ -5954,7 +4747,7 @@ int process_server_import_address_space(unsigned long* ip,
                 f = filp_open(vma_curr->path,
                                 O_RDONLY | O_LARGEFILE,
                                 0);
-                if(!IS_ERR(f)) {
+                if (!IS_ERR(f)) {
                     PS_DOWN_WRITE(&current->mm->mmap_sem);
                     vma_curr->mmapping_in_progress = 1;
                     current->enable_do_mmap_pgoff_hook = 0;
@@ -5973,8 +4766,6 @@ int process_server_import_address_space(unsigned long* ip,
                         PSPRINTK("Fault - do_mmap failed to map %lx with error %lx\n",
                                 vma_curr->start,err);
                     }
-                } else {
-                    printk("%s: error opening file %s\n",__func__,vma_curr->path);
                 }
             } else {
                 mmap_flags = MAP_UNINITIALIZED|MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE;
@@ -5998,9 +4789,7 @@ int process_server_import_address_space(unsigned long* ip,
            
             if(err > 0) {
                 // mmap_region succeeded
-                PS_DOWN_READ(&current->mm->mmap_sem);
                 vma = find_vma_checked(current->mm, vma_curr->start);
-                PS_UP_READ(&current->mm->mmap_sem);
                 PSPRINTK("vma mmapped, pulling in pte's\n");
                 if(vma) {
                     pte_curr = vma_curr->pte_list;
@@ -6159,17 +4948,12 @@ int process_server_import_address_space(unsigned long* ip,
     current->mm->arg_end = clone_data->arg_end;
     current->mm->start_data = clone_data->data_start;
     current->mm->end_data = clone_data->data_end;
-    current->mm->def_flags = clone_data->def_flags;
 
     // install thread information
     // TODO: Move to arch
     current->thread.es = clone_data->thread_es;
     current->thread.ds = clone_data->thread_ds;
     current->thread.usersp = clone_data->thread_usersp;
-    current->thread.fsindex = clone_data->thread_fsindex;
-    current->thread.fs = clone_data->thread_fs;
-    current->thread.gs = clone_data->thread_gs;    
-    current->thread.gsindex = clone_data->thread_gsindex;
    
 
     //mklinux_akshay
@@ -6201,20 +4985,19 @@ int process_server_import_address_space(unsigned long* ip,
     current->normal_prio = clone_data->normal_prio;
     current->rt_priority = clone_data->rt_priority;
     current->policy = clone_data->sched_class;
-    current->personality = clone_data->personality;
 
     // We assume that an exec is going on and the current process is the one is executing
     // (a switch will occur if it is not the one that must execute)
     { // FS/GS update --- start
-#ifdef PROCESS_SERVER_USE_KMOD
     unsigned long fs, gs;
     unsigned int fsindex, gsindex;
-    unsigned short es, ds;
                     
     savesegment(fs, fsindex);
     if ( !(clone_data->thread_fs) || !(__user_addr(clone_data->thread_fs)) ) {
       printk(KERN_ERR "%s: ERROR corrupted fs base address %p\n", __func__, clone_data->thread_fs);
     }    
+    current->thread.fsindex = clone_data->thread_fsindex;
+    current->thread.fs = clone_data->thread_fs;
     if (unlikely(fsindex | current->thread.fsindex))
       loadsegment(fs, current->thread.fsindex);
     else
@@ -6226,47 +5009,19 @@ int process_server_import_address_space(unsigned long* ip,
     if ( !(clone_data->thread_gs) && !(__user_addr(clone_data->thread_gs)) ) {
       printk(KERN_ERR "%s: ERROR corrupted gs base address %p\n", __func__, clone_data->thread_gs);      
     }
+    current->thread.gs = clone_data->thread_gs;    
+    current->thread.gsindex = clone_data->thread_gsindex;
     if (unlikely(gsindex | current->thread.gsindex))
       load_gs_index(current->thread.gsindex);
     else
       load_gs_index(0);
     if (current->thread.gs)
       checking_wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
-#else
-    {
-    int i, ch;
-    const char* name = NULL;
-    char tcomm[sizeof(current->comm)];
-
-    flush_thread();
-    set_fs(USER_DS);
-    current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
-    current->sas_ss_sp = current->sas_ss_size = 0;
-
-    // Copy exe name
-    name = clone_data->exe_path;
-    for(i = 0; (ch = *(name++)) != '\0';) {
-        if(ch == '/')
-            i = 0;
-        else if (i < (sizeof(tcomm) - 1)) 
-            tcomm[i++] = ch;
-    }
-    tcomm[i] = '\0';
-    set_task_comm(current,tcomm);
-
-    current->self_exec_id++;
-        
-    flush_signal_handlers(current,0);
-    flush_old_files(current->files);
-    }
-    start_remote_thread(regs);
-#endif
-
+
     } // FS/GS update --- end
 
     // Save off clone data, replacing any that may
     // already exist.
-#ifdef PROCESS_SERVER_USE_KMOD
     if(current->clone_data) {
         unsigned long lockflags;
         spin_lock_irqsave(&_data_head_lock,lockflags);
@@ -6275,56 +5030,20 @@ int process_server_import_address_space(unsigned long* ip,
         destroy_clone_data(current->clone_data);
     }
     current->clone_data = clone_data;
-#endif
 
     PS_UP_WRITE(&_import_sem);
 
-    process_server_notify_delegated_subprocess_starting(current->pid,
-            clone_data->placeholder_pid,
-            clone_data->requesting_cpu);
-
     //dump_task(current,NULL,0);
 
     PERF_MEASURE_STOP(&perf_process_server_import_address_space, " ",perf);
 
 
     perf_e = native_read_tsc();
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_IMPORT_TASK_TIME,total_time);
-#endif
-
-    printk("%s %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu (%d)\n",
+    printk("%s %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu (%d) {%d} \n",
             __func__,
             perf_aa, perf_bb, perf_cc, perf_dd, perf_ee,
-            perf_a, perf_b, perf_c, perf_d, perf_e, current->t_home_id);
-
-    return 0;
-}
-
-static int call_import_task(void* data) {
-    kernel_import_task(data);
-    return -1;
-}
-
-static void process_import_task(struct work_struct* work) {
-    import_task_work_t* w = (import_task_work_t*)work;
-    clone_data_t* data = w->data;
-    kfree(work); 
-    kernel_thread(call_import_task, data, SIGCHLD);
-}
+            perf_a, perf_b, perf_c, perf_d, perf_e, current->t_home_id,current->pid);
 
-long sys_process_server_import_task(void *info /*name*/,
-        const char* argv,
-        const char* envp,
-        struct pt_regs* regs) {
-    clone_data_t* clone_data = (clone_data_t*)info;
-    unsigned long ip, sp;
-    current->clone_data = clone_data;
-    printk("in sys_process_server_import_task pid{%d}, clone_data{%lx}\n",current->pid,(unsigned long)clone_data);
-    process_server_import_address_space(&ip,&sp,regs);
     return 0;
 }
 
@@ -6336,12 +5055,6 @@ long sys_process_server_import_task(void *info /*name*/,
 int process_server_do_group_exit(void) {
     exiting_group_t msg;
     int i;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    int do_time_measurement = 0;
-    unsigned long long start_time = native_read_tsc();
-#endif
 
      // Select only relevant tasks to operate on
     if(!(current->t_distributed || current->tgroup_distributed)/* || 
@@ -6349,10 +5062,6 @@ int process_server_do_group_exit(void) {
         return -1;
     }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    do_time_measurement = 1;
-#endif
-
     PSPRINTK("%s: doing distributed group exit\n",__func__);
 
     // Build message
@@ -6365,26 +5074,19 @@ int process_server_do_group_exit(void) {
     for(i = 0; i < NR_CPUS; i++) {
         // Skip the current cpu
         if(i == _cpu) continue;
-/*#else
+#else
    // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
-    extern struct list_head rlist_head;
+    extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;*/
+        i = objPtr->_data._processor;
 #endif
        // Send
         pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&msg));
     }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    if(do_time_measurement) {
-        end_time = native_read_tsc();
-        total_time = end_time - start_time;
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_GROUP_EXIT_PROCESSING_TIME,total_time);
-    }
-#endif
 
     return 0;
 }
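
process_server_do_group_exit shows the two broadcast modes this patch switches between; a condensed sketch of the loop, using the list names from the hunk:

    static void broadcast_to_remote_kernels(struct pcn_kmsg_message *msg)
    {
    #ifndef SUPPORT_FOR_CLUSTERING
        int i;
        for (i = 0; i < NR_CPUS; i++) {
            if (i == _cpu)
                continue; /* never send to ourselves */
            pcn_kmsg_send(i, msg);
        }
    #else
        /* rlist_head holds one descriptor per remote processor group. */
        struct list_head *iter;
        _remote_cpu_info_list_t *objPtr;
        extern struct list_head rlist_head;
        list_for_each(iter, &rlist_head) {
            objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
            pcn_kmsg_send(objPtr->_data._processor, msg);
        }
    #endif
    }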
@@ -6408,23 +5110,12 @@ int process_server_do_exit(void) {
     clone_data_t* clone_data = NULL;
     int perf = -1;
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    int do_time_measurement = 0;
-    unsigned long long start_time = native_read_tsc();
-#endif
-
     // Select only relevant tasks to operate on
     if(!(current->t_distributed || current->tgroup_distributed)/* || 
             !current->enable_distributed_exit*/) {
         return -1;
     }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    do_time_measurement = 1;
-#endif
-
 /*     printk("%s: CHANGED? prio: %d static: %d normal: %d rt: %u class: %d rt_prio %d\n",
                __func__,
                  current->prio, current->static_prio, current->normal_prio, current->rt_priority,
@@ -6446,7 +5137,6 @@ int process_server_do_exit(void) {
     // local group.  We have to count shadow tasks because
     // otherwise we risk missing tasks when they are exiting
     // and migrating back.
-    read_lock(&tasklist_lock);
     do_each_thread(g,task) {
         if(task->tgid == current->tgid &&           // <--- narrow search to current thread group only 
                 task->pid != current->pid &&        // <--- don't include current in the search
@@ -6458,7 +5148,6 @@ int process_server_do_exit(void) {
         }
     } while_each_thread(g,task);
 finished_membership_search:
-    read_unlock(&tasklist_lock);
 
     // Count the number of threads in this distributed thread group
     // this will be useful for determining what to do with the mm.
@@ -6468,8 +5157,8 @@ finished_membership_search:
         is_last_thread_in_group = 0;
 #ifndef SUPPORT_FOR_CLUSTERING
     } else if (!(task->t_home_cpu == _cpu &&
-/*#else
-    } else if (!(task->t_home_cpu == cpumask_first(cpu_present_mask) &&*/
+#else
+    } else if (!(task->t_home_cpu == cpumask_first(cpu_present_mask) &&
 #endif
               task->t_home_id == task->pid)) {
         // OPTIMIZATION: only bother to count threads if we are not home base for
@@ -6490,8 +5179,7 @@ finished_membership_search:
     }
     
     // Find the clone data, we are going to destroy this very soon.
-    clone_data = get_current_clone_data();
-    //clone_data = find_clone_data(current->prev_cpu, current->clone_request_id);
+    clone_data = find_clone_data(current->prev_cpu, current->clone_request_id);
 
     // Build the message that is going to migrate this task back 
     // from whence it came.
@@ -6508,25 +5196,29 @@ finished_membership_search:
         // take over, so do not mark this as executing for remote
         current->executing_for_remote = 0;
 
+        // Migrate back: the task resumes on a kernel it has run on
+        // before, but only so that it can finish exiting there.
 #ifndef SUPPORT_FOR_CLUSTERING
         for(i = 0; i < NR_CPUS; i++) {
-            // Skip the current cpu
-            if(i == _cpu)
-                continue;
-            if (test_bit(i,&current->previous_cpus))
+            // Skip the current cpu
+            if (i == _cpu)
+                continue;
+            if (test_bit(i, &current->previous_cpus))
 #else
        // the list does not include the current processor group descriptor (TODO)
         struct list_head *iter;
         _remote_cpu_info_list_t *objPtr;
-        struct cpumask *pcpum =0;
-        extern struct list_head rlist_head;
+        struct cpumask *pcpum = NULL;
+        extern struct list_head rlist_head;
         list_for_each(iter, &rlist_head) {
-        objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;
-        pcpum  = &(objPtr->_data._cpumask);
-        if ( bitmap_intersects(cpumask_bits(pcpum),  
+            objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
+            i = objPtr->_data._processor;
+            pcpum = &(objPtr->_data._cpumask);
+            if (bitmap_intersects(cpumask_bits(pcpum),
                                &(current->previous_cpus),
-                               (sizeof(unsigned long) *8)) )*/
+                               (sizeof(unsigned long) * 8)))
 #endif
             pcn_kmsg_send(i, (struct pcn_kmsg_message*)&msg);
         }
@@ -6561,12 +5253,12 @@ finished_membership_search:
               if(i == _cpu) continue;
 #else
            // the list does not include the current processor group descriptor (TODO)
-               struct list_head *iter;
-               _remote_cpu_info_list_t *objPtr;
-            extern struct list_head rlist_head;
+            struct list_head *iter;
+            _remote_cpu_info_list_t *objPtr;
+            extern struct list_head rlist_head;
             list_for_each(iter, &rlist_head) {
-                objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-                i = objPtr->_data._processor;
+                objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
+                i = objPtr->_data._processor;
 #endif
               pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&exit_notification));
             }
@@ -6577,10 +5269,10 @@ finished_membership_search:
             // it from being destroyed
             PSPRINTK("%s: This is not the last thread member, saving mm\n",
                     __func__);
-            if (current && current->mm)
-                atomic_inc(&current->mm->mm_users);
-            else
-                printk("%s: ERROR current %p, current->mm %p\n", __func__, current, current->mm);
+            if (current && current->mm)
+                atomic_inc(&current->mm->mm_users);
+            else
+                printk("%s: ERROR current %p, current->mm %p\n", __func__, current, current->mm);
 
             // Remember the mm
             mm_data = kmalloc(sizeof(mm_data_t),GFP_KERNEL);
@@ -6604,12 +5296,10 @@ finished_membership_search:
     // with it again, so remove its clone_data from the linked list, and
     // nuke it.
     if(clone_data) {
-#ifdef PROCESS_SERVER_USE_KMOD
         unsigned long lockflags;
         spin_lock_irqsave(&_data_head_lock,lockflags);
         remove_data_entry(clone_data);
         spin_unlock_irqrestore(&_data_head_lock,lockflags);
-#endif
         destroy_clone_data(clone_data);
     }
 
@@ -6617,14 +5307,6 @@ finished_membership_search:
     
     PERF_MEASURE_STOP(&perf_process_server_do_exit," ",perf);
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    if(do_time_measurement) {
-        end_time = native_read_tsc();
-        total_time = end_time - start_time;
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_EXIT_PROCESSING_TIME,total_time);
-    }
-#endif
-
     return 0;
 }
 
@@ -6650,11 +5332,9 @@ int process_server_notify_delegated_subprocess_starting(pid_t pid,
     msg.your_pid = remote_pid; 
     msg.my_pid = pid;
     
-    if(0 != pcn_kmsg_send(remote_cpu, (struct pcn_kmsg_message*)(&msg))) {
-        printk("%s: ERROR sending message pairing message to cpu %d\n",
-                __func__,
-                remote_cpu);
-    }
+    DO_UNTIL_SUCCESS(pcn_kmsg_send_long(remote_cpu, 
+                        (struct pcn_kmsg_long_message*)&msg, 
+                        sizeof(msg) - sizeof(msg.header)));
 
     PERF_MEASURE_STOP(&perf_process_server_notify_delegated_subprocess_starting,
             " ",
@@ -6673,6 +5353,7 @@ int process_server_notify_delegated_subprocess_starting(pid_t pid,
  * <MEASURE perf_process_server_do_munmap>
  */
 int process_server_do_munmap(struct mm_struct* mm, 
+            struct vm_area_struct *vma,
             unsigned long start, 
             unsigned long len) {
 
@@ -6682,21 +5363,11 @@ int process_server_do_munmap(struct mm_struct* mm,
     int s;
     int perf = -1;
     unsigned long lockflags;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time = 0;
-    unsigned long long total_time = 0;
-    unsigned long long start_time = native_read_tsc();
-    int do_time_measurement = 0;
-#endif
 
      // Nothing to do for a thread group that's not distributed.
     if(!current->tgroup_distributed || !current->enable_distributed_munmap) {
         goto exit;
-    }
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    do_time_measurement = 1;
-#endif
+    } 
 
     perf = PERF_MEASURE_START(&perf_process_server_do_munmap);
 
@@ -6725,26 +5396,18 @@ int process_server_do_munmap(struct mm_struct* mm,
     request.tgroup_home_id  = current->tgroup_home_id;
     request.requester_pid = current->pid;
 
-    // This function is always called with mm->mmap_sem held.
-    // We have to release it to avoid deadlocks.  If this
-    // lock is held and another kernel is also munmapping,
-    // then if we hold the lock here, the munmap message
-    // handler fails to acquire the mm->mmap_sem, and deadlock
-    // ensues.
-    up_write(&mm->mmap_sem);
-
 #ifndef SUPPORT_FOR_CLUSTERING
     for(i = 0; i < NR_CPUS; i++) {
         // Skip the current cpu
         if(i == _cpu) continue;
-/*#else
+#else
    // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
 extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;*/
+        i = objPtr->_data._processor;
 #endif
         // Send the request to this cpu.
         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
@@ -6760,8 +5423,6 @@ extern struct list_head rlist_head;
         schedule();
     }
 
-    down_write(&mm->mmap_sem);
-
     // OK, all responses are in, we can proceed.
 
     spin_lock_irqsave(&_munmap_data_head_lock,lockflags);
@@ -6775,14 +5436,6 @@ exit:
 
     PERF_MEASURE_STOP(&perf_process_server_do_munmap,"Exit success",perf);
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    if(do_time_measurement) {
-        end_time = native_read_tsc();
-        total_time = end_time - start_time;
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_MUNMAP_PROCESSING_TIME,total_time);
-    }
-#endif
-
     return 0;
 }
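
The munmap broadcast above completes with a count-and-wait barrier: the sender records how many replies it expects and yields until the response handlers have incremented the counter to match. A minimal sketch, assuming a structure carrying the two counters used above (the type name is hypothetical):

    /* Response handlers increment ->responses under their own lock. */
    static void wait_for_all_responses(munmap_request_data_t *data)
    {
        while (data->expected_responses != data->responses)
            schedule(); /* yield; replies arrive via kmsg handlers */
    }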
 
@@ -6801,22 +5454,12 @@ void process_server_do_mprotect(struct task_struct* task,
     int s;
     int perf = -1;
     unsigned lockflags;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time;
-    unsigned long long total_time;
-    unsigned long long start_time = native_read_tsc();
-    int do_time_measurement = 0;
-#endif
 
      // Nothing to do for a thread group that's not distributed.
     if(!current->tgroup_distributed) {
         goto exit;
     }
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    do_time_measurement = 1;
-#endif
-
     PSPRINTK("%s entered\n",__func__);
 
     perf = PERF_MEASURE_START(&perf_process_server_do_mprotect);
@@ -6851,14 +5494,14 @@ void process_server_do_mprotect(struct task_struct* task,
     for(i = 0; i < NR_CPUS; i++) {
         // Skip the current cpu
         if(i == _cpu) continue;
-/*#else
+#else
    // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
 extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;*/
+        i = objPtr->_data._processor;
 #endif
         // Send the request to this cpu.
         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
@@ -6889,14 +5532,6 @@ extern struct list_head rlist_head;
 
 exit:
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    if(do_time_measurement) {
-        end_time = native_read_tsc();
-        total_time = end_time - start_time;
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_MPROTECT_PROCESSING_TIME,total_time);
-    }
-#endif
-
     PERF_MEASURE_STOP(&perf_process_server_do_mprotect," ",perf);
 
 }
@@ -6927,6 +5562,7 @@ unsigned long process_server_do_mmap_pgoff(struct file *file, unsigned long addr
     // do_mmap_pgoff implementation) to keep from having differing
     // vm address spaces on different cpus.
     process_server_do_munmap(current->mm,
+                             /*struct vm_area_struct *vma*/NULL,
                              addr,
                              len);
 
@@ -6942,25 +5578,26 @@ not_handled_no_perf:
  *
  * <MEASURED perf_process_server_try_handle_mm_fault>
  */
-int process_server_pull_remote_mappings(struct mm_struct *mm, 
+int process_server_try_handle_mm_fault(struct mm_struct *mm, 
                                        struct vm_area_struct *vma,
                                        unsigned long address, 
                                        unsigned int flags, 
                                        struct vm_area_struct **vma_out,
                                        unsigned long error_code) {
 
-    mapping_request_data_t *data = NULL;
+    mapping_request_data_t *data;
     unsigned long err = 0;
     int ret = 0;
     mapping_request_t request;
     int i;
     int s;
     int j;
-    struct file* f = NULL;
+    struct file* f;
     unsigned long prot = 0;
     unsigned char started_outside_vma = 0;
     unsigned char did_early_removal = 0;
-    
+    char path[512];
+    char* ppath;
+
     // for perf
     unsigned char pte_provided = 0;
     unsigned char is_anonymous = 0;
@@ -6969,33 +5606,14 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
     unsigned char is_new_vma = 0;
     unsigned char paddr_present = 0;
     int perf = -1;
-    int original_enable_distributed_munmap = current->enable_distributed_munmap;
-    int original_enable_do_mmap_pgoff_hook = current->enable_do_mmap_pgoff_hook;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long mapping_wait_start = 0;
-    unsigned long long mapping_wait_end = 0;
-    unsigned long long mapping_request_send_start = 0;
-    unsigned long long mapping_request_send_end = 0;
-    unsigned long long fault_processing_time_start = 0;
-    unsigned long long fault_processing_time_end = 0;
-    unsigned long long fault_processing_time = 0;
-#endif
-
 
     // Nothing to do for a thread group that's not distributed.
     if(!current->tgroup_distributed) {
         goto not_handled_no_perf;
     }
 
-    current->enable_distributed_munmap = 0;
-    current->enable_do_mmap_pgoff_hook = 0;
-    
     perf = PERF_MEASURE_START(&perf_process_server_try_handle_mm_fault);
 
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    fault_processing_time_start = native_read_tsc();
-#endif
-
     PSPRINTK("Fault caught on address{%lx}, cpu{%d}, id{%d}, pid{%d}, tgid{%d}, error_code{%lx}\n",
             address,
             current->tgroup_home_cpu,
@@ -7011,21 +5629,13 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
         // should this thing be writable?  if so, set it and exit
         // This is a security hole, and is VERY bad.
         // It will also probably cause problems for genuine COW mappings..
-        if(!vma) {
-            vma = find_vma_checked(mm, address & PAGE_MASK);
-            if(!vma)
-                PSPRINTK("VMA failed to resolve\n");
-        }
-        if(vma && 
-                vma->vm_flags & VM_WRITE /*&& 
-                0 == is_page_writable(mm, vma, address & PAGE_MASK)*/) {
+        if(vma && (vma->vm_flags & VM_WRITE) &&
+                0 == is_page_writable(mm, vma, address & PAGE_MASK)) {
             PSPRINTK("Touching up write setting\n");
             mk_page_writable(mm,vma,address & PAGE_MASK);
             adjusted_permissions = 1;
             ret = 1;
-        } else {
-            PSPRINTK("Did not touch up write settings\n");
-        }
+        }
 
         goto not_handled;
     }
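
The touch-up above leans on two helpers defined elsewhere in this file: is_page_writable(), which inspects the PTE backing the faulting address, and mk_page_writable(), which flips it writable. The checker's body is not visible in this hunk; the following is only an assumed sketch of the page-table walk it plausibly performs, not the real implementation:

    /* Sketch: report whether the PTE for addr is present and writable.
     * Assumed shape for is_page_writable(); not the actual helper. */
    static int is_page_writable_sketch(struct mm_struct *mm,
                                       struct vm_area_struct *vma,
                                       unsigned long addr)
    {
        pgd_t *pgd = pgd_offset(mm, addr);
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        int writable = 0;

        if (pgd_none(*pgd))
            return 0;
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
            return 0;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
            return 0;
        pte = pte_offset_map(pmd, addr);
        if (pte_present(*pte))
            writable = pte_write(*pte);    /* write bit of the live PTE */
        pte_unmap(pte);
        return writable;
    }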
@@ -7048,10 +5658,21 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
     }
 #endif
     
-    // The vma that's passed in might not always be correct.  find_vma fails by returning the wrong
-    // vma when the vma is not present.  How ugly...
-    if(vma && (vma->vm_start > address || vma->vm_end <= address)) {
-        started_outside_vma = 1;
+    if(vma) {
+        if(vma->vm_file) {
+            ppath = d_path(&vma->vm_file->f_path,
+                        path,512);
+        } else {
+            path[0] = '\0';
+        }
+
+        //PSPRINTK("working with provided vma: start{%lx}, end{%lx}, path{%s}\n",vma->vm_start,vma->vm_end,path);
+    }
+
+    // The vma that's passed in might not always be correct.  find_vma fails by returning the wrong
+    // vma when the vma is not present.  How ugly...
+    if(vma && (vma->vm_start > address || vma->vm_end <= address)) {
+        started_outside_vma = 1;
         PSPRINTK("set vma = NULL, since the vma does not hold the faulting address, for whatever reason...\n");
         vma = NULL;
     } else if (vma) {
@@ -7076,9 +5697,6 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
     data->tgroup_home_cpu = current->tgroup_home_cpu;
     data->tgroup_home_id = current->tgroup_home_id;
     data->requester_pid = current->pid;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    data->wait_time_concluded = 0;
-#endif
     for(j = 0; j < MAX_MAPPINGS; j++) {
         data->mappings[j].present = 0;
         data->mappings[j].vaddr = 0;
@@ -7086,7 +5704,6 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
         data->mappings[j].sz = 0;
     }
 
-
     // Make data entry visible to handler.
     add_data_entry_to(data,
                       &_mapping_request_data_head_lock,
@@ -7101,40 +5718,21 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
     request.tgroup_home_cpu = current->tgroup_home_cpu;
     request.tgroup_home_id  = current->tgroup_home_id;
     request.requester_pid = current->pid;
-    request.need_vma = vma? 0 : 1; // Optimization, do not bother
-                                    // sending the vma path if a local
-                                    // vma is already installed, since
-                                    // we know that a do_mmap_pgoff will
-                                    // not be needed in this case.
-    // Part of need_vma optimization.  Just record the path in the
-    // data structure since we know it in advance, and since the
-    // resolving kernel instance is no longer responsible for providing
-    // it in this case.
-    if(!request.need_vma && vma->vm_file) {
-        d_path(&vma->vm_file->f_path,data->path,sizeof(data->path));
-    }
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    mapping_request_send_start = native_read_tsc();
-#endif
 
 #ifndef SUPPORT_FOR_CLUSTERING
     for(i = 0; i < NR_CPUS; i++) {
         // Skip the current cpu
         if(i == _cpu) continue;
 #else
-        // the list does not include the current processor group descirptor (TODO)
+    // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
-    extern struct list_head rlist_head;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) { 
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;*/
+        i = objPtr->_data._processor;
 #endif
         // Send the request to this cpu.
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        request.send_time = native_read_tsc();
-#endif
         s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&request));
         if(!s) {
             // A successful send operation, increase the number
@@ -7142,19 +5740,11 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
             data->expected_responses++;
         }
     }
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    mapping_request_send_end = native_read_tsc();
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_REQUEST_SEND_TIME,
-                        mapping_request_send_end - mapping_request_send_start);
-#endif
 
     // Wait for all cpus to respond, or a mapping that is complete
     // with a physical mapping.  Mapping results that do not include
     // a physical mapping cause this to wait until all mapping responses
     // have arrived from remote cpus.
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    mapping_wait_start = native_read_tsc();
-#endif
     while(1) {
         unsigned char done = 0;
         unsigned long lockflags;
@@ -7173,15 +5763,6 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
         }
         schedule();
     }
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    mapping_wait_end = native_read_tsc();
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_WAIT_TIME,
-                        mapping_wait_end - mapping_wait_start);
-    if(data->wait_time_concluded) {
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_MAPPING_POST_WAIT_TIME_RESUME,
-                            mapping_wait_end - data->wait_time_concluded);
-    }
-#endif
     
     // Handle successful response.
     if(data->present) {
@@ -7205,6 +5786,8 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
             if(data->path[0] == '\0') {       
                 PSPRINTK("mapping anonymous\n");
                 is_anonymous = 1;
+                current->enable_distributed_munmap = 0;
+                current->enable_do_mmap_pgoff_hook = 0;
                 // mmap parts that are missing, while leaving the existing
                 // parts untouched.
                 PS_DOWN_WRITE(&current->mm->mmap_sem);
@@ -7222,9 +5805,10 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
                if ( data->vm_flags & VM_NORESERVE )
                        printk(KERN_ALERT"MAPPING ANONYMOUS %p %p data: %lx vma: %lx {%lx-%lx} ret%lx\n",
                                __func__, data->mappings[i].vaddr, data->mappings[i].paddr, 
-                               data->vm_flags, vma?vma->vm_flags:0, vma?vma->vm_start:0, vma?vma->vm_end:0, err);*/
+                               data->vm_flags, vma?vma->vm_flags:0, vma?vma->vm_start:0, vma?vma->vm_end:0, err);
+*/
+                current->enable_distributed_munmap = 1;
+                current->enable_do_mmap_pgoff_hook = 1;
             } else {
-                //unsigned char used_existing;
                 PSPRINTK("opening file to map\n");
                 is_anonymous = 0;
 
@@ -7237,20 +5821,15 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
                 if( !strncmp( "/dev/zero (deleted)", data->path, strlen("/dev/zero (deleted)")+1 )) {
                     data->path[9] = '\0';
                 }
-               
-                //if(vma && vma->vm_file) {
-                //    used_existing = 0;
-                //    f = fget(fileno(vma->vm_file));
-                //} else {
-                //    used_existing = 1;
-                    f = filp_open(data->path, (data->vm_flags & VM_SHARED)? O_RDWR:O_RDONLY, 0);
-                //}
-
-                if(!IS_ERR(f)) {
+
+                f = filp_open(data->path, (data->vm_flags & VM_SHARED)? O_RDWR:O_RDONLY, 0);
+                if(!IS_ERR(f)) {
                     PSPRINTK("mapping file %s, %lx, %lx, %lx\n",data->path,
                             data->vaddr_start, 
                             data->vaddr_size,
                             (unsigned long)f);
+                    current->enable_distributed_munmap = 0;
+                    current->enable_do_mmap_pgoff_hook = 0;
                     // mmap parts that are missing, while leaving the existing
                     // parts untouched.
                     PS_DOWN_WRITE(&current->mm->mmap_sem);
@@ -7264,14 +5843,9 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
                             ((data->vm_flags & VM_SHARED)?MAP_SHARED:MAP_PRIVATE),
                             data->pgoff << PAGE_SHIFT, (data->vm_flags & VM_NORESERVE) ?1:0);
                     PS_UP_WRITE(&current->mm->mmap_sem);
-
-                    //if(used_existing) {
-                    //    fput(f);
-                    //} else {
-                        filp_close(f,NULL);
-                    //}
-                } else {
-                    printk("Error opening file %s\n",data->path);
+                    current->enable_distributed_munmap = 1;
+                    current->enable_do_mmap_pgoff_hook = 1;
+                    filp_close(f,NULL);
                 }
             }
             if(err != data->vaddr_start) {
@@ -7279,21 +5853,16 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
                 //PS_UP_WRITE(&current->mm->mmap_sem);
                 goto exit_remove_data;
             }
-            PS_DOWN_READ(&current->mm->mmap_sem); 
+
             vma = find_vma_checked(current->mm, data->address); //data->vaddr_start);
-            PS_UP_READ(&current->mm->mmap_sem);
             if (data->address < vma->vm_start || vma->vm_end <= data->address)
-                printk(KERN_ALERT"%s: ERROR %lx is not mapped in current vma {%lx-%lx} remote vma {%lx-%lx}\n",
-                               __func__, data->address, vma->vm_start, vma->vm_end,
-                               data->vaddr_start, (data->vaddr_start + data->vaddr_size));
+               printk(KERN_ALERT"%s: ERROR %lx is not mapped in current vma {%lx-%lx} remote vma {%lx-%lx}\n",
+                       __func__, data->address, vma->vm_start, vma->vm_end,
+                       data->vaddr_start, (data->vaddr_start + data->vaddr_size));
         } else {
             PSPRINTK("vma is present, using existing\n");
         }
 
-        if(vma) {
-            vma->vm_flags |= VM_MIXEDMAP;
-        }
-
         // We should have a vma now, so map physical memory into it.
         // Check to see if we have mappings
         for(i = 0; i < MAX_MAPPINGS; i++) {
@@ -7305,6 +5874,16 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
         if(vma && paddr_present) { 
             int remap_pfn_range_err = 0;
             pte_provided = 1;
+
+            // Break COW in this entire VMA
+            if(is_maybe_cow(vma)) {
+                unsigned long cow_addr;
+                PS_DOWN_WRITE(&current->mm->mmap_sem);
+                for(cow_addr = vma->vm_start; cow_addr < vma->vm_end; cow_addr += PAGE_SIZE) {
+                    break_cow(mm, vma, cow_addr);
+                }
+                PS_UP_WRITE(&current->mm->mmap_sem);
+            }
 
             for(i = 0; i < MAX_MAPPINGS; i++) {
                 if(data->mappings[i].present) {
@@ -7317,6 +5896,11 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
                                                        data->mappings[i].sz,
                                                        vm_get_page_prot(vma->vm_flags),
                                                        1);
+/*             if ( data->vm_flags & VM_NORESERVE )
+                       printk(KERN_ALERT"%s: NORESERVE %p %p data: %lx vma: %lx {%lx-%lx} ret%d\n",
+                               __func__,  data->mappings[i].vaddr, data->mappings[i].paddr,
+                               data->vm_flags, vma->vm_flags, vma->vm_start, vma->vm_end, tmp_err);
+*/
                     PS_UP_WRITE(&current->mm->mmap_sem);
                     if(tmp_err) remap_pfn_range_err = tmp_err;
                 }
@@ -7324,7 +5908,7 @@ int process_server_pull_remote_mappings(struct mm_struct *mm,
 
             // Check remap_pfn_range success
             if(remap_pfn_range_err) {
-                printk(KERN_ALERT"ERROR: Failed to remap_pfn_range %lx\n",err);
+                printk(KERN_ALERT"ERROR: Failed to remap_pfn_range %d\n",err);
             } else {
                 PSPRINTK("remap_pfn_range succeeded\n");
                 ret = 1;
@@ -7354,63 +5938,36 @@ exit_remove_data:
 not_handled:
 
     if (adjusted_permissions) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_ADJUSTED_PERMISSIONS,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,"Adjusted Permissions",perf);
     } else if (is_new_vma && is_anonymous && pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_NEWVMA_ANONYMOUS_PTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "New Anonymous VMA + PTE",
                 perf);
     } else if (is_new_vma && is_anonymous && !pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_NEWVMA_ANONYMOUS_NOPTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "New Anonymous VMA + No PTE",
                 perf);
     } else if (is_new_vma && !is_anonymous && pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_NEWVMA_FILEBACKED_PTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "New File Backed VMA + PTE",
                 perf);
     } else if (is_new_vma && !is_anonymous && !pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_NEWVMA_FILEBACKED_NOPTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "New File Backed VMA + No PTE",
                 perf);
     } else if (!is_new_vma && is_anonymous && pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_OLDVMA_ANONYMOUS_PTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "Existing Anonymous VMA + PTE",
                 perf);
     } else if (!is_new_vma && is_anonymous && !pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_OLDVMA_ANONYMOUS_NOPTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "Existing Anonymous VMA + No PTE",
                 perf);
     } else if (!is_new_vma && !is_anonymous && pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_OLDVMA_FILEBACKED_PTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "Existing File Backed VMA + PTE",
                 perf);
     } else if (!is_new_vma && !is_anonymous && !pte_provided) {
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_OLDVMA_FILEBACKED_NOPTE,0);
-#endif
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,
                 "Existing File Backed VMA + No PTE",
                 perf);
@@ -7418,42 +5975,11 @@ not_handled:
         PERF_MEASURE_STOP(&perf_process_server_try_handle_mm_fault,"test",perf);
     }
 
-    current->enable_distributed_munmap = original_enable_distributed_munmap;
-    current->enable_do_mmap_pgoff_hook = original_enable_do_mmap_pgoff_hook;
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    fault_processing_time_end = native_read_tsc();
-    fault_processing_time = fault_processing_time_end - fault_processing_time_start;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_FAULT_PROCESSING_TIME,
-                        fault_processing_time);
-#endif
-
     return ret;
 
 not_handled_no_perf:
-    
-    return 0;
-}
 
-/**
- *  
- */
-void break_all_cow_pages(struct task_struct* task, struct task_struct* orig) {
-    struct mm_struct* mm = task->mm;
-    struct vm_area_struct* curr = mm->mmap;
-    unsigned long i,start, end;
-    while(curr) {
-        if(is_maybe_cow(curr)) {
-            start = curr->vm_start;
-            end = curr->vm_end;
-            for(i = start; i < end; i += PAGE_SIZE) {
-                if(break_cow(mm,curr,i)) {
-                    mk_page_writable_lookupvma(orig->mm,i);
-                }
-            }
-        }
-        curr = curr->vm_next;
-    }
+    return 0;
 }
 
 /**
@@ -7489,7 +6015,7 @@ int process_server_dup_task(struct task_struct* orig, struct task_struct* task)
     task->t_distributed = 0;
     task->previous_cpus = 0;
     task->known_cpu_with_tgroup_mm = 0;
-    task->return_disposition = RETURN_DISPOSITION_NONE;
+    task->return_disposition = RETURN_DISPOSITION_EXIT;
 
     // If this is pid 1 or 2, the parent cannot have been migrated
     // so it is safe to take on all local thread info.
@@ -7505,22 +6031,16 @@ int process_server_dup_task(struct task_struct* orig, struct task_struct* task)
         task->tgroup_home_cpu = home_kernel;
         task->tgroup_home_id = task->tgid;
         task->tgroup_distributed = 0;
-
-        // COW problem fix, necessary for coherency.
-        if(orig->tgroup_distributed) {
-            break_all_cow_pages(task,orig);
-        }
-
         return 1;
     }
 
     // Inherit the list of known cpus with mms for this thread group 
     // once we know that the task is in the same tgid.
     task->known_cpu_with_tgroup_mm = orig->known_cpu_with_tgroup_mm;
 
     // This is important.  We want to make sure to keep an accurate record
     // of which cpu and thread group the new thread is a part of.
-    if(orig->executing_for_remote == 1 || orig->tgroup_home_cpu != _cpu) {
+    if(orig->executing_for_remote == 1 || orig->tgroup_home_cpu != home_kernel) {
         task->tgroup_home_cpu = orig->tgroup_home_cpu;
         task->tgroup_home_id = orig->tgroup_home_id;
         task->tgroup_distributed = 1;
@@ -7602,8 +6122,6 @@ static int do_migration_to_new_cpu(struct task_struct* task, int cpu) {
 
     // Book keeping for distributed threads.
     task->tgroup_distributed = 1;
-
-    read_lock(&tasklist_lock);
     do_each_thread(g,tgroup_iterator) {
         if(tgroup_iterator != task) {
             if(tgroup_iterator->tgid == task->tgid) {
@@ -7613,7 +6131,6 @@ static int do_migration_to_new_cpu(struct task_struct* task, int cpu) {
             }
         }
     } while_each_thread(g,tgroup_iterator);
-    read_unlock(&tasklist_lock);
 
     // Pick an id for this remote process request
     PS_SPIN_LOCK(&_clone_request_id_lock);
@@ -7686,7 +6203,6 @@ static int do_migration_to_new_cpu(struct task_struct* task, int cpu) {
     request->arg_end = task->mm->arg_end;
     request->data_start = task->mm->start_data;
     request->data_end = task->mm->end_data;
-    request->def_flags = task->mm->def_flags;
     
     // struct task_struct ---------------------------------------------------------    
     request->stack_ptr = stack_start;
@@ -7702,8 +6218,6 @@ static int do_migration_to_new_cpu(struct task_struct* task, int cpu) {
     request->normal_prio = task->normal_prio;
     request->rt_priority = task->rt_priority;
     request->sched_class = task->policy;
-    request->personality = task->personality;
-    
 
     /*mklinux_akshay*/
     if (task->prev_pid == -1)
@@ -7724,26 +6238,25 @@ static int do_migration_to_new_cpu(struct task_struct* task, int cpu) {
     // have a look at: copy_thread() arch/x86/kernel/process_64.c 
     // have a look at: struct thread_struct arch/x86/include/asm/processor.h
     {
-       unsigned long fs, gs;
+       unsigned long fs, gs;
        unsigned int fsindex, gsindex;
        unsigned int ds, es;
-    unsigned long _usersp;
-
-       if (current != task)
-           PSPRINTK("DAVEK current is different from task!\n");
+
+       if (current != task)
+           PSPRINTK("DAVEK current is different from task!\n");
 
     request->thread_sp0 = task->thread.sp0;
     request->thread_sp = task->thread.sp;
     //printk("%s: usersp percpu %lx thread %lx\n", __func__, percpu_read(old_rsp), task->thread.usersp);
     // if (percpu_read(old_rsp), task->thread.usersp) set to 0 otherwise copy
-    _usersp = get_percpu_old_rsp();
-    if (task->thread.usersp != _usersp) {
-        printk("%s: USERSP %lx %lx\n",
-                __func__, task->thread.usersp, _usersp);
-        request->thread_usersp = _usersp;
-    } else {
-        request->thread_usersp = task->thread.usersp;
-    }
+    {
+        unsigned long _usersp = get_percpu_old_rsp();
+        if (task->thread.usersp != _usersp) {
+            printk("%s: USERSP %lx %lx\n",
+                    __func__, task->thread.usersp, _usersp);
+            request->thread_usersp = _usersp;
+        } else {
+            request->thread_usersp = task->thread.usersp;
+        }
+    }
     
     request->thread_es = task->thread.es;
     savesegment(es, es);          
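
The usersp handling just above exists because, on x86-64, the syscall entry path parks the user stack pointer in the per-cpu old_rsp slot rather than in the thread struct, so task->thread.usersp can be stale for the currently running task. Factored out as a helper, the selection looks like this (a sketch; capture_usersp is a hypothetical name, get_percpu_old_rsp is the accessor already used in this patch):

    /* Sketch: prefer the live per-cpu user RSP over the cached one. */
    static unsigned long capture_usersp(struct task_struct *task)
    {
        unsigned long live = get_percpu_old_rsp();

        if (task->thread.usersp != live) {
            printk("%s: USERSP %lx %lx\n", __func__,
                   task->thread.usersp, live);
            return live;    /* per-cpu slot holds the freshest value */
        }
        return task->thread.usersp;
    }

The same selection is repeated in the back-migration path below and could share this helper.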
@@ -7813,7 +6326,7 @@ static int do_migration_to_new_cpu(struct task_struct* task, int cpu) {
 static int do_migration_back_to_previous_cpu(struct task_struct* task, int cpu) {
     back_migration_t mig;
     struct pt_regs* regs = task_pt_regs(task);
-    unsigned long _usersp;
+
     int perf = -1;
 
     perf = PERF_MEASURE_START(&perf_process_server_do_migration);
@@ -7832,6 +6345,7 @@ static int do_migration_back_to_previous_cpu(struct task_struct* task, int cpu)
     task->executing_for_remote = 0;
     task->represents_remote = 1;
     task->t_distributed = 1; // This should already be the case
+    task->return_disposition = RETURN_DISPOSITION_EXIT;
     
     // Build message
     mig.tgroup_home_cpu = task->tgroup_home_cpu;
@@ -7842,14 +6356,13 @@ static int do_migration_back_to_previous_cpu(struct task_struct* task, int cpu)
     mig.thread_fs       = task->thread.fs;
     mig.thread_gs       = task->thread.gs;
 
-    _usersp = get_percpu_old_rsp();
-    if (task->thread.usersp != _usersp) { 
-        printk("%s: USERSP %lx %lx\n",
-                __func__, task->thread.usersp, _usersp);
-        mig.thread_usersp = _usersp;
-    } else {
-        mig.thread_usersp = task->thread.usersp;
-    }
+    {
+        unsigned long _usersp = get_percpu_old_rsp();
+        if (task->thread.usersp != _usersp) {
+            printk("%s: USERSP %lx %lx\n",
+                    __func__, task->thread.usersp, _usersp);
+            mig.thread_usersp = _usersp;
+        } else {
+            mig.thread_usersp = task->thread.usersp;
+        }
+    }
 
     mig.thread_es       = task->thread.es;
     mig.thread_ds       = task->thread.ds;
@@ -7888,6 +6401,7 @@ int process_server_do_migration(struct task_struct* task, int cpu) {
     int ret = 0;
 
 #ifndef SUPPORT_FOR_CLUSTERING
+    printk(KERN_ALERT"%s: normal migration\n",__func__);
     if(test_bit(cpu,&task->previous_cpus)) {
         ret = do_migration_back_to_previous_cpu(task,cpu);
     } else {
@@ -7899,6 +6413,7 @@ int process_server_do_migration(struct task_struct* task, int cpu) {
                       "(cpu: %d present_mask)\n", __func__, task, cpu);
         return -EBUSY;
     }
+    printk(KERN_ALERT"%s: clustering activated\n",__func__);
     // TODO it seems that David is using previous_cpus as a bitmask..
     // TODO this must be upgraded to a cpumask; it is declared as unsigned long in task_struct
     struct list_head *iter;
@@ -7911,6 +6426,7 @@ extern struct list_head rlist_head;
         cpuid = objPtr->_data._processor;
         pcpum = &(objPtr->_data._cpumask);
        if (cpumask_test_cpu(cpu, pcpum)) {
+       printk(KERN_ALERT"%s: cpuid {%d} \n",cpuid);
                if ( bitmap_intersects(cpumask_bits(pcpum),
                                       &(task->previous_cpus),
                                       (sizeof(unsigned long)*8)) )
@@ -7931,16 +6447,9 @@ extern struct list_head rlist_head;
  */
 void process_server_do_return_disposition(void) {
 
-    int return_disposition = current->return_disposition;
-
     PSPRINTK("%s\n",__func__);
-    // Reset the return disposition
-    current->return_disposition = RETURN_DISPOSITION_NONE;
 
-    switch(return_disposition) {
-    case RETURN_DISPOSITION_NONE:
-        printk("%s: ERROR, return disposition is none!\n",__func__);
-        break;
+    switch(current->return_disposition) {
     case RETURN_DISPOSITION_MIGRATE:
         // Nothing to do, already back-imported the
         // state in process_back_migration.  This will
@@ -7958,643 +6467,6 @@ void process_server_do_return_disposition(void) {
     return;
 }
 
-/**
- * _lamport_barrier_queue_lock must NOT already be held.
- */
-void wait_for_lamport_lock_acquisition(lamport_barrier_queue_t* queue,
-                                       lamport_barrier_entry_t* entry) {
-        // Wait until "entry" is at the front of the queue
-    while(1) {
-        PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-        if(entry == queue->queue) {
-            queue->active_timestamp = entry->timestamp;
-            PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-            goto lock_acquired;
-        }
-        PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-        schedule();
-    } 
-lock_acquired:
-    return;
-}
-
-/**
- * _lamport_barrier_queue_lock must NOT already be held.
- */
-void wait_for_all_lamport_request_responses(lamport_barrier_entry_t* entry) {
-    while(1) {
-        PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-        if(entry->expected_responses == entry->responses) {
-            PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-            goto responses_acquired;
-        }
-        PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-        schedule();
-    }
-responses_acquired:
-    return;
-}
-
-/**
- * _lamport_barrier_queue_lock must already be held.
- */
-void add_entry_to_lamport_queue(unsigned long address, 
-                                unsigned long long ts,
-                                lamport_barrier_entry_t** entry,
-                                lamport_barrier_queue_t** queue) {
-
-    *entry = kmalloc(sizeof(lamport_barrier_entry_t),GFP_ATOMIC);
-
-    // form record and place in queue
-    (*entry)->timestamp = ts;
-    (*entry)->responses = 0;
-    (*entry)->expected_responses = 0;
-    (*entry)->allow_responses = 0;
-    (*entry)->cpu = _cpu;
-
-    // find queue if it exists
-    *queue = find_lamport_barrier_queue(current->tgroup_home_cpu,
-                                     current->tgroup_home_id,
-                                     address);
-    // If no queue exists, create one
-    if(!*queue) {
-        *queue = kmalloc(sizeof(lamport_barrier_queue_t),GFP_ATOMIC);
-        (*queue)->tgroup_home_cpu = current->tgroup_home_cpu;
-        (*queue)->tgroup_home_id  = current->tgroup_home_id;
-        (*queue)->address = address;
-        (*queue)->active_timestamp = 0;
-        (*queue)->queue = NULL;
-        add_data_entry_to(*queue,NULL,&_lamport_barrier_queue_head);
-    } 
-
-    // Add entry to queue
-    add_fault_entry_to_queue(*entry,*queue);
-}
-
-
-
-
-/**
- *
- */
-int process_server_acquire_page_lock_range(unsigned long address,size_t sz) {
-    lamport_barrier_request_range_t* request = NULL;
-    lamport_barrier_entry_t** entry_list = NULL;
-    lamport_barrier_queue_t** queue_list = NULL;
-    int i,s;
-    unsigned long addr;
-    int index;
-    int page_count = sz / PAGE_SIZE;
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    unsigned long long end_time = 0;
-    unsigned long long total_time = 0;
-    unsigned long long start_time = native_read_tsc();
-#endif
-
-    if(!current->tgroup_distributed) return 0;
-
-    entry_list = kmalloc(sizeof(lamport_barrier_entry_t*)*page_count,GFP_KERNEL);
-    queue_list = kmalloc(sizeof(lamport_barrier_queue_t*)*page_count,GFP_KERNEL);
-    request = kmalloc(sizeof(lamport_barrier_request_range_t), GFP_KERNEL);
-  
-    BUG_ON(!request);
-    BUG_ON(!entry_list);
-    BUG_ON(!queue_list);
-
-    address &= PAGE_MASK;
-    request->header.type = PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_REQUEST_RANGE;
-    request->header.prio = PCN_KMSG_PRIO_NORMAL;
-    request->address = address;
-    request->sz = sz;
-    request->tgroup_home_cpu = current->tgroup_home_cpu;
-    request->tgroup_home_id =  current->tgroup_home_id;
-
-    // Grab the fault barrier queue lock
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    
-    // create timestamp
-    request->timestamp = get_next_ts_value(); /*native_read_tsc();*/
-
-    index = 0;
-    for(addr = address; addr < address + sz; addr += PAGE_SIZE) {
-        add_entry_to_lamport_queue(addr,
-                                   request->timestamp,
-                                   &(entry_list[index]),
-                                   &(queue_list[index]));
-        index++;
-    }
-
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    // Send out request to everybody
-    for(i = 0; i < NR_CPUS; i++) {
-        if(i == _cpu) continue;
-        s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)request);
-        if(!s) {
-            for(index = 0; index < page_count; index++) 
-                entry_list[index]->expected_responses++;
-        }
-    }
-
-    mb();
-
-    kfree(request);
-
-    for(index = 0; index < page_count; index++)
-        wait_for_all_lamport_request_responses(entry_list[index]);
-
-    for(index = 0; index < page_count; index++)
-        wait_for_lamport_lock_acquisition(queue_list[index],entry_list[index]);
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    end_time = native_read_tsc();
-    for(index = 0; index < page_count; index++)
-        entry_list[index]->lock_acquired = end_time;
-    total_time = end_time - start_time;
-    PS_PROC_DATA_TRACK(PS_PROC_DATA_WAITING_FOR_LAMPORT_LOCK,total_time);
-#endif
-
-    kfree(entry_list);
-    kfree(queue_list);
-
-    return 0;
-}
-
-/**
- *
- */
-int process_server_acquire_page_lock(unsigned long address) {
-    return process_server_acquire_page_lock_range(address,PAGE_SIZE);
-}
-
-/**
- *
- */
-int process_server_acquire_heavy_lock() {
-    return process_server_acquire_page_lock_range(0,PAGE_SIZE);
-}
-
-void release_local_lamport_lock(unsigned long address,
-                                unsigned long long* timestamp) {
-    lamport_barrier_queue_t* queue = NULL;
-    lamport_barrier_entry_t* entry = NULL;
-    *timestamp = 0;
-    // find queue
-    queue = find_lamport_barrier_queue(current->tgroup_home_cpu,
-                                     current->tgroup_home_id,
-                                     address);
-
-    //BUG_ON(!queue);
-
-    if(queue) {
-
-        BUG_ON(!queue->queue);
-        BUG_ON(queue->queue->cpu != _cpu);
-        
-        entry = queue->queue;
-        
-        BUG_ON(entry->timestamp != queue->active_timestamp);
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-        entry->lock_released = native_read_tsc();
-        PS_PROC_DATA_TRACK(PS_PROC_DATA_LAMPORT_LOCK_HELD,
-                                entry->lock_released - entry->lock_acquired);
-#endif
-        *timestamp = entry->timestamp;
-        queue->active_timestamp = 0;
-        
-        // remove entry from queue
-        remove_data_entry_from((data_header_t*)entry,(data_header_t**)&queue->queue);
-
-        kfree(entry); // this is OK, because kfree never sleeps
-
-        // garbage collect the queue if necessary
-        if(!queue->queue) {
-            remove_data_entry_from(queue,&_lamport_barrier_queue_head);
-            kfree(queue);
-        }
-    
-    }
-
-}
-
-
-/**
- *
- */
-void process_server_release_page_lock_range(unsigned long address,size_t sz) {
-    lamport_barrier_release_range_t* release = NULL;
-    int i;
-    int index;
-    unsigned long long timestamp = 0;
-    unsigned long long tmp_ts = 0;
-    int page_count = sz / PAGE_SIZE;
-
-    if(!current->tgroup_distributed) return;
-
-    address &= PAGE_MASK;
-    release = kmalloc(sizeof(lamport_barrier_release_range_t),
-                        GFP_KERNEL);
-
-    PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
-    for(index = 0; index < page_count; index++) {
-        release_local_lamport_lock(address + (index*PAGE_SIZE),
-                                   &tmp_ts);
-        if(!timestamp && tmp_ts) timestamp = tmp_ts;
-    }
-    PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
-
-    // Send release
-    release->header.type = PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RELEASE_RANGE;
-    release->header.prio = PCN_KMSG_PRIO_NORMAL;
-    release->tgroup_home_cpu = current->tgroup_home_cpu;
-    release->tgroup_home_id  = current->tgroup_home_id;
-    release->timestamp = timestamp;
-    release->address = address;
-    release->sz = sz;
-    for(i = 0; i < NR_CPUS; i++) {
-        if(i == _cpu) continue;
-        pcn_kmsg_send(i,(struct pcn_kmsg_message*)release);
-    }
-
-    kfree(release);
-}
-
-/**
- *
- */
-void process_server_release_page_lock(unsigned long address) {
-    process_server_release_page_lock_range(address,PAGE_SIZE);
-}
-
-/**
- *
- */
-void process_server_release_heavy_lock() {
-    process_server_release_page_lock_range(0,PAGE_SIZE);
-}
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static void proc_data_reset(int cpu,int entry) {
-    if(entry >= PS_PROC_DATA_MAX) {
-        printk("Invalid proc_data_reset entry %d\n",entry);
-        return;
-    }
-    _proc_data[cpu][entry].total = 0;
-    _proc_data[cpu][entry].count = 0;
-    _proc_data[cpu][entry].min = 0;
-    _proc_data[cpu][entry].max = 0;
-   
-}
-#endif
-
-/**
- *
- */
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static int proc_read(char* buf, char**start, off_t off, int count,
-                        int *eof, void*d) {
-    char* p = buf;
-    int i,j,s;
-    stats_query_t query;
-    stats_query_data_t data;
-
-    sprintf(buf,"See dmesg\n");
-
-    query.header.prio = PCN_KMSG_PRIO_NORMAL;
-    query.header.type = PCN_KMSG_TYPE_PROC_SRV_STATS_QUERY;
-    query.pid = current->pid;
-    data.pid = current->pid;
-    data.header.data_type = PROCESS_SERVER_STATS_DATA_TYPE;
-    data.expected_responses = 0;
-    data.responses = 0;
-
-    add_data_entry(&data);
-
-    // Update all the data
-#ifndef SUPPORT_FOR_CLUSTERING
-    for(i = 0; i < NR_CPUS; i++) {
-        if(i == _cpu) continue;
-#else
-    // the list does not include the current processor group descirptor (TODO)
-    struct list_head *iter;
-    _remote_cpu_info_list_t *objPtr;
-    extern struct list_head rlist_head;
-    list_for_each(iter, &rlist_head) {
-        objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;
-
-#endif
-        s = pcn_kmsg_send(i,(struct pcn_kmsg_message*)(&query));
-        if(!s) {
-            data.expected_responses++;
-        }
-    }
-
-    while(data.expected_responses != data.responses) {
-        schedule();
-    }
-
-    spin_lock(&_data_head_lock);
-    remove_data_entry(&data);
-    spin_unlock(&_data_head_lock);
-
-    printk("Process Server Data\n");
-    for(i = 0; i < PS_PROC_DATA_MAX; i++) {
-        printk("%s[Tot,Cnt,Max,Min,Avg]:\n",_proc_data[_cpu][i].name);
-        for(j = 0; j < NR_CPUS; j++) {
-            if(_proc_data[j][i].count) {
-                unsigned long long avg = 0;
-                if(_proc_data[j][i].count)
-                    avg = _proc_data[j][i].total / _proc_data[j][i].count;
-                printk("\tcpu{%d}[%llx,%d,%llx,%llx,%llx]\n",
-                                j,
-                                _proc_data[j][i].total,
-                                _proc_data[j][i].count,
-                                _proc_data[j][i].max,
-                                _proc_data[j][i].min,
-                                avg);
-            }
-        }
-    }
-    return strlen(buf);
-}           
-#endif
-
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static void proc_track_data(int entry, unsigned long long time) {
-    if(entry >= PS_PROC_DATA_MAX) {
-        printk("Invalid proc_track_data entry %d\n",entry);
-        return;
-    }
-    _proc_data[_cpu][entry].total += time;
-    _proc_data[_cpu][entry].count++;
-    if(_proc_data[_cpu][entry].min == 0 || time < _proc_data[_cpu][entry].min)
-        _proc_data[_cpu][entry].min = time;
-    if(time > _proc_data[_cpu][entry].max)
-        _proc_data[_cpu][entry].max = time;
-}
-#endif
-
-
-
-/**
- *
- */      
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static int proc_write(struct file* file,
-                        const char* buffer,
-                        unsigned long count,
-                        void* data) {
-    int i;
-    int j;
-    stats_clear_t msg;
-    msg.header.type = PCN_KMSG_TYPE_PROC_SRV_STATS_CLEAR;
-    msg.header.prio = PCN_KMSG_PRIO_NORMAL;
-
-    for(j = 0; j < NR_CPUS; j++)
-        for(i = 0; i < PS_PROC_DATA_MAX; i++)
-            proc_data_reset(j,i);
-
-#ifndef SUPPORT_FOR_CLUSTERING
-    for(i = 0; i < NR_CPUS; i++) {
-        if(i == _cpu) continue;
-#else
-    // the list does not include the current processor group descirptor (TODO)
-    struct list_head *iter;
-    _remote_cpu_info_list_t *objPtr;
-    extern struct list_head rlist_head;
-    list_for_each(iter, &rlist_head) {
-        objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
-        i = objPtr->_data._processor;
-
-#endif
-        pcn_kmsg_send(i,(struct pcn_kmsg_message*)&msg);
-    }
-
-
-    return count;
-} 
-#endif
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static void proc_data_init() {
-    int i;
-    int j;
-    _proc_entry = create_proc_entry("procsrv",666,NULL);
-    _proc_entry->read_proc = proc_read;
-    _proc_entry->write_proc = proc_write;
-
-    for(j = 0; j < NR_CPUS; j++)
-        for(i = 0; i < PS_PROC_DATA_MAX; i++)
-            proc_data_reset(j,i);
-
-    for(j = 0; j < NR_CPUS; j++) {
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_WAIT_TIME].name,
-                "Mapping wait time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_POST_WAIT_TIME_RESUME].name,
-                "Time after all mapping responses are in and when the fault handler resumes");
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_REQUEST_SEND_TIME].name,
-                "Mapping request send time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_RESPONSE_SEND_TIME].name,
-                "Mapping response send time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_REQUEST_DELIVERY_TIME].name,
-                "Mapping request delivery time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_RESPONSE_DELIVERY_TIME].name,
-                "Mapping response delivery time");
-        sprintf(_proc_data[j][PS_PROC_DATA_BREAK_COW_TIME].name,
-                "Break cow time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MAPPING_REQUEST_PROCESSING_TIME].name,
-                "Mapping request processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_FAULT_PROCESSING_TIME].name,
-                "Fault processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_ADJUSTED_PERMISSIONS].name,
-                "Adjusted permissions fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_NEWVMA_ANONYMOUS_PTE].name,
-                "Newvma anonymous pte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_NEWVMA_ANONYMOUS_NOPTE].name,
-                "Newvma anonymous nopte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_NEWVMA_FILEBACKED_PTE].name,
-                "Newvma filebacked pte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_NEWVMA_FILEBACKED_NOPTE].name,
-                "Newvma filebacked nopte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_OLDVMA_ANONYMOUS_PTE].name,
-                "Oldvma anonymous pte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_OLDVMA_ANONYMOUS_NOPTE].name,
-                "Oldvma anonymous nopte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_OLDVMA_FILEBACKED_PTE].name,
-                "Oldvma filebacked pte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_OLDVMA_FILEBACKED_NOPTE].name,
-                "Oldvma filebacked nopte fault time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MUNMAP_PROCESSING_TIME].name,
-                "Munmap processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MUNMAP_REQUEST_PROCESSING_TIME].name,
-                "Munmap request processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MPROTECT_PROCESSING_TIME].name,
-                "Mprotect processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MPROTECT_REQUEST_PROCESSING_TIME].name,
-                "Mprotect request processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_EXIT_PROCESSING_TIME].name,
-                "Exit processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_EXIT_NOTIFICATION_PROCESSING_TIME].name,
-                "Exit notification processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_GROUP_EXIT_PROCESSING_TIME].name,
-                "Group exit processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_GROUP_EXIT_NOTIFICATION_PROCESSING_TIME].name,
-                "Group exit notification processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_IMPORT_TASK_TIME].name,
-                "Import migrated task information time");
-        sprintf(_proc_data[j][PS_PROC_DATA_COUNT_REMOTE_THREADS_PROCESSING_TIME].name,
-                "Count remote threads processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_MK_PAGE_WRITABLE].name,
-                "Make page writable processing time");
-        sprintf(_proc_data[j][PS_PROC_DATA_WAITING_FOR_LAMPORT_LOCK].name,
-                "Waiting for Lamport lock on virtual page");
-        sprintf(_proc_data[j][PS_PROC_DATA_LAMPORT_LOCK_HELD].name,
-                "Lamport lock held");
-    }
-}
-#endif
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static int handle_stats_clear(struct pcn_kmsg_message* inc_msg) {
-
-    int i,j;
-    for(j = 0; j < NR_CPUS; j++)
-        for(i = 0; i < PS_PROC_DATA_MAX; i++)
-            proc_data_reset(j,i);
-    pcn_kmsg_free_msg(inc_msg);
-    return 0;
-}
-#endif
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static void process_stats_query(struct work_struct* w) {
-    stats_response_t* response = kmalloc(sizeof(stats_response_t),GFP_KERNEL);
-    int i;
-    stats_query_work_t* work = (stats_query_work_t*)w;
-    response->header.type = PCN_KMSG_TYPE_PROC_SRV_STATS_RESPONSE;
-    response->header.prio = PCN_KMSG_PRIO_NORMAL;
-    response->pid = work->pid;
-    for(i = 0; i < PS_PROC_DATA_MAX; i++) { 
-        response->data[i].count = _proc_data[_cpu][i].count;
-        response->data[i].total = _proc_data[_cpu][i].total;
-        response->data[i].min   = _proc_data[_cpu][i].min;
-        response->data[i].max   = _proc_data[_cpu][i].max;
-    }
-    pcn_kmsg_send_long(work->from_cpu,
-                        (struct pcn_kmsg_long_message*)response,
-                        sizeof(stats_response_t) - sizeof(response->header));
-    kfree(response);
-    kfree(w);
-}
-#endif
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static int handle_stats_query(struct pcn_kmsg_message* inc_msg) {
-    stats_query_t* query = (stats_query_t*)inc_msg;
-    stats_query_work_t* work = kmalloc(sizeof(stats_query_work_t),GFP_ATOMIC);
-
-    if(work) {
-        INIT_WORK( (struct work_struct*)work, process_stats_query);
-        work->pid = query->pid;
-        work->from_cpu = query->header.from_cpu;
-        queue_work(exit_wq, (struct work_struct*)work);
-    }
-
-    pcn_kmsg_free_msg(inc_msg);
-    return 0;
-}
-#endif
-
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-static int handle_stats_response(struct pcn_kmsg_message* inc_msg) {
-    stats_response_t* response = (stats_response_t*)inc_msg;
-    stats_query_data_t* data = find_stats_query_data(response->pid);
-    int from_cpu = response->header.from_cpu;
-    if(data) {
-        int i;
-        for(i = 0; i < PS_PROC_DATA_MAX; i++) {
-            _proc_data[from_cpu][i].count = response->data[i].count;
-            _proc_data[from_cpu][i].total = response->data[i].total;
-            _proc_data[from_cpu][i].min   = response->data[i].min;
-            _proc_data[from_cpu][i].max   = response->data[i].max;
-        }
-
-        data->responses++;
-    }
-    pcn_kmsg_free_msg(inc_msg);
-    return 0;
-}
-#endif
-
-/* From Wikipedia page "Fetch and add", modified to work for u64 */
-/**
- *
- */
-static inline unsigned long fetch_and_add(volatile unsigned long * variable, 
-                      unsigned long value) {
-    asm volatile( 
-             "lock; xaddq %%rax, %2;"
-             :"=a" (value)                   //Output
-             : "a" (value), "m" (*variable)  //Input
-             :"memory" );
-    return value;
-}
-
-/**
- *
- */
-static unsigned long get_next_ts_value() {
-    return fetch_and_add(ts_counter,1);
-}
-
-/**
- *
- */
-static unsigned long* get_master_ts_counter_address() {
-    unsigned long phys = 0;
-    get_counter_phys_request_t request;
-    request.header.type = PCN_KMSG_TYPE_PROC_SRV_GET_COUNTER_PHYS_REQUEST;
-    request.header.prio = PCN_KMSG_PRIO_NORMAL;
-
-    if(!get_counter_phys_data)
-        get_counter_phys_data = kmalloc(sizeof(get_counter_phys_data_t),GFP_KERNEL);
-
-    get_counter_phys_data->resp = 0;
-    get_counter_phys_data->response_received = 0;
-
-    pcn_kmsg_send(0,(struct pcn_kmsg_message*)&request);
-
-    while(!get_counter_phys_data->response_received)
-        schedule();
-     
-    return (unsigned long long*)get_counter_phys_data->resp;
-}
-
-/**
- *
- */
-static void init_shared_counter(void) {
-    if(!_cpu) {
-        // Master allocs space, then shares it
-        void* pg = kmalloc(PAGE_SIZE,GFP_KERNEL);
-        ts_counter = pg;
-        *ts_counter = 0;
-        get_next_ts_value();
-        printk("%s: ts_counter{%lx},*ts_counter{%lx}\n",__func__,
-                ts_counter,
-                get_next_ts_value());
-    } else {
-        // ask for physical address of master's ts_counter
-        ts_counter = ioremap_cache(get_master_ts_counter_address(), PAGE_SIZE);
-        printk("%s: ts_counter{%lx},*ts_counter{%lx}\n",__func__,
-                ts_counter,
-                get_next_ts_value());
-    }
-}
-
-
 /**
  * @brief Initialize this module
  */
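
For reference, the page-lock machinery deleted above implements Lamport-style mutual exclusion over virtual pages: each requester enqueues a globally ordered timestamp, broadcasts it, and may proceed only once every peer has acknowledged it and its entry has reached the head of the per-page queue. The acquire side of the removed code, compressed into a single sketch (types and the queue lock as named in the deleted functions):

    /* Sketch of the removed acquire path: all acks first, then head-of-queue. */
    static void lamport_acquire_sketch(lamport_barrier_queue_t *queue,
                                       lamport_barrier_entry_t *entry)
    {
        for (;;) {                   /* phase 1: every peer has our timestamp */
            PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
            if (entry->responses == entry->expected_responses) {
                PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
                break;
            }
            PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
            schedule();
        }
        for (;;) {                   /* phase 2: ours is the oldest request */
            PS_SPIN_LOCK(&_lamport_barrier_queue_lock);
            if (queue->queue == entry) {
                queue->active_timestamp = entry->timestamp;
                PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
                break;
            }
            PS_SPIN_UNLOCK(&_lamport_barrier_queue_lock);
            schedule();
        }
    }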
@@ -8604,10 +6476,10 @@ static int __init process_server_init(void) {
      * Cache some local information.
      */
 #ifndef SUPPORT_FOR_CLUSTERING
-      _cpu= smp_processor_id();
- #else
-      _cpu= cpumask_first(cpu_present_mask);
- #endif
+    _cpu = smp_processor_id();
+#else
+    _cpu = cpumask_first(cpu_present_mask);
+#endif
     /*
      * Init global semaphores
      */
@@ -8622,12 +6494,6 @@ static int __init process_server_init(void) {
     exit_wq    = create_workqueue("exit_wq");
     mapping_wq = create_workqueue("mapping_wq");
 
-    /*
-     * Proc entry to publish information
-     */
-    PS_PROC_DATA_INIT();
-
-
     /*
      * Register to receive relevant incoming messages.
      */
@@ -8665,38 +6531,6 @@ static int __init process_server_init(void) {
             handle_mprotect_response);
     pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_BACK_MIGRATION,
             handle_back_migration);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_REQUEST,
-            handle_lamport_barrier_request);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RESPONSE,
-            handle_lamport_barrier_response);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RELEASE,
-            handle_lamport_barrier_release);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_REQUEST_RANGE,
-            handle_lamport_barrier_request_range);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RESPONSE_RANGE,
-            handle_lamport_barrier_response_range);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_LAMPORT_BARRIER_RELEASE_RANGE,
-            handle_lamport_barrier_release_range);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_GET_COUNTER_PHYS_REQUEST,
-            handle_get_counter_phys_request);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_GET_COUNTER_PHYS_RESPONSE,
-            handle_get_counter_phys_response);
-
-    // stats messages
-#ifdef PROCESS_SERVER_HOST_PROC_ENTRY
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_STATS_CLEAR,
-            handle_stats_clear);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_STATS_QUERY,
-            handle_stats_query);
-    pcn_kmsg_register_callback(PCN_KMSG_TYPE_PROC_SRV_STATS_RESPONSE,
-            handle_stats_response);
-#endif
-
-    /*
-     *  
-     */
-   init_shared_counter(); 
-
     PERF_INIT(); 
     return 0;
 }
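
Every callback registered in process_server_init() follows the same contract, visible in the stats handlers this patch removes: the handler takes ownership of the incoming message, does the work inline or queues it to a workqueue if it may sleep, frees the message, and returns 0. A minimal handler sketch; PCN_KMSG_TYPE_EXAMPLE and example_msg_t are hypothetical names:

    /* Sketch: the handler contract behind pcn_kmsg_register_callback(). */
    static int handle_example(struct pcn_kmsg_message *inc_msg)
    {
        example_msg_t *msg = (example_msg_t *)inc_msg;

        /* ... act on msg, or queue a work item if the work can sleep ... */

        pcn_kmsg_free_msg(inc_msg); /* handler owns, and must free, the message */
        return 0;
    }

    /* Registered once at init, alongside the calls above:
     * pcn_kmsg_register_callback(PCN_KMSG_TYPE_EXAMPLE, handle_example); */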
index 908c989..4ae0463 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -948,10 +948,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        struct inode *inode;
        vm_flags_t vm_flags;
        int error;
-    unsigned long ret,a;
        unsigned long reqprot = prot;
-    int original_enable_distributed_munmap = current->enable_distributed_munmap;
-    int range_locked = 0;
 
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
@@ -985,67 +982,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        /* Obtain the address to map to. we verify (or select) it and ensure
         * that it represents a valid section of the address space.
         */
-    if(addr || !current->enable_do_mmap_pgoff_hook) {
-        addr = get_unmapped_area(file, addr, len, pgoff, flags);
-    } else {
-        int pserv_conflict = 0;
-        do {
-            int fault_ret;
-            struct vm_area_struct* vma_out = NULL;
-            addr = get_unmapped_area(file, NULL, len, pgoff, flags);
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-            process_server_acquire_heavy_lock();
-#else
-            process_server_acquire_page_lock_range(addr,len);
-#endif
-#endif
-            fault_ret = process_server_pull_remote_mappings(mm,
-                                                            NULL,
-                                                            addr,
-                                                            0,
-                                                            &vma_out,
-                                                            0);
-            if(fault_ret) {
-                pserv_conflict = 1;
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-                process_server_release_heavy_lock();
-#else
-                process_server_release_page_lock_range(addr,len);
-#endif
-#endif
-          
-            }
-            else {
-                pserv_conflict = 0;    
-                range_locked = 1;
-            }
-        } while(pserv_conflict);
-    }
-       if (addr & ~PAGE_MASK) {
-        if(range_locked && current->enable_do_mmap_pgoff_hook) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-            process_server_release_heavy_lock();
-#else  
-            process_server_release_page_lock_range(addr,len);
-#endif
-
-        }
+       addr = get_unmapped_area(file, addr, len, pgoff, flags);
+       if (addr & ~PAGE_MASK)
                return addr;
-    }
-
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(current->enable_do_mmap_pgoff_hook && !range_locked) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_acquire_heavy_lock();
-#else
-        process_server_acquire_page_lock_range(addr,len);
-#endif
-    }
-#endif
-
-    current->enable_distributed_munmap = 0;
 
        /* Do simple checking here so the lower-level routines won't have
         * to. we assume access permissions have been handled by the open
@@ -1055,10 +994,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
-               if (!can_do_mlock()) {
-                       error = -EPERM;
-            goto err;
-        }
+               if (!can_do_mlock())
+                       return -EPERM;
 
        /* mlock MCL_FUTURE? */
        if (vm_flags & VM_LOCKED) {
@@ -1067,10 +1004,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                locked += mm->locked_vm;
                lock_limit = rlimit(RLIMIT_MEMLOCK);
                lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-                       error = -EAGAIN;
-            goto err;
-        }
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       return -EAGAIN;
        }
 
        inode = file ? file->f_path.dentry->d_inode : NULL;
@@ -1078,27 +1013,21 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        if (file) {
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
-                       if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) {
-                               error = -EACCES;
-                goto err;
-            }
+                       if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
+                               return -EACCES;
 
                        /*
                         * Make sure we don't allow writing to an append-only
                         * file..
                         */
-                       if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE)) {
-                               error = -EACCES;
-                goto err;
-            }
+                       if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+                               return -EACCES;
 
                        /*
                         * Make sure there are no mandatory locks on the file.
                         */
-                       if (locks_verify_locked(inode)) {
-                               error = -EAGAIN;
-                goto err;
-            }
+                       if (locks_verify_locked(inode))
+                               return -EAGAIN;
 
                        vm_flags |= VM_SHARED | VM_MAYSHARE;
                        if (!(file->f_mode & FMODE_WRITE))
@@ -1106,27 +1035,20 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 
                        /* fall through */
                case MAP_PRIVATE:
-                       if (!(file->f_mode & FMODE_READ)) {
-                               error = -EACCES;
-                goto err;
-            }
+                       if (!(file->f_mode & FMODE_READ))
+                               return -EACCES;
                        if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
-                               if (vm_flags & VM_EXEC) {
-                                       error = -EPERM;
-                    goto err;
-                }
+                               if (vm_flags & VM_EXEC)
+                                       return -EPERM;
                                vm_flags &= ~VM_MAYEXEC;
                        }
 
-                       if (!file->f_op || !file->f_op->mmap) {
-                               error = -ENODEV;
-                goto err;
-            }
+                       if (!file->f_op || !file->f_op->mmap)
+                               return -ENODEV;
                        break;
 
                default:
-                       error = -EINVAL;
-            goto err;
+                       return -EINVAL;
                }
        } else {
                switch (flags & MAP_TYPE) {
@@ -1144,52 +1066,20 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        pgoff = addr >> PAGE_SHIFT;
                        break;
                default:
-            error = -EINVAL;
-                       goto err;
+                       return -EINVAL;
                }
        }
 
        error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
-       if (error) {
-               goto err;
-    }
+       if (error)
+               return error;
 
     /*
      * Multikernel do_mmap_pgoff hook
      */
-    current->enable_distributed_munmap = original_enable_distributed_munmap;
     process_server_do_mmap_pgoff(file, addr, len, flags, vm_flags, pgoff);
-    current->enable_distributed_munmap = 0;
-
-    ret = mmap_region(file, addr, len, flags, vm_flags, pgoff);
-
-    current->enable_distributed_munmap = original_enable_distributed_munmap;
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(current->enable_do_mmap_pgoff_hook) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_release_heavy_lock();
-#else
-        process_server_release_page_lock_range(addr,len);
-#endif
-    }
-#endif
-
-       return ret;
-
-err:
 
-    current->enable_distributed_munmap = original_enable_distributed_munmap;
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(current->enable_do_mmap_pgoff_hook) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_release_heavy_lock();
-#else
-        process_server_release_page_lock_range(addr,len);
-#endif
-    }
-#endif
-
-    return error;
+       return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
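
Note: with the popcorn error paths gone, do_mmap_pgoff again reports
failures by returning a negative errno cast to unsigned long; the
`addr & ~PAGE_MASK` test after get_unmapped_area works because values in
the last page of the address space (-4095..-1) are never page-aligned.
A minimal userspace sketch of that encoding (is_err_value() mirrors the
kernel's IS_ERR_VALUE(); 4 KiB pages assumed):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define MAX_ERRNO 4095UL

/* Error codes occupy the last page of the address space,
 * so an error return can never be page-aligned. */
static int is_err_value(unsigned long x)
{
    return x >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
    unsigned long ok  = 0x7f0000000000UL;   /* a page-aligned address */
    unsigned long err = (unsigned long)-12; /* -ENOMEM */

    printf("ok : low bits %#lx, is_err %d\n", ok & ~PAGE_MASK, is_err_value(ok));
    printf("err: low bits %#lx, is_err %d\n", err & ~PAGE_MASK, is_err_value(err));
    return 0;
}
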
 
@@ -2141,8 +2031,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 {
        unsigned long end;
        struct vm_area_struct *vma, *prev, *last;
-    unsigned long a;
-       int error;
 
        if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
                return -EINVAL;
@@ -2150,7 +2038,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        if ((len = PAGE_ALIGN(len)) == 0)
                return -EINVAL;
 
-
        /* Find the first overlapping VMA */
        vma = find_vma(mm, start);
        if (!vma)
@@ -2163,17 +2050,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        if (vma->vm_start >= end)
                return 0;
 
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(current->enable_distributed_munmap) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_acquire_heavy_lock();
-#else
-        process_server_acquire_page_lock_range(start,len);
-#endif
-    }
-#endif
-       
-    /*
+       /*
         * If we need to split any vma, do it now to save pain later.
         *
         * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
@@ -2181,20 +2058,19 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
         * places tmp vma above, and higher split_vma places tmp vma below.
         */
        if (start > vma->vm_start) {
+               int error;
 
                /*
                 * Make sure that map_count on return from munmap() will
                 * not exceed its limit; but let map_count go just above
                 * its limit temporarily, to help free resources as expected.
                 */
-               if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) {
-            error = -ENOMEM;
-                       goto err;
-        }
+               if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+                       return -ENOMEM;
 
                error = __split_vma(mm, vma, start, 0);
                if (error)
-                       goto err;
+                       return error;
                prev = vma;
        }
 
@@ -2203,7 +2079,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        if (last && end > last->vm_start) {
                int error = __split_vma(mm, last, end, 1);
                if (error)
-                       goto err;
+                       return error;
        }
        vma = prev? prev->vm_next: mm->mmap;
 
@@ -2221,6 +2097,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
                }
        }
 
+    /*
+     * Memory is about to be unmapped locally, so before this syscall returns,
+     * synchronize the removal of this memory with all remote thread members.
+     */
+    process_server_do_munmap(mm, vma, start, len);
 
     /*
         * Remove the vma's, and unmap the actual pages
@@ -2231,27 +2112,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        /* Fix up all other VM information */
        remove_vma_list(mm, vma);
 
-    /*
-     * Memory is now munmapped locally, so before exiting this syscall, synchronize
-     * the removal of this memory from all other thread members.
-     */
-    process_server_do_munmap(mm, start, len);
-
-    //return 0;
-    error = 0;
-
-err:
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(current->enable_distributed_munmap) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_release_heavy_lock();
-#else
-        process_server_release_page_lock_range(start,len);
-#endif
-    }
-#endif
-
-    return error;
+       return 0;
 }
 
 EXPORT_SYMBOL(do_munmap);
@@ -2266,9 +2127,6 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
-
-    //process_server_do_munmap(mm, addr, len);
-
        return ret;
 }
 
index 530f5b5..64f0a85 100644 (file)
@@ -238,12 +238,10 @@ fail:
  * do_remote - 1 = ask process_server to notify all remote thread members
  *             0 = do not
  */
-int do_mprotect(struct task_struct* task, struct mm_struct* mm, unsigned long start, size_t len, unsigned long prot, int do_remote) {
+int do_mprotect(struct task_struct* task, unsigned long start, size_t len, unsigned long prot, int do_remote) {
        unsigned long vm_flags, nstart, end, tmp, reqprot;
        struct vm_area_struct *vma, *prev;
-    struct mm_struct* task_mm = task? task->mm : mm;
        int error = -EINVAL;
-    unsigned long a;
        const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
@@ -260,28 +258,18 @@ int do_mprotect(struct task_struct* task, struct mm_struct* mm, unsigned long st
        if (!arch_validate_prot(prot))
                return -EINVAL;
 
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(do_remote) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_acquire_heavy_lock();
-#else
-        process_server_acquire_page_lock_range(start,len);
-#endif
-    }
-#endif
-
        reqprot = prot;
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC:
         */
-       if (task && (prot & PROT_READ) && (task->personality & READ_IMPLIES_EXEC))
+       if ((prot & PROT_READ) && (task->personality & READ_IMPLIES_EXEC))
                prot |= PROT_EXEC;
 
        vm_flags = calc_vm_prot_bits(prot);
 
-       down_write(&task_mm->mmap_sem);
+       down_write(&task->mm->mmap_sem);
 
-       vma = find_vma_prev(task_mm, start, &prev);
+       vma = find_vma_prev(task->mm, start, &prev);
        error = -ENOMEM;
        if (!vma)
                goto out;
@@ -343,25 +331,15 @@ int do_mprotect(struct task_struct* task, struct mm_struct* mm, unsigned long st
                }
        }
 out:
-       up_write(&task_mm->mmap_sem);
+       up_write(&task->mm->mmap_sem);
 
     /*
      * Multikernel.  Change remote mappings as well before returning.
      */
-    if(!error && do_remote && task) {
+    if(!error && do_remote) {
         process_server_do_mprotect(task,start,len,prot);
     }
 
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-    if(do_remote) {
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-        process_server_release_heavy_lock();
-#else
-        process_server_release_page_lock_range(start,len);
-#endif
-    }
-#endif
-
        return error;
 
 }
@@ -369,5 +347,5 @@ out:
 SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
                unsigned long, prot)
 {
-    return do_mprotect(current, current->mm, start, len, prot, 1);
+    return do_mprotect(current, start, len, prot, 1);
 }
index 079e971..e360330 100644 (file)
@@ -432,48 +432,6 @@ unsigned long do_mremap(unsigned long addr,
        struct vm_area_struct *vma;
        unsigned long ret = -EINVAL;
        unsigned long charged = 0;
-    int original_enable_distributed_munmap = current->enable_distributed_munmap;
-    unsigned long a;
-    current->enable_distributed_munmap = 0;
-
-    // This is kind of tricky.  We have to lock the old range
-    // and the new range.
-    // Also, recursion is not an issue for mremap, since 
-    // process_server does not ever attempt to do distributed
-    // remaps, it is naughty, and just does a distributed
-    // munmap (except locally).  That should probably change.
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-    process_server_acquire_heavy_lock();
-#else 
-    {
-    unsigned long old_start = addr;
-    unsigned long old_end   = addr + old_len;
-    unsigned long new_start = new_addr;
-    unsigned long new_end   = new_addr + new_len;
-    if(old_end <= new_start || new_end <= old_start) {
-        process_server_acquire_page_lock_range(old_start,old_len);
-        process_server_acquire_page_lock_range(new_start,new_len);
-    } else {
-        unsigned long min_start = old_start < new_start? old_start : new_start;
-        unsigned long max_end   = old_end > new_end? old_end : new_end;
-        process_server_acquire_page_lock_range(min_start,max_end - min_start);
-    }
-    }
-#endif
-#endif
-
-    // Pull in all remote mappings so nothing is lost later.
-    for(a = addr & PAGE_MASK; a < addr + old_len; a+= PAGE_SIZE) {
-        struct vm_area_struct *vma_out = NULL;
-        process_server_pull_remote_mappings(current->mm,
-                                            NULL,
-                                            a,
-                                            NULL,
-                                            &vma_out,
-                                            NULL);
-
-    }
 
        if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
                goto out;
@@ -575,39 +533,12 @@ unsigned long do_mremap(unsigned long addr,
          * operation, and notify all remotes of a munmap.  If they want to access
          * the new space, they will fault and re-acquire the mapping.
          */
-        current->enable_distributed_munmap = original_enable_distributed_munmap;
-        process_server_do_munmap(mm, addr, old_len);
-        process_server_do_munmap(mm, new_addr, new_len);
-        current->enable_distributed_munmap = 0;
+        process_server_do_munmap(mm, vma, addr, old_len);
 
        }
 out:
        if (ret & ~PAGE_MASK)
                vm_unacct_memory(charged);
-#ifdef PROCESS_SERVER_ENFORCE_VMA_MOD_ATOMICITY
-#ifdef PROCESS_SERVER_USE_HEAVY_LOCK
-    process_server_release_heavy_lock();
-#else
-    {
-    unsigned long old_start = addr;
-    unsigned long old_end   = addr + old_len;
-    unsigned long new_start = new_addr;
-    unsigned long new_end   = new_addr + new_len;
-    if(old_end <= new_start || new_end <= old_start) {
-        process_server_release_page_lock_range(old_start,old_len);
-        process_server_release_page_lock_range(new_start,new_len);
-    } else {
-        unsigned long min_start = old_start < new_start? old_start : new_start;
-        unsigned long max_end   = old_end > new_end? old_end : new_end;
-        process_server_release_page_lock_range(min_start,max_end - min_start);
-    }
-
-    }
-#endif
-#endif
-
-    current->enable_distributed_munmap = original_enable_distributed_munmap;
-
        return ret;
 }
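
Note: the removed do_mremap() locking preamble had one non-obvious piece of
logic worth recording: it locked the old and new ranges separately when they
were disjoint, but coalesced them into a single range when they overlapped,
so no page in the intersection was ever locked twice. The arithmetic in
isolation (lock_range() is a stand-in for
process_server_acquire_page_lock_range()):

#include <stdio.h>

static void lock_range(unsigned long start, unsigned long len)
{
    printf("lock [%#lx, %#lx)\n", start, start + len);
}

/* Mirrors the removed preamble: two locks if the ranges are disjoint,
 * one merged lock if they overlap. */
static void lock_mremap_ranges(unsigned long addr, unsigned long old_len,
                               unsigned long new_addr, unsigned long new_len)
{
    unsigned long old_start = addr,     old_end = addr + old_len;
    unsigned long new_start = new_addr, new_end = new_addr + new_len;

    if (old_end <= new_start || new_end <= old_start) {
        lock_range(old_start, old_len);
        lock_range(new_start, new_len);
    } else {
        unsigned long min_start = old_start < new_start ? old_start : new_start;
        unsigned long max_end   = old_end   > new_end   ? old_end   : new_end;
        lock_range(min_start, max_end - min_start);
    }
}

int main(void)
{
    lock_mremap_ranges(0x1000, 0x2000, 0x2000, 0x3000); /* overlap  -> one lock  */
    lock_mremap_ranges(0x1000, 0x1000, 0x8000, 0x1000); /* disjoint -> two locks */
    return 0;
}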