Fixed code after merging with FPU changes. Tested with cg-bomp and nptl test cases (not extensively).

author     Akshay Giridhar <akshay87@vt.edu>
           Sun, 29 Jun 2014 00:06:52 +0000 (20:06 -0400)
committer  Akshay Giridhar <akshay87@vt.edu>
           Sun, 29 Jun 2014 00:06:52 +0000 (20:06 -0400)

17 files changed:
arch/x86/include/asm/unistd_64.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/process_64.c
arch/x86/mm/ioremap.c
block/scsi_ioctl.c
drivers/pci/probe.c
drivers/tty/vty.c
fs/binfmt_elf.c
fs/exec.c
fs/proc/remote_proc_pid.c
include/linux/pcn_kmsg.h
include/linux/process_server.h
ipc/Makefile
kernel/kmod.c
kernel/process_server.c
pcnmsg/pcn_kmsg.c
pcnmsg/pcn_kmsg_test.c

index d244c59..7c601f3 100644 (file)
@@ -686,7 +686,6 @@ __SYSCALL(__NR_getcpu, sys_getcpu)
 __SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev                 311
 __SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
-
 #define __NR_multikernel_boot                   312
 __SYSCALL(__NR_multikernel_boot, sys_multikernel_boot)
 #define __NR_get_boot_params_addr               313
@@ -695,13 +694,10 @@ __SYSCALL(__NR_get_boot_params_addr, sys_get_boot_params_addr)
 __SYSCALL(__NR_popcorn_test_kmsg, sys_popcorn_test_kmsg)
 #define __NR_popcorn_test_ipi_latency          315
 __SYSCALL(__NR_popcorn_test_ipi_latency, sys_popcorn_test_ipi_latency)
-
-#ifdef CONFIG_POPCORN_PERF
 #define __NR_popcorn_perf_start     316
 __SYSCALL(__NR_popcorn_perf_start, sys_popcorn_perf_start)
 #define __NR_popcorn_perf_end       317
 __SYSCALL(__NR_popcorn_perf_end, sys_popcorn_perf_end)
-#endif /* CONFIG_POPCORN_PERF */
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
index 95f105b..26873ff 100644 (file)
@@ -1478,6 +1478,7 @@ static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
        int idx, node;
        struct io_apic_irq_attr attr;
        unsigned int pin, irq;
+       //char buffer[128];
        node = cpu_to_node(0); // note: node is always node zero! Have sense in single kernel
 
        for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
index 55f5087..125cf3d 100644 (file)
@@ -357,6 +357,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
        /*
         * Free the old FP and other extended state
         */
+       if ( !current->executing_for_remote )
        free_thread_xstate(current);
 }
 
index a7c4672..52d241b 100644 (file)
@@ -126,8 +126,9 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
                                prot_val, new_prot_val);
                        goto err_free_memtype;
                }
-               printk(KERN_WARNING
-       "ioremap WARN 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
+               printk(KERN_ERR
+       "%s: ioremap WARN 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
+                       __func__,
                        (unsigned long long)phys_addr,
                        (unsigned long long)(phys_addr + size),
                        prot_val, new_prot_val);
index ed46890..688be8a 100644 (file)
@@ -720,15 +720,12 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
        default:
                break;
        }
-       
-       if (capable(CAP_SYS_RAWIO))
-         return 0;
 
        /* In particular, rule out all resets and host-specific ioctls.  */
        printk_ratelimited(KERN_WARNING
                           "%s: sending ioctl %x to a partition!\n", current->comm, cmd);
 
-       return -ENOTTY;
+       return capable(CAP_SYS_RAWIO) ? 0 : -ENOTTY;
 }
 EXPORT_SYMBOL(scsi_verify_blk_ioctl);
 
index b1d59d5..bfd2123 100644 (file)
@@ -1232,12 +1232,12 @@ struct pci_dev_blacklist_item {
        unsigned short device;
        unsigned int flags;
 };
-#define BL 32
+#define BL 16
 static struct pci_dev_blacklist_item
-pci_dev_blacklist[BL] = { {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
-                          {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
-                          {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
-                          {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0} };
+pci_dev_blacklist[BL] = { {0,0,0},{0,0,0},{0,0,0},{0,0,0},
+                                               {0,0,0},{0,0,0},{0,0,0},{0,0,0},
+                                               {0,0,0},{0,0,0},{0,0,0},{0,0,0},
+                                               {0,0,0},{0,0,0},{0,0,0},{0,0,0} };
 static int pci_dev_blacklist_elements = 0;
 
 int pci_dev_list_add(int compatible, char *vendor, char *model,
index 6c2b10d..cc62320 100644 (file)
@@ -1,10 +1,3 @@
-/*
- * Copyright 2012-2014, SSRG VT
- * original version: Arijit Chattopardiyay
- * rewritten by: Antonio Barbalace
- */
-// TODO rewrite the buffering algorithm
-
 #include<linux/module.h>
 #include<linux/init.h>
 #include<linux/kernel.h>
@@ -13,6 +6,7 @@
 #include<linux/spinlock.h>
 #include<linux/highmem.h>
 #include<linux/mm.h>
+#include<asm/io.h>
 #include<linux/sched.h>
 #include<linux/tty.h>
 #include<linux/tty_driver.h>
 #include<linux/spinlock.h>
 #include<linux/types.h>
 
-#include<asm/io.h>
-#include<asm/bug.h>
-
 MODULE_LICENSE("GPL");
 
 #define SUCCESS 0
 #define FAILURE -1
+#define BUF_SIZE 19200
 
-/*
- * configurables
- */
 #define NO_OF_DEV 64
-#define SHM_SIZE 0x200000
-#define BUF_SIZE ((SHM_SIZE/NO_OF_DEV) - PAGE_SIZE)
-#define READING_INTERVAL 125
-
-#if ( (SHM_SIZE & ~PAGE_MASK) )
-# error "size must be a multiple of the architecture page size"
-#endif
-#if ( (NO_OF_DEV * (BUF_SIZE + PAGE_SIZE)) > SHM_SIZE )
-# error "(NO_OF_DEV * (BUF_SIZE + PAGE_SIZE)) exceeds SHM_SIZE"
-#endif
-
-/*
- * constants
- */
+
+/**************************************************/
+/*System Variables*/
+
 #define MINOR_START_NUMBER 121
-#define VTY_DEV_NAM "vty"
+const char *tty_dev_name = "ttty";
+int order = 0;
+const int reading_interval = 200;
+const char tokenizer = '%';
+
+/***************************************************/
 
-/*
- * System Variables
- */
-static int order = 0;
-static const char *tty_dev_name = VTY_DEV_NAM;
-static const int reading_interval = READING_INTERVAL;
-static const char tokenizer = '%';
-static unsigned long long global_poff = 0l;
+unsigned long long global_poff = 0l;
 
 typedef struct vty_desc {
        struct tty_struct * tty;
@@ -67,56 +44,40 @@ typedef struct vty_desc {
        struct list_head list;
 } vty_desc_t;
 
+int index = 0;
 struct list_head current_tty;
 
-/*
- * _setup_vty_offset
- * 
- * fetching the command line argument for configuration.
- * if the configuration argument is missing the driver will not be loaded.
- */
 static int __init _setup_vty_offset(char *str)
 {
-       global_poff = simple_strtoull(str, 0, 16);
+       global_poff = simple_strtoull(str,0,16);
+       printk(KERN_ALERT "VTY offset %llx\n",global_poff);
        return 0;
 }
 early_param("vty_offset", _setup_vty_offset);
 
-/*
- * allocate_shared_memory
- * 
- * it maps a physical shared memory area as a matrix communication buffer.
- * The matrix is NO_OF_DEV*NO_OF_DEV. There is a buffer for each communication
- * direction.
- */
 struct ring_buffer {
        char buffer[BUF_SIZE];
        int current_pos;
        rwlock_t lock;
 };
-static struct ring_buffer *ring_buffer_address[NO_OF_DEV][NO_OF_DEV];
 
-static int allocate_shared_memory (void)
-{
-       int i, j;
-       void *poff = 0l;
-       void *virtual_address[NO_OF_DEV];
-       unsigned long pfn, pfn_end, node, nid;
-       size_t size = SHM_SIZE;
-       
-       // fetching the pysical address (in shared memory)
-       if (global_poff == 0)
-         return -ENOMEM;
-       
-       poff = (void*) global_poff;
-       pfn = (unsigned long) poff >> PAGE_SHIFT;
-       pfn_end = ((unsigned long) poff + ((unsigned long) size * NO_OF_DEV)) >> PAGE_SHIFT;
-       
-       //buffers do not have to overlap
-       BUG_ON( (sizeof(struct ring_buffer) > (BUF_SIZE + PAGE_SIZE)) );
-       
-       // finding the current node id
-       node = nid = -1;
+struct ring_buffer *ring_buffer_address[NO_OF_DEV][NO_OF_DEV];
+
+static int allocate_shared_memory() {
+
+       unsigned long *poff = 0l;
+       if (global_poff == 0) {
+               poff = 0xc0000000;
+       } else {
+               poff = (unsigned long *) global_poff;
+       }
+
+       size_t size = 0x200000;
+       void *virtual_address[64];
+
+       printk(KERN_ALERT "Poff %llx and %ld\n",poff,size);
+       unsigned long pfn = (long) poff >> PAGE_SHIFT;
+       unsigned long node = -1, nid = -1;
        for_each_online_node(nid)
        {
                unsigned long start_pfn, end_pfn;
@@ -127,34 +88,29 @@ static int allocate_shared_memory (void)
                        break; // node found continue
                }
        }
-       printk(KERN_INFO"virtualTTY: buffer %ld, rows %ld, columns(cpus) %d, node %ld[0x%lx-0x%lx]\n",
-              sizeof(struct ring_buffer), (long) size, (int)NO_OF_DEV, node,
-              (node!=-1)?node_start_pfn(node):0l, (node!=-1)?node_end_pfn(node):0l);
-       
-       // mapping in all the matrix of possible buffers (per rows)
-       for (i = 0; i < NO_OF_DEV; i++) {
-               if (node == -1) { // page never mapped
+
+       int i;
+       for (i = 0; i < 64; ++i) {
+               if (node == -1) { // page never mapped (why?)
                        virtual_address[i] = ioremap_cache(
                                        (resource_size_t)((void *) poff + (i * size)), size);
-                       if (! virtual_address[i])
-                               printk(KERN_ERR"%s: ioremap failed, virtual_address %d is 0x%p\n",
-                                       __func__, i, virtual_address[i]);
                } else {
                        struct page *shared_page;
-                       shared_page = pfn_to_page(pfn + (size >> PAGE_SHIFT)*i);
+                       shared_page = pfn_to_page(pfn);
                        virtual_address[i] = page_address(shared_page);
+                       void * kmap_addr = kmap(shared_page);
+
                }
        }
 
-       // mapping each buffer and initialize it
-       for (i = 0; i < NO_OF_DEV; i++) {
+       int j;
+       for (i = 0; i < 64; ++i) {
                void *p = virtual_address[i];
-               for (j = 0; j < NO_OF_DEV; j++) {
+               for (j = 0; j < 64; ++j) {
                        ring_buffer_address[i][j] = (struct ring_buffer *) ((void *) p
-                                       + ((BUF_SIZE + PAGE_SIZE) * j));
-                       
-                       ring_buffer_address[i][j]->current_pos = 0;
-                       rwlock_init(&(ring_buffer_address[i][j]->lock));
+                                       + (sizeof(struct ring_buffer) * j));
+                               ring_buffer_address[i][j]->current_pos = 0;
+                               rwlock_init(&(ring_buffer_address[i][j]->lock));
                }
        }
 
@@ -166,22 +122,20 @@ struct timer_list read_function_timer;
 
 static void tty_dev_read(long unsigned int time);
 
-int tty_dev_open(struct tty_struct *tty, struct file *flip)
-{
+int tty_dev_open(struct tty_struct *tty, struct file *flip) {
 
        vty_desc_t *tmp = (vty_desc_t *) kmalloc(sizeof(vty_desc_t), GFP_KERNEL);
        tmp->tty = tty;
-       tmp->id = tty->index;
+       tmp->id = index++;
        INIT_LIST_HEAD(&(tmp->list));
-       tty->driver_data = (void*)(long)tmp->id;
+       tty->driver_data = tmp->id;
        list_add(&(tmp->list), &(current_tty));
 
        mod_timer(&read_function_timer, jiffies + msecs_to_jiffies(reading_interval));
        return SUCCESS;
 }
 
-void tty_dev_close(struct tty_struct *tty, struct file *flip)
-{
+void tty_dev_close(struct tty_struct *tty, struct file *flip) {
 
        struct list_head * cur, *n;
        vty_desc_t * curt;
@@ -190,68 +144,55 @@ void tty_dev_close(struct tty_struct *tty, struct file *flip)
        {
                curt = list_entry(cur, vty_desc_t, list);
 
-               if (curt->tty == tty &&
-                   curt->id == (int)(long)tty->driver_data) {
+               if (curt->tty == tty && curt->id == (int) tty->driver_data) {
                        list_del(cur);
                        kfree(curt);
-                       return;
+                       return 0;
                }
        }
 
        del_timer(&read_function_timer);
 }
 
-int tty_dev_write(struct tty_struct * tty, const unsigned char *buf, int count)
-{
+int tty_dev_write(struct tty_struct * tty, const unsigned char *buf, int count) {
        /**
         * When 0 wants to write to 2 it will write in 2,0
         * 2 wants to read what is written by 0, it will read 2,0
         * */
        int xGrid = tty->index;
        int yGrid = order;
-       struct ring_buffer *current_buffer;
-       
-       if ( (count > 0) &&
-           (ring_buffer_address[xGrid][yGrid] != NULL) ) {
-         
+       if (count > 0 && (ring_buffer_address[xGrid][yGrid] != NULL)) {
                write_lock(&(ring_buffer_address[xGrid][yGrid]->lock));
-               if (ring_buffer_address[xGrid][yGrid]->current_pos < 0 ||
-                               ring_buffer_address[xGrid][yGrid]->current_pos >= BUF_SIZE ||
-                               count > BUF_SIZE) {
-                 
-                       ring_buffer_address[xGrid][yGrid]->current_pos = 0;
-                       printk(KERN_ALERT "Memory Overflow...........\n Resetting the value.....\n");
-                       if(count > BUF_SIZE) {
-                               count = BUF_SIZE;
-                       }
+               if (ring_buffer_address[xGrid][yGrid]->current_pos
+                               < 0||
+                               ring_buffer_address[xGrid][yGrid]->current_pos >= BUF_SIZE || count > BUF_SIZE) {ring_buffer_address[xGrid][yGrid]->current_pos = 0;
+               printk(KERN_ALERT "Memory Overflow...........\n Resetting the value.....\n");
+               if(count > BUF_SIZE) {
+                       count = BUF_SIZE;
                }
-               current_buffer = ring_buffer_address[xGrid][yGrid];
+       }
+               struct ring_buffer *current_buffer = ring_buffer_address[xGrid][yGrid];
                memcpy(&(current_buffer->buffer[current_buffer->current_pos]), buf,
                                count);
                current_buffer->current_pos += count;
                current_buffer->buffer[current_buffer->current_pos] = '\0';
-               
                write_unlock(&(ring_buffer_address[xGrid][yGrid]->lock));
        }
        return count;
 }
 
-void tty_dev_read(long unsigned int time)
-{
+void tty_dev_read(long unsigned int time) {
 
        struct list_head * cur, *n;
        vty_desc_t * curt;
 
        list_for_each_safe(cur, n, &current_tty)        
        {
-               int xGrid, yGrid;
-               struct ring_buffer *my_ring_buf;
-               
                curt = list_entry(cur, vty_desc_t, list);
-               xGrid = order;
-               yGrid = curt->tty->index;
-               
-               my_ring_buf = ring_buffer_address[xGrid][yGrid];
+
+               int xGrid = order;
+               int yGrid = curt->tty->index;
+               struct ring_buffer *my_ring_buf = ring_buffer_address[xGrid][yGrid];
                if (my_ring_buf != NULL) {
                        read_lock(&(ring_buffer_address[xGrid][yGrid]->lock));
                        if (my_ring_buf->current_pos == 0) {
@@ -281,8 +222,7 @@ void tty_dev_read(long unsigned int time)
 
 }
 
-static int tty_dev_write_room(struct tty_struct *tty)
-{
+static int tty_dev_write_room(struct tty_struct *tty) {
        int xGrid = tty->index;
        int yGrid = order;
        int retVal = BUF_SIZE;
@@ -294,35 +234,28 @@ static int tty_dev_write_room(struct tty_struct *tty)
        return retVal;
 }
 
-static struct tty_operations tty_dev_operations = {
-       .open = tty_dev_open,
-       .close = tty_dev_close,
-       .write = tty_dev_write,
-       .write_room = tty_dev_write_room
-};
+static struct tty_operations tty_dev_operations = { .open = tty_dev_open,
+               .close = tty_dev_close, .write = tty_dev_write, .write_room =
+                               tty_dev_write_room };
 
 static int __init vty_init(void)
 {
-       int ret;
+       printk(KERN_ALERT "Loading MyTTy Driver memory %s\n",__func__);
        order = smp_processor_id();
-       printk(KERN_INFO "virtualTTY: cpu %d, phys 0x%llx-0x%llx\n",
-              order, global_poff, global_poff + (SHM_SIZE * NO_OF_DEV));
+       printk(KERN_ALERT "The order is %d\n",order);
 
-       if ( (ret = allocate_shared_memory()) ) {
-         printk(KERN_ERR "%s: allocate_shared_memory error %d\n",
-                __func__, ret);
-         return FAILURE;
-       }
+       allocate_shared_memory();
+       printk(KERN_ALERT "Memory Allocation is successful\n");
 
+       printk(KERN_ALERT "My TTY driver Module is loading\n");
        master_tty_driver = alloc_tty_driver(NO_OF_DEV);
-       if( !master_tty_driver ) {
-         printk(KERN_ERR "%s: allocation of master tty failed %p\n",
-               __func__, master_tty_driver);
-         return FAILURE;
+       if(!master_tty_driver) {
+               printk(KERN_ALERT "Allocation of master is failed\n");
+               return FAILURE;
        }
 
        master_tty_driver->owner = THIS_MODULE;
-       master_tty_driver->driver_name="vtydriver";
+       master_tty_driver->driver_name="Myttydriver";
        master_tty_driver->name = tty_dev_name;
        master_tty_driver->major = TTY_MAJOR;
        master_tty_driver->minor_start = MINOR_START_NUMBER;
@@ -341,13 +274,13 @@ static int __init vty_init(void)
        master_tty_driver->init_termios.c_ospeed = 38400;
        tty_set_operations(master_tty_driver, &tty_dev_operations);
 
-       ret = tty_register_driver(master_tty_driver);
-       if(ret != 0) {
-               printk(KERN_ERR "%s: unable to register the tty device %d\n",
-                 __func__, ret);
+       int retval = tty_register_driver(master_tty_driver);
+       if(retval != 0)
+       {
+               printk(KERN_ALERT "Unable to register the device\n");
                return FAILURE;
        }
-       setup_timer(&read_function_timer, tty_dev_read, 0);
+       setup_timer(&read_function_timer,tty_dev_read,0);
 
        INIT_LIST_HEAD(&current_tty);
 
@@ -356,8 +289,7 @@ static int __init vty_init(void)
 
 static void __exit vty_exit(void)
 {
-       printk(KERN_INFO "virtualTTY Driver: unloading\n");
-       // TODO not implemented
+       printk(KERN_ALERT "Unloading shared memory\n");
 }
 
 module_init( vty_init);
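
The indexing convention spelled out in tty_dev_write()/tty_dev_read() above is easy to misread: traffic from kernel W to kernel D always lands in ring_buffer_address[D][W], and D polls that same slot. A minimal sketch of the convention, assuming the same ring_buffer_address matrix and the same roles for order (this kernel) and tty->index (the peer); the two helpers are hypothetical and not part of the patch:

/* Writer side (tty_dev_write): xGrid = tty->index (peer), yGrid = order
 * (this kernel), so kernel 0 writing to kernel 2 fills slot [2][0]. */
static inline struct ring_buffer *vty_tx_slot(int peer, int self)
{
        return ring_buffer_address[peer][self];
}

/* Reader side (tty_dev_read): xGrid = order (this kernel), yGrid =
 * tty->index (peer), so kernel 2 reads kernel 0's data from [2][0]. */
static inline struct ring_buffer *vty_rx_slot(int self, int peer)
{
        return ring_buffer_address[self][peer];
}
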
index 232d562..5af0a72 100644 (file)
@@ -991,16 +991,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
     /*
      * Multikernel
      */
-#ifdef PROCESS_SERVER_USE_KMOD
     if(current->executing_for_remote) {
         process_server_import_address_space(&mk_ip, &mk_sp, regs);
+        /*printk("stack pointer = %lx\n",mk_sp);
+        for(i = 0; i <= 16; i++) {
+            printk("stack peak %lx at %lx\n",*(unsigned long*)(mk_sp + i*8), mk_sp + i*8);
+        }*/
            start_thread(regs, mk_ip, mk_sp);
     } else {
         start_thread(regs, elf_entry, bprm->p);
     }
-#else
-    start_thread(regs, elf_entry, bprm->p);
-#endif
        retval = 0;
 out:
        kfree(loc);
index 3c76f9f..0bbec73 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1011,7 +1011,7 @@ no_thread_group:
  * These functions flushes out all traces of the currently running executable
  * so that a new one can be started
  */
-/*static*/ void flush_old_files(struct files_struct * files)
+static void flush_old_files(struct files_struct * files)
 {
        long j = -1;
        struct fdtable *fdt;
@@ -1484,25 +1484,19 @@ static int do_execve_common(const char *filename,
                goto out_files;
 
        retval = prepare_bprm_creds(bprm);
-       if (retval) {
-printk("%s: prepare_bprm_creds\n", __func__);
+       if (retval)
                goto out_free;
-}
 
        retval = check_unsafe_exec(bprm);
-       if (retval < 0) {
-printk("%s: check_unsafe_exec\n", __func__);
+       if (retval < 0)
                goto out_free;
-}
        clear_in_exec = retval;
        current->in_execve = 1;
 
        file = open_exec(filename);
        retval = PTR_ERR(file);
-       if (IS_ERR(file)) {
-//printk("%s: open_exec\n", __func__);
+       if (IS_ERR(file))
                goto out_unmark;
-}
 
        sched_exec();
 
@@ -1511,56 +1505,39 @@ printk("%s: check_unsafe_exec\n", __func__);
        bprm->interp = filename;
 
        retval = bprm_mm_init(bprm);
-       if (retval) {
-printk("%s: bprm_mm_init\n", __func__);
+       if (retval)
                goto out_file;
-}
 
        bprm->argc = count(argv, MAX_ARG_STRINGS);
-       if ((retval = bprm->argc) < 0) {
-printk("%s: count argv\n", __func__);
+       if ((retval = bprm->argc) < 0)
                goto out;
-}
 
        bprm->envc = count(envp, MAX_ARG_STRINGS);
-       if ((retval = bprm->envc) < 0) {
-printk("%s: count envc\n", __func__);
+       if ((retval = bprm->envc) < 0)
                goto out;
-}
 
        retval = prepare_binprm(bprm);
-       if (retval < 0) {
-printk("%s: prepare_binprm\n", __func__);
+       if (retval < 0)
                goto out;
-}
 
        retval = copy_strings_kernel(1, &bprm->filename, bprm);
-       if (retval < 0) {
-printk("%s: copy_string_kernel\n", __func__);
+       if (retval < 0)
                goto out;
-}
 
     if(!current->executing_for_remote) {
         bprm->exec = bprm->p;
         retval = copy_strings(bprm->envc, envp, bprm);
-        if (retval < 0) {
-printk("%s: copy_strings bprm->envc\n", __func__);
+        if (retval < 0)
             goto out;
-}
 
         retval = copy_strings(bprm->argc, argv, bprm);
-        if (retval < 0) {
-printk("%s: copy_strings bprm->argc\n", __func__);
+        if (retval < 0)
             goto out;
-}
-
-     }
+    }
 
        retval = search_binary_handler(bprm,regs);
-       if (retval < 0) {
-printk("%s: search_binary_handler\n", __func__);
+       if (retval < 0)
                goto out;
-}
 
        /* execve succeeded */
        current->fs->in_exec = 0;
@@ -2325,4 +2302,3 @@ int dump_seek(struct file *file, loff_t off)
        return ret;
 }
 EXPORT_SYMBOL(dump_seek);
-
index 50e3da5..68cf77a 100644 (file)
@@ -728,7 +728,11 @@ static int __init pid_handler_init(void)
 {
 
 
-    _cpu = smp_processor_id();
+#ifndef SUPPORT_FOR_CLUSTERING
+      _cpu= smp_processor_id();
+#else
+      _cpu= cpumask_first(cpu_present_mask);
+#endif
 
        pcn_kmsg_register_callback(PCN_KMSG_TYPE_REMOTE_PID_REQUEST,
                                                handle_remote_pid_request);
index 4f9f195..a973c90 100644 (file)
@@ -18,6 +18,7 @@
 /* BOOKKEEPING */
 
 #define POPCORN_MAX_MCAST_CHANNELS 32
+#define LG_SEQNUM_SIZE 7
 
 struct pcn_kmsg_mcast_wininfo {
        volatile unsigned char lock;
@@ -131,37 +132,24 @@ enum pcn_kmsg_prio {
        PCN_KMSG_PRIO_NORMAL
 };
 
-#define __READY_SIZE 1
-#define LG_SEQNUM_SIZE  (8 - __READY_SIZE)
-
 /* Message header */
 struct pcn_kmsg_hdr {
        unsigned int from_cpu   :8; // b0
-       
+
        enum pcn_kmsg_type type :8; // b1
-       
+
        enum pcn_kmsg_prio prio :5; // b2
        unsigned int is_lg_msg  :1;
        unsigned int lg_start   :1;
        unsigned int lg_end     :1;
+       unsigned long long_number;
 
-       unsigned long long_number; // b3 .. b10
-       
-       unsigned int lg_seqnum  :LG_SEQNUM_SIZE; // b11
-       unsigned int __ready    :__READY_SIZE;
+       unsigned int lg_seqnum  :LG_SEQNUM_SIZE;// b3
+       //volatile unsigned int ready   :1;
 }__attribute__((packed));
 
-//#if ( &((struct pcn_kmsg_hdr*)0)->ready != 12 )
-//# error "ready is not the last byte of the struct"
-//#endif
-
-// TODO cache size can be retrieved by the compiler, put it here
-#define CACHE_LINE_SIZE 64
 //#define PCN_KMSG_PAYLOAD_SIZE 60
-#define PCN_KMSG_PAYLOAD_SIZE (CACHE_LINE_SIZE - sizeof(struct pcn_kmsg_hdr))
-
-#define MAX_CHUNKS ((1 << LG_SEQNUM_SIZE) -1)
-#define PCN_KMSG_LONG_PAYLOAD_SIZE (MAX_CHUNKS*PCN_KMSG_PAYLOAD_SIZE)
+#define PCN_KMSG_PAYLOAD_SIZE (64-sizeof(struct pcn_kmsg_hdr))
 
 /* The actual messages.  The expectation is that developers will create their
    own message structs with the payload replaced with their own fields, and then
@@ -174,19 +162,12 @@ struct pcn_kmsg_hdr {
 struct pcn_kmsg_message {
        struct pcn_kmsg_hdr hdr;
        unsigned char payload[PCN_KMSG_PAYLOAD_SIZE];
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
-
-struct pcn_kmsg_reverse_message {
-       unsigned char payload[PCN_KMSG_PAYLOAD_SIZE];
-       struct pcn_kmsg_hdr hdr;
-       volatile unsigned long last_ticket;
-       volatile unsigned char ready;
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
+}__attribute__((packed)) __attribute__((aligned(64)));
 
 /* Struct for sending long messages (>60 bytes payload) */
 struct pcn_kmsg_long_message {
        struct pcn_kmsg_hdr hdr;
-       unsigned char payload[PCN_KMSG_LONG_PAYLOAD_SIZE];
+       unsigned char payload[512];
 }__attribute__((packed));
 
 /* List entry to copy message into and pass around in receiving kernel */
@@ -196,6 +177,13 @@ struct pcn_kmsg_container {
 }__attribute__((packed));
 
 
+struct pcn_kmsg_reverse_message {
+       unsigned char payload[PCN_KMSG_PAYLOAD_SIZE];
+       struct pcn_kmsg_hdr hdr;
+       volatile unsigned char ready;
+       volatile unsigned long last_ticket;
+}__attribute__((packed)) __attribute__((aligned(64)));
+
 
 /* TYPES OF MESSAGES */
 
@@ -205,7 +193,7 @@ struct pcn_kmsg_checkin_message {
        unsigned long window_phys_addr;
        unsigned char cpu_to_add;
        char pad[51];
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
+}__attribute__((packed)) __attribute__((aligned(64)));
 
 
 
@@ -270,7 +258,7 @@ struct pcn_kmsg_mcast_message {
        unsigned int num_members;
        unsigned long window_phys_addr;
        char pad[28];
-}__attribute__((packed)) __attribute__((aligned(CACHE_LINE_SIZE)));
+}__attribute__((packed)) __attribute__((aligned(64)));
 
 struct pcn_kmsg_mcast_window {
        volatile unsigned long head;
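
PCN_KMSG_PAYLOAD_SIZE above pins a short message (header plus payload) to one 64-byte cache line. A hedged compile-time check of that invariant, assuming the packed header really occupies the bytes the bitfield layout intends; the function is hypothetical and not part of the header:

#include <linux/kernel.h>
#include <linux/pcn_kmsg.h>

static inline void pcn_kmsg_layout_check(void)
{
        /* header + short payload must fill exactly one cache line */
        BUILD_BUG_ON(sizeof(struct pcn_kmsg_hdr) + PCN_KMSG_PAYLOAD_SIZE != 64);
        /* the packed, aligned(64) message struct must not spill over */
        BUILD_BUG_ON(sizeof(struct pcn_kmsg_message) != 64);
}
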
index 4e4b736..b06b6e8 100644 (file)
@@ -8,10 +8,6 @@
 
 #ifndef _PROCESS_SERVER_H
 #define _PROCESS_SERVER_H
-
-/**
- * Constants
- */
 #define RETURN_DISPOSITION_NONE 0
 #define RETURN_DISPOSITION_EXIT 1
 #define RETURN_DISPOSITION_MIGRATE 2
index 7ac8bf4..46d79b7 100644 (file)
@@ -6,8 +6,7 @@ obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
 obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o sem_remote.o shm_remote.o
 obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
 obj_mq-$(CONFIG_COMPAT) += compat_mq.o
-#obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) bbuffer.o mbuffer.o mcomm.o
-obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
+obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) bbuffer.o mbuffer.o mcomm.o
 obj-$(CONFIG_IPC_NS) += namespace.o
 obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o
 
index 021e6c3..c1ad7aa 100644 (file)
@@ -203,17 +203,19 @@ static int ____call_usermodehelper(void *data)
         current->origin_pid =sub_info->origin_pid;
 
         // Notify of PID/PID pairing.
-        process_server_notify_delegated_subprocess_starting(
-               current->pid,sub_info->remote_pid,sub_info->remote_cpu);
+        process_server_notify_delegated_subprocess_starting(current->pid,sub_info->remote_pid,sub_info->remote_cpu);
     } 
 
        retval = kernel_execve(sub_info->path,
                               (const char *const *)sub_info->argv,
                               (const char *const *)sub_info->envp);
     
+
        /* Exec failed? */
 fail:
-       printk("%s: failed retval{%d}\n", __func__, retval);
+
+    printk("kmod exec failed retval{%d}\n",retval);
+
        sub_info->retval = retval;
        do_exit(0);
 }
index f7fb5d0..c65ccf7 100644 (file)
@@ -1,4 +1,4 @@
- /**
+/**
  * Implements task migration and maintains coherent 
  * address spaces across CPU cores.
  *
@@ -40,6 +40,7 @@
 #include <asm/msr.h> // wrmsr_safe
 #include <asm/mmu_context.h>
 #include <asm/processor.h> // load_cr3
+#include <asm/i387.h>
 
 unsigned long get_percpu_old_rsp(void);
 
@@ -49,6 +50,9 @@ unsigned long get_percpu_old_rsp(void);
 #include<linux/signal.h>
 #include <linux/fcntl.h>
 #include "futex_remote.h"
+
+#define FPU_ 1
+//#undef FPU_
 /**
  * General purpose configuration
  */
@@ -130,7 +134,8 @@ extern sys_topen(const char __user * filename, int flags, int mode, int fd);
 /**
  * Perf
  */
-#ifdef CONFIG_POPCORN_PERF
+#define MEASURE_PERF 1
+#if MEASURE_PERF
 #define PERF_INIT() perf_init()
 #define PERF_MEASURE_START(x) perf_measure_start(x)
 #define PERF_MEASURE_STOP(x,y,z)  perf_measure_stop(x,y,z)
@@ -244,7 +249,8 @@ static void perf_init(void) {
            "handle_mprotect_resonse");
 
 }
-#else /* CONFIG_POPCORN_PERF */
+
+#else
 #define PERF_INIT() 
 #define PERF_MEASURE_START(x) -1
 #define PERF_MEASURE_STOP(x, y, z)
@@ -254,10 +260,14 @@ static void perf_init(void) {
 static DECLARE_WAIT_QUEUE_HEAD( countq);
 
 /**
- * Library
+ * Constants
  */
+#define RETURN_DISPOSITION_EXIT 0
+#define RETURN_DISPOSITION_MIGRATE 1
 
-#define POPCORN_MAX_PATH 512
+/**
+ * Library
+ */
 
 /**
  * Some piping for linking data entries
@@ -308,7 +318,7 @@ typedef struct _contiguous_physical_mapping {
     unsigned long paddr;
     size_t sz;
 } contiguous_physical_mapping_t;
-
+#define HAS_FPU_MASK 0x80
 /**
  *
  */
@@ -342,6 +352,12 @@ typedef struct _clone_data {
     unsigned short thread_ds;
     unsigned short thread_fsindex;
     unsigned short thread_gsindex;
+#ifdef FPU_
+    unsigned int  task_flags; //FPU, but should be extended t
+    unsigned char task_fpu_counter;
+    unsigned char thread_has_fpu;
+    union thread_xstate fpu_state; //FPU migration
+#endif
     int tgroup_home_cpu;
     int tgroup_home_id;
     int t_home_cpu;
@@ -468,6 +484,12 @@ typedef struct _clone_request {
     unsigned short thread_ds;
     unsigned short thread_fsindex;
     unsigned short thread_gsindex;
+#ifdef FPU_   
+    unsigned int  task_flags; //FPU, but should be extended t
+    unsigned char task_fpu_counter; 
+    unsigned char thread_has_fpu;   
+    union thread_xstate fpu_state; //FPU migration support
+#endif
     int tgroup_home_cpu;
     int tgroup_home_id;
     int t_home_cpu;
@@ -741,7 +763,13 @@ typedef struct _back_migration {
     unsigned short thread_es;
     unsigned short thread_ds;
     unsigned short thread_fsindex;
-    unsigned short thread_gsindex;
+    unsigned short thread_gsindex; 
+#ifdef FPU_   
+    unsigned int  task_flags; //FPU, but should be extended t
+    unsigned char task_fpu_counter; 
+    unsigned char thread_has_fpu;   
+    union thread_xstate fpu_state; //FPU migration support
+#endif
 } back_migration_t;
 
 /**
@@ -773,6 +801,13 @@ typedef struct {
     unsigned short thread_ds;
     unsigned short thread_fsindex;
     unsigned short thread_gsindex;
+
+#ifdef FPU_   
+    unsigned int  task_flags; //FPU, but should be extended t
+    unsigned char task_fpu_counter; 
+    unsigned char thread_has_fpu;   
+    union thread_xstate fpu_state; //FPU migration support
+#endif
 } exit_work_t;
 
 /**
@@ -908,6 +943,13 @@ typedef struct {
     unsigned short thread_ds;
     unsigned short thread_fsindex;
     unsigned short thread_gsindex;
+
+#ifdef FPU_   
+    unsigned int  task_flags; //FPU, but should be extended t
+    unsigned char task_fpu_counter; 
+    unsigned char thread_has_fpu;   
+    union thread_xstate fpu_state; //FPU migration support
+#endif
 } back_migration_work_t;
 
 
@@ -991,6 +1033,7 @@ static int cpu_has_known_tgroup_mm(int cpu)
     _remote_cpu_info_list_t *objPtr;
     struct cpumask *pcpum =0;
     int cpuid =-1;
+extern struct list_head rlist_head;
     if (cpumask_test_cpu(cpu, cpu_present_mask))
        return 1;
     list_for_each(iter, &rlist_head) {
@@ -1382,13 +1425,12 @@ int find_consecutive_physically_mapped_region(struct mm_struct* mm,
                                               unsigned long vaddr,
                                               unsigned long* vaddr_mapping_start,
                                               unsigned long* paddr_mapping_start,
-                                              size_t* paddr_mapping_sz)
-{
-    unsigned long paddr_curr = 0l;
+                                              size_t* paddr_mapping_sz) {
+    unsigned long paddr_curr = NULL;
     unsigned long vaddr_curr = vaddr;
     unsigned long vaddr_next = vaddr;
-    unsigned long paddr_next = 0l;
-    unsigned long paddr_start = 0l;
+    unsigned long paddr_next = NULL;
+    unsigned long paddr_start = NULL;
     size_t sz = 0;
 
     
@@ -2117,6 +2159,11 @@ static void dump_clone_data(clone_data_t* r) {
     PSPRINTK("thread_sp{%lx}\n",r->thread_sp);
     PSPRINTK("thread_usersp{%lx}\n",r->thread_usersp);
 
+#ifdef FPU_   
+    PSPRINTK("task_flags{%x}\n",r->task_flags);
+    PSPRINTK("task_fpu_counter{%x}\n",(unsigned int)r->task_fpu_counter);
+    PSPRINTK("thread_has_fpu{%x}\n",(unsigned int)r->thread_has_fpu);
+#endif
     v = r->vma_list;
     while(v) {
         dump_vma_data(v);
@@ -2321,7 +2368,6 @@ static void destroy_clone_data(clone_data_t* data) {
     kfree(data);
 }
 
-#if 0
 /**
  * @brief Finds a vma_data_t entry.
  */
@@ -2342,7 +2388,6 @@ static vma_data_t* find_vma_data(clone_data_t* clone_data, unsigned long addr_st
 
     return ret;
 }
-#endif
 
 /**
  * @brief Callback for page walk that displays the contents of the walk.
@@ -2389,9 +2434,9 @@ static int dump_page_walk_pte_entry_callback(pte_t *pte, unsigned long start,
 /**
  * @brief Displays relevant data within a mm.
  */
-static void dump_mm(struct mm_struct* mm)
-{
+static void dump_mm(struct mm_struct* mm) {
     struct vm_area_struct * curr;
+    char buf[256];
     struct mm_walk walk = {
         .pte_entry = dump_page_walk_pte_entry_callback,
         .mm = mm,
@@ -2685,6 +2730,7 @@ static int count_remote_thread_members(int exclude_t_home_cpu,
     // the list does not include the current processor group descirptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
         i = objPtr->_data._processor;
@@ -2757,8 +2803,7 @@ static int count_local_thread_members(int tgroup_home_cpu,
  * thread group in which the "current" task resides.
  * @return The number of threads.
  */
-static int count_thread_members (void)
-{
+static int count_thread_members() {
      
     int count = 0;
     PSPRINTK("%s: entered\n",__func__);
@@ -2781,7 +2826,7 @@ static int count_thread_members (void)
 void process_tgroup_closed_item(struct work_struct* work) {
 
     tgroup_closed_work_t* w = (tgroup_closed_work_t*) work;
-    data_header_t *curr;
+    data_header_t *curr, *next;
     mm_data_t* mm_data;
     struct task_struct *g, *task;
     unsigned char tgroup_closed = 0;
@@ -2951,41 +2996,45 @@ handled:
  *
  * <MEASURED perf_process_mapping_request>
  */
-void process_mapping_request(struct work_struct* work)
-{
+void process_mapping_request(struct work_struct* work) {
     mapping_request_work_t* w = (mapping_request_work_t*) work;
-    mapping_response_t* response;
+    mapping_response_t response;
     data_header_t* data_curr = NULL;
     mm_data_t* mm_data = NULL;
-    
     struct task_struct* task = NULL;
     struct task_struct* g;
     struct vm_area_struct* vma = NULL;
     struct mm_struct* mm = NULL;
-    
     unsigned long address = w->address;
     unsigned long resolved = 0;
     struct mm_walk walk = {
         .pte_entry = vm_search_page_walk_pte_entry_callback,
         .private = &(resolved)
     };
-    char *plpath = NULL, *lpath = NULL;
-    int used_saved_mm = 0, found_vma = 1, found_pte = 1; 
+    char* plpath = NULL;
+    char lpath[512];
     int i;
-
-#ifdef CONFIG_POPCORN_PERF    
-    // for perf 
+    
+    // for perf
+    int used_saved_mm = 0;
+    int found_vma = 1;
+    int found_pte = 1;
+    
+    // Perf start
     int perf = PERF_MEASURE_START(&perf_process_mapping_request);
-#endif /* CONFIG_POPCORN_PERF */    
 
-    PSPRINTK("received mapping request from{%d} address{%lx}, cpu{%d}, id{%d}\n",
-            w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
+    //PSPRINTK("%s: entered\n",__func__);
+    PSPRINTK("received mapping request from {%d} address{%lx}, cpu{%d}, id{%d}\n",
+            w->from_cpu,
+            w->address,
+            w->tgroup_home_cpu,
+            w->tgroup_home_id);
 
     // First, search through existing processes
     do_each_thread(g,task) {
         if((task->tgroup_home_cpu == w->tgroup_home_cpu) &&
            (task->tgroup_home_id  == w->tgroup_home_id )) {
-            PSPRINTK("mapping request found common thread group here\n");
+            //PSPRINTK("mapping request found common thread group here\n");
             mm = task->mm;
 
             // Take note of the fact that an mm exists on the remote kernel
@@ -3013,25 +3062,14 @@ task_mm_search_exit:
             }
 
             data_curr = data_curr->next;
+
         } // while
+
         PS_SPIN_UNLOCK(&_saved_mm_head_lock);
     }
     
-    response = kmalloc(sizeof(mapping_response_t), GFP_ATOMIC); //TODO convert to alloc_cache
-    if (!response) {
-      printk(KERN_ALERT"can not kmalloc mapping_response_t area from{%d} address{%lx} cpu{%d} id{%d}\n",
-             w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
-      goto err_work;
-    }
-    lpath = kmalloc(POPCORN_MAX_PATH, GFP_ATOMIC); //TODO convert to alloc_cache
-    if (!lpath) {
-      printk(KERN_ALERT"can not kmalloc lpath area from{%d} address{%lx} cpu{%d} id{%d}\n",
-             w->from_cpu, w->address, w->tgroup_home_cpu, w->tgroup_home_id);
-      goto err_response;
-    }
-    
     // OK, if mm was found, look up the mapping.
-    if (mm) {
+    if(mm) {
 
         // The purpose of this code block is to determine
         // if we need to use a read or write lock, and safely.  
@@ -3061,9 +3099,11 @@ changed_can_be_cow:
         walk_page_range(address & PAGE_MASK, 
                 (address & PAGE_MASK) + PAGE_SIZE, &walk);
 
-        if (vma && resolved != 0) {
+        if(vma && resolved != 0) {
+
             PSPRINTK("mapping found! %lx for vaddr %lx\n",resolved,
                     address & PAGE_MASK);
+
             /*
              * Find regions of consecutive physical memory
              * in this vma, including the faulting address
@@ -3071,7 +3111,7 @@ changed_can_be_cow:
              */
             {
             // Break all cows in this vma
-            if (can_be_cow) {
+            if(can_be_cow) {
                 unsigned long cow_addr;
                 for(cow_addr = vma->vm_start; cow_addr < vma->vm_end; cow_addr += PAGE_SIZE) {
                     break_cow(mm, vma, cow_addr);
@@ -3079,49 +3119,54 @@ changed_can_be_cow:
                 // We no longer need a write lock after the break_cow process
                 // is complete, so downgrade the lock to a read lock.
                 downgrade_write(&mm->mmap_sem);
-            } // if (can_be_cow
+            }
+
 
             // Now grab all the mappings that we can stuff into the response.
-            if (0 != fill_physical_mapping_array(mm, vma, address,
-                                                &(response->mappings[0]),
-                                               MAX_MAPPINGS)) {
+            if(0 != fill_physical_mapping_array(mm, 
+                                                vma,
+                                                address,
+                                                &response.mappings, 
+                                                MAX_MAPPINGS)) {
                 // If the fill process fails, clear out all
                 // results.  Otherwise, we might trick the
                 // receiving cpu into thinking the target
                 // mapping was found when it was not.
                 for(i = 0; i < MAX_MAPPINGS; i++) {
-                    response->mappings[i].present = 0;
-                    response->mappings[i].vaddr = 0;
-                    response->mappings[i].paddr = 0;
-                    response->mappings[i].sz = 0;
-                }   
-            } // if (0 != fill_physical_mapping_array
+                    response.mappings[i].present = 0;
+                    response.mappings[i].vaddr = 0;
+                    response.mappings[i].paddr = 0;
+                    response.mappings[i].sz = 0;
+                }
+                    
+            }
+
             }
 
-            response->header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
-            response->header.prio = PCN_KMSG_PRIO_NORMAL;
-            response->tgroup_home_cpu = w->tgroup_home_cpu;
-            response->tgroup_home_id = w->tgroup_home_id;
-            response->requester_pid = w->requester_pid;
-            response->address = address;
-            response->present = 1;
-            response->vaddr_start = vma->vm_start;
-            response->vaddr_size = vma->vm_end - vma->vm_start;
-            response->prot = vma->vm_page_prot;
-            response->vm_flags = vma->vm_flags;
+            response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
+            response.header.prio = PCN_KMSG_PRIO_NORMAL;
+            response.tgroup_home_cpu = w->tgroup_home_cpu;
+            response.tgroup_home_id = w->tgroup_home_id;
+            response.requester_pid = w->requester_pid;
+            response.address = address;
+            response.present = 1;
+            response.vaddr_start = vma->vm_start;
+            response.vaddr_size = vma->vm_end - vma->vm_start;
+            response.prot = vma->vm_page_prot;
+            response.vm_flags = vma->vm_flags;
             if(vma->vm_file == NULL) {
-                response->path[0] = '\0';
+                response.path[0] = '\0';
             } else {    
                 plpath = d_path(&vma->vm_file->f_path,lpath,512);
-                strcpy(response->path,plpath);
-                response->pgoff = vma->vm_pgoff;
+                strcpy(response.path,plpath);
+                response.pgoff = vma->vm_pgoff;
             }
 
             // We modified this lock to be read-mode above so now
             // we can do a read-unlock instead of a write-unlock
             PS_UP_READ(&mm->mmap_sem);
        
-        } else { // (vma && resolved != 0) 
+        } else {
 
             if(can_be_cow)
                 PS_UP_WRITE(&mm->mmap_sem);
@@ -3129,57 +3174,60 @@ changed_can_be_cow:
                 PS_UP_READ(&mm->mmap_sem);
             // Zero out mappings
             for(i = 0; i < MAX_MAPPINGS; i++) {
-                response->mappings[i].present = 0;
-                response->mappings[i].vaddr = 0;
-                response->mappings[i].paddr = 0;
-                response->mappings[i].sz = 0;
+                response.mappings[i].present = 0;
+                response.mappings[i].vaddr = 0;
+                response.mappings[i].paddr = 0;
+                response.mappings[i].sz = 0;
             }
-        } // !(vma && resolved != 0) 
+
+        }
+        
+
     }
 
     // Not found, respond accordingly
-    if (resolved == 0) {
+    if(resolved == 0) {
         found_vma = 0;
         found_pte = 0;
         //PSPRINTK("Mapping not found\n");
-        response->header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
-        response->header.prio = PCN_KMSG_PRIO_NORMAL;
-        response->tgroup_home_cpu = w->tgroup_home_cpu;
-        response->tgroup_home_id = w->tgroup_home_id;
-        response->requester_pid = w->requester_pid;
-        response->address = address;
-        response->present = 0;
-        response->vaddr_start = 0;
-        response->vaddr_size = 0;
-        response->path[0] = '\0';
+        response.header.type = PCN_KMSG_TYPE_PROC_SRV_MAPPING_RESPONSE;
+        response.header.prio = PCN_KMSG_PRIO_NORMAL;
+        response.tgroup_home_cpu = w->tgroup_home_cpu;
+        response.tgroup_home_id = w->tgroup_home_id;
+        response.requester_pid = w->requester_pid;
+        response.address = address;
+        response.present = 0;
+        response.vaddr_start = 0;
+        response.vaddr_size = 0;
+        response.path[0] = '\0';
 
         // Handle case where vma was present but no pte.
-        if (vma) {
+        if(vma) {
             //PSPRINTK("But vma present\n");
             found_vma = 1;
-            response->present = 1;
-            response->vaddr_start = vma->vm_start;
-            response->vaddr_size = vma->vm_end - vma->vm_start;
-            response->prot = vma->vm_page_prot;
-            response->vm_flags = vma->vm_flags;
+            response.present = 1;
+            response.vaddr_start = vma->vm_start;
+            response.vaddr_size = vma->vm_end - vma->vm_start;
+            response.prot = vma->vm_page_prot;
+            response.vm_flags = vma->vm_flags;
              if(vma->vm_file == NULL) {
-                 response->path[0] = '\0';
+                 response.path[0] = '\0';
              } else {    
                  plpath = d_path(&vma->vm_file->f_path,lpath,512);
-                 strcpy(response->path,plpath);
-                 response->pgoff = vma->vm_pgoff;
+                 strcpy(response.path,plpath);
+                 response.pgoff = vma->vm_pgoff;
              }
         }
     }
 
     // Send response
-    if(response->present) {
+    if(response.present) {
         DO_UNTIL_SUCCESS(pcn_kmsg_send_long(w->from_cpu,
-                            (struct pcn_kmsg_long_message*)(response),
+                            (struct pcn_kmsg_long_message*)(&response),
                             sizeof(mapping_response_t) - 
                             sizeof(struct pcn_kmsg_hdr) -   //
-                            sizeof(response->path) +         // Chop off the end of the path
-                            strlen(response->path) + 1));    // variable to save bandwidth.
+                            sizeof(response.path) +         // Chop off the end of the path
+                            strlen(response.path) + 1));    // variable to save bandwidth.
     } else {
         // This is an optimization to get rid of the _long send 
         // which is a time sink.
@@ -3191,15 +3239,12 @@ changed_can_be_cow:
         nonpresent_response.requester_pid = w->requester_pid;
         nonpresent_response.address = w->address;
         DO_UNTIL_SUCCESS(pcn_kmsg_send(w->from_cpu,(struct pcn_kmsg_message*)(&nonpresent_response)));
+
     }
 
-    kfree(lpath);
-err_response:
-    kfree(response);
-err_work:
     kfree(work);
 
-#ifdef CONFIG_POPCORN_PERF    
+    // Perf stop
     if(used_saved_mm && found_vma && found_pte) {
         PERF_MEASURE_STOP(&perf_process_mapping_request,
                 "Saved MM + VMA + PTE",
@@ -3227,7 +3272,6 @@ err_work:
     } else {
         PERF_MEASURE_STOP(&perf_process_mapping_request,"ERR",perf);
     }
-#endif /* CONFIG_POPCORN_PERF */    
 
     return;
 }
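
The pcn_kmsg_send_long() call above trims the response so only the used portion of the path string crosses the channel. A hypothetical helper (not in the patch) that spells out the same size arithmetic, assuming the mapping_response_t layout used here:

/* Bytes handed to pcn_kmsg_send_long(): the struct minus the pcn_kmsg
 * header and minus the unused tail of the fixed-size path buffer,
 * keeping strlen(path) + 1 for the terminator. */
static size_t mapping_response_wire_size(const mapping_response_t *r)
{
        return sizeof(*r)
               - sizeof(struct pcn_kmsg_hdr)
               - sizeof(r->path)
               + strlen(r->path) + 1;
}
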
@@ -3354,7 +3398,7 @@ void process_munmap_request(struct work_struct* work) {
     data_header_t *curr = NULL;
     mm_data_t* mm_data = NULL;
     mm_data_t* to_munmap = NULL;
-    struct mm_struct* mm_to_munmap = NULL;
+    struct mm_struct * mm_to_munmap = NULL;
 
     int perf = PERF_MEASURE_START(&perf_process_munmap_request);
 
@@ -3369,32 +3413,32 @@ void process_munmap_request(struct work_struct* work) {
            task->tgroup_home_id  == w->tgroup_home_id &&
            !(task->flags & PF_EXITING)) {
 
+            // Thread group has been found, perform munmap operation on this
+            // task.
+        if (task && task->mm ) {
+           mm_to_munmap =task->mm;
+       }
+       else
+               printk("%s: pirla\n", __func__);
+
+       // TODO try and check if make sense
             // Take note of the fact that an mm exists on the remote kernel
             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
-            
-            if (task->mm) {
-                mm_to_munmap = task->mm;
-            }
-            else
-                printk("%s: pirla\n", __func__);
 
-            goto done; 
+            goto done; // thread grouping - threads all share a common mm.
+
         }
     } while_each_thread(g,task);
 done:
     read_unlock(&tasklist_lock);
 
-    if(mm_to_munmap) {
-        PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
-        current->enable_distributed_munmap = 0;
-        do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
-        current->enable_distributed_munmap = 1;
-        PS_UP_WRITE(&mm_to_munmap->mmap_sem);
-    }
-    else
-       printk("%s: unexpected error task %p task->mm %p\n", 
-                __func__, task, (task ? task->mm : 0) );
-
+      if(mm_to_munmap) {
+        PS_DOWN_WRITE(&task->mm->mmap_sem);
+        current->enable_distributed_munmap = 0;
+        do_munmap(mm_to_munmap, w->vaddr_start, w->vaddr_size);
+        current->enable_distributed_munmap = 1;
+        PS_UP_WRITE(&task->mm->mmap_sem);
+        }
     // munmap the specified region in any saved mm's as well.
     // This keeps old mappings saved in the mm of dead thread
     // group members from being resolved accidentally after
@@ -3421,15 +3465,13 @@ found:
         current->enable_distributed_munmap = 0;
         do_munmap(to_munmap->mm, w->vaddr_start, w->vaddr_size);
         current->enable_distributed_munmap = 1;
-       if (to_munmap && to_munmap->mm)
-               PS_UP_WRITE(&to_munmap->mm->mmap_sem);
-       else
-               printk(KERN_ALERT"%s: ERROR2: to_munmap %p mm %p\n",
-                                __func__, to_munmap, to_munmap?to_munmap->mm:0);
+        if (to_munmap && to_munmap->mm)
+            PS_UP_WRITE(&to_munmap->mm->mmap_sem);
+        else
+            printk(KERN_ALERT"%s: ERROR2: to_munmap %p mm %p\n", __func__, to_munmap, to_munmap?to_munmap->mm:0);
     }
     else if (to_munmap) // It is OK for to_munmap to be null, but not to_munmap->mm
-       printk(KERN_ALERT"%s: ERROR1: to_munmap %p mm %p\n",
-                        __func__, to_munmap, to_munmap?to_munmap->mm:0);
+        printk(KERN_ALERT"%s: ERROR1: to_munmap %p mm %p\n", __func__, to_munmap, to_munmap?to_munmap->mm:0);
 
     // Construct response
     response.header.type = PCN_KMSG_TYPE_PROC_SRV_MUNMAP_RESPONSE;
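
process_munmap_request() above and process_mprotect_item() below repeat the same first step: scan the task list for a local member of the distributed thread group and borrow its mm. A sketch of that shared lookup, using only fields the handlers already test; the helper is hypothetical and not part of the patch:

static struct mm_struct *find_tgroup_mm(int tgroup_home_cpu, int tgroup_home_id)
{
        struct task_struct *g, *task;
        struct mm_struct *mm = NULL;

        read_lock(&tasklist_lock);
        do_each_thread(g, task) {
                if (task->tgroup_home_cpu == tgroup_home_cpu &&
                    task->tgroup_home_id  == tgroup_home_id &&
                    !(task->flags & PF_EXITING)) {
                        /* every thread in the group shares this mm */
                        mm = task->mm;
                        goto out;
                }
        } while_each_thread(g, task);
out:
        read_unlock(&tasklist_lock);
        return mm;
}
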
@@ -3463,45 +3505,57 @@ void process_mprotect_item(struct work_struct* work) {
     int tgroup_home_id  = w->tgroup_home_id;
     unsigned long start = w->start;
     size_t len = w->len;
+    unsigned long prot = w->prot;
     struct task_struct* task, *g;
     data_header_t* curr = NULL;
     mm_data_t* mm_data = NULL;
     mm_data_t* to_munmap = NULL;
-    struct mm_struct *mm_to_munmap = NULL;
+    struct mm_struct* mm_to_munmap = NULL;
 
     int perf = PERF_MEASURE_START(&perf_process_mprotect_item);
     
     // Find the task
     read_lock(&tasklist_lock);
     do_each_thread(g,task) {
-
-        // Look for the thread group
+//     task_lock(task); // TODO consider to use this
         if (task->tgroup_home_cpu == tgroup_home_cpu &&
             task->tgroup_home_id  == tgroup_home_id &&
             !(task->flags & PF_EXITING)) {
-
-            // Take note of the fact that an mm exists on the remote kernel
+           /* 
+            if (task->mm)
+                // do_mprotect
+                do_mprotect(task, start, len, prot,0);
+//             task_unlock(task); //TODO consider to use this
+           else
+               printk("%s: task->mm task:%p mm:%p\n",
+                       __func__, task, task->mm);
+            */
+            // doing mprotect here causes errors, I do not know why
+            // for now I will unmap the region instead.
+            //do_mprotect(task,start,len,prot,0);
+            
+            if (task && task->mm ) {
+                    mm_to_munmap = task->mm;
+            }
+           // Take note of the fact that an mm exists on the remote kernel
             set_cpu_has_known_tgroup_mm(task,w->from_cpu);
 
-            if(task->mm) {
-                mm_to_munmap = task->mm;
-            }
-            else
-                printk("%s: pirla\n",__func__);
-            
+            // then quit
             goto done;
         }
+//     task_unlock(task); // TODO consider to use this
     } while_each_thread(g,task);
 done:
     read_unlock(&tasklist_lock);
 
-    if(mm_to_munmap) {
-        PS_DOWN_WRITE(&mm_to_munmap->mmap_sem);
+      if(mm_to_munmap) {
+        PS_DOWN_WRITE(&task->mm->mmap_sem);
         current->enable_distributed_munmap = 0;
         do_munmap(mm_to_munmap, start, len);
         current->enable_distributed_munmap = 1;
-        PS_UP_WRITE(&mm_to_munmap->mmap_sem);
-    }
+        PS_UP_WRITE(&task->mm->mmap_sem);
+        }
+
 
     // munmap the specified region in any saved mm's as well.
     // This keeps old mappings saved in the mm of dead thread
@@ -3632,6 +3686,27 @@ search_exit:
     task->thread.fsindex = w->thread_fsindex;
     task->thread.gsindex = w->thread_gsindex;
 
+#ifdef FPU_   
+      //FPU migration --- server (back migration)
+          if (w->task_flags & PF_USED_MATH)
+               set_used_math();
+           current->fpu_counter = w->task_fpu_counter;
+        if (w->task_flags & PF_USED_MATH) {
+               if (fpu_alloc(&current->thread.fpu) == -ENOMEM)
+                   printk(KERN_ERR "%s: ERROR fpu_alloc returned -ENOMEM, remote fpu not copied.\n", __func__);
+               else {
+                   struct fpu temp; temp.state = &w->fpu_state;
+                   fpu_copy(&current->thread.fpu, &temp);
+               }
+           }
+       printk(KERN_ALERT"%s: task flags %x fpu_counter %x has_fpu %x [%d:%d]\n",
+           __func__, current->flags, (int)current->fpu_counter,
+           (int)current->thread.has_fpu, (int)__thread_has_fpu(current), (int)fpu_allocated(&current->thread.fpu));
+           //FPU migration code --- id the following optional?
+           if (tsk_used_math(current) && current->fpu_counter >5) //fpu.preload
+               __math_state_restore(current);
+       
+#endif
     // Update local state
     task->represents_remote = 0;
     task->executing_for_remote = 1;
@@ -4458,6 +4533,14 @@ perf_cc = native_read_tsc();
     clone_data->thread_ds = request->thread_ds;
     clone_data->thread_fsindex = request->thread_fsindex;
     clone_data->thread_gsindex = request->thread_gsindex;
+    //TODO this part of the code requires refactoring, it is ugly and can not be worst. Copy each element of a data structure in another without data transformation (ok in the het. case) is a waste of resources.
+#ifdef FPU_   
+         clone_data->task_flags = request->task_flags;
+         clone_data->task_fpu_counter = request->task_fpu_counter;
+         clone_data->thread_has_fpu = request->thread_has_fpu;
+         clone_data->fpu_state = request->fpu_state;
+     //end FPU code
+#endif
     clone_data->vma_list = NULL;
     clone_data->tgroup_home_cpu = request->tgroup_home_cpu;
     clone_data->tgroup_home_id = request->tgroup_home_id;
@@ -4587,7 +4670,16 @@ static int handle_back_migration(struct pcn_kmsg_message* inc_msg) {
         work->thread_ds       = msg->thread_ds;
         work->thread_fsindex  = msg->thread_fsindex;
         work->thread_gsindex  = msg->thread_gsindex;
-        memcpy(&work->regs, &msg->regs, sizeof(struct pt_regs));
+       //TODO this function (part of the code) requires refactoring (switch to memcpy or continue like this if there is data transformation (het. support)
+               //FPU migration
+#ifdef FPU_   
+               work->task_flags      = msg->task_flags;
+               work->task_fpu_counter = msg->task_fpu_counter;
+               work->thread_has_fpu  = msg->thread_has_fpu;
+               work->fpu_state       = msg->fpu_state;
+               // end FPU code
+#endif        
+       memcpy(&work->regs, &msg->regs, sizeof(struct pt_regs));
         queue_work(clone_wq, (struct work_struct*)work);
     }
 
@@ -4723,6 +4815,7 @@ int process_server_import_address_space(unsigned long* ip,
         vma_data_t* vma_curr = NULL;
         int mmap_flags = 0;
         int vmas_installed = 0;
+        int ptes_installed = 0;
         unsigned long err = 0;
 
         vma_curr = clone_data->vma_list;
@@ -4975,11 +5068,12 @@ int process_server_import_address_space(unsigned long* ip,
     // We assume that an exec is going on and the current process is the one that is executing
     // (a switch will occur if it is not the one that must execute)
     { // FS/GS update --- start
+    unsigned long fs, gs;
     unsigned int fsindex, gsindex;
                     
     savesegment(fs, fsindex);
     if ( !(clone_data->thread_fs) || !(__user_addr(clone_data->thread_fs)) ) {
-      printk(KERN_ERR "%s: ERROR corrupted fs base address 0x%lx\n", __func__, clone_data->thread_fs);
+      printk(KERN_ERR "%s: ERROR corrupted fs base address %p\n", __func__, (void *)clone_data->thread_fs);
     }    
     current->thread.fsindex = clone_data->thread_fsindex;
     current->thread.fs = clone_data->thread_fs;
@@ -4992,7 +5086,7 @@ int process_server_import_address_space(unsigned long* ip,
                              
     savesegment(gs, gsindex); //read the gs register in gsindex variable
     if ( !(clone_data->thread_gs) && !(__user_addr(clone_data->thread_gs)) ) {
-      printk(KERN_ERR "%s: ERROR corrupted gs base address 0x%lx\n", __func__, clone_data->thread_gs);      
+      printk(KERN_ERR "%s: ERROR corrupted gs base address %p\n", __func__, (void *)clone_data->thread_gs);
     }
     current->thread.gs = clone_data->thread_gs;    
     current->thread.gsindex = clone_data->thread_gsindex;
@@ -5005,7 +5099,32 @@ int process_server_import_address_space(unsigned long* ip,
                                                    
     } // FS/GS update --- end
 
-    // Save off clone data, replacing any that may
+#ifdef FPU_
+    //FPU migration code --- server
+    /* PF_USED_MATH is set if the task has used the FPU before.
+     * fpu_counter is incremented every time the task goes through __switch_to while owning the FPU.
+     * has_fpu is true if the task currently owns the FPU, i.e. the FPU registers hold its data.
+     * fpu.preload (see arch/x86/include/asm/i387.h:switch_fpu_prepare()) is a heuristic.
+     */
+    if (clone_data->task_flags & PF_USED_MATH)
+        set_used_math();
+    current->fpu_counter = clone_data->task_fpu_counter;
+    if (clone_data->thread_has_fpu & HAS_FPU_MASK) {
+        if (fpu_alloc(&current->thread.fpu) == -ENOMEM)
+            printk(KERN_ALERT "%s: ERROR fpu_alloc returned -ENOMEM, remote fpu not copied.\n", __func__);
+        else {
+            struct fpu temp; temp.state = &clone_data->fpu_state;
+            fpu_copy(&current->thread.fpu, &temp);
+        }
+    }
+    printk(KERN_ALERT"%s: task flags %x fpu_counter %x has_fpu %x [%d:%d]\n",
+        __func__, current->flags, (int)current->fpu_counter,
+        (int)current->thread.has_fpu, (int)__thread_has_fpu(current), (int)fpu_allocated(&current->thread.fpu));
+    //FPU migration code --- is the following optional?
+    if (tsk_used_math(current) && current->fpu_counter > 5) //fpu.preload
+        __math_state_restore(current);
+#endif
+    // Save off clone data, replacing any that may
     // already exist.
     if(current->clone_data) {
         unsigned long lockflags;
@@ -5063,6 +5182,7 @@ int process_server_do_group_exit(void) {
     // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
         i = objPtr->_data._processor;
@@ -5180,6 +5300,10 @@ finished_membership_search:
         // take over, so do not mark this as executing for remote
         current->executing_for_remote = 0;
 
+        // Migrate back: this task is returning to a kernel it has run on before.
+        // Its execution on this kernel ends here.
 #ifndef SUPPORT_FOR_CLUSTERING
         for(i = 0; i < NR_CPUS; i++) {
           // Skip the current cpu
@@ -5191,6 +5315,7 @@ finished_membership_search:
         struct list_head *iter;
         _remote_cpu_info_list_t *objPtr;
        struct cpumask *pcpum =0;
+extern struct list_head rlist_head;
         list_for_each(iter, &rlist_head) {
           objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
           i = objPtr->_data._processor;
@@ -5234,6 +5359,7 @@ finished_membership_search:
            // the list does not include the current processor group descriptor (TODO)
            struct list_head *iter;
            _remote_cpu_info_list_t *objPtr;
+extern struct list_head rlist_head;
             list_for_each(iter, &rlist_head) {
               objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
               i = objPtr->_data._processor;
@@ -5382,6 +5508,7 @@ int process_server_do_munmap(struct mm_struct* mm,
     // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
         i = objPtr->_data._processor;
@@ -5424,14 +5551,13 @@ exit:
 void process_server_do_mprotect(struct task_struct* task,
                                 unsigned long start,
                                 size_t len,
-                                unsigned long prot)
-{
+                                unsigned long prot) {
     mprotect_data_t* data;
     mprotect_request_t request;
     int i;
     int s;
     int perf = -1;
-    unsigned long lockflags;
+    unsigned long lockflags;
 
      // Nothing to do for a thread group that's not distributed.
     if(!current->tgroup_distributed) {
@@ -5476,6 +5602,7 @@ void process_server_do_mprotect(struct task_struct* task,
     // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
         i = objPtr->_data._processor;
@@ -5501,7 +5628,8 @@ void process_server_do_mprotect(struct task_struct* task,
     // OK, all responses are in, we can proceed.
 
     spin_lock_irqsave(&_mprotect_data_head_lock,lockflags);
-    remove_data_entry_from(data, &_mprotect_data_head);
+    remove_data_entry_from(data,
+                           &_mprotect_data_head);
     spin_unlock_irqrestore(&_mprotect_data_head_lock,lockflags);
 
     kfree(data);
@@ -5563,13 +5691,16 @@ int process_server_try_handle_mm_fault(struct mm_struct *mm,
 
     mapping_request_data_t *data;
     unsigned long err = 0;
+    int ret = 0;
     mapping_request_t request;
-    int i, s, j, ret=0;
+    int i;
+    int s;
+    int j;
     struct file* f;
     unsigned long prot = 0;
     unsigned char started_outside_vma = 0;
     unsigned char did_early_removal = 0;
-    char path[512]; //TODO must be kmalloc-ed
+    char path[512];
     char* ppath;
     // for perf
     unsigned char pte_provided = 0;
@@ -5700,6 +5831,7 @@ int process_server_try_handle_mm_fault(struct mm_struct *mm,
     // the list does not include the current processor group descriptor (TODO)
     struct list_head *iter;
     _remote_cpu_info_list_t *objPtr;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) { 
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
         i = objPtr->_data._processor;
@@ -5880,7 +6012,7 @@ int process_server_try_handle_mm_fault(struct mm_struct *mm,
 
             // Check remap_pfn_range success
             if(remap_pfn_range_err) {
-                printk(KERN_ALERT"ERROR: Failed to remap_pfn_range %ld\n",err);
+                printk(KERN_ALERT"ERROR: Failed to remap_pfn_range %lu\n", err);
             } else {
                 PSPRINTK("remap_pfn_range succeeded\n");
                 ret = 1;
@@ -5987,7 +6119,7 @@ int process_server_dup_task(struct task_struct* orig, struct task_struct* task)
     task->t_distributed = 0;
     task->previous_cpus = 0;
     task->known_cpu_with_tgroup_mm = 0;
-    task->return_disposition = RETURN_DISPOSITION_NONE;
+    task->return_disposition = RETURN_DISPOSITION_EXIT;
 
     // If this is pid 1 or 2, the parent cannot have been migrated
     // so it is safe to take on all local thread info.
@@ -6012,7 +6144,7 @@ int process_server_dup_task(struct task_struct* orig, struct task_struct* task)
 
     // This is important.  We want to make sure to keep an accurate record
     // of which cpu and thread group the new thread is a part of.
-    if(orig->executing_for_remote == 1 || orig->tgroup_home_cpu != home_kernel) {
+    if(orig->executing_for_remote == 1 || orig->tgroup_home_cpu != home_kernel ) {
         task->tgroup_home_cpu = orig->tgroup_home_cpu;
         task->tgroup_home_id = orig->tgroup_home_id;
         task->tgroup_distributed = 1;
@@ -6243,7 +6375,7 @@ else
     request->thread_fsindex = task->thread.fsindex;
     savesegment(fs, fsindex);
     if (fsindex != request->thread_fsindex)
-        printk(KERN_WARNING"%s: fsindex %x (TLS_SEL:%x) thread %x\n", __func__, fsindex, FS_TLS_SEL, request->thread_fsindex);
+        printk(KERN_ALERT"%s: fsindex %x (TLS_SEL:%x) thread %x\n", __func__, fsindex, FS_TLS_SEL, request->thread_fsindex);
     request->thread_fs = task->thread.fs;
     rdmsrl(MSR_FS_BASE, fs);
     if (fs != request->thread_fs) {
@@ -6263,7 +6395,31 @@ else
         PSPRINTK("%s: DAVEK: gs %lx thread %lx\n", __func__, fs, request->thread_gs);
     }
 
-    // ptrace, debug, dr7: struct perf_event *ptrace_bps[HBP_NUM]; unsigned long debugreg6; unsigned long ptrace_dr7;
+#ifdef FPU_
+    //FPU migration code --- initiator
+    PSPRINTK(KERN_ERR"%s: task flags %x fpu_counter %x has_fpu %x [%d:%d] %d:%d %x\n",
+        __func__, task->flags, (int)task->fpu_counter, (int)task->thread.has_fpu,
+        (int)__thread_has_fpu(task), (int)fpu_allocated(&task->thread.fpu),
+        (int)use_xsave(), (int)use_fxsr(), (int)PF_USED_MATH);
+    request->task_flags = task->flags;
+    request->task_fpu_counter = task->fpu_counter;
+    request->thread_has_fpu = task->thread.has_fpu;
+    /* HAS_FPU_MASK flags whether a valid xstate snapshot is carried in request->fpu_state */
+    if (!fpu_allocated(&task->thread.fpu)) {
+        printk("%s: !fpu_allocated\n", __func__);
+        request->thread_has_fpu &= (unsigned char)~HAS_FPU_MASK;
+    }
+    else {
+        struct fpu temp; temp.state = &request->fpu_state;
+        fpu_save_init(&task->thread.fpu);
+        fpu_copy(&temp, &task->thread.fpu);
+        request->thread_has_fpu |= HAS_FPU_MASK;
+    }
+    printk(KERN_ALERT"%s: flags %x fpu_counter %x has_fpu %x [%d:%d]\n",
+        __func__, request->task_flags, (int)request->task_fpu_counter,
+        (int)request->thread_has_fpu, (int)__thread_has_fpu(task), (int)fpu_allocated(&task->thread.fpu));
+#endif
+
+       // ptrace, debug, dr7: struct perf_event *ptrace_bps[HBP_NUM]; unsigned long debugreg6; unsigned long ptrace_dr7;
     // Fault info: unsigned long cr2; unsigned long trap_no; unsigned long error_code;
     // floating point: struct fpu fpu; THIS IS NEEDED
     // IO permissions: unsigned long *io_bitmap_ptr; unsigned long iopl; unsigned io_bitmap_max;
@@ -6296,16 +6452,18 @@ else
  * <MEASURE perf_process_server_do_migration>
  */
 static int do_migration_back_to_previous_cpu(struct task_struct* task, int cpu) {
-    back_migration_t mig;
+    back_migration_t *mig = NULL;
     struct pt_regs* regs = task_pt_regs(task);
 
     int perf = -1;
 
     perf = PERF_MEASURE_START(&perf_process_server_do_migration);
 
+    mig = kmalloc(sizeof(back_migration_t), GFP_ATOMIC);
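+    // Guard against a failed atomic allocation (sketch: PROCESS_SERVER_CLONE_FAIL is
+    // assumed to be the failure code used by the migration paths in this file).
+    if (!mig) {
+        PERF_MEASURE_STOP(&perf_process_server_do_migration,"back migration",perf);
+        return PROCESS_SERVER_CLONE_FAIL;
+    }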
     // Set up response header
-    mig.header.type = PCN_KMSG_TYPE_PROC_SRV_BACK_MIGRATION;
-    mig.header.prio = PCN_KMSG_PRIO_NORMAL;
+    mig->header.type = PCN_KMSG_TYPE_PROC_SRV_BACK_MIGRATION;
+    mig->header.prio = PCN_KMSG_PRIO_NORMAL;
 
     // Make mark on the list of previous cpus
     set_bit(smp_processor_id(),&task->previous_cpus);
@@ -6317,35 +6475,58 @@ static int do_migration_back_to_previous_cpu(struct task_struct* task, int cpu)
     task->executing_for_remote = 0;
     task->represents_remote = 1;
     task->t_distributed = 1; // This should already be the case
+    task->return_disposition = RETURN_DISPOSITION_EXIT;
     
     // Build message
-    mig.tgroup_home_cpu = task->tgroup_home_cpu;
-    mig.tgroup_home_id  = task->tgroup_home_id;
-    mig.t_home_cpu      = task->t_home_cpu;
-    mig.t_home_id       = task->t_home_id;
-    mig.previous_cpus   = task->previous_cpus;
-    mig.thread_fs       = task->thread.fs;
-    mig.thread_gs       = task->thread.gs;
+    mig->tgroup_home_cpu = task->tgroup_home_cpu;
+    mig->tgroup_home_id  = task->tgroup_home_id;
+    mig->t_home_cpu      = task->t_home_cpu;
+    mig->t_home_id       = task->t_home_id;
+    mig->previous_cpus   = task->previous_cpus;
+    mig->thread_fs       = task->thread.fs;
+    mig->thread_gs       = task->thread.gs;
 
 unsigned long _usersp = get_percpu_old_rsp();
 if (task->thread.usersp != _usersp) { 
   printk("%s: USERSP %lx %lx\n",
     __func__, task->thread.usersp, _usersp);
-  mig.thread_usersp = _usersp;
+  mig->thread_usersp = _usersp;
 }else
-  mig.thread_usersp = task->thread.usersp;
+  mig->thread_usersp = task->thread.usersp;
+
+    mig->thread_es       = task->thread.es;
+    mig->thread_ds       = task->thread.ds;
+    mig->thread_fsindex  = task->thread.fsindex;
+    mig->thread_gsindex  = task->thread.gsindex;
+    //FPU support --- initiator (back migration?)
+#ifdef FPU_
+    mig->task_flags       = task->flags;
+    mig->task_fpu_counter = task->fpu_counter;
+    mig->thread_has_fpu   = task->thread.has_fpu;
+    if (!fpu_allocated(&task->thread.fpu)) {
+        printk("%s: !fpu_allocated\n", __func__);
+        mig->thread_has_fpu &= (unsigned char)~HAS_FPU_MASK;
+    }
+    else {
+        struct fpu temp; temp.state = &mig->fpu_state;
+        fpu_save_init(&task->thread.fpu);
+        fpu_copy(&temp, &task->thread.fpu);
+        mig->thread_has_fpu |= HAS_FPU_MASK;
+    }
+    printk(KERN_ALERT"%s: flags %x fpu_counter %x has_fpu %x [%d:%d]\n",
+        __func__, mig->task_flags, (int)mig->task_fpu_counter,
+        (int)mig->thread_has_fpu, (int)__thread_has_fpu(task), (int)fpu_allocated(&task->thread.fpu));
 
-    mig.thread_es       = task->thread.es;
-    mig.thread_ds       = task->thread.ds;
-    mig.thread_fsindex  = task->thread.fsindex;
-    mig.thread_gsindex  = task->thread.gsindex;
-    memcpy(&mig.regs, regs, sizeof(struct pt_regs));
+#endif
+
+    memcpy(&mig->regs, regs, sizeof(struct pt_regs));
 
     // Send migration request to destination.
-    pcn_kmsg_send_long(cpu,
-                       (struct pcn_kmsg_long_message*)&mig,
-                       sizeof(back_migration_t) - sizeof(struct pcn_kmsg_hdr));
+    pcn_kmsg_send_long(cpu,
+                       (struct pcn_kmsg_long_message*)mig,
+                       sizeof(back_migration_t) - sizeof(struct pcn_kmsg_hdr));
 
+    pcn_kmsg_free_msg(mig);
     PERF_MEASURE_STOP(&perf_process_server_do_migration,"back migration",perf);
 
     return PROCESS_SERVER_CLONE_SUCCESS;
@@ -6391,6 +6572,7 @@ int process_server_do_migration(struct task_struct* task, int cpu) {
     _remote_cpu_info_list_t *objPtr;
     struct cpumask *pcpum =0;
     int cpuid=-1;
+extern struct list_head rlist_head;
     list_for_each(iter, &rlist_head) {
         objPtr = list_entry(iter, _remote_cpu_info_list_t, cpu_list_member);
         cpuid = objPtr->_data._processor;
@@ -6415,17 +6597,11 @@ int process_server_do_migration(struct task_struct* task, int cpu) {
  * implements the actions that must be made immediately after
  * the newly awoken task resumes execution.
  */
-void process_server_do_return_disposition(void)
-{
+void process_server_do_return_disposition(void) {
+
     PSPRINTK("%s\n",__func__);
-    int return_disposition = current->return_disposition;
-    // Reset the return disposition
-    current->return_disposition = RETURN_DISPOSITION_NONE;
-    
-    switch(return_disposition) {
-    case RETURN_DISPOSITION_NONE:
-        printk("%s: ERROR, return disposition is none!\n",__func__);
-        break;      
+
+    switch(current->return_disposition) {
     case RETURN_DISPOSITION_MIGRATE:
         // Nothing to do, already back-imported the
         // state in process_back_migration.  This will
index 937125e..c790011 100644 (file)
@@ -1,9 +1,8 @@
 /*
  * Inter-kernel messaging support for Popcorn
  *
- * Antonio Barbalace, David Katz, Marina Sadini 2014
- * Antonio Barbalace, Marina Sadini, Phil Wilshire 2013
- * Ben Shelton 2012 - 2013
+ * Current ver: Antonio Barbalace, Phil Wilshire 2013
+ * First ver: Ben Shelton <beshelto@vt.edu> 2013
  */
 
 #include <linux/irq.h>
 #define KMSG_PRINTK(...) ;
 #endif
 
-/*
- * MAX_LOOPS must be calibrated on each architecture. Is difficult to give a
- * good estimation for this parameter. Worst case must be equal or inferior to
- * the minimum time we will be blocked for a call to schedule_timeout
- */
-#define MAX_LOOPS 12345
-#define MAX_LOOPS_JIFFIES (MAX_SCHEDULE_TIMEOUT)
 
 #define MCAST_VERBOSE 0
 
@@ -57,9 +49,6 @@
 
 #define KMSG_ERR(fmt, args...) printk("%s: ERROR: " fmt, __func__, ##args)
 
-#define ROUND_PAGES(size) ((size/PAGE_SIZE) + ((size%PAGE_SIZE)? 1:0))
-#define ROUND_PAGE_SIZE(size) (ROUND_PAGES(size)*PAGE_SIZE)
-
 /* COMMON STATE */
 
 /* table of callback functions for handling each message type */
@@ -76,6 +65,8 @@ struct pcn_kmsg_rkinfo *rkinfo;
    one per kernel */
 struct pcn_kmsg_window * rkvirt[POPCORN_MAX_CPUS];
 
+/* Same thing, but for mcast windows */
+struct pcn_kmsg_mcast_local mcastlocal[POPCORN_MAX_MCAST_CHANNELS];
 
 /* lists of messages to be processed for each prio */
 struct list_head msglist_hiprio, msglist_normprio;
@@ -104,11 +95,6 @@ struct workqueue_struct *messaging_wq;
 #define PCN_WARN(...) ;
 #define PCN_ERROR(...) printk(__VA_ARGS__)
 
-unsigned long long total_sleep_win_put = 0;
-unsigned int sleep_win_put_count = 0;
-unsigned long long total_sleep_win_get = 0;
-unsigned int sleep_win_get_count = 0;
-
 struct pcn_kmsg_hdr log_receive[LOGLEN];
 struct pcn_kmsg_hdr log_send[LOGLEN];
 int log_r_index=0;
@@ -119,13 +105,8 @@ int log_f_index=0;
 int log_f_sendindex=0;
 void * log_function_send[LOGCALL];
 
-#define SIZE_RANGES 7
-unsigned int large_message_sizes[(SIZE_RANGES +1)];
-unsigned int large_message_count[(SIZE_RANGES +1)];
-unsigned int type_message_count[PCN_KMSG_TYPE_MAX];
-
 /* From Wikipedia page "Fetch and add", modified to work for u64 */
-/*static inline unsigned long fetch_and_add(volatile unsigned long * variable, 
+static inline unsigned long fetch_and_add(volatile unsigned long * variable, 
                                          unsigned long value)
 {
        asm volatile( 
@@ -134,8 +115,7 @@ unsigned int type_message_count[PCN_KMSG_TYPE_MAX];
                     : "a" (value), "m" (*variable)  //Input
                     :"memory" );
        return value;
-}*/
-#define fetch_and_add xadd_sync
+}
 
 static inline unsigned long win_inuse(struct pcn_kmsg_window *win) 
 {
@@ -147,7 +127,6 @@ static inline int win_put(struct pcn_kmsg_window *win,
                          int no_block) 
 {
        unsigned long ticket;
-       unsigned long loop;
 
        /* if we can't block and the queue is already really long, 
           return EAGAIN */
@@ -168,35 +147,25 @@ static inline int win_put(struct pcn_kmsg_window *win,
        /* spin until there's a spot free for me */
        //while (win_inuse(win) >= RB_SIZE) {}
        //if(ticket>=PCN_KMSG_RBUF_SIZE){
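+               /* Wait until this slot has been drained: its last_ticket must show the
+                * previous lap's ticket and its ready flag must be clear before the new
+                * payload is copied in. */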
+               while (win->buffer[ticket%PCN_KMSG_RBUF_SIZE].last_ticket != ticket-PCN_KMSG_RBUF_SIZE) {
+                       //pcn_cpu_relax();
+                       msleep(1);
+               }
+               while (win->buffer[ticket%PCN_KMSG_RBUF_SIZE].ready != 0) {
+                       //pcn_cpu_relax();
+                       msleep(1);
+               }
        //}
-       loop=0;  
-       while( (win->buffer[ticket%PCN_KMSG_RBUF_SIZE].last_ticket
-         != (ticket - PCN_KMSG_RBUF_SIZE)) ) {
-               //pcn_cpu_relax();
-               //msleep(1);
-               if ( !(++loop % MAX_LOOPS) )
-                       schedule_timeout(MAX_LOOPS_JIFFIES);
-       }
-       /* the following it is always false because add is after ready=0*/
-       //loop=0;
-       while( win->buffer[ticket%PCN_KMSG_RBUF_SIZE].ready!=0 ) {
-               pcn_cpu_relax();
-               //msleep(1);
-               //if ( !(++loop % MAX_LOOPS) )
-               //      schedule_timeout(MAX_LOOPS_JIFFIES);
-       }
-       
        /* insert item */
        memcpy(&win->buffer[ticket%PCN_KMSG_RBUF_SIZE].payload,
               &msg->payload, PCN_KMSG_PAYLOAD_SIZE);
 
-       memcpy((void*)&(win->buffer[ticket%PCN_KMSG_RBUF_SIZE].hdr),
-              (void*)&(msg->hdr), sizeof(struct pcn_kmsg_hdr));
+       memcpy(&win->buffer[ticket%PCN_KMSG_RBUF_SIZE].hdr,
+              &msg->hdr, sizeof(struct pcn_kmsg_hdr));
+
 
        //log_send[log_s_index%LOGLEN]= win->buffer[ticket & RB_MASK].hdr;
-       memcpy(&(log_send[log_s_index%LOGLEN]),
-               (void*)&(win->buffer[ticket%PCN_KMSG_RBUF_SIZE].hdr),
-               sizeof(struct pcn_kmsg_hdr));
+       memcpy(&(log_send[log_s_index%LOGLEN]),&(win->buffer[ticket%PCN_KMSG_RBUF_SIZE].hdr),sizeof(struct pcn_kmsg_hdr));
        log_s_index++;
 
        win->second_buffer[ticket%PCN_KMSG_RBUF_SIZE]++;
@@ -210,6 +179,8 @@ static inline int win_put(struct pcn_kmsg_window *win,
 
 msg_put++;
 
+
+
        return 0;
 }
 
@@ -218,39 +189,51 @@ static inline int win_get(struct pcn_kmsg_window *win,
                          struct pcn_kmsg_reverse_message **msg) 
 {
        struct pcn_kmsg_reverse_message *rcvd;
-       unsigned long loop;
 
        if (!win_inuse(win)) {
+
                KMSG_PRINTK("nothing in buffer, returning...\n");
                return -1;
        }
 
-       KMSG_PRINTK("reached win_get, head %lu, tail %lu\n", win->head, win->tail);
-       rcvd =(struct pcn_kmsg_reverse_message*) &(win->buffer[win->tail % PCN_KMSG_RBUF_SIZE]);
+       KMSG_PRINTK("reached win_get, head %lu, tail %lu\n", 
+                   win->head, win->tail);      
 
        /* spin until entry.ready at end of cache line is set */
-       loop=0;
+       rcvd = &(win->buffer[win->tail % PCN_KMSG_RBUF_SIZE]);
+       //KMSG_PRINTK("%s: Ready bit: %u\n", __func__, rcvd->hdr.ready);
+
        while (!rcvd->ready) {
+
                //pcn_cpu_relax();
-               //msleep(1);
-               if ( !(++loop % MAX_LOOPS) )
-                       schedule_timeout(MAX_LOOPS_JIFFIES);
+               msleep(1);
+
        }
 
-       /* statistics */
-       memcpy(&(log_receive[log_r_index%LOGLEN]),
-              &(rcvd->hdr),
-              sizeof(struct pcn_kmsg_hdr));
+
+       // barrier here?
+       pcn_barrier();
+
+       //log_receive[log_r_index%LOGLEN]=rcvd->hdr;
+       memcpy(&(log_receive[log_r_index%LOGLEN]),&(rcvd->hdr),sizeof(struct pcn_kmsg_hdr));
        log_r_index++;
-       msg_get++;
 
-       *msg = rcvd;
+       //rcvd->hdr.ready = 0;
+
+       *msg = rcvd;    
+msg_get++;
+
+
+
        return 0;
 }
 
-/*static inline void win_advance_tail(struct pcn_kmsg_window *win) 
-{ win->tail++; }
-*/
+static inline void win_advance_tail(struct pcn_kmsg_window *win) 
+{
+       win->tail++;
+}
 
 /* win_enable_int
  * win_disable_int
@@ -272,6 +255,118 @@ static inline unsigned char win_int_enabled(struct pcn_kmsg_window *win) {
                return win->int_enabled;
 }
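+/* Per-channel local view of the multicast windows: MCASTWIN(id) is the mapped
+ * shared window for channel id, LOCAL_TAIL(id) is this kernel's private read
+ * position within that window. */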
 
+#define MCASTWIN(_id_) (mcastlocal[(_id_)].mcastvirt)
+#define LOCAL_TAIL(_id_) (mcastlocal[(_id_)].local_tail)
+
+/* MULTICAST RING BUFFER */
+static inline unsigned long mcastwin_inuse(pcn_kmsg_mcast_id id)
+{
+       return MCASTWIN(id)->head - MCASTWIN(id)->tail;
+}
+
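+/* mcastwin_put: producer side of the multicast ring buffer. A slot is claimed
+ * with fetch_and_add on head, payload and header are copied in, read_counter is
+ * set to (number of group members - 1) so that every other member must consume
+ * the slot before the global tail can advance, and ready is set last. */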
+static inline int mcastwin_put(pcn_kmsg_mcast_id id,
+                              struct pcn_kmsg_message *msg)
+{
+       unsigned long ticket;
+       unsigned long time_limit = jiffies + 2;
+
+
+       MCAST_PRINTK("called for id %lu, msg 0x%p\n", id, msg);
+
+       /* if the queue is already really long, return EAGAIN */
+       if (mcastwin_inuse(id) >= RB_SIZE) {
+               MCAST_PRINTK("window full, caller should try again...\n");
+               return -EAGAIN;
+       }
+
+       /* grab ticket */
+       ticket = fetch_and_add(&MCASTWIN(id)->head, 1);
+       MCAST_PRINTK("ticket = %lu, head = %lu, tail = %lu\n",
+                    ticket, MCASTWIN(id)->head, MCASTWIN(id)->tail);
+
+       /* spin until there's a spot free for me */
+       while (mcastwin_inuse(id) >= RB_SIZE) {
+               if (unlikely(time_after(jiffies, time_limit))) {
+                       MCAST_PRINTK("spinning too long to wait for window to be free; this is bad!\n");
+                       return -1;
+               }
+       }
+
+       /* insert item */
+       memcpy(&MCASTWIN(id)->buffer[ticket & RB_MASK].payload, 
+              &msg->payload, PCN_KMSG_PAYLOAD_SIZE);
+
+       memcpy(&MCASTWIN(id)->buffer[ticket & RB_MASK].hdr, 
+              &msg->hdr, sizeof(struct pcn_kmsg_hdr));
+
+       /* set counter to (# in group - self) */
+
+       /*
+       int x;
+
+       if ((x = atomic_read(&MCASTWIN(id)->read_counter[ticket & RB_MASK]))) {
+               KMSG_ERR("read counter is not zero (it's %d)\n", x);
+               return -1;
+       }
+       */
+
+       atomic_set(&MCASTWIN(id)->read_counter[ticket & RB_MASK],
+               rkinfo->mcast_wininfo[id].num_members - 1);
+
+       MCAST_PRINTK("set counter to %d\n", 
+                    rkinfo->mcast_wininfo[id].num_members - 1);
+
+       pcn_barrier();
+
+       /* set completed flag */
+       MCASTWIN(id)->buffer[ticket & RB_MASK].ready = 1;
+
+       return 0;
+}
+
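+/* mcastwin_get: consumer side. Each kernel keeps a private local_tail; slots
+ * written by this CPU are skipped, and a slot is returned to the caller only
+ * once its ready flag is observed set. */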
+static inline int mcastwin_get(pcn_kmsg_mcast_id id,
+                              struct pcn_kmsg_reverse_message **msg)
+{
+       volatile struct pcn_kmsg_reverse_message *rcvd;
+
+       MCAST_PRINTK("called for id %lu, head %lu, tail %lu, local_tail %lu\n", 
+                    id, MCASTWIN(id)->head, MCASTWIN(id)->tail, 
+                    LOCAL_TAIL(id));
+
+retry:
+
+       /* if we sent a bunch of messages, it's possible our local_tail
+          has gotten behind the global tail and we need to update it */
+       /* TODO -- atomicity concerns here? */
+       if (LOCAL_TAIL(id) < MCASTWIN(id)->tail) {
+               LOCAL_TAIL(id) = MCASTWIN(id)->tail;
+       }
+
+       if (MCASTWIN(id)->head == LOCAL_TAIL(id)) {
+               MCAST_PRINTK("nothing in buffer, returning...\n");
+               return -1;
+       }
+
+       /* spin until entry.ready at end of cache line is set */
+       rcvd = &(MCASTWIN(id)->buffer[LOCAL_TAIL(id) & RB_MASK]);
+       while (!rcvd->ready) {
+               pcn_cpu_relax();
+       }
+
+       // barrier here?
+       pcn_barrier();
+
+       /* we can't step on our own messages! */
+       if (rcvd->hdr.from_cpu == my_cpu) {
+               LOCAL_TAIL(id)++;
+               goto retry;
+       }
+
+       *msg = rcvd;
+
+       return 0;
+}
+
 static inline int atomic_add_return_sync(int i, atomic_t *v)
 {
        return i + xadd_sync(&v->counter, i);
@@ -287,12 +382,44 @@ static inline int atomic_dec_and_test_sync(atomic_t *v)
        return c != 0;
 }
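+/* mcastwin_advance_tail: every reader advances its own local_tail; the last
+ * member to read a slot (atomic dec-and-test on read_counter) also clears the
+ * slot's ready flag and advances the shared global tail. */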
 
+static inline void mcastwin_advance_tail(pcn_kmsg_mcast_id id)
+{
+       unsigned long slot = LOCAL_TAIL(id) & RB_MASK;
+       //int val_ret, i;
+       //char printstr[256];
+       //char intstr[16];
+
+       MCAST_PRINTK("local tail currently on slot %lu, read counter %d\n", 
+                    LOCAL_TAIL(id), atomic_read(&MCASTWIN(id)->read_counter[slot]));
+
+       /*
+       memset(printstr, 0, 256);
+       memset(intstr, 0, 16);
+
+       for (i = 0; i < 64; i++) {
+               sprintf(intstr, "%d ", atomic_read(&MCASTWIN(id)->read_counter[i]));
+               strcat(printstr, intstr);
+       }
+
+       MCAST_PRINTK("read_counter: %s\n", printstr);
+
+       val_ret = atomic_add_return_sync(-1, &MCASTWIN(id)->read_counter[slot]);
+
+       MCAST_PRINTK("read counter after: %d\n", val_ret);
+       */
 
+       if (atomic_dec_and_test_sync(&MCASTWIN(id)->read_counter[slot])) {
+               MCAST_PRINTK("we're the last reader to go; ++ global tail\n");
+               MCASTWIN(id)->buffer[slot].ready = 0;
+               atomic64_inc((atomic64_t *) &MCASTWIN(id)->tail);
+       }
+
+       LOCAL_TAIL(id)++;
+}
 
 /* INITIALIZATION */
-#ifdef PCN_SUPPORT_MULTICAST
+
 static int pcn_kmsg_mcast_callback(struct pcn_kmsg_message *message);
-#endif /* PCN_SUPPORT_MULTICAST */
 
 static void map_msg_win(pcn_kmsg_work_t *w)
 {
@@ -304,8 +431,7 @@ static void map_msg_win(pcn_kmsg_work_t *w)
        }
 
        rkvirt[cpu] = ioremap_cache(rkinfo->phys_addr[cpu],
-                                 ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_window)));
-
+                                   sizeof(struct pcn_kmsg_window));
        if (rkvirt[cpu]) {
                KMSG_INIT("ioremapped window, virt addr 0x%p\n", 
                          rkvirt[cpu]);
@@ -315,6 +441,28 @@ static void map_msg_win(pcn_kmsg_work_t *w)
        }
 }
 
+static void map_mcast_win(pcn_kmsg_work_t *w)
+{
+       pcn_kmsg_mcast_id id = w->id_to_join;
+
+       /* map window */
+       if (id >= POPCORN_MAX_MCAST_CHANNELS) {
+               KMSG_ERR("%s: invalid mcast channel id %lu specified!\n",
+                        __func__, id);
+               return;
+       }
+
+       MCASTWIN(id) = ioremap_cache(rkinfo->mcast_wininfo[id].phys_addr,
+                                    sizeof(struct pcn_kmsg_mcast_window));
+       if (MCASTWIN(id)) {
+               MCAST_PRINTK("ioremapped mcast window, virt addr 0x%p\n",
+                            MCASTWIN(id));
+       } else {
+               KMSG_ERR("Failed to map mcast window %lu at phys addr 0x%lx\n",
+                        id, rkinfo->mcast_wininfo[id].phys_addr);
+       }
+}
+
 /* bottom half for workqueue */
 static void process_kmsg_wq_item(struct work_struct * work)
 {
@@ -332,7 +480,6 @@ static void process_kmsg_wq_item(struct work_struct * work)
                                 __func__);
                        break;
 
-#ifdef PCN_SUPPORT_MULTICAST
                case PCN_KMSG_WQ_OP_MAP_MCAST_WIN:
                        map_mcast_win(w);
                        break;
@@ -340,7 +487,6 @@ static void process_kmsg_wq_item(struct work_struct * work)
                case PCN_KMSG_WQ_OP_UNMAP_MCAST_WIN:
                        KMSG_ERR("UNMAP_MCAST_WIN not yet implemented!\n");
                        break;
-#endif /* PCN_SUPPORT_MULTICAST */
 
                default:
                        KMSG_ERR("Invalid work queue operation %d\n", w->op);
@@ -402,17 +548,33 @@ static inline int pcn_kmsg_window_init(struct pcn_kmsg_window *window)
 {
        window->head = 0;
        window->tail = 0;
+       window->int_enabled = 1;
        //memset(&window->buffer, 0,
             //  PCN_KMSG_RBUF_SIZE * sizeof(struct pcn_kmsg_reverse_message));
        int i;
        for(i=0;i<PCN_KMSG_RBUF_SIZE;i++){
                window->buffer[i].last_ticket=i-PCN_KMSG_RBUF_SIZE;
-               window->buffer[i].ready=0;
        }
        memset(&window->second_buffer, 0,
                       PCN_KMSG_RBUF_SIZE * sizeof(int));
+       return 0;
+}
 
-       window->int_enabled = 1;
+static inline int pcn_kmsg_mcast_window_init(struct pcn_kmsg_mcast_window *win)
+{
+       int i;
+
+       win->head = 0;
+       win->tail = 0;
+
+       for (i = 0; i < PCN_KMSG_RBUF_SIZE; i++) {
+               atomic_set(&win->read_counter[i], 0);
+       }
+
+       //memset(&win->read_counter, 0, 
+       //       PCN_KMSG_RBUF_SIZE * sizeof(int));
+       memset(&win->buffer, 0,
+              PCN_KMSG_RBUF_SIZE * sizeof(struct pcn_kmsg_message));
        return 0;
 }
 
@@ -450,8 +612,7 @@ static int do_checkin(void)
 
                if (rkinfo->phys_addr[i]) {
                        rkvirt[i] = ioremap_cache(rkinfo->phys_addr[i],
-                                                 ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_window)));
-
+                                                 sizeof(struct pcn_kmsg_window));
                        if (rkvirt[i]) {
                                KMSG_INIT("ioremapped CPU %d's window, virt addr 0x%p\n", 
                                          i, rkvirt[i]);
@@ -478,14 +639,9 @@ static int pcn_read_proc(char *page, char **start, off_t off, int count, int *eo
        char *p= page;
     int len, i, idx;
 
-       p += sprintf(p, "get: %ld\n", msg_get);
-        p += sprintf(p, "put: %ld\n", msg_put);
-    for (i =0; i<(SIZE_RANGES +1); i++)
-      p +=sprintf (p,"%u: %u\n", large_message_sizes[i], large_message_count[i]);
-    
-    for (i =0; i<PCN_KMSG_TYPE_MAX; i++)
-      p +=sprintf (p, "t%u: %u\n", i, type_message_count[i]);
-    
+       p += sprintf(p, "messages get: %ld\n", msg_get);
+        p += sprintf(p, "messages put: %ld\n", msg_put);
+
     idx = log_r_index;
     for (i =0; i>-LOGLEN; i--)
        p +=sprintf (p,"r%d: from%d type%d %1d:%1d:%1d seq%d\n",
@@ -499,7 +655,7 @@ static int pcn_read_proc(char *page, char **start, off_t off, int count, int *eo
                        (idx+i),(int) log_send[(idx+i)%LOGLEN].from_cpu, (int)log_send[(idx+i)%LOGLEN].type,
                        (int) log_send[(idx+i)%LOGLEN].is_lg_msg, (int)log_send[(idx+i)%LOGLEN].lg_start,
                        (int) log_send[(idx+i)%LOGLEN].lg_end, (int) log_send[(idx+i)%LOGLEN].lg_seqnum );
-/*
+
     idx = log_f_index;
         for (i =0; i>-LOGCALL; i--)
                p +=sprintf (p,"f%d: %pB\n",
@@ -512,7 +668,7 @@ static int pcn_read_proc(char *page, char **start, off_t off, int count, int *eo
 
         for(i=0; i<PCN_KMSG_RBUF_SIZE; i++)
                p +=sprintf (p,"second_buffer[%i]=%i\n",i,rkvirt[my_cpu]->second_buffer[i]);
-*/
+
 
        len = (p -page) - off;
        if (len < 0)
@@ -522,35 +678,18 @@ static int pcn_read_proc(char *page, char **start, off_t off, int count, int *eo
        return len;
 }
 
-static int pcn_write_proc (struct file *file, const char __user *buffer, unsigned long count, void *data)
-{
-  int i;
-       msg_get=0;
-       msg_put=0;
-       memset(large_message_count, 0, sizeof(int)*(SIZE_RANGES +1));
-       memset(large_message_sizes, 0, sizeof(int)*(SIZE_RANGES +1));
-       for (i=0; i<SIZE_RANGES; i++)
-         large_message_sizes[i] = ((i+1)*PCN_KMSG_PAYLOAD_SIZE);
-       large_message_sizes[SIZE_RANGES] = ~0;
-       memset(type_message_count, 0, sizeof(int)*PCN_KMSG_TYPE_MAX);
-       return count;
-}
-
 static int __init pcn_kmsg_init(void)
 {
        int rc,i;
        unsigned long win_phys_addr, rkinfo_phys_addr;
        struct pcn_kmsg_window *win_virt_addr;
-       struct boot_params *boot_params_va;
+       struct boot_params * boot_params_va;
 
        KMSG_INIT("entered\n");
 
        my_cpu = raw_smp_processor_id();
        
-       printk("%s: THIS VERSION DOES NOT SUPPORT CACHE ALIGNED BUFFERS\n",
-              __func__);
-       printk("%s: Entered pcn_kmsg_init raw: %d id: %d\n",
-               __func__, my_cpu, smp_processor_id());
+       printk("%s: Entered pcn_kmsg_init raw: %d id: %d\n", __func__, my_cpu, smp_processor_id());
 
        /* Initialize list heads */
        INIT_LIST_HEAD(&msglist_hiprio);
@@ -558,7 +697,7 @@ static int __init pcn_kmsg_init(void)
 
        /* Clear out large-message receive buffers */
        //memset(&lg_buf, 0, POPCORN_MAX_CPUS * sizeof(unsigned char *));
-       for(i=0; i<POPCORN_MAX_CPUS; i++) {
+       for (i = 0; i < POPCORN_MAX_CPUS; i++) {
                INIT_LIST_HEAD(&(lg_buf[i]));
        }
        long_id=0;
@@ -570,96 +709,80 @@ static int __init pcn_kmsg_init(void)
        rc = pcn_kmsg_register_callback(PCN_KMSG_TYPE_CHECKIN, 
                                        &pcn_kmsg_checkin_callback);
        if (rc) {
-               printk(KERN_ALERT"Failed to register initial kmsg checkin callback!\n");
+               KMSG_ERR("Failed to register initial kmsg checkin callback!\n");
        }
 
-#ifdef PCN_SUPPORT_MULTICAST
        rc = pcn_kmsg_register_callback(PCN_KMSG_TYPE_MCAST, 
                                        &pcn_kmsg_mcast_callback);
        if (rc) {
-               printk(KERN_ALERT"Failed to register initial kmsg mcast callback!\n");
+               KMSG_ERR("Failed to register initial kmsg mcast callback!\n");
        }
-#endif /* PCN_SUPPORT_MULTICAST */     
 
-       /* Register softirq handler now kworker */
+       /* Register softirq handler */
        KMSG_INIT("Registering softirq handler...\n");
        //open_softirq(PCN_KMSG_SOFTIRQ, pcn_kmsg_action);
-       messaging_wq= create_workqueue("messaging_wq");
-       if (!messaging_wq) 
-               printk("%s: create_workqueue(messaging_wq) ret 0x%lx ERROR\n",
-                       __func__, (unsigned long)messaging_wq);
+       messaging_wq= create_singlethread_workqueue("messaging_wq");
 
        /* Initialize work queue */
        KMSG_INIT("Initializing workqueue...\n");
-       kmsg_wq = create_workqueue("kmsg_wq");
-       if (!kmsg_wq)
-               printk("%s: create_workqueue(kmsg_wq) ret 0x%lx ERROR\n",
-                       __func__, (unsigned long)kmsg_wq);
+       kmsg_wq = create_singlethread_workqueue("kmsg_wq");
 
-               
        /* If we're the master kernel, malloc and map the rkinfo structure and 
           put its physical address in boot_params; otherwise, get it from the 
           boot_params and map it */
        if (!mklinux_boot) {
-               /* rkinfo must be multiple of a page, because the granularity of
-                * foreings mapping is per page. The following didn't worked,
-                * the returned address is on the form 0xffff88000000, ioremap
-                * on the remote fails. 
-               int order = get_order(sizeof(struct pcn_kmsg_rkinfo));
-               rkinfo = __get_free_pages(GFP_KERNEL, order);
-               */
-               KMSG_INIT("Primary kernel, mallocing rkinfo size:%d rounded:%d\n",
-                      sizeof(struct pcn_kmsg_rkinfo), ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_rkinfo)));
-               rkinfo = kmalloc(ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_rkinfo)), GFP_KERNEL);
+               KMSG_INIT("We're the master; mallocing rkinfo...\n");
+               rkinfo = kmalloc(sizeof(struct pcn_kmsg_rkinfo), GFP_KERNEL);
+
                if (!rkinfo) {
                        KMSG_ERR("Failed to malloc rkinfo structure!\n");
                        return -1;
                }
-               memset(rkinfo, 0x0, sizeof(struct pcn_kmsg_rkinfo));
+
                rkinfo_phys_addr = virt_to_phys(rkinfo);
-               KMSG_INIT("rkinfo virt %p, phys 0x%lx MAX_CPUS %d\n", 
-                         rkinfo, rkinfo_phys_addr, POPCORN_MAX_CPUS);
 
+               KMSG_INIT("rkinfo virt addr 0x%p, phys addr 0x%lx\n", 
+                         rkinfo, rkinfo_phys_addr);
+
+               memset(rkinfo, 0x0, sizeof(struct pcn_kmsg_rkinfo));
+
+               KMSG_INIT("Setting boot_params...\n");
                /* Otherwise, we need to set the boot_params to show the rest
                   of the kernels where the master kernel's messaging window 
                   is. */
-               KMSG_INIT("Setting boot_params...\n");
                boot_params_va = (struct boot_params *) 
                        (0xffffffff80000000 + orig_boot_params);
+               KMSG_INIT("Boot params virt addr: 0x%p\n", boot_params_va);
                boot_params_va->pcn_kmsg_master_window = rkinfo_phys_addr;
-               KMSG_INIT("boot_params virt %p phys %p\n",
-                       boot_params_va, orig_boot_params);
-               
-               KMSG_INIT("LOOPS %ld, LOOPS_JIFFIES %ld, 1nsec %ld, 1usec %ld, 1msec %ld, 1jiffies %d %d\n",
-                         MAX_LOOPS, MAX_LOOPS_JIFFIES, nsecs_to_jiffies(1), usecs_to_jiffies(1), msecs_to_jiffies(1),
-                         jiffies_to_msecs(1), jiffies_to_usecs(1));
-       }
-       else {
-               KMSG_INIT("Primary kernel rkinfo phys addr: 0x%lx\n", 
+       } else {
+               KMSG_INIT("Master kernel rkinfo phys addr: 0x%lx\n", 
                          (unsigned long) boot_params.pcn_kmsg_master_window);
+
                rkinfo_phys_addr = boot_params.pcn_kmsg_master_window;
-               
-               rkinfo = ioremap_cache(rkinfo_phys_addr, ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_rkinfo)));
+               rkinfo = ioremap_cache(rkinfo_phys_addr, 
+                                      sizeof(struct pcn_kmsg_rkinfo));
+
                if (!rkinfo) {
                        KMSG_ERR("Failed to map rkinfo from master kernel!\n");
                }
+
                KMSG_INIT("rkinfo virt addr: 0x%p\n", rkinfo);
        }
 
        /* Malloc our own receive buffer and set it up */
-       win_virt_addr = kmalloc(ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_window)), GFP_KERNEL);
+       win_virt_addr = kmalloc(sizeof(struct pcn_kmsg_window), GFP_KERNEL);
+
        if (win_virt_addr) {
-               KMSG_INIT("Allocated %ld(%ld) bytes for my win, virt addr 0x%p\n", 
-                         ROUND_PAGE_SIZE(sizeof(struct pcn_kmsg_window)),
+               KMSG_INIT("Allocated %ld bytes for my win, virt addr 0x%p\n", 
                          sizeof(struct pcn_kmsg_window), win_virt_addr);
        } else {
-               KMSG_ERR("%s: Failed to kmalloc kmsg recv window!\n", __func__);
+               KMSG_ERR("Failed to kmalloc kmsg recv window!\n");
                return -1;
        }
 
        rkvirt[my_cpu] = win_virt_addr;
        win_phys_addr = virt_to_phys((void *) win_virt_addr);
-       KMSG_INIT("cpu %d physical address: 0x%lx\n", my_cpu, win_phys_addr);
+       KMSG_INIT("Physical address: 0x%lx\n", win_phys_addr);
        rkinfo->phys_addr[my_cpu] = win_phys_addr;
 
        rc = pcn_kmsg_window_init(rkvirt[my_cpu]);
@@ -678,14 +801,6 @@ static int __init pcn_kmsg_init(void)
                }
        } 
 
-       /* proc interface for debugging and stats */
-       memset(large_message_count, 0, sizeof(int)*(SIZE_RANGES +1));
-       memset(large_message_sizes, 0, sizeof(int)*(SIZE_RANGES +1));
-       for (i=0; i<SIZE_RANGES; i++)
-         large_message_sizes[i] = ((i+1)*PCN_KMSG_PAYLOAD_SIZE);
-       large_message_sizes[SIZE_RANGES] = ~0;
-       memset(type_message_count, 0, sizeof(int)*PCN_KMSG_TYPE_MAX);
-
        memset(log_receive,0,sizeof(struct pcn_kmsg_hdr)*LOGLEN);
        memset(log_send,0,sizeof(struct pcn_kmsg_hdr)*LOGLEN);
        memset(log_function_called,0,sizeof(void*)*LOGCALL);
@@ -694,11 +809,12 @@ static int __init pcn_kmsg_init(void)
        struct proc_dir_entry *res;
        res = create_proc_entry("pcnmsg", S_IRUGO, NULL);
        if (!res) {
-               printk(KERN_ALERT"%s: create_proc_entry failed (%p)\n", __func__, res);
+               printk("%s: create_proc_entry failed (%p)\n", __func__, res);
                return -ENOMEM;
        }
        res->read_proc = pcn_read_proc;
-       res->write_proc = pcn_write_proc;
+
+
 
        return 0;
 }
@@ -711,7 +827,7 @@ int pcn_kmsg_register_callback(enum pcn_kmsg_type type, pcn_kmsg_cbftn callback)
        PCN_WARN("%s: registering callback for type %d, ptr 0x%p\n", __func__, type, callback);
 
        if (type >= PCN_KMSG_TYPE_MAX) {
-               printk(KERN_ALERT"Attempted to register callback with bad type %d\n", 
+               KMSG_ERR("Attempted to register callback with bad type %d\n", 
                         type);
                return -1;
        }
@@ -766,7 +882,6 @@ static int __pcn_kmsg_send(unsigned int dest_cpu, struct pcn_kmsg_message *msg,
        msg->hdr.from_cpu = my_cpu;
 
        rc = win_put(dest_window, msg, no_block);
-type_message_count[msg->hdr.type]++;
 
        if (rc) {
                if (no_block && (rc == EAGAIN)) {
@@ -821,23 +936,30 @@ int pcn_kmsg_send_long(unsigned int dest_cpu,
                       struct pcn_kmsg_long_message *lmsg, 
                       unsigned int payload_size)
 {
-       int i, ret =0;
+       int i;
        int num_chunks = payload_size / PCN_KMSG_PAYLOAD_SIZE;
        struct pcn_kmsg_message this_chunk;
+       //char test_buf[15];
+
+       /*mklinux_akshay*/
+       int ret=0;
+       /*mklinux_akshay*/
 
        if (payload_size % PCN_KMSG_PAYLOAD_SIZE) {
                num_chunks++;
        }
 
-        if ( num_chunks >= MAX_CHUNKS ){
+        if ( num_chunks >= ((1<<LG_SEQNUM_SIZE)-1) ){
                 KMSG_PRINTK("Message too long (size:%d, chunks:%d, max:%d) can not be transferred\n",
-                       payload_size, num_chunks, MAX_CHUNKS);
+                       payload_size, num_chunks, ((1 << LG_SEQNUM_SIZE)-1));
                return -1;
         }
 
        KMSG_PRINTK("Sending large message to CPU %d, type %d, payload size %d bytes, %d chunks\n", 
                    dest_cpu, lmsg->hdr.type, payload_size, num_chunks);
 
+
+
        this_chunk.hdr.type = lmsg->hdr.type;
        this_chunk.hdr.prio = lmsg->hdr.prio;
        this_chunk.hdr.is_lg_msg = 1;
@@ -859,10 +981,6 @@ int pcn_kmsg_send_long(unsigned int dest_cpu,
                if(ret!=0)
                        return ret;
        }
-       
-       /* statistics */
-       num_chunks = payload_size / PCN_KMSG_PAYLOAD_SIZE;
-       large_message_count[((num_chunks < SIZE_RANGES) ? num_chunks : SIZE_RANGES)]++;
 
        return 0;
 }
@@ -983,6 +1101,7 @@ static int process_large_message(struct pcn_kmsg_reverse_message *msg)
        int work_done = 0;
        struct pcn_kmsg_container* container_long=NULL, *n=NULL;
 
+
        KMSG_PRINTK("Got a large message fragment, type %u, from_cpu %u, start %u, end %u, seqnum %u!\n",
                    msg->hdr.type, msg->hdr.from_cpu,
                    msg->hdr.lg_start, msg->hdr.lg_end,
@@ -991,34 +1110,30 @@ static int process_large_message(struct pcn_kmsg_reverse_message *msg)
        if (msg->hdr.lg_start) {
                KMSG_PRINTK("Processing initial message fragment...\n");
 
-               if (!msg->hdr.lg_seqnum)
-                 printk(KERN_ALERT"%s: ERROR lg_seqnum is zero:%d long_number:%ld\n",
-                     __func__, (int)msg->hdr.lg_seqnum, (long)msg->hdr.long_number);
-                 
-               // calculate the size of the holding buffer
                recv_buf_size = sizeof(struct list_head) + 
                        sizeof(struct pcn_kmsg_hdr) + 
                        msg->hdr.lg_seqnum * PCN_KMSG_PAYLOAD_SIZE;
-#undef BEN_VERSION
-#ifdef BEN_VERSION             
-               lg_buf[msg->hdr.from_cpu] = kmalloc(recv_buf_size, GFP_ATOMIC);
+
+               /*lg_buf[msg->hdr.from_cpu] = kmalloc(recv_buf_size, GFP_ATOMIC);
                if (!lg_buf[msg->hdr.from_cpu]) {
                                        KMSG_ERR("Unable to kmalloc buffer for incoming message!\n");
                                        goto out;
                                }
                lmsg = (struct pcn_kmsg_long_message *) &lg_buf[msg->hdr.from_cpu]->msg;
-#else /* BEN_VERSION */
+               */
+
                container_long= kmalloc(recv_buf_size, GFP_ATOMIC);
                if (!container_long) {
-                       KMSG_ERR("Unable to kmalloc buffer for incoming message!\n");
-                       goto out;
+                                       KMSG_ERR("Unable to kmalloc buffer for incoming message!\n");
+                                       goto out;
                }
-               lmsg = (struct pcn_kmsg_long_message *) &container_long->msg; //TODO wrong cast!
-#endif /* !BEN_VERSION */
+
+               lmsg = (struct pcn_kmsg_long_message *) &container_long->msg;
 
                /* copy header first */
                memcpy((unsigned char *) &lmsg->hdr, 
                       &msg->hdr, sizeof(struct pcn_kmsg_hdr));
+
                /* copy first chunk of message */
                memcpy((unsigned char *) &lmsg->payload,
                       &msg->payload, PCN_KMSG_PAYLOAD_SIZE);
@@ -1027,59 +1142,67 @@ static int process_large_message(struct pcn_kmsg_reverse_message *msg)
                        KMSG_PRINTK("NOTE: Long message of length 1 received; this isn't efficient!\n");
 
                        /* add to appropriate list */
-#ifdef BEN_VERSION                     
-                       rc = msg_add_list(lg_buf[msg->hdr.from_cpu]);
-#else /* BEN_VERSION */
+
+                       //rc = msg_add_list(lg_buf[msg->hdr.from_cpu]);
                        rc = msg_add_list(container_long);
-#endif /* !BEN_VERSION */
-                       if (rc)
-                               KMSG_ERR("Failed to add large message to list!\n");
                        work_done = 1;
+
+                       if (rc) {
+                               KMSG_ERR("Failed to add large message to list!\n");
+                       }
                }
-#ifndef BEN_VERSION            
-               else
-                 // add the message in the lg_buf
-                 list_add_tail(&container_long->list, &lg_buf[msg->hdr.from_cpu]);
-#endif /* !BEN_VERSION */
-       }
-       else {
+               else{
+
+                       list_add_tail(&container_long->list,&lg_buf[msg->hdr.from_cpu]);
+
+               }
+       } else {
+
                KMSG_PRINTK("Processing subsequent message fragment...\n");
 
+
                //It should not be needed safe
                list_for_each_entry_safe(container_long, n, &lg_buf[msg->hdr.from_cpu], list) {
-                       if ( (container_long != NULL) &&
-                         (container_long->msg.hdr.long_number == msg->hdr.long_number) )
-                               // found!
+
+                       if(container_long!=NULL && container_long->msg.hdr.long_number==msg->hdr.long_number)
                                goto next;
+
                }
 
-               KMSG_ERR("Failed to find long message %lu in the list of cpu %i!\n",
-                        msg->hdr.long_number, msg->hdr.from_cpu);
+               KMSG_ERR("Failed to find long message %lu in the list of cpu %i!\n",msg->hdr.long_number,msg->hdr.from_cpu);
                goto out;
 
-next:          
-               lmsg = (struct pcn_kmsg_long_message *) &container_long->msg;
-               memcpy((unsigned char *) ((void*)&lmsg->payload) + (PCN_KMSG_PAYLOAD_SIZE * msg->hdr.lg_seqnum),
+next:          lmsg = (struct pcn_kmsg_long_message *) &container_long->msg;
+
+               memcpy((unsigned char *) &lmsg->payload + PCN_KMSG_PAYLOAD_SIZE * msg->hdr.lg_seqnum,
                       &msg->payload, PCN_KMSG_PAYLOAD_SIZE);
 
                if (msg->hdr.lg_end) {
                        KMSG_PRINTK("Last fragment in series...\n");
+
                        KMSG_PRINTK("from_cpu %d, type %d, prio %d\n",
-                                   lmsg->hdr.from_cpu, lmsg->hdr.type, lmsg->hdr.prio);
-                       /* add to appropriate list */
-#ifdef BEN_VERSION
-                       rc = msg_add_list(lg_buf[msg->hdr.from_cpu]);
-#else /* BEN_VERSION */
+                                   lmsg->hdr.from_cpu,
+                                   lmsg->hdr.type,
+                                   lmsg->hdr.prio);
+
+
+                       //rc = msg_add_list(lg_buf[msg->hdr.from_cpu]);
+
                        list_del(&container_long->list);
+
+                       /* add to appropriate list */
                        rc = msg_add_list(container_long);
-#endif /* !BEN_VERSION */                      
-                       if (rc)
-                               KMSG_ERR("Failed to add large message to list!\n");
+
                        work_done = 1;
+
+                       if (rc) {
+                               KMSG_ERR("Failed to add large message to list!\n");
+                       }
                }
        }
 
 out:
+
        return work_done;
 }
 
@@ -1111,19 +1234,39 @@ static int process_small_message(struct pcn_kmsg_reverse_message *msg)
        return work_done;
 }
 
-static int poll_handler_check=0;
+static void process_mcast_queue(pcn_kmsg_mcast_id id)
+{
+       struct pcn_kmsg_reverse_message *msg;
+       while (!mcastwin_get(id, &msg)) {
+               MCAST_PRINTK("Got an mcast message, type %d!\n",
+                            msg->hdr.type);
+
+               /* Special processing for large messages */
+                if (msg->hdr.is_lg_msg) {
+                        MCAST_PRINTK("message is a large message!\n");
+                        process_large_message(msg);
+                } else {
+                        MCAST_PRINTK("message is a small message!\n");
+                        process_small_message(msg);
+                }
+
+               mcastwin_advance_tail(id);
+       }
+
+}
+
 static int pcn_kmsg_poll_handler(void)
 {
        struct pcn_kmsg_reverse_message *msg;
        struct pcn_kmsg_window *win = rkvirt[my_cpu]; // TODO this will not work for clustering
        int work_done = 0;
 
-       poll_handler_check++;
-       if (poll_handler_check >1)
-               printk("poll_hanlder_check %d concurrent calls not supported\n", poll_handler_check);
+       KMSG_PRINTK("called\n");
 
 pull_msg:
        /* Get messages out of the buffer first */
+//#define PCN_KMSG_BUDGET 128
+       //while ((work_done < PCN_KMSG_BUDGET) && (!win_get(rkvirt[my_cpu], &msg))) {
        while (! win_get(win, &msg) ) {
                KMSG_PRINTK("got a message!\n");
 
@@ -1135,16 +1278,10 @@ pull_msg:
                        KMSG_PRINTK("message is a small message!\n");
                        work_done += process_small_message(msg);
                }
-
-               msg->ready = 0;
                pcn_barrier();
-               
-               fetch_and_add(&win->tail, 1); //win_advance_tail(win);
-               
-               // NOTE
-               // why you need the ready bit if you are incrementing the tail?
-               // can we increment the tail before signaling the ready bit?
-               // in that way we can support multiple calls to this function
+               msg->ready = 0;
+               //win_advance_tail(win);
+               fetch_and_add(&win->tail, 1);
        }
 
        win_enable_int(win);
@@ -1153,15 +1290,13 @@ pull_msg:
                goto pull_msg;
        }
 
-       poll_handler_check--;
        return work_done;
 }
 
 unsigned volatile long bh_ts = 0, bh_ts_2 = 0;
 
-// NOTE the following was declared as a bottom half
-//static void pcn_kmsg_action(struct softirq_action *h)
-static void pcn_kmsg_action(struct work_struct* work)
+/* bottom half */
+static void pcn_kmsg_action(/*struct softirq_action *h*/struct work_struct* work)
 {
        int rc;
        int i;
@@ -1170,20 +1305,20 @@ static void pcn_kmsg_action(struct work_struct* work)
        //if (!bh_ts) {
                rdtscll(bh_ts);
        //}
+
        KMSG_PRINTK("called\n");
 
        work_done = pcn_kmsg_poll_handler();
        KMSG_PRINTK("Handler did %d units of work!\n", work_done);
 
-#ifdef PCN_SUPPORT_MULTICAST   
        for (i = 0; i < POPCORN_MAX_MCAST_CHANNELS; i++) {
                if (MCASTWIN(i)) {
                        KMSG_PRINTK("mcast win %d mapped, processing it\n", i);
                        process_mcast_queue(i);
                }
        }
+
        KMSG_PRINTK("Done checking mcast queues; processing messages\n");
-#endif /* PCN_SUPPORT_MULTICAST */
 
        //if (!bh_ts_2) {
                rdtscll(bh_ts_2);
@@ -1204,6 +1339,406 @@ static void pcn_kmsg_action(struct work_struct* work)
        return;
 }
 
-#ifdef PCN_SUPPORT_MULTICAST
-# include "pcn_kmsg_mcast.h"
-#endif /* PCN_SUPPORT_MULTICAST */
+/* MULTICAST */
+
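+/* Channel locking is currently a stub: lock_chan()/unlock_chan() do nothing, so
+ * pcn_kmsg_mcast_open() relies only on its re-check-and-retry of num_members. */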
+inline void lock_chan(pcn_kmsg_mcast_id id)
+{
+
+}
+
+inline void unlock_chan(pcn_kmsg_mcast_id id)
+{
+
+}
+
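+/* count_members: open-coded population count over the first POPCORN_MAX_CPUS
+ * bits of the membership mask. */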
+inline int count_members(unsigned long mask)
+{
+       int i, count = 0;
+
+       for (i = 0; i < POPCORN_MAX_CPUS; i++) {
+               if (mask & (1ULL << i)) {
+                       count++;
+               }
+       }
+
+       return count;
+}
+
+void print_mcast_map(void)
+{
+#if MCAST_VERBOSE
+       int i;
+
+       printk("ACTIVE MCAST GROUPS:\n");
+
+       for (i = 0; i < POPCORN_MAX_MCAST_CHANNELS; i++) {
+               if (rkinfo->mcast_wininfo[i].mask) {
+                       printk("group %d, mask 0x%lx, num_members %d\n", 
+                              i, rkinfo->mcast_wininfo[i].mask, 
+                              rkinfo->mcast_wininfo[i].num_members);
+               }
+       }
+       return;
+#endif
+}
+
+/* Open a multicast group containing the CPUs specified in the mask. */
+int pcn_kmsg_mcast_open(pcn_kmsg_mcast_id *id, unsigned long mask)
+{
+       int rc, i, found_id;
+       struct pcn_kmsg_mcast_message msg;
+       struct pcn_kmsg_mcast_wininfo *slot;
+       struct pcn_kmsg_mcast_window * new_win;
+
+       MCAST_PRINTK("Reached pcn_kmsg_mcast_open, mask 0x%lx\n", mask);
+
+       if (!(mask & (1ULL << my_cpu))) {
+               KMSG_ERR("This CPU is not a member of the mcast group to be created, cpu %d, mask 0x%lx\n",
+                        my_cpu, mask);
+               return -1;
+       }
+
+       /* find first unused channel */
+retry:
+       found_id = -1;
+
+       for (i = 0; i < POPCORN_MAX_MCAST_CHANNELS; i++) {
+               if (!rkinfo->mcast_wininfo[i].num_members) {
+                       found_id = i;
+                       break;
+               }
+       }
+
+       MCAST_PRINTK("Found channel ID %d\n", found_id);
+
+       if (found_id == -1) {
+               KMSG_ERR("No free multicast channels!\n");
+               return -1;
+       }
+
+       /* lock and check if channel is still unused; 
+          otherwise, try again */
+       lock_chan(found_id);
+
+       if (rkinfo->mcast_wininfo[found_id].num_members) {
+               unlock_chan(found_id);
+               MCAST_PRINTK("Got scooped; trying again...\n");
+               goto retry;
+       }
+
+       /* set slot info */
+       slot = &rkinfo->mcast_wininfo[found_id];
+       slot->mask = mask;
+       slot->num_members = count_members(mask);
+       slot->owner_cpu = my_cpu;
+
+       MCAST_PRINTK("Found %d members\n", slot->num_members);
+
+       /* kmalloc window for slot */
+       new_win = kmalloc(sizeof(struct pcn_kmsg_mcast_window), GFP_ATOMIC);
+
+       if (!new_win) {
+               KMSG_ERR("Failed to kmalloc mcast buffer!\n");
+               /* roll back the slot we just claimed and report the failure */
+               slot->mask = 0;
+               slot->num_members = 0;
+               unlock_chan(found_id);
+               return -1;
+       }
+
+       /* zero out window */
+       memset(new_win, 0x0, sizeof(struct pcn_kmsg_mcast_window));
+
+       MCASTWIN(found_id) = new_win;
+       slot->phys_addr = virt_to_phys(new_win);
+       MCAST_PRINTK("Malloced mcast receive window %d at phys addr 0x%lx\n",
+                    found_id, slot->phys_addr);
+
+       /* send message to each member except self.  Can't use mcast yet because
+          group is not yet established, so unicast to each CPU in mask. */
+       msg.hdr.type = PCN_KMSG_TYPE_MCAST;
+       msg.hdr.prio = PCN_KMSG_PRIO_HIGH;
+       msg.type = PCN_KMSG_MCAST_OPEN;
+       msg.id = found_id;
+       msg.mask = mask;
+       msg.num_members = slot->num_members;
+
+       for (i = 0; i < POPCORN_MAX_CPUS; i++) {
+               if ((slot->mask & (1ULL << i)) && 
+                   (my_cpu != i)) {
+                       MCAST_PRINTK("Sending message to CPU %d\n", i);
+
+                       rc = pcn_kmsg_send(i, (struct pcn_kmsg_message *) &msg);
+
+                       if (rc) {
+                               KMSG_ERR("Message send failed!\n");
+                       }
+               }
+       }
+
+       *id = found_id;
+
+       unlock_chan(found_id);
+
+       return 0;
+}
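+
+/* Rough usage sketch for the multicast API defined in this file; the mask
+ * and message are made up for illustration and the calling CPU must itself
+ * be part of the mask:
+ *
+ *     pcn_kmsg_mcast_id id;
+ *     unsigned long mask = (1UL << 0) | (1UL << 1);   // kernels 0 and 1
+ *     struct pcn_kmsg_message msg;                    // type/prio set by caller
+ *
+ *     if (!pcn_kmsg_mcast_open(&id, mask)) {
+ *             pcn_kmsg_mcast_send(id, &msg);
+ *             pcn_kmsg_mcast_close(id);               // owner only
+ *     }
+ */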
+
+/* Add new members to a multicast group. */
+int pcn_kmsg_mcast_add_members(pcn_kmsg_mcast_id id, unsigned long mask)
+{
+       lock_chan(id);
+
+       KMSG_ERR("Operation not yet supported!\n");
+
+       //rkinfo->mcast_wininfo[id].mask |= mask; 
+
+       /* TODO -- notify new members */
+
+       unlock_chan(id);
+       return 0;
+}
+
+/* Remove existing members from a multicast group. */
+int pcn_kmsg_mcast_delete_members(pcn_kmsg_mcast_id id, unsigned long mask)
+{
+       lock_chan(id);
+
+       KMSG_ERR("Operation not yet supported!\n");
+
+       //rkinfo->mcast_wininfo[id].mask &= ~mask;
+
+       /* TODO -- notify removed members */
+
+       unlock_chan(id);
+
+       return 0;
+}
+
+inline int pcn_kmsg_mcast_close_notowner(pcn_kmsg_mcast_id id)
+{
+       MCAST_PRINTK("Closing multicast channel %lu on CPU %d\n", id, my_cpu);
+
+       /* process remaining messages in queue (should there be any?) */
+
+       /* remove queue from list of queues being polled */
+       iounmap(MCASTWIN(id));
+
+       MCASTWIN(id) = NULL;
+
+       return 0;
+}
+
+/* Close a multicast group. */
+int pcn_kmsg_mcast_close(pcn_kmsg_mcast_id id)
+{
+       int rc;
+       struct pcn_kmsg_mcast_message msg;
+       struct pcn_kmsg_mcast_wininfo *wi = &rkinfo->mcast_wininfo[id];
+
+       if (wi->owner_cpu != my_cpu) {
+               KMSG_ERR("Only creator (cpu %d) can close mcast group %lu!\n",
+                        wi->owner_cpu, id);
+               return -1;
+       }
+
+       lock_chan(id);
+
+       /* set window to close */
+       wi->is_closing = 1;
+
+       /* broadcast message to close window globally */
+       msg.hdr.type = PCN_KMSG_TYPE_MCAST;
+       msg.hdr.prio = PCN_KMSG_PRIO_HIGH;
+       msg.type = PCN_KMSG_MCAST_CLOSE;
+       msg.id = id;
+
+       rc = pcn_kmsg_mcast_send(id, (struct pcn_kmsg_message *) &msg);
+       if (rc) {
+               KMSG_ERR("failed to send mcast close message!\n");
+               /* abort the close so the channel remains usable */
+               wi->is_closing = 0;
+               unlock_chan(id);
+               return -1;
+       }
+
+       /* wait until every member has consumed everything in the window
+          (global_tail catches up to global_head) */
+       while (MCASTWIN(id)->tail != MCASTWIN(id)->head)
+               cpu_relax();
+
+       /* free window and set channel as unused */
+       kfree(MCASTWIN(id));
+       MCASTWIN(id) = NULL;
+
+       wi->mask = 0;
+       wi->num_members = 0;
+       wi->is_closing = 0;
+
+       unlock_chan(id);
+
+       return 0;
+}
+
+unsigned long mcast_ipi_ts;
+
+static int __pcn_kmsg_mcast_send(pcn_kmsg_mcast_id id, 
+                                struct pcn_kmsg_message *msg)
+{
+       int i, rc;
+
+       if (!msg) {
+               KMSG_ERR("Passed in a null pointer to msg!\n");
+               return -1;
+       }
+
+       /* set source CPU */
+       msg->hdr.from_cpu = my_cpu;
+
+       /* place message in rbuf */
+       rc = mcastwin_put(id, msg);
+
+       if (rc) {
+               KMSG_ERR("failed to place message in mcast window -- maybe it's full?\n");
+               return -1;
+       }
+
+       rdtscll(mcast_ipi_ts);
+
+       /* send IPI to all in mask but me */
+       for (i = 0; i < POPCORN_MAX_CPUS; i++) {
+               if (rkinfo->mcast_wininfo[id].mask & (1ULL << i)) {
+                       if (i != my_cpu) {
+                               MCAST_PRINTK("sending IPI to CPU %d\n", i);
+                               apic->send_IPI_single(i, POPCORN_KMSG_VECTOR);
+                       }
+               }
+       }
+
+       return 0;
+}
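+
+/* Delivery path in brief: the sender places the message in the shared mcast
+ * window above and IPIs every other member on POPCORN_KMSG_VECTOR; each
+ * receiver's bottom half (pcn_kmsg_action) is then expected to drain the
+ * window via process_mcast_queue(). */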
+
+#define MCAST_HACK 0
+
+/* Send a message to the specified multicast group. */
+int pcn_kmsg_mcast_send(pcn_kmsg_mcast_id id, struct pcn_kmsg_message *msg)
+{
+#if MCAST_HACK
+
+       int i, rc;
+
+       MCAST_PRINTK("Sending mcast message, id %lu\n", id);
+
+       /* quick hack for testing for now; 
+          loop through mask and send individual messages */
+       for (i = 0; i < POPCORN_MAX_CPUS; i++) {
+               if (rkinfo->mcast_wininfo[id].mask & (1ULL << i)) {
+                       rc = pcn_kmsg_send(i, msg);
+
+                       if (rc) {
+                               KMSG_ERR("Batch send failed to CPU %d\n", i);
+                               return -1;
+                       }
+               }
+       }
+
+       return 0;
+#else
+       int rc;
+
+       MCAST_PRINTK("sending mcast message to group id %lu\n", id);
+
+       msg->hdr.is_lg_msg = 0;
+       msg->hdr.lg_start = 0;
+       msg->hdr.lg_end = 0;
+       msg->hdr.lg_seqnum = 0;
+
+       rc = __pcn_kmsg_mcast_send(id, msg);
+
+       return rc;
+#endif
+}
+
+/* Send a message to the specified multicast group. */
+int pcn_kmsg_mcast_send_long(pcn_kmsg_mcast_id id, 
+                            struct pcn_kmsg_long_message *msg, 
+                            unsigned int payload_size)
+{
+#if MCAST_HACK
+       int i, rc;
+
+       MCAST_PRINTK("Sending long mcast message, id %lu, size %u\n", 
+                    id, payload_size);
+
+       /* quick hack for testing for now; 
+          loop through mask and send individual messages */
+       for (i = 0; i < POPCORN_MAX_CPUS; i++) {
+               if (rkinfo->mcast_wininfo[id].mask & (1ULL << i)) {
+                       rc = pcn_kmsg_send_long(i, msg, payload_size);
+
+                       if (rc) {
+                               KMSG_ERR("Batch send failed to CPU %d\n", i);
+                               return -1;
+                       }
+               }
+       }
+
+       return 0;
+#else
+
+       KMSG_ERR("long messages not yet supported in mcast!\n");
+
+       return -1;
+#endif
+}
+
+static int pcn_kmsg_mcast_callback(struct pcn_kmsg_message *message) 
+{
+       int rc = 0;
+       struct pcn_kmsg_mcast_message *msg = 
+               (struct pcn_kmsg_mcast_message *) message;
+       pcn_kmsg_work_t *kmsg_work;
+
+       MCAST_PRINTK("Received mcast message, type %d\n", msg->type);
+
+       switch (msg->type) {
+               case PCN_KMSG_MCAST_OPEN:
+                       MCAST_PRINTK("Processing mcast open message...\n");
+
+                       /* Need to queue work to remap the window in a kernel
+                          thread; it can't happen here */
+                       kmsg_work = kmalloc(sizeof(pcn_kmsg_work_t), GFP_ATOMIC);
+                       if (kmsg_work) {
+                               INIT_WORK((struct work_struct *) kmsg_work,
+                                         process_kmsg_wq_item);
+                               kmsg_work->op = PCN_KMSG_WQ_OP_MAP_MCAST_WIN;
+                               kmsg_work->from_cpu = msg->hdr.from_cpu;
+                               kmsg_work->id_to_join = msg->id;
+                               queue_work(kmsg_wq, 
+                                          (struct work_struct *) kmsg_work);
+                       } else {
+                               KMSG_ERR("Failed to kmalloc work structure!\n");
+                       }
+
+                       break;
+
+               case PCN_KMSG_MCAST_ADD_MEMBERS:
+                       KMSG_ERR("Mcast add not yet implemented...\n");
+                       break;
+
+               case PCN_KMSG_MCAST_DEL_MEMBERS:
+                       KMSG_ERR("Mcast delete not yet implemented...\n");
+                       break;
+
+               case PCN_KMSG_MCAST_CLOSE:
+                       MCAST_PRINTK("Processing mcast close message...\n");
+                       pcn_kmsg_mcast_close_notowner(msg->id);
+                       break;
+
+               default:
+                       KMSG_ERR("Invalid multicast message type %d\n", 
+                                msg->type);
+                       rc = -1;
+                       goto out;
+       }
+
+       print_mcast_map();
+
+out:
+       pcn_kmsg_free_msg(message);
+       return rc;
+}
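+
+/* This handler is presumably registered for PCN_KMSG_TYPE_MCAST during
+ * messaging-layer initialization, along the lines of (sketch; the actual
+ * registration call is outside this hunk):
+ *
+ *     pcn_kmsg_register_callback(PCN_KMSG_TYPE_MCAST,
+ *                                &pcn_kmsg_mcast_callback);
+ */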
+
+
index e194f2c..9efc00a 100644 (file)
@@ -163,7 +163,6 @@ static int pcn_kmsg_test_long_msg(struct pcn_kmsg_test_args __user *args)
        return rc;
 }
 
-#ifdef PCN_SUPPORT_MULTICAST
 static int pcn_kmsg_test_mcast_open(struct pcn_kmsg_test_args __user *args)
 {
        int rc;
@@ -227,7 +226,6 @@ static int pcn_kmsg_test_mcast_close(struct pcn_kmsg_test_args __user *args)
 
        return rc;
 }
-#endif /* PCN_SUPPORT_MULTICAST */
 
 /* Syscall for testing all this stuff */
 SYSCALL_DEFINE2(popcorn_test_kmsg, enum pcn_kmsg_test_op, op,
@@ -254,7 +252,7 @@ SYSCALL_DEFINE2(popcorn_test_kmsg, enum pcn_kmsg_test_op, op,
                case PCN_KMSG_TEST_SEND_LONG:
                        rc = pcn_kmsg_test_long_msg(args);
                        break;
-#ifdef PCN_SUPPORT_MULTICAST
+
                case PCN_KMSG_TEST_OP_MCAST_OPEN:
                        rc = pcn_kmsg_test_mcast_open(args);
                        break;
@@ -266,8 +264,7 @@ SYSCALL_DEFINE2(popcorn_test_kmsg, enum pcn_kmsg_test_op, op,
                case PCN_KMSG_TEST_OP_MCAST_CLOSE:
                        rc = pcn_kmsg_test_mcast_close(args);
                        break;
-#endif /* PCN_SUPPORT_MULTICAST */
-                       
+
                default:
                        TEST_ERR("invalid option %d\n", op);
                        return -1;
@@ -276,6 +273,7 @@ SYSCALL_DEFINE2(popcorn_test_kmsg, enum pcn_kmsg_test_op, op,
        return rc;
 }
 
 /* CALLBACKS */
 
 static int handle_single_msg(struct pcn_kmsg_test_message *msg)