# HG changeset patch # User John Tsiombikas # Date 1318419580 -10800 # Node ID b45e2d5f0ae1bf51fc0a7e9118234b6cad539ba6 # Parent 0a205396e1a0057769dba7515b610f163e7853bd ok I *think* i've fixed it now diff -r 0a205396e1a0 -r b45e2d5f0ae1 src/proc.c --- a/src/proc.c Mon Oct 10 04:16:01 2011 +0300 +++ b/src/proc.c Wed Oct 12 14:39:40 2011 +0300 @@ -93,7 +93,7 @@ memcpy((void*)img_start_addr, test_proc, proc_size_pg * PGSIZE); printf("copied init process at: %x\n", img_start_addr); - /* allocate the first page of the process stack */ + /* allocate the first page of the user stack */ stack_pg = ADDR_TO_PAGE(KMEM_START) - 1; if(pgalloc_vrange(stack_pg, 1) == -1) { panic("failed to allocate user stack page\n"); @@ -194,7 +194,8 @@ /* will be copied on write */ p->user_stack_pg = parent->user_stack_pg; - p->ctx.pgtbl_paddr = clone_vm(CLONE_COW); + /* clone the parent's virtual memory */ + clone_vm(p, parent, CLONE_COW); /* done, now let's add it to the scheduler runqueue */ add_proc(p->id); diff -r 0a205396e1a0 -r b45e2d5f0ae1 src/rbtree.c --- a/src/rbtree.c Mon Oct 10 04:16:01 2011 +0300 +++ b/src/rbtree.c Wed Oct 12 14:39:40 2011 +0300 @@ -2,6 +2,7 @@ #include #include #include "rbtree.h" +#include "panic.h" #define INT2PTR(x) ((void*)(x)) #define PTR2INT(x) ((int)(x)) @@ -13,7 +14,6 @@ static void del_tree(struct rbnode *node, void (*delfunc)(struct rbnode*, void*), void *cls); static struct rbnode *insert(struct rbtree *rb, struct rbnode *tree, void *key, void *data); static struct rbnode *delete(struct rbtree *rb, struct rbnode *tree, void *key); -/*static struct rbnode *find(struct rbtree *rb, struct rbnode *node, void *key);*/ static void traverse(struct rbnode *node, void (*func)(struct rbnode*, void*), void *cls); struct rbtree *rb_create(rb_cmp_func_t cmp_func) @@ -59,6 +59,27 @@ del_tree(rb->root, rb->del, rb->del_cls); } +void rb_clear(struct rbtree *rb) +{ + del_tree(rb->root, rb->del, rb->del_cls); + rb->root = 0; +} + +int rb_copy(struct rbtree *dest, 
struct rbtree *src) +{ + struct rbnode *node; + + rb_clear(dest); + + rb_begin(src); + while((node = rb_next(src))) { + if(rb_insert(dest, node->key, node->data) == -1) { + return -1; + } + } + return 0; +} + void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free) { rb->alloc = alloc; @@ -237,6 +258,9 @@ if(!tree) { struct rbnode *node = rb->alloc(sizeof *node); + if(!node) { + panic("failed to allocate tree node\n"); + } node->red = 1; node->key = key; node->data = data; @@ -455,3 +479,14 @@ } return tree; } + +void rb_dbg_print_tree(struct rbtree *tree) +{ + struct rbnode *node; + + rb_begin(tree); + while((node = rb_next(tree))) { + printf("%d ", rb_node_keyi(node)); + } + printf("\n"); +} diff -r 0a205396e1a0 -r b45e2d5f0ae1 src/rbtree.h --- a/src/rbtree.h Mon Oct 10 04:16:01 2011 +0300 +++ b/src/rbtree.h Wed Oct 12 14:39:40 2011 +0300 @@ -48,6 +48,9 @@ int rb_init(struct rbtree *rb, rb_cmp_func_t cmp_func); void rb_destroy(struct rbtree *rb); +void rb_clear(struct rbtree *tree); +int rb_copy(struct rbtree *dest, struct rbtree *src); + void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free); void rb_set_compare_func(struct rbtree *rb, rb_cmp_func_t func); void rb_set_delete_func(struct rbtree *rb, rb_del_func_t func, void *cls); @@ -74,6 +77,9 @@ int rb_node_keyi(struct rbnode *node); void *rb_node_data(struct rbnode *node); + +void rb_dbg_print_tree(struct rbtree *tree); + #ifdef __cplusplus } #endif diff -r 0a205396e1a0 -r b45e2d5f0ae1 src/test_proc.S --- a/src/test_proc.S Mon Oct 10 04:16:01 2011 +0300 +++ b/src/test_proc.S Wed Oct 12 14:39:40 2011 +0300 @@ -8,6 +8,8 @@ movl $SYS_FORK, %eax int $SYSCALL_INT + push %eax + infloop: /* --- print a message --- */ movl $SYS_HELLO, %eax diff -r 0a205396e1a0 -r b45e2d5f0ae1 src/vm.c --- a/src/vm.c Mon Oct 10 04:16:01 2011 +0300 +++ b/src/vm.c Wed Oct 12 14:39:40 2011 +0300 @@ -38,6 +38,7 @@ static void coalesce(struct page_range *low, struct page_range *mid, 
struct page_range *high); static void pgfault(int inum); +static int copy_on_write(struct vm_page *page); static struct page_range *alloc_node(void); static void free_node(struct page_range *node); @@ -109,6 +110,7 @@ { uint32_t *pgtbl; int diridx, pgidx, pgon, intr_state; + struct process *p; intr_state = get_intr_state(); disable_intr(); @@ -155,6 +157,30 @@ pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT; flush_tlb_page(vpage); + /* if it's a new *user* mapping, and there is a current process, update the vmmap */ + if((attr & PG_USER) && (p = get_current_proc())) { + struct vm_page *page; + + if(!(page = get_vm_page_proc(p, vpage))) { + if(!(page = malloc(sizeof *page))) { + panic("map_page: failed to allocate new vm_page structure"); + } + page->vpage = vpage; + page->ppage = ppage; + page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT; + page->nref = 1; + + rb_inserti(&p->vmmap, vpage, page); + } else { + /* otherwise just update the mapping */ + page->ppage = ppage; + + /* XXX don't touch the flags, as that's how we implement CoW + * by changing the mapping without affecting the vm_page + */ + } + } + set_intr_state(intr_state); return 0; } @@ -231,6 +257,7 @@ return map_page_range(vpg_start, num_pages, ppg_start, attr); } +/* translate a virtual address to a physical address using the current page table */ uint32_t virt_to_phys(uint32_t vaddr) { int pg; @@ -244,6 +271,7 @@ return pgaddr | ADDR_TO_PGOFFS(vaddr); } +/* translate a virtual page number to a physical page number using the current page table */ int virt_to_phys_page(int vpg) { uint32_t pgaddr, *pgtbl; @@ -268,6 +296,32 @@ return ADDR_TO_PAGE(pgaddr); } +/* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */ +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr) +{ + int pg; + uint32_t pgaddr; + + if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) { + return 0; + } + pgaddr = PAGE_TO_ADDR(pg); + + return pgaddr | 
ADDR_TO_PGOFFS(vaddr); +} + +/* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */ +int virt_to_phys_page_proc(struct process *p, int vpg) +{ + struct rbnode *node; + assert(p); + + if(!(node = rb_findi(&p->vmmap, vpg))) { + return -1; + } + return ((struct vm_page*)node->data)->ppage; +} + /* allocate a contiguous block of virtual memory pages along with * backing physical memory for them, and update the page table. */ @@ -481,14 +535,37 @@ if(frm->err & PG_USER) { int fault_page = ADDR_TO_PAGE(fault_addr); struct process *proc = get_current_proc(); - printf("DBG: page fault in user space\n"); + printf("DBG: page fault in user space (pid: %d)\n", proc->id); assert(proc); if(frm->err & PG_PRESENT) { - /* it's not due to a missing page, just panic */ + /* it's not due to a missing page; fetch the attributes */ + int pgnum = ADDR_TO_PAGE(fault_addr); + + if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) { + /* write permission fault might be a CoW fault or just an error + * fetch the vm_page permissions to check if this is supposed to be + * a writable page (which means we should CoW). + */ + struct vm_page *page = get_vm_page_proc(proc, pgnum); + + if(page->flags & PG_WRITABLE) { + /* ok this is a CoW fault */ + if(copy_on_write(page) == -1) { + panic("copy on write failed!"); + } + return; /* done, allow the process to restart the instruction and continue */ + } else { + /* TODO eventually we'll SIGSEGV the process, for now just panic. + */ + goto unhandled; + } + } goto unhandled; } + /* so it's a missing page... ok */ + /* detect if it's an automatic stack growth deal */ if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) { int num_pages = proc->user_stack_pg - fault_page; @@ -502,11 +579,16 @@ proc->user_stack_pg = fault_page; return; } + + /* it's not a stack growth fault. 
since we don't do swapping yet, just + * fall to unhandled and panic + */ } unhandled: printf("~~~~ PAGE FAULT ~~~~\n"); printf("fault address: %x\n", fault_addr); + printf("error code: %x\n", frm->err); if(frm->err & PG_PRESENT) { if(frm->err & 8) { @@ -522,6 +604,54 @@ panic("unhandled page fault\n"); } +/* copy-on-write handler, called from pgfault above */ +static int copy_on_write(struct vm_page *page) +{ + uint32_t newphys; + struct vm_page *newpage; + struct rbnode *vmnode; + struct process *p = get_current_proc(); + + assert(page->nref > 0); + + /* first of all check the refcount. If it's 1 then we don't need to copy + * anything. This will happen when all forked processes except one have + * marked this read-write again after faulting. + */ + if(page->nref == 1) { + set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY); + return 0; + } + + /* ok let's make a copy and mark it read-write */ + if(!(newpage = malloc(sizeof *newpage))) { + printf("copy_on_write: failed to allocate new vm_page\n"); + return -1; + } + newpage->vpage = page->vpage; + newpage->flags = page->flags; + + if(!(newphys = alloc_phys_page())) { + printf("copy_on_write: failed to allocate physical page\n"); + /* XXX proper action: SIGSEGV */ + return -1; + } + newpage->ppage = ADDR_TO_PAGE(newphys); + newpage->nref = 1; + + /* set the new vm_page in the process vmmap */ + vmnode = rb_findi(&p->vmmap, newpage->vpage); + assert(vmnode && vmnode->data == page); /* shouldn't be able to fail */ + vmnode->data = newpage; + + /* also update the page table */ + map_page(newpage->vpage, newpage->ppage, newpage->flags); + + /* finally decrease the refcount at the original vm_page struct */ + page->nref--; + return 0; +} + /* --- page range list node management --- */ #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range)) @@ -574,14 +704,13 @@ * * If "cow" is non-zero it also marks the shared user-space pages as * read-only, to implement copy-on-write. 
- * - * Returns the physical address of the new page directory. */ -uint32_t clone_vm(int cow) +void clone_vm(struct process *pdest, struct process *psrc, int cow) { int i, j, dirpg, tblpg, kstart_dirent; uint32_t paddr; uint32_t *ndir, *ntbl; + struct rbnode *vmnode; /* allocate the new page directory */ if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) { @@ -613,8 +742,10 @@ * page table and unset the writable bits. */ for(j=0; j<1024; j++) { - clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY); - /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/ + if(PGTBL(i)[j] & PG_PRESENT) { + clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY); + /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/ + } } } @@ -632,18 +763,31 @@ } } + /* make a copy of the parent's vmmap tree pointing to the same vm_pages + * and increase the reference counters for all vm_pages. + */ + rb_init(&pdest->vmmap, RB_KEY_INT); + rb_begin(&psrc->vmmap); + while((vmnode = rb_next(&psrc->vmmap))) { + struct vm_page *pg = vmnode->data; + pg->nref++; + + /* insert the same vm_page to the new tree */ + rb_inserti(&pdest->vmmap, pg->vpage, pg); + } + /* for the kernel space we'll just use the same page tables */ for(i=kstart_dirent; i<1024; i++) { ndir[i] = pgdir[i]; } + paddr = virt_to_phys((uint32_t)ndir); + ndir[1023] = paddr | PG_PRESENT; if(cow) { /* we just changed all the page protection bits, so we need to flush the TLB */ flush_tlb(); } - paddr = virt_to_phys((uint32_t)ndir); - /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */ unmap_page(dirpg); unmap_page(tblpg); @@ -651,7 +795,8 @@ pgfree(dirpg, 1); pgfree(tblpg, 1); - return paddr; + /* set the new page directory pointer */ + pdest->ctx.pgtbl_paddr = paddr; } int get_page_bit(int pgnum, uint32_t bit, int wholepath) @@ -723,7 +868,7 @@ vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK; vmp->nref = 1; /* when first created assume no sharing */ - rb_inserti(vmmap, vmp->ppage, vmp); + rb_inserti(vmmap, vmp->vpage, vmp); } } } @@ -732,6 
+877,21 @@ return 0; } +struct vm_page *get_vm_page(int vpg) +{ + return get_vm_page_proc(get_current_proc(), vpg); +} + +struct vm_page *get_vm_page_proc(struct process *p, int vpg) +{ + struct rbnode *node; + + if(!p || !(node = rb_findi(&p->vmmap, vpg))) { + return 0; + } + return node->data; +} + void dbg_print_vm(int area) { diff -r 0a205396e1a0 -r b45e2d5f0ae1 src/vm.h --- a/src/vm.h Mon Oct 10 04:16:01 2011 +0300 +++ b/src/vm.h Wed Oct 12 14:39:40 2011 +0300 @@ -53,6 +53,8 @@ int nref; }; +struct process; + void init_vm(void); int map_page(int vpage, int ppage, unsigned int attr); @@ -61,8 +63,11 @@ int unmap_page_range(int vpg_start, int pgcount); int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr); +uint32_t virt_to_phys(uint32_t vaddr); int virt_to_phys_page(int vpg); -uint32_t virt_to_phys(uint32_t vaddr); + +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr); +int virt_to_phys_page_proc(struct process *p, int vpg); enum { MEM_KERNEL, @@ -73,7 +78,7 @@ int pgalloc_vrange(int start, int num); void pgfree(int start, int num); -uint32_t clone_vm(int cow); +void clone_vm(struct process *pdest, struct process *psrc, int cow); int get_page_bit(int pgnum, uint32_t bit, int wholepath); void set_page_bit(int pgnum, uint32_t bit, int wholepath); @@ -82,6 +87,9 @@ /* construct the vm map for the current user mappings */ int cons_vmmap(struct rbtree *vmmap); +struct vm_page *get_vm_page(int vpg); +struct vm_page *get_vm_page_proc(struct process *p, int vpg); + void dbg_print_vm(int area); /* defined in vm-asm.S */