kern
changeset 69:b45e2d5f0ae1
ok I *think* i've fixed it now
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Wed, 12 Oct 2011 14:39:40 +0300 |
parents | 0a205396e1a0 |
children | 219974492c7d |
files | src/proc.c src/rbtree.c src/rbtree.h src/test_proc.S src/vm.c src/vm.h |
diffstat | 6 files changed, 228 insertions(+), 16 deletions(-) [+] |
line diff
1.1 --- a/src/proc.c Mon Oct 10 04:16:01 2011 +0300 1.2 +++ b/src/proc.c Wed Oct 12 14:39:40 2011 +0300 1.3 @@ -93,7 +93,7 @@ 1.4 memcpy((void*)img_start_addr, test_proc, proc_size_pg * PGSIZE); 1.5 printf("copied init process at: %x\n", img_start_addr); 1.6 1.7 - /* allocate the first page of the process stack */ 1.8 + /* allocate the first page of the user stack */ 1.9 stack_pg = ADDR_TO_PAGE(KMEM_START) - 1; 1.10 if(pgalloc_vrange(stack_pg, 1) == -1) { 1.11 panic("failed to allocate user stack page\n"); 1.12 @@ -194,7 +194,8 @@ 1.13 /* will be copied on write */ 1.14 p->user_stack_pg = parent->user_stack_pg; 1.15 1.16 - p->ctx.pgtbl_paddr = clone_vm(CLONE_COW); 1.17 + /* clone the parent's virtual memory */ 1.18 + clone_vm(p, parent, CLONE_COW); 1.19 1.20 /* done, now let's add it to the scheduler runqueue */ 1.21 add_proc(p->id);
2.1 --- a/src/rbtree.c Mon Oct 10 04:16:01 2011 +0300 2.2 +++ b/src/rbtree.c Wed Oct 12 14:39:40 2011 +0300 2.3 @@ -2,6 +2,7 @@ 2.4 #include <stdlib.h> 2.5 #include <string.h> 2.6 #include "rbtree.h" 2.7 +#include "panic.h" 2.8 2.9 #define INT2PTR(x) ((void*)(x)) 2.10 #define PTR2INT(x) ((int)(x)) 2.11 @@ -13,7 +14,6 @@ 2.12 static void del_tree(struct rbnode *node, void (*delfunc)(struct rbnode*, void*), void *cls); 2.13 static struct rbnode *insert(struct rbtree *rb, struct rbnode *tree, void *key, void *data); 2.14 static struct rbnode *delete(struct rbtree *rb, struct rbnode *tree, void *key); 2.15 -/*static struct rbnode *find(struct rbtree *rb, struct rbnode *node, void *key);*/ 2.16 static void traverse(struct rbnode *node, void (*func)(struct rbnode*, void*), void *cls); 2.17 2.18 struct rbtree *rb_create(rb_cmp_func_t cmp_func) 2.19 @@ -59,6 +59,27 @@ 2.20 del_tree(rb->root, rb->del, rb->del_cls); 2.21 } 2.22 2.23 +void rb_clear(struct rbtree *rb) 2.24 +{ 2.25 + del_tree(rb->root, rb->del, rb->del_cls); 2.26 + rb->root = 0; 2.27 +} 2.28 + 2.29 +int rb_copy(struct rbtree *dest, struct rbtree *src) 2.30 +{ 2.31 + struct rbnode *node; 2.32 + 2.33 + rb_clear(dest); 2.34 + 2.35 + rb_begin(src); 2.36 + while((node = rb_next(src))) { 2.37 + if(rb_insert(dest, node->key, node->data) == -1) { 2.38 + return -1; 2.39 + } 2.40 + } 2.41 + return 0; 2.42 +} 2.43 + 2.44 void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free) 2.45 { 2.46 rb->alloc = alloc; 2.47 @@ -237,6 +258,9 @@ 2.48 2.49 if(!tree) { 2.50 struct rbnode *node = rb->alloc(sizeof *node); 2.51 + if(!node) { 2.52 + panic("failed to allocate tree node\n"); 2.53 + } 2.54 node->red = 1; 2.55 node->key = key; 2.56 node->data = data; 2.57 @@ -455,3 +479,14 @@ 2.58 } 2.59 return tree; 2.60 } 2.61 + 2.62 +void rb_dbg_print_tree(struct rbtree *tree) 2.63 +{ 2.64 + struct rbnode *node; 2.65 + 2.66 + rb_begin(tree); 2.67 + while((node = rb_next(tree))) { 2.68 + printf("%d ", 
rb_node_keyi(node)); 2.69 + } 2.70 + printf("\n"); 2.71 +}
3.1 --- a/src/rbtree.h Mon Oct 10 04:16:01 2011 +0300 3.2 +++ b/src/rbtree.h Wed Oct 12 14:39:40 2011 +0300 3.3 @@ -48,6 +48,9 @@ 3.4 int rb_init(struct rbtree *rb, rb_cmp_func_t cmp_func); 3.5 void rb_destroy(struct rbtree *rb); 3.6 3.7 +void rb_clear(struct rbtree *tree); 3.8 +int rb_copy(struct rbtree *dest, struct rbtree *src); 3.9 + 3.10 void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free); 3.11 void rb_set_compare_func(struct rbtree *rb, rb_cmp_func_t func); 3.12 void rb_set_delete_func(struct rbtree *rb, rb_del_func_t func, void *cls); 3.13 @@ -74,6 +77,9 @@ 3.14 int rb_node_keyi(struct rbnode *node); 3.15 void *rb_node_data(struct rbnode *node); 3.16 3.17 + 3.18 +void rb_dbg_print_tree(struct rbtree *tree); 3.19 + 3.20 #ifdef __cplusplus 3.21 } 3.22 #endif
4.1 --- a/src/test_proc.S Mon Oct 10 04:16:01 2011 +0300 4.2 +++ b/src/test_proc.S Wed Oct 12 14:39:40 2011 +0300 4.3 @@ -8,6 +8,8 @@ 4.4 movl $SYS_FORK, %eax 4.5 int $SYSCALL_INT 4.6 4.7 + push %eax 4.8 + 4.9 infloop: 4.10 /* --- print a message --- */ 4.11 movl $SYS_HELLO, %eax
5.1 --- a/src/vm.c Mon Oct 10 04:16:01 2011 +0300 5.2 +++ b/src/vm.c Wed Oct 12 14:39:40 2011 +0300 5.3 @@ -38,6 +38,7 @@ 5.4 5.5 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high); 5.6 static void pgfault(int inum); 5.7 +static int copy_on_write(struct vm_page *page); 5.8 static struct page_range *alloc_node(void); 5.9 static void free_node(struct page_range *node); 5.10 5.11 @@ -109,6 +110,7 @@ 5.12 { 5.13 uint32_t *pgtbl; 5.14 int diridx, pgidx, pgon, intr_state; 5.15 + struct process *p; 5.16 5.17 intr_state = get_intr_state(); 5.18 disable_intr(); 5.19 @@ -155,6 +157,30 @@ 5.20 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT; 5.21 flush_tlb_page(vpage); 5.22 5.23 + /* if it's a new *user* mapping, and there is a current process, update the vmmap */ 5.24 + if((attr & PG_USER) && (p = get_current_proc())) { 5.25 + struct vm_page *page; 5.26 + 5.27 + if(!(page = get_vm_page_proc(p, vpage))) { 5.28 + if(!(page = malloc(sizeof *page))) { 5.29 + panic("map_page: failed to allocate new vm_page structure"); 5.30 + } 5.31 + page->vpage = vpage; 5.32 + page->ppage = ppage; 5.33 + page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT; 5.34 + page->nref = 1; 5.35 + 5.36 + rb_inserti(&p->vmmap, vpage, page); 5.37 + } else { 5.38 + /* otherwise just update the mapping */ 5.39 + page->ppage = ppage; 5.40 + 5.41 + /* XXX don't touch the flags, as that's how we implement CoW 5.42 + * by changing the mapping without affecting the vm_page 5.43 + */ 5.44 + } 5.45 + } 5.46 + 5.47 set_intr_state(intr_state); 5.48 return 0; 5.49 } 5.50 @@ -231,6 +257,7 @@ 5.51 return map_page_range(vpg_start, num_pages, ppg_start, attr); 5.52 } 5.53 5.54 +/* translate a virtual address to a physical address using the current page table */ 5.55 uint32_t virt_to_phys(uint32_t vaddr) 5.56 { 5.57 int pg; 5.58 @@ -244,6 +271,7 @@ 5.59 return pgaddr | ADDR_TO_PGOFFS(vaddr); 5.60 } 5.61 5.62 +/* translate a virtual page number to a 
physical page number using the current page table */ 5.63 int virt_to_phys_page(int vpg) 5.64 { 5.65 uint32_t pgaddr, *pgtbl; 5.66 @@ -268,6 +296,32 @@ 5.67 return ADDR_TO_PAGE(pgaddr); 5.68 } 5.69 5.70 +/* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */ 5.71 +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr) 5.72 +{ 5.73 + int pg; 5.74 + uint32_t pgaddr; 5.75 + 5.76 + if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) { 5.77 + return 0; 5.78 + } 5.79 + pgaddr = PAGE_TO_ADDR(pg); 5.80 + 5.81 + return pgaddr | ADDR_TO_PGOFFS(vaddr); 5.82 +} 5.83 + 5.84 +/* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */ 5.85 +int virt_to_phys_page_proc(struct process *p, int vpg) 5.86 +{ 5.87 + struct rbnode *node; 5.88 + assert(p); 5.89 + 5.90 + if(!(node = rb_findi(&p->vmmap, vpg))) { 5.91 + return -1; 5.92 + } 5.93 + return ((struct vm_page*)node->data)->ppage; 5.94 +} 5.95 + 5.96 /* allocate a contiguous block of virtual memory pages along with 5.97 * backing physical memory for them, and update the page table. 5.98 */ 5.99 @@ -481,14 +535,37 @@ 5.100 if(frm->err & PG_USER) { 5.101 int fault_page = ADDR_TO_PAGE(fault_addr); 5.102 struct process *proc = get_current_proc(); 5.103 - printf("DBG: page fault in user space\n"); 5.104 + printf("DBG: page fault in user space (pid: %d)\n", proc->id); 5.105 assert(proc); 5.106 5.107 if(frm->err & PG_PRESENT) { 5.108 - /* it's not due to a missing page, just panic */ 5.109 + /* it's not due to a missing page, fetch the attributes */ 5.110 + int pgnum = ADDR_TO_PAGE(fault_addr); 5.111 + 5.112 + if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) { 5.113 + /* write permission fault might be a CoW fault or just an error 5.114 + * fetch the vm_page permissions to check if this is supposed to be 5.115 + * a writable page (which means we should CoW). 
5.116 + */ 5.117 + struct vm_page *page = get_vm_page_proc(proc, pgnum); 5.118 + 5.119 + if(page->flags & PG_WRITABLE) { 5.120 + /* ok this is a CoW fault */ 5.121 + if(copy_on_write(page) == -1) { 5.122 + panic("copy on write failed!"); 5.123 + } 5.124 + return; /* done, allow the process to restart the instruction and continue */ 5.125 + } else { 5.126 + /* TODO eventually we'll SIGSEGV the process, for now just panic. 5.127 + */ 5.128 + goto unhandled; 5.129 + } 5.130 + } 5.131 goto unhandled; 5.132 } 5.133 5.134 + /* so it's a missing page... ok */ 5.135 + 5.136 /* detect if it's an automatic stack growth deal */ 5.137 if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) { 5.138 int num_pages = proc->user_stack_pg - fault_page; 5.139 @@ -502,11 +579,16 @@ 5.140 proc->user_stack_pg = fault_page; 5.141 return; 5.142 } 5.143 + 5.144 + /* it's not a stack growth fault. since we don't do swapping yet, just 5.145 + * fall to unhandled and panic 5.146 + */ 5.147 } 5.148 5.149 unhandled: 5.150 printf("~~~~ PAGE FAULT ~~~~\n"); 5.151 printf("fault address: %x\n", fault_addr); 5.152 + printf("error code: %x\n", frm->err); 5.153 5.154 if(frm->err & PG_PRESENT) { 5.155 if(frm->err & 8) { 5.156 @@ -522,6 +604,54 @@ 5.157 panic("unhandled page fault\n"); 5.158 } 5.159 5.160 +/* copy-on-write handler, called from pgfault above */ 5.161 +static int copy_on_write(struct vm_page *page) 5.162 +{ 5.163 + uint32_t newphys; 5.164 + struct vm_page *newpage; 5.165 + struct rbnode *vmnode; 5.166 + struct process *p = get_current_proc(); 5.167 + 5.168 + assert(page->nref > 0); 5.169 + 5.170 + /* first of all check the refcount. If it's 1 then we don't need to copy 5.171 + * anything. This will happen when all forked processes except one have 5.172 + * marked this read-write again after faulting. 
5.173 + */ 5.174 + if(page->nref == 1) { 5.175 + set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY); 5.176 + return 0; 5.177 + } 5.178 + 5.179 + /* ok let's make a copy and mark it read-write */ 5.180 + if(!(newpage = malloc(sizeof *newpage))) { 5.181 + printf("copy_on_write: failed to allocate new vm_page\n"); 5.182 + return -1; 5.183 + } 5.184 + newpage->vpage = page->vpage; 5.185 + newpage->flags = page->flags; 5.186 + 5.187 + if(!(newphys = alloc_phys_page())) { 5.188 + printf("copy_on_write: failed to allocate physical page\n"); 5.189 + /* XXX proper action: SIGSEGV */ 5.190 + return -1; 5.191 + } 5.192 + newpage->ppage = ADDR_TO_PAGE(newphys); 5.193 + newpage->nref = 1; 5.194 + 5.195 + /* set the new vm_page in the process vmmap */ 5.196 + vmnode = rb_findi(&p->vmmap, newpage->vpage); 5.197 + assert(vmnode && vmnode->data == page); /* shouldn't be able to fail */ 5.198 + vmnode->data = newpage; 5.199 + 5.200 + /* also update the page table */ 5.201 + map_page(newpage->vpage, newpage->ppage, newpage->flags); 5.202 + 5.203 + /* finally decrease the refcount at the original vm_page struct */ 5.204 + page->nref--; 5.205 + return 0; 5.206 +} 5.207 + 5.208 /* --- page range list node management --- */ 5.209 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range)) 5.210 5.211 @@ -574,14 +704,13 @@ 5.212 * 5.213 * If "cow" is non-zero it also marks the shared user-space pages as 5.214 * read-only, to implement copy-on-write. 5.215 - * 5.216 - * Returns the physical address of the new page directory. 5.217 */ 5.218 -uint32_t clone_vm(int cow) 5.219 +void clone_vm(struct process *pdest, struct process *psrc, int cow) 5.220 { 5.221 int i, j, dirpg, tblpg, kstart_dirent; 5.222 uint32_t paddr; 5.223 uint32_t *ndir, *ntbl; 5.224 + struct rbnode *vmnode; 5.225 5.226 /* allocate the new page directory */ 5.227 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) { 5.228 @@ -613,8 +742,10 @@ 5.229 * page table and unset the writable bits. 
5.230 */ 5.231 for(j=0; j<1024; j++) { 5.232 - clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY); 5.233 - /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/ 5.234 + if(PGTBL(i)[j] & PG_PRESENT) { 5.235 + clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY); 5.236 + /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/ 5.237 + } 5.238 } 5.239 } 5.240 5.241 @@ -632,18 +763,31 @@ 5.242 } 5.243 } 5.244 5.245 + /* make a copy of the parent's vmmap tree pointing to the same vm_pages 5.246 + * and increase the reference counters for all vm_pages. 5.247 + */ 5.248 + rb_init(&pdest->vmmap, RB_KEY_INT); 5.249 + rb_begin(&psrc->vmmap); 5.250 + while((vmnode = rb_next(&psrc->vmmap))) { 5.251 + struct vm_page *pg = vmnode->data; 5.252 + pg->nref++; 5.253 + 5.254 + /* insert the same vm_page to the new tree */ 5.255 + rb_inserti(&pdest->vmmap, pg->vpage, pg); 5.256 + } 5.257 + 5.258 /* for the kernel space we'll just use the same page tables */ 5.259 for(i=kstart_dirent; i<1024; i++) { 5.260 ndir[i] = pgdir[i]; 5.261 } 5.262 + paddr = virt_to_phys((uint32_t)ndir); 5.263 + ndir[1023] = paddr | PG_PRESENT; 5.264 5.265 if(cow) { 5.266 /* we just changed all the page protection bits, so we need to flush the TLB */ 5.267 flush_tlb(); 5.268 } 5.269 5.270 - paddr = virt_to_phys((uint32_t)ndir); 5.271 - 5.272 /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */ 5.273 unmap_page(dirpg); 5.274 unmap_page(tblpg); 5.275 @@ -651,7 +795,8 @@ 5.276 pgfree(dirpg, 1); 5.277 pgfree(tblpg, 1); 5.278 5.279 - return paddr; 5.280 + /* set the new page directory pointer */ 5.281 + pdest->ctx.pgtbl_paddr = paddr; 5.282 } 5.283 5.284 int get_page_bit(int pgnum, uint32_t bit, int wholepath) 5.285 @@ -723,7 +868,7 @@ 5.286 vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK; 5.287 vmp->nref = 1; /* when first created assume no sharing */ 5.288 5.289 - rb_inserti(vmmap, vmp->ppage, vmp); 5.290 + rb_inserti(vmmap, vmp->vpage, vmp); 5.291 } 5.292 } 5.293 } 5.294 @@ -732,6 +877,21 @@ 5.295 return 0; 
5.296 } 5.297 5.298 +struct vm_page *get_vm_page(int vpg) 5.299 +{ 5.300 + return get_vm_page_proc(get_current_proc(), vpg); 5.301 +} 5.302 + 5.303 +struct vm_page *get_vm_page_proc(struct process *p, int vpg) 5.304 +{ 5.305 + struct rbnode *node; 5.306 + 5.307 + if(!p || !(node = rb_findi(&p->vmmap, vpg))) { 5.308 + return 0; 5.309 + } 5.310 + return node->data; 5.311 +} 5.312 + 5.313 5.314 void dbg_print_vm(int area) 5.315 {
6.1 --- a/src/vm.h Mon Oct 10 04:16:01 2011 +0300 6.2 +++ b/src/vm.h Wed Oct 12 14:39:40 2011 +0300 6.3 @@ -53,6 +53,8 @@ 6.4 int nref; 6.5 }; 6.6 6.7 +struct process; 6.8 + 6.9 void init_vm(void); 6.10 6.11 int map_page(int vpage, int ppage, unsigned int attr); 6.12 @@ -61,8 +63,11 @@ 6.13 int unmap_page_range(int vpg_start, int pgcount); 6.14 int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr); 6.15 6.16 +uint32_t virt_to_phys(uint32_t vaddr); 6.17 int virt_to_phys_page(int vpg); 6.18 -uint32_t virt_to_phys(uint32_t vaddr); 6.19 + 6.20 +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr); 6.21 +int virt_to_phys_page_proc(struct process *p, int vpg); 6.22 6.23 enum { 6.24 MEM_KERNEL, 6.25 @@ -73,7 +78,7 @@ 6.26 int pgalloc_vrange(int start, int num); 6.27 void pgfree(int start, int num); 6.28 6.29 -uint32_t clone_vm(int cow); 6.30 +void clone_vm(struct process *pdest, struct process *psrc, int cow); 6.31 6.32 int get_page_bit(int pgnum, uint32_t bit, int wholepath); 6.33 void set_page_bit(int pgnum, uint32_t bit, int wholepath); 6.34 @@ -82,6 +87,9 @@ 6.35 /* construct the vm map for the current user mappings */ 6.36 int cons_vmmap(struct rbtree *vmmap); 6.37 6.38 +struct vm_page *get_vm_page(int vpg); 6.39 +struct vm_page *get_vm_page_proc(struct process *p, int vpg); 6.40 + 6.41 void dbg_print_vm(int area); 6.42 6.43 /* defined in vm-asm.S */