kern
diff src/vm.c @ 69:b45e2d5f0ae1
ok I *think* i've fixed it now
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Wed, 12 Oct 2011 14:39:40 +0300 |
parents | 0a205396e1a0 |
children | 219974492c7d |
line diff
1.1 --- a/src/vm.c Mon Oct 10 04:16:01 2011 +0300 1.2 +++ b/src/vm.c Wed Oct 12 14:39:40 2011 +0300 1.3 @@ -38,6 +38,7 @@ 1.4 1.5 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high); 1.6 static void pgfault(int inum); 1.7 +static int copy_on_write(struct vm_page *page); 1.8 static struct page_range *alloc_node(void); 1.9 static void free_node(struct page_range *node); 1.10 1.11 @@ -109,6 +110,7 @@ 1.12 { 1.13 uint32_t *pgtbl; 1.14 int diridx, pgidx, pgon, intr_state; 1.15 + struct process *p; 1.16 1.17 intr_state = get_intr_state(); 1.18 disable_intr(); 1.19 @@ -155,6 +157,30 @@ 1.20 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT; 1.21 flush_tlb_page(vpage); 1.22 1.23 + /* if it's a new *user* mapping, and there is a current process, update the vmmap */ 1.24 + if((attr & PG_USER) && (p = get_current_proc())) { 1.25 + struct vm_page *page; 1.26 + 1.27 + if(!(page = get_vm_page_proc(p, vpage))) { 1.28 + if(!(page = malloc(sizeof *page))) { 1.29 + panic("map_page: failed to allocate new vm_page structure"); 1.30 + } 1.31 + page->vpage = vpage; 1.32 + page->ppage = ppage; 1.33 + page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT; 1.34 + page->nref = 1; 1.35 + 1.36 + rb_inserti(&p->vmmap, vpage, page); 1.37 + } else { 1.38 + /* otherwise just update the mapping */ 1.39 + page->ppage = ppage; 1.40 + 1.41 + /* XXX don't touch the flags, as that's how we implement CoW 1.42 + * by changing the mapping without affecting the vm_page 1.43 + */ 1.44 + } 1.45 + } 1.46 + 1.47 set_intr_state(intr_state); 1.48 return 0; 1.49 } 1.50 @@ -231,6 +257,7 @@ 1.51 return map_page_range(vpg_start, num_pages, ppg_start, attr); 1.52 } 1.53 1.54 +/* translate a virtual address to a physical address using the current page table */ 1.55 uint32_t virt_to_phys(uint32_t vaddr) 1.56 { 1.57 int pg; 1.58 @@ -244,6 +271,7 @@ 1.59 return pgaddr | ADDR_TO_PGOFFS(vaddr); 1.60 } 1.61 1.62 +/* translate a virtual page number to a 
physical page number using the current page table */ 1.63 int virt_to_phys_page(int vpg) 1.64 { 1.65 uint32_t pgaddr, *pgtbl; 1.66 @@ -268,6 +296,32 @@ 1.67 return ADDR_TO_PAGE(pgaddr); 1.68 } 1.69 1.70 +/* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */ 1.71 +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr) 1.72 +{ 1.73 + int pg; 1.74 + uint32_t pgaddr; 1.75 + 1.76 + if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) { 1.77 + return 0; 1.78 + } 1.79 + pgaddr = PAGE_TO_ADDR(pg); 1.80 + 1.81 + return pgaddr | ADDR_TO_PGOFFS(vaddr); 1.82 +} 1.83 + 1.84 +/* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */ 1.85 +int virt_to_phys_page_proc(struct process *p, int vpg) 1.86 +{ 1.87 + struct rbnode *node; 1.88 + assert(p); 1.89 + 1.90 + if(!(node = rb_findi(&p->vmmap, vpg))) { 1.91 + return -1; 1.92 + } 1.93 + return ((struct vm_page*)node->data)->ppage; 1.94 +} 1.95 + 1.96 /* allocate a contiguous block of virtual memory pages along with 1.97 * backing physical memory for them, and update the page table. 1.98 */ 1.99 @@ -481,14 +535,37 @@ 1.100 if(frm->err & PG_USER) { 1.101 int fault_page = ADDR_TO_PAGE(fault_addr); 1.102 struct process *proc = get_current_proc(); 1.103 - printf("DBG: page fault in user space\n"); 1.104 + printf("DBG: page fault in user space (pid: %d)\n", proc->id); 1.105 assert(proc); 1.106 1.107 if(frm->err & PG_PRESENT) { 1.108 - /* it's not due to a missing page, just panic */ 1.109 + /* it's not due to a missing page; fetch the attributes */ 1.110 + int pgnum = ADDR_TO_PAGE(fault_addr); 1.111 + 1.112 + if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) { 1.113 + /* write permission fault might be a CoW fault or just an error 1.114 + * fetch the vm_page permissions to check if this is supposed to be 1.115 + * a writable page (which means we should CoW).
1.116 + */ 1.117 + struct vm_page *page = get_vm_page_proc(proc, pgnum); 1.118 + 1.119 + if(page->flags & PG_WRITABLE) { 1.120 + /* ok this is a CoW fault */ 1.121 + if(copy_on_write(page) == -1) { 1.122 + panic("copy on write failed!"); 1.123 + } 1.124 + return; /* done, allow the process to restart the instruction and continue */ 1.125 + } else { 1.126 + /* TODO eventually we'll SIGSEGV the process, for now just panic. 1.127 + */ 1.128 + goto unhandled; 1.129 + } 1.130 + } 1.131 goto unhandled; 1.132 } 1.133 1.134 + /* so it's a missing page... ok */ 1.135 + 1.136 /* detect if it's an automatic stack growth deal */ 1.137 if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) { 1.138 int num_pages = proc->user_stack_pg - fault_page; 1.139 @@ -502,11 +579,16 @@ 1.140 proc->user_stack_pg = fault_page; 1.141 return; 1.142 } 1.143 + 1.144 + /* it's not a stack growth fault. since we don't do swapping yet, just 1.145 + * fall to unhandled and panic 1.146 + */ 1.147 } 1.148 1.149 unhandled: 1.150 printf("~~~~ PAGE FAULT ~~~~\n"); 1.151 printf("fault address: %x\n", fault_addr); 1.152 + printf("error code: %x\n", frm->err); 1.153 1.154 if(frm->err & PG_PRESENT) { 1.155 if(frm->err & 8) { 1.156 @@ -522,6 +604,54 @@ 1.157 panic("unhandled page fault\n"); 1.158 } 1.159 1.160 +/* copy-on-write handler, called from pgfault above */ 1.161 +static int copy_on_write(struct vm_page *page) 1.162 +{ 1.163 + uint32_t newphys; 1.164 + struct vm_page *newpage; 1.165 + struct rbnode *vmnode; 1.166 + struct process *p = get_current_proc(); 1.167 + 1.168 + assert(page->nref > 0); 1.169 + 1.170 + /* first of all check the refcount. If it's 1 then we don't need to copy 1.171 + * anything. This will happen when all forked processes except one have 1.172 + * marked this read-write again after faulting. 
1.173 + */ 1.174 + if(page->nref == 1) { 1.175 + set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY); 1.176 + return 0; 1.177 + } 1.178 + 1.179 + /* ok let's make a copy and mark it read-write */ 1.180 + if(!(newpage = malloc(sizeof *newpage))) { 1.181 + printf("copy_on_write: failed to allocate new vm_page\n"); 1.182 + return -1; 1.183 + } 1.184 + newpage->vpage = page->vpage; 1.185 + newpage->flags = page->flags; 1.186 + 1.187 + if(!(newphys = alloc_phys_page())) { 1.188 + printf("copy_on_write: failed to allocate physical page\n"); 1.189 + /* XXX proper action: SIGSEGV */ 1.190 + return -1; 1.191 + } 1.192 + newpage->ppage = ADDR_TO_PAGE(newphys); 1.193 + newpage->nref = 1; 1.194 + 1.195 + /* set the new vm_page in the process vmmap */ 1.196 + vmnode = rb_findi(&p->vmmap, newpage->vpage); 1.197 + assert(vmnode && vmnode->data == page); /* shouldn't be able to fail */ 1.198 + vmnode->data = newpage; 1.199 + 1.200 + /* also update the page table */ 1.201 + map_page(newpage->vpage, newpage->ppage, newpage->flags); 1.202 + 1.203 + /* finally decrease the refcount at the original vm_page struct */ 1.204 + page->nref--; 1.205 + return 0; 1.206 +} 1.207 + 1.208 /* --- page range list node management --- */ 1.209 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range)) 1.210 1.211 @@ -574,14 +704,13 @@ 1.212 * 1.213 * If "cow" is non-zero it also marks the shared user-space pages as 1.214 * read-only, to implement copy-on-write. 1.215 - * 1.216 - * Returns the physical address of the new page directory. 1.217 */ 1.218 -uint32_t clone_vm(int cow) 1.219 +void clone_vm(struct process *pdest, struct process *psrc, int cow) 1.220 { 1.221 int i, j, dirpg, tblpg, kstart_dirent; 1.222 uint32_t paddr; 1.223 uint32_t *ndir, *ntbl; 1.224 + struct rbnode *vmnode; 1.225 1.226 /* allocate the new page directory */ 1.227 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) { 1.228 @@ -613,8 +742,10 @@ 1.229 * page table and unset the writable bits.
1.230 */ 1.231 for(j=0; j<1024; j++) { 1.232 - clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY); 1.233 - /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/ 1.234 + if(PGTBL(i)[j] & PG_PRESENT) { 1.235 + clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY); 1.236 + /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/ 1.237 + } 1.238 } 1.239 } 1.240 1.241 @@ -632,18 +763,31 @@ 1.242 } 1.243 } 1.244 1.245 + /* make a copy of the parent's vmmap tree pointing to the same vm_pages 1.246 + * and increase the reference counters for all vm_pages. 1.247 + */ 1.248 + rb_init(&pdest->vmmap, RB_KEY_INT); 1.249 + rb_begin(&psrc->vmmap); 1.250 + while((vmnode = rb_next(&psrc->vmmap))) { 1.251 + struct vm_page *pg = vmnode->data; 1.252 + pg->nref++; 1.253 + 1.254 + /* insert the same vm_page to the new tree */ 1.255 + rb_inserti(&pdest->vmmap, pg->vpage, pg); 1.256 + } 1.257 + 1.258 /* for the kernel space we'll just use the same page tables */ 1.259 for(i=kstart_dirent; i<1024; i++) { 1.260 ndir[i] = pgdir[i]; 1.261 } 1.262 + paddr = virt_to_phys((uint32_t)ndir); 1.263 + ndir[1023] = paddr | PG_PRESENT; 1.264 1.265 if(cow) { 1.266 /* we just changed all the page protection bits, so we need to flush the TLB */ 1.267 flush_tlb(); 1.268 } 1.269 1.270 - paddr = virt_to_phys((uint32_t)ndir); 1.271 - 1.272 /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */ 1.273 unmap_page(dirpg); 1.274 unmap_page(tblpg); 1.275 @@ -651,7 +795,8 @@ 1.276 pgfree(dirpg, 1); 1.277 pgfree(tblpg, 1); 1.278 1.279 - return paddr; 1.280 + /* set the new page directory pointer */ 1.281 + pdest->ctx.pgtbl_paddr = paddr; 1.282 } 1.283 1.284 int get_page_bit(int pgnum, uint32_t bit, int wholepath) 1.285 @@ -723,7 +868,7 @@ 1.286 vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK; 1.287 vmp->nref = 1; /* when first created assume no sharing */ 1.288 1.289 - rb_inserti(vmmap, vmp->ppage, vmp); 1.290 + rb_inserti(vmmap, vmp->vpage, vmp); 1.291 } 1.292 } 1.293 } 1.294 @@ -732,6 +877,21 @@ 1.295 return 0; 
1.296 } 1.297 1.298 +struct vm_page *get_vm_page(int vpg) 1.299 +{ 1.300 + return get_vm_page_proc(get_current_proc(), vpg); 1.301 +} 1.302 + 1.303 +struct vm_page *get_vm_page_proc(struct process *p, int vpg) 1.304 +{ 1.305 + struct rbnode *node; 1.306 + 1.307 + if(!p || !(node = rb_findi(&p->vmmap, vpg))) { 1.308 + return 0; 1.309 + } 1.310 + return node->data; 1.311 +} 1.312 + 1.313 1.314 void dbg_print_vm(int area) 1.315 {