kern

annotate src/vm.c @ 71:c7bd6ec7b946

changed test_proc to modify memory after the fork in order to try out copy-on-write, by pushing the result of getpid on the stack.
author John Tsiombikas <nuclear@member.fsf.org>
date Thu, 13 Oct 2011 05:22:35 +0300
parents b45e2d5f0ae1
children 3941e82b07f2
rev   line source
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <assert.h>
#include "config.h"
#include "vm.h"
#include "intr.h"
#include "mem.h"
#include "panic.h"
#include "proc.h"
nuclear@17 11
/* start address of the region which init_vm maps 1-1 */
#define IDMAP_START		0xa0000

/* init_vm points the last page directory entry back to the page directory
 * itself, and then accesses the directory through PGDIR_ADDR and the page
 * tables through PGTBL(directory index).
 */
#define PGDIR_ADDR		0xfffff000
#define PGTBL_BASE		(0xffffffff - 4096 * 1024 + 1)
#define PGTBL(x)		((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))

/* masks applied to the caller-supplied attribute bits before they are
 * stored in a page directory entry or a page table entry respectively
 */
#define ATTR_PGDIR_MASK	0x3f
#define ATTR_PGTBL_MASK	0x1ff

/* interrupt number for which the page fault handler is registered */
#define PAGEFAULT		14
nuclear@17 22
nuclear@22 23
/* a contiguous range of free virtual pages [start, end), kept in a
 * singly linked list
 */
struct page_range {
	int start;					/* first page of the range */
	int end;					/* one past the last page of the range */
	struct page_range *next;	/* next range in the list, or 0 */
};
nuclear@22 28
nuclear@22 29 /* defined in vm-asm.S */
nuclear@22 30 void enable_paging(void);
nuclear@23 31 void disable_paging(void);
nuclear@23 32 int get_paging_status(void);
nuclear@22 33 void set_pgdir_addr(uint32_t addr);
nuclear@23 34 void flush_tlb(void);
nuclear@23 35 void flush_tlb_addr(uint32_t addr);
nuclear@23 36 #define flush_tlb_page(p) flush_tlb_addr(PAGE_TO_ADDR(p))
nuclear@22 37 uint32_t get_fault_addr(void);
nuclear@22 38
nuclear@23 39 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
nuclear@52 40 static void pgfault(int inum);
nuclear@69 41 static int copy_on_write(struct vm_page *page);
nuclear@22 42 static struct page_range *alloc_node(void);
nuclear@22 43 static void free_node(struct page_range *node);
nuclear@22 44
nuclear@22 45 /* page directory */
nuclear@22 46 static uint32_t *pgdir;
nuclear@22 47
nuclear@22 48 /* 2 lists of free ranges, for kernel memory and user memory */
nuclear@22 49 static struct page_range *pglist[2];
nuclear@22 50 /* list of free page_range structures to be used in the lists */
nuclear@22 51 static struct page_range *node_pool;
nuclear@23 52 /* the first page range for the whole kernel address space, to get things started */
nuclear@23 53 static struct page_range first_node;
nuclear@22 54
nuclear@22 55
nuclear@26 56 void init_vm(void)
nuclear@17 57 {
nuclear@19 58 uint32_t idmap_end;
nuclear@47 59 int i, kmem_start_pg, pgtbl_base_pg;
nuclear@19 60
nuclear@23 61 /* setup the page tables */
nuclear@18 62 pgdir = (uint32_t*)alloc_phys_page();
nuclear@23 63 memset(pgdir, 0, PGSIZE);
nuclear@24 64 set_pgdir_addr((uint32_t)pgdir);
nuclear@17 65
nuclear@17 66 /* map the video memory and kernel code 1-1 */
nuclear@19 67 get_kernel_mem_range(0, &idmap_end);
nuclear@19 68 map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);
nuclear@17 69
nuclear@24 70 /* make the last page directory entry point to the page directory */
nuclear@68 71 pgdir[1023] = ((uint32_t)pgdir & PGENT_ADDR_MASK) | PG_PRESENT;
nuclear@24 72 pgdir = (uint32_t*)PGDIR_ADDR;
nuclear@24 73
nuclear@23 74 /* set the page fault handler */
nuclear@17 75 interrupt(PAGEFAULT, pgfault);
nuclear@17 76
nuclear@23 77 /* we can enable paging now */
nuclear@17 78 enable_paging();
nuclear@23 79
nuclear@23 80 /* initialize the virtual page allocator */
nuclear@23 81 node_pool = 0;
nuclear@23 82
nuclear@47 83 kmem_start_pg = ADDR_TO_PAGE(KMEM_START);
nuclear@47 84 pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE);
nuclear@47 85
nuclear@47 86 first_node.start = kmem_start_pg;
nuclear@47 87 first_node.end = pgtbl_base_pg;
nuclear@23 88 first_node.next = 0;
nuclear@23 89 pglist[MEM_KERNEL] = &first_node;
nuclear@23 90
nuclear@23 91 pglist[MEM_USER] = alloc_node();
nuclear@26 92 pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end);
nuclear@47 93 pglist[MEM_USER]->end = kmem_start_pg;
nuclear@23 94 pglist[MEM_USER]->next = 0;
nuclear@47 95
nuclear@47 96 /* temporaroly map something into every 1024th page of the kernel address
nuclear@47 97 * space to force pre-allocation of all the kernel page-tables
nuclear@47 98 */
nuclear@47 99 for(i=kmem_start_pg; i<pgtbl_base_pg; i+=1024) {
nuclear@47 100 /* if there's already something mapped here, leave it alone */
nuclear@47 101 if(virt_to_phys_page(i) == -1) {
nuclear@47 102 map_page(i, 0, 0);
nuclear@47 103 unmap_page(i);
nuclear@47 104 }
nuclear@47 105 }
nuclear@17 106 }
nuclear@17 107
nuclear@23 108 /* if ppage == -1 we allocate a physical page by calling alloc_phys_page */
nuclear@23 109 int map_page(int vpage, int ppage, unsigned int attr)
nuclear@17 110 {
nuclear@17 111 uint32_t *pgtbl;
nuclear@25 112 int diridx, pgidx, pgon, intr_state;
nuclear@69 113 struct process *p;
nuclear@25 114
nuclear@25 115 intr_state = get_intr_state();
nuclear@25 116 disable_intr();
nuclear@23 117
nuclear@23 118 pgon = get_paging_status();
nuclear@23 119
nuclear@23 120 if(ppage < 0) {
nuclear@23 121 uint32_t addr = alloc_phys_page();
nuclear@23 122 if(!addr) {
nuclear@25 123 set_intr_state(intr_state);
nuclear@23 124 return -1;
nuclear@23 125 }
nuclear@23 126 ppage = ADDR_TO_PAGE(addr);
nuclear@23 127 }
nuclear@23 128
nuclear@23 129 diridx = PAGE_TO_PGTBL(vpage);
nuclear@23 130 pgidx = PAGE_TO_PGTBL_PG(vpage);
nuclear@17 131
nuclear@17 132 if(!(pgdir[diridx] & PG_PRESENT)) {
nuclear@55 133 /* no page table present, we must allocate one */
nuclear@17 134 uint32_t addr = alloc_phys_page();
nuclear@55 135
nuclear@55 136 /* make sure all page directory entries in the below the kernel vm
nuclear@55 137 * split have the user and writable bits set, otherwise further user
nuclear@55 138 * mappings on the same 4mb block will be unusable in user space.
nuclear@55 139 */
nuclear@55 140 unsigned int pgdir_attr = attr;
nuclear@55 141 if(vpage < ADDR_TO_PAGE(KMEM_START)) {
nuclear@55 142 pgdir_attr |= PG_USER | PG_WRITABLE;
nuclear@55 143 }
nuclear@55 144
nuclear@55 145 pgdir[diridx] = addr | (pgdir_attr & ATTR_PGDIR_MASK) | PG_PRESENT;
nuclear@24 146
nuclear@24 147 pgtbl = pgon ? PGTBL(diridx) : (uint32_t*)addr;
nuclear@18 148 memset(pgtbl, 0, PGSIZE);
nuclear@17 149 } else {
nuclear@24 150 if(pgon) {
nuclear@24 151 pgtbl = PGTBL(diridx);
nuclear@24 152 } else {
nuclear@68 153 pgtbl = (uint32_t*)(pgdir[diridx] & PGENT_ADDR_MASK);
nuclear@24 154 }
nuclear@17 155 }
nuclear@17 156
nuclear@17 157 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
nuclear@23 158 flush_tlb_page(vpage);
nuclear@23 159
nuclear@69 160 /* if it's a new *user* mapping, and there is a current process, update the vmmap */
nuclear@69 161 if((attr & PG_USER) && (p = get_current_proc())) {
nuclear@69 162 struct vm_page *page;
nuclear@69 163
nuclear@69 164 if(!(page = get_vm_page_proc(p, vpage))) {
nuclear@69 165 if(!(page = malloc(sizeof *page))) {
nuclear@69 166 panic("map_page: failed to allocate new vm_page structure");
nuclear@69 167 }
nuclear@69 168 page->vpage = vpage;
nuclear@69 169 page->ppage = ppage;
nuclear@69 170 page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
nuclear@69 171 page->nref = 1;
nuclear@69 172
nuclear@69 173 rb_inserti(&p->vmmap, vpage, page);
nuclear@69 174 } else {
nuclear@69 175 /* otherwise just update the mapping */
nuclear@69 176 page->ppage = ppage;
nuclear@69 177
nuclear@69 178 /* XXX don't touch the flags, as that's how we implement CoW
nuclear@69 179 * by changing the mapping without affecting the vm_page
nuclear@69 180 */
nuclear@69 181 }
nuclear@69 182 }
nuclear@69 183
nuclear@25 184 set_intr_state(intr_state);
nuclear@23 185 return 0;
nuclear@17 186 }
nuclear@17 187
nuclear@43 188 int unmap_page(int vpage)
nuclear@17 189 {
nuclear@17 190 uint32_t *pgtbl;
nuclear@43 191 int res = 0;
nuclear@17 192 int diridx = PAGE_TO_PGTBL(vpage);
nuclear@17 193 int pgidx = PAGE_TO_PGTBL_PG(vpage);
nuclear@17 194
nuclear@25 195 int intr_state = get_intr_state();
nuclear@25 196 disable_intr();
nuclear@25 197
nuclear@17 198 if(!(pgdir[diridx] & PG_PRESENT)) {
nuclear@17 199 goto err;
nuclear@17 200 }
nuclear@26 201 pgtbl = PGTBL(diridx);
nuclear@17 202
nuclear@17 203 if(!(pgtbl[pgidx] & PG_PRESENT)) {
nuclear@17 204 goto err;
nuclear@17 205 }
nuclear@17 206 pgtbl[pgidx] = 0;
nuclear@23 207 flush_tlb_page(vpage);
nuclear@17 208
nuclear@25 209 if(0) {
nuclear@17 210 err:
nuclear@25 211 printf("unmap_page(%d): page already not mapped\n", vpage);
nuclear@43 212 res = -1;
nuclear@25 213 }
nuclear@25 214 set_intr_state(intr_state);
nuclear@43 215 return res;
nuclear@17 216 }
nuclear@17 217
/* map_page_range maps pgcount consecutive virtual pages starting at
 * vpg_start, to consecutive physical pages starting at ppg_start.
 * If ppg_start is negative, a physical page is allocated for each one with
 * alloc_phys_page (through map_page).
 *
 * Returns 0 on success. If any map_page call fails, it stops right there
 * and returns -1; pages mapped before the failure remain mapped.
 */
int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr)
{
	int i, phys_pg;

	for(i=0; i<pgcount; i++) {
		phys_pg = ppg_start < 0 ? -1 : ppg_start + i;

		/* callers (pgalloc, pgalloc_vrange) test for -1, so failures
		 * must not be silently swallowed here
		 */
		if(map_page(vpg_start + i, phys_pg, attr) == -1) {
			return -1;
		}
	}
	return 0;
}
nuclear@17 229
/* unmap_page_range unmaps pgcount consecutive virtual pages starting at
 * vpg_start. All pages are attempted even if some of them fail.
 *
 * Returns 0 if every page was unmapped, -1 if unmap_page failed for any.
 */
int unmap_page_range(int vpg_start, int pgcount)
{
	int status = 0;
	int vpg = vpg_start;
	int left;

	for(left = pgcount; left > 0; left--) {
		if(unmap_page(vpg) == -1) {
			status = -1;
		}
		vpg++;
	}
	return status;
}
nuclear@43 241
/* map_mem_range maps the virtual address range of sz bytes starting at vaddr,
 * to physical memory starting at paddr. If paddr is 0, physical pages are
 * allocated with alloc_phys_page instead.
 *
 * Panics if paddr is not page-aligned. Returns -1 if sz is 0, otherwise
 * whatever map_page_range returns.
 *
 * NOTE(review): the page count is ADDR_TO_PAGE(sz) + 1; presumably
 * ADDR_TO_PAGE divides by the page size, in which case a page-multiple sz
 * maps one page more than strictly needed, and the offset of vaddr into its
 * page is not taken into account -- confirm this is intended.
 */
int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr)
{
	int first_ppg;

	if(sz == 0) {
		return -1;
	}

	if(ADDR_TO_PGOFFS(paddr) != 0) {
		panic("map_mem_range called with unaligned physical address: %x\n", paddr);
	}

	/* a zero physical address means "allocate the pages for me" */
	if(paddr > 0) {
		first_ppg = ADDR_TO_PAGE(paddr);
	} else {
		first_ppg = -1;
	}

	return map_page_range(ADDR_TO_PAGE(vaddr), ADDR_TO_PAGE(sz) + 1, first_ppg, attr);
}
nuclear@17 259
/* virt_to_phys translates a virtual address to the corresponding physical
 * address, by looking it up in the current page table.
 * Returns 0 if the page containing vaddr is not mapped.
 */
uint32_t virt_to_phys(uint32_t vaddr)
{
	uint32_t page_addr;
	int ppg = virt_to_phys_page(ADDR_TO_PAGE(vaddr));

	if(ppg == -1) {
		return 0;
	}

	/* physical page address, plus the offset within the page */
	page_addr = PAGE_TO_ADDR(ppg);
	return page_addr | ADDR_TO_PGOFFS(vaddr);
}
nuclear@43 273
nuclear@69 274 /* translate a virtual page number to a physical page number using the current page table */
nuclear@43 275 int virt_to_phys_page(int vpg)
nuclear@43 276 {
nuclear@18 277 uint32_t pgaddr, *pgtbl;
nuclear@43 278 int diridx, pgidx;
nuclear@43 279
nuclear@43 280 if(vpg < 0 || vpg >= PAGE_COUNT) {
nuclear@43 281 return -1;
nuclear@43 282 }
nuclear@43 283
nuclear@43 284 diridx = PAGE_TO_PGTBL(vpg);
nuclear@43 285 pgidx = PAGE_TO_PGTBL_PG(vpg);
nuclear@18 286
nuclear@18 287 if(!(pgdir[diridx] & PG_PRESENT)) {
nuclear@43 288 return -1;
nuclear@18 289 }
nuclear@26 290 pgtbl = PGTBL(diridx);
nuclear@18 291
nuclear@18 292 if(!(pgtbl[pgidx] & PG_PRESENT)) {
nuclear@43 293 return -1;
nuclear@18 294 }
nuclear@18 295 pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK;
nuclear@43 296 return ADDR_TO_PAGE(pgaddr);
nuclear@18 297 }
nuclear@18 298
/* virt_to_phys_proc does the same job as virt_to_phys, but consults the
 * vm_page tree of process p instead of the actual page table.
 * Returns 0 if the process has no vm_page for the page containing vaddr.
 */
uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr)
{
	uint32_t page_addr;
	int ppg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr));

	if(ppg == -1) {
		return 0;
	}

	/* physical page address, plus the offset within the page */
	page_addr = PAGE_TO_ADDR(ppg);
	return page_addr | ADDR_TO_PGOFFS(vaddr);
}
nuclear@69 312
nuclear@69 313 /* same virt_to_phys_page, but uses the vm_page tree instead of the actual page table */
nuclear@69 314 int virt_to_phys_page_proc(struct process *p, int vpg)
nuclear@69 315 {
nuclear@69 316 struct rbnode *node;
nuclear@69 317 assert(p);
nuclear@69 318
nuclear@69 319 if(!(node = rb_findi(&p->vmmap, vpg))) {
nuclear@69 320 return -1;
nuclear@69 321 }
nuclear@69 322 return ((struct vm_page*)node->data)->ppage;
nuclear@69 323 }
nuclear@69 324
nuclear@22 325 /* allocate a contiguous block of virtual memory pages along with
nuclear@22 326 * backing physical memory for them, and update the page table.
nuclear@22 327 */
nuclear@22 328 int pgalloc(int num, int area)
nuclear@22 329 {
nuclear@25 330 int intr_state, ret = -1;
nuclear@22 331 struct page_range *node, *prev, dummy;
nuclear@22 332
nuclear@25 333 intr_state = get_intr_state();
nuclear@25 334 disable_intr();
nuclear@25 335
nuclear@22 336 dummy.next = pglist[area];
nuclear@22 337 node = pglist[area];
nuclear@22 338 prev = &dummy;
nuclear@22 339
nuclear@22 340 while(node) {
nuclear@22 341 if(node->end - node->start >= num) {
nuclear@22 342 ret = node->start;
nuclear@22 343 node->start += num;
nuclear@22 344
nuclear@22 345 if(node->start == node->end) {
nuclear@22 346 prev->next = node->next;
nuclear@22 347 node->next = 0;
nuclear@22 348
nuclear@22 349 if(node == pglist[area]) {
nuclear@22 350 pglist[area] = 0;
nuclear@22 351 }
nuclear@22 352 free_node(node);
nuclear@22 353 }
nuclear@22 354 break;
nuclear@22 355 }
nuclear@22 356
nuclear@22 357 prev = node;
nuclear@22 358 node = node->next;
nuclear@22 359 }
nuclear@22 360
nuclear@22 361 if(ret >= 0) {
nuclear@55 362 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
nuclear@55 363 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
nuclear@55 364
nuclear@23 365 /* allocate physical storage and map */
nuclear@45 366 if(map_page_range(ret, num, -1, attr) == -1) {
nuclear@45 367 ret = -1;
nuclear@45 368 }
nuclear@45 369 }
nuclear@45 370
nuclear@45 371 set_intr_state(intr_state);
nuclear@45 372 return ret;
nuclear@45 373 }
nuclear@45 374
nuclear@45 375 int pgalloc_vrange(int start, int num)
nuclear@45 376 {
nuclear@45 377 struct page_range *node, *prev, dummy;
nuclear@45 378 int area, intr_state, ret = -1;
nuclear@45 379
nuclear@45 380 area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER;
nuclear@47 381 if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) {
nuclear@45 382 printf("pgalloc_vrange: invalid range request crossing user/kernel split\n");
nuclear@45 383 return -1;
nuclear@45 384 }
nuclear@45 385
nuclear@45 386 intr_state = get_intr_state();
nuclear@45 387 disable_intr();
nuclear@45 388
nuclear@45 389 dummy.next = pglist[area];
nuclear@45 390 node = pglist[area];
nuclear@45 391 prev = &dummy;
nuclear@45 392
nuclear@45 393 /* check to see if the requested VM range is available */
nuclear@45 394 node = pglist[area];
nuclear@45 395 while(node) {
nuclear@45 396 if(start >= node->start && start + num <= node->end) {
nuclear@49 397 ret = start; /* can do .. */
nuclear@49 398
nuclear@49 399 if(start == node->start) {
nuclear@49 400 /* adjacent to the start of the range */
nuclear@49 401 node->start += num;
nuclear@49 402 } else if(start + num == node->end) {
nuclear@49 403 /* adjacent to the end of the range */
nuclear@49 404 node->end = start;
nuclear@49 405 } else {
nuclear@49 406 /* somewhere in the middle, which means we need
nuclear@49 407 * to allocate a new page_range
nuclear@49 408 */
nuclear@49 409 struct page_range *newnode;
nuclear@49 410
nuclear@49 411 if(!(newnode = alloc_node())) {
nuclear@49 412 panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n");
nuclear@49 413 }
nuclear@49 414 newnode->start = start + num;
nuclear@49 415 newnode->end = node->end;
nuclear@49 416 newnode->next = node->next;
nuclear@49 417
nuclear@49 418 node->end = start;
nuclear@49 419 node->next = newnode;
nuclear@49 420 /* no need to check for null nodes at this point, there's
nuclear@49 421 * certainly stuff at the begining and the end, otherwise we
nuclear@49 422 * wouldn't be here. so break out of it.
nuclear@49 423 */
nuclear@49 424 break;
nuclear@49 425 }
nuclear@45 426
nuclear@45 427 if(node->start == node->end) {
nuclear@45 428 prev->next = node->next;
nuclear@45 429 node->next = 0;
nuclear@45 430
nuclear@45 431 if(node == pglist[area]) {
nuclear@45 432 pglist[area] = 0;
nuclear@45 433 }
nuclear@45 434 free_node(node);
nuclear@45 435 }
nuclear@45 436 break;
nuclear@45 437 }
nuclear@45 438
nuclear@45 439 prev = node;
nuclear@45 440 node = node->next;
nuclear@45 441 }
nuclear@45 442
nuclear@45 443 if(ret >= 0) {
nuclear@55 444 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
nuclear@55 445 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
nuclear@55 446
nuclear@45 447 /* allocate physical storage and map */
nuclear@45 448 if(map_page_range(ret, num, -1, attr) == -1) {
nuclear@23 449 ret = -1;
nuclear@23 450 }
nuclear@22 451 }
nuclear@22 452
nuclear@25 453 set_intr_state(intr_state);
nuclear@22 454 return ret;
nuclear@22 455 }
nuclear@22 456
nuclear@22 457 void pgfree(int start, int num)
nuclear@22 458 {
nuclear@33 459 int i, area, intr_state;
nuclear@23 460 struct page_range *node, *new, *prev, *next;
nuclear@23 461
nuclear@25 462 intr_state = get_intr_state();
nuclear@25 463 disable_intr();
nuclear@25 464
nuclear@26 465 for(i=0; i<num; i++) {
nuclear@43 466 int phys_pg = virt_to_phys_page(start + i);
nuclear@43 467 if(phys_pg != -1) {
nuclear@43 468 free_phys_page(phys_pg);
nuclear@26 469 }
nuclear@26 470 }
nuclear@26 471
nuclear@23 472 if(!(new = alloc_node())) {
nuclear@23 473 panic("pgfree: can't allocate new page_range node to add the freed pages\n");
nuclear@23 474 }
nuclear@23 475 new->start = start;
nuclear@33 476 new->end = start + num;
nuclear@23 477
nuclear@23 478 area = PAGE_TO_ADDR(start) >= KMEM_START ? MEM_KERNEL : MEM_USER;
nuclear@23 479
nuclear@23 480 if(!pglist[area] || pglist[area]->start > start) {
nuclear@23 481 next = new->next = pglist[area];
nuclear@23 482 pglist[area] = new;
nuclear@23 483 prev = 0;
nuclear@23 484
nuclear@23 485 } else {
nuclear@23 486
nuclear@23 487 prev = 0;
nuclear@23 488 node = pglist[area];
nuclear@23 489 next = node ? node->next : 0;
nuclear@23 490
nuclear@23 491 while(node) {
nuclear@23 492 if(!next || next->start > start) {
nuclear@23 493 /* place here, after node */
nuclear@23 494 new->next = next;
nuclear@23 495 node->next = new;
nuclear@23 496 prev = node; /* needed by coalesce after the loop */
nuclear@23 497 break;
nuclear@23 498 }
nuclear@23 499
nuclear@23 500 prev = node;
nuclear@23 501 node = next;
nuclear@23 502 next = node ? node->next : 0;
nuclear@23 503 }
nuclear@23 504 }
nuclear@23 505
nuclear@23 506 coalesce(prev, new, next);
nuclear@25 507 set_intr_state(intr_state);
nuclear@23 508 }
nuclear@23 509
nuclear@23 510 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high)
nuclear@23 511 {
nuclear@23 512 if(high) {
nuclear@23 513 if(mid->end == high->start) {
nuclear@23 514 mid->end = high->end;
nuclear@23 515 mid->next = high->next;
nuclear@23 516 free_node(high);
nuclear@23 517 }
nuclear@23 518 }
nuclear@23 519
nuclear@23 520 if(low) {
nuclear@23 521 if(low->end == mid->start) {
nuclear@23 522 low->end += mid->end;
nuclear@23 523 low->next = mid->next;
nuclear@23 524 free_node(mid);
nuclear@23 525 }
nuclear@23 526 }
nuclear@22 527 }
nuclear@22 528
nuclear@52 529 static void pgfault(int inum)
nuclear@17 530 {
nuclear@52 531 struct intr_frame *frm = get_intr_frame();
nuclear@52 532 uint32_t fault_addr = get_fault_addr();
nuclear@52 533
nuclear@52 534 /* the fault occured in user space */
nuclear@55 535 if(frm->err & PG_USER) {
nuclear@52 536 int fault_page = ADDR_TO_PAGE(fault_addr);
nuclear@52 537 struct process *proc = get_current_proc();
nuclear@69 538 printf("DBG: page fault in user space (pid: %d)\n", proc->id);
nuclear@52 539 assert(proc);
nuclear@52 540
nuclear@52 541 if(frm->err & PG_PRESENT) {
nuclear@69 542 /* it's not due to a missing page fetch the attributes */
nuclear@69 543 int pgnum = ADDR_TO_PAGE(fault_addr);
nuclear@69 544
nuclear@69 545 if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) {
nuclear@69 546 /* write permission fault might be a CoW fault or just an error
nuclear@69 547 * fetch the vm_page permissions to check if this is suppoosed to be
nuclear@69 548 * a writable page (which means we should CoW).
nuclear@69 549 */
nuclear@69 550 struct vm_page *page = get_vm_page_proc(proc, pgnum);
nuclear@69 551
nuclear@69 552 if(page->flags & PG_WRITABLE) {
nuclear@69 553 /* ok this is a CoW fault */
nuclear@69 554 if(copy_on_write(page) == -1) {
nuclear@69 555 panic("copy on write failed!");
nuclear@69 556 }
nuclear@69 557 return; /* done, allow the process to restart the instruction and continue */
nuclear@69 558 } else {
nuclear@69 559 /* TODO eventually we'll SIGSEGV the process, for now just panic.
nuclear@69 560 */
nuclear@69 561 goto unhandled;
nuclear@69 562 }
nuclear@69 563 }
nuclear@52 564 goto unhandled;
nuclear@52 565 }
nuclear@52 566
nuclear@69 567 /* so it's a missing page... ok */
nuclear@69 568
nuclear@52 569 /* detect if it's an automatic stack growth deal */
nuclear@55 570 if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
nuclear@55 571 int num_pages = proc->user_stack_pg - fault_page;
nuclear@52 572 printf("growing user (%d) stack by %d pages\n", proc->id, num_pages);
nuclear@52 573
nuclear@52 574 if(pgalloc_vrange(fault_page, num_pages) != fault_page) {
nuclear@52 575 printf("failed to allocate VM for stack growth\n");
nuclear@52 576 /* TODO: in the future we'd SIGSEGV the process here, for now just panic */
nuclear@52 577 goto unhandled;
nuclear@52 578 }
nuclear@55 579 proc->user_stack_pg = fault_page;
nuclear@52 580 return;
nuclear@52 581 }
nuclear@69 582
nuclear@69 583 /* it's not a stack growth fault. since we don't do swapping yet, just
nuclear@69 584 * fall to unhandled and panic
nuclear@69 585 */
nuclear@52 586 }
nuclear@52 587
nuclear@52 588 unhandled:
nuclear@17 589 printf("~~~~ PAGE FAULT ~~~~\n");
nuclear@52 590 printf("fault address: %x\n", fault_addr);
nuclear@69 591 printf("error code: %x\n", frm->err);
nuclear@17 592
nuclear@51 593 if(frm->err & PG_PRESENT) {
nuclear@51 594 if(frm->err & 8) {
nuclear@17 595 printf("reserved bit set in some paging structure\n");
nuclear@17 596 } else {
nuclear@55 597 printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "WRITE" : "READ");
nuclear@55 598 printf("in %s mode\n", (frm->err & PG_USER) ? "user" : "kernel");
nuclear@17 599 }
nuclear@17 600 } else {
nuclear@17 601 printf("page not present\n");
nuclear@17 602 }
nuclear@19 603
nuclear@19 604 panic("unhandled page fault\n");
nuclear@17 605 }
nuclear@22 606
nuclear@69 607 /* copy-on-write handler, called from pgfault above */
nuclear@69 608 static int copy_on_write(struct vm_page *page)
nuclear@69 609 {
nuclear@69 610 uint32_t newphys;
nuclear@69 611 struct vm_page *newpage;
nuclear@69 612 struct rbnode *vmnode;
nuclear@69 613 struct process *p = get_current_proc();
nuclear@69 614
nuclear@69 615 assert(page->nref > 0);
nuclear@69 616
nuclear@69 617 /* first of all check the refcount. If it's 1 then we don't need to copy
nuclear@69 618 * anything. This will happen when all forked processes except one have
nuclear@69 619 * marked this read-write again after faulting.
nuclear@69 620 */
nuclear@69 621 if(page->nref == 1) {
nuclear@69 622 set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY);
nuclear@69 623 return 0;
nuclear@69 624 }
nuclear@69 625
nuclear@69 626 /* ok let's make a copy and mark it read-write */
nuclear@69 627 if(!(newpage = malloc(sizeof *newpage))) {
nuclear@69 628 printf("copy_on_write: failed to allocate new vm_page\n");
nuclear@69 629 return -1;
nuclear@69 630 }
nuclear@69 631 newpage->vpage = page->vpage;
nuclear@69 632 newpage->flags = page->flags;
nuclear@69 633
nuclear@69 634 if(!(newphys = alloc_phys_page())) {
nuclear@69 635 printf("copy_on_write: failed to allocate physical page\n");
nuclear@69 636 /* XXX proper action: SIGSEGV */
nuclear@69 637 return -1;
nuclear@69 638 }
nuclear@69 639 newpage->ppage = ADDR_TO_PAGE(newphys);
nuclear@69 640 newpage->nref = 1;
nuclear@69 641
nuclear@69 642 /* set the new vm_page in the process vmmap */
nuclear@69 643 vmnode = rb_findi(&p->vmmap, newpage->vpage);
nuclear@69 644 assert(vmnode && vmnode->data == page); /* shouldn't be able to fail */
nuclear@69 645 vmnode->data = newpage;
nuclear@69 646
nuclear@69 647 /* also update tha page table */
nuclear@69 648 map_page(newpage->vpage, newpage->ppage, newpage->flags);
nuclear@69 649
nuclear@69 650 /* finally decrease the refcount at the original vm_page struct */
nuclear@69 651 page->nref--;
nuclear@69 652 return 0;
nuclear@69 653 }
nuclear@69 654
nuclear@22 655 /* --- page range list node management --- */
nuclear@23 656 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range))
nuclear@23 657
nuclear@22 658 static struct page_range *alloc_node(void)
nuclear@22 659 {
nuclear@22 660 struct page_range *node;
nuclear@23 661 int pg, i;
nuclear@22 662
nuclear@22 663 if(node_pool) {
nuclear@22 664 node = node_pool;
nuclear@22 665 node_pool = node_pool->next;
nuclear@47 666 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
nuclear@22 667 return node;
nuclear@22 668 }
nuclear@22 669
nuclear@23 670 /* no node structures in the pool, we need to allocate a new page,
nuclear@23 671 * split it up into node structures, add them in the pool, and
nuclear@23 672 * allocate one of them.
nuclear@22 673 */
nuclear@23 674 if(!(pg = pgalloc(1, MEM_KERNEL))) {
nuclear@22 675 panic("ran out of physical memory while allocating VM range structures\n");
nuclear@22 676 }
nuclear@23 677 node_pool = (struct page_range*)PAGE_TO_ADDR(pg);
nuclear@22 678
nuclear@23 679 /* link them up, skip the first as we'll just allocate it anyway */
nuclear@23 680 for(i=2; i<NODES_IN_PAGE; i++) {
nuclear@23 681 node_pool[i - 1].next = node_pool + i;
nuclear@23 682 }
nuclear@23 683 node_pool[NODES_IN_PAGE - 1].next = 0;
nuclear@23 684
nuclear@23 685 /* grab the first and return it */
nuclear@23 686 node = node_pool++;
nuclear@47 687 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
nuclear@23 688 return node;
nuclear@22 689 }
nuclear@22 690
nuclear@22 691 static void free_node(struct page_range *node)
nuclear@22 692 {
nuclear@22 693 node->next = node_pool;
nuclear@22 694 node_pool = node;
nuclear@47 695 /*printf("free_node\n");*/
nuclear@22 696 }
nuclear@23 697
nuclear@47 698 /* clone_vm makes a copy of the current page tables, thus duplicating the
nuclear@47 699 * virtual address space.
nuclear@47 700 *
nuclear@47 701 * For the kernel part of the address space (last 256 page directory entries)
nuclear@47 702 * we don't want to diplicate the page tables, just point all page directory
nuclear@47 703 * entries to the same set of page tables.
nuclear@43 704 *
nuclear@57 705 * If "cow" is non-zero it also marks the shared user-space pages as
nuclear@57 706 * read-only, to implement copy-on-write.
nuclear@43 707 */
nuclear@69 708 void clone_vm(struct process *pdest, struct process *psrc, int cow)
nuclear@43 709 {
nuclear@57 710 int i, j, dirpg, tblpg, kstart_dirent;
nuclear@43 711 uint32_t paddr;
nuclear@43 712 uint32_t *ndir, *ntbl;
nuclear@69 713 struct rbnode *vmnode;
nuclear@43 714
nuclear@47 715 /* allocate the new page directory */
nuclear@43 716 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
nuclear@43 717 panic("clone_vmem: failed to allocate page directory page\n");
nuclear@43 718 }
nuclear@43 719 ndir = (uint32_t*)PAGE_TO_ADDR(dirpg);
nuclear@43 720
nuclear@47 721 /* allocate a virtual page for temporarily mapping all new
nuclear@47 722 * page tables while we populate them.
nuclear@47 723 */
nuclear@43 724 if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) {
nuclear@43 725 panic("clone_vmem: failed to allocate page table page\n");
nuclear@43 726 }
nuclear@43 727 ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg);
nuclear@43 728
nuclear@43 729 /* we will allocate physical pages and map them to this virtual page
nuclear@57 730 * as needed in the loop below. we don't need the physical page allocated
nuclear@57 731 * by pgalloc.
nuclear@43 732 */
nuclear@49 733 free_phys_page(virt_to_phys((uint32_t)ntbl));
nuclear@43 734
nuclear@48 735 kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024;
nuclear@47 736
nuclear@47 737 /* user space */
nuclear@48 738 for(i=0; i<kstart_dirent; i++) {
nuclear@43 739 if(pgdir[i] & PG_PRESENT) {
nuclear@64 740 if(cow) {
nuclear@64 741 /* first go through all the entries of the existing
nuclear@64 742 * page table and unset the writable bits.
nuclear@64 743 */
nuclear@64 744 for(j=0; j<1024; j++) {
nuclear@69 745 if(PGTBL(i)[j] & PG_PRESENT) {
nuclear@69 746 clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
nuclear@69 747 /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
nuclear@69 748 }
nuclear@64 749 }
nuclear@57 750 }
nuclear@57 751
nuclear@57 752 /* allocate a page table for the clone */
nuclear@43 753 paddr = alloc_phys_page();
nuclear@43 754
nuclear@43 755 /* copy the page table */
nuclear@57 756 map_page(tblpg, ADDR_TO_PAGE(paddr), 0);
nuclear@43 757 memcpy(ntbl, PGTBL(i), PGSIZE);
nuclear@43 758
nuclear@43 759 /* set the new page directory entry */
nuclear@43 760 ndir[i] = paddr | (pgdir[i] & PGOFFS_MASK);
nuclear@43 761 } else {
nuclear@43 762 ndir[i] = 0;
nuclear@43 763 }
nuclear@43 764 }
nuclear@43 765
nuclear@69 766 /* make a copy of the parent's vmmap tree pointing to the same vm_pages
nuclear@69 767 * and increase the reference counters for all vm_pages.
nuclear@69 768 */
nuclear@69 769 rb_init(&pdest->vmmap, RB_KEY_INT);
nuclear@69 770 rb_begin(&psrc->vmmap);
nuclear@69 771 while((vmnode = rb_next(&psrc->vmmap))) {
nuclear@69 772 struct vm_page *pg = vmnode->data;
nuclear@69 773 pg->nref++;
nuclear@69 774
nuclear@69 775 /* insert the same vm_page to the new tree */
nuclear@69 776 rb_inserti(&pdest->vmmap, pg->vpage, pg);
nuclear@69 777 }
nuclear@69 778
nuclear@55 779 /* for the kernel space we'll just use the same page tables */
nuclear@70 780 for(i=kstart_dirent; i<1023; i++) {
nuclear@49 781 ndir[i] = pgdir[i];
nuclear@47 782 }
nuclear@70 783
nuclear@70 784 /* also point the last page directory entry to the page directory address
nuclear@70 785 * since we're relying on recursive page tables
nuclear@70 786 */
nuclear@69 787 paddr = virt_to_phys((uint32_t)ndir);
nuclear@69 788 ndir[1023] = paddr | PG_PRESENT;
nuclear@47 789
nuclear@64 790 if(cow) {
nuclear@64 791 /* we just changed all the page protection bits, so we need to flush the TLB */
nuclear@64 792 flush_tlb();
nuclear@64 793 }
nuclear@57 794
nuclear@57 795 /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
nuclear@43 796 unmap_page(dirpg);
nuclear@43 797 unmap_page(tblpg);
nuclear@43 798
nuclear@43 799 pgfree(dirpg, 1);
nuclear@43 800 pgfree(tblpg, 1);
nuclear@43 801
nuclear@69 802 /* set the new page directory pointer */
nuclear@69 803 pdest->ctx.pgtbl_paddr = paddr;
nuclear@43 804 }
nuclear@57 805
nuclear@57 806 int get_page_bit(int pgnum, uint32_t bit, int wholepath)
nuclear@57 807 {
nuclear@57 808 int tidx = PAGE_TO_PGTBL(pgnum);
nuclear@57 809 int tent = PAGE_TO_PGTBL_PG(pgnum);
nuclear@57 810 uint32_t *pgtbl = PGTBL(tidx);
nuclear@57 811
nuclear@57 812 if(wholepath) {
nuclear@57 813 if((pgdir[tidx] & bit) == 0) {
nuclear@57 814 return 0;
nuclear@57 815 }
nuclear@57 816 }
nuclear@57 817
nuclear@57 818 return pgtbl[tent] & bit;
nuclear@57 819 }
nuclear@57 820
nuclear@57 821 void set_page_bit(int pgnum, uint32_t bit, int wholepath)
nuclear@57 822 {
nuclear@57 823 int tidx = PAGE_TO_PGTBL(pgnum);
nuclear@57 824 int tent = PAGE_TO_PGTBL_PG(pgnum);
nuclear@57 825 uint32_t *pgtbl = PGTBL(tidx);
nuclear@57 826
nuclear@57 827 if(wholepath) {
nuclear@57 828 pgdir[tidx] |= bit;
nuclear@57 829 }
nuclear@57 830 pgtbl[tent] |= bit;
nuclear@57 831
nuclear@57 832 flush_tlb_page(pgnum);
nuclear@57 833 }
nuclear@57 834
nuclear@57 835 void clear_page_bit(int pgnum, uint32_t bit, int wholepath)
nuclear@57 836 {
nuclear@57 837 int tidx = PAGE_TO_PGTBL(pgnum);
nuclear@57 838 int tent = PAGE_TO_PGTBL_PG(pgnum);
nuclear@57 839 uint32_t *pgtbl = PGTBL(tidx);
nuclear@57 840
nuclear@57 841 if(wholepath) {
nuclear@57 842 pgdir[tidx] &= ~bit;
nuclear@57 843 }
nuclear@57 844
nuclear@57 845 pgtbl[tent] &= ~bit;
nuclear@57 846
nuclear@57 847 flush_tlb_page(pgnum);
nuclear@57 848 }
nuclear@43 849
nuclear@43 850
nuclear@68 851 #define USER_PGDIR_ENTRIES PAGE_TO_PGTBL(KMEM_START_PAGE)
nuclear@68 852 int cons_vmmap(struct rbtree *vmmap)
nuclear@68 853 {
nuclear@68 854 int i, j;
nuclear@68 855
nuclear@68 856 rb_init(vmmap, RB_KEY_INT);
nuclear@68 857
nuclear@68 858 for(i=0; i<USER_PGDIR_ENTRIES; i++) {
nuclear@68 859 if(pgdir[i] & PG_PRESENT) {
nuclear@68 860 /* page table is present, iterate through its 1024 pages */
nuclear@68 861 uint32_t *pgtbl = PGTBL(i);
nuclear@68 862
nuclear@68 863 for(j=0; j<1024; j++) {
nuclear@68 864 if(pgtbl[j] & PG_PRESENT) {
nuclear@68 865 struct vm_page *vmp;
nuclear@68 866
nuclear@68 867 if(!(vmp = malloc(sizeof *vmp))) {
nuclear@68 868 panic("cons_vmap failed to allocate memory");
nuclear@68 869 }
nuclear@68 870 vmp->vpage = i * 1024 + j;
nuclear@68 871 vmp->ppage = ADDR_TO_PAGE(pgtbl[j] & PGENT_ADDR_MASK);
nuclear@68 872 vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK;
nuclear@68 873 vmp->nref = 1; /* when first created assume no sharing */
nuclear@68 874
nuclear@69 875 rb_inserti(vmmap, vmp->vpage, vmp);
nuclear@68 876 }
nuclear@68 877 }
nuclear@68 878 }
nuclear@68 879 }
nuclear@68 880
nuclear@68 881 return 0;
nuclear@68 882 }
nuclear@68 883
/* get_vm_page returns the vm_page of the current process for the virtual
 * page vpg, or null if there is none (or if there is no current process).
 */
struct vm_page *get_vm_page(int vpg)
{
	struct process *cur = get_current_proc();

	return get_vm_page_proc(cur, vpg);
}
nuclear@69 888
nuclear@69 889 struct vm_page *get_vm_page_proc(struct process *p, int vpg)
nuclear@69 890 {
nuclear@69 891 struct rbnode *node;
nuclear@69 892
nuclear@69 893 if(!p || !(node = rb_findi(&p->vmmap, vpg))) {
nuclear@69 894 return 0;
nuclear@69 895 }
nuclear@69 896 return node->data;
nuclear@69 897 }
nuclear@69 898
nuclear@68 899
nuclear@23 900 void dbg_print_vm(int area)
nuclear@23 901 {
nuclear@25 902 struct page_range *node;
nuclear@25 903 int last, intr_state;
nuclear@25 904
nuclear@25 905 intr_state = get_intr_state();
nuclear@25 906 disable_intr();
nuclear@25 907
nuclear@25 908 node = pglist[area];
nuclear@25 909 last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START);
nuclear@23 910
nuclear@23 911 printf("%s vm space\n", area == MEM_USER ? "user" : "kernel");
nuclear@23 912
nuclear@23 913 while(node) {
nuclear@23 914 if(node->start > last) {
nuclear@23 915 printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start));
nuclear@23 916 }
nuclear@23 917
nuclear@23 918 printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start));
nuclear@23 919 if(node->end >= PAGE_COUNT) {
nuclear@23 920 printf("END\n");
nuclear@23 921 } else {
nuclear@23 922 printf("%x\n", PAGE_TO_ADDR(node->end));
nuclear@23 923 }
nuclear@23 924
nuclear@23 925 last = node->end;
nuclear@23 926 node = node->next;
nuclear@23 927 }
nuclear@25 928
nuclear@25 929 set_intr_state(intr_state);
nuclear@23 930 }