kern

annotate src/vm.c @ 80:4db99a52863e

fixed the "endianess" of the text messages in the ATA identify info block. this is the first time I've seen wrong byteorder in ascii text, the ATA committee should be commended.
author John Tsiombikas <nuclear@member.fsf.org>
date Tue, 06 Dec 2011 13:35:39 +0200
parents d3601789d638
children
rev   line source
nuclear@17 1 #include <stdio.h>
nuclear@17 2 #include <string.h>
nuclear@17 3 #include <inttypes.h>
nuclear@69 4 #include <stdlib.h>
nuclear@52 4 #include <assert.h>
nuclear@52 5 #include "config.h"
nuclear@17 6 #include "vm.h"
nuclear@17 7 #include "intr.h"
nuclear@17 8 #include "mem.h"
nuclear@17 9 #include "panic.h"
nuclear@52 10 #include "proc.h"
nuclear@17 11
nuclear@17 12 #define IDMAP_START 0xa0000
nuclear@17 13
nuclear@24 14 #define PGDIR_ADDR 0xfffff000
nuclear@24 15 #define PGTBL_BASE (0xffffffff - 4096 * 1024 + 1)
nuclear@24 16 #define PGTBL(x) ((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))
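
/* Worked example of the recursive page table mapping these macros rely on
 * (assuming PGSIZE == 4096, i.e. 4kb pages):
 *   PGTBL_BASE  = 0xffffffff - 4mb + 1 = 0xffc00000
 *   PGTBL(0)    = 0xffc00000   page table covering 0x00000000 - 0x003fffff
 *   PGTBL(1023) = 0xfffff000   the page directory itself (== PGDIR_ADDR)
 * this works because init_vm points the last page directory entry back at
 * the page directory, turning the top 4mb of virtual memory into a window
 * onto the page tables themselves.
 */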
nuclear@24 17
nuclear@17 18 #define ATTR_PGDIR_MASK 0x3f
nuclear@17 19 #define ATTR_PGTBL_MASK 0x1ff
nuclear@17 20
nuclear@17 21 #define PAGEFAULT 14
nuclear@17 22
nuclear@22 23
nuclear@22 24 struct page_range {
nuclear@22 25 int start, end;
nuclear@22 26 struct page_range *next;
nuclear@22 27 };
nuclear@22 28
nuclear@22 29 /* defined in vm-asm.S */
nuclear@22 30 void enable_paging(void);
nuclear@23 31 void disable_paging(void);
nuclear@23 32 int get_paging_status(void);
nuclear@22 33 void set_pgdir_addr(uint32_t addr);
nuclear@23 34 void flush_tlb(void);
nuclear@23 35 void flush_tlb_addr(uint32_t addr);
nuclear@23 36 #define flush_tlb_page(p) flush_tlb_addr(PAGE_TO_ADDR(p))
nuclear@22 37 uint32_t get_fault_addr(void);
nuclear@22 38
nuclear@23 39 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
nuclear@52 40 static void pgfault(int inum);
nuclear@69 41 static int copy_on_write(struct vm_page *page);
nuclear@22 42 static struct page_range *alloc_node(void);
nuclear@22 43 static void free_node(struct page_range *node);
nuclear@22 44
nuclear@22 45 /* page directory */
nuclear@22 46 static uint32_t *pgdir;
nuclear@22 47
nuclear@22 48 /* 2 lists of free ranges, for kernel memory and user memory */
nuclear@22 49 static struct page_range *pglist[2];
nuclear@22 50 /* list of free page_range structures to be used in the lists */
nuclear@22 51 static struct page_range *node_pool;
nuclear@23 52 /* the first page range for the whole kernel address space, to get things started */
nuclear@23 53 static struct page_range first_node;
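
/* For example (using a hypothetical 3gb user/kernel split, KMEM_START ==
 * 0xc0000000), right after init_vm the kernel list would hold one range:
 *   pglist[MEM_KERNEL]: {start: ADDR_TO_PAGE(0xc0000000),
 *                        end: ADDR_TO_PAGE(PGTBL_BASE), next: 0}
 * ranges are half-open: "end" is the first page not in the range, which is
 * what lets coalesce() merge neighbours by comparing end against start.
 */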
nuclear@22 54
nuclear@22 55
nuclear@26 56 void init_vm(void)
nuclear@17 57 {
nuclear@19 58 uint32_t idmap_end;
nuclear@47 59 int i, kmem_start_pg, pgtbl_base_pg;
nuclear@19 60
nuclear@23 61 /* setup the page tables */
nuclear@18 62 pgdir = (uint32_t*)alloc_phys_page();
nuclear@23 63 memset(pgdir, 0, PGSIZE);
nuclear@24 64 set_pgdir_addr((uint32_t)pgdir);
nuclear@17 65
nuclear@17 66 /* map the video memory and kernel code 1-1 */
nuclear@19 67 get_kernel_mem_range(0, &idmap_end);
nuclear@19 68 map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);
nuclear@17 69
nuclear@24 70 /* make the last page directory entry point to the page directory */
nuclear@68 71 pgdir[1023] = ((uint32_t)pgdir & PGENT_ADDR_MASK) | PG_PRESENT;
nuclear@24 72 pgdir = (uint32_t*)PGDIR_ADDR;
nuclear@24 73
nuclear@23 74 /* set the page fault handler */
nuclear@17 75 interrupt(PAGEFAULT, pgfault);
nuclear@17 76
nuclear@23 77 /* we can enable paging now */
nuclear@17 78 enable_paging();
nuclear@23 79
nuclear@23 80 /* initialize the virtual page allocator */
nuclear@23 81 node_pool = 0;
nuclear@23 82
nuclear@47 83 kmem_start_pg = ADDR_TO_PAGE(KMEM_START);
nuclear@47 84 pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE);
nuclear@47 85
nuclear@47 86 first_node.start = kmem_start_pg;
nuclear@47 87 first_node.end = pgtbl_base_pg;
nuclear@23 88 first_node.next = 0;
nuclear@23 89 pglist[MEM_KERNEL] = &first_node;
nuclear@23 90
nuclear@23 91 pglist[MEM_USER] = alloc_node();
nuclear@26 92 pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end);
nuclear@47 93 pglist[MEM_USER]->end = kmem_start_pg;
nuclear@23 94 pglist[MEM_USER]->next = 0;
nuclear@47 95
nuclear@76 96 /* temporarily map something into every 1024th page of the kernel address
nuclear@47 97 * space to force pre-allocation of all the kernel page-tables, so that
nuclear@47 98 * every process can share the same kernel page tables (see clone_vm) */
nuclear@47 99 for(i=kmem_start_pg; i<pgtbl_base_pg; i+=1024) {
nuclear@47 100 /* if there's already something mapped here, leave it alone */
nuclear@47 101 if(virt_to_phys_page(i) == -1) {
nuclear@47 102 map_page(i, 0, 0);
nuclear@47 103 unmap_page(i);
nuclear@47 104 }
nuclear@47 105 }
nuclear@17 106 }
nuclear@17 107
nuclear@23 108 /* if ppage == -1 we allocate a physical page by calling alloc_phys_page */
nuclear@23 109 int map_page(int vpage, int ppage, unsigned int attr)
nuclear@17 110 {
nuclear@17 111 uint32_t *pgtbl;
nuclear@25 112 int diridx, pgidx, pgon, intr_state;
nuclear@69 113 struct process *p;
nuclear@25 114
nuclear@25 115 intr_state = get_intr_state();
nuclear@25 116 disable_intr();
nuclear@23 117
nuclear@23 118 pgon = get_paging_status();
nuclear@23 119
nuclear@23 120 if(ppage < 0) {
nuclear@23 121 uint32_t addr = alloc_phys_page();
nuclear@23 122 if(!addr) {
nuclear@25 123 set_intr_state(intr_state);
nuclear@23 124 return -1;
nuclear@23 125 }
nuclear@23 126 ppage = ADDR_TO_PAGE(addr);
nuclear@23 127 }
nuclear@23 128
nuclear@23 129 diridx = PAGE_TO_PGTBL(vpage);
nuclear@23 130 pgidx = PAGE_TO_PGTBL_PG(vpage);
nuclear@17 131
nuclear@17 132 if(!(pgdir[diridx] & PG_PRESENT)) {
nuclear@55 133 /* no page table present, we must allocate one */
nuclear@17 134 uint32_t addr = alloc_phys_page();
nuclear@55 135
nuclear@55 136 /* make sure all page directory entries below the kernel vm split
nuclear@55 137 * have the user and writable bits set, otherwise further user
nuclear@55 138 * mappings on the same 4mb block will be unusable in user space.
nuclear@55 139 */
nuclear@55 140 unsigned int pgdir_attr = attr;
nuclear@55 141 if(vpage < ADDR_TO_PAGE(KMEM_START)) {
nuclear@55 142 pgdir_attr |= PG_USER | PG_WRITABLE;
nuclear@55 143 }
nuclear@55 144
nuclear@55 145 pgdir[diridx] = addr | (pgdir_attr & ATTR_PGDIR_MASK) | PG_PRESENT;
nuclear@24 146
nuclear@24 147 pgtbl = pgon ? PGTBL(diridx) : (uint32_t*)addr;
nuclear@18 148 memset(pgtbl, 0, PGSIZE);
nuclear@17 149 } else {
nuclear@24 150 if(pgon) {
nuclear@24 151 pgtbl = PGTBL(diridx);
nuclear@24 152 } else {
nuclear@68 153 pgtbl = (uint32_t*)(pgdir[diridx] & PGENT_ADDR_MASK);
nuclear@24 154 }
nuclear@17 155 }
nuclear@17 156
nuclear@17 157 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
nuclear@23 158 flush_tlb_page(vpage);
nuclear@23 159
nuclear@69 160 /* if it's a new *user* mapping, and there is a current process, update the vmmap */
nuclear@69 161 if((attr & PG_USER) && (p = get_current_proc())) {
nuclear@69 162 struct vm_page *page;
nuclear@69 163
nuclear@69 164 if(!(page = get_vm_page_proc(p, vpage))) {
nuclear@69 165 if(!(page = malloc(sizeof *page))) {
nuclear@69 166 panic("map_page: failed to allocate new vm_page structure");
nuclear@69 167 }
nuclear@69 168 page->vpage = vpage;
nuclear@69 169 page->ppage = ppage;
nuclear@69 170 page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
nuclear@69 171 page->nref = 1;
nuclear@69 172
nuclear@69 173 rb_inserti(&p->vmmap, vpage, page);
nuclear@69 174 } else {
nuclear@69 175 /* otherwise just update the mapping */
nuclear@69 176 page->ppage = ppage;
nuclear@69 177
nuclear@69 178 /* XXX don't touch the flags, as that's how we implement CoW
nuclear@69 179 * by changing the mapping without affecting the vm_page
nuclear@69 180 */
nuclear@69 181 }
nuclear@69 182 }
nuclear@69 183
nuclear@25 184 set_intr_state(intr_state);
nuclear@23 185 return 0;
nuclear@17 186 }
nuclear@17 187
nuclear@43 188 int unmap_page(int vpage)
nuclear@17 189 {
nuclear@17 190 uint32_t *pgtbl;
nuclear@43 191 int res = 0;
nuclear@17 192 int diridx = PAGE_TO_PGTBL(vpage);
nuclear@17 193 int pgidx = PAGE_TO_PGTBL_PG(vpage);
nuclear@17 194
nuclear@25 195 int intr_state = get_intr_state();
nuclear@25 196 disable_intr();
nuclear@25 197
nuclear@17 198 if(!(pgdir[diridx] & PG_PRESENT)) {
nuclear@17 199 goto err;
nuclear@17 200 }
nuclear@26 201 pgtbl = PGTBL(diridx);
nuclear@17 202
nuclear@17 203 if(!(pgtbl[pgidx] & PG_PRESENT)) {
nuclear@17 204 goto err;
nuclear@17 205 }
nuclear@17 206 pgtbl[pgidx] = 0;
nuclear@23 207 flush_tlb_page(vpage);
nuclear@17 208
nuclear@25 209 if(0) {
nuclear@17 210 err:
nuclear@25 211 printf("unmap_page(%d): page already not mapped\n", vpage);
nuclear@43 212 res = -1;
nuclear@25 213 }
nuclear@25 214 set_intr_state(intr_state);
nuclear@43 215 return res;
nuclear@17 216 }
nuclear@17 217
nuclear@22 218 /* if ppg_start is -1, we allocate physical pages to map with alloc_phys_page() */
nuclear@23 219 int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr)
nuclear@17 220 {
nuclear@23 221 int i, phys_pg;
nuclear@17 222
nuclear@17 223 for(i=0; i<pgcount; i++) {
nuclear@26 224 phys_pg = ppg_start < 0 ? -1 : ppg_start + i;
nuclear@23 225 map_page(vpg_start + i, phys_pg, attr);
nuclear@17 226 }
nuclear@23 227 return 0;
nuclear@17 228 }
nuclear@17 229
nuclear@43 230 int unmap_page_range(int vpg_start, int pgcount)
nuclear@43 231 {
nuclear@43 232 int i, res = 0;
nuclear@43 233
nuclear@43 234 for(i=0; i<pgcount; i++) {
nuclear@43 235 if(unmap_page(vpg_start + i) == -1) {
nuclear@43 236 res = -1;
nuclear@43 237 }
nuclear@43 238 }
nuclear@43 239 return res;
nuclear@43 240 }
nuclear@43 241
nuclear@23 242 /* if paddr is 0, we allocate physical pages with alloc_phys_page() */
nuclear@23 243 int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr)
nuclear@17 244 {
nuclear@17 245 int vpg_start, ppg_start, num_pages;
nuclear@17 246
nuclear@23 247 if(!sz) return -1;
nuclear@17 248
nuclear@17 249 if(ADDR_TO_PGOFFS(paddr)) {
nuclear@17 250 panic("map_mem_range called with unaligned physical address: %x\n", paddr);
nuclear@17 251 }
nuclear@17 252
nuclear@17 253 vpg_start = ADDR_TO_PAGE(vaddr);
nuclear@23 254 ppg_start = paddr > 0 ? ADDR_TO_PAGE(paddr) : -1;
nuclear@17 255 num_pages = ADDR_TO_PAGE(sz) + 1;
nuclear@17 256
nuclear@23 257 return map_page_range(vpg_start, num_pages, ppg_start, attr);
nuclear@17 258 }
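
/* Example: init_vm uses this to identity-map the low memory region:
 *   map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);
 * while a hypothetical user mapping backed by newly allocated physical
 * pages would pass 0 for paddr:
 *   map_mem_range(vaddr, sz, 0, PG_USER | PG_WRITABLE);
 */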
nuclear@17 259
nuclear@69 260 /* translate a virtual address to a physical address using the current page table */
nuclear@18 261 uint32_t virt_to_phys(uint32_t vaddr)
nuclear@18 262 {
nuclear@43 263 int pg;
nuclear@43 264 uint32_t pgaddr;
nuclear@43 265
nuclear@43 266 if((pg = virt_to_phys_page(ADDR_TO_PAGE(vaddr))) == -1) {
nuclear@43 267 return 0;
nuclear@43 268 }
nuclear@43 269 pgaddr = PAGE_TO_ADDR(pg);
nuclear@43 270
nuclear@43 271 return pgaddr | ADDR_TO_PGOFFS(vaddr);
nuclear@43 272 }
nuclear@43 273
nuclear@69 274 /* translate a virtual page number to a physical page number using the current page table */
nuclear@43 275 int virt_to_phys_page(int vpg)
nuclear@43 276 {
nuclear@18 277 uint32_t pgaddr, *pgtbl;
nuclear@43 278 int diridx, pgidx;
nuclear@43 279
nuclear@43 280 if(vpg < 0 || vpg >= PAGE_COUNT) {
nuclear@43 281 return -1;
nuclear@43 282 }
nuclear@43 283
nuclear@43 284 diridx = PAGE_TO_PGTBL(vpg);
nuclear@43 285 pgidx = PAGE_TO_PGTBL_PG(vpg);
nuclear@18 286
nuclear@18 287 if(!(pgdir[diridx] & PG_PRESENT)) {
nuclear@43 288 return -1;
nuclear@18 289 }
nuclear@26 290 pgtbl = PGTBL(diridx);
nuclear@18 291
nuclear@18 292 if(!(pgtbl[pgidx] & PG_PRESENT)) {
nuclear@43 293 return -1;
nuclear@18 294 }
nuclear@18 295 pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK;
nuclear@43 296 return ADDR_TO_PAGE(pgaddr);
nuclear@18 297 }
nuclear@18 298
nuclear@69 299 /* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */
nuclear@69 300 uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr)
nuclear@69 301 {
nuclear@69 302 int pg;
nuclear@69 303 uint32_t pgaddr;
nuclear@69 304
nuclear@69 305 if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) {
nuclear@69 306 return 0;
nuclear@69 307 }
nuclear@69 308 pgaddr = PAGE_TO_ADDR(pg);
nuclear@69 309
nuclear@69 310 return pgaddr | ADDR_TO_PGOFFS(vaddr);
nuclear@69 311 }
nuclear@69 312
nuclear@69 313 /* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */
nuclear@69 314 int virt_to_phys_page_proc(struct process *p, int vpg)
nuclear@69 315 {
nuclear@69 316 struct rbnode *node;
nuclear@69 317 assert(p);
nuclear@69 318
nuclear@69 319 if(!(node = rb_findi(&p->vmmap, vpg))) {
nuclear@69 320 return -1;
nuclear@69 321 }
nuclear@69 322 return ((struct vm_page*)node->data)->ppage;
nuclear@69 323 }
nuclear@69 324
nuclear@22 325 /* allocate a contiguous block of virtual memory pages along with
nuclear@22 326 * backing physical memory for them, and update the page table.
nuclear@22 327 */
nuclear@22 328 int pgalloc(int num, int area)
nuclear@22 329 {
nuclear@25 330 int intr_state, ret = -1;
nuclear@22 331 struct page_range *node, *prev, dummy;
nuclear@22 332
nuclear@25 333 intr_state = get_intr_state();
nuclear@25 334 disable_intr();
nuclear@25 335
nuclear@22 336 dummy.next = pglist[area];
nuclear@22 337 node = pglist[area];
nuclear@22 338 prev = &dummy;
nuclear@22 339
nuclear@22 340 while(node) {
nuclear@22 341 if(node->end - node->start >= num) {
nuclear@22 342 ret = node->start;
nuclear@22 343 node->start += num;
nuclear@22 344
nuclear@22 345 if(node->start == node->end) {
nuclear@22 346 prev->next = node->next;
nuclear@22 347
nuclear@22 348 if(node == pglist[area]) {
nuclear@22 349 pglist[area] = node->next;
nuclear@22 350 }
nuclear@22 351 node->next = 0;
nuclear@22 352 free_node(node);
nuclear@22 353 }
nuclear@22 354 break;
nuclear@22 355 }
nuclear@22 356
nuclear@22 357 prev = node;
nuclear@22 358 node = node->next;
nuclear@22 359 }
nuclear@22 360
nuclear@22 361 if(ret >= 0) {
nuclear@55 362 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
nuclear@55 363 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
nuclear@55 364
nuclear@23 365 /* allocate physical storage and map */
nuclear@45 366 if(map_page_range(ret, num, -1, attr) == -1) {
nuclear@45 367 ret = -1;
nuclear@45 368 }
nuclear@45 369 }
nuclear@45 370
nuclear@45 371 set_intr_state(intr_state);
nuclear@45 372 return ret;
nuclear@45 373 }
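
/* Typical pgalloc usage, following the pattern of alloc_node further down:
 * grab whole pages of kernel memory and turn the page number into a pointer:
 *   int pg;
 *   if((pg = pgalloc(1, MEM_KERNEL)) == -1) {
 *       panic("out of memory\n");
 *   }
 *   void *mem = (void*)PAGE_TO_ADDR(pg);
 *   ... use mem ...
 *   pgfree(pg, 1);   (pgfree also releases the backing physical pages)
 */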
nuclear@45 374
nuclear@45 375 int pgalloc_vrange(int start, int num)
nuclear@45 376 {
nuclear@45 377 struct page_range *node, *prev, dummy;
nuclear@45 378 int area, intr_state, ret = -1;
nuclear@45 379
nuclear@45 380 area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER;
nuclear@47 381 if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) {
nuclear@45 382 printf("pgalloc_vrange: invalid range request crossing user/kernel split\n");
nuclear@45 383 return -1;
nuclear@45 384 }
nuclear@45 385
nuclear@45 386 intr_state = get_intr_state();
nuclear@45 387 disable_intr();
nuclear@45 388
nuclear@45 389 dummy.next = pglist[area];
nuclear@45 390 node = pglist[area];
nuclear@45 391 prev = &dummy;
nuclear@45 392
nuclear@45 393 /* check to see if the requested VM range is available */
nuclear@45 395 while(node) {
nuclear@45 396 if(start >= node->start && start + num <= node->end) {
nuclear@49 397 ret = start; /* can do .. */
nuclear@49 398
nuclear@49 399 if(start == node->start) {
nuclear@49 400 /* adjacent to the start of the range */
nuclear@49 401 node->start += num;
nuclear@49 402 } else if(start + num == node->end) {
nuclear@49 403 /* adjacent to the end of the range */
nuclear@49 404 node->end = start;
nuclear@49 405 } else {
nuclear@49 406 /* somewhere in the middle, which means we need
nuclear@49 407 * to allocate a new page_range
nuclear@49 408 */
nuclear@49 409 struct page_range *newnode;
nuclear@49 410
nuclear@49 411 if(!(newnode = alloc_node())) {
nuclear@49 412 panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n");
nuclear@49 413 }
nuclear@49 414 newnode->start = start + num;
nuclear@49 415 newnode->end = node->end;
nuclear@49 416 newnode->next = node->next;
nuclear@49 417
nuclear@49 418 node->end = start;
nuclear@49 419 node->next = newnode;
nuclear@49 420 /* no need to check for null nodes at this point, there's
nuclear@49 421 * certainly stuff at the beginning and the end, otherwise we
nuclear@49 422 * wouldn't be here. so break out of it.
nuclear@49 423 */
nuclear@49 424 break;
nuclear@49 425 }
nuclear@45 426
nuclear@45 427 if(node->start == node->end) {
nuclear@45 428 prev->next = node->next;
nuclear@45 429
nuclear@45 430 if(node == pglist[area]) {
nuclear@45 431 pglist[area] = node->next;
nuclear@45 432 }
nuclear@45 433 node->next = 0;
nuclear@45 434 free_node(node);
nuclear@45 435 }
nuclear@45 436 break;
nuclear@45 437 }
nuclear@45 438
nuclear@45 439 prev = node;
nuclear@45 440 node = node->next;
nuclear@45 441 }
nuclear@45 442
nuclear@45 443 if(ret >= 0) {
nuclear@55 444 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
nuclear@55 445 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
nuclear@55 446
nuclear@45 447 /* allocate physical storage and map */
nuclear@45 448 if(map_page_range(ret, num, -1, attr) == -1) {
nuclear@23 449 ret = -1;
nuclear@23 450 }
nuclear@22 451 }
nuclear@22 452
nuclear@25 453 set_intr_state(intr_state);
nuclear@22 454 return ret;
nuclear@22 455 }
nuclear@22 456
nuclear@22 457 void pgfree(int start, int num)
nuclear@22 458 {
nuclear@33 459 int i, area, intr_state;
nuclear@23 460 struct page_range *node, *new, *prev, *next;
nuclear@23 461
nuclear@25 462 intr_state = get_intr_state();
nuclear@25 463 disable_intr();
nuclear@25 464
nuclear@26 465 for(i=0; i<num; i++) {
nuclear@43 466 int phys_pg = virt_to_phys_page(start + i);
nuclear@43 467 if(phys_pg != -1) {
nuclear@43 468 free_phys_page(PAGE_TO_ADDR(phys_pg));
nuclear@26 469 }
nuclear@26 470 }
nuclear@26 471
nuclear@23 472 if(!(new = alloc_node())) {
nuclear@23 473 panic("pgfree: can't allocate new page_range node to add the freed pages\n");
nuclear@23 474 }
nuclear@23 475 new->start = start;
nuclear@33 476 new->end = start + num;
nuclear@23 477
nuclear@23 478 area = PAGE_TO_ADDR(start) >= KMEM_START ? MEM_KERNEL : MEM_USER;
nuclear@23 479
nuclear@23 480 if(!pglist[area] || pglist[area]->start > start) {
nuclear@23 481 next = new->next = pglist[area];
nuclear@23 482 pglist[area] = new;
nuclear@23 483 prev = 0;
nuclear@23 484
nuclear@23 485 } else {
nuclear@23 486
nuclear@23 487 prev = 0;
nuclear@23 488 node = pglist[area];
nuclear@23 489 next = node ? node->next : 0;
nuclear@23 490
nuclear@23 491 while(node) {
nuclear@23 492 if(!next || next->start > start) {
nuclear@23 493 /* place here, after node */
nuclear@23 494 new->next = next;
nuclear@23 495 node->next = new;
nuclear@23 496 prev = node; /* needed by coalesce after the loop */
nuclear@23 497 break;
nuclear@23 498 }
nuclear@23 499
nuclear@23 500 prev = node;
nuclear@23 501 node = next;
nuclear@23 502 next = node ? node->next : 0;
nuclear@23 503 }
nuclear@23 504 }
nuclear@23 505
nuclear@23 506 coalesce(prev, new, next);
nuclear@25 507 set_intr_state(intr_state);
nuclear@23 508 }
nuclear@23 509
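/* coalesce merges the newly freed range (mid) with its neighbours in the
 * sorted free list whenever they touch. for example with low: [5, 10),
 * mid: [10, 20) and high: [20, 32), mid first absorbs high giving [10, 32),
 * then low absorbs mid, leaving the single free range [5, 32).
 */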
nuclear@23 510 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high)
nuclear@23 511 {
nuclear@23 512 if(high) {
nuclear@23 513 if(mid->end == high->start) {
nuclear@23 514 mid->end = high->end;
nuclear@23 515 mid->next = high->next;
nuclear@23 516 free_node(high);
nuclear@23 517 }
nuclear@23 518 }
nuclear@23 519
nuclear@23 520 if(low) {
nuclear@23 521 if(low->end == mid->start) {
nuclear@23 522 low->end = mid->end;
nuclear@23 523 low->next = mid->next;
nuclear@23 524 free_node(mid);
nuclear@23 525 }
nuclear@23 526 }
nuclear@22 527 }
nuclear@22 528
nuclear@52 529 static void pgfault(int inum)
nuclear@17 530 {
nuclear@52 531 struct intr_frame *frm = get_intr_frame();
nuclear@52 532 uint32_t fault_addr = get_fault_addr();
nuclear@52 533
nuclear@52 534 /* the fault occurred in user space */
nuclear@55 535 if(frm->err & PG_USER) {
nuclear@52 536 int fault_page = ADDR_TO_PAGE(fault_addr);
nuclear@52 537 struct process *proc = get_current_proc();
nuclear@69 538 printf("DBG: page fault in user space (pid: %d)\n", proc->id);
nuclear@52 539 assert(proc);
nuclear@52 540
nuclear@52 541 if(frm->err & PG_PRESENT) {
nuclear@69 542 /* it's not due to a missing page; fetch the attributes */
nuclear@69 543 int pgnum = ADDR_TO_PAGE(fault_addr);
nuclear@69 544
nuclear@69 545 if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) {
nuclear@69 546 /* a write permission fault might be a CoW fault or just an error.
nuclear@69 547 * fetch the vm_page permissions to check if this is supposed to be
nuclear@69 548 * a writable page (which means we should CoW).
nuclear@69 549 */
nuclear@69 550 struct vm_page *page = get_vm_page_proc(proc, pgnum);
nuclear@69 551
nuclear@69 552 if(page && (page->flags & PG_WRITABLE)) {
nuclear@69 553 /* ok this is a CoW fault */
nuclear@69 554 if(copy_on_write(page) == -1) {
nuclear@69 555 panic("copy on write failed!");
nuclear@69 556 }
nuclear@69 557 return; /* done, allow the process to restart the instruction and continue */
nuclear@69 558 } else {
nuclear@69 559 /* TODO eventually we'll SIGSEGV the process, for now just panic.
nuclear@69 560 */
nuclear@69 561 goto unhandled;
nuclear@69 562 }
nuclear@69 563 }
nuclear@52 564 goto unhandled;
nuclear@52 565 }
nuclear@52 566
nuclear@69 567 /* so it's a missing page... ok */
nuclear@69 568
nuclear@52 569 /* detect if it's an automatic stack growth deal */
nuclear@55 570 if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
nuclear@55 571 int num_pages = proc->user_stack_pg - fault_page;
nuclear@52 572 printf("growing user (%d) stack by %d pages\n", proc->id, num_pages);
nuclear@52 573
nuclear@52 574 if(pgalloc_vrange(fault_page, num_pages) != fault_page) {
nuclear@52 575 printf("failed to allocate VM for stack growth\n");
nuclear@52 576 /* TODO: in the future we'd SIGSEGV the process here, for now just panic */
nuclear@52 577 goto unhandled;
nuclear@52 578 }
nuclear@55 579 proc->user_stack_pg = fault_page;
nuclear@52 580 return;
nuclear@52 581 }
nuclear@69 582
nuclear@69 583 /* it's not a stack growth fault. since we don't do swapping yet, just
nuclear@69 584 * fall to unhandled and panic
nuclear@69 585 */
nuclear@52 586 }
nuclear@52 587
nuclear@52 588 unhandled:
nuclear@17 589 printf("~~~~ PAGE FAULT ~~~~\n");
nuclear@52 590 printf("fault address: %x\n", fault_addr);
nuclear@69 591 printf("error code: %x\n", frm->err);
nuclear@17 592
nuclear@51 593 if(frm->err & PG_PRESENT) {
nuclear@51 594 if(frm->err & 8) {
nuclear@17 595 printf("reserved bit set in some paging structure\n");
nuclear@17 596 } else {
nuclear@55 597 printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "WRITE" : "READ");
nuclear@55 598 printf("in %s mode\n", (frm->err & PG_USER) ? "user" : "kernel");
nuclear@17 599 }
nuclear@17 600 } else {
nuclear@17 601 printf("page not present\n");
nuclear@17 602 }
nuclear@19 603
nuclear@19 604 panic("unhandled page fault\n");
nuclear@17 605 }
nuclear@22 606
nuclear@69 607 /* copy-on-write handler, called from pgfault above */
nuclear@69 608 static int copy_on_write(struct vm_page *page)
nuclear@69 609 {
nuclear@76 610 int tmpvpg;
nuclear@69 611 struct vm_page *newpage;
nuclear@69 612 struct rbnode *vmnode;
nuclear@69 613 struct process *p = get_current_proc();
nuclear@69 614
nuclear@69 615 assert(page->nref > 0);
nuclear@69 616
nuclear@69 617 /* first of all check the refcount. If it's 1 then we don't need to copy
nuclear@69 618 * anything. This will happen when all forked processes except one have
nuclear@69 619 * marked this read-write again after faulting.
nuclear@69 620 */
nuclear@69 621 if(page->nref == 1) {
nuclear@69 622 set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY);
nuclear@69 623 return 0;
nuclear@69 624 }
nuclear@69 625
nuclear@69 626 /* ok let's make a copy and mark it read-write */
nuclear@69 627 if(!(newpage = malloc(sizeof *newpage))) {
nuclear@69 628 printf("copy_on_write: failed to allocate new vm_page\n");
nuclear@69 629 return -1;
nuclear@69 630 }
nuclear@69 631 newpage->vpage = page->vpage;
nuclear@69 632 newpage->flags = page->flags;
nuclear@69 633
nuclear@76 634 if((tmpvpg = pgalloc(1, MEM_KERNEL)) == -1) {
nuclear@69 635 printf("copy_on_write: failed to allocate physical page\n");
nuclear@69 636 /* XXX proper action: SIGSEGV */
nuclear@69 637 return -1;
nuclear@69 638 }
nuclear@76 639 newpage->ppage = virt_to_phys_page(tmpvpg);
nuclear@69 640 newpage->nref = 1;
nuclear@69 641
nuclear@76 642 /* do the copy */
nuclear@76 643 memcpy((void*)PAGE_TO_ADDR(tmpvpg), (void*)PAGE_TO_ADDR(page->vpage), PGSIZE);
nuclear@76 644 unmap_page(tmpvpg);
nuclear@76 645 pgfree(tmpvpg, 1);
nuclear@76 646
nuclear@69 647 /* set the new vm_page in the process vmmap */
nuclear@69 648 vmnode = rb_findi(&p->vmmap, newpage->vpage);
nuclear@69 649 assert(vmnode && vmnode->data == page); /* shouldn't be able to fail */
nuclear@69 650 vmnode->data = newpage;
nuclear@69 651
nuclear@69 652 /* also update the page table */
nuclear@69 653 map_page(newpage->vpage, newpage->ppage, newpage->flags);
nuclear@69 654
nuclear@69 655 /* finally decrease the refcount on the original vm_page struct */
nuclear@69 656 page->nref--;
nuclear@69 657 return 0;
nuclear@69 658 }
nuclear@69 659
nuclear@22 660 /* --- page range list node management --- */
nuclear@23 661 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range))
nuclear@23 662
nuclear@22 663 static struct page_range *alloc_node(void)
nuclear@22 664 {
nuclear@22 665 struct page_range *node;
nuclear@23 666 int pg, i;
nuclear@22 667
nuclear@22 668 if(node_pool) {
nuclear@22 669 node = node_pool;
nuclear@22 670 node_pool = node_pool->next;
nuclear@47 671 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
nuclear@22 672 return node;
nuclear@22 673 }
nuclear@22 674
nuclear@23 675 /* no node structures in the pool, we need to allocate a new page,
nuclear@23 676 * split it up into node structures, add them in the pool, and
nuclear@23 677 * allocate one of them.
nuclear@22 678 */
nuclear@23 679 if((pg = pgalloc(1, MEM_KERNEL)) == -1) {
nuclear@22 680 panic("ran out of physical memory while allocating VM range structures\n");
nuclear@22 681 }
nuclear@23 682 node_pool = (struct page_range*)PAGE_TO_ADDR(pg);
nuclear@22 683
nuclear@23 684 /* link them up, skip the first as we'll just allocate it anyway */
nuclear@23 685 for(i=2; i<NODES_IN_PAGE; i++) {
nuclear@23 686 node_pool[i - 1].next = node_pool + i;
nuclear@23 687 }
nuclear@23 688 node_pool[NODES_IN_PAGE - 1].next = 0;
nuclear@23 689
nuclear@23 690 /* grab the first and return it */
nuclear@23 691 node = node_pool++;
nuclear@47 692 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
nuclear@23 693 return node;
nuclear@22 694 }
nuclear@22 695
nuclear@22 696 static void free_node(struct page_range *node)
nuclear@22 697 {
nuclear@22 698 node->next = node_pool;
nuclear@22 699 node_pool = node;
nuclear@47 700 /*printf("free_node\n");*/
nuclear@22 701 }
nuclear@23 702
nuclear@47 703 /* clone_vm makes a copy of the current page tables, thus duplicating the
nuclear@47 704 * virtual address space.
nuclear@47 705 *
nuclear@47 706 * For the kernel part of the address space (last 256 page directory entries)
nuclear@47 707 * we don't want to duplicate the page tables, just point all page directory
nuclear@47 708 * entries to the same set of page tables.
nuclear@43 709 *
nuclear@57 710 * If "cow" is non-zero it also marks the shared user-space pages as
nuclear@57 711 * read-only, to implement copy-on-write.
nuclear@43 712 */
nuclear@69 713 void clone_vm(struct process *pdest, struct process *psrc, int cow)
nuclear@43 714 {
nuclear@57 715 int i, j, dirpg, tblpg, kstart_dirent;
nuclear@43 716 uint32_t paddr;
nuclear@43 717 uint32_t *ndir, *ntbl;
nuclear@69 718 struct rbnode *vmnode;
nuclear@43 719
nuclear@47 720 /* allocate the new page directory */
nuclear@43 721 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
nuclear@43 722 panic("clone_vmem: failed to allocate page directory page\n");
nuclear@43 723 }
nuclear@43 724 ndir = (uint32_t*)PAGE_TO_ADDR(dirpg);
nuclear@43 725
nuclear@47 726 /* allocate a virtual page for temporarily mapping all new
nuclear@47 727 * page tables while we populate them.
nuclear@47 728 */
nuclear@43 729 if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) {
nuclear@43 730 panic("clone_vmem: failed to allocate page table page\n");
nuclear@43 731 }
nuclear@43 732 ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg);
nuclear@43 733
nuclear@43 734 /* we will allocate physical pages and map them to this virtual page
nuclear@57 735 * as needed in the loop below. we don't need the physical page allocated
nuclear@57 736 * by pgalloc.
nuclear@43 737 */
nuclear@49 738 free_phys_page(virt_to_phys((uint32_t)ntbl));
nuclear@43 739
nuclear@48 740 kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024;
nuclear@47 741
nuclear@47 742 /* user space */
nuclear@48 743 for(i=0; i<kstart_dirent; i++) {
nuclear@43 744 if(pgdir[i] & PG_PRESENT) {
nuclear@64 745 if(cow) {
nuclear@64 746 /* first go through all the entries of the existing
nuclear@64 747 * page table and unset the writable bits.
nuclear@64 748 */
nuclear@64 749 for(j=0; j<1024; j++) {
nuclear@69 750 if(PGTBL(i)[j] & PG_PRESENT) {
nuclear@69 751 clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
nuclear@69 752 /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
nuclear@69 753 }
nuclear@64 754 }
nuclear@57 755 }
nuclear@57 756
nuclear@57 757 /* allocate a page table for the clone */
nuclear@43 758 paddr = alloc_phys_page();
nuclear@43 759
nuclear@43 760 /* copy the page table */
nuclear@57 761 map_page(tblpg, ADDR_TO_PAGE(paddr), 0);
nuclear@43 762 memcpy(ntbl, PGTBL(i), PGSIZE);
nuclear@43 763
nuclear@43 764 /* set the new page directory entry */
nuclear@43 765 ndir[i] = paddr | (pgdir[i] & PGOFFS_MASK);
nuclear@43 766 } else {
nuclear@43 767 ndir[i] = 0;
nuclear@43 768 }
nuclear@43 769 }
nuclear@43 770
nuclear@69 771 /* make a copy of the parent's vmmap tree pointing to the same vm_pages
nuclear@69 772 * and increase the reference counters for all vm_pages.
nuclear@69 773 */
nuclear@69 774 rb_init(&pdest->vmmap, RB_KEY_INT);
nuclear@69 775 rb_begin(&psrc->vmmap);
nuclear@69 776 while((vmnode = rb_next(&psrc->vmmap))) {
nuclear@69 777 struct vm_page *pg = vmnode->data;
nuclear@69 778 pg->nref++;
nuclear@69 779
nuclear@69 780 /* insert the same vm_page to the new tree */
nuclear@69 781 rb_inserti(&pdest->vmmap, pg->vpage, pg);
nuclear@69 782 }
nuclear@69 783
nuclear@55 784 /* for the kernel space we'll just use the same page tables */
nuclear@70 785 for(i=kstart_dirent; i<1023; i++) {
nuclear@49 786 ndir[i] = pgdir[i];
nuclear@47 787 }
nuclear@70 788
nuclear@70 789 /* also point the last page directory entry to the page directory address
nuclear@70 790 * since we're relying on recursive page tables
nuclear@70 791 */
nuclear@69 792 paddr = virt_to_phys((uint32_t)ndir);
nuclear@69 793 ndir[1023] = paddr | PG_PRESENT;
nuclear@47 794
nuclear@64 795 if(cow) {
nuclear@64 796 /* we just changed all the page protection bits, so we need to flush the TLB */
nuclear@64 797 flush_tlb();
nuclear@64 798 }
nuclear@57 799
nuclear@57 800 /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
nuclear@43 801 unmap_page(dirpg);
nuclear@43 802 unmap_page(tblpg);
nuclear@43 803
nuclear@43 804 pgfree(dirpg, 1);
nuclear@43 805 pgfree(tblpg, 1);
nuclear@43 806
nuclear@69 807 /* set the new page directory pointer */
nuclear@69 808 pdest->ctx.pgtbl_paddr = paddr;
nuclear@43 809 }
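
/* Sketch of the intended call site (the real one lives in proc.c and may
 * differ in detail): fork would clone the parent's address space with cow
 * set, so parent and child share user pages read-only until one of them
 * writes:
 *   clone_vm(newproc, get_current_proc(), 1);
 * the first write then faults into pgfault, which calls copy_on_write.
 */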
nuclear@57 810
nuclear@72 811 /* cleanup_vm is called by exit to clean up any memory used by the process */
nuclear@72 812 void cleanup_vm(struct process *p)
nuclear@72 813 {
nuclear@72 814 struct rbnode *vmnode;
nuclear@72 815
nuclear@72 816 /* go through the vm map and reduce refcounts all around.
nuclear@72 817 * when a ref goes to 0, free the physical page
nuclear@72 818 */
nuclear@72 819 rb_begin(&p->vmmap);
nuclear@72 820 while((vmnode = rb_next(&p->vmmap))) {
nuclear@72 821 struct vm_page *page = vmnode->data;
nuclear@74 822
nuclear@74 823 /* skip kernel pages obviously */
nuclear@74 824 if(!(page->flags & PG_USER)) {
nuclear@74 825 continue;
nuclear@74 826 }
nuclear@74 827
nuclear@72 828 if(--page->nref <= 0) {
nuclear@72 829 /* free the physical page if nref goes to 0 */
nuclear@72 830 free_phys_page(PAGE_TO_ADDR(page->ppage));
nuclear@72 831 }
nuclear@72 832 }
nuclear@72 833
nuclear@72 834 /* destroying the tree will free the nodes */
nuclear@72 835 rb_destroy(&p->vmmap);
nuclear@72 836 }
nuclear@72 837
nuclear@72 838
nuclear@57 839 int get_page_bit(int pgnum, uint32_t bit, int wholepath)
nuclear@57 840 {
nuclear@57 841 int tidx = PAGE_TO_PGTBL(pgnum);
nuclear@57 842 int tent = PAGE_TO_PGTBL_PG(pgnum);
nuclear@57 843 uint32_t *pgtbl = PGTBL(tidx);
nuclear@57 844
nuclear@57 845 if(wholepath) {
nuclear@57 846 if((pgdir[tidx] & bit) == 0) {
nuclear@57 847 return 0;
nuclear@57 848 }
nuclear@57 849 }
nuclear@57 850
nuclear@57 851 return pgtbl[tent] & bit;
nuclear@57 852 }
nuclear@57 853
nuclear@57 854 void set_page_bit(int pgnum, uint32_t bit, int wholepath)
nuclear@57 855 {
nuclear@57 856 int tidx = PAGE_TO_PGTBL(pgnum);
nuclear@57 857 int tent = PAGE_TO_PGTBL_PG(pgnum);
nuclear@57 858 uint32_t *pgtbl = PGTBL(tidx);
nuclear@57 859
nuclear@57 860 if(wholepath) {
nuclear@57 861 pgdir[tidx] |= bit;
nuclear@57 862 }
nuclear@57 863 pgtbl[tent] |= bit;
nuclear@57 864
nuclear@57 865 flush_tlb_page(pgnum);
nuclear@57 866 }
nuclear@57 867
nuclear@57 868 void clear_page_bit(int pgnum, uint32_t bit, int wholepath)
nuclear@57 869 {
nuclear@57 870 int tidx = PAGE_TO_PGTBL(pgnum);
nuclear@57 871 int tent = PAGE_TO_PGTBL_PG(pgnum);
nuclear@57 872 uint32_t *pgtbl = PGTBL(tidx);
nuclear@57 873
nuclear@57 874 if(wholepath) {
nuclear@57 875 pgdir[tidx] &= ~bit;
nuclear@57 876 }
nuclear@57 877
nuclear@57 878 pgtbl[tent] &= ~bit;
nuclear@57 879
nuclear@57 880 flush_tlb_page(pgnum);
nuclear@57 881 }
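
/* The wholepath argument of the three functions above selects whether the
 * bit is changed in the page table entry only, or in the page directory
 * entry as well. callers in this file pass PAGE_ONLY (a constant presumably
 * defined in vm.h) when toggling PG_WRITABLE for copy-on-write, so the
 * directory-level attributes stay untouched.
 */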
nuclear@43 882
nuclear@43 883
nuclear@68 884 #define USER_PGDIR_ENTRIES PAGE_TO_PGTBL(KMEM_START_PAGE)
nuclear@68 885 int cons_vmmap(struct rbtree *vmmap)
nuclear@68 886 {
nuclear@68 887 int i, j;
nuclear@68 888
nuclear@68 889 rb_init(vmmap, RB_KEY_INT);
nuclear@68 890
nuclear@68 891 for(i=0; i<USER_PGDIR_ENTRIES; i++) {
nuclear@68 892 if(pgdir[i] & PG_PRESENT) {
nuclear@68 893 /* page table is present, iterate through its 1024 pages */
nuclear@68 894 uint32_t *pgtbl = PGTBL(i);
nuclear@68 895
nuclear@68 896 for(j=0; j<1024; j++) {
nuclear@68 897 if(pgtbl[j] & PG_PRESENT) {
nuclear@68 898 struct vm_page *vmp;
nuclear@68 899
nuclear@68 900 if(!(vmp = malloc(sizeof *vmp))) {
nuclear@68 901 panic("cons_vmap failed to allocate memory");
nuclear@68 902 }
nuclear@68 903 vmp->vpage = i * 1024 + j;
nuclear@68 904 vmp->ppage = ADDR_TO_PAGE(pgtbl[j] & PGENT_ADDR_MASK);
nuclear@68 905 vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK;
nuclear@68 906 vmp->nref = 1; /* when first created assume no sharing */
nuclear@68 907
nuclear@69 908 rb_inserti(vmmap, vmp->vpage, vmp);
nuclear@68 909 }
nuclear@68 910 }
nuclear@68 911 }
nuclear@68 912 }
nuclear@68 913
nuclear@68 914 return 0;
nuclear@68 915 }
nuclear@68 916
nuclear@69 917 struct vm_page *get_vm_page(int vpg)
nuclear@69 918 {
nuclear@69 919 return get_vm_page_proc(get_current_proc(), vpg);
nuclear@69 920 }
nuclear@69 921
nuclear@69 922 struct vm_page *get_vm_page_proc(struct process *p, int vpg)
nuclear@69 923 {
nuclear@69 924 struct rbnode *node;
nuclear@69 925
nuclear@69 926 if(!p || !(node = rb_findi(&p->vmmap, vpg))) {
nuclear@69 927 return 0;
nuclear@69 928 }
nuclear@69 929 return node->data;
nuclear@69 930 }
nuclear@69 931
nuclear@68 932
nuclear@23 933 void dbg_print_vm(int area)
nuclear@23 934 {
nuclear@25 935 struct page_range *node;
nuclear@25 936 int last, intr_state;
nuclear@25 937
nuclear@25 938 intr_state = get_intr_state();
nuclear@25 939 disable_intr();
nuclear@25 940
nuclear@25 941 node = pglist[area];
nuclear@25 942 last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START);
nuclear@23 943
nuclear@23 944 printf("%s vm space\n", area == MEM_USER ? "user" : "kernel");
nuclear@23 945
nuclear@23 946 while(node) {
nuclear@23 947 if(node->start > last) {
nuclear@23 948 printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start));
nuclear@23 949 }
nuclear@23 950
nuclear@23 951 printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start));
nuclear@23 952 if(node->end >= PAGE_COUNT) {
nuclear@23 953 printf("END\n");
nuclear@23 954 } else {
nuclear@23 955 printf("%x\n", PAGE_TO_ADDR(node->end));
nuclear@23 956 }
nuclear@23 957
nuclear@23 958 last = node->end;
nuclear@23 959 node = node->next;
nuclear@23 960 }
nuclear@25 961
nuclear@25 962 set_intr_state(intr_state);
nuclear@23 963 }