kern: src/vm.c @ 80:4db99a52863e

fixed the "endianess" of the text messages in the ATA identify info block. this is the first time I've seen wrong byteorder in ascii text, the ATA committee should be commended.
author John Tsiombikas <nuclear@member.fsf.org>
date Tue, 06 Dec 2011 13:35:39 +0200

#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <assert.h>
#include "config.h"
#include "vm.h"
#include "intr.h"
#include "mem.h"
#include "panic.h"
#include "proc.h"

#define IDMAP_START	0xa0000

#define PGDIR_ADDR	0xfffff000
#define PGTBL_BASE	(0xffffffff - 4096 * 1024 + 1)
#define PGTBL(x)	((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))
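
/* The page tables are reachable through a recursive mapping: init_vm points
 * the last page directory entry back at the page directory itself, so all
 * 1024 page tables appear as one contiguous 4MB window at the top of the
 * address space. Assuming the usual PGSIZE of 4096, the arithmetic works out
 * like this:
 *
 *   PGTBL_BASE  = 0xffffffff - 4MB + 1 = 0xffc00000
 *   PGTBL(x)    = 0xffc00000 + x * 4096   (page table covering the x-th 4MB)
 *   PGTBL(1023) = 0xfffff000 = PGDIR_ADDR (the page directory itself)
 */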

#define ATTR_PGDIR_MASK	0x3f
#define ATTR_PGTBL_MASK	0x1ff

#define PAGEFAULT	14


struct page_range {
	int start, end;
	struct page_range *next;
};
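
/* Free ranges are half-open intervals of page numbers, [start, end): a node
 * with start == end is empty and gets unlinked. The allocators below keep
 * each list sorted by start and coalesce adjacent nodes when pages are freed.
 */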

/* defined in vm-asm.S */
void enable_paging(void);
void disable_paging(void);
int get_paging_status(void);
void set_pgdir_addr(uint32_t addr);
void flush_tlb(void);
void flush_tlb_addr(uint32_t addr);
#define flush_tlb_page(p)	flush_tlb_addr(PAGE_TO_ADDR(p))
uint32_t get_fault_addr(void);

static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
static void pgfault(int inum);
static int copy_on_write(struct vm_page *page);
static struct page_range *alloc_node(void);
static void free_node(struct page_range *node);

/* page directory */
static uint32_t *pgdir;

/* 2 lists of free ranges, for kernel memory and user memory */
static struct page_range *pglist[2];
/* list of free page_range structures to be used in the lists */
static struct page_range *node_pool;
/* the first page range for the whole kernel address space, to get things started */
static struct page_range first_node;

void init_vm(void)
{
	uint32_t idmap_end;
	int i, kmem_start_pg, pgtbl_base_pg;

	/* setup the page tables */
	pgdir = (uint32_t*)alloc_phys_page();
	memset(pgdir, 0, PGSIZE);
	set_pgdir_addr((uint32_t)pgdir);

	/* map the video memory and kernel code 1-1 */
	get_kernel_mem_range(0, &idmap_end);
	map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);

	/* make the last page directory entry point to the page directory */
	pgdir[1023] = ((uint32_t)pgdir & PGENT_ADDR_MASK) | PG_PRESENT;
	pgdir = (uint32_t*)PGDIR_ADDR;

	/* set the page fault handler */
	interrupt(PAGEFAULT, pgfault);

	/* we can enable paging now */
	enable_paging();

	/* initialize the virtual page allocator */
	node_pool = 0;

	kmem_start_pg = ADDR_TO_PAGE(KMEM_START);
	pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE);

	first_node.start = kmem_start_pg;
	first_node.end = pgtbl_base_pg;
	first_node.next = 0;
	pglist[MEM_KERNEL] = &first_node;

	pglist[MEM_USER] = alloc_node();
	pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end);
	pglist[MEM_USER]->end = kmem_start_pg;
	pglist[MEM_USER]->next = 0;

	/* temporarily map something into every 1024th page of the kernel address
	 * space to force pre-allocation of all the kernel page-tables
	 */
	for(i=kmem_start_pg; i<pgtbl_base_pg; i+=1024) {
		/* if there's already something mapped here, leave it alone */
		if(virt_to_phys_page(i) == -1) {
			map_page(i, 0, 0);
			unmap_page(i);
		}
	}
}
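
/* After init_vm the virtual address space looks roughly like this (assuming
 * the usual 4KB pages):
 *
 *   [IDMAP_START, idmap_end)   identity-mapped video memory and kernel image
 *   [idmap_end, KMEM_START)    user virtual memory     (pglist[MEM_USER])
 *   [KMEM_START, PGTBL_BASE)   kernel virtual memory   (pglist[MEM_KERNEL])
 *   [PGTBL_BASE, 4GB)          recursively mapped page tables / directory
 *
 * Touching every 1024th kernel page above pre-allocates all the kernel page
 * tables up front, presumably so that clone_vm can simply share the kernel
 * page directory entries between processes without them ever going stale.
 */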

/* if ppage == -1 we allocate a physical page by calling alloc_phys_page */
int map_page(int vpage, int ppage, unsigned int attr)
{
	uint32_t *pgtbl;
	int diridx, pgidx, pgon, intr_state;
	struct process *p;

	intr_state = get_intr_state();
	disable_intr();

	pgon = get_paging_status();

	if(ppage < 0) {
		uint32_t addr = alloc_phys_page();
		if(!addr) {
			set_intr_state(intr_state);
			return -1;
		}
		ppage = ADDR_TO_PAGE(addr);
	}

	diridx = PAGE_TO_PGTBL(vpage);
	pgidx = PAGE_TO_PGTBL_PG(vpage);

	if(!(pgdir[diridx] & PG_PRESENT)) {
		/* no page table present, we must allocate one */
		uint32_t addr = alloc_phys_page();

		/* make sure all page directory entries below the kernel vm split
		 * have the user and writable bits set, otherwise further user
		 * mappings on the same 4mb block will be unusable in user space.
		 */
		unsigned int pgdir_attr = attr;
		if(vpage < ADDR_TO_PAGE(KMEM_START)) {
			pgdir_attr |= PG_USER | PG_WRITABLE;
		}

		pgdir[diridx] = addr | (pgdir_attr & ATTR_PGDIR_MASK) | PG_PRESENT;

		pgtbl = pgon ? PGTBL(diridx) : (uint32_t*)addr;
		memset(pgtbl, 0, PGSIZE);
	} else {
		if(pgon) {
			pgtbl = PGTBL(diridx);
		} else {
			pgtbl = (uint32_t*)(pgdir[diridx] & PGENT_ADDR_MASK);
		}
	}

	pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
	flush_tlb_page(vpage);

	/* if it's a new *user* mapping, and there is a current process, update the vmmap */
	if((attr & PG_USER) && (p = get_current_proc())) {
		struct vm_page *page;

		if(!(page = get_vm_page_proc(p, vpage))) {
			if(!(page = malloc(sizeof *page))) {
				panic("map_page: failed to allocate new vm_page structure");
			}
			page->vpage = vpage;
			page->ppage = ppage;
			page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
			page->nref = 1;

			rb_inserti(&p->vmmap, vpage, page);
		} else {
			/* otherwise just update the mapping */
			page->ppage = ppage;

			/* XXX don't touch the flags, as that's how we implement CoW,
			 * by changing the mapping without affecting the vm_page
			 */
		}
	}

	set_intr_state(intr_state);
	return 0;
}
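
/* A couple of hypothetical examples of the interface above (not actual call
 * sites in this kernel, just illustrations of the arguments):
 *
 *   // identity-map one page of the VGA text buffer, kernel-only, writable
 *   map_page(ADDR_TO_PAGE(0xb8000), ADDR_TO_PAGE(0xb8000), PG_WRITABLE);
 *
 *   // map a user page, letting map_page grab a physical page itself
 *   map_page(some_user_vpage, -1, PG_USER | PG_WRITABLE);
 */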

int unmap_page(int vpage)
{
	uint32_t *pgtbl;
	int res = 0;
	int diridx = PAGE_TO_PGTBL(vpage);
	int pgidx = PAGE_TO_PGTBL_PG(vpage);

	int intr_state = get_intr_state();
	disable_intr();

	if(!(pgdir[diridx] & PG_PRESENT)) {
		goto err;
	}
	pgtbl = PGTBL(diridx);

	if(!(pgtbl[pgidx] & PG_PRESENT)) {
		goto err;
	}
	pgtbl[pgidx] = 0;
	flush_tlb_page(vpage);

	/* the error block below is only reachable through the goto err jumps above */
	if(0) {
err:
		printf("unmap_page(%d): page already not mapped\n", vpage);
		res = -1;
	}
	set_intr_state(intr_state);
	return res;
}

/* if ppg_start is -1, we allocate physical pages to map with alloc_phys_page() */
int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr)
{
	int i, phys_pg;

	for(i=0; i<pgcount; i++) {
		phys_pg = ppg_start < 0 ? -1 : ppg_start + i;
		map_page(vpg_start + i, phys_pg, attr);
	}
	return 0;
}

int unmap_page_range(int vpg_start, int pgcount)
{
	int i, res = 0;

	for(i=0; i<pgcount; i++) {
		if(unmap_page(vpg_start + i) == -1) {
			res = -1;
		}
	}
	return res;
}

/* if paddr is 0, we allocate physical pages with alloc_phys_page() */
int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr)
{
	int vpg_start, ppg_start, num_pages;

	if(!sz) return -1;

	if(ADDR_TO_PGOFFS(paddr)) {
		panic("map_mem_range called with unaligned physical address: %x\n", paddr);
	}

	vpg_start = ADDR_TO_PAGE(vaddr);
	ppg_start = paddr > 0 ? ADDR_TO_PAGE(paddr) : -1;
	num_pages = ADDR_TO_PAGE(sz) + 1;

	return map_page_range(vpg_start, num_pages, ppg_start, attr);
}
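
/* Note on the page count above: assuming ADDR_TO_PAGE is a plain shift by the
 * page size, ADDR_TO_PAGE(sz) + 1 always rounds up, at the cost of mapping one
 * extra page when sz happens to be an exact multiple of the page size.
 */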

/* translate a virtual address to a physical address using the current page table */
uint32_t virt_to_phys(uint32_t vaddr)
{
	int pg;
	uint32_t pgaddr;

	if((pg = virt_to_phys_page(ADDR_TO_PAGE(vaddr))) == -1) {
		return 0;
	}
	pgaddr = PAGE_TO_ADDR(pg);

	return pgaddr | ADDR_TO_PGOFFS(vaddr);
}

/* translate a virtual page number to a physical page number using the current page table */
int virt_to_phys_page(int vpg)
{
	uint32_t pgaddr, *pgtbl;
	int diridx, pgidx;

	if(vpg < 0 || vpg >= PAGE_COUNT) {
		return -1;
	}

	diridx = PAGE_TO_PGTBL(vpg);
	pgidx = PAGE_TO_PGTBL_PG(vpg);

	if(!(pgdir[diridx] & PG_PRESENT)) {
		return -1;
	}
	pgtbl = PGTBL(diridx);

	if(!(pgtbl[pgidx] & PG_PRESENT)) {
		return -1;
	}
	pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK;
	return ADDR_TO_PAGE(pgaddr);
}

/* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */
uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr)
{
	int pg;
	uint32_t pgaddr;

	if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) {
		return 0;
	}
	pgaddr = PAGE_TO_ADDR(pg);

	return pgaddr | ADDR_TO_PGOFFS(vaddr);
}

/* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */
int virt_to_phys_page_proc(struct process *p, int vpg)
{
	struct rbnode *node;
	assert(p);

	if(!(node = rb_findi(&p->vmmap, vpg))) {
		return -1;
	}
	return ((struct vm_page*)node->data)->ppage;
}

/* allocate a contiguous block of virtual memory pages along with
 * backing physical memory for them, and update the page table.
 */
int pgalloc(int num, int area)
{
	int intr_state, ret = -1;
	struct page_range *node, *prev, dummy;

	intr_state = get_intr_state();
	disable_intr();

	dummy.next = pglist[area];
	node = pglist[area];
	prev = &dummy;

	while(node) {
		if(node->end - node->start >= num) {
			ret = node->start;
			node->start += num;

			if(node->start == node->end) {
				prev->next = node->next;
				node->next = 0;

				if(node == pglist[area]) {
					pglist[area] = 0;
				}
				free_node(node);
			}
			break;
		}

		prev = node;
		node = node->next;
	}

	if(ret >= 0) {
		/*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
		unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;

		/* allocate physical storage and map */
		if(map_page_range(ret, num, -1, attr) == -1) {
			ret = -1;
		}
	}

	set_intr_state(intr_state);
	return ret;
}
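
/* Allocation is first-fit from the front of a free range. For example, if a
 * list holds a single node [100, 200), pgalloc(16, ...) returns page 100 and
 * shrinks the node to [116, 200); the pages are then backed by physical
 * memory through map_page_range.
 */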

int pgalloc_vrange(int start, int num)
{
	struct page_range *node, *prev, dummy;
	int area, intr_state, ret = -1;

	area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER;
	if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) {
		printf("pgalloc_vrange: invalid range request crossing user/kernel split\n");
		return -1;
	}

	intr_state = get_intr_state();
	disable_intr();

	dummy.next = pglist[area];
	node = pglist[area];
	prev = &dummy;

	/* check to see if the requested VM range is available */
	node = pglist[area];
	while(node) {
		if(start >= node->start && start + num <= node->end) {
			ret = start;	/* can do .. */

			if(start == node->start) {
				/* adjacent to the start of the range */
				node->start += num;
			} else if(start + num == node->end) {
				/* adjacent to the end of the range */
				node->end = start;
			} else {
				/* somewhere in the middle, which means we need
				 * to allocate a new page_range
				 */
				struct page_range *newnode;

				if(!(newnode = alloc_node())) {
					panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n");
				}
				newnode->start = start + num;
				newnode->end = node->end;
				newnode->next = node->next;

				node->end = start;
				node->next = newnode;
				/* no need to check for empty nodes at this point, there's
				 * certainly stuff at the beginning and the end, otherwise we
				 * wouldn't be here. so break out of it.
				 */
				break;
			}

			if(node->start == node->end) {
				prev->next = node->next;
				node->next = 0;

				if(node == pglist[area]) {
					pglist[area] = 0;
				}
				free_node(node);
			}
			break;
		}

		prev = node;
		node = node->next;
	}

	if(ret >= 0) {
		/*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
		unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;

		/* allocate physical storage and map */
		if(map_page_range(ret, num, -1, attr) == -1) {
			ret = -1;
		}
	}

	set_intr_state(intr_state);
	return ret;
}
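
/* For instance, carving pgalloc_vrange(140, 20) out of a single free range
 * [100, 200) hits the "middle" case above: the node is split into [100, 140)
 * plus a freshly allocated node [160, 200), and page 140 is returned.
 */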

void pgfree(int start, int num)
{
	int i, area, intr_state;
	struct page_range *node, *new, *prev, *next;

	intr_state = get_intr_state();
	disable_intr();

	for(i=0; i<num; i++) {
		int phys_pg = virt_to_phys_page(start + i);
		if(phys_pg != -1) {
			free_phys_page(PAGE_TO_ADDR(phys_pg));
		}
	}

	if(!(new = alloc_node())) {
		panic("pgfree: can't allocate new page_range node to add the freed pages\n");
	}
	new->start = start;
	new->end = start + num;

	area = PAGE_TO_ADDR(start) >= KMEM_START ? MEM_KERNEL : MEM_USER;

	if(!pglist[area] || pglist[area]->start > start) {
		next = new->next = pglist[area];
		pglist[area] = new;
		prev = 0;

	} else {

		prev = 0;
		node = pglist[area];
		next = node ? node->next : 0;

		while(node) {
			if(!next || next->start > start) {
				/* place here, after node */
				new->next = next;
				node->next = new;
				prev = node;	/* needed by coalesce after the loop */
				break;
			}

			prev = node;
			node = next;
			next = node ? node->next : 0;
		}
	}

	coalesce(prev, new, next);
	set_intr_state(intr_state);
}
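
/* Freeing is what keeps the lists tidy: if a free list is
 * [100, 140) -> [160, 200) and someone calls pgfree(140, 20), the new node
 * [140, 160) is inserted in order and coalesce() below collapses all three
 * back into a single [100, 200) range.
 */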

static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high)
{
	if(high) {
		if(mid->end == high->start) {
			mid->end = high->end;
			mid->next = high->next;
			free_node(high);
		}
	}

	if(low) {
		if(low->end == mid->start) {
			low->end = mid->end;
			low->next = mid->next;
			free_node(mid);
		}
	}
}
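
/* The page fault error code pushed by the CPU uses the same bit positions as
 * the low page-table attribute bits, which is presumably why the PG_* masks
 * can be reused below: bit 0 set means the fault was a protection violation
 * rather than a missing page, bit 1 means it was a write, bit 2 means it
 * happened in user mode, and bit 3 flags a reserved bit set somewhere in the
 * paging structures.
 */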

static void pgfault(int inum)
{
	struct intr_frame *frm = get_intr_frame();
	uint32_t fault_addr = get_fault_addr();

	/* the fault occurred in user space */
	if(frm->err & PG_USER) {
		int fault_page = ADDR_TO_PAGE(fault_addr);
		struct process *proc = get_current_proc();
		assert(proc);
		printf("DBG: page fault in user space (pid: %d)\n", proc->id);

		if(frm->err & PG_PRESENT) {
			/* it's not due to a missing page, fetch the attributes */
			int pgnum = ADDR_TO_PAGE(fault_addr);

			if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) {
				/* a write permission fault might be a CoW fault or just an error.
				 * fetch the vm_page permissions to check if this is supposed to be
				 * a writable page (which means we should CoW).
				 */
				struct vm_page *page = get_vm_page_proc(proc, pgnum);

				if(page->flags & PG_WRITABLE) {
					/* ok this is a CoW fault */
					if(copy_on_write(page) == -1) {
						panic("copy on write failed!");
					}
					return;	/* done, allow the process to restart the instruction and continue */
				} else {
					/* TODO eventually we'll SIGSEGV the process, for now just panic.
					 */
					goto unhandled;
				}
			}
			goto unhandled;
		}

		/* so it's a missing page... ok */

		/* detect if it's an automatic stack growth deal */
		if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
			int num_pages = proc->user_stack_pg - fault_page;
			printf("growing user (%d) stack by %d pages\n", proc->id, num_pages);

			if(pgalloc_vrange(fault_page, num_pages) != fault_page) {
				printf("failed to allocate VM for stack growth\n");
				/* TODO: in the future we'd SIGSEGV the process here, for now just panic */
				goto unhandled;
			}
			proc->user_stack_pg = fault_page;
			return;
		}

		/* it's not a stack growth fault. since we don't do swapping yet, just
		 * fall to unhandled and panic
		 */
	}

unhandled:
	printf("~~~~ PAGE FAULT ~~~~\n");
	printf("fault address: %x\n", fault_addr);
	printf("error code: %x\n", frm->err);

	if(frm->err & PG_PRESENT) {
		if(frm->err & 8) {
			printf("reserved bit set in some paging structure\n");
		} else {
			printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "WRITE" : "READ");
			printf("in %s mode\n", (frm->err & PG_USER) ? "user" : "kernel");
		}
	} else {
		printf("page not present\n");
	}

	panic("unhandled page fault\n");
}

/* copy-on-write handler, called from pgfault above */
static int copy_on_write(struct vm_page *page)
{
	int tmpvpg;
	struct vm_page *newpage;
	struct rbnode *vmnode;
	struct process *p = get_current_proc();

	assert(page->nref > 0);

	/* first of all check the refcount. If it's 1 then we don't need to copy
	 * anything. This will happen when all forked processes except one have
	 * marked this read-write again after faulting.
	 */
	if(page->nref == 1) {
		set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY);
		return 0;
	}

	/* ok let's make a copy and mark it read-write */
	if(!(newpage = malloc(sizeof *newpage))) {
		printf("copy_on_write: failed to allocate new vm_page\n");
		return -1;
	}
	newpage->vpage = page->vpage;
	newpage->flags = page->flags;

	if((tmpvpg = pgalloc(1, MEM_KERNEL)) == -1) {
		printf("copy_on_write: failed to allocate physical page\n");
		/* XXX proper action: SIGSEGV */
		return -1;
	}
	newpage->ppage = virt_to_phys_page(tmpvpg);
	newpage->nref = 1;

	/* do the copy */
	memcpy((void*)PAGE_TO_ADDR(tmpvpg), (void*)PAGE_TO_ADDR(page->vpage), PGSIZE);
	unmap_page(tmpvpg);
	pgfree(tmpvpg, 1);

	/* set the new vm_page in the process vmmap */
	vmnode = rb_findi(&p->vmmap, newpage->vpage);
	assert(vmnode && vmnode->data == page);	/* shouldn't be able to fail */
	vmnode->data = newpage;

	/* also update the page table */
	map_page(newpage->vpage, newpage->ppage, newpage->flags);

	/* finally decrease the refcount at the original vm_page struct */
	page->nref--;
	return 0;
}
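
/* The copy-on-write dance, as implemented above and in clone_vm below: when a
 * process is cloned with cow set, the shared user pages are made read-only in
 * the page tables only, while the vm_page flags keep the original PG_WRITABLE
 * bit. A write then faults, pgfault sees that the vm_page says the page should
 * be writable, and copy_on_write either copies the frame into a fresh physical
 * page (nref > 1) or simply flips the writable bit back on for the last
 * remaining owner (nref == 1).
 */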

/* --- page range list node management --- */
#define NODES_IN_PAGE	(PGSIZE / sizeof(struct page_range))

static struct page_range *alloc_node(void)
{
	struct page_range *node;
	int pg, i;

	if(node_pool) {
		node = node_pool;
		node_pool = node_pool->next;
		/*printf("alloc_node -> %x\n", (unsigned int)node);*/
		return node;
	}

	/* no node structures in the pool, we need to allocate a new page,
	 * split it up into node structures, add them in the pool, and
	 * allocate one of them.
	 */
	if((pg = pgalloc(1, MEM_KERNEL)) == -1) {
		panic("ran out of physical memory while allocating VM range structures\n");
	}
	node_pool = (struct page_range*)PAGE_TO_ADDR(pg);

	/* link them up, skip the first as we'll just allocate it anyway */
	for(i=2; i<NODES_IN_PAGE; i++) {
		node_pool[i - 1].next = node_pool + i;
	}
	node_pool[NODES_IN_PAGE - 1].next = 0;

	/* grab the first and return it */
	node = node_pool++;
	/*printf("alloc_node -> %x\n", (unsigned int)node);*/
	return node;
}

static void free_node(struct page_range *node)
{
	node->next = node_pool;
	node_pool = node;
	/*printf("free_node\n");*/
}

/* clone_vm makes a copy of the current page tables, thus duplicating the
 * virtual address space.
 *
 * For the kernel part of the address space (last 256 page directory entries)
 * we don't want to duplicate the page tables, just point all page directory
 * entries to the same set of page tables.
 *
 * If "cow" is non-zero it also marks the shared user-space pages as
 * read-only, to implement copy-on-write.
 */
void clone_vm(struct process *pdest, struct process *psrc, int cow)
{
	int i, j, dirpg, tblpg, kstart_dirent;
	uint32_t paddr;
	uint32_t *ndir, *ntbl;
	struct rbnode *vmnode;

	/* allocate the new page directory */
	if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
		panic("clone_vmem: failed to allocate page directory page\n");
	}
	ndir = (uint32_t*)PAGE_TO_ADDR(dirpg);

	/* allocate a virtual page for temporarily mapping all new
	 * page tables while we populate them.
	 */
	if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) {
		panic("clone_vmem: failed to allocate page table page\n");
	}
	ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg);

	/* we will allocate physical pages and map them to this virtual page
	 * as needed in the loop below. we don't need the physical page allocated
	 * by pgalloc.
	 */
	free_phys_page(virt_to_phys((uint32_t)ntbl));

	kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024;

	/* user space */
	for(i=0; i<kstart_dirent; i++) {
		if(pgdir[i] & PG_PRESENT) {
			if(cow) {
				/* first go through all the entries of the existing
				 * page table and unset the writable bits.
				 */
				for(j=0; j<1024; j++) {
					if(PGTBL(i)[j] & PG_PRESENT) {
						clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
						/*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
					}
				}
			}

			/* allocate a page table for the clone */
			paddr = alloc_phys_page();

			/* copy the page table */
			map_page(tblpg, ADDR_TO_PAGE(paddr), 0);
			memcpy(ntbl, PGTBL(i), PGSIZE);

			/* set the new page directory entry */
			ndir[i] = paddr | (pgdir[i] & PGOFFS_MASK);
		} else {
			ndir[i] = 0;
		}
	}

	/* make a copy of the parent's vmmap tree pointing to the same vm_pages
	 * and increase the reference counters for all vm_pages.
	 */
	rb_init(&pdest->vmmap, RB_KEY_INT);
	rb_begin(&psrc->vmmap);
	while((vmnode = rb_next(&psrc->vmmap))) {
		struct vm_page *pg = vmnode->data;
		pg->nref++;

		/* insert the same vm_page to the new tree */
		rb_inserti(&pdest->vmmap, pg->vpage, pg);
	}

	/* for the kernel space we'll just use the same page tables */
	for(i=kstart_dirent; i<1023; i++) {
		ndir[i] = pgdir[i];
	}

	/* also point the last page directory entry to the page directory address
	 * since we're relying on recursive page tables
	 */
	paddr = virt_to_phys((uint32_t)ndir);
	ndir[1023] = paddr | PG_PRESENT;

	if(cow) {
		/* we just changed all the page protection bits, so we need to flush the TLB */
		flush_tlb();
	}

	/* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
	unmap_page(dirpg);
	unmap_page(tblpg);

	pgfree(dirpg, 1);
	pgfree(tblpg, 1);

	/* set the new page directory pointer */
	pdest->ctx.pgtbl_paddr = paddr;
}

/* cleanup_vm called by exit to clean up any memory used by the process */
void cleanup_vm(struct process *p)
{
	struct rbnode *vmnode;

	/* go through the vm map and reduce refcounts all around
	 * when a ref goes to 0, free the physical page
	 */
	rb_begin(&p->vmmap);
	while((vmnode = rb_next(&p->vmmap))) {
		struct vm_page *page = vmnode->data;

		/* skip kernel pages obviously */
		if(!(page->flags & PG_USER)) {
			continue;
		}

		if(--page->nref <= 0) {
			/* free the physical page if nref goes to 0 */
			free_phys_page(PAGE_TO_ADDR(page->ppage));
		}
	}

	/* destroying the tree will free the nodes */
	rb_destroy(&p->vmmap);
}
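
/* Helpers for manipulating single attribute bits in the live page tables.
 * The wholepath argument selects whether the page directory entry is also
 * checked/modified along with the page table entry; PAGE_ONLY is presumably
 * the "page table entry only" value used by the CoW code above.
 */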

int get_page_bit(int pgnum, uint32_t bit, int wholepath)
{
	int tidx = PAGE_TO_PGTBL(pgnum);
	int tent = PAGE_TO_PGTBL_PG(pgnum);
	uint32_t *pgtbl = PGTBL(tidx);

	if(wholepath) {
		if((pgdir[tidx] & bit) == 0) {
			return 0;
		}
	}

	return pgtbl[tent] & bit;
}

void set_page_bit(int pgnum, uint32_t bit, int wholepath)
{
	int tidx = PAGE_TO_PGTBL(pgnum);
	int tent = PAGE_TO_PGTBL_PG(pgnum);
	uint32_t *pgtbl = PGTBL(tidx);

	if(wholepath) {
		pgdir[tidx] |= bit;
	}
	pgtbl[tent] |= bit;

	flush_tlb_page(pgnum);
}

void clear_page_bit(int pgnum, uint32_t bit, int wholepath)
{
	int tidx = PAGE_TO_PGTBL(pgnum);
	int tent = PAGE_TO_PGTBL_PG(pgnum);
	uint32_t *pgtbl = PGTBL(tidx);

	if(wholepath) {
		pgdir[tidx] &= ~bit;
	}

	pgtbl[tent] &= ~bit;

	flush_tlb_page(pgnum);
}
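
/* cons_vmmap constructs a vmmap tree describing every currently mapped
 * user-space page by walking the live page tables. Each mapping gets a
 * vm_page with nref 1; presumably this is used when a process image is first
 * set up, before any sharing through clone_vm can happen.
 */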

#define USER_PGDIR_ENTRIES	PAGE_TO_PGTBL(KMEM_START_PAGE)
int cons_vmmap(struct rbtree *vmmap)
{
	int i, j;

	rb_init(vmmap, RB_KEY_INT);

	for(i=0; i<USER_PGDIR_ENTRIES; i++) {
		if(pgdir[i] & PG_PRESENT) {
			/* page table is present, iterate through its 1024 pages */
			uint32_t *pgtbl = PGTBL(i);

			for(j=0; j<1024; j++) {
				if(pgtbl[j] & PG_PRESENT) {
					struct vm_page *vmp;

					if(!(vmp = malloc(sizeof *vmp))) {
						panic("cons_vmmap failed to allocate memory");
					}
					vmp->vpage = i * 1024 + j;
					vmp->ppage = ADDR_TO_PAGE(pgtbl[j] & PGENT_ADDR_MASK);
					vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK;
					vmp->nref = 1;	/* when first created assume no sharing */

					rb_inserti(vmmap, vmp->vpage, vmp);
				}
			}
		}
	}

	return 0;
}

struct vm_page *get_vm_page(int vpg)
{
	return get_vm_page_proc(get_current_proc(), vpg);
}

struct vm_page *get_vm_page_proc(struct process *p, int vpg)
{
	struct rbnode *node;

	if(!p || !(node = rb_findi(&p->vmmap, vpg))) {
		return 0;
	}
	return node->data;
}

void dbg_print_vm(int area)
{
	struct page_range *node;
	int last, intr_state;

	intr_state = get_intr_state();
	disable_intr();

	node = pglist[area];
	last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START);

	printf("%s vm space\n", area == MEM_USER ? "user" : "kernel");

	while(node) {
		if(node->start > last) {
			printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start));
		}

		printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start));
		if(node->end >= PAGE_COUNT) {
			printf("END\n");
		} else {
			printf("%x\n", PAGE_TO_ADDR(node->end));
		}

		last = node->end;
		node = node->next;
	}

	set_intr_state(intr_state);
}