kern

view src/vm.c @ 68:0a205396e1a0

- added a generic red-black tree data structure - added a VM map as an red-black tree of vm_pages in the process structure - constructed the vm map of the memory passed by the kernel initially to the first process.
author John Tsiombikas <nuclear@mutantstargoat.com>
date Mon, 10 Oct 2011 04:16:01 +0300
parents c2692696f9ab
children b45e2d5f0ae1
line source
1 #include <stdio.h>
2 #include <string.h>
3 #include <inttypes.h>
4 #include <assert.h>
5 #include "config.h"
6 #include "vm.h"
7 #include "intr.h"
8 #include "mem.h"
9 #include "panic.h"
10 #include "proc.h"
12 #define IDMAP_START 0xa0000
14 #define PGDIR_ADDR 0xfffff000
15 #define PGTBL_BASE (0xffffffff - 4096 * 1024 + 1)
16 #define PGTBL(x) ((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))
18 #define ATTR_PGDIR_MASK 0x3f
19 #define ATTR_PGTBL_MASK 0x1ff
21 #define PAGEFAULT 14
24 struct page_range {
25 int start, end;
26 struct page_range *next;
27 };
29 /* defined in vm-asm.S */
30 void enable_paging(void);
31 void disable_paging(void);
32 int get_paging_status(void);
33 void set_pgdir_addr(uint32_t addr);
34 void flush_tlb(void);
35 void flush_tlb_addr(uint32_t addr);
36 #define flush_tlb_page(p) flush_tlb_addr(PAGE_TO_ADDR(p))
37 uint32_t get_fault_addr(void);
39 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
40 static void pgfault(int inum);
41 static struct page_range *alloc_node(void);
42 static void free_node(struct page_range *node);
44 /* page directory */
45 static uint32_t *pgdir;
47 /* 2 lists of free ranges, for kernel memory and user memory */
48 static struct page_range *pglist[2];
49 /* list of free page_range structures to be used in the lists */
50 static struct page_range *node_pool;
51 /* the first page range for the whole kernel address space, to get things started */
52 static struct page_range first_node;
55 void init_vm(void)
56 {
57 uint32_t idmap_end;
58 int i, kmem_start_pg, pgtbl_base_pg;
60 /* setup the page tables */
61 pgdir = (uint32_t*)alloc_phys_page();
62 memset(pgdir, 0, PGSIZE);
63 set_pgdir_addr((uint32_t)pgdir);
65 /* map the video memory and kernel code 1-1 */
66 get_kernel_mem_range(0, &idmap_end);
67 map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);
69 /* make the last page directory entry point to the page directory */
70 pgdir[1023] = ((uint32_t)pgdir & PGENT_ADDR_MASK) | PG_PRESENT;
71 pgdir = (uint32_t*)PGDIR_ADDR;
73 /* set the page fault handler */
74 interrupt(PAGEFAULT, pgfault);
76 /* we can enable paging now */
77 enable_paging();
79 /* initialize the virtual page allocator */
80 node_pool = 0;
82 kmem_start_pg = ADDR_TO_PAGE(KMEM_START);
83 pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE);
85 first_node.start = kmem_start_pg;
86 first_node.end = pgtbl_base_pg;
87 first_node.next = 0;
88 pglist[MEM_KERNEL] = &first_node;
90 pglist[MEM_USER] = alloc_node();
91 pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end);
92 pglist[MEM_USER]->end = kmem_start_pg;
93 pglist[MEM_USER]->next = 0;
95 /* temporaroly map something into every 1024th page of the kernel address
96 * space to force pre-allocation of all the kernel page-tables
97 */
98 for(i=kmem_start_pg; i<pgtbl_base_pg; i+=1024) {
99 /* if there's already something mapped here, leave it alone */
100 if(virt_to_phys_page(i) == -1) {
101 map_page(i, 0, 0);
102 unmap_page(i);
103 }
104 }
105 }
107 /* if ppage == -1 we allocate a physical page by calling alloc_phys_page */
108 int map_page(int vpage, int ppage, unsigned int attr)
109 {
110 uint32_t *pgtbl;
111 int diridx, pgidx, pgon, intr_state;
113 intr_state = get_intr_state();
114 disable_intr();
116 pgon = get_paging_status();
118 if(ppage < 0) {
119 uint32_t addr = alloc_phys_page();
120 if(!addr) {
121 set_intr_state(intr_state);
122 return -1;
123 }
124 ppage = ADDR_TO_PAGE(addr);
125 }
127 diridx = PAGE_TO_PGTBL(vpage);
128 pgidx = PAGE_TO_PGTBL_PG(vpage);
130 if(!(pgdir[diridx] & PG_PRESENT)) {
131 /* no page table present, we must allocate one */
132 uint32_t addr = alloc_phys_page();
134 /* make sure all page directory entries in the below the kernel vm
135 * split have the user and writable bits set, otherwise further user
136 * mappings on the same 4mb block will be unusable in user space.
137 */
138 unsigned int pgdir_attr = attr;
139 if(vpage < ADDR_TO_PAGE(KMEM_START)) {
140 pgdir_attr |= PG_USER | PG_WRITABLE;
141 }
143 pgdir[diridx] = addr | (pgdir_attr & ATTR_PGDIR_MASK) | PG_PRESENT;
145 pgtbl = pgon ? PGTBL(diridx) : (uint32_t*)addr;
146 memset(pgtbl, 0, PGSIZE);
147 } else {
148 if(pgon) {
149 pgtbl = PGTBL(diridx);
150 } else {
151 pgtbl = (uint32_t*)(pgdir[diridx] & PGENT_ADDR_MASK);
152 }
153 }
155 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
156 flush_tlb_page(vpage);
158 set_intr_state(intr_state);
159 return 0;
160 }
162 int unmap_page(int vpage)
163 {
164 uint32_t *pgtbl;
165 int res = 0;
166 int diridx = PAGE_TO_PGTBL(vpage);
167 int pgidx = PAGE_TO_PGTBL_PG(vpage);
169 int intr_state = get_intr_state();
170 disable_intr();
172 if(!(pgdir[diridx] & PG_PRESENT)) {
173 goto err;
174 }
175 pgtbl = PGTBL(diridx);
177 if(!(pgtbl[pgidx] & PG_PRESENT)) {
178 goto err;
179 }
180 pgtbl[pgidx] = 0;
181 flush_tlb_page(vpage);
183 if(0) {
184 err:
185 printf("unmap_page(%d): page already not mapped\n", vpage);
186 res = -1;
187 }
188 set_intr_state(intr_state);
189 return res;
190 }
192 /* if ppg_start is -1, we allocate physical pages to map with alloc_phys_page() */
193 int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr)
194 {
195 int i, phys_pg;
197 for(i=0; i<pgcount; i++) {
198 phys_pg = ppg_start < 0 ? -1 : ppg_start + i;
199 map_page(vpg_start + i, phys_pg, attr);
200 }
201 return 0;
202 }
204 int unmap_page_range(int vpg_start, int pgcount)
205 {
206 int i, res = 0;
208 for(i=0; i<pgcount; i++) {
209 if(unmap_page(vpg_start + i) == -1) {
210 res = -1;
211 }
212 }
213 return res;
214 }
216 /* if paddr is 0, we allocate physical pages with alloc_phys_page() */
217 int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr)
218 {
219 int vpg_start, ppg_start, num_pages;
221 if(!sz) return -1;
223 if(ADDR_TO_PGOFFS(paddr)) {
224 panic("map_mem_range called with unaligned physical address: %x\n", paddr);
225 }
227 vpg_start = ADDR_TO_PAGE(vaddr);
228 ppg_start = paddr > 0 ? ADDR_TO_PAGE(paddr) : -1;
229 num_pages = ADDR_TO_PAGE(sz) + 1;
231 return map_page_range(vpg_start, num_pages, ppg_start, attr);
232 }
234 uint32_t virt_to_phys(uint32_t vaddr)
235 {
236 int pg;
237 uint32_t pgaddr;
239 if((pg = virt_to_phys_page(ADDR_TO_PAGE(vaddr))) == -1) {
240 return 0;
241 }
242 pgaddr = PAGE_TO_ADDR(pg);
244 return pgaddr | ADDR_TO_PGOFFS(vaddr);
245 }
247 int virt_to_phys_page(int vpg)
248 {
249 uint32_t pgaddr, *pgtbl;
250 int diridx, pgidx;
252 if(vpg < 0 || vpg >= PAGE_COUNT) {
253 return -1;
254 }
256 diridx = PAGE_TO_PGTBL(vpg);
257 pgidx = PAGE_TO_PGTBL_PG(vpg);
259 if(!(pgdir[diridx] & PG_PRESENT)) {
260 return -1;
261 }
262 pgtbl = PGTBL(diridx);
264 if(!(pgtbl[pgidx] & PG_PRESENT)) {
265 return -1;
266 }
267 pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK;
268 return ADDR_TO_PAGE(pgaddr);
269 }
271 /* allocate a contiguous block of virtual memory pages along with
272 * backing physical memory for them, and update the page table.
273 */
274 int pgalloc(int num, int area)
275 {
276 int intr_state, ret = -1;
277 struct page_range *node, *prev, dummy;
279 intr_state = get_intr_state();
280 disable_intr();
282 dummy.next = pglist[area];
283 node = pglist[area];
284 prev = &dummy;
286 while(node) {
287 if(node->end - node->start >= num) {
288 ret = node->start;
289 node->start += num;
291 if(node->start == node->end) {
292 prev->next = node->next;
293 node->next = 0;
295 if(node == pglist[area]) {
296 pglist[area] = 0;
297 }
298 free_node(node);
299 }
300 break;
301 }
303 prev = node;
304 node = node->next;
305 }
307 if(ret >= 0) {
308 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
309 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
311 /* allocate physical storage and map */
312 if(map_page_range(ret, num, -1, attr) == -1) {
313 ret = -1;
314 }
315 }
317 set_intr_state(intr_state);
318 return ret;
319 }
321 int pgalloc_vrange(int start, int num)
322 {
323 struct page_range *node, *prev, dummy;
324 int area, intr_state, ret = -1;
326 area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER;
327 if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) {
328 printf("pgalloc_vrange: invalid range request crossing user/kernel split\n");
329 return -1;
330 }
332 intr_state = get_intr_state();
333 disable_intr();
335 dummy.next = pglist[area];
336 node = pglist[area];
337 prev = &dummy;
339 /* check to see if the requested VM range is available */
340 node = pglist[area];
341 while(node) {
342 if(start >= node->start && start + num <= node->end) {
343 ret = start; /* can do .. */
345 if(start == node->start) {
346 /* adjacent to the start of the range */
347 node->start += num;
348 } else if(start + num == node->end) {
349 /* adjacent to the end of the range */
350 node->end = start;
351 } else {
352 /* somewhere in the middle, which means we need
353 * to allocate a new page_range
354 */
355 struct page_range *newnode;
357 if(!(newnode = alloc_node())) {
358 panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n");
359 }
360 newnode->start = start + num;
361 newnode->end = node->end;
362 newnode->next = node->next;
364 node->end = start;
365 node->next = newnode;
366 /* no need to check for null nodes at this point, there's
367 * certainly stuff at the begining and the end, otherwise we
368 * wouldn't be here. so break out of it.
369 */
370 break;
371 }
373 if(node->start == node->end) {
374 prev->next = node->next;
375 node->next = 0;
377 if(node == pglist[area]) {
378 pglist[area] = 0;
379 }
380 free_node(node);
381 }
382 break;
383 }
385 prev = node;
386 node = node->next;
387 }
389 if(ret >= 0) {
390 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
391 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
393 /* allocate physical storage and map */
394 if(map_page_range(ret, num, -1, attr) == -1) {
395 ret = -1;
396 }
397 }
399 set_intr_state(intr_state);
400 return ret;
401 }
403 void pgfree(int start, int num)
404 {
405 int i, area, intr_state;
406 struct page_range *node, *new, *prev, *next;
408 intr_state = get_intr_state();
409 disable_intr();
411 for(i=0; i<num; i++) {
412 int phys_pg = virt_to_phys_page(start + i);
413 if(phys_pg != -1) {
414 free_phys_page(phys_pg);
415 }
416 }
418 if(!(new = alloc_node())) {
419 panic("pgfree: can't allocate new page_range node to add the freed pages\n");
420 }
421 new->start = start;
422 new->end = start + num;
424 area = PAGE_TO_ADDR(start) >= KMEM_START ? MEM_KERNEL : MEM_USER;
426 if(!pglist[area] || pglist[area]->start > start) {
427 next = new->next = pglist[area];
428 pglist[area] = new;
429 prev = 0;
431 } else {
433 prev = 0;
434 node = pglist[area];
435 next = node ? node->next : 0;
437 while(node) {
438 if(!next || next->start > start) {
439 /* place here, after node */
440 new->next = next;
441 node->next = new;
442 prev = node; /* needed by coalesce after the loop */
443 break;
444 }
446 prev = node;
447 node = next;
448 next = node ? node->next : 0;
449 }
450 }
452 coalesce(prev, new, next);
453 set_intr_state(intr_state);
454 }
456 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high)
457 {
458 if(high) {
459 if(mid->end == high->start) {
460 mid->end = high->end;
461 mid->next = high->next;
462 free_node(high);
463 }
464 }
466 if(low) {
467 if(low->end == mid->start) {
468 low->end += mid->end;
469 low->next = mid->next;
470 free_node(mid);
471 }
472 }
473 }
475 static void pgfault(int inum)
476 {
477 struct intr_frame *frm = get_intr_frame();
478 uint32_t fault_addr = get_fault_addr();
480 /* the fault occured in user space */
481 if(frm->err & PG_USER) {
482 int fault_page = ADDR_TO_PAGE(fault_addr);
483 struct process *proc = get_current_proc();
484 printf("DBG: page fault in user space\n");
485 assert(proc);
487 if(frm->err & PG_PRESENT) {
488 /* it's not due to a missing page, just panic */
489 goto unhandled;
490 }
492 /* detect if it's an automatic stack growth deal */
493 if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
494 int num_pages = proc->user_stack_pg - fault_page;
495 printf("growing user (%d) stack by %d pages\n", proc->id, num_pages);
497 if(pgalloc_vrange(fault_page, num_pages) != fault_page) {
498 printf("failed to allocate VM for stack growth\n");
499 /* TODO: in the future we'd SIGSEGV the process here, for now just panic */
500 goto unhandled;
501 }
502 proc->user_stack_pg = fault_page;
503 return;
504 }
505 }
507 unhandled:
508 printf("~~~~ PAGE FAULT ~~~~\n");
509 printf("fault address: %x\n", fault_addr);
511 if(frm->err & PG_PRESENT) {
512 if(frm->err & 8) {
513 printf("reserved bit set in some paging structure\n");
514 } else {
515 printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "WRITE" : "READ");
516 printf("in %s mode\n", (frm->err & PG_USER) ? "user" : "kernel");
517 }
518 } else {
519 printf("page not present\n");
520 }
522 panic("unhandled page fault\n");
523 }
525 /* --- page range list node management --- */
526 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range))
528 static struct page_range *alloc_node(void)
529 {
530 struct page_range *node;
531 int pg, i;
533 if(node_pool) {
534 node = node_pool;
535 node_pool = node_pool->next;
536 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
537 return node;
538 }
540 /* no node structures in the pool, we need to allocate a new page,
541 * split it up into node structures, add them in the pool, and
542 * allocate one of them.
543 */
544 if(!(pg = pgalloc(1, MEM_KERNEL))) {
545 panic("ran out of physical memory while allocating VM range structures\n");
546 }
547 node_pool = (struct page_range*)PAGE_TO_ADDR(pg);
549 /* link them up, skip the first as we'll just allocate it anyway */
550 for(i=2; i<NODES_IN_PAGE; i++) {
551 node_pool[i - 1].next = node_pool + i;
552 }
553 node_pool[NODES_IN_PAGE - 1].next = 0;
555 /* grab the first and return it */
556 node = node_pool++;
557 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
558 return node;
559 }
561 static void free_node(struct page_range *node)
562 {
563 node->next = node_pool;
564 node_pool = node;
565 /*printf("free_node\n");*/
566 }
568 /* clone_vm makes a copy of the current page tables, thus duplicating the
569 * virtual address space.
570 *
571 * For the kernel part of the address space (last 256 page directory entries)
572 * we don't want to diplicate the page tables, just point all page directory
573 * entries to the same set of page tables.
574 *
575 * If "cow" is non-zero it also marks the shared user-space pages as
576 * read-only, to implement copy-on-write.
577 *
578 * Returns the physical address of the new page directory.
579 */
580 uint32_t clone_vm(int cow)
581 {
582 int i, j, dirpg, tblpg, kstart_dirent;
583 uint32_t paddr;
584 uint32_t *ndir, *ntbl;
586 /* allocate the new page directory */
587 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
588 panic("clone_vmem: failed to allocate page directory page\n");
589 }
590 ndir = (uint32_t*)PAGE_TO_ADDR(dirpg);
592 /* allocate a virtual page for temporarily mapping all new
593 * page tables while we populate them.
594 */
595 if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) {
596 panic("clone_vmem: failed to allocate page table page\n");
597 }
598 ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg);
600 /* we will allocate physical pages and map them to this virtual page
601 * as needed in the loop below. we don't need the physical page allocated
602 * by pgalloc.
603 */
604 free_phys_page(virt_to_phys((uint32_t)ntbl));
606 kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024;
608 /* user space */
609 for(i=0; i<kstart_dirent; i++) {
610 if(pgdir[i] & PG_PRESENT) {
611 if(cow) {
612 /* first go through all the entries of the existing
613 * page table and unset the writable bits.
614 */
615 for(j=0; j<1024; j++) {
616 clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
617 /*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
618 }
619 }
621 /* allocate a page table for the clone */
622 paddr = alloc_phys_page();
624 /* copy the page table */
625 map_page(tblpg, ADDR_TO_PAGE(paddr), 0);
626 memcpy(ntbl, PGTBL(i), PGSIZE);
628 /* set the new page directory entry */
629 ndir[i] = paddr | (pgdir[i] & PGOFFS_MASK);
630 } else {
631 ndir[i] = 0;
632 }
633 }
635 /* for the kernel space we'll just use the same page tables */
636 for(i=kstart_dirent; i<1024; i++) {
637 ndir[i] = pgdir[i];
638 }
640 if(cow) {
641 /* we just changed all the page protection bits, so we need to flush the TLB */
642 flush_tlb();
643 }
645 paddr = virt_to_phys((uint32_t)ndir);
647 /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
648 unmap_page(dirpg);
649 unmap_page(tblpg);
651 pgfree(dirpg, 1);
652 pgfree(tblpg, 1);
654 return paddr;
655 }
657 int get_page_bit(int pgnum, uint32_t bit, int wholepath)
658 {
659 int tidx = PAGE_TO_PGTBL(pgnum);
660 int tent = PAGE_TO_PGTBL_PG(pgnum);
661 uint32_t *pgtbl = PGTBL(tidx);
663 if(wholepath) {
664 if((pgdir[tidx] & bit) == 0) {
665 return 0;
666 }
667 }
669 return pgtbl[tent] & bit;
670 }
672 void set_page_bit(int pgnum, uint32_t bit, int wholepath)
673 {
674 int tidx = PAGE_TO_PGTBL(pgnum);
675 int tent = PAGE_TO_PGTBL_PG(pgnum);
676 uint32_t *pgtbl = PGTBL(tidx);
678 if(wholepath) {
679 pgdir[tidx] |= bit;
680 }
681 pgtbl[tent] |= bit;
683 flush_tlb_page(pgnum);
684 }
686 void clear_page_bit(int pgnum, uint32_t bit, int wholepath)
687 {
688 int tidx = PAGE_TO_PGTBL(pgnum);
689 int tent = PAGE_TO_PGTBL_PG(pgnum);
690 uint32_t *pgtbl = PGTBL(tidx);
692 if(wholepath) {
693 pgdir[tidx] &= ~bit;
694 }
696 pgtbl[tent] &= ~bit;
698 flush_tlb_page(pgnum);
699 }
702 #define USER_PGDIR_ENTRIES PAGE_TO_PGTBL(KMEM_START_PAGE)
703 int cons_vmmap(struct rbtree *vmmap)
704 {
705 int i, j;
707 rb_init(vmmap, RB_KEY_INT);
709 for(i=0; i<USER_PGDIR_ENTRIES; i++) {
710 if(pgdir[i] & PG_PRESENT) {
711 /* page table is present, iterate through its 1024 pages */
712 uint32_t *pgtbl = PGTBL(i);
714 for(j=0; j<1024; j++) {
715 if(pgtbl[j] & PG_PRESENT) {
716 struct vm_page *vmp;
718 if(!(vmp = malloc(sizeof *vmp))) {
719 panic("cons_vmap failed to allocate memory");
720 }
721 vmp->vpage = i * 1024 + j;
722 vmp->ppage = ADDR_TO_PAGE(pgtbl[j] & PGENT_ADDR_MASK);
723 vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK;
724 vmp->nref = 1; /* when first created assume no sharing */
726 rb_inserti(vmmap, vmp->ppage, vmp);
727 }
728 }
729 }
730 }
732 return 0;
733 }
736 void dbg_print_vm(int area)
737 {
738 struct page_range *node;
739 int last, intr_state;
741 intr_state = get_intr_state();
742 disable_intr();
744 node = pglist[area];
745 last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START);
747 printf("%s vm space\n", area == MEM_USER ? "user" : "kernel");
749 while(node) {
750 if(node->start > last) {
751 printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start));
752 }
754 printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start));
755 if(node->end >= PAGE_COUNT) {
756 printf("END\n");
757 } else {
758 printf("%x\n", PAGE_TO_ADDR(node->end));
759 }
761 last = node->end;
762 node = node->next;
763 }
765 set_intr_state(intr_state);
766 }