kern
view src/vm.c @ 64:c2692696f9ab
forgot to handle the cow argument to clone_vm
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Wed, 17 Aug 2011 05:43:47 +0300 |
parents | 437360696883 |
children | 0a205396e1a0 |
line source
1 #include <stdio.h>
2 #include <string.h>
3 #include <inttypes.h>
4 #include <assert.h>
5 #include "config.h"
6 #include "vm.h"
7 #include "intr.h"
8 #include "mem.h"
9 #include "panic.h"
10 #include "proc.h"
/* start of the region that init_vm maps 1-1 (video memory and kernel code) */
#define IDMAP_START		0xa0000

/* init_vm makes the last page directory entry point back to the page
 * directory itself and then accesses the directory through PGDIR_ADDR;
 * PGTBL(x) is the address used for page table number x in the 4mb window
 * starting at PGTBL_BASE.
 */
#define PGDIR_ADDR		0xfffff000
#define PGTBL_BASE		(0xffffffff - 4096 * 1024 + 1)
#define PGTBL(x)		((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))

/* masks selecting the attribute bits and the address bits of an entry */
#define ATTR_PGDIR_MASK	0x3f
#define ATTR_PGTBL_MASK	0x1ff
#define ADDR_PGENT_MASK	0xfffff000

/* interrupt number on which the page fault handler is installed */
#define PAGEFAULT		14
/* A run of free virtual pages, kept in a singly linked list.
 * The range is half-open: it covers pages start .. end - 1.
 */
struct page_range {
	int start;					/* first page of the range */
	int end;					/* one past the last page of the range */
	struct page_range *next;	/* next range in the list, or 0 */
};
/* low-level paging primitives, defined in vm-asm.S */
void enable_paging(void);
void disable_paging(void);
int get_paging_status(void);
void set_pgdir_addr(uint32_t addr);
void flush_tlb(void);
void flush_tlb_addr(uint32_t addr);
uint32_t get_fault_addr(void);

/* same as flush_tlb_addr, but takes a page number instead of an address */
#define flush_tlb_page(p)	flush_tlb_addr(PAGE_TO_ADDR(p))

/* helpers private to this file */
static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
static void pgfault(int inum);
static struct page_range *alloc_node(void);
static void free_node(struct page_range *node);
45 /* page directory */
46 static uint32_t *pgdir;
48 /* 2 lists of free ranges, for kernel memory and user memory */
49 static struct page_range *pglist[2];
50 /* list of free page_range structures to be used in the lists */
51 static struct page_range *node_pool;
52 /* the first page range for the whole kernel address space, to get things started */
53 static struct page_range first_node;
56 void init_vm(void)
57 {
58 uint32_t idmap_end;
59 int i, kmem_start_pg, pgtbl_base_pg;
61 /* setup the page tables */
62 pgdir = (uint32_t*)alloc_phys_page();
63 memset(pgdir, 0, PGSIZE);
64 set_pgdir_addr((uint32_t)pgdir);
66 /* map the video memory and kernel code 1-1 */
67 get_kernel_mem_range(0, &idmap_end);
68 map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);
70 /* make the last page directory entry point to the page directory */
71 pgdir[1023] = ((uint32_t)pgdir & ADDR_PGENT_MASK) | PG_PRESENT;
72 pgdir = (uint32_t*)PGDIR_ADDR;
74 /* set the page fault handler */
75 interrupt(PAGEFAULT, pgfault);
77 /* we can enable paging now */
78 enable_paging();
80 /* initialize the virtual page allocator */
81 node_pool = 0;
83 kmem_start_pg = ADDR_TO_PAGE(KMEM_START);
84 pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE);
86 first_node.start = kmem_start_pg;
87 first_node.end = pgtbl_base_pg;
88 first_node.next = 0;
89 pglist[MEM_KERNEL] = &first_node;
91 pglist[MEM_USER] = alloc_node();
92 pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end);
93 pglist[MEM_USER]->end = kmem_start_pg;
94 pglist[MEM_USER]->next = 0;
96 /* temporaroly map something into every 1024th page of the kernel address
97 * space to force pre-allocation of all the kernel page-tables
98 */
99 for(i=kmem_start_pg; i<pgtbl_base_pg; i+=1024) {
100 /* if there's already something mapped here, leave it alone */
101 if(virt_to_phys_page(i) == -1) {
102 map_page(i, 0, 0);
103 unmap_page(i);
104 }
105 }
106 }
108 /* if ppage == -1 we allocate a physical page by calling alloc_phys_page */
109 int map_page(int vpage, int ppage, unsigned int attr)
110 {
111 uint32_t *pgtbl;
112 int diridx, pgidx, pgon, intr_state;
114 intr_state = get_intr_state();
115 disable_intr();
117 pgon = get_paging_status();
119 if(ppage < 0) {
120 uint32_t addr = alloc_phys_page();
121 if(!addr) {
122 set_intr_state(intr_state);
123 return -1;
124 }
125 ppage = ADDR_TO_PAGE(addr);
126 }
128 diridx = PAGE_TO_PGTBL(vpage);
129 pgidx = PAGE_TO_PGTBL_PG(vpage);
131 if(!(pgdir[diridx] & PG_PRESENT)) {
132 /* no page table present, we must allocate one */
133 uint32_t addr = alloc_phys_page();
135 /* make sure all page directory entries in the below the kernel vm
136 * split have the user and writable bits set, otherwise further user
137 * mappings on the same 4mb block will be unusable in user space.
138 */
139 unsigned int pgdir_attr = attr;
140 if(vpage < ADDR_TO_PAGE(KMEM_START)) {
141 pgdir_attr |= PG_USER | PG_WRITABLE;
142 }
144 pgdir[diridx] = addr | (pgdir_attr & ATTR_PGDIR_MASK) | PG_PRESENT;
146 pgtbl = pgon ? PGTBL(diridx) : (uint32_t*)addr;
147 memset(pgtbl, 0, PGSIZE);
148 } else {
149 if(pgon) {
150 pgtbl = PGTBL(diridx);
151 } else {
152 pgtbl = (uint32_t*)(pgdir[diridx] & ADDR_PGENT_MASK);
153 }
154 }
156 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
157 flush_tlb_page(vpage);
159 set_intr_state(intr_state);
160 return 0;
161 }
163 int unmap_page(int vpage)
164 {
165 uint32_t *pgtbl;
166 int res = 0;
167 int diridx = PAGE_TO_PGTBL(vpage);
168 int pgidx = PAGE_TO_PGTBL_PG(vpage);
170 int intr_state = get_intr_state();
171 disable_intr();
173 if(!(pgdir[diridx] & PG_PRESENT)) {
174 goto err;
175 }
176 pgtbl = PGTBL(diridx);
178 if(!(pgtbl[pgidx] & PG_PRESENT)) {
179 goto err;
180 }
181 pgtbl[pgidx] = 0;
182 flush_tlb_page(vpage);
184 if(0) {
185 err:
186 printf("unmap_page(%d): page already not mapped\n", vpage);
187 res = -1;
188 }
189 set_intr_state(intr_state);
190 return res;
191 }
/* Maps pgcount consecutive virtual pages starting at vpg_start.
 *
 * If ppg_start is negative, every page gets a freshly allocated physical
 * page (see map_page); otherwise the pages are mapped to the consecutive
 * physical pages starting at ppg_start.
 *
 * Returns 0 on success, or -1 as soon as map_page fails for one of the
 * pages. Pages mapped before the failure are left in place; undoing them
 * is up to the caller.
 */
int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr)
{
	int i;

	for(i=0; i<pgcount; i++) {
		int phys_pg = ppg_start < 0 ? -1 : ppg_start + i;

		if(map_page(vpg_start + i, phys_pg, attr) == -1) {
			return -1;
		}
	}
	return 0;
}
/* Unmaps pgcount consecutive virtual pages starting at vpg_start.
 * All pages are attempted even if some of them fail.
 * Returns 0 if every unmap_page call succeeded, -1 otherwise.
 */
int unmap_page_range(int vpg_start, int pgcount)
{
	int offs = 0;
	int failed = 0;

	while(offs < pgcount) {
		if(unmap_page(vpg_start + offs) == -1) {
			failed = 1;
		}
		offs++;
	}

	return failed ? -1 : 0;
}
/* Maps the sz bytes of virtual memory starting at vaddr.
 *
 * If paddr is 0, physical pages are allocated with alloc_phys_page();
 * otherwise the range is mapped to the physical memory starting at paddr,
 * which must be page-aligned (panics if it isn't).
 *
 * Exactly the pages touched by [vaddr, vaddr + sz) are mapped: the offset
 * of vaddr inside its page is taken into account, and a range ending on a
 * page boundary does not spill over into the following page.
 *
 * Returns -1 if sz is 0, otherwise the result of map_page_range.
 */
int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr)
{
	int vpg_start, vpg_last, ppg_start;

	if(!sz) return -1;

	if(ADDR_TO_PGOFFS(paddr)) {
		panic("map_mem_range called with unaligned physical address: %x\n", paddr);
	}

	vpg_start = ADDR_TO_PAGE(vaddr);
	/* page containing the last byte of the range */
	vpg_last = ADDR_TO_PAGE((uint32_t)(vaddr + sz - 1));
	ppg_start = paddr > 0 ? ADDR_TO_PAGE(paddr) : -1;

	return map_page_range(vpg_start, vpg_last - vpg_start + 1, ppg_start, attr);
}
/* Translates a virtual address to the physical address it is mapped to.
 * Returns 0 if the page containing vaddr is not mapped.
 */
uint32_t virt_to_phys(uint32_t vaddr)
{
	uint32_t page_base;
	int phys_pg = virt_to_phys_page(ADDR_TO_PAGE(vaddr));

	if(phys_pg == -1) {
		return 0;
	}

	/* physical page address plus the offset of vaddr within its page */
	page_base = PAGE_TO_ADDR(phys_pg);
	return page_base | ADDR_TO_PGOFFS(vaddr);
}
248 int virt_to_phys_page(int vpg)
249 {
250 uint32_t pgaddr, *pgtbl;
251 int diridx, pgidx;
253 if(vpg < 0 || vpg >= PAGE_COUNT) {
254 return -1;
255 }
257 diridx = PAGE_TO_PGTBL(vpg);
258 pgidx = PAGE_TO_PGTBL_PG(vpg);
260 if(!(pgdir[diridx] & PG_PRESENT)) {
261 return -1;
262 }
263 pgtbl = PGTBL(diridx);
265 if(!(pgtbl[pgidx] & PG_PRESENT)) {
266 return -1;
267 }
268 pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK;
269 return ADDR_TO_PAGE(pgaddr);
270 }
272 /* allocate a contiguous block of virtual memory pages along with
273 * backing physical memory for them, and update the page table.
274 */
275 int pgalloc(int num, int area)
276 {
277 int intr_state, ret = -1;
278 struct page_range *node, *prev, dummy;
280 intr_state = get_intr_state();
281 disable_intr();
283 dummy.next = pglist[area];
284 node = pglist[area];
285 prev = &dummy;
287 while(node) {
288 if(node->end - node->start >= num) {
289 ret = node->start;
290 node->start += num;
292 if(node->start == node->end) {
293 prev->next = node->next;
294 node->next = 0;
296 if(node == pglist[area]) {
297 pglist[area] = 0;
298 }
299 free_node(node);
300 }
301 break;
302 }
304 prev = node;
305 node = node->next;
306 }
308 if(ret >= 0) {
309 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
310 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
312 /* allocate physical storage and map */
313 if(map_page_range(ret, num, -1, attr) == -1) {
314 ret = -1;
315 }
316 }
318 set_intr_state(intr_state);
319 return ret;
320 }
322 int pgalloc_vrange(int start, int num)
323 {
324 struct page_range *node, *prev, dummy;
325 int area, intr_state, ret = -1;
327 area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER;
328 if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) {
329 printf("pgalloc_vrange: invalid range request crossing user/kernel split\n");
330 return -1;
331 }
333 intr_state = get_intr_state();
334 disable_intr();
336 dummy.next = pglist[area];
337 node = pglist[area];
338 prev = &dummy;
340 /* check to see if the requested VM range is available */
341 node = pglist[area];
342 while(node) {
343 if(start >= node->start && start + num <= node->end) {
344 ret = start; /* can do .. */
346 if(start == node->start) {
347 /* adjacent to the start of the range */
348 node->start += num;
349 } else if(start + num == node->end) {
350 /* adjacent to the end of the range */
351 node->end = start;
352 } else {
353 /* somewhere in the middle, which means we need
354 * to allocate a new page_range
355 */
356 struct page_range *newnode;
358 if(!(newnode = alloc_node())) {
359 panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n");
360 }
361 newnode->start = start + num;
362 newnode->end = node->end;
363 newnode->next = node->next;
365 node->end = start;
366 node->next = newnode;
367 /* no need to check for null nodes at this point, there's
368 * certainly stuff at the begining and the end, otherwise we
369 * wouldn't be here. so break out of it.
370 */
371 break;
372 }
374 if(node->start == node->end) {
375 prev->next = node->next;
376 node->next = 0;
378 if(node == pglist[area]) {
379 pglist[area] = 0;
380 }
381 free_node(node);
382 }
383 break;
384 }
386 prev = node;
387 node = node->next;
388 }
390 if(ret >= 0) {
391 /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/
392 unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0;
394 /* allocate physical storage and map */
395 if(map_page_range(ret, num, -1, attr) == -1) {
396 ret = -1;
397 }
398 }
400 set_intr_state(intr_state);
401 return ret;
402 }
404 void pgfree(int start, int num)
405 {
406 int i, area, intr_state;
407 struct page_range *node, *new, *prev, *next;
409 intr_state = get_intr_state();
410 disable_intr();
412 for(i=0; i<num; i++) {
413 int phys_pg = virt_to_phys_page(start + i);
414 if(phys_pg != -1) {
415 free_phys_page(phys_pg);
416 }
417 }
419 if(!(new = alloc_node())) {
420 panic("pgfree: can't allocate new page_range node to add the freed pages\n");
421 }
422 new->start = start;
423 new->end = start + num;
425 area = PAGE_TO_ADDR(start) >= KMEM_START ? MEM_KERNEL : MEM_USER;
427 if(!pglist[area] || pglist[area]->start > start) {
428 next = new->next = pglist[area];
429 pglist[area] = new;
430 prev = 0;
432 } else {
434 prev = 0;
435 node = pglist[area];
436 next = node ? node->next : 0;
438 while(node) {
439 if(!next || next->start > start) {
440 /* place here, after node */
441 new->next = next;
442 node->next = new;
443 prev = node; /* needed by coalesce after the loop */
444 break;
445 }
447 prev = node;
448 node = next;
449 next = node ? node->next : 0;
450 }
451 }
453 coalesce(prev, new, next);
454 set_intr_state(intr_state);
455 }
457 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high)
458 {
459 if(high) {
460 if(mid->end == high->start) {
461 mid->end = high->end;
462 mid->next = high->next;
463 free_node(high);
464 }
465 }
467 if(low) {
468 if(low->end == mid->start) {
469 low->end += mid->end;
470 low->next = mid->next;
471 free_node(mid);
472 }
473 }
474 }
476 static void pgfault(int inum)
477 {
478 struct intr_frame *frm = get_intr_frame();
479 uint32_t fault_addr = get_fault_addr();
481 /* the fault occured in user space */
482 if(frm->err & PG_USER) {
483 int fault_page = ADDR_TO_PAGE(fault_addr);
484 struct process *proc = get_current_proc();
485 printf("DBG: page fault in user space\n");
486 assert(proc);
488 if(frm->err & PG_PRESENT) {
489 /* it's not due to a missing page, just panic */
490 goto unhandled;
491 }
493 /* detect if it's an automatic stack growth deal */
494 if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
495 int num_pages = proc->user_stack_pg - fault_page;
496 printf("growing user (%d) stack by %d pages\n", proc->id, num_pages);
498 if(pgalloc_vrange(fault_page, num_pages) != fault_page) {
499 printf("failed to allocate VM for stack growth\n");
500 /* TODO: in the future we'd SIGSEGV the process here, for now just panic */
501 goto unhandled;
502 }
503 proc->user_stack_pg = fault_page;
504 return;
505 }
506 }
508 unhandled:
509 printf("~~~~ PAGE FAULT ~~~~\n");
510 printf("fault address: %x\n", fault_addr);
512 if(frm->err & PG_PRESENT) {
513 if(frm->err & 8) {
514 printf("reserved bit set in some paging structure\n");
515 } else {
516 printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "WRITE" : "READ");
517 printf("in %s mode\n", (frm->err & PG_USER) ? "user" : "kernel");
518 }
519 } else {
520 printf("page not present\n");
521 }
523 panic("unhandled page fault\n");
524 }
526 /* --- page range list node management --- */
527 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range))
529 static struct page_range *alloc_node(void)
530 {
531 struct page_range *node;
532 int pg, i;
534 if(node_pool) {
535 node = node_pool;
536 node_pool = node_pool->next;
537 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
538 return node;
539 }
541 /* no node structures in the pool, we need to allocate a new page,
542 * split it up into node structures, add them in the pool, and
543 * allocate one of them.
544 */
545 if(!(pg = pgalloc(1, MEM_KERNEL))) {
546 panic("ran out of physical memory while allocating VM range structures\n");
547 }
548 node_pool = (struct page_range*)PAGE_TO_ADDR(pg);
550 /* link them up, skip the first as we'll just allocate it anyway */
551 for(i=2; i<NODES_IN_PAGE; i++) {
552 node_pool[i - 1].next = node_pool + i;
553 }
554 node_pool[NODES_IN_PAGE - 1].next = 0;
556 /* grab the first and return it */
557 node = node_pool++;
558 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
559 return node;
560 }
562 static void free_node(struct page_range *node)
563 {
564 node->next = node_pool;
565 node_pool = node;
566 /*printf("free_node\n");*/
567 }
569 /* clone_vm makes a copy of the current page tables, thus duplicating the
570 * virtual address space.
571 *
572 * For the kernel part of the address space (last 256 page directory entries)
573 * we don't want to diplicate the page tables, just point all page directory
574 * entries to the same set of page tables.
575 *
576 * If "cow" is non-zero it also marks the shared user-space pages as
577 * read-only, to implement copy-on-write.
578 *
579 * Returns the physical address of the new page directory.
580 */
581 uint32_t clone_vm(int cow)
582 {
583 int i, j, dirpg, tblpg, kstart_dirent;
584 uint32_t paddr;
585 uint32_t *ndir, *ntbl;
587 /* allocate the new page directory */
588 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
589 panic("clone_vmem: failed to allocate page directory page\n");
590 }
591 ndir = (uint32_t*)PAGE_TO_ADDR(dirpg);
593 /* allocate a virtual page for temporarily mapping all new
594 * page tables while we populate them.
595 */
596 if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) {
597 panic("clone_vmem: failed to allocate page table page\n");
598 }
599 ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg);
601 /* we will allocate physical pages and map them to this virtual page
602 * as needed in the loop below. we don't need the physical page allocated
603 * by pgalloc.
604 */
605 free_phys_page(virt_to_phys((uint32_t)ntbl));
607 kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024;
609 /* user space */
610 for(i=0; i<kstart_dirent; i++) {
611 if(pgdir[i] & PG_PRESENT) {
612 if(cow) {
613 /* first go through all the entries of the existing
614 * page table and unset the writable bits.
615 */
616 for(j=0; j<1024; j++) {
617 PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;
618 }
619 }
621 /* allocate a page table for the clone */
622 paddr = alloc_phys_page();
624 /* copy the page table */
625 map_page(tblpg, ADDR_TO_PAGE(paddr), 0);
626 memcpy(ntbl, PGTBL(i), PGSIZE);
628 /* set the new page directory entry */
629 ndir[i] = paddr | (pgdir[i] & PGOFFS_MASK);
630 } else {
631 ndir[i] = 0;
632 }
633 }
635 /* for the kernel space we'll just use the same page tables */
636 for(i=kstart_dirent; i<1024; i++) {
637 ndir[i] = pgdir[i];
638 }
640 if(cow) {
641 /* we just changed all the page protection bits, so we need to flush the TLB */
642 flush_tlb();
643 }
645 paddr = virt_to_phys((uint32_t)ndir);
647 /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
648 unmap_page(dirpg);
649 unmap_page(tblpg);
651 pgfree(dirpg, 1);
652 pgfree(tblpg, 1);
654 return paddr;
655 }
657 int get_page_bit(int pgnum, uint32_t bit, int wholepath)
658 {
659 int tidx = PAGE_TO_PGTBL(pgnum);
660 int tent = PAGE_TO_PGTBL_PG(pgnum);
661 uint32_t *pgtbl = PGTBL(tidx);
663 if(wholepath) {
664 if((pgdir[tidx] & bit) == 0) {
665 return 0;
666 }
667 }
669 return pgtbl[tent] & bit;
670 }
672 void set_page_bit(int pgnum, uint32_t bit, int wholepath)
673 {
674 int tidx = PAGE_TO_PGTBL(pgnum);
675 int tent = PAGE_TO_PGTBL_PG(pgnum);
676 uint32_t *pgtbl = PGTBL(tidx);
678 if(wholepath) {
679 pgdir[tidx] |= bit;
680 }
681 pgtbl[tent] |= bit;
683 flush_tlb_page(pgnum);
684 }
686 void clear_page_bit(int pgnum, uint32_t bit, int wholepath)
687 {
688 int tidx = PAGE_TO_PGTBL(pgnum);
689 int tent = PAGE_TO_PGTBL_PG(pgnum);
690 uint32_t *pgtbl = PGTBL(tidx);
692 if(wholepath) {
693 pgdir[tidx] &= ~bit;
694 }
696 pgtbl[tent] &= ~bit;
698 flush_tlb_page(pgnum);
699 }
702 void dbg_print_vm(int area)
703 {
704 struct page_range *node;
705 int last, intr_state;
707 intr_state = get_intr_state();
708 disable_intr();
710 node = pglist[area];
711 last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START);
713 printf("%s vm space\n", area == MEM_USER ? "user" : "kernel");
715 while(node) {
716 if(node->start > last) {
717 printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start));
718 }
720 printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start));
721 if(node->end >= PAGE_COUNT) {
722 printf("END\n");
723 } else {
724 printf("%x\n", PAGE_TO_ADDR(node->end));
725 }
727 last = node->end;
728 node = node->next;
729 }
731 set_intr_state(intr_state);
732 }