kern

view src/vm.c @ 52:fa65b4f45366

picking this up again, let's fix it
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 07 Aug 2011 06:42:00 +0300
parents b1e8c8251884
children 88a6c4e192f9
line source
1 #include <stdio.h>
2 #include <string.h>
3 #include <inttypes.h>
4 #include <assert.h>
5 #include "config.h"
6 #include "vm.h"
7 #include "intr.h"
8 #include "mem.h"
9 #include "panic.h"
10 #include "proc.h"
/* first address of the region that init_vm maps 1-1 (video memory and kernel) */
#define IDMAP_START		0xa0000

/* init_vm makes the last page directory entry point to the page directory
 * itself. The directory is then accessed through PGDIR_ADDR, and page table
 * number x through PGTBL(x), in the topmost 4mb of the address space.
 */
#define PGDIR_ADDR		0xfffff000
#define PGTBL_BASE		0xffc00000	/* 4gb - 4mb */
#define PGTBL(x)		((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))

/* bits of the attr argument that map_page lets through to page directory
 * entries and page table entries respectively
 */
#define ATTR_PGDIR_MASK	0x3f
#define ATTR_PGTBL_MASK	0x1ff
/* address part of a page directory/table entry */
#define ADDR_PGENT_MASK	0xfffff000

/* interrupt number init_vm registers the page fault handler for */
#define PAGEFAULT		14
/* A contiguous range of free virtual pages, covering page numbers from start
 * up to but not including end. Ranges are chained into singly-linked lists.
 */
struct page_range {
	int start;					/* first page of the range */
	int end;					/* one past the last page of the range */
	struct page_range *next;	/* next range in the list, or 0 */
};
30 /* defined in vm-asm.S */
31 void enable_paging(void);
32 void disable_paging(void);
33 int get_paging_status(void);
34 void set_pgdir_addr(uint32_t addr);
35 void flush_tlb(void);
36 void flush_tlb_addr(uint32_t addr);
37 #define flush_tlb_page(p) flush_tlb_addr(PAGE_TO_ADDR(p))
38 uint32_t get_fault_addr(void);
40 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
41 static void pgfault(int inum);
42 static struct page_range *alloc_node(void);
43 static void free_node(struct page_range *node);
45 /* page directory */
46 static uint32_t *pgdir;
48 /* 2 lists of free ranges, for kernel memory and user memory */
49 static struct page_range *pglist[2];
50 /* list of free page_range structures to be used in the lists */
51 static struct page_range *node_pool;
52 /* the first page range for the whole kernel address space, to get things started */
53 static struct page_range first_node;
56 void init_vm(void)
57 {
58 uint32_t idmap_end;
59 int i, kmem_start_pg, pgtbl_base_pg;
61 /* setup the page tables */
62 pgdir = (uint32_t*)alloc_phys_page();
63 memset(pgdir, 0, PGSIZE);
64 set_pgdir_addr((uint32_t)pgdir);
66 /* map the video memory and kernel code 1-1 */
67 get_kernel_mem_range(0, &idmap_end);
68 map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0);
70 /* make the last page directory entry point to the page directory */
71 pgdir[1023] = ((uint32_t)pgdir & ADDR_PGENT_MASK) | PG_PRESENT;
72 pgdir = (uint32_t*)PGDIR_ADDR;
74 /* set the page fault handler */
75 interrupt(PAGEFAULT, pgfault);
77 /* we can enable paging now */
78 enable_paging();
80 /* initialize the virtual page allocator */
81 node_pool = 0;
83 kmem_start_pg = ADDR_TO_PAGE(KMEM_START);
84 pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE);
86 first_node.start = kmem_start_pg;
87 first_node.end = pgtbl_base_pg;
88 first_node.next = 0;
89 pglist[MEM_KERNEL] = &first_node;
91 pglist[MEM_USER] = alloc_node();
92 pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end);
93 pglist[MEM_USER]->end = kmem_start_pg;
94 pglist[MEM_USER]->next = 0;
96 /* temporaroly map something into every 1024th page of the kernel address
97 * space to force pre-allocation of all the kernel page-tables
98 */
99 for(i=kmem_start_pg; i<pgtbl_base_pg; i+=1024) {
100 /* if there's already something mapped here, leave it alone */
101 if(virt_to_phys_page(i) == -1) {
102 map_page(i, 0, 0);
103 unmap_page(i);
104 }
105 }
106 }
108 /* if ppage == -1 we allocate a physical page by calling alloc_phys_page */
109 int map_page(int vpage, int ppage, unsigned int attr)
110 {
111 uint32_t *pgtbl;
112 int diridx, pgidx, pgon, intr_state;
114 intr_state = get_intr_state();
115 disable_intr();
117 pgon = get_paging_status();
119 if(ppage < 0) {
120 uint32_t addr = alloc_phys_page();
121 if(!addr) {
122 set_intr_state(intr_state);
123 return -1;
124 }
125 ppage = ADDR_TO_PAGE(addr);
126 }
128 diridx = PAGE_TO_PGTBL(vpage);
129 pgidx = PAGE_TO_PGTBL_PG(vpage);
131 if(!(pgdir[diridx] & PG_PRESENT)) {
132 uint32_t addr = alloc_phys_page();
133 pgdir[diridx] = addr | (attr & ATTR_PGDIR_MASK) | PG_PRESENT;
135 pgtbl = pgon ? PGTBL(diridx) : (uint32_t*)addr;
136 memset(pgtbl, 0, PGSIZE);
137 } else {
138 if(pgon) {
139 pgtbl = PGTBL(diridx);
140 } else {
141 pgtbl = (uint32_t*)(pgdir[diridx] & ADDR_PGENT_MASK);
142 }
143 }
145 pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
146 flush_tlb_page(vpage);
148 set_intr_state(intr_state);
149 return 0;
150 }
152 int unmap_page(int vpage)
153 {
154 uint32_t *pgtbl;
155 int res = 0;
156 int diridx = PAGE_TO_PGTBL(vpage);
157 int pgidx = PAGE_TO_PGTBL_PG(vpage);
159 int intr_state = get_intr_state();
160 disable_intr();
162 if(!(pgdir[diridx] & PG_PRESENT)) {
163 goto err;
164 }
165 pgtbl = PGTBL(diridx);
167 if(!(pgtbl[pgidx] & PG_PRESENT)) {
168 goto err;
169 }
170 pgtbl[pgidx] = 0;
171 flush_tlb_page(vpage);
173 if(0) {
174 err:
175 printf("unmap_page(%d): page already not mapped\n", vpage);
176 res = -1;
177 }
178 set_intr_state(intr_state);
179 return res;
180 }
/* map_page_range maps pgcount consecutive virtual pages starting at
 * vpg_start, to consecutive physical pages starting at ppg_start.
 *
 * If ppg_start is negative, physical pages are allocated by map_page (with
 * alloc_phys_page) instead.
 *
 * Like unmap_page_range, all pages are attempted even if some of them fail.
 * Returns 0 if every page was mapped, or -1 if map_page failed for any of
 * them. Pages that were mapped successfully stay mapped in that case.
 */
int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr)
{
	int i, phys_pg, res = 0;

	for(i=0; i<pgcount; i++) {
		phys_pg = ppg_start < 0 ? -1 : ppg_start + i;
		if(map_page(vpg_start + i, phys_pg, attr) == -1) {
			res = -1;
		}
	}
	return res;
}
/* unmap_page_range unmaps pgcount consecutive virtual pages starting at
 * vpg_start. All the pages are attempted even if some of them fail.
 *
 * Returns 0 if every page was unmapped, -1 if unmap_page failed for any.
 */
int unmap_page_range(int vpg_start, int pgcount)
{
	int status = 0;
	int vpg = vpg_start;
	int remaining = pgcount;

	while(remaining > 0) {
		if(unmap_page(vpg) == -1) {
			status = -1;
		}
		vpg++;
		remaining--;
	}
	return status;
}
/* map_mem_range maps the virtual address range [vaddr, vaddr + sz) to
 * physical memory starting at paddr, which must be page-aligned.
 *
 * If paddr is 0, physical pages are allocated with alloc_phys_page() instead.
 *
 * Every page touched by the range is mapped, and no more: the page count is
 * derived from the first and last byte of the range, so a range that ends
 * exactly on a page boundary doesn't spill into the next page, and a vaddr
 * which isn't page-aligned is accounted for.
 *
 * Returns -1 if sz is 0, otherwise the result of map_page_range.
 */
int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr)
{
	int vpg_start, vpg_last, ppg_start;
	uint32_t last_addr;

	if(!sz) return -1;

	if(ADDR_TO_PGOFFS(paddr)) {
		panic("map_mem_range called with unaligned physical address: %x\n", paddr);
	}

	/* address of the last byte of the range */
	last_addr = (uint32_t)(vaddr + sz - 1);

	vpg_start = ADDR_TO_PAGE(vaddr);
	vpg_last = ADDR_TO_PAGE(last_addr);
	ppg_start = paddr > 0 ? ADDR_TO_PAGE(paddr) : -1;

	return map_page_range(vpg_start, vpg_last - vpg_start + 1, ppg_start, attr);
}
/* virt_to_phys translates a virtual address to the physical address it is
 * currently mapped to. Returns 0 if the page of vaddr is not mapped.
 */
uint32_t virt_to_phys(uint32_t vaddr)
{
	uint32_t base;
	int ppage = virt_to_phys_page(ADDR_TO_PAGE(vaddr));

	if(ppage == -1) {
		return 0;
	}

	/* physical page address, plus the offset of vaddr inside its page */
	base = PAGE_TO_ADDR(ppage);
	return base | ADDR_TO_PGOFFS(vaddr);
}
237 int virt_to_phys_page(int vpg)
238 {
239 uint32_t pgaddr, *pgtbl;
240 int diridx, pgidx;
242 if(vpg < 0 || vpg >= PAGE_COUNT) {
243 return -1;
244 }
246 diridx = PAGE_TO_PGTBL(vpg);
247 pgidx = PAGE_TO_PGTBL_PG(vpg);
249 if(!(pgdir[diridx] & PG_PRESENT)) {
250 return -1;
251 }
252 pgtbl = PGTBL(diridx);
254 if(!(pgtbl[pgidx] & PG_PRESENT)) {
255 return -1;
256 }
257 pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK;
258 return ADDR_TO_PAGE(pgaddr);
259 }
261 /* allocate a contiguous block of virtual memory pages along with
262 * backing physical memory for them, and update the page table.
263 */
264 int pgalloc(int num, int area)
265 {
266 int intr_state, ret = -1;
267 struct page_range *node, *prev, dummy;
268 unsigned int attr = 0; /* TODO */
270 intr_state = get_intr_state();
271 disable_intr();
273 dummy.next = pglist[area];
274 node = pglist[area];
275 prev = &dummy;
277 while(node) {
278 if(node->end - node->start >= num) {
279 ret = node->start;
280 node->start += num;
282 if(node->start == node->end) {
283 prev->next = node->next;
284 node->next = 0;
286 if(node == pglist[area]) {
287 pglist[area] = 0;
288 }
289 free_node(node);
290 }
291 break;
292 }
294 prev = node;
295 node = node->next;
296 }
298 if(ret >= 0) {
299 /* allocate physical storage and map */
300 if(map_page_range(ret, num, -1, attr) == -1) {
301 ret = -1;
302 }
303 }
305 set_intr_state(intr_state);
306 return ret;
307 }
309 int pgalloc_vrange(int start, int num)
310 {
311 struct page_range *node, *prev, dummy;
312 int area, intr_state, ret = -1;
313 unsigned int attr = 0; /* TODO */
315 area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER;
316 if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) {
317 printf("pgalloc_vrange: invalid range request crossing user/kernel split\n");
318 return -1;
319 }
321 intr_state = get_intr_state();
322 disable_intr();
324 dummy.next = pglist[area];
325 node = pglist[area];
326 prev = &dummy;
328 /* check to see if the requested VM range is available */
329 node = pglist[area];
330 while(node) {
331 if(start >= node->start && start + num <= node->end) {
332 ret = start; /* can do .. */
334 if(start == node->start) {
335 /* adjacent to the start of the range */
336 node->start += num;
337 } else if(start + num == node->end) {
338 /* adjacent to the end of the range */
339 node->end = start;
340 } else {
341 /* somewhere in the middle, which means we need
342 * to allocate a new page_range
343 */
344 struct page_range *newnode;
346 if(!(newnode = alloc_node())) {
347 panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n");
348 }
349 newnode->start = start + num;
350 newnode->end = node->end;
351 newnode->next = node->next;
353 node->end = start;
354 node->next = newnode;
355 /* no need to check for null nodes at this point, there's
356 * certainly stuff at the begining and the end, otherwise we
357 * wouldn't be here. so break out of it.
358 */
359 break;
360 }
362 if(node->start == node->end) {
363 prev->next = node->next;
364 node->next = 0;
366 if(node == pglist[area]) {
367 pglist[area] = 0;
368 }
369 free_node(node);
370 }
371 break;
372 }
374 prev = node;
375 node = node->next;
376 }
378 if(ret >= 0) {
379 /* allocate physical storage and map */
380 if(map_page_range(ret, num, -1, attr) == -1) {
381 ret = -1;
382 }
383 }
385 set_intr_state(intr_state);
386 return ret;
387 }
389 void pgfree(int start, int num)
390 {
391 int i, area, intr_state;
392 struct page_range *node, *new, *prev, *next;
394 intr_state = get_intr_state();
395 disable_intr();
397 for(i=0; i<num; i++) {
398 int phys_pg = virt_to_phys_page(start + i);
399 if(phys_pg != -1) {
400 free_phys_page(phys_pg);
401 }
402 }
404 if(!(new = alloc_node())) {
405 panic("pgfree: can't allocate new page_range node to add the freed pages\n");
406 }
407 new->start = start;
408 new->end = start + num;
410 area = PAGE_TO_ADDR(start) >= KMEM_START ? MEM_KERNEL : MEM_USER;
412 if(!pglist[area] || pglist[area]->start > start) {
413 next = new->next = pglist[area];
414 pglist[area] = new;
415 prev = 0;
417 } else {
419 prev = 0;
420 node = pglist[area];
421 next = node ? node->next : 0;
423 while(node) {
424 if(!next || next->start > start) {
425 /* place here, after node */
426 new->next = next;
427 node->next = new;
428 prev = node; /* needed by coalesce after the loop */
429 break;
430 }
432 prev = node;
433 node = next;
434 next = node ? node->next : 0;
435 }
436 }
438 coalesce(prev, new, next);
439 set_intr_state(intr_state);
440 }
442 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high)
443 {
444 if(high) {
445 if(mid->end == high->start) {
446 mid->end = high->end;
447 mid->next = high->next;
448 free_node(high);
449 }
450 }
452 if(low) {
453 if(low->end == mid->start) {
454 low->end += mid->end;
455 low->next = mid->next;
456 free_node(mid);
457 }
458 }
459 }
461 static void pgfault(int inum)
462 {
463 struct intr_frame *frm = get_intr_frame();
464 uint32_t fault_addr = get_fault_addr();
466 /* the fault occured in user space */
467 if(frm->esp < KMEM_START + 1) {
468 int fault_page = ADDR_TO_PAGE(fault_addr);
469 struct process *proc = get_current_proc();
470 assert(proc);
472 printf("DBG: page fault in user space\n");
474 if(frm->err & PG_PRESENT) {
475 /* it's not due to a missing page, just panic */
476 goto unhandled;
477 }
479 /* detect if it's an automatic stack growth deal */
480 if(fault_page < proc->stack_start_pg && proc->stack_start_pg - fault_page < USTACK_MAXGROW) {
481 int num_pages = proc->stack_start_pg - fault_page;
482 printf("growing user (%d) stack by %d pages\n", proc->id, num_pages);
484 if(pgalloc_vrange(fault_page, num_pages) != fault_page) {
485 printf("failed to allocate VM for stack growth\n");
486 /* TODO: in the future we'd SIGSEGV the process here, for now just panic */
487 goto unhandled;
488 }
489 proc->stack_start_pg = fault_page;
491 return;
492 }
493 }
495 unhandled:
496 printf("~~~~ PAGE FAULT ~~~~\n");
497 printf("fault address: %x\n", fault_addr);
499 if(frm->err & PG_PRESENT) {
500 if(frm->err & 8) {
501 printf("reserved bit set in some paging structure\n");
502 } else {
503 printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "write" : "read");
504 printf("in %s mode\n", frm->err & PG_USER ? "user" : "kernel");
505 }
506 } else {
507 printf("page not present\n");
508 }
510 panic("unhandled page fault\n");
511 }
513 /* --- page range list node management --- */
514 #define NODES_IN_PAGE (PGSIZE / sizeof(struct page_range))
516 static struct page_range *alloc_node(void)
517 {
518 struct page_range *node;
519 int pg, i;
521 if(node_pool) {
522 node = node_pool;
523 node_pool = node_pool->next;
524 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
525 return node;
526 }
528 /* no node structures in the pool, we need to allocate a new page,
529 * split it up into node structures, add them in the pool, and
530 * allocate one of them.
531 */
532 if(!(pg = pgalloc(1, MEM_KERNEL))) {
533 panic("ran out of physical memory while allocating VM range structures\n");
534 }
535 node_pool = (struct page_range*)PAGE_TO_ADDR(pg);
537 /* link them up, skip the first as we'll just allocate it anyway */
538 for(i=2; i<NODES_IN_PAGE; i++) {
539 node_pool[i - 1].next = node_pool + i;
540 }
541 node_pool[NODES_IN_PAGE - 1].next = 0;
543 /* grab the first and return it */
544 node = node_pool++;
545 /*printf("alloc_node -> %x\n", (unsigned int)node);*/
546 return node;
547 }
549 static void free_node(struct page_range *node)
550 {
551 node->next = node_pool;
552 node_pool = node;
553 /*printf("free_node\n");*/
554 }
557 /* clone_vm makes a copy of the current page tables, thus duplicating the
558 * virtual address space.
559 *
560 * For the kernel part of the address space (last 256 page directory entries)
561 * we don't want to diplicate the page tables, just point all page directory
562 * entries to the same set of page tables.
563 *
564 * Returns the physical address of the new page directory.
565 */
566 uint32_t clone_vm(void)
567 {
568 int i, dirpg, tblpg, kstart_dirent;
569 uint32_t paddr;
570 uint32_t *ndir, *ntbl;
572 /* allocate the new page directory */
573 if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
574 panic("clone_vmem: failed to allocate page directory page\n");
575 }
576 ndir = (uint32_t*)PAGE_TO_ADDR(dirpg);
578 /* allocate a virtual page for temporarily mapping all new
579 * page tables while we populate them.
580 */
581 if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) {
582 panic("clone_vmem: failed to allocate page table page\n");
583 }
584 ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg);
586 /* we will allocate physical pages and map them to this virtual page
587 * as needed in the loop below.
588 */
589 free_phys_page(virt_to_phys((uint32_t)ntbl));
591 kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024;
593 /* user space */
594 for(i=0; i<kstart_dirent; i++) {
595 if(pgdir[i] & PG_PRESENT) {
596 paddr = alloc_phys_page();
597 map_page(tblpg, ADDR_TO_PAGE(paddr), 0);
599 /* copy the page table */
600 memcpy(ntbl, PGTBL(i), PGSIZE);
602 /* set the new page directory entry */
603 ndir[i] = paddr | (pgdir[i] & PGOFFS_MASK);
604 } else {
605 ndir[i] = 0;
606 }
607 }
609 /* kernel space */
610 for(i=kstart_dirent; i<1024; i++) {
611 ndir[i] = pgdir[i];
612 }
614 paddr = virt_to_phys((uint32_t)ndir);
616 /* unmap before freeing to avoid deallocating the physical pages */
617 unmap_page(dirpg);
618 unmap_page(tblpg);
620 pgfree(dirpg, 1);
621 pgfree(tblpg, 1);
623 return paddr;
624 }
627 void dbg_print_vm(int area)
628 {
629 struct page_range *node;
630 int last, intr_state;
632 intr_state = get_intr_state();
633 disable_intr();
635 node = pglist[area];
636 last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START);
638 printf("%s vm space\n", area == MEM_USER ? "user" : "kernel");
640 while(node) {
641 if(node->start > last) {
642 printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start));
643 }
645 printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start));
646 if(node->end >= PAGE_COUNT) {
647 printf("END\n");
648 } else {
649 printf("%x\n", PAGE_TO_ADDR(node->end));
650 }
652 last = node->end;
653 node = node->next;
654 }
656 set_intr_state(intr_state);
657 }