kern

changeset 69:b45e2d5f0ae1

ok I *think* i've fixed it now
author John Tsiombikas <nuclear@member.fsf.org>
date Wed, 12 Oct 2011 14:39:40 +0300
parents 0a205396e1a0
children 219974492c7d
files src/proc.c src/rbtree.c src/rbtree.h src/test_proc.S src/vm.c src/vm.h
diffstat 6 files changed, 228 insertions(+), 16 deletions(-)
line diff
     1.1 --- a/src/proc.c	Mon Oct 10 04:16:01 2011 +0300
     1.2 +++ b/src/proc.c	Wed Oct 12 14:39:40 2011 +0300
     1.3 @@ -93,7 +93,7 @@
     1.4  	memcpy((void*)img_start_addr, test_proc, proc_size_pg * PGSIZE);
     1.5  	printf("copied init process at: %x\n", img_start_addr);
     1.6  
     1.7 -	/* allocate the first page of the process stack */
     1.8 +	/* allocate the first page of the user stack */
     1.9  	stack_pg = ADDR_TO_PAGE(KMEM_START) - 1;
    1.10  	if(pgalloc_vrange(stack_pg, 1) == -1) {
    1.11  		panic("failed to allocate user stack page\n");
    1.12 @@ -194,7 +194,8 @@
    1.13  	/* will be copied on write */
    1.14  	p->user_stack_pg = parent->user_stack_pg;
    1.15  
    1.16 -	p->ctx.pgtbl_paddr = clone_vm(CLONE_COW);
    1.17 +	/* clone the parent's virtual memory */
    1.18 +	clone_vm(p, parent, CLONE_COW);
    1.19  
    1.20  	/* done, now let's add it to the scheduler runqueue */
    1.21  	add_proc(p->id);
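
The interface change in the fork path is worth spelling out: clone_vm used to return the physical address of the new page directory for the caller to store, but it now needs the parent (to walk its vmmap and bump vm_page reference counts) and the child (to receive both the new page directory and the copied vmmap), so it takes both processes and fills everything in itself. A before/after sketch, with names taken from the patch:

	/* before: the caller stored the result */
	p->ctx.pgtbl_paddr = clone_vm(CLONE_COW);

	/* after: clone_vm fills in p->ctx.pgtbl_paddr and p->vmmap,
	 * using the parent's vmmap as the source
	 */
	clone_vm(p, parent, CLONE_COW);
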
     2.1 --- a/src/rbtree.c	Mon Oct 10 04:16:01 2011 +0300
     2.2 +++ b/src/rbtree.c	Wed Oct 12 14:39:40 2011 +0300
     2.3 @@ -2,6 +2,7 @@
     2.4  #include <stdlib.h>
     2.5  #include <string.h>
     2.6  #include "rbtree.h"
     2.7 +#include "panic.h"
     2.8  
     2.9  #define INT2PTR(x)	((void*)(x))
    2.10  #define PTR2INT(x)	((int)(x))
    2.11 @@ -13,7 +14,6 @@
    2.12  static void del_tree(struct rbnode *node, void (*delfunc)(struct rbnode*, void*), void *cls);
    2.13  static struct rbnode *insert(struct rbtree *rb, struct rbnode *tree, void *key, void *data);
    2.14  static struct rbnode *delete(struct rbtree *rb, struct rbnode *tree, void *key);
    2.15 -/*static struct rbnode *find(struct rbtree *rb, struct rbnode *node, void *key);*/
    2.16  static void traverse(struct rbnode *node, void (*func)(struct rbnode*, void*), void *cls);
    2.17  
    2.18  struct rbtree *rb_create(rb_cmp_func_t cmp_func)
    2.19 @@ -59,6 +59,27 @@
    2.20  	del_tree(rb->root, rb->del, rb->del_cls);
    2.21  }
    2.22  
    2.23 +void rb_clear(struct rbtree *rb)
    2.24 +{
    2.25 +	del_tree(rb->root, rb->del, rb->del_cls);
    2.26 +	rb->root = 0;
    2.27 +}
    2.28 +
    2.29 +int rb_copy(struct rbtree *dest, struct rbtree *src)
    2.30 +{
    2.31 +	struct rbnode *node;
    2.32 +
    2.33 +	rb_clear(dest);
    2.34 +
    2.35 +	rb_begin(src);
    2.36 +	while((node = rb_next(src))) {
    2.37 +		if(rb_insert(dest, node->key, node->data) == -1) {
    2.38 +			return -1;
    2.39 +		}
    2.40 +	}
    2.41 +	return 0;
    2.42 +}
    2.43 +
    2.44  void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free)
    2.45  {
    2.46  	rb->alloc = alloc;
    2.47 @@ -237,6 +258,9 @@
    2.48  
    2.49  	if(!tree) {
    2.50  		struct rbnode *node = rb->alloc(sizeof *node);
    2.51 +		if(!node) {
    2.52 +			panic("failed to allocate tree node\n");
    2.53 +		}
    2.54  		node->red = 1;
    2.55  		node->key = key;
    2.56  		node->data = data;
    2.57 @@ -455,3 +479,14 @@
    2.58  	}
    2.59  	return tree;
    2.60  }
    2.61 +
    2.62 +void rb_dbg_print_tree(struct rbtree *tree)
    2.63 +{
    2.64 +	struct rbnode *node;
    2.65 +
    2.66 +	rb_begin(tree);
    2.67 +	while((node = rb_next(tree))) {
    2.68 +		printf("%d ", rb_node_keyi(node));
    2.69 +	}
    2.70 +	printf("\n");
    2.71 +}
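
A usage sketch for the two new tree operations. Note that rb_copy is shallow: keys and data pointers end up shared between the two trees, which is exactly the behavior the vmmap cloning in vm.c relies on. The some_data pointer here is hypothetical:

	struct rbtree src, dest;

	rb_init(&src, RB_KEY_INT);
	rb_inserti(&src, 42, some_data);	/* some_data: hypothetical pointer */

	rb_init(&dest, RB_KEY_INT);
	rb_copy(&dest, &src);	/* dest now maps 42 to the same some_data */

	rb_clear(&src);		/* empties src; dest keeps its own nodes */
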
     3.1 --- a/src/rbtree.h	Mon Oct 10 04:16:01 2011 +0300
     3.2 +++ b/src/rbtree.h	Wed Oct 12 14:39:40 2011 +0300
     3.3 @@ -48,6 +48,9 @@
     3.4  int rb_init(struct rbtree *rb, rb_cmp_func_t cmp_func);
     3.5  void rb_destroy(struct rbtree *rb);
     3.6  
     3.7 +void rb_clear(struct rbtree *tree);
     3.8 +int rb_copy(struct rbtree *dest, struct rbtree *src);
     3.9 +
    3.10  void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free);
    3.11  void rb_set_compare_func(struct rbtree *rb, rb_cmp_func_t func);
    3.12  void rb_set_delete_func(struct rbtree *rb, rb_del_func_t func, void *cls);
    3.13 @@ -74,6 +77,9 @@
    3.14  int rb_node_keyi(struct rbnode *node);
    3.15  void *rb_node_data(struct rbnode *node);
    3.16  
    3.17 +
    3.18 +void rb_dbg_print_tree(struct rbtree *tree);
    3.19 +
    3.20  #ifdef __cplusplus
    3.21  }
    3.22  #endif
     4.1 --- a/src/test_proc.S	Mon Oct 10 04:16:01 2011 +0300
     4.2 +++ b/src/test_proc.S	Wed Oct 12 14:39:40 2011 +0300
     4.3 @@ -8,6 +8,8 @@
     4.4  	movl $SYS_FORK, %eax
     4.5  	int $SYSCALL_INT
     4.6  
     4.7 +	push %eax
     4.8 +
     4.9  infloop:
    4.10  	/* --- print a message --- */
    4.11  	movl $SYS_HELLO, %eax
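
That push is small but load-bearing for this changeset: it saves fork's return value, and because the user stack page is shared read-only between parent and child after the fork, it is also the first write to land on the new copy-on-write path in vm.c. Roughly the C equivalent, with hypothetical syscall wrappers:

	int pid = fork();	/* movl $SYS_FORK, %eax; int $SYSCALL_INT */

	/* spilling pid to the user stack, like the push %eax above, faults
	 * on the shared read-only stack page and exercises copy_on_write()
	 */
	for(;;) {
		hello();	/* SYS_HELLO: print a message */
	}
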
     5.1 --- a/src/vm.c	Mon Oct 10 04:16:01 2011 +0300
     5.2 +++ b/src/vm.c	Wed Oct 12 14:39:40 2011 +0300
     5.3 @@ -38,6 +38,7 @@
     5.4  
     5.5  static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
     5.6  static void pgfault(int inum);
     5.7 +static int copy_on_write(struct vm_page *page);
     5.8  static struct page_range *alloc_node(void);
     5.9  static void free_node(struct page_range *node);
    5.10  
    5.11 @@ -109,6 +110,7 @@
    5.12  {
    5.13  	uint32_t *pgtbl;
    5.14  	int diridx, pgidx, pgon, intr_state;
    5.15 +	struct process *p;
    5.16  
    5.17  	intr_state = get_intr_state();
    5.18  	disable_intr();
    5.19 @@ -155,6 +157,30 @@
    5.20  	pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
    5.21  	flush_tlb_page(vpage);
    5.22  
    5.23 +	/* if it's a new *user* mapping, and there is a current process, update the vmmap */
    5.24 +	if((attr & PG_USER) && (p = get_current_proc())) {
    5.25 +		struct vm_page *page;
    5.26 +
    5.27 +		if(!(page = get_vm_page_proc(p, vpage))) {
    5.28 +			if(!(page = malloc(sizeof *page))) {
    5.29 +				panic("map_page: failed to allocate new vm_page structure");
    5.30 +			}
    5.31 +			page->vpage = vpage;
    5.32 +			page->ppage = ppage;
    5.33 +			page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
    5.34 +			page->nref = 1;
    5.35 +
    5.36 +			rb_inserti(&p->vmmap, vpage, page);
    5.37 +		} else {
    5.38 +			/* otherwise just update the mapping */
    5.39 +			page->ppage = ppage;
    5.40 +
    5.41 +			/* XXX don't touch the flags, as that's how we implement CoW
    5.42 +			 * by changing the mapping without affecting the vm_page
    5.43 +			 */
    5.44 +		}
    5.45 +	}
    5.46 +
    5.47  	set_intr_state(intr_state);
    5.48  	return 0;
    5.49  }
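
After this change, every user-space map_page call made while a process is current leaves a shadow record in that process's vmmap. A sketch of the resulting invariant for a fresh mapping (hypothetical page numbers and helper):

	static void dbg_check_mapping(void)
	{
		struct vm_page *pg;

		map_page(1024, 5000, PG_USER | PG_WRITABLE);

		pg = get_vm_page(1024);
		assert(pg && pg->ppage == 5000 && pg->nref == 1);
	}
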
    5.50 @@ -231,6 +257,7 @@
    5.51  	return map_page_range(vpg_start, num_pages, ppg_start, attr);
    5.52  }
    5.53  
    5.54 +/* translate a virtual address to a physical address using the current page table */
    5.55  uint32_t virt_to_phys(uint32_t vaddr)
    5.56  {
    5.57  	int pg;
    5.58 @@ -244,6 +271,7 @@
    5.59  	return pgaddr | ADDR_TO_PGOFFS(vaddr);
    5.60  }
    5.61  
    5.62 +/* translate a virtual page number to a physical page number using the current page table */
    5.63  int virt_to_phys_page(int vpg)
    5.64  {
    5.65  	uint32_t pgaddr, *pgtbl;
    5.66 @@ -268,6 +296,32 @@
    5.67  	return ADDR_TO_PAGE(pgaddr);
    5.68  }
    5.69  
    5.70 +/* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */
    5.71 +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr)
    5.72 +{
    5.73 +	int pg;
    5.74 +	uint32_t pgaddr;
    5.75 +
    5.76 +	if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) {
    5.77 +		return 0;
    5.78 +	}
    5.79 +	pgaddr = PAGE_TO_ADDR(pg);
    5.80 +
    5.81 +	return pgaddr | ADDR_TO_PGOFFS(vaddr);
    5.82 +}
    5.83 +
    5.84 +/* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */
    5.85 +int virt_to_phys_page_proc(struct process *p, int vpg)
    5.86 +{
    5.87 +	struct rbnode *node;
    5.88 +	assert(p);
    5.89 +
    5.90 +	if(!(node = rb_findi(&p->vmmap, vpg))) {
    5.91 +		return -1;
    5.92 +	}
    5.93 +	return ((struct vm_page*)node->data)->ppage;
    5.94 +}
    5.95 +
    5.96  /* allocate a contiguous block of virtual memory pages along with
    5.97   * backing physical memory for them, and update the page table.
    5.98   */
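
The _proc variants deliberately avoid the live page tables: they answer from the red-black tree alone, so they also work for a process whose page directory is not currently loaded. For the current process the two paths should agree on the physical frame, even under CoW, since copy_on_write updates the vm_page and the page table together. A hypothetical sanity check:

	static void dbg_check_translation(uint32_t uaddr)
	{
		struct process *p = get_current_proc();

		/* page-table walk and vmmap lookup should give the same frame
		 * for any mapped user address of the current process
		 */
		assert(virt_to_phys(uaddr) == virt_to_phys_proc(p, uaddr));
	}
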
    5.99 @@ -481,14 +535,37 @@
   5.100  	if(frm->err & PG_USER) {
   5.101  		int fault_page = ADDR_TO_PAGE(fault_addr);
   5.102  		struct process *proc = get_current_proc();
    5.103 -		printf("DBG: page fault in user space\n");
    5.104  		assert(proc);
    5.105 +		printf("DBG: page fault in user space (pid: %d)\n", proc->id);
   5.106  
   5.107  		if(frm->err & PG_PRESENT) {
   5.108 -			/* it's not due to a missing page, just panic */
    5.109 +			/* it's not due to a missing page; fetch the attributes */
   5.110 +			int pgnum = ADDR_TO_PAGE(fault_addr);
   5.111 +
   5.112 +			if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) {
    5.113 +				/* a write permission fault might be a CoW fault or just an error;
    5.114 +				 * fetch the vm_page permissions to check if this is supposed to be
   5.115 +				 * a writable page (which means we should CoW).
   5.116 +				 */
   5.117 +				struct vm_page *page = get_vm_page_proc(proc, pgnum);
   5.118 +
    5.119 +				if(page && (page->flags & PG_WRITABLE)) {
   5.120 +					/* ok this is a CoW fault */
   5.121 +					if(copy_on_write(page) == -1) {
   5.122 +						panic("copy on write failed!");
   5.123 +					}
   5.124 +					return;	/* done, allow the process to restart the instruction and continue */
   5.125 +				} else {
    5.126 +					/* TODO: eventually we'll SIGSEGV the process; for now just panic.
   5.127 +					 */
   5.128 +					goto unhandled;
   5.129 +				}
   5.130 +			}
   5.131  			goto unhandled;
   5.132  		}
   5.133  
   5.134 +		/* so it's a missing page... ok */
   5.135 +
   5.136  		/* detect if it's an automatic stack growth deal */
   5.137  		if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
   5.138  			int num_pages = proc->user_stack_pg - fault_page;
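
The classification above boils down to a single predicate: the fault is a CoW fault exactly when the page is logically writable in its vm_page but currently mapped read-only in the page table. As a hypothetical helper, the test could be written:

	static int is_cow_fault(struct process *p, int pgnum)
	{
		struct vm_page *page = get_vm_page_proc(p, pgnum);

		return page && (page->flags & PG_WRITABLE) &&
			!get_page_bit(pgnum, PG_WRITABLE, 0);
	}
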
   5.139 @@ -502,11 +579,16 @@
   5.140  			proc->user_stack_pg = fault_page;
   5.141  			return;
   5.142  		}
   5.143 +
   5.144 +		/* it's not a stack growth fault. since we don't do swapping yet, just
    5.145 +		 * fall through to unhandled and panic
   5.146 +		 */
   5.147  	}
   5.148  
   5.149  unhandled:
   5.150  	printf("~~~~ PAGE FAULT ~~~~\n");
   5.151  	printf("fault address: %x\n", fault_addr);
   5.152 +	printf("error code: %x\n", frm->err);
   5.153  
   5.154  	if(frm->err & PG_PRESENT) {
   5.155  		if(frm->err & 8) {
   5.156 @@ -522,6 +604,54 @@
   5.157  	panic("unhandled page fault\n");
   5.158  }
   5.159  
   5.160 +/* copy-on-write handler, called from pgfault above */
   5.161 +static int copy_on_write(struct vm_page *page)
   5.162 +{
   5.163 +	uint32_t newphys;
   5.164 +	struct vm_page *newpage;
   5.165 +	struct rbnode *vmnode;
   5.166 +	struct process *p = get_current_proc();
   5.167 +
   5.168 +	assert(page->nref > 0);
   5.169 +
   5.170 +	/* first of all check the refcount. If it's 1 then we don't need to copy
    5.171 +	 * anything; we're the only remaining user. This happens when every other
    5.172 +	 * process that shared the page has already faulted and taken its own copy.
   5.173 +	 */
   5.174 +	if(page->nref == 1) {
   5.175 +		set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY);
   5.176 +		return 0;
   5.177 +	}
   5.178 +
   5.179 +	/* ok let's make a copy and mark it read-write */
   5.180 +	if(!(newpage = malloc(sizeof *newpage))) {
   5.181 +		printf("copy_on_write: failed to allocate new vm_page\n");
   5.182 +		return -1;
   5.183 +	}
   5.184 +	newpage->vpage = page->vpage;
   5.185 +	newpage->flags = page->flags;
   5.186 +
   5.187 +	if(!(newphys = alloc_phys_page())) {
   5.188 +		printf("copy_on_write: failed to allocate physical page\n");
   5.189 +		/* XXX proper action: SIGSEGV */
   5.190 +		return -1;
   5.191 +	}
   5.192 +	newpage->ppage = ADDR_TO_PAGE(newphys);
   5.193 +	newpage->nref = 1;
   5.194 +
   5.195 +	/* set the new vm_page in the process vmmap */
   5.196 +	vmnode = rb_findi(&p->vmmap, newpage->vpage);
   5.197 +	assert(vmnode && vmnode->data == page);	/* shouldn't be able to fail */
   5.198 +	vmnode->data = newpage;
   5.199 +
    5.200 +	/* also update the page table */
   5.201 +	map_page(newpage->vpage, newpage->ppage, newpage->flags);
   5.202 +
   5.203 +	/* finally decrease the refcount at the original vm_page struct */
   5.204 +	page->nref--;
   5.205 +	return 0;
   5.206 +}
   5.207 +
   5.208  /* --- page range list node management --- */
   5.209  #define NODES_IN_PAGE	(PGSIZE / sizeof(struct page_range))
   5.210  
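
A worked example of the reference counting, for one page P shared by a parent and a single child right after fork:

	/* nref transitions for a shared page P (sketch):
	 *
	 *   fork:          parent vmmap -> P (nref == 2) <- child vmmap
	 *   parent writes: CoW fault -> copy_on_write(P); the parent gets a
	 *                  fresh copy P' (nref == 1), P->nref drops to 1
	 *   child writes:  CoW fault -> copy_on_write(P); P->nref == 1, so
	 *                  no copy is made, P is simply remapped writable
	 */
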
   5.211 @@ -574,14 +704,13 @@
   5.212   *
   5.213   * If "cow" is non-zero it also marks the shared user-space pages as
   5.214   * read-only, to implement copy-on-write.
   5.215 - *
   5.216 - * Returns the physical address of the new page directory.
   5.217   */
   5.218 -uint32_t clone_vm(int cow)
   5.219 +void clone_vm(struct process *pdest, struct process *psrc, int cow)
   5.220  {
   5.221  	int i, j, dirpg, tblpg, kstart_dirent;
   5.222  	uint32_t paddr;
   5.223  	uint32_t *ndir, *ntbl;
   5.224 +	struct rbnode *vmnode;
   5.225  
   5.226  	/* allocate the new page directory */
   5.227  	if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
   5.228 @@ -613,8 +742,10 @@
   5.229  				 * page table and unset the writable bits.
   5.230  				 */
   5.231  				for(j=0; j<1024; j++) {
   5.232 -					clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
   5.233 -					/*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
   5.234 +					if(PGTBL(i)[j] & PG_PRESENT) {
   5.235 +						clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
   5.236 +						/*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
   5.237 +					}
   5.238  				}
   5.239  			}
   5.240  
   5.241 @@ -632,18 +763,31 @@
   5.242  		}
   5.243  	}
   5.244  
   5.245 +	/* make a copy of the parent's vmmap tree pointing to the same vm_pages
   5.246 +	 * and increase the reference counters for all vm_pages.
   5.247 +	 */
   5.248 +	rb_init(&pdest->vmmap, RB_KEY_INT);
   5.249 +	rb_begin(&psrc->vmmap);
   5.250 +	while((vmnode = rb_next(&psrc->vmmap))) {
   5.251 +		struct vm_page *pg = vmnode->data;
   5.252 +		pg->nref++;
   5.253 +
   5.254 +		/* insert the same vm_page to the new tree */
   5.255 +		rb_inserti(&pdest->vmmap, pg->vpage, pg);
   5.256 +	}
   5.257 +
   5.258  	/* for the kernel space we'll just use the same page tables */
   5.259  	for(i=kstart_dirent; i<1024; i++) {
   5.260  		ndir[i] = pgdir[i];
   5.261  	}
   5.262 +	paddr = virt_to_phys((uint32_t)ndir);
   5.263 +	ndir[1023] = paddr | PG_PRESENT;
   5.264  
   5.265  	if(cow) {
   5.266  		/* we just changed all the page protection bits, so we need to flush the TLB */
   5.267  		flush_tlb();
   5.268  	}
   5.269  
   5.270 -	paddr = virt_to_phys((uint32_t)ndir);
   5.271 -
   5.272  	/* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
   5.273  	unmap_page(dirpg);
   5.274  	unmap_page(tblpg);
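
The new ndir[1023] assignment gives the child's page directory the self-referential last entry that the PGTBL() window used throughout this file depends on; without it, page-table accesses through that window break as soon as the child's directory is loaded. A sketch of the definitions such a recursive mapping implies (assumed here, not shown in this patch):

	/* directory slot 1023 maps the directory onto itself, so the MMU
	 * resolves the top 4MB through the directory twice and every page
	 * table shows up at a fixed virtual address
	 */
	#define PGTBL(x)	((uint32_t*)(0xffc00000 + (x) * PGSIZE))
	#define PGDIR_VADDR	((uint32_t*)0xfffff000)	/* the directory itself */
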
   5.275 @@ -651,7 +795,8 @@
   5.276  	pgfree(dirpg, 1);
   5.277  	pgfree(tblpg, 1);
   5.278  
   5.279 -	return paddr;
   5.280 +	/* set the new page directory pointer */
   5.281 +	pdest->ctx.pgtbl_paddr = paddr;
   5.282  }
   5.283  
   5.284  int get_page_bit(int pgnum, uint32_t bit, int wholepath)
   5.285 @@ -723,7 +868,7 @@
   5.286  					vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK;
   5.287  					vmp->nref = 1;	/* when first created assume no sharing */
   5.288  
   5.289 -					rb_inserti(vmmap, vmp->ppage, vmp);
   5.290 +					rb_inserti(vmmap, vmp->vpage, vmp);
   5.291  				}
   5.292  			}
   5.293  		}
   5.294 @@ -732,6 +877,21 @@
   5.295  	return 0;
   5.296  }
   5.297  
   5.298 +struct vm_page *get_vm_page(int vpg)
   5.299 +{
   5.300 +	return get_vm_page_proc(get_current_proc(), vpg);
   5.301 +}
   5.302 +
   5.303 +struct vm_page *get_vm_page_proc(struct process *p, int vpg)
   5.304 +{
   5.305 +	struct rbnode *node;
   5.306 +
   5.307 +	if(!p || !(node = rb_findi(&p->vmmap, vpg))) {
   5.308 +		return 0;
   5.309 +	}
   5.310 +	return node->data;
   5.311 +}
   5.312 +
   5.313  
   5.314  void dbg_print_vm(int area)
   5.315  {
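
Everything above leans on the vmmap being keyed by *virtual* page number, which is what the one-word cons_vmmap fix (ppage to vpage) restores; get_vm_page_proc is then a plain keyed lookup. A hypothetical debug dump built on the same iteration interface as rb_dbg_print_tree:

	void dbg_print_vmmap(struct process *p)
	{
		struct rbnode *node;

		rb_begin(&p->vmmap);
		while((node = rb_next(&p->vmmap))) {
			struct vm_page *pg = node->data;
			printf("vpg %x -> ppg %x (nref %d)\n", pg->vpage, pg->ppage, pg->nref);
		}
	}
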
     6.1 --- a/src/vm.h	Mon Oct 10 04:16:01 2011 +0300
     6.2 +++ b/src/vm.h	Wed Oct 12 14:39:40 2011 +0300
     6.3 @@ -53,6 +53,8 @@
     6.4  	int nref;
     6.5  };
     6.6  
     6.7 +struct process;
     6.8 +
     6.9  void init_vm(void);
    6.10  
    6.11  int map_page(int vpage, int ppage, unsigned int attr);
    6.12 @@ -61,8 +63,11 @@
    6.13  int unmap_page_range(int vpg_start, int pgcount);
    6.14  int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr);
    6.15  
    6.16 +uint32_t virt_to_phys(uint32_t vaddr);
    6.17  int virt_to_phys_page(int vpg);
    6.18 -uint32_t virt_to_phys(uint32_t vaddr);
    6.19 +
    6.20 +uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr);
    6.21 +int virt_to_phys_page_proc(struct process *p, int vpg);
    6.22  
    6.23  enum {
    6.24  	MEM_KERNEL,
    6.25 @@ -73,7 +78,7 @@
    6.26  int pgalloc_vrange(int start, int num);
    6.27  void pgfree(int start, int num);
    6.28  
    6.29 -uint32_t clone_vm(int cow);
    6.30 +void clone_vm(struct process *pdest, struct process *psrc, int cow);
    6.31  
    6.32  int get_page_bit(int pgnum, uint32_t bit, int wholepath);
    6.33  void set_page_bit(int pgnum, uint32_t bit, int wholepath);
    6.34 @@ -82,6 +87,9 @@
    6.35  /* construct the vm map for the current user mappings */
    6.36  int cons_vmmap(struct rbtree *vmmap);
    6.37  
    6.38 +struct vm_page *get_vm_page(int vpg);
    6.39 +struct vm_page *get_vm_page_proc(struct process *p, int vpg);
    6.40 +
    6.41  void dbg_print_vm(int area);
    6.42  
    6.43  /* defined in vm-asm.S */