kern

diff src/vm.c @ 69:b45e2d5f0ae1

ok I *think* I've fixed it now
author John Tsiombikas <nuclear@member.fsf.org>
date Wed, 12 Oct 2011 14:39:40 +0300
parents 0a205396e1a0
children 219974492c7d
--- a/src/vm.c	Mon Oct 10 04:16:01 2011 +0300
+++ b/src/vm.c	Wed Oct 12 14:39:40 2011 +0300
@@ -38,6 +38,7 @@
 
 static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
 static void pgfault(int inum);
+static int copy_on_write(struct vm_page *page);
 static struct page_range *alloc_node(void);
 static void free_node(struct page_range *node);
 
@@ -109,6 +110,7 @@
 {
 	uint32_t *pgtbl;
 	int diridx, pgidx, pgon, intr_state;
+	struct process *p;
 
 	intr_state = get_intr_state();
 	disable_intr();
@@ -155,6 +157,32 @@
 	pgtbl[pgidx] = PAGE_TO_ADDR(ppage) | (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
 	flush_tlb_page(vpage);
 
+	/* if it's a new *user* mapping, and there is a current process, update the vmmap */
+	if((attr & PG_USER) && (p = get_current_proc())) {
+		struct vm_page *page;
+
+		if(!(page = get_vm_page_proc(p, vpage))) {
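+			/* no vm_page for this virtual page yet, allocate one */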
+			if(!(page = malloc(sizeof *page))) {
+				panic("map_page: failed to allocate new vm_page structure");
+			}
+			page->vpage = vpage;
+			page->ppage = ppage;
+			page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT;
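+			/* only this process references it for now; forks may share it later */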
+			page->nref = 1;
+
+			rb_inserti(&p->vmmap, vpage, page);
+		} else {
+			/* otherwise just update the mapping */
+			page->ppage = ppage;
+
+			/* XXX don't touch the flags: that's how we implement CoW,
+			 * by changing the mapping without affecting the vm_page
+			 */
+		}
+	}
+
 	set_intr_state(intr_state);
 	return 0;
 }
@@ -231,6 +259,7 @@
 	return map_page_range(vpg_start, num_pages, ppg_start, attr);
 }
 
+/* translate a virtual address to a physical address using the current page table */
 uint32_t virt_to_phys(uint32_t vaddr)
 {
 	int pg;
@@ -244,6 +273,7 @@
 	return pgaddr | ADDR_TO_PGOFFS(vaddr);
 }
 
+/* translate a virtual page number to a physical page number using the current page table */
 int virt_to_phys_page(int vpg)
 {
 	uint32_t pgaddr, *pgtbl;
@@ -268,6 +298,32 @@
 	return ADDR_TO_PAGE(pgaddr);
 }
 
+/* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */
+uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr)
+{
+	int pg;
+	uint32_t pgaddr;
+
+	if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) {
+		return 0;
+	}
+	pgaddr = PAGE_TO_ADDR(pg);
+
+	return pgaddr | ADDR_TO_PGOFFS(vaddr);
+}
+
+/* same as virt_to_phys_page, but uses the vm_page tree instead of the actual page table */
+int virt_to_phys_page_proc(struct process *p, int vpg)
+{
+	struct rbnode *node;
+	assert(p);
+
+	if(!(node = rb_findi(&p->vmmap, vpg))) {
+		return -1;
+	}
+	return ((struct vm_page*)node->data)->ppage;
+}
+
 /* allocate a contiguous block of virtual memory pages along with
  * backing physical memory for them, and update the page table.
  */
@@ -481,14 +537,35 @@
 	if(frm->err & PG_USER) {
 		int fault_page = ADDR_TO_PAGE(fault_addr);
 		struct process *proc = get_current_proc();
-		printf("DBG: page fault in user space\n");
 		assert(proc);
+		printf("DBG: page fault in user space (pid: %d)\n", proc->id);
 
 		if(frm->err & PG_PRESENT) {
-			/* it's not due to a missing page, just panic */
+			/* it's not due to a missing page; check the page attributes */
+			if((frm->err & PG_WRITABLE) && (get_page_bit(fault_page, PG_WRITABLE, 0) == 0)) {
+				/* a write permission fault might be a CoW fault or just an error.
+				 * fetch the vm_page permissions to check if this is supposed to be
+				 * a writable page (which means we should CoW).
+				 */
+				struct vm_page *page = get_vm_page_proc(proc, fault_page);
+
+				if(page && (page->flags & PG_WRITABLE)) {
+					/* ok this is a CoW fault */
+					if(copy_on_write(page) == -1) {
+						panic("copy on write failed!");
+					}
+					return;	/* done, allow the process to restart the instruction and continue */
+				} else {
+					/* TODO eventually we'll SIGSEGV the process, for now just panic.
+					 */
+					goto unhandled;
+				}
+			}
 			goto unhandled;
 		}
 
+		/* so it's a missing page... ok */
+
 		/* detect if it's an automatic stack growth deal */
 		if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) {
 			int num_pages = proc->user_stack_pg - fault_page;
@@ -502,11 +579,16 @@
 			proc->user_stack_pg = fault_page;
 			return;
 		}
+
+		/* it's not a stack growth fault. since we don't do swapping yet, just
+		 * fall through to unhandled and panic
+		 */
 	}
 
 unhandled:
 	printf("~~~~ PAGE FAULT ~~~~\n");
 	printf("fault address: %x\n", fault_addr);
+	printf("error code: %x\n", frm->err);
 
 	if(frm->err & PG_PRESENT) {
 		if(frm->err & 8) {
@@ -522,6 +604,66 @@
 	panic("unhandled page fault\n");
 }
 
+/* copy-on-write handler, called from pgfault above */
+static int copy_on_write(struct vm_page *page)
+{
+	uint32_t newphys;
+	struct vm_page *newpage;
+	struct rbnode *vmnode;
+	void *buf;
+	struct process *p = get_current_proc();
+
+	assert(page->nref > 0);
+
+	/* first of all check the refcount. If it's 1 then we don't need to copy
+	 * anything, since every other process that shared this page has already
+	 * faulted and made its own private copy.
+	 */
+	if(page->nref == 1) {
+		set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY);
+		return 0;
+	}
+
+	/* ok let's make a copy and mark it read-write */
+	if(!(newpage = malloc(sizeof *newpage))) {
+		printf("copy_on_write: failed to allocate new vm_page\n");
+		return -1;
+	}
+	newpage->vpage = page->vpage;
+	newpage->flags = page->flags;
+
+	if(!(newphys = alloc_phys_page())) {
+		printf("copy_on_write: failed to allocate physical page\n");
+		/* XXX proper action: SIGSEGV */
+		return -1;
+	}
+	newpage->ppage = ADDR_TO_PAGE(newphys);
+	newpage->nref = 1;
+
+	/* grab a copy of the page contents before we remap, since the new
+	 * physical page can't be accessed until it's mapped over the old one
+	 */
+	if(!(buf = malloc(PGSIZE))) {
+		printf("copy_on_write: failed to allocate copy buffer\n");
+		return -1;
+	}
+	memcpy(buf, (void*)PAGE_TO_ADDR(page->vpage), PGSIZE);
+
+	/* set the new vm_page in the process vmmap */
+	vmnode = rb_findi(&p->vmmap, newpage->vpage);
+	assert(vmnode && vmnode->data == page);	/* shouldn't be able to fail */
+	vmnode->data = newpage;
+
+	/* also update the page table, and copy the contents into the new page */
+	map_page(newpage->vpage, newpage->ppage, newpage->flags);
+	memcpy((void*)PAGE_TO_ADDR(newpage->vpage), buf, PGSIZE);
+	free(buf);
+
+	/* finally decrease the refcount at the original vm_page struct */
+	page->nref--;
+	return 0;
+}
+
 /* --- page range list node management --- */
 #define NODES_IN_PAGE	(PGSIZE / sizeof(struct page_range))
 
@@ -574,14 +716,14 @@
  *
  * If "cow" is non-zero it also marks the shared user-space pages as
  * read-only, to implement copy-on-write.
- *
- * Returns the physical address of the new page directory.
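+ * The physical address of the new page directory is stored in pdest->ctx.pgtbl_paddr.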
  */
-uint32_t clone_vm(int cow)
+void clone_vm(struct process *pdest, struct process *psrc, int cow)
 {
 	int i, j, dirpg, tblpg, kstart_dirent;
 	uint32_t paddr;
 	uint32_t *ndir, *ntbl;
+	struct rbnode *vmnode;
 
 	/* allocate the new page directory */
 	if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) {
@@ -613,8 +755,10 @@
 				 * page table and unset the writable bits.
 				 */
 				for(j=0; j<1024; j++) {
-					clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
-					/*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
+					if(PGTBL(i)[j] & PG_PRESENT) {
+						clear_page_bit(i * 1024 + j, PG_WRITABLE, PAGE_ONLY);
+						/*PGTBL(i)[j] &= ~(uint32_t)PG_WRITABLE;*/
+					}
 				}
 			}
 
@@ -632,18 +776,33 @@
 		}
 	}
 
+	/* make a copy of the parent's vmmap tree pointing to the same vm_pages
+	 * and increase the reference counters for all vm_pages.
+	 */
+	rb_init(&pdest->vmmap, RB_KEY_INT);
+	rb_begin(&psrc->vmmap);
+	while((vmnode = rb_next(&psrc->vmmap))) {
+		struct vm_page *pg = vmnode->data;
+		pg->nref++;
+
+		/* insert the same vm_page to the new tree */
+		rb_inserti(&pdest->vmmap, pg->vpage, pg);
+	}
+
 	/* for the kernel space we'll just use the same page tables */
 	for(i=kstart_dirent; i<1024; i++) {
 		ndir[i] = pgdir[i];
 	}
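+	/* hook the new directory into its own last dirent, so that the page
+	 * tables of the new address space remain reachable (recursive page directory) */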
+	paddr = virt_to_phys((uint32_t)ndir);
+	ndir[1023] = paddr | PG_PRESENT;
 
 	if(cow) {
 		/* we just changed all the page protection bits, so we need to flush the TLB */
 		flush_tlb();
 	}
 
-	paddr = virt_to_phys((uint32_t)ndir);
-
 	/* unmap before freeing the virtual pages, to avoid deallocating the physical pages */
 	unmap_page(dirpg);
 	unmap_page(tblpg);
@@ -651,7 +810,8 @@
 	pgfree(dirpg, 1);
 	pgfree(tblpg, 1);
 
-	return paddr;
+	/* set the new page directory pointer */
+	pdest->ctx.pgtbl_paddr = paddr;
 }
 
 int get_page_bit(int pgnum, uint32_t bit, int wholepath)
@@ -723,7 +883,7 @@
 					vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK;
 					vmp->nref = 1;	/* when first created assume no sharing */
 
-					rb_inserti(vmmap, vmp->ppage, vmp);
+					rb_inserti(vmmap, vmp->vpage, vmp);
 				}
 			}
 		}
@@ -732,6 +892,23 @@
 	return 0;
 }
 
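+/* return the vm_page for the given virtual page of the current process, or 0 if it's not mapped */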
+struct vm_page *get_vm_page(int vpg)
+{
+	return get_vm_page_proc(get_current_proc(), vpg);
+}
+
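+/* return the vm_page for the given virtual page in the vmmap of process p, or 0 if it's not there */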
+struct vm_page *get_vm_page_proc(struct process *p, int vpg)
+{
+	struct rbnode *node;
+
+	if(!p || !(node = rb_findi(&p->vmmap, vpg))) {
+		return 0;
+	}
+	return node->data;
+}
+
 
 void dbg_print_vm(int area)
 {