// mem/addrspace.cc -- System.Mem.AddrSpace
//
// OPT: Special AddrSpaces that only translate/export a linear block of
// another AddrSpace, and don't have individual entries for every page.
//
// OPT: Special VAreas that use their own translation mechanism instead
// of varea->offset, so that filesystem block tables (and similar things)
// don't need to have a VArea per block.
//
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// This software is provided 'as-is', without any express or implied warranty.
// In no event will the authors or contributors be held liable for any damages
// arising from the use of this software.
//
// Permission is hereby granted to everyone, free of charge, to use, copy,
// modify, prepare derivative works of, publish, distribute, perform,
// sublicense, and/or sell copies of the Software, provided that the above
// copyright notice and disclaimer of warranty be included in all copies or
// substantial portions of this software.
#include <kern/paging.h>
#include <kern/generic-pagetable.h>
#include <kern/pagetable.h>
#include <kern/pagealloc.h>
#include <kern/generic-pte.h>
#include <kern/compiler.h>
#include <kern/process.h>
#include <kern/thread.h>

extern int roshared_start, roshared_page_end;
extern int rwshared_start, rwshared_page_end;

extern IMappable physmem, anonmem;
class AddrSpaceFactory {
    #include <servers/mem/addrspace/Mem/AddrSpaceFactory.h>

    void create(Object *obj)
        *obj = static_cast<IAddrSpace>(*(new AddrSpace));
class ProcAddrSpaceFactory {
    #include <servers/mem/addrspace/Mem/ProcAddrSpaceFactory.h>

    ProcAddrSpaceFactory()

    void create(Object *obj)
        AddrSpace *as = new ProcAddrSpace;

        region.start = kvirt_to_phys(&roshared_start);
        region.end = kvirt_to_phys(&roshared_page_end);
        vstart = Arch::roshared_map;
        mf.access_IDLNS_Read = 1;
        mf.access_IDLNS_Exec = 1;

        as->map(physmem, region, &vstart, mf,
                PTEFlags::protectedmap | PTEFlags::addressonly);

        region.start = kvirt_to_phys(&rwshared_start);
        region.end = kvirt_to_phys(&rwshared_page_end);
        vstart = Arch::rwshared_map;
        mf.access_IDLNS_Exec = 0;
        mf.access_IDLNS_Write = 1;

        as->map(physmem, region, &vstart, mf,
                PTEFlags::protectedmap | PTEFlags::addressonly);

        // Leave the stack no-exec by default.
        region.start = vstart = Arch::stack_bottom;
        region.end = Arch::stack_top;

        as->map(anonmem, region, &vstart, mf);

        *obj = static_cast<IAddrSpace>(*as);
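
// Illustration (not from this file): after create() returns, the new
// process address space contains roughly the following regions; the exact
// virtual addresses are architecture-specific constants:
//
//   Arch::roshared_map  -- read/exec mapping of [roshared_start, roshared_page_end)
//   Arch::rwshared_map  -- read/write mapping of [rwshared_start, rwshared_page_end)
//   Arch::stack_bottom  -- read/write, no-exec anonymous memory up to Arch::stack_top
//
// The two shared mappings are created with PTEFlags::protectedmap, so the
// Protected check in AddrSpace::unmap() below will skip them for requests
// that don't come from the kernel.
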
ProcAddrSpaceFactory real_proc_addrspace_factory;
Factory proc_addr_space_factory = real_proc_addrspace_factory;

AddrSpaceFactory real_addrspace_factory;
Factory addr_space_factory = real_addrspace_factory;
AddrSpace::AddrSpace(PageTable *ptbl) : mappable(this)
    page_table = new PageTableImpl<GenPTE>(false);

    cached_free_region = Arch::user_start + Arch::page_size;

ProcAddrSpace::ProcAddrSpace() :
    AddrSpace(new PageTableImpl<Arch::PTE>(true))

// This should only be used once during bootup to initialize the
// kernel's address space with a static initial page table.

ProcAddrSpace::ProcAddrSpace(void *ptbl_toplevel) :
    AddrSpace(new PageTableImpl<Arch::PTE>(ptbl_toplevel))
    // FIXME: set cached_free_region to kernel virtual space
void AddrSpace::get_mappable(IMappable *ma)

void AddrSpace::clone(IAddrSpace *addrspace, uint8_t clone_is_real)

int AddrSpace::handle_fault(ulong vaddr, bool write, bool exec, bool user)
    if (lock.held_by_curthread())

    assert(!(write && exec));

    reqflags.Writeable = 1;
    reqflags.Executable = 1;
    reqflags.Readable = 1;

    mappable.pagein(page_align(vaddr), reqflags);

    catch (BadPageFault &bpf) {
bool AddrSpace::check_overlap(Region region, VirtualArea *&va)
    if (region.end < region.start)

    va = varea_tree.find_nearest(region.start);

    // If region.start is in an existing region, that region will
    // be returned.

    if (region.end >= va->region().start &&
        region.start <= va->region().end)

    // If it returns a region that's greater than region.start, and va
    // itself does not overlap, then prev does not overlap (or else
    // region.start would be in or before prev, and thus prev would
    // have been returned).
    //
    // If it returns a region that's less than region.start, we still
    // need to check next, as region.end could be in (or beyond) that
    // region.
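
    // Worked example (illustrative addresses, not from the original source):
    // suppose the tree holds vareas A = [0x1000, 0x1fff] and B = [0x5000, 0x5fff].
    //
    //   check_overlap({0x3000, 0x57ff}, va): find_nearest(0x3000) returns a
    //   nearby varea (say A); A itself doesn't overlap, but its successor B
    //   does, since region.end (0x57ff) >= B.start (0x5000), so an overlap
    //   is reported.
    //
    //   check_overlap({0x2000, 0x2fff}, va): neither the returned varea nor
    //   its successor overlaps, so the caller learns both that the region is
    //   free and which existing varea precedes it.
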
    if (va->list_node.next != &varea_list) {
        VirtualArea *next =
            va->list_node.next->listentry(VirtualArea, list_node);

        if (region.end >= next->region().start &&
            region.start <= next->region().end)

    if (va->list_node.prev != &varea_list)
        prev = va->list_node.prev->listentry(VirtualArea, list_node);

    if (region.start < va->region().start) {
        assert(!prev || prev->region().end < region.start);
VirtualArea *AddrSpace::split_varea(Region region)
    VirtualArea *start, *mid, *end;

    // check_overlap is used rather than varea_tree.find,
    // so that the first overlapping region can be returned,
    // as most (if not all) callers will need this anyway.

    if (!check_overlap(region, start))

    assert(start->aspace == this);
    assert(start->region().end >= region.start);

    if (start->region().start < region.start) {
        // There is a varea that straddles region.start;
        // create a new varea "mid" for the overlapping part.

        mid = new VirtualArea;

        mid->region().start = region.start;

        if (region.end > start->region().end)
            mid->region().end = start->region().end;
        else
            mid->region().end = region.end;

        mid->flags = start->flags;
        mid->offset = start->offset;

        if (start->region().end > region.end) {
            // The varea also straddles region.end; create a new
            // varea "end" for the other side of the region.

            end = new VirtualArea;

            end->region().start = region.end + 1;
            end->region().end = start->region().end;

            end->flags = start->flags;
            end->offset = start->offset;

        start->region().end = region.start - 1;

    // Splits have already been done at both ends of the region,
    // so there's no need to look up the ending address.

    if (start->region().end == region.end)

    if (start->region().end > region.end)

    end = varea_tree.find(region.end);

    assert(end->aspace == this);
    assert(end->region().start <= region.end);
    assert(end->region().end >= region.end);

    if (end->region().end == region.end)

    assert(end->region().end > region.end);

    // There is a varea that straddles region.end;
    // create a new varea "mid" for the overlapping part.

    mid = new VirtualArea;

    mid->region().start = region.start;
    mid->region().start = end->region().start;
    mid->region().end = region.end;

    mid->flags = start->flags;
    mid->offset = start->offset;

    end->region().start = region.end + 1;
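
// Worked example (illustrative numbers, not from the original source):
// with a single varea covering [0x1000, 0x8fff], split_varea({0x3000, 0x5fff})
// leaves three vareas that share the original's flags, ma, and offset
// (the offset is a constant virtual-to-mappable delta, so it needs no
// adjustment when a varea is split):
//
//   [0x1000, 0x2fff]  -- the original "start" varea, truncated
//   [0x3000, 0x5fff]  -- "mid", exactly the requested region
//   [0x6000, 0x8fff]  -- "end", the remainder past region.end
//
// Callers can then change or remove the middle varea without touching the
// parts of the original mapping outside the requested region.
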
bool AddrSpace::get_free_region(ulong len, Region &region,
                                VirtualArea *&prev)
    assert(page_aligned(len));
    assert(cached_free_region);

    region.start = cached_free_region;
    region.end = cached_free_region + len - 1;

    if (region.end <= Arch::user_end && !check_overlap(region, prev)) {
        cached_free_region = region.end + 1;

    for (Util::List *node = &prev->list_node; node != &varea_list;
         node = node->next) {
        VirtualArea *va = node->listentry(VirtualArea, list_node);
        ulong end = Arch::user_end;

        if (node->next != &varea_list) {
            VirtualArea *next = node->next->listentry(VirtualArea, list_node);
            end = next->region().start - 1;

        assert(end > va->region().end);

        if (end - va->region().end >= len) {
            region.start = va->region().end + 1;
            region.end = region.start + len - 1;

            assert(page_aligned(region.start));
            cached_free_region = region.end + 1;

    if (cached_free_region != Arch::user_start + Arch::page_size) {
        cached_free_region = Arch::user_start + Arch::page_size;
        return get_free_region(len, region, prev);
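
// Example of the allocation cache (illustrative, not from the original
// source): after mapping three 4 KiB anonymous regions in a fresh address
// space, cached_free_region has simply advanced by three pages, so each
// map() hit the fast path above without walking the varea list.  Once an
// allocation fails at the cached position (overlap, or past Arch::user_end),
// the list walk looks for a large-enough gap, and as a last resort the
// cache is reset to Arch::user_start + Arch::page_size and the search is
// retried once.
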
// The "mapped" parameter is used to indicate whether the top-level
// address space has had a mapping established.  If "mapped" is
// false, but an exception is not thrown, then this method must
// be called again to propagate the mapping along the aspace chain.
//
// FIXME: Between aspace locks, if aspace's mapping is revoked and
// ma->aspace's mapping changes, a pagein could leak through and cause
// a spurious page load or copy-on-write break.  This isn't a huge deal
// (it doesn't affect the correctness of the code or give aspace
// access to ma->aspace's new mapping), but it's unpleasant, and could
// have an adverse impact on determinism.  If you have a real-time
// application that can't tolerate the occasional spurious pagein or
// copy-on-write break, then use an address space that hasn't
// previously been exposed to recursive mappers.
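
// Illustration (not from the original source): suppose address space A has
// a varea backed by address space B (through B's ASpaceMappable), and B's
// varea is in turn backed by physmem.  A fault in A first tries to map the
// page directly; if B has nothing to supply, the walk continues down the
// chain until it reaches a Mappable that is not an address space (physmem
// here), which is paged in.  That pass may only establish the lower-level
// mapping, so pagein() below keeps calling rec_pagein() until it reports
// that the top-level address space itself has been mapped.
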
bool ASpaceMappable::rec_pagein(AddrSpace *aspace, u64 vaddr,
                                PTEFlags reqflags)
    // aspace->mappable.retain();

    Lock::DroppableAutoLock autolock(aspace->lock);
    VirtualArea *va = aspace->varea_tree.find(vaddr);

    if (!va)
        throw BadPageFault(MemoryFault_ns::Cause::Unmapped);

    if ((va->flags & reqflags) != reqflags)
        throw BadPageFault(MemoryFault_ns::Cause::Protected);

    if (aspace->map(va, vaddr, reqflags))

    Mappable *ma = va->ma;

    // aspace->mappable.release();

    if (!ma->is_aspace) {
        ma->pagein(vaddr, reqflags);

    aspace = static_cast<ASpaceMappable *>(ma)->aspace;

void ASpaceMappable::pagein(u64 vaddr, PTEFlags reqflags)
    while (!rec_pagein(aspace, vaddr, reqflags));
void AddrSpace::break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys)
    assert(lock.held_by_curthread());
    assert(rmap_lock.held_by_curthread());

    assert(va->flags.FaultOnWrite);
    assert(va->aspace == this);

    Page *old_page = phys_to_page(phys);

    Region region = { vaddr, vaddr + Arch::page_size - 1 };

    // If this is the only reference to the page left, then
    // nothing needs to be copied.  Just clear the COW condition.
    if (is_phys_page(old_page) && old_page->get_refcount() == 1) {
        PTEFlags mask, flags;
        mask.FaultOnWrite = 1;

        page_table->set_flags(region, flags, mask);

    Page *new_page = PageAlloc::alloc(1);

    // OPT: It'd be better to do this without the rmap_lock held,
    // especially if rmap_lock is global rather than per-physpage.
    // I want to keep things simple for now and optimize later.

    memcpy(page_to_kvirt(new_page), phys_to_kvirt(phys),
           Arch::page_size);

    page_table->rmap_table.break_copy_on_write(region.start, new_page);
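
// Illustration of the two cases above (not from the original source): if
// two address spaces share a physical page copy-on-write and one of them
// writes to it, a fresh page is allocated, the contents are copied, and
// the rmap table switches the writer over to the new page.  If the writer
// holds the last remaining reference (the other mapping has already gone
// away), no copy is needed and only the FaultOnWrite condition is cleared.
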
void ASpaceMappable::get_mapping(u64 vaddr, u64 *phys, PTEFlags *flags)
    aspace->page_table->get_mapping(vaddr, phys, flags);

bool AddrSpace::map(VirtualArea *va, u64 vaddr, PTEFlags reqflags)
    Lock::AutoLock autolock(rmap_lock);
    assert(va->aspace == this);

    va->ma->get_mapping(vaddr + va->offset, &phys, &flags);

    PTEFlags newflags = flags & va->flags;
    newflags.FaultOnWrite = flags.FaultOnWrite | va->flags.FaultOnWrite;

    if (!newflags.Valid) {
        assert(va->flags.Valid);

    if ((newflags & reqflags) != reqflags)

    page_table->get_mapping(vaddr, &oldphys, &oldflags);

    if (oldflags.Valid &&
        !(reqflags.Writeable && oldflags.FaultOnWrite))
        // If the existing mapping is valid, don't try to map it again.
        // The existing mapping was put there possibly by a race, but
        // more likely because a FaultOnWrite was handled upstream.
        //
        // FaultOnWrite handling is the only type of mapping change that
        // can be done directly; all others must change the varea and do
        // an rmap invalidation instead.  FaultOnWrite is special
        // because we don't want to split vareas for every page that
        // gets its copy-on-write broken.

        assert((oldflags & reqflags) == reqflags);
        assert(!va->flags.FaultOnWrite || oldphys == phys);

    if (reqflags.Writeable && oldflags.FaultOnWrite)
        // The FaultOnWrite needs to be handled upstream.
        if (!va->flags.FaultOnWrite)

        va->aspace->break_copy_on_write(va, vaddr, phys);

    assert(!oldflags.Valid);
    PageTable *usptbl = NULL;

    if (va->ma->is_aspace) {
        ASpaceMappable *asma = static_cast<ASpaceMappable *>(va->ma);
        usptbl = asma->aspace->page_table;

    RMapTable::map(va, usptbl, vaddr, vaddr + va->offset);

    RegionWithOffset rwo;
    rwo.end = vaddr + Arch::page_size - 1;

    page_table->map(rwo, newflags);
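
// Example of the flag arithmetic above (illustrative): with an upstream
// mapping of Valid+Readable+Writeable and a varea whose flags are
// Valid+Readable+Writeable with FaultOnWrite set (a typical copy-on-write
// mapping), newflags comes out Valid+Readable+Writeable with FaultOnWrite
// still set, so a later write fault is routed through
// break_copy_on_write() above instead of silently reusing the shared
// upstream page.
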
void ASpaceMappable::get_size(u64 *size)
    aspace->get_size(size);

void AddrSpace::map(IMappable ma, Region region, u64 *vstart,
                    MapFlags mflags, PTEFlags set, PTEFlags clear)
    // FIXME: check alignment for VIPT caches
    // FIXME: Implement the "Replace" map flag

    throw_idl(InvalidArgument, 3,
              countarray("Replace unimplemented"));

    Mappable *cma = Mappable::classptr(ma);

    // The given IMappable does not refer to a Mappable
    throw_idl(InvalidArgument, 0, nullarray);

    bool fixed = mflags.Fixed;

    if (!page_aligned(region.start))
        throw_idl(InvalidArgument, 1, countarray("unaligned start"));

    if (!page_aligned(region.end + 1))
        throw_idl(InvalidArgument, 1, countarray("unaligned end"));

    Lock::AutoLock autolock(lock);

    if (*vstart != System::Mem::AddrSpace_ns::unspecified_start) {
        vregion.start = *vstart;
        vregion.end = vregion.start + region.end - region.start;

        if (!valid_addr(vregion.start))
            throw_idl(InvalidArgument, 2,
                      countarray("invalid virtual start"));

        if (!valid_addr(vregion.end))
            throw_idl(InvalidArgument, 2,
                      countarray("invalid virtual end"));

        if (check_overlap(vregion, prev))
            *vstart = System::Mem::AddrSpace_ns::unspecified_start;

    if (*vstart == System::Mem::AddrSpace_ns::unspecified_start) {
        throw_idl(ResourceBusy, 2, countarray("varea overlap"));

        if (!get_free_region(region.end - region.start + 1, vregion, prev))
            throw_idl(OutOfSpace, countarray("out of vspace"));

        *vstart = vregion.start;

    VirtualArea *newva = new VirtualArea;
    newva->aspace = this;
    newva->region() = vregion;

    newva->flags.Valid = 1;
    newva->flags.User = 1;
    newva->flags.Readable = mflags.access_IDLNS_Read;
    newva->flags.Writeable = mflags.access_IDLNS_Write;
    newva->flags.Executable = mflags.access_IDLNS_Exec;
    newva->flags.FaultOnWrite = mflags.CopyOnWrite;
    newva->flags.raw |= set;
    newva->flags.raw &= ~clear;

    newva->offset = region.start - vregion.start;

    varea_tree.add(newva);
    newva->ma->map(newva);

    prev->list_node.add_front(&newva->list_node);

    varea_list.add_front(&newva->list_node);
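
// Usage sketch (hypothetical caller, not from this file): mapping one page
// of anonymous memory at an address chosen by the kernel might look
// roughly like this, letting get_free_region() pick the spot:
//
//     MapFlags mf = 0;
//     mf.access_IDLNS_Read = 1;
//     mf.access_IDLNS_Write = 1;
//
//     Region region = { 0, Arch::page_size - 1 };
//     u64 vstart = System::Mem::AddrSpace_ns::unspecified_start;
//
//     as->map(anonmem, region, &vstart, mf);
//     // On return, vstart holds the chosen page-aligned virtual address.
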
void AddrSpace::unmap(Region region, bool from_kernel)
    u64 orig_start = region.start;

    while (region.start <= region.end) {
        Lock::DroppableAutoLock autolock(lock);

        // If check_overlap returns false, then there are no vareas
        // in the specified region, so there's nothing to unmap.

        if (!check_overlap(region, va))

        if (va->flags.Protected && !from_kernel) {
            region.start = va->list_node.next->
                           listentry(VirtualArea, list_node)->region().start;

            if (region.start <= orig_start)

        u64 va_end = va->region().end;

        if (va_end > region.end) {
            u64 va_start = va->region().start;
            va->region().start = region.end + 1;

            if (va_start < region.start) {
                VirtualArea *newva = new VirtualArea;

                newva->aspace = this;
                newva->region().start = va_start;
                newva->region().end = region.start - 1;

                newva->flags = va->flags;

                newva->offset = va->offset;

                varea_tree.add(newva);
                newva->ma->map(newva);

            VirtualArea *nextva =
                va->list_node.next->listentry(VirtualArea, list_node);

            next_start = nextva->region().start;
        } else if (va->region().start < region.start) {
            va->region().end = region.start - 1;

        // This is done after the varea removal, so that new faults
        // don't map things in again.

        // OPT: Skip RMap-based unmapping if nothing maps this aspace.
        // OPT: Push this loop into the RMap code, allowing it to skip
        // empty portions of the tables (as the pagetable code currently
        // does).

        while (region.start <= va_end && region.start <= region.end) {
            page_table->rmap_table.unmap(region.start);
            region.start += Arch::page_size;

            if (region.start <= orig_start)

        region.start = next_start;

        if (region.start <= orig_start)
void AddrSpace::set_mapflags(Region region, MapFlags mflags)
    // Find varea, split if necessary, propagate change to stacked aspaces

void AddrSpace::get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same)

void AddrSpace::get_mapping(Region region, IMappable *ma, u64 *offset)

void AddrSpace::get_page_size(u32 *page_size)
    *page_size = Arch::page_size;

void AddrSpace::get_min_align(u32 *min_align)
    *min_align = Arch::page_mapping_min_align;

void AddrSpace::get_size(u64 *size)
    page_table->get_size(size);
void Mappable::map(VirtualArea *varea)
    mappings_lock.lock_irq();
    mappings.add_back(&varea->mappings_node);
    mappings_lock.unlock_irq();

void Mappable::unmap(VirtualArea *varea)
    mappings_lock.lock_irq();
    varea->mappings_node.del();
    mappings_lock.unlock_irq();
void PageTable::kill_pte(ulong vaddr, u64 paddr, bool dirty,
                         bool valid, bool no_release)
    Page *oldpage = phys_to_page(paddr);

    if (!is_phys_page(oldpage))

    if (is_process && valid) {
        Arch::invalidate_tlb_entry(vaddr);

    if (oldpage && dirty &&
        !ll_test_and_set(&oldpage->flags, PageFlags::bits::Dirty))
        // Queue page for writeback

    if (!no_release && oldpage)
class PhysMem : public Mappable {
    void get_size(u64 *size)
        if (sizeof(long) == 8)
            *size = 1ULL << (64 - Arch::page_shift);
        else
            *size = 1ULL << (32 - Arch::page_shift);

    void pagein(u64 vaddr, PTEFlags reqflags)

    void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
        flags->Writeable = 1;
        flags->Executable = 1;

        flags->AddressOnly = 1;

PhysMem real_physmem;
IMappable physmem = real_physmem;
class AnonMem : public Mappable {
    void get_size(u64 *size)
        if (sizeof(long) == 8)
            *size = 1ULL << (64 - Arch::page_shift);
        else
            *size = 1ULL << (32 - Arch::page_shift);

    void pagein(u64 vaddr, PTEFlags reqflags)

    void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
        Page *page = PageAlloc::alloc(1);

        // OPT: Only zero if it was asked for.
        // OPT: Eventually, have separate pagelists for zeroed and
        // unzeroed memory, and a low-priority background thread
        // that zeroes pages and moves them to the zeroed list.
        bzero(page_to_kvirt(page), Arch::page_size);

        *phys = page_to_phys(page);

        flags->Writeable = 1;
        flags->Executable = 1;

AnonMem real_anonmem;
IMappable anonmem = real_anonmem;
#include <servers/mem/addrspace/footer.cc>