// mem/addrspace.cc -- System.Mem.AddrSpace
//
// OPT: Special AddrSpaces that only translate/export a linear block of
// another AddrSpace, and don't have individual entries for every page.
//
// OPT: Special VAreas that use their own translation mechanism instead
// of varea->offset, so that filesystem block tables (and similar things)
// don't need to have a VArea per block.
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
//     * Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimers.
//
//     * Redistributions in binary form must reproduce the above copyright notice,
//       this list of conditions and the following disclaimers in the
//       documentation and/or other materials provided with the distribution.
//
//     * The names of the Software's authors and/or contributors
//       may not be used to endorse or promote products derived from
//       this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
#include <kern/paging.h>
#include <kern/pagetable.h>
#include <kern/pagealloc.h>
#include <kern/generic-pte.h>
#include <kern/compiler.h>

extern int roshared_start, roshared_page_end;
extern int rwshared_start, rwshared_page_end;

extern IMappable physmem;
class AddrSpaceFactory {
	#include <servers/mem/addrspace/Mem/AddrSpaceFactory.h>

	void create(Object *obj)
		*obj = static_cast<IAddrSpace>(*(new AddrSpace(false)));
class ProcAddrSpaceFactory {
	#include <servers/mem/addrspace/Mem/ProcAddrSpaceFactory.h>

	ProcAddrSpaceFactory()

	void create(Object *obj)
		AddrSpace *as = new AddrSpace(true);
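		// The rest of this method sets up the standard process
		// environment: the kernel-exported read-only and read-write
		// shared regions are mapped at their architecture-defined
		// addresses, and an initial stack is allocated between
		// Arch::stack_bottom and Arch::stack_top.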
		region.start = kvirt_to_phys(&roshared_start);
		region.end = kvirt_to_phys(&roshared_page_end);
		vstart = Arch::roshared_map;

		mf.access_IDLNS_Read = 1;
		mf.access_IDLNS_Exec = 1;

		as->map(physmem, region, &vstart, mf,
			true, AddrSpace::map_protected);

		region.start = kvirt_to_phys(&rwshared_start);
		region.end = kvirt_to_phys(&rwshared_page_end);
		vstart = Arch::rwshared_map;
		mf.access_IDLNS_Exec = 0;
		mf.access_IDLNS_Write = 1;

		as->map(physmem, region, &vstart, mf,
			true, AddrSpace::map_protected);

		vstart = Arch::stack_bottom;
		as->alloc_and_map(Arch::stack_top - vstart + 1, &vstart, af, mf);

		*obj = static_cast<IAddrSpace>(*(as));
ProcAddrSpaceFactory real_proc_addrspace_factory;
Factory proc_addr_space_factory = real_proc_addrspace_factory;

AddrSpaceFactory real_addrspace_factory;
Factory addr_space_factory = real_addrspace_factory;
AddrSpace::AddrSpace(bool process) : mappable(this)
	is_process = process;

	// OPT: Allow optional use of the native PTE for stacked aspaces,
	// either because the native PTE is 64-bit, or because it's an
	// embedded system which does not need 64-bit storage.

	if (process)
		page_table = new PageTableImpl<Arch::PTE>(true);
	else
		page_table = new PageTableImpl<GenPTE>(false);

	cached_free_region = Arch::user_start + Arch::page_size;
// This should only be used once during bootup to initialize the
// kernel's address space with a static initial page table.
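//
// As an illustration only (hypothetical boot code, not part of this
// file), the caller would pass the architecture's statically allocated
// top-level table, e.g.:
//
//     AddrSpace *kernel_aspace = new AddrSpace((void *)&boot_pgdir);
//
// where "boot_pgdir" stands in for whatever static table the arch
// provides.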
AddrSpace::AddrSpace(void *ptbl_toplevel) : mappable(this)
	page_table = new PageTableImpl<Arch::PTE>(ptbl_toplevel);

	// FIXME: should be kernel virtual space
	cached_free_region = Arch::user_start + Arch::page_size;
void AddrSpace::get_mappable(IMappable *ma)

void AddrSpace::clone(IAddrSpace *addrspace, uint8_t clone_is_real)

void AddrSpace::alloc_and_map(u64 len, u64 *vstart,
                              AllocFlags aflags, MapFlags mflags)
bool AddrSpace::handle_fault(ulong vaddr, bool write, bool exec, bool user)
	if (lock.held_by_curthread())

	assert(!(write && exec));

	if (write)
		reqflags.Writeable = 1;
	else if (exec)
		reqflags.Executable = 1;
	else
		reqflags.Readable = 1;

	mappable.pagein(page_align(vaddr), reqflags);

	catch (BadPageFault &bpf) {
		// FIXME: retain info about nature of bpf
bool AddrSpace::check_overlap(Region region, VirtualArea *&va)
	if (region.end < region.start)

	va = varea_tree.find_nearest(region.start);

	// If region.start is in an existing region, that region will
	// be returned.

	if (region.end >= va->region().start &&
	    region.start <= va->region().end)

	// If it returns a region that's greater than region.start, and va
	// itself does not overlap, then prev does not overlap (or else
	// region.start would be in or before prev, and thus prev would
	// have been returned).
	//
	// If it returns a region that's less than region.start, we still
	// need to check next, as region.end could be in (or beyond) that
	// next region.
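	//
	// Illustrative example (made-up numbers): with vareas
	// [0x1000,0x1fff] and [0x5000,0x5fff] and a query for
	// [0x2000,0x6000], find_nearest(0x2000) may return the lower
	// varea, which does not overlap; the overlap with
	// [0x5000,0x5fff] is then caught by the check of the next
	// varea below.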
	if (va->list_node.next != &varea_list) {
		VirtualArea *next =
			va->list_node.next->listentry(VirtualArea, list_node);

		if (region.end >= next->region().start &&
		    region.start <= next->region().end)

	if (va->list_node.prev != &varea_list)
		prev = va->list_node.prev->listentry(VirtualArea, list_node);

	if (region.start < va->region().start) {
		assert(!prev || prev->region().end < region.start);
VirtualArea *AddrSpace::split_varea(Region region)
	VirtualArea *start, *mid, *end;

	// check_overlap is used rather than varea_tree.find,
	// so that the first overlapping region can be returned,
	// as most (if not all) callers will need this anyway.

	if (!check_overlap(region, start))

	assert(start->aspace == this);
	assert(start->region().end >= region.start);

	if (start->region().start < region.start) {
		// There is a varea that straddles region.start;
		// create a new varea "mid" for the overlapping part.

		mid = new VirtualArea;

		mid->region().start = region.start;

		if (region.end > start->region().end)
			mid->region().end = start->region().end;
		else
			mid->region().end = region.end;

		mid->flags = start->flags;

		mid->offset = start->offset;

		if (start->region().end > region.end) {
			// The varea also straddles region.end; create a new
			// varea "end" for the other side of the region.

			end = new VirtualArea;

			end->region().start = region.end + 1;
			end->region().end = start->region().end;

			end->flags = start->flags;

			end->offset = start->offset;

		start->region().end = region.start - 1;
	// Splits have already been done at both ends of the region,
	// so there's no need to look up the ending address.

	if (start->region().end == region.end)

	if (start->region().end > region.end)
		end = start;
	else
		end = varea_tree.find(region.end);

	assert(end->aspace == this);
	assert(end->region().start <= region.end);
	assert(end->region().end >= region.end);

	if (end->region().end == region.end)

	assert(end->region().end > region.end);

	// There is a varea that straddles region.end;
	// create a new varea "mid" for the overlapping part.

	mid = new VirtualArea;

	mid->region().start = region.start;

	mid->region().start = end->region().start;
	mid->region().end = region.end;

	mid->flags = start->flags;

	mid->offset = start->offset;

	end->region().start = region.end + 1;
bool AddrSpace::get_free_region(ulong len, Region &region,
                                VirtualArea *&prev)
	assert(page_aligned(len));
	assert(cached_free_region);
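	// Strategy: first try the cached hint (the end of the last
	// successful allocation); if that range is taken, walk the varea
	// list looking for a large enough gap; as a last resort, reset the
	// hint to the bottom of user space and retry.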
	region.start = cached_free_region;
	region.end = cached_free_region + len - 1;

	if (region.end <= Arch::user_end && !check_overlap(region, prev)) {
		cached_free_region = region.end + 1;

	for (Util::List *node = &prev->list_node; node != &varea_list;
	     node = node->next) {
		VirtualArea *va = node->listentry(VirtualArea, list_node);
		ulong end = Arch::user_end;

		if (node->next != &varea_list) {
			VirtualArea *next = node->next->listentry(VirtualArea, list_node);
			end = next->region().start - 1;

		assert(end > va->region().end);

		if (end - va->region().end >= len) {
			region.start = va->region().end + 1;
			region.end = region.start + len - 1;

			assert(page_aligned(region.start));
			cached_free_region = region.end + 1;

	if (cached_free_region != Arch::user_start + Arch::page_size) {
		cached_free_region = Arch::user_start + Arch::page_size;
		return get_free_region(len, region, prev);
// The "mapped" parameter is used to indicate whether the top-level
// address space has had a mapping established. If "mapped" is
// false, but an exception is not thrown, then this method must
// be called again to propagate the mapping along the aspace chain.
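//
// (In the code as it stands below, this is conveyed through the boolean
// return value of rec_pagein(): pagein() simply retries until rec_pagein()
// reports that the top-level mapping is in place.)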
//
// FIXME: Between aspace locks, if aspace's mapping is revoked and
// ma->aspace's mapping changes, a pagein could leak through and cause
// a page load or a copy-on-write breaking. This isn't a huge deal
// (it doesn't affect the correctness of the code or give aspace
// access to ma->aspace's new mapping), but it's unpleasant, and could
// have an adverse impact on determinism. If you have a real-time
// application that can't tolerate the occasional spurious pagein or
// copy-on-write breaking, then use an address space that hasn't
// previously been exposed to recursive mappers.
bool ASpaceMappable::rec_pagein(AddrSpace *aspace, u64 vaddr,
                                PTEFlags reqflags)
	// aspace->mappable.retain();

	Lock::DroppableAutoLock autolock(aspace->lock);
	VirtualArea *va = aspace->varea_tree.find(vaddr);

	if (!va)
		throw BadPageFault();

	if ((va->flags & reqflags) != reqflags)
		throw BadPageFault();

	if (aspace->map(va, vaddr, reqflags))

	Mappable *ma = va->ma;

	// aspace->mappable.release();

	if (!ma->is_aspace) {
		ma->pagein(vaddr, reqflags);

	aspace = static_cast<ASpaceMappable *>(ma)->aspace;
void ASpaceMappable::pagein(u64 vaddr, PTEFlags reqflags)
{
	while (!rec_pagein(aspace, vaddr, reqflags));
}
void AddrSpace::break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys)
	assert(lock.held_by_curthread());
	assert(rmap_lock.held_by_curthread());

	assert(va->flags.FaultOnWrite);
	assert(va->aspace == this);

	Page *old_page = phys_to_page(phys);

	Region region = { vaddr, vaddr + Arch::page_size - 1 };

	// If this is the only reference to the page left, then
	// nothing needs to be copied. Just clear the COW condition.
	if (is_phys_page(old_page) && old_page->get_refcount() == 1) {
		PTEFlags mask, flags;
		mask.FaultOnWrite = 1;

		page_table->set_flags(region, flags, mask);

	Page *new_page = PageAlloc::alloc(1);

	// OPT: It'd be better to do this without the rmap_lock held,
	// especially if rmap_lock is global rather than per-physpage.
	// I want to keep things simple for now and optimize later,

	memcpy(page_to_kvirt(new_page), phys_to_kvirt(phys),
	       Arch::page_size);

	page_table->rmap_table.break_copy_on_write(region.start, new_page);
void ASpaceMappable::get_entry(u64 vaddr, u64 *phys, PTEFlags *flags)
{
	aspace->page_table->get_entry(vaddr, phys, flags);
}
bool AddrSpace::map(VirtualArea *va, u64 vaddr, PTEFlags reqflags)
	Lock::AutoLock autolock(rmap_lock);
	assert(va->aspace == this);

	va->ma->get_entry(vaddr + va->offset, &phys, &flags);

	PTEFlags newflags = flags & va->flags;
	newflags.FaultOnWrite = flags.FaultOnWrite | va->flags.FaultOnWrite;
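	// Permissions are the intersection of what the backing mappable
	// grants and what this varea allows, while FaultOnWrite is sticky:
	// if either side is copy-on-write, the resulting mapping is
	// copy-on-write.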
	if (!newflags.Valid) {
		assert(va->flags.Valid);

	if ((newflags & reqflags) != reqflags)

	page_table->get_entry(vaddr, &oldphys, &oldflags);

	if (oldflags.Valid &&
	    !(reqflags.Writeable && oldflags.FaultOnWrite))
	{
		// If the existing mapping is valid, don't try to map it again.
		// The existing mapping was put there possibly by a race, but
		// more likely because a FaultOnWrite was handled upstream.
		//
		// FaultOnWrite handling is the only type of mapping change that
		// can be done directly; all others must change the varea and do
		// an rmap invalidation instead. FaultOnWrite is special
		// because we don't want to split vareas for every page that
		// gets its copy-on-write broken.

		assert((oldflags & reqflags) == reqflags);

	if (reqflags.Writeable && oldflags.FaultOnWrite)
	{
		// The FaultOnWrite needs to be handled upstream.
		if (!va->flags.FaultOnWrite)

		va->aspace->break_copy_on_write(va, vaddr, phys);

	assert(!oldflags.Valid);
	PageTable *usptbl = NULL;

	if (va->ma->is_aspace) {
		ASpaceMappable *asma = static_cast<ASpaceMappable *>(va->ma);
		usptbl = asma->aspace->page_table;

	RMapTable::map(va, usptbl, vaddr, vaddr + va->offset);

	RegionWithOffset rwo;
	rwo.start = vaddr;
	rwo.end = vaddr + Arch::page_size - 1;

	page_table->map(rwo, newflags);
void ASpaceMappable::get_size(u64 *size)
{
	aspace->get_size(size);
}
void AddrSpace::map(IMappable ma, Region region, u64 *vstart,
                    MapFlags mflags, bool from_kernel, int map_type)
	// FIXME: check alignment for VIPT caches
	// FIXME: Implement the "Replace" map flag

	if (mflags.Replace)
		throw_idl(InvalidArgument, 3,
		          countarray("Replace unimplemented"));

	Mappable *cma = Mappable::classptr(ma);

	// The given IMappable does not refer to a Mappable
	if (!cma)
		throw_idl(InvalidArgument, 0, nullarray);

	bool fixed = mflags.Fixed;

	if (!page_aligned(region.start))
		throw_idl(InvalidArgument, 1, countarray("unaligned start"));

	if (!page_aligned(region.end + 1))
		throw_idl(InvalidArgument, 1, countarray("unaligned end"));

	Lock::AutoLock autolock(lock);
	Region vregion;
	VirtualArea *prev = NULL;

	if (*vstart != System::Mem::AddrSpace_ns::unspecified_start) {
		vregion.start = *vstart;
		vregion.end = vregion.start + region.end - region.start;
		if (!valid_addr(vregion.start))
			throw_idl(InvalidArgument, 2,
			          countarray("invalid virtual start"));

		if (!valid_addr(vregion.end))
			throw_idl(InvalidArgument, 2,
			          countarray("invalid virtual end"));

		if (check_overlap(vregion, prev))
			*vstart = System::Mem::AddrSpace_ns::unspecified_start;

	if (*vstart == System::Mem::AddrSpace_ns::unspecified_start) {
		if (fixed)
			throw_idl(ResourceBusy, 2, countarray("varea overlap"));

		if (!get_free_region(region.end - region.start + 1, vregion, prev))
			throw_idl(OutOfSpace, countarray("out of vspace"));

		*vstart = vregion.start;

	VirtualArea *newva = new VirtualArea;
	newva->aspace = this;
	newva->region() = vregion;

	newva->flags.Valid = 1;
	newva->flags.User = map_type != map_kernel;
	newva->flags.Readable = mflags.access_IDLNS_Read;
	newva->flags.Writeable = mflags.access_IDLNS_Write;
	newva->flags.Executable = mflags.access_IDLNS_Exec;
	newva->flags.FaultOnWrite = mflags.CopyOnWrite;
	newva->flags.Protected = map_type != map_user;
	newva->ma = cma;
	newva->offset = region.start - vregion.start;
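	// With this convention, vaddr + va->offset translates an address
	// inside the varea into the corresponding address in the backing
	// mappable (see the get_entry() call in AddrSpace::map() above).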
	varea_tree.add(newva);
	newva->ma->map(newva);

	if (prev)
		prev->list_node.add_front(&newva->list_node);
	else
		varea_list.add_front(&newva->list_node);
void AddrSpace::unmap(Region region, bool from_kernel)
	u64 orig_start = region.start;
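	// orig_start is used below to detect address wrap-around:
	// region.start only moves forward, so if it ever compares <=
	// orig_start after being advanced, the top of the address space
	// has been passed and unmapping stops.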
	while (region.start <= region.end) {
		Lock::DroppableAutoLock autolock(lock);
		VirtualArea *va;

		// If check_overlap returns false, then there are no vareas
		// in the specified region, so there's nothing to unmap.

		if (!check_overlap(region, va))

		if (va->flags.Protected && !from_kernel) {
			region.start = va->list_node.next->
			               listentry(VirtualArea, list_node)->region().start;

			if (region.start <= orig_start)

		u64 va_end = va->region().end;

		if (va_end > region.end) {
			u64 va_start = va->region().start;
			va->region().start = region.end + 1;

			if (va_start < region.start) {
				VirtualArea *newva = new VirtualArea;

				newva->aspace = this;
				newva->region().start = va_start;
				newva->region().end = region.start - 1;

				newva->flags = va->flags;
				newva->ma = va->ma;
				newva->offset = va->offset;

				varea_tree.add(newva);
				newva->ma->map(newva);

			VirtualArea *nextva =
				va->list_node.next->listentry(VirtualArea, list_node);

			next_start = nextva->region().start;
		} else if (va->region().start < region.start) {
			va->region().end = region.start - 1;

		// This is done after the varea removal, so that new faults
		// don't map things in again.
		//
		// OPT: Skip RMap-based unmapping if nothing maps this aspace.
		// OPT: Push this loop into the RMap code, allowing it to skip
		// empty portions of the tables (as the pagetable code currently
		// does).

		while (region.start <= va_end && region.start <= region.end) {
			page_table->rmap_table.unmap(region.start);
			region.start += Arch::page_size;

			if (region.start <= orig_start)

		region.start = next_start;

		if (region.start <= orig_start)
void AddrSpace::set_mapflags(Region region, MapFlags mflags)
	// Find varea, split if necessary, propagate change to stacked aspaces

void AddrSpace::get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same)

void AddrSpace::get_mapping(Region region, IMappable *ma, u64 *offset)
void AddrSpace::get_page_size(u32 *page_size)
{
	*page_size = Arch::page_size;
}

void AddrSpace::get_min_align(u32 *min_align)
{
	*min_align = Arch::page_mapping_min_align;
}
void Mappable::map(VirtualArea *varea)
{
	mappings_lock.lock_irq();
	mappings.add_back(&varea->mappings_node);
	mappings_lock.unlock_irq();
}

void Mappable::unmap(VirtualArea *varea)
{
	mappings_lock.lock_irq();
	varea->mappings_node.del();
	mappings_lock.unlock_irq();
}
// kill_pte: invalidate the old translation (for process page tables),
// mark the backing page dirty if needed, and drop the page reference
// unless no_release is set.
void PageTable::kill_pte(ulong vaddr, u64 paddr, bool dirty,
                         bool valid, bool no_release)
	Page *oldpage = phys_to_page(paddr);

	if (!is_phys_page(oldpage))
		oldpage = NULL;

	if (is_process && valid) {
		Arch::invalidate_tlb_entry(vaddr);

	if (oldpage && dirty &&
	    !ll_test_and_set(&oldpage->flags, PageFlags::bits::Dirty))
	{
		// Queue page for writeback
	}

	if (!no_release && oldpage)
		oldpage->release();
// FIXME: Add a special PTE flag to indicate that PhysMem mappings
// don't mess with page refcounts.

class PhysMem : public Mappable {
	void get_size(u64 *size)
	{
		// The result is the number of page-sized units in the full
		// 64-bit (or 32-bit) physical address space.
		if (sizeof(long) == 8)
			*size = 1ULL << (64 - Arch::page_shift);
		else
			*size = 1ULL << (32 - Arch::page_shift);
	}
	void pagein(u64 vaddr, PTEFlags reqflags)
	{
		// Doesn't need to do anything yet, though it may later
		// once high memory support is added.
	}
	void get_entry(u64 addr, u64 *phys, PTEFlags *flags)

		flags->Writeable = 1;
		flags->Executable = 1;
PhysMem real_physmem;
IMappable physmem = real_physmem;

#include <servers/mem/addrspace/footer.cc>