// mem/addrspace.cc -- System.Mem.AddrSpace
//
// OPT: Special AddrSpaces that only translate/export a linear block of
// another AddrSpace, and don't have individual entries for every page.
// (A hypothetical sketch of this idea appears below, after the #includes.)
//
// OPT: Special VAreas that use their own translation mechanism instead
// of varea->offset, so that filesystem block tables (and similar things)
// don't need to have a VArea per block.
//
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following condition:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
#include <kern/paging.h>
#include <kern/pagetable.h>
#include <kern/pagealloc.h>
#include <kern/generic-pte.h>
#include <kern/compiler.h>
extern int roshared_start, roshared_page_end;
extern int rwshared_start, rwshared_page_end;

extern IMappable physmem, anonmem;
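// The following is a minimal, hypothetical sketch of the first OPT idea
// in the header comment: a "window" mappable that exports a linear block
// of another mappable by offsetting addresses, rather than keeping a
// VArea (and eventually a PTE) per page.  The name LinearWindow and the
// members backing/base/len are illustrative assumptions, not part of
// this kernel; the block is disabled with #if 0 and exists only to
// clarify the comment.
#if 0
class LinearWindow : public Mappable {
	Mappable *backing; // the mappable (e.g. an aspace) being windowed
	u64 base;          // start of the exported block in "backing"
	u64 len;           // length of the exported block, in bytes

public:
	LinearWindow(Mappable *backing, u64 base, u64 len) :
		backing(backing), base(base), len(len)
	{
	}

	void get_size(u64 *size)
	{
		*size = len;
	}

	void pagein(u64 vaddr, PTEFlags reqflags)
	{
		// A single offset translation replaces per-page entries.
		backing->pagein(base + vaddr, reqflags);
	}

	void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
	{
		backing->get_mapping(base + addr, phys, flags);
	}
};
#endif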
class AddrSpaceFactory {
public:
#include <servers/mem/addrspace/Mem/AddrSpaceFactory.h>

	AddrSpaceFactory()
	{
		init_iface();
	}

	void create(Object *obj)
	{
		*obj = static_cast<IAddrSpace>(*(new AddrSpace(false)));
	}
};
class ProcAddrSpaceFactory {
public:
#include <servers/mem/addrspace/Mem/ProcAddrSpaceFactory.h>

	ProcAddrSpaceFactory()
	{
		init_iface();
	}

	void create(Object *obj)
	{
		AddrSpace *as = new AddrSpace(true);
		Region region;
		MapFlags mf = 0;
		u64 vstart;

		region.start = kvirt_to_phys(&roshared_start);
		region.end = kvirt_to_phys(&roshared_page_end);
		vstart = Arch::roshared_map;
		mf.access_IDLNS_Read = 1;
		mf.access_IDLNS_Exec = 1;

		as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);

		region.start = kvirt_to_phys(&rwshared_start);
		region.end = kvirt_to_phys(&rwshared_page_end);
		vstart = Arch::rwshared_map;
		mf.access_IDLNS_Exec = 0;
		mf.access_IDLNS_Write = 1;

		as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);

		// Leave the stack no-exec by default.
		region.start = vstart = Arch::stack_bottom;
		region.end = Arch::stack_top;

		printf("vstart %llx\n", vstart);
		as->map(anonmem, region, &vstart, mf);

		*obj = static_cast<IAddrSpace>(*as);
	}
};
ProcAddrSpaceFactory real_proc_addrspace_factory;
Factory proc_addr_space_factory = real_proc_addrspace_factory;

AddrSpaceFactory real_addrspace_factory;
Factory addr_space_factory = real_addrspace_factory;
AddrSpace::AddrSpace(bool process) : mappable(this)
{
	is_process = process;

	// OPT: Allow optional use of the native PTE for stacked aspaces,
	// either because the native PTE is 64-bit, or because it's an
	// embedded system which does not need 64-bit storage.

	if (is_process)
		page_table = new PageTableImpl<Arch::PTE>(true);
	else
		page_table = new PageTableImpl<GenPTE>(false);

	cached_free_region = Arch::user_start + Arch::page_size;
}
// This should only be used once during bootup to initialize the
// kernel's address space with a static initial page table.

AddrSpace::AddrSpace(void *ptbl_toplevel) : mappable(this)
{
	page_table = new PageTableImpl<Arch::PTE>(ptbl_toplevel);

	// FIXME: should be kernel virtual space
	cached_free_region = Arch::user_start + Arch::page_size;
}
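// A minimal usage sketch of the constructor above (hypothetical; the
// actual bootup code and the boot_page_table symbol are assumptions):
//
//	extern void *boot_page_table;
//	AddrSpace *kernel_aspace = new AddrSpace(boot_page_table);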
void AddrSpace::get_mappable(IMappable *ma)
{
	*ma = static_cast<IMappable>(mappable);
}
void AddrSpace::clone(IAddrSpace *addrspace, uint8_t clone_is_real)
{
	// FIXME: not yet implemented
}
bool AddrSpace::handle_fault(ulong vaddr, bool write, bool exec, bool user)
{
	if (lock.held_by_curthread())
		return false;

	assert(!(write && exec));

	PTEFlags reqflags = 0;
	reqflags.Valid = 1;
	reqflags.User = user;

	if (write)
		reqflags.Writeable = 1;
	else if (exec)
		reqflags.Executable = 1;
	else
		reqflags.Readable = 1;

	try {
		mappable.pagein(page_align(vaddr), reqflags);
	}

	catch (BadPageFault &bpf) {
		// FIXME: retain info about nature of bpf
		return false;
	}

	return true;
}
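// An illustrative example of the check_overlap() cases below (assuming
// find_nearest() may return the varea on either side of an address that
// no varea contains): with vareas A = [0x2000, 0x2fff] and
// B = [0x6000, 0x6fff], a query for [0x3000, 0x6fff] may get A back from
// find_nearest(0x3000).  A itself does not overlap the query, but its
// successor B does, so the successor must be checked before concluding
// that the region is free.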
bool AddrSpace::check_overlap(Region region, VirtualArea *&va)
{
	if (region.end < region.start)
		return true;

	va = varea_tree.find_nearest(region.start);

	if (!va)
		return false;

	// If region.start is in an existing region, that region will
	// be returned.

	if (region.end >= va->region().start &&
	    region.start <= va->region().end)
		return true;

	// If it returns a region that's greater than region.start, and va
	// itself does not overlap, then prev does not overlap (or else
	// region.start would be in or before prev, and thus prev would
	// have been returned).
	//
	// If it returns a region that's less than region.start, we still
	// need to check next, as region.end could be in (or beyond) that
	// region.

	if (va->list_node.next != &varea_list) {
		VirtualArea *next =
			va->list_node.next->listentry(VirtualArea, list_node);

		if (region.end >= next->region().start &&
		    region.start <= next->region().end)
		{
			va = next;
			return true;
		}
	}

	VirtualArea *prev;
	if (va->list_node.prev != &varea_list)
		prev = va->list_node.prev->listentry(VirtualArea, list_node);
	else
		prev = NULL;

	if (region.start < va->region().start) {
		assert(!prev || prev->region().end < region.start);
		va = prev;
	}

	return false;
}
VirtualArea *AddrSpace::split_varea(Region region)
{
	VirtualArea *start, *mid, *end;

	// check_overlap is used rather than varea_tree.find,
	// so that the first overlapping region can be returned,
	// as most (if not all) callers will need this anyway.

	if (!check_overlap(region, start))
		return NULL;

	assert(start->aspace == this);
	assert(start->region().end >= region.start);

	if (start->region().start < region.start) {
		// There is a varea that straddles region.start;
		// create a new varea "mid" for the overlapping part.

		mid = new VirtualArea;

		mid->aspace = this;
		mid->region().start = region.start;

		if (region.end > start->region().end)
			mid->region().end = start->region().end;
		else
			mid->region().end = region.end;

		mid->flags = start->flags;
		mid->ma = start->ma;
		mid->offset = start->offset;

		if (start->region().end > region.end) {
			// The varea also straddles region.end; create a new
			// varea "end" for the other side of the region.

			end = new VirtualArea;

			end->aspace = this;
			end->region().start = region.end + 1;
			end->region().end = start->region().end;

			end->flags = start->flags;
			end->ma = start->ma;
			end->offset = start->offset;

			varea_tree.add(end);
			end->ma->map(end);
		}

		start->region().end = region.start - 1;

		varea_tree.add(mid);
		mid->ma->map(mid);

		// Splits have already been done at both ends of the region,
		// so there's no need to look up the ending address.

		if (mid->region().end == region.end)
			return mid;

		start = mid;
	}

	if (start->region().end == region.end)
		return start;

	if (start->region().end > region.end)
		end = start;
	else
		end = varea_tree.find(region.end);

	if (!end)
		return start;

	assert(end->aspace == this);
	assert(end->region().start <= region.end);
	assert(end->region().end >= region.end);

	if (end->region().end == region.end)
		return start;

	assert(end->region().end > region.end);

	// There is a varea that straddles region.end;
	// create a new varea "mid" for the overlapping part.

	mid = new VirtualArea;

	mid->aspace = this;

	if (end == start)
		mid->region().start = region.start;
	else
		mid->region().start = end->region().start;

	mid->region().end = region.end;

	mid->flags = end->flags;
	mid->ma = end->ma;
	mid->offset = end->offset;

	end->region().start = region.end + 1;

	varea_tree.add(mid);
	mid->ma->map(mid);

	return start;
}
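// get_free_region() below works as follows: first try the range starting
// at cached_free_region; if that range collides with an existing varea,
// walk the varea list from the collision point looking for a gap of at
// least "len" bytes; if no gap is found and cached_free_region wasn't
// already at the bottom of the user address space, reset it to the
// bottom and retry once from there.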
bool AddrSpace::get_free_region(ulong len, Region &region,
                                VirtualArea *&prev)
{
	assert(page_aligned(len));
	assert(cached_free_region);

	region.start = cached_free_region;
	region.end = cached_free_region + len - 1;

	if (region.end <= Arch::user_end && !check_overlap(region, prev)) {
		cached_free_region = region.end + 1;
		return true;
	}

	for (Util::List *node = &prev->list_node; node != &varea_list;
	     node = node->next)
	{
		VirtualArea *va = node->listentry(VirtualArea, list_node);
		ulong end = Arch::user_end;

		if (node->next != &varea_list) {
			VirtualArea *next = node->next->listentry(VirtualArea, list_node);
			end = next->region().start - 1;
		}

		assert(end > va->region().end);

		if (end - va->region().end >= len) {
			region.start = va->region().end + 1;
			region.end = region.start + len - 1;

			assert(page_aligned(region.start));
			cached_free_region = region.end + 1;

			prev = va;
			return true;
		}
	}

	if (cached_free_region != Arch::user_start + Arch::page_size) {
		cached_free_region = Arch::user_start + Arch::page_size;
		return get_free_region(len, region, prev);
	}

	return false;
}
392 // The "mapped" paramater is used to indicate whether the top-level
393 // address space has had a mapping established. If "mapped" is
394 // false, but an exception is not thrown, then this method must
395 // be called again to propagate the mapping along the aspace chain.
397 // FIXME: Between aspace locks, if aspace's mapping is revoked and
398 // ma->aspace's mapping changes, a pagein could leak through and cause
399 // a page load or a copy-on-write breaking. This isn't a huge deal
400 // (it doesn't affect the correctness of the code or give aspace
401 // access to ma->aspace's new mapping), but it's unpleasant, and could
402 // have an adverse impact on determinism. If you have a real-time
403 // application that can't tolerate the occasional spurious pagein or
404 // copy-on-write breaking, then use an address space that hasn't
405 // previously been exposed to recursive mappers.
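// Illustrative example (names hypothetical): if aspace B is stacked on a
// leaf mappable M, the first rec_pagein() call walks down to M and pages
// it in, returning false; a later call can then establish B's own
// top-level mapping and return true.  Each call propagates the mapping
// roughly one level further up the chain, and pagein() below simply
// retries until the top level reports success.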
bool ASpaceMappable::rec_pagein(AddrSpace *aspace, u64 vaddr,
                                PTEFlags reqflags)
{
	bool mapped = true;

	// aspace->mappable.retain();

	for (;;) {
		Lock::DroppableAutoLock autolock(aspace->lock);
		VirtualArea *va = aspace->varea_tree.find(vaddr);

		if (!va)
			throw BadPageFault();

		if ((va->flags & reqflags) != reqflags)
			throw BadPageFault();

		if (aspace->map(va, vaddr, reqflags))
			break;

		mapped = false;
		Mappable *ma = va->ma;
		vaddr += va->offset;

		autolock.unlock();

		// aspace->mappable.release();

		if (!ma->is_aspace) {
			ma->pagein(vaddr, reqflags);
			break;
		}

		aspace = static_cast<ASpaceMappable *>(ma)->aspace;
	}

	return mapped;
}

void ASpaceMappable::pagein(u64 vaddr, PTEFlags reqflags)
{
	while (!rec_pagein(aspace, vaddr, reqflags));
}
void AddrSpace::break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys)
{
	assert(lock.held_by_curthread());
	assert(rmap_lock.held_by_curthread());

	assert(va->flags.FaultOnWrite);
	assert(va->aspace == this);

	Page *old_page = phys_to_page(phys);

	Region region = { vaddr, vaddr + Arch::page_size - 1 };

	// If this is the only reference to the page left, then
	// nothing needs to be copied.  Just clear the COW condition.
	if (is_phys_page(old_page) && old_page->get_refcount() == 1) {
		PTEFlags mask = 0, flags = 0;
		mask.FaultOnWrite = 1;

		page_table->set_flags(region, flags, mask);
		return;
	}

	Page *new_page = PageAlloc::alloc(1);

	// OPT: It'd be better to do this without the rmap_lock held,
	// especially if rmap_lock is global rather than per-physpage.
	// I want to keep things simple for now and optimize later,
	// if it turns out to be a problem.

	memcpy(page_to_kvirt(new_page), phys_to_kvirt(phys),
	       Arch::page_size);

	page_table->rmap_table.break_copy_on_write(region.start, new_page);
}
void ASpaceMappable::get_mapping(u64 vaddr, u64 *phys, PTEFlags *flags)
{
	aspace->page_table->get_mapping(vaddr, phys, flags);
}
bool AddrSpace::map(VirtualArea *va, u64 vaddr, PTEFlags reqflags)
{
	Lock::AutoLock autolock(rmap_lock);
	assert(va->aspace == this);

	u64 phys;
	PTEFlags flags;
	va->ma->get_mapping(vaddr + va->offset, &phys, &flags);

	PTEFlags newflags = flags & va->flags;
	newflags.FaultOnWrite = flags.FaultOnWrite | va->flags.FaultOnWrite;

	if (!newflags.Valid) {
		assert(va->flags.Valid);
		return false;
	}

	if ((newflags & reqflags) != reqflags)
		return false;

	u64 oldphys;
	PTEFlags oldflags;
	page_table->get_mapping(vaddr, &oldphys, &oldflags);

	if (oldflags.Valid &&
	    !(reqflags.Writeable && oldflags.FaultOnWrite))
	{
		// If the existing mapping is valid, don't try to map it again.
		// The existing mapping was put there possibly by a race, but
		// more likely because a FaultOnWrite was handled upstream.
		//
		// FaultOnWrite handling is the only type of mapping change that
		// can be done directly; all others must change the varea and do
		// an rmap invalidation instead.  FaultOnWrite is special
		// because we don't want to split vareas for every page that
		// gets its copy-on-write broken.

		assert((oldflags & reqflags) == reqflags);
		assert(!va->flags.FaultOnWrite || oldphys == phys);
		return true;
	}

	if (reqflags.Writeable && oldflags.FaultOnWrite)
	{
		// The FaultOnWrite needs to be handled upstream.
		if (!va->flags.FaultOnWrite)
			return false;

		va->aspace->break_copy_on_write(va, vaddr, phys);
	} else {
		assert(!oldflags.Valid);
		PageTable *usptbl = NULL;

		if (va->ma->is_aspace) {
			ASpaceMappable *asma = static_cast<ASpaceMappable *>(va->ma);
			usptbl = asma->aspace->page_table;
		}

		RMapTable::map(va, usptbl, vaddr, vaddr + va->offset);

		RegionWithOffset rwo;
		rwo.start = vaddr;
		rwo.end = vaddr + Arch::page_size - 1;
		rwo.offset = phys;

		page_table->map(rwo, newflags);
	}

	return true;
}
void ASpaceMappable::get_size(u64 *size)
{
	aspace->get_size(size);
}
void AddrSpace::map(IMappable ma, Region region, u64 *vstart,
                    MapFlags mflags, int map_type)
{
	// FIXME: check alignment for VIPT caches
	// FIXME: Implement the "Replace" map flag

	if (mflags.Replace)
		throw_idl(InvalidArgument, 3,
		          countarray("Replace unimplemented"));

	Mappable *cma = Mappable::classptr(ma);
	if (!cma) {
		// The given IMappable does not refer to a Mappable.

		throw_idl(InvalidArgument, 0, nullarray);
	}

	bool fixed = mflags.Fixed;

	if (!page_aligned(region.start))
		throw_idl(InvalidArgument, 1, countarray("unaligned start"));

	if (!page_aligned(region.end + 1))
		throw_idl(InvalidArgument, 1, countarray("unaligned end"));

	Lock::AutoLock autolock(lock);
	Region vregion;
	VirtualArea *prev;

	if (*vstart != System::Mem::AddrSpace_ns::unspecified_start) {
		vregion.start = *vstart;
		vregion.end = vregion.start + region.end - region.start;

		if (!valid_addr(vregion.start))
			throw_idl(InvalidArgument, 2,
			          countarray("invalid virtual start"));

		if (!valid_addr(vregion.end))
			throw_idl(InvalidArgument, 2,
			          countarray("invalid virtual end"));

		if (check_overlap(vregion, prev))
			*vstart = System::Mem::AddrSpace_ns::unspecified_start;
	}

	if (*vstart == System::Mem::AddrSpace_ns::unspecified_start) {
		if (fixed)
			throw_idl(ResourceBusy, 2, countarray("varea overlap"));

		if (!get_free_region(region.end - region.start + 1, vregion, prev))
			throw_idl(OutOfSpace, countarray("out of vspace"));

		*vstart = vregion.start;
	}

	VirtualArea *newva = new VirtualArea;
	newva->aspace = this;
	newva->region() = vregion;

	newva->flags.Valid = 1;
	newva->flags.User = map_type != map_kernel;
	newva->flags.Readable = mflags.access_IDLNS_Read;
	newva->flags.Writeable = mflags.access_IDLNS_Write;
	newva->flags.Executable = mflags.access_IDLNS_Exec;
	newva->flags.FaultOnWrite = mflags.CopyOnWrite;
	newva->flags.Protected = map_type != map_user;
	newva->ma = cma;
	newva->offset = region.start - vregion.start;

	varea_tree.add(newva);
	newva->ma->map(newva);

	if (prev)
		prev->list_node.add_front(&newva->list_node);
	else
		varea_list.add_front(&newva->list_node);
}
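// A minimal usage sketch of map() above (hypothetical values; "aspace"
// is assumed to be an AddrSpace pointer): pass unspecified_start to let
// the kernel pick the virtual address, which is returned through vstart.
#if 0
	Region region = { 0, 0xffff }; // 16 pages of anonmem, assuming 4K pages
	u64 vstart = System::Mem::AddrSpace_ns::unspecified_start;
	MapFlags mf = 0;
	mf.access_IDLNS_Read = 1;
	mf.access_IDLNS_Write = 1;

	aspace->map(anonmem, region, &vstart, mf);
	// vstart now holds the chosen virtual address.
#endif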
void AddrSpace::unmap(Region region, bool from_kernel)
{
	u64 orig_start = region.start;

	while (region.start <= region.end) {
		Lock::DroppableAutoLock autolock(lock);
		VirtualArea *va;

		// If check_overlap returns false, then there are no vareas
		// in the specified region, so there's nothing to unmap.

		if (!check_overlap(region, va))
			return;

		if (va->flags.Protected && !from_kernel) {
			region.start = va->list_node.next->
				listentry(VirtualArea, list_node)->region().start;

			if (region.start <= orig_start)
				break;

			continue;
		}

		u64 va_end = va->region().end;
		u64 next_start = 0;

		if (va_end > region.end) {
			u64 va_start = va->region().start;
			va->region().start = region.end + 1;

			if (va_start < region.start) {
				VirtualArea *newva = new VirtualArea;

				newva->aspace = this;
				newva->region().start = va_start;
				newva->region().end = region.start - 1;

				newva->flags = va->flags;
				newva->ma = va->ma;
				newva->offset = va->offset;

				varea_tree.add(newva);
				newva->ma->map(newva);
			}

			VirtualArea *nextva =
				va->list_node.next->listentry(VirtualArea, list_node);

			next_start = nextva->region().start;
		} else if (va->region().start < region.start) {
			va->region().end = region.start - 1;
		} else {
			varea_tree.del(va);
			va->ma->unmap(va);
		}

		// This is done after the varea removal, so that new faults
		// don't map things in again.
		//
		// OPT: Skip RMap-based unmapping if nothing maps this aspace.
		// OPT: Push this loop into the RMap code, allowing it to skip
		// empty portions of the tables (as the pagetable code currently
		// does).

		while (region.start <= va_end && region.start <= region.end) {
			page_table->rmap_table.unmap(region.start);
			region.start += Arch::page_size;

			if (region.start <= orig_start)
				break;
		}

		if (next_start)
			region.start = next_start;

		if (region.start <= orig_start)
			break;
	}
}
void AddrSpace::set_mapflags(Region region, MapFlags mflags)
{
	// FIXME: implement
	// Find varea, split if necessary, propagate change to stacked aspaces
}

void AddrSpace::get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same)
{
	// FIXME: implement
}

void AddrSpace::get_mapping(Region region, IMappable *ma, u64 *offset)
{
	// FIXME: implement
}
void AddrSpace::get_page_size(u32 *page_size)
{
	*page_size = Arch::page_size;
}
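// Note: page_mapping_min_align can exceed page_size; presumably this is
// for architectures where mappings need stricter alignment, such as to
// avoid aliasing in virtually-indexed caches (cf. the VIPT FIXME in
// map() above).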
void AddrSpace::get_min_align(u32 *min_align)
{
	*min_align = Arch::page_mapping_min_align;
}
void Mappable::map(VirtualArea *varea)
{
	mappings_lock.lock_irq();
	mappings.add_back(&varea->mappings_node);
	mappings_lock.unlock_irq();
}

void Mappable::unmap(VirtualArea *varea)
{
	mappings_lock.lock_irq();
	varea->mappings_node.del();
	mappings_lock.unlock_irq();
}
void PageTable::kill_pte(ulong vaddr, u64 paddr, bool dirty,
                         bool valid, bool no_release)
{
	Page *oldpage = phys_to_page(paddr);

	if (!is_phys_page(oldpage))
		oldpage = NULL;

	if (is_process && valid) {
		Arch::invalidate_tlb_entry(vaddr);

		if (oldpage && dirty &&
		    !ll_test_and_set(&oldpage->flags, PageFlags::bits::Dirty))
		{
			// Queue page for writeback
		}
	}

	if (!no_release && oldpage)
		oldpage->release();
}

// FIXME: Add a special PTE flag to indicate that PhysMem mappings
// don't mess with page refcounts.
class PhysMem : public Mappable {
public:
	void get_size(u64 *size)
	{
		if (sizeof(long) == 8)
			*size = 1ULL << (64 - Arch::page_shift);
		else
			*size = 1ULL << (32 - Arch::page_shift);
	}

	void pagein(u64 vaddr, PTEFlags reqflags)
	{
		// Doesn't need to do anything yet, though it may later
		// once high memory support is added.
	}

	void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
	{
		*phys = addr;

		*flags = 0;
		flags->Valid = 1;
		flags->Readable = 1;
		flags->Writeable = 1;
		flags->Executable = 1;
		flags->User = 1;
	}
};

PhysMem real_physmem;
IMappable physmem = real_physmem;
class AnonMem : public Mappable {
public:
	void get_size(u64 *size)
	{
		if (sizeof(long) == 8)
			*size = 1ULL << (64 - Arch::page_shift);
		else
			*size = 1ULL << (32 - Arch::page_shift);
	}

	void pagein(u64 vaddr, PTEFlags reqflags)
	{
		// Doesn't need to do anything yet, though it may later
		// once high memory support is added.
	}

	void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
	{
		Page *page = PageAlloc::alloc(1);

		// OPT: Only zero if it was asked for.
		// OPT: Eventually, have separate pagelists for zeroed and
		// unzeroed memory, and a low-priority background thread
		// that zeroes pages and moves them to the zeroed list.
		bzero(page_to_kvirt(page), Arch::page_size);

		*phys = page_to_phys(page);

		*flags = 0;
		flags->Valid = 1;
		flags->Readable = 1;
		flags->Writeable = 1;
		flags->Executable = 1;
		flags->User = 1;
	}
};

AnonMem real_anonmem;
IMappable anonmem = real_anonmem;
#include <servers/mem/addrspace/footer.cc>