// mem/addrspace.cc -- System.Mem.AddrSpace
//
// OPT: Special AddrSpaces that only translate/export a linear block of
// another AddrSpace, and don't have individual entries for every page.
//
// OPT: Special VAreas that use their own translation mechanism instead
// of varea->offset, so that filesystem block tables (and similar things)
// don't need to have a VArea per block.
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
//     * Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimers.
//
//     * Redistributions in binary form must reproduce the above copyright notice,
//       this list of conditions and the following disclaimers in the
//       documentation and/or other materials provided with the distribution.
//
//     * The names of the Software's authors and/or contributors
//       may not be used to endorse or promote products derived from
//       this Software without specific prior written permission.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
#include <kern/paging.h>
#include <kern/pagetable.h>
#include <kern/pagealloc.h>
#include <kern/generic-pte.h>
#include <kern/compiler.h>

extern int roshared_start, roshared_page_end;
extern int rwshared_start, rwshared_page_end;

extern IMappable physmem, anonmem;
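// Two address space factories are exported: AddrSpaceFactory creates a
// plain AddrSpace, while ProcAddrSpaceFactory also maps the read-only and
// read-write shared pages and an initial anonymous stack region for a new
// process.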
class AddrSpaceFactory {
    #include <servers/mem/addrspace/Mem/AddrSpaceFactory.h>

    void create(Object *obj)
        *obj = static_cast<IAddrSpace>(*(new AddrSpace(false)));
class ProcAddrSpaceFactory {
    #include <servers/mem/addrspace/Mem/ProcAddrSpaceFactory.h>

    ProcAddrSpaceFactory()

    void create(Object *obj)
        AddrSpace *as = new AddrSpace(true);

        region.start = kvirt_to_phys(&roshared_start);
        region.end = kvirt_to_phys(&roshared_page_end);
        vstart = Arch::roshared_map;

        mf.access_IDLNS_Read = 1;
        mf.access_IDLNS_Exec = 1;

        as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);

        region.start = kvirt_to_phys(&rwshared_start);
        region.end = kvirt_to_phys(&rwshared_page_end);
        vstart = Arch::rwshared_map;
        mf.access_IDLNS_Exec = 0;
        mf.access_IDLNS_Write = 1;

        as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);

        // Leave the stack no-exec by default.
        region.start = vstart = Arch::stack_bottom;
        region.end = Arch::stack_top;

        printf("vstart %llx\n", vstart);
        as->map(anonmem, region, &vstart, mf);

        *obj = static_cast<IAddrSpace>(*as);
ProcAddrSpaceFactory real_proc_addrspace_factory;
Factory proc_addr_space_factory = real_proc_addrspace_factory;

AddrSpaceFactory real_addrspace_factory;
Factory addr_space_factory = real_addrspace_factory;
AddrSpace::AddrSpace(bool process) : mappable(this)
    is_process = process;

    // OPT: Allow optional use of the native PTE for stacked aspaces,
    // either because the native PTE is 64-bit, or because it's an
    // embedded system which does not need 64-bit storage.

    if (process)
        page_table = new PageTableImpl<Arch::PTE>(true);
    else
        page_table = new PageTableImpl<GenPTE>(false);

    cached_free_region = Arch::user_start + Arch::page_size;
// This should only be used once during bootup to initialize the
// kernel's address space with a static initial page table.

AddrSpace::AddrSpace(void *ptbl_toplevel) : mappable(this)
    page_table = new PageTableImpl<Arch::PTE>(ptbl_toplevel);

    // FIXME: should be kernel virtual space
    cached_free_region = Arch::user_start + Arch::page_size;
void AddrSpace::get_mappable(IMappable *ma)

void AddrSpace::clone(IAddrSpace *addrspace, uint8_t clone_is_real)
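// Handle a page fault at vaddr for this address space.  The access type
// (write/exec/read) is translated into a set of PTEFlags and handed to the
// mappable's pagein path; a BadPageFault thrown from that path indicates
// that the access cannot be satisfied.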
bool AddrSpace::handle_fault(ulong vaddr, bool write, bool exec, bool user)
    if (lock.held_by_curthread())

    assert(!(write && exec));

    reqflags.Writeable = 1;
    reqflags.Executable = 1;
    reqflags.Readable = 1;

    mappable.pagein(page_align(vaddr), reqflags);

    catch (BadPageFault &bpf) {
        // FIXME: retain info about nature of bpf
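// Determine whether "region" overlaps any existing virtual area.
// find_nearest() is used so that "va" also comes back pointing at the
// varea nearest region.start, which callers such as get_free_region()
// and map() reuse as a starting point.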
bool AddrSpace::check_overlap(Region region, VirtualArea *&va)
    if (region.end < region.start)

    va = varea_tree.find_nearest(region.start);

    // If region.start is in an existing region, that region will
    // be returned.

    if (region.end >= va->region().start &&
        region.start <= va->region().end)

    // If it returns a region that's greater than region.start, and va
    // itself does not overlap, then prev does not overlap (or else
    // region.start would be in or before prev, and thus prev would
    // have been returned).

    // If it returns a region that's less than region.start, we still
    // need to check next, as region.end could be in (or beyond) that
    // region.

    if (va->list_node.next != &varea_list) {
        VirtualArea *next =
            va->list_node.next->listentry(VirtualArea, list_node);

        if (region.end >= next->region().start &&
            region.start <= next->region().end)

    if (va->list_node.prev != &varea_list)
        prev = va->list_node.prev->listentry(VirtualArea, list_node);

    if (region.start < va->region().start) {
        assert(!prev || prev->region().end < region.start);
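// Split any vareas that straddle the boundaries of "region", so that the
// region is covered only by whole vareas.  check_overlap() is used to
// locate the first overlapping varea.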
VirtualArea *AddrSpace::split_varea(Region region)
    VirtualArea *start, *mid, *end;

    // check_overlap is used rather than varea_tree.find,
    // so that the first overlapping region can be returned,
    // as most (if not all) callers will need this anyway.

    if (!check_overlap(region, start))

    assert(start->aspace == this);
    assert(start->region().end >= region.start);

    if (start->region().start < region.start) {
        // There is a varea that straddles region.start;
        // create a new varea "mid" for the overlapping part.

        mid = new VirtualArea;

        mid->region().start = region.start;

        if (region.end > start->region().end)
            mid->region().end = start->region().end;
        else
            mid->region().end = region.end;

        mid->flags = start->flags;

        mid->offset = start->offset;

        if (start->region().end > region.end) {
            // The varea also straddles region.end; create a new
            // varea "end" for the other side of the region.

            end = new VirtualArea;

            end->region().start = region.end + 1;
            end->region().end = start->region().end;

            end->flags = start->flags;

            end->offset = start->offset;

        start->region().end = region.start - 1;
    // Splits have already been done at both ends of the region,
    // so there's no need to look up the ending address.

    if (start->region().end == region.end)

    if (start->region().end > region.end)

    end = varea_tree.find(region.end);

    assert(end->aspace == this);
    assert(end->region().start <= region.end);
    assert(end->region().end >= region.end);

    if (end->region().end == region.end)

    assert(end->region().end > region.end);

    // There is a varea that straddles region.end;
    // create a new varea "mid" for the overlapping part.

    mid = new VirtualArea;

    mid->region().start = end->region().start;
    mid->region().end = region.end;

    mid->flags = end->flags;

    mid->offset = end->offset;

    end->region().start = region.end + 1;
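// Find a free, page-aligned virtual region of length "len".  The search
// starts at cached_free_region and walks the varea list looking for a
// large enough gap; if nothing is found, the cache is reset to just above
// Arch::user_start and the search is retried once.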
bool AddrSpace::get_free_region(ulong len, Region &region,
                                VirtualArea *&prev)
    assert(page_aligned(len));
    assert(cached_free_region);

    region.start = cached_free_region;
    region.end = cached_free_region + len - 1;

    if (region.end <= Arch::user_end && !check_overlap(region, prev)) {
        cached_free_region = region.end + 1;

    for (Util::List *node = &prev->list_node; node != &varea_list;
         node = node->next)
        VirtualArea *va = node->listentry(VirtualArea, list_node);
        ulong end = Arch::user_end;

        if (node->next != &varea_list) {
            VirtualArea *next = node->next->listentry(VirtualArea, list_node);
            end = next->region().start - 1;

        assert(end > va->region().end);

        if (end - va->region().end >= len) {
            region.start = va->region().end + 1;
            region.end = region.start + len - 1;

            assert(page_aligned(region.start));
            cached_free_region = region.end + 1;

    if (cached_free_region != Arch::user_start + Arch::page_size) {
        cached_free_region = Arch::user_start + Arch::page_size;
        return get_free_region(len, region, prev);
// The "mapped" parameter is used to indicate whether the top-level
// address space has had a mapping established.  If "mapped" is
// false, but an exception is not thrown, then this method must
// be called again to propagate the mapping along the aspace chain.
//
// FIXME: Between aspace locks, if aspace's mapping is revoked and
// ma->aspace's mapping changes, a pagein could leak through and cause
// a page load or a copy-on-write breaking.  This isn't a huge deal
// (it doesn't affect the correctness of the code or give aspace
// access to ma->aspace's new mapping), but it's unpleasant, and could
// have an adverse impact on determinism.  If you have a real-time
// application that can't tolerate the occasional spurious pagein or
// copy-on-write breaking, then use an address space that hasn't
// previously been exposed to recursive mappers.
bool ASpaceMappable::rec_pagein(AddrSpace *aspace, u64 vaddr,
                                PTEFlags reqflags)
    // aspace->mappable.retain();

    Lock::DroppableAutoLock autolock(aspace->lock);
    VirtualArea *va = aspace->varea_tree.find(vaddr);

        throw BadPageFault();

    if ((va->flags & reqflags) != reqflags)
        throw BadPageFault();

    if (aspace->map(va, vaddr, reqflags))

    Mappable *ma = va->ma;

    // aspace->mappable.release();

    if (!ma->is_aspace) {
        ma->pagein(vaddr, reqflags);

    aspace = static_cast<ASpaceMappable *>(ma)->aspace;
void ASpaceMappable::pagein(u64 vaddr, PTEFlags reqflags)
    while (!rec_pagein(aspace, vaddr, reqflags));
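// Break a copy-on-write mapping at vaddr.  If the old physical page has no
// other references, the FaultOnWrite condition is simply cleared; otherwise
// a fresh page is allocated, the contents are copied over, and
// rmap_table.break_copy_on_write() is invoked with the new page.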
void AddrSpace::break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys)
    assert(lock.held_by_curthread());
    assert(rmap_lock.held_by_curthread());

    assert(va->flags.FaultOnWrite);
    assert(va->aspace == this);

    Page *old_page = phys_to_page(phys);

    Region region = { vaddr, vaddr + Arch::page_size - 1 };

    // If this is the only reference to the page left, then
    // nothing needs to be copied.  Just clear the COW condition.
    if (is_phys_page(old_page) && old_page->get_refcount() == 1) {
        PTEFlags mask, flags;
        mask.FaultOnWrite = 1;

        page_table->set_flags(region, flags, mask);

    Page *new_page = PageAlloc::alloc(1);

    // OPT: It'd be better to do this without the rmap_lock held,
    // especially if rmap_lock is global rather than per-physpage.
    // I want to keep things simple for now and optimize later,

    memcpy(page_to_kvirt(new_page), phys_to_kvirt(phys),
           Arch::page_size);

    page_table->rmap_table.break_copy_on_write(region.start, new_page);
void ASpaceMappable::get_mapping(u64 vaddr, u64 *phys, PTEFlags *flags)
    aspace->page_table->get_mapping(vaddr, phys, flags);
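// Establish a single page mapping for vaddr within "va".  The upstream
// mapping's flags are combined with the varea's flags (with FaultOnWrite
// ORed in); an existing valid mapping is left alone unless a write needs
// to break copy-on-write, and the final mapping is entered through the
// rmap table and then the page table.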
bool AddrSpace::map(VirtualArea *va, u64 vaddr, PTEFlags reqflags)
    Lock::AutoLock autolock(rmap_lock);
    assert(va->aspace == this);

    va->ma->get_mapping(vaddr + va->offset, &phys, &flags);

    PTEFlags newflags = flags & va->flags;
    newflags.FaultOnWrite = flags.FaultOnWrite | va->flags.FaultOnWrite;

    if (!newflags.Valid) {
        assert(va->flags.Valid);

    if ((newflags & reqflags) != reqflags)

    page_table->get_mapping(vaddr, &oldphys, &oldflags);

    if (oldflags.Valid &&
        !(reqflags.Writeable && oldflags.FaultOnWrite))
        // If the existing mapping is valid, don't try to map it again.
        // The existing mapping was put there possibly by a race, but
        // more likely because a FaultOnWrite was handled upstream.
        //
        // FaultOnWrite handling is the only type of mapping change that
        // can be done directly; all others must change the varea and do
        // an rmap invalidation instead.  FaultOnWrite is special
        // because we don't want to split vareas for every page that
        // gets its copy-on-write broken.

        assert((oldflags & reqflags) == reqflags);
        assert(!va->flags.FaultOnWrite || oldphys == phys);

    if (reqflags.Writeable && oldflags.FaultOnWrite)
        // The FaultOnWrite needs to be handled upstream.
        if (!va->flags.FaultOnWrite)

        va->aspace->break_copy_on_write(va, vaddr, phys);

    assert(!oldflags.Valid);
    PageTable *usptbl = NULL;

    if (va->ma->is_aspace) {
        ASpaceMappable *asma = static_cast<ASpaceMappable *>(va->ma);
        usptbl = asma->aspace->page_table;

    RMapTable::map(va, usptbl, vaddr, vaddr + va->offset);

    RegionWithOffset rwo;

    rwo.end = vaddr + Arch::page_size - 1;

    page_table->map(rwo, newflags);
void ASpaceMappable::get_size(u64 *size)
    aspace->get_size(size);
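// IDL-facing mapping entry point: validate page alignment and the requested
// addresses, choose a virtual region (honoring an explicitly requested
// start address when possible, otherwise taking a free region), and record
// the mapping as a new VirtualArea.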
void AddrSpace::map(IMappable ma, Region region, u64 *vstart,
                    MapFlags mflags, int map_type)
    // FIXME: check alignment for VIPT caches
    // FIXME: Implement the "Replace" map flag

        throw_idl(InvalidArgument, 3,
                  countarray("Replace unimplemented"));

    Mappable *cma = Mappable::classptr(ma);

        // The given IMappable does not refer to a Mappable

        throw_idl(InvalidArgument, 0, nullarray);

    bool fixed = mflags.Fixed;

    if (!page_aligned(region.start))
        throw_idl(InvalidArgument, 1, countarray("unaligned start"));

    if (!page_aligned(region.end + 1))
        throw_idl(InvalidArgument, 1, countarray("unaligned end"));

    Lock::AutoLock autolock(lock);

    if (*vstart != System::Mem::AddrSpace_ns::unspecified_start) {
        vregion.start = *vstart;
        vregion.end = vregion.start + region.end - region.start;

        if (!valid_addr(vregion.start))
            throw_idl(InvalidArgument, 2,
                      countarray("invalid virtual start"));

        if (!valid_addr(vregion.end))
            throw_idl(InvalidArgument, 2,
                      countarray("invalid virtual end"));

        if (check_overlap(vregion, prev))
            *vstart = System::Mem::AddrSpace_ns::unspecified_start;

    if (*vstart == System::Mem::AddrSpace_ns::unspecified_start) {
            throw_idl(ResourceBusy, 2, countarray("varea overlap"));

        if (!get_free_region(region.end - region.start + 1, vregion, prev))
            throw_idl(OutOfSpace, countarray("out of vspace"));

        *vstart = vregion.start;

    VirtualArea *newva = new VirtualArea;
    newva->aspace = this;
    newva->region() = vregion;

    newva->flags.Valid = 1;
    newva->flags.User = map_type != map_kernel;
    newva->flags.Readable = mflags.access_IDLNS_Read;
    newva->flags.Writeable = mflags.access_IDLNS_Write;
    newva->flags.Executable = mflags.access_IDLNS_Exec;
    newva->flags.FaultOnWrite = mflags.CopyOnWrite;
    newva->flags.Protected = map_type != map_user;

    newva->offset = region.start - vregion.start;

    varea_tree.add(newva);
    newva->ma->map(newva);
    if (prev)
        prev->list_node.add_front(&newva->list_node);
    else
        varea_list.add_front(&newva->list_node);
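// Unmap a virtual region one varea at a time.  Protected vareas are
// skipped unless the request comes from the kernel, vareas that straddle
// the region boundaries are trimmed (with a new varea created when a
// single varea straddles both ends), and the pages are unmapped through
// the rmap table only after the varea bookkeeping is updated, so that new
// faults cannot map them back in.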
void AddrSpace::unmap(Region region, bool from_kernel)
    u64 orig_start = region.start;

    while (region.start <= region.end) {
        Lock::DroppableAutoLock autolock(lock);

        // If check_overlap returns false, then there are no vareas
        // in the specified region, so there's nothing to unmap.

        if (!check_overlap(region, va))

        if (va->flags.Protected && !from_kernel) {
            region.start = va->list_node.next->
                           listentry(VirtualArea, list_node)->region().start;

            if (region.start <= orig_start)

        u64 va_end = va->region().end;

        if (va_end > region.end) {
            u64 va_start = va->region().start;
            va->region().start = region.end + 1;

            if (va_start < region.start) {
                VirtualArea *newva = new VirtualArea;

                newva->aspace = this;
                newva->region().start = va_start;
                newva->region().end = region.start - 1;

                newva->flags = va->flags;

                newva->offset = va->offset;

                varea_tree.add(newva);
                newva->ma->map(newva);

            VirtualArea *nextva =
                va->list_node.next->listentry(VirtualArea, list_node);

            next_start = nextva->region().start;
        } else if (va->region().start < region.start) {
            va->region().end = region.start - 1;

        // This is done after the varea removal, so that new faults
        // don't map things in again.
        //
        // OPT: Skip RMap-based unmapping if nothing maps this aspace.
        // OPT: Push this loop into the RMap code, allowing it to skip
        // empty portions of the tables (as the pagetable code currently
        // does).

        while (region.start <= va_end && region.start <= region.end) {
            page_table->rmap_table.unmap(region.start);
            region.start += Arch::page_size;

            if (region.start <= orig_start)

        region.start = next_start;

        if (region.start <= orig_start)
void AddrSpace::set_mapflags(Region region, MapFlags mflags)
    // Find varea, split if necessary, propagate change to stacked aspaces

void AddrSpace::get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same)

void AddrSpace::get_mapping(Region region, IMappable *ma, u64 *offset)

void AddrSpace::get_page_size(u32 *page_size)
    *page_size = Arch::page_size;

void AddrSpace::get_min_align(u32 *min_align)
    *min_align = Arch::page_mapping_min_align;
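// A Mappable keeps a list of the vareas that currently map it; the list is
// protected by mappings_lock, taken with interrupts disabled.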
void Mappable::map(VirtualArea *varea)
    mappings_lock.lock_irq();
    mappings.add_back(&varea->mappings_node);
    mappings_lock.unlock_irq();

void Mappable::unmap(VirtualArea *varea)
    mappings_lock.lock_irq();
    varea->mappings_node.del();
    mappings_lock.unlock_irq();
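// Tear down the state associated with a PTE that is being removed or
// replaced: invalidate the TLB entry for valid process mappings, flag
// dirty pages (and queue them for writeback), and, unless no_release is
// set, drop the reference held on the old page.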
void PageTable::kill_pte(ulong vaddr, u64 paddr, bool dirty,
                         bool valid, bool no_release)
    Page *oldpage = phys_to_page(paddr);

    if (!is_phys_page(oldpage))

    if (is_process && valid) {
        Arch::invalidate_tlb_entry(vaddr);

    if (oldpage && dirty &&
        !ll_test_and_set(&oldpage->flags, PageFlags::bits::Dirty))
        // Queue page for writeback

    if (!no_release && oldpage)

// FIXME: Add a special PTE flag to indicate that PhysMem mappings
// don't mess with page refcounts.
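// PhysMem exposes physical memory directly as a Mappable: pagein() has
// nothing to do (until high memory support is added), and mappings are
// handed out writeable and executable.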
class PhysMem : public Mappable {
    void get_size(u64 *size)
        if (sizeof(long) == 8)
            *size = 1ULL << (64 - Arch::page_shift);
        else
            *size = 1ULL << (32 - Arch::page_shift);

    void pagein(u64 vaddr, PTEFlags reqflags)
        // Doesn't need to do anything yet, though it may later
        // once high memory support is added.

    void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
        flags->Writeable = 1;
        flags->Executable = 1;

PhysMem real_physmem;
IMappable physmem = real_physmem;
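// AnonMem provides demand-allocated anonymous memory: each get_mapping()
// call allocates a fresh page, zeroes it, and returns it writeable and
// executable.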
class AnonMem : public Mappable {
    void get_size(u64 *size)
        if (sizeof(long) == 8)
            *size = 1ULL << (64 - Arch::page_shift);
        else
            *size = 1ULL << (32 - Arch::page_shift);

    void pagein(u64 vaddr, PTEFlags reqflags)
        // Doesn't need to do anything yet, though it may later
        // once high memory support is added.

    void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
        Page *page = PageAlloc::alloc(1);

        // OPT: Only zero if it was asked for.
        // OPT: Eventually, have separate pagelists for zeroed and
        // unzeroed memory, and a low-priority background thread
        // that zeroes pages and moves them to the zeroed list.
        bzero(page_to_kvirt(page), Arch::page_size);

        *phys = page_to_phys(page);

        flags->Writeable = 1;
        flags->Executable = 1;

AnonMem real_anonmem;
IMappable anonmem = real_anonmem;
#include <servers/mem/addrspace/footer.cc>