// mem/addrspace.cc -- System.Mem.AddrSpace
//
// OPT: Special AddrSpaces that only translate/export a linear block of
// another AddrSpace, and don't have individual entries for every page.
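//
// A hypothetical sketch of that idea (not implemented here): such a
// window object would keep only a base offset into the backing Mappable
// and forward lookups arithmetically instead of walking a per-page tree:
//
//     class LinearWindow : public Mappable {     // hypothetical
//         Mappable *backing;
//         u64 base;
//
//         void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
//         {
//             backing->get_mapping(base + addr, phys, flags);
//         }
//     };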
//
// OPT: Special VAreas that use their own translation mechanism instead
// of varea->offset, so that filesystem block tables (and similar things)
// don't need to have a VArea per block.
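//
// A rough sketch (hypothetical): translation could become a virtual hook
// on VirtualArea, with the default keeping today's linear behaviour and a
// subclass consulting a block table instead:
//
//     u64 VirtualArea::translate(u64 vaddr)      // default, as now
//     {
//         return vaddr + offset;
//     }
//
//     u64 BlockTableVArea::translate(u64 vaddr)  // hypothetical subclass
//     {
//         return block_table.lookup(vaddr - region().start);
//     }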
//
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// This software is provided 'as-is', without any express or implied warranty.
// In no event will the authors or contributors be held liable for any damages
// arising from the use of this software.
//
// Permission is hereby granted to everyone, free of charge, to use, copy,
// modify, prepare derivative works of, publish, distribute, perform,
// sublicense, and/or sell copies of the Software, provided that the above
// copyright notice and disclaimer of warranty be included in all copies or
// substantial portions of this software.

#include <kern/paging.h>
#include <kern/generic-pagetable.h>
#include <kern/pagetable.h>
#include <kern/pagealloc.h>
#include <kern/generic-pte.h>
#include <kern/compiler.h>
#include <kern/process.h>
#include <kern/thread.h>

extern int roshared_start, roshared_page_end;
extern int rwshared_start, rwshared_page_end;

extern IMappable physmem, anonmem;

class AddrSpaceFactory {
#include <servers/mem/addrspace/Mem/AddrSpaceFactory.h>

void create(Object *obj)
*obj = static_cast<IAddrSpace>(*(new AddrSpace));

class ProcAddrSpaceFactory {
#include <servers/mem/addrspace/Mem/ProcAddrSpaceFactory.h>

ProcAddrSpaceFactory()

void create(Object *obj)
AddrSpace *as = new ProcAddrSpace;

region.start = kvirt_to_phys(&roshared_start);
region.end = kvirt_to_phys(&roshared_page_end);
vstart = Arch::roshared_map;
mf.access_IDLNS_Read = 1;
mf.access_IDLNS_Exec = 1;

as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);

region.start = kvirt_to_phys(&rwshared_start);
region.end = kvirt_to_phys(&rwshared_page_end);
vstart = Arch::rwshared_map;
mf.access_IDLNS_Exec = 0;
mf.access_IDLNS_Write = 1;

as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);

// Leave the stack no-exec by default.
region.start = vstart = Arch::stack_bottom;
region.end = Arch::stack_top;

printf("vstart %llx\n", vstart);
as->map(anonmem, region, &vstart, mf);

*obj = static_cast<IAddrSpace>(*as);

ProcAddrSpaceFactory real_proc_addrspace_factory;
Factory proc_addr_space_factory = real_proc_addrspace_factory;

AddrSpaceFactory real_addrspace_factory;
Factory addr_space_factory = real_addrspace_factory;

AddrSpace::AddrSpace(PageTable *ptbl) : mappable(this)
page_table = new PageTableImpl<GenPTE>(false);

cached_free_region = Arch::user_start + Arch::page_size;

ProcAddrSpace::ProcAddrSpace() :
AddrSpace(new PageTableImpl<Arch::PTE>(true))

// This should only be used once during bootup to initialize the
// kernel's address space with a static initial page table.
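//
// As a rough illustration (hypothetical; the real call site lives in the
// arch-specific bootstrap code), the early boot path would hand over
// whatever top-level page table it already built:
//
//     extern void *boot_pgtbl_toplevel;               // assumed symbol
//     ProcAddrSpace *kernel_aspace =
//             new ProcAddrSpace(boot_pgtbl_toplevel);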

ProcAddrSpace::ProcAddrSpace(void *ptbl_toplevel) :
AddrSpace(new PageTableImpl<Arch::PTE>(ptbl_toplevel))
// FIXME: set cached_free_region to kernel virtual space

void AddrSpace::get_mappable(IMappable *ma)

void AddrSpace::clone(IAddrSpace *addrspace, uint8_t clone_is_real)

bool AddrSpace::handle_fault(ulong vaddr, bool write, bool exec, bool user)
if (lock.held_by_curthread())

assert(!(write && exec));

reqflags.Writeable = 1;
reqflags.Executable = 1;
reqflags.Readable = 1;

mappable.pagein(page_align(vaddr), reqflags);

catch (BadPageFault &bpf) {
// FIXME: retain info about nature of bpf

bool AddrSpace::check_overlap(Region region, VirtualArea *&va)
if (region.end < region.start)

va = varea_tree.find_nearest(region.start);

// If region.start is in an existing region, that region will
// be returned.

if (region.end >= va->region().start &&
region.start <= va->region().end)

// If it returns a region that's greater than region.start, and va
// itself does not overlap, then prev does not overlap (or else
// region.start would be in or before prev, and thus prev would
// have been returned).
//
// If it returns a region that's less than region.start, we still
// need to check next, as region.end could be in (or beyond) that
// region.
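//
// Concrete example (assuming find_nearest picks the lower neighbour
// here): with existing vareas [0x1000,0x1fff] and [0x5000,0x5fff], a
// query for [0x2000,0x5fff] gets the first varea back, which does not
// overlap, so the overlap is only caught by the "next" check below.
// A query for [0x1800,0x1fff] is caught directly by the test above.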

if (va->list_node.next != &varea_list) {
va->list_node.next->listentry(VirtualArea, list_node);

if (region.end >= next->region().start &&
region.start <= next->region().end)

if (va->list_node.prev != &varea_list)
prev = va->list_node.prev->listentry(VirtualArea, list_node);

if (region.start < va->region().start) {
assert(!prev || prev->region().end < region.start);

VirtualArea *AddrSpace::split_varea(Region region)
VirtualArea *start, *mid, *end;

// check_overlap is used rather than varea_tree.find,
// so that the first overlapping region can be returned,
// as most (if not all) callers will need this anyway.
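//
// The intended effect, with hypothetical addresses:
//
//     before:  [ existing varea 0x1000..0x7fff ]
//     split_varea({0x3000, 0x4fff})
//     after:   [0x1000..0x2fff][0x3000..0x4fff][0x5000..0x7fff]
//               "start"         "mid"           "end"
//
// so callers can adjust flags or mappings on the middle piece without
// disturbing the parts of the original varea outside the region.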

if (!check_overlap(region, start))

assert(start->aspace == this);
assert(start->region().end >= region.start);

if (start->region().start < region.start) {
// There is a varea that straddles region.start;
// create a new varea "mid" for the overlapping part.

mid = new VirtualArea;
mid->region().start = region.start;

if (region.end > start->region().end)
mid->region().end = start->region().end;
mid->region().end = region.end;

mid->flags = start->flags;
mid->offset = start->offset;

if (start->region().end > region.end) {
// The varea also straddles region.end; create a new
// varea "end" for the other side of the region.

end = new VirtualArea;
end->region().start = region.end + 1;
end->region().end = start->region().end;

end->flags = start->flags;
end->offset = start->offset;

start->region().end = region.start - 1;

// Splits have already been done at both ends of the region,
// so there's no need to look up the ending address.

if (start->region().end == region.end)

if (start->region().end > region.end)

end = varea_tree.find(region.end);

assert(end->aspace == this);
assert(end->region().start <= region.end);
assert(end->region().end >= region.end);

if (end->region().end == region.end)

assert(end->region().end > region.end);

// There is a varea that straddles region.end;
// create a new varea "mid" for the overlapping part.

mid = new VirtualArea;
mid->region().start = end->region().start;
mid->region().end = region.end;

mid->flags = end->flags;
mid->offset = end->offset;

end->region().start = region.end + 1;

bool AddrSpace::get_free_region(ulong len, Region &region,
assert(page_aligned(len));
assert(cached_free_region);

region.start = cached_free_region;
region.end = cached_free_region + len - 1;

if (region.end <= Arch::user_end && !check_overlap(region, prev)) {
cached_free_region = region.end + 1;

for (Util::List *node = &prev->list_node; node != &varea_list;
VirtualArea *va = node->listentry(VirtualArea, list_node);
ulong end = Arch::user_end;

if (node->next != &varea_list) {
VirtualArea *next = node->next->listentry(VirtualArea, list_node);
end = next->region().start - 1;

assert(end > va->region().end);

if (end - va->region().end >= len) {
region.start = va->region().end + 1;
region.end = region.start + len - 1;

assert(page_aligned(region.start));
cached_free_region = region.end + 1;

if (cached_free_region != Arch::user_start + Arch::page_size) {
cached_free_region = Arch::user_start + Arch::page_size;
return get_free_region(len, region, prev);
385 // The "mapped" paramater is used to indicate whether the top-level
386 // address space has had a mapping established. If "mapped" is
387 // false, but an exception is not thrown, then this method must
388 // be called again to propagate the mapping along the aspace chain.
390 // FIXME: Between aspace locks, if aspace's mapping is revoked and
391 // ma->aspace's mapping changes, a pagein could leak through and cause
392 // a page load or a copy-on-write breaking. This isn't a huge deal
393 // (it doesn't affect the correctness of the code or give aspace
394 // access to ma->aspace's new mapping), but it's unpleasant, and could
395 // have an adverse impact on determinism. If you have a real-time
396 // application that can't tolerate the occasional spurious pagein or
397 // copy-on-write breaking, then use an address space that hasn't
398 // previously been exposed to recursive mappers.
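//
// To illustrate the chain (hypothetical setup): suppose aspace A has a
// varea backed by aspace B's mappable, and B's corresponding varea is
// backed by anonmem. On a fault in A, the first rec_pagein() call walks
// A -> B, finds no usable mapping in A, gets B mapped (pulling the page
// from anonmem), and returns false because A itself is still unmapped.
// pagein()'s retry loop below then calls rec_pagein() again, which can
// now copy B's mapping into A and return true.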

bool ASpaceMappable::rec_pagein(AddrSpace *aspace, u64 vaddr,

// aspace->mappable.retain();

Lock::DroppableAutoLock autolock(aspace->lock);
VirtualArea *va = aspace->varea_tree.find(vaddr);

throw BadPageFault();

if ((va->flags & reqflags) != reqflags)
throw BadPageFault();

if (aspace->map(va, vaddr, reqflags))

Mappable *ma = va->ma;

// aspace->mappable.release();

if (!ma->is_aspace) {
ma->pagein(vaddr, reqflags);

aspace = static_cast<ASpaceMappable *>(ma)->aspace;

void ASpaceMappable::pagein(u64 vaddr, PTEFlags reqflags)
while (!rec_pagein(aspace, vaddr, reqflags));

void AddrSpace::break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys)
assert(lock.held_by_curthread());
assert(rmap_lock.held_by_curthread());

assert(va->flags.FaultOnWrite);
assert(va->aspace == this);

Page *old_page = phys_to_page(phys);

Region region = { vaddr, vaddr + Arch::page_size - 1 };

// If this is the only reference to the page left, then
// nothing needs to be copied. Just clear the COW condition.
if (is_phys_page(old_page) && old_page->get_refcount() == 1) {
PTEFlags mask, flags;
mask.FaultOnWrite = 1;

page_table->set_flags(region, flags, mask);

Page *new_page = PageAlloc::alloc(1);

// OPT: It'd be better to do this without the rmap_lock held,
// especially if rmap_lock is global rather than per-physpage.
// I want to keep things simple for now and optimize later.

memcpy(page_to_kvirt(new_page), phys_to_kvirt(phys),

page_table->rmap_table.break_copy_on_write(region.start, new_page);

void ASpaceMappable::get_mapping(u64 vaddr, u64 *phys, PTEFlags *flags)
aspace->page_table->get_mapping(vaddr, phys, flags);

bool AddrSpace::map(VirtualArea *va, u64 vaddr, PTEFlags reqflags)
Lock::AutoLock autolock(rmap_lock);
assert(va->aspace == this);

va->ma->get_mapping(vaddr + va->offset, &phys, &flags);

PTEFlags newflags = flags & va->flags;
newflags.FaultOnWrite = flags.FaultOnWrite | va->flags.FaultOnWrite;

if (!newflags.Valid) {
assert(va->flags.Valid);

if ((newflags & reqflags) != reqflags)

page_table->get_mapping(vaddr, &oldphys, &oldflags);

if (oldflags.Valid &&
!(reqflags.Writeable && oldflags.FaultOnWrite))

// If the existing mapping is valid, don't try to map it again.
// The existing mapping was put there possibly by a race, but
// more likely because a FaultOnWrite was handled upstream.
//
// FaultOnWrite handling is the only type of mapping change that
// can be done directly; all others must change the varea and do
// an rmap invalidation instead. FaultOnWrite is special
// because we don't want to split vareas for every page that
// gets its copy-on-write broken.

assert((oldflags & reqflags) == reqflags);
assert(!va->flags.FaultOnWrite || oldphys == phys);

if (reqflags.Writeable && oldflags.FaultOnWrite)
// The FaultOnWrite needs to be handled upstream.
if (!va->flags.FaultOnWrite)

va->aspace->break_copy_on_write(va, vaddr, phys);

assert(!oldflags.Valid);
PageTable *usptbl = NULL;

if (va->ma->is_aspace) {
ASpaceMappable *asma = static_cast<ASpaceMappable *>(va->ma);
usptbl = asma->aspace->page_table;

RMapTable::map(va, usptbl, vaddr, vaddr + va->offset);

RegionWithOffset rwo;
rwo.end = vaddr + Arch::page_size - 1;

page_table->map(rwo, newflags);

void ASpaceMappable::get_size(u64 *size)
aspace->get_size(size);

void AddrSpace::map(IMappable ma, Region region, u64 *vstart,
MapFlags mflags, int map_type)
// FIXME: check alignment for VIPT caches
// FIXME: Implement the "Replace" map flag

throw_idl(InvalidArgument, 3,
countarray("Replace unimplemented"));

Mappable *cma = Mappable::classptr(ma);

// The given IMappable does not refer to a Mappable
throw_idl(InvalidArgument, 0, nullarray);

bool fixed = mflags.Fixed;

if (!page_aligned(region.start))
throw_idl(InvalidArgument, 1, countarray("unaligned start"));

if (!page_aligned(region.end + 1))
throw_idl(InvalidArgument, 1, countarray("unaligned end"));

Lock::AutoLock autolock(lock);

if (*vstart != System::Mem::AddrSpace_ns::unspecified_start) {
vregion.start = *vstart;
vregion.end = vregion.start + region.end - region.start;

if (!valid_addr(vregion.start))
throw_idl(InvalidArgument, 2,
countarray("invalid virtual start"));

if (!valid_addr(vregion.end))
throw_idl(InvalidArgument, 2,
countarray("invalid virtual end"));

if (check_overlap(vregion, prev))
*vstart = System::Mem::AddrSpace_ns::unspecified_start;

if (*vstart == System::Mem::AddrSpace_ns::unspecified_start) {
throw_idl(ResourceBusy, 2, countarray("varea overlap"));

if (!get_free_region(region.end - region.start + 1, vregion, prev))
throw_idl(OutOfSpace, countarray("out of vspace"));

*vstart = vregion.start;

VirtualArea *newva = new VirtualArea;
newva->aspace = this;
newva->region() = vregion;

newva->flags.Valid = 1;
newva->flags.User = map_type != map_kernel;
newva->flags.Readable = mflags.access_IDLNS_Read;
newva->flags.Writeable = mflags.access_IDLNS_Write;
newva->flags.Executable = mflags.access_IDLNS_Exec;
newva->flags.FaultOnWrite = mflags.CopyOnWrite;
newva->flags.Protected = map_type != map_user;

newva->offset = region.start - vregion.start;

varea_tree.add(newva);
newva->ma->map(newva);

prev->list_node.add_front(&newva->list_node);

varea_list.add_front(&newva->list_node);

void AddrSpace::unmap(Region region, bool from_kernel)
u64 orig_start = region.start;

while (region.start <= region.end) {
Lock::DroppableAutoLock autolock(lock);

// If check_overlap returns false, then there are no vareas
// in the specified region, so there's nothing to unmap.

if (!check_overlap(region, va))

if (va->flags.Protected && !from_kernel) {
region.start = va->list_node.next->
listentry(VirtualArea, list_node)->region().start;

if (region.start <= orig_start)

u64 va_end = va->region().end;

if (va_end > region.end) {
u64 va_start = va->region().start;
va->region().start = region.end + 1;

if (va_start < region.start) {
VirtualArea *newva = new VirtualArea;

newva->aspace = this;
newva->region().start = va_start;
newva->region().end = region.start - 1;

newva->flags = va->flags;

newva->offset = va->offset;

varea_tree.add(newva);
newva->ma->map(newva);

VirtualArea *nextva =
va->list_node.next->listentry(VirtualArea, list_node);

next_start = nextva->region().start;
} else if (va->region().start < region.start) {
va->region().end = region.start - 1;

// This is done after the varea removal, so that new faults
// don't map things in again.
//
// OPT: Skip RMap-based unmapping if nothing maps this aspace.
// OPT: Push this loop into the RMap code, allowing it to skip
// empty portions of the tables (as the pagetable code currently
// does).

while (region.start <= va_end && region.start <= region.end) {
page_table->rmap_table.unmap(region.start);
region.start += Arch::page_size;

if (region.start <= orig_start)

region.start = next_start;

if (region.start <= orig_start)

void AddrSpace::set_mapflags(Region region, MapFlags mflags)
// Find varea, split if necessary, propagate change to stacked aspaces
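//
// A minimal sketch of that plan (hypothetical; ignores error handling
// and the stacked-aspace propagation mentioned above):
//
//     Lock::AutoLock autolock(lock);
//     split_varea(region);          // align varea boundaries to 'region'
//     // ...walk each varea inside 'region', update its flags from
//     // 'mflags', and invalidate the affected PTEs through the rmap
//     // table so that stacked address spaces re-fault and pick up the
//     // new permissions...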

void AddrSpace::get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same)

void AddrSpace::get_mapping(Region region, IMappable *ma, u64 *offset)

void AddrSpace::get_page_size(u32 *page_size)
*page_size = Arch::page_size;

void AddrSpace::get_min_align(u32 *min_align)
*min_align = Arch::page_mapping_min_align;

void AddrSpace::get_size(u64 *size)
page_table->get_size(size);

void Mappable::map(VirtualArea *varea)
mappings_lock.lock_irq();
mappings.add_back(&varea->mappings_node);
mappings_lock.unlock_irq();

void Mappable::unmap(VirtualArea *varea)
mappings_lock.lock_irq();
varea->mappings_node.del();
mappings_lock.unlock_irq();

void PageTable::kill_pte(ulong vaddr, u64 paddr, bool dirty,
bool valid, bool no_release)
Page *oldpage = phys_to_page(paddr);

if (!is_phys_page(oldpage))

if (is_process && valid) {
Arch::invalidate_tlb_entry(vaddr);

if (oldpage && dirty &&
!ll_test_and_set(&oldpage->flags, PageFlags::bits::Dirty))

// Queue page for writeback

if (!no_release && oldpage)

// FIXME: Add a special PTE flag to indicate that PhysMem mappings
// don't mess with page refcounts.

class PhysMem : public Mappable {
void get_size(u64 *size)
if (sizeof(long) == 8)
*size = 1ULL << (64 - Arch::page_shift);
*size = 1ULL << (32 - Arch::page_shift);

void pagein(u64 vaddr, PTEFlags reqflags)

void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
flags->Writeable = 1;
flags->Executable = 1;

PhysMem real_physmem;
IMappable physmem = real_physmem;

class AnonMem : public Mappable {
void get_size(u64 *size)
if (sizeof(long) == 8)
*size = 1ULL << (64 - Arch::page_shift);
*size = 1ULL << (32 - Arch::page_shift);

void pagein(u64 vaddr, PTEFlags reqflags)

void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
Page *page = PageAlloc::alloc(1);

// OPT: Only zero if it was asked for.
// OPT: Eventually, have separate pagelists for zeroed and
// unzeroed memory, and a low-priority background thread
// that zeroes pages and moves them to the zeroed list.
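//
// A very rough sketch of that idea (hypothetical names and lists):
//
//     // low-priority zeroing thread body
//     while (true) {
//         Page *p = unzeroed_list.pop();
//         if (!p) { wait_for_freed_pages(); continue; }
//         bzero(page_to_kvirt(p), Arch::page_size);
//         zeroed_list.push(p);
//     }
//
// get_mapping() would then take pages from the zeroed list when
// possible and only fall back to zeroing inline, as below.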
bzero(page_to_kvirt(page), Arch::page_size);

*phys = page_to_phys(page);
flags->Writeable = 1;
flags->Executable = 1;

AnonMem real_anonmem;
IMappable anonmem = real_anonmem;

#include <servers/mem/addrspace/footer.cc>