1 // mem/addrspace.cc -- System.Mem.AddrSpace
3 // OPT: Special AddrSpaces that only translate/export a linear block of
4 // another AddrSpace, and don't have individual entries for every page.
6 // OPT: Special VAreas that use their own translation mechanism instead
7 // of varea->offset, so that filesystem block tables (and similar things)
8 // don't need to have a VArea per block.
10 // This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
12 // This software is provided 'as-is', without any express or implied warranty.
13 // In no event will the authors or contributors be held liable for any damages
14 // arising from the use of this software.
16 // Permission is hereby granted to everyone, free of charge, to use, copy,
17 // modify, prepare derivative works of, publish, distribute, perform,
18 // sublicense, and/or sell copies of the Software, provided that the above
19 // copyright notice and disclaimer of warranty be included in all copies or
20 // substantial portions of this software.
23 #include <kern/paging.h>
24 #include <kern/generic-pagetable.h>
25 #include <kern/pagetable.h>
26 #include <kern/pagealloc.h>
27 #include <kern/generic-pte.h>
28 #include <kern/compiler.h>
// Symbols whose addresses ProcAddrSpaceFactory::create() converts with
// kvirt_to_phys() to obtain the bounds of the read-only and read-write
// shared regions.
// NOTE(review): presumably linker-provided section markers -- confirm.
30 extern int roshared_start, roshared_page_end;
31 extern int rwshared_start, rwshared_page_end;
// Mappable interfaces defined near the end of this file from the
// real_physmem and real_anonmem objects.
34 extern IMappable physmem, anonmem;
// Factory whose create() hands out a newly allocated plain AddrSpace
// through its IAddrSpace interface.
36 class AddrSpaceFactory {
// NOTE(review): presumably generated interface glue for this class -- confirm.
38 #include <servers/mem/addrspace/Mem/AddrSpaceFactory.h>
// Allocates a new AddrSpace and stores its IAddrSpace view in *obj.
45 void create(Object *obj)
47 *obj = static_cast<IAddrSpace>(*(new AddrSpace));
// Factory for process address spaces.  create() allocates a ProcAddrSpace
// and pre-populates it with three mappings before returning it as an
// IAddrSpace: the read-only shared region, the read-write shared region,
// and the stack.
51 class ProcAddrSpaceFactory {
// NOTE(review): presumably generated interface glue for this class -- confirm.
53 #include <servers/mem/addrspace/Mem/ProcAddrSpaceFactory.h>
55 ProcAddrSpaceFactory()
// Builds the address space and stores its IAddrSpace view in *obj.
60 void create(Object *obj)
62 AddrSpace *as = new ProcAddrSpace;
// Read-only shared region: physical range derived from the roshared
// symbols, mapped at Arch::roshared_map with read and exec access.
67 region.start = kvirt_to_phys(&roshared_start);
68 region.end = kvirt_to_phys(&roshared_page_end);
69 vstart = Arch::roshared_map;
71 mf.access_IDLNS_Read = 1;
72 mf.access_IDLNS_Exec = 1;
// map_protected makes the varea Protected (see AddrSpace::map), which
// unmap() skips unless called with from_kernel.
74 as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);
// Read-write shared region: same scheme, but exec is dropped and write is
// added to the flags carried over from above.
76 region.start = kvirt_to_phys(&rwshared_start);
77 region.end = kvirt_to_phys(&rwshared_page_end);
78 vstart = Arch::rwshared_map;
79 mf.access_IDLNS_Exec = 0;
80 mf.access_IDLNS_Write = 1;
83 as->map(physmem, region, &vstart, mf, AddrSpace::map_protected);
85 // Leave the stack no-exec by default.
// The stack is backed by anonmem and mapped at its own addresses
// (region.start == vstart); mf is reused as left by the previous mapping.
86 region.start = vstart = Arch::stack_bottom;
87 region.end = Arch::stack_top;
// NOTE(review): looks like leftover debug output; "%llx" assumes vstart is
// a 64-bit integer type -- confirm.
89 printf("vstart %llx\n", vstart);
90 as->map(anonmem, region, &vstart, mf);
92 *obj = static_cast<IAddrSpace>(*as);
// Singleton factory objects and the Factory interface values initialized
// from them.
96 ProcAddrSpaceFactory real_proc_addrspace_factory;
97 Factory proc_addr_space_factory = real_proc_addrspace_factory;
99 AddrSpaceFactory real_addrspace_factory;
100 Factory addr_space_factory = real_addrspace_factory;
// Base constructor: passes `this` to the `mappable` member's constructor
// and seeds the free-region search hint.
102 AddrSpace::AddrSpace(PageTable *ptbl) : mappable(this)
// NOTE(review): the lines between the signature and this allocation are
// not visible here; presumably the GenPTE table is only created when no
// page table was passed in -- confirm.
109 page_table = new PageTableImpl<GenPTE>(false);
// Start the free-region search one page above Arch::user_start.
111 cached_free_region = Arch::user_start + Arch::page_size;
// Default constructor: delegates to AddrSpace with a newly allocated
// PageTableImpl<Arch::PTE> constructed with `true`.
// NOTE(review): the bool presumably marks a process page table, in
// contrast to the `false` used for the GenPTE table -- confirm.
114 ProcAddrSpace::ProcAddrSpace() :
115 AddrSpace(new PageTableImpl<Arch::PTE>(true))
120 // This should only be used once during bootup to initialize the
121 // kernel's address space with a static initial page table.
// Wraps the existing top-level table in a PageTableImpl<Arch::PTE> and
// passes it to the AddrSpace constructor.
123 ProcAddrSpace::ProcAddrSpace(void *ptbl_toplevel) :
124 AddrSpace(new PageTableImpl<Arch::PTE>(ptbl_toplevel))
126 // FIXME: set cached_free_region to kernel virtual space
// Returns a mappable interface for this address space through *ma.
// NOTE(review): body not visible here; presumably exports the `mappable`
// member -- confirm.
130 void AddrSpace::get_mappable(IMappable *ma)
// Clone entry point taking an output IAddrSpace and a clone_is_real flag.
// NOTE(review): body not visible here -- behavior cannot be described
// from this view.
135 void AddrSpace::clone(IAddrSpace *addrspace, uint8_t clone_is_real)
// Page-fault entry point: translates the fault type into required PTE
// flags and asks the address space's mappable to page in the faulting page.
//   vaddr - faulting virtual address (page-aligned before use)
//   write - fault was caused by a write
//   exec  - fault was caused by an instruction fetch
//   user  - not referenced in the visible lines
// NOTE(review): return statements are not visible here; presumably true
// on success and false when the fault cannot be handled -- confirm.
141 bool AddrSpace::handle_fault(ulong vaddr, bool write, bool exec, bool user)
// A fault taken while this thread already holds the aspace lock is
// special-cased (action not visible; presumably the fault is refused).
143 if (lock.held_by_curthread())
// A single fault is never both a write and an instruction fetch.
146 assert(!(write && exec));
// Exactly which of these is set depends on conditions not visible here;
// presumably selected by `write` / `exec` / neither -- confirm.
153 reqflags.Writeable = 1;
155 reqflags.Executable = 1;
157 reqflags.Readable = 1;
162 mappable.pagein(page_align(vaddr), reqflags);
// pagein reports an unsatisfiable fault by throwing BadPageFault.
165 catch (BadPageFault &bpf) {
166 // FIXME: retain info about nature of bpf
// Tests whether `region` (inclusive bounds) overlaps any existing varea.
// `va` is an output: it is set from varea_tree.find_nearest(region.start)
// and may be advanced to a neighbouring varea by code not fully visible
// here.  Callers use it either as the overlapping varea (split_varea,
// unmap) or as a list position near the region (get_free_region, map).
// NOTE(review): the return statements are not visible; presumably true
// means "overlaps" -- confirm against the callers.
174 bool AddrSpace::check_overlap(Region region, VirtualArea *&va)
// An inverted region is special-cased (action not visible).
176 if (region.end < region.start)
179 va = varea_tree.find_nearest(region.start);
184 // If region.start is in an existing region, that region will
// Inclusive interval-overlap test against the nearest varea.
187 if (region.end >= va->region().start &&
188 region.start <= va->region().end)
191 // If it returns a region that's greater than region.start, and va
192 // itself does not overlap, then prev does not overlap (or else
193 // region.start would be in or before prev, and thus prev would
194 // have been returned).
196 // If it returns a region that's less than region.start, we still
197 // need to check next, as region.end could be in (or beyond) that
// &varea_list is the list head, so this checks that a successor exists.
200 if (va->list_node.next != &varea_list) {
202 va->list_node.next->listentry(VirtualArea, list_node);
204 if (region.end >= next->region().start &&
205 region.start <= next->region().end)
// Likewise, only look at the predecessor if va is not first in the list.
214 if (va->list_node.prev != &varea_list)
215 prev = va->list_node.prev->listentry(VirtualArea, list_node);
// Sanity check of the ordering argument in the comment above.
219 if (region.start < va->region().start) {
220 assert(!prev || prev->region().end < region.start);
// Splits existing vareas so that varea boundaries coincide with the ends
// of `region` (inclusive bounds): a varea straddling region.start and/or
// region.end is cut into separate VirtualArea objects.
// NOTE(review): return statements and several assignments are on lines
// not visible here; presumably returns the first varea inside the region,
// or null when nothing overlaps -- confirm.
227 VirtualArea *AddrSpace::split_varea(Region region)
229 VirtualArea *start, *mid, *end;
231 // check_overlap is used rather than varea_tree.find,
232 // so that the first overlapping region can be returned,
233 // as most (if not all) callers will need this anyway.
235 if (!check_overlap(region, start))
239 assert(start->aspace == this);
240 assert(start->region().end >= region.start);
242 if (start->region().start < region.start) {
243 // There is a varea that straddles region.start;
244 // create a new varea "mid" for the overlapping part.
246 mid = new VirtualArea;
249 mid->region().start = region.start;
// mid ends at whichever comes first: the old varea's end or region.end.
251 if (region.end > start->region().end)
252 mid->region().end = start->region().end;
254 mid->region().end = region.end;
256 mid->flags = start->flags;
// The offset is copied unchanged into each piece of the split.
258 mid->offset = start->offset;
260 if (start->region().end > region.end) {
261 // The varea also straddles region.end; create a new
262 // varea "end" for the other side of the region.
264 end = new VirtualArea;
267 end->region().start = region.end + 1;
268 end->region().end = start->region().end;
270 end->flags = start->flags;
272 end->offset = start->offset;
// The original varea shrinks to the part below region.start.
277 start->region().end = region.start - 1;
283 // Splits have already been done at both ends of the region,
284 // so there's no need to look up the ending address.
// From here on, the remaining work is a possible split at region.end.
294 if (start->region().end == region.end)
297 if (start->region().end > region.end)
300 end = varea_tree.find(region.end);
305 assert(end->aspace == this);
306 assert(end->region().start <= region.end);
307 assert(end->region().end >= region.end);
309 if (end->region().end == region.end)
313 assert(end->region().end > region.end);
315 // There is a varea that straddles region.end;
316 // create a new varea "mid" for the overlapping part.
318 mid = new VirtualArea;
// NOTE(review): this store looks dead -- it is overwritten by the next
// visible assignment to mid->region().start.
321 mid->region().start = region.start;
323 mid->region().start = end->region().start;
324 mid->region().end = region.end;
// NOTE(review): mid is carved out of `end` here, yet flags/offset are
// copied from `start`; if `end` can be a different varea than `start`,
// this should probably read from `end` -- confirm.
326 mid->flags = start->flags;
328 mid->offset = start->offset;
// The straddling varea keeps only the part above region.end.
330 end->region().start = region.end + 1;
// Finds an unused virtual region of `len` bytes and stores its inclusive
// bounds in `region`.
//   len    - requested size; must be page-aligned (asserted)
//   region - output: bounds of the region found
//   prev   - set by check_overlap(); the varea-list scan below starts at
//            this varea's list node
// The search first tries the cached hint (cached_free_region), then walks
// the varea list looking for a gap of at least `len` bytes between a varea
// and its successor (or Arch::user_end after the last one).  If that fails
// and the hint was not at its initial value, the hint is reset and the
// search is retried once from the bottom.
// NOTE(review): return statements are not visible here; presumably true
// on success and false when no space is left -- confirm.
338 bool AddrSpace::get_free_region(ulong len, Region &region,
341 assert(page_aligned(len));
342 assert(cached_free_region);
// Fast path: try the region starting at the cached hint.
344 region.start = cached_free_region;
345 region.end = cached_free_region + len - 1;
347 if (region.end <= Arch::user_end && !check_overlap(region, prev)) {
348 cached_free_region = region.end + 1;
// Slow path: scan forward from the varea reported by check_overlap.
// NOTE(review): assumes prev is non-null at this point -- confirm.
352 for (Util::List *node = &prev->list_node; node != &varea_list;
355 VirtualArea *va = node->listentry(VirtualArea, list_node);
// `end` is the last address of the gap following va.
356 ulong end = Arch::user_end;
358 if (node->next != &varea_list) {
359 VirtualArea *next = node->next->listentry(VirtualArea, list_node);
360 end = next->region().start - 1;
363 assert(end > va->region().end);
// The gap spans va->region().end + 1 .. end, i.e. end - va_end bytes.
365 if (end - va->region().end >= len) {
366 region.start = va->region().end + 1;
367 region.end = region.start + len - 1;
369 assert(page_aligned(region.start));
370 cached_free_region = region.end + 1;
// Nothing found above the hint: restart once from the initial hint value.
375 if (cached_free_region != Arch::user_start + Arch::page_size) {
376 cached_free_region = Arch::user_start + Arch::page_size;
377 return get_free_region(len, region, prev);
383 // The "mapped" parameter is used to indicate whether the top-level
384 // address space has had a mapping established. If "mapped" is
385 // false, but an exception is not thrown, then this method must
386 // be called again to propagate the mapping along the aspace chain.
388 // FIXME: Between aspace locks, if aspace's mapping is revoked and
389 // ma->aspace's mapping changes, a pagein could leak through and cause
390 // a page load or a copy-on-write breaking. This isn't a huge deal
391 // (it doesn't affect the correctness of the code or give aspace
392 // access to ma->aspace's new mapping), but it's unpleasant, and could
393 // have an adverse impact on determinism. If you have a real-time
394 // application that can't tolerate the occasional spurious pagein or
395 // copy-on-write breaking, then use an address space that hasn't
396 // previously been exposed to recursive mappers.
// One step of a page-in along a chain of stacked address spaces: looks up
// the varea covering vaddr in `aspace`, checks it permits `reqflags`,
// tries to establish the mapping there, and otherwise forwards the
// request to the varea's backing mappable.  The remaining parameters and
// the return statements are on lines not visible here; pagein() calls
// this repeatedly until it returns true.
398 bool ASpaceMappable::rec_pagein(AddrSpace *aspace, u64 vaddr,
403 // aspace->mappable.retain();
// The aspace lock is held for the varea lookup and the map attempt.
406 Lock::DroppableAutoLock autolock(aspace->lock);
407 VirtualArea *va = aspace->varea_tree.find(vaddr);
// Condition not visible; presumably thrown when no varea covers vaddr.
410 throw BadPageFault();
// The varea must grant every access bit the fault requires.
412 if ((va->flags & reqflags) != reqflags)
413 throw BadPageFault();
415 if (aspace->map(va, vaddr, reqflags))
419 Mappable *ma = va->ma;
424 // aspace->mappable.release();
// A non-aspace backing object is asked to page in directly; otherwise
// continue with the address space behind the mappable.
426 if (!ma->is_aspace) {
427 ma->pagein(vaddr, reqflags);
432 aspace = static_cast<ASpaceMappable *>(ma)->aspace;
// Pages in vaddr with the required flags by calling rec_pagein() until
// it returns true.  BadPageFault thrown by rec_pagein propagates to the
// caller.
438 void ASpaceMappable::pagein(u64 vaddr, PTEFlags reqflags)
440 while (!rec_pagein(aspace, vaddr, reqflags));
// Resolves a write to a FaultOnWrite page at vaddr whose current physical
// address is `phys`.  Must be called with both `lock` and `rmap_lock`
// held by the current thread, on a varea of this aspace that has
// FaultOnWrite set (all asserted).
443 void AddrSpace::break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys)
445 assert(lock.held_by_curthread());
446 assert(rmap_lock.held_by_curthread());
448 assert(va->flags.FaultOnWrite);
449 assert(va->aspace == this);
451 Page *old_page = phys_to_page(phys);
// Single-page region (inclusive bounds) starting at vaddr.
453 Region region = { vaddr, vaddr + Arch::page_size - 1 };
455 // If this is the only reference to the page left, then
456 // nothing needs to be copied.  Just clear the COW condition.
457 if (is_phys_page(old_page) && old_page->get_refcount() == 1) {
// mask selects the FaultOnWrite bit; `flags` is left default-constructed
// in the visible lines.
458 PTEFlags mask, flags;
459 mask.FaultOnWrite = 1;
461 page_table->set_flags(region, flags, mask);
// Otherwise allocate a private page and copy the old contents into it.
465 Page *new_page = PageAlloc::alloc(1);
468 // OPT: It'd be better to do this without the rmap_lock held,
469 // especially if rmap_lock is global rather than per-physpage.
470 // I want to keep things simple for now and optimize later,
473 memcpy(page_to_kvirt(new_page), phys_to_kvirt(phys),
// Hand the new page to the rmap table for this virtual address.
476 page_table->rmap_table.break_copy_on_write(region.start, new_page);
// Looks up vaddr in the underlying address space's page table, returning
// the physical address and PTE flags through the output pointers.
480 void ASpaceMappable::get_mapping(u64 vaddr, u64 *phys, PTEFlags *flags)
482 aspace->page_table->get_mapping(vaddr, phys, flags);
// Tries to establish the page-table mapping for vaddr inside varea `va`
// so that it satisfies `reqflags`.  rec_pagein() treats a true result as
// "mapping done" and otherwise forwards the request to va->ma.
// NOTE(review): the return statements are not visible here; the
// per-branch results noted below are presumptions -- confirm.
485 bool AddrSpace::map(VirtualArea *va, u64 vaddr, PTEFlags reqflags)
// rmap_lock is held for the whole function.
487 Lock::AutoLock autolock(rmap_lock);
488 assert(va->aspace == this);
// Ask the backing mappable what it currently has at the translated
// address (vaddr + va->offset).
492 va->ma->get_mapping(vaddr + va->offset, &phys, &flags);
// Effective flags are the intersection of what the backing object offers
// and what the varea allows, except that FaultOnWrite is inherited from
// either side.
494 PTEFlags newflags = flags & va->flags;
495 newflags.FaultOnWrite = flags.FaultOnWrite | va->flags.FaultOnWrite;
// Backing object has no valid mapping yet (presumably returns false so
// the caller pages it in upstream).
497 if (!newflags.Valid) {
498 assert(va->flags.Valid);
// Backing mapping exists but does not grant the required access
// (presumably also returns false).
502 if ((newflags & reqflags) != reqflags)
// What this aspace's own page table currently holds for vaddr.
507 page_table->get_mapping(vaddr, &oldphys, &oldflags);
509 if (oldflags.Valid &&
510 !(reqflags.Writeable && oldflags.FaultOnWrite))
512 // If the existing mapping is valid, don't try to map it again.
513 // The existing mapping was put there possibly by a race, but
514 // more likely because a FaultOnWrite was handled upstream.
516 // FaultOnWrite handling is the only type of mapping change that
517 // can be done directly; all others must change the varea and do
518 // an rmap invalidation instead.  FaultOnWrite is special
519 // because we don't want to split vareas for every page that
520 // gets its copy-on-write broken.
522 assert((oldflags & reqflags) == reqflags);
523 assert(!va->flags.FaultOnWrite || oldphys == phys);
// Write fault on a page currently mapped FaultOnWrite.
527 if (reqflags.Writeable && oldflags.FaultOnWrite)
529 // The FaultOnWrite needs to be handled upstream.
530 if (!va->flags.FaultOnWrite)
533 va->aspace->break_copy_on_write(va, vaddr, phys);
// Otherwise there is no existing mapping: create one.
535 assert(!oldflags.Valid);
// usptbl is the upstream page table when the backing object is itself an
// address space, else NULL.
536 PageTable *usptbl = NULL;
538 if (va->ma->is_aspace) {
539 ASpaceMappable *asma = static_cast<ASpaceMappable *>(va->ma);
540 usptbl = asma->aspace->page_table;
// Record the reverse mapping, then install the one-page mapping.
543 RMapTable::map(va, usptbl, vaddr, vaddr + va->offset);
545 RegionWithOffset rwo;
547 rwo.end = vaddr + Arch::page_size - 1;
550 page_table->map(rwo, newflags);
// Reports the size of the underlying address space by forwarding to
// AddrSpace::get_size().
556 void ASpaceMappable::get_size(u64 *size)
558 aspace->get_size(size);
// Creates a new virtual area mapping `region` of mappable `ma` into this
// address space.
//   ma       - object to map; must resolve to a Mappable
//   region   - inclusive range within `ma`; start and end + 1 must be
//              page-aligned
//   vstart   - in: requested virtual start, or unspecified_start to let
//              the aspace choose; out: the start actually used
//   mflags   - access bits and the Fixed / CopyOnWrite options read below
//   map_type - compared against map_kernel / map_user to derive the User
//              and Protected varea flags
// Throws (via throw_idl) InvalidArgument, ResourceBusy or OutOfSpace on
// the conditions noted below.
561 void AddrSpace::map(IMappable ma, Region region, u64 *vstart,
562 MapFlags mflags, int map_type)
564 // FIXME: check alignment for VIPT caches
565 // FIXME: Implement the "Replace" map flag
// Condition not visible; presumably raised when the Replace flag is set.
568 throw_idl(InvalidArgument, 3,
569 countarray("Replace unimplemented"));
571 Mappable *cma = Mappable::classptr(ma);
573 // The given IMappable does not refer to a Mappable
576 throw_idl(InvalidArgument, 0, nullarray);
579 bool fixed = mflags.Fixed;
584 if (!page_aligned(region.start))
585 throw_idl(InvalidArgument, 1, countarray("unaligned start"));
// region.end is inclusive, so alignment is checked on end + 1.
587 if (!page_aligned(region.end + 1))
588 throw_idl(InvalidArgument, 1, countarray("unaligned end"));
// The aspace lock is held from here to the end of the function.
590 Lock::AutoLock autolock(lock);
// Caller asked for a specific virtual address: validate it and check
// for a collision with existing vareas.
594 if (*vstart != System::Mem::AddrSpace_ns::unspecified_start) {
595 vregion.start = *vstart;
596 vregion.end = vregion.start + region.end - region.start;
599 if (!valid_addr(vregion.start))
600 throw_idl(InvalidArgument, 2,
601 countarray("invalid virtual start"));
603 if (!valid_addr(vregion.end))
604 throw_idl(InvalidArgument, 2,
605 countarray("invalid virtual end"));
// On overlap, fall through to the "choose an address" path below.
608 if (check_overlap(vregion, prev))
609 *vstart = System::Mem::AddrSpace_ns::unspecified_start;
612 if (*vstart == System::Mem::AddrSpace_ns::unspecified_start) {
// Condition not visible; presumably raised when `fixed` is set.
614 throw_idl(ResourceBusy, 2, countarray("varea overlap"));
616 if (!get_free_region(region.end - region.start + 1, vregion, prev))
// NOTE(review): unlike the other throw_idl calls here, no argument index
// is passed -- confirm this matches the OutOfSpace exception's fields.
617 throw_idl(OutOfSpace, countarray("out of vspace"));
619 *vstart = vregion.start;
// Build the varea describing the new mapping.
622 VirtualArea *newva = new VirtualArea;
623 newva->aspace = this;
624 newva->region() = vregion;
626 newva->flags.Valid = 1;
627 newva->flags.User = map_type != map_kernel;
628 newva->flags.Readable = mflags.access_IDLNS_Read;
629 newva->flags.Writeable = mflags.access_IDLNS_Write;
630 newva->flags.Executable = mflags.access_IDLNS_Exec;
631 newva->flags.FaultOnWrite = mflags.CopyOnWrite;
632 newva->flags.Protected = map_type != map_user;
// offset is added to a virtual address to obtain the address within the
// mappable (see the get_mapping call in map(VirtualArea *, ...)).
634 newva->offset = region.start - vregion.start;
// Register the varea in the tree and with its mappable.
// NOTE(review): the assignment of newva->ma is not visible here;
// presumably set from cma above -- confirm.
636 varea_tree.add(newva);
637 newva->ma->map(newva);
// Link into the varea list, either relative to `prev` or at the front of
// the list (selecting condition not visible).
640 prev->list_node.add_front(&newva->list_node);
642 varea_list.add_front(&newva->list_node);
// Removes all mappings in `region` (inclusive bounds), one varea per loop
// iteration: the varea is trimmed, split or removed, and then its pages
// inside the region are unmapped through the rmap table.
//   from_kernel - when false, vareas flagged Protected are skipped
// The aspace lock is taken afresh in each iteration.
646 void AddrSpace::unmap(Region region, bool from_kernel)
// Remembered so that the loop can detect region.start failing to advance
// past its initial value (checks below; their actions are not visible).
648 u64 orig_start = region.start;
650 while (region.start <= region.end) {
651 Lock::DroppableAutoLock autolock(lock);
654 // If check_overlap returns false, then there are no vareas
655 // in the specified region, so there's nothing to unmap.
657 if (!check_overlap(region, va))
// Protected vareas may only be unmapped by the kernel: skip ahead to the
// start of the next varea.
// NOTE(review): no check against the &varea_list list head is visible
// before taking ->next here (check_overlap does perform one) -- confirm
// this is safe when va is the last varea.
660 if (va->flags.Protected && !from_kernel) {
661 region.start = va->list_node.next->
662 listentry(VirtualArea, list_node)->region().start;
664 if (region.start <= orig_start)
670 u64 va_end = va->region().end;
// The varea extends past the end of the region: keep its tail.
673 if (va_end > region.end) {
674 u64 va_start = va->region().start;
675 va->region().start = region.end + 1;
// It also begins before the region: a new varea is created for the part
// below region.start.
677 if (va_start < region.start) {
678 VirtualArea *newva = new VirtualArea;
680 newva->aspace = this;
681 newva->region().start = va_start;
682 newva->region().end = region.start - 1;
684 newva->flags = va->flags;
686 newva->offset = va->offset;
688 varea_tree.add(newva);
689 newva->ma->map(newva);
// Remember where the following varea starts, to continue from there.
// NOTE(review): as above, no list-head check is visible before this
// ->next dereference -- confirm.
692 VirtualArea *nextva =
693 va->list_node.next->listentry(VirtualArea, list_node);
695 next_start = nextva->region().start;
// The varea only begins before the region: keep its head.
696 } else if (va->region().start < region.start) {
697 va->region().end = region.start - 1;
703 // This is done after the varea removal, so that new faults
704 // don't map things in again.
706 // OPT: Skip RMap-based unmapping if nothing maps this aspace.
707 // OPT: Push this loop into the RMap code, allowing it to skip
708 // empty portions of the tables (as the pagetable code currently
// Unmap page by page up to the end of the varea or of the region,
// whichever comes first.
711 while (region.start <= va_end && region.start <= region.end) {
712 page_table->rmap_table.unmap(region.start);
713 region.start += Arch::page_size;
715 if (region.start <= orig_start)
// Continue with the next varea.
719 region.start = next_start;
721 if (region.start <= orig_start)
// Changes the map flags of `region`.
// NOTE(review): only the plan comment below is visible; presumably not
// implemented yet -- confirm.
726 void AddrSpace::set_mapflags(Region region, MapFlags mflags)
729 // Find varea, split if necessary, propagate change to stacked aspaces
// Queries the map flags of `region`, with outputs *mflags and *all_same.
// NOTE(review): body not visible here -- behavior cannot be described
// from this view.
732 void AddrSpace::get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same)
// Queries the mappable and offset behind `region`, with outputs *ma and
// *offset.
// NOTE(review): body not visible here -- behavior cannot be described
// from this view.
737 void AddrSpace::get_mapping(Region region, IMappable *ma, u64 *offset)
// Stores the architecture's page size (Arch::page_size) in *page_size.
742 void AddrSpace::get_page_size(u32 *page_size)
744 *page_size = Arch::page_size;
// Stores the architecture's minimum mapping alignment
// (Arch::page_mapping_min_align) in *min_align.
747 void AddrSpace::get_min_align(u32 *min_align)
749 *min_align = Arch::page_mapping_min_align;
// Reports the address space size by forwarding to the page table.
752 void AddrSpace::get_size(u64 *size)
754 page_table->get_size(size);
// Registers `varea` as a mapping of this object by appending it to the
// `mappings` list, under mappings_lock (taken with lock_irq).
757 void Mappable::map(VirtualArea *varea)
759 mappings_lock.lock_irq();
760 mappings.add_back(&varea->mappings_node);
761 mappings_lock.unlock_irq();
// Removes `varea` from the list it was linked into by Mappable::map(),
// under mappings_lock (taken with lock_irq).
764 void Mappable::unmap(VirtualArea *varea)
766 mappings_lock.lock_irq();
767 varea->mappings_node.del();
768 mappings_lock.unlock_irq();
// Bookkeeping for a page-table entry that is being removed.
//   vaddr      - virtual address the entry mapped
//   paddr      - physical address the entry pointed to
//   dirty      - the entry's dirty state
//   valid      - the entry's valid state
//   no_release - when true, the release step at the end is skipped
//                (the release call itself is not visible here)
771 void PageTable::kill_pte(ulong vaddr, u64 paddr, bool dirty,
772 bool valid, bool no_release)
774 Page *oldpage = phys_to_page(paddr);
// Addresses that are not physical pages are special-cased (action not
// visible; the later `oldpage &&` checks suggest oldpage is cleared).
776 if (!is_phys_page(oldpage))
// Only process page tables with a valid entry need a TLB invalidation.
779 if (is_process && valid) {
780 Arch::invalidate_tlb_entry(vaddr);
// Sets the page's Dirty flag; the branch is taken only when the
// test-and-set reports the flag was not previously set.
782 if (oldpage && dirty &&
783 !ll_test_and_set(&oldpage->flags, PageFlags::bits::Dirty))
786 // Queue page for writeback
790 if (!no_release && oldpage)
794 // FIXME: Add a special PTE flag to indicate that PhysMem mappings
795 // don't mess with page refcounts.
// Mappable used for direct mappings of physical memory;
// ProcAddrSpaceFactory::create() maps the shared regions through it
// using physical addresses.
797 class PhysMem : public Mappable {
// Reports 2^(word bits - page_shift), choosing 64 or 32 by sizeof(long).
// NOTE(review): presumably a page count rather than bytes -- confirm.
799 void get_size(u64 *size)
801 if (sizeof(long) == 8)
802 *size = 1ULL << (64 - Arch::page_shift);
804 *size = 1ULL << (32 - Arch::page_shift);
// Body not visible here.
807 void pagein(u64 vaddr, PTEFlags reqflags)
// Fills in *phys and *flags for `addr`; the visible lines grant write and
// exec access (other assignments are not visible here).
811 void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
817 flags->Writeable = 1;
818 flags->Executable = 1;
// The single PhysMem instance and the IMappable value initialized from it
// (declared extern near the top of this file).
823 PhysMem real_physmem;
824 IMappable physmem = real_physmem;
// Mappable that supplies zero-filled pages;
// ProcAddrSpaceFactory::create() uses it to back the stack.
826 class AnonMem : public Mappable {
// Reports 2^(word bits - page_shift), choosing 64 or 32 by sizeof(long).
// NOTE(review): presumably a page count rather than bytes -- confirm.
828 void get_size(u64 *size)
830 if (sizeof(long) == 8)
831 *size = 1ULL << (64 - Arch::page_shift);
833 *size = 1ULL << (32 - Arch::page_shift);
// Body not visible here.
836 void pagein(u64 vaddr, PTEFlags reqflags)
// Allocates and zeroes a new page on every call and returns its physical
// address in *phys; `addr` is not used in the visible lines.
840 void get_mapping(u64 addr, u64 *phys, PTEFlags *flags)
842 Page *page = PageAlloc::alloc(1);
844 // OPT: Only zero if it was asked for.
845 // OPT: Eventually, have separate pagelists for zeroed and
846 // unzeroed memory, and a low-priority background thread
847 // that zeroes pages and moves them to the zeroed list.
848 bzero(page_to_kvirt(page), Arch::page_size);
850 *phys = page_to_phys(page);
// The visible lines grant write and exec access (other assignments are
// not visible here).
854 flags->Writeable = 1;
855 flags->Executable = 1;
// The single AnonMem instance and the IMappable value initialized from it
// (declared extern near the top of this file).
860 AnonMem real_anonmem;
861 IMappable anonmem = real_anonmem;
864 #include <servers/mem/addrspace/footer.cc>