#include <System/Mem.h>

#include <kern/kernel.h>
#include <arch/addrs.h>

#include <util/rbtree.h>
#include <util/list.h>
#include <util/lock.h>
#include <kernel/region.h>
#include <lowlevel/bitops.h>

// Used for allocating memory at boot time, before the page allocator
// is running.  Alignment must be a power of 2.  Because nothing other
// than the kernel is guaranteed to be mapped from the beginning on
// all architectures, no generic code should use this until after
// Arch::arch_init() has run and set up physical memory mappings.

// This function may not be used after the page allocator has
// been initialized by architecture code.

// Architectures must provide Arch::next_free_bootmem, initialized
// to the first free piece of bootmem.

static inline void *get_bootmem(size_t size, size_t align)
{
	uintptr_t ret = (Arch::next_free_bootmem + align - 1) & ~(align - 1);
	Arch::next_free_bootmem = ret + size;
	return reinterpret_cast<void *>(ret);
}
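
// Illustrative usage (a sketch, not from the original source): early boot
// code might grab a page-aligned scratch page like this, relying on the
// power-of-2 alignment contract described above.
//
//	void *scratch = get_bootmem(Arch::page_size, Arch::page_size);
//
// The rounding works because, for a power-of-2 align, adding (align - 1)
// and then masking with ~(align - 1) rounds up to the next multiple of
// align.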

typedef System::Mem::AddrSpace IAddrSpace;
typedef System::Mem::Mappable IMappable;
using System::Mem::Cacheable;
using System::Mem::Region;
using System::Mem::RegionWithOffset;
using System::Mem::AllocFlags;
using System::Mem::MapFlags;
using System::Mem::AccessFlags;

// This must be kept in sync with include/kern/generic-pte.h.

	// Readable, Writeable, and Executable are for permission only,
	// not for implementing copy on write, swapping, etc.

	// If set, then on a write access, the page is copied and this
	// address space gets the new, anonymous version.  The rmap list
	// is then traversed; all downstream mappings will share the new
	// page.

	// For vareas that directly map something other than an address
	// space, the action to be taken on a write fault is
	// mappable-specific.

	// Do not allow the user to unmap or modify flags.
	// Used for the shared user/kernel mappings.

#elif defined(BITFIELD_BE)
	ulong pad:_LL_LONG_BYTES * 8 - 9;
#else
#error Unspecified/unrecognized bitfield endianness
#endif

	PTEFlags(ulong init) : raw(init)
	{
	}
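
	// Illustrative sketch of how a fault handler might use these bits
	// (hypothetical code, not from the original source; FaultOnWrite and
	// Writeable per the comments above):
	//
	//	PTEFlags flags;
	//	// ... fill in flags via get_entry() ...
	//	if (write && flags.FaultOnWrite) {
	//		// Copy the page; this aspace gets the new anonymous
	//		// copy, downstream rmap entries are retargeted, and
	//		// FaultOnWrite is cleared.
	//	} else if (write && !flags.Writeable) {
	//		// Genuine protection violation.
	//	}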

using Arch::kvirt_to_phys;
using Arch::phys_to_kvirt;

typedef Util::RBTree<VirtualArea, Region, u64> VirtualAreaTree;

	// This linked list keeps track of the virtual areas that map this
	// mappable (this is not transitive; vareas that map a varea that
	// maps this mappable are not on this list).
	//
	// OPT: rbtree keyed on mapped address range?

	Lock::SpinLock mappings_lock;

	virtual void get_size(u64 *size) = 0;

	virtual void get_block_size(u64 *block_size)
	{
		*block_size = Arch::page_size;
	}

	// Register/unregister varea as mapping this mappable.

	virtual void map(VirtualArea *varea);
	virtual void unmap(VirtualArea *varea);

	// Make the specified page available for mapping.  This must be
	// done before map() will succeed.  It is possible (though
	// unlikely) that the page will be removed before map() is called,
	// causing map() to return false.  In such a case, pagein should be
	// called again by the fault handler.  If the mapping fails for
	// other reasons (such as lack of permission, a hole in a stacked
	// aspace, or an I/O error), then pagein() will throw a BadPageFault
	// exception.

	virtual void pagein(u64 vaddr, PTEFlags reqflags) = 0;
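
	// Illustrative sketch of the retry protocol described above
	// (hypothetical fault-handler fragment, not from the original source;
	// the bool-returning map is AddrSpace::map below):
	//
	//	for (;;) {
	//		mappable->pagein(vaddr, reqflags); // may throw BadPageFault
	//		if (aspace->map(va, vaddr, reqflags))
	//			break;
	//		// Page vanished between pagein() and map(); try again.
	//	}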

	// Returns the physical address and flags associated with a given
	// virtual address.  If flags.Valid is not set, then phys and all
	// other flags are undefined, and pagein() should be retried.
	// rmap_lock must be held.

	virtual void get_entry(u64 vaddr, u64 *phys, PTEFlags *flags) = 0;

#include <servers/mem/addrspace/Mem/Mappable.h>

	// The red/black tree is used to find a region based on address.
	//
	// The linked list is kept in order and is used to iterate over
	// vareas in a region (after looking up the starting point in the
	// tree, unless the region is the entire address space).

	VirtualAreaTree::Node rbtree_node;
	Util::List list_node;
	Util::List mappings_node;

	// This is added to the virtual address to get the offset
	// into the mappable.
	u64 offset;

	Region &region()
	{
		return rbtree_node.value;
	}

	// If the padded size of this changes, update rmap_shift
	// and the alignment check in RMapTable::unmap.
	//
	// If the layout of this changes, update the offsets below.

	Util::ListNoAutoInit head, tail;

	enum {
		head_offset = sizeof(u64) + sizeof(void *),
		tail_offset = head_offset + sizeof(void *) * 2,
	};
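
	// A sanity check one could add here (an illustrative sketch; it
	// assumes, per the arithmetic above, that a u64 and one pointer
	// precede head in this node's layout):
	//
	//	static_assert(offsetof(RMapNode, head) == head_offset,
	//	              "head_offset is out of sync with the layout");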

// This lock protects the rmap chains and rmap tables.  It also makes
// atomic the PageTable::get_entry, RMapTable::map, PageTable::map
// sequence.
//
// OPT: This lock is acquired on all map/unmap activity; if/when this
// turns out to be a significant bottleneck, finer-grained locking can
// be used.  I decided against doing it now because it would be
// somewhat complicated (but I believe doable) to avoid all races,
// and I'd like to move on to implementing other things for now.

extern Lock::Lock rmap_lock;

	RMapNode *get_rmap(u64 virtaddr, bool add = false);

	// rmap_lock must be held.
	static void map(VirtualArea *downstream_va, PageTable *upstream_ptbl,
	                u64 virtaddr, u64 upstream_vaddr);

	void unmap(u64 virtaddr);

	// Handle a copy-on-write for the specified page and all downstream
	// mappings.  All such mappings are set to the new page, and
	// FaultOnWrite is cleared.

	void break_copy_on_write(u64 virtaddr, Page *new_page);

	RMapTable rmap_table;
	const bool is_process;

	typedef Mem::PTEFlags Flags;
	typedef System::Mem::Region Region;
	typedef System::Mem::RegionWithOffset RegionWithOffset;

	PageTable(bool process) : is_process(process)
	{
	}

	// Region is virtual; offset is physical.
	virtual void map(RegionWithOffset region, Flags flags) = 0;
	virtual void unmap(Region region) = 0;

	// Sets the flags which are set in mask to their value in flags.
	// Flags not set in mask are left untouched.

	virtual void set_flags(Region region, Flags flags, Flags mask) = 0;
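
	// Illustrative use of the flags/mask convention (hypothetical caller,
	// not from the original source): make a region read-only without
	// disturbing any other bits.
	//
	//	Flags flags = 0, mask = 0;
	//	mask.Writeable = 1;     // only Writeable is affected...
	//	flags.Writeable = 0;    // ...and its new value is 0
	//	ptbl->set_flags(region, flags, mask);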

	// Returns the physical address and flags associated with a given
	// virtual address.  If flags.Valid is not set, then phys and all
	// other flags are undefined.  This function is mainly used for
	// propagating stacked aspace PTEs.

	virtual void get_entry(u64 vaddr, u64 *phys, Flags *flags) = 0;

	virtual void get_size(u64 *size) = 0;

	// This is called when a PTE is replaced.  It handles refcounting,
	// dirty-page queueing, and TLB invalidation.  vaddr is only
	// valid for process address spaces, so it doesn't need to be
	// 64-bit (except on 64-bit hardware, of course).  When it is
	// known that only the flags are changing, set no_release so that
	// the page refcount is not decremented.

	void kill_pte(ulong vaddr, u64 physaddr, bool dirty, bool valid,
	              bool no_release = false);
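
	// For example (hypothetical caller): when only protection bits are
	// changing on a still-resident page, the old PTE would be killed
	// with no_release = true so that the page's refcount survives.
	//
	//	kill_pte(vaddr, physaddr, old_flags.Dirty, old_flags.Valid,
	//	         true /* no_release: flags-only change */);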

struct BadPageFault {
};

class ASpaceMappable : public Mappable {
	AddrSpace *aspace;

	static bool rec_pagein(AddrSpace *aspace, u64 vaddr,
	                       PTEFlags reqflags);
public:
	ASpaceMappable(AddrSpace *ASPACE) : aspace(ASPACE) {}

	void get_size(u64 *size);
	virtual void pagein(u64 vaddr, PTEFlags reqflags);
	virtual void get_entry(u64 vaddr, u64 *phys, PTEFlags *flags);

	friend class AddrSpace;
};

// OPT: Coalesce vareas when possible (except when setting flags to
// match the surrounding vareas, as the flags are likely to change
// again if they've already changed).

// OPT: A subclass of AddrSpace that doesn't use
// VirtualArea::offset, but rather has its own virtual method that
// figures out offsets to the next level using its own data
// structures (such as filesystem block tables).  This would avoid
// excessive vareas for fragmented files.  Whether the excess of
// vareas is significant enough for this to be worthwhile remains
// to be seen.

	VirtualAreaTree varea_tree;
	Util::List varea_list;

	// This defines the start and end of the aspace; mappings outside
	// this range may not be done, and will not be returned by
	// get_free_region().  For process aspaces, this goes from
	// Arch::user_start to Arch::user_end.  For non-proc aspaces, this
	// covers the full 64-bit range.

	Region aspace_region;

	// Returns true if there is a mapped region that overlaps the given
	// region.  If there is a collision, then the first overlapping
	// varea is returned in va.  Otherwise, va is set to the last mapped
	// area before the region (if there are no areas, or the region is
	// before the first area, then va is NULL).  The aspace lock must
	// be held.

	bool check_overlap(Region region, VirtualArea *&va);

	// Finds a free region of the requested length and puts it in
	// region.  Returns true if an appropriate area is found.  The prev
	// pointer behaves like va in check_overlap().  The aspace lock
	// must be held.

	bool get_free_region(ulong len, Region &region, VirtualArea *&prev);
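
	// Illustrative flow (hypothetical, not from the original source): an
	// anonymous-mapping path might combine these with the aspace lock
	// held, roughly as follows.
	//
	//	Region region;
	//	VirtualArea *prev;
	//	if (!get_free_region(len, region, prev))
	//		/* out of address space; fail the request */;
	//	// ...then insert a new VirtualArea covering region after prev.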

	// This is the value after the last region returned by
	// get_free_region.  If there was an intervening unmap for a lower
	// address, then it is set to that address instead.

	u64 cached_free_region;

	static u64 rec_unmap(AddrSpace *aspace, Region region,
	                     PTEFlags reqflags, VirtualArea *va);

	// If there are multiple virtual areas that cover the specified region,
	// split them at the region's boundaries.  The first varea in the region
	// (if any) is returned.  The aspace lock must be held.

	VirtualArea *split_varea(Region region);

	void break_copy_on_write(VirtualArea *va, u64 vaddr, u64 phys);
	bool map(VirtualArea *va, u64 vaddr, PTEFlags reqflags);

#include <servers/mem/addrspace/Mem/AddrSpace.h>

	ASpaceMappable mappable;
	PageTable *page_table;

	AddrSpace(bool process);
	AddrSpace(void *page_table);

	// Returns true if the fault was "good"; otherwise, the caller
	// should dump regs.  exec should only be used if the CPU
	// implements per-page exec protection; otherwise, treat it
	// as a read.

	bool handle_fault(ulong addr, bool write, bool exec, bool user);
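
	// Illustrative caller (hypothetical arch-level page-fault handler,
	// not from the original source):
	//
	//	if (!aspace->handle_fault(fault_addr, is_write,
	//	                          false /* no per-page exec */, from_user))
	//		dump_regs(regs); // hypothetical; the fault was not "good"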

	void get_mappable(IMappable *ma);
	void clone(IAddrSpace *addrspace, u8 clone_is_real);

	void alloc_and_map(u64 len, u64 *vstart,
	                   AllocFlags aflags, MapFlags mflags);

	void map(IMappable ma, Region region, u64 *vstart, MapFlags mflags,
	         bool from_kernel = false, int map_type = map_user);
	void unmap(Region region, bool from_kernel = false);

	void set_mapflags(Region region, MapFlags mflags);
	void get_mapflags(Region region, MapFlags *mflags, uint8_t *all_same);
	void get_mapping(Region region, IMappable *ma, u64 *offset);

	void get_page_size(u32 *page_size);
	void get_min_align(u32 *min_align);

	void get_size(u64 *size)
	{
		page_table->get_size(size);
	}

	friend void Arch::set_aspace(AddrSpace *aspace);
	friend class ASpaceMappable;

extern Factory addr_space_factory, proc_addr_space_factory;

using ::System::RunTime::orbmm;

static inline bool page_aligned(u64 addr)
{
	return !(addr & (u64)(Arch::page_size - 1));
}

static inline u64 page_align(u64 addr)
{
	return addr & ~(u64)(Arch::page_size - 1);
}
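
// For example, with a 4 KiB page size, page_align(0x12345) yields 0x12000,
// and page_aligned(0x12345) is false while page_aligned(0x12000) is true.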

// FIXME: Valid user addr?  Paging holes?
static inline bool valid_addr(uint64_t addr)
{
	// With 64-bit pointers any address is representable; on 32-bit,
	// the upper 32 bits must be zero.
	if (sizeof(void *) == 8)
		return true;

	return (addr >> 32) == 0;
}