// mem/pagetable.cc -- Generic page table implementation
// Most architectures should be able to use this as is, though
// architectures with weird paging hardware can provide their own implementation.
//
// OPT: Dynamically adjust the number of pagetable levels for PTEs that
// support it (mainly for generic-pte).
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
//     * Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimers.
//
//     * Redistributions in binary form must reproduce the above copyright notice,
//       this list of conditions and the following disclaimers in the
//       documentation and/or other materials provided with the distribution.
//
//     * The names of the Software's authors and/or contributors
//       may not be used to endorse or promote products derived from
//       this Software without specific prior written permission.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
#include <kern/pagealloc.h>
#include <kern/pagetable.h>
#include <kern/generic-pte.h>
#include <lowlevel/atomic.h>
#include <util/misc.h>

PageTable *kernel_page_table;
// The architecture must specify at least one data structure
// representing one PTE.  Non-directory PTEs must support the |
// operation, either through overloading or by providing a constructor
// and conversion operator for an integral type.  It is assumed that a
// PTE of zero (as produced by bzero on the table) is a reasonable
// invalid PTE, but a PTE-specific bulk zero method could be added if
// necessary.
//
// Eventually multiple PTE formats will need to be supported in
// order to dynamically choose between PAE and non-PAE on 32-bit
// x86.  When that happens, the arch will instantiate its choice
// of PageTableImpl template rather than export Arch::PTE.
// A PTE must support typedefs PTE::PhysAddr and PTE::VirtAddr, which
// refer to integer types of the same size as the supported physical
// and virtual addresses, respectively.
// A PTE must support a typedef PTE::DirPTE which is used for
// non-final page tables.  DirPTE may be the same as PTE.  DirPTE must
// support valid_pte, set_pte, and shift_per_level as in a normal PTE,
// and must implement the following methods:
//
// void *get_table()
//
// A function to return the virtual address of the table pointed to
// by this DirPTE entry.
//
// static DirPTE set_table(void *table)
//
// A function to return a DirPTE pointing to the page table at the
// specified virtual address.
// A normal PTE must support the following methods:
//
// static void flags_to_pte(Mem::PTEFlags flagsin,
//                          Mem::PTEFlags maskin,
//                          PTE &pteout, PTE &maskout)
//
// A function to turn Mem::PTEFlags into a PTE.  It also produces
// a mask which can be used to produce a new pte by calling
// oldpte.set_flags(mask, newpteflags).
//
// PTE set_flags(PTE mask, PTE flags)
//
// Apply the flags to the PTE according to the provided mask,
// returning the old PTE.
//
// Mem::PTEFlags pte_to_flags()
//
// A function to turn a PTE into Mem::PTEFlags.
// static uint addr_to_offset(VirtAddr addr, int shift)
//
// A function to take a virtual address and a shift level and return
// the offset into the page table in entries.
//
// PhysAddr pte_to_addr()
//
// A function to take a PTE and return the physical address contained
// in it.
//
// static PTE addr_to_pte(PhysAddr phys)
//
// A function to take a physical address and return a PTE with that
// address and no flags set.
// bool valid_pte(), bool dirty_pte()
//
// A function to return whether the PTE is valid/dirty or not.  This
// is a shortcut to keep from having to do a pte_to_flags repeatedly.
// It would have been slightly cleaner to make this a method of PTE,
// but that would require that PTE be implemented as a complex type,
// and I'd rather leave that up to the architecture.
// void set_pte(PTE *table, uint offset)
//
// A function to set a PTE in a page table.  Normally, this is just
// a simple assignment, but some architectures may need to do something
// unusual (such as ensuring atomicity if the PTE size is greater than
// the word size).
//
// PTE xchg_pte(PTE *table, uint offset)
//
// As set_pte, but atomically reads the old PTE while setting the
// new PTE, and returns the old one.
// A PTE must have the following constants:
//
// shift_per_level:
// The number of bits of virtual address space represented by one
// level of page tables.  This is log2(number of pages per table).
//
// num_levels:
// The number of page table levels; this is used, but not imported,
// due to a conflict with PageTable::num_levels.
//
// page_size: the size of a page
// page_shift: log2(page_size)
// kmap_start, kmap_end:
// The kernel mappings portion of the address space is mapped into all
// address spaces, using shared page tables.  This sharing occurs at
// the top-level page table (hopefully this will work for all
// architectures; it can be made configurable, at the cost of some
// complexity).  kmap_start and kmap_end are indices into the top
// level page table that define which region is shared.  These are only
// relevant for process address spaces.
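//
// As a concrete illustration of the contract above, here is a minimal
// sketch of a PTE for a hypothetical two-level, 32-bit MMU with 4K
// pages and 1024-entry tables.  It is illustrative only (and disabled):
// "ExamplePTE", its bit layout, and the phys_to_kvirt/kvirt_to_phys
// helpers are inventions of this comment, not part of any supported
// architecture.

#if 0
struct ExamplePTE {
	typedef u32 PhysAddr;
	typedef u32 VirtAddr;
	typedef ExamplePTE DirPTE;   // directory entries use the same format
	
	u32 pte;
	
	static const ulong page_size = 4096;
	static const int page_shift = 12;
	static const int shift_per_level = 10;  // log2(1024 entries/table)
	static const int num_levels = 2;
	static const uint kmap_start = 768;     // share the top 1GiB
	static const uint kmap_end = 1023;
	
	ExamplePTE() : pte(0) {}
	ExamplePTE(u32 raw) : pte(raw) {}
	operator u32() const { return pte; }
	
	bool valid_pte() const { return pte & 0x1; }
	bool dirty_pte() const { return pte & 0x2; }
	
	PhysAddr pte_to_addr() const { return pte & ~0xfffu; }
	
	static ExamplePTE addr_to_pte(PhysAddr phys)
	{
		return ExamplePTE(phys & ~0xfffu);
	}
	
	static uint addr_to_offset(VirtAddr addr, int shift)
	{
		return (addr >> shift) & ((1 << shift_per_level) - 1);
	}
	
	void set_pte(ExamplePTE *table, uint off) const
	{
		// A plain store suffices here, as the PTE fits in one word.
		table[off] = *this;
	}
	
	ExamplePTE xchg_pte(ExamplePTE *table, uint off) const
	{
		// A real implementation must make this atomic, e.g. with
		// the lowlevel/atomic primitives.
		ExamplePTE old = table[off];
		table[off] = *this;
		return old;
	}
	
	// Directory entries hold the physical address of the next table;
	// get_table/set_table convert to/from its kernel-virtual alias.
	void *get_table() const { return phys_to_kvirt(pte_to_addr()); }
	
	static ExamplePTE set_table(void *table)
	{
		return ExamplePTE(kvirt_to_phys(table) | 0x1);
	}
	
	// flags_to_pte, set_flags, and pte_to_flags would translate
	// between Mem::PTEFlags and the hardware bits; omitted here.
};
#endif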
using Util::round_up;
template<typename PTE>
void PageTableImpl<PTE>::end_map(RegionWithOffset region, PTE flags,
                                 void *table)
{
	uint start = PTE::addr_to_offset(region.start, PTE::page_shift);
	uint end = PTE::addr_to_offset(region.end, PTE::page_shift);
	
	Page *page = kvirt_to_page(table);
	
	assert(start < pages_per_table());
	assert(end < pages_per_table());
	assert(page->flags & Page::InUse);
	
	PTE *ptable = static_cast<PTE *>(table);
	
	for (uint i = start; i <= end; i++) {
		PTE newpte = PTE::addr_to_pte(region.offset) | flags;
		PTE oldpte = newpte.xchg_pte(ptable, i);
		
		retain_if_phys(region.offset);
		
		if (oldpte) {
			// vaddr is only for process aspaces, so don't worry
			// about it being truncated.
			ulong vaddr = (ulong)region.start +
			              ((i - start) << PTE::page_shift);
			
			kill_pte(vaddr, oldpte.pte_to_addr(),
			         oldpte.dirty_pte(), oldpte.valid_pte());
		} else {
			// A new entry rather than a replacement;
			// the table gains a reference.
			page->retain();
		}
		
		region.offset += PTE::page_size;
	}
}
template<typename PTE>
void PageTableImpl<PTE>::rec_map(RegionWithOffset region, PTE flags,
                                 void *table, int shift)
{
	if (shift < lastlevel_shift) {
		assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
		end_map(region, flags, table);
		return;
	}
	
	Page *page = kvirt_to_page(table);
	
	DirPTE *dtable = static_cast<DirPTE *>(table);
	uint start = DirPTE::addr_to_offset(region.start, shift);
	uint end = DirPTE::addr_to_offset(region.end, shift);
	u64 orig_end = region.end;
	
	assert(start < pages_per_dtable());
	assert(end < pages_per_dtable());
	assert(page->flags & Page::InUse);
	
	// Clamp the first chunk to the range covered by the first
	// subtable; e.g. with shift == 22, a region starting at 0x1234
	// is first clamped to end at 0x3fffff.
	region.end = round_up(region.start + 1, shift) - 1;
	
	for (uint i = start; i <= end; i++) {
		void *subtable;
		
		if (!dtable[i].valid_pte()) {
			subtable = Mem::alloc_pages(1);
			bzero(subtable, PTE::page_size);
			DirPTE newpte = DirPTE::set_table(subtable);
			newpte.set_pte(dtable, i);
			page->retain();
		} else {
			subtable = dtable[i].get_table();
		}
		
		rec_map(region, flags, subtable, shift - DirPTE::shift_per_level);
		
		region.offset += region.end - region.start + 1;
		region.start = region.end + 1;
		
		if (i + 1 == end)
			region.end = orig_end;
		else
			region.end += 1UL << shift;
	}
}
template <typename PTE>
void PageTableImpl<PTE>::end_unmap(Region region, void *table)
{
	Page *page = kvirt_to_page(table);
	uint start = PTE::addr_to_offset(region.start, PTE::page_shift);
	uint end = PTE::addr_to_offset(region.end, PTE::page_shift);
	
	assert(start < pages_per_table());
	assert(end < pages_per_table());
	assert(page->flags & Page::InUse);
	
	PTE *ptable = static_cast<PTE *>(table);
	
	for (uint i = start; i <= end; i++) {
		PTE oldpte = PTE().xchg_pte(ptable, i);
		
		if (oldpte) {
			// vaddr is only for process aspaces, so don't worry
			// about it being truncated.
			ulong vaddr = (ulong)region.start +
			              ((i - start) << PTE::page_shift);
			
			kill_pte(vaddr, oldpte.pte_to_addr(),
			         oldpte.dirty_pte(), oldpte.valid_pte());
			
			// Drop the reference the entry held on this table.
			assert(page->inuse.refcount > 1);
			page->release();
		}
	}
}
template <typename PTE>
void PageTableImpl<PTE>::rec_unmap(Region region, void *table, int shift)
{
	if (shift < lastlevel_shift) {
		assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
		end_unmap(region, table);
		return;
	}
	
	Page *page = kvirt_to_page(table);
	uint start = DirPTE::addr_to_offset(region.start, shift);
	uint end = DirPTE::addr_to_offset(region.end, shift);
	u64 orig_end = region.end;
	
	assert(start < pages_per_dtable());
	assert(end < pages_per_dtable());
	assert(page->flags & Page::InUse);
	
	DirPTE *dtable = static_cast<DirPTE *>(table);
	
	// Clamp the first chunk to the range covered by the first subtable.
	region.end = round_up(region.start + 1, shift) - 1;
	
	for (uint i = start; i <= end; i++) {
		if (dtable[i].valid_pte()) {
			void *subtable = dtable[i].get_table();
			
			rec_unmap(region, subtable, shift - DirPTE::shift_per_level);
			
			Page *subpage = kvirt_to_page(subtable);
			assert(subpage->flags & Page::InUse);
			assert(subpage->inuse.refcount > 0);
			
			// Only the allocation reference remains, so the
			// subtable is now empty and can be freed.
			if (subpage->inuse.refcount == 1) {
				DirPTE().set_pte(dtable, i);
				Mem::free_pages(subtable, 1);
				
				assert(page->inuse.refcount > 1);
				page->release();
			}
		}
		
		region.start = region.end + 1;
		
		if (i + 1 == end)
			region.end = orig_end;
		else
			region.end += 1UL << shift;
	}
}
template <typename PTE>
void PageTableImpl<PTE>::end_set_flags(Region region, PTE flags,
                                       PTE mask, void *table)
{
	uint start = PTE::addr_to_offset(region.start, PTE::page_shift);
	uint end = PTE::addr_to_offset(region.end, PTE::page_shift);
	
	assert(start < pages_per_table());
	assert(end < pages_per_table());
	
	PTE *ptable = static_cast<PTE *>(table);
	
	for (uint i = start; i <= end; i++) {
		PTE oldpte = ptable[i].set_flags(mask, flags);
		
		// vaddr is only for process aspaces, so don't worry
		// about it being truncated.
		ulong vaddr = (ulong)region.start +
		              ((i - start) << PTE::page_shift);
		
		kill_pte(vaddr, oldpte.pte_to_addr(),
		         oldpte.dirty_pte(), oldpte.valid_pte(), true);
	}
}
template <typename PTE>
void PageTableImpl<PTE>::rec_set_flags(Region region, PTE flags,
                                       PTE mask, void *table,
                                       int shift)
{
	if (shift < lastlevel_shift) {
		assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
		end_set_flags(region, flags, mask, table);
		return;
	}
	
	uint start = DirPTE::addr_to_offset(region.start, shift);
	uint end = DirPTE::addr_to_offset(region.end, shift);
	u64 orig_end = region.end;
	
	assert(start < pages_per_dtable());
	assert(end < pages_per_dtable());
	
	DirPTE *dtable = static_cast<DirPTE *>(table);
	
	// Clamp the first chunk to the range covered by the first subtable.
	region.end = round_up(region.start + 1, shift) - 1;
	
	for (uint i = start; i <= end; i++) {
		if (dtable[i].valid_pte()) {
			void *subtable = dtable[i].get_table();
			
			rec_set_flags(region, flags, mask, subtable,
			              shift - DirPTE::shift_per_level);
		}
		
		region.start = region.end + 1;
		
		if (i + 1 == end)
			region.end = orig_end;
		else
			region.end += 1UL << shift;
	}
}
template <typename PTE>
void PageTableImpl<PTE>::map(RegionWithOffset region, Flags flags)
{
	Lock::AutoLock autolock(lock);
	PTE pte, ptemask;
	PTE::flags_to_pte(flags, ~0UL, pte, ptemask);
	PTE *table = static_cast<PTE *>(toplevel);
	rec_map(region, pte, table, toplevel_shift);
}
template <typename PTE>
void PageTableImpl<PTE>::unmap(Region region)
{
	Lock::AutoLock autolock(lock);
	PTE *table = static_cast<PTE *>(toplevel);
	rec_unmap(region, table, toplevel_shift);
}
template <typename PTE>
void PageTableImpl<PTE>::set_flags(Region region, Flags flags, Flags mask)
{
	Lock::AutoLock autolock(lock);
	PTE pte, ptemask;
	PTE::flags_to_pte(flags, mask, pte, ptemask);
	PTE *table = static_cast<PTE *>(toplevel);
	rec_set_flags(region, pte, ptemask, table, toplevel_shift);
}
template <typename PTE>
void PageTableImpl<PTE>::get_entry(u64 addr, u64 *phys, Flags *flags)
{
	Lock::AutoLock autolock(lock);
	int shift = toplevel_shift;
	
	void *table = toplevel;
	
	while (shift >= lastlevel_shift) {
		DirPTE *dtable = static_cast<DirPTE *>(table);
		DirPTE dpte = dtable[DirPTE::addr_to_offset(addr, shift)];
		
		if (!dpte.valid_pte()) {
			// No table at this level; report an invalid entry.
			*flags = 0;
			return;
		}
		
		table = dpte.get_table();
		shift -= DirPTE::shift_per_level;
	}
	
	assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
	
	PTE *ptable = static_cast<PTE *>(table);
	
	uint off = PTE::addr_to_offset(addr, PTE::page_shift);
	
	*phys = ptable[off].pte_to_addr();
	*flags = ptable[off].pte_to_flags();
}
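
// Usage sketch for get_entry (illustrative; the address is invented,
// and it assumes Flags, i.e. Mem::PTEFlags, exposes a Valid bit):
//
//   u64 phys;
//   Flags fl;
//   kernel_page_table->get_entry(0xc0001000, &phys, &fl);
//   if (fl.Valid)
//       ...  // phys is the physical address backing 0xc0001000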
template <typename PTE>
PageTableImpl<PTE>::PageTableImpl(bool process) : PageTable(process)
{
	toplevel = Mem::alloc_pages(1);
	PTE *table = static_cast<PTE *>(toplevel);
	
	if (process) {
		num_levels = PTE::num_levels;
		
		// Zero the non-kmap portions of the top-level table, and
		// share the kernel mappings from the kernel page table.
		if (PTE::kmap_start != 0)
			bzero(table, PTE::kmap_start * sizeof(PTE));
		
		if (PTE::kmap_end != pages_per_dtable() - 1)
			bzero(table + PTE::kmap_end + 1,
			      (pages_per_dtable() - PTE::kmap_end - 1) * sizeof(PTE));
		
		PTE *ktable = static_cast<PTE *>(kernel_page_table->toplevel);
		
		memcpy(table + PTE::kmap_start, ktable + PTE::kmap_start,
		       (PTE::kmap_end - PTE::kmap_start + 1) * sizeof(PTE));
	} else {
		// FIXME: growable levels
		num_levels = PTE::num_levels;
		bzero(table, PTE::page_size);
	}
	
	toplevel_shift = lastlevel_shift = PTE::page_shift;
	
	if (num_levels > 1) {
		lastlevel_shift += PTE::shift_per_level;
		toplevel_shift += PTE::shift_per_level +
		                  (num_levels - 2) * DirPTE::shift_per_level;
	}
}
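
// Worked example of the level arithmetic above:
//
//   Two levels, page_shift == 12, shift_per_level == 10 (non-PAE
//   x86-style paging): lastlevel_shift = toplevel_shift = 22, so each
//   top-level entry maps 4MiB and bits 12-21 index the leaf table.
//
//   Three levels, page_shift == 12, shift_per_level == 9 (a
//   hypothetical 64-bit format): lastlevel_shift = 12 + 9 = 21, and
//   toplevel_shift = 12 + 9 + (3 - 2) * 9 = 30, so each top-level
//   entry covers 1GiB.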
template <typename PTE>
PageTableImpl<PTE>::PageTableImpl(void *table) : PageTable(true)
{
	assert(!kernel_page_table);
	
	toplevel = table;
	num_levels = PTE::num_levels;
	kernel_page_table = this;
}
template <typename PTE>
PageTableImpl<PTE>::~PageTableImpl()
{
	assert(this != kernel_page_table);
	
	// Process tables must not unmap the shared kmap region.
	if (is_process) {
		Region region1 = { 0, (((VirtAddr)PTE::kmap_start) << toplevel_shift) - 1 };
		Region region2 = { ((VirtAddr)PTE::kmap_end + 1) << toplevel_shift, ~0UL };
		
		if (PTE::kmap_start != 0)
			unmap(region1);
		if (PTE::kmap_end != pages_per_dtable() - 1)
			unmap(region2);
	} else {
		Region region = { 0, ~0UL };
		unmap(region);
	}
	
	Page *page = kvirt_to_page(toplevel);
	assert(page->flags & Page::InUse);
	assert(page->inuse.refcount == 1);
	
	Mem::free_pages(toplevel, 1);
}
template class PageTableImpl<Arch::PTE>;
template class PageTableImpl<GenPTE>;