1 // mem/pagetable.cc -- Generic page table implementation
2 // Most architectures should be able to use this as is, though
3 // architectures with weird paging hardware can provide their own implementation.
5 // OPT: Dynamically adjust the number of pagetable levels for PTEs that
6 // support it (mainly for generic-pte).
8 // This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
10 // Permission is hereby granted, free of charge, to any person obtaining a copy of
11 // this software and associated documentation files (the "Software"), to deal with
12 // the Software without restriction, including without limitation the rights to
13 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
14 // of the Software, and to permit persons to whom the Software is furnished to do
15 // so, subject to the following condition:
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 // CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
29 #include <kern/pagealloc.h>
30 #include <kern/pagetable.h>
31 #include <kern/generic-pte.h>
32 #include <lowlevel/atomic.h>
33 #include <util/misc.h>
// The kernel's page table; set exactly once by the
// PageTableImpl(void *) constructor (asserted there).  Process page
// tables copy its top-level kmap-window entries so kernel mappings
// are shared across all address spaces.
36 PageTable *kernel_page_table;
38 // The architecture must specify at least one data structure
39 // representing one PTE. Non-directory PTEs must support the |
40 // operation, either through overloading or providing a constructor and
41 // conversion operator for an integral type. It is assumed that a PTE
42 // of zero (by using bzero on the table) is a reasonable invalid PTE,
43 // but a PTE-specific bulk zero method could be added if necessary.
45 // Eventually multiple PTE formats will need to be supported in
46 // order to dynamically choose between PAE and non-PAE on 32-bit
47 // x86. When that happens, the arch will instantiate its choice
48 // of PageTableImpl template rather than export Arch::PTE.
50 // A PTE must support typedefs PTE::PhysAddr and PTE::VirtAddr which
51 // refer to integer types of the same size as the supported physical
52 // and virtual addresses, respectively...
54 // A PTE must support a typedef PTE::DirPTE which is used for
55 // non-final page tables. DirPTE may be the same as PTE. DirPTE must
56 // support valid_pte, set_pte, and shift_per_level as in a normal PTE,
57 // and must implement the following methods:
61 // A function to return the virtual address of the table pointed to
62 // by this DirPTE entry.
64 // static DirPTE set_table(void *table)
66 // A function to return a DirPTE pointing to the specified virtual
// address (i.e. to the given page table).
69 // A normal PTE must support the following methods:
71 // static void flags_to_pte(Mem::PTEFlags flagsin,
72 // Mem::PTEFlags maskin,
76 // A function to turn Mem::PTEFlags into a PTE. It also produces
77 // a mask which can be used to produce a new pte by calling
78 // oldpte.set_flags(mask, newpteflags).
80 // PTE set_flags(PTE mask, PTE flags)
82 // Apply the flags to the PTE according to the provided mask.
84 // Mem::PTEFlags pte_to_flags()
86 // A function to turn a PTE into Mem::PTEFlags
88 // static uint addr_to_offset(VirtAddr addr, int shift)
90 // A function to take a virtual address and a shift level and return
91 // the offset into the page table in entries.
93 // PhysAddr pte_to_addr()
95 // A function to take a PTE and return the physical address contained
// in it.
98 // static PTE addr_to_pte(PhysAddr phys)
100 // A function to take a physical address and return a PTE with that
101 // address and no flags set.
106 // A function to return whether the PTE is valid/dirty or not. This
107 // is a shortcut to keep from having to do a pte_to_flags repeatedly.
108 // It would have been slightly cleaner to make this a method of PTE,
109 // but that would require that PTE be implemented as a complex type,
110 // and I'd rather leave that up to the architecture.
112 // void set_pte(PTE *table, uint offset)
114 // A function to set a PTE in a page table. Normally, this is just
115 // a simple assignment, but some architectures may need to do something
116 // unusual (such as ensuring atomicity if the PTE size is greater than
// the machine word size).
119 // PTE xchg_pte(PTE *table, uint offset)
121 // As set_pte, but atomically reads the old PTE while setting the
122 // new PTE, and returns the old one.
124 // A PTE must have the following constants:
127 // The number of bits of virtual address space represented by one
128 // level of page tables. This is log2(number of pages per table).
131 // The number of page table levels; this is used, but not imported,
132 // due to a conflict with PageTable::num_levels.
134 // page_size: the size of a page
135 // page_shift: log2(page_size)
137 // kmap_start, kmap_end:
138 // The kernel mappings portion of the address space is mapped into all
139 // address spaces, using shared page tables. This sharing occurs at
140 // the top-level page table (hopefully this will work for all
141 // architectures; it can be made configurable, at the cost of some
142 // complexity). kmap_start and kmap_end are indices into the top
143 // level page table that define which region is shared. These are only
144 // relevant for process address spaces.
146 using Util::round_up;
// end_map: leaf-level worker for rec_map().  Writes one PTE per page
// for [region.start, region.end] into the single last-level table
// `table`, mapping to physical addresses starting at region.offset
// and advancing by PTE::page_size per entry.
//
// NOTE(review): this copy has gaps in its embedded numbering (e.g.
// 150-151, 168-169, 178-181), so the signature's trailing parameter,
// the bracing, and the branch around the old-PTE handling (likely an
// else arm that retains the table page's refcount for a newly-valid
// slot) are not visible -- confirm against the upstream file.
148 template<typename PTE>
149 void PageTableImpl<PTE>::end_map(RegionWithOffset region, PTE flags,
// Index of the first and last PTE slots to fill in this table.
152 uint start = PTE::addr_to_offset(region.start, PTE::page_shift);
153 uint end = PTE::addr_to_offset(region.end, PTE::page_shift);
155 Page *page = kvirt_to_page(table);
// The region must not span beyond this one table, and the table's
// backing page must be live.
157 assert(start < pages_per_table());
158 assert(end < pages_per_table());
159 assert(page->flags & Page::InUse);
161 PTE *ptable = static_cast<PTE *>(table);
163 for (uint i = start; i <= end; i++) {
// Build the new PTE and atomically swap it in, keeping the old one
// so the previous mapping can be torn down.
164 PTE newpte = PTE::addr_to_pte(region.offset) | flags;
165 PTE oldpte = newpte.xchg_pte(ptable, i);
// Presumably bumps the target page's refcount only when it is
// ordinary physical memory -- retain_if_phys is declared elsewhere;
// confirm.
167 retain_if_phys(region.offset);
170 // vaddr is only for process aspaces, so don't worry
173 ulong vaddr = (ulong)region.start +
174 ((i - start) << PTE::page_shift);
// Tear down whatever mapping previously occupied this slot.
176 kill_pte(vaddr, oldpte.pte_to_addr(),
177 oldpte.dirty_pte(), oldpte.valid_pte());
182 region.offset += PTE::page_size;
// rec_map: recursive worker for map().  At directory level `shift`,
// splits [region.start, region.end] into per-entry chunks, allocates
// missing sub-tables, and recurses one level down (handing off to
// end_map() at the last level).
//
// NOTE(review): numbering gaps (e.g. 193-195, 211-212, 218-222,
// 227-230) hide the leaf-case return, the `subtable` declaration,
// the else-branch bracing, and the last-iteration if/else around
// lines 229/231 -- comments below describe only the visible logic.
186 template<typename PTE>
187 void PageTableImpl<PTE>::rec_map(RegionWithOffset region, PTE flags,
188 void *table, int shift)
// Last level reached: fill in leaf PTEs instead of recursing.
190 if (shift < lastlevel_shift) {
191 assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
192 end_map(region, flags, table);
196 Page *page = kvirt_to_page(table);
198 DirPTE *dtable = static_cast<DirPTE *>(table);
// First/last directory slots covered by the region at this level.
199 uint start = DirPTE::addr_to_offset(region.start, shift);
200 uint end = DirPTE::addr_to_offset(region.end, shift);
// Remember the true end; region.end is clamped per-chunk below.
201 u64 orig_end = region.end;
203 assert(start < pages_per_dtable());
204 assert(end < pages_per_dtable());
205 assert(page->flags & Page::InUse);
// Clamp the first chunk to the end of its directory entry's span.
208 region.end = round_up(region.start + 1, shift) - 1;
210 for (uint i = start; i <= end; i++) {
// Empty slot: allocate a zeroed sub-table and install a DirPTE
// pointing at it.
213 if (!dtable[i].valid_pte()) {
214 subtable = Mem::alloc_pages(1);
215 bzero(subtable, PTE::page_size);
216 DirPTE newpte = DirPTE::set_table(subtable);
217 newpte.set_pte(dtable, i);
// Otherwise descend into the existing sub-table.
220 subtable = dtable[i].get_table();
223 rec_map(region, flags, subtable, shift - DirPTE::shift_per_level);
// Advance the physical offset past this chunk and move the window.
225 region.offset += region.end - region.start + 1;
226 region.start = region.end + 1;
// Last chunk uses the true end; otherwise step one entry's span.
// NOTE(review): `1UL << shift` may truncate on ILP32 if shift >= 32
// while region.end is u64 -- check whether (u64)1 << shift was meant.
229 region.end = orig_end;
231 region.end += 1UL << shift;
// end_unmap: leaf-level worker for rec_unmap().  Atomically swaps an
// all-zero (invalid) PTE into every slot covering [region.start,
// region.end] within one last-level table and tears down whatever
// mapping was there.
//
// NOTE(review): numbering gaps (e.g. 251-252, 261-262, 264-268) hide
// the bracing and the branch around the old-PTE handling, including
// where the table page's refcount is released after the assert at
// line 263 -- confirm against the upstream file.
235 template <typename PTE>
236 void PageTableImpl<PTE>::end_unmap(Region region, void *table)
238 Page *page = kvirt_to_page(table);
// First and last PTE slots to clear within this one table.
239 uint start = PTE::addr_to_offset(region.start, PTE::page_shift);
240 uint end = PTE::addr_to_offset(region.end, PTE::page_shift);
242 assert(start < pages_per_table());
243 assert(end < pages_per_table());
244 assert(page->flags & Page::InUse);
246 PTE *ptable = static_cast<PTE *>(table);
248 for (uint i = start; i <= end; i++) {
// Swap in a zero PTE (invalid by convention -- see the header
// comment about bzero'd tables) and keep the old entry.
250 PTE oldpte = PTE().xchg_pte(ptable, i);
253 // vaddr is only for process aspaces, so don't worry
256 ulong vaddr = (ulong)region.start +
257 ((i - start) << PTE::page_shift);
259 kill_pte(vaddr, oldpte.pte_to_addr(),
260 oldpte.dirty_pte(), oldpte.valid_pte());
// The table must still hold references beyond the one being
// dropped for this slot.
263 assert(page->inuse.refcount > 1);
// rec_unmap: recursive worker for unmap().  Walks the directory
// entries covering [region.start, region.end] at level `shift`,
// recursing into valid sub-tables (end_unmap() at the last level) and
// clearing a directory entry once its sub-table's refcount shows no
// remaining mappings.
//
// NOTE(review): numbering gaps (e.g. 275-277, 304-310, 312-313) hide
// the leaf-case return, the bracing, and what follows the
// refcount==1 check -- presumably freeing the now-empty sub-table
// page and dropping this table's own refcount -- confirm upstream.
269 template <typename PTE>
270 void PageTableImpl<PTE>::rec_unmap(Region region, void *table, int shift)
// Last level reached: clear leaf PTEs instead of recursing.
272 if (shift < lastlevel_shift) {
273 assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
274 end_unmap(region, table);
278 Page *page = kvirt_to_page(table);
// First/last directory slots at this level; remember the true end,
// as region.end is clamped per-chunk below.
279 uint start = DirPTE::addr_to_offset(region.start, shift);
280 uint end = DirPTE::addr_to_offset(region.end, shift);
281 u64 orig_end = region.end;
283 assert(start < pages_per_dtable());
284 assert(end < pages_per_dtable());
285 assert(page->flags & Page::InUse);
287 DirPTE *dtable = static_cast<DirPTE *>(table);
// Clamp the first chunk to the end of its directory entry's span.
290 region.end = round_up(region.start + 1, shift) - 1;
292 for (uint i = start; i <= end; i++) {
// Only valid entries have anything to unmap.
293 if (dtable[i].valid_pte()) {
294 void *subtable = dtable[i].get_table();
296 rec_unmap(region, subtable, shift - DirPTE::shift_per_level);
298 Page *subpage = kvirt_to_page(subtable);
299 assert(subpage->flags & Page::InUse);
300 assert(subpage->inuse.refcount > 0);
// Refcount 1 means only the table's own reference remains, i.e. the
// sub-table is empty: clear the directory slot (zero DirPTE).
302 if (subpage->inuse.refcount == 1) {
303 DirPTE().set_pte(dtable, i);
// This table must never lose its last reference while installed.
306 assert(page->inuse.refcount > 1);
// Move to the next chunk: last one uses the true end, otherwise
// step by one entry's span (see the ILP32 shift caveat in rec_map).
311 region.start = region.end + 1;
314 region.end = orig_end;
316 region.end += 1UL << shift;
// end_set_flags: leaf-level worker for rec_set_flags().  Rewrites the
// flag bits of every PTE covering [region.start, region.end] within
// one last-level table via set_flags(mask, flags), then flushes the
// old translation with kill_pte().
//
// NOTE(review): numbering gaps (e.g. 335, 337-338, 344-347) hide the
// bracing and any guard around kill_pte(); the trailing `true`
// argument -- absent on the map/unmap paths -- presumably marks a
// flags-only update, so the backing page is not released.  Confirm
// against kill_pte()'s declaration.
320 template <typename PTE>
321 void PageTableImpl<PTE>::end_set_flags(Region region, PTE flags,
322 PTE mask, void *table)
// First and last PTE slots to touch within this one table.
324 uint start = PTE::addr_to_offset(region.start, PTE::page_shift);
325 uint end = PTE::addr_to_offset(region.end, PTE::page_shift);
327 assert(start < pages_per_table());
328 assert(end < pages_per_table());
330 PTE *ptable = static_cast<PTE *>(table);
332 for (uint i = start; i <= end; i++) {
// Apply the new flags under the mask; set_flags yields the old PTE.
334 PTE oldpte = ptable[i].set_flags(mask, flags);
336 // vaddr is only for process aspaces, so don't worry
339 ulong vaddr = (ulong)region.start +
340 ((i - start) << PTE::page_shift);
342 kill_pte(vaddr, oldpte.pte_to_addr(),
343 oldpte.dirty_pte(), oldpte.valid_pte(), true);
// rec_set_flags: recursive worker for set_flags().  Walks the
// directory entries covering [region.start, region.end] at level
// `shift` and recurses into valid sub-tables; unlike rec_map() it
// never allocates -- unmapped ranges are simply skipped.
//
// NOTE(review): numbering gaps (e.g. 351-352, 356-357, 376-377,
// 379-380, 384-386) hide the signature's final `int shift` parameter,
// the leaf-level hand-off to end_set_flags() expected after line 355,
// and the last-iteration if/else around lines 381/383 -- confirm
// against the upstream file.
348 template <typename PTE>
349 void PageTableImpl<PTE>::rec_set_flags(Region region, PTE flags,
350 PTE mask, void *table,
// Last level reached: normalize shift for leaf-table indexing.
353 if (shift < lastlevel_shift) {
354 assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
355 shift = PTE::page_shift;
// First/last directory slots at this level; remember the true end,
// as region.end is clamped per-chunk below.
358 uint start = DirPTE::addr_to_offset(region.start, shift);
359 uint end = DirPTE::addr_to_offset(region.end, shift);
360 u64 orig_end = region.end;
362 assert(start < pages_per_dtable());
363 assert(end < pages_per_dtable());
365 DirPTE *dtable = static_cast<DirPTE *>(table);
// Clamp the first chunk to the end of its directory entry's span.
368 region.end = round_up(region.start + 1, shift) - 1;
370 for (uint i = start; i <= end; i++) {
// Skip invalid (unmapped) entries; nothing to change there.
371 if (dtable[i].valid_pte()) {
372 void *subtable = dtable[i].get_table();
374 rec_set_flags(region, flags, mask, subtable,
375 shift - DirPTE::shift_per_level);
// Next chunk: last one uses the true end, otherwise step by one
// entry's span (see the ILP32 shift caveat in rec_map).
378 region.start = region.end + 1;
381 region.end = orig_end;
383 region.end += 1UL << shift;
// map: public entry point.  Maps `region` (a virtual range plus its
// backing physical offset) with the given generic flags, holding the
// page table lock for the whole operation.
//
// NOTE(review): numbering gaps (389, 391, 395-396) hide the bracing
// and, presumably, the local `PTE pte, ptemask;` declarations that
// flags_to_pte() fills in below -- confirm upstream.
387 template <typename PTE>
388 void PageTableImpl<PTE>::map(RegionWithOffset region, Flags flags)
390 Lock::AutoLock autolock(lock);
// ~0UL mask: every flag bit comes from `flags` (fresh PTEs, nothing
// existing to preserve).
392 PTE::flags_to_pte(flags, ~0UL, pte, ptemask);
393 PTE *table = static_cast<PTE *>(toplevel);
394 rec_map(region, pte, table, toplevel_shift);
// unmap: public entry point.  Removes every mapping in `region`,
// holding the page table lock for the whole operation.
397 template <typename PTE>
398 void PageTableImpl<PTE>::unmap(Region region)
400 Lock::AutoLock autolock(lock);
401 PTE *table = static_cast<PTE *>(toplevel);
402 rec_unmap(region, table, toplevel_shift);
// set_flags: public entry point.  Rewrites the flags of existing
// mappings in `region`; only the bits selected by `mask` change.
// Holds the page table lock for the whole operation.
//
// NOTE(review): numbering gaps (407, 409, 413-414) hide the bracing
// and, presumably, the local `PTE pte, ptemask;` declarations that
// flags_to_pte() fills in below -- confirm upstream.
405 template <typename PTE>
406 void PageTableImpl<PTE>::set_flags(Region region, Flags flags, Flags mask)
408 Lock::AutoLock autolock(lock);
410 PTE::flags_to_pte(flags, mask, pte, ptemask);
411 PTE *table = static_cast<PTE *>(toplevel);
412 rec_set_flags(region, pte, ptemask, table, toplevel_shift);
// get_mapping: look up the translation of a single virtual address,
// returning the physical address and generic flags through the out
// parameters.  Holds the page table lock while walking.
//
// NOTE(review): numbering gaps (e.g. 428-431) hide the body of the
// !valid_pte() branch -- presumably it reports an invalid mapping
// through *phys/*flags and returns early -- confirm upstream.
415 template <typename PTE>
416 void PageTableImpl<PTE>::get_mapping(u64 addr, u64 *phys, Flags *flags)
418 Lock::AutoLock autolock(lock);
// Walk down from the top-level directory towards the last level.
419 int shift = toplevel_shift;
421 void *table = toplevel;
423 while (shift >= lastlevel_shift) {
424 DirPTE *dtable = static_cast<DirPTE *>(table);
425 DirPTE dpte = dtable[DirPTE::addr_to_offset(addr, shift)];
427 if (!dpte.valid_pte()) {
432 table = dpte.get_table();
433 shift -= DirPTE::shift_per_level;
// After the walk, `table` is the last-level table covering `addr`.
436 assert(shift + DirPTE::shift_per_level - PTE::shift_per_level == PTE::page_shift);
438 PTE *ptable = static_cast<PTE *>(table);
440 uint off = PTE::addr_to_offset(addr, PTE::page_shift);
442 *phys = ptable[off].pte_to_addr();
443 *flags = ptable[off].pte_to_flags();
// Construct a fresh page table.  For process address spaces the
// shared kernel-mapping (kmap) entries are copied from
// kernel_page_table's top level and the process-private slots are
// zeroed; the other path zeroes the whole top-level page.  Finally
// the per-level shift values used by the rec_*/end_* walkers are
// derived from the PTE format's constants.
//
// NOTE(review): numbering gaps (e.g. 451-452, 466, 470-471) hide the
// bracing -- in particular the if (process) / else split implied by
// the two num_levels assignments and the "FIXME: growable levels"
// comment -- confirm against the upstream file.
446 template <typename PTE>
447 PageTableImpl<PTE>::PageTableImpl(bool process) : PageTable(process)
// One page backs the top-level table.
449 toplevel = Mem::alloc_pages(1);
450 PTE *table = static_cast<PTE *>(toplevel);
453 num_levels = PTE::num_levels;
// Zero the process-private entries below and above the kmap window...
455 if (PTE::kmap_start != 0)
456 bzero(table, PTE::kmap_start * sizeof(PTE));
458 if (PTE::kmap_end != pages_per_dtable() - 1)
459 bzero(table + PTE::kmap_end + 1,
460 (pages_per_dtable() - PTE::kmap_end - 1) * sizeof(PTE));
// ...and share kernel mappings by copying its top-level entries.
462 PTE *ktable = static_cast<PTE *>(kernel_page_table->toplevel);
464 memcpy(table + PTE::kmap_start, ktable + PTE::kmap_start,
465 (PTE::kmap_end - PTE::kmap_start + 1) * sizeof(PTE));
467 // FIXME: growable levels
468 num_levels = PTE::num_levels;
469 bzero(table, PTE::page_size);
// Leaf tables are indexed at page_shift; the last directory level
// adds shift_per_level, and each level above adds
// DirPTE::shift_per_level.
472 toplevel_shift = lastlevel_shift = PTE::page_shift;
474 if (num_levels > 1) {
475 lastlevel_shift += PTE::shift_per_level;
476 toplevel_shift += PTE::shift_per_level +
477 (num_levels - 2) * DirPTE::shift_per_level;
// Construct the kernel page table around an already-existing
// top-level table (e.g. one set up by early boot code).  May only run
// once -- it registers itself as the global kernel_page_table.
//
// NOTE(review): numbering gaps (483, 485-486, 489-490) hide the
// bracing and, presumably, a `toplevel = table;` assignment --
// confirm against the upstream file.
481 template <typename PTE>
482 PageTableImpl<PTE>::PageTableImpl(void *table) : PageTable(true)
484 assert(!kernel_page_table);
487 num_levels = PTE::num_levels;
488 kernel_page_table = this;
// Destructor: tear down every mapping this table owns, then free the
// top-level page.  For process tables the shared kmap window is
// deliberately excluded so the kernel's shared sub-tables are not
// freed.  The kernel page table itself must never be destroyed.
//
// NOTE(review): numbering gaps (e.g. 495-496, 501, 503-504, 506-508)
// hide the bracing and the unmap() calls that presumably sit under
// the two ifs and in an else arm.  Also check region1's end:
// ((kmap_start - 1) << toplevel_shift) is the FIRST byte of the last
// pre-kmap top-level entry, not its last byte --
// (((VirtAddr)PTE::kmap_start << toplevel_shift) - 1) may have been
// intended.  Confirm against the upstream file.
491 template <typename PTE>
492 PageTableImpl<PTE>::~PageTableImpl()
494 assert(this != kernel_page_table);
// Process tables: unmap everything below and above the kmap window.
497 Region region1 = { 0, ((VirtAddr)PTE::kmap_start - 1) << toplevel_shift };
498 Region region2 = { ((VirtAddr)PTE::kmap_end + 1) << toplevel_shift, ~0UL };
500 if (PTE::kmap_start != 0)
502 if (PTE::kmap_end != pages_per_dtable() - 1)
// Non-process tables: unmap the whole address space.
505 Region region = { 0, ~0UL };
// After unmapping, only the table's own reference may remain.
509 Page *page = kvirt_to_page(toplevel);
510 assert(page->flags & Page::InUse);
511 assert(page->inuse.refcount == 1);
513 Mem::free_pages(toplevel, 1);
// Explicit instantiations: one page table implementation for the
// architecture's native PTE format, one for the generic (software)
// PTE format from kern/generic-pte.h.
516 template class PageTableImpl<Arch::PTE>;
517 template class PageTableImpl<GenPTE>;