// mem/rmap.cc -- Reverse mapping from physical page frames (or
// intermediate address spaces) to mappers.
//
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
//     * Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimers.
//
//     * Redistributions in binary form must reproduce the above copyright notice,
//       this list of conditions and the following disclaimers in the
//       documentation and/or other materials provided with the distribution.
//
//     * The names of the Software's authors and/or contributors
//       may not be used to endorse or promote products derived from
//       this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.
#include <kern/pagealloc.h>
#include <util/misc.h>
// static uint rmaps_per_page = Arch::page_size / sizeof(RMapNode);

// If RMapNode's length becomes something other than 8 longs,
// change "3" to the base-2 log of the number of longs.

static int rmap_shift = Arch::page_shift - _LL_LONG_LOGBYTES - 3;

// static int rmap_dirs_per_page = Arch::page_size / sizeof(RMapNode *);
static int rmap_dir_shift = Arch::page_shift - _LL_LONG_LOGBYTES;
static int rmap_lastlevel_shift = rmap_shift + Arch::page_shift;

static int rmap_dir_levels = (64 - rmap_lastlevel_shift - 1)
                             / rmap_dir_shift;

static int rmap_toplevel_shift = rmap_dir_shift * rmap_dir_levels
                               + rmap_lastlevel_shift;
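
// For concreteness, under one illustrative configuration (not dictated by
// this file): 4 KiB pages (Arch::page_shift == 12), 64-bit longs
// (_LL_LONG_LOGBYTES == 3), and 8-long RMapNodes, these work out to:
//
//   rmap_shift           = 12 - 3 - 3 = 6    (64 RMapNodes per leaf page)
//   rmap_dir_shift       = 12 - 3     = 9    (512 pointers per directory page)
//   rmap_lastlevel_shift = 6 + 12     = 18   (one leaf page covers 256 KiB)
//   rmap_dir_levels      = (64 - 18 - 1) / 9 = 5
//   rmap_toplevel_shift  = 9 * 5 + 18 = 63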
static inline u64 addr_to_dir_offset(u64 addr, int shift)
{
	return (addr >> shift) & ((1ULL << rmap_dir_shift) - 1);
}

static inline u64 addr_to_offset(u64 addr)
{
	return (addr >> Arch::page_shift) & ((1ULL << rmap_shift) - 1);
}
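
// Illustrative note, using the same assumed configuration as above: the
// rmap tree is indexed from the top down.  addr_to_dir_offset() extracts a
// 9-bit directory index at each level (shift = 63, 54, 45, 36, 27, 18 when
// all levels are present), and addr_to_offset() extracts the 6-bit index of
// the RMapNode within the leaf page, i.e. the low 6 bits of the virtual
// page number (address bits 17..12).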
RMapTable::RMapTable()
{
	// All RMap tables must have at least one dir level, in order to
	// simplify the code.  If it turns out that a lot of memory is
	// wasted due to this, the code could be made more complex in order
	// to allow one-level rmap tables.  Currently, on 4KiB-page systems,
	// a page is wasted per under-512KiB aspace (32-bit) or under-256KiB
	// aspace (64-bit).
	//
	// Dynamic levels would have to be implemented in generic-pte for
	// the wastage here to be meaningful.

	toplevel_shift = rmap_lastlevel_shift;
	toplevel = Mem::alloc_pages(1);
	bzero(toplevel, Arch::page_size);
}
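
// Rough arithmetic behind the figures above (illustrative, assuming 4 KiB
// pages): an RMapNode is 8 longs, i.e. 32 bytes with 32-bit longs or 64
// bytes with 64-bit longs, so a single leaf page holds 128 or 64 RMapNodes
// and covers 128 * 4 KiB = 512 KiB or 64 * 4 KiB = 256 KiB of virtual
// space.  Any address space smaller than that still allocates the mandatory
// directory page above the leaf level.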
RMapNode *RMapTable::get_rmap(u64 virtaddr, bool add)
{
	assert(rmap_lock.held_by_curthread());
	int shift = toplevel_shift;
	void *table = toplevel;

	// Grow the tree upward, adding directory levels above the current
	// top level, until it covers virtaddr.
	while (toplevel_shift < rmap_toplevel_shift &&
	       (virtaddr >> (toplevel_shift + rmap_dir_shift)))
	{
		if (!add)
			return NULL;

		shift += rmap_dir_shift;
		toplevel_shift += rmap_dir_shift;

		toplevel = Mem::alloc_pages(1);
		bzero(toplevel, Arch::page_size);

		static_cast<void **>(toplevel)[0] = table;
		table = toplevel;
	}

	while (shift >= rmap_lastlevel_shift) {
		int off = addr_to_dir_offset(virtaddr, shift);
		void *new_table = static_cast<void **>(table)[off];

		if (!new_table) {
			if (!add)
				return NULL;

			new_table = Mem::alloc_pages(1);
			bzero(new_table, Arch::page_size);
			static_cast<void **>(table)[off] = new_table;
		}

		table = new_table;
		shift -= rmap_dir_shift;
	}

	assert(shift + rmap_dir_shift - rmap_shift == Arch::page_shift);

	int off = addr_to_offset(virtaddr);
	return &static_cast<RMapNode *>(table)[off];
}
void RMapTable::map(VirtualArea *dsva, PageTable *usptbl,
                    u64 dsvaddr, u64 usvaddr)
{
	RMapNode *dsrmap = dsva->aspace->page_table->
	                   rmap_table.get_rmap(dsvaddr, true);

	assert(!dsrmap->va);
	dsrmap->va = dsva;
	dsrmap->vaddr = page_align(dsvaddr);
	dsrmap->head.init();
	dsrmap->tail.init();

	if (usptbl) {
		RMapNode *usrmap = usptbl->rmap_table.get_rmap(usvaddr);
		assert(usrmap->va->aspace->page_table == usptbl);

		usrmap->head.add_front(&dsrmap->head);
	} else {
		// FIXME: If it ends up being useful, link into the phys-page
		// rmap list.
	}

	dsrmap->head.add_front(&dsrmap->tail);
}
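
// Illustrative note on the resulting list shape (assuming add_front()
// inserts its argument immediately after the node it is called on): after
// mapping a downstream area ds under an upstream page table us, the list
// reads ... us.head, ds.head, ds.tail, <older downstreams of us>, us.tail ...
// Each mapping's own downstreams therefore sit between its head and tail
// nodes, which is the nesting that unmap() and break_copy_on_write() walk
// from head to tail.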
void RMapTable::unmap(u64 virtaddr)
{
	Lock::AutoLock autolock(rmap_lock);
	RMapNode *head = get_rmap(virtaddr);

	if (!head || !head->va)
		return;

	assert(head->vaddr == virtaddr);

	Util::ListNoAutoInit *node = &head->head, *oldnode;

	do {
		ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
		if (off == RMapNode::head_offset) {
			RMapNode *rmap = node->listentry(RMapNode, head);
			Region region = { rmap->vaddr,
			                  rmap->vaddr + Arch::page_size - 1 };

			rmap->va->aspace->page_table->unmap(region);
			rmap->va = NULL;
		} else {
			assert(off == RMapNode::tail_offset);
		}

		oldnode = node;
		node = node->next;
		oldnode->del();
	} while (node != &head->tail);

	node->del();
}
void RMapTable::break_copy_on_write(u64 virtaddr, Page *new_page)
{
	assert(rmap_lock.held_by_curthread());
	RMapNode *head = get_rmap(virtaddr);
	RMapNode *still_cow = NULL;

	assert(head && head->va);
	assert(head->vaddr == virtaddr);

	// If there are sibling or upstream mappings of this page,
	// detach the rmap list.

	if (head->head.prev != &head->tail) {
		head->head.prev->next = head->tail.next;
		head->tail.next->prev = head->head.prev;

		head->head.prev = &head->tail;
		head->tail.next = &head->head;
	}

	assert(head->tail.next == &head->head);
	Util::ListNoAutoInit *node = &head->head;

	do {
		ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
		if (off == RMapNode::head_offset) {
			RMapNode *rmap = node->listentry(RMapNode, head);
			RegionWithOffset region;

			region.start = rmap->vaddr;
			region.end = rmap->vaddr + Arch::page_size - 1;
			region.offset = page_to_phys(new_page);

			PTEFlags flags = rmap->va->flags;

			// The faulting mapping always has PTE FaultOnWrite cleared;
			// downstream mappings have PTE FaultOnWrite cleared if they
			// are not downstream of a different mapping with VA
			// FaultOnWrite set.  Downstream mappings should never have
			// PTE FaultOnWrite clear if VA FaultOnWrite is set; if the
			// downstream mapping had been cow-broken, it would have been
			// removed from this physpage's rmap list.

			if (flags.FaultOnWrite && node != &head->head && !still_cow)
				still_cow = rmap;

			if (still_cow)
				flags.FaultOnWrite = 1;
			else
				flags.FaultOnWrite = 0;

			rmap->va->aspace->page_table->map(region, flags);
		} else {
			assert(off == RMapNode::tail_offset);

			RMapNode *rmap = node->listentry(RMapNode, tail);

			// We've finished the downstreams of a COW mapping,
			// so stop marking pages as COW.

			if (rmap == still_cow)
				still_cow = NULL;
		}

		node = node->next;
	} while (node != &head->tail);
}
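
// Illustrative walk-through of the loop above: suppose the faulting mapping
// A has direct downstream mappings B and C, where B's VirtualArea has
// FaultOnWrite set (B is a COW view of A) and C's does not.  Walking from
// A.head to A.tail, A and C are remapped to new_page with PTE FaultOnWrite
// cleared; when B.head is reached, still_cow is set, so B and anything
// between B.head and B.tail keep PTE FaultOnWrite set, and still_cow is
// cleared again at B.tail.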
Lock::Lock rmap_lock;