1 // mem/rmap.cc -- Reverse mapping from physical page frames (or
2 // intermediate address spaces) to mappers.
4 // This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
6 // This software is provided 'as-is', without any express or implied warranty.
7 // In no event will the authors or contributors be held liable for any damages
8 // arising from the use of this software.
10 // Permission is hereby granted to everyone, free of charge, to use, copy,
11 // modify, prepare derivative works of, publish, distribute, perform,
12 // sublicense, and/or sell copies of the Software, provided that the above
13 // copyright notice and disclaimer of warranty be included in all copies or
14 // substantial portions of this software.
17 #include <kern/pagealloc.h>
18 #include <util/misc.h>
22 // static uint rmaps_per_page = Arch::page_size / sizeof(RMapNode);
24 // If RMapNode's length becomes something other than 8 longs,
25 // change "3" to the base-2 log of the number of longs.
27 static int rmap_shift = Arch::page_shift - _LL_LONG_LOGBYTES - 3;
29 // static int rmap_dirs_per_page = Arch::page_size / sizeof(RMapNode *);
30 static int rmap_dir_shift = Arch::page_shift - _LL_LONG_LOGBYTES;
31 static int rmap_lastlevel_shift = rmap_shift + Arch::page_shift;
33 static int rmap_dir_levels = (64 - rmap_lastlevel_shift - 1)
36 static int rmap_toplevel_shift = rmap_dir_shift * rmap_dir_levels
37 + rmap_lastlevel_shift;
39 static inline u64 addr_to_dir_offset(u64 addr, int shift)
41 return (addr >> shift) & ((1ULL << rmap_dir_shift) - 1);
44 static inline u64 addr_to_offset(u64 addr)
46 return (addr >> Arch::page_shift) & ((1ULL << rmap_shift) - 1);
49 RMapTable::RMapTable()
51 // All RMap tables must have at least one dir level, in order to
52 // simplify the code. If it turns out that a lot of memory is
53 // wasted due to this, the code could be made more complex in order
54 // to allow one-level rmap tables. Currently, on 4KiB-page systems,
55 // a page is wasted per under-512KiB aspace (32-bit) or under-256KiB
58 // Dynamic levels would have to be implemented in generic-pte for
59 // the wastage here to be meaningful.
61 toplevel_shift = rmap_lastlevel_shift;
62 toplevel = Mem::alloc_pages(1);
63 bzero(toplevel, Arch::page_size);
66 RMapNode *RMapTable::get_rmap(u64 virtaddr, bool add)
68 assert(rmap_lock.held_by_curthread());
69 int shift = toplevel_shift;
70 void *table = toplevel;
72 while (toplevel_shift < rmap_toplevel_shift &&
73 (virtaddr >> (toplevel_shift + rmap_dir_shift)))
78 shift += rmap_dir_shift;
79 toplevel_shift += rmap_dir_shift;
81 toplevel = Mem::alloc_pages(1);
82 bzero(toplevel, Arch::page_size);
84 static_cast<void **>(toplevel)[0] = table;
88 while (shift >= rmap_lastlevel_shift) {
89 int off = addr_to_dir_offset(virtaddr, shift);
90 void *new_table = static_cast<void **>(table)[off];
93 new_table = Mem::alloc_pages(1);
94 bzero(new_table, Arch::page_size);
95 static_cast<void **>(table)[off] = new_table;
99 shift -= rmap_dir_shift;
102 assert(shift + rmap_dir_shift - rmap_shift == Arch::page_shift);
104 int off = addr_to_offset(virtaddr);
105 return &static_cast<RMapNode *>(table)[off];
108 void RMapTable::map(VirtualArea *dsva, PageTable *usptbl,
109 u64 dsvaddr, u64 usvaddr)
111 RMapNode *dsrmap = dsva->aspace->page_table->
112 rmap_table.get_rmap(dsvaddr, true);
116 dsrmap->vaddr = page_align(dsvaddr);
121 RMapNode *usrmap = usptbl->rmap_table.get_rmap(usvaddr);
123 assert(usrmap->va->aspace->page_table == usptbl);
125 usrmap->head.add_front(&dsrmap->head);
127 // FIXME: If it ends up being useful, link into the phys-page
131 dsrmap->head.add_front(&dsrmap->tail);
134 void RMapTable::unmap(u64 virtaddr)
136 Lock::AutoLock autolock(rmap_lock);
137 RMapNode *head = get_rmap(virtaddr);
139 if (!head || !head->va)
142 assert(head->vaddr == virtaddr);
144 Util::ListNoAutoInit *node = &head->head, *oldnode;
147 ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
148 if (off == RMapNode::head_offset) {
149 RMapNode *rmap = node->listentry(RMapNode, head);
151 Region region = { rmap->vaddr,
152 rmap->vaddr + Arch::page_shift - 1 };
154 rmap->va->aspace->page_table->unmap(region);
157 assert(off == RMapNode::tail_offset);
163 } while (node != &head->tail);
168 void RMapTable::break_copy_on_write(u64 virtaddr, Page *new_page)
170 assert(rmap_lock.held_by_curthread());
171 RMapNode *head = get_rmap(virtaddr);
172 RMapNode *still_cow = NULL;
174 assert(head && head->va);
175 assert(head->vaddr == virtaddr);
177 // If there are sibling or upstream mappings of this page,
178 // detach the rmap list.
180 if (head->head.prev != &head->tail) {
181 head->head.prev->next = head->tail.next;
182 head->tail.next->prev = head->head.prev;
184 head->head.prev = &head->tail;
185 head->tail.next = &head->head;
188 assert(head->tail.next == &head->head);
189 Util::ListNoAutoInit *node = &head->head;
192 ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
193 if (off == RMapNode::head_offset) {
194 RMapNode *rmap = node->listentry(RMapNode, head);
195 RegionWithOffset region;
197 region.start = rmap->vaddr;
198 region.end = rmap->vaddr + Arch::page_shift - 1;
199 region.offset = page_to_phys(new_page);
201 PTEFlags flags = rmap->va->flags;
203 // The faulting mapping always has PTE FaultOnWrite cleared;
204 // downstream mappings have PTE FaultOnWrite cleared if they
205 // are not downstream of different mapping with VA
206 // FaultOnWrite set. Downstream mappings should never have
207 // PTE FaultOnWrite clear if VA FaultOnWrite is set; if the
208 // downstream mapping had been cow-broken, it would have been
209 // removed from this physpage's rmap list.
211 if (flags.FaultOnWrite && node != &head->head && !still_cow)
215 flags.FaultOnWrite = 1;
217 flags.FaultOnWrite = 0;
219 rmap->va->aspace->page_table->map(region, flags);
221 assert(off == RMapNode::tail_offset);
224 RMapNode *rmap = node->listentry(RMapNode, tail);
226 // We've finished the downstreams of a COW mapping,
227 // so stop marking pages as COW.
229 if (rmap == still_cow)
235 } while (node != &head->tail);
// Global lock serializing all rmap-table lookups and updates
// (asserted held in get_rmap/break_copy_on_write, taken in unmap).
Lock::Lock rmap_lock;