Switch to a simple X11-style license.
[polintos/scott/priv.git] / kernel / mem / rmap.cc
1 // mem/rmap.cc -- Reverse mapping from physical page frames (or 
2 // intermediate address spaces) to mappers.
3 //
4 // This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
5 // 
6 // Permission is hereby granted, free of charge, to any person obtaining a copy of
7 // this software and associated documentation files (the "Software"), to deal with
8 // the Software without restriction, including without limitation the rights to
9 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 // of the Software, and to permit persons to whom the Software is furnished to do
11 // so, subject to the following condition:
12 // 
13 // The above copyright notice and this permission notice shall be
14 // included in all copies or substantial portions of the Software.
15 // 
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
19 // CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
22 // SOFTWARE.
23
24 #include <kern/mem.h>
25 #include <kern/pagealloc.h>
26 #include <util/misc.h>
27
28 namespace Mem {
	using Util::round_up;
	// static uint rmaps_per_page = Arch::page_size / sizeof(RMapNode);
	
	// If RMapNode's length becomes something other than 8 longs,
	// change "3" to the base-2 log of the number of longs.
	
	// log2 of the number of RMapNodes per last-level table page:
	// page_shift minus log2(sizeof(RMapNode)), where sizeof(RMapNode)
	// is 8 (2^3) longs.
	static int rmap_shift = Arch::page_shift - _LL_LONG_LOGBYTES - 3;
	
	// static int rmap_dirs_per_page = Arch::page_size / sizeof(RMapNode *);
	// log2 of the number of directory pointers per directory page.
	static int rmap_dir_shift = Arch::page_shift - _LL_LONG_LOGBYTES;
	// Total address bits resolved once a lookup reaches a last-level
	// table: the in-page offset plus the last-level index.
	static int rmap_lastlevel_shift = rmap_shift + Arch::page_shift;
	
	// Number of directory levels needed above the last level to cover
	// a full 64-bit virtual address.
	static int rmap_dir_levels = (64 - rmap_lastlevel_shift - 1) 
	                             / rmap_dir_shift;

	// Shift of a fully-grown top-level directory (the table stops
	// expanding upward once toplevel_shift reaches this).
	static int rmap_toplevel_shift = rmap_dir_shift * rmap_dir_levels
	                               + rmap_lastlevel_shift;
46         
47         static inline u64 addr_to_dir_offset(u64 addr, int shift)
48         {
49                 return (addr >> shift) & ((1ULL << rmap_dir_shift) - 1);
50         }
51
52         static inline u64 addr_to_offset(u64 addr)
53         {
54                 return (addr >> Arch::page_shift) & ((1ULL << rmap_shift) - 1);
55         }
56
57         RMapTable::RMapTable()
58         {
59                 // All RMap tables must have at least one dir level, in order to
60                 // simplify the code.  If it turns out that a lot of memory is
61                 // wasted due to this, the code could be made more complex in order
62                 // to allow one-level rmap tables.  Currently, on 4KiB-page systems,
63                 // a page is wasted per under-512KiB aspace (32-bit) or under-256KiB
64                 // aspace (64-bit).
65                 //
66                 // Dynamic levels would have to be implemented in generic-pte for
67                 // the wastage here to be meaningful.
68                 
69                 toplevel_shift = rmap_lastlevel_shift;
70                 toplevel = Mem::alloc_pages(1);
71                 bzero(toplevel, Arch::page_size);
72         }
73
74         RMapNode *RMapTable::get_rmap(u64 virtaddr, bool add)
75         {
76                 assert(rmap_lock.held_by_curthread());
77                 int shift = toplevel_shift;
78                 void *table = toplevel;
79                 
80                 while (toplevel_shift < rmap_toplevel_shift && 
81                        (virtaddr >> (toplevel_shift + rmap_dir_shift)))
82                 {
83                         if (!add)
84                                 return NULL;
85                         
86                         shift += rmap_dir_shift;
87                         toplevel_shift += rmap_dir_shift;
88
89                         toplevel = Mem::alloc_pages(1);
90                         bzero(toplevel, Arch::page_size);
91
92                         static_cast<void **>(toplevel)[0] = table;
93                         table = toplevel;
94                 }
95                 
96                 while (shift >= rmap_lastlevel_shift) {
97                         int off = addr_to_dir_offset(virtaddr, shift);
98                         void *new_table = static_cast<void **>(table)[off];
99                         
100                         if (!new_table) {
101                                 new_table = Mem::alloc_pages(1);
102                                 bzero(new_table, Arch::page_size);
103                                 static_cast<void **>(table)[off] = new_table;
104                         }
105                         
106                         table = new_table;
107                         shift -= rmap_dir_shift;
108                 }
109                 
110                 assert(shift + rmap_dir_shift - rmap_shift == Arch::page_shift);
111
112                 int off = addr_to_offset(virtaddr);
113                 return &static_cast<RMapNode *>(table)[off];
114         }
115
	// Record that downstream virtual area DSVA maps DSVADDR, optionally
	// as a child of the upstream mapping of USVADDR in USPTBL.  The
	// downstream node is linked immediately after the upstream node's
	// head, so an upstream unmap/COW-break walk visits it.
	// NOTE(review): callers are presumed to hold rmap_lock (get_rmap
	// asserts it) -- confirm at call sites.
	void RMapTable::map(VirtualArea *dsva, PageTable *usptbl,
	                         u64 dsvaddr, u64 usvaddr)
	{
		// Allocate (add == true) the downstream rmap node.
		RMapNode *dsrmap = dsva->aspace->page_table->
		                   rmap_table.get_rmap(dsvaddr, true);

		// The slot must not already be in use.
		assert(!dsrmap->va);
		dsrmap->va = dsva;
		dsrmap->vaddr = page_align(dsvaddr);
		dsrmap->head.init();
		dsrmap->tail.init();
		
		if (usptbl) {
			// Upstream mapping must already exist (lookup only; no add
			// argument, so the header's default applies).
			RMapNode *usrmap = usptbl->rmap_table.get_rmap(usvaddr);
			assert(usrmap);
			assert(usrmap->va->aspace->page_table == usptbl);

			// Splice the downstream head right after the upstream head.
			usrmap->head.add_front(&dsrmap->head);
		} else {
			// FIXME: If it ends up being useful, link into the phys-page
			// rmap list.
		}

		// The tail immediately follows the head until this mapping gains
		// downstreams of its own, which will be inserted between them.
		dsrmap->head.add_front(&dsrmap->tail);
	}
141
142         void RMapTable::unmap(u64 virtaddr)
143         {
144                 Lock::AutoLock autolock(rmap_lock);
145                 RMapNode *head = get_rmap(virtaddr);
146                 
147                 if (!head || !head->va)
148                         return;
149
150                 assert(head->vaddr == virtaddr);
151
152                 Util::ListNoAutoInit *node = &head->head, *oldnode;
153                 
154                 do {
155                         ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
156                         if (off == RMapNode::head_offset) {
157                                 RMapNode *rmap = node->listentry(RMapNode, head);
158                         
159                                 Region region = { rmap->vaddr,
160                                                   rmap->vaddr + Arch::page_shift - 1 };
161
162                                 rmap->va->aspace->page_table->unmap(region);
163                                 rmap->va = NULL;
164                         } else {
165                                 assert(off == RMapNode::tail_offset);
166                         }
167
168                         oldnode = node;
169                         node = node->next;
170                         oldnode->del();
171                 } while (node != &head->tail);
172                 
173                 node->del();
174         }
175
176         void RMapTable::break_copy_on_write(u64 virtaddr, Page *new_page)
177         {
178                 assert(rmap_lock.held_by_curthread());
179                 RMapNode *head = get_rmap(virtaddr);
180                 RMapNode *still_cow = NULL;
181                 
182                 assert(head && head->va);
183                 assert(head->vaddr == virtaddr);
184                 
185                 // If there are sibling or upstream mappings of this page,
186                 // detach the rmap list.
187                 
188                 if (head->head.prev != &head->tail) {
189                         head->head.prev->next = head->tail.next;
190                         head->tail.next->prev = head->head.prev;
191                         
192                         head->head.prev = &head->tail;
193                         head->tail.next = &head->head;
194                 }
195
196                 assert(head->tail.next == &head->head);
197                 Util::ListNoAutoInit *node = &head->head;
198                 
199                 do {
200                         ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
201                         if (off == RMapNode::head_offset) {
202                                 RMapNode *rmap = node->listentry(RMapNode, head);
203                                 RegionWithOffset region;
204                                 
205                                 region.start = rmap->vaddr;
206                                 region.end = rmap->vaddr + Arch::page_shift - 1;
207                                 region.offset = page_to_phys(new_page);
208                                 
209                                 PTEFlags flags = rmap->va->flags;
210                                 
211                                 // The faulting mapping always has PTE FaultOnWrite cleared;
212                                 // downstream mappings have PTE FaultOnWrite cleared if they
213                                 // are not downstream of different mapping with VA
214                                 // FaultOnWrite set.  Downstream mappings should never have
215                                 // PTE FaultOnWrite clear if VA FaultOnWrite is set; if the
216                                 // downstream mapping had been cow-broken, it would have been
217                                 // removed from this physpage's rmap list.
218                                 
219                                 if (flags.FaultOnWrite && node != &head->head && !still_cow)
220                                         still_cow = rmap;
221                                 
222                                 if (still_cow)
223                                         flags.FaultOnWrite = 1;
224                                 else
225                                         flags.FaultOnWrite = 0;
226                                 
227                                 rmap->va->aspace->page_table->map(region, flags);
228                         } else {
229                                 assert(off == RMapNode::tail_offset);
230                                 
231                                 if (still_cow) {
232                                         RMapNode *rmap = node->listentry(RMapNode, tail);
233
234                                         // We've finished the downstreams of a COW mapping,
235                                         // so stop marking pages as COW.
236
237                                         if (rmap == still_cow)
238                                                 still_cow = NULL;
239                                 }
240                         }
241
242                         node = node->next;
243                 } while (node != &head->tail);
244                 
245                 assert(!still_cow);
246         }
247
	// Single global lock serializing all rmap-table operations:
	// get_rmap() and break_copy_on_write() assert it is held, and
	// unmap() acquires it itself via AutoLock.
	Lock::Lock rmap_lock;
}