// mem/rmap.cc -- Reverse mapping from physical page frames (or
// intermediate address spaces) to mappers.
//
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
// 
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal with
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
// 
//     * Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimers.
// 
//     * Redistributions in binary form must reproduce the above copyright notice,
//       this list of conditions and the following disclaimers in the
//       documentation and/or other materials provided with the distribution.
// 
//     * The names of the Software's authors and/or contributors
//       may not be used to endorse or promote products derived from
//       this Software without specific prior written permission.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
// SOFTWARE.

#include <kern/mem.h>
#include <kern/pagealloc.h>
#include <util/misc.h>

namespace Mem {
        using Util::round_up;
        // static uint rmaps_per_page = Arch::page_size / sizeof(RMapNode);
        
        // If RMapNode's length becomes something other than 8 longs,
        // change "3" to the base-2 log of the number of longs.
        
        static int rmap_shift = Arch::page_shift - _LL_LONG_LOGBYTES - 3;
        
        // static int rmap_dirs_per_page = Arch::page_size / sizeof(RMapNode *);
        static int rmap_dir_shift = Arch::page_shift - _LL_LONG_LOGBYTES;
        static int rmap_lastlevel_shift = rmap_shift + Arch::page_shift;
        
        static int rmap_dir_levels = (64 - rmap_lastlevel_shift - 1)
                                     / rmap_dir_shift;

        static int rmap_toplevel_shift = rmap_dir_shift * rmap_dir_levels
                                       + rmap_lastlevel_shift;
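        
        // As a concrete illustration (assuming 4KiB pages and 64-bit longs,
        // i.e. Arch::page_shift == 12 and _LL_LONG_LOGBYTES == 3), the
        // values work out to:
        //
        //   rmap_shift           = 12 - 3 - 3 = 6    (64 RMapNodes per page)
        //   rmap_dir_shift       = 12 - 3     = 9    (512 pointers per page)
        //   rmap_lastlevel_shift = 6 + 12     = 18   (a leaf page covers 256KiB)
        //   rmap_dir_levels      = (64 - 18 - 1) / 9 = 5
        //   rmap_toplevel_shift  = 9 * 5 + 18 = 63   (covers the 64-bit space)
        //
        // On 32-bit (_LL_LONG_LOGBYTES == 2), rmap_shift is 7 and a leaf
        // page covers 512KiB, matching the constructor comment below.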

        static inline u64 addr_to_dir_offset(u64 addr, int shift)
        {
                return (addr >> shift) & ((1ULL << rmap_dir_shift) - 1);
        }

        static inline u64 addr_to_offset(u64 addr)
        {
                return (addr >> Arch::page_shift) & ((1ULL << rmap_shift) - 1);
        }
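        
        // With the example values above, get_rmap() below decomposes an
        // address into 9-bit directory indices (addr_to_dir_offset() at
        // shifts 63, 54, 45, 36, 27, and 18, for however many levels the
        // table has grown to), followed by a 6-bit index into a leaf page
        // of RMapNodes (addr_to_offset(), bits 17..12).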

        RMapTable::RMapTable()
        {
                // All RMap tables must have at least one dir level, in order to
                // simplify the code.  If it turns out that a lot of memory is
                // wasted due to this, the code could be made more complex in order
                // to allow one-level rmap tables.  Currently, on 4KiB-page systems,
                // a page is wasted per under-512KiB aspace (32-bit) or under-256KiB
                // aspace (64-bit).
                //
                // Dynamic levels would have to be implemented in generic-pte for
                // the wastage here to be meaningful.
                
                toplevel_shift = rmap_lastlevel_shift;
                toplevel = Mem::alloc_pages(1);
                bzero(toplevel, Arch::page_size);
        }

        RMapNode *RMapTable::get_rmap(u64 virtaddr, bool add)
        {
                assert(rmap_lock.held_by_curthread());
                int shift = toplevel_shift;
                void *table = toplevel;
                
                while (toplevel_shift < rmap_toplevel_shift &&
                       (virtaddr >> (toplevel_shift + rmap_dir_shift)))
                {
                        if (!add)
                                return NULL;
                        
                        shift += rmap_dir_shift;
                        toplevel_shift += rmap_dir_shift;

                        toplevel = Mem::alloc_pages(1);
                        bzero(toplevel, Arch::page_size);

                        static_cast<void **>(toplevel)[0] = table;
                        table = toplevel;
                }
                
                while (shift >= rmap_lastlevel_shift) {
                        int off = addr_to_dir_offset(virtaddr, shift);
                        void *new_table = static_cast<void **>(table)[off];
                        
                        if (!new_table) {
                                // Missing level: allocate it only if the
                                // caller asked for the node to be created;
                                // lookup-only callers expect NULL instead.
                                if (!add)
                                        return NULL;
                                
                                new_table = Mem::alloc_pages(1);
                                bzero(new_table, Arch::page_size);
                                static_cast<void **>(table)[off] = new_table;
                        }
                        
                        table = new_table;
                        shift -= rmap_dir_shift;
                }
                
                assert(shift + rmap_dir_shift - rmap_shift == Arch::page_shift);

                int off = addr_to_offset(virtaddr);
                return &static_cast<RMapNode *>(table)[off];
        }
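
        // Note on usage: map() below passes add == true to create the node
        // for a new mapping; unmap() does a lookup-only call and tolerates a
        // NULL return, while break_copy_on_write() asserts the node exists.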

        void RMapTable::map(VirtualArea *dsva, PageTable *usptbl,
                            u64 dsvaddr, u64 usvaddr)
        {
                RMapNode *dsrmap = dsva->aspace->page_table->
                                   rmap_table.get_rmap(dsvaddr, true);

                assert(!dsrmap->va);
                dsrmap->va = dsva;
                dsrmap->vaddr = page_align(dsvaddr);
                dsrmap->head.init();
                dsrmap->tail.init();
                
                if (usptbl) {
                        RMapNode *usrmap = usptbl->rmap_table.get_rmap(usvaddr);
                        assert(usrmap);
                        assert(usrmap->va->aspace->page_table == usptbl);

                        usrmap->head.add_front(&dsrmap->head);
                } else {
                        // FIXME: If it ends up being useful, link into the phys-page
                        // rmap list.
                }

                dsrmap->head.add_front(&dsrmap->tail);
        }
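
        // The head/tail pairs encode the mapping tree as a flat list.  As a
        // hypothetical example: if B and C are mapped from A, and D is later
        // mapped from B, the list starting at A's head reads
        //
        //   A.head -> C.head -> C.tail -> B.head -> D.head -> D.tail
        //          -> B.tail -> A.tail
        //
        // Each mapping's downstreams lie between its head and its tail,
        // newest first; unmap() and break_copy_on_write() rely on this
        // bracketing.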

        void RMapTable::unmap(u64 virtaddr)
        {
                Lock::AutoLock autolock(rmap_lock);
                RMapNode *head = get_rmap(virtaddr);
                
                if (!head || !head->va)
                        return;

                assert(head->vaddr == virtaddr);

                Util::ListNoAutoInit *node = &head->head, *oldnode;
                
                do {
                        ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
                        if (off == RMapNode::head_offset) {
                                RMapNode *rmap = node->listentry(RMapNode, head);
                        
                                // The region covers exactly one page.
                                Region region = { rmap->vaddr,
                                                  rmap->vaddr + Arch::page_size - 1 };

                                rmap->va->aspace->page_table->unmap(region);
                                rmap->va = NULL;
                        } else {
                                assert(off == RMapNode::tail_offset);
                        }

                        oldnode = node;
                        node = node->next;
                        oldnode->del();
                } while (node != &head->tail);
                
                node->del();
        }

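        // Continuing the hypothetical list sketched above (B and C mapped
        // from A, D from B): if A faults, break_copy_on_write() remaps A, C,
        // B, and D to new_page.  A and C get PTE FaultOnWrite cleared; if
        // B's VA has FaultOnWrite set, still_cow is non-NULL from B.head
        // through B.tail, so B and D keep PTE FaultOnWrite set.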
        void RMapTable::break_copy_on_write(u64 virtaddr, Page *new_page)
        {
                assert(rmap_lock.held_by_curthread());
                RMapNode *head = get_rmap(virtaddr);
                RMapNode *still_cow = NULL;
                
                assert(head && head->va);
                assert(head->vaddr == virtaddr);
                
                // If there are sibling or upstream mappings of this page,
                // detach this mapping's span from the rest of the rmap list,
                // so that only this mapping and its downstreams are updated.
                
                if (head->head.prev != &head->tail) {
                        head->head.prev->next = head->tail.next;
                        head->tail.next->prev = head->head.prev;
                        
                        head->head.prev = &head->tail;
                        head->tail.next = &head->head;
                }

                assert(head->tail.next == &head->head);
                Util::ListNoAutoInit *node = &head->head;
                
                do {
                        ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
                        if (off == RMapNode::head_offset) {
                                RMapNode *rmap = node->listentry(RMapNode, head);
                                RegionWithOffset region;
                                
                                region.start = rmap->vaddr;
                                region.end = rmap->vaddr + Arch::page_size - 1;
                                region.offset = page_to_phys(new_page);
                                
                                PTEFlags flags = rmap->va->flags;
                                
                                // The faulting mapping always has PTE FaultOnWrite cleared;
                                // downstream mappings have PTE FaultOnWrite cleared if they
                                // are not downstream of a different mapping with VA
                                // FaultOnWrite set.  Downstream mappings should never have
                                // PTE FaultOnWrite clear if VA FaultOnWrite is set; if the
                                // downstream mapping had been cow-broken, it would have been
                                // removed from this physpage's rmap list.
                                
                                if (flags.FaultOnWrite && node != &head->head && !still_cow)
                                        still_cow = rmap;
                                
                                if (still_cow)
                                        flags.FaultOnWrite = 1;
                                else
                                        flags.FaultOnWrite = 0;
                                
                                rmap->va->aspace->page_table->map(region, flags);
                        } else {
                                assert(off == RMapNode::tail_offset);
                                
                                if (still_cow) {
                                        RMapNode *rmap = node->listentry(RMapNode, tail);

                                        // We've finished the downstreams of a COW mapping,
                                        // so stop marking pages as COW.

                                        if (rmap == still_cow)
                                                still_cow = NULL;
                                }
                        }

                        node = node->next;
                } while (node != &head->tail);
                
                assert(!still_cow);
        }

        Lock::Lock rmap_lock;
}