// mem/rmap.cc -- Reverse mapping from physical page frames (or
// intermediate address spaces) to mappers.
//
// This software is copyright (c) 2006 Scott Wood <scott@buserror.net>.
//
// This software is provided 'as-is', without any express or implied warranty.
// In no event will the authors or contributors be held liable for any damages
// arising from the use of this software.
//
// Permission is hereby granted to everyone, free of charge, to use, copy,
// modify, prepare derivative works of, publish, distribute, perform,
// sublicense, and/or sell copies of the Software, provided that the above
// copyright notice and disclaimer of warranty be included in all copies or
// substantial portions of this software.

#include <kern/mem.h>
#include <kern/pagealloc.h>
#include <util/misc.h>

namespace Mem {
        using Util::round_up;
        // static uint rmaps_per_page = Arch::page_size / sizeof(RMapNode);

        // If RMapNode's length becomes something other than 8 longs,
        // change "3" to the base-2 log of the number of longs.

        static int rmap_shift = Arch::page_shift - _LL_LONG_LOGBYTES - 3;

        // static int rmap_dirs_per_page = Arch::page_size / sizeof(RMapNode *);
        static int rmap_dir_shift = Arch::page_shift - _LL_LONG_LOGBYTES;
        static int rmap_lastlevel_shift = rmap_shift + Arch::page_shift;

        static int rmap_dir_levels = (64 - rmap_lastlevel_shift - 1)
                                     / rmap_dir_shift;

        static int rmap_toplevel_shift = rmap_dir_shift * rmap_dir_levels
                                       + rmap_lastlevel_shift;

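        // For concreteness only (nothing asserts these values): assuming a
        // 64-bit system with 4 KiB pages (page_shift == 12), 8-byte longs
        // (_LL_LONG_LOGBYTES == 3), and an 8-long (64-byte) RMapNode, the
        // constants above work out to:
        //
        //   rmap_shift           = 12 - 3 - 3        = 6   (64 RMapNodes per page)
        //   rmap_dir_shift       = 12 - 3             = 9   (512 pointers per dir page)
        //   rmap_lastlevel_shift = 6 + 12             = 18  (each leaf table covers 256 KiB)
        //   rmap_dir_levels      = (64 - 18 - 1) / 9  = 5
        //   rmap_toplevel_shift  = 9 * 5 + 18         = 63
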
        static inline u64 addr_to_dir_offset(u64 addr, int shift)
        {
                return (addr >> shift) & ((1ULL << rmap_dir_shift) - 1);
        }

        static inline u64 addr_to_offset(u64 addr)
        {
                return (addr >> Arch::page_shift) & ((1ULL << rmap_shift) - 1);
        }
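
        // Illustrative sketch, using the example constants above (4 KiB
        // pages, 64-byte RMapNodes): for virtaddr 0x12345000,
        // addr_to_offset() gives (0x12345000 >> 12) & 63 = 5, the node's
        // index within its leaf table, while addr_to_dir_offset(virtaddr, 18)
        // gives (0x12345000 >> 18) & 511 = 0x8d, the slot used at the lowest
        // directory level.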

        RMapTable::RMapTable()
        {
                // All RMap tables must have at least one dir level, in order to
                // simplify the code.  If it turns out that a lot of memory is
                // wasted due to this, the code could be made more complex in order
                // to allow one-level rmap tables.  Currently, on 4KiB-page systems,
                // a page is wasted per under-512KiB aspace (32-bit) or under-256KiB
                // aspace (64-bit).
                //
                // Dynamic levels would have to be implemented in generic-pte for
                // the wastage here to be meaningful.

                toplevel_shift = rmap_lastlevel_shift;
                toplevel = Mem::alloc_pages(1);
                bzero(toplevel, Arch::page_size);
        }

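        // Return the RMapNode for virtaddr.  When add is true, the tree is
        // grown upward (new top-level directories) as needed to cover the
        // address; when add is false and the address lies above the levels
        // currently present, NULL is returned.  Missing intermediate
        // directory pages are allocated and zeroed on the way down.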
        RMapNode *RMapTable::get_rmap(u64 virtaddr, bool add)
        {
                assert(rmap_lock.held_by_curthread());
                int shift = toplevel_shift;
                void *table = toplevel;

                while (toplevel_shift < rmap_toplevel_shift &&
                       (virtaddr >> (toplevel_shift + rmap_dir_shift)))
                {
                        if (!add)
                                return NULL;

                        shift += rmap_dir_shift;
                        toplevel_shift += rmap_dir_shift;

                        toplevel = Mem::alloc_pages(1);
                        bzero(toplevel, Arch::page_size);

                        static_cast<void **>(toplevel)[0] = table;
                        table = toplevel;
                }

                while (shift >= rmap_lastlevel_shift) {
                        int off = addr_to_dir_offset(virtaddr, shift);
                        void *new_table = static_cast<void **>(table)[off];

                        if (!new_table) {
                                new_table = Mem::alloc_pages(1);
                                bzero(new_table, Arch::page_size);
                                static_cast<void **>(table)[off] = new_table;
                        }

                        table = new_table;
                        shift -= rmap_dir_shift;
                }

                assert(shift + rmap_dir_shift - rmap_shift == Arch::page_shift);

                int off = addr_to_offset(virtaddr);
                return &static_cast<RMapNode *>(table)[off];
        }

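        // Record that dsva maps the page at dsvaddr.  If an upstream page
        // table is given, the new node is linked into the upstream node's
        // rmap list, so that unmapping or COW-breaking the upstream mapping
        // also visits this one.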
        void RMapTable::map(VirtualArea *dsva, PageTable *usptbl,
                            u64 dsvaddr, u64 usvaddr)
        {
                RMapNode *dsrmap = dsva->aspace->page_table->
                                   rmap_table.get_rmap(dsvaddr, true);

                assert(!dsrmap->va);
                dsrmap->va = dsva;
                dsrmap->vaddr = page_align(dsvaddr);
                dsrmap->head.init();
                dsrmap->tail.init();

                if (usptbl) {
                        RMapNode *usrmap = usptbl->rmap_table.get_rmap(usvaddr);
                        assert(usrmap);
                        assert(usrmap->va->aspace->page_table == usptbl);

                        usrmap->head.add_front(&dsrmap->head);
                } else {
                        // FIXME: If it ends up being useful, link into the phys-page
                        // rmap list.
                }

                dsrmap->head.add_front(&dsrmap->tail);
        }

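        // Tear down the mapping at virtaddr along with every mapping
        // downstream of it, removing all of them from the rmap list.  Does
        // nothing if the address has no active mapping.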
        void RMapTable::unmap(u64 virtaddr)
        {
                Lock::AutoLock autolock(rmap_lock);
                RMapNode *head = get_rmap(virtaddr);

                if (!head || !head->va)
                        return;

                assert(head->vaddr == virtaddr);

                Util::ListNoAutoInit *node = &head->head, *oldnode;

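                // Walk the list from this node's head to its tail.  Each list
                // element is either the head or the tail link embedded in some
                // RMapNode; because RMapNode's size is a power of two and the
                // nodes sit size-aligned within their table pages, the link
                // pointer's offset within the node tells the two apart.  A
                // head link marks a mapping to tear down; a tail link merely
                // closes off that mapping's group of downstreams.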
                do {
                        ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
                        if (off == RMapNode::head_offset) {
                                RMapNode *rmap = node->listentry(RMapNode, head);

                                Region region = { rmap->vaddr,
                                                  rmap->vaddr + Arch::page_size - 1 };

                                rmap->va->aspace->page_table->unmap(region);
                                rmap->va = NULL;
                        } else {
                                assert(off == RMapNode::tail_offset);
                        }

                        oldnode = node;
                        node = node->next;
                        oldnode->del();
                } while (node != &head->tail);

                node->del();
        }

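        // Repoint this mapping and every mapping downstream of it at new_page
        // (typically the private copy made for the faulting mapping).  PTE
        // FaultOnWrite is cleared except within the subtree of a downstream
        // mapping whose VA still has FaultOnWrite set.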
        void RMapTable::break_copy_on_write(u64 virtaddr, Page *new_page)
        {
                assert(rmap_lock.held_by_curthread());
                RMapNode *head = get_rmap(virtaddr);
                RMapNode *still_cow = NULL;

                assert(head && head->va);
                assert(head->vaddr == virtaddr);

                // If there are sibling or upstream mappings of this page,
                // detach the rmap list.

                if (head->head.prev != &head->tail) {
                        head->head.prev->next = head->tail.next;
                        head->tail.next->prev = head->head.prev;

                        head->head.prev = &head->tail;
                        head->tail.next = &head->head;
                }

                assert(head->tail.next == &head->head);
                Util::ListNoAutoInit *node = &head->head;

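                // Walk this mapping and its downstreams, remapping each one to
                // new_page.  still_cow is non-NULL while we are inside the
                // group of downstreams of a mapping whose VA FaultOnWrite is
                // set; those keep PTE FaultOnWrite, everything else has it
                // cleared.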
                do {
                        ulong off = reinterpret_cast<ulong>(node) & (sizeof(RMapNode) - 1);
                        if (off == RMapNode::head_offset) {
                                RMapNode *rmap = node->listentry(RMapNode, head);
                                RegionWithOffset region;

                                region.start = rmap->vaddr;
                                region.end = rmap->vaddr + Arch::page_size - 1;
                                region.offset = page_to_phys(new_page);

                                PTEFlags flags = rmap->va->flags;

                                // The faulting mapping always has PTE FaultOnWrite cleared;
                                // downstream mappings have PTE FaultOnWrite cleared if they
                                // are not downstream of a different mapping with VA
                                // FaultOnWrite set.  Downstream mappings should never have
                                // PTE FaultOnWrite clear if VA FaultOnWrite is set; if the
                                // downstream mapping had been cow-broken, it would have been
                                // removed from this physpage's rmap list.

                                if (flags.FaultOnWrite && node != &head->head && !still_cow)
                                        still_cow = rmap;

                                if (still_cow)
                                        flags.FaultOnWrite = 1;
                                else
                                        flags.FaultOnWrite = 0;

                                rmap->va->aspace->page_table->map(region, flags);
                        } else {
                                assert(off == RMapNode::tail_offset);

                                if (still_cow) {
                                        RMapNode *rmap = node->listentry(RMapNode, tail);

                                        // We've finished the downstreams of a COW mapping,
                                        // so stop marking pages as COW.

                                        if (rmap == still_cow)
                                                still_cow = NULL;
                                }
                        }

                        node = node->next;
                } while (node != &head->tail);

                assert(!still_cow);
        }

        Lock::Lock rmap_lock;
}