FreeBSD Bugzilla – Attachment 246076 Details for
Bug 274592
bus_dmamem_alloc()/bus_dmamap_load returns physical address outside the tag low/high bounds
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
vm_phys_find_freelist_contig, but without using adjacent smaller blocks
vm_phys.c (text/plain), 49.88 KB, created by
Doug Moore
on 2023-11-03 08:45:44 UTC
(
hide
)
Description:
vm_phys_find_freelist_contig, but without using adjacent smaller blocks
Filename:
MIME Type:
Creator:
Doug Moore
Created:
2023-11-03 08:45:44 UTC
Size:
49.88 KB
patch
obsolete
>/*- > * SPDX-License-Identifier: BSD-2-Clause > * > * Copyright (c) 2002-2006 Rice University > * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> > * All rights reserved. > * > * This software was developed for the FreeBSD Project by Alan L. Cox, > * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. > * > * Redistribution and use in source and binary forms, with or without > * modification, are permitted provided that the following conditions > * are met: > * 1. Redistributions of source code must retain the above copyright > * notice, this list of conditions and the following disclaimer. > * 2. Redistributions in binary form must reproduce the above copyright > * notice, this list of conditions and the following disclaimer in the > * documentation and/or other materials provided with the distribution. > * > * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, > * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, > * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS > * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED > * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT > * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY > * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE > * POSSIBILITY OF SUCH DAMAGE. > */ > >/* > * Physical memory system implementation > * > * Any external functions defined by this module are only to be used by the > * virtual memory system. 
> */ > >#include <sys/cdefs.h> >#include "opt_ddb.h" >#include "opt_vm.h" > >#include <sys/param.h> >#include <sys/systm.h> >#include <sys/domainset.h> >#include <sys/lock.h> >#include <sys/kernel.h> >#include <sys/malloc.h> >#include <sys/mutex.h> >#include <sys/proc.h> >#include <sys/queue.h> >#include <sys/rwlock.h> >#include <sys/sbuf.h> >#include <sys/sysctl.h> >#include <sys/tree.h> >#include <sys/vmmeter.h> > >#include <ddb/ddb.h> > >#include <vm/vm.h> >#include <vm/vm_extern.h> >#include <vm/vm_param.h> >#include <vm/vm_kern.h> >#include <vm/vm_object.h> >#include <vm/vm_page.h> >#include <vm/vm_phys.h> >#include <vm/vm_pagequeue.h> > >_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, > "Too many physsegs."); >_Static_assert(sizeof(long long) >= sizeof(vm_paddr_t), > "vm_paddr_t too big for ffsll, flsll."); > >#ifdef NUMA >struct mem_affinity __read_mostly *mem_affinity; >int __read_mostly *mem_locality; > >static int numa_disabled; >static SYSCTL_NODE(_vm, OID_AUTO, numa, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, > "NUMA options"); >SYSCTL_INT(_vm_numa, OID_AUTO, disabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, > &numa_disabled, 0, "NUMA-awareness in the allocators is disabled"); >#endif > >int __read_mostly vm_ndomains = 1; >domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1); > >struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX]; >int __read_mostly vm_phys_nsegs; >static struct vm_phys_seg vm_phys_early_segs[8]; >static int vm_phys_early_nsegs; > >struct vm_phys_fictitious_seg; >static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *, > struct vm_phys_fictitious_seg *); > >RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree = > RB_INITIALIZER(&vm_phys_fictitious_tree); > >struct vm_phys_fictitious_seg { > RB_ENTRY(vm_phys_fictitious_seg) node; > /* Memory region data */ > vm_paddr_t start; > vm_paddr_t end; > vm_page_t first_page; >}; > >RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node, > 
vm_phys_fictitious_cmp); > >static struct rwlock_padalign vm_phys_fictitious_reg_lock; >MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages"); > >static struct vm_freelist __aligned(CACHE_LINE_SIZE) > vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL] > [VM_NFREEORDER_MAX]; > >static int __read_mostly vm_nfreelists; > >/* > * These "avail lists" are globals used to communicate boot-time physical > * memory layout to other parts of the kernel. Each physically contiguous > * region of memory is defined by a start address at an even index and an > * end address at the following odd index. Each list is terminated by a > * pair of zero entries. > * > * dump_avail tells the dump code what regions to include in a crash dump, and > * phys_avail is all of the remaining physical memory that is available for > * the vm system. > * > * Initially dump_avail and phys_avail are identical. Boot time memory > * allocations remove extents from phys_avail that may still be included > * in dumps. > */ >vm_paddr_t phys_avail[PHYS_AVAIL_COUNT]; >vm_paddr_t dump_avail[PHYS_AVAIL_COUNT]; > >/* > * Provides the mapping from VM_FREELIST_* to free list indices (flind). > */ >static int __read_mostly vm_freelist_to_flind[VM_NFREELIST]; > >CTASSERT(VM_FREELIST_DEFAULT == 0); > >#ifdef VM_FREELIST_DMA32 >#define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32) >#endif > >/* > * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about > * the ordering of the free list boundaries. 
> */ >#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY) >CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY); >#endif > >static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); >SYSCTL_OID(_vm, OID_AUTO, phys_free, > CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, > sysctl_vm_phys_free, "A", > "Phys Free Info"); > >static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); >SYSCTL_OID(_vm, OID_AUTO, phys_segs, > CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, > sysctl_vm_phys_segs, "A", > "Phys Seg Info"); > >#ifdef NUMA >static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS); >SYSCTL_OID(_vm, OID_AUTO, phys_locality, > CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, > sysctl_vm_phys_locality, "A", > "Phys Locality Info"); >#endif > >SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, > &vm_ndomains, 0, "Number of physical memory domains available."); > >static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); >static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); >static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, > int order, int tail); > >/* > * Red-black tree helpers for vm fictitious range management. 
> */ >static inline int >vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p, > struct vm_phys_fictitious_seg *range) >{ > > KASSERT(range->start != 0 && range->end != 0, > ("Invalid range passed on search for vm_fictitious page")); > if (p->start >= range->end) > return (1); > if (p->start < range->start) > return (-1); > > return (0); >} > >static int >vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1, > struct vm_phys_fictitious_seg *p2) >{ > > /* Check if this is a search for a page */ > if (p1->end == 0) > return (vm_phys_fictitious_in_range(p1, p2)); > > KASSERT(p2->end != 0, > ("Invalid range passed as second parameter to vm fictitious comparison")); > > /* Searching to add a new range */ > if (p1->end <= p2->start) > return (-1); > if (p1->start >= p2->end) > return (1); > > panic("Trying to add overlapping vm fictitious ranges:\n" > "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start, > (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); >} > >int >vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high) >{ >#ifdef NUMA > domainset_t mask; > int i; > > if (vm_ndomains == 1 || mem_affinity == NULL) > return (0); > > DOMAINSET_ZERO(&mask); > /* > * Check for any memory that overlaps low, high. > */ > for (i = 0; mem_affinity[i].end != 0; i++) > if (mem_affinity[i].start <= high && > mem_affinity[i].end >= low) > DOMAINSET_SET(mem_affinity[i].domain, &mask); > if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask)) > return (prefer); > if (DOMAINSET_EMPTY(&mask)) > panic("vm_phys_domain_match: Impossible constraint"); > return (DOMAINSET_FFS(&mask) - 1); >#else > return (0); >#endif >} > >/* > * Outputs the state of the physical memory allocator, specifically, > * the amount of physical memory in each free list. 
> */ >static int >sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) >{ > struct sbuf sbuf; > struct vm_freelist *fl; > int dom, error, flind, oind, pind; > > error = sysctl_wire_old_buffer(req, 0); > if (error != 0) > return (error); > sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); > for (dom = 0; dom < vm_ndomains; dom++) { > sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); > for (flind = 0; flind < vm_nfreelists; flind++) { > sbuf_printf(&sbuf, "\nFREE LIST %d:\n" > "\n ORDER (SIZE) | NUMBER" > "\n ", flind); > for (pind = 0; pind < VM_NFREEPOOL; pind++) > sbuf_printf(&sbuf, " | POOL %d", pind); > sbuf_printf(&sbuf, "\n-- "); > for (pind = 0; pind < VM_NFREEPOOL; pind++) > sbuf_printf(&sbuf, "-- -- "); > sbuf_printf(&sbuf, "--\n"); > for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { > sbuf_printf(&sbuf, " %2d (%6dK)", oind, > 1 << (PAGE_SHIFT - 10 + oind)); > for (pind = 0; pind < VM_NFREEPOOL; pind++) { > fl = vm_phys_free_queues[dom][flind][pind]; > sbuf_printf(&sbuf, " | %6d", > fl[oind].lcnt); > } > sbuf_printf(&sbuf, "\n"); > } > } > } > error = sbuf_finish(&sbuf); > sbuf_delete(&sbuf); > return (error); >} > >/* > * Outputs the set of physical memory segments. > */ >static int >sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) >{ > struct sbuf sbuf; > struct vm_phys_seg *seg; > int error, segind; > > error = sysctl_wire_old_buffer(req, 0); > if (error != 0) > return (error); > sbuf_new_for_sysctl(&sbuf, NULL, 128, req); > for (segind = 0; segind < vm_phys_nsegs; segind++) { > sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); > seg = &vm_phys_segs[segind]; > sbuf_printf(&sbuf, "start: %#jx\n", > (uintmax_t)seg->start); > sbuf_printf(&sbuf, "end: %#jx\n", > (uintmax_t)seg->end); > sbuf_printf(&sbuf, "domain: %d\n", seg->domain); > sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); > } > error = sbuf_finish(&sbuf); > sbuf_delete(&sbuf); > return (error); >} > >/* > * Return affinity, or -1 if there's no affinity information. 
> */ >int >vm_phys_mem_affinity(int f, int t) >{ > >#ifdef NUMA > if (mem_locality == NULL) > return (-1); > if (f >= vm_ndomains || t >= vm_ndomains) > return (-1); > return (mem_locality[f * vm_ndomains + t]); >#else > return (-1); >#endif >} > >#ifdef NUMA >/* > * Outputs the VM locality table. > */ >static int >sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS) >{ > struct sbuf sbuf; > int error, i, j; > > error = sysctl_wire_old_buffer(req, 0); > if (error != 0) > return (error); > sbuf_new_for_sysctl(&sbuf, NULL, 128, req); > > sbuf_printf(&sbuf, "\n"); > > for (i = 0; i < vm_ndomains; i++) { > sbuf_printf(&sbuf, "%d: ", i); > for (j = 0; j < vm_ndomains; j++) { > sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j)); > } > sbuf_printf(&sbuf, "\n"); > } > error = sbuf_finish(&sbuf); > sbuf_delete(&sbuf); > return (error); >} >#endif > >static void >vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) >{ > > m->order = order; > if (tail) > TAILQ_INSERT_TAIL(&fl[order].pl, m, listq); > else > TAILQ_INSERT_HEAD(&fl[order].pl, m, listq); > fl[order].lcnt++; >} > >static void >vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order) >{ > > TAILQ_REMOVE(&fl[order].pl, m, listq); > fl[order].lcnt--; > m->order = VM_NFREEORDER; >} > >/* > * Create a physical memory segment. 
> */ >static void >_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain) >{ > struct vm_phys_seg *seg; > > KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, > ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); > KASSERT(domain >= 0 && domain < vm_ndomains, > ("vm_phys_create_seg: invalid domain provided")); > seg = &vm_phys_segs[vm_phys_nsegs++]; > while (seg > vm_phys_segs && (seg - 1)->start >= end) { > *seg = *(seg - 1); > seg--; > } > seg->start = start; > seg->end = end; > seg->domain = domain; >} > >static void >vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end) >{ >#ifdef NUMA > int i; > > if (mem_affinity == NULL) { > _vm_phys_create_seg(start, end, 0); > return; > } > > for (i = 0;; i++) { > if (mem_affinity[i].end == 0) > panic("Reached end of affinity info"); > if (mem_affinity[i].end <= start) > continue; > if (mem_affinity[i].start > start) > panic("No affinity info for start %jx", > (uintmax_t)start); > if (mem_affinity[i].end >= end) { > _vm_phys_create_seg(start, end, > mem_affinity[i].domain); > break; > } > _vm_phys_create_seg(start, mem_affinity[i].end, > mem_affinity[i].domain); > start = mem_affinity[i].end; > } >#else > _vm_phys_create_seg(start, end, 0); >#endif >} > >/* > * Add a physical memory segment. > */ >void >vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end) >{ > vm_paddr_t paddr; > > KASSERT((start & PAGE_MASK) == 0, > ("vm_phys_define_seg: start is not page aligned")); > KASSERT((end & PAGE_MASK) == 0, > ("vm_phys_define_seg: end is not page aligned")); > > /* > * Split the physical memory segment if it spans two or more free > * list boundaries. 
> */ > paddr = start; >#ifdef VM_FREELIST_LOWMEM > if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) { > vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY); > paddr = VM_LOWMEM_BOUNDARY; > } >#endif >#ifdef VM_FREELIST_DMA32 > if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) { > vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY); > paddr = VM_DMA32_BOUNDARY; > } >#endif > vm_phys_create_seg(paddr, end); >} > >/* > * Initialize the physical memory allocator. > * > * Requires that vm_page_array is initialized! > */ >void >vm_phys_init(void) >{ > struct vm_freelist *fl; > struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg; >#if defined(VM_DMA32_NPAGES_THRESHOLD) || defined(VM_PHYSSEG_SPARSE) > u_long npages; >#endif > int dom, flind, freelist, oind, pind, segind; > > /* > * Compute the number of free lists, and generate the mapping from the > * manifest constants VM_FREELIST_* to the free list indices. > * > * Initially, the entries of vm_freelist_to_flind[] are set to either > * 0 or 1 to indicate which free lists should be created. > */ >#ifdef VM_DMA32_NPAGES_THRESHOLD > npages = 0; >#endif > for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { > seg = &vm_phys_segs[segind]; >#ifdef VM_FREELIST_LOWMEM > if (seg->end <= VM_LOWMEM_BOUNDARY) > vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1; > else >#endif >#ifdef VM_FREELIST_DMA32 > if ( >#ifdef VM_DMA32_NPAGES_THRESHOLD > /* > * Create the DMA32 free list only if the amount of > * physical memory above physical address 4G exceeds the > * given threshold. > */ > npages > VM_DMA32_NPAGES_THRESHOLD && >#endif > seg->end <= VM_DMA32_BOUNDARY) > vm_freelist_to_flind[VM_FREELIST_DMA32] = 1; > else >#endif > { >#ifdef VM_DMA32_NPAGES_THRESHOLD > npages += atop(seg->end - seg->start); >#endif > vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1; > } > } > /* Change each entry into a running total of the free lists. 
*/ > for (freelist = 1; freelist < VM_NFREELIST; freelist++) { > vm_freelist_to_flind[freelist] += > vm_freelist_to_flind[freelist - 1]; > } > vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1]; > KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists")); > /* Change each entry into a free list index. */ > for (freelist = 0; freelist < VM_NFREELIST; freelist++) > vm_freelist_to_flind[freelist]--; > > /* > * Initialize the first_page and free_queues fields of each physical > * memory segment. > */ >#ifdef VM_PHYSSEG_SPARSE > npages = 0; >#endif > for (segind = 0; segind < vm_phys_nsegs; segind++) { > seg = &vm_phys_segs[segind]; >#ifdef VM_PHYSSEG_SPARSE > seg->first_page = &vm_page_array[npages]; > npages += atop(seg->end - seg->start); >#else > seg->first_page = PHYS_TO_VM_PAGE(seg->start); >#endif >#ifdef VM_FREELIST_LOWMEM > if (seg->end <= VM_LOWMEM_BOUNDARY) { > flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM]; > KASSERT(flind >= 0, > ("vm_phys_init: LOWMEM flind < 0")); > } else >#endif >#ifdef VM_FREELIST_DMA32 > if (seg->end <= VM_DMA32_BOUNDARY) { > flind = vm_freelist_to_flind[VM_FREELIST_DMA32]; > KASSERT(flind >= 0, > ("vm_phys_init: DMA32 flind < 0")); > } else >#endif > { > flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT]; > KASSERT(flind >= 0, > ("vm_phys_init: DEFAULT flind < 0")); > } > seg->free_queues = &vm_phys_free_queues[seg->domain][flind]; > } > > /* > * Coalesce physical memory segments that are contiguous and share the > * same per-domain free queues. 
> */ > prev_seg = vm_phys_segs; > seg = &vm_phys_segs[1]; > end_seg = &vm_phys_segs[vm_phys_nsegs]; > while (seg < end_seg) { > if (prev_seg->end == seg->start && > prev_seg->free_queues == seg->free_queues) { > prev_seg->end = seg->end; > KASSERT(prev_seg->domain == seg->domain, > ("vm_phys_init: free queues cannot span domains")); > vm_phys_nsegs--; > end_seg--; > for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++) > *tmp_seg = *(tmp_seg + 1); > } else { > prev_seg = seg; > seg++; > } > } > > /* > * Initialize the free queues. > */ > for (dom = 0; dom < vm_ndomains; dom++) { > for (flind = 0; flind < vm_nfreelists; flind++) { > for (pind = 0; pind < VM_NFREEPOOL; pind++) { > fl = vm_phys_free_queues[dom][flind][pind]; > for (oind = 0; oind < VM_NFREEORDER; oind++) > TAILQ_INIT(&fl[oind].pl); > } > } > } > > rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); >} > >/* > * Register info about the NUMA topology of the system. > * > * Invoked by platform-dependent code prior to vm_phys_init(). > */ >void >vm_phys_register_domains(int ndomains, struct mem_affinity *affinity, > int *locality) >{ >#ifdef NUMA > int i; > > /* > * For now the only override value that we support is 1, which > * effectively disables NUMA-awareness in the allocators. > */ > TUNABLE_INT_FETCH("vm.numa.disabled", &numa_disabled); > if (numa_disabled) > ndomains = 1; > > if (ndomains > 1) { > vm_ndomains = ndomains; > mem_affinity = affinity; > mem_locality = locality; > } > > for (i = 0; i < vm_ndomains; i++) > DOMAINSET_SET(i, &all_domains); >#else > (void)ndomains; > (void)affinity; > (void)locality; >#endif >} > >/* > * Split a contiguous, power of two-sized set of physical pages. > * > * When this function is called by a page allocation function, the caller > * should request insertion at the head unless the order [order, oind) queues > * are known to be empty. 
The objective being to reduce the likelihood of > * long-term fragmentation by promoting contemporaneous allocation and > * (hopefully) deallocation. > */ >static __inline void >vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, > int tail) >{ > vm_page_t m_buddy; > > while (oind > order) { > oind--; > m_buddy = &m[1 << oind]; > KASSERT(m_buddy->order == VM_NFREEORDER, > ("vm_phys_split_pages: page %p has unexpected order %d", > m_buddy, m_buddy->order)); > vm_freelist_add(fl, m_buddy, oind, tail); > } >} > >/* > * Add the physical pages [m, m + npages) at the beginning of a power-of-two > * aligned and sized set to the specified free list. > * > * When this function is called by a page allocation function, the caller > * should request insertion at the head unless the lower-order queues are > * known to be empty. The objective being to reduce the likelihood of long- > * term fragmentation by promoting contemporaneous allocation and (hopefully) > * deallocation. > * > * The physical page m's buddy must not be free. > */ >static void >vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) >{ > int order; > > KASSERT(npages == 0 || > (VM_PAGE_TO_PHYS(m) & > ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0, > ("%s: page %p and npages %u are misaligned", > __func__, m, npages)); > while (npages > 0) { > KASSERT(m->order == VM_NFREEORDER, > ("%s: page %p has unexpected order %d", > __func__, m, m->order)); > order = fls(npages) - 1; > KASSERT(order < VM_NFREEORDER, > ("%s: order %d is out of range", __func__, order)); > vm_freelist_add(fl, m, order, tail); > m += 1 << order; > npages -= 1 << order; > } >} > >/* > * Add the physical pages [m, m + npages) at the end of a power-of-two aligned > * and sized set to the specified free list. > * > * When this function is called by a page allocation function, the caller > * should request insertion at the head unless the lower-order queues are > * known to be empty. 
The objective being to reduce the likelihood of long- > * term fragmentation by promoting contemporaneous allocation and (hopefully) > * deallocation. > * > * If npages is zero, this function does nothing and ignores the physical page > * parameter m. Otherwise, the physical page m's buddy must not be free. > */ >static vm_page_t >vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail) >{ > int order; > > KASSERT(npages == 0 || > ((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) & > ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0, > ("vm_phys_enq_range: page %p and npages %u are misaligned", > m, npages)); > while (npages > 0) { > KASSERT(m->order == VM_NFREEORDER, > ("vm_phys_enq_range: page %p has unexpected order %d", > m, m->order)); > order = ffs(npages) - 1; > KASSERT(order < VM_NFREEORDER, > ("vm_phys_enq_range: order %d is out of range", order)); > vm_freelist_add(fl, m, order, tail); > m += 1 << order; > npages -= 1 << order; > } > return (m); >} > >/* > * Set the pool for a contiguous, power of two-sized set of physical pages. > */ >static void >vm_phys_set_pool(int pool, vm_page_t m, int order) >{ > vm_page_t m_tmp; > > for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) > m_tmp->pool = pool; >} > >/* > * Tries to allocate the specified number of pages from the specified pool > * within the specified domain. Returns the actual number of allocated pages > * and a pointer to each page through the array ma[]. > * > * The returned pages may not be physically contiguous. However, in contrast > * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0), > * calling this function once to allocate the desired number of pages will > * avoid wasted time in vm_phys_split_pages(). > * > * The free page queues for the specified domain must be locked. 
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	int avail, end, flind, freelist, i, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	/* i counts the pages stored in ma[] so far. */
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		/* A negative index means this free list was not created. */
		if (flind < 0)
			continue;
		fl = vm_phys_free_queues[domain][flind][pool];
		/*
		 * First drain the requested pool of this free list, taking
		 * the smallest blocks first.
		 */
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				/*
				 * avail is the count that would be reached
				 * by taking the whole block; only as much of
				 * it as is still needed is copied to ma[].
				 */
				avail = i + (1 << oind);
				end = imin(npages, avail);
				while (i < end)
					ma[i++] = m++;
				if (i == npages) {
					/*
					 * Return excess pages to fl. Its order
					 * [0, oind) queues are empty.
					 */
					vm_phys_enq_range(m, avail - i, fl, 1);
					return (npages);
				}
			}
		}
		/*
		 * The requested pool is now empty in this free list.  Take
		 * blocks from every pool, largest order first, and move
		 * their pages into the requested pool.
		 */
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					vm_phys_set_pool(pool, m, oind);
					avail = i + (1 << oind);
					end = imin(npages, avail);
					while (i < end)
						ma[i++] = m++;
					if (i == npages) {
						/*
						 * Return excess pages to fl.
						 * Its order [0, oind) queues
						 * are empty.
						 */
						vm_phys_enq_range(m, avail - i,
						    fl, 1);
						return (npages);
					}
				}
			}
		}
	}
	/* Every free list was exhausted before npages could be gathered. */
	return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
> */ >vm_page_t >vm_phys_alloc_pages(int domain, int pool, int order) >{ > vm_page_t m; > int freelist; > > for (freelist = 0; freelist < VM_NFREELIST; freelist++) { > m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order); > if (m != NULL) > return (m); > } > return (NULL); >} > >/* > * Allocate a contiguous, power of two-sized set of physical pages from the > * specified free list. The free list must be specified using one of the > * manifest constants VM_FREELIST_*. > * > * The free page queues must be locked. > */ >vm_page_t >vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order) >{ > struct vm_freelist *alt, *fl; > vm_page_t m; > int oind, pind, flind; > > KASSERT(domain >= 0 && domain < vm_ndomains, > ("vm_phys_alloc_freelist_pages: domain %d is out of range", > domain)); > KASSERT(freelist < VM_NFREELIST, > ("vm_phys_alloc_freelist_pages: freelist %d is out of range", > freelist)); > KASSERT(pool < VM_NFREEPOOL, > ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); > KASSERT(order < VM_NFREEORDER, > ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); > > flind = vm_freelist_to_flind[freelist]; > /* Check if freelist is present */ > if (flind < 0) > return (NULL); > > vm_domain_free_assert_locked(VM_DOMAIN(domain)); > fl = &vm_phys_free_queues[domain][flind][pool][0]; > for (oind = order; oind < VM_NFREEORDER; oind++) { > m = TAILQ_FIRST(&fl[oind].pl); > if (m != NULL) { > vm_freelist_rem(fl, m, oind); > /* The order [order, oind) queues are empty. */ > vm_phys_split_pages(m, oind, fl, order, 1); > return (m); > } > } > > /* > * The given pool was empty. Find the largest > * contiguous, power-of-two-sized set of pages in any > * pool. Transfer these pages to the given pool, and > * use them to satisfy the allocation. 
> */ > for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { > for (pind = 0; pind < VM_NFREEPOOL; pind++) { > alt = &vm_phys_free_queues[domain][flind][pind][0]; > m = TAILQ_FIRST(&alt[oind].pl); > if (m != NULL) { > vm_freelist_rem(alt, m, oind); > vm_phys_set_pool(pool, m, oind); > /* The order [order, oind) queues are empty. */ > vm_phys_split_pages(m, oind, fl, order, 1); > return (m); > } > } > } > return (NULL); >} > >/* > * Find the vm_page corresponding to the given physical address. > */ >vm_page_t >vm_phys_paddr_to_vm_page(vm_paddr_t pa) >{ > struct vm_phys_seg *seg; > > if ((seg = vm_phys_paddr_to_seg(pa)) != NULL) > return (&seg->first_page[atop(pa - seg->start)]); > return (NULL); >} > >vm_page_t >vm_phys_fictitious_to_vm_page(vm_paddr_t pa) >{ > struct vm_phys_fictitious_seg tmp, *seg; > vm_page_t m; > > m = NULL; > tmp.start = pa; > tmp.end = 0; > > rw_rlock(&vm_phys_fictitious_reg_lock); > seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); > rw_runlock(&vm_phys_fictitious_reg_lock); > if (seg == NULL) > return (NULL); > > m = &seg->first_page[atop(pa - seg->start)]; > KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); > > return (m); >} > >static inline void >vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, > long page_count, vm_memattr_t memattr) >{ > long i; > > bzero(range, page_count * sizeof(*range)); > for (i = 0; i < page_count; i++) { > vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); > range[i].oflags &= ~VPO_UNMANAGED; > range[i].busy_lock = VPB_UNBUSIED; > } >} > >int >vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, > vm_memattr_t memattr) >{ > struct vm_phys_fictitious_seg *seg; > vm_page_t fp; > long page_count; >#ifdef VM_PHYSSEG_DENSE > long pi, pe; > long dpage_count; >#endif > > KASSERT(start < end, > ("Start of segment isn't less than end (start: %jx end: %jx)", > (uintmax_t)start, (uintmax_t)end)); > > page_count = (end - start) / PAGE_SIZE; > >#ifdef 
VM_PHYSSEG_DENSE > pi = atop(start); > pe = atop(end); > if (pi >= first_page && (pi - first_page) < vm_page_array_size) { > fp = &vm_page_array[pi - first_page]; > if ((pe - first_page) > vm_page_array_size) { > /* > * We have a segment that starts inside > * of vm_page_array, but ends outside of it. > * > * Use vm_page_array pages for those that are > * inside of the vm_page_array range, and > * allocate the remaining ones. > */ > dpage_count = vm_page_array_size - (pi - first_page); > vm_phys_fictitious_init_range(fp, start, dpage_count, > memattr); > page_count -= dpage_count; > start += ptoa(dpage_count); > goto alloc; > } > /* > * We can allocate the full range from vm_page_array, > * so there's no need to register the range in the tree. > */ > vm_phys_fictitious_init_range(fp, start, page_count, memattr); > return (0); > } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { > /* > * We have a segment that ends inside of vm_page_array, > * but starts outside of it. > */ > fp = &vm_page_array[0]; > dpage_count = pe - first_page; > vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count, > memattr); > end -= ptoa(dpage_count); > page_count -= dpage_count; > goto alloc; > } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { > /* > * Trying to register a fictitious range that expands before > * and after vm_page_array. 
> */ > return (EINVAL); > } else { >alloc: >#endif > fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, > M_WAITOK); >#ifdef VM_PHYSSEG_DENSE > } >#endif > vm_phys_fictitious_init_range(fp, start, page_count, memattr); > > seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); > seg->start = start; > seg->end = end; > seg->first_page = fp; > > rw_wlock(&vm_phys_fictitious_reg_lock); > RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); > rw_wunlock(&vm_phys_fictitious_reg_lock); > > return (0); >} > >void >vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) >{ > struct vm_phys_fictitious_seg *seg, tmp; >#ifdef VM_PHYSSEG_DENSE > long pi, pe; >#endif > > KASSERT(start < end, > ("Start of segment isn't less than end (start: %jx end: %jx)", > (uintmax_t)start, (uintmax_t)end)); > >#ifdef VM_PHYSSEG_DENSE > pi = atop(start); > pe = atop(end); > if (pi >= first_page && (pi - first_page) < vm_page_array_size) { > if ((pe - first_page) <= vm_page_array_size) { > /* > * This segment was allocated using vm_page_array > * only, there's nothing to do since those pages > * were never added to the tree. > */ > return; > } > /* > * We have a segment that starts inside > * of vm_page_array, but ends outside of it. > * > * Calculate how many pages were added to the > * tree and free them. > */ > start = ptoa(first_page + vm_page_array_size); > } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { > /* > * We have a segment that ends inside of vm_page_array, > * but starts outside of it. > */ > end = ptoa(first_page); > } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { > /* Since it's not possible to register such a range, panic. 
*/ > panic( > "Unregistering not registered fictitious range [%#jx:%#jx]", > (uintmax_t)start, (uintmax_t)end); > } >#endif > tmp.start = start; > tmp.end = 0; > > rw_wlock(&vm_phys_fictitious_reg_lock); > seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); > if (seg->start != start || seg->end != end) { > rw_wunlock(&vm_phys_fictitious_reg_lock); > panic( > "Unregistering not registered fictitious range [%#jx:%#jx]", > (uintmax_t)start, (uintmax_t)end); > } > RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); > rw_wunlock(&vm_phys_fictitious_reg_lock); > free(seg->first_page, M_FICT_PAGES); > free(seg, M_FICT_PAGES); >} > >/* > * Free a contiguous, power of two-sized set of physical pages. > * > * The free page queues must be locked. > */ >void >vm_phys_free_pages(vm_page_t m, int order) >{ > struct vm_freelist *fl; > struct vm_phys_seg *seg; > vm_paddr_t pa; > vm_page_t m_buddy; > > KASSERT(m->order == VM_NFREEORDER, > ("vm_phys_free_pages: page %p has unexpected order %d", > m, m->order)); > KASSERT(m->pool < VM_NFREEPOOL, > ("vm_phys_free_pages: page %p has unexpected pool %d", > m, m->pool)); > KASSERT(order < VM_NFREEORDER, > ("vm_phys_free_pages: order %d is out of range", order)); > seg = &vm_phys_segs[m->segind]; > vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); > if (order < VM_NFREEORDER - 1) { > pa = VM_PAGE_TO_PHYS(m); > do { > pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); > if (pa < seg->start || pa >= seg->end) > break; > m_buddy = &seg->first_page[atop(pa - seg->start)]; > if (m_buddy->order != order) > break; > fl = (*seg->free_queues)[m_buddy->pool]; > vm_freelist_rem(fl, m_buddy, order); > if (m_buddy->pool != m->pool) > vm_phys_set_pool(m->pool, m_buddy, order); > order++; > pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); > m = &seg->first_page[atop(pa - seg->start)]; > } while (order < VM_NFREEORDER - 1); > } > fl = (*seg->free_queues)[m->pool]; > vm_freelist_add(fl, m, order, 1); >} > >/* > * Return the largest possible 
order of a set of pages starting at m. > */ >static int >max_order(vm_page_t m) >{ > > /* > * Unsigned "min" is used here so that "order" is assigned > * "VM_NFREEORDER - 1" when "m"'s physical address is zero > * or the low-order bits of its physical address are zero > * because the size of a physical address exceeds the size of > * a long. > */ > return (min(ffsll(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, > VM_NFREEORDER - 1)); >} > >/* > * Free a contiguous, arbitrarily sized set of physical pages, without > * merging across set boundaries. > * > * The free page queues must be locked. > */ >void >vm_phys_enqueue_contig(vm_page_t m, u_long npages) >{ > struct vm_freelist *fl; > struct vm_phys_seg *seg; > vm_page_t m_end; > vm_paddr_t diff, lo; > int order; > > /* > * Avoid unnecessary coalescing by freeing the pages in the largest > * possible power-of-two-sized subsets. > */ > vm_domain_free_assert_locked(vm_pagequeue_domain(m)); > seg = &vm_phys_segs[m->segind]; > fl = (*seg->free_queues)[m->pool]; > m_end = m + npages; > /* Free blocks of increasing size. */ > lo = VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT; > if (m < m_end && > (diff = lo ^ (lo + npages - 1)) != 0) { > order = min(flsll(diff) - 1, VM_NFREEORDER - 1); > m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl, 1); > } > > /* Free blocks of maximum size. */ > order = VM_NFREEORDER - 1; > while (m + (1 << order) <= m_end) { > KASSERT(seg == &vm_phys_segs[m->segind], > ("%s: page range [%p,%p) spans multiple segments", > __func__, m_end - npages, m)); > vm_freelist_add(fl, m, order, 1); > m += 1 << order; > } > /* Free blocks of diminishing size. */ > vm_phys_enq_beg(m, m_end - m, fl, 1); >} > >/* > * Free a contiguous, arbitrarily sized set of physical pages. > * > * The free page queues must be locked. 
> */ >void >vm_phys_free_contig(vm_page_t m, u_long npages) >{ > int order_start, order_end; > vm_page_t m_start, m_end; > > vm_domain_free_assert_locked(vm_pagequeue_domain(m)); > > m_start = m; > order_start = max_order(m_start); > if (order_start < VM_NFREEORDER - 1) > m_start += 1 << order_start; > m_end = m + npages; > order_end = max_order(m_end); > if (order_end < VM_NFREEORDER - 1) > m_end -= 1 << order_end; > /* > * Avoid unnecessary coalescing by freeing the pages at the start and > * end of the range last. > */ > if (m_start < m_end) > vm_phys_enqueue_contig(m_start, m_end - m_start); > if (order_start < VM_NFREEORDER - 1) > vm_phys_free_pages(m, order_start); > if (order_end < VM_NFREEORDER - 1) > vm_phys_free_pages(m_end, order_end); >} > >/* > * Identify the first address range within segment segind or greater > * that matches the domain, lies within the low/high range, and has > * enough pages. Return -1 if there is none. > */ >int >vm_phys_find_range(vm_page_t bounds[], int segind, int domain, > u_long npages, vm_paddr_t low, vm_paddr_t high) >{ > vm_paddr_t pa_end, pa_start; > struct vm_phys_seg *end_seg, *seg; > > KASSERT(npages > 0, ("npages is zero")); > KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range")); > end_seg = &vm_phys_segs[vm_phys_nsegs]; > for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) { > if (seg->domain != domain) > continue; > if (seg->start >= high) > return (-1); > pa_start = MAX(low, seg->start); > pa_end = MIN(high, seg->end); > if (pa_end - pa_start < ptoa(npages)) > continue; > bounds[0] = &seg->first_page[atop(pa_start - seg->start)]; > bounds[1] = &seg->first_page[atop(pa_end - seg->start)]; > return (seg - vm_phys_segs); > } > return (-1); >} > >/* > * Search for the given physical page "m" in the free lists. If the search > * succeeds, remove "m" from the free lists and return true. Otherwise, return > * false, indicating that "m" is not in the free lists. 
> * > * The free page queues must be locked. > */ >bool >vm_phys_unfree_page(vm_page_t m) >{ > struct vm_freelist *fl; > struct vm_phys_seg *seg; > vm_paddr_t pa, pa_half; > vm_page_t m_set, m_tmp; > int order; > > /* > * First, find the contiguous, power of two-sized set of free > * physical pages containing the given physical page "m" and > * assign it to "m_set". > */ > seg = &vm_phys_segs[m->segind]; > vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); > for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && > order < VM_NFREEORDER - 1; ) { > order++; > pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); > if (pa >= seg->start) > m_set = &seg->first_page[atop(pa - seg->start)]; > else > return (false); > } > if (m_set->order < order) > return (false); > if (m_set->order == VM_NFREEORDER) > return (false); > KASSERT(m_set->order < VM_NFREEORDER, > ("vm_phys_unfree_page: page %p has unexpected order %d", > m_set, m_set->order)); > > /* > * Next, remove "m_set" from the free lists. Finally, extract > * "m" from "m_set" using an iterative algorithm: While "m_set" > * is larger than a page, shrink "m_set" by returning the half > * of "m_set" that does not contain "m" to the free lists. > */ > fl = (*seg->free_queues)[m_set->pool]; > order = m_set->order; > vm_freelist_rem(fl, m_set, order); > while (order > 0) { > order--; > pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); > if (m->phys_addr < pa_half) > m_tmp = &seg->first_page[atop(pa_half - seg->start)]; > else { > m_tmp = m_set; > m_set = &seg->first_page[atop(pa_half - seg->start)]; > } > vm_freelist_add(fl, m_tmp, order, 0); > } > KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); > return (true); >} > >/* > * Find a run of contiguous physical pages from the specified page list. 
> */ >static vm_page_t >vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages, > vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) >{ > struct vm_phys_seg *seg; > vm_page_t m_ret; > vm_paddr_t max_size, pa, pa_end, size; > > if (TAILQ_EMPTY(&fl[VM_NFREEORDER - 1].pl)) > return (NULL); > m_ret = TAILQ_FIRST(&fl[VM_NFREEORDER - 1].pl); > seg = &vm_phys_segs[m_ret->segind]; > > size = npages << PAGE_SHIFT; > max_size = (vm_paddr_t)1 << (PAGE_SHIFT + VM_NFREEORDER - 1); > TAILQ_FOREACH(m_ret, &fl[VM_NFREEORDER - 1].pl, listq) { > /* > * Verify that pa is the first free max page block >= low in the > * sequence of physically contiguous blocks that contains pa. > */ > pa = VM_PAGE_TO_PHYS(m_ret); > if (pa < low) > continue; > if (pa >= max_size && pa - max_size >= low && > VM_NFREEORDER - 1 == > vm_phys_paddr_to_vm_page(pa - max_size)->order) > continue; > > /* > * Advance pa to the first of the sequence, if any, that > * satisfies address conditions. > */ > while (!vm_addr_ok(pa, size, alignment, boundary) && > VM_NFREEORDER - 1 == > vm_phys_paddr_to_vm_page(pa + max_size)->order) > pa += max_size; > if (!vm_addr_ok(pa, size, alignment, boundary)) > continue; > > /* > * Verify that pa hasn't advanced so far that there's not enough > * space left to allocate. > */ > pa_end = pa + size; > if (pa_end > high || > pa_end < pa || > pa < seg->start || seg->end < pa_end) > continue; > > /* > * Verify that the blocks to allocate starting at pa are all > * free. > */ > do { > pa += max_size; > if (pa >= pa_end) > return (vm_phys_paddr_to_vm_page(pa_end - size)); > } while (VM_NFREEORDER - 1 == > vm_phys_paddr_to_vm_page(pa)->order); > } > return (NULL); >} > >/* > * Find a run of contiguous physical pages from the specified free list > * table. 
> */ >static vm_page_t >vm_phys_find_queues_contig( > struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX], > u_long npages, vm_paddr_t low, vm_paddr_t high, > u_long alignment, vm_paddr_t boundary) >{ > struct vm_freelist *fl; > vm_page_t m_ret; > vm_paddr_t pa, pa_end, size; > int oind, order, pind; > > KASSERT(npages > 0, ("npages is 0")); > KASSERT(powerof2(alignment), ("alignment is not a power of 2")); > KASSERT(powerof2(boundary), ("boundary is not a power of 2")); > /* Compute the queue that is the best fit for npages. */ > order = flsl(npages - 1); > /* Search for a large enough free block. */ > size = npages << PAGE_SHIFT; > for (oind = order; oind < VM_NFREEORDER; oind++) { > for (pind = 0; pind < VM_NFREEPOOL; pind++) { > fl = (*queues)[pind]; > TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) { > /* > * Determine if the address range starting at pa > * is within the given range, satisfies the > * given alignment, and does not cross the given > * boundary. > */ > pa = VM_PAGE_TO_PHYS(m_ret); > pa_end = pa + size; > if (low <= pa && pa_end <= high && > vm_addr_ok(pa, size, alignment, boundary)) > return (m_ret); > } > } > } > if (order < VM_NFREEORDER) > return (NULL); > /* Search for a long-enough sequence of small blocks. */ > for (pind = 0; pind < VM_NFREEPOOL; pind++) { > fl = (*queues)[pind]; > m_ret = vm_phys_find_freelist_contig(fl, npages, > low, high, alignment, boundary); > if (m_ret != NULL) > return (m_ret); > } > return (NULL); >} > >/* > * Allocate a contiguous set of physical pages of the given size > * "npages" from the free lists. All of the physical pages must be at > * or above the given physical address "low" and below the given > * physical address "high". The given value "alignment" determines the > * alignment of the first physical page in the set. If the given value > * "boundary" is non-zero, then the set of physical pages cannot cross > * any physical address boundary that is a multiple of that value. 
Both > * "alignment" and "boundary" must be a power of two. > */ >vm_page_t >vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, > u_long alignment, vm_paddr_t boundary) >{ > vm_paddr_t pa_end, pa_start; > struct vm_freelist *fl; > vm_page_t m, m_run; > struct vm_phys_seg *seg; > struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX]; > int oind, segind; > > KASSERT(npages > 0, ("npages is 0")); > KASSERT(powerof2(alignment), ("alignment is not a power of 2")); > KASSERT(powerof2(boundary), ("boundary is not a power of 2")); > vm_domain_free_assert_locked(VM_DOMAIN(domain)); > if (low >= high) > return (NULL); > queues = NULL; > m_run = NULL; > for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { > seg = &vm_phys_segs[segind]; > if (seg->start >= high || seg->domain != domain) > continue; > if (low >= seg->end) > break; > if (low <= seg->start) > pa_start = seg->start; > else > pa_start = low; > if (high < seg->end) > pa_end = high; > else > pa_end = seg->end; > if (pa_end - pa_start < ptoa(npages)) > continue; > /* > * If a previous segment led to a search using > * the same free lists as would this segment, then > * we've actually already searched within this > * too. So skip it. > */ > if (seg->free_queues == queues) > continue; > queues = seg->free_queues; > vm_domain_free_assert_locked(VM_DOMAIN(seg->domain)); > m_run = vm_phys_find_queues_contig(queues, npages, > low, high, alignment, boundary); > if (m_run != NULL) > break; > } > if (m_run == NULL) > return (NULL); > > /* Allocate pages from the page-range found. */ > for (m = m_run; m < &m_run[npages]; m = &m[1 << oind]) { > fl = (*queues)[m->pool]; > oind = m->order; > vm_freelist_rem(fl, m, oind); > if (m->pool != VM_FREEPOOL_DEFAULT) > vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind); > } > /* Return excess pages to the free lists. 
*/ > fl = (*queues)[VM_FREEPOOL_DEFAULT]; > vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, 0); > return (m_run); >} > >/* > * Return the index of the first unused slot which may be the terminating > * entry. > */ >static int >vm_phys_avail_count(void) >{ > int i; > > for (i = 0; phys_avail[i + 1]; i += 2) > continue; > if (i > PHYS_AVAIL_ENTRIES) > panic("Improperly terminated phys_avail %d entries", i); > > return (i); >} > >/* > * Assert that a phys_avail entry is valid. > */ >static void >vm_phys_avail_check(int i) >{ > if (phys_avail[i] & PAGE_MASK) > panic("Unaligned phys_avail[%d]: %#jx", i, > (intmax_t)phys_avail[i]); > if (phys_avail[i+1] & PAGE_MASK) > panic("Unaligned phys_avail[%d + 1]: %#jx", i, > (intmax_t)phys_avail[i]); > if (phys_avail[i + 1] < phys_avail[i]) > panic("phys_avail[%d] start %#jx < end %#jx", i, > (intmax_t)phys_avail[i], (intmax_t)phys_avail[i+1]); >} > >/* > * Return the index of an overlapping phys_avail entry or -1. > */ >#ifdef NUMA >static int >vm_phys_avail_find(vm_paddr_t pa) >{ > int i; > > for (i = 0; phys_avail[i + 1]; i += 2) > if (phys_avail[i] <= pa && phys_avail[i + 1] > pa) > return (i); > return (-1); >} >#endif > >/* > * Return the index of the largest entry. > */ >int >vm_phys_avail_largest(void) >{ > vm_paddr_t sz, largesz; > int largest; > int i; > > largest = 0; > largesz = 0; > for (i = 0; phys_avail[i + 1]; i += 2) { > sz = vm_phys_avail_size(i); > if (sz > largesz) { > largesz = sz; > largest = i; > } > } > > return (largest); >} > >vm_paddr_t >vm_phys_avail_size(int i) >{ > > return (phys_avail[i + 1] - phys_avail[i]); >} > >/* > * Split an entry at the address 'pa'. Return zero on success or errno. 
> */ >static int >vm_phys_avail_split(vm_paddr_t pa, int i) >{ > int cnt; > > vm_phys_avail_check(i); > if (pa <= phys_avail[i] || pa >= phys_avail[i + 1]) > panic("vm_phys_avail_split: invalid address"); > cnt = vm_phys_avail_count(); > if (cnt >= PHYS_AVAIL_ENTRIES) > return (ENOSPC); > memmove(&phys_avail[i + 2], &phys_avail[i], > (cnt - i) * sizeof(phys_avail[0])); > phys_avail[i + 1] = pa; > phys_avail[i + 2] = pa; > vm_phys_avail_check(i); > vm_phys_avail_check(i+2); > > return (0); >} > >/* > * Check if a given physical address can be included as part of a crash dump. > */ >bool >vm_phys_is_dumpable(vm_paddr_t pa) >{ > vm_page_t m; > int i; > > if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) > return ((m->flags & PG_NODUMP) == 0); > > for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { > if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) > return (true); > } > return (false); >} > >void >vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end) >{ > struct vm_phys_seg *seg; > > if (vm_phys_early_nsegs == -1) > panic("%s: called after initialization", __func__); > if (vm_phys_early_nsegs == nitems(vm_phys_early_segs)) > panic("%s: ran out of early segments", __func__); > > seg = &vm_phys_early_segs[vm_phys_early_nsegs++]; > seg->start = start; > seg->end = end; >} > >/* > * This routine allocates NUMA node specific memory before the page > * allocator is bootstrapped. > */ >vm_paddr_t >vm_phys_early_alloc(int domain, size_t alloc_size) >{ >#ifdef NUMA > int mem_index; >#endif > int i, biggestone; > vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align; > > KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains), > ("%s: invalid domain index %d", __func__, domain)); > > /* > * Search the mem_affinity array for the biggest address > * range in the desired domain. This is used to constrain > * the phys_avail selection below. 
> */ > biggestsize = 0; > mem_start = 0; > mem_end = -1; >#ifdef NUMA > mem_index = 0; > if (mem_affinity != NULL) { > for (i = 0;; i++) { > size = mem_affinity[i].end - mem_affinity[i].start; > if (size == 0) > break; > if (domain != -1 && mem_affinity[i].domain != domain) > continue; > if (size > biggestsize) { > mem_index = i; > biggestsize = size; > } > } > mem_start = mem_affinity[mem_index].start; > mem_end = mem_affinity[mem_index].end; > } >#endif > > /* > * Now find biggest physical segment in within the desired > * numa domain. > */ > biggestsize = 0; > biggestone = 0; > for (i = 0; phys_avail[i + 1] != 0; i += 2) { > /* skip regions that are out of range */ > if (phys_avail[i+1] - alloc_size < mem_start || > phys_avail[i+1] > mem_end) > continue; > size = vm_phys_avail_size(i); > if (size > biggestsize) { > biggestone = i; > biggestsize = size; > } > } > alloc_size = round_page(alloc_size); > > /* > * Grab single pages from the front to reduce fragmentation. > */ > if (alloc_size == PAGE_SIZE) { > pa = phys_avail[biggestone]; > phys_avail[biggestone] += PAGE_SIZE; > vm_phys_avail_check(biggestone); > return (pa); > } > > /* > * Naturally align large allocations. > */ > align = phys_avail[biggestone + 1] & (alloc_size - 1); > if (alloc_size + align > biggestsize) > panic("cannot find a large enough size\n"); > if (align != 0 && > vm_phys_avail_split(phys_avail[biggestone + 1] - align, > biggestone) != 0) > /* Wasting memory. 
*/ > phys_avail[biggestone + 1] -= align; > > phys_avail[biggestone + 1] -= alloc_size; > vm_phys_avail_check(biggestone); > pa = phys_avail[biggestone + 1]; > return (pa); >} > >void >vm_phys_early_startup(void) >{ > struct vm_phys_seg *seg; > int i; > > for (i = 0; phys_avail[i + 1] != 0; i += 2) { > phys_avail[i] = round_page(phys_avail[i]); > phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); > } > > for (i = 0; i < vm_phys_early_nsegs; i++) { > seg = &vm_phys_early_segs[i]; > vm_phys_add_seg(seg->start, seg->end); > } > vm_phys_early_nsegs = -1; > >#ifdef NUMA > /* Force phys_avail to be split by domain. */ > if (mem_affinity != NULL) { > int idx; > > for (i = 0; mem_affinity[i].end != 0; i++) { > idx = vm_phys_avail_find(mem_affinity[i].start); > if (idx != -1 && > phys_avail[idx] != mem_affinity[i].start) > vm_phys_avail_split(mem_affinity[i].start, idx); > idx = vm_phys_avail_find(mem_affinity[i].end); > if (idx != -1 && > phys_avail[idx] != mem_affinity[i].end) > vm_phys_avail_split(mem_affinity[i].end, idx); > } > } >#endif >} > >#ifdef DDB >/* > * Show the number of physical pages in each of the free lists. 
> */ >DB_SHOW_COMMAND_FLAGS(freepages, db_show_freepages, DB_CMD_MEMSAFE) >{ > struct vm_freelist *fl; > int flind, oind, pind, dom; > > for (dom = 0; dom < vm_ndomains; dom++) { > db_printf("DOMAIN: %d\n", dom); > for (flind = 0; flind < vm_nfreelists; flind++) { > db_printf("FREE LIST %d:\n" > "\n ORDER (SIZE) | NUMBER" > "\n ", flind); > for (pind = 0; pind < VM_NFREEPOOL; pind++) > db_printf(" | POOL %d", pind); > db_printf("\n-- "); > for (pind = 0; pind < VM_NFREEPOOL; pind++) > db_printf("-- -- "); > db_printf("--\n"); > for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { > db_printf(" %2.2d (%6.6dK)", oind, > 1 << (PAGE_SHIFT - 10 + oind)); > for (pind = 0; pind < VM_NFREEPOOL; pind++) { > fl = vm_phys_free_queues[dom][flind][pind]; > db_printf(" | %6.6d", fl[oind].lcnt); > } > db_printf("\n"); > } > db_printf("\n"); > } > db_printf("\n"); > } >} >#endif
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 274592
:
245992
|
246017
|
246038
|
246060
| 246076 |
246094
|
246126
|
246141
|
246148
|
246165
|
246184
|
246280