/* * RMALLOC v0.91 by Ronald Kriemann * -------------------------- * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation (version 2 of the License). * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * Documentation * ------------- * * One reason for this version of malloc was a problem I encountered * with one of my other programs when allocating several gigabytes. * Because of the used algorithms I deallocated and allocated most * of this memory (which was used by relativly small chunks). This * behaviour confused all other malloc implementations I tested and * led to unacceptable program-performance. So I decided to do it * by myself and wrote this lib which performed MUCH better, * allthough I have to admit, that one can easily construct programs * where the used strategy leads to a totaly disastrous memory- * consumption. Anyway here are the details: * * This malloc implementation works by "recycling" freed blocks * (this is also responsible for the "r" in rmalloc and not the * R in Ronald ;). We NEVER give a block back to the system, * so be sure that your application uses similar blocksizes during * program- execution. Also, this malloc assumes the overall memory- * consumption of the program to be rather high (several tens or * hundreds of megabytes [or better gigabytes ;]). * * We allocate chunks in heaps, and a heap is built, if a * thread wants to allocate data, but no heap is free (same * as in ptmalloc). After using the heap, we save this heap * as thread-specific-data and try to use it next time. A heap * consists mainly of a data-segment which is allocated from * the system via sbrk or mmap. To avoid too many calls to these * functions we allocate system-memory with an additional size * (top_pad) which is by default 20 MB, but can be changed via * mallinfo. Be aware that a too small top_pad can increase * fragmentation and degrade performance. On the other hand, if * you have 8 concurrent threads, you will have at least 160 MB * memory consumption with the default top_pad (this is what I * mean with BIG programs ;). * * Beside the user-data requested, we store additional data-fields * namely the size of the chunk, the heap it was allocated in and some * status-flags. Because we use only multiples of 8 as chunk-sizes, * we have three bits for free in each size-field which we use as follows: * * BIT 0 : 0 - chunk free * 1 - chunk in use * BIT 1 : 0 - chunk MUST not be deallocated (only in small blocks) * 1 - chunk can be deallocated * BIT 2 : not used * * We also use additional fields for management: next-/prev- pointers * for lists. These fields are only used, when the block is not in use, * so we can use the area of the chunk. * * Because of these data-field we have a minimal chunk-size of 16 bytes * in 32 bit systems and 24 bytes in 64 bit systems. * * We have size-classes for storing freed chunks (thanks to libhoard for the idea). * By this we can handle allocation and deallocation in O(1) and avoid fragmentation * by providing appropriate classes. One can also choose between Best Fit and * First Fit when allocating in these size-classes (with Best-Fit allthough, O(1) * complexity cannot be assured). * * We use a special allocation strategy for small blocks (<= SMALL_SIZE) * to avoid too small block-requests from the system: if we need new chunks, * we allocate more than one a time. How many is determined by processing * smallest common multiply of chunk- and pagesize. This not only * helps keeping sbrk-requests low, but also speed things up. * (This procedure must be activated; see HANDLE_SMALL below) * * Some environment variables are supported for doing a memory-trace or * printing statistics: * * RMALLOC_TRACE : not defined or * : 0 - no memory traceing * : 1 - do a memory-trace * RMALLOC_TRACE_FILE : filename of the tracefile (default = trace) * RMALLOC_TRACE_SIZE : blocksize we should trace (default = 0 - means whole memory) * RMALLOC_STAT : not defined or * : 0 - no statistics will be printed * : 1 - print info about allocated space * : 2 - print info about number of chunks and allocated memory * : 3 - print info about each allocated chunksize * RMALLOC_BUILD_SIZE : 1 - print size-classes-source usable for rmalloc */ /***************************************************************** ***************************************************************** ** ** some options ** ***************************************************************** *****************************************************************/ /* * use rmalloc as a malloc-replacment */ #if defined(__cplusplus) && !defined(OVERLOAD_MALLOC) #define OVERLOAD_MALLOC 0 #else #ifndef OVERLOAD_MALLOC #define OVERLOAD_MALLOC 1 #endif #endif /* * use rmalloc as a new/delete replacment */ #ifndef OVERLOAD_NEW #define OVERLOAD_NEW 1 #endif /* * use pthreads */ #ifndef USE_PTHREAD #define USE_PTHREAD 1 #endif /* * define system-allocation method: MALLOC, MMAP, SBRK */ #if !defined(USE_MALLOC) && OVERLOAD_MALLOC == 0 # define USE_MALLOC 1 #else # ifndef USE_MMAP # define USE_MMAP 1 # endif # ifndef USE_SBRK # define USE_SBRK 0 # endif #endif /* * check heap extracted from memory-chunk */ #ifndef CHECK_HEAP #define CHECK_HEAP 1 #endif #include #include #include #include #include #include #include #include #include #include #include #if USE_PTHREAD == 1 #include #endif #include "rmalloc.h" /***************************************************************** ***************************************************************** ** ** defines ** ***************************************************************** *****************************************************************/ /* overload malloc functions */ #if OVERLOAD_MALLOC == 1 # define r_malloc malloc # define r_calloc calloc # define r_free free # define r_realloc realloc # define r_mallinfo mallinfo # define r_mallopt mallopt #endif /* number of size classes */ #define NO_OF_CLASSES 89 /* upper limit for small chunks (must be multiple of 8) */ #define SMALL_SIZE 128 /* define to handle small chunks differently */ #define HANDLE_SMALL /* default trace file */ #define DEFAULT_TRACE_FILE "trace" /* default value for top-pad */ #define DEFAULT_TOP_PAD 20*1024*1024 /* error value in sbrk */ #define SBRK_FAIL -1 /* maximal number of threads */ #define TSD_MAX 256 /* use mmap instead of sbrk for local heaps */ #if USE_MMAP == 1 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) # define MAP_ANONYMOUS MAP_ANON #endif #if !defined(MAP_FAILED) # define MAP_FAILED ((char*)-1) #endif #if !defined(MAP_ANONYMOUS) static int dev_zero_fd = -1; #define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \ (dev_zero_fd = open("/dev/zero", O_RDWR), \ mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \ mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) #else #define MMAP(addr, size, prot, flags) \ (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0)) #endif /* !defined(MAP_ANONYMOUS) */ #endif /* USE_MMAP == 1 */ /***************************************************************** ***************************************************************** ** ** types for malloc ** ***************************************************************** *****************************************************************/ /* internal size and pointer types */ typedef unsigned long internal_size_t; /* bool looks better than int */ #ifndef __cplusplus typedef enum { false, true } bool; #endif /* * mutex type */ #if USE_PTHREAD == 1 typedef pthread_mutex_t mutex_t; #else typedef int mutex_t; #endif /* * for holding info about system */ typedef struct { internal_size_t pagesize; /* number of bytes per page */ unsigned int nprocs; /* number of processors */ internal_size_t top_pad; /* number of extra bytes to allocated in each sbrk */ } sysconf_t; /* * holds size information for a data segment */ typedef struct { internal_size_t alloc; /* number of bytes allocated */ internal_size_t size; /* total number of bytes */ } size_info_t; /* * data-structure for a system-heap * (represents data-segment) */ typedef struct { char * begin; /* pointer to the first byte in the data-segment */ char * pos; /* pointer to the first free byte in the data-segment */ char * end; /* pointer to the first byte out of the data-segment */ size_info_t local; /* info about local data-segment */ size_info_t global; /* info about global data-segment */ internal_size_t unused; /* number of bytes unusable in data-segment (fragmented) */ } data_seg_t; /* * node for memory chunks */ typedef struct internal_node_struct { /* size of chunk */ internal_size_t size; /* heap this chunk belongs to */ void * heap; /* next/prev pointer for list */ struct internal_node_struct * prev; struct internal_node_struct * next; } node_t; /* size of extra-data to be stored in each chunk */ #define EXTRA_DATA_SIZE (sizeof(internal_size_t) + sizeof(void*)) /* * holding statistical information */ typedef struct { internal_size_t alloc[ NO_OF_CLASSES ]; /* number of allocated small chunks */ internal_size_t used[ NO_OF_CLASSES ]; /* number of used small chunks */ internal_size_t free[ NO_OF_CLASSES ]; /* number of free small chunks */ internal_size_t used_mem; /* total number of bytes used in heap (alloc from system) */ internal_size_t mem_in_use; /* number of bytes in heap used by application */ } stat_t; /* * type for a heap */ typedef struct heap_s { int id; /* unique id of heap */ struct heap_s * next; /* link for the heap-list */ data_seg_t data_seg; /* data segment associated with heap */ node_t * free[ NO_OF_CLASSES ]; /* free chunks */ stat_t stat; /* status information */ mutex_t mutex; /* mutex for locking heap */ } heap_t; /* * thread specific data */ typedef struct { heap_t * heap; /* pointer to last used heap */ } tsd_t; /* * list of heaps for the threads */ typedef struct { heap_t * heaps; /* linked list of heaps */ unsigned int nheaps; /* number of heaps in list */ mutex_t mutex; /* mutex for safety */ } heap_list_t; /* * type for tracing memory-allocation */ typedef struct { int fd; /* file descriptor of trace-file */ internal_size_t old_used; /* old number of used bytes */ internal_size_t old_free; /* old number of free bytes */ unsigned long step; /* number of allocation-steps */ internal_size_t size; /* size of chunk to trace */ } trace_t; /***************************************************************** ***************************************************************** ** ** thread definition ** ***************************************************************** *****************************************************************/ #if USE_PTHREAD == 1 /* * handling of mutices */ # define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER # define DEFINE_MUTEX( m ) static mutex_t m = PTHREAD_MUTEX_INITIALIZER # define MUTEX_INIT(mutex,status) if (( status = pthread_mutex_init( & mutex, NULL )) != 0) \ { fprintf( stderr, "(rmalloc) init_heap : error in mutex_init (%s)\n", \ strerror( status ) ); \ exit( 1 ); } # define LOCK( m ) pthread_mutex_lock( & m ) # define UNLOCK( m ) pthread_mutex_unlock( & m ) # define TRYLOCK( m ) pthread_mutex_trylock( & m ) # define MUTEX_BUSY EBUSY # define IS_LOCKED( m ) TRYLOCK( m ) == MUTEX_BUSY /* * conversion thread <-> heap */ # define THREAD_TO_HEAP( t ) & thread_heap[ (t % sys_conf.nprocs) ] # if defined(LINUX) # define THREAD_ID ((((unsigned long) pthread_self()) >> 10) % TSD_MAX) # else # define THREAD_ID (((unsigned long) pthread_self()) % TSD_MAX) # endif # ifdef NDEBUG # define OLD_HEAP( tid ) tsd[ tid ] # else # define OLD_HEAP( tid ) ((tid < TSD_MAX) ? tsd[ tid ] : \ (fprintf( stderr, "(rmalloc) wrong tid\n" ), tsd[0])) # endif #else /* * handling of mutices */ # define MUTEX_INITIALIZER 0 # define DEFINE_MUTEX( m ) # define MUTEX_INIT(mutex,status) # define LOCK( m ) # define UNLOCK( m ) # define TRYLOCK( m ) 1 # define MUTEX_BUSY 0 # define IS_LOCKED( m ) false /* * conversion thread <-> heap */ # define THREAD_TO_HEAP( t ) & global_heap # define THREAD_ID 0 # define OLD_HEAP( tid ) & global_heap; #endif /* USE_PTHREADS == 1 */ /***************************************************************** ***************************************************************** ** ** heap specific data ** ***************************************************************** *****************************************************************/ /* array holding the different size classes */ static internal_size_t size_classes[ NO_OF_CLASSES ] = { 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 144, 168, 200, 240, 288, 344, 416, 496, 592, 712, 856, 1024, 1232, 1472, 1768, 2120, 2544, 3048, 3664, 4392, 5272, 6320, 7584, 9104, 10928, 13112, 15728, 18872, 22648, 27176, 32616, 39136, 46960, 56352, 67624, 81144, 97376, 116848, 140216, 168256, 201904, 242288, 290744, 348896, 418672, 502408, 602888, 723464, 868152, 1041784, 1250136, 1500160, 1800192, 2160232, 2592280, 3110736, 3732880, 4479456, 5375344, 6450408, 7740496, 9288592, 11146312, 13375568, 16050680, 19260816, 23112984, 27735576, 33282688, 39939224, 47927072, 57512488, 69014984 }; /* holds info about system */ static sysconf_t sys_conf = { 0, 1, DEFAULT_TOP_PAD }; /* global heap */ static heap_t global_heap = { 0, NULL, { NULL, NULL, NULL, { 0, 0 }, { 0, 0 }, 0 }, { 0 }, { { 0 }, { 0 }, { 0 }, 0, 0 }, MUTEX_INITIALIZER }; /* counter for the allocated heaps */ static unsigned int heap_id = 0; /* linked list of heaps for threads (global heap is not a part of it) */ static heap_list_t heap_list = { NULL, 0, MUTEX_INITIALIZER }; /* thread specific data */ static heap_t * tsd[ TSD_MAX ]; /* is the heap initialised */ static bool is_initialised = false; /* how much statistics should be printed */ static unsigned int heap_stat_lvl = 0; /* for memory trace */ static bool trace_mem = false; static trace_t trace = { -1, 0, 0, 0, 0 }; /* holds allocations sizes for very small chunks */ static internal_size_t alloc_size[ SMALL_SIZE ]; /***************************************************************** ***************************************************************** ** ** defines for easier access ** ***************************************************************** *****************************************************************/ #define NODE_TO_PTR( n ) ((void *) (((char*) n) + EXTRA_DATA_SIZE)) #define PTR_TO_NODE( p ) ((node_t*) (((char*) p) - EXTRA_DATA_SIZE)) #define NODE_SIZE( n ) ((n)->size & ~0x7) #define IS_USED( n ) ((n)->size & 0x1) #define SET_USED( n ) ((n)->size |= 0x1) #define IS_FREE( n ) (! ((n)->size & 0x1)) #define SET_FREE( n ) ((n)->size &= ~0x1) #define SET_DEALLOC( n ) ((n)->size |= 0x2) #define IS_DEALLOC( n ) ((n)->size & 0x2) #define MAX(x,y) ((x) > (y) ? (x) : (y)) /***************************************************************** ***************************************************************** ** ** forward declarations ** ***************************************************************** *****************************************************************/ /* * initialise/finish malloc */ static void rmalloc_init (); static void rmalloc_finish (); static void init_alloc_sizes (); /* * heap management */ static void init_heap ( heap_t * heap ); static void insert_heap ( heap_t * heap ); static heap_t * get_free_heap ( heap_t * old ); /* * allocation/deallocation */ void * r_malloc ( size_t size ); void * r_calloc ( size_t nmemb, size_t size); void r_free ( void * ptr ); void * r_realloc ( void * ptr, size_t size ); /* * free all chunks */ static void rmalloc_clean (); /* * alternative handling of small allocation sizes */ #ifdef HANDLE_SMALL static node_t * allocate_small ( heap_t * heap, internal_size_t size ); #endif /* * handling of chunks */ static node_t * get_free ( heap_t * heap, internal_size_t size ); static void put_free ( heap_t * heap, node_t * node ); static void store_unused ( void * ptr, internal_size_t size, heap_t * heap ); /* * trace memory allocation */ static void rmalloc_init_trace ( const char * name ); static void rmalloc_trace (); static void rmalloc_finish_trace (); /* * print statistics */ static void rmalloc_stat (); /* * build size-classes */ static void build_size_classes (); /* * translate size into size-class */ static unsigned int size_to_class ( internal_size_t size ) { if ( size <= SMALL_SIZE ) return (size >> 3) - 1; else { unsigned int sclass = SMALL_SIZE >> 3; while ((sclass < NO_OF_CLASSES) && (size_classes[ sclass ] < size)) sclass++; return sclass-1; } } /***************************************************************** ***************************************************************** ** ** allocation of memory from system ** ***************************************************************** *****************************************************************/ /* * allocation from system via sbrk / mmap etc. */ static void * system_alloc ( heap_t * heap, internal_size_t n ) { char * tmp; data_seg_t * data_seg; #if USE_MALLOC == 1 || USE_SBRK == 1 DEFINE_MUTEX( mutex ); #endif if ( n == 0 ) return NULL; assert( heap != NULL ); /* get datasegment of given heap */ data_seg = & heap->data_seg; /* adjust n to be multiple of 8 */ if ( n % 8 != 0 ) n += 8 - (n % 8); /* * check if we have enough free space in heap */ if ( data_seg->local.size - data_seg->local.alloc >= n ) { data_seg->local.alloc += n; data_seg->global.alloc += n; } else { #if USE_MALLOC == 1 /* * use system-malloc to request a large memory-chunk */ /* put left space as chunk into free list of heap */ if ( data_seg->local.size - data_seg->local.alloc ) { store_unused( (void*) data_seg->pos, data_seg->local.size - data_seg->local.alloc, heap ); } data_seg->local.size = n + sys_conf.top_pad; data_seg->local.alloc = n; data_seg->global.alloc += n; /* adjust size to be multiple of pagesize */ if ( data_seg->local.size % sys_conf.pagesize != 0 ) data_seg->local.size += sys_conf.pagesize - (data_seg->local.size % sys_conf.pagesize); data_seg->global.size += data_seg->local.size; /* call malloc in a thread-safe way */ LOCK( mutex ); data_seg->begin = (char*) malloc( data_seg->local.size ); UNLOCK( mutex ); if ( data_seg->begin == (char*) NULL ) { fprintf( stderr, "(rmalloc) system_alloc : malloc failed (%s)\n", strerror( errno ) ); exit( 1 ); } data_seg->pos = data_seg->begin; data_seg->end = data_seg->begin + data_seg->local.size; #elif USE_MMAP == 1 /* * call mmap to request a large memory-chunk * (extending might be fine at later calls, but doesnīt work) */ /* put left space as chunk into free list of heap */ if ( data_seg->local.size - data_seg->local.alloc ) { store_unused( (void*) data_seg->pos, data_seg->local.size - data_seg->local.alloc, heap ); } data_seg->local.size = n + sys_conf.top_pad; data_seg->local.alloc = n; data_seg->global.alloc += n; /* adjust size to be multiple of pagesize */ if ( data_seg->local.size % sys_conf.pagesize != 0 ) data_seg->local.size += sys_conf.pagesize - (data_seg->local.size % sys_conf.pagesize); data_seg->global.size += data_seg->local.size; data_seg->begin = (char*) MMAP( 0, data_seg->local.size, PROT_READ | PROT_WRITE, MAP_PRIVATE ); if ( data_seg->begin == (char*) MAP_FAILED ) { fprintf( stderr, "(rmalloc) system_alloc : mmap failed (%s)\n", strerror( errno ) ); exit( 1 ); } data_seg->pos = data_seg->begin; data_seg->end = data_seg->begin + data_seg->local.size; #elif USE_SBRK == 1 /* lock, so we donīt call sbrk simultaneously */ if ( TRYLOCK( mutex ) == MUTEX_BUSY ) { fprintf( stderr, "(rmalloc) system_alloc : mutex locked\n" ); LOCK( mutex ); } /* * check if first call or someone other has called sbrk */ if ((data_seg->begin == NULL) || (((char*) sbrk(0)) != data_seg->end)) { /* sum up fragmentation */ data_seg->unused += data_seg->local.size - data_seg->local.alloc; /* * allocate new heap */ data_seg->local.size = n + sys_conf.top_pad; data_seg->local.alloc = n; data_seg->global.alloc += n; /* adjust size to be multiple of pagesize */ if ( data_seg->local.size % sys_conf.pagesize != 0 ) data_seg->local.size += sys_conf.pagesize - (data_seg->local.size % sys_conf.pagesize); data_seg->global.size += data_seg->local.size; data_seg->begin = (char*) sbrk( data_seg->local.size ); if ( data_seg->begin == (char*) SBRK_FAIL ) { fprintf( stderr, "(rmalloc) system_alloc : sbrk failed (%s)\n", strerror( errno ) ); exit( 1 ); } /* adjust heap to be aligned by 8 */ if ( ((internal_size_t) data_seg->begin) % 8 != 0 ) { internal_size_t adjust = 8 - (((internal_size_t) data_seg->begin) % 8); data_seg->begin += adjust; if ( sbrk( adjust ) == (void*) SBRK_FAIL ) { fprintf( stderr, "(rmalloc) system_alloc : sbrk failed (%s)\n", strerror( errno ) ); exit( 1 ); } } data_seg->pos = data_seg->begin; data_seg->end = (char*) sbrk(0); } else { /* * extend old heap */ internal_size_t add = n + sys_conf.top_pad; data_seg->local.alloc += n; data_seg->global.alloc += n; /* adjust size to be multiple of pagesize */ if ( add % sys_conf.pagesize != 0 ) add += sys_conf.pagesize - (add % sys_conf.pagesize); data_seg->local.size += add; data_seg->global.size += add; if ( sbrk( add ) == (void*) SBRK_FAIL ) { fprintf( stderr, "(rmalloc) system_alloc : sbrk failed (%s)\n", strerror( errno ) ); exit( 1 ); } data_seg->end = (char*) sbrk(0); } UNLOCK( mutex ); #else fprintf( stderr, "(rmalloc) system_alloc : no heap-allocation method defined (MALLOC,MMAP,SBRK)\n" ); exit( 1 ); #endif } /* * increment heap and return old position */ tmp = data_seg->pos; data_seg->pos += n; return (void *) tmp; } /***************************************************************** ***************************************************************** ** ** heap aka malloc/free routines ** ***************************************************************** *****************************************************************/ /* * init/finish heap-manager */ static void rmalloc_init () { DEFINE_MUTEX( mutex ); if ( IS_LOCKED( mutex ) ) { fprintf( stderr, "(rmalloc) rmalloc_init : mutex locked\n" ); LOCK( mutex ); } if ( ! is_initialised ) { unsigned int i; char * value; heap_t * heap; sys_conf.pagesize = getpagesize(); /* init global heap */ init_heap( & global_heap ); /* init heap for first thread */ LOCK( global_heap.mutex ); heap = (heap_t*) system_alloc( & global_heap, sizeof(heap_t) ); UNLOCK( global_heap.mutex ); heap_list.nheaps++; init_heap( heap ); insert_heap( heap ); /* set up allocation sizes */ init_alloc_sizes(); /* init thread-specific-data */ for ( i = 0; i < TSD_MAX; i++ ) tsd[i] = NULL; /* * shall we build size-classes */ if ((value = getenv( "RMALLOC_BUILD_SIZE" )) != NULL) { if ( strcmp( value, "1" ) == 0 ) { build_size_classes(); exit( 1 ); } } /* * setup memory trace */ value = getenv( "RMALLOC_TRACE" ); if (( value != NULL ) && (value[0] == '1')) { /* get size of trace */ if ((value = getenv( "RMALLOC_TRACE_SIZE" )) != NULL ) { if ( strcmp( value, "all" ) == 0 ) trace.size = 0; else { trace.size = atoi( value ); /* truncate to multiple of 8 not bigger than trace_size */ trace.size = (trace.size / 8) * 8; } } /* get name of tracefile and initialise */ if ((value = getenv( "RMALLOC_TRACE_FILE" )) != NULL ) rmalloc_init_trace( value ); else rmalloc_init_trace( DEFAULT_TRACE_FILE ); } /* * register cleanup-functions */ if ( atexit( rmalloc_finish ) != 0 ) fprintf( stderr, "(rmalloc) init : error in atexit (%s)\n", strerror( errno ) ); if ( atexit( rmalloc_finish_trace ) != 0 ) fprintf( stderr, "(rmalloc) init : error in atexit (%s)\n", strerror( errno ) ); if ((value = getenv( "RMALLOC_STAT" )) != NULL ) { heap_stat_lvl = atoi( value ); if ( heap_stat_lvl > 0 ) { if ( atexit( rmalloc_stat ) != 0 ) fprintf( stderr, "(rmalloc) init : error in atexit (%s)\n", strerror( errno ) ); } } is_initialised = true; } UNLOCK( mutex ); } static void rmalloc_finish () { rmalloc_clean(); } /* * initialise a heap */ static void init_heap ( heap_t * heap ) { unsigned int i; int status; heap->next = NULL; heap->id = heap_id++; heap->data_seg.begin = NULL; heap->data_seg.pos = NULL; heap->data_seg.end = NULL; heap->data_seg.global.size = 0; heap->data_seg.global.alloc = 0; heap->data_seg.local.size = 0; heap->data_seg.local.alloc = 0; heap->data_seg.unused = 0; for ( i = 0; i < NO_OF_CLASSES; i++ ) { heap->free[i] = NULL; heap->stat.alloc[i] = 0; heap->stat.free[i] = 0; heap->stat.used[i] = 0; } heap->stat.used_mem = 0; heap->stat.mem_in_use = 0; if ( heap != & global_heap ) { MUTEX_INIT( heap->mutex, status ); } } /* * inserts new heap into heap-list */ static void insert_heap ( heap_t * heap ) { if ( IS_LOCKED( heap_list.mutex ) ) { fprintf( stderr, "(rmalloc) insert_heap : mutex locked\n" ); LOCK( heap_list.mutex ); } /* prepend into circular list */ if ( heap_list.heaps == NULL ) { heap_list.heaps = heap; heap->next = heap; } else { heap->next = heap_list.heaps->next; heap_list.heaps->next = heap; } heap_list.nheaps++; UNLOCK( heap_list.mutex ); } /* * inserts new heap into heap-list */ static heap_t * get_free_heap ( heap_t * old ) { heap_t * first; heap_t * heap; if ( old == NULL ) heap = first = heap_list.heaps; else { first = old; heap = old->next; } /* * look for free heap */ do { if ( IS_LOCKED( heap->mutex ) ) heap = heap->next; else /* found free heap */ return heap; } while ( heap != first ); /* no free heap found */ return NULL; } /* * setup allocation sizes for very small chunks */ static void init_alloc_sizes () { size_t pagesize = getpagesize(); size_t minsize = sizeof(node_t); unsigned int i, j; /* adjust to be multiple of 8 */ if ( (minsize / 8) * 8 < minsize ) minsize = ((minsize / 8) + 1) * 8; j = 0; for ( i = 8; i <= SMALL_SIZE; i += 8 ) { unsigned int size = i; unsigned int count = 0; unsigned int bsize = 4 * pagesize; unsigned int min = size; /* compute multiple of pagesize with smallest split */ while ( bsize <= 8 * pagesize ) { if ( (bsize - ((bsize / size) * size)) < min ) { min = bsize - ((bsize / size) * size); count = bsize / pagesize; if ( min == 0 ) break; } bsize += pagesize; } bsize = count * pagesize; count = bsize / size; alloc_size[ j++ ] = count; } } /********************************************* * * allocation and deallocation * *********************************************/ void * r_malloc ( size_t size ) { node_t * node = NULL; heap_t * heap = NULL; unsigned long tid; if ( size == 0 ) return NULL; if ( ! is_initialised ) rmalloc_init(); /* adjust size to hold management data */ size = EXTRA_DATA_SIZE + MAX( sizeof(node_t) - EXTRA_DATA_SIZE, size ); /* trim size to be multiple of 8 */ if ( size % 8 != 0 ) size += 8 - (size % 8); /* * get heap to use in this thread and lock it */ tid = THREAD_ID; /* * get heap for this thread */ /* first use last heap used by this thread */ heap = OLD_HEAP( tid ); if ((heap == NULL) || ( IS_LOCKED( heap->mutex ) )) { /* look for free heap or create new one */ if ((heap = get_free_heap( heap )) == NULL) { #ifdef DEBUG fprintf( stderr, "(rmalloc) malloc : building new heap\n" ); #endif LOCK( global_heap.mutex ); heap = (heap_t *) system_alloc( & global_heap, sizeof(heap_t) ); UNLOCK( global_heap.mutex ); init_heap( heap ); /* lock heap BEFORE inserting so we can be sure, no other grabs it */ LOCK( heap->mutex ); insert_heap( heap ); } } assert( heap != NULL ); /* * get free block if available */ node = get_free( heap, size ); /* if not, allocate new block */ if ( node == NULL ) { unsigned int slot = size_to_class( size ); #ifdef HANDLE_SMALL if ( size <= SMALL_SIZE ) { node = allocate_small( heap, size ); } else #endif { node = (node_t*) system_alloc( heap, size ); heap->stat.used_mem += size; node->size = size; node->heap = heap; heap->stat.alloc[ slot ]++; } heap->stat.used[ slot ]++; } assert( node != NULL ); node->heap = heap; heap->stat.mem_in_use += NODE_SIZE( node ); if ( trace_mem ) rmalloc_trace(); /* remember old heap */ tsd[ tid ] = heap; UNLOCK( heap->mutex ); return NODE_TO_PTR( node ); } void * r_calloc ( size_t nmemb, size_t size) { void * p; if ((nmemb == 0) || (size == 0)) return NULL; p = r_malloc( nmemb * size ); /* memset to zero */ if ( p != NULL ) memset( p, 0, nmemb * size ); return p; } void r_free ( void * ptr ) { node_t * node; internal_size_t size; heap_t * heap = NULL; /* * our own management */ if ( ptr == NULL ) return; /* should at least have called malloc for this ptr, if not : ERROR */ if ( ! is_initialised ) { fprintf( stderr, "(rmalloc) free : not initialised\n" ); exit( 1 ); } node = PTR_TO_NODE( ptr ); size = NODE_SIZE( node ); #if 1 /* * put chunk back into itīs own heap */ heap = (heap_t*) node->heap; /* * check if heap does exist */ #if CHECK_HEAP == 1 if ( ((unsigned long) heap) < 0xff ) { #ifdef DEBUG fprintf( stderr, "(rmalloc) free : node hase unknown heap: node = %p, heap = %p\n", node, heap ); #endif return; } else { bool found_heap = false; if ( heap == & global_heap ) found_heap = true; else { heap_t * tmp; if ((tmp = heap_list.heaps) != NULL) { do { if ( tmp == heap ) { found_heap = true; break; } tmp = tmp->next; } while ( tmp != heap_list.heaps ); } } if ( ! found_heap ) { #ifdef DEBUG fprintf( stderr, "(rmalloc) free : node hase unknown heap: node = %p, heap = %p\n", node, heap ); #endif return; } } #endif LOCK( heap->mutex ); #else { /* * try "local" heap first */ unsigned long tid = THREAD_ID; heap = OLD_HEAP( tid ); if ( IS_LOCKED( heap->mutex ) ) { /* * now try heap of chunk */ heap = (heap_t*) node->heap; LOCK( heap->mutex ); } else /* "reheap" the chunk */ node->heap = heap; } #endif /* move node into free area */ put_free( heap, node ); SET_FREE(node); heap->stat.mem_in_use -= size; if ( trace_mem ) rmalloc_trace(); UNLOCK( heap->mutex ); } void * r_realloc ( void * ptr, size_t size ) { if ( ptr == NULL ) return r_malloc( size ); else { node_t * node = PTR_TO_NODE( ptr ); void * newptr = NULL; /* if new size < old size do nothing */ if ( NODE_SIZE(node) >= size ) return ptr; /* allocate new chunk, copy and free old */ newptr = r_malloc( size ); memcpy( newptr, ptr, NODE_SIZE(node) ); r_free( ptr ); return newptr; } } /* * allocate big chunk of very small blocks */ #ifdef HANDLE_SMALL static node_t * allocate_small ( heap_t * heap, internal_size_t size ) { unsigned int slot = (size >> 3) - 1; node_t * p; node_t * node; node_t * last = NULL; internal_size_t i; assert( heap != NULL ); /* * allocate more than one node */ if ((p = (node_t*) system_alloc( heap, alloc_size[ slot ] * size )) == NULL ) { fprintf( stderr, "(rmalloc) allocate_very_small : no memory available\n" ); exit( 1 ); } heap->stat.used_mem += size * alloc_size[ slot ]; heap->stat.alloc[ slot ] += alloc_size[ slot ]; heap->stat.free[ slot ] += alloc_size[ slot ]; /* * initialise nodes in memory chunk and * put them into free list */ #define NODE( i ) ((node_t*) (((char*) p) + ((i) * size))) node = p; for ( i = 0; i < alloc_size[ slot ]; i++ ) { /* * set size */ node = NODE( i ); node->size = size; node->heap = heap; /* first block mark as being deallocatable */ if ( i == 0 ) SET_DEALLOC( node ); /* set link in list */ if ( last != NULL ) last->next = node; node->next = NULL; node->prev = last; last = node; } /* * put list into free list of nodes * - method is only called if no free node * available, so we can set directly */ heap->free[ slot ] = p; /* * return node from free-list */ return get_free( heap, size ); } #endif /* * really free all chunks */ static void rmalloc_clean () { /* * clean will be called at the end and then * the memory will be deallocated by the system * so why bother ??? */ return; } /************************************* * * management * *************************************/ /* * return node of suitable size */ static node_t * get_free ( heap_t * heap, internal_size_t size ) { /* * search list for best-fit */ node_t * node; node_t * fit = NULL; unsigned int slot = size_to_class( size ); assert( heap != NULL ); if ((node = heap->free[ slot ]) == NULL) return NULL; /* * look for best fit */ while ( node != NULL ) { if ( NODE_SIZE(node) >= size ) /* "==" - best fit; ">=" - first fit */ { /* * remove node from list */ if ( node->prev != NULL ) node->prev->next = node->next; else heap->free[ slot ] = node->next; if ( node->next != NULL ) node->next->prev = node->prev; heap->stat.free[ slot ]--; heap->stat.used[ slot ]++; return node; } else if ( NODE_SIZE(node) > size ) { if (fit != NULL) { if (NODE_SIZE(node) < NODE_SIZE(fit)) fit = node; } else fit = node; } node = node->next; } if ( fit != NULL ) { if ( fit->prev != NULL ) fit->prev->next = fit->next; else heap->free[ slot ] = fit->next; if ( fit->next != NULL ) fit->next->prev = fit->prev; heap->stat.free[ slot ]--; heap->stat.used[ slot ]++; return fit; } return NULL; } /* * insert big node into internal datastructure */ static void put_free ( heap_t * heap, node_t * node ) { unsigned int slot; node_t * first; assert((heap != NULL) && (node != NULL)); slot = size_to_class( NODE_SIZE(node) ); /* * prepend node */ first = heap->free[ slot ]; if (first == NULL) { heap->free[ slot ] = node; node->next = NULL; } else { heap->free[ slot ] = node; node->next = first; first->prev = node; } node->prev = NULL; heap->stat.free[ slot ]++; heap->stat.used[ slot ]--; } /* * insert unused end of data-segment as free chunk */ static void store_unused ( void * ptr, internal_size_t size, heap_t * heap ) { node_t * node = (node_t*) ptr; /* check if we have enough space for a node plus data */ if ( size > (EXTRA_DATA_SIZE + 8) ) { unsigned int slot; size -= EXTRA_DATA_SIZE; slot = size_to_class( size ); /* set node-size */ node->size = size; node->heap = heap; heap->stat.used_mem += size; heap->stat.alloc[ slot ]++; } } /* * set malloc options */ int r_mallopt ( int param, int val ) { if ( param == M_TOP_PAD ) { if ( val < 0 ) val = 0; sys_conf.top_pad = val; return 0; } return 0; } /* * report memory usage */ struct mallinfo r_mallinfo ( void ) { struct mallinfo mi; internal_size_t total_size; internal_size_t used_size; internal_size_t free_size; heap_t * heap; if ( IS_LOCKED( global_heap.mutex ) ) { fprintf( stderr, "(rmalloc) mallinfo : global heap locked\n" ); LOCK( global_heap.mutex ); } total_size = global_heap.data_seg.global.size; used_size = global_heap.stat.mem_in_use; free_size = global_heap.stat.used_mem - global_heap.stat.mem_in_use; UNLOCK( global_heap.mutex ); if ( IS_LOCKED( heap_list.mutex ) ) { fprintf( stderr, "(rmalloc) mallinfo : heap list locked\n" ); LOCK( heap_list.mutex ); } if ((heap = heap_list.heaps) != NULL) { do { if ( IS_LOCKED( heap->mutex ) ) { fprintf( stderr, "(rmalloc) mallinfo : heap locked\n" ); LOCK( heap->mutex ); } total_size += heap->data_seg.global.size; used_size += heap->stat.mem_in_use; free_size += heap->stat.used_mem - heap->stat.mem_in_use; UNLOCK( heap->mutex ); heap = heap->next; } while ( heap != heap_list.heaps ); } UNLOCK( heap_list.mutex ); mi.arena = total_size; /* total space allocated from system */ mi.ordblks = 0; /* number of non-inuse chunks */ mi.smblks = 0; /* unused -- always zero */ mi.hblks = 0; /* number of mmapped regions */ mi.hblkhd = 0; /* total space in mmapped regions */ mi.usmblks = 0; /* unused -- always zero */ mi.fsmblks = 0; /* unused -- always zero */ mi.uordblks = used_size; /* total allocated space */ mi.fordblks = free_size; /* total non-inuse space */ mi.keepcost = 0; /* top-most, releasable (via malloc_trim) space */ return mi; } /* * allocation trace */ static void rmalloc_init_trace ( const char * name ) { assert( name != NULL ); if ((trace.fd = open( name, O_WRONLY | O_CREAT | O_TRUNC )) == -1) { fprintf( stderr, "(rmalloc) init_trace : could not open \"%s\" (%s)\n", name, strerror( errno ) ); return; } if (fchmod( trace.fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) == -1) fprintf( stderr, "(rmalloc) init_trace : could not chmod \"%s\" (%s)\n", name, strerror( errno ) ); trace.step = 0; trace.old_used = 0; trace.old_free = 0; trace_mem = true; } static void rmalloc_trace () { internal_size_t used, free; if ( ! trace_mem ) return; if ( trace.size == 0 ) { used = global_heap.stat.mem_in_use / 1024; free = (global_heap.stat.used_mem - global_heap.stat.mem_in_use) / 1024; } else { used = global_heap.stat.used[ trace.size >> 3 ] * trace.size; free = global_heap.stat.free[ trace.size >> 3 ] * trace.size; } if (((used > 0) || (free > 0)) && ((used != trace.old_used) || (free != trace.old_free))) { char buffer[256]; sprintf( buffer, "%ld %ld %ld\n", trace.step, used, free ); write( trace.fd, buffer, strlen(buffer) ); trace.old_used = used; trace.old_free = free; } trace.step++; } static void rmalloc_finish_trace () { if ( trace_mem ) { close( trace.fd ); trace_mem = false; } } /* * build size-classes */ static void build_size_classes () { unsigned int max = 100; /* maximal number of classes defined by power */ double basis = 1.2; /* basis for the power */ unsigned long small = 128; /* definition of small size */ unsigned long i; unsigned long val = 0; double dval = 1; unsigned long old = 0; unsigned long count = 0; fprintf( stderr, "static internal_size_t size_classes[ NO_OF_CLASSES ] = { \n" ); for ( i = 8; i <= small; i += 8 ) { count++; fprintf( stderr, "%ld, ", i ); } i = 0; while ( dval <= ((double)(small+8)) ) { dval *= basis; i++; } for ( ; i < max; i++ ) { if ( dval > ((double) (unsigned long) -1 ) ) { fprintf( stderr, "value too high\n" ); exit( 1 ); } val = (unsigned long) dval; /* round value to the next multiple of 8 */ val += 8 - (val % 8); if ( old != val ) { count++; fprintf( stderr, "%ld", val ); if ( i < (max-1) ) fprintf( stderr, ", " ); old = val; } dval *= basis; } fprintf( stderr, "\n };\n" ); fprintf( stderr, "#define NO_OF_CLASSES %ld\n", count ); fprintf( stderr, "#define SMALL_SIZE %ld\n", small ); } /* * print statistics */ static void print_heap_stat ( heap_t * heap ) { internal_size_t i; internal_size_t nblocks = 0; internal_size_t sblocks = 0; if ( heap_stat_lvl <= 1 ) return; /* * statistic for small-usage */ if ( heap_stat_lvl >= 3 ) { fprintf( stderr, " sizeclass | # alloc | # free | kB | comment \n" ); fprintf( stderr, "-----------+------------+------------+------------+---------------\n" ); for ( i = 0; i < NO_OF_CLASSES; i++ ) { if (heap->stat.alloc[i] > 0) { fprintf( stderr, "%10ld | %10ld | %10ld | %10ld |", size_classes[ i ], heap->stat.alloc[i], heap->stat.free[i], (heap->stat.free[i] * size_classes[ i ]) / 1024 ); if ( heap->stat.alloc[i] > heap->stat.free[i] ) fprintf( stderr, " %ld chunk(s) alive", heap->stat.alloc[i] - heap->stat.free[i] ); if ( heap->stat.alloc[i] < heap->stat.free[i] ) fprintf( stderr, " ERROR (too many freed)" ); fprintf( stderr, "\n" ); } } } /* * count memory */ for ( i = 0; i < NO_OF_CLASSES; i++ ) { node_t * node = heap->free[i]; while ( node != NULL ) { sblocks += NODE_SIZE(node); nblocks++; node = node->next; } } /* * output */ #define BYTE_TO_MB( n ) (((double)(n)) / (1000.0 * 1000.0)) if ((heap->stat.mem_in_use != 0) || (heap->stat.used_mem != 0)) fprintf( stderr, " mem in use / allocated = %.2f MB / %.2f MB\n", BYTE_TO_MB(heap->stat.mem_in_use), BYTE_TO_MB(heap->stat.used_mem) ); if (nblocks != 0) { fprintf( stderr, " no / size of free chunks = %ld / %.2f MB\n", (unsigned long) nblocks, BYTE_TO_MB(sblocks) ); } if ( heap->data_seg.unused != 0 ) fprintf( stderr, " fragmentation size = %.2f MB\n", BYTE_TO_MB(heap->data_seg.unused) ); } static void rmalloc_stat () { heap_t * heap; if ( heap_stat_lvl == 0 ) return; if ( ! is_initialised ) { fprintf( stderr, "(rmalloc) stat : heap not initialised\n" ); return; } /* * print statstics about each heap */ if ( heap_stat_lvl >= 2 ) { fprintf( stderr, "(rmalloc) global heap :\n" ); print_heap_stat( & global_heap ); if ((heap = heap_list.heaps) != NULL) { do { fprintf( stderr, "(rmalloc) heap %ld :\n", (long) heap->id ); print_heap_stat( heap ); heap = heap->next; } while ( heap != heap_list.heaps ); } } if ( heap_stat_lvl >= 1 ) { internal_size_t total_size = global_heap.data_seg.global.size; internal_size_t frag_size = global_heap.data_seg.unused; if ((heap = heap_list.heaps) != NULL) { do { total_size += heap->data_seg.global.size; frag_size += heap->data_seg.unused; heap = heap->next; } while ( heap != heap_list.heaps ); } fprintf( stderr, "(rmalloc) global stat:\n" ); if ( total_size > 0 ) fprintf( stderr, " allocated from system = %.2f MB\n", BYTE_TO_MB(total_size) ); if ( frag_size > 0 ) fprintf( stderr, " total fragmentation = %.2f MB\n", BYTE_TO_MB(frag_size) ); } } /* * overload global new/delete */ #if defined(__cplusplus) && OVERLOAD_NEW == 1 #include void * operator new ( size_t n ) throw (std::bad_alloc) { return r_malloc( n ); } void * operator new[] ( size_t n ) throw (std::bad_alloc) { return r_malloc( n ); } void operator delete ( void * p ) throw () { r_free( p ); } void operator delete[] ( void * p ) throw () { r_free( p ); } #endif /* defined(__cplusplus) && OVERLOAD_NEW == 1 */