#include "DS_Allocator.h" #ifdef MEMORY_HEAP_TRACK_FILE_AND_LINE #include #endif using namespace DataStructures; #include #include // #define ENABLE_MEMORY_HEAP_UNIT_TEST #ifdef ENABLE_MEMORY_HEAP_UNIT_TEST // For the UnitTest #include "GetTime.h" #include #include #include "SimpleMutex.h" #include "Rand.h" void Allocator::UnitTest(DataStructures::Allocator *initializedHeap) { char *c[10000]; c[0]=(char*) initializedHeap->Malloc(5000); initializedHeap->Free(c[0]); assert(initializedHeap->GetTotalBytesUsed()==0); c[0]=(char*) initializedHeap->Malloc(5000); c[1]=(char*) initializedHeap->Malloc(5000); initializedHeap->Free(c[0]); initializedHeap->Free(c[1]); assert(initializedHeap->GetTotalBytesUsed()==0); c[0]=(char*) initializedHeap->Malloc(5000); c[1]=(char*) initializedHeap->Malloc(5000); c[2]=(char*) initializedHeap->Malloc(5000); uint64_t totalBytesUsed = initializedHeap->GetTotalBytesUsed(); uint64_t maxBytesUsed = initializedHeap->GetMaxBytesUsed(); initializedHeap->Free(c[0]); initializedHeap->Free(c[1]); initializedHeap->Free(c[2]); assert(initializedHeap->GetTotalBytesUsed()==0); for (int i=1; i < 3457; i++) { c[i] = (char*) initializedHeap->Malloc(i); if (c[i]) { for (int j=0; j < i; j++) c[i][j]=(char) i; } } for (int i=1; i < 3457; i++) { for (int j=0; j < i; j++) { if (c[i]) assert((char) c[i][j]==(char) i); } } totalBytesUsed = initializedHeap->GetTotalBytesUsed(); maxBytesUsed = initializedHeap->GetMaxBytesUsed(); for (int i=1; i < 3457; i++) { initializedHeap->Free(c[i]); } assert(initializedHeap->GetTotalBytesUsed()==0); for (int i=1; i < 3457; i++) { c[i] = (char*) initializedHeap->Malloc(i); if (c[i]) { for (int j=0; j < i; j++) c[i][j]=(char) i; } } assert(initializedHeap->GetTotalBytesUsed()==totalBytesUsed); assert(initializedHeap->GetMaxBytesUsed()==maxBytesUsed); for (int i=3457-1; i > 0; i--) { initializedHeap->Free(c[i]); } assert(initializedHeap->GetTotalBytesUsed()==0); // Speed comparison test SimpleMutex sm; RakNetTimeUS t1,t2,t3; t1=RakNet::GetTimeUS(); t1=RakNet::GetTimeUS(); for (int i=1; i < 10000; i++) { sm.Lock(); c[i] = (char*) initializedHeap->Malloc(randomMT()%i+1); sm.Unlock(); } for (int i=1; i < 10000; i++) { sm.Lock(); initializedHeap->Free(c[i]); sm.Unlock(); } t2=RakNet::GetTimeUS(); for (int i=1; i < 10000; i++) { c[i] = (char*) malloc(randomMT()%i+1); } for (int i=1; i < 10000; i++) { free(c[i]); } t3=RakNet::GetTimeUS(); printf("Full alloc, then dealloc\n"); printf("Heap = %i us\n", t2-t1); printf("malloc = %i us\n", t3-t2); for (int i=1; i < 100; i++) { c[i] = 0; } int rand; t1=RakNet::GetTimeUS(); for (int i=1; i < 100000; i++) { rand=randomMT()%100; sm.Lock(); if (c[rand]) c[rand] = (char*) initializedHeap->Malloc(3000); else initializedHeap->Free(c[rand]); sm.Unlock(); } t2=RakNet::GetTimeUS(); for (int i=1; i < 100; i++) { c[i] = 0; } for (int i=1; i < 100000; i++) { rand=randomMT()%100; if (c[rand]) c[rand] = (char*) malloc(3000); else free(c[rand]); } t3=RakNet::GetTimeUS(); printf("Fragmentation\n"); printf("Heap = %i us\n", t2-t1); printf("malloc = %i us\n", t3-t2); } #endif Allocator::Allocator() { ResetVariables(); } Allocator::~Allocator() { } uint64_t Allocator::GetStartingAlignmentOffset(char *startingBlock) { uint64_t offsetToStartingBlock = (((uint64_t)startingBlock)%ALIGNMENT_BOUNDARY_IN_BYTES); while (offsetToStartingBlock < sizeof(DataPrefix)) offsetToStartingBlock+=ALIGNMENT_BOUNDARY_IN_BYTES; offsetToStartingBlock-=sizeof(DataPrefix); return offsetToStartingBlock; } bool Allocator::Init(char *startingBlock, uint64_t availableBytes ) { uint64_t offsetToStartingBlock = GetStartingAlignmentOffset(startingBlock); if (offsetToStartingBlock>=availableBytes) return false; ResetVariables(); // Find nearest aligned pointer to the starting block to use as the start of the clusters memoryStart=startingBlock+offsetToStartingBlock; availableBytes-=offsetToStartingBlock; // How many total clusters we can have (rounds down) totalClusters=availableBytes/CLUSTER_SIZE; variableClusterStartAddress=memoryStart; unsigned int i; for (i=0; i < NUM_TYPES_OF_BUCKETS; i++) { actualBucketSizes[i]=(SMALLEST_BUCKET<Init(); dp->dataBytes=memoryRequired-sizeof(DataPrefix); assert((uint64_t)((char*) dp + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); assert((uint64_t)((char*) dp + sizeof(DataPrefix)*2 + dp->dataBytes) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); if (availableFixedBucketList[bucketType]==0) { availableFixedBucketList[bucketType]=dp; } else { availableFixedBucketList[bucketType]->immediatePrev=dp; dp->nextFree=availableFixedBucketList[bucketType]; dp->immediatePrev=0; availableFixedBucketList[bucketType]=dp; } variableClusterStartAddress+=memoryRequired; memoryUsed+=memoryRequired; // Allocate smaller buckets linearly more frequently if (++bucketTypeCount==NUM_TYPES_OF_BUCKETS-bucketType) { bucketType=(bucketType+1)%NUM_TYPES_OF_BUCKETS; bucketTypeCount=0; } } dp = (DataPrefix*)variableClusterStartAddress; dp->Init(); availableBytes-=memoryUsed; dp->dataBytes=(unsigned int)(availableBytes - (availableBytes % ALIGNMENT_BOUNDARY_IN_BYTES) - sizeof(DataPrefix)); assert((uint64_t)((char*) dp + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); dp->nextFree=0; dp->immediatePrev=0; freeList=dp; end = dp->GetImmediateNext(); return true; } unsigned int Allocator::GetBucketIndex(uint64_t size) const { unsigned int clusterIndex=0; for (clusterIndex=0; clusterIndex < NUM_TYPES_OF_BUCKETS; clusterIndex++) { if (size+sizeof(DataPrefix) <= actualBucketSizes[clusterIndex]) break; } return clusterIndex; } void *Allocator::Malloc(uint64_t size) { if (size<=0) return 0; DataPrefix *dp; unsigned int clusterIndex=GetBucketIndex(size); if (clusterIndex!=NUM_TYPES_OF_BUCKETS && availableFixedBucketList[clusterIndex]) { dp = availableFixedBucketList[clusterIndex]; // No need to adjust immediatePrev pointer for buckets, always pushed / pulled off the front availableFixedBucketList[clusterIndex]=dp->nextFree; AddToTotalBytesUsed(actualBucketSizes[clusterIndex]); assert(((uint64_t)dp + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); assert(((uint64_t)dp->GetImmediateNext() + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); dp->isAllocated=true; return (char*)dp + sizeof(DataPrefix); } // Get from the free list a cluster large enough to hold what we want return AllocFromFreeList(size); } void *Allocator::Malloc(uint64_t size, const char *file, unsigned int line) { char *v = (char*) Malloc(size); WriteFileAndLineToUserBlock(v, file, line); return (void*) v; } void *Allocator::Realloc(void *pointer, uint64_t size) { if (pointer==0) return Malloc(size); DataPrefix *cur = PrefixFromUserBlock(pointer); uint64_t amountToCopy = cur->dataBytes > size ? size : cur->dataBytes; unsigned int clusterIndex=GetBucketIndex(size); // If could be in a cluster, but is currently not, move it there if (clusterIndex= variableClusterStartAddress && availableFixedBucketList[clusterIndex]) { char *newBlock = (char*) Malloc(size); memcpy(newBlock, pointer, (size_t) amountToCopy); Free(pointer); return newBlock; } if (size == cur->dataBytes) return pointer; if ((char*) cur >= variableClusterStartAddress) { unsigned int clusterSizeRequired = (unsigned int) size + sizeof(DataPrefix); unsigned int alignedClusterSizeRequired = clusterSizeRequired + (unsigned int) GetBytesToAddToBaseToAlignTo(clusterSizeRequired, ALIGNMENT_BOUNDARY_IN_BYTES); unsigned int requiredDataBytesInBlock=alignedClusterSizeRequired - sizeof(DataPrefix); // Regular cluster, not a bucket if (requiredDataBytesInBlock < cur->dataBytes) { if (size >= cur->dataBytes-REALLOC_WASTE_THRESHHOLD) return pointer; // Shrink current block, return excess to freeList DataPrefix *excess = (DataPrefix*)((unsigned char*) cur + alignedClusterSizeRequired); excess->Init(cur->nextFree, cur, (unsigned int) alignedClusterSizeRequired); cur->nextFree=excess; } else { // Expand current block. Must reallocate } } else { // Already in a bucket } char *newBlock = (char*) Malloc(size); memcpy(newBlock, pointer, (size_t) amountToCopy); Free(pointer); return newBlock; } void *Allocator::Realloc(void *pointer, uint64_t size, const char *file, unsigned int line) { char *v = (char*) Realloc(pointer,size); WriteFileAndLineToUserBlock(v, file, line); return (void*) v; } void Allocator::Free(void *pointer) { if (pointer==0) return; DataPrefix *immediateLeft, *immediateRight; DataPrefix *cur = PrefixFromUserBlock(pointer); assert(cur->isAllocated); assert(cur <= end); assert((char*) cur >= memoryStart); if ((char*) cur < variableClusterStartAddress) { // This is a block unsigned int clusterIndex=GetBucketIndex(cur->dataBytes); assert(clusterIndex < NUM_TYPES_OF_BUCKETS); cur->nextFree=availableFixedBucketList[clusterIndex]; availableFixedBucketList[clusterIndex]=cur; SubtractFromTotalBytesUsed(cur->dataBytes+sizeof(DataPrefix)); } else { // This is a cluster immediateLeft=cur->GetImmediatePrev(); immediateRight=cur->GetImmediateNext(); if (immediateRight==end) immediateRight=0; if (immediateLeft && immediateLeft->isAllocated==true) immediateLeft=0; if (immediateRight && immediateRight->isAllocated==true) immediateRight=0; if (immediateLeft && immediateRight) { // Join left and right // Right->next->prev=left if (immediateRight->GetImmediateNext()!=end) immediateRight->GetImmediateNext()->immediatePrev=immediateLeft; // Skip right immediateLeft->nextFree=immediateRight->nextFree; if (freeList==immediateRight) freeList=immediateLeft; // Expand immediateLeft->dataBytes+=cur->dataBytes+immediateRight->dataBytes+sizeof(DataPrefix)*2; SubtractFromTotalBytesUsed(cur->dataBytes+sizeof(DataPrefix)); } else if (immediateLeft) { // Extend left right // Adjust next free block's previous pointer to self // left->next->prev=left if (cur->GetImmediateNext()!=end) cur->GetImmediateNext()->immediatePrev=immediateLeft; // Expand immediateLeft->dataBytes+=cur->dataBytes+sizeof(DataPrefix); SubtractFromTotalBytesUsed(cur->dataBytes+sizeof(DataPrefix)); } else if (immediateRight) { // Extend right left // Right->next->prev = extended if (immediateRight->GetImmediateNext()!=end) immediateRight->GetImmediateNext()->immediatePrev=cur; // Right->prev->next = extended if (immediateRight->GetImmediatePrev()) immediateRight->GetImmediatePrev()->nextFree=cur; if (freeList==immediateRight) freeList=cur; cur->nextFree=immediateRight->nextFree; // Expand SubtractFromTotalBytesUsed(cur->dataBytes+sizeof(DataPrefix)); cur->dataBytes+=immediateRight->dataBytes+sizeof(DataPrefix); } else { // island // Add to head of list cur->immediatePrev=0; cur->nextFree=freeList; if (freeList) freeList->immediatePrev=cur; freeList=cur; SubtractFromTotalBytesUsed(cur->dataBytes+sizeof(DataPrefix)); } } // Not necessary, but safer to validate against double deletion cur->isAllocated=false; } const char* Allocator::GetFile(char *pointer) const { #if defined(MEMORY_HEAP_TRACK_FILE_AND_LINE) Allocator::DataPrefix *prefix = PrefixFromUserBlock(pointer); return prefix->file; #else return 0; #endif } unsigned short Allocator::GetLine(char *pointer) const { #if defined(MEMORY_HEAP_TRACK_FILE_AND_LINE) Allocator::DataPrefix *prefix = PrefixFromUserBlock(pointer); return prefix->line; #else return 0; #endif } uint64_t Allocator::GetTotalClusterCount(void) const { return totalClusters; } void Allocator::SprintfFragmentation(char *out, int wrapAtColumn) const { // TODO } void Allocator::PrintfFragmentation(void) const { // TODO } Allocator::DataPrefix* Allocator::PrefixFromUserBlock(void *v) const { return (Allocator::DataPrefix*) ((char*) v - sizeof(Allocator::DataPrefix)); } void Allocator::WriteFileAndLineToUserBlock(char *v, const char *file, unsigned short line) { #if defined(MEMORY_HEAP_TRACK_FILE_AND_LINE) if (v==0) return; Allocator::DataPrefix *prefix = PrefixFromUserBlock(v); prefix->line=line; int len = (int) strlen(file); int offset; if (len >= sizeof(prefix->file)) offset = len - sizeof(prefix->file) + 1; else offset=0; strcpy(prefix->file, file+offset); #endif } uint64_t Allocator::GetTotalBytesUsed(void) const { return totalBytesUsed; } uint64_t Allocator::GetMaxBytesUsed(void) const { return maxBytesUsed; } void Allocator::ResetVariables(void) { unsigned int i; for (i=0; i < NUM_TYPES_OF_BUCKETS; i++) { availableFixedBucketList[i]=0; } freeList=0; totalClusters=0; variableClusterStartAddress=0; totalBytesUsed=0; maxBytesUsed=0; end=0; memoryStart=0; } char *Allocator::AllocFromFreeList(uint64_t size) { if (size==0) return 0; DataPrefix *cur = freeList; DataPrefix *last=0; while (cur!=0) { unsigned int clusterSizeRequired = (unsigned int) size + sizeof(DataPrefix); unsigned int alignedClusterSizeRequired = clusterSizeRequired + (unsigned int) GetBytesToAddToBaseToAlignTo(clusterSizeRequired, ALIGNMENT_BOUNDARY_IN_BYTES); unsigned int requiredDataBytesInBlock=alignedClusterSizeRequired - sizeof(DataPrefix); if (cur->dataBytes>=requiredDataBytesInBlock) { if (requiredDataBytesInBlock < cur->dataBytes) { // Break off the excess, reinsert at same spot DataPrefix *excess = (DataPrefix*)((unsigned char*) cur + alignedClusterSizeRequired); excess->Init(cur->nextFree, cur, (unsigned int) cur->dataBytes-alignedClusterSizeRequired); if (last) last->nextFree=excess; else freeList=excess; if (cur->nextFree) cur->nextFree->immediatePrev=excess; cur->nextFree=excess; assert(((uint64_t)excess + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); assert(((uint64_t)excess->GetImmediateNext() + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); assert((excess->dataBytes+sizeof(DataPrefix))%ALIGNMENT_BOUNDARY_IN_BYTES==0); } else { // Point prior to nextFree if (last) last->nextFree=cur->nextFree; else freeList=cur->nextFree; } AddToTotalBytesUsed(alignedClusterSizeRequired); assert(((uint64_t)cur + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); assert(((uint64_t)cur->GetImmediateNext() + sizeof(DataPrefix)) % ALIGNMENT_BOUNDARY_IN_BYTES == 0); assert((cur->dataBytes+sizeof(DataPrefix))%ALIGNMENT_BOUNDARY_IN_BYTES==0); cur->isAllocated=true; cur->dataBytes=requiredDataBytesInBlock; return (char*)cur + sizeof(DataPrefix); } last=cur; cur=cur->nextFree; } return 0; } void Allocator::AddToTotalBytesUsed(uint64_t size) { totalBytesUsed+=size; if (totalBytesUsed>maxBytesUsed) maxBytesUsed=totalBytesUsed; } void Allocator::SubtractFromTotalBytesUsed(uint64_t size) { assert(totalBytesUsed>=size); totalBytesUsed-=size; } uint64_t Allocator::GetBytesToAddToBaseToAlignTo(uint64_t base, uint64_t boundary) { uint64_t mod = base % boundary; if (mod==0) return 0; return boundary - mod; }