Design Intro

  • Implement a function my_malloc()
  • When calling, divide a part memory from pool and return the pointer
  • If there are no more free memory in pool(For instance, at start, pool is empty), acquire memory with mmap()
  • Recycle and merge free memory when calling free()

Implementation

Const Values and header files

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#include <assert.h>
#include <stddef.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

// Word alignment
const size_t kAlignment = sizeof(size_t);
// Minimum allocation size (1 word)
const size_t kMinAllocationSize = kAlignment;
// Maximum allocation size (16 MB)
extern const size_t kMaxAllocationSize;
// Arena size is 4 MB
const size_t ARENA_SIZE = (4ull << 20);

const size_t kDataAlignment = 1 << 8; // 256 Bytes For data align, must be 2^n
const size_t kDataAlignmentMask = kDataAlignment - 1;
// Maximum allocation size (16 MB) (user limit not program limit).
const size_t kMaxAllocationSize = (16ull << 20);

const size_t kMemorySize = ARENA_SIZE; // size to mmap once

const size_t WORD_LENGTH = sizeof(size_t);

size_t memory_size = 0; // mark how many we have allocated

Data Structure

In the pool, there are several blocks, each block representing a part of memory.

All blocks are linked as a linked list

1
2
3
4
5
6
7
8
9
10
11
typedef struct Block
{
size_t occupied;
struct Block *nxt;
struct Block *nxt_mmap;
size_t data_length;
void *data_ptr;
// 64 bytes for Block Meta data, on 64-bit machine
// block address is write before data_ptr sizeof(size_t)
} Block;
const size_t kBlockMetadataSize = sizeof(Block) + WORD_LENGTH + kDataAlignmentMask & ~kDataAlignmentMask;

Member Values:

In a Block there are two parts:

  • Block meta data
  • padding area for align address
  • user’s data

integer occupied is a boolean(0/1), indicating if these block is in using or freed
pointer *nxt is the next block
pointer *nxt_mmap is the block which is the header of next mmap part
integer data_length is user’s required data length
pointer *data_ptr is the begin address of user’s data

Block Composition

A Block structure need 5 Words(5*8=40 bytes on 64 bit machine) for meta data, after that, there are padding and then data field.

Data field will align at least 64 bytes, so there are 24 bytes padding.
地址至少以64bytes对齐,对齐后还剩24bytes padding,够放3个int,最后一个int(也就是data_ptr - 1)存Block的地址,用于从帮助data ptr直接快速找到对应的block ptr

1
2
|------------|-----------|---------|------------
|Block meta | Padding |Block ptr|Data ptr

Utility Functions

calculate aligned address by round up/down pointer with MASK

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
size_t round_up(size_t x)
{
// malloc target_size, but give larger space for alignment
// if malloc 200, align 64, then get (200 + 64) & -64 = 256
return x + kDataAlignmentMask & ~kDataAlignmentMask;
}
size_t round_down(size_t x)
{
return x & ~kDataAlignmentMask;
}
void *get_data_ptr(void *block)
{
/**
* read-only function
* just calculate a data ptr for block
*/
size_t block_addr = (size_t)block;
size_t data_addr = round_up(block_addr + kBlockMetadataSize);
return (void *)data_addr;
}
size_t get_block_size(Block *block)
{
return (size_t)block->data_ptr - (size_t)block + block->data_length;
}

Initialize data pointer in the block, write the address of block before data_ptr(data_ptr[-1])

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
void *init_data_ptr(Block *block)
{
/**
* get a aligned address after:
* - block itself: sizeof(Block)
* - block's address: sizeof(size_t)
*/
void *data_ptr = get_data_ptr(block);
((size_t *)data_ptr)[-1] = (size_t)block;
return data_ptr;
}
Block *init_block(Block *block, size_t size)
{
/**
* block: ptr of block to be init
* size: all of the block except block's metadata, including data align padding, data field
*/
block->occupied = 0;
block->data_ptr = init_data_ptr(block);

block->data_length = (size + kBlockMetadataSize) - ((size_t)block->data_ptr - (size_t)block);

block->nxt = NULL;
block->nxt_mmap = NULL;
return block;
}
Block *get_block_ptr(void *ptr)
{
return (Block *)(((size_t *)ptr)[-1]);
}

Memory Management

By mmap() function, acquire a large size of memory from OS

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
typedef unsigned long long ul;

/// Acquire more memory from OS
static Block *acquire_memory()
{
// Acquire one more chunk from OS
if (memory_size + kMemorySize >= 2 * kMaxAllocationSize)
return NULL;
Block *block = mmap(NULL, kMemorySize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
memory_size += kMemorySize;
// Initialize block metadata
block = init_block(block, kMemorySize - kDataAlignment);
return block;
}

Allocate

Main logic is in the my_malloc function:

A Start Block pointer in heap values, as the header of linked list

  • 首先判断是否超过allocation size limit,超过了直接return NULL
  • 进循环,先从pool里切出来最近的一个block,如果成功则data_ptr != NULL,直接return data_ptr
  • 分配失败,把pool里所有block合并回收
  • 再次分配一个最近的block
  • 再次失败说明pool里没有合适的block用来分配了,那么acquire_memory()再从OS申请一块空间

首次分配pool为空,前面的步骤都失败的,最终调用了acquire_memory()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Starting address
static Block *start = NULL;

void *my_malloc(size_t size)
{
size_t apply_mem_size = round_up(size);
if (size == 0 || apply_mem_size > kMaxAllocationSize)
return NULL;
while (1)
{
void *data_ptr = allocate_nearest_block_for_size(start, apply_mem_size);
if (data_ptr != NULL)
return data_ptr;

// no block fit apply_mem_size, trigger recycle.
recycle_all_block(start);

data_ptr = allocate_nearest_block_for_size(start, apply_mem_size);
if (data_ptr != NULL)
return data_ptr;
// still no available block, acquire another memory
Block *new_blk = acquire_memory();
if (new_blk == NULL)
return NULL;
new_blk->nxt = start;
new_blk->nxt_mmap = start;
start = new_blk;
}
return NULL;
}

Split blocks

切分Block, 一个Block经过mmap后就是一个很大的Block,占有整个mmap的内存,之后根据用户输入my_malloc()的size参数从这个block的data区切出来前面的size部分,并且原来的Block->nxt指向新切出来的Block

1
2
3
4
Before:
|Block-------------|data_ptr-----------------------------------------------------------
After:
|Block-------------|data_ptr-------------data_ptr+size|Block--------|data_ptr----------
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

Block *split_a_block_for_size(Block *block, size_t size)
{
/**
* split one block to two
* one's data length is size
* the rest field is another block
*/
Block *new_block = (Block *)((size_t)block->data_ptr + size);
size_t rest_size = block->data_length - size;
init_block(new_block, rest_size);
block->data_length = size;
new_block->nxt = block->nxt;
block->nxt = new_block;
return new_block;
}

void *allocate_nearest_block_for_size(Block *head, size_t siz)
{
for (Block *t_block = head; t_block != NULL; t_block = t_block->nxt)
{
if (t_block->occupied != 0 ||
t_block->data_length < siz)
continue;
if (t_block->data_length - siz > 3 * kDataAlignment)
{
// if target size is 256, but tBlock size is 512, then give 256 and free
// the rest 256 split and free rest memory
split_a_block_for_size(t_block, siz);
}
t_block->occupied = 1;
memset(t_block->data_ptr, 0, t_block->data_length);
return t_block->data_ptr;
}
return NULL;
}

Merge freed blocks

遍历链表,把 occupied为0的,相邻的Block合并成一个
即链表删除Block,data_length直接相加,因为相邻的block是连续内存

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
void recycle_all_block(Block *head)
{
for (Block *blk = head; blk != NULL; blk = blk->nxt)
{
if (blk->occupied == 0)
{
for (Block *tb = blk->nxt; tb != NULL; tb = blk->nxt)
{
if (tb->occupied)
{
break;
}
// tb is not occupied, delete tb and merge into blk
blk->nxt = tb->nxt;
blk->data_length += get_block_size(tb);
}
}
}
}

Free

如果指针不在mmap的内存内,则越界。
free(data_ptr),直接把data_ptr所属的block->occupied标记0

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/// Check if pointer is in our mmaped memory
static bool in_mmaped_memory(Block *block_head, void *ptr)
{
if (block_head == NULL)
{
// if we haven't mmaped anything then it is not in our memory
return false;
}
for (Block *blk = block_head; blk != NULL; blk = blk->nxt_mmap)
{
size_t block_start = (size_t)blk;
size_t block_end = (size_t)blk + kMemorySize;
size_t ptr_ = (size_t)ptr;
if (ptr_ < block_end && ptr_ >= block_start)
{
return true;
}
}
return false;
}

void my_free(void *ptr)
{
if (ptr == NULL)
return;
Block *blk = get_block_ptr(ptr);
if (!in_mmaped_memory(start, blk) || !blk->occupied)
{
errno = EINVAL;
fprintf(stderr, "my_free: %s\n", strerror(errno));
exit(1);
}
blk->occupied = 0;
}

Conclusion

kDataAlignment指 按照多少byte对齐,直接影响内存分配的速度。在我的电脑上是256最快。

如果多线程使用需要加锁