mallo函数流程详解

本文章推荐与源代码搭配食用。源码指路：https://elixir.bootlin.com/glibc/glibc-2.31/source/malloc/malloc.c
对了，文章开头介绍的1.xx版本的内容都没用，引入ptmalloc之后改动大得很。

简略情报

堆功能出现的最早版本大概是在glibc 1.09？wow~ 好早啊啊

版本更迭

glibc2.3.x版本开始，引入ptmalloc，支持多线程的内存分配器，通过减少锁的争用来提高性能和效率
glibc2.23之后，对malloc和free新加入多种安全检查
glibc2.27进一步增加size等多项检查

学习计划

从最早版本开始，找到应该看的版本
梳理各版本相关流程思路
统筹整理各版本新增防御策略

glibc 1.09

这个真的好简略喔，整个malloc函数只有短短的十数行，并且所有功能函数都在同一个文件里，路径/glibc/hurd/hurdmalloc.c，这里面就定义了三个函数，同时也是malloc的全部功能实现

数据结构

一共有两个结构体，分别是header和free_list,代码如下：

/*
 * Structure of memory block header.
 * When free, next points to next block on free list.
 * When allocated, fl points to free list.
 * Size of header is 4 bytes, so minimum usable block size is 8 bytes.
 */
typedef union header {
	union header *next;
	struct free_list *fl;
} *header_t;

#define MIN_SIZE	8	/* minimum block size */

typedef struct free_list {
	spin_lock_t lock;	/* spin lock for mutual exclusion */
	header_t head;		/* head of free list for this size */
#ifdef	DEBUG
	int in_use;		/* # mallocs - # frees */
#endif	DEBUG
} *free_list_t;

其中free_list是结构体数组，内部有锁lock和指向header的指针head，每个系统程序仅有一个malloc_free_list，在下面有其初始化。

/*
 * Free list with index i contains blocks of size 2^(i+3) including header.
 * Smallest block size is 8, with 4 bytes available to user.
 * Size argument to malloc is a signed integer for sanity checking,
 * so largest block size is 2^31.
 */
#define NBUCKETS	29

static struct free_list malloc_free_list[NBUCKETS];

more_memory

根据size申请至少一页内存并存储到相应的free_list数组元素的单链表中，这种方式无可避免地会浪费一些内存。

首先判断size是否有一页大小，若无则分割页，有则直接申请size大小，记作amount
随后调用vm_allocate函数为当前进程mach_task_self()申请一块amount大小的内存空间
随后将内存空间切割后存放到free_list链表中

static void
more_memory(size, fl)
	int size;
	register free_list_t fl;
{
	register int amount;
	register int n;
	vm_address_t where;
	register header_t h;
	kern_return_t r;

	if (size <= vm_page_size) {
		amount = vm_page_size;
		n = vm_page_size / size;
		/*
		 * We lose vm_page_size - n*size bytes here.  */
	} else {
		amount = size;
		n = 1;
	}
	MACH_CALL(vm_allocate(mach_task_self(), &where, (vm_size_t) amount, TRUE), r);
	h = (header_t) where;
	do {
	  h->next = fl->head;
	  fl->head = h;
	  h = (header_t) ((char *) h + size);
	} while (--n != 0);
}

malloc

malloc的功能实现就是先检查free_list中是否有合适大小的chunk，有则直接取用，无则调用 more_memory 申请。

free_list是一个元素中包含指针指向空闲chunk头的结构体列表。

void *
malloc(size)
	register size_t size;
{
	register int i, n;
	register free_list_t fl;
	register header_t h;

	if ((int) size < 0)		/* sanity check */
		return 0;
	size += sizeof(union header);
	/*
	 * Find smallest power-of-two block size
	 * big enough to hold requested size plus header.
	 */
	i = 0;
	n = MIN_SIZE;
	while (n < size) {
		i += 1;
		n <<= 1;
	}
	ASSERT(i < NBUCKETS);
	fl = &malloc_free_list[i];
	spin_lock(&fl->lock);
	h = fl->head;
	if (h == 0) {
		/*
		 * Free list is empty;
		 * allocate more blocks.
		 */
		more_memory(n, fl);
		h = fl->head;
		if (h == 0) {
			/*
			 * Allocation failed.
			 */
			spin_unlock(&fl->lock);
			return 0;
		}
	}
	/*
	 * Pop block from free list.
	 */
	fl->head = h->next;
#ifdef	DEBUG
	fl->in_use += 1;
#endif	DEBUG
	spin_unlock(&fl->lock);
	/*
	 * Store free list pointer in block header
	 * so we can figure out where it goes
	 * at free() time.
	 */
	h->fl = fl;
	/*
	 * Return pointer past the block header.
	 */
	return ((char *) h) + sizeof(union header);
}

free

相信看完malloc之后不看free也能知道其大致过程

根据参数判断处于free_list_t中元素位置，用以确定是否size超过最大值
将chunk链入free_list_t中指向的对应单链表

/* Declaration changed to standard one for GNU.  */
void
free(base)
	void *base;
{
	register header_t h;
	register free_list_t fl;
	register int i;

	if (base == 0)
		return;
	/*
	 * Find free list for block.
	 */
	h = (header_t) (base - sizeof(union header));
	fl = h->fl;
	i = fl - malloc_free_list;
	/*
	 * Sanity checks.
	 */
	if (i < 0 || i >= NBUCKETS) {
		ASSERT(0 <= i && i < NBUCKETS);
		return;
	}
	if (fl != &malloc_free_list[i]) {
		ASSERT(fl == &malloc_free_list[i]);
		return;
	}
	/*
	 * Push block on free list.
	 */
	spin_lock(&fl->lock);
	h->next = fl->head;
	fl->head = h;
#ifdef	DEBUG
	fl->in_use -= 1;
#endif	DEBUG
	spin_unlock(&fl->lock);
	return;
}

realloc

用来给old_chunk换一个大小的chunk,并将其中的内容拷贝进新chunk中。

这里面仍然没有合并和切割之类，只是申请一个new chunk将old chunk内容拷贝进去并free掉old chunk而已

/* Declaration changed to standard one for GNU.  */
void *
realloc(old_base, new_size)
        void *old_base;
        size_t new_size;
{
	register header_t h;
	register free_list_t fl;
	register int i;
	unsigned int old_size;
	char *new_base;

	if (old_base == 0)
	  return malloc (new_size);

	/*
	 * Find size of old block.
	 */
	h = (header_t) (old_base - sizeof(union header));
	fl = h->fl;
	i = fl - malloc_free_list;
	/*
	 * Sanity checks.
	 */
	if (i < 0 || i >= NBUCKETS) {
		ASSERT(0 <= i && i < NBUCKETS);
		return 0;
	}
	if (fl != &malloc_free_list[i]) {
		ASSERT(fl == &malloc_free_list[i]);
		return 0;
	}
	/*
	 * Free list with index i contains blocks of size 2^(i+3) including header.
	 */
	old_size = (1 << (i+3)) - sizeof(union header);
	/*
	 * Allocate new block, copy old bytes, and free old block.
	 */
	new_base = malloc(new_size);
	if (new_base != 0)
		bcopy(old_base, new_base, (int) (old_size < new_size ? old_size : new_size));
	free(old_base);
	return new_base;
}

总结

该版本是linux堆最初版本，可以看到初具雏形，但是安全意识不是很到位，不过最初版本都有一个很巧妙的安全检查是我没想到的。~~使用的header数据结构有冗余~~（奥，我错了，由于它使用了union定义结构体，所以实际上这个没有冗余），但是还是可以清晰的看到malloc_free_list数组，该数组的使用也一直延续到现在(虽然内容发生了翻天覆地的变化，但是都是在libc文件中定义了一个结构体数组用来存放堆块信息)。 union联合体作用， 内存分配: union 中的所有成员共用同一块内存，大小等于最大成员的大小。用途: 适合于只需要存储一个成员的情况（如节省内存）。 初始化: 只能初始化第一个成员，后续对其他成员的赋值会覆盖第一个成员的值。

下一次关键版本是glibc2.3.x版本开始使用ptmalloc2，但在此之前我想要先看看在紧随其后的版本中该函数的演变过程

glibc 1.90

该版本较上次版本并没有太多更新，但是在header上新增了一个检查以应对可能的溢出的情况。另外新增了许多宏，大部分是围绕新增的检查而写的。

* 更改以符合hurd libthreads/malloc.c:
* (more_memory): 使用assert_perror而不是MACH_CALL。
* “cthread_internals.h”: 删除了Include。
* (realloc): 使用LOG2_MIN_SIZE。
* (LOG2_MIN_SIZE): 新宏。
* (realloc): 不要费心分配一个新的块，如果
* 新的尺寸要求适合旧的，不会浪费任何空间。
* 只有释放旧块，如果我们成功地得到了一个新的。
* [MCHECK] (struct header): 新类型。
* (联合头): 仅定义if !MCHECK。
* (HEADER_SIZE，HEADER_NEXT，HEADER_FREE，HEADER_CHECK): 新宏。
* [MCHECK] (MIN_SIZE): 为这种情况添加正确的定义。
* (more_memory，malloc，free，realloc): 使用上面的宏，并添加适当的
* 在MCHECK情况下检查和frobs。

新增宏

该版本新增了许多宏定义，另外可以看到，似乎检查是试运行的，还定义了没有检查的情况

#ifdef MCHECK

typedef struct header {
  long check;
  union {
    struct header *next;
    struct free_list *fl;
  } u;
} *header_t;

#define HEADER_SIZE sizeof (struct header)
#define HEADER_NEXT(h) ((h)->u.next)
#define HEADER_FREE(h) ((h)->u.fl)
#define HEADER_CHECK(h) ((h)->check)
#define MIN_SIZE	16
#define LOG2_MIN_SIZE	4

#else /* ! MCHECK */

typedef union header {
	union header *next;
	struct free_list *fl;
} *header_t;

#define HEADER_SIZE sizeof (union header)
#define HEADER_NEXT(h) ((h)->next)
#define HEADER_FREE(h) ((h)->fl)
#define MIN_SIZE	8	/* minimum block size */
#define LOG2_MIN_SIZE	3

#endif /* MCHECK */

typedef struct free_list {
	spin_lock_t lock;	/* spin lock for mutual exclusion */
	header_t head;		/* head of free list for this size */
#ifdef	DEBUG
	int in_use;		/* # mallocs - # frees */
#endif	DEBUG
} *free_list_t;

添加了 HEADER_SIZE、HEADER_NEXT、HEADER_FREE、HEADER_CHECK四个新宏。以及LOG2_MIN_SIZE用于根据 free_list 数组序号计算size

感觉这一步属无奈之举，但是也为之后丰富的宏调用奠定了基础

more_memory 更新

使用assert_perror代替MACH_CALL进行报错判断

r = vm_allocate(mach_task_self(), &where, (vm_size_t) amount, TRUE);
	assert_perror (r);

realloc 优化

这里判断了new_size和old_size是否会申请到同一大小块，是的话直接返回old_base，算的上一个优化

if (new_size <= old_size
	    && new_size > (((old_size + HEADER_SIZE) >> 1) - HEADER_SIZE))
	  /* The new size still fits in the same block, and wouldn't fit in
	     the next smaller block!  */
	  return old_base;

其余部分变化不大，只是将必要的部分替换为宏来表示

glibc 1.91~93

无变化

glibc2.02

根据之前收集的信息，在该版本引入了ptmalloc2

hurdmalloc

先看hurd/hurdmalloc文件的变化，该文件仅更改了函数原型参数的写法，现在的风格，颇具一些现代风味

/*
 * HISTORY
 * $Log$
 * Revision 1.13  1996/12/20 01:32:01  drepper
 * Update from main archive 961219
 *
 * Revision 1.12  1996/11/15 19:44:13  thomas
 * Tue Nov 12 16:58:41 1996  Thomas Bushnell, n/BSG  <thomas@gnu.ai.mit.edu>
 *
 * 	* mach/msg-destroy.c (mach_msg_destroy_port,
 * 	mach_msg_destroy_memory): Use prototype	syntax.
 * 	* hurd/hurdmalloc.c (more_memory, malloc_fork_prepare,
 * 	malloc_fork_parent, malloc_fork_child): Likewise.
 */

static void
more_memory(int size, free_list_t fl)
{

malloc.h

没错，在该版本的时候终于引入了ptmalloc2，也就是我们现在常用的malloc架构，自此malloc可以多线程调用。

详细的先略过，我们先来看2.31版本malloc函数。

glibc2.31

2.31版本malloc函数实现部分在3500行左右，我们直接快进到这里开始阅读。

malloc

malloc函数大约2000行，下面就代码逻辑顺序进行介绍。函数原型为_int_malloc (mstate av, size_t bytes)，声明的内部变量为

INTERNAL_SIZE_T nb;               /* normalized request size */
  unsigned int idx;                 /* associated bin index */
  mbinptr bin;                      /* associated bin */

  mchunkptr victim;                 /* inspected/selected chunk */
  INTERNAL_SIZE_T size;             /* its size */
  int victim_index;                 /* its bin index */

  mchunkptr remainder;              /* remainder from a split */
  unsigned long remainder_size;     /* its size */

  unsigned int block;               /* bit map traverser */
  unsigned int bit;                 /* bit map traverser */
  unsigned int map;                 /* current word of binmap */

  mchunkptr fwd;                    /* misc temp for linking */
  mchunkptr bck;                    /* misc temp for linking */

#if USE_TCACHE
  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
#endif

代码逻辑

首先进行了内部变量的申请
然后处理bytes，使其大小标准化（对齐），if (!checked_request2size (bytes, &nb))
之后对堆进行初始化（第一次使用时），也就是调用sysmalloc函数从系统空间申请一块内存出来给该进程

代码解析_int_malloc

处理bytes

将bytes标准化

if (!checked_request2size (bytes, &nb))
    {
      __set_errno (ENOMEM);
      return NULL;
    }

初始化堆空间

第一次调用时，从系统空间申请内存

if (__glibc_unlikely (av == NULL))
    {
      void *p = sysmalloc (nb, av);
      if (p != NULL)
	alloc_perturb (p, bytes);
      return p;
    }

从fastbin取chunk

还没开始函数逻辑就出现了一个宏REMOVE_FB，功能是保证线程安全地从fastbin链表中移除某个节点，原理就是把victim->fd赋值给fb，主要是为了应付多线程环境。

#define REMOVE_FB(fb, victim, pp)			//\
  do							\
    {							\
      victim = pp;					\
      if (victim == NULL)				\
		break;						\
    }							\
  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
	 != victim);

ps:我注释了第一行的\，这样子可以对do…while看的清楚一点

我真傻，真的搁那儿看半天没想到是ai给的答案不准确，再去问了一遍才算是搞懂了，其实就是从fastbin链表中取出一个chunk块，为保证线程安全使用了原子操作，仅此而已。

fastbin

首先进行该判定。仅根据大小判断是否为fastbin，只要大小符合就进入fastbin申请流程。

如果大小不超过最大fast bin大小，且链表中有chunk就尝试从fastbins链表中取出chunk。之后就会尝试从fastbin中取出chunk块填满tcache（如果tcache未满的话）。在处理完tcache的问题后，会将指针挪向用户空间，然后使用alloc_perturb做扰动处理（使这里不会再泄露任何地址信息）

关于这个我有一个小问题，也就是说mallco函数分配的时候优先从fastbin链表中取chunk而不是tcache吗？不对啊，我记得不是这样的啊…这个问题的答案我将在之后单独解释，现在先继续讲解这部分代码。

if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
    	idx = fastbin_index (nb);
      mfastbinptr *fb = &fastbin (av, idx); // 这里的fb就代表着fastbin链表指向的位置
      mchunkptr pp;
      victim = *fb;

      if (victim != NULL)
	{
	  if (SINGLE_THREAD_P)
	    *fb = victim->fd; 	// 这边表示已经将chunk取出了
	  else
	    REMOVE_FB (fb, pp, victim);
	  if (__glibc_likely (victim != NULL))
	    {
	      size_t victim_idx = fastbin_index (chunksize (victim));
	      if (__builtin_expect (victim_idx != idx, 0))
		malloc_printerr ("malloc(): memory corruption (fast)");
	      check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
	      /* While we're here, if we see other chunks of the same size,
		 stash them in the tcache.  */
	      size_t tc_idx = csize2tidx (nb);
	      if (tcache && tc_idx < mp_.tcache_bins)
		{
		  mchunkptr tc_victim;

		  /* While bin not empty and tcache not full, copy chunks.  */
		  while (tcache->counts[tc_idx] < mp_.tcache_count
			 && (tc_victim = *fb) != NULL)
		    {
		      if (SINGLE_THREAD_P)
			*fb = tc_victim->fd;
		      else
			{
			  REMOVE_FB (fb, pp, tc_victim);
			  if (__glibc_unlikely (tc_victim == NULL))
			    break;
			}
		      tcache_put (tc_victim, tc_idx);
		    }
		}
#endif
	      void *p = chunk2mem (victim);
	      alloc_perturb (p, bytes);
	      return p;
	    }
	}
    }

fastbin_index 函数就是根据标准化之后的sz算fb_idx的。

/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

下面我对这部分代码精炼一下

if sz在fast范围内:
	idx, fb, pp, victim 初始化
	if victim != NULL(fastbins中有chunk):
		从fb单链表表头取出一个chunk(使用victim取)
		if victim!=NULL(成功取出):
			victim_idx 初始化
			if victim_idx != idx(fastbins中chunk被改了):
				报错“malloc(): memory corruption (fast)”
			# 若是debug版本，则check_remalloced_chunk会对分配的块进行再检查(可忽略)
			# 若tcache被使用，
			tc_idx 初始化
			if tcache结构体存在 && tc_idx在tcache_bins范围内:
				tc_victim 初始化
				while tc_count 没装满 && fb 没用完:
					从fb单链表表头取出一个chunk(使用tc_victim取)
					将取出的chunk放到tcache链表表头(使用tcache_put函数)
             # tcache部分结束
			将取出的chunk转化为mem
			对mem进行扰动处理，使其无法泄露地址
			返回mem

smallbin

首先判断nb是否在smallbin范围。不在的话会合并所有的fastbin

若在，则先计算idx并找到对应 fd 指向的 chunk 记为 bin。随后判断是否为空，空则立即结束。不为空则取 victim 的 bck，也就是表尾的上一个块。victim是我们要取的块，也是最后一个块。在取块的时候会有一个安全检查，检查bck->fd 是否等于victim。之后赋值inuse位和main_arena位。在将chunk从small bins双链表中取出来的时候，会先赋值bin->bk再赋值bck->fd。随后就是向tcache倾倒chunk，值得一提的是这里没有之前的安全检查，其余的不再赘述。

想起来之前学的的unlink技巧。诶对了，既然tcache这里没有安全检查，那么在没tcache的版本，unlink好像更简单了点？

if (in_smallbin_range (nb))
{
    idx = smallbin_index (nb);
    bin = bin_at (av, idx);

    if ((victim = last (bin)) != bin)
    {
        bck = victim->bk;
        if (__glibc_unlikely (bck->fd != victim))
            malloc_printerr ("malloc(): smallbin double linked list corrupted");
        set_inuse_bit_at_offset (victim, nb);
        bin->bk = bck;
        bck->fd = bin;

        if (av != &main_arena)
            set_non_main_arena (victim);
        check_malloced_chunk (av, victim, nb);
        #if USE_TCACHE
        /* While we're here, if we see other chunks of the same size,
	     stash them in the tcache.  */
        size_t tc_idx = csize2tidx (nb);
        if (tcache && tc_idx < mp_.tcache_bins)
        {
            mchunkptr tc_victim;

            /* While bin not empty and tcache not full, copy chunks over.  */
            while (tcache->counts[tc_idx] < mp_.tcache_count
                   && (tc_victim = last (bin)) != bin)
            {
                if (tc_victim != 0)
                {
                    bck = tc_victim->bk;
                    set_inuse_bit_at_offset (tc_victim, nb);
                    if (av != &main_arena)
                        set_non_main_arena (tc_victim);
                    bin->bk = bck;
                    bck->fd = bin;

                    tcache_put (tc_victim, tc_idx);
                }
            }
        }
        #endif
        void *p = chunk2mem (victim);
        alloc_perturb (p, bytes);
        return p;
    }
}

宏in_smallbin_range通过sz与largebin最小大小的表确定申请的chunk大小是否属于smallbin

#define NBINS             128
#define NSMALLBINS         64
#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)

#define in_smallbin_range(sz)  \
  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)
-----------------------------------------------------------------------------------
顺便将前面的内容也解释了：
- NBINS 总计128个bins
- NSMALLBINS smallbins共64个
- SMALLBIN_WIDTH 每个small bin的宽度
- SMALLBIN_CORRECTION 修正标志，判断内存是否严格大于2*SIZE_SZ
- MIN_LARGE_SIZE largebins最小大小

宏bin_at的作用就是根据 index 取 smallbins 中存储的 fd 。注释，列表中 *2 的原因是smallbins列表连续存储了 fd 和 bk 。-1的原因是bins的排序是从1开始的。

/* addressing -- note that bin_at(0) does not exist */
#define bin_at(m, i) \
  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))			      \
             - offsetof (struct malloc_chunk, fd))

宏last的作用就是方便地表示bk指针。

#define last(b)      ((b)->bk)

下面我对这部分代码精炼一下

if sz不大于largebin最小大小:
    idx, bin初始化
    if (victim=last(bin))!=NULL(smallbins双循环链表不为空):
    	bck初始化(bck是双循环链表末尾前一个)
        if bck->fd != victim(校验bck->fd是否被修改，可有效防止任意块被放进双链表):
            报错“malloc(): smallbin double linked list corrupted”
        置位inuse_bit
        将victim从smallbins双链表表尾取出(bin->bk = bck, bck->fd = bin)
        if av != &main_arena(不是主线程):
            置位no_main_arena
        # 若是debug版本，则check_malloced_chunk会对分配的块进行再检查(可忽略)
        # 若tcache被使用，
        tc_idx初始化
        if tcache结构体存在 && tc_idx在tcache_bins范围内:
            tc_victim 初始化
            while tc_count 没装满 && last(bin) 没用完:
                if tc_victim != 0:
                    将tc_victim从smallbins双链表表尾取出(bin->bk = bck, bck->fd = bin)
                    置位inuse_bit和main_arena
                    将取出的chunk放到tcache链表表头(使用tcache_put函数)
        # tcache部分结束
        将取出的chunk转化为mem
        对mem进行扰动处理，使其无法泄露地址
        返回mem

有个疑问，这里不用处理多线程环境下的影响了吗？

large bin 准备

如果上面的nb大于min largebin大小，则进入该部分对fastbin进行合并处理。

首先计算idx，然后判断是否存在fastchunks，若存在则调用malloc_consolidata对fastbins进行合并。

else
    {
      idx = largebin_index (nb);
      if (atomic_load_relaxed (&av->have_fastchunks))
        malloc_consolidate (av);
    }

malloc_consolidata代码在4440行，咱先不看了，继续往下看。

tcache准备

嗯，做了一些看不太明白的操作，初始化了tc_idx、tcache_nb、return_chached、tcache_unsorted_count，其中若nb超过tcache范围，则tcache_nb将被赋值为0，否则复制nb的值。但是，我确实看不太明白这里什么意思。

#if USE_TCACHE
  INTERNAL_SIZE_T tcache_nb = 0;
  size_t tc_idx = csize2tidx (nb);
  if (tcache && tc_idx < mp_.tcache_bins)
    tcache_nb = nb;
  int return_cached = 0;

  tcache_unsorted_count = 0;
#endif

之后看明白了再回来解释吧。

大循环for(;;)

这处笔记仅作为一个标志，标志大循环开始的位置。

遍历unsortedbins

遍历unsorted链表，取出了victim和bck，然后又做了一系列的安全检查。具体为

检查size,next的size,next的prev_size,检查双链表（bck->fd,victim->fd）,next的prev_inuse

while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
        {
          bck = victim->bk;
          size = chunksize (victim);
          mchunkptr next = chunk_at_offset (victim, size);

          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
              || __glibc_unlikely (size > av->system_mem))
            malloc_printerr ("malloc(): invalid size (unsorted)");
          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
            malloc_printerr ("malloc(): invalid next size (unsorted)");
          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
          if (__glibc_unlikely (bck->fd != victim)
              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
            malloc_printerr ("malloc(): unsorted double linked list corrupted");
          if (__glibc_unlikely (prev_inuse (next)))
            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

last_remainder分割

例外情况，仅当nb属于smallbin，且unsortedbins链表中仅存在一块chunk（victim->bk等于unsorted_chunks(av)）,且这块chunk是last_remainder，且剩下的大小足够分割时进入该逻辑。该逻辑是唯一不满足最优内存分配的策略

讲真的，我有一件事很奇怪。为什么要使用这么苛刻的方式来对last_remainder进行处理？这一个步骤到底是为了方便哪里？

很简单，由于最后一个块最后会被放入small bin或者large bin，随后再找到一个大于请求大小的块分割，分割后的块将会被放入unsorted 中并被打上last reaminder标志。所以我们对于unsorted中最后一个last remainder采取直接分割的策略，省略了中间的步骤，可以说这是一个很棒的优化策略。并且还有一点，由于unsorted bin是FIFO策略，该策略下采取last remainder处于最后一个unsorted chunk时进行分割是一个很棒的优化策略。我说不清，不过确实很棒。（我可能还没有窥见他真正的速度）

为什么这里要置位prev_inuse啊？victim不是被分割出来的吗？如果是被分割出来的怎么保证prev_chunk一定是被用了的，我有个推测，这里可能是防止被合并的？那也不应该啊。我不明白，这太奇怪了。
	关于这一点，我有一个推测。由于last_remainder是分割得来的，所以物理相邻的前一个块一定是被使用的，所以last_remainder再次被分割的时候，前一个块一定是被使用的状态，所以我们不需要确认而可以直接设置prev_inuse。如果是这样的话，我可以考虑一下前一个块是否有在再次分割last_remainder之前free的可能。诶，不对啊，如果unsortedbin中有和前面的块未使用，应该直接就合并了啊。不和并的情况无非就是fastbin和tcachebin，但是这两种情况下prev_inuse都会被置位。所以，last_remainder前一个块一定是inuse状态，所以不需要犹豫。

下面展示该部分代码，

if (in_smallbin_range (nb) &&
    bck == unsorted_chunks (av) &&
    victim == av->last_remainder &&
    (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
    {
        /* split and reattach remainder */
        remainder_size = size - nb;
        remainder = chunk_at_offset (victim, nb);
        unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
        av->last_remainder = remainder;
        remainder->bk = remainder->fd = unsorted_chunks (av);
        if (!in_smallbin_range (remainder_size))
        {
            remainder->fd_nextsize = NULL;
            remainder->bk_nextsize = NULL;
        }

        set_head (victim, nb | PREV_INUSE |
            (av != &main_arena ? NON_MAIN_ARENA : 0));
        set_head (remainder, remainder_size | PREV_INUSE);
        set_foot (remainder, remainder_size);

        check_malloced_chunk (av, victim, nb);
        void *p = chunk2mem (victim);
        alloc_perturb (p, bytes);
        return p;
    }

有问题就会有答案

突然有个问题，small bin是在什么时候进行分割的呢？突然想到之前有个提示说需要执行两次（那个for(;;)大循环），现在去看看

	The outer loop here is needed because we might not realize until
     near the end of malloc that we should have consolidated, so must
     do so and retry. This happens at most once, and only when we would
     otherwise need to expand memory to service a "small" request.
这里需要外层循环，因为我们可能直到 malloc 快结束时才意识到应该进行合并，所以必须这样做并重试。这种情况最多发生一次，并且仅在我们原本需要扩展内存来服务一个“小”请求时才会发生。

看不懂思密达，但是感觉答案不在这里。

我将在这里介绍small bin分配策略。首先，如果请求大小是small bin，则先检查是否有合适大小的块，有则直接分配。若无则先检查unsorted bin，若其中有最后一个块且该块是last remainder，则直接对其分割，否则就触发unsorted bin倾倒，同时检查是否有大小正好合适的块，将chunk放到他们应该去的地方去。倾倒结束后，将idx自增，随后根据chunk map检查是否有可分割chunk。需要注意的是，在tcache策略下，遇到适合大小chunk会优先放入tcache。并置位tcache_cache，随后从tcache中取出chunk。另检查chunk map时，tcahce不会被计算在内。

remove_unsorted

在last remainder这个小插曲之后是unsorted正常处理流程。第一步是从double list中取出目标chunk，随后对比大小，正合适就直接分配。否则将其放入对应bin中。

如果大小合适就直接分配，在使用tcache的情况下先放入tcache bin。

/* remove from unsorted list */
          if (__glibc_unlikely (bck->fd != victim))
            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
          unsorted_chunks (av)->bk = bck;
          bck->fd = unsorted_chunks (av);

          /* Take now instead of binning if exact fit */

          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
		set_non_main_arena (victim);
#if USE_TCACHE
	      /* Fill cache first, return to user only if cache fills.
		 We may return one of these chunks later.  */
	      if (tcache_nb
		  && tcache->counts[tc_idx] < mp_.tcache_count)
		{
		  tcache_put (victim, tc_idx);
		  return_cached = 1;
		  continue;
		}
	      else
		{
#endif
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
#if USE_TCACHE
		}
#endif
            }

place chunk in bin

对于大小不合适的chunk，将其置于对应的bin中（此过程不涉及fast bin，但是始终脱不开tcache bin）。需要注意的是，如果有合适大小的chunk被放入tcache的话，那么在结束之后将从tcache中get一个chunk。

关于这里的逻辑，if-else语句块内会优先处理好fwd和bck块间的关系，然后按统一的流程结束。

		/* place chunk in bin */

          if (in_smallbin_range (size))
            {
              victim_index = smallbin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
            }
          else
            {
              victim_index = largebin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;

              /* maintain large bins in sorted order */
              if (fwd != bck)
                {
                  /* Or with inuse bit to speed comparisons */
                  size |= PREV_INUSE;
                  /* if smaller than smallest, bypass loop below */
                  assert (chunk_main_arena (bck->bk));
                  if ((unsigned long) (size)
		      < (unsigned long) chunksize_nomask (bck->bk))
                    {
                      fwd = bck;
                      bck = bck->bk;

                      victim->fd_nextsize = fwd->fd;
                      victim->bk_nextsize = fwd->fd->bk_nextsize;
                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                    }
                  else
                    {
                      assert (chunk_main_arena (fwd));
                      while ((unsigned long) size < chunksize_nomask (fwd))
                        {
                          fwd = fwd->fd_nextsize;
			  assert (chunk_main_arena (fwd));
                        }

                      if ((unsigned long) size
			  == (unsigned long) chunksize_nomask (fwd))
                        /* Always insert in the second position.  */
                        fwd = fwd->fd;
                      else
                        {
                          victim->fd_nextsize = fwd;
                          victim->bk_nextsize = fwd->bk_nextsize;
                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
                          fwd->bk_nextsize = victim;
                          victim->bk_nextsize->fd_nextsize = victim;
                        }
                      bck = fwd->bk;
                      if (bck->fd != fwd)
                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
                    }
                }
              else
                victim->fd_nextsize = victim->bk_nextsize = victim;
            }

          mark_bin (av, victim_index);
          victim->bk = bck;
          victim->fd = fwd;
          fwd->bk = victim;
          bck->fd = victim;

#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
	 filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
	  && mp_.tcache_unsorted_limit > 0
	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
	{
	  return tcache_get (tc_idx);
	}
#endif

#define MAX_ITERS       10000
          if (++iters >= MAX_ITERS)
            break;
} // (这个}对应对于unsorted bin的遍历结束)

#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
	{
	  return tcache_get (tc_idx);
	}
#endif

如果以上所有合适的块都被tcache截取了，那么在unsorted bin遍历结束后会从tcache中返回一个合适大小的块。

largebin

如果分配一个large bin，则使用skip list来加速分配过程。large bin本身是一个fd_bk的双链表，使用fd_nextsize和bk_nextsize将不同大小的第一个chunk组成一个双链表skip list。

首先是一个优化，如果large bin为空或者其中最大的size小于请求大小，则直接结束。否则按bk_nextsize链升序遍历到大于nb的最小chunk大小，然后为了加速分配，优先取非用skip list链上的chunk(也就是victim->fd)。随后将对其进行分割，若分割后的last remainder大于MIN_CHUNKSIZE，则分割使用，否则直接全数分配。last remainder将被置入unsorted bin中。处理后返回chunk。

      /*
         If a large request, scan through the chunks of current bin in
         sorted order to find smallest that fits.  Use the skip list for this.
       */

      if (!in_smallbin_range (nb))
        {
          bin = bin_at (av, idx);

          /* skip scan if empty or largest chunk is too small */
          if ((victim = first (bin)) != bin
	      && (unsigned long) chunksize_nomask (victim)
	        >= (unsigned long) (nb))
            {
              victim = victim->bk_nextsize;
              while (((unsigned long) (size = chunksize (victim)) <
                      (unsigned long) (nb)))
                victim = victim->bk_nextsize;

              /* Avoid removing the first entry for a size so that the skip
                 list does not have to be rerouted.  */
              if (victim != last (bin)
		  && chunksize_nomask (victim)
		    == chunksize_nomask (victim->fd))
                victim = victim->fd;

              remainder_size = size - nb;
              unlink_chunk (av, victim);

              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
		    set_non_main_arena (victim);
                }
              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);
                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
		  if (__glibc_unlikely (fwd->bk != bck))
		    malloc_printerr ("malloc(): corrupted unsorted chunks");
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

切割大块

当无法直接找到合适bin箱的情况下，利用binmap找到更大的块来切割。

首先根据idx确定是哪块binmap（每块32个，共计128个，序号block），随后遍历block找到更大的块，然后使用set bit升序遍历找到最小的设置位（升序遍历过程中bin与bit保持同步变化），最后取出last(bin)并进入分割流程。

      /*
         Search for a chunk by scanning bins, starting with next largest
         bin. This search is strictly by best-fit; i.e., the smallest
         (with ties going to approximately the least recently used) chunk
         that fits is selected.

         The bitmap avoids needing to check that most blocks are nonempty.
         The particular case of skipping all bins during warm-up phases
         when no chunks have been returned yet is faster than it might look.
       */

      ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];
      bit = idx2bit (idx);

      for (;; )
        {
          /* Skip rest of block if there are no more set bits in this block.  */
          if (bit > map || bit == 0)
            {
              do
                {
                  if (++block >= BINMAPSIZE) /* out of bins */
                    goto use_top;
                }
              while ((map = av->binmap[block]) == 0);

              bin = bin_at (av, (block << BINMAPSHIFT));
              bit = 1;
            }

          /* Advance to bin with set bit. There must be one. */
          while ((bit & map) == 0)
            {
              bin = next_bin (bin);
              bit <<= 1;
              assert (bit != 0);
            }

          /* Inspect the bin. It is likely to be non-empty */
          victim = last (bin);

          /*  If a false alarm (empty bin), clear the bit. */
          if (victim == bin)
            {
              av->binmap[block] = map &= ~bit; /* Write through */
              bin = next_bin (bin);
              bit <<= 1;
            }

          else
            {
              size = chunksize (victim);

              /*  We know the first chunk in this bin is big enough to use. */
              assert ((unsigned long) (size) >= (unsigned long) (nb));

              remainder_size = size - nb;

              /* unlink */
              unlink_chunk (av, victim);

              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
		    set_non_main_arena (victim);
                }

              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);

                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
		  if (__glibc_unlikely (fwd->bk != bck))
		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;

                  /* advertise as last remainder */
                  if (in_smallbin_range (nb))
                    av->last_remainder = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

user top

这里就是我们熟知的top了，这部分的处理逻辑主要有三个

一是大小合适时分割（需要为top至少留有minsize大小），二是大小不合适且fastbin存在时对fastbin合并并开始第二次外层for大循环，三是以上要求皆不满足时直接调用sysmalloc分配内存。

use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).

         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
       */

      victim = av->top;
      size = chunksize (victim);

      if (__glibc_unlikely (size > av->system_mem))
        malloc_printerr ("malloc(): corrupted top size");

      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {
          remainder_size = size - nb;
          remainder = chunk_at_offset (victim, nb);
          av->top = remainder;
          set_head (victim, nb | PREV_INUSE |
                    (av != &main_arena ? NON_MAIN_ARENA : 0));
          set_head (remainder, remainder_size | PREV_INUSE);

          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }

      /* When we are using atomic ops to free fast chunks we can get
         here for all block sizes.  */
      else if (atomic_load_relaxed (&av->have_fastchunks))
        {
          malloc_consolidate (av);
          /* restore original bin index */
          if (in_smallbin_range (nb))
            idx = smallbin_index (nb);
          else
            idx = largebin_index (nb);
        }

      /*
         Otherwise, relay to handle system-dependent cases
       */
      else
        {
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }

简略情报

版本更迭

学习计划

glibc 1.09

数据结构

more_memory

malloc

free

realloc

总结

glibc 1.90

新增 宏

more_memory 更新

realloc 优化

glibc 1.91~93

glibc2.02

hurdmalloc

malloc.h

glibc2.31

malloc

代码逻辑

代码解析_int_malloc

处理bytes

初始化堆空间

从fastbin取chunk

fastbin

smallbin

large bin 准备

tcache准备

大循环for(;;)

遍历unsortedbins

last_remainder分割

有问题就会有答案

remove_unsorted

place chunk in bin

largebin

切割大块

user top

发送评论 编辑评论

新增宏

发送评论编辑评论