afl-gcc.c 的源码阅读。

总结

  • 先通过 find_as 函数找 afl-as 并设置好编译工具所在的路径;
  • 再通过 edit_params 做一些参数上的优化;
  • 最后通过 execvp 调用真正的编译器(即 cc_params[0],如 gcc/g++/clang 等)进行真正的编译。

edit_params

  • 核心作用:将 argv 拷贝到 u8 **cc_params,然后进行相应的处理。
/* Copy argv to cc_params, making the necessary edits. */
 
static void edit_params(u32 argc, char** argv) {
 
  u8 fortify_set = 0, asan_set = 0;
  u8 *name;
 
#if defined(__FreeBSD__) && defined(__x86_64__)
  u8 m32_set = 0;
#endif

为 cc_params 分配大小为 (argc + 128) * sizeof(u8*) 的内存,也就是说最多可以容纳的参数个数是当前参数个数再加上 128:

  cc_params = ck_alloc((argc + 128) * sizeof(u8*));

检查 argv[0] 中是否存在 /,如果不存在则 name = argv[0],如果存在则找到最后一个 /,并将其后面的字符串赋值给 name。这个 name 应该就是 afl-gcc/afl-clang 系列了。

  name = strrchr(argv[0], '/');
  if (!name) name = argv[0]; else name++;

处理 name 以 afl-clang 开头的情况:

  • 设置 clang_mode=1 并设置环境变量 __AFL_CLANG_MODE 为 1;
  • 对比 name 与 afl-clang++:
    • 如果相同,获取环境变量 AFL_CXX,如果存在的话赋值给 cc_params[0],否则使用默认的 clang++;
    • 如果不同,获取环境变量 AFL_CC,和上面做类似的操作(默认值为 clang)。
    • PS:怪不得上面用 strncmp 只比较前 9 个字符,原来后面还能顺便区分 CXX 和 CC 的情况,学到了。
  if (!strncmp(name, "afl-clang", 9)) {
 
    clang_mode = 1;
 
    setenv(CLANG_ENV_VAR, "1", 1);
 
    if (!strcmp(name, "afl-clang++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
    }
 
  } 

如果 name 不以 afl-clang 开头且当前平台是苹果系列,则只从环境变量 AFL_CXX / AFL_GCJ / AFL_CC 中获取编译器,没有默认值;如果对应的环境变量没有设置,就直接报错退出:

  else {
 
    /* With GCJ and Eclipse installed, you can actually compile Java! The
       instrumentation will work (amazingly). Alas, unhandled exceptions do
       not call abort(), so afl-fuzz would need to be modified to equate
       non-zero exit codes with crash conditions when working with Java
       binaries. Meh. */
 
#ifdef __APPLE__
 
    if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX");
    else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ");
    else cc_params[0] = getenv("AFL_CC");
 
    if (!cc_params[0]) {
 
      SAYF("\n" cLRD "[-] " cRST
           "On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n"
           "    'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n"
           "    set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n");
 
      FATAL("AFL_CC or AFL_CXX required on MacOS X");
 
    }

如果 name 不以 afl-clang 开头且当前平台不是苹果系列:

  • 如果 name 是 afl-g++,就尝试获取 AFL_CXX 环境变量并赋值,未设置时默认使用 g++;
  • 如果 name 是 afl-gcj,就尝试获取 AFL_GCJ 环境变量并赋值,未设置时默认使用 gcj;
  • 如果 name 既不是 afl-g++ 也不是 afl-gcj,就尝试获取 AFL_CC 环境变量并赋值,未设置时默认使用 gcc。
#else
 
    if (!strcmp(name, "afl-g++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
    } else if (!strcmp(name, "afl-gcj")) {
      u8* alt_cc = getenv("AFL_GCJ");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
    }
 
#endif /* __APPLE__ */
 
  }

接下来开始遍历参数,包括:

  • -B:用户传入的 -B 参数会被丢弃(不在 be_quiet 模式下时会打印一条警告),因为循环结束后会统一追加 -B as_path;
    • 如果参数恰好是单独的 -B,那么它的下一个参数(即路径)也会被一并跳过;

Tip

  • -Bprefix
    • This option specifies where to find the executables, libraries, include files, and data files of the compiler itself.
    • The compiler driver program runs one or more of the subprograms cpp, cc1, as and ld. It tries prefix as a prefix for each program it tries to run, both with and without ‘machine/version/’ for the corresponding target machine and compiler version.
  • 总的来说,-B 参数[1]就是用来指定到哪里去找编译工具的。
  • 如果是在 x86_64 的 FreeBSD 平台上,且设置了 -m32 参数的话就设置 m32_set 的值为 1;
  • 如果设置了 -fsanitize=address 或 -fsanitize=memory 就将 asan_set 的值设置为 1;
  • 如果设置 FORTIFY_SOURCE 就将 fortify_set 的值设置为 1;

Tip

  • FORTIFY_SOURCE 主要进行缓冲区溢出问题的检查,检查的常见函数有memcpy, mempcpy, memmove, memset, strcpy, stpcpy, strncpy, strcat, strncat, sprintf, vsprintf, snprintf, gets 等;
  while (--argc) {
    u8* cur = *(++argv);
 
    if (!strncmp(cur, "-B", 2)) {
 
      if (!be_quiet) WARNF("-B is already set, overriding");
 
      if (!cur[2] && argc > 1) { argc--; argv++; }
      continue;
 
    }
 
    if (!strcmp(cur, "-integrated-as")) continue;
 
    if (!strcmp(cur, "-pipe")) continue;
 
#if defined(__FreeBSD__) && defined(__x86_64__)
    if (!strcmp(cur, "-m32")) m32_set = 1;
#endif
 
    if (!strcmp(cur, "-fsanitize=address") ||
        !strcmp(cur, "-fsanitize=memory")) asan_set = 1;
 
    if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;
 
    cc_params[cc_par_cnt++] = cur;
 
  }
 
  cc_params[cc_par_cnt++] = "-B";
  cc_params[cc_par_cnt++] = as_path;

接下来开始插入新参数:

  • 如果在 clang_mode 下,加入 -no-integrated-as 参数;
  • 如果设置了 AFL_HARDEN 环境变量,加入 -fstack-protector-all 参数;
    • 此时,如果 fortify_set 为 0 就添加 -D_FORTIFY_SOURCE=2 参数;
  if (clang_mode)
    cc_params[cc_par_cnt++] = "-no-integrated-as";
 
  if (getenv("AFL_HARDEN")) {
 
    cc_params[cc_par_cnt++] = "-fstack-protector-all";
 
    if (!fortify_set)
      cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";
 
  }
  • 如果在上文中设置了 asan_set 的值,就设置 AFL_USE_ASAN 环境变量的值为 1;
  • 如果没设置 asan_set 但是设置了 AFL_USE_ASAN 环境变量:
    • 此时不能同时设置 AFL_USE_MSAN 或 AFL_HARDEN;
    • 添加 -U_FORTIFY_SOURCE 和 -fsanitize=address 参数;
  • 如果没设置 asan_set 但是设置了 AFL_USE_MSAN 环境变量:
    • 此时不能同时设置 AFL_USE_ASAN 或 AFL_HARDEN;
    • 添加 -U_FORTIFY_SOURCE 和 -fsanitize=memory 参数;
  if (asan_set) {
 
    /* Pass this on to afl-as to adjust map density. */
 
    setenv("AFL_USE_ASAN", "1", 1);
 
  } else if (getenv("AFL_USE_ASAN")) {
 
    if (getenv("AFL_USE_MSAN"))
      FATAL("ASAN and MSAN are mutually exclusive");
 
    if (getenv("AFL_HARDEN"))
      FATAL("ASAN and AFL_HARDEN are mutually exclusive");
 
    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=address";
 
  } else if (getenv("AFL_USE_MSAN")) {
 
    if (getenv("AFL_USE_ASAN"))
      FATAL("ASAN and MSAN are mutually exclusive");
 
    if (getenv("AFL_HARDEN"))
      FATAL("MSAN and AFL_HARDEN are mutually exclusive");
 
    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=memory";
 
 
  }

如果不存在 AFL_DONT_OPTIMIZE 环境变量:

  • 在 x86_64 下的 FreeBSD 中:
    • 如果不是 clang_mode,或者没有设置 m32_set,添加 -g 参数(也就是只有 clang 加 -m32 的组合才不添加 -g);
  • 不在 x86_64 下的 FreeBSD 中的话:
    • 直接添加 -g 参数。
  • 除此之外,还添加 -O3、-funroll-loops、-D__AFL_COMPILER=1、-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1 参数。
  if (!getenv("AFL_DONT_OPTIMIZE")) {
 
#if defined(__FreeBSD__) && defined(__x86_64__)
 
    /* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
       works OK. This has nothing to do with us, but let's avoid triggering
       that bug. */
 
    if (!clang_mode || !m32_set)
      cc_params[cc_par_cnt++] = "-g";
 
#else
 
      cc_params[cc_par_cnt++] = "-g";
 
#endif
 
    cc_params[cc_par_cnt++] = "-O3";
    cc_params[cc_par_cnt++] = "-funroll-loops";
 
    /* Two indicators that you're building for fuzzing; one of them is
       AFL-specific, the other is shared with libfuzzer. */
 
    cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
    cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";
 
  }

如果设置了 AFL_NO_BUILTIN 环境变量,会进一步添加下面的参数,禁止编译器把这些字符串/内存比较函数替换成内建实现:

  if (getenv("AFL_NO_BUILTIN")) {
 
    cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";
 
  }
 
  cc_params[cc_par_cnt] = NULL;
 
}

find_as

核心功能:寻找 afl-as

  1. 首先检查环境变量 AFL_PATH,如果存在则赋值给 afl_path,然后检查 afl_path/as 文件是否可以执行,如果可以,as_path = afl_path;
  2. 如果上一步没有成功,检查 argv[0](此时的 argv[0] 类似于 /<path>/afl-gcc)中是否存在 "/",如果存在则取最后一个 "/" 前面的字符串作为 dir,然后检查 dir/afl-as 是否可以执行,如果可以,将 as_path = dir;
  3. 如果仍然没有找到,再检查编译时指定的默认路径 AFL_PATH 下的 as 是否可以执行,如果可以,as_path = AFL_PATH;
  4. 以上方式都失败,则调用 FATAL 报错退出。
/* Try to find our "fake" GNU assembler in AFL_PATH or at the location derived
   from argv[0]. If that fails, abort. */
 
static void find_as(u8* argv0) {

首先检查环境变量 AFL_PATH ,如果存在直接赋值给 afl_path ,然后检查 afl_path/as 文件是否可以访问,如果可以,as_path=afl_path

  u8 *afl_path = getenv("AFL_PATH");
  u8 *slash, *tmp;
 
  if (afl_path) {
 
    tmp = alloc_printf("%s/as", afl_path);
 
    if (!access(tmp, X_OK)) {
      as_path = afl_path;
      ck_free(tmp);
      return;
    }
 
    ck_free(tmp);
 
  }

如果不存在环境变量 AFL_PATH,或者上一步的检查没有通过,则检查 argv[0](如 /Users/v4ler1an/AFL/afl-gcc)中是否存在 "/",如果存在则取最后一个 "/" 前面的字符串作为 dir,然后检查 dir/afl-as 是否可以执行,如果可以,将 as_path = dir。

  slash = strrchr(argv0, '/');
 
  if (slash) {
 
    u8 *dir;
 
    *slash = 0;
    dir = ck_strdup(argv0);
    *slash = '/';
 
    tmp = alloc_printf("%s/afl-as", dir);
 
    if (!access(tmp, X_OK)) {
      as_path = dir;
      ck_free(tmp);
      return;
    }
 
    ck_free(tmp);
    ck_free(dir);
 
  }
 
  if (!access(AFL_PATH "/as", X_OK)) {
    as_path = AFL_PATH;
    return;
  }
 
  FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH");
 
}

main

主要包含三个函数调用:

  • find_as(argv[0]) :查找使用的汇编器
  • edit_params(argc, argv):处理传入的编译参数,将确定好的参数放入 cc_params[] 数组
  • 调用 execvp(cc_params[0], (char**)cc_params) 执行真正的编译器(cc_params[0],如 gcc/clang),只有在 execvp 失败时才会继续往下执行并报错
/* Main entry point.

   Prints the banner (or switches to quiet mode), shows a usage message when
   no compiler arguments were given, then looks up the 'as' wrapper, builds
   the rewritten command line and replaces this process with the compiler
   named in cc_params[0]. Control only continues past execvp() when the exec
   itself failed, in which case we bail out with a fatal error. */

int main(int argc, char** argv) {

  /* Only show the banner when stderr (fd 2) is a terminal and AFL_QUIET is
     not set; otherwise record that warnings should be suppressed. */

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  /* Nothing besides argv[0]: there is nothing to compile, so print the
     usage text and exit with a non-zero status. */

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
         "for gcc or clang, letting you recompile third-party code with the required\n"
         "runtime instrumentation. A common use pattern would be one of the following:\n\n"

         "  CC=%s/afl-gcc ./configure\n"
         "  CXX=%s/afl-g++ ./configure\n\n"

         "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
         "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
         BIN_PATH, BIN_PATH);

    exit(1);

  }

  /* Must run before edit_params(), which appends "-B as_path". */

  find_as(argv[0]);

  edit_params(argc, argv);

  /* On success execvp() does not return. */

  execvp(cc_params[0], (char**)cc_params);

  FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);

  return 0;

}

参考资料

Footnotes

  1. 3.16 Options for Directory Search