Capstone反汇编引擎数据类型及API分析及示例(三)

Capstone反汇编引擎数据类型及API分析及示例(一)
Capstone反汇编引擎数据类型及API分析及示例(二)

API分析

cs_open

cs_err CAPSTONE_API cs_open(cs_arch arch, cs_mode mode, csh *handle);

初始化cs句柄

参数
arch: 架构类型 (CS_ARCH_*)
mode: 硬件模式 (CS_MODE_*), 可在cs_mode数据类型中查到
handle: 指向句柄, 返回时更新
return: 创建成功返回CS_ERR_OK,否则返回cs_err枚举中对应的错误信息

实现代码

/*
 * cs_open - create a disassembler handle for an architecture/mode pair.
 *
 * arch:   architecture to disassemble for
 * mode:   hardware mode; rejected if it intersects the architecture's
 *         disallowed-mode mask
 * handle: out parameter; receives the new handle on success and is set
 *         to 0 on every failure path, so a failed open never leaves a
 *         stale value behind for a later cs_close()
 *
 * Returns CS_ERR_OK on success, otherwise:
 *   CS_ERR_MEMSETUP - the memory hooks were never installed
 *   CS_ERR_ARCH     - arch is out of range or has no initializer
 *   CS_ERR_MODE     - mode is not allowed for arch
 *   CS_ERR_MEM      - the handle structure could not be allocated
 *   or the error reported by the architecture initializer.
 */
cs_err CAPSTONE_API cs_open(cs_arch arch, cs_mode mode, csh *handle)
{
    cs_err err;
    struct cs_struct *ud;

    if (!cs_mem_malloc || !cs_mem_calloc || !cs_mem_realloc || !cs_mem_free || !cs_vsnprintf) {
        // Error: dynamic memory management must be initialized with
        // cs_option(CS_OPT_MEM) before cs_open() is used
        *handle = 0;
        return CS_ERR_MEMSETUP;
    }

    // An architecture is usable only if it lies inside the enum range
    // and an initializer is registered for it
    if (arch >= CS_ARCH_MAX || !cs_arch_init[arch]) {
        *handle = 0;
        return CS_ERR_ARCH;
    }

    if (mode & cs_arch_disallowed_mode_mask[arch]) {
        *handle = 0;
        return CS_ERR_MODE;
    }

    ud = cs_mem_calloc(1, sizeof(*ud));
    if (!ud) {
        // out of memory
        *handle = 0;
        return CS_ERR_MEM;
    }

    ud->errnum = CS_ERR_OK;
    ud->arch = arch;
    ud->mode = mode;
    // detail mode is off by default
    ud->detail = CS_OPT_OFF;

    // default skipdata setup
    ud->skipdata_setup.mnemonic = SKIPDATA_MNEM;

    err = cs_arch_init[ud->arch](ud);
    if (err) {
        cs_mem_free(ud);
        *handle = 0;
        return err;
    }

    *handle = (uintptr_t)ud;

    return CS_ERR_OK;
}

其中,cs_struct结构体包含更多细节设定,如下

// Internal engine state; cs_open() allocates one of these and hands its
// address back to the caller as the csh handle.
struct cs_struct {
    cs_arch arch;
    cs_mode mode;
    Printer_t printer;  // asm printer
    void *printer_info; // printer information
    Disasm_t disasm;    // disassembler
    void *getinsn_info; // auxiliary info (cs_disasm passes it to disasm)
    GetName_t reg_name;
    GetName_t insn_name;
    GetName_t group_name;
    GetID_t insn_id;
    PostPrinter_t post_printer;
    cs_err errnum;
    ARM_ITStatus ITBlock;   // ARM-specific option
    cs_opt_value detail, imm_unsigned;
    int syntax; // asm syntax used for printing, for archs such as ARM, Mips & PPC
    bool doing_mem; // handling a memory operand in the InstPrinter code
    unsigned short *insn_cache; // index cache built for mapping.c
    GetRegisterName_t get_regname;
    bool skipdata;  // set to true to skip data while disassembling
    uint8_t skipdata_size;  // number of bytes to skip
    cs_opt_skipdata skipdata_setup; // user-defined skipdata setup
    const uint8_t *regsize_map; // map of register sizes (currently x86 only)
    GetRegisterAccess_t reg_access;
    struct insn_mnem *mnem_list;    // linked list of customized instruction mnemonics
};

示例(创建一个x86_64类型的cs句柄):
cs_open(CS_ARCH_X86, CS_MODE_64, &handle)

cs_close

cs_err CAPSTONE_API cs_close(csh *handle);

释放句柄
参数
handle: 指向一个cs_open()打开的句柄
return: 释放成功返回CS_ERR_OK,否则返回cs_err枚举的错误信息

实现代码,可以看出释放句柄时会先释放句柄关联的内存(printer_info、自定义助记符链表、insn_cache以及cs_struct结构体本身),最后将句柄值设置为0

/*
 * cs_close - release a handle created by cs_open().
 *
 * handle: points to the handle to release; set to 0 on success.
 *
 * Returns CS_ERR_CSH when *handle is already 0, otherwise CS_ERR_OK
 * after all memory owned by the handle has been freed.
 */
cs_err CAPSTONE_API cs_close(csh *handle)
{
    struct cs_struct *engine;
    struct insn_mnem *node, *following;

    // a zero handle is not usable
    if (*handle == 0)
        return CS_ERR_CSH;

    engine = (struct cs_struct *)(*handle);

    if (engine->printer_info)
        cs_mem_free(engine->printer_info);

    // release every node of the customized-mnemonic list; the successor
    // is read before the node is freed
    for (node = engine->mnem_list; node; node = following) {
        following = node->next;
        cs_mem_free(node);
    }

    cs_mem_free(engine->insn_cache);

    // wipe the structure before handing it back to the allocator
    memset(engine, 0, sizeof(*engine));
    cs_mem_free(engine);

    // zero the caller's handle so it cannot be used after cs_close()
    *handle = 0;

    return CS_ERR_OK;
}

示例:
cs_close(&handle);

cs_option

cs_err CAPSTONE_API cs_option(csh handle, cs_opt_type type, size_t value);

设置反汇编引擎的运行时选项

handle: cs_open()打开的句柄
type: 设置选项的类型
value: 与type对应的选项值
return: 设置成功返回CS_ERR_OK,否则返回cs_err枚举的错误信息

注意: 在CS_OPT_MEM的情况下,handle可以是任何值,因此cs_option(handle, CS_OPT_MEM, value)必须在cs_open()之前被调用

实现代码

/*
 * cs_option - set a runtime option of the disassembly engine.
 *
 * ud:    handle returned by cs_open(); not used when type is CS_OPT_MEM,
 *        which is why CS_OPT_MEM may be set before any handle exists
 * type:  option to set
 * value: option value; for CS_OPT_MEM, CS_OPT_SKIPDATA_SETUP and
 *        CS_OPT_MNEMONIC it carries a pointer to the matching option
 *        structure
 *
 * Returns CS_ERR_OK on success, otherwise:
 *   CS_ERR_CSH    - ud is 0 (for any type other than CS_OPT_MEM)
 *   CS_ERR_OPTION - a required option structure is missing, or the
 *                   requested mode is disallowed for the architecture
 *   CS_ERR_MEM    - a new custom-mnemonic node could not be allocated
 *   or the result of the architecture-specific option handler for
 *   options not fully handled here.
 */
cs_err CAPSTONE_API cs_option(csh ud, cs_opt_type type, size_t value)
{
    struct cs_struct *handle;
    cs_opt_mnem *opt;

    // CS_OPT_MEM is supported before every other API (even cs_open())
    if (type == CS_OPT_MEM) {
        cs_opt_mem *mem = (cs_opt_mem *)value;

        // a missing structure would otherwise be dereferenced below
        if (!mem)
            return CS_ERR_OPTION;

        cs_mem_malloc = mem->malloc;
        cs_mem_calloc = mem->calloc;
        cs_mem_realloc = mem->realloc;
        cs_mem_free = mem->free;
        cs_vsnprintf = mem->vsnprintf;

        return CS_ERR_OK;
    }

    handle = (struct cs_struct *)(uintptr_t)ud;
    if (!handle)
        return CS_ERR_CSH;

    switch(type) {
        default:
            break;

        case CS_OPT_UNSIGNED:
            handle->imm_unsigned = (cs_opt_value)value;
            return CS_ERR_OK;

        case CS_OPT_DETAIL:
            handle->detail = (cs_opt_value)value;
            return CS_ERR_OK;

        case CS_OPT_SKIPDATA:
            handle->skipdata = (value == CS_OPT_ON);
            // compute the skip size once, the first time skipdata is enabled
            if (handle->skipdata && handle->skipdata_size == 0)
                handle->skipdata_size = skipdata_size(handle);
            return CS_ERR_OK;

        case CS_OPT_SKIPDATA_SETUP:
            if (value)
                handle->skipdata_setup = *((cs_opt_skipdata *)value);
            return CS_ERR_OK;

        case CS_OPT_MNEMONIC:
            opt = (cs_opt_mnem *)value;
            if (!opt)
                return CS_ERR_OPTION;

            if (opt->id) {
                if (opt->mnemonic) {
                    struct insn_mnem *tmp;

                    // add a new instruction, or replace an existing one
                    // 1. look for this insn in the list
                    tmp = handle->mnem_list;
                    while(tmp) {
                        if (tmp->insn.id == opt->id) {
                            // found the instruction: replace its mnemonic
                            (void)strncpy(tmp->insn.mnemonic, opt->mnemonic, sizeof(tmp->insn.mnemonic) - 1);
                            tmp->insn.mnemonic[sizeof(tmp->insn.mnemonic) - 1] = '\0';
                            break;
                        }
                        tmp = tmp->next;
                    }

                    // 2. not in the list yet: add it
                    if (!tmp) {
                        tmp = cs_mem_malloc(sizeof(*tmp));
                        if (!tmp)
                            // out of memory; the list is left unchanged
                            return CS_ERR_MEM;

                        tmp->insn.id = opt->id;
                        (void)strncpy(tmp->insn.mnemonic, opt->mnemonic, sizeof(tmp->insn.mnemonic) - 1);
                        tmp->insn.mnemonic[sizeof(tmp->insn.mnemonic) - 1] = '\0';
                        // new instructions go to the front of the list
                        tmp->next = handle->mnem_list;
                        handle->mnem_list = tmp;
                    }
                    return CS_ERR_OK;
                } else {
                    struct insn_mnem *prev, *tmp;

                    // a NULL mnemonic removes the customization for this id
                    tmp = handle->mnem_list;
                    prev = tmp;
                    while(tmp) {
                        if (tmp->insn.id == opt->id) {
                            // unlink and free the node
                            if (tmp == prev) {
                                // node is the list head
                                handle->mnem_list = tmp->next;
                            } else {
                                prev->next = tmp->next;
                            }
                            cs_mem_free(tmp);
                            break;
                        }
                        prev = tmp;
                        tmp = tmp->next;
                    }
                }
            }
            return CS_ERR_OK;

        case CS_OPT_MODE:
            // verify that the requested mode is valid for this architecture
            if (value & cs_arch_disallowed_mode_mask[handle->arch]) {
                return CS_ERR_OPTION;
            }
            break;
    }

    // everything else (and an accepted CS_OPT_MODE) is handled per architecture
    return cs_arch_option[handle->arch](handle, type, value);
}

示例,更改反汇编后显示的语法:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00"

// Disassembles CODE as x86-64 at base address 0x1000 and prints every
// instruction in AT&T syntax.
// Returns 0 when finished, -1 if the engine could not be opened.
int main(void)
{
    csh handle;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }
    cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);  // print in AT&T syntax
    count = cs_disasm(handle, (unsigned char*)CODE, sizeof(CODE) - 1, 0x1000, 0, &insn);
    if (count) {
        size_t j;

        for (j = 0; j < count; j++) {
            // address is 64 bits wide; PRIx64 (made available by platform.h)
            // is the portable conversion, unlike the MSVC-only "%Ix"
            printf("0x%" PRIx64 ":\t%s\t\t%s\n", insn[j].address, insn[j].mnemonic, insn[j].op_str);
        }

        // the instruction array was allocated by cs_disasm()
        cs_free(insn, count);
    } else {
        printf("ERROR: Failed to disassemble given code!\n");
    }

    cs_close(&handle);

    return 0;
}

输出

cs_errno

cs_err CAPSTONE_API cs_errno(csh handle);

当某个API出错时,返回对应的错误码
参数
handle: cs_open()打开的句柄
return: 无错误返回CS_ERR_OK,否则返回cs_err枚举的错误信息

实现很简单,判断到句柄不存在直接返回CS_ERR_CSH

示例:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00"

int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }

    cs_close(&handle);
    std::cout << cs_errno(handle);    //关闭句柄后检查将报错
    return 0;
}

输出,错误码4即CS_ERR_CSH

cs_strerror

const char * CAPSTONE_API cs_strerror(cs_err code);

将上个API输出的错误码转换为详细错误信息

/*
 * cs_strerror - describe an error code in human-readable form.
 *
 * code: error code, e.g. as returned by cs_errno()
 *
 * Returns a pointer to a constant string; codes without a dedicated
 * message yield "Unknown error code".
 */
const char * CAPSTONE_API cs_strerror(cs_err code)
{
    const char *text;

    switch(code) {
        case CS_ERR_OK:
            text = "OK (CS_ERR_OK)";
            break;
        case CS_ERR_MEM:
            text = "Out of memory (CS_ERR_MEM)";
            break;
        case CS_ERR_ARCH:
            text = "Invalid/unsupported architecture(CS_ERR_ARCH)";
            break;
        case CS_ERR_HANDLE:
            text = "Invalid handle (CS_ERR_HANDLE)";
            break;
        case CS_ERR_CSH:
            text = "Invalid csh (CS_ERR_CSH)";
            break;
        case CS_ERR_MODE:
            text = "Invalid mode (CS_ERR_MODE)";
            break;
        case CS_ERR_OPTION:
            text = "Invalid option (CS_ERR_OPTION)";
            break;
        case CS_ERR_DETAIL:
            text = "Details are unavailable (CS_ERR_DETAIL)";
            break;
        case CS_ERR_MEMSETUP:
            text = "Dynamic memory management uninitialized (CS_ERR_MEMSETUP)";
            break;
        case CS_ERR_VERSION:
            text = "Different API version between core & binding (CS_ERR_VERSION)";
            break;
        case CS_ERR_DIET:
            text = "Information irrelevant in diet engine (CS_ERR_DIET)";
            break;
        case CS_ERR_SKIPDATA:
            text = "Information irrelevant for 'data' instruction in SKIPDATA mode (CS_ERR_SKIPDATA)";
            break;
        case CS_ERR_X86_ATT:
            text = "AT&T syntax is unavailable (CS_ERR_X86_ATT)";
            break;
        case CS_ERR_X86_INTEL:
            text = "INTEL syntax is unavailable (CS_ERR_X86_INTEL)";
            break;
        case CS_ERR_X86_MASM:
            text = "MASM syntax is unavailable (CS_ERR_X86_MASM)";
            break;
        default:
            // any code without a dedicated message
            text = "Unknown error code";
            break;
    }

    return text;
}

示例,结合cs_errno使用:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00"

int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }

    cs_close(&handle);
    std::cout << cs_strerror(cs_errno(handle));  //直接输出报错信息
    return 0;
}

输出

cs_disasm

size_t CAPSTONE_API cs_disasm(csh handle,
        const uint8_t *code, size_t code_size,
        uint64_t address,
        size_t count,
        cs_insn **insn);

给定缓冲区、大小、地址和指令数量,反汇编机器码
API会动态分配内存来存放反汇编出的指令,生成的指令将放在*insn中

注意: 必须释放分配的内存,以避免内存泄漏。对于需要动态分配稀缺内存的系统(如OS内核或固件),API cs_disasm_iter()可能是比cs_disasm()更好的选择。原因是,使用cs_disasm()时,基于有限的可用内存,必须预先计算要反汇编多少条指令。

handle: cs_open()返回的句柄
code: 包含要反汇编的机器码的缓冲区。
code_size:上面代码缓冲区的大小。
address:给定原始代码缓冲区中的第一条指令的地址。
insn: 由这个API填写的指令数组。注意: insn将由这个函数分配,应该用cs_free() API释放
count: 需要反汇编的指令数量,输入0则反汇编所有指令
return: 成功反汇编的指令数量;如果该函数未能反汇编给定的代码,则返回0,此时可调用cs_errno()获取错误代码。

源码分析

/*
 * cs_disasm - disassemble a buffer of machine code into a newly allocated
 * array of cs_insn.
 *
 * ud:     handle returned by cs_open()
 * buffer: machine code to disassemble
 * size:   number of bytes available in buffer
 * offset: address assigned to the first instruction
 * count:  maximum number of instructions to produce; 0 means no limit
 * insn:   out parameter; receives the allocated instruction array
 *         (NULL when nothing was disassembled or on allocation failure
 *         after the first chunk)
 *
 * Returns the number of instructions stored in *insn, or 0 on failure.
 * On allocation failure handle->errnum is set to CS_ERR_MEM.
 */
size_t CAPSTONE_API cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
{
    struct cs_struct *handle;
    MCInst mci;
    uint16_t insn_size;
    size_t c = 0, i;
    unsigned int f = 0; // index of the next instruction in the cache
    cs_insn *insn_cache;    // cache for the disassembled instructions
    void *total = NULL;
    size_t total_size = 0;  // total size of the output buffer holding all insns
    bool r;
    void *tmp;
    size_t skipdata_bytes;
    uint64_t offset_org; // save all the original info of the buffer
    size_t size_org;
    const uint8_t *buffer_org;
    unsigned int cache_size = INSN_CACHE_SIZE;
    size_t next_offset;

    handle = (struct cs_struct *)(uintptr_t)ud;
    if (!handle) {
        // FIXME: how to report this case? The line below cannot be used
        // because handle is NULL here:
        // handle->errnum = CS_ERR_HANDLE;
        return 0;
    }

    handle->errnum = CS_ERR_OK;

    // reset the IT block of the ARM architecture
    if (handle->arch == CS_ARCH_ARM)
        handle->ITBlock.size = 0;

#ifdef CAPSTONE_USE_SYS_DYN_MEM
    // when the caller asks for fewer instructions than the default cache
    // holds, size the first cache chunk to exactly that count
    if (count > 0 && count <= INSN_CACHE_SIZE)
        cache_size = (unsigned int) count;
#endif

    // save the original offset for SKIPDATA
    buffer_org = buffer;
    offset_org = offset;
    size_org = size;

    total_size = sizeof(cs_insn) * cache_size;
    total = cs_mem_malloc(total_size);
    if (total == NULL) {
        // out of memory
        handle->errnum = CS_ERR_MEM;
        return 0;
    }

    insn_cache = total;

    while (size > 0) {
        MCInst_Init(&mci);
        mci.csh = handle;

        mci.address = offset;

        if (handle->detail) {
            // allocate memory for the detail pointer
            // NOTE(review): the result of this allocation is not checked here
            insn_cache->detail = cs_mem_malloc(sizeof(cs_detail));
        } else {
            insn_cache->detail = NULL;
        }

        // save all the information for non-detailed mode
        mci.flat_insn = insn_cache;
        mci.flat_insn->address = offset;
#ifdef CAPSTONE_DIET
        // make mnemonic & op_str empty strings
        mci.flat_insn->mnemonic[0] = '\0';
        mci.flat_insn->op_str[0] = '\0';
#endif

        r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
        if (r) {
            SStream ss;
            SStream_Init(&ss);

            mci.flat_insn->size = insn_size;

            // map the internal instruction opcode to the public insn ID
            handle->insn_id(handle, insn_cache, mci.Opcode);

            handle->printer(&mci, &ss, handle->printer_info);
            fill_insn(handle, insn_cache, ss.buffer, &mci, handle->post_printer, buffer);

            // adjust the instruction id (X86 only)
            if (handle->arch == CS_ARCH_X86)
                insn_cache->id += mci.popcode_adjust;

            next_offset = insn_size;
        } else  {
            // the disassembler failed here (broken instruction)

            // free the memory allocated for the detail pointer
            if (handle->detail) {
                cs_mem_free(insn_cache->detail);
            }

            // stop unless SKIPDATA is on and enough bytes remain to skip
            if (!handle->skipdata || handle->skipdata_size > size)
                break;

            if (handle->skipdata_setup.callback) {
                // let the user callback decide how many bytes to skip
                skipdata_bytes = handle->skipdata_setup.callback(buffer_org, size_org,
                        (size_t)(offset - offset_org), handle->skipdata_setup.user_data);
                if (skipdata_bytes > size)
                    // remaining input is too short to skip that many bytes
                    break;

                if (!skipdata_bytes)
                    // the callback asked to stop
                    break;
            } else
                skipdata_bytes = handle->skipdata_size;

            // emit the skipped bytes as a pseudo "data" instruction with id 0
            insn_cache->id = 0;
            insn_cache->address = offset;
            insn_cache->size = (uint16_t)skipdata_bytes;
            memcpy(insn_cache->bytes, buffer, skipdata_bytes);
#ifdef CAPSTONE_DIET
            insn_cache->mnemonic[0] = '\0';
            insn_cache->op_str[0] = '\0';
#else
            strncpy(insn_cache->mnemonic, handle->skipdata_setup.mnemonic,
                    sizeof(insn_cache->mnemonic) - 1);
            skipdata_opstr(insn_cache->op_str, buffer, skipdata_bytes);
#endif
            insn_cache->detail = NULL;

            next_offset = skipdata_bytes;
        }

        // one more instruction entered the cache
        f++;

        // one more instruction was disassembled
        c++;
        if (count > 0 && c == count)
            // reached the number of instructions the caller asked for
            break;

        if (f == cache_size) {
            // the current chunk is full: make the next chunk 8/5 the size of
            // this one and extend the buffer by that many entries
            cache_size = cache_size * 8 / 5; 
            total_size += (sizeof(cs_insn) * cache_size);
            tmp = cs_mem_realloc(total, total_size);
            if (tmp == NULL) {  // out of memory
                if (handle->detail) {
                    // free the detail pointer of every instruction stored so far
                    insn_cache = (cs_insn *)total;
                    for (i = 0; i < c; i++, insn_cache++)
                        cs_mem_free(insn_cache->detail);
                }

                cs_mem_free(total);
                *insn = NULL;
                handle->errnum = CS_ERR_MEM;
                return 0;
            }

            total = tmp;
            // continue filling the cache right after the last instruction
            insn_cache = (cs_insn *)((char *)total + sizeof(cs_insn) * c);

            // reset f to 0 so the new chunk is filled from its start
            f = 0;
        } else
            insn_cache++;

        buffer += next_offset;
        size -= next_offset;
        offset += next_offset;
    }

    if (!c) {
        // no instruction was disassembled
        cs_mem_free(total);
        total = NULL;
    } else if (f != cache_size) {
        // the last cache chunk was not fully used: shrink the buffer
        tmp = cs_mem_realloc(total, total_size - (cache_size - f) * sizeof(*insn_cache));
        if (tmp == NULL) {  // out of memory
            // free all detail pointers
            if (handle->detail) {
                insn_cache = (cs_insn *)total;
                for (i = 0; i < c; i++, insn_cache++)
                    cs_mem_free(insn_cache->detail);
            }

            cs_mem_free(total);
            *insn = NULL;

            handle->errnum = CS_ERR_MEM;
            return 0;
        }

        total = tmp;
    }

    *insn = total;

    return c;
}

示例,x86_64:

#include <iostream>
#include <stdio.h>

#include "capstone.h"
#include "platform.h"

using namespace std;

#define CODE "\x55\x48\x8b\x05\xb8\x13\x00\x00\xe9\xea\xbe\xad\xde\xff\x25\x23\x01\x00\x00\xe8\xdf\xbe\xad\xde\x74\xff"

// Disassembles CODE as x86-64 at base address 0x1000 and prints the
// address, mnemonic and operands of every instruction.
// Returns 0 when finished, -1 if the engine could not be opened.
int main(void)
{
    csh handle = 0;
    cs_insn* insn;
    size_t count;

    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
        return -1;
    }

    count = cs_disasm(handle, (unsigned char*)CODE, sizeof(CODE) - 1, 0x1000, 0, &insn);   // all instructions, base address 0x1000, stored in insn
    if (count) {
        size_t j;

        for (j = 0; j < count; j++) {
            // address is 64 bits wide; PRIx64 (made available by platform.h)
            // is the portable conversion, unlike the MSVC-only "%Ix"
            printf("0x%" PRIx64 ":\t%s\t\t%s\n", insn[j].address, insn[j].mnemonic, insn[j].op_str);
        }

        // the instruction array was allocated by cs_disasm()
        cs_free(insn, count);
    } else {
        printf("ERROR: Failed to disassemble given code!\n");
    }

    cs_close(&handle);

    return 0;
}

输出

点击收藏 | 0 关注 | 1 打赏
登录 后跟帖