编译

想要分析一个cython程序,可以从自己编译cython程序开始,通过对比cython生成的c代码和ida中的伪代码来分析实际情况。注意:windows平台和linux平台编译cython时使用的c编译器不同,所以生成出来的代码结构可能会有区别,下文均以windows平台为样板。

创建pyx文件

pyx文件其实就是以py格式编写的编程内容。

def test(a, b):
    c = a + b
    print(c)

创建setup.py文件

导入cythonize模块

from distutils.core import setup
from Cython.Build import cythonize

setup(
    ext_modules = cythonize("test.pyx")
)
# test.pyx是创建的pyx文件名

命令行编译

使用cython命令编译

cython test.pyx

使用cythonize模块编译

cythonize -a -i test.pyx

使用setup.py编译

python setup.py build_ext --inplace

编译之后会生成一个c文件和.pyd文件,其中c文件是源码,pyd则是我们的目标分析文件了

分析

比较指令

我们编写一段判断函数:

def test(flag):
    enc = 19
    if flag == enc:
        pass
    if flag >= enc:
        pass
    if flag <= enc:
        pass
    if flag != enc:
        pass
    if flag > enc:
        pass
    if flag < enc:
        pass

查看生成的test.c文件中相对应的内容

由于代码较长此处省略大部分,通过观察我们可以发现if flag == enc:类的句式被翻译成了

__pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_enc); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyObject_RichCompare(__pyx_v_flag, __pyx_t_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (__pyx_t_3) {
  }

通过函数名可以得知“==”这类比较使用的是PyObject_RichCompare函数,第一个参数就是前面需要比较的内容,第二个参数就是后面用于比较的内容,第三个参数就是指定比较类别。

可以观测到基本都是这种类型。下面就是比较符号与比较类型的表格:

符号 类型
== Py_EQ
>= Py_GE
<= Py_LE
!= Py_NE
> Py_GT
< Py_LT

观测IDA中的反编译情况如下:

从上面两个图可以看出在IDA中的PyObject_RichCompare是引用了外部函数所以显示了函数名,对应的参数与c中的代码相同,那么Py_EQ等比较类别就用了数字来替代。

根据顺序我们可以推测出下面的列表:

符号 类型 数字
== Py_EQ 2
>= Py_GE 5
<= Py_LE 1
!= Py_NE 3
> Py_GT 4
< Py_LT 0

变量类型

编写一段覆盖所有变量类型的代码:

def test(flag):
    tmp = 19
    tmp1 = 21.3
    tmp2 = 437593479587349875983475987349587324895
    tmp3 = "tmp1"
    tmp4 = True
    tmp5 = False
    tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
    tmp7 = []
    tmp8 = (1,3,99)
    tmp9 = {"key" : "key", "value" : "value"}
    print(tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9)

对比翻译c后的内容查看:

通过定义可以发现在cython编译规则中会先把硬编码中的数字先解析了一遍,我们写的普通整数识别为long类型,小数则是识别成了double类型(无论长短),太长的整数识别为PyObject类型,字符串也是PyObject类型,布尔值是int类型,列表、元组、字典都是PyObject类型。

数字

c代码如下:

/* "test.pyx":2
 * def test(flag):
 *     tmp = 19             # <<<<<<<<<<<<<<
 *     tmp1 = 21.3
 *     tmp2 = 437593479587349875983475987349587324895
 */
  __pyx_v_tmp = 19;

在c中没有到达long long上限的整数基本都是直接赋值,我们看看IDA中的赋值长下面这个样

直接使用PyLong_FromLong函数赋值,参数就是整数

小数

c代码如下:

/* "test.pyx":3
 * def test(flag):
 *     tmp = 19
 *     tmp1 = 21.3             # <<<<<<<<<<<<<<
 *     tmp2 = 437593479587349875983475987349587324895
 *     tmp3 = "tmp1"
 */
  __pyx_v_tmp1 = 21.3;

在IDA中显示如下:

这里看上去比较奇怪,我们查看汇编就知道其实是ida没有识别出载入参数

xmm0是浮点数寄存器,一般浮点数都会放入xmm系列寄存器中计算,而在正常的程序参数列表中没有xmm寄存器,所以ida没有识别出来,我们查看qword_180006E28指向地址就可以看到我们输入的浮点数

长整数

c代码如下:

/* "test.pyx":4
 *     tmp = 19
 *     tmp1 = 21.3
 *     tmp2 = 437593479587349875983475987349587324895             # <<<<<<<<<<<<<<
 *     tmp3 = "tmp1"
 *     tmp4 = True
 */
  __Pyx_INCREF(__pyx_int_0x149357046d142e1e6e1948884dc976fdf);
  __pyx_v_tmp2 = __pyx_int_0x149357046d142e1e6e1948884dc976fdf;

长整数在IDA中内容如下:

这里与数字不同的是这里使用的是PyLong_FromString函数,这也表明长整数在编译之后储存的是字符串,在使用的时候再将字符串转为数字。

值得一提的是如果是负数也是如此。

字符串

c代码如下:

/* "test.pyx":5
 *     tmp1 = 21.3
 *     tmp2 = 437593479587349875983475987349587324895
 *     tmp3 = "tmp1"             # <<<<<<<<<<<<<<
 *     tmp4 = True
 *     tmp5 = False
 */
  __Pyx_INCREF(__pyx_n_s_tmp1);
  __pyx_v_tmp3 = __pyx_n_s_tmp1;

字符串调用的其实是赋值好的全局变量字符串,在c文件中有直接赋值

在使用前还有一次初始化赋值

在IDA中的内容如下:

布尔值

c代码如下:

/* "test.pyx":6
 *     tmp2 = 437593479587349875983475987349587324895
 *     tmp3 = "tmp1"
 *     tmp4 = True             # <<<<<<<<<<<<<<
 *     tmp5 = False
 *     tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
 */
  __pyx_v_tmp4 = 1;

  /* "test.pyx":7
 *     tmp3 = "tmp1"
 *     tmp4 = True
 *     tmp5 = False             # <<<<<<<<<<<<<<
 *     tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
 *     tmp7 = []
 */
  __pyx_v_tmp5 = 0;

布尔值在IDA中比较简单

列表

c代码如下:

/* "test.pyx":8
 *     tmp4 = True
 *     tmp5 = False
 *     tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]             # <<<<<<<<<<<<<<
 *     tmp7 = []
 *     tmp8 = (1,3,99)
 */
  __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_tmp); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyList_New(11); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_int_45);
  __Pyx_GIVEREF(__pyx_int_45);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_int_45)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_int_neg_67);
  __Pyx_GIVEREF(__pyx_int_neg_67);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 1, __pyx_int_neg_67)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_float_21_4);
  __Pyx_GIVEREF(__pyx_float_21_4);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 2, __pyx_float_21_4)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_float_23_7);
  __Pyx_GIVEREF(__pyx_float_23_7);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 3, __pyx_float_23_7)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_int_large_neg_43759347958734_xxx_475987349587324895);
  __Pyx_GIVEREF(__pyx_int_large_neg_43759347958734_xxx_475987349587324895);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 4, __pyx_int_large_neg_43759347958734_xxx_475987349587324895)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_int_0x119de994f9e3728a1652147);
  __Pyx_GIVEREF(__pyx_int_0x119de994f9e3728a1652147);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 5, __pyx_int_0x119de994f9e3728a1652147)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_n_s_hello);
  __Pyx_GIVEREF(__pyx_n_s_hello);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 6, __pyx_n_s_hello)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(__pyx_n_s_guheng);
  __Pyx_GIVEREF(__pyx_n_s_guheng);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 7, __pyx_n_s_guheng)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(Py_True);
  __Pyx_GIVEREF(Py_True);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 8, Py_True)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_INCREF(Py_False);
  __Pyx_GIVEREF(Py_False);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 9, Py_False)) __PYX_ERR(0, 8, __pyx_L1_error);
  __Pyx_GIVEREF(__pyx_t_1);
  if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 10, __pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error);
  __pyx_t_1 = 0;
  __pyx_v_tmp6 = ((PyObject*)__pyx_t_2);
  __pyx_t_2 = 0;

  /* "test.pyx":9
 *     tmp5 = False
 *     tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
 *     tmp7 = []             # <<<<<<<<<<<<<<
 *     tmp8 = (1,3,99)
 *     tmp9 = {"key" : "key", "value" : "value"}
 */
  __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_v_tmp7 = ((PyObject*)__pyx_t_2);
  __pyx_t_2 = 0;

列表在IDA中情况如下:

首先使用PyList_New创建了一个11长度的空列表,然后直接使用数组赋值

元组

c代码如下:

/* "test.pyx":10
 *     tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
 *     tmp7 = []
 *     tmp8 = (1,3,99)             # <<<<<<<<<<<<<<
 *     tmp9 = {"key" : "key", "value" : "value"}
 *     print(tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9)
 */
  __Pyx_INCREF(__pyx_tuple_);
  __pyx_v_tmp8 = __pyx_tuple_;

在IDA中的元组表示如下:

字典

c代码如下:

/* "test.pyx":11
 *     tmp7 = []
 *     tmp8 = (1,3,99)
 *     tmp9 = {"key" : "key", "value" : "value"}             # <<<<<<<<<<<<<<
 *     print(tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9)
 */
  __pyx_t_2 = __Pyx_PyDict_NewPresized(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_key, __pyx_n_s_key) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_value, __pyx_n_s_value) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
  __pyx_v_tmp9 = ((PyObject*)__pyx_t_2);
  __pyx_t_2 = 0;

在IDA中情况如下:

使用PyDict_New创建字典,PyDict_SetItem函数是添加字典中的键值。

变量初始化

在上文中的列表赋值跟我们想象中的函数不太一样,这是因为列表中的内容全部在其他函数中进行了初始化,然后放入了off_1800095B8数据块中统一管理,我们看到下面这一段c代码:

/* #### Code section: init_constants ### */

static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) {
  if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error);
  __pyx_float_21_4 = PyFloat_FromDouble(21.4); if (unlikely(!__pyx_float_21_4)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_23_7 = PyFloat_FromDouble(23.7); if (unlikely(!__pyx_float_23_7)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_3 = PyInt_FromLong(3); if (unlikely(!__pyx_int_3)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_45 = PyInt_FromLong(45); if (unlikely(!__pyx_int_45)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_99 = PyInt_FromLong(99); if (unlikely(!__pyx_int_99)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_0x119de994f9e3728a1652147 = PyInt_FromString((char *)"0x119de994f9e3728a1652147", 0, 0); if (unlikely(!__pyx_int_0x119de994f9e3728a1652147)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_0x149357046d142e1e6e1948884dc976fdf = PyInt_FromString((char *)"0x149357046d142e1e6e1948884dc976fdf", 0, 0); if (unlikely(!__pyx_int_0x149357046d142e1e6e1948884dc976fdf)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_neg_67 = PyInt_FromLong(-67); if (unlikely(!__pyx_int_neg_67)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_large_neg_43759347958734_xxx_475987349587324895 = PyInt_FromString((char *)"-437593479587349875983475987349587324895", 0, 0); if (unlikely(!__pyx_int_large_neg_43759347958734_xxx_475987349587324895)) __PYX_ERR(0, 1, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
}

这里就是把列表中需要用的内容进行了初始化,我们再看到IDA中该函数的表现形式:

这里发现其实在初始化之后把初始化之后的值放入了off_1800095B8块中,列表根据编译规定的下标提取其中的内容。

运算符号

我们将所有的运算符号全部放入函数中,看看编译出来情况如何:

def test(x, y):
    a = x + y
    b = x - y
    c = x * y
    d = x / y
    e = x // y
    f = x ^ y
    g = x & y
    h = x | y
    i = x % y
    j = x ** y
    k = ~x
    l = x >> 4
    n = y << 2

按之前的一样,放出c代码和IDA截图

加法

/* "test.pyx":2
 * def test(x, y):
 *     a = x + y             # <<<<<<<<<<<<<<
 *     b = x - y
 *     c = x * y
 */
  __pyx_t_1 = PyNumber_Add(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_a = __pyx_t_1;
  __pyx_t_1 = 0;

减法

/* "test.pyx":3
 * def test(x, y):
 *     a = x + y
 *     b = x - y             # <<<<<<<<<<<<<<
 *     c = x * y
 *     d = x / y
 */
  __pyx_t_1 = PyNumber_Subtract(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_b = __pyx_t_1;
  __pyx_t_1 = 0;

乘法

/* "test.pyx":4
 *     a = x + y
 *     b = x - y
 *     c = x * y             # <<<<<<<<<<<<<<
 *     d = x / y
 *     e = x // y
 */
  __pyx_t_1 = PyNumber_Multiply(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 4, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_c = __pyx_t_1;
  __pyx_t_1 = 0;

除法

/* "test.pyx":5
 *     b = x - y
 *     c = x * y
 *     d = x / y             # <<<<<<<<<<<<<<
 *     e = x // y
 *     f = x ^ y
 */
  __pyx_t_1 = __Pyx_PyNumber_Divide(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 5, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_d = __pyx_t_1;
  __pyx_t_1 = 0;

整除

/* "test.pyx":6
 *     c = x * y
 *     d = x / y
 *     e = x // y             # <<<<<<<<<<<<<<
 *     f = x ^ y
 *     g = x & y
 */
  __pyx_t_1 = PyNumber_FloorDivide(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 6, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_e = __pyx_t_1;
  __pyx_t_1 = 0;

异或

/* "test.pyx":7
 *     d = x / y
 *     e = x // y
 *     f = x ^ y             # <<<<<<<<<<<<<<
 *     g = x & y
 *     h = x | y
 */
  __pyx_t_1 = PyNumber_Xor(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_f = __pyx_t_1;
  __pyx_t_1 = 0;

按位与

/* "test.pyx":8
 *     e = x // y
 *     f = x ^ y
 *     g = x & y             # <<<<<<<<<<<<<<
 *     h = x | y
 *     i = x % y
 */
  __pyx_t_1 = PyNumber_And(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_g = __pyx_t_1;
  __pyx_t_1 = 0;

按位或

/* "test.pyx":9
 *     f = x ^ y
 *     g = x & y
 *     h = x | y             # <<<<<<<<<<<<<<
 *     i = x % y
 *     j = x ** y
 */
  __pyx_t_1 = PyNumber_Or(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_h = __pyx_t_1;
  __pyx_t_1 = 0;

求模

/* "test.pyx":10
 *     g = x & y
 *     h = x | y
 *     i = x % y             # <<<<<<<<<<<<<<
 *     j = x ** y
 *     k = ~x
 */
  __pyx_t_1 = PyNumber_Remainder(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_i = __pyx_t_1;
  __pyx_t_1 = 0;

幂运算

/* "test.pyx":11
 *     h = x | y
 *     i = x % y
 *     j = x ** y             # <<<<<<<<<<<<<<
 *     k = ~x
 *     l = x >> 4
 */
  __pyx_t_1 = PyNumber_Power(__pyx_v_x, __pyx_v_y, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_j = __pyx_t_1;
  __pyx_t_1 = 0;

取反

/* "test.pyx":12
 *     i = x % y
 *     j = x ** y
 *     k = ~x             # <<<<<<<<<<<<<<
 *     l = x >> 4
 *     n = y << 2
 */
  __pyx_t_1 = PyNumber_Invert(__pyx_v_x); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 12, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_k = __pyx_t_1;
  __pyx_t_1 = 0;

右移

/* "test.pyx":13
 *     j = x ** y
 *     k = ~x
 *     l = x >> 4             # <<<<<<<<<<<<<<
 *     n = y << 2
 * 
 */
  __pyx_t_1 = __Pyx_PyInt_RshiftObjC(__pyx_v_x, __pyx_int_4, 4, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 13, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_l = __pyx_t_1;
  __pyx_t_1 = 0;

IDA伪代码略长,所以直接复制过来看看就好

v24 = off_1800095B8[32];
  if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] )
  {
    v27 = PyNumber_Rshift(v4, off_1800095B8[32]);
LABEL_35:
    v4 = v27;
    goto LABEL_36;
  }
  v25 = *(_QWORD *)(v4 + 16);
  if ( v25 )
  {
    if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
    {
      v26 = v25 + 4;
      switch ( v26 )
      {
        case 2i64:
          v27 = PyLong_FromLongLong(
                  -(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
                  v26,
                  v24,
                  0x180000000ui64);
          break;
        case 6i64:
          v27 = PyLong_FromLongLong(
                  (__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
                  v26,
                  v24,
                  0x180000000ui64);
          break;
        default:
          v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);
          break;
      }
    }
    else
    {
      v28 = -*(_DWORD *)(v4 + 24);
      if ( v25 >= 0 )
        v28 = *(_DWORD *)(v4 + 24);
      v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
    }
    goto LABEL_35;
  }
  ++*(_QWORD *)v4;
LABEL_36:
  if ( !v4 )
  {
    v12 = 2534i64;
    v13 = 13i64;
    goto LABEL_58;
  }
  v10 = (_QWORD *)v4;

off_1800095B8[32]中储存的就是4。这里并不是单纯调用一个函数,而是cython针对整数内联了一段快速处理的代码:先判断x的类型是否为PyLong_Type,不是则直接调用PyNumber_Rshift;是则根据整数对象内部的长度字段分情况直接计算。我们可以看到在else后面的PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);,从这里也可以看出右移的位数是4。

左移

/* "test.pyx":14
 *     k = ~x
 *     l = x >> 4
 *     n = y << 2             # <<<<<<<<<<<<<<
 * 
 */
  __pyx_t_1 = __Pyx_PyInt_LshiftObjC(__pyx_v_y, __pyx_int_2, 2, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_n = __pyx_t_1;
  __pyx_t_1 = 0;

IDA伪代码如下:

v29 = off_1800095B8[31];
  if ( *(_QWORD *)(v5 + 8) != PyLong_Type[0] )
    goto LABEL_54;
  v30 = *(_QWORD *)(v5 + 16);
  if ( !v30 )
  {
    ++*(_QWORD *)v5;
    goto LABEL_56;
  }
  if ( ((v30 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
  {
    switch ( v30 )
    {
      case -2i64:
        v31 = -(__int64)(*(unsigned int *)(v5 + 24) | ((unsigned __int64)*(unsigned int *)(v5 + 28) << 30));
        goto LABEL_51;
      case 2i64:
        v31 = *(unsigned int *)(v5 + 24) | ((unsigned __int64)*(unsigned int *)(v5 + 28) << 30);
        goto LABEL_51;
      default:
        v32 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 88i64))(v5, off_1800095B8[31]);
        break;
    }
    goto LABEL_55;
  }
  LODWORD(v31) = -*(_DWORD *)(v5 + 24);
  if ( v30 >= 0 )
    LODWORD(v31) = *(_DWORD *)(v5 + 24);
  if ( (_DWORD)v31 == (4 * (int)v31) >> 2 || !(_DWORD)v31 )
  {
    v32 = PyLong_FromLong((unsigned int)(4 * v31), (unsigned int)(4 * v31), v29, 0x180000000ui64);
    goto LABEL_55;
  }
  v31 = (int)v31;
LABEL_51:
  if ( v31 != (__int64)(4 * v31) >> 2 )
  {
LABEL_54:
    v32 = PyNumber_Lshift(v5, off_1800095B8[31]);
    goto LABEL_55;
  }
  v32 = PyLong_FromLongLong(4 * v31, 4 * v31, v29, 0x180000000ui64);
LABEL_55:
  v5 = v32;

这里后面的PyLong_FromLongLong(4 * v31, 4 * v31, v29, 0x180000000ui64);其实就是左移2位。

总结

下面是IDA中函数与运算符号的表格

符号 函数名
+ PyNumber_Add
- PyNumber_Subtract
* PyNumber_Multiply
/ PyNumber_TrueDivide
// PyNumber_FloorDivide
^ PyNumber_Xor
& PyNumber_And
| PyNumber_Or
% PyNumber_Remainder
** PyNumber_Power
~ PyNumber_Invert
>> PyNumber_Rshift
<< PyNumber_Lshift

条件语句以及其他运算符

python源码:

def test(x, y):
    if x in y:
        x = y
    elif x not in y:
        y = x
    else:
        x = 999999
    if x == 0 and y == 0:
        x = 1
    if y == 0 or x == 0:
        y = 1
    if not x:
        x = 0
    if x is y:
        x = 9
    if y is not None:
        y = 8

in 运算

__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 2, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "test.pyx":3
 * def test(x, y):
 *     if x in y:
 *         x = y             # <<<<<<<<<<<<<<
 *     elif x not in y:
 *         y = x
 */
    __Pyx_INCREF(__pyx_v_y);
    __Pyx_DECREF_SET(__pyx_v_x, __pyx_v_y);

    /* "test.pyx":2
 * def test(x, y):
 *     if x in y:             # <<<<<<<<<<<<<<
 *         x = y
 *     elif x not in y:
 */
    goto __pyx_L3;
  }

此处与c代码中的表现形式不同,IDA中的函数实际只有两个参数,如何区分in和not in请看下一节

not in 运算

/* "test.pyx":4
 *     if x in y:
 *         x = y
 *     elif x not in y:             # <<<<<<<<<<<<<<
 *         y = x
 *     else:
 */
  __pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 4, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "test.pyx":5
 *         x = y
 *     elif x not in y:
 *         y = x             # <<<<<<<<<<<<<<
 *     else:
 *         x = 999999
 */
    __Pyx_INCREF(__pyx_v_x);
    __Pyx_DECREF_SET(__pyx_v_y, __pyx_v_x);

    /* "test.pyx":4
 *     if x in y:
 *         x = y
 *     elif x not in y:             # <<<<<<<<<<<<<<
 *         y = x
 *     else:
 */
    goto __pyx_L3;
  }

这里的in和not in使用的是同一个函数,如何判断是in还是not in,主要是看下面对返回结果的判断。if(v8){...}这一块和上一节一样,是对返回值为真(即in成立)的判断;而后面else这一块才是not in为真时执行的内容。

备注:如果没有使用到if-else语句的情况下,这里的结果判断会显示为if(!v8){...}

and 运算

/* "test.pyx":8
 *     else:
 *         x = 999999
 *     if x == 0 and y == 0:             # <<<<<<<<<<<<<<
 *         x = 1
 *     if y == 0 or x == 0:
 */
  __pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 8, __pyx_L1_error)
  if (__pyx_t_2) {
  } else {
    __pyx_t_1 = __pyx_t_2;
    goto __pyx_L5_bool_binop_done;
  }
  __pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_y, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 8, __pyx_L1_error)
  __pyx_t_1 = __pyx_t_2;
  __pyx_L5_bool_binop_done:;
  if (__pyx_t_1) {

    /* "test.pyx":9
 *         x = 999999
 *     if x == 0 and y == 0:
 *         x = 1             # <<<<<<<<<<<<<<
 *     if y == 0 or x == 0:
 *         y = 1
 */
    __Pyx_INCREF(__pyx_int_1);
    __Pyx_DECREF_SET(__pyx_v_x, __pyx_int_1);

    /* "test.pyx":8
 *     else:
 *         x = 999999
 *     if x == 0 and y == 0:             # <<<<<<<<<<<<<<
 *         x = 1
 *     if y == 0 or x == 0:
 */
  }

这里的and运算实际上是使用了两个if包裹,而sub_180002890实际上就是一个判断函数

or 运算

/* "test.pyx":10
 *     if x == 0 and y == 0:
 *         x = 1
 *     if y == 0 or x == 0:             # <<<<<<<<<<<<<<
 *         y = 1
 *     if not x:
 */
  __pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_y, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 10, __pyx_L1_error)
  if (!__pyx_t_2) {
  } else {
    __pyx_t_1 = __pyx_t_2;
    goto __pyx_L8_bool_binop_done;
  }
  __pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 10, __pyx_L1_error)
  __pyx_t_1 = __pyx_t_2;
  __pyx_L8_bool_binop_done:;
  if (__pyx_t_1) {

    /* "test.pyx":11
 *         x = 1
 *     if y == 0 or x == 0:
 *         y = 1             # <<<<<<<<<<<<<<
 *     if not x:
 *         x = 0
 */
    __Pyx_INCREF(__pyx_int_1);
    __Pyx_DECREF_SET(__pyx_v_y, __pyx_int_1);

    /* "test.pyx":10
 *     if x == 0 and y == 0:
 *         x = 1
 *     if y == 0 or x == 0:             # <<<<<<<<<<<<<<
 *         y = 1
 *     if not x:
 */
  }

or运算同样使用了sub_180002890函数做判定,我们注意到if(v17)这里:如果第一个判断返回为真,则直接执行内容,不再进行第二个判断。

not 运算

/* "test.pyx":12
 *     if y == 0 or x == 0:
 *         y = 1
 *     if not x:             # <<<<<<<<<<<<<<
 *         x = 0
 *     if x is y:
 */
  __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_x); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 12, __pyx_L1_error)
  __pyx_t_2 = (!__pyx_t_1);
  if (__pyx_t_2) {

    /* "test.pyx":13
 *         y = 1
 *     if not x:
 *         x = 0             # <<<<<<<<<<<<<<
 *     if x is y:
 *         x = 9
 */
    __Pyx_INCREF(__pyx_int_0);
    __Pyx_DECREF_SET(__pyx_v_x, __pyx_int_0);

    /* "test.pyx":12
 *     if y == 0 or x == 0:
 *         y = 1
 *     if not x:             # <<<<<<<<<<<<<<
 *         x = 0
 *     if x is y:
 */
  }

not运算使用了sub_180004610函数作判定,而内容其实主要是PyObject_IsTrue函数,这里判断是否为真,然后返回结果判断。

is 运算

/* "test.pyx":14
 *     if not x:
 *         x = 0
 *     if x is y:             # <<<<<<<<<<<<<<
 *         x = 9
 *     if y is not None:
 */
  __pyx_t_2 = (__pyx_v_x == __pyx_v_y);
  if (__pyx_t_2) {

    /* "test.pyx":15
 *         x = 0
 *     if x is y:
 *         x = 9             # <<<<<<<<<<<<<<
 *     if y is not None:
 *         y = 8
 */
    __Pyx_INCREF(__pyx_int_9);
    __Pyx_DECREF_SET(__pyx_v_x, __pyx_int_9);

    /* "test.pyx":14
 *     if not x:
 *         x = 0
 *     if x is y:             # <<<<<<<<<<<<<<
 *         x = 9
 *     if y is not None:
 */
  }

is运算在python中其实是判断两个变量是否引用的是同一块地址,于是在ida中会直接显示为if(v4==v3)

is not 和 None

/* "test.pyx":16
 *     if x is y:
 *         x = 9
 *     if y is not None:             # <<<<<<<<<<<<<<
 *         y = 8
 */
  __pyx_t_2 = (__pyx_v_y != Py_None);
  if (__pyx_t_2) {

    /* "test.pyx":17
 *         x = 9
 *     if y is not None:
 *         y = 8             # <<<<<<<<<<<<<<
 */
    __Pyx_INCREF(__pyx_int_8);
    __Pyx_DECREF_SET(__pyx_v_y, __pyx_int_8);

    /* "test.pyx":16
 *     if x is y:
 *         x = 9
 *     if y is not None:             # <<<<<<<<<<<<<<
 *         y = 8
 */
  }

这里的is not和上面的is其实就是取了相反的符号,然后这里使用了None,在ida中显示为一个外部的结构体Py_NoneStruct

if、elif、else的表现

v5 = PySequence_Contains(a3, a2);
  v6 = 0i64;
  if ( v5 < 0 )
  {
    v12 = 2327i64;
    v13 = 2i64;
    goto LABEL_41;
  }
  if ( v5 == 1 )
  {
    ++*v3;
    v7 = v4;
    v4 = v3;
  }
  else
  {
    v8 = PySequence_Contains(v3, v4);
    if ( v8 < 0 )
    {
      v12 = 2356i64;
      v13 = 4i64;
      goto LABEL_41;
    }
    if ( v8 )
    {
      v9 = off_1800095B8;
      v7 = v4;
      ++*off_1800095B8[22];
      v4 = v9[22];
    }
    else
    {
      ++*v4;
      v7 = v3;
      v3 = v4;
    }
  }

这三个内容的表现在前面有一部分展现,这里全部显示出来,可以看出其实elif就是在上一个if的else分支里面又套了一层if,else与c中的else无差别。

下一篇再分析关于cython函数调用、循环、列表操作等内容

点击收藏 | 0 关注 | 1 打赏
  • 动动手指,沙发就是你的了!
登录 后跟帖