编译
想要分析cython程序,可以从自己编译一个cython程序开始,通过对比cython生成的c代码和ida中的伪代码来分析实际情况。注意:windows平台和linux平台下cython使用的c编译器不同,所以生成出来的代码结构可能会有区别,下文中均以windows平台为样板。
创建pyx文件
pyx文件其实就是以py格式编写的编程内容。
def test(a, b):
c = a + b
print(c)
创建setup.py文件
导入cythonize模块
from distutils.core import setup
from Cython.Build import cythonize
setup(
ext_modules = cythonize("test.pyx")
)
# test.pyx是创建的pyx文件名
命令行编译
使用cython模块编译
cython test.pyx
使用cythonize模块编译
cythonize -a -i test.pyx
使用setup.py编译
python setup.py build_ext --inplace
编译之后会生成一个c文件和.pyd文件,其中c文件是源码,pyd则是我们的目标分析文件了
分析
比较指令
我们编写一段判断函数:
def test(flag):
enc = 19
if flag == enc:
pass
if flag >= enc:
pass
if flag <= enc:
pass
if flag != enc:
pass
if flag > enc:
pass
if flag < enc:
pass
查看生成的test.c文件中相对应的内容
由于代码较长此处省略大部分,通过观察我们可以发现if flag == enc:
类的句式被翻译成了
__pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_enc); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = PyObject_RichCompare(__pyx_v_flag, __pyx_t_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 3, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 3, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
if (__pyx_t_3) {
}
通过函数名可以得知“==”这类比较使用的是PyObject_RichCompare
函数,第一个参数就是前面需要比较的内容,第二个参数就是后面用于比较的内容,第三个参数就是指定比较类别。
可以观测到基本都是这种类型。下面就是比较符号与比较类型的表格:
符号 | 类型 |
---|---|
== | Py_EQ |
>= | Py_GE |
<= | Py_LE |
!= | Py_NE |
> | Py_GT |
< | Py_LT |
观测IDA中的反编译情况如下:
从上面两个图可以看出在IDA中的PyObject_RichCompare
是引用了外部函数所以显示了函数名,对应的参数与c中的代码相同,那么Py_EQ
等比较类别就用了数字来替代。
根据顺序我们可以推测出下面的列表:
符号 | 类型 | 数字 |
---|---|---|
== | Py_EQ | 2 |
>= | Py_GE | 5 |
<= | Py_LE | 1 |
!= | Py_NE | 3 |
> | Py_GT | 4 |
< | Py_LT | 0 |
变量类型
编写一段覆盖所有变量类型的代码:
def test(flag):
tmp = 19
tmp1 = 21.3
tmp2 = 437593479587349875983475987349587324895
tmp3 = "tmp1"
tmp4 = True
tmp5 = False
tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
tmp7 = []
tmp8 = (1,3,99)
tmp9 = {"key" : "key", "value" : "value"}
print(tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9)
对比翻译c后的内容查看:
通过变量定义可以发现,cython在编译时会先把硬编码的数字解析一遍:我们写的普通整数被识别为long
类型,小数则是识别成了double
类型(无论长短),太长的整数识别为PyObject
类型,字符串也是PyObject
类型,布尔值是int
类型,列表、元组、字典都是PyObject
类型。
数字
c代码如下:
/* "test.pyx":2
* def test(flag):
* tmp = 19 # <<<<<<<<<<<<<<
* tmp1 = 21.3
* tmp2 = 437593479587349875983475987349587324895
*/
__pyx_v_tmp = 19;
在c中没有到达long long上限的整数基本都是直接赋值,我们看看IDA中的赋值长下面这个样
直接使用PyLong_FromLong
函数赋值,参数就是整数
小数
c代码如下:
/* "test.pyx":3
* def test(flag):
* tmp = 19
* tmp1 = 21.3 # <<<<<<<<<<<<<<
* tmp2 = 437593479587349875983475987349587324895
* tmp3 = "tmp1"
*/
__pyx_v_tmp1 = 21.3;
在IDA中显示如下:
这里看上去比较奇怪,我们查看汇编就知道其实是ida没有识别出载入参数
xmm0是浮点数寄存器,一般浮点数都会放入xmm系列寄存器中计算,而在正常的程序参数列表中没有xmm寄存器,所以ida没有识别出来,我们查看qword_180006E28指向地址就可以看到我们输入的浮点数
长整数
c代码如下:
/* "test.pyx":4
* tmp = 19
* tmp1 = 21.3
* tmp2 = 437593479587349875983475987349587324895 # <<<<<<<<<<<<<<
* tmp3 = "tmp1"
* tmp4 = True
*/
__Pyx_INCREF(__pyx_int_0x149357046d142e1e6e1948884dc976fdf);
__pyx_v_tmp2 = __pyx_int_0x149357046d142e1e6e1948884dc976fdf;
长整数在IDA中内容如下:
这里与数字不同的是这里使用的是PyLong_FromString
函数,这也表明长整数在编译之后储存的是字符串,在使用的时候再将字符串转为数字。
值得一提的是如果是负数也是如此。
字符串
c代码如下:
/* "test.pyx":5
* tmp1 = 21.3
* tmp2 = 437593479587349875983475987349587324895
* tmp3 = "tmp1" # <<<<<<<<<<<<<<
* tmp4 = True
* tmp5 = False
*/
__Pyx_INCREF(__pyx_n_s_tmp1);
__pyx_v_tmp3 = __pyx_n_s_tmp1;
字符串调用的其实是赋值好的全局变量字符串,在c文件中有直接赋值
在使用前还有一次初始化赋值
在IDA中的内容如下:
布尔值
c代码如下:
/* "test.pyx":6
* tmp2 = 437593479587349875983475987349587324895
* tmp3 = "tmp1"
* tmp4 = True # <<<<<<<<<<<<<<
* tmp5 = False
* tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
*/
__pyx_v_tmp4 = 1;
/* "test.pyx":7
* tmp3 = "tmp1"
* tmp4 = True
* tmp5 = False # <<<<<<<<<<<<<<
* tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
* tmp7 = []
*/
__pyx_v_tmp5 = 0;
布尔值在IDA中比较简单
列表
c代码如下:
/* "test.pyx":8
* tmp4 = True
* tmp5 = False
* tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp] # <<<<<<<<<<<<<<
* tmp7 = []
* tmp8 = (1,3,99)
*/
__pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_tmp); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_t_2 = PyList_New(11); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__Pyx_INCREF(__pyx_int_45);
__Pyx_GIVEREF(__pyx_int_45);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 0, __pyx_int_45)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_int_neg_67);
__Pyx_GIVEREF(__pyx_int_neg_67);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 1, __pyx_int_neg_67)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_float_21_4);
__Pyx_GIVEREF(__pyx_float_21_4);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 2, __pyx_float_21_4)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_float_23_7);
__Pyx_GIVEREF(__pyx_float_23_7);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 3, __pyx_float_23_7)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_int_large_neg_43759347958734_xxx_475987349587324895);
__Pyx_GIVEREF(__pyx_int_large_neg_43759347958734_xxx_475987349587324895);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 4, __pyx_int_large_neg_43759347958734_xxx_475987349587324895)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_int_0x119de994f9e3728a1652147);
__Pyx_GIVEREF(__pyx_int_0x119de994f9e3728a1652147);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 5, __pyx_int_0x119de994f9e3728a1652147)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_n_s_hello);
__Pyx_GIVEREF(__pyx_n_s_hello);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 6, __pyx_n_s_hello)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(__pyx_n_s_guheng);
__Pyx_GIVEREF(__pyx_n_s_guheng);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 7, __pyx_n_s_guheng)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(Py_True);
__Pyx_GIVEREF(Py_True);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 8, Py_True)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_INCREF(Py_False);
__Pyx_GIVEREF(Py_False);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 9, Py_False)) __PYX_ERR(0, 8, __pyx_L1_error);
__Pyx_GIVEREF(__pyx_t_1);
if (__Pyx_PyList_SET_ITEM(__pyx_t_2, 10, __pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error);
__pyx_t_1 = 0;
__pyx_v_tmp6 = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
/* "test.pyx":9
* tmp5 = False
* tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
* tmp7 = [] # <<<<<<<<<<<<<<
* tmp8 = (1,3,99)
* tmp9 = {"key" : "key", "value" : "value"}
*/
__pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 9, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
__pyx_v_tmp7 = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
列表在IDA中情况如下:
首先使用PyList_New
创建了一个长度为11的空列表,然后直接使用数组赋值
元组
c代码如下:
/* "test.pyx":10
* tmp6 = [45, -67, 21.4, 23.7, -437593479587349875983475987349587324895, 340759348759834759853842759, "hello", "guheng", True, False, tmp]
* tmp7 = []
* tmp8 = (1,3,99) # <<<<<<<<<<<<<<
* tmp9 = {"key" : "key", "value" : "value"}
* print(tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9)
*/
__Pyx_INCREF(__pyx_tuple_);
__pyx_v_tmp8 = __pyx_tuple_;
在IDA中的元组表示如下:
字典
c代码如下:
/* "test.pyx":11
* tmp7 = []
* tmp8 = (1,3,99)
* tmp9 = {"key" : "key", "value" : "value"} # <<<<<<<<<<<<<<
* print(tmp, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9)
*/
__pyx_t_2 = __Pyx_PyDict_NewPresized(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_key, __pyx_n_s_key) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
if (PyDict_SetItem(__pyx_t_2, __pyx_n_s_value, __pyx_n_s_value) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
__pyx_v_tmp9 = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
在IDA中情况如下:
使用PyDict_New
创建字典,PyDict_SetItem
函数是添加字典中的键值。
变量初始化
在上文中的列表赋值跟我们想象中的函数不太一样,这是因为列表中的内容全部在其他函数中进行了初始化,然后放入了off_1800095B8
数据块中统一管理,我们看到下面这一段c代码:
/* #### Code section: init_constants ### */
static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) {
if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error);
__pyx_float_21_4 = PyFloat_FromDouble(21.4); if (unlikely(!__pyx_float_21_4)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_float_23_7 = PyFloat_FromDouble(23.7); if (unlikely(!__pyx_float_23_7)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_3 = PyInt_FromLong(3); if (unlikely(!__pyx_int_3)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_45 = PyInt_FromLong(45); if (unlikely(!__pyx_int_45)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_99 = PyInt_FromLong(99); if (unlikely(!__pyx_int_99)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_0x119de994f9e3728a1652147 = PyInt_FromString((char *)"0x119de994f9e3728a1652147", 0, 0); if (unlikely(!__pyx_int_0x119de994f9e3728a1652147)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_0x149357046d142e1e6e1948884dc976fdf = PyInt_FromString((char *)"0x149357046d142e1e6e1948884dc976fdf", 0, 0); if (unlikely(!__pyx_int_0x149357046d142e1e6e1948884dc976fdf)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_neg_67 = PyInt_FromLong(-67); if (unlikely(!__pyx_int_neg_67)) __PYX_ERR(0, 1, __pyx_L1_error)
__pyx_int_large_neg_43759347958734_xxx_475987349587324895 = PyInt_FromString((char *)"-437593479587349875983475987349587324895", 0, 0); if (unlikely(!__pyx_int_large_neg_43759347958734_xxx_475987349587324895)) __PYX_ERR(0, 1, __pyx_L1_error)
return 0;
__pyx_L1_error:;
return -1;
}
这里就是把列表中需要用的内容进行了初始化,我们再看到IDA中该函数的表现形式:
这里发现其实在初始化之后把初始化之后的值放入了off_1800095B8
块中,列表根据编译规定的下标提取其中的内容。
运算符号
我们将所有的运算符号全部放入函数中,看看编译出来情况如何:
def test(x, y):
a = x + y
b = x - y
c = x * y
d = x / y
e = x // y
f = x ^ y
g = x & y
h = x | y
i = x % y
j = x ** y
k = ~x
l = x >> 4
n = y << 2
按之前的一样,放出c代码和IDA截图
加法
/* "test.pyx":2
* def test(x, y):
* a = x + y # <<<<<<<<<<<<<<
* b = x - y
* c = x * y
*/
__pyx_t_1 = PyNumber_Add(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_a = __pyx_t_1;
__pyx_t_1 = 0;
减法
/* "test.pyx":3
* def test(x, y):
* a = x + y
* b = x - y # <<<<<<<<<<<<<<
* c = x * y
* d = x / y
*/
__pyx_t_1 = PyNumber_Subtract(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_b = __pyx_t_1;
__pyx_t_1 = 0;
乘法
/* "test.pyx":4
* a = x + y
* b = x - y
* c = x * y # <<<<<<<<<<<<<<
* d = x / y
* e = x // y
*/
__pyx_t_1 = PyNumber_Multiply(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 4, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_c = __pyx_t_1;
__pyx_t_1 = 0;
除法
/* "test.pyx":5
* b = x - y
* c = x * y
* d = x / y # <<<<<<<<<<<<<<
* e = x // y
* f = x ^ y
*/
__pyx_t_1 = __Pyx_PyNumber_Divide(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 5, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_d = __pyx_t_1;
__pyx_t_1 = 0;
整除
/* "test.pyx":6
* c = x * y
* d = x / y
* e = x // y # <<<<<<<<<<<<<<
* f = x ^ y
* g = x & y
*/
__pyx_t_1 = PyNumber_FloorDivide(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 6, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_e = __pyx_t_1;
__pyx_t_1 = 0;
异或
/* "test.pyx":7
* d = x / y
* e = x // y
* f = x ^ y # <<<<<<<<<<<<<<
* g = x & y
* h = x | y
*/
__pyx_t_1 = PyNumber_Xor(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 7, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_f = __pyx_t_1;
__pyx_t_1 = 0;
按位与
/* "test.pyx":8
* e = x // y
* f = x ^ y
* g = x & y # <<<<<<<<<<<<<<
* h = x | y
* i = x % y
*/
__pyx_t_1 = PyNumber_And(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_g = __pyx_t_1;
__pyx_t_1 = 0;
按位或
/* "test.pyx":9
* f = x ^ y
* g = x & y
* h = x | y # <<<<<<<<<<<<<<
* i = x % y
* j = x ** y
*/
__pyx_t_1 = PyNumber_Or(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 9, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_h = __pyx_t_1;
__pyx_t_1 = 0;
求模
/* "test.pyx":10
* g = x & y
* h = x | y
* i = x % y # <<<<<<<<<<<<<<
* j = x ** y
* k = ~x
*/
__pyx_t_1 = PyNumber_Remainder(__pyx_v_x, __pyx_v_y); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_i = __pyx_t_1;
__pyx_t_1 = 0;
幂
/* "test.pyx":11
* h = x | y
* i = x % y
* j = x ** y # <<<<<<<<<<<<<<
* k = ~x
* l = x >> 4
*/
__pyx_t_1 = PyNumber_Power(__pyx_v_x, __pyx_v_y, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 11, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_j = __pyx_t_1;
__pyx_t_1 = 0;
取反
/* "test.pyx":12
* i = x % y
* j = x ** y
* k = ~x # <<<<<<<<<<<<<<
* l = x >> 4
* n = y << 2
*/
__pyx_t_1 = PyNumber_Invert(__pyx_v_x); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 12, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_k = __pyx_t_1;
__pyx_t_1 = 0;
右移
/* "test.pyx":13
* j = x ** y
* k = ~x
* l = x >> 4 # <<<<<<<<<<<<<<
* n = y << 2
*
*/
__pyx_t_1 = __Pyx_PyInt_RshiftObjC(__pyx_v_x, __pyx_int_4, 4, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 13, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_l = __pyx_t_1;
__pyx_t_1 = 0;
IDA伪代码略长,所以直接复制过来看看就好
v24 = off_1800095B8[32];
if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] )
{
v27 = PyNumber_Rshift(v4, off_1800095B8[32]);
LABEL_35:
v4 = v27;
goto LABEL_36;
}
v25 = *(_QWORD *)(v4 + 16);
if ( v25 )
{
if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
{
v26 = v25 + 4;
switch ( v26 )
{
case 2i64:
v27 = PyLong_FromLongLong(
-(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
v26,
v24,
0x180000000ui64);
break;
case 6i64:
v27 = PyLong_FromLongLong(
(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,
v26,
v24,
0x180000000ui64);
break;
default:
v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);
break;
}
}
else
{
v28 = -*(_DWORD *)(v4 + 24);
if ( v25 >= 0 )
v28 = *(_DWORD *)(v4 + 24);
v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
}
goto LABEL_35;
}
++*(_QWORD *)v4;
LABEL_36:
if ( !v4 )
{
v12 = 2534i64;
v13 = 13i64;
goto LABEL_58;
}
v10 = (_QWORD *)v4;
off_1800095B8[32]
中储存的就是4,这里为了安全性,对整数的处理还做了一些额外的检查,我们可以看到在else后面PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
这里也可以看到是右移多少。
左移
/* "test.pyx":14
* k = ~x
* l = x >> 4
* n = y << 2 # <<<<<<<<<<<<<<
*
*/
__pyx_t_1 = __Pyx_PyInt_LshiftObjC(__pyx_v_y, __pyx_int_2, 2, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_v_n = __pyx_t_1;
__pyx_t_1 = 0;
v29 = off_1800095B8[31];
if ( *(_QWORD *)(v5 + 8) != PyLong_Type[0] )
goto LABEL_54;
v30 = *(_QWORD *)(v5 + 16);
if ( !v30 )
{
++*(_QWORD *)v5;
goto LABEL_56;
}
if ( ((v30 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 )
{
switch ( v30 )
{
case -2i64:
v31 = -(__int64)(*(unsigned int *)(v5 + 24) | ((unsigned __int64)*(unsigned int *)(v5 + 28) << 30));
goto LABEL_51;
case 2i64:
v31 = *(unsigned int *)(v5 + 24) | ((unsigned __int64)*(unsigned int *)(v5 + 28) << 30);
goto LABEL_51;
default:
v32 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 88i64))(v5, off_1800095B8[31]);
break;
}
goto LABEL_55;
}
LODWORD(v31) = -*(_DWORD *)(v5 + 24);
if ( v30 >= 0 )
LODWORD(v31) = *(_DWORD *)(v5 + 24);
if ( (_DWORD)v31 == (4 * (int)v31) >> 2 || !(_DWORD)v31 )
{
v32 = PyLong_FromLong((unsigned int)(4 * v31), (unsigned int)(4 * v31), v29, 0x180000000ui64);
goto LABEL_55;
}
v31 = (int)v31;
LABEL_51:
if ( v31 != (__int64)(4 * v31) >> 2 )
{
LABEL_54:
v32 = PyNumber_Lshift(v5, off_1800095B8[31]);
goto LABEL_55;
}
v32 = PyLong_FromLongLong(4 * v31, 4 * v31, v29, 0x180000000ui64);
LABEL_55:
v5 = v32;
这里后面的PyLong_FromLongLong(4 * v31, 4 * v31, v29, 0x180000000ui64);
其实就是左移2位。
总结
下面是IDA中函数与运算符号的表格
符号 | 函数名 | |
---|---|---|
+ | PyNumber_Add | |
- | PyNumber_Subtract | |
* | PyNumber_Multiply | |
/ | PyNumber_TrueDivide | |
// | PyNumber_FloorDivide | |
^ | PyNumber_Xor | |
& | PyNumber_And | |
\| | PyNumber_Or | |
% | PyNumber_Remainder | |
** | PyNumber_Power | |
~ | PyNumber_Invert | |
>> | PyNumber_Rshift | |
<< | PyNumber_Lshift | |
条件语句以及其他运算符
python源码:
def test(x, y):
if x in y:
x = y
elif x not in y:
y = x
else:
x = 999999
if x == 0 and y == 0:
x = 1
if y == 0 or x == 0:
y = 1
if not x:
x = 0
if x is y:
x = 9
if y is not None:
y = 8
in 运算
__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 2, __pyx_L1_error)
if (__pyx_t_1) {
/* "test.pyx":3
* def test(x, y):
* if x in y:
* x = y # <<<<<<<<<<<<<<
* elif x not in y:
* y = x
*/
__Pyx_INCREF(__pyx_v_y);
__Pyx_DECREF_SET(__pyx_v_x, __pyx_v_y);
/* "test.pyx":2
* def test(x, y):
* if x in y: # <<<<<<<<<<<<<<
* x = y
* elif x not in y:
*/
goto __pyx_L3;
}
此处与c代码中的表现形式不同,IDA中的函数实际只有两个参数,如何区分in和not in请看下一节
not in 运算
/* "test.pyx":4
* if x in y:
* x = y
* elif x not in y: # <<<<<<<<<<<<<<
* y = x
* else:
*/
__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 4, __pyx_L1_error)
if (__pyx_t_1) {
/* "test.pyx":5
* x = y
* elif x not in y:
* y = x # <<<<<<<<<<<<<<
* else:
* x = 999999
*/
__Pyx_INCREF(__pyx_v_x);
__Pyx_DECREF_SET(__pyx_v_y, __pyx_v_x);
/* "test.pyx":4
* if x in y:
* x = y
* elif x not in y: # <<<<<<<<<<<<<<
* y = x
* else:
*/
goto __pyx_L3;
}
这里的in和not in使用的是同一个函数,如何判断是in还是not in主要是看下面对返回结果的判断。if(v8){...}
这一块与上一节一样,是返回值为真(即in成立)时执行的内容,而后面else
这一块则是not in成立时执行的内容。
备注:如果没有使用到if-else语句的情况下,这里的结果判断会显示为if(!v8){...}
and 运算
/* "test.pyx":8
* else:
* x = 999999
* if x == 0 and y == 0: # <<<<<<<<<<<<<<
* x = 1
* if y == 0 or x == 0:
*/
__pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 8, __pyx_L1_error)
if (__pyx_t_2) {
} else {
__pyx_t_1 = __pyx_t_2;
goto __pyx_L5_bool_binop_done;
}
__pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_y, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 8, __pyx_L1_error)
__pyx_t_1 = __pyx_t_2;
__pyx_L5_bool_binop_done:;
if (__pyx_t_1) {
/* "test.pyx":9
* x = 999999
* if x == 0 and y == 0:
* x = 1 # <<<<<<<<<<<<<<
* if y == 0 or x == 0:
* y = 1
*/
__Pyx_INCREF(__pyx_int_1);
__Pyx_DECREF_SET(__pyx_v_x, __pyx_int_1);
/* "test.pyx":8
* else:
* x = 999999
* if x == 0 and y == 0: # <<<<<<<<<<<<<<
* x = 1
* if y == 0 or x == 0:
*/
}
这里的and运算实际上是使用了两个if包裹,而sub_180002890
实际上就是一个判断函数
or 运算
/* "test.pyx":10
* if x == 0 and y == 0:
* x = 1
* if y == 0 or x == 0: # <<<<<<<<<<<<<<
* y = 1
* if not x:
*/
__pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_y, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 10, __pyx_L1_error)
if (!__pyx_t_2) {
} else {
__pyx_t_1 = __pyx_t_2;
goto __pyx_L8_bool_binop_done;
}
__pyx_t_2 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 10, __pyx_L1_error)
__pyx_t_1 = __pyx_t_2;
__pyx_L8_bool_binop_done:;
if (__pyx_t_1) {
/* "test.pyx":11
* x = 1
* if y == 0 or x == 0:
* y = 1 # <<<<<<<<<<<<<<
* if not x:
* x = 0
*/
__Pyx_INCREF(__pyx_int_1);
__Pyx_DECREF_SET(__pyx_v_y, __pyx_int_1);
/* "test.pyx":10
* if x == 0 and y == 0:
* x = 1
* if y == 0 or x == 0: # <<<<<<<<<<<<<<
* y = 1
* if not x:
*/
}
or运算同样使用了sub_180002890
函数做判定,我们注意到if(v17)
这里的逻辑就是:如果第一个判断返回为真,则直接执行内容。
not 运算
/* "test.pyx":12
* if y == 0 or x == 0:
* y = 1
* if not x: # <<<<<<<<<<<<<<
* x = 0
* if x is y:
*/
__pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_x); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 12, __pyx_L1_error)
__pyx_t_2 = (!__pyx_t_1);
if (__pyx_t_2) {
/* "test.pyx":13
* y = 1
* if not x:
* x = 0 # <<<<<<<<<<<<<<
* if x is y:
* x = 9
*/
__Pyx_INCREF(__pyx_int_0);
__Pyx_DECREF_SET(__pyx_v_x, __pyx_int_0);
/* "test.pyx":12
* if y == 0 or x == 0:
* y = 1
* if not x: # <<<<<<<<<<<<<<
* x = 0
* if x is y:
*/
}
not运算使用了sub_180004610
函数作判定,而内容其实主要是PyObject_IsTrue
函数,这里判断是否为真,然后返回结果判断。
is 运算
/* "test.pyx":14
* if not x:
* x = 0
* if x is y: # <<<<<<<<<<<<<<
* x = 9
* if y is not None:
*/
__pyx_t_2 = (__pyx_v_x == __pyx_v_y);
if (__pyx_t_2) {
/* "test.pyx":15
* x = 0
* if x is y:
* x = 9 # <<<<<<<<<<<<<<
* if y is not None:
* y = 8
*/
__Pyx_INCREF(__pyx_int_9);
__Pyx_DECREF_SET(__pyx_v_x, __pyx_int_9);
/* "test.pyx":14
* if not x:
* x = 0
* if x is y: # <<<<<<<<<<<<<<
* x = 9
* if y is not None:
*/
}
is运算在python中其实是判断两个变量是否引用的是同一块地址,于是在ida中会直接显示为if(v4==v3)
is not 和 None
/* "test.pyx":16
* if x is y:
* x = 9
* if y is not None: # <<<<<<<<<<<<<<
* y = 8
*/
__pyx_t_2 = (__pyx_v_y != Py_None);
if (__pyx_t_2) {
/* "test.pyx":17
* x = 9
* if y is not None:
* y = 8 # <<<<<<<<<<<<<<
*/
__Pyx_INCREF(__pyx_int_8);
__Pyx_DECREF_SET(__pyx_v_y, __pyx_int_8);
/* "test.pyx":16
* if x is y:
* x = 9
* if y is not None: # <<<<<<<<<<<<<<
* y = 8
*/
}
这里的is not和上面的is其实就是取了相反的符号,然后这里使用了None,在ida中显示为一个外部的结构体Py_NoneStruct
if、elif、else的表现
v5 = PySequence_Contains(a3, a2);
v6 = 0i64;
if ( v5 < 0 )
{
v12 = 2327i64;
v13 = 2i64;
goto LABEL_41;
}
if ( v5 == 1 )
{
++*v3;
v7 = v4;
v4 = v3;
}
else
{
v8 = PySequence_Contains(v3, v4);
if ( v8 < 0 )
{
v12 = 2356i64;
v13 = 4i64;
goto LABEL_41;
}
if ( v8 )
{
v9 = off_1800095B8;
v7 = v4;
++*off_1800095B8[22];
v4 = v9[22];
}
else
{
++*v4;
v7 = v3;
v3 = v4;
}
}
这三个内容的表现在前面有一部分展现,这里全部显示出来,可以看出其实elif就是在if的else分支里又套了一层if,else与c中的else无差别。
下一篇再分析关于cython函数调用、循环、列表操作等内容