heap buffer overflow found at micropython/lib/re1.5/compilecode.c:68 [micropython@a5bdd39127]
Hi all, I'm Wonil Jang, from the research group S2Lab in UNIST.
We found a heap buffer overflow bug in MicroPython using our custom tool. The detailed information is as follows.
Environment
- OS: Ubuntu 22.04
- Version: micropython at commit a5bdd39127
- Build: unix ports
- Bug Type: heap-buffer-overflow
- Bug Location: micropython/lib/re1.5/compilecode.c:68
- Credits: Junwha Hong and Wonil Jang, from S2Lab in UNIST
Problem Statement
...
case '[': {
int cnt;
term = PC;
re++;
if (*re == '^') {
EMIT(PC++, ClassNot);
re++;
} else {
EMIT(PC++, Class);
}
PC++; // Skip # of pair byte
prog->len++;
for (cnt = 0; *re != ']'; re++, cnt++) { <== [1]
char c = *re;
if (c == '\\') {
++re;
c = *re;
if (MATCH_NAMED_CLASS_CHAR(c)) {
c = RE15_CLASS_NAMED_CLASS_INDICATOR;
goto emit_char_pair;
}
}
if (!c) return NULL;
if (re[1] == '-' && re[2] != ']') { <== [2]
re += 2;
}
emit_char_pair:
EMIT(PC++, c);
EMIT(PC++, *re);
}
EMIT_CHECKED(term + 1, cnt);
break;
}
...
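At [1], we found a heap-buffer-overflow bug: the loop never checks whether `re` has reached the end of the regex string.
For instance, given the regex string `([a-zA-Z_0-` (followed by the terminating NUL), the loop eventually processes the trailing `0-`.
At [2], `re[1] == '-'` and `re[2]` (the NUL terminator) is not `']'`, so `re` is advanced by 2 and now points at the terminator; the `if (!c)` check does not catch this because `c` still holds `'0'`.
Because there is no check on the regex string length, the loop increment at [1] then moves `re` past the terminator, and `*re != ']'` reads a byte beyond the end of the buffer, i.e. an out-of-bounds read.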
Example (replication)
>>> import re
>>> re.sub(r"([a-zA-Z_0-", "b", "1a2a3a")
=================================================================
#0 0x5555557b9461 in _compilecode /home/qbit/testing-2023/micropython/ports/unix/../../lib/re1.5/compilecode.c:68:27
#1 0x5555557b97ed in _compilecode /home/qbit/testing-2023/micropython/ports/unix/../../lib/re1.5/compilecode.c:103:18
#2 0x5555557b78da in re1_5_sizecode /home/qbit/testing-2023/micropython/ports/unix/../../lib/re1.5/compilecode.c:194:9
#3 0x5555557b78da in mod_re_compile /home/qbit/testing-2023/micropython/ports/unix/../../extmod/modre.c:427:16
#4 0x5555557b6cd7 in re_sub_helper /home/qbit/testing-2023/micropython/ports/unix/../../extmod/modre.c:287:16
Patch
The simplest patch is to add a check for the end of the regex string (the NUL terminator) inside the character-class loop.
Log
#0 0x5555557b9461 in _compilecode /home/qbit/testing-2023/micropython/ports/unix/../../lib/re1.5/compilecode.c:68:27
#1 0x5555557b97ed in _compilecode /home/qbit/testing-2023/micropython/ports/unix/../../lib/re1.5/compilecode.c:103:18
#2 0x5555557b78da in re1_5_sizecode /home/qbit/testing-2023/micropython/ports/unix/../../lib/re1.5/compilecode.c:194:9
#3 0x5555557b78da in mod_re_compile /home/qbit/testing-2023/micropython/ports/unix/../../extmod/modre.c:427:16
#4 0x5555557b6cd7 in re_sub_helper /home/qbit/testing-2023/micropython/ports/unix/../../extmod/modre.c:287:16
#5 0x555555782c1c in mp_execute_bytecode /home/qbit/testing-2023/micropython/ports/unix/../../py/vm.c:1042:21
#6 0x55555574261b in fun_bc_call /home/qbit/testing-2023/micropython/ports/unix/../../py/objfun.c:273:42
#7 0x555555903f3d in execute_from_lexer /home/qbit/testing-2023/micropython/ports/unix/main.c:161:13
#8 0x555555902ad5 in do_file /home/qbit/testing-2023/micropython/ports/unix/main.c:310:12
#9 0x555555902ad5 in main_ /home/qbit/testing-2023/micropython/ports/unix/main.c:722:19
#10 0x7ffff7c29d8f in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#11 0x7ffff7c29e3f in __libc_start_main csu/../csu/libc-start.c:392:3
#12 0x555555593a34 in _start (/home/qbit/testing-2023/micropython/ports/unix/build-standard/micropython+0x3fa34)
Thank you for reading my report.
heap-buffer-overflow: from integer overflow at mp_stream_rw
Hi all, sorry for burdening you with a lot of bug reports. We found a heap buffer overflow in mp_stream_rw.
Summary
- OS: Ubuntu 22.04
- version: micropython@a00c9d56db775ee5fc14c2db60eb07bab8e872dd
- port: unix
- contribution: Junwha Hong and Wonil Jang @S2-Lab, UNIST
- description:
- at mp_stream_rw (py/stream.c:121), the unsigned integer size is only checked with `> 0`; subtracting a larger value from it wraps around (integer overflow), which then leads to a heap buffer overflow
- at mp_stream_rw
PoC
import os
# Bare attribute lookups: each raises AttributeError if the attribute is
# missing from `os` (presumably a fail-fast check for littlefs support — confirm).
os.VfsLfs1
os.VfsLfs2
class RAMBlockDevice:
    """In-memory block device backed by a single bytearray.

    Every readblocks/writeblocks call returns ``self.ret``, so the caller
    can change that attribute to make the device report an arbitrary
    status value to the filesystem layer.
    """

    # Size in bytes of one block.
    ERASE_BLOCK_SIZE = 512

    def __init__(self, blocks):
        """Allocate ``blocks`` zero-filled blocks; ``ret`` starts at 0."""
        self.data = bytearray(blocks * self.ERASE_BLOCK_SIZE)
        self.ret = 0

    def readblocks(self, block, buf, off):
        """Copy len(buf) bytes starting at (block, off) into ``buf``.

        Returns the current value of ``self.ret``.
        """
        addr = block * self.ERASE_BLOCK_SIZE + off
        for i in range(len(buf)):
            buf[i] = self.data[addr + i]
        return self.ret

    def writeblocks(self, block, buf, off):
        """Copy ``buf`` into the backing store starting at (block, off).

        Returns the current value of ``self.ret``.
        """
        addr = block * self.ERASE_BLOCK_SIZE + off
        for i in range(len(buf)):
            self.data[addr + i] = buf[i]
        return self.ret

    def ioctl(self, op, arg):
        """Answer the three control operations this device implements.

        Any other ``op`` falls through and returns None.
        """
        if op == 4:  # block count
            return len(self.data) // self.ERASE_BLOCK_SIZE
        if op == 5:  # block size
            return self.ERASE_BLOCK_SIZE
        if op == 6:  # erase block
            return 0
def create_vfs(bdev, vfs_class):
    """Format ``bdev`` with ``vfs_class``, mount it and create file "f".

    The file "f" is written with the text "test" and closed before the
    mounted filesystem object is returned.
    """
    # Subtracting 0 leaves a numeric bdev.ret unchanged; kept exactly as in
    # the reported reproducer.
    bdev.ret -= 0
    vfs_class.mkfs(bdev)
    vfs = vfs_class(bdev)
    with vfs.open("f", "w") as f:
        f.write("test")
    return vfs
# Build an 8-block RAM device, then format and mount it with os.VfsLfs1;
# create_vfs also writes the file "f" that is reopened for reading below.
bdev = RAMBlockDevice(8)
vfs = create_vfs(bdev, os.VfsLfs1)
f = vfs.open("f", "r")
# From here on every readblocks/writeblocks call on the device returns 10.
# NOTE(review): 10 is a positive value, not a negative errno, so the "EIO"
# label below looks stale — confirm.
bdev.ret = 10 # EIO
# Per the report, this 1-byte read reaches mp_stream_rw with size == 1 and
# gets out_sz == 10 back, which triggers the overflow.
f.read(1)
Problem Statement
vstr->buf is allocated at py/stream.c:122 with a size (sz) of 1.
STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte flags) {
vstr_init(&vstr, sz);
The chunk vstr->buf is passed to mp_stream_rw at py/stream.c:46 as the parameter buf_, and size is 1 here. In the first iteration of the while loop at py/stream.c:60, it calls io_func (here mp_vfs_lfs1_file_read, which reaches lfs1_cache_read), and the returned out_sz is 10.
The problem occurs here: size is 1 and out_sz is 10, and both are mp_uint_t (unsigned), so size -= out_sz wraps around (integer overflow) ⇒ size becomes 18446744073709551607 on the unix port.
// Repeatedly call the stream's read or write function on buf_ until `size`
// bytes have been transferred, the function returns 0, or it reports an error.
//   stream  - stream object; its protocol is obtained via mp_get_stream()
//   buf_    - buffer to read into or write from
//   size    - number of bytes requested
//   errcode - set to 0 on entry; written by the underlying io function
//   flags   - MP_STREAM_RW_WRITE selects write instead of read;
//             MP_STREAM_RW_ONCE returns after the first successful call
// Returns the number of bytes transferred (`done`), or the single call's
// result when MP_STREAM_RW_ONCE is set.
mp_uint_t mp_stream_rw(mp_obj_t stream, void *buf_, mp_uint_t size, int *errcode, byte flags) {
byte *buf = buf_;
typedef mp_uint_t (*io_func_t)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode);
io_func_t io_func;
const mp_stream_p_t *stream_p = mp_get_stream(stream);
// Select the transfer direction; the write slot is cast to the common signature.
if (flags & MP_STREAM_RW_WRITE) {
io_func = (io_func_t)stream_p->write;
} else {
io_func = stream_p->read;
}
*errcode = 0;
mp_uint_t done = 0;
// NOTE(review): `size > 0` is the only bound on the loop; nothing here
// checks that out_sz <= size before the subtraction below.
while (size > 0) {
mp_uint_t out_sz = io_func(stream, buf, size, errcode);
// For read, out_sz == 0 means EOF. For write, it's unspecified
// what it means, but we don't make any progress, so returning
// is still the best option.
if (out_sz == 0) {
return done;
}
if (out_sz == MP_STREAM_ERROR) {
// If we read something before getting EAGAIN, don't leak it
if (mp_is_nonblocking_error(*errcode) && done != 0) {
*errcode = 0;
}
return done;
}
if (flags & MP_STREAM_RW_ONCE) {
return out_sz;
}
buf += out_sz;
// Reported defect: if io_func returns out_sz > size, this subtraction
// on mp_uint_t wraps to a huge value (per the report it is unsigned), so
// the loop continues with buf already advanced past the caller's buffer.
size -= out_sz;
done += out_sz;
}
return done;
}
Then, because size is still greater than 0 (due to the integer overflow), it calls io_func again with the address buf_ + 10 and size 18446744073709551607.
lfs1_cache_read then performs a memcpy of diff bytes to the invalid offset buf_ + 10; thus, it is a heap buffer overflow.
static int lfs1_cache_read(lfs1_t *lfs1, lfs1_cache_t *rcache,
const lfs1_cache_t *pcache, lfs1_block_t block,
lfs1_off_t off, void *buffer, lfs1_size_t size) {
uint8_t *data = buffer;
LFS1_ASSERT(block < lfs1->cfg->block_count);
while (size > 0) {
if (pcache && block == pcache->block && off >= pcache->off &&
off < pcache->off + lfs1->cfg->prog_size) {
// is already in pcache?
lfs1_size_t diff = lfs1_min(size,
lfs1->cfg->prog_size - (off-pcache->off));
memcpy(data, &pcache->buffer[off-pcache->off], diff);
Patch
We need to compare out_sz against size (and stop or fail when out_sz > size) instead of relying only on while (size > 0) with an unsigned integer.
Crash log
#1 0x555555803061 in lfs1_cache_read /home/qbit/testing-2023/micropython/ports/unix/../../lib/littlefs/lfs1.c:39:13
#2 0x555555803061 in lfs1_file_read /home/qbit/testing-2023/micropython/ports/unix/../../lib/littlefs/lfs1.c:1611:19
#3 0x5555557d3088 in mp_vfs_lfs1_file_read /home/qbit/testing-2023/micropython/ports/unix/../../extmod/vfs_lfsx_file.c:129:28
#4 0x555555772e43 in mp_stream_rw /home/qbit/testing-2023/micropython/ports/unix/../../py/stream.c:60:28
#5 0x555555772e43 in stream_read_generic /home/qbit/testing-2023/micropython/ports/unix/../../py/stream.c:128:32
#6 0x555555782c1c in mp_execute_bytecode /home/qbit/testing-2023/micropython/ports/unix/../../py/vm.c:1042:21
#7 0x55555574261b in fun_bc_call /home/qbit/testing-2023/micropython/ports/unix/../../py/objfun.c:273:42
#8 0x555555781b2c in mp_execute_bytecode /home/qbit/testing-2023/micropython/ports/unix/../../py/vm.c:957:21
#9 0x55555574261b in fun_bc_call /home/qbit/testing-2023/micropython/ports/unix/../../py/objfun.c:273:42
#10 0x555555903f3d in execute_from_lexer /home/qbit/testing-2023/micropython/ports/unix/main.c:161:13
#11 0x555555902ad5 in do_file /home/qbit/testing-2023/micropython/ports/unix/main.c:310:12
#12 0x555555902ad5 in main_ /home/qbit/testing-2023/micropython/ports/unix/main.c:722:19
#13 0x7ffff7c29d8f in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#14 0x7ffff7c29e3f in __libc_start_main csu/../csu/libc-start.c:392:3
#15 0x555555593a34 in _start (/home/qbit/testing-2023/micropython/ports/unix/build-standard/micropython+0x3fa34)
Thank you for taking the time to review our bug report! :)