Crash in re.compile with crafted regular expression
Port, board and/or hardware
unix port
MicroPython version
MicroPython v1.26.0-preview.387.g67acac257f.dirty on 2025-07-19; linux [GCC 12.2.0] version
Reproduction
Run the following code:
import re
def test_re(r):
try:
re.compile(r)
except:
print("Error")
# too many groups
test_re("((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((a)" * 256)
Expected behaviour
An exception saying that the regular expression is too complex
Observed behaviour
Compiling this pattern consumes an amount of C stack proportional to the number of open parentheses; the C stack is exhausted and the process segfaults.
Additional Information
This was found via automated fuzzing.
Code of Conduct
Yes, I agree
Crash compiling(?) unusual code
Port, board and/or hardware
unix port, coverage, x86_64
MicroPython version
MicroPython v1.26.0-preview.521.g658a2e3dbd on 2025-08-02; linux [GCC 12.2.0] version
Reproduction
Run micropython with a snippet of unusual code:
$ ./build-coverage/micropython -c 'ans = (-1) ** 2.3; aa'
Segmentation fault
Expected behaviour
A NameError, because aa is not defined
Observed behaviour
A segfault
Additional Information
The stack trace is corrupt. Neither UBSan nor ASan gave any more useful information.
Program received signal SIGSEGV, Segmentation fault.
0x0000555555756530 in mp_state_ctx ()
(gdb) where
#0 0x0000555555756530 in mp_state_ctx ()
#1 0x0000000000000000 in ?? ()
Valgrind produced multiple diagnostics, beginning with this one, which looks interesting:
==1669951== Invalid write of size 8
==1669951== at 0x16C3B4: nlr_jump (nlrx64.c:104)
==1669951== by 0x1B23DE: fun_bc_call (objfun.c:352)
==1669951== by 0x19E61E: mp_call_function_n_kw (runtime.c:727)
==1669951== by 0x1A0DEA: mp_call_function_0 (runtime.c:701)
==1669951== by 0x264DB8: execute_from_lexer (main.c:162)
==1669951== by 0x264E67: do_str (main.c:315)
==1669951== by 0x2658D3: main_ (main.c:656)
==1669951== by 0x26619F: main (main.c:494)
==1669951== Address 0x1ffefff888 is on thread 1's stack
==1669951== 232 bytes below stack pointer
I think an nlr_buf_t registered inside fold_constants (via binary_op_maybe) is somehow coming back into play later, when the NameError is raised:
Breakpoint 1, nlr_push (nlr=nlr@entry=0x7fffffffdb10) at ../../py/nlrx64.c:55
55 unsigned int nlr_push(nlr_buf_t *nlr) {
(gdb) where
#0 nlr_push (nlr=nlr@entry=0x7fffffffdb10) at ../../py/nlrx64.c:55
#1 0x00005555556b0ae5 in execute_from_lexer (source_kind=source_kind@entry=1,
source=0x7fffffffe1a9, input_kind=input_kind@entry=MP_PARSE_FILE_INPUT,
is_repl=is_repl@entry=false) at main.c:123
#2 0x00005555556b0e68 in do_str (str=<optimized out>) at main.c:315
#3 0x00005555556b18d4 in main_ (argc=argc@entry=3, argv=argv@entry=0x7fffffffddd8)
at main.c:656
#4 0x00005555556b21a0 in main (argc=3, argv=0x7fffffffddd8) at main.c:494
(gdb) c
Continuing.
Breakpoint 1, nlr_push (nlr=nlr@entry=0x7fffffffd900) at ../../py/nlrx64.c:55
55 unsigned int nlr_push(nlr_buf_t *nlr) {
(gdb) where
#0 nlr_push (nlr=nlr@entry=0x7fffffffd900) at ../../py/nlrx64.c:55
#1 0x00005555555c5ea3 in binary_op_maybe (op=op@entry=MP_BINARY_OP_POWER,
lhs=0xffffffffffffffff, rhs=0x7ffff7c491e0, res=res@entry=0x7fffffffd998)
at ../../py/parse.c:672
#2 0x00005555555c6d42 in fold_constants (parser=parser@entry=0x7fffffffda30,
rule_id=rule_id@entry=42 '*', num_args=2) at ../../py/parse.c:780
#3 0x00005555555c6ac2 in push_result_rule (parser=parser@entry=0x7fffffffda30, src_line=1,
rule_id=rule_id@entry=42 '*', num_args=<optimized out>) at ../../py/parse.c:1033
#4 0x00005555555c86b7 in mp_parse (lex=lex@entry=0x7ffff7c48bc0,
input_kind=input_kind@entry=MP_PARSE_FILE_INPUT) at ../../py/parse.c:1263
#5 0x00005555556b0b5c in execute_from_lexer (source_kind=source_kind@entry=1,
source=<optimized out>, input_kind=input_kind@entry=MP_PARSE_FILE_INPUT,
is_repl=is_repl@entry=false) at main.c:147
#6 0x00005555556b0e68 in do_str (str=<optimized out>) at main.c:315
#7 0x00005555556b18d4 in main_ (argc=argc@entry=3, argv=argv@entry=0x7fffffffddd8)
at main.c:656
#8 0x00005555556b21a0 in main (argc=3, argv=0x7fffffffddd8) at main.c:494
(gdb) c
Continuing.
Breakpoint 1, nlr_push (nlr=nlr@entry=0x7fffffffd990) at ../../py/nlrx64.c:55
55 unsigned int nlr_push(nlr_buf_t *nlr) {
(gdb) where
#0 nlr_push (nlr=nlr@entry=0x7fffffffd990) at ../../py/nlrx64.c:55
#1 0x00005555556218fa in mp_execute_bytecode (code_state=code_state@entry=0x7fffffffda20,
inject_exc=<optimized out>, inject_exc@entry=0x0) at ../../py/vm.c:301
#2 0x00005555555fe288 in fun_bc_call (self_in=0x7ffff7c48be0, n_args=0, n_kw=0, args=0x0)
at ../../py/objfun.c:295
#3 0x00005555555ea61f in mp_call_function_n_kw (fun_in=0x7ffff7c48be0,
n_args=n_args@entry=0, n_kw=n_kw@entry=0, args=args@entry=0x0) at ../../py/runtime.c:727
#4 0x00005555555ecdeb in mp_call_function_0 (fun=<optimized out>) at ../../py/runtime.c:701
#5 0x00005555556b0db9 in execute_from_lexer (source_kind=source_kind@entry=1,
source=<optimized out>, input_kind=input_kind@entry=MP_PARSE_FILE_INPUT,
is_repl=is_repl@entry=false) at main.c:162
#6 0x00005555556b0e68 in do_str (str=<optimized out>) at main.c:315
#7 0x00005555556b18d4 in main_ (argc=argc@entry=3, argv=argv@entry=0x7fffffffddd8)
at main.c:656
#8 0x00005555556b21a0 in main (argc=3, argv=0x7fffffffddd8) at main.c:494
(gdb) c
Continuing.
Breakpoint 2, nlr_jump (val=0x7ffff7c48ba0) at ../../py/nlrx64.c:103
103 MP_NORETURN void nlr_jump(void *val) {
(gdb) p mp_thread_get_state ()->nlr_top
$3 = (nlr_buf_t *) 0x7fffffffd990
(gdb) c
Continuing.
Breakpoint 2, nlr_jump (val=val@entry=0x7ffff7c48ba0) at ../../py/nlrx64.c:103
103 MP_NORETURN void nlr_jump(void *val) {
(gdb) p mp_thread_get_state ()->nlr_top
$4 = (nlr_buf_t *) 0x7fffffffd900
(gdb) where
#0 nlr_jump (val=val@entry=0x7ffff7c48ba0) at ../../py/nlrx64.c:103
#1 0x00005555555fe3df in fun_bc_call (self_in=<optimized out>, n_args=0, n_kw=0, args=0x0)
at ../../py/objfun.c:352
#2 0x00005555555ea61f in mp_call_function_n_kw (fun_in=0x7ffff7c48be0,
n_args=n_args@entry=0, n_kw=n_kw@entry=0, args=args@entry=0x0) at ../../py/runtime.c:727
#3 0x00005555555ecdeb in mp_call_function_0 (fun=<optimized out>) at ../../py/runtime.c:701
#4 0x00005555556b0db9 in execute_from_lexer (source_kind=source_kind@entry=1,
source=<optimized out>, input_kind=input_kind@entry=MP_PARSE_FILE_INPUT,
is_repl=is_repl@entry=false) at main.c:162
#5 0x00005555556b0e68 in do_str (str=<optimized out>) at main.c:315
#6 0x00005555556b18d4 in main_ (argc=argc@entry=3, argv=argv@entry=0x7fffffffddd8)
at main.c:656
#7 0x00005555556b21a0 in main (argc=3, argv=0x7fffffffddd8) at main.c:494
Notice how nlr_top at the last nlr_jump (0x7fffffffd900) is equal to the nlr_buf_t that was pushed by binary_op_maybe, called from fold_constants, even though those frames are no longer on the stack.
This crash was found with AFLplusplus and minimized manually.
Code of Conduct
Yes, I agree