Skip to content

Commit 388bf71

Browse files
tonycozmauke
authored andcommitted
safer cleanup when failing to compile regexps
Prior to this commit when producing a warning the regexp compiler would check if the warning category was marked as FATAL, and if it was it would add clean up to the save stack to release buffers used during compilation and to release the working REGEXP SV. This causes two type of problems: - if an error was already queued, Perl_ck_warner() returns even if the warning is fatal, this meant that the normal clean up code Perl_re_op_compile() would also run, resulting in a double free of the buffers. - without fatal warnings, if a $SIG{__WARN__} handler died, the buffers and the working REGEXP SV would leak. Avoid this by using SAVEDESTRUCTOR_X() to release the memory and optionally the SV at the end of scope. Fixes #21661
1 parent c9e5693 commit 388bf71

File tree

2 files changed

+33
-35
lines changed

2 files changed

+33
-35
lines changed

regcomp.c

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,24 @@ S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
13561356
return TRUE;
13571357
}
13581358

1359+
static void
1360+
release_RExC_state(pTHX_ void *vstate) {
1361+
RExC_state_t *pRExC_state = (RExC_state_t *)vstate;
1362+
1363+
/* Any or all of these might be NULL.
1364+
1365+
There's no point in setting them to NULL after the free, since
1366+
pRExC_state is about to be released.
1367+
*/
1368+
SvREFCNT_dec(RExC_rx_sv);
1369+
Safefree(RExC_open_parens);
1370+
Safefree(RExC_close_parens);
1371+
Safefree(RExC_logical_to_parno);
1372+
Safefree(RExC_parno_to_logical);
1373+
1374+
Safefree(pRExC_state);
1375+
}
1376+
13591377
/*
13601378
* Perl_re_op_compile - the perl internal RE engine's function to compile a
13611379
* regular expression into internal code.
@@ -1437,8 +1455,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
14371455
bool recompile = 0;
14381456
bool runtime_code = 0;
14391457
scan_data_t data;
1440-
RExC_state_t RExC_state;
1441-
RExC_state_t * const pRExC_state = &RExC_state;
14421458

14431459
#ifdef TRIE_STUDY_OPT
14441460
/* search for "restudy" in this file for a detailed explanation */
@@ -1449,25 +1465,28 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
14491465

14501466
PERL_ARGS_ASSERT_RE_OP_COMPILE;
14511467

1468+
DEBUG_r(if (!PL_colorset) reginitcolors());
1469+
1470+
RExC_state_t *pRExC_state = NULL;
14521471
/* Ensure that all members of the pRExC_state is initialized to 0
14531472
* at the start of regex compilation. Historically we have had issues
14541473
* with people remembering to zero specific members or zeroing them
14551474
* too late, etc. Doing it in one place is saner and avoid oversight
14561475
* or error. */
1457-
Zero(pRExC_state,1,RExC_state_t);
1476+
Newxz(pRExC_state, 1, RExC_state_t);
1477+
1478+
SAVEDESTRUCTOR_X(release_RExC_state, pRExC_state);
1479+
14581480
DEBUG_r({
14591481
/* and then initialize RExC_mysv1 and RExC_mysv2 early so if
14601482
* something calls regprop we don't have issues. These variables
1461-
* not being set up properly motivated the use of Zero() to initalize
1483+
* not being set up properly motivated the use of Newxz() to initalize
14621484
* the pRExC_state structure, as there were codepaths under -Uusedl
14631485
* that left these unitialized, and non-null as well. */
14641486
RExC_mysv1 = sv_newmortal();
14651487
RExC_mysv2 = sv_newmortal();
14661488
});
14671489

1468-
DEBUG_r(if (!PL_colorset) reginitcolors());
1469-
1470-
14711490
if (is_bare_re)
14721491
*is_bare_re = FALSE;
14731492

@@ -1840,6 +1859,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
18401859

18411860
/* Clean up what we did in this parse */
18421861
SvREFCNT_dec_NN(RExC_rx_sv);
1862+
RExC_rx_sv = NULL;
18431863

18441864
goto redo_parse;
18451865
}
@@ -1915,12 +1935,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
19151935
/* search for "restudy" in this file for a detailed explanation */
19161936
if (!restudied) {
19171937
StructCopy(&zero_scan_data, &data, scan_data_t);
1918-
copyRExC_state = RExC_state;
1938+
copyRExC_state = *pRExC_state;
19191939
} else {
19201940
U32 seen=RExC_seen;
19211941
DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "Restudying\n"));
19221942

1923-
RExC_state = copyRExC_state;
1943+
*pRExC_state = copyRExC_state;
19241944
if (seen & REG_TOP_LEVEL_BRANCHES_SEEN)
19251945
RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
19261946
else
@@ -2439,22 +2459,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
24392459
regdump(RExC_rx);
24402460
});
24412461

2442-
if (RExC_open_parens) {
2443-
Safefree(RExC_open_parens);
2444-
RExC_open_parens = NULL;
2445-
}
2446-
if (RExC_close_parens) {
2447-
Safefree(RExC_close_parens);
2448-
RExC_close_parens = NULL;
2449-
}
2450-
if (RExC_logical_to_parno) {
2451-
Safefree(RExC_logical_to_parno);
2452-
RExC_logical_to_parno = NULL;
2453-
}
2454-
if (RExC_parno_to_logical) {
2455-
Safefree(RExC_parno_to_logical);
2456-
RExC_parno_to_logical = NULL;
2457-
}
2462+
/* we're returning ownership of the SV to the caller, ensure the cleanup
2463+
* doesn't release it
2464+
*/
2465+
RExC_rx_sv = NULL;
24582466

24592467
#ifdef USE_ITHREADS
24602468
/* under ithreads the ?pat? PMf_USED flag on the pmop is simulated
@@ -9344,7 +9352,6 @@ S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
93449352
array is mortal, but is a
93459353
fail-safe */
93469354
(void) sv_2mortal(msg);
9347-
PREPARE_TO_DIE;
93489355
}
93499356
Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
93509357
SvREFCNT_dec_NN(msg);

regcomp_internal.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,6 @@ static const scan_data_t zero_scan_data = {
894894
const char *ellipses = ""; \
895895
IV len = RExC_precomp_end - RExC_precomp; \
896896
\
897-
PREPARE_TO_DIE; \
898897
if (len > RegexLengthToShowInErrorMessages) { \
899898
/* chop 10 shorter than the max, to ensure meaning of "..." */ \
900899
len = RegexLengthToShowInErrorMessages - 10; \
@@ -927,7 +926,6 @@ static const scan_data_t zero_scan_data = {
927926
* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
928927
*/
929928
#define vFAIL(m) STMT_START { \
930-
PREPARE_TO_DIE; \
931929
Simple_vFAIL(m); \
932930
} STMT_END
933931

@@ -943,7 +941,6 @@ static const scan_data_t zero_scan_data = {
943941
* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
944942
*/
945943
#define vFAIL2(m,a1) STMT_START { \
946-
PREPARE_TO_DIE; \
947944
Simple_vFAIL2(m, a1); \
948945
} STMT_END
949946

@@ -960,7 +957,6 @@ static const scan_data_t zero_scan_data = {
960957
* Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
961958
*/
962959
#define vFAIL3(m,a1,a2) STMT_START { \
963-
PREPARE_TO_DIE; \
964960
Simple_vFAIL3(m, a1, a2); \
965961
} STMT_END
966962

@@ -973,19 +969,16 @@ static const scan_data_t zero_scan_data = {
973969
} STMT_END
974970

975971
#define vFAIL4(m,a1,a2,a3) STMT_START { \
976-
PREPARE_TO_DIE; \
977972
Simple_vFAIL4(m, a1, a2, a3); \
978973
} STMT_END
979974

980975
/* A specialized version of vFAIL2 that works with UTF8f */
981976
#define vFAIL2utf8f(m, a1) STMT_START { \
982-
PREPARE_TO_DIE; \
983977
S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, \
984978
REPORT_LOCATION_ARGS(RExC_parse)); \
985979
} STMT_END
986980

987981
#define vFAIL3utf8f(m, a1, a2) STMT_START { \
988-
PREPARE_TO_DIE; \
989982
S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, \
990983
REPORT_LOCATION_ARGS(RExC_parse)); \
991984
} STMT_END
@@ -1026,8 +1019,6 @@ static const scan_data_t zero_scan_data = {
10261019
__FILE__, __LINE__, loc); \
10271020
} \
10281021
if (TO_OUTPUT_WARNINGS(loc)) { \
1029-
if (ckDEAD(warns)) \
1030-
PREPARE_TO_DIE; \
10311022
code; \
10321023
UPDATE_WARNINGS_LOC(loc); \
10331024
} \

0 commit comments

Comments
 (0)