Skip to content

Commit 034059b

Browse files
committed
improving rfc3676 parsing
1 parent 39ea8a0 commit 034059b

File tree

3 files changed

+223
-81
lines changed

3 files changed

+223
-81
lines changed

src/parse.c

Lines changed: 151 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,112 +1117,180 @@ static int get_quotelevel (const char *line)
11171117
** headers), the previous line quotelevel, and a flag saying if the
11181118
** previous line was marked as a continuing one.
11191119
**
1120-
** The function returns true if the current line should be merged with
1121-
** the next line to be parsed.
1122-
**
11231120
** The function updates the quotelevel to
11241121
** that of the current parsed line. The function will update the
11251122
** continue_prev_flow_flag to say if the current line should be joined
11261123
** to the previous one, and, if positive, the padding offset that
11271124
** should be applied to the current line when merging it (for skipping
1128-
** quotes or space-padding).
1125+
** quotes or space-stuffing).
1126+
**
1127+
** If delsp is true, the function will remove the space in the soft
1128+
** line break if the line is flowed.
1129+
**
1130+
** The function returns true if the current line is flowed.
1131+
**
11291132
*/
1130-
static bool rfc3676_handler (const char *line, bool delsp, int *quotelevel,
1133+
static bool rfc3676_handler (char *line, bool delsp_flag, int *quotelevel,
11311134
bool *continue_prev_flow_flag, int *padding)
11321135
{
11331136
int new_quotelevel = 0;
1137+
int space_stuffing = 0;
11341138
int tmp_padding = 0;
11351139
bool sig_sep = FALSE;
11361140
bool flowed = FALSE;
11371141

11381142
/* rules for evaluation if the flow should stop:
11391143
1. new quote level is different from previous one
1140-
2. The line ends with a signature "(quotes)(stuffing)-- \n"
1144+
2. The line is a signature separator "[(quotes)][(ss)]-- \n"
1145+
3. The line is a hard break "\n"
1146+
4. The message body has ended
1147+
1148+
rules for removing space-stuffing:
1149+
1. if f=f, then remove the first space of any line beginning with a space,
1150+
before processing for f=f.
1151+
2. space char may depend on charset.
1152+
1153+
rules for quotes:
1154+
1. quoted lines always begin with a '>' char. This symbol may depend on the
1155+
msg charset.
1156+
2. They are not ss before the quote symbol but may be after it
1157+
appears.
1158+
1159+
rules for seeing if a line should be flowed with the next one:
1160+
1. line ends with a soft line break sp\n
1161+
2. remove the sp if delsp=true; keep it otherwise
1162+
1163+
special case, space-stuffed or f=f? A line that has only this content:
1164+
" \n": this is a space-stuffed newline.
11411165
*/
11421166

1143-
/* If this is line is part of the flow and begins with quotes,
1144-
remove the quote level and stuffed space if found */
1145-
new_quotelevel = get_quotelevel (line);
11461167

11471168
#if DEBUG_PARSE
11481169
printf("RFC3676: Previous quote level: %d\n", *quotelevel);
11491170
printf("RFC3676: Previous line flow flag: %d\n", *continue_prev_flow_flag);
1171+
#endif
1172+
1173+
/*
1174+
** hard crlf detection.
1175+
*/
1176+
if (rfc3676_ishardlb(line)) {
1177+
/* Hard crlf, reset flags */
1178+
*quotelevel = 0;
1179+
*padding = 0;
1180+
/* *continue_prev_flow_flag = FALSE; */
1181+
#if DEBUG_PARSE
1182+
printf("RFC3676: hard CRLF detected. Stopping ff\n");
1183+
#endif
1184+
return FALSE;
1185+
}
1186+
1187+
/*
1188+
** quote level detection
1189+
*/
1190+
new_quotelevel = get_quotelevel (line);
1191+
#if DEBUG_PARSE
11501192
printf("RFC3676: New quote level: %d\n", new_quotelevel);
11511193
#endif
11521194

1153-
/* remove the multi-line quotes padding */
1154-
tmp_padding = new_quotelevel;
1195+
/* change of quote level, stop ff */
1196+
if (new_quotelevel != *quotelevel
1197+
|| (new_quotelevel > 0 && set_format_flowed_disable_quoted)) {
1198+
*continue_prev_flow_flag = FALSE;
11551199

1156-
if (*continue_prev_flow_flag
1157-
&& (new_quotelevel != *quotelevel
1158-
|| (new_quotelevel == *quotelevel
1159-
&& new_quotelevel > 0
1160-
&& set_format_flowed_disable_quoted))) {
1161-
/* don't join */
1162-
*continue_prev_flow_flag = FALSE;
1200+
#if DEBUG_PARSE
1201+
printf("RFC3676: different quote levels detected. Stopping ff\n");
1202+
#endif
11631203
}
1204+
tmp_padding = new_quotelevel;
11641205

1165-
/* remove space stuffing if any */
1206+
/*
1207+
** skip space stuffing if any
1208+
*/
11661209
if (line[tmp_padding] == ' ') {
1167-
tmp_padding++;
1210+
space_stuffing = 1;
1211+
tmp_padding++;
1212+
#if DEBUG_PARSE
1213+
printf("RFC3676: space-stuffing detected; skipping space\n");
1214+
#endif
11681215
}
11691216

1217+
/*
1218+
** hard crlf detection after quotes
1219+
*/
1220+
if (rfc3676_ishardlb(line+tmp_padding)) {
1221+
/* Hard crlf, reset flags */
1222+
/* *continue_prev_flow_flag = FALSE; */
1223+
*quotelevel = new_quotelevel;
1224+
*padding = 0;
1225+
#if DEBUG_PARSE
1226+
printf("RFC3676: hard CRLF detected after quote. Stopping ff\n");
1227+
#endif
1228+
return FALSE;
1229+
}
1230+
1231+
/*
1232+
** signature detection
1233+
*/
1234+
11701235
/* Is it a signature separator? */
1171-
/* @@ add sscanf for --\s?\r?\n here */
1172-
if (!strcmp (line + tmp_padding, "-- \n") || !strcmp (line + tmp_padding, "-- \r\n")) {
1173-
/* don't join */
1174-
*continue_prev_flow_flag = FALSE;
1175-
sig_sep = TRUE;
1236+
/* rfc3676 gives "-- \n" and "-- \r\n" as signatures. We also add "--\n" to this list,
1237+
as mutt allows it */
1238+
if (!strcmp (line + tmp_padding, "-- \n")
1239+
|| !strcmp (line + tmp_padding, "-- \r\n")
1240+
|| !strcmp (line + tmp_padding, "--\n")) {
1241+
/* yes, stop f=f */
1242+
*continue_prev_flow_flag = FALSE;
1243+
sig_sep = TRUE;
11761244
#if DEBUG_PARSE
1177-
printf("RFC3676: Current line is signature\n", sig_sep);
1245+
printf ("RFC3676: -- signature detected. Stopping ff\n", sig_sep);
11781246
#endif
1247+
if (delsp_flag) {
1248+
rfc3676_trim_softlb (line);
1249+
}
11791250
}
11801251

1181-
if (*continue_prev_flow_flag == FALSE)
1182-
tmp_padding = 0;
1183-
1184-
*padding = tmp_padding;
1185-
1186-
/* is this line part of a flowed sequence (beginning or continuation)? */
1252+
/*
1253+
** is this line f=f?
1254+
*/
11871255
if (!sig_sep) {
1188-
char *eold;
1189-
eold = strrchr (line, '\n');
1190-
if (line != eold) {
1191-
if (*(eold - 1) == '\r')
1192-
eold--;
1193-
}
1194-
if (line != eold) {
1195-
if (*(eold - 1) == ' ') {
1196-
flowed = TRUE;
1197-
if (delsp) {
1198-
/* remove the space stuffing and copy the end of line */
1199-
char *ptr = eold - 1;
1256+
char *eold;
1257+
eold = strrchr (line, '\n');
1258+
if (line != eold) {
1259+
if (*(eold - 1) == '\r')
1260+
eold--;
1261+
}
1262+
if (line != eold && (line + tmp_padding) != eold) {
1263+
if (*(eold - 1) == ' ') {
1264+
if (!sig_sep) {
1265+
flowed = TRUE;
12001266
#if DEBUG_PARSE
1201-
printf("deleting delsp separator\n");
1267+
printf("RFC3676: f=f line detected\n");
12021268
#endif
1203-
while (*ptr != '\0') {
1204-
*ptr = *(ptr + 1);
1205-
ptr++;
1206-
}
1207-
}
1269+
}
1270+
if (delsp_flag) {
1271+
/* delsp: remove the trailing space of the soft line break (this is not space-stuffing) */
1272+
rfc3676_trim_softlb(line);
1273+
}
1274+
}
12081275
}
1209-
}
12101276
}
1211-
1212-
if (flowed) {
1213-
*quotelevel = new_quotelevel;
1277+
1278+
/*
1279+
** update flags
1280+
*/
1281+
*quotelevel = new_quotelevel;
1282+
1283+
if (*continue_prev_flow_flag) {
1284+
*padding = new_quotelevel + space_stuffing;
12141285
} else {
1215-
*quotelevel = 0;
1286+
*padding = (new_quotelevel == 0) ? space_stuffing : 0;
12161287
}
1217-
1288+
12181289
#if DEBUG_PARSE
1219-
if (continue_prev_flow_flag)
1220-
printf("RFC3676: Continuing previous flow\n");
1221-
else
1222-
printf("RFC3676: Stopping previous flow\n");
1290+
if (*continue_prev_flow_flag)
1291+
printf("RFC3676: Continuing previous flow\n");
12231292
if (flowed) {
1224-
printf("RFC3676: Current line is flowed\n");
1225-
printf("RFC3676: New quote level: %d\n", new_quotelevel);
1293+
printf("RFC3676: Current line is flowed\n");
12261294
}
12271295
#endif
12281296

@@ -1559,7 +1627,7 @@ int parsemail(char *mbox, /* file name */
15591627
bool flowed_line = FALSE;
15601628
int quotelevel = 0;
15611629
bool continue_previous_flow_flag = FALSE;
1562-
bool delsp = FALSE;
1630+
bool delsp_flag = FALSE;
15631631

15641632
int binfile = -1;
15651633

@@ -2054,7 +2122,7 @@ int parsemail(char *mbox, /* file name */
20542122
sscanf(cp, "%128[^;\"\n]", charbuffer);
20552123
/* save the delsp info */
20562124
if (!strcasecmp (charbuffer, "yes"))
2057-
delsp = TRUE;
2125+
delsp_flag = TRUE;
20582126
}
20592127
}
20602128

@@ -2533,9 +2601,9 @@ int parsemail(char *mbox, /* file name */
25332601
textplain_format = FORMAT_FIXED;
25342602
}
25352603

2536-
if (textplain_format == FORMAT_FIXED && delsp) {
2537-
/* delsp only accepted for format=flowed */
2538-
delsp = FALSE;
2604+
if (textplain_format == FORMAT_FIXED && delsp_flag) {
2605+
/* delsp only accepted for format=flowed */
2606+
delsp_flag = FALSE;
25392607
}
25402608

25412609
if (append_bp && append_bp != bp) {
@@ -2648,7 +2716,7 @@ msgid);
26482716

26492717
/* reset related RFC 3676 state flags */
26502718
textplain_format = FORMAT_FIXED;
2651-
delsp = FALSE;
2719+
delsp_flag = FALSE;
26522720
flowed_line = FALSE;
26532721
quotelevel = 0;
26542722
continue_previous_flow_flag = FALSE;
@@ -2876,7 +2944,7 @@ msgid);
28762944

28772945
/* reset related RFC 3676 state flags */
28782946
textplain_format = FORMAT_FIXED;
2879-
delsp = FALSE;
2947+
delsp_flag = FALSE;
28802948
flowed_line = FALSE;
28812949
quotelevel = 0;
28822950
continue_previous_flow_flag = FALSE;
@@ -2998,16 +3066,18 @@ msgid);
29983066
int padding; /* used for skipping padding detected by rfc3676_handler,
29993067
which seems smarter than moving all the bytes in data
30003068
before injecting it into addbody */
3001-
if (!isinheader && textplain_format == FORMAT_FLOWED) {
3002-
flowed_line = rfc3676_handler (data, delsp, &quotelevel,
3003-
&continue_previous_flow_flag, &padding);
3004-
if (continue_previous_flow_flag)
3005-
bodyflags |= BODY_CONTINUE;
3006-
else
3007-
bodyflags &= ~BODY_CONTINUE;
3008-
continue_previous_flow_flag = flowed_line;
3069+
if (!isinheader && (textplain_format == FORMAT_FLOWED)) {
3070+
flowed_line = rfc3676_handler (data, delsp_flag, &quotelevel,
3071+
&continue_previous_flow_flag,
3072+
&padding);
3073+
if (continue_previous_flow_flag) {
3074+
bodyflags |= BODY_CONTINUE;
3075+
} else {
3076+
bodyflags &= ~BODY_CONTINUE;
3077+
}
3078+
continue_previous_flow_flag = flowed_line;
30093079
} else {
3010-
padding = 0;
3080+
padding = 0;
30113081
}
30123082
bp = addbody(bp, &lp, data + padding,
30133083
(content == CONTENT_HTML ?
@@ -3375,9 +3445,9 @@ msgid);
33753445
textplain_format = FORMAT_FIXED;
33763446
}
33773447

3378-
if (textplain_format == FORMAT_FIXED && delsp) {
3448+
if (textplain_format == FORMAT_FIXED && delsp_flag) {
33793449
/* delsp only accepted for format=flowed */
3380-
delsp = FALSE;
3450+
delsp_flag = FALSE;
33813451
}
33823452

33833453
if (append_bp && append_bp != bp) {
@@ -3464,7 +3534,7 @@ msgid);
34643534

34653535
/* reset related RFC 3676 state flags */
34663536
textplain_format = FORMAT_FIXED;
3467-
delsp = FALSE;
3537+
delsp_flag = FALSE;
34683538
flowed_line = FALSE;
34693539
quotelevel = 0;
34703540
continue_previous_flow_flag = FALSE;

src/proto.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ char *getvalue(char *);
115115
char *getconfvalue(char *, char *, char *);
116116
char *unre(char *);
117117
char *oneunre(char *);
118+
void rfc3676_trim_softlb(char *);
119+
char *rfc3676_delsp_quotes(char *);
120+
int rfc3676_ishardlb(const char *);
118121
int isquote(const char *);
119122
char *replace(char *, char *, char *);
120123
char *replacechar(char *, char, char *);

0 commit comments

Comments
 (0)