12
12
13
13
14
14
#define CHAR_EOF -1
15
+ #define CHAR_NULL 0
15
16
#define CHAR_BANG 33
16
17
#define CHAR_DOUBLE 34
17
18
#define CHAR_PERCENT 37
23
24
#define CHAR_GT 62
24
25
#define CHAR_QUESTION 63
25
26
#define CHAR_RIGHTB 93
27
+ #define CHAR_TICK 96
26
28
27
29
/* prototypes */
28
30
@@ -41,6 +43,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs);
41
43
static int h5_state_before_attribute_value (h5_state_t * hs );
42
44
static int h5_state_attribute_value_double_quote (h5_state_t * hs );
43
45
static int h5_state_attribute_value_single_quote (h5_state_t * hs );
46
+ static int h5_state_attribute_value_back_quote (h5_state_t * hs );
44
47
static int h5_state_attribute_value_no_quote (h5_state_t * hs );
45
48
static int h5_state_after_attribute_value_quoted_state (h5_state_t * hs );
46
49
static int h5_state_comment (h5_state_t * hs );
@@ -60,16 +63,28 @@ static int h5_state_doctype(h5_state_t* hs);
60
63
/**
61
64
* public function
62
65
*/
63
- void libinjection_h5_init (h5_state_t * hs , const char * s , size_t len , int flags )
66
+ void libinjection_h5_init (h5_state_t * hs , const char * s , size_t len , enum html5_flags flags )
64
67
{
65
68
memset (hs , 0 , sizeof (h5_state_t ));
66
69
hs -> s = s ;
67
70
hs -> len = len ;
68
- hs -> state = h5_state_data ;
69
- if (flags == 0 ) {
71
+
72
+ switch (flags ) {
73
+ case DATA_STATE :
70
74
hs -> state = h5_state_data ;
71
- } else {
72
- assert (0 );
75
+ break ;
76
+ case VALUE_NO_QUOTE :
77
+ hs -> state = h5_state_before_attribute_name ;
78
+ break ;
79
+ case VALUE_SINGLE_QUOTE :
80
+ hs -> state = h5_state_attribute_value_single_quote ;
81
+ break ;
82
+ case VALUE_DOUBLE_QUOTE :
83
+ hs -> state = h5_state_attribute_value_double_quote ;
84
+ break ;
85
+ case VALUE_BACK_QUOTE :
86
+ hs -> state = h5_state_attribute_value_back_quote ;
87
+ break ;
73
88
}
74
89
}
75
90
@@ -85,10 +100,18 @@ int libinjection_h5_next(h5_state_t* hs)
85
100
/**
86
101
* Everything below here is private
87
102
*
88
- */
103
+ */
104
+
89
105
90
106
/*
 * Report whether `ch` counts as whitespace for the tokenizer.
 *
 * Returns 1 for space, the five control characters listed below, and
 * the NUL byte; returns 0 for everything else.
 */
static int h5_is_white(char ch)
{
    switch (ch) {
    case '\0': /* NUL             = 0x00 */
    case ' ':  /* space           = 0x20 */
    case '\t': /* horizontal tab  = 0x09 */
    case '\n': /* newline         = 0x0A */
    case '\v': /* vertical tab    = 0x0B */
    case '\f': /* form feed       = 0x0C */
    case '\r': /* carriage return = 0x0D */
        return 1;
    default:
        return 0;
    }
}
94
117
@@ -97,9 +120,17 @@ static int h5_skip_white(h5_state_t* hs)
97
120
char ch ;
98
121
while (hs -> pos < hs -> len ) {
99
122
ch = hs -> s [hs -> pos ];
100
- if (ch == ' ' ) {
123
+ switch (ch ) {
124
+ case 0x00 : /* IE only */
125
+ case 0x20 :
126
+ case 0x09 :
127
+ case 0x0A :
128
+ case 0x0B : /* IE only */
129
+ case 0x0C :
130
+ case 0x0D : /* IE only */
101
131
hs -> pos += 1 ;
102
- } else {
132
+ break ;
133
+ default :
103
134
return ch ;
104
135
}
105
136
}
@@ -149,6 +180,9 @@ static int h5_state_tag_open(h5_state_t* hs)
149
180
char ch ;
150
181
151
182
TRACE ();
183
+ if (hs -> pos >= hs -> len ) {
184
+ return 0 ;
185
+ }
152
186
ch = hs -> s [hs -> pos ];
153
187
if (ch == CHAR_BANG ) {
154
188
hs -> pos += 1 ;
@@ -167,6 +201,9 @@ static int h5_state_tag_open(h5_state_t* hs)
167
201
return h5_state_bogus_comment2 (hs );
168
202
} else if ((ch >= 'a' && ch <= 'z' ) || (ch >= 'A' && ch <= 'Z' )) {
169
203
return h5_state_tag_name (hs );
204
+ } else if (ch == CHAR_NULL ) {
205
+ /* IE-ism NULL characters are ignored */
206
+ return h5_state_tag_name (hs );
170
207
} else {
171
208
/* user input mistake in configuring state */
172
209
if (hs -> pos == 0 ) {
@@ -197,7 +234,9 @@ static int h5_state_end_tag_open(h5_state_t* hs)
197
234
} else if ((ch >= 'a' && ch <= 'z' ) || (ch >= 'A' && ch <= 'Z' )) {
198
235
return h5_state_tag_name (hs );
199
236
}
200
- return h5_state_data (hs );
237
+
238
+ hs -> is_close = 0 ;
239
+ return h5_state_bogus_comment (hs );
201
240
}
202
241
/*
203
242
*
@@ -231,7 +270,12 @@ static int h5_state_tag_name(h5_state_t* hs)
231
270
pos = hs -> pos ;
232
271
while (pos < hs -> len ) {
233
272
ch = hs -> s [pos ];
234
- if (h5_is_white (ch )) {
273
+ if (ch == 0 ) {
274
+ /* special non-standard case */
275
+ /* allow nulls in tag name */
276
+ /* some old browsers apparently allow and ignore them */
277
+ pos += 1 ;
278
+ } else if (h5_is_white (ch )) {
235
279
hs -> token_start = hs -> s + hs -> pos ;
236
280
hs -> token_len = pos - hs -> pos ;
237
281
hs -> token_type = TAG_NAME_OPEN ;
@@ -299,7 +343,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
299
343
default : {
300
344
return h5_state_attribute_name (hs );
301
345
}
302
- }
346
+ }
303
347
}
304
348
305
349
static int h5_state_attribute_name (h5_state_t * hs )
@@ -308,7 +352,7 @@ static int h5_state_attribute_name(h5_state_t* hs)
308
352
size_t pos ;
309
353
310
354
TRACE ();
311
- pos = hs -> pos ;
355
+ pos = hs -> pos + 1 ;
312
356
while (pos < hs -> len ) {
313
357
ch = hs -> s [pos ];
314
358
if (h5_is_white (ch )) {
@@ -358,21 +402,19 @@ static int h5_state_attribute_name(h5_state_t* hs)
358
402
static int h5_state_after_attribute_name (h5_state_t * hs )
359
403
{
360
404
int c ;
361
- size_t pos ;
362
405
363
406
TRACE ();
364
- pos = hs -> pos ;
365
407
c = h5_skip_white (hs );
366
408
switch (c ) {
367
409
case CHAR_EOF : {
368
410
return 0 ;
369
411
}
370
412
case CHAR_SLASH : {
371
- hs -> pos = pos + 1 ;
413
+ hs -> pos += 1 ;
372
414
return h5_state_self_closing_start_tag (hs );
373
415
}
374
416
case CHAR_EQUALS : {
375
- hs -> pos = pos + 1 ;
417
+ hs -> pos += 1 ;
376
418
return h5_state_before_attribute_value (hs );
377
419
}
378
420
case CHAR_GT : {
@@ -403,6 +445,9 @@ static int h5_state_before_attribute_value(h5_state_t* hs)
403
445
return h5_state_attribute_value_double_quote (hs );
404
446
} else if (c == CHAR_SINGLE ) {
405
447
return h5_state_attribute_value_single_quote (hs );
448
+ } else if (c == CHAR_TICK ) {
449
+ /* NON STANDARD IE */
450
+ return h5_state_attribute_value_back_quote (hs );
406
451
} else {
407
452
return h5_state_attribute_value_no_quote (hs );
408
453
}
@@ -415,8 +460,16 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
415
460
416
461
TRACE ();
417
462
418
- /* skip quote */
419
- hs -> pos += 1 ;
463
+ /* skip initial quote in normal case.
464
+ * don't do this "if (pos == 0)" since it means we have started
465
+ * in a non-data state. given an input of '><foo
466
+ * we want to make 0-length attribute name
467
+ */
468
+ if (hs -> pos > 0 ) {
469
+ hs -> pos += 1 ;
470
+ }
471
+
472
+
420
473
idx = (const char * ) memchr (hs -> s + hs -> pos , qchar , hs -> len - hs -> pos );
421
474
if (idx == NULL ) {
422
475
hs -> token_start = hs -> s + hs -> pos ;
@@ -447,6 +500,13 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs)
447
500
return h5_state_attribute_value_quote (hs , CHAR_SINGLE );
448
501
}
449
502
503
+ static
504
+ int h5_state_attribute_value_back_quote (h5_state_t * hs )
505
+ {
506
+ TRACE ();
507
+ return h5_state_attribute_value_quote (hs , CHAR_TICK );
508
+ }
509
+
450
510
static int h5_state_attribute_value_no_quote (h5_state_t * hs )
451
511
{
452
512
char ch ;
@@ -656,10 +716,13 @@ static int h5_state_comment(h5_state_t* hs)
656
716
char ch ;
657
717
const char * idx ;
658
718
size_t pos ;
719
+ size_t offset ;
720
+ const char * end = hs -> s + hs -> len ;
659
721
660
722
TRACE ();
661
723
pos = hs -> pos ;
662
724
while (1 ) {
725
+
663
726
idx = (const char * ) memchr (hs -> s + pos , CHAR_DASH , hs -> len - pos );
664
727
665
728
/* did not find anything or has less than 3 chars left */
@@ -670,21 +733,62 @@ static int h5_state_comment(h5_state_t* hs)
670
733
hs -> token_type = TAG_COMMENT ;
671
734
return 1 ;
672
735
}
673
- ch = * (idx + 1 );
736
+ offset = 1 ;
737
+
738
+ /* skip all nulls */
739
+ while (idx + offset < end && * (idx + offset ) == 0 ) {
740
+ offset += 1 ;
741
+ }
742
+ if (idx + offset == end ) {
743
+ hs -> state = h5_state_eof ;
744
+ hs -> token_start = hs -> s + hs -> pos ;
745
+ hs -> token_len = hs -> len - hs -> pos ;
746
+ hs -> token_type = TAG_COMMENT ;
747
+ return 1 ;
748
+ }
749
+
750
+ ch = * (idx + offset );
674
751
if (ch != CHAR_DASH && ch != CHAR_BANG ) {
675
752
pos = (size_t )(idx - hs -> s ) + 1 ;
676
753
continue ;
677
754
}
678
- ch = * (idx + 2 );
755
+
756
+ /* need to test */
757
+ #if 0
758
+ /* skip all nulls */
759
+ while (idx + offset < end && * (idx + offset ) == 0 ) {
760
+ offset += 1 ;
761
+ }
762
+ if (idx + offset == end ) {
763
+ hs -> state = h5_state_eof ;
764
+ hs -> token_start = hs -> s + hs -> pos ;
765
+ hs -> token_len = hs -> len - hs -> pos ;
766
+ hs -> token_type = TAG_COMMENT ;
767
+ return 1 ;
768
+ }
769
+ #endif
770
+
771
+ offset += 1 ;
772
+ if (idx + offset == end ) {
773
+ hs -> state = h5_state_eof ;
774
+ hs -> token_start = hs -> s + hs -> pos ;
775
+ hs -> token_len = hs -> len - hs -> pos ;
776
+ hs -> token_type = TAG_COMMENT ;
777
+ return 1 ;
778
+ }
779
+
780
+
781
+ ch = * (idx + offset );
679
782
if (ch != CHAR_GT ) {
680
783
pos = (size_t )(idx - hs -> s ) + 1 ;
681
784
continue ;
682
785
}
786
+ offset += 1 ;
683
787
684
788
/* ends in --> or -!> */
685
789
hs -> token_start = hs -> s + hs -> pos ;
686
790
hs -> token_len = (size_t )(idx - hs -> s ) - hs -> pos ;
687
- hs -> pos = (size_t )(idx - hs -> s ) + 3 ;
791
+ hs -> pos = (size_t )(idx + offset - hs -> s );
688
792
hs -> state = h5_state_data ;
689
793
hs -> token_type = TAG_COMMENT ;
690
794
return 1 ;
0 commit comments