Skip to content

Commit 2381252

Browse files
committed
[GR-18163] Fix rb_enc_left_char_head() (#3267)
PullRequest: truffleruby/4013 (cherry picked from commit c77f8bb)
1 parent e976a4d commit 2381252

File tree

5 files changed

+34
-2
lines changed

5 files changed

+34
-2
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# 23.1.2
2+
3+
Bug fixes:
4+
5+
* Fix `rb_enc_left_char_head()` so it no longer always raises `ArgumentError` (#3267, @eregon).
6+
17
# 23.1.0
28

39
New features:

lib/cext/ABI_check.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2
1+
3

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,22 @@
674674
end
675675
end
676676

677+
describe "rb_enc_left_char_head" do
678+
it 'returns the head position of a character' do
679+
@s.rb_enc_left_char_head("é", 1).should == 0
680+
@s.rb_enc_left_char_head("éééé", 7).should == 6
681+
682+
@s.rb_enc_left_char_head("a", 0).should == 0
683+
684+
# unclear if this is intended to work (the offset equals the string's byte length, i.e. it points at the end of the string)
685+
@s.rb_enc_left_char_head("a", 1).should == 1
686+
687+
# Works because for single-byte encodings rb_enc_left_char_head() just returns the pointer
688+
@s.rb_enc_left_char_head("a".force_encoding(Encoding::US_ASCII), 88).should == 88
689+
@s.rb_enc_left_char_head("a".b, 88).should == 88
690+
end
691+
end
692+
677693
describe "ONIGENC_MBC_CASE_FOLD" do
678694
it "returns the correct case fold for the given string" do
679695
@s.ONIGENC_MBC_CASE_FOLD("lower").should == ["l", 1]

spec/ruby/optional/capi/ext/encoding_spec.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,12 @@ static VALUE encoding_spec_rb_enc_strlen(VALUE self, VALUE str, VALUE length, VA
307307
return LONG2FIX(rb_enc_strlen(p, e, rb_to_encoding(encoding)));
308308
}
309309

310+
static VALUE encoding_spec_rb_enc_left_char_head(VALUE self, VALUE str, VALUE offset) {
311+
char *ptr = RSTRING_PTR(str);
312+
char *result = rb_enc_left_char_head(ptr, ptr + NUM2INT(offset), RSTRING_END(str), rb_enc_get(str));
313+
return LONG2NUM(result - ptr);
314+
}
315+
310316
void Init_encoding_spec(void) {
311317
VALUE cls;
312318
native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*));
@@ -364,6 +370,7 @@ void Init_encoding_spec(void) {
364370
rb_define_method(cls, "rb_enc_str_asciionly_p", encoding_spec_rb_enc_str_asciionly_p, 1);
365371
rb_define_method(cls, "rb_uv_to_utf8", encoding_spec_rb_uv_to_utf8, 2);
366372
rb_define_method(cls, "ONIGENC_MBC_CASE_FOLD", encoding_spec_ONIGENC_MBC_CASE_FOLD, 1);
373+
rb_define_method(cls, "rb_enc_left_char_head", encoding_spec_rb_enc_left_char_head, 2);
367374
}
368375

369376
#ifdef __cplusplus

src/main/c/cext/encoding.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,10 @@ int rb_enc_get_index(VALUE obj) {
226226
}
227227

228228
char* rb_enc_left_char_head(const char *start, const char *p, const char *end, rb_encoding *enc) {
229-
int length = start - end;
229+
if (p <= start || p >= end) {
230+
return p;
231+
}
232+
int length = end - start;
230233
int position = polyglot_as_i32(polyglot_invoke(RUBY_CEXT, "rb_enc_left_char_head",
231234
rb_tr_unwrap(rb_enc_from_encoding(enc)),
232235
rb_tr_unwrap(rb_str_new(start, length)),

0 commit comments

Comments
 (0)