Skip to content

Commit 34a6ade

Browse files
committed
Implement rb_enc_codelen
1 parent dc80ceb commit 34a6ade

File tree

6 files changed

+44
-0
lines changed

6 files changed

+44
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Compatibility:
6565
* Implemented `ONIGENC_MBC_CASE_FOLD`.
6666
* Fixed `Thread#raise` to call the exception class' constructor with no arguments when given no message (#2045).
6767
* Fixed `refine + super` compatibility (#2039, @ssnickolay)
68+
* Implemented `rb_enc_codelen`.
6869

6970
Performance:
7071

lib/cext/include/ruby/onigmo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,12 @@ int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const stru
369369
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
370370
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
371371
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
372+
/*
 * ONIGENC_CODE_TO_MBCLEN(enc, code): length of codepoint `code` in `enc`.
 *
 * Without TRUFFLERUBY the encoding's own code_to_mbclen function pointer is
 * called directly. With TRUFFLERUBY defined the request is forwarded to
 * rb_tr_code_to_mbclen() instead; note the swapped argument order
 * (code first, encoding second).
 */
#ifndef TRUFFLERUBY
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
#else
int rb_tr_code_to_mbclen(OnigCodePoint code, OnigEncodingType *encoding);
#define ONIGENC_CODE_TO_MBCLEN(enc,code) rb_tr_code_to_mbclen(code,enc)
#endif
373378
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
374379
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
375380
(enc)->property_name_to_ctype(enc,p,end)

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@
5858
end
5959
end
6060

61+
describe "rb_enc_codelen" do
  it "returns the correct length for the given codepoint" do
    # Codepoint => expected length in UTF-8, one sample per length 1 through 4.
    # Hash iteration follows insertion order, so the checks run in this order.
    expected_lengths = {
      0x24    => 1,
      0xA2    => 2,
      0x20AC  => 3,
      0x24B62 => 4
    }

    expected_lengths.each do |codepoint, length|
      @s.rb_enc_codelen(codepoint, Encoding::UTF_8).should == length
    end
  end
end
69+
6170
describe "rb_enc_find" do
6271
it "returns the encoding of an Encoding" do
6372
@s.rb_enc_find("UTF-8").should == "UTF-8"

spec/ruby/optional/capi/ext/encoding_spec.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ static VALUE encoding_spec_ONIGENC_MBC_CASE_FOLD(VALUE self, VALUE str) {
278278
return rb_ary_new3(2, str_result, INT2FIX(bytes_used));
279279
}
280280

281+
/*
 * Spec helper exposing rb_enc_codelen() to Ruby.
 *
 * code     - Fixnum codepoint, converted with FIX2UINT.
 * encoding - Ruby-level encoding argument, converted with rb_to_encoding.
 *
 * Returns the length reported by rb_enc_codelen() as a Fixnum.
 */
static VALUE encoding_spec_rb_enc_codelen(VALUE self, VALUE code, VALUE encoding) {
  /* Convert the codepoint before the encoding, so that whichever conversion
   * fails first stays the same. */
  unsigned int codepoint = FIX2UINT(code);
  rb_encoding *target = rb_to_encoding(encoding);

  int length = rb_enc_codelen(codepoint, target);
  return INT2FIX(length);
}
286+
281287
void Init_encoding_spec(void) {
282288
VALUE cls;
283289
native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*));
@@ -311,6 +317,7 @@ void Init_encoding_spec(void) {
311317
rb_define_method(cls, "rb_enc_associate_index", encoding_spec_rb_enc_associate_index, 2);
312318
rb_define_method(cls, "rb_enc_compatible", encoding_spec_rb_enc_compatible, 2);
313319
rb_define_method(cls, "rb_enc_copy", encoding_spec_rb_enc_copy, 2);
320+
rb_define_method(cls, "rb_enc_codelen", encoding_spec_rb_enc_codelen, 2);
314321
rb_define_method(cls, "rb_enc_find", encoding_spec_rb_enc_find, 1);
315322
rb_define_method(cls, "rb_enc_find_index", encoding_spec_rb_enc_find_index, 1);
316323
rb_define_method(cls, "rb_enc_isalnum", encoding_spec_rb_enc_isalnum, 2);

src/main/c/cext/encoding.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,18 @@ int rb_enc_mbc_to_codepoint(char *p, char *e, rb_encoding *enc) {
7979
length));
8080
}
8181

82+
/*
 * Backs the ONIGENC_CODE_TO_MBCLEN macro by invoking "code_to_mbclen" on
 * RUBY_CEXT.
 *
 * code     - codepoint whose length is requested.
 * encoding - native encoding; it is turned into its Ruby object with
 *            rb_enc_from_encoding() and unwrapped before being passed on.
 *
 * Returns the invocation result converted to a 32-bit int.
 */
int rb_tr_code_to_mbclen(OnigCodePoint code, OnigEncodingType *encoding) {
  VALUE ruby_encoding = rb_enc_from_encoding(encoding);
  return polyglot_as_i32(
      polyglot_invoke(RUBY_CEXT, "code_to_mbclen", code, rb_tr_unwrap(ruby_encoding)));
}
85+
86+
/*
 * Returns the length of codepoint `c` in encoding `enc`, as reported by
 * ONIGENC_CODE_TO_MBCLEN.
 *
 * Raises ArgumentError ("invalid codepoint ...") only when the reported
 * length is exactly 0; any other value is handed back to the caller
 * unchanged.
 */
int rb_enc_codelen(int c, rb_encoding *enc) {
  int length = ONIGENC_CODE_TO_MBCLEN(enc,c);

  if (length != 0) {
    return length;
  }

  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
  /* Keeps a value-returning statement on this path as well. */
  return length;
}
93+
8294
/*
 * Looks up the encoding of `object` by invoking "rb_enc_get" through
 * RUBY_CEXT_INVOKE and converts the result to a native rb_encoding pointer
 * with rb_to_encoding().
 */
rb_encoding* rb_enc_get(VALUE object) {
  VALUE ruby_encoding = RUBY_CEXT_INVOKE("rb_enc_get", object);
  return rb_to_encoding(ruby_encoding);
}

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,16 @@ protected DynamicObject clearCodeRange(DynamicObject string,
522522

523523
}
524524

525+
@CoreMethod(names = "code_to_mbclen", onSingleton = true, required = 2, lowerFixnum = 1)
526+
public abstract static class CodeToMbcLenNode extends CoreMethodArrayArgumentsNode {
527+
528+
@Specialization
529+
protected int codeToMbcLen(int code, DynamicObject encoding) {
530+
return EncodingOperations.getEncoding(encoding).codeToMbcLength(code);
531+
}
532+
533+
}
534+
525535
@CoreMethod(names = "rb_enc_codepoint_len", onSingleton = true, required = 2)
526536
public abstract static class RbEncCodePointLenNode extends CoreMethodArrayArgumentsNode {
527537

0 commit comments

Comments
 (0)