Skip to content

Commit 0b2009e

Browse files
committed
Implement ONIGENC_MBC_CASE_FOLD
1 parent aecb8d3 commit 0b2009e

File tree

7 files changed

+103
-0
lines changed

7 files changed

+103
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Compatibility:
5555
* `RUBY_REVISION` is now the full commit hash used to build TruffleRuby, similar to MRI 2.7+.
5656
* Implemented `rb_enc_mbc_to_codepoint`.
5757
* Change the lookup methods to achieve Refinements specification (#2033, @ssnickolay)
58+
* Implemented `ONIGENC_MBC_CASE_FOLD`.
5859

5960
Performance:
6061

lib/cext/include/ruby/encoding.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ int rb_enc_isspace(unsigned char c, rb_encoding *enc);
253253
#endif
254254
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c))
255255

256+
#ifdef TRUFFLERUBY
257+
/* TODO: move this declaration to onigmo.h once rb_encoding is declared as `typedef const OnigEncodingType rb_encoding;`. */
258+
int rb_tr_enc_mbc_case_fold(rb_encoding *enc, int flag, const UChar** p, const UChar* end, UChar* lower);
259+
#endif
260+
256261
int rb_enc_asciicompat(rb_encoding *enc);
257262

258263
int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding *enc);

lib/cext/include/ruby/onigmo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,13 @@ int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, c
329329

330330
#define ONIGENC_NAME(enc) ((enc)->name)
331331

332+
#ifdef TRUFFLERUBY
333+
/* Under TruffleRuby the fold is delegated to rb_tr_enc_mbc_case_fold instead of
 * calling (enc)->mbc_case_fold directly. Arguments are parenthesized and pp is
 * cast to (const OnigUChar **), as in the non-TruffleRuby definition, so the
 * same call sites (e.g. ones passing a char **) compile identically on both. */
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
  rb_tr_enc_mbc_case_fold((enc),(flag),(const OnigUChar **)(pp),(end),(buf))
335+
#else
332336
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
333337
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
338+
#endif
334339
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
335340
(enc)->is_allowed_reverse_match(s,end,enc)
336341
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,4 +610,17 @@
610610
end
611611
end
612612
end
613+
614+
describe "ONIGENC_MBC_CASE_FOLD" do
  # The C helper returns a pair: the folded bytes as a String and the number
  # of input bytes the macro advanced past.
  it "returns the correct case fold for the given string" do
    @s.ONIGENC_MBC_CASE_FOLD("lower").should == ["l", 1]
    @s.ONIGENC_MBC_CASE_FOLD("Upper").should == ["u", 1]
  end

  it "works with other encodings" do
    binary_lower = "lower".force_encoding("binary")
    binary_upper = "Upper".force_encoding("binary")

    @s.ONIGENC_MBC_CASE_FOLD(binary_lower).should == ["l", 1]
    @s.ONIGENC_MBC_CASE_FOLD(binary_upper).should == ["u", 1]
    @s.ONIGENC_MBC_CASE_FOLD("É").should == ["é", 2]
  end
end
613626
end

spec/ruby/optional/capi/ext/encoding_spec.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,18 @@ static VALUE encoding_spec_rb_uv_to_utf8(VALUE self, VALUE buf, VALUE num) {
261261
return INT2NUM(rb_uv_to_utf8(RSTRING_PTR(buf), NUM2INT(num)));
262262
}
263263

264+
/*
 * Spec helper: applies ONIGENC_MBC_CASE_FOLD to the start of `str`.
 *
 * Returns a two-element Array:
 *   [0] the folded bytes as a String in the encoding of `str`, or nil when the
 *       macro reports a length <= 0;
 *   [1] the number of input bytes the macro advanced past.
 */
static VALUE encoding_spec_ONIGENC_MBC_CASE_FOLD(VALUE self, VALUE str) {
  /* Keep the cursor as an OnigUChar pointer so &beg already has the
   * (const OnigUChar **) type the macro expects, whether or not the macro
   * definition in use casts its argument. */
  const OnigUChar *beg_initial = (const OnigUChar *)RSTRING_PTR(str);
  const OnigUChar *beg = beg_initial;
  /* Expose at most the first two bytes, but never point past the string data
   * when the string is shorter than that. */
  long len = RSTRING_LEN(str);
  const OnigUChar *end = beg_initial + (len < 2 ? len : 2);
  /* NOTE(review): Onigmo also provides ONIGENC_MBC_CASE_FOLD_MAXLEN for sizing
   * fold buffers - confirm which constant is intended here. */
  OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
  rb_encoding *enc = rb_enc_get(str);
  int r = ONIGENC_MBC_CASE_FOLD(enc, ONIGENC_CASE_FOLD, &beg, end, fold);
  VALUE str_result = r <= 0 ? Qnil : rb_enc_str_new((char *)fold, r, enc);
  long bytes_used = beg - beg_initial;
  return rb_ary_new3(2, str_result, INT2FIX(bytes_used));
}
275+
264276
void Init_encoding_spec(void) {
265277
VALUE cls;
266278
native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*));
@@ -321,6 +333,7 @@ void Init_encoding_spec(void) {
321333
rb_define_method(cls, "rb_enc_codepoint_len", encoding_spec_rb_enc_codepoint_len, 1);
322334
rb_define_method(cls, "rb_enc_str_asciionly_p", encoding_spec_rb_enc_str_asciionly_p, 1);
323335
rb_define_method(cls, "rb_uv_to_utf8", encoding_spec_rb_uv_to_utf8, 2);
336+
rb_define_method(cls, "ONIGENC_MBC_CASE_FOLD", encoding_spec_ONIGENC_MBC_CASE_FOLD, 1);
324337
}
325338

326339
#ifdef __cplusplus

src/main/c/cext/encoding.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,3 +331,22 @@ int rb_uv_to_utf8(char buf[6], unsigned long uv) {
331331

332332
rb_raise(rb_eRangeError, "pack(U): value out of range");
333333
}
334+
335+
/* Moves the pointer stored in *p forward by `offset` UChar elements.
 * Handed to the "rb_tr_enc_mbc_case_fold" call as a callback, so the callee can
 * update the caller's input cursor. */
void write_p(const UChar** p, int offset) {
  *p += offset;
}
338+
339+
/*
 * TruffleRuby implementation behind ONIGENC_MBC_CASE_FOLD.
 *
 * Wraps the bytes in [*p, end) in a Ruby String and invokes
 * "rb_tr_enc_mbc_case_fold" on RUBY_CEXT, passing write_p and p so the callee
 * can advance *p. The String it returns holds the folded bytes, which are
 * copied into `result`.
 *
 * enc    - encoding to fold with
 * flag   - case fold flags, forwarded unchanged
 * p      - in/out: start of the input; updated through write_p
 * end    - end of the input
 * result - output buffer for the folded bytes; it is not NUL-terminated and
 *          its capacity is not checked here, so the caller must size it for
 *          the longest possible fold
 *
 * Returns the number of bytes stored in `result`.
 */
int rb_tr_enc_mbc_case_fold(rb_encoding *enc, int flag, const UChar** p, const UChar* end, UChar* result) {
  long length = end - *p;
  VALUE result_str = rb_tr_wrap(polyglot_invoke(RUBY_CEXT, "rb_tr_enc_mbc_case_fold",
          rb_tr_unwrap(rb_enc_from_encoding(enc)),
          flag,
          rb_tr_unwrap(rb_str_new((const char *)*p, length)),
          write_p,
          p));
  long result_len = RSTRING_LEN(result_str);
  if (result_len > 0) {
    /* memcpy rather than strncpy: the folded bytes are raw encoded data and
     * may contain 0x00 bytes (e.g. UTF-16/UTF-32); strncpy would stop at the
     * first one and zero-fill the remainder. */
    memcpy(result, RSTRING_PTR(result_str), (size_t)result_len);
  }
  return (int)result_len;
}

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import java.util.concurrent.locks.ReentrantLock;
1616

1717
import org.jcodings.Encoding;
18+
import org.jcodings.IntHolder;
19+
import org.jcodings.specific.USASCIIEncoding;
1820
import org.jcodings.specific.UTF8Encoding;
1921
import org.truffleruby.Layouts;
2022
import org.truffleruby.RubyContext;
@@ -1252,6 +1254,51 @@ protected Object mbclenCharFoundLen(int r) {
12521254

12531255
}
12541256

1257+
@CoreMethod(names = "rb_tr_enc_mbc_case_fold", onSingleton = true, required = 5, lowerFixnum = 2)
1258+
public abstract static class RbTrMbcCaseFoldNode extends CoreMethodArrayArgumentsNode {
1259+
1260+
@Specialization(guards = { "isRubyString(string)", "isRubyEncoding(enc)" }, limit = "getCacheLimit()")
1261+
protected Object rbTrEncMbcCaseFold(
1262+
DynamicObject enc,
1263+
int flags,
1264+
DynamicObject string,
1265+
Object write_p,
1266+
Object p,
1267+
@CachedLibrary("write_p") InteropLibrary receivers,
1268+
@Cached BranchProfile exceptionProfile) {
1269+
final byte[] bytes = StringOperations.rope(string).getBytes();
1270+
final byte[] to = new byte[bytes.length];
1271+
final IntHolder intHolder = new IntHolder();
1272+
intHolder.value = 0;
1273+
final int resultLength = EncodingOperations
1274+
.getEncoding(enc)
1275+
.mbcCaseFold(flags, bytes, intHolder, bytes.length, to);
1276+
execute(write_p, new Object[]{ p, intHolder.value }, receivers, exceptionProfile);
1277+
final byte[] result = new byte[resultLength];
1278+
if (resultLength > 0) {
1279+
System.arraycopy(to, 0, result, 0, resultLength);
1280+
}
1281+
return StringOperations.createString(
1282+
getContext(),
1283+
RopeOperations.create(result, USASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN));
1284+
}
1285+
1286+
private void execute(Object receiver, Object[] args, InteropLibrary receivers,
1287+
BranchProfile exceptionProfile) {
1288+
try {
1289+
receivers.execute(receiver, args);
1290+
} catch (UnsupportedTypeException | ArityException | UnsupportedMessageException e) {
1291+
exceptionProfile.enter();
1292+
throw new JavaException(e);
1293+
}
1294+
}
1295+
1296+
protected int getCacheLimit() {
1297+
return getContext().getOptions().DISPATCH_CACHE;
1298+
}
1299+
1300+
}
1301+
12551302
@CoreMethod(names = "rb_enc_mbmaxlen", onSingleton = true, required = 1)
12561303
public abstract static class RbEncMaxLenNode extends CoreMethodArrayArgumentsNode {
12571304

0 commit comments

Comments
 (0)