Skip to content

Commit 0b68b83

Browse files
committed
[GR-20329] Implement ONIGENC_MBC_CASE_FOLD.
PullRequest: truffleruby/1726
2 parents dc3a61a + 0b2009e commit 0b68b83

File tree

7 files changed

+103
-0
lines changed

7 files changed

+103
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Compatibility:
5656
* Implemented `rb_enc_mbc_to_codepoint`.
5757
* Change the lookup methods to achieve Refinements specification (#2033, @ssnickolay)
5858
* Implemented `Digest::Instance#new` (#2040).
59+
* Implemented `ONIGENC_MBC_CASE_FOLD`.
5960

6061
Performance:
6162

lib/cext/include/ruby/encoding.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ int rb_enc_isspace(unsigned char c, rb_encoding *enc);
253253
#endif
254254
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c))
255255

256+
#ifdef TRUFFLERUBY
257+
/* This should be moved to onigmo.h when rb_encoding is typedef const OnigEncodingType rb_encoding; */
258+
int rb_tr_enc_mbc_case_fold(rb_encoding *enc, int flag, const UChar** p, const UChar* end, UChar* lower);
259+
#endif
260+
256261
int rb_enc_asciicompat(rb_encoding *enc);
257262

258263
int rb_enc_casefold(char *to, const char *p, const char *e, rb_encoding *enc);

lib/cext/include/ruby/onigmo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,13 @@ int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, c
329329

330330
#define ONIGENC_NAME(enc) ((enc)->name)
331331

332+
#ifdef TRUFFLERUBY
/* On TruffleRuby the fold is delegated to rb_tr_enc_mbc_case_fold() instead of
 * the encoding's mbc_case_fold function pointer. Arguments are parenthesized so
 * that expression arguments expand safely, and pp receives the same
 * (const OnigUChar**) cast as the upstream variant below so both branches
 * accept the same argument types. */
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
  rb_tr_enc_mbc_case_fold((enc),(flag),(const OnigUChar**)(pp),(end),(buf))
#else
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
  (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
#endif
334339
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
335340
(enc)->is_allowed_reverse_match(s,end,enc)
336341
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,4 +610,17 @@
610610
end
611611
end
612612
end
613+
614+
describe "ONIGENC_MBC_CASE_FOLD" do
  # Each expectation is [folded first character, number of input bytes consumed].
  it "returns the correct case fold for the given string" do
    [
      ["lower", ["l", 1]],
      ["Upper", ["u", 1]],
    ].each do |input, expected|
      @s.ONIGENC_MBC_CASE_FOLD(input).should == expected
    end
  end

  it "works with other encodings" do
    # Same ASCII inputs, but tagged as binary before being handed to the C extension.
    [
      ["lower", ["l", 1]],
      ["Upper", ["u", 1]],
    ].each do |input, expected|
      binary_input = input.force_encoding("binary")
      @s.ONIGENC_MBC_CASE_FOLD(binary_input).should == expected
    end

    # A two-byte character: both bytes must be reported as consumed.
    @s.ONIGENC_MBC_CASE_FOLD("É").should == ["é", 2]
  end
end
613626
end

spec/ruby/optional/capi/ext/encoding_spec.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ static VALUE encoding_spec_rb_uv_to_utf8(VALUE self, VALUE buf, VALUE num) {
266266
return INT2NUM(rb_uv_to_utf8(RSTRING_PTR(buf), NUM2INT(num)));
267267
}
268268

269+
/*
 * Spec helper for ONIGENC_MBC_CASE_FOLD.
 *
 * Case-folds the character at the start of `str` using the string's own
 * encoding and returns a two-element array:
 *   [0] the folded bytes as a String in that encoding, or nil when the macro
 *       returns a value <= 0;
 *   [1] how many input bytes the macro consumed (how far it advanced the cursor).
 */
static VALUE encoding_spec_ONIGENC_MBC_CASE_FOLD(VALUE self, VALUE str) {
  const OnigUChar *start = (const OnigUChar *)RSTRING_PTR(str);
  const OnigUChar *cursor = start;
  /* Bound the fold by the real end of the string rather than a fixed offset,
   * so inputs shorter than two bytes are not read past their end. */
  const OnigUChar *end = (const OnigUChar *)RSTRING_END(str);
  /* The output buffer must hold ONIGENC_MBC_CASE_FOLD_MAXLEN bytes; the
   * *_CODES_MAX_NUM constant is a count of fold items, not a byte size. */
  OnigUChar fold[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  rb_encoding *enc = rb_enc_get(str);
  int r = ONIGENC_MBC_CASE_FOLD(enc, ONIGENC_CASE_FOLD, &cursor, end, fold);
  VALUE str_result = r <= 0 ? Qnil : rb_enc_str_new((char *)fold, r, enc);
  long bytes_used = cursor - start;
  return rb_ary_new3(2, str_result, INT2FIX(bytes_used));
}
280+
269281
void Init_encoding_spec(void) {
270282
VALUE cls;
271283
native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*));
@@ -326,6 +338,7 @@ void Init_encoding_spec(void) {
326338
rb_define_method(cls, "rb_enc_codepoint_len", encoding_spec_rb_enc_codepoint_len, 1);
327339
rb_define_method(cls, "rb_enc_str_asciionly_p", encoding_spec_rb_enc_str_asciionly_p, 1);
328340
rb_define_method(cls, "rb_uv_to_utf8", encoding_spec_rb_uv_to_utf8, 2);
341+
rb_define_method(cls, "ONIGENC_MBC_CASE_FOLD", encoding_spec_ONIGENC_MBC_CASE_FOLD, 1);
329342
}
330343

331344
#ifdef __cplusplus

src/main/c/cext/encoding.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,3 +331,22 @@ int rb_uv_to_utf8(char buf[6], unsigned long uv) {
331331

332332
rb_raise(rb_eRangeError, "pack(U): value out of range");
333333
}
334+
335+
/* Advances the pointer stored at *p by `offset` bytes. It is passed as a
 * callback from rb_tr_enc_mbc_case_fold() so the managed side can move the
 * caller's cursor. */
void write_p(const UChar** p, int offset) {
  *p += offset;
}
338+
339+
/*
 * TruffleRuby backend for ONIGENC_MBC_CASE_FOLD.
 *
 * Sends the bytes in [*p, end) together with the encoding and flags to the
 * managed "rb_tr_enc_mbc_case_fold" method. That method calls write_p back to
 * advance *p, and returns the folded bytes wrapped in a Ruby String.
 *
 * enc    - encoding used for the fold
 * flag   - case-fold flags, forwarded unchanged
 * p      - in/out cursor into the input; advanced through the write_p callback
 * end    - end of the input
 * result - output buffer that receives the folded bytes
 *
 * Returns the number of bytes written to `result`.
 */
int rb_tr_enc_mbc_case_fold(rb_encoding *enc, int flag, const UChar** p, const UChar* end, UChar* result) {
  int length = end - *p;
  VALUE result_str = rb_tr_wrap(polyglot_invoke(RUBY_CEXT, "rb_tr_enc_mbc_case_fold",
          rb_tr_unwrap(rb_enc_from_encoding(enc)),
          flag,
          rb_tr_unwrap(rb_str_new((char *)*p, length)),
          write_p,
          p));
  int result_len = RSTRING_LEN(result_str);
  if (result_len > 0) {
    /* The folded bytes are binary data that may contain zero bytes (for
     * example in UTF-16/UTF-32), so copy with memcpy: strncpy would stop at
     * the first NUL and zero-fill the remainder. */
    memcpy(result, RSTRING_PTR(result_str), result_len);
  }
  return result_len;
}

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import java.util.concurrent.locks.ReentrantLock;
1616

1717
import org.jcodings.Encoding;
18+
import org.jcodings.IntHolder;
19+
import org.jcodings.specific.USASCIIEncoding;
1820
import org.jcodings.specific.UTF8Encoding;
1921
import org.truffleruby.Layouts;
2022
import org.truffleruby.RubyContext;
@@ -1252,6 +1254,51 @@ protected Object mbclenCharFoundLen(int r) {
12521254

12531255
}
12541256

1257+
@CoreMethod(names = "rb_tr_enc_mbc_case_fold", onSingleton = true, required = 5, lowerFixnum = 2)
1258+
public abstract static class RbTrMbcCaseFoldNode extends CoreMethodArrayArgumentsNode {
1259+
1260+
@Specialization(guards = { "isRubyString(string)", "isRubyEncoding(enc)" }, limit = "getCacheLimit()")
1261+
protected Object rbTrEncMbcCaseFold(
1262+
DynamicObject enc,
1263+
int flags,
1264+
DynamicObject string,
1265+
Object write_p,
1266+
Object p,
1267+
@CachedLibrary("write_p") InteropLibrary receivers,
1268+
@Cached BranchProfile exceptionProfile) {
1269+
final byte[] bytes = StringOperations.rope(string).getBytes();
1270+
final byte[] to = new byte[bytes.length];
1271+
final IntHolder intHolder = new IntHolder();
1272+
intHolder.value = 0;
1273+
final int resultLength = EncodingOperations
1274+
.getEncoding(enc)
1275+
.mbcCaseFold(flags, bytes, intHolder, bytes.length, to);
1276+
execute(write_p, new Object[]{ p, intHolder.value }, receivers, exceptionProfile);
1277+
final byte[] result = new byte[resultLength];
1278+
if (resultLength > 0) {
1279+
System.arraycopy(to, 0, result, 0, resultLength);
1280+
}
1281+
return StringOperations.createString(
1282+
getContext(),
1283+
RopeOperations.create(result, USASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN));
1284+
}
1285+
1286+
private void execute(Object receiver, Object[] args, InteropLibrary receivers,
1287+
BranchProfile exceptionProfile) {
1288+
try {
1289+
receivers.execute(receiver, args);
1290+
} catch (UnsupportedTypeException | ArityException | UnsupportedMessageException e) {
1291+
exceptionProfile.enter();
1292+
throw new JavaException(e);
1293+
}
1294+
}
1295+
1296+
protected int getCacheLimit() {
1297+
return getContext().getOptions().DISPATCH_CACHE;
1298+
}
1299+
1300+
}
1301+
12551302
@CoreMethod(names = "rb_enc_mbmaxlen", onSingleton = true, required = 1)
12561303
public abstract static class RbEncMaxLenNode extends CoreMethodArrayArgumentsNode {
12571304

0 commit comments

Comments
 (0)