Skip to content

Commit 4f07365

Browse files
committed
[GR-20329] Implement rb_enc_mbcput.
PullRequest: truffleruby/1782
2 parents 729a070 + 8a69ee2 commit 4f07365

File tree

6 files changed

+67
-2
lines changed

6 files changed

+67
-2
lines changed

lib/cext/include/ruby/onigmo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,10 +373,12 @@ int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const stru
373373
#ifdef TRUFFLERUBY
374374
int rb_tr_code_to_mbclen(OnigCodePoint code, OnigEncodingType *encoding);
375375
#define ONIGENC_CODE_TO_MBCLEN(enc,code) rb_tr_code_to_mbclen(code,enc)
376+
int rb_tr_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc);
377+
#define ONIGENC_CODE_TO_MBC(enc,code,buf) rb_tr_code_to_mbc(code,buf,enc)
376378
#else
377379
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
378-
#endif
379380
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
381+
#endif
380382
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
381383
(enc)->property_name_to_ctype(enc,p,end)
382384

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,18 @@
139139
end
140140
end
141141

142+
describe "rb_enc_mbcput" do
  it "writes the correct bytes to the buffer" do
    # Code points that take 1, 2, 3 and 4 bytes when encoded as UTF-8.
    utf8_cases = [
      [0x24, "$"],
      [0xA2, "¢"],
      [0x20AC, "€"],
      [0x24B62, "𤭢"]
    ]
    utf8_cases.each do |code, expected|
      @s.rb_enc_mbcput(code, Encoding::UTF_8).should == expected
    end

    # UTF-16: a single big-endian code unit, then a little-endian surrogate pair.
    @s.rb_enc_mbcput(0x24, Encoding::UTF_16BE).bytes.should == [0x00, 0x24]
    @s.rb_enc_mbcput(0x24B62, Encoding::UTF_16LE).bytes.should == [0x52, 0xD8, 0x62, 0xDF]
  end
end
153+
142154
describe "rb_usascii_encoding" do
143155
it "returns the encoding for Encoding::US_ASCII" do
144156
@s.rb_usascii_encoding.should == "US-ASCII"
@@ -630,6 +642,10 @@
630642
@s.ONIGENC_MBC_CASE_FOLD("lower".force_encoding("binary")).should == ["l", 1]
631643
@s.ONIGENC_MBC_CASE_FOLD("Upper".force_encoding("binary")).should == ["u", 1]
632644
@s.ONIGENC_MBC_CASE_FOLD("É").should == ["é", 2]
645+
646+
str, length = @s.ONIGENC_MBC_CASE_FOLD('$'.encode(Encoding::UTF_16BE))
647+
length.should == 2
648+
str.bytes.should == [0, 0x24]
633649
end
634650
end
635651
end

spec/ruby/optional/capi/ext/encoding_spec.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,18 @@ static VALUE encoding_spec_rb_enc_mbc_to_codepoint(VALUE self, VALUE str, VALUE
127127
return INT2FIX(rb_enc_mbc_to_codepoint(p, e, rb_enc_get(str)));
128128
}
129129

130+
/*
 * Spec helper: encodes the code point `code` in `encoding` with
 * rb_enc_mbcput and returns the produced bytes as a String created with
 * that encoding.
 *
 * The scratch buffer is pre-filled with the sentinel byte '\1' and is one
 * byte larger than ONIGENC_CODE_TO_MBC_MAXLEN, so the byte right after the
 * reported length can always be inspected without reading past the array.
 *
 * Raises RuntimeError when rb_enc_mbcput reports a length outside
 * 0..ONIGENC_CODE_TO_MBC_MAXLEN (for example a negative error code), or
 * when the byte following the encoded character was overwritten.
 */
static VALUE encoding_spec_rb_enc_mbcput(VALUE self, VALUE code, VALUE encoding) {
  unsigned int c = FIX2UINT(code);
  rb_encoding *enc = rb_to_encoding(encoding);
  char buf[ONIGENC_CODE_TO_MBC_MAXLEN + 1];
  int len;

  memset(buf, '\1', sizeof(buf));
  len = rb_enc_mbcput(c, buf, enc);
  /* Validate before indexing: buf[len] is only in bounds for this range. */
  if (len < 0 || len > ONIGENC_CODE_TO_MBC_MAXLEN) {
    rb_raise(rb_eRuntimeError, "rb_enc_mbcput returned an invalid length: %d", len);
  }
  if (buf[len] != '\1') {
    rb_raise(rb_eRuntimeError, "should not change bytes after len");
  }
  return rb_enc_str_new(buf, len, enc);
}
141+
130142
static VALUE encoding_spec_rb_enc_from_encoding(VALUE self, VALUE name) {
131143
return rb_enc_from_encoding(rb_enc_find(RSTRING_PTR(name)));
132144
}
@@ -271,8 +283,12 @@ static VALUE encoding_spec_ONIGENC_MBC_CASE_FOLD(VALUE self, VALUE str) {
271283
char *beg_initial = beg;
272284
char *end = beg + 2;
273285
OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
286+
memset(fold, '\1', sizeof(fold));
274287
rb_encoding *enc = rb_enc_get(str);
275288
int r = ONIGENC_MBC_CASE_FOLD(enc, ONIGENC_CASE_FOLD, &beg, (const OnigUChar *)end, fold);
289+
if (r > 0 && fold[r] != '\1') {
290+
rb_raise(rb_eRuntimeError, "should not change bytes after len");
291+
}
276292
VALUE str_result = r <= 0 ? Qnil : rb_enc_str_new((char *)fold, r, enc);
277293
long bytes_used = beg - beg_initial;
278294
return rb_ary_new3(2, str_result, INT2FIX(bytes_used));
@@ -324,6 +340,7 @@ void Init_encoding_spec(void) {
324340
rb_define_method(cls, "rb_enc_isspace", encoding_spec_rb_enc_isspace, 2);
325341
rb_define_method(cls, "rb_enc_from_index", encoding_spec_rb_enc_from_index, 1);
326342
rb_define_method(cls, "rb_enc_mbc_to_codepoint", encoding_spec_rb_enc_mbc_to_codepoint, 2);
343+
rb_define_method(cls, "rb_enc_mbcput", encoding_spec_rb_enc_mbcput, 2);
327344
rb_define_method(cls, "rb_enc_from_encoding", encoding_spec_rb_enc_from_encoding, 1);
328345
rb_define_method(cls, "rb_enc_get", encoding_spec_rb_enc_get, 1);
329346
rb_define_method(cls, "rb_enc_precise_mbclen", encoding_spec_rb_enc_precise_mbclen, 2);

spec/ruby/optional/capi/spec_helper.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def compile_extension(name)
7676
$ruby = ENV.values_at('RUBY_EXE', 'RUBY_FLAGS').join(' ')
7777
# MRI magic to consider building non-bundled extensions
7878
$extout = nil
79+
$warnflags << ' -Wno-declaration-after-statement'
7980
create_makefile(#{ext.inspect})
8081
RUBY
8182
output = ruby_exe("extconf.rb")

src/main/c/cext/encoding.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,17 @@ int rb_tr_enc_mbc_case_fold(rb_encoding *enc, int flag, const UChar** p, const U
362362
p));
363363
int result_len = RSTRING_LEN(result_str);
364364
if (result_len > 0) {
365-
strncpy((char *)result, RSTRING_PTR(result_str), result_len);
365+
memcpy(result, RSTRING_PTR(result_str), result_len);
366+
}
367+
return result_len;
368+
}
369+
370+
/*
 * Encodes `code` in encoding `enc` by invoking "rb_tr_code_to_mbc" on
 * RUBY_CEXT, then copies the bytes of the returned string into `buf`.
 *
 * Returns the byte length of the returned string; nothing is copied when
 * that length is not positive.
 *
 * NOTE(review): no capacity is passed in, so `buf` is presumably expected to
 * hold at least ONIGENC_CODE_TO_MBC_MAXLEN bytes -- confirm against callers.
 */
int rb_tr_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) {
  VALUE encoded = rb_tr_wrap(polyglot_invoke(RUBY_CEXT, "rb_tr_code_to_mbc",
                                             rb_tr_unwrap(rb_enc_from_encoding(enc)),
                                             code));
  int nbytes = RSTRING_LEN(encoded);

  if (nbytes <= 0) {
    return nbytes;
  }

  memcpy(buf, RSTRING_PTR(encoded), nbytes);
  return nbytes;
}

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,25 @@ protected int getCacheLimit() {
13101310

13111311
}
13121312

1313+
@CoreMethod(names = "rb_tr_code_to_mbc", onSingleton = true, required = 2, lowerFixnum = 2)
1314+
public abstract static class RbTrMbcPutNode extends CoreMethodArrayArgumentsNode {
1315+
1316+
@Specialization(guards = "isRubyEncoding(enc)")
1317+
protected Object rbTrEncMbcPut(DynamicObject enc, int code) {
1318+
final Encoding encoding = EncodingOperations.getEncoding(enc);
1319+
final byte buf[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
1320+
final int resultLength = encoding.codeToMbc(code, buf, 0);
1321+
final byte result[] = new byte[resultLength];
1322+
if (resultLength > 0) {
1323+
System.arraycopy(buf, 0, result, 0, resultLength);
1324+
}
1325+
return StringOperations.createString(
1326+
getContext(),
1327+
RopeOperations.create(result, USASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN));
1328+
}
1329+
1330+
}
1331+
13131332
@CoreMethod(names = "rb_enc_mbmaxlen", onSingleton = true, required = 1)
13141333
public abstract static class RbEncMaxLenNode extends CoreMethodArrayArgumentsNode {
13151334

0 commit comments

Comments
 (0)