Skip to content

Commit 8202731

Browse files
committed
[GR-18163] Fix Marshal.load of multiple Symbols with an explicit encoding (#1624)
PullRequest: truffleruby/2781
2 parents fd279be + c4635fd commit 8202731

File tree

5 files changed

+89
-18
lines changed

5 files changed

+89
-18
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ New features:
55

66
Bug fixes:
77

8+
* Fix `Marshal.load` of multiple `Symbols` with an explicit encoding (#1624).
89

910
Compatibility:
1011

spec/ruby/core/marshal/dump_spec.rb

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,20 @@
7878
s = "\u2192".force_encoding("binary").to_sym
7979
Marshal.dump(s).should == "\x04\b:\b\xE2\x86\x92"
8080
end
81+
82+
it "dumps multiple Symbols sharing the same encoding" do
83+
# Note that the encoding is a link for the second Symbol
84+
symbol1 = "I:\t\xE2\x82\xACa\x06:\x06ET"
85+
symbol2 = "I:\t\xE2\x82\xACb\x06;\x06T"
86+
value = [
87+
"€a".force_encoding(Encoding::UTF_8).to_sym,
88+
"€b".force_encoding(Encoding::UTF_8).to_sym
89+
]
90+
Marshal.dump(value).should == "\x04\b[\a#{symbol1}#{symbol2}"
91+
92+
value = [*value, value[0]]
93+
Marshal.dump(value).should == "\x04\b[\b#{symbol1}#{symbol2};\x00"
94+
end
8195
end
8296

8397
describe "with an object responding to #marshal_dump" do
@@ -343,8 +357,13 @@
343357
end
344358

345359
it "dumps an extended Struct" do
346-
st = Struct.new("Extended", :a, :b).new
347-
Marshal.dump(st.extend(Meths)).should == "\004\be:\nMethsS:\025Struct::Extended\a:\006a0:\006b0"
360+
obj = Struct.new("Extended", :a, :b).new.extend(Meths)
361+
Marshal.dump(obj).should == "\004\be:\nMethsS:\025Struct::Extended\a:\006a0:\006b0"
362+
363+
s = 'hi'
364+
obj.a = [:a, s]
365+
obj.b = [:Meths, s]
366+
Marshal.dump(obj).should == "\004\be:\nMethsS:\025Struct::Extended\a:\006a[\a;\a\"\ahi:\006b[\a;\000@\a"
348367
Struct.send(:remove_const, :Extended)
349368
end
350369
end

spec/ruby/core/marshal/fixtures/marshal_data.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class UserPreviouslyDefinedWithInitializedIvar
8383
end
8484

8585
class UserMarshal
86-
attr_reader :data
86+
attr_accessor :data
8787

8888
def initialize
8989
@data = 'stuff'

spec/ruby/core/marshal/shared/load.rb

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,8 @@
309309

310310
it "loads an extended Array object containing a user-marshaled object" do
311311
obj = [UserMarshal.new, UserMarshal.new].extend(Meths)
312-
new_obj = Marshal.send(@method, "\x04\be:\nMeths[\ao:\x10UserMarshal\x06:\n@dataI\"\nstuff\x06:\x06ETo;\x06\x06;\aI\"\nstuff\x06;\bT")
312+
dump = "\x04\be:\nMeths[\ao:\x10UserMarshal\x06:\n@dataI\"\nstuff\x06:\x06ETo;\x06\x06;\aI\"\nstuff\x06;\bT"
313+
new_obj = Marshal.send(@method, dump)
313314

314315
new_obj.should == obj
315316
obj_ancestors = class << obj; ancestors[1..-1]; end
@@ -399,6 +400,24 @@
399400
sym.should == s
400401
sym.encoding.should == Encoding::BINARY
401402
end
403+
404+
it "loads multiple Symbols sharing the same encoding" do
405+
# Note that the encoding is a link for the second Symbol
406+
symbol1 = "I:\t\xE2\x82\xACa\x06:\x06ET"
407+
symbol2 = "I:\t\xE2\x82\xACb\x06;\x06T"
408+
dump = "\x04\b[\a#{symbol1}#{symbol2}"
409+
value = Marshal.send(@method, dump)
410+
value.map(&:encoding).should == [Encoding::UTF_8, Encoding::UTF_8]
411+
expected = [
412+
"€a".force_encoding(Encoding::UTF_8).to_sym,
413+
"€b".force_encoding(Encoding::UTF_8).to_sym
414+
]
415+
value.should == expected
416+
417+
value = Marshal.send(@method, "\x04\b[\b#{symbol1}#{symbol2};\x00")
418+
value.map(&:encoding).should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8]
419+
value.should == [*expected, expected[0]]
420+
end
402421
end
403422

404423
describe "for a String" do
@@ -460,20 +479,23 @@
460479
describe "for a Struct" do
461480
it "loads a extended_struct having fields with same objects" do
462481
s = 'hi'
463-
obj = Struct.new("Ure2", :a, :b).new.extend(Meths)
482+
obj = Struct.new("Extended", :a, :b).new.extend(Meths)
483+
dump = "\004\be:\nMethsS:\025Struct::Extended\a:\006a0:\006b0"
484+
Marshal.send(@method, dump).should == obj
485+
464486
obj.a = [:a, s]
465487
obj.b = [:Meths, s]
466-
467-
Marshal.send(@method,
468-
"\004\be:\nMethsS:\021Struct::Ure2\a:\006a[\a;\a\"\ahi:\006b[\a;\000@\a"
469-
).should == obj
470-
Struct.send(:remove_const, :Ure2)
488+
dump = "\004\be:\nMethsS:\025Struct::Extended\a:\006a[\a;\a\"\ahi:\006b[\a;\000@\a"
489+
Marshal.send(@method, dump).should == obj
490+
Struct.send(:remove_const, :Extended)
471491
end
472492

473493
it "loads a struct having ivar" do
474494
obj = Struct.new("Thick").new
475495
obj.instance_variable_set(:@foo, 5)
476-
Marshal.send(@method, "\004\bIS:\022Struct::Thick\000\006:\t@fooi\n").should == obj
496+
reloaded = Marshal.send(@method, "\004\bIS:\022Struct::Thick\000\006:\t@fooi\n")
497+
reloaded.should == obj
498+
reloaded.instance_variable_get(:@foo).should == 5
477499
Struct.send(:remove_const, :Thick)
478500
end
479501

@@ -588,6 +610,18 @@
588610
end
589611
end
590612

613+
describe "for an object responding to #marshal_dump and #marshal_load" do
614+
it "loads a user-marshaled object" do
615+
obj = UserMarshal.new
616+
obj.data = :data
617+
value = [obj, :data]
618+
dump = Marshal.dump(value)
619+
dump.should == "\x04\b[\aU:\x10UserMarshal:\tdata;\x06"
620+
reloaded = Marshal.load(dump)
621+
reloaded.should == value
622+
end
623+
end
624+
591625
describe "for a user object" do
592626
it "loads a user-marshaled extended object" do
593627
obj = UserMarshal.new.extend(Meths)

src/main/ruby/truffleruby/core/marshal.rb

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,18 @@ def add_object(obj)
553553
end
554554

555555
def add_symlink(obj)
556-
sz = @symlinks.size
556+
sz = @symbols.size
557+
@symbols[sz] = obj
558+
@symlinks[obj.__id__] = sz
559+
end
560+
561+
def reserve_symlink
562+
sz = @symbols.size
563+
@symbols[sz] = nil
564+
sz
565+
end
566+
567+
def assign_reserved_symlink(sz, obj)
557568
@symbols[sz] = obj
558569
@symlinks[obj.__id__] = sz
559570
end
@@ -848,7 +859,6 @@ def construct_regexp
848859

849860
def construct_struct
850861
name = get_symbol
851-
store_unique_object name
852862

853863
klass = const_lookup name, Class
854864
members = klass.members
@@ -859,8 +869,7 @@ def construct_struct
859869
construct_integer.times do |i|
860870
slot = get_symbol
861871
unless members[i].intern == slot
862-
raise TypeError, 'struct %s is not compatible (%p for %p)' %
863-
[klass, slot, members[i]]
872+
raise TypeError, "struct #{klass} is not compatible (#{slot.inspect} for #{members[i].inspect})"
864873
end
865874

866875
Primitive.object_hidden_var_set obj, slot, construct
@@ -872,6 +881,12 @@ def construct_struct
872881
def construct_symbol(ivar_index)
873882
data = get_byte_sequence
874883

884+
# We must use the next @symbols index for the Symbol being constructed.
885+
# However, constructing a Symbol might require to construct the :E or :encoding symbols,
886+
# and those must have a larger index (if they are serialized for the first time),
887+
# to be binary-compatible with CRuby and pass specs. So we reserve the index and assign it later.
888+
idx = reserve_symlink
889+
875890
# A Symbol has no instance variables (it's frozen),
876891
# but we need to know the encoding before building the Symbol
877892
if ivar_index and @has_ivar[ivar_index]
@@ -881,7 +896,7 @@ def construct_symbol(ivar_index)
881896
end
882897

883898
obj = data.to_sym
884-
store_unique_object obj
899+
assign_reserved_symlink idx, obj
885900

886901
obj
887902
end
@@ -910,7 +925,6 @@ def construct_user_defined(ivar_index)
910925

911926
def construct_user_marshal
912927
name = get_symbol
913-
store_unique_object name
914928

915929
klass = const_lookup name, Class
916930
obj = klass.allocate
@@ -976,7 +990,7 @@ def serialize(obj)
976990
raise ArgumentError, 'exceed depth limit' if @depth == 0
977991

978992
# How much depth we have left.
979-
@depth -= 1;
993+
@depth -= 1
980994

981995
if link = find_link(obj)
982996
str = Truffle::Type.binary_string("@#{serialize_integer(link)}")
@@ -1249,7 +1263,10 @@ def initialize(stream, depth, prc)
12491263
if @stream
12501264
@byte_array = stream.bytes
12511265
end
1266+
end
12521267

1268+
def inspect
1269+
"#<Marshal::StringState #{@stream[@consumed..-1].inspect}>"
12531270
end
12541271

12551272
def consume(bytes)

0 commit comments

Comments
 (0)