15
15
//! The byte-level encoding of component lists uses the structure of UTF-8 in
16
16
//! order to save space:
17
17
//!
18
- //! - A valid UTF-8 codepoint never starts with the bits `10` as this bit
19
- //! prefix is reserved for bytes in the middle of a UTF-8 codepoint byte
20
- //! sequence. We make use of this fact by letting all string ID components
21
- //! start with this `10` prefix. Thus when we parse the contents of a value
22
- //! we know to stop if the start byte of the next codepoint has this prefix.
18
+ //! - A valid UTF-8 string never contains the byte `0xFE`. We make use of
19
+ //! this fact by letting all string ID components start with this `0xFE`
20
+ //! prefix. Thus when we parse the contents of a value we know to stop if
21
+ //! we encounter this byte.
23
22
//!
24
- //! - A valid UTF-8 string cannot contain the `0xFF` byte and since string IDs
25
- //! start with `10` as described above, they also cannot start with a `0xFF`
26
- //! byte. Thus we can safely use `0xFF` as our component list terminator.
23
+ //! - A valid UTF-8 string cannot contain the `0xFF` byte. Thus we can safely
24
+ //! use `0xFF` as our component list terminator.
27
25
//!
28
26
//! The sample composite string ["abc", ID(42), "def", TERMINATOR] would thus be
29
27
//! encoded as:
30
28
//!
31
29
//! ```ignore
32
- //! ['a', 'b' , 'c', 128, 0, 0, 42 , 'd', 'e', 'f', 255]
33
- //! ^^^^^^^^^^^^^ ^^^
34
- //! string ID 42 with 0b10 prefix terminator (0xFF)
30
+ //! ['a', 'b' , 'c', 254, 42, 0, 0, 0 , 'd', 'e', 'f', 255]
31
+ //!                  ^^^^^^^^^^^^^^^^                  ^^^
32
+ //!             string ID with 0xFE prefix      terminator (0xFF)
35
33
//! ```
36
34
//!
37
- //! As you can see string IDs are encoded in big endian format so that highest
38
- //! order bits show up in the first byte we encounter.
35
+ //! As you can see, string IDs are encoded in little-endian format.
39
36
//!
40
37
//! ----------------------------------------------------------------------------
41
38
//!
58
55
//! > [0 .. MAX_VIRTUAL_STRING_ID, METADATA_STRING_ID, .. ]
59
56
//!
60
57
//! From `0` to `MAX_VIRTUAL_STRING_ID` are the allowed values for virtual strings.
61
- //! After `MAX_VIRTUAL_STRING_ID`, there is one string id (`METADATA_STRING_ID`) which is used
62
- //! internally by `measureme` to record additional metadata about the profiling session.
63
- //! After `METADATA_STRING_ID` are all other `StringId` values.
64
- //!
58
+ //! After `MAX_VIRTUAL_STRING_ID`, there is one string id (`METADATA_STRING_ID`)
59
+ //! which is used internally by `measureme` to record additional metadata about
60
+ //! the profiling session. After `METADATA_STRING_ID` are all other `StringId`
61
+ //! values.
65
62
66
63
use crate :: file_header:: {
67
64
write_file_header, FILE_MAGIC_STRINGTABLE_DATA , FILE_MAGIC_STRINGTABLE_INDEX ,
@@ -84,7 +81,6 @@ impl StringId {
84
81
85
82
#[ inline]
86
83
pub fn new ( id : u32 ) -> StringId {
87
- assert ! ( id <= MAX_STRING_ID ) ;
88
84
StringId ( id)
89
85
}
90
86
@@ -106,23 +102,20 @@ impl StringId {
106
102
107
103
#[ inline]
108
104
pub fn from_addr ( addr : Addr ) -> StringId {
109
- let id = addr. 0 + FIRST_REGULAR_STRING_ID ;
105
+ let id = addr. 0 . checked_add ( FIRST_REGULAR_STRING_ID ) . unwrap ( ) ;
110
106
StringId :: new ( id)
111
107
}
112
108
113
109
#[ inline]
114
110
pub fn to_addr ( self ) -> Addr {
115
- assert ! ( self . 0 >= FIRST_REGULAR_STRING_ID ) ;
116
- Addr ( self . 0 - FIRST_REGULAR_STRING_ID )
111
+ Addr ( self . 0 . checked_sub ( FIRST_REGULAR_STRING_ID ) . unwrap ( ) )
117
112
}
118
113
}
119
114
120
115
// See module-level documentation for more information on the encoding.
121
116
pub const TERMINATOR : u8 = 0xFF ;
122
-
123
- // All 1s except for the two highest bits.
124
- pub const MAX_STRING_ID : u32 = 0x3FFF_FFFF ;
125
- pub const STRING_ID_MASK : u32 = 0x3FFF_FFFF ;
117
+ pub const STRING_REF_TAG : u8 = 0xFE ;
118
+ pub const STRING_REF_ENCODED_SIZE : usize = 5 ;
126
119
127
120
/// The maximum id value a virtual string may be.
128
121
const MAX_USER_VIRTUAL_STRING_ID : u32 = 100_000_000 ;
@@ -175,7 +168,7 @@ impl<'s> StringComponent<'s> {
175
168
fn serialized_size ( & self ) -> usize {
176
169
match * self {
177
170
StringComponent :: Value ( s) => s. len ( ) ,
178
- StringComponent :: Ref ( _) => 4 ,
171
+ StringComponent :: Ref ( _) => STRING_REF_ENCODED_SIZE ,
179
172
}
180
173
}
181
174
@@ -187,11 +180,10 @@ impl<'s> StringComponent<'s> {
187
180
& mut bytes[ s. len ( ) ..]
188
181
}
189
182
StringComponent :: Ref ( string_id) => {
190
- assert ! ( string_id. 0 == string_id. 0 & STRING_ID_MASK ) ;
191
- let tagged = string_id. 0 | ( 1u32 << 31 ) ;
192
-
193
- & mut bytes[ 0 ..4 ] . copy_from_slice ( & tagged. to_be_bytes ( ) ) ;
194
- & mut bytes[ 4 ..]
183
+ assert ! ( STRING_REF_ENCODED_SIZE == 5 ) ;
184
+ bytes[ 0 ] = STRING_REF_TAG ;
185
+ & mut bytes[ 1 ..5 ] . copy_from_slice ( & string_id. 0 . to_le_bytes ( ) ) ;
186
+ & mut bytes[ 5 ..]
195
187
}
196
188
}
197
189
}
0 commit comments