Skip to content

CP-32622: Remove 'select' from stdext #80

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@
base-unix
(odoc :with-doc)
(xapi-stdext-pervasives (= :version))
(mtime :with-test)
(xapi-stdext-unix (= :version))
)
)

Expand All @@ -92,6 +94,7 @@
(fd-send-recv (>= 2.0.0))
(odoc :with-doc)
(xapi-stdext-pervasives (= :version))
polly
)
)

Expand Down
4 changes: 2 additions & 2 deletions lib/xapi-stdext-date/date.mli
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ val of_string : string -> t
val never : t
(** Same as {!epoch} *)

type iso8601 = t
(** Deprecated alias for {!t} *)
type iso8601 = t

type rfc822 = t
(** Deprecated alias for {!t} *)
type rfc822 = t
36 changes: 18 additions & 18 deletions lib/xapi-stdext-encodings/bench/bechamel_simple_cli.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ let benchmark tests =

let analyze raw_results =
let ols =
Analyze.ols ~r_square:true ~bootstrap:0 ~predictors:[|Measure.run|]
Analyze.ols ~r_square:true ~bootstrap:0 ~predictors:[|Measure.run|]
in
let results =
List.map (fun instance -> Analyze.all ols instance raw_results) instances in
List.map (fun instance -> Analyze.all ols instance raw_results) instances
in
(Analyze.merge ols instances results, raw_results)

let () =
Expand All @@ -26,26 +27,25 @@ let img (window, results) =
open Notty_unix

let cli tests =
Format.printf "@,Running benchmarks@.";
Format.printf "@,Running benchmarks@." ;
let results, _ = tests |> benchmark |> analyze in

(* compute speed from duration *)
let () =
Hashtbl.find results (Measure.label Instance.monotonic_clock)
|> Hashtbl.iter @@ fun name result ->
try
(* this relies on extracting input size from test name,
which works if Test.make_indexed* was used *)
Scanf.sscanf name "%_s@:%d" @@ fun length ->
match Analyze.OLS.estimates result with
| Some [duration] ->
(* unit is ns *)
let speed = 1e9 *. float length /. duration /. 1048576.0 in
Fmt.pf Fmt.stdout "@[%s = %.1f MiB/s@]@." name speed
| _ -> ()
with Failure _ | Scanf.Scan_failure _ -> ()
Hashtbl.find results (Measure.label Instance.monotonic_clock)
|> Hashtbl.iter @@ fun name result ->
try
(* this relies on extracting input size from test name,
which works if Test.make_indexed* was used *)
Scanf.sscanf name "%_s@:%d" @@ fun length ->
match Analyze.OLS.estimates result with
| Some [duration] ->
(* unit is ns *)
let speed = 1e9 *. float length /. duration /. 1048576.0 in
Fmt.pf Fmt.stdout "@[%s = %.1f MiB/s@]@." name speed
| _ ->
()
with Failure _ | Scanf.Scan_failure _ -> ()
in

let window =
match winsize Unix.stdout with
| Some (w, h) ->
Expand Down
17 changes: 7 additions & 10 deletions lib/xapi-stdext-encodings/bench/bench_encodings.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@ open Bechamel
open Xapi_stdext_encodings.Encodings

let test name f =
Test.make_indexed_with_resource ~name
~args:[10; 1000; 10000]
Test.multiple (* TODO: Test.uniq segfaults here, bechamel bug *)
~allocate:(fun i -> String.make i 'x')
~free:ignore
(fun (_:int) -> Staged.stage f)
Test.make_indexed_with_resource ~name ~args:[10; 1000; 10000]
Test.multiple (* TODO: Test.uniq segfaults here, bechamel bug *)
~allocate:(fun i -> String.make i 'x')
~free:ignore
(fun (_ : int) -> Staged.stage f)

let benchmarks =
Test.make_grouped ~name:"Encodings.validate"
[ test "UTF8_XML" UTF8_XML.validate
]
[test "UTF8_XML" UTF8_XML.validate]

let () =
Bechamel_simple_cli.cli benchmarks
let () = Bechamel_simple_cli.cli benchmarks
143 changes: 76 additions & 67 deletions lib/xapi-stdext-encodings/encodings.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,147 +12,156 @@
* GNU Lesser General Public License for more details.
*)
exception UCS_value_out_of_range

exception UCS_value_prohibited_in_UTF8

exception UCS_value_prohibited_in_XML

exception UTF8_character_incomplete

exception UTF8_header_byte_invalid

exception UTF8_continuation_byte_invalid

exception UTF8_encoding_not_canonical

exception String_incomplete

(* === Unicode Functions === *)

module UCS = struct

let is_non_character value = false
|| (0xfdd0 <= value && value <= 0xfdef) (* case 1 *)
|| (Int.logand 0xfffe value = 0xfffe) (* case 2 *)
[@@inline]

let is_non_character value =
false
|| (0xfdd0 <= value && value <= 0xfdef) (* case 1 *)
|| Int.logand 0xfffe value = 0xfffe
(* case 2 *)
[@@inline]
end

module XML = struct

let is_illegal_control_character value = let value = Uchar.to_int value in
value < 0x20
&& value <> 0x09
&& value <> 0x0a
&& value <> 0x0d
[@@inline]

let is_illegal_control_character value =
let value = Uchar.to_int value in
value < 0x20 && value <> 0x09 && value <> 0x0a && value <> 0x0d
[@@inline]
end

(* === UCS Validators === *)

module type UCS_VALIDATOR = sig

val validate : Uchar.t -> unit [@@inline]

end

module UTF8_UCS_validator = struct

let validate value =
if (UCS.is_non_character[@inlined]) (Uchar.to_int value) then raise UCS_value_prohibited_in_UTF8
if (UCS.is_non_character [@inlined]) (Uchar.to_int value) then
raise UCS_value_prohibited_in_UTF8
[@@inline]

end

module XML_UTF8_UCS_validator = struct

let validate value =
(UTF8_UCS_validator.validate[@inlined]) value;
if (XML.is_illegal_control_character[@inlined]) value
then raise UCS_value_prohibited_in_XML

(UTF8_UCS_validator.validate [@inlined]) value ;
if (XML.is_illegal_control_character [@inlined]) value then
raise UCS_value_prohibited_in_XML
end

(* === String Validators === *)

module type STRING_VALIDATOR = sig

val is_valid : string -> bool

val validate : string -> unit
val longest_valid_prefix : string -> string

val longest_valid_prefix : string -> string
end

exception Validation_error of int * exn

module UTF8_XML : STRING_VALIDATOR = struct

let decode_continuation_byte byte =
if byte land 0b11000000 = 0b10000000 then byte land 0b00111111 else
if byte land 0b11000000 = 0b10000000 then
byte land 0b00111111
else
raise UTF8_continuation_byte_invalid

let rec decode_continuation_bytes string last value index =
if index <= last then
let chunk = decode_continuation_byte (Char.code string.[index]) in
let value = (value lsl 6) lor chunk in
decode_continuation_bytes string last value (index + 1)
else value
else
value

let validate_character_utf8 string byte index =
let value, width =
if byte land 0b10000000 = 0b00000000 then (byte, 1) else
if byte land 0b11100000 = 0b11000000 then (byte land 0b0011111, 2) else
if byte land 0b11110000 = 0b11100000 then (byte land 0b0001111, 3) else
if byte land 0b11111000 = 0b11110000 then (byte land 0b0000111, 4) else
if byte land 0b10000000 = 0b00000000 then
(byte, 1)
else if byte land 0b11100000 = 0b11000000 then
(byte land 0b0011111, 2)
else if byte land 0b11110000 = 0b11100000 then
(byte land 0b0001111, 3)
else if byte land 0b11111000 = 0b11110000 then
(byte land 0b0000111, 4)
else
raise UTF8_header_byte_invalid
in
let value =
if width = 1 then value
else decode_continuation_bytes string (index+width-1) value (index+1)
if width = 1 then
value
else
decode_continuation_bytes string (index + width - 1) value (index + 1)
in
XML_UTF8_UCS_validator.validate (Uchar.unsafe_of_int value);
XML_UTF8_UCS_validator.validate (Uchar.unsafe_of_int value) ;
width

let rec validate_aux string length index =
if index = length then ()
if index = length then
()
else
let width =
try
let byte = string.[index] |> Char.code in
validate_character_utf8 string byte index
with
| Invalid_argument _ -> raise String_incomplete
| error -> raise (Validation_error(index, error))
in
validate_aux string length (index + width)

let validate string =
validate_aux string (String.length string) 0
let width =
try
let byte = string.[index] |> Char.code in
validate_character_utf8 string byte index
with
| Invalid_argument _ ->
raise String_incomplete
| error ->
raise (Validation_error (index, error))
in
validate_aux string length (index + width)

let validate string = validate_aux string (String.length string) 0

let rec validate_with_fastpath string stop pos =
if pos < stop then
(* the compiler is smart enough to optimize the 'int32' away here,
and not allocate *)
let i32 = String.get_int32_ne string pos |> Int32.to_int in
(* test that for all bytes 0x20 <= byte < 0x80.
(* the compiler is smart enough to optimize the 'int32' away here,
and not allocate *)
let i32 = String.get_int32_ne string pos |> Int32.to_int in
(* test that for all bytes 0x20 <= byte < 0x80.
If any is <0x20 it would cause a negative value to appear in that byte,
which we can detect if we use 0x80 as a mask.
Byte >= 0x80 can be similarly detected with a mask of 0x80 on each byte.
We don't want to see a 0x80 from either of these, hence we bitwise or the 2 values together.
*)
if (i32 lor (i32 - 0x20_20_20_20)) land 0x80_80_80_80 = 0 then
validate_with_fastpath string stop (pos + 4)
else (* when the condition doesn't hold fall back to full UTF8 decoder *)
validate_aux string (String.length string) pos
else
*)
if i32 lor (i32 - 0x20_20_20_20) land 0x80_80_80_80 = 0 then
validate_with_fastpath string stop (pos + 4)
else (* when the condition doesn't hold fall back to full UTF8 decoder *)
validate_aux string (String.length string) pos
else
validate_aux string (String.length string) pos

let validate_with_fastpath string =
validate_with_fastpath string (String.length string - 3) 0
validate_with_fastpath string (String.length string - 3) 0

let validate =
if Sys.word_size = 64 then validate_with_fastpath
else validate
if Sys.word_size = 64 then
validate_with_fastpath
else
validate

let is_valid string =
try validate string; true with _ -> false
let is_valid string = try validate string ; true with _ -> false

let longest_valid_prefix string =
try validate string; string
try validate string ; string
with Validation_error (index, _) -> String.sub string 0 index

end
18 changes: 11 additions & 7 deletions lib/xapi-stdext-encodings/encodings.mli
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@
(** {2 Exceptions} *)

exception UCS_value_out_of_range

exception UCS_value_prohibited_in_UTF8

exception UCS_value_prohibited_in_XML

exception UTF8_character_incomplete

exception UTF8_header_byte_invalid

exception UTF8_continuation_byte_invalid

exception UTF8_encoding_not_canonical
exception String_incomplete

exception String_incomplete

(** {2 UCS Validators} *)

Expand All @@ -38,27 +44,25 @@ end
module XML_UTF8_UCS_validator : UCS_VALIDATOR

module XML : sig
val is_illegal_control_character : Uchar.t -> bool
(** Returns true if and only if the given value corresponds to
* a illegal control character as defined in section 2.2 of
* the XML specification, version 1.0. *)
val is_illegal_control_character : Uchar.t -> bool
end

(** {2 String Validators} *)

(** Provides functionality for validating and processing
* strings according to a particular character encoding. *)
module type STRING_VALIDATOR = sig

(** Returns true if and only if the given string is validly-encoded. *)
val is_valid : string -> bool
(** Returns true if and only if the given string is validly-encoded. *)

val validate : string -> unit
(** Raises an encoding error if the given string is not validly-encoded. *)
val validate: string -> unit

(** Returns the longest validly-encoded prefix of the given string. *)
val longest_valid_prefix : string -> string

(** Returns the longest validly-encoded prefix of the given string. *)
end

(** Represents a validation error as a tuple [(i,e)], where:
Expand Down
Loading