From 0cfa3550b4562eea406163a9f30f282fe5bb1056 Mon Sep 17 00:00:00 2001 From: sakari-malkki <47064229+sakari-malkki@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:06:36 +0200 Subject: [PATCH 01/13] Replaces .NET 7 from CI pipeline with .NET 9 (#55) --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3b8a88e3..ced6c6fd 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - dotnet-version: [ '7.x', '8.x' ] + dotnet-version: [ '9.x', '8.x' ] configuration: [ 'Debug' ] runs-on: ${{ matrix.os }} From 91be0d0d9651e98eb38be7549d0a31b865e54797 Mon Sep 17 00:00:00 2001 From: sakari-malkki <47064229+sakari-malkki@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:16:37 +0200 Subject: [PATCH 02/13] More examples and fixed typos in documentation (#54) --- .../DivisionMatrixFunctionExtensions.cs | 8 +- .../Operations/SumMatrixFunctionExtensions.cs | 6 +- docs/README.md | 162 +++++++++++++++++- 3 files changed, 163 insertions(+), 13 deletions(-) diff --git a/Px.Utils/Operations/DivisionMatrixFunctionExtensions.cs b/Px.Utils/Operations/DivisionMatrixFunctionExtensions.cs index f785b93e..db377e2b 100644 --- a/Px.Utils/Operations/DivisionMatrixFunctionExtensions.cs +++ b/Px.Utils/Operations/DivisionMatrixFunctionExtensions.cs @@ -80,10 +80,10 @@ public static Matrix DivideSubsetBySelectedValue(this MatrixThe set of datapoints defined by this dimension value /// will be used to divide the corresponding datapoints defined by /// A new object that contais the results of the operation. - public static async Task> DivideSubsetBySelectedValueAsync(this Matrix input, IDimensionMap targetMap, string baseValueCode) + public static async Task> DivideSubsetBySelectedValueAsync(this Matrix input, IDimensionMap targetMap, string dividerValueCode) where TData : IDivisionOperators, IMultiplicativeIdentity { - return await Task.Factory.StartNew(() => DivideSubsetBySelectedValue(input, targetMap, baseValueCode)); + return await Task.Factory.StartNew(() => DivideSubsetBySelectedValue(input, targetMap, dividerValueCode)); } /// @@ -96,10 +96,10 @@ public static async Task> DivideSubsetBySelectedValueAsync( /// The set of datapoints defined by this dimension value /// will be used to divide the corresponding datapoints defined by /// A new object that contais the results of the operation. - public static async Task> DivideSubsetBySelectedValueAsync(this Task> input, IDimensionMap targetMap, string baseValueCode) + public static async Task> DivideSubsetBySelectedValueAsync(this Task> input, IDimensionMap targetMap, string dividerValueCode) where TData : IDivisionOperators, IMultiplicativeIdentity { - return await DivideSubsetBySelectedValueAsync(await input, targetMap, baseValueCode); + return await DivideSubsetBySelectedValueAsync(await input, targetMap, dividerValueCode); } private static TData Divide(TData a, TData b) where TData : IDivisionOperators, IMultiplicativeIdentity diff --git a/Px.Utils/Operations/SumMatrixFunctionExtensions.cs b/Px.Utils/Operations/SumMatrixFunctionExtensions.cs index 5c6b7062..eaaa35ca 100644 --- a/Px.Utils/Operations/SumMatrixFunctionExtensions.cs +++ b/Px.Utils/Operations/SumMatrixFunctionExtensions.cs @@ -96,9 +96,9 @@ public async static Task> AddConstantToSubsetAsync(this Mat /// Type of the data values in the matrix, must implement /// and /// The source matrix for the operation - /// Defines the values to wich the constant will be added. - /// The contant to be added. - /// A new object that contais the results of the additions. + /// Defines the values to which the constant will be added. + /// The constant to be added. + /// A new object that contains the results of the additions. public async static Task> AddConstantToSubsetAsync(this Task> input, IMatrixMap targetMap, TData valueToAdd) where TData : IAdditionOperators, IAdditiveIdentity { diff --git a/docs/README.md b/docs/README.md index 6596649c..74ccdb24 100644 --- a/docs/README.md +++ b/docs/README.md @@ -13,7 +13,7 @@ Extending the library with a new features should be as easy as possible and ever ## Installation Px.Utils can be installed using .NET CLI or NuGet Package Manager. -### .NET CL +### .NET CLI #### Latest ```bash dotnet add package Px.Utils @@ -106,6 +106,16 @@ There are no limits for the number or size of dimensions. But it is important to ```GetTransform(IMatrixMap map)``` method can be used to take a subset of the the matrix and/or change the order of the dimensions or the dimension values. It creates a new mutable deep copy of the matrix that have the structure defined by the map parameter. The data array will also be copied and reordered based on the map. +##### Example +```csharp + MatrixMap map = new( + [ + new DimensionMap("variable-0", ["variable-0_value-0", "variable-0_value-2", "variable-0_value-4"]), + new DimensionMap("variable-1", ["variable-1_value-0", "variable-1_value-2"]) + ]); + Matrix output = matrix.GetTransform(map); +``` + #### ```MatrixMap : IMatrixMap``` This is a minimal way to represent the structure of the metadata. Does not contain any other information than the dimension and dimension value codes. The ```IReadOnlyMatrixMetadata``` also implements the ```IMatrixMap``` interface. @@ -130,17 +140,24 @@ They both implement the ```IReadOnlyList``` interface, Differs from the base class by having values of type ```ContentDimensionValue```. #### ```TimeDimension : Dimension``` -Shares the same value type as the base class, but has additional properties in the dimension level metadata. +Shares the same value type as the base class, but has an additional property in the dimension level metadata: +- ```Interval``` (```TimeDimensionInterval```[enum]): Represents the interval of the time dimension. Can be either Year, HalfYear, Quarter, Month, Week, Other or Irregular. + +TimeDimension's Type (```DimensionType```[enum]) is always Time. #### ```DimensionValue : IReadOnlyDimensionValue``` Represents the dimension value level metadata of a px-file. This is a base class for all dimension values. Each value has a unique string code among the values in the dimension. #### ```ContentDimensionValue : DimensionValue``` -Dimension value that contais content dimension value specific metadata. +Dimension value that contais content dimension value specific metadata properties: +- ```Unit``` (```MultilanguageString```): Stores the unit associated with the content dimension value (such as "EUR" or "%") as a multilanguage string. +- ```LastUpdated``` (DateTime): Stores the date and time of the last updated associated with the content dimension value. +- ```Precision``` (int): Stores the precision - the number of decimal places - of the content dimension value. #### ```MetaProperty``` Px.Utils supports reading any metadata properties that follow the px file syntax. The properties are stored in a ```Dictionary``` collection called ```AdditionalProperties``` where the dictionary key is the property keyword. The base class ```MetaProperty``` is abstract and each supported property type has its own class that inherits from it. +Currently supported property types (represented by ```MetaPropertyType``` enum) and their respective classes are: ```Text``` (```StringProperty```), ```MultilanguageText``` (```MultilanguageStringProperty```), ```TextArray``` (```StringListProperty```), ```MultilanguageTextArray``` (```MultilanguageStringListProperty```), ```Numeric``` (```NumericProperty```) and ```Boolean``` (```BooleanProperty```). ### Data models ```IDataValue``` is an interface for the data points that defines the basic computation methods for the data points. See the Computing section for more information. @@ -167,12 +184,26 @@ Validator classes implement either ```IPxFileStreamValidator``` or ```IPxFileStr Custom validator objects can be injected by calling the SetCustomValidatorFunctions or SetCustomValidators methods of the PxFileValidator object. Custom validators must implement either the IPxFileValidator or IPxFileValidatorAsync interface. Custom validation methods are stored in CustomSyntaxValidationFunctions and CustomContentValidationFunctions objects for syntax and content validation processes respectively. Once the PxFileValidator object is instantiated, either the Validate or ValidateAsync method can be called to validate the px file. The Validate method returns a ValidationResult object that contains the validation results as a key value pair containing information about the rule violations. +##### Example +```csharp + PxFileValidator validator = new PxFileValidator(); + ValidationResult result = validator.Validate(fileStream, "path/to/file.px", Encoding.UTF8); + ValidationResult asyncResult = await validator.ValidateAsync(fileStream, "path/to/file.px", Encoding.UTF8, cancellationToken: cancellationToken); +``` + #### SyntaxValidator : IPxFileStreamValidator, IPxFileStreamValidatorAsync ```SyntaxValidator``` is a class that validates the syntax of a px file's metadata. It needs to be run before other validators, because both the ```ContentValidator``` and ```DataValidator``` require information from the ```SyntaxValidationResult``` object that ```SyntaxValidator``` ```Validate()``` and ```ValidateAsync()``` methods return. The class can be instantiated with the following parameters: - conf (PxFileConfiguration, optional): Object that contains px file configuration. - customValidationFunctions (CustomSyntaxValidationFunctions, optional): Object that contains custom validation functions for the syntax validation process. +##### Example +```csharp + SyntaxValidator validator = new SyntaxValidator(); + SyntaxValidationResult result = validator.Validate(fileStream, "path/to/file.px", Encoding.UTF8); + SyntaxValidationResult asyncResult = await validator.ValidateAsync(fileStream, "path/to/file.px", Encoding.UTF8, cancellationToken: cancellationToken); +``` + #### ContentValidator : IValidator ```ContentValidator``` class validates the integrity of the contents of a px file's metadata. It needs to be run after the ```SyntaxValidator```, because it requires information from the ```SyntaxValidationResult``` object that ```SyntaxValidator``` ```Validate()``` and ```ValidateAsync()``` methods return. The class can be instantiated with the following parameters: @@ -182,14 +213,34 @@ The class can be instantiated with the following parameters: - customContentValidationFunctions (CustomContentValidationFunctions, optional): Object that contains custom functions for validating the px file metadata contents. - conf (PxFileConfiguration, optional): Object that contains px file configuration. +##### Example +```csharp + Encoding encoding = Encoding.UTF8; + SyntaxValidator syntaxValidator = new SyntaxValidator(); + SyntaxValidationResult syntaxResult = syntaxValidator.Validate(fileStream, "path/to/file.px", encoding); + ContentValidator validator = new ContentValidator("path/to/file.px", encoding, syntaxResult.Result); + ValidationResult result = validator.Validate(); +``` + #### DataValidator : IPxFileStreamValidator, IPxFileStreamValidatorAsync -```DataValidator``` class is used to validate the data section of a px file. It needs to be run after the ```SyntaxValidator```, because it requires information from both the ```SyntaxValidationResult``` and ```ContentValidationResult``` objects that ```SyntaxValidator``` and ```ContentValidator``` ```Validate()``` and ```ValidateAsync()``` methods return. +```DataValidator``` class is used to validate the data section of a px file. It needs to be run after the ```SyntaxValidator``` and ```ContentValidator``` because it requires information provided by the ```SyntaxValidationResult``` and ```ContentValidationResult``` objects that the ```SyntaxValidator``` and ```ContentValidator``` ```Validate()``` and ```ValidateAsync()``` methods return. The class can be instantiated with the following parameters: - rowLen (int): Length of one row of Px file data. ContentValidationResult object contains this information. - numOfRows (int): Amount of rows of Px file data. This information is also stored in ContentValidationResult object. - startRow (long): The row number where the data section starts. This information is stored in the SyntaxValidationResult object. - conf (PxFileConfiguration, optional): Configuration for the Px file +##### Example +```csharp + Encoding encoding = Encoding.UTF8; + SyntaxValidator syntaxValidator = new SyntaxValidator(); + SyntaxValidationResult syntaxResult = syntaxValidator.Validate(fileStream, "path/to/file.px", encoding); + ContentValidator contentValidator = new ContentValidator("path/to/file.px", encoding, syntaxResult.Result); + ValidationResult contentResult = contentValidator.Validate(); + DataValidator validator = new DataValidator(contentResult.DataRowLength, contentResult.DataRowAmount, syntaxResult.DataStartRow); + ValidationResult result = validator.Validate(fileStream, "path/to/file.px", encoding); +``` + #### DatabaseValidator : IValidator, IValidatorAsync Whole px file databases can be validated using ```DatabaseValidator``` class. Validation can be done by using the blocking ```Validate()``` or asynchronous ```ValidateAsync()``` methods. ```DatabaseValidator``` class can be instantiated using the following parameters: - directoryPath (string): Path to the database root @@ -202,30 +253,129 @@ Whole px file databases can be validated using ```DatabaseValidator``` class. Va Database validation process validates each px file within the database and also the required structure and consistency of the database languages and encoding formats. The return object is a ```ValidationResult``` object that contains ```ValidationFeedback``` objects gathered during the validation process. The database needs to contain alias files for each language used in the database for each folder that contains either subcategory folders or px files. If either languages or encoding formats differ between alias or px files, warnings are generated. +##### Example +```csharp + DatabaseValidator validator = new DatabaseValidator("path/to/database"); + ValidationResult result = validator.Validate(); + ValidationResult asyncResult = await validator.ValidateAsync(cancellationToken); +``` + ### Computing ```Matrix``` class has a set of extension methods for performing basic computations for the datapoints. +Values of dimensions can be summed or multiplied together to new values. If the original matrix has the following structure: + +|| col0-0 || col0-1 || col0-2 || +||-----------|-----------|-----------|-----------|-----------|-----------| +|| col1-0 | col1-1| col1-0 | col1-1| col1-0 | col1-1 | +|row0-0| 0 | 1 | 2 | 3 | 4 | 5 | +|row0-1| 6 | 7 | 8 | 9 | 10 | 11 | +|row0-2| 12 | 13 | 14 | 15 | 16 | 17 | + +If we sum the row0 dimension's values 1 and 2 together to form a new value "rowSum", the resulting matrix will look like this: + +|| col0-0 || col0-1 || col0-2 || +||-----------|-----------|-----------|-----------|-----------|-----------| +|| col1-0 | col1-1| col1-0 | col1-1| col1-0 | col1-1 | +|row0-0| 0 | 1 | 2 | 3 | 4 | 5 | +|row0-1| 6 | 7 | 8 | 9 | 10 | 11 | +|row0-2| 12 | 13 | 14 | 15 | 16 | 17 | +|rowSum | 18 | 20 | 22 | 24 | 26 | 28 | + #### Sum ```SumToNewValue()``` computes sums of datapoints defined by a subset of values from a given dimension. The method takes a new dimension value as a parameter that will define the resulting values. -The method also has an asyncronous variant ```SumToNewValueAsync()```. +The method also has an asyncronous variant ```SumToNewValueAsync()```. + +##### Example +```csharp + DimensionValue newDimensionValue = new("rowSum", new("en", "Sum value")); + DimensionMap map = new("row0", ["row0-1", "row0-2"]); + Matrix output = matrix.SumToNewValue(newDimensionValue, map); +``` ```AddConstantToSubset()``` adds a constant to a subset of datapoints. Also has an asynchronous variant ```AddConstantToSubsetAsync()```. +##### Example +```csharp + IMatrixMap map = new MatrixMap([ + new DimensionMap("col0", ["col0-0"]), + new DimensionMap("col1", ["col1-0", "col1-1"]), + ]); + + Matrix output = matrix.AddConstantToSubset(map, 5); +``` + #### Multiplication ```MultiplyToNewValue()``` computes products of datapoints defined by a subset of values from a given dimension. The method takes a new dimension value as a parameter that will define the resulting values. The method also has an asyncronous variant ```MultiplyToNewValueAsync()```. +##### Example +```csharp + DimensionValue newDimensionValue = new("rowProduct", new("en", "Product value")); + DimensionMap map = new("row0", ["row0-1", "row0-2"]); + Matrix output = matrix.MultiplyToNewValue(newDimensionValue, map); +``` + ```MultiplySubsetByConstant()``` Multiply a subset of datapoints by a constant. Also has an asynchronous variant ```MultiplySubsetByConstantAsync()```. +##### Example +```csharp + IMatrixMap map = new MatrixMap([ + new DimensionMap("col0", ["col0-0"]), + new DimensionMap("col1", ["col1-0", "col1-1"]), + ]); + + Matrix output = matrix.MultiplySubsetByConstant(map, 5); +``` + + #### Division -```DivideSubsetBySelectedValue()``` divides a subset of datapoints defined by values from one dimension with datapoints defined by a value from the same dimension. +```DivideSubsetBySelectedValue()``` divides a subset of datapoints defined by values from one dimension with datapoints defined by a value from the same dimension. Also has an asyncronous variant ```DivideSubsetBySelectedValueAsync()``` +If the original matrix has the following structure: + +|| col0-0 || col0-1 || col0-2 || +||-----------|-----------|-----------|-----------|-----------|-----------| +|| col1-0 | col1-1| col1-0 | col1-1| col1-0 | col1-1 | +|row0-0| 0 | 1 | 2 | 3 | 4 | 5 | +|row0-1| 6 | 7 | 8 | 9 | 10 | 11 | +|row0-2| 12 | 13 | 14 | 15 | 16 | 17 | +|rowSum | 18 | 20 | 22 | 24 | 26 | 28 | + +And we divide row dimension values row0-1 and row0-2 by rowSum the resulting matrix will look like this: + +|| col0-0 || col0-1 || col0-2 || +||-----------|-----------|-----------|-----------|-----------|-----------| +|| col1-0 | col1-1| col1-0 | col1-1| col1-0 | col1-1 | +|row0-0| 0 | 1 | 2 | 3 | 4 | 5 | +|row0-1| 0.33 | 0.35 | 0.36 | 0.38 | 0.39 | 0.41 | +|row0-2| 0.67 | 0.65 | 0.64 | 0.63 | 0.61 | 0.59 | +|rowSum | 18 | 20 | 22 | 24 | 26 | 28 | + +##### Example +```csharp + DimensionMap map = new("row0", ["row0-1", "row0-2"]); + + Matrix output = matrix.DivideSubsetBySelectedValue(map, "rowSum"); +``` + ```DivideSubsetByConstant()``` Divide a subset of datapoints by a constant. Also has an asynchronous variant ```DivideSubsetByConstantAsync()```. +##### Example +```csharp + IMatrixMap map = new MatrixMap([ + new DimensionMap("col0", ["col0-0"]), + new DimensionMap("col1", ["col1-0", "col1-1"]), + ]); + + Matrix output = matrix.DivideSubsetByConstant(map, 2); +``` + + #### General ```ApplyOverDimension()``` Generatas a new set datapoints by applying a function to datapoints defined by a subset of values from one dimension. From 8afe0d6ed52978cea7c21553c00a06987ed80164 Mon Sep 17 00:00:00 2001 From: sakari-malkki <47064229+sakari-malkki@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:17:44 +0200 Subject: [PATCH 03/13] Meta data building property fixes (#53) --- ...st_1_Language_With_Range_Time_Dimension.cs | 66 +++++++++++++++++ .../MatrixMetadataBuilderTests.cs | 56 ++++++++++++++- .../GetTimeValValueStringTests.cs | 70 +++++++++++++++++++ .../ModelBuilders/MatrixMetadataBuilder.cs | 43 ++++++++---- .../ModelBuilders/ValueParserUtilities.cs | 43 +++++++++--- Px.Utils/Px.Utils.csproj | 2 +- 6 files changed, 253 insertions(+), 27 deletions(-) create mode 100644 Px.Utils.UnitTests/ModelBuilderTests/Fixtures/PxFileMetaEntries_Robust_1_Language_With_Range_Time_Dimension.cs create mode 100644 Px.Utils.UnitTests/ModelBuilderTests/ValueParserUtilitiesTests/GetTimeValValueStringTests.cs diff --git a/Px.Utils.UnitTests/ModelBuilderTests/Fixtures/PxFileMetaEntries_Robust_1_Language_With_Range_Time_Dimension.cs b/Px.Utils.UnitTests/ModelBuilderTests/Fixtures/PxFileMetaEntries_Robust_1_Language_With_Range_Time_Dimension.cs new file mode 100644 index 00000000..ed6f3a10 --- /dev/null +++ b/Px.Utils.UnitTests/ModelBuilderTests/Fixtures/PxFileMetaEntries_Robust_1_Language_With_Range_Time_Dimension.cs @@ -0,0 +1,66 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Px.Utils.UnitTests.ModelBuilderTests.Fixtures +{ + internal static class PxFileMetaEntries_Robust_1_Language_With_Range_Time_Dimension + { + public static List> Entries = + [ + new("CHARSET", "\"ANSI\""), + new("AXIS-VERSION", "\"2013\""), + new("CODEPAGE", "\"iso-8859-15\""), + new("LANGUAGE", "\"fi\""), + new("CREATION-DATE", "\"20200121 09:00\""), + new("NEXT-UPDATE", "\"20240131 08:00\""), + new("TABLEID", "\"example_table_id_for_testing\""), + new("DECIMALS", "0"), + new("SHOWDECIMALS", "1"), + new("MATRIX", "\"001_12ab_2022\""), + new("SUBJECT-CODE", "\"ABCD\""), + new("SUBJECT-AREA", "\"abcd\""), + new("COPYRIGHT", "YES"), + new("DESCRIPTION", "\"test_description_fi\""), + new("TITLE", "\"test_title_fi\""), + new("CONTENTS", "\"test_contents_fi\""), + new("UNITS", "\"test_unit_fi\""), + new("STUB", "\"Vuosi\",\"Alue\",\"Talotyyppi\""), + new("HEADING", "\"Tiedot\""), + new("CONTVARIABLE", "\"Tiedot\""), + new("VALUES(\"Vuosi\")", "\"2015\",\"2016\",\"2017\",\"2018\",\"2019\",\"2020\",\"2021\",\"2022\""), + new("VALUES(\"Alue\")", "\"Koko maa\",\"Pääkaupunkiseutu (PKS)\",\"Muu Suomi (koko maa pl. PKS)\",\"Helsinki\", \"Espoo-Kauniainen\",\"Vantaa\",\"Turku\""), + new("VALUES(\"Talotyyppi\")", "\"Talotyypit yhteensä\",\"Rivitalot\",\"Kerrostalot\""), + new("VALUES(\"Tiedot\")", "\"Indeksi (2015=100)\",\"Muutos edelliseen vuoteen (indeksi 2015=100)\",\"Kauppojen lukumäärä\""), + new("TIMEVAL(\"Vuosi\")", "TLIST(A1, \"2015-2022\")"), + new("CODES(\"Vuosi\")", "\"2015\",\"2016\",\"2017\",\"2018\",\"2019\",\"2020\",\"2021\",\"2022\""), + new("CODES(\"Alue\")", "\"ksu\",\"pks\",\"msu\",\"091\",\"049\",\"092\",\"853\""), + new("CODES(\"Talotyyppi\")", "\"0\",\"1\",\"3\""), + new("CODES(\"Tiedot\")", "\"ketjutettu_lv\",\"vmuutos_lv\",\"lkm_julk_uudet\""), + new("VARIABLE-TYPE(\"Vuosi\")", "\"Time\""), + new("VARIABLE-TYPE(\"Alue\")", "\"Classificatory\""), + new("VARIABLE-TYPE(\"Talotyyppi\")", "\"Classificatory\""), + new("MAP(\"Alue\")", "\"Alue 2018\""), + new("ELIMINATION(\"Talotyyppi\")", "\"Talotyypit yhteensä\""), + new("PRECISION(\"Tiedot\",\"Muutos edelliseen vuoteen (indeksi 2015=100)\")", "1"), + new("LAST-UPDATED(\"Indeksi (2015=100)\")", "\"20230131 08:00\""), + new("LAST-UPDATED(\"Muutos edelliseen vuoteen (indeksi 2015=100)\")", "\"20230131 09:00\""), + new("LAST-UPDATED(\"Kauppojen lukumäärä\")", "\"20230131 10:00\""), + new("UNITS(\"Indeksi (2015=100)\")", "\"indeksipisteluku\""), + new("UNITS(\"Muutos edelliseen vuoteen (indeksi 2015=100)\")", "\"%\""), + new("UNITS", "\"lukumäärä\""), // table level units + new("CONTACT(\"Indeksi (2015=100)\")", "\"test_contact1_fi\""), + new("CONTACT(\"Muutos edelliseen vuoteen (indeksi 2015=100)\")", "\"test_contact2_fi\""), + new("CONTACT(\"Kauppojen lukumäärä\")", "\"test_contact3_fi\""), + new("SOURCE", "\"test_source_fi\""), + new("OFFICIAL-STATISTICS", "YES"), + new("NOTE", "\"test_note_fi\""), + new("NOTE(\"Talotyyppi\")", "\"test_note_talotyyppi\""), + new("VALUENOTE(\"Tiedot\",\"Indeksi (2015=100)\")", "\"test_value_note_tiedot_indeksi\""), + new("VALUENOTE(\"Tiedot\",\"Muutos edelliseen vuoteen (indeksi 2015=100)\")", "\"test_value_note_tiedot_muutos\""), + new("VALUENOTE(\"Tiedot\",\"Kauppojen lukumäärä\")", "\"test_value_note_tiedot_kauppojen_lukumäärä\"") + ]; + } +} diff --git a/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs b/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs index f99c0718..ad69a0f5 100644 --- a/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs +++ b/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs @@ -20,6 +20,9 @@ public class MatrixMetadataBuilderTests private MatrixMetadata Actual_1Lang_With_Table_Level_Units_And_Precision { get; } = new MatrixMetadataBuilder().Build(PxFileMetaEntries_Robust_1_Language_With_Table_Level_Units_And_Precision.Entries); + private MatrixMetadata Actual_1Lang_With_Range_Time_Dimension { get; } = + new MatrixMetadataBuilder().Build(PxFileMetaEntries_Robust_1_Language_With_Range_Time_Dimension.Entries); + [TestMethod] public void IEnumerableBuildTest() { @@ -228,7 +231,7 @@ public void SingleLangWithTableLevelUnitsAndPrecisionBuildTest() } Assert.IsFalse(Actual_1Lang_With_Table_Level_Units_And_Precision.AdditionalProperties.ContainsKey(PxFileConfiguration.Default.Tokens.KeyWords.Units)); Assert.IsFalse(Actual_1Lang_With_Table_Level_Units_And_Precision.AdditionalProperties.ContainsKey(PxFileConfiguration.Default.Tokens.KeyWords.Decimals)); - Assert.IsFalse(Actual_1Lang_With_Table_Level_Units_And_Precision.AdditionalProperties.ContainsKey(PxFileConfiguration.Default.Tokens.KeyWords.ShowDecimals)); + Assert.IsTrue(Actual_1Lang_With_Table_Level_Units_And_Precision.AdditionalProperties.ContainsKey(PxFileConfiguration.Default.Tokens.KeyWords.ShowDecimals)); } #region Content Dimension Tests @@ -525,5 +528,56 @@ public void MultilanguageTableWithCustomMetaPropertiesAndPropetyTypes() Assert.AreEqual(MetaPropertyType.TextArray, actual.AdditionalProperties["SINGLEITEMTEXTARRAYPROPERTY"].Type); Assert.AreEqual(MetaPropertyType.MultilanguageTextArray, actual.AdditionalProperties["SINGLEITEMMULTILANGUAGETEXTARRAYPROPERTY"].Type); } + + [TestMethod] + public void MultilanguageTableRemovesDimensionTypeAndTimeValEntriesTest() + { + Assert.IsTrue(Actual_3Lang.Dimensions.Exists(dim => dim.Type == DimensionType.Time)); + foreach (Dimension dim in Actual_3Lang.Dimensions) + { + Assert.IsFalse(dim.AdditionalProperties.ContainsKey(PxFileConfiguration.Default.Tokens.KeyWords.DimensionType)); + + if (dim.Type == DimensionType.Time) + { + Assert.IsTrue(dim.AdditionalProperties.TryGetValue(PxFileConfiguration.Default.Tokens.KeyWords.TimeVal, out MetaProperty? value)); + Assert.AreEqual(MetaPropertyType.TextArray, value.Type); + StringListProperty property = (StringListProperty)value; + Assert.AreEqual(8, property.Value.Count); + } + } + } + + [TestMethod] + public void MultilanguageWithRangeTimeDimensionTest() + { + Assert.IsTrue(Actual_1Lang_With_Range_Time_Dimension.Dimensions.Exists(dim => dim.Type == DimensionType.Time)); + foreach (Dimension dim in Actual_1Lang_With_Range_Time_Dimension.Dimensions) + { + Assert.IsFalse(dim.AdditionalProperties.ContainsKey(PxFileConfiguration.Default.Tokens.KeyWords.DimensionType)); + + if (dim.Type == DimensionType.Time) + { + Assert.IsTrue(dim.AdditionalProperties.TryGetValue(PxFileConfiguration.Default.Tokens.KeyWords.TimeVal, out MetaProperty? value)); + Assert.AreEqual(MetaPropertyType.Text, value.Type); + StringProperty property = (StringProperty)value; + Assert.AreEqual("2015-2022", property.Value); + } + } + } + + [TestMethod] + public void MultilanguageRemovesTableLevelMetaEntriesTest() + { + string[] keywords = [ + PxFileConfiguration.Default.Tokens.KeyWords.Units, + PxFileConfiguration.Default.Tokens.KeyWords.Precision, + PxFileConfiguration.Default.Tokens.KeyWords.Decimals + ]; + + foreach (string keyword in keywords) + { + Assert.IsFalse(Actual_3Lang.AdditionalProperties.ContainsKey(keyword)); + } + } } } diff --git a/Px.Utils.UnitTests/ModelBuilderTests/ValueParserUtilitiesTests/GetTimeValValueStringTests.cs b/Px.Utils.UnitTests/ModelBuilderTests/ValueParserUtilitiesTests/GetTimeValValueStringTests.cs new file mode 100644 index 00000000..ab4507fd --- /dev/null +++ b/Px.Utils.UnitTests/ModelBuilderTests/ValueParserUtilitiesTests/GetTimeValValueStringTests.cs @@ -0,0 +1,70 @@ +using Px.Utils.ModelBuilders; + +namespace Px.Utils.UnitTests.ModelBuilderTests.ValueParserUtilitiesTests +{ + [TestClass] + public class GetTimeValValueRangeStringTests + { + [TestMethod] + public void GetTimeValValueRangeStringTestEmptyInputThrowsException() + { + string input = "TLIST(A1)"; + Assert.ThrowsException(() => ValueParserUtilities.GetTimeValValueRangeString(input)); + } + + [TestMethod] + public void GetTimeValValueRangeStringTestListInputThrowsException() + { + string input = "TLIST(A1), \"9000\", \"9001\", \"9002\", \"9003\", \"9004\""; + Assert.ThrowsException(() => ValueParserUtilities.GetTimeValValueRangeString(input)); + } + + [TestMethod] + public void GetTimeValValueRangeStringTestInvalidRangeFormatThrowsException() + { + string input = "TLIST(A1, \"9001\")"; + Assert.ThrowsException(() => ValueParserUtilities.GetTimeValValueRangeString(input)); + } + + [TestMethod] + public void GetTimeValueStringTestValidInputReturnsString() + { + string input = "TLIST(A1, \"9000-9001\")"; + string expected = "9000-9001"; + string actual = ValueParserUtilities.GetTimeValValueRangeString(input); + + Assert.AreEqual(expected, actual); + } + + [TestMethod] + public void GetTimeValueStringTestInputWithTwoRangesThrowsException() + { + string input = "TLIST(A1, \"9000-9001\", \"9002-9003\")"; + Assert.ThrowsException(() => ValueParserUtilities.GetTimeValValueRangeString(input)); + } + + [TestMethod] + public void GetTimeValueStringTestInputWithRangeOfThreePartRangeThrowsException() + { + string input = "TLIST(A1, \"9000-9001-9002\")"; + Assert.ThrowsException(() => ValueParserUtilities.GetTimeValValueRangeString(input)); + } + + [TestMethod] + public void GetTimeValueStringTestInputRangeWithoutStringDelimetersThrowsException() + { + string input = "TLIST(A1, 9000-9001)"; + Assert.ThrowsException(() => ValueParserUtilities.GetTimeValValueRangeString(input)); + } + + [TestMethod] + public void GetTimeValueStringTestInputRangeWithExtraWhitespaceReturnsString() + { + string input = "TLIST(A1, \"9000-9001\" )"; + string expected = "9000-9001"; + string actual = ValueParserUtilities.GetTimeValValueRangeString(input); + + Assert.AreEqual(expected, actual); + } + } +} diff --git a/Px.Utils/ModelBuilders/MatrixMetadataBuilder.cs b/Px.Utils/ModelBuilders/MatrixMetadataBuilder.cs index ca444508..f29d1899 100644 --- a/Px.Utils/ModelBuilders/MatrixMetadataBuilder.cs +++ b/Px.Utils/ModelBuilders/MatrixMetadataBuilder.cs @@ -159,17 +159,27 @@ private bool TestIfTimeAndBuild( if (TryGetEntries(entries, timeValIdentifierKey, langs, out Dictionary? timeValEntries, dimensionNameToTest)) { string timeValValueString = timeValEntries.Values.First(); + if (!timeValValueString.StartsWith(_conf.Tokens.Time.TimeIntervalIndicator, StringComparison.InvariantCulture)) + { + throw new ArgumentException($"Invalid time value string {timeValValueString}"); + } List timeValList = ValueParserUtilities.GetTimeValValueList(timeValValueString, _conf); - + MetaProperty timeValProperty = timeValList.Count > 0 ? + new StringListProperty(timeValList) : + new StringProperty(ValueParserUtilities.GetTimeValValueRangeString(timeValValueString, _conf)); timeDimension = new( code: GetDimensionCode(entries, langs, dimensionNameToTest), name: dimensionNameToTest, - additionalProperties: new() { { timeValIdentifierKey, new StringListProperty(timeValList) } }, + additionalProperties: new() { { timeValIdentifierKey, timeValProperty } }, values: GetDimensionValues(entries, langs, dimensionNameToTest), interval: ValueParserUtilities.ParseTimeIntervalFromTimeVal(timeValValueString, _conf) ); foreach (MetadataEntryKey key in timeValEntries.Keys) entries.Remove(key); + if (TryGetEntries(entries, dimensionTypeKey, langs, out Dictionary? dimTypeEntries, dimensionNameToTest)) + { + foreach (MetadataEntryKey key in dimTypeEntries.Keys) entries.Remove(key); + } return true; } else if (TryGetEntries(entries, dimensionTypeKey, langs, out Dictionary? dimTypeEntries, dimensionNameToTest) && @@ -226,20 +236,25 @@ private ContentDimension BuildContentDimension(Dictionary? unitEntries)) - { - foreach (MetadataEntryKey key in unitEntries.Keys) entries.Remove(key); - } - if (TryGetEntries(entries, _conf.Tokens.KeyWords.Decimals, langs, out Dictionary? decimalEntries)) - { - foreach (MetadataEntryKey key in decimalEntries.Keys) entries.Remove(key); - } - if (TryGetEntries(entries, _conf.Tokens.KeyWords.ShowDecimals, langs, out Dictionary? showDecimalEntries)) + // Table level UNIT, PRECISION and DECIMALS properties are not needed after building the content dimension, so they're removed here + string[] keywords = [ + _conf.Tokens.KeyWords.Units, + _conf.Tokens.KeyWords.Precision, + _conf.Tokens.KeyWords.Decimals + ]; + RemoveEntries(entries, langs, keywords); + return new ContentDimension(code, dimensionName, [], values); + } + + private static void RemoveEntries(Dictionary entries, PxFileLanguages langs, string[] tokens) + { + foreach (string token in tokens) { - foreach (MetadataEntryKey key in showDecimalEntries.Keys) entries.Remove(key); + if (TryGetEntries(entries, token, langs, out Dictionary? foundEntries)) + { + foreach (MetadataEntryKey key in foundEntries.Keys) entries.Remove(key); + } } - return new ContentDimension(code, dimensionName, [], values); } private void AddAdditionalPropertiesToDimensions( diff --git a/Px.Utils/ModelBuilders/ValueParserUtilities.cs b/Px.Utils/ModelBuilders/ValueParserUtilities.cs index 6dd6bc5c..33cbe847 100644 --- a/Px.Utils/ModelBuilders/ValueParserUtilities.cs +++ b/Px.Utils/ModelBuilders/ValueParserUtilities.cs @@ -56,25 +56,46 @@ public static TimeDimensionInterval ParseTimeIntervalFromTimeVal(string input, P /// List of value strings excluding the interval part. /// If the input string is in the range format, empty list is returned. /// - /// If the input string does not match the expected timeval format. public static List GetTimeValValueList(string input, PxFileConfiguration? conf = null) { conf ??= PxFileConfiguration.Default; + int endOftoken = input.IndexOf(conf.Symbols.Value.TimeSeriesIntervalEnd); + int firstStringDelimeter = input.IndexOf(conf.Symbols.Value.StringDelimeter, endOftoken); + if (firstStringDelimeter >= 0) + { + return input[firstStringDelimeter..] + .SplitToListOfStrings(conf.Symbols.Value.ListSeparator, conf.Symbols.Value.StringDelimeter); + } + else return []; + } - if (input.StartsWith(conf.Tokens.Time.TimeIntervalIndicator, StringComparison.InvariantCulture)) + /// + /// Removes the interval entry from the beginning of a time value string + /// and returns the rest as a string if the range format is used. + /// + /// The complete timeval value in one language. + /// Configuration used for parsing the value strings and the interval part. + /// Value string excluding the interval part. If the right format is not used an exception is thrown. + /// Thrown when the input string is not in the correct format. + public static string GetTimeValValueRangeString(string input, PxFileConfiguration? conf = null) + { + conf ??= PxFileConfiguration.Default; + int startOfRange = input.IndexOf(conf.Symbols.Value.ListSeparator); + int endOfToken = input.IndexOf(conf.Symbols.Value.TimeSeriesIntervalEnd); + if (startOfRange == -1 || endOfToken < startOfRange) { - int endOftoken = input.IndexOf(conf.Symbols.Value.TimeSeriesIntervalEnd); - int firtsStringDelimeter = input.IndexOf(conf.Symbols.Value.StringDelimeter, endOftoken); - if (firtsStringDelimeter >= 0) - { - return input[firtsStringDelimeter..] - .SplitToListOfStrings(conf.Symbols.Value.ListSeparator, conf.Symbols.Value.StringDelimeter); - } - else return []; + throw new ArgumentException($"Invalid time value range string. {input} range is not defined inside the time interval token"); + } + string range = input[(startOfRange + 1)..endOfToken].Trim(); + if (range.Count(c => c == conf.Symbols.Value.StringDelimeter) != 2 || + (range[0] != conf.Symbols.Value.StringDelimeter || range[^1] != conf.Symbols.Value.StringDelimeter) || + range.Count(c => c == conf.Symbols.Value.TimeSeriesLimitsSeparator) != 1) + { + throw new ArgumentException($"Invalid time value range string. {input} is not in valid range format."); } else { - throw new ArgumentException($"Invalid time value string {input}"); + return range[1..^1]; } } diff --git a/Px.Utils/Px.Utils.csproj b/Px.Utils/Px.Utils.csproj index 5cab598d..6de27c2a 100644 --- a/Px.Utils/Px.Utils.csproj +++ b/Px.Utils/Px.Utils.csproj @@ -2,7 +2,7 @@ Px.Utils - 1.1.0 + 1.1.1 net8.0 enable enable From b887abea8949b1581426c94decb83aeda9992382 Mon Sep 17 00:00:00 2001 From: Sakari Malkki Date: Mon, 23 Dec 2024 15:59:44 +0200 Subject: [PATCH 04/13] Support and tests for reading unenclosed missing value codes --- .../DataTests/DataValueParserTests.cs | 402 +++++++++++++++++- .../DataReaderTests.cs | 52 +++ .../Fixtures/DataReaderFixtures.cs | 13 + Px.Utils/PxFile/Data/DataValueParsers.cs | 130 ++++-- 4 files changed, 550 insertions(+), 47 deletions(-) diff --git a/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs b/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs index 96fab76c..23683497 100644 --- a/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs +++ b/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs @@ -74,6 +74,21 @@ public void FastParseDoubleDataValueDangerousNillSymbolReturnsNillValueType() Assert.AreEqual(DataValueType.Nill, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousNillSymbolWithoutDelimetersReturnsNillValueType() + { + // Arrange + char[] buffer = ['-']; + int len = 1; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(0, result.UnsafeValue); + Assert.AreEqual(DataValueType.Nill, result.Type); + } + [TestMethod] public void FastParseDoubleDataValueDangerousValidWithDecimalPartReturnsDoubleDataValue() { @@ -148,6 +163,20 @@ public void FastParseDoubleDataValueDangerousMissingSymbolReturnsMissingValueTyp Assert.AreEqual(DataValueType.Missing, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousMissingSymbolWithoutDelimetersReturnsMissingValueType() + { + // Arrange + char[] buffer = ['.']; + int len = 1; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Missing, result.Type); + } + [TestMethod] public void FastParseDoubleDataValueDangerousCanNotRepresentSymbolReturnsCanNotRepresentValueType() { @@ -162,6 +191,20 @@ public void FastParseDoubleDataValueDangerousCanNotRepresentSymbolReturnsCanNotR Assert.AreEqual(DataValueType.CanNotRepresent, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousCanNotRepresentSymbolWithoutDelimetersReturnsCanNotRepresentValueType() + { + // Arrange + char[] buffer = ['.', '.']; + int len = 2; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.CanNotRepresent, result.Type); + } + [TestMethod] public void FastParseDoubleDataValueDangerousConfidentialSymbolReturnsConfidentialValueType() { @@ -176,6 +219,20 @@ public void FastParseDoubleDataValueDangerousConfidentialSymbolReturnsConfidenti Assert.AreEqual(DataValueType.Confidential, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousConfidentialSymbolWithoutDelimetersReturnsConfidentialValueType() + { + // Arrange + char[] buffer = ['.', '.', '.']; + int len = 3; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Confidential, result.Type); + } + [TestMethod] public void FastParseDoubleDataValueDangerousNotAcquiredSymbolReturnsNotAcquiredValueType() { @@ -190,6 +247,20 @@ public void FastParseDoubleDataValueDangerousNotAcquiredSymbolReturnsNotAcquired Assert.AreEqual(DataValueType.NotAcquired, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousNotAcquiredSymbolWithoutDelimetersReturnsNotAcquiredValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.']; + int len = 4; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.NotAcquired, result.Type); + } + [TestMethod] public void FastParseDoubleDataValueDangerousNotAskedSymbolReturnsNotAskedValueType() { @@ -204,6 +275,20 @@ public void FastParseDoubleDataValueDangerousNotAskedSymbolReturnsNotAskedValueT Assert.AreEqual(DataValueType.NotAsked, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousNotAskedSymbolWithoutDelimetersReturnsNotAskedValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.']; + int len = 5; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.NotAsked, result.Type); + } + [TestMethod] public void FastParseDoubleDataValueDangerousEmptySymbolReturnsEmptyValueType() { @@ -218,6 +303,20 @@ public void FastParseDoubleDataValueDangerousEmptySymbolReturnsEmptyValueType() Assert.AreEqual(DataValueType.Empty, result.Type); } + [TestMethod] + public void FastParseDoubleDataValueDangerousEmptySymbolWithoutDelimetersReturnsEmptyValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.', '.']; + int len = 6; + + // Act + DoubleDataValue result = DataValueParsers.FastParseDoubleDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Empty, result.Type); + } + #endregion #region FastParseDecimalDataValueDangerous @@ -287,6 +386,21 @@ public void FastParseDecimalDataValueDangerousNillSymbolReturnsNillValueType() Assert.AreEqual(DataValueType.Nill, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousNillSymbolWithoutDelimetersReturnsNillValueType() + { + // Arrange + char[] buffer = ['-']; + int len = 1; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(0m, result.UnsafeValue); + Assert.AreEqual(DataValueType.Nill, result.Type); + } + [TestMethod] public void FastParseDecimalDataValueDangerousValidWithDecimalPartReturnsDecimalDataValue() { @@ -361,6 +475,20 @@ public void FastParseDecimalDataValueDangerousMissingSymbolReturnsMissingValueTy Assert.AreEqual(DataValueType.Missing, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousMissingSymbolWithoutDelimetersReturnsMissingValueType() + { + // Arrange + char[] buffer = ['.']; + int len = 1; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Missing, result.Type); + } + [TestMethod] public void FastParseDecimalDataValueDangerousCanNotRepresentSymbolReturnsCanNotRepresentValueType() { @@ -375,6 +503,20 @@ public void FastParseDecimalDataValueDangerousCanNotRepresentSymbolReturnsCanNot Assert.AreEqual(DataValueType.CanNotRepresent, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousCanNotRepresentSymbolWithoutDelimetersReturnsCanNotRepresentValueType() + { + // Arrange + char[] buffer = ['.', '.']; + int len = 2; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.CanNotRepresent, result.Type); + } + [TestMethod] public void FastParseDecimalDataValueDangerousConfidentialSymbolReturnsConfidentialValueType() { @@ -389,6 +531,20 @@ public void FastParseDecimalDataValueDangerousConfidentialSymbolReturnsConfident Assert.AreEqual(DataValueType.Confidential, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousConfidentialSymbolWithoutDelimetersReturnsConfidentialValueType() + { + // Arrange + char[] buffer = ['.', '.', '.']; + int len = 3; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Confidential, result.Type); + } + [TestMethod] public void FastParseDecimalDataValueDangerousNotAcquiredSymbolReturnsNotAcquiredValueType() { @@ -403,6 +559,20 @@ public void FastParseDecimalDataValueDangerousNotAcquiredSymbolReturnsNotAcquire Assert.AreEqual(DataValueType.NotAcquired, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousNotAcquiredSymbolWithoutDelimetersReturnsNotAcquiredValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.']; + int len = 4; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.NotAcquired, result.Type); + } + [TestMethod] public void FastParseDecimalDataValueDangerousNotAskedSymbolReturnsNotAskedValueType() { @@ -417,6 +587,20 @@ public void FastParseDecimalDataValueDangerousNotAskedSymbolReturnsNotAskedValue Assert.AreEqual(DataValueType.NotAsked, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousNotAskedSymbolWithoutDelimetersReturnsNotAskedValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.']; + int len = 5; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.NotAsked, result.Type); + } + [TestMethod] public void FastParseDecimalDataValueDangerousEmptySymbolReturnsEmptyValueType() { @@ -431,6 +615,20 @@ public void FastParseDecimalDataValueDangerousEmptySymbolReturnsEmptyValueType() Assert.AreEqual(DataValueType.Empty, result.Type); } + [TestMethod] + public void FastParseDecimalDataValueDangerousEmptySymbolWithoutDelimetersReturnsEmptyValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.', '.']; + int len = 6; + + // Act + DecimalDataValue result = DataValueParsers.FastParseDecimalDataValueDangerous(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Empty, result.Type); + } + #endregion #region FastParseUnsafeDoubleDangerous @@ -498,6 +696,20 @@ public void FastParseUnsafeDoubleDangerousNillSymbolReturnsNillValue() Assert.AreEqual(0, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousNillSymbolWithoutDelimetersReturnsNillValue() + { + // Arrange + char[] buffer = ['-']; + int len = 1; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(0, result); + } + [TestMethod] public void FastParseUnsafeDoubleDangerousValidWithDecimalPartReturnsDouble() { @@ -568,6 +780,20 @@ public void FastParseUnsafeDoubleDangerousMissingSymbolReturnsMissingValue() Assert.AreEqual(1, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousMissingSymbolWithoutDelimetersReturnsMissingValue() + { + // Arrange + char[] buffer = ['.']; + int len = 1; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(1, result); + } + [TestMethod] public void FastParseUnsafeDoubleDangerousCanNotRepresentSymbolReturnsCanNotRepresentValue() { @@ -582,6 +808,20 @@ public void FastParseUnsafeDoubleDangerousCanNotRepresentSymbolReturnsCanNotRepr Assert.AreEqual(2, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousCanNotRepresentSymbolWithoutDelimetersReturnsCanNotRepresentValue() + { + // Arrange + char[] buffer = ['.', '.']; + int len = 2; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(2, result); + } + [TestMethod] public void FastParseUnsafeDoubleDangerousConfidentialSymbolReturnsConfidentialValue() { @@ -596,6 +836,20 @@ public void FastParseUnsafeDoubleDangerousConfidentialSymbolReturnsConfidentialV Assert.AreEqual(3, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousConfidentialSymbolWithoutDelimetersReturnsConfidentialValue() + { + // Arrange + char[] buffer = [ '.', '.', '.' ]; + int len = 3; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(3, result); + } + [TestMethod] public void FastParseUnsafeDoubleDangerousNotAcquiredSymbolReturnsNotAcquiredValue() { @@ -610,6 +864,20 @@ public void FastParseUnsafeDoubleDangerousNotAcquiredSymbolReturnsNotAcquiredVal Assert.AreEqual(4, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousNotAcquiredSymbolWithoutDelimetersReturnsNotAcquiredValue() + { + // Arrange + char[] buffer = ['.', '.', '.', '.']; + int len = 4; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(4, result); + } + [TestMethod] public void FastParseUnsafeDoubleDangerousNotAskedSymbolReturnsNotAskedValue() { @@ -624,6 +892,20 @@ public void FastParseUnsafeDoubleDangerousNotAskedSymbolReturnsNotAskedValue() Assert.AreEqual(5, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousNotAskedSymbolWithoutDelimetersReturnsNotAskedValue() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.']; + int len = 5; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(5, result); + } + [TestMethod] public void FastParseUnsafeDoubleDangerousEmptySymbolReturnsEmptyValue() { @@ -638,6 +920,20 @@ public void FastParseUnsafeDoubleDangerousEmptySymbolReturnsEmptyValue() Assert.AreEqual(6, result); } + [TestMethod] + public void FastParseUnsafeDoubleDangerousEmptySymbolWithoutDelimetersReturnsEmptyValue() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.', '.']; + int len = 6; + + // Act + double result = DataValueParsers.FastParseUnsafeDoubleDangerous(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(6, result); + } + #endregion #region ParseDoubleDataValue @@ -1109,17 +1405,6 @@ public void ParseDecimalDataValueEmptySymbolReturnsEmptyValueType() Assert.AreEqual(DataValueType.Empty, result.Type); } - [TestMethod] - public void ParseDecimalDataValueMissingCodeWithoutQuotesThrows() - { - // Arrange - char[] buffer = ['.', '.', '.', '.',]; - int len = 4; - - // Act - Assert.ThrowsException(() => DataValueParsers.ParseDecimalDataValue(buffer, len)); - } - [TestMethod] public void ParseDecimalDataValueMissingCodeWithoutEndQuoteThrows() { @@ -1364,17 +1649,6 @@ public void ParseUnsafeDoubleEmptySymbolReturnsEmptyValue() Assert.AreEqual(6, result); } - [TestMethod] - public void ParseUnsafeDoubleMissingCodeWithoutQuotesThrows() - { - // Arrange - char[] buffer = ['.', '.', '.', '.',]; - int len = 4; - - // Act - Assert.ThrowsException(() => DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings)); - } - [TestMethod] public void ParseUnsafeDoubleMissingCodeWithoutEndQuoteThrows() { @@ -1419,6 +1693,90 @@ public void ParseUnsafeDoubleInvalidCharactersThrows() Assert.ThrowsException(() => DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings)); } + [TestMethod] + public void ParseUnsafeDoubleMissingSymbolWithoutDelimetersReturnsMissingValue() + { + // Arrange + char[] buffer = ['.']; + int len = 1; + + // Act + double result = DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(1, result); + } + + [TestMethod] + public void ParseUnsafeDoubleCanNotRepresentSymbolWithoutDelimetersReturnsCanNotRepresentValue() + { + // Arrange + char[] buffer = ['.', '.']; + int len = 2; + + // Act + double result = DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(2, result); + } + + [TestMethod] + public void ParseUnsafeDoubleConfidentialSymbolWithoutDelimetersReturnsConfidentialValue() + { + // Arrange + char[] buffer = ['.', '.', '.']; + int len = 3; + + // Act + double result = DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(3, result); + } + + [TestMethod] + public void ParseUnsafeDoubleNotAcquiredSymbolWithoutDelimetersReturnsNotAcquiredValue() + { + // Arrange + char[] buffer = ['.', '.', '.', '.']; + int len = 4; + + // Act + double result = DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(4, result); + } + + [TestMethod] + public void ParseUnsafeDoubleNotAskedSymbolWithoutDelimetersReturnsNotAskedValue() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.']; + int len = 5; + + // Act + double result = DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(5, result); + } + + [TestMethod] + public void ParseUnsafeDoubleEmptySymbolWithoutDelimetersReturnsEmptyValue() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.', '.']; + int len = 6; + + // Act + double result = DataValueParsers.ParseUnsafeDouble(buffer, len, missingValueEncodings); + + // Assert + Assert.AreEqual(6, result); + } + #endregion } } diff --git a/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs b/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs index 9c69665b..81c57aec 100644 --- a/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs +++ b/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs @@ -112,6 +112,58 @@ public void ReadDoubleDataValuesValidIntegersAndMissingReturnsCorrectDoubleDataV CollectionAssert.AreEqual(expexted, targetBuffer); } + [TestMethod] + public void ReadDoubleDataValuesValidIntegersAndUnenclosedMissingRetuurnsCorrectDoubleDataValues() + { + + // Arrange + byte[] data = Encoding.UTF8.GetBytes(DataReaderFixtures.MINIMAL_UTF8_20DATAVALUES_WITH_UNENCLOSED_MISSING); + using Stream stream = new MemoryStream(data); + using PxFileStreamDataReader reader = new(stream); + DoubleDataValue[] targetBuffer = new DoubleDataValue[21]; + DoubleDataValue canary = new(123.456, DataValueType.Exists); + targetBuffer[^1] = canary; + + // Act + MatrixMetadata testMeta = TestModelBuilder.BuildTestMetadata([2, 2, 5]); + MatrixMap matrixMap = new( + [ + new DimensionMap("var0", ["var0_val0", "var0_val1"]), + new DimensionMap("var1", ["var1_val0", "var1_val1"]), + new DimensionMap("var2", ["var2_val0", "var2_val1", "var2_val2", "var2_val3", "var2_val4"]) + ]); + reader.ReadDoubleDataValues(targetBuffer, 0, testMeta, matrixMap); + + // Assert + DoubleDataValue[] expexted = + [ + new(0.0, DataValueType.Missing), + new(1.00, DataValueType.Exists), + new(0.0, DataValueType.Missing), + new(3.00, DataValueType.Exists), + new(0.0, DataValueType.Missing), + new(5.00, DataValueType.Exists), + new(0.0, DataValueType.Missing), + new(7.00, DataValueType.Exists), + new(0.0, DataValueType.Missing), + new(9.00, DataValueType.Exists), + new(0.0, DataValueType.Confidential), + new(11.00, DataValueType.Exists), + new(0.0, DataValueType.Confidential), + new(13.00, DataValueType.Exists), + new(0.0, DataValueType.Confidential), + new(15.00, DataValueType.Exists), + new(0.0, DataValueType.Confidential), + new(17.00, DataValueType.Exists), + new(0.0, DataValueType.Confidential), + new(19.00, DataValueType.Exists), + canary + ]; + // The canary in the expected checks against overwrites + + CollectionAssert.AreEqual(expexted, targetBuffer); + } + [TestMethod] public void ReadEveryOtherDoubleDataValueFrom1stRowValidIntegersReturnsCorrectDoubleDataValues() { diff --git a/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs b/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs index d9351013..337267a2 100644 --- a/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs +++ b/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs @@ -28,6 +28,19 @@ internal static class DataReaderFixtures "\".\" 1 \".\" 3 \".\" 5 \".\" 7 \".\" 9 \n" + "\"...\" 11 \"...\" 13 \"...\" 15 \"...\" 17 \"...\" 19;"; + internal static string MINIMAL_UTF8_20DATAVALUES_WITH_UNENCLOSED_MISSING => + "CHARSET=\"Unicode\";\n" + + "AXIS-VERSION=\"2013\";\n" + + "CODEPAGE=\"utf-8\";\n" + + "LANGUAGES=\"aa\",\"åå\",\"öö\";\n" + + "NEXT-UPDATE=\"20240131 08:00\";\n" + + "SUBJECT-AREA=\"test\";\n" + + "SUBJECT-AREA[åå]=\"test\";\n" + + "COPYRIGHT=YES;\n" + + "DATA=\n" + + "\".\" 1 \".\" 3 \".\" 5 \".\" 7 \".\" 9 \n" + + "... 11 ... 13 ... 15 ... 17 ... 19;"; + internal static string MINIMAL_UTF8_20DECIMALVALUES => "CHARSET=\"Unicode\";\n" + "AXIS-VERSION=\"2013\";\n" + diff --git a/Px.Utils/PxFile/Data/DataValueParsers.cs b/Px.Utils/PxFile/Data/DataValueParsers.cs index f38f3bde..4f4c086f 100644 --- a/Px.Utils/PxFile/Data/DataValueParsers.cs +++ b/Px.Utils/PxFile/Data/DataValueParsers.cs @@ -23,13 +23,20 @@ public static class DataValueParsers [MethodImpl(MethodImplOptions.AggressiveInlining)] public static DoubleDataValue FastParseDoubleDataValueDangerous(char[] buffer, int len) { - // All special/missing values are encoded as strings in the format "..." or "-". + // All special/missing values are encoded either as strings in the format "..." and "-" or ... and - // The length of the string (number of dots) is used to determine the type of missing value. if (buffer[0] == '"') { if (buffer[1] == '-') return new DoubleDataValue(0, DataValueType.Nill); return new DoubleDataValue(0, (DataValueType)(len - stringDelimiterOffset)); } + else if (buffer[0] < '0') + { + if (buffer[0] == '-') + if (len == 1) return new DoubleDataValue(0, DataValueType.Nill); + else return new(FastParseDoubleDangerous(buffer, len), DataValueType.Exists); + return new DoubleDataValue(0, (DataValueType)len); + } else { double value = FastParseDoubleDangerous(buffer, len); @@ -55,6 +62,13 @@ public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, if (buffer[1] == '-') return new DecimalDataValue(0, DataValueType.Nill); return new DecimalDataValue(0, (DataValueType)(len - stringDelimiterOffset)); } + else if (buffer[0] < '0') + { + if (buffer[0] == '-') + if (len == 1) return new DecimalDataValue(0, DataValueType.Nill); + else return new(FastParseDecimalDangerous(buffer, len), DataValueType.Exists); + return new DecimalDataValue(0, (DataValueType)len); + } else { decimal value = FastParseDecimalDangerous(buffer, len); @@ -89,6 +103,13 @@ public static double FastParseUnsafeDoubleDangerous(char[] buffer, int len, doub if (buffer[1] == '-') return missingValueEncodings[0]; return missingValueEncodings[len - stringDelimiterOffset]; } + else if (buffer[0] < '0') + { + if (buffer[0] == '-') + if (len == 1) return missingValueEncodings[0]; + else return FastParseDoubleDangerous(buffer, len); + return missingValueEncodings[len]; + } else { return FastParseDoubleDangerous(buffer, len); @@ -111,15 +132,16 @@ public static DoubleDataValue ParseDoubleDataValue(char[] buffer, int len) } else { - if (buffer[0] != '"' || buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) + if (buffer[0] != '"' || buffer[len - 1] != '"' || len > missingDataEntryMaxLength) { throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); } - if (buffer[1] == '-') return new DoubleDataValue(0.0, DataValueType.Nill); + if (buffer[1] == '-' || buffer[0] == '-') return new DoubleDataValue(0.0, DataValueType.Nill); int dots = 0; - while (dots < len - stringDelimiterOffset) + int offset = buffer[0] == '"' ? stringDelimiterOffset : 0; + while (dots < len - offset) { if (buffer[dots + 1] == '.') dots++; else throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); @@ -145,21 +167,12 @@ public static DecimalDataValue ParseDecimalDataValue(char[] buffer, int len) } else { - if (buffer[0] != '"' || buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) - { - throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); - } - - if (buffer[1] == '-') return new DecimalDataValue(decimal.Zero, DataValueType.Nill); - - int dots = 0; - while (dots < len - stringDelimiterOffset) + if (buffer[0] == '"') { - if (buffer[dots + 1] == '.') dots++; - else throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); + return ParseEnclosedDecimalDataValue(buffer, len); } - return new DecimalDataValue(decimal.Zero, (DataValueType)dots); + return ParseUnenclosedDecimalDataValue(buffer, len); } } @@ -191,22 +204,89 @@ public static double ParseUnsafeDouble(char[] buffer, int len, double[] missingV } else { - if (buffer[0] != '"' || buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) + if (buffer[0] == '"') { - throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); + return ParseEnclosedUnsafeDouble(buffer, len, missingValueEncodings); } - if (buffer[1] == '-') return missingValueEncodings[0]; + return ParseUnenclosedUnsafeDouble(buffer, len, missingValueEncodings); + } + } - int dots = 0; - while (dots < len - stringDelimiterOffset) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static DecimalDataValue ParseEnclosedDecimalDataValue(char[] buffer, int len) + { + if (buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) + { + throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); + } + + if (buffer[1] == '-') + { + return new DecimalDataValue(decimal.Zero, DataValueType.Nill); + } + + int dots = CountDots(buffer, 1, len - stringDelimiterOffset); + return new DecimalDataValue(decimal.Zero, (DataValueType)dots); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static DecimalDataValue ParseUnenclosedDecimalDataValue(char[] buffer, int len) + { + if (buffer[0] == '-' && len == 1) + { + return new DecimalDataValue(decimal.Zero, DataValueType.Nill); + } + + int dots = CountDots(buffer, 0, len); + return new DecimalDataValue(decimal.Zero, (DataValueType)dots); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double ParseEnclosedUnsafeDouble(char[] buffer, int len, double[] missingValueEncodings) + { + if (buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) + { + throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); + } + + if (buffer[1] == '-') + { + return missingValueEncodings[0]; + } + + int dots = CountDots(buffer, 1, len - stringDelimiterOffset); + return missingValueEncodings[dots]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double ParseUnenclosedUnsafeDouble(char[] buffer, int len, double[] missingValueEncodings) + { + if (buffer[0] == '-' && len == 1) + { + return missingValueEncodings[0]; + } + + int dots = CountDots(buffer, 0, len); + return missingValueEncodings[dots]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int CountDots(char[] buffer, int offset, int end) + { + int dots = 0; + for (int i = 0; i < end; i++) + { + if (buffer[i + offset] == '.') { - if (buffer[dots + 1] == '.') dots++; - else throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); + dots++; + } + else + { + throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, end)}"); } - - return missingValueEncodings[dots]; } + return dots; } private static readonly double[] doublePowersOf10 = From 990b543dc2bdf33b1b424fac671ddb855830f2e2 Mon Sep 17 00:00:00 2001 From: Sakari Malkki Date: Fri, 27 Dec 2024 09:05:07 +0200 Subject: [PATCH 05/13] Validation for unenclosed missing data code validation --- .../DataStringValueValidatorTests.cs | 10 +++- .../DataValidationTests/DataValidationTest.cs | 40 +++++++++++++++ .../Validation/Fixtures/DataStreamContents.cs | 7 +++ Px.Utils/PxFile/Data/DataValueParsers.cs | 50 +++++++++++-------- .../DataValidation/DataValidator.cs | 26 +++++++++- .../DataValidation/DataValidatorFunctions.cs | 9 +++- 6 files changed, 115 insertions(+), 27 deletions(-) diff --git a/Px.Utils.UnitTests/Validation/DataValidationTests/DataStringValueValidatorTests.cs b/Px.Utils.UnitTests/Validation/DataValidationTests/DataStringValueValidatorTests.cs index 7c106e9f..3e04030c 100644 --- a/Px.Utils.UnitTests/Validation/DataValidationTests/DataStringValueValidatorTests.cs +++ b/Px.Utils.UnitTests/Validation/DataValidationTests/DataStringValueValidatorTests.cs @@ -16,6 +16,13 @@ public class DataStringValueValidatorTest [DataRow("\".....\"")] [DataRow("\"......\"")] [DataRow("\"-\"")] + [DataRow(".")] + [DataRow("..")] + [DataRow("...")] + [DataRow("....")] + [DataRow(".....")] + [DataRow("......")] + [DataRow("-")] public void AllowedStrings(string allowedValue) { DataStringValidator validator = new(); @@ -30,12 +37,11 @@ public void AllowedStrings(string allowedValue) [DataRow("\"...")] [DataRow("...\"")] [DataRow("\"\"")] - [DataRow("...")] + [DataRow(" ... ")] [DataRow("foo")] [DataRow("\"foo\"")] [DataRow("\".......\"")] [DataRow("\"--\"")] - [DataRow("-")] public void NotAllowedStringValue(string notAllowedValue) { DataStringValidator validator = new(); diff --git a/Px.Utils.UnitTests/Validation/DataValidationTests/DataValidationTest.cs b/Px.Utils.UnitTests/Validation/DataValidationTests/DataValidationTest.cs index e64f3b73..344443c9 100644 --- a/Px.Utils.UnitTests/Validation/DataValidationTests/DataValidationTest.cs +++ b/Px.Utils.UnitTests/Validation/DataValidationTests/DataValidationTest.cs @@ -30,6 +30,26 @@ public void TestValidateWithoutErrors() Assert.AreEqual(0, validationFeedbacks.Count); } + [TestMethod] + public void TestValidateWithoutMissingCodeDelimetersReturnsWithoutErrors() + { + using Stream stream = new MemoryStream(Encoding.UTF8.GetBytes(DataStreamContents.SIMPLE_VALID_DATA_WITHOUT_MISISNG_CODE_DELIMETERS)); + stream.Seek(6, 0); + DataValidator validator = new(5, 4, 1); + + ValidationFeedback validationFeedbacks = validator.Validate(stream, "foo", Encoding.UTF8).FeedbackItems; + + foreach (KeyValuePair> validationFeedback in validationFeedbacks) + { + foreach (ValidationFeedbackValue instance in validationFeedback.Value) + { + Logger.LogMessage($"Line {instance.Line}, Char {instance.Character}: " + + $"{validationFeedback.Key.Rule} {instance.AdditionalInfo}"); + } + } + Assert.AreEqual(0, validationFeedbacks.Count); + } + [TestMethod] public async Task TestValidateAsyncWithoutErrors() { @@ -49,7 +69,27 @@ public async Task TestValidateAsyncWithoutErrors() } } Assert.AreEqual(0, validationFeedbacks.Count); + } + + [TestMethod] + public async Task TestValidateAsyncWithoutMissingCodeDelimetersReturnsWithoutErrors() + { + using Stream stream = new MemoryStream(Encoding.UTF8.GetBytes(DataStreamContents.SIMPLE_VALID_DATA_WITHOUT_MISISNG_CODE_DELIMETERS)); + stream.Seek(6, 0); + DataValidator validator = new(5, 4, 1); + + ValidationResult result = await validator.ValidateAsync(stream, "foo", Encoding.UTF8); + ValidationFeedback validationFeedbacks = result.FeedbackItems; + foreach (KeyValuePair> validationFeedback in validationFeedbacks) + { + foreach (ValidationFeedbackValue instance in validationFeedback.Value) + { + Logger.LogMessage($"Line {instance.Line}, Char {instance.Character}: " + + $"{validationFeedback.Key.Rule} {instance.AdditionalInfo}"); + } + } + Assert.AreEqual(0, validationFeedbacks.Count); } [TestMethod] diff --git a/Px.Utils.UnitTests/Validation/Fixtures/DataStreamContents.cs b/Px.Utils.UnitTests/Validation/Fixtures/DataStreamContents.cs index 3db547ad..65c6d8b8 100644 --- a/Px.Utils.UnitTests/Validation/Fixtures/DataStreamContents.cs +++ b/Px.Utils.UnitTests/Validation/Fixtures/DataStreamContents.cs @@ -9,6 +9,13 @@ internal static class DataStreamContents "\".\" \"..\" \"...\" \"....\" \".....\" \r"+ "\"......\" \"-\" -1 1.2 -1.3; \r\n"; + internal static string SIMPLE_VALID_DATA_WITHOUT_MISISNG_CODE_DELIMETERS => + "DATA=\n" + + "1 2 3 4 5 \r\n" + + "6 7 8 9 10 \n\r" + + ". .. ... .... ..... \r" + + "...... - -1 1.2 -1.3; \r\n"; + internal static string SIMPLE_INVALID_DATA => "DATA=a\n" + "\"b\" 1 2. 3 4 5 \r\n" + diff --git a/Px.Utils/PxFile/Data/DataValueParsers.cs b/Px.Utils/PxFile/Data/DataValueParsers.cs index 4f4c086f..6e241017 100644 --- a/Px.Utils/PxFile/Data/DataValueParsers.cs +++ b/Px.Utils/PxFile/Data/DataValueParsers.cs @@ -25,16 +25,18 @@ public static DoubleDataValue FastParseDoubleDataValueDangerous(char[] buffer, i { // All special/missing values are encoded either as strings in the format "..." and "-" or ... and - // The length of the string (number of dots) is used to determine the type of missing value. - if (buffer[0] == '"') - { - if (buffer[1] == '-') return new DoubleDataValue(0, DataValueType.Nill); - return new DoubleDataValue(0, (DataValueType)(len - stringDelimiterOffset)); - } - else if (buffer[0] < '0') + if (buffer[0] < '0') { + if (buffer[0] == '"') + { + if (buffer[1] == '-') return new DoubleDataValue(0, DataValueType.Nill); + return new DoubleDataValue(0, (DataValueType)(len - stringDelimiterOffset)); + } if (buffer[0] == '-') + { if (len == 1) return new DoubleDataValue(0, DataValueType.Nill); - else return new(FastParseDoubleDangerous(buffer, len), DataValueType.Exists); + return new(FastParseDoubleDangerous(buffer, len), DataValueType.Exists); + } return new DoubleDataValue(0, (DataValueType)len); } else @@ -57,16 +59,18 @@ public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, { // All special/missing values are encoded as strings in the format "..." or "-". // The length of the string (number of dots) is used to determine the type of missing value. - if (buffer[0] == '"') - { - if (buffer[1] == '-') return new DecimalDataValue(0, DataValueType.Nill); - return new DecimalDataValue(0, (DataValueType)(len - stringDelimiterOffset)); - } - else if (buffer[0] < '0') + if (buffer[0] < '0') { + if (buffer[0] == '"') + { + if (buffer[1] == '-') return new DecimalDataValue(0, DataValueType.Nill); + return new DecimalDataValue(0, (DataValueType)(len - stringDelimiterOffset)); + } if (buffer[0] == '-') + { if (len == 1) return new DecimalDataValue(0, DataValueType.Nill); - else return new(FastParseDecimalDangerous(buffer, len), DataValueType.Exists); + return new(FastParseDecimalDangerous(buffer, len), DataValueType.Exists); + } return new DecimalDataValue(0, (DataValueType)len); } else @@ -98,16 +102,18 @@ public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double FastParseUnsafeDoubleDangerous(char[] buffer, int len, double[] missingValueEncodings) { - if (buffer[0] == '"') + if (buffer[0] < '0') { - if (buffer[1] == '-') return missingValueEncodings[0]; - return missingValueEncodings[len - stringDelimiterOffset]; - } - else if (buffer[0] < '0') - { - if (buffer[0] == '-') + if (buffer[0] == '"') + { + if (buffer[1] == '-') return missingValueEncodings[0]; + return missingValueEncodings[len - stringDelimiterOffset]; + } + else if (buffer[0] == '-') + { if (len == 1) return missingValueEncodings[0]; - else return FastParseDoubleDangerous(buffer, len); + return FastParseDoubleDangerous(buffer, len); + } return missingValueEncodings[len]; } else diff --git a/Px.Utils/Validation/DataValidation/DataValidator.cs b/Px.Utils/Validation/DataValidation/DataValidator.cs index 18c24212..843cb436 100644 --- a/Px.Utils/Validation/DataValidation/DataValidator.cs +++ b/Px.Utils/Validation/DataValidation/DataValidator.cs @@ -1,4 +1,5 @@ -using System.Text; +using System.Runtime.CompilerServices; +using System.Text; using Px.Utils.PxFile; using Px.Utils.Validation.DatabaseValidation; @@ -185,6 +186,7 @@ private ValidationFeedback ValidateDataStream(Stream stream, CancellationToken? return validationFeedbacks; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void HandleEntryTypeChange(ref ValidationFeedback validationFeedbacks) { if (_currentEntryType == EntryType.Unknown && (_lineNumber > 1 || _charPosition > 0)) @@ -199,7 +201,7 @@ private void HandleEntryTypeChange(ref ValidationFeedback validationFeedbacks) List validators = _currentEntryType switch { EntryType.DataItemSeparator => _dataSeparatorValidators, - EntryType.DataItem => _currentEntry[0] == _stringDelimeter ? _dataStringValidators : _dataNumValidators, + EntryType.DataItem => CurrentEntryIsNumber() ? _dataNumValidators : _dataStringValidators, _ => _commonValidators }; @@ -220,6 +222,7 @@ private void HandleEntryTypeChange(ref ValidationFeedback validationFeedbacks) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void HandleNonSeparatorType(ref ValidationFeedback validationFeedbacks) { if (_currentCharacterType == EntryType.DataItem) @@ -243,6 +246,25 @@ private void HandleNonSeparatorType(ref ValidationFeedback validationFeedbacks) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool CurrentEntryIsNumber() + { + if (_currentEntry[0] < '0') + { + if (_currentEntry[0] == '"') return false; + else if (_currentEntry[0] == '-') + { + if (_currentEntry.Count == 1) return false; + else return true; + } + return false; + } + else + { + return true; + } + } + private void ResetValidator() { _commonValidators.Clear(); diff --git a/Px.Utils/Validation/DataValidation/DataValidatorFunctions.cs b/Px.Utils/Validation/DataValidation/DataValidatorFunctions.cs index 0d5cae58..727b5a77 100644 --- a/Px.Utils/Validation/DataValidation/DataValidatorFunctions.cs +++ b/Px.Utils/Validation/DataValidation/DataValidatorFunctions.cs @@ -14,7 +14,14 @@ public class DataStringValidator : IDataValidator PxFileConfiguration.Default.Tokens.DataValues.DataIsNotAvailable, PxFileConfiguration.Default.Tokens.DataValues.DataHasNotBeenAsked, PxFileConfiguration.Default.Tokens.DataValues.Missing6, - PxFileConfiguration.Default.Tokens.DataValues.DataIsNone + PxFileConfiguration.Default.Tokens.DataValues.DataIsNone, + PxFileConfiguration.Default.Tokens.DataValues.DataIsMissing.Trim('"'), // Allows for missing codes without string delimeters + PxFileConfiguration.Default.Tokens.DataValues.DataCategoryNotApplicable.Trim('"'), + PxFileConfiguration.Default.Tokens.DataValues.DataIsConfidential.Trim('"'), + PxFileConfiguration.Default.Tokens.DataValues.DataIsNotAvailable.Trim('"'), + PxFileConfiguration.Default.Tokens.DataValues.DataHasNotBeenAsked.Trim('"'), + PxFileConfiguration.Default.Tokens.DataValues.Missing6.Trim('"'), + PxFileConfiguration.Default.Tokens.DataValues.DataIsNone.Trim('"'), ]; /// From 9687a2af591ffdf8cb73676e38e05b6196002d5c Mon Sep 17 00:00:00 2001 From: Sakari Malkki Date: Fri, 27 Dec 2024 14:31:26 +0200 Subject: [PATCH 06/13] Optimization and test fixes --- .../DataTests/DataValueParserTests.cs | 93 +++++++++++++- Px.Utils/PxFile/Data/DataValueParsers.cs | 118 +++++++----------- .../DataValidation/DataValidator.cs | 15 +-- 3 files changed, 141 insertions(+), 85 deletions(-) diff --git a/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs b/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs index 23683497..72e4bcb9 100644 --- a/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs +++ b/Px.Utils.UnitTests/PxFileTests/DataTests/DataValueParserTests.cs @@ -1141,16 +1141,103 @@ public void ParseDoubleDataValueEmptySymbolReturnsEmptyValueType() // Assert Assert.AreEqual(DataValueType.Empty, result.Type); } + + [TestMethod] + public void ParseDoubleDataValueNillSymbolWithoutQuotesReturnsNillValueType() + { + // Arrange + char[] buffer = ['-']; + int len = 1; + + // Act + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Nill, result.Type); + } + + [TestMethod] + public void ParseDoubleDataValueMissingSymbolWithoutQuotesReturnsMissingValueType() + { + // Arrange + char[] buffer = ['.']; + int len = 1; + + // Act + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Missing, result.Type); + } + + [TestMethod] + public void ParseDoubleDataValueCanNotRepresentSymbolWithoutQuotesReturnsCanNotRepresentValueType() + { + // Arrange + char[] buffer = ['.', '.']; + int len = 2; + + // Act + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.CanNotRepresent, result.Type); + } + + [TestMethod] + public void ParseDoubleDataValueConfidentialSymbollWithoutQuotesReturnsConfidentialValueType() + { + // Arrange + char[] buffer = ['.', '.', '.']; + int len = 3; + + // Act + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Confidential, result.Type); + } [TestMethod] - public void ParseDoubleDataValueMissingCodeWithoutQuotesThrows() + public void ParseDoubleDataValueNotAcquiredSymbollWithoutQuotesReturnsNotAcquiredValueType() { // Arrange - char[] buffer = ['.', '.', '.', '.',]; + char[] buffer = ['.', '.', '.', '.']; int len = 4; // Act - Assert.ThrowsException(() => DataValueParsers.ParseDoubleDataValue(buffer, len)); + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.NotAcquired, result.Type); + } + + [TestMethod] + public void ParseDoubleDataValueNotAskedSymbollWithoutQuotesReturnsNotAskedValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.']; + int len = 5; + + // Act + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.NotAsked, result.Type); + } + + [TestMethod] + public void ParseDoubleDataValueEmptySymbolWithoutQuotesReturnsEmptyValueType() + { + // Arrange + char[] buffer = ['.', '.', '.', '.', '.', '.']; + int len = 6; + + // Act + DoubleDataValue result = DataValueParsers.ParseDoubleDataValue(buffer, len); + + // Assert + Assert.AreEqual(DataValueType.Empty, result.Type); } [TestMethod] diff --git a/Px.Utils/PxFile/Data/DataValueParsers.cs b/Px.Utils/PxFile/Data/DataValueParsers.cs index 6e241017..b5a8e61c 100644 --- a/Px.Utils/PxFile/Data/DataValueParsers.cs +++ b/Px.Utils/PxFile/Data/DataValueParsers.cs @@ -23,27 +23,18 @@ public static class DataValueParsers [MethodImpl(MethodImplOptions.AggressiveInlining)] public static DoubleDataValue FastParseDoubleDataValueDangerous(char[] buffer, int len) { - // All special/missing values are encoded either as strings in the format "..." and "-" or ... and - - // The length of the string (number of dots) is used to determine the type of missing value. - if (buffer[0] < '0') + if(IsNumber(buffer, len)) { - if (buffer[0] == '"') - { - if (buffer[1] == '-') return new DoubleDataValue(0, DataValueType.Nill); - return new DoubleDataValue(0, (DataValueType)(len - stringDelimiterOffset)); - } - if (buffer[0] == '-') - { - if (len == 1) return new DoubleDataValue(0, DataValueType.Nill); - return new(FastParseDoubleDangerous(buffer, len), DataValueType.Exists); - } - return new DoubleDataValue(0, (DataValueType)len); + return new DoubleDataValue(FastParseDoubleDangerous(buffer, len), DataValueType.Exists); } - else + + if (buffer[0] == '"') { - double value = FastParseDoubleDangerous(buffer, len); - return new DoubleDataValue(value, DataValueType.Exists); + if (buffer[1] == '-') return new DoubleDataValue(0, DataValueType.Nill); + return new DoubleDataValue(0, (DataValueType)(len - stringDelimiterOffset)); } + if (buffer[0] == '-') return new DoubleDataValue(0, DataValueType.Nill); + return new DoubleDataValue(0, (DataValueType)(len)); } /// @@ -57,27 +48,18 @@ public static DoubleDataValue FastParseDoubleDataValueDangerous(char[] buffer, i [MethodImpl(MethodImplOptions.AggressiveInlining)] public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, int len) { - // All special/missing values are encoded as strings in the format "..." or "-". - // The length of the string (number of dots) is used to determine the type of missing value. - if (buffer[0] < '0') + if (IsNumber(buffer, len)) { - if (buffer[0] == '"') - { - if (buffer[1] == '-') return new DecimalDataValue(0, DataValueType.Nill); - return new DecimalDataValue(0, (DataValueType)(len - stringDelimiterOffset)); - } - if (buffer[0] == '-') - { - if (len == 1) return new DecimalDataValue(0, DataValueType.Nill); - return new(FastParseDecimalDangerous(buffer, len), DataValueType.Exists); - } - return new DecimalDataValue(0, (DataValueType)len); + return new DecimalDataValue(FastParseDecimalDangerous(buffer, len), DataValueType.Exists); } - else + + if (buffer[0] == '"') { - decimal value = FastParseDecimalDangerous(buffer, len); - return new (value, DataValueType.Exists); + if (buffer[1] == '-') return new DecimalDataValue(0, DataValueType.Nill); + return new DecimalDataValue(0, (DataValueType)(len - stringDelimiterOffset)); } + if (buffer[0] == '-') return new DecimalDataValue(0, DataValueType.Nill); + return new DecimalDataValue(0, (DataValueType)(len)); } /// @@ -102,24 +84,18 @@ public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double FastParseUnsafeDoubleDangerous(char[] buffer, int len, double[] missingValueEncodings) { - if (buffer[0] < '0') + if (IsNumber(buffer, len)) { - if (buffer[0] == '"') - { - if (buffer[1] == '-') return missingValueEncodings[0]; - return missingValueEncodings[len - stringDelimiterOffset]; - } - else if (buffer[0] == '-') - { - if (len == 1) return missingValueEncodings[0]; - return FastParseDoubleDangerous(buffer, len); - } - return missingValueEncodings[len]; + return FastParseDoubleDangerous(buffer, len); } - else + + if (buffer[0] == '"') { - return FastParseDoubleDangerous(buffer, len); + if (buffer[1] == '-') return missingValueEncodings[0]; + return missingValueEncodings[len - stringDelimiterOffset]; } + if (buffer[0] == '-') return missingValueEncodings[0]; + return missingValueEncodings[len]; } /// @@ -138,22 +114,12 @@ public static DoubleDataValue ParseDoubleDataValue(char[] buffer, int len) } else { - if (buffer[0] != '"' || buffer[len - 1] != '"' || len > missingDataEntryMaxLength) - { - throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); - } - - if (buffer[1] == '-' || buffer[0] == '-') return new DoubleDataValue(0.0, DataValueType.Nill); - - int dots = 0; - int offset = buffer[0] == '"' ? stringDelimiterOffset : 0; - while (dots < len - offset) + if (buffer[0] == '"') { - if (buffer[dots + 1] == '.') dots++; - else throw new ArgumentException($"Invalid symbol found when parsing data values {new string(buffer, 0, len)}"); + return new(0, ParseEnclosedMissingDataType(buffer, len)); } - return new DoubleDataValue(double.NaN, (DataValueType)dots); + return new(0, ParseUnenclosedMissingDataType(buffer, len)); } } @@ -175,10 +141,10 @@ public static DecimalDataValue ParseDecimalDataValue(char[] buffer, int len) { if (buffer[0] == '"') { - return ParseEnclosedDecimalDataValue(buffer, len); + return new(decimal.Zero, ParseEnclosedMissingDataType(buffer, len)); } - return ParseUnenclosedDecimalDataValue(buffer, len); + return new(decimal.Zero, ParseUnenclosedMissingDataType(buffer, len)); } } @@ -220,7 +186,7 @@ public static double ParseUnsafeDouble(char[] buffer, int len, double[] missingV } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static DecimalDataValue ParseEnclosedDecimalDataValue(char[] buffer, int len) + private static DataValueType ParseEnclosedMissingDataType(char[] buffer, int len) { if (buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) { @@ -229,23 +195,21 @@ private static DecimalDataValue ParseEnclosedDecimalDataValue(char[] buffer, int if (buffer[1] == '-') { - return new DecimalDataValue(decimal.Zero, DataValueType.Nill); + return DataValueType.Nill; } - int dots = CountDots(buffer, 1, len - stringDelimiterOffset); - return new DecimalDataValue(decimal.Zero, (DataValueType)dots); + return (DataValueType)CountDots(buffer, 1, len - stringDelimiterOffset); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static DecimalDataValue ParseUnenclosedDecimalDataValue(char[] buffer, int len) + private static DataValueType ParseUnenclosedMissingDataType(char[] buffer, int len) { if (buffer[0] == '-' && len == 1) { - return new DecimalDataValue(decimal.Zero, DataValueType.Nill); + return DataValueType.Nill; } - int dots = CountDots(buffer, 0, len); - return new DecimalDataValue(decimal.Zero, (DataValueType)dots); + return (DataValueType)CountDots(buffer, 0, len); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -372,5 +336,17 @@ private static decimal FastParseDecimalDangerous(char[] buffer, int len) if (buffer[0] == '-') return -n / decimalPowersOf10[len - decimalPosition]; else return n / decimalPowersOf10[len - decimalPosition]; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsNumber(char[] buffer, int len) + { + if (buffer[0] <= '.') // Characters that can start a valid missing value code are ", - or . which are are "smaller or equal to" . + { + if (buffer[0] == '-') return len > 1; // Dodge negative numbers + return false; + } + + return true; + } } } diff --git a/Px.Utils/Validation/DataValidation/DataValidator.cs b/Px.Utils/Validation/DataValidation/DataValidator.cs index 843cb436..03fa973f 100644 --- a/Px.Utils/Validation/DataValidation/DataValidator.cs +++ b/Px.Utils/Validation/DataValidation/DataValidator.cs @@ -249,20 +249,13 @@ private void HandleNonSeparatorType(ref ValidationFeedback validationFeedbacks) [MethodImpl(MethodImplOptions.AggressiveInlining)] private bool CurrentEntryIsNumber() { - if (_currentEntry[0] < '0') + if (_currentEntry[0] <= '.') // Characters that can start a valid missing value code are ", - or . which are are "smaller or equal to" . { - if (_currentEntry[0] == '"') return false; - else if (_currentEntry[0] == '-') - { - if (_currentEntry.Count == 1) return false; - else return true; - } + if (_currentEntry[0] == '-') return _currentEntry.Count > 1; // Dodge negative numbers return false; } - else - { - return true; - } + + return true; } private void ResetValidator() From 75a215568d08f3ddf4f1d80ee473ce4030c6d59d Mon Sep 17 00:00:00 2001 From: Sakari Malkki Date: Fri, 27 Dec 2024 14:57:46 +0200 Subject: [PATCH 07/13] Bump patch version number --- Px.Utils/Px.Utils.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Px.Utils/Px.Utils.csproj b/Px.Utils/Px.Utils.csproj index 6de27c2a..e1af1cc8 100644 --- a/Px.Utils/Px.Utils.csproj +++ b/Px.Utils/Px.Utils.csproj @@ -2,7 +2,7 @@ Px.Utils - 1.1.1 + 1.1.2 net8.0 enable enable From 610c7ab1b685191a6dbcdf6ba9f0f0769a47a10f Mon Sep 17 00:00:00 2001 From: Jose Saarimaa Date: Wed, 8 Jan 2025 12:56:04 +0200 Subject: [PATCH 08/13] When checking if a char buffer contains a number, we can use the fact that all valid numbers end with a number char and none of the missing value codes do. --- Px.Utils/PxFile/Data/DataValueParsers.cs | 28 ++++++------------- .../DataValidation/DataValidator.cs | 17 ++--------- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/Px.Utils/PxFile/Data/DataValueParsers.cs b/Px.Utils/PxFile/Data/DataValueParsers.cs index b5a8e61c..3ffdcfd9 100644 --- a/Px.Utils/PxFile/Data/DataValueParsers.cs +++ b/Px.Utils/PxFile/Data/DataValueParsers.cs @@ -23,7 +23,7 @@ public static class DataValueParsers [MethodImpl(MethodImplOptions.AggressiveInlining)] public static DoubleDataValue FastParseDoubleDataValueDangerous(char[] buffer, int len) { - if(IsNumber(buffer, len)) + if (buffer[len - 1] > '.') // When the last char is a number, the value is a number { return new DoubleDataValue(FastParseDoubleDangerous(buffer, len), DataValueType.Exists); } @@ -48,7 +48,8 @@ public static DoubleDataValue FastParseDoubleDataValueDangerous(char[] buffer, i [MethodImpl(MethodImplOptions.AggressiveInlining)] public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, int len) { - if (IsNumber(buffer, len)) + if (buffer[len - 1] > '.') // When the last char is a number, the value is a number + { return new DecimalDataValue(FastParseDecimalDangerous(buffer, len), DataValueType.Exists); } @@ -84,7 +85,8 @@ public static DecimalDataValue FastParseDecimalDataValueDangerous(char[] buffer, [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double FastParseUnsafeDoubleDangerous(char[] buffer, int len, double[] missingValueEncodings) { - if (IsNumber(buffer, len)) + if (buffer[len - 1] > '.') // When the last char is a number, the value is a number + { return FastParseDoubleDangerous(buffer, len); } @@ -178,10 +180,10 @@ public static double ParseUnsafeDouble(char[] buffer, int len, double[] missingV { if (buffer[0] == '"') { - return ParseEnclosedUnsafeDouble(buffer, len, missingValueEncodings); + return EncodeMissingEnclosedUnsafeDouble(buffer, len, missingValueEncodings); } - return ParseUnenclosedUnsafeDouble(buffer, len, missingValueEncodings); + return EncodeMissingUnenclosedUnsafeDouble(buffer, len, missingValueEncodings); } } @@ -213,7 +215,7 @@ private static DataValueType ParseUnenclosedMissingDataType(char[] buffer, int l } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static double ParseEnclosedUnsafeDouble(char[] buffer, int len, double[] missingValueEncodings) + private static double EncodeMissingEnclosedUnsafeDouble(char[] buffer, int len, double[] missingValueEncodings) { if (buffer[len - 1] != '"' || len < missingDataEntryMinLength || len > missingDataEntryMaxLength) { @@ -230,7 +232,7 @@ private static double ParseEnclosedUnsafeDouble(char[] buffer, int len, double[] } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static double ParseUnenclosedUnsafeDouble(char[] buffer, int len, double[] missingValueEncodings) + private static double EncodeMissingUnenclosedUnsafeDouble(char[] buffer, int len, double[] missingValueEncodings) { if (buffer[0] == '-' && len == 1) { @@ -336,17 +338,5 @@ private static decimal FastParseDecimalDangerous(char[] buffer, int len) if (buffer[0] == '-') return -n / decimalPowersOf10[len - decimalPosition]; else return n / decimalPowersOf10[len - decimalPosition]; } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool IsNumber(char[] buffer, int len) - { - if (buffer[0] <= '.') // Characters that can start a valid missing value code are ", - or . which are are "smaller or equal to" . - { - if (buffer[0] == '-') return len > 1; // Dodge negative numbers - return false; - } - - return true; - } } } diff --git a/Px.Utils/Validation/DataValidation/DataValidator.cs b/Px.Utils/Validation/DataValidation/DataValidator.cs index 03fa973f..6025418e 100644 --- a/Px.Utils/Validation/DataValidation/DataValidator.cs +++ b/Px.Utils/Validation/DataValidation/DataValidator.cs @@ -24,7 +24,6 @@ public class DataValidator(int rowLen, int numOfRows, int startRow, PxFileConfig private readonly List _dataSeparatorValidators = []; private EntryType _currentEntryType = EntryType.Unknown; - private byte _stringDelimeter; private List _currentEntry = []; private int _lineNumber = 1; private int _charPosition; @@ -138,7 +137,6 @@ private ValidationFeedback ValidateDataStream(Stream stream, CancellationToken? { ValidationFeedback validationFeedbacks = []; byte endOfData = (byte)_conf.Symbols.EntrySeparator; - _stringDelimeter = (byte)_conf.Symbols.Value.StringDelimeter; _currentEntry = new(_streamBufferSize); byte[] buffer = new byte[_streamBufferSize]; int bytesRead = 0; @@ -198,10 +196,11 @@ private void HandleEntryTypeChange(ref ValidationFeedback validationFeedbacks) } else { + // OBS: All valid numbers end with number char (_currentEntry[^1] > '.') List validators = _currentEntryType switch { EntryType.DataItemSeparator => _dataSeparatorValidators, - EntryType.DataItem => CurrentEntryIsNumber() ? _dataNumValidators : _dataStringValidators, + EntryType.DataItem => _currentEntry[^1] > '.' ? _dataNumValidators : _dataStringValidators, _ => _commonValidators }; @@ -246,18 +245,6 @@ private void HandleNonSeparatorType(ref ValidationFeedback validationFeedbacks) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool CurrentEntryIsNumber() - { - if (_currentEntry[0] <= '.') // Characters that can start a valid missing value code are ", - or . which are are "smaller or equal to" . - { - if (_currentEntry[0] == '-') return _currentEntry.Count > 1; // Dodge negative numbers - return false; - } - - return true; - } - private void ResetValidator() { _commonValidators.Clear(); From 298732258a55f9d56ccdb3b5e803992bc60626c4 Mon Sep 17 00:00:00 2001 From: Sakari Malkki Date: Wed, 15 Jan 2025 11:59:33 +0200 Subject: [PATCH 09/13] Fix typo --- .../DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs b/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs index 81c57aec..c41dabe7 100644 --- a/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs +++ b/Px.Utils.UnitTests/PxFileTests/DataTests/PxFileStreamDataReaderTests/DataReaderTests.cs @@ -113,7 +113,7 @@ public void ReadDoubleDataValuesValidIntegersAndMissingReturnsCorrectDoubleDataV } [TestMethod] - public void ReadDoubleDataValuesValidIntegersAndUnenclosedMissingRetuurnsCorrectDoubleDataValues() + public void ReadDoubleDataValuesValidIntegersAndUnenclosedMissingReturnsCorrectDoubleDataValues() { // Arrange From 84541c6895d7c2747f28dcce89da708422963e11 Mon Sep 17 00:00:00 2001 From: Sakari Malkki Date: Wed, 15 Jan 2025 12:46:43 +0200 Subject: [PATCH 10/13] Converts test fixtures to raw string literals for better readability --- .../Fixtures/DataReaderFixtures.cs | 130 ++++++++++-------- 1 file changed, 70 insertions(+), 60 deletions(-) diff --git a/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs b/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs index 337267a2..d388e661 100644 --- a/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs +++ b/Px.Utils.UnitTests/PxFileTests/Fixtures/DataReaderFixtures.cs @@ -2,69 +2,79 @@ { internal static class DataReaderFixtures { - internal static string MINIMAL_UTF8_20DATAVALUES => - "CHARSET=\"Unicode\";\n" + - "AXIS-VERSION=\"2013\";\n" + - "CODEPAGE=\"utf-8\";\n" + - "LANGUAGES=\"aa\",\"åå\",\"öö\";\n" + - "NEXT-UPDATE=\"20240131 08:00\";\n" + - "SUBJECT-AREA=\"test\";\n" + - "SUBJECT-AREA[åå]=\"test\";\n" + - "COPYRIGHT=YES;\n" + - "DATA=\n" + - "0 1 2 3 4 5 6 7 8 9 \n" + - "10 11 12 13 14 15 16 17 18 19;"; + internal static string MINIMAL_UTF8_20DATAVALUES = + """ + CHARSET=""Unicode""; + AXIS-VERSION=""2013""; + CODEPAGE=""utf-8""; + LANGUAGES=""aa"",""åå"",""öö""; + NEXT-UPDATE=""20240131 08:00""; + SUBJECT-AREA=""test""; + SUBJECT-AREA[åå]=""test""; + COPYRIGHT=YES; + DATA= + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19; + """; - internal static string MINIMAL_UTF8_20DATAVALUES_WITH_MISSING => - "CHARSET=\"Unicode\";\n" + - "AXIS-VERSION=\"2013\";\n" + - "CODEPAGE=\"utf-8\";\n" + - "LANGUAGES=\"aa\",\"åå\",\"öö\";\n" + - "NEXT-UPDATE=\"20240131 08:00\";\n" + - "SUBJECT-AREA=\"test\";\n" + - "SUBJECT-AREA[åå]=\"test\";\n" + - "COPYRIGHT=YES;\n" + - "DATA=\n" + - "\".\" 1 \".\" 3 \".\" 5 \".\" 7 \".\" 9 \n" + - "\"...\" 11 \"...\" 13 \"...\" 15 \"...\" 17 \"...\" 19;"; + internal static string MINIMAL_UTF8_20DATAVALUES_WITH_MISSING = + """ + CHARSET=""Unicode""; + AXIS-VERSION=""2013""; + CODEPAGE=""utf-8""; + LANGUAGES=""aa"",""åå"",""öö""; + NEXT-UPDATE=""20240131 08:00""; + SUBJECT-AREA=""test""; + SUBJECT-AREA[åå]=""test""; + COPYRIGHT=YES; + DATA= + "." 1 "." 3 "." 5 "." 7 "." 9 + "..." 11 "..." 13 "..." 15 "..." 17 "..." 19; + """; - internal static string MINIMAL_UTF8_20DATAVALUES_WITH_UNENCLOSED_MISSING => - "CHARSET=\"Unicode\";\n" + - "AXIS-VERSION=\"2013\";\n" + - "CODEPAGE=\"utf-8\";\n" + - "LANGUAGES=\"aa\",\"åå\",\"öö\";\n" + - "NEXT-UPDATE=\"20240131 08:00\";\n" + - "SUBJECT-AREA=\"test\";\n" + - "SUBJECT-AREA[åå]=\"test\";\n" + - "COPYRIGHT=YES;\n" + - "DATA=\n" + - "\".\" 1 \".\" 3 \".\" 5 \".\" 7 \".\" 9 \n" + - "... 11 ... 13 ... 15 ... 17 ... 19;"; + internal static string MINIMAL_UTF8_20DATAVALUES_WITH_UNENCLOSED_MISSING = + """ + CHARSET=""Unicode""; + AXIS-VERSION=""2013""; + CODEPAGE=""utf-8""; + LANGUAGES=""aa"",""åå"",""öö""; + NEXT-UPDATE=""20240131 08:00""; + SUBJECT-AREA=""test""; + SUBJECT-AREA[åå]=""test""; + COPYRIGHT=YES; + DATA= + "." 1 "." 3 "." 5 "." 7 "." 9 + ... 11 ... 13 ... 15 ... 17 ... 19; + """; - internal static string MINIMAL_UTF8_20DECIMALVALUES => - "CHARSET=\"Unicode\";\n" + - "AXIS-VERSION=\"2013\";\n" + - "CODEPAGE=\"utf-8\";\n" + - "LANGUAGES=\"aa\",\"åå\",\"öö\";\n" + - "NEXT-UPDATE=\"20240131 08:00\";\n" + - "SUBJECT-AREA=\"test\";\n" + - "SUBJECT-AREA[åå]=\"test\";\n" + - "COPYRIGHT=YES;\n" + - "DATA=\n" + - "0.0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 \n" + - "0.1 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19; "; // observe the trailing space + internal static string MINIMAL_UTF8_20DECIMALVALUES = + """ + CHARSET=""Unicode""; + AXIS-VERSION=""2013""; + CODEPAGE=""utf-8""; + LANGUAGES=""aa"",""åå"",""öö""; + NEXT-UPDATE=""20240131 08:00""; + SUBJECT-AREA=""test""; + SUBJECT-AREA[åå]=""test""; + COPYRIGHT=YES; + DATA= + 0.0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 + 0.1 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19; + """; // observe the trailing space - internal static string MINIMAL_UTF8_20ROWS => - "CHARSET=\"Unicode\";\n" + - "AXIS-VERSION=\"2013\";\n" + - "CODEPAGE=\"utf-8\";\n" + - "LANGUAGES=\"aa\",\"åå\",\"öö\";\n" + - "NEXT-UPDATE=\"20240131 08:00\";\n" + - "SUBJECT-AREA=\"test\";\n" + - "SUBJECT-AREA[åå]=\"test\";\n" + - "COPYRIGHT=YES;\n" + - "DATA=\n" + - "0.0 \n0.01 \n0.02 \n0.03 \n0.04 \n0.05 \n0.06 \n0.07 \n0.08 \n0.09 \n" + - "0.1 \n0.11 \n0.12 \n0.13 \n0.14 \n0.15 \n0.16 \n0.17 \n0.18 \n0.19;"; + internal static string MINIMAL_UTF8_20ROWS = + """ + CHARSET=""Unicode""; + AXIS-VERSION=""2013""; + CODEPAGE=""utf-8""; + LANGUAGES=""aa"",""åå"",""öö""; + NEXT-UPDATE=""20240131 08:00""; + SUBJECT-AREA=""test""; + SUBJECT-AREA[åå]=""test""; + COPYRIGHT=YES; + DATA= + 0.0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 + 0.1 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19; + """; // observe the trailing space } } From 6a82f548c34beecabc82d38f5c6f366bad3619c2 Mon Sep 17 00:00:00 2001 From: jsaarimaa Date: Fri, 31 Jan 2025 15:18:48 +0200 Subject: [PATCH 11/13] Added missing constructor parameter for content dimension values (#57) --- .../MatrixMetadataBuilderTests.cs | 26 +++++++++++++++++++ .../Dimensions/ContentDimensionValue.cs | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs b/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs index ad69a0f5..54a9772f 100644 --- a/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs +++ b/Px.Utils.UnitTests/ModelBuilderTests/MatrixMetadataBuilderTests.cs @@ -374,6 +374,32 @@ public void MultiLangPrecisionTestFromRecommendedFixture(int index, int expected Assert.IsFalse(contentDimension?.Values[index].AdditionalProperties.ContainsKey("PRECISION")); } + [TestMethod] + public void MultiLangContentDimensionAdditionalPropertiesTest() + { + ContentDimension? contentDimension = (ContentDimension?)Actual_3Lang.Dimensions.Find(d => d.Type == DimensionType.Content); + Assert.IsNotNull(contentDimension); + Assert.AreEqual(3, contentDimension.Values.Count); + foreach (ContentDimensionValue value in contentDimension.Values) + { + Assert.IsTrue(value.AdditionalProperties.ContainsKey("VALUENOTE")); + Assert.IsInstanceOfType(value.AdditionalProperties["VALUENOTE"]); + } + } + + [TestMethod] + public void SingleLangContentDimensionAdditionalPropertiesTest() + { + ContentDimension? contentDimension = (ContentDimension?)Actual_1Lang.Dimensions.Find(d => d.Type == DimensionType.Content); + Assert.IsNotNull(contentDimension); + Assert.AreEqual(3, contentDimension.Values.Count); + foreach (ContentDimensionValue value in contentDimension.Values) + { + Assert.IsTrue(value.AdditionalProperties.ContainsKey("VALUENOTE")); + Assert.IsInstanceOfType(value.AdditionalProperties["VALUENOTE"]); + } + } + #endregion #region Time Dimension Tests diff --git a/Px.Utils/Models/Metadata/Dimensions/ContentDimensionValue.cs b/Px.Utils/Models/Metadata/Dimensions/ContentDimensionValue.cs index 25c5dda4..252357c3 100644 --- a/Px.Utils/Models/Metadata/Dimensions/ContentDimensionValue.cs +++ b/Px.Utils/Models/Metadata/Dimensions/ContentDimensionValue.cs @@ -104,7 +104,7 @@ public ContentDimensionValue( MultilanguageString unit, DateTime lastUpdated, int precision) - : base(dimensionValue.Code, dimensionValue.Name, dimensionValue.IsVirtual) + : base(dimensionValue.Code, dimensionValue.Name, dimensionValue.IsVirtual, dimensionValue.AdditionalProperties) { Unit = unit; LastUpdated = lastUpdated; From b41c67d81c9dd50235c6df035c75cdcaa30383a1 Mon Sep 17 00:00:00 2001 From: jsaarimaa Date: Fri, 31 Jan 2025 15:19:10 +0200 Subject: [PATCH 12/13] Added support for missing values without quotes to the README file (#58) --- docs/PXFILE_SPECIFICATION.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/PXFILE_SPECIFICATION.md b/docs/PXFILE_SPECIFICATION.md index 989522df..ac5ecb0e 100644 --- a/docs/PXFILE_SPECIFICATION.md +++ b/docs/PXFILE_SPECIFICATION.md @@ -111,15 +111,15 @@ Whitespace characters are not significant in the value outside of ```"``` separa Some keywords have special value types. These are described in the following sections. #### DATA -- The value of the DATA entry is a list of values that can be either string or number. +- The value of the DATA entry is a list of values that can be either a missing value code or a number. - The list items are separated by a space (ASCII hex ```20```) or tab (ASCII hex ```09```). - The data list can be split into multiple lines - Each line must end with a space or tab character (ASCII hex ```20```) or (ASCII hex ```09```). - Only one kind of separator can be used in the same list. - The number values are subject to the same rules as the number value type. -- The strings have a limited set of contents that are allowed: one to six dots ```.``` that mark missing values or hyphen ```-``` that marks exact zero. +- The missing values have a limited set of contents that are allowed: one to six dots ```.``` that mark missing values or hyphen ```-``` that marks exact zero. - List of allowed strings: ```.```, ```..```, ```...```, ```....```, ```.....```, ```......```, ```-```. -- The strings must be marked by ```"``` characters. +- The missing values are not handled as normal strings, so they can be marked by ```"``` characters but it is not mandatory. ##### TIMEVAL - Timeval has two value types unique to the entry: From d4f1da1bb8843deed1014cc0da29ec31b2269a3e Mon Sep 17 00:00:00 2001 From: jsaarimaa Date: Mon, 3 Feb 2025 09:59:38 +0200 Subject: [PATCH 13/13] Updated version number to 1.1.3 (#59) --- Px.Utils/Px.Utils.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Px.Utils/Px.Utils.csproj b/Px.Utils/Px.Utils.csproj index e1af1cc8..5beae46a 100644 --- a/Px.Utils/Px.Utils.csproj +++ b/Px.Utils/Px.Utils.csproj @@ -2,7 +2,7 @@ Px.Utils - 1.1.2 + 1.1.3 net8.0 enable enable