1
- from typing import List , Union
1
+ from typing import List , Optional , Union
2
2
3
3
import numpy as np
4
4
from loguru import logger
5
5
6
+ from guidellm .core .serializable import Serializable
7
+
6
8
__all__ = ["Distribution" ]
7
9
8
10
9
- class Distribution :
11
+ class Distribution ( Serializable ) :
10
12
"""
11
- A class to represent a statistical distribution and perform various statistical
12
- analyses.
13
-
14
- :param data: List of numerical data points (int or float) to initialize the
15
- distribution.
16
- :type data: List[Union[int, float]], optional
13
+ A class to represent a statistical distribution and perform various
14
+ statistical analyses.
17
15
"""
18
16
19
- def __init__ (self , data : List [Union [int , float ]] = None ):
20
- """
21
- Initialize the Distribution with optional data.
17
+ data : Optional [List [Union [int , float ]]] = []
22
18
23
- :param data: List of numerical data points to initialize the distribution,
24
- defaults to None.
25
- :type data: List[Union[int, float]], optional
26
- """
27
- self ._data = list (data ) if data else []
28
- logger .debug (f"Initialized Distribution with data: { self ._data } " )
19
def __init__(self, **data):
    """
    Create the Distribution by handing every keyword argument to the parent
    initializer, then log the resulting ``data`` attribute at debug level.

    :param data: Keyword arguments forwarded unchanged to the parent class.
    """
    super().__init__(**data)
    init_message = f"Initialized Distribution with data: { self .data } "
    logger.debug(init_message)
29
22
30
23
def __str__(self) -> str:
    """
    Build a one-line summary of the Distribution.

    The summary lists the mean and median (two decimal places), followed by
    the min, max and the number of stored data points.

    :return: The formatted summary string.
    """
    # Evaluated in the same order as before: mean, median, min, max, count.
    center_part = f"Distribution(mean={ self .mean :.2f} , median={ self .median :.2f} , "
    bounds_part = f"min={ self .min } , max={ self .max } , count={ len (self .data )} )"
    return center_part + bounds_part
41
31
42
32
def __repr__(self) -> str:
    """
    Produce a debugging representation that shows the raw ``data`` attribute.

    :return: A string of the form ``Distribution(data=...)``.
    """
    debug_text = f"Distribution(data={ self .data } )"
    return debug_text
60
37
61
38
@property
def mean(self) -> float:
    """
    Arithmetic mean of the stored data points, computed with ``np.mean``.

    If there are no data points, a warning is logged and 0.0 is returned.

    :return: The mean of the distribution.
    """
    if self.data:
        mean_value = np.mean(self.data).item()
        logger.debug(f"Calculated mean: { mean_value } ")
        return mean_value

    # Empty (or missing) data: fall back to the neutral value.
    logger.warning("No data points available to calculate mean.")
    return 0.0
76
51
77
52
@property
def median(self) -> float:
    """
    Median of the stored data points, computed with ``np.median``.

    If there are no data points, a warning is logged and 0.0 is returned.

    :return: The median of the distribution.
    """
    if self.data:
        median_value = np.median(self.data).item()
        logger.debug(f"Calculated median: { median_value } ")
        return median_value

    # Empty (or missing) data: fall back to the neutral value.
    logger.warning("No data points available to calculate median.")
    return 0.0
92
65
93
66
@property
def variance(self) -> float:
    """
    Variance of the stored data points, computed with ``np.var`` using its
    default arguments.

    If there are no data points, a warning is logged and 0.0 is returned.

    :return: The variance of the distribution.
    """
    if self.data:
        variance_value = np.var(self.data).item()
        logger.debug(f"Calculated variance: { variance_value } ")
        return variance_value

    # Empty (or missing) data: fall back to the neutral value.
    logger.warning("No data points available to calculate variance.")
    return 0.0
108
79
109
80
@property
def std_deviation(self) -> float:
    """
    Standard deviation of the stored data points, computed with ``np.std``
    using its default arguments.

    If there are no data points, a warning is logged and 0.0 is returned.

    :return: The standard deviation of the distribution.
    """
    if self.data:
        std_deviation_value = np.std(self.data).item()
        logger.debug(f"Calculated standard deviation: { std_deviation_value } ")
        return std_deviation_value

    # Empty (or missing) data: fall back to the neutral value.
    logger.warning("No data points available to calculate standard deviation.")
    return 0.0
124
93
125
94
def percentile(self, percentile: float) -> float:
    """
    Calculate and return the specified percentile of the distribution.

    If there are no data points, a warning is logged and 0.0 is returned.

    :param percentile: The desired percentile to calculate (0-100).
    :return: The specified percentile of the distribution as a built-in
        Python float.
    """
    if not self.data:
        logger.warning("No data points available to calculate percentile.")
        return 0.0

    # np.percentile yields a NumPy scalar for a scalar percentile; .item()
    # converts it to a plain Python float so the annotated return type holds
    # and the value serializes like any other built-in number.
    percentile_value = np.percentile(self.data, percentile).item()
    logger.debug(f"Calculated { percentile } th percentile: { percentile_value } ")
    return percentile_value
141
107
142
108
def percentiles(self, percentiles: List[float]) -> List[float]:
    """
    Compute several percentiles of the distribution in one call.

    If there are no data points, a warning is logged and a list of 0.0
    values with the same length as ``percentiles`` is returned.

    :param percentiles: A list of desired percentiles to calculate (0-100).
    :return: A list of the specified percentiles of the distribution.
    """
    if self.data:
        percentiles_values = np.percentile(self.data, percentiles).tolist()
        logger.debug(f"Calculated percentiles { percentiles } : { percentiles_values } ")
        return percentiles_values

    # Empty (or missing) data: one neutral value per requested percentile.
    logger.warning("No data points available to calculate percentiles.")
    return [0.0] * len(percentiles)
158
121
159
122
@property
def min(self) -> float:
    """
    Return the minimum value of the distribution.

    If there are no data points, a warning is logged and 0.0 is returned.

    :return: The minimum value of the distribution as a built-in Python
        number.
    """
    if not self.data:
        logger.warning("No data points available to calculate minimum.")
        return 0.0

    # np.min returns a NumPy scalar (e.g. np.int64 for integer data), which
    # is not a built-in number; .item() converts it to the Python equivalent.
    min_value = np.min(self.data).item()
    logger.debug(f"Calculated min: { min_value } ")
    return min_value
174
135
175
136
@property
def max(self) -> float:
    """
    Return the maximum value of the distribution.

    If there are no data points, a warning is logged and 0.0 is returned.

    :return: The maximum value of the distribution as a built-in Python
        number.
    """
    if not self.data:
        logger.warning("No data points available to calculate maximum.")
        return 0.0

    # np.max returns a NumPy scalar (e.g. np.int64 for integer data), which
    # is not a built-in number; .item() converts it to the Python equivalent.
    max_value = np.max(self.data).item()
    logger.debug(f"Calculated max: { max_value } ")
    return max_value
190
149
191
150
@property
192
151
def range (self ) -> float :
193
152
"""
194
153
Calculate and return the range of the distribution (max - min).
195
-
196
154
:return: The range of the distribution.
197
- :rtype: float
198
155
"""
199
- if not self ._data :
156
+ if not self .data :
200
157
logger .warning ("No data points available to calculate range." )
201
158
return 0.0
202
159
@@ -207,9 +164,7 @@ def range(self) -> float:
207
164
def describe (self ) -> dict :
208
165
"""
209
166
Return a dictionary describing various statistics of the distribution.
210
-
211
167
:return: A dictionary with statistical summaries of the distribution.
212
- :rtype: dict
213
168
"""
214
169
description = {
215
170
"mean" : self .mean ,
@@ -230,19 +185,15 @@ def describe(self) -> dict:
230
185
def add_data(self, new_data: List[Union[int, float]]):
    """
    Add new data points to the distribution.

    The ``data`` attribute is declared as optional, so when it is ``None``
    it is first replaced with an empty list instead of failing on
    ``None.extend``.

    :param new_data: A list of new numerical data points to add.
    """
    if self.data is None:
        self.data = []

    self.data.extend(new_data)
    logger.debug(f"Added new data: { new_data } ")
239
192
240
193
def remove_data(self, remove_data: List[Union[int, float]]):
    """
    Remove specified data points from the distribution.

    Every stored value that compares equal to one of the values in
    ``remove_data`` is dropped; ``data`` is replaced with the filtered list.
    A ``data`` attribute of ``None`` is treated as empty.

    :param remove_data: A list of numerical data points to remove.
    """
    # Build the lookup once so each membership test is O(1) rather than a
    # scan of the whole removal list; fall back to a list if any value is
    # unhashable so the original equality semantics are still honoured.
    try:
        excluded = set(remove_data)
    except TypeError:
        excluded = list(remove_data)

    self.data = [item for item in (self.data or []) if item not in excluded]
    logger.debug(f"Removed data: { remove_data } ")
0 commit comments