Skip to content

Commit c13b305

Browse files
committed
Add serialization and deserialization classes and methods with pydantic
1 parent 407079a commit c13b305

File tree

8 files changed

+337
-641
lines changed

8 files changed

+337
-641
lines changed

src/guidellm/core/distribution.py

Lines changed: 31 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,202 +1,159 @@
1-
from typing import List, Union
1+
from typing import List, Optional, Union
22

33
import numpy as np
44
from loguru import logger
55

6+
from guidellm.core.serializable import Serializable
7+
68
__all__ = ["Distribution"]
79

810

9-
class Distribution:
11+
class Distribution(Serializable):
1012
"""
11-
A class to represent a statistical distribution and perform various statistical
12-
analyses.
13-
14-
:param data: List of numerical data points (int or float) to initialize the
15-
distribution.
16-
:type data: List[Union[int, float]], optional
13+
A class to represent a statistical distribution and perform various
14+
statistical analyses.
1715
"""
1816

19-
def __init__(self, data: List[Union[int, float]] = None):
20-
"""
21-
Initialize the Distribution with optional data.
17+
data: Optional[List[Union[int, float]]] = []
2218

23-
:param data: List of numerical data points to initialize the distribution,
24-
defaults to None.
25-
:type data: List[Union[int, float]], optional
26-
"""
27-
self._data = list(data) if data else []
28-
logger.debug(f"Initialized Distribution with data: {self._data}")
19+
def __init__(self, **data):
    """
    Build the Distribution from keyword arguments.

    Every keyword argument is forwarded unchanged to the parent initializer;
    the resulting ``data`` attribute is then logged at debug level.

    :param data: Keyword arguments passed through to the parent class.
    """
    super().__init__(**data)
    init_message = f"Initialized Distribution with data: {self.data}"
    logger.debug(init_message)
2922

3023
def __str__(self) -> str:
    """
    Return a human-readable summary of the Distribution.

    The summary shows the mean and median (two decimal places), the minimum,
    the maximum and the number of stored data points.

    :return: String representation of the Distribution.
    """
    # ``data`` is declared Optional, so a missing list is counted as zero
    # points instead of letting len(None) raise TypeError.
    count = len(self.data) if self.data else 0
    return (
        f"Distribution(mean={self.mean:.2f}, median={self.median:.2f}, "
        f"min={self.min}, max={self.max}, count={count})"
    )
4131

4232
def __repr__(self) -> str:
    """
    Build the debugging representation of the Distribution.

    :return: A string of the form ``Distribution(data=...)`` containing the
        raw data points.
    """
    raw_points = self.data
    return f"Distribution(data={raw_points})"
6037

6138
@property
def mean(self) -> float:
    """
    Arithmetic mean of the stored data points.

    :return: The mean of the distribution, or 0.0 (after logging a warning)
        when there are no data points.
    """
    if self.data:
        result = np.mean(self.data).item()
        logger.debug(f"Calculated mean: {result}")
        return result

    logger.warning("No data points available to calculate mean.")
    return 0.0
7651

7752
@property
def median(self) -> float:
    """
    Median of the stored data points.

    :return: The median of the distribution, or 0.0 (after logging a
        warning) when there are no data points.
    """
    if self.data:
        result = np.median(self.data).item()
        logger.debug(f"Calculated median: {result}")
        return result

    logger.warning("No data points available to calculate median.")
    return 0.0
9265

9366
@property
def variance(self) -> float:
    """
    Variance of the stored data points, as computed by ``np.var``.

    :return: The variance of the distribution, or 0.0 (after logging a
        warning) when there are no data points.
    """
    has_points = bool(self.data)
    if not has_points:
        logger.warning("No data points available to calculate variance.")
        return 0.0

    spread = np.var(self.data).item()
    logger.debug(f"Calculated variance: {spread}")
    return spread
10879

10980
@property
def std_deviation(self) -> float:
    """
    Standard deviation of the stored data points, as computed by ``np.std``.

    :return: The standard deviation of the distribution, or 0.0 (after
        logging a warning) when there are no data points.
    """
    has_points = bool(self.data)
    if not has_points:
        logger.warning("No data points available to calculate standard deviation.")
        return 0.0

    sigma = np.std(self.data).item()
    logger.debug(f"Calculated standard deviation: {sigma}")
    return sigma
12493

12594
def percentile(self, percentile: float) -> float:
    """
    Calculate and return the specified percentile of the distribution.

    :param percentile: The desired percentile to calculate (0-100).
    :return: The specified percentile of the distribution as a built-in
        float, or 0.0 (after logging a warning) when there are no data
        points.
    """
    if not self.data:
        logger.warning("No data points available to calculate percentile.")
        return 0.0

    # np.percentile yields a NumPy scalar; .item() converts it to a plain
    # Python float so the result matches the annotation and the sibling
    # statistics (mean, median, variance, std_deviation).
    percentile_value = np.percentile(self.data, percentile).item()
    logger.debug(f"Calculated {percentile}th percentile: {percentile_value}")
    return percentile_value
141107

142108
def percentiles(self, percentiles: List[float]) -> List[float]:
    """
    Compute several percentiles of the distribution in one call.

    :param percentiles: A list of desired percentiles to calculate (0-100).
    :return: A list with one value per requested percentile; a list of 0.0
        of the same length (after logging a warning) when there are no data
        points.
    """
    if self.data:
        computed = np.percentile(self.data, percentiles).tolist()
        logger.debug(f"Calculated percentiles {percentiles}: {computed}")
        return computed

    logger.warning("No data points available to calculate percentiles.")
    return [0.0] * len(percentiles)
158121

159122
@property
def min(self) -> float:
    """
    Return the minimum value of the distribution.

    :return: The minimum value of the distribution as a built-in Python
        number, or 0.0 (after logging a warning) when there are no data
        points.
    """
    if not self.data:
        logger.warning("No data points available to calculate minimum.")
        return 0.0

    # np.min yields a NumPy scalar (e.g. np.int64); .item() converts it to
    # a native Python number, consistent with mean/median/variance.
    min_value = np.min(self.data).item()
    logger.debug(f"Calculated min: {min_value}")
    return min_value
174135

175136
@property
def max(self) -> float:
    """
    Return the maximum value of the distribution.

    :return: The maximum value of the distribution as a built-in Python
        number, or 0.0 (after logging a warning) when there are no data
        points.
    """
    if not self.data:
        logger.warning("No data points available to calculate maximum.")
        return 0.0

    # np.max yields a NumPy scalar (e.g. np.int64); .item() converts it to
    # a native Python number, consistent with mean/median/variance.
    max_value = np.max(self.data).item()
    logger.debug(f"Calculated max: {max_value}")
    return max_value
190149

191150
@property
192151
def range(self) -> float:
193152
"""
194153
Calculate and return the range of the distribution (max - min).
195-
196154
:return: The range of the distribution.
197-
:rtype: float
198155
"""
199-
if not self._data:
156+
if not self.data:
200157
logger.warning("No data points available to calculate range.")
201158
return 0.0
202159

@@ -207,9 +164,7 @@ def range(self) -> float:
207164
def describe(self) -> dict:
208165
"""
209166
Return a dictionary describing various statistics of the distribution.
210-
211167
:return: A dictionary with statistical summaries of the distribution.
212-
:rtype: dict
213168
"""
214169
description = {
215170
"mean": self.mean,
@@ -230,19 +185,15 @@ def describe(self) -> dict:
230185
def add_data(self, new_data: List[Union[int, float]]):
    """
    Add new data points to the distribution.

    The existing list is extended in place when one is present.

    :param new_data: A list of new numerical data points to add.
    """
    if self.data is None:
        # ``data`` is declared Optional; start from an empty list rather
        # than failing with AttributeError on None.extend(...).
        self.data = []
    self.data.extend(new_data)
    logger.debug(f"Added new data: {new_data}")
239192

240193
def remove_data(self, remove_data: List[Union[int, float]]):
    """
    Remove specified data points from the distribution.

    Every stored point equal to any value in ``remove_data`` is dropped; the
    remaining points keep their order and are stored as a new list.

    :param remove_data: A list of numerical data points to remove.
    """
    # A set gives constant-time membership tests instead of rescanning the
    # removal list for every stored point.
    to_remove = set(remove_data)
    # ``data`` is declared Optional; treat None as an empty distribution.
    current = self.data or []
    self.data = [item for item in current if item not in to_remove]
    logger.debug(f"Removed data: {remove_data}")

0 commit comments

Comments
 (0)