From b504b7371e994ca6e22a7bf2e31185b9b120d096 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Sun, 16 Feb 2025 08:38:35 -0800 Subject: [PATCH 001/180] [RFC][V1] LogitsProcessor interface Signed-off-by: Nick Hill --- vllm/v1/attention/backends/flash_attn.py | 8 +- vllm/v1/attention/backends/mla/common.py | 31 ++- vllm/v1/sample/logits_processor.py | 244 +++++++++++++++++++++++ vllm/v1/sample/metadata.py | 11 +- vllm/v1/sample/ops/penalties.py | 16 -- vllm/v1/sample/sampler.py | 67 +------ vllm/v1/worker/gpu_input_batch.py | 83 ++++---- vllm/v1/worker/gpu_model_runner.py | 45 +++-- vllm/v1/worker/tpu_model_runner.py | 8 +- 9 files changed, 354 insertions(+), 159 deletions(-) create mode 100644 vllm/v1/sample/logits_processor.py diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index b4c7708daab..9c12406676a 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 """Attention layer with FlashAttention.""" +from collections.abc import Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Optional @@ -279,9 +280,10 @@ class FlashAttentionMetadataBuilder: def __init__(self, runner: "GPUModelRunner"): self.runner = runner - def reorder_batch(self, input_batch: "InputBatch", - scheduler_output: "SchedulerOutput") -> bool: - return False + def reorder_batch( + self, input_batch: "InputBatch", + scheduler_output: "SchedulerOutput") -> Sequence[tuple[int, int]]: + return () def build(self, num_reqs: int, num_actual_tokens: int, max_query_len: int, common_prefix_len: int): diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 8c7179ba0a8..dbd05428970 100644 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -186,6 +186,7 @@ import functools from abc import abstractmethod +from collections.abc import Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Generic, Optional, TypeVar @@ -377,8 +378,11 @@ def __init__(self, ) self.page_size = self.runner.block_size - def reorder_batch(self, input_batch: "InputBatch", - scheduler_output: "SchedulerOutput") -> bool: + def reorder_batch( + self, + input_batch: "InputBatch", + scheduler_output: "SchedulerOutput", + ) -> Sequence[tuple[int, int]]: # We now want to reorder the batch so that the "decode" requests are and # the front and the "prefill" requests are at the using the least amount # swaps possible. (NOTE for now we loosely use "decode" to mean requests @@ -415,20 +419,25 @@ def reorder_batch(self, input_batch: "InputBatch", # the above loop num_decodes = len(decodes) num_prefills = len(prefills) - first_prefill = 0 - modified_batch = False + swaps = [] for i in range(1, min(num_decodes, num_prefills) + 1): # If the decode is at the "back" of the batch, i, we can swap it # with the prefill closest to the front of the batch - if decodes[num_decodes - i] >= num_decodes: - input_batch.swap_states(prefills[first_prefill], - decodes[num_decodes - i]) - first_prefill += 1 - modified_batch = True - else: + if decodes[num_decodes - i] < num_decodes: break + i1 = prefills[i - 1] + i2 = decodes[num_decodes - i] + input_batch.swap_states(i1, i2) + + # Using "move" operation of LogitsProcessors via temporary slot + # currently. 
+ # TODO possibly add more direct swap operation to LPs + swaps.append((i1, input_batch.max_num_reqs)) + swaps.append((i2, i1)) + swaps.append((input_batch.max_num_reqs, i2)) + # Save for next `build` call # TODO(lucas): this is a bit of a hack, we should probably have a # better way of doing this @@ -437,7 +446,7 @@ def reorder_batch(self, input_batch: "InputBatch", self._num_decode_tokens = num_decode_tokens self._num_prefill_tokens = num_prefill_tokens - return modified_batch + return swaps def _build_decode(self, input_positions: torch.Tensor, block_table: torch.Tensor, seq_lens: torch.Tensor): diff --git a/vllm/v1/sample/logits_processor.py b/vllm/v1/sample/logits_processor.py new file mode 100644 index 00000000000..fd168613649 --- /dev/null +++ b/vllm/v1/sample/logits_processor.py @@ -0,0 +1,244 @@ +# SPDX-License-Identifier: Apache-2.0 +import dataclasses +from abc import ABC, abstractmethod +from collections.abc import Sequence +from typing import Optional + +import torch +from torch._prims_common import DeviceLikeType + +from vllm import SamplingParams + + +@dataclasses.dataclass +class BatchUpdate: + # The current number of requests in the batch. + batch_size: int + # Batch indices of any removed requests. + removed: Sequence[int] = () + # (from, to) batch indices of any requests + # moved within the batch. + moved: Sequence[tuple[int, int]] = () + # (index, params, output_tok_ids) for new + # requests added to the batch. + added: Sequence[tuple[int, SamplingParams, list[int]]] = () + + +class LogitsProcessor(ABC): + + @abstractmethod + def apply(self, logits: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + @abstractmethod + def update_states( + self, + batch_update: Optional[BatchUpdate] = None, + ) -> None: + """Called when there are new output tokens, prior + to each forward pass. + + Args: + batch_update is non-None iff there have been + changes to the batch makeup. + """ + raise NotImplementedError + + +###### ----- LogitsProcessor impls below here + + +class MinPLogitsProcessor(LogitsProcessor): + + def __init__(self, max_num_reqs: int, pin_memory: bool, + device: DeviceLikeType): + self.min_p_count: int = 0 + + self.min_p_cpu_tensor = torch.zeros((max_num_reqs, ), + dtype=torch.float32, + device="cpu", + pin_memory=pin_memory) + self.min_p_cpu = self.min_p_cpu_tensor.numpy() + # Pre-allocated device tensor + self.min_p_gpu: torch.Tensor = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device=device) + # Current slice of the device tensor + self.min_p: torch.Tensor = self.min_p_gpu[:0] + + def update_states(self, batch_update: Optional[BatchUpdate] = None): + if not batch_update: + return + + needs_update = False + if self.min_p_count: + # Process removed and moved requests. + for index in batch_update.removed: + if self.min_p_cpu[index]: + self.min_p_count -= 1 + needs_update = True + + for from_index, to_index in batch_update.moved: + min_p = self.min_p_cpu[from_index] + self.min_p_cpu[to_index] = min_p + if min_p: + needs_update = True + + # Process added requests. + for index, sampling_params, _ in batch_update.added: + min_p = sampling_params.min_p + self.min_p_cpu[index] = min_p + if min_p: + self.min_p_count += 1 + needs_update = True + + # Update tensors if needed. 
+ size = batch_update.batch_size + if self.min_p_count and (needs_update or self.min_p.shape[0] != size): + + self.min_p = self.min_p_gpu[:size] + self.min_p.copy_(self.min_p_cpu_tensor[:size], non_blocking=True) + self.min_p.unsqueeze_(1) + + def apply(self, logits: torch.Tensor) -> torch.Tensor: + if not self.min_p_count: + return logits + + # Convert logits to probability distribution + probability_values = torch.nn.functional.softmax(logits, dim=-1) + # Calculate maximum probabilities per sequence + max_probabilities = torch.amax(probability_values, + dim=-1, + keepdim=True) + # Adjust min_p + adjusted_min_p = max_probabilities.mul_(self.min_p) + # Identify valid tokens using threshold comparison + invalid_token_mask = probability_values < adjusted_min_p + # Apply mask using boolean indexing + logits[invalid_token_mask] = -float('inf') + return logits + + +class LogitBiasLogitsProcessor(LogitsProcessor): + + def __init__(self, pin_memory: bool, device: torch.device): + self.biases: dict[int, dict[int, float]] = {} + self.device = device + self.pin_memory = pin_memory + + self.bias_tensor: torch.Tensor = torch.tensor(()) + self.logits_slice: tuple[torch.Tensor, torch.Tensor] = (torch.tensor( + ()), torch.tensor(())) + + def update_states(self, batch_update: Optional[BatchUpdate] = None): + if not batch_update: + return + + needs_update = False + if self.biases: + # Process removed and moved requests. + for index in batch_update.removed: + if self.biases.pop(index, None): + needs_update = True + + for from_index, to_index in batch_update.moved: + if entry := self.biases.pop(from_index, None): + self.biases[to_index] = entry + needs_update = True + + # Process added requests. + for index, sampling_params, _ in batch_update.added: + if lb := sampling_params.logit_bias: + self.biases[index] = lb + needs_update = True + + # Update tensors if needed. + if self.biases and needs_update: + reqs, tok_ids, biases = [], [], [] + for req, lb in self.biases.items(): + reqs.extend([req] * len(lb)) + tok_ids.extend(lb.keys()) + biases.extend(lb.values()) + + self.bias_tensor = self._tensor(biases, torch.float32) + self.logits_slice = (self._tensor(reqs, torch.int32), + self._tensor(tok_ids, torch.int32)) + + def _tensor(self, data: list, dtype: torch.dtype) -> torch.Tensor: + return (torch.tensor(data, + device="cpu", + dtype=dtype, + pin_memory=self.pin_memory).to(device=self.device, + non_blocking=True)) + + def apply(self, logits: torch.Tensor) -> torch.Tensor: + if self.biases: + logits[self.logits_slice] += self.bias_tensor + return logits + + +class MinTokensLogitsProcessor(LogitsProcessor): + + def __init__(self, pin_memory: bool, device: torch.device): + # index -> (min_toks, output_token_ids, stop_token_ids) + self.min_toks: dict[int, tuple[int, Sequence[int], set[int]]] = {} + self.device = device + self.pin_memory = pin_memory + + self.logits_slice: tuple[torch.Tensor, torch.Tensor] = (torch.tensor( + ()), torch.tensor(())) + + def update_states(self, batch_update: Optional[BatchUpdate] = None): + needs_update = False + if batch_update: + if self.min_toks: + # Process removed and moved requests. + for index in batch_update.removed: + if self.min_toks.pop(index, None): + needs_update = True + + for from_index, to_index in batch_update.moved: + if entry := self.min_toks.pop(from_index, None): + self.min_toks[to_index] = entry + needs_update = True + + # Process added requests. 
+ for index, sampling_params, output_tok_ids in batch_update.added: + if ((min_tokens := sampling_params.min_tokens) + and len(output_tok_ids) < min_tokens): + self.min_toks[index] = (min_tokens, output_tok_ids, + sampling_params.all_stop_token_ids) + needs_update = True + + if self.min_toks: + # Check for any requests that have attained their min tokens. + to_remove = tuple(index for index, (min_toks, out_tok_ids, + _) in self.min_toks.items() + if len(out_tok_ids) >= min_toks) + if to_remove: + needs_update = True + for index in to_remove: + del self.min_toks[index] + + # Update tensors if needed. + if needs_update and self.min_toks: + reqs: list[int] = [] + tok_ids: list[int] = [] + for req, (_, _, stop_tok_ids) in self.min_toks.items(): + reqs.extend([req] * len(stop_tok_ids)) + tok_ids.extend(stop_tok_ids) + + self.logits_slice = (self._tensor(reqs, torch.int32), + self._tensor(tok_ids, torch.int32)) + + def _tensor(self, data: list, dtype: torch.dtype) -> torch.Tensor: + return (torch.tensor(data, + device="cpu", + dtype=dtype, + pin_memory=self.pin_memory).to(device=self.device, + non_blocking=True)) + + def apply(self, logits: torch.Tensor) -> torch.Tensor: + if self.min_toks: + logits[self.logits_slice] = -float("inf") + return logits diff --git a/vllm/v1/sample/metadata.py b/vllm/v1/sample/metadata.py index e97e1235fb3..e113c3a50c2 100644 --- a/vllm/v1/sample/metadata.py +++ b/vllm/v1/sample/metadata.py @@ -5,6 +5,8 @@ import torch +from vllm.v1.sample.logits_processor import LogitsProcessor + @dataclass class SamplingMetadata: @@ -15,7 +17,6 @@ class SamplingMetadata: top_p: Optional[torch.Tensor] top_k: Optional[torch.Tensor] - min_p: Optional[torch.Tensor] generators: dict[int, torch.Generator] @@ -30,14 +31,12 @@ class SamplingMetadata: output_token_ids: list[list[int]] - # req_index -> (min_tokens, stop_token_ids) - min_tokens: dict[int, tuple[int, set[int]]] - - logit_bias: list[Optional[dict[int, float]]] - # `allowed_token_ids_mask` is a 2D bool tensor of shape (max batch size, # vocab size). allowed_token_ids_mask: Optional[torch.Tensor] # req_index -> bad_words_token_ids bad_words_token_ids: dict[int, list[list[int]]] + + logits_procs: list[LogitsProcessor] + nongreedy_logits_procs: list[LogitsProcessor] diff --git a/vllm/v1/sample/ops/penalties.py b/vllm/v1/sample/ops/penalties.py index ed05e3f4840..4d95bc28200 100644 --- a/vllm/v1/sample/ops/penalties.py +++ b/vllm/v1/sample/ops/penalties.py @@ -6,22 +6,6 @@ from vllm.utils import is_pin_memory_available, make_tensor_with_pad -def apply_min_token_penalties( - logits: torch.Tensor, output_token_ids: list[list[int]], - min_tokens: dict[int, tuple[int, set[int]]]) -> None: - """ - Applies minimum token penalty by setting the logits of the stop tokens - to -inf. 
- """ - min_tokens_logits_to_penalize: list[tuple[int, int]] = [] - for index, (min_token, stop_token_ids) in min_tokens.items(): - if len(output_token_ids[index]) < min_token: - for stop_token_id in stop_token_ids: - min_tokens_logits_to_penalize.append((index, stop_token_id)) - if min_tokens_logits_to_penalize: - logits[tuple(zip(*min_tokens_logits_to_penalize))] = -float("inf") - - def apply_all_penalties( logits: torch.Tensor, prompt_token_ids: torch.Tensor, diff --git a/vllm/v1/sample/sampler.py b/vllm/v1/sample/sampler.py index 16561d30a6d..5fc9ee12eeb 100644 --- a/vllm/v1/sample/sampler.py +++ b/vllm/v1/sample/sampler.py @@ -7,8 +7,7 @@ from vllm.v1.outputs import LogprobsTensors, SamplerOutput from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.sample.ops.bad_words import apply_bad_words -from vllm.v1.sample.ops.penalties import (apply_all_penalties, - apply_min_token_penalties) +from vllm.v1.sample.ops.penalties import apply_all_penalties from vllm.v1.sample.ops.topk_topp_sampler import TopKTopPSampler _SAMPLING_EPS = 1e-5 @@ -37,12 +36,16 @@ def forward( # Use float32 for the logits. logits = logits.to(torch.float32) + # Apply allowed token ids. logits = self.apply_allowed_token_ids(logits, sampling_metadata) # Apply bad words exclusion. logits = self.apply_bad_words(logits, sampling_metadata) - # Apply logits bias. - logits = self.apply_logits_bias(logits, sampling_metadata) + + # Apply logits processors. + for processor in sampling_metadata.logits_procs: + logits = processor.apply(logits) + # Apply penalties (e.g., min_tokens, freq_penalties). logits = self.apply_penalties(logits, sampling_metadata) # Sample the next token. @@ -107,9 +110,9 @@ def sample( # Apply temperature. logits = self.apply_temperature(logits, sampling_metadata.temperature) - # Apply min_p. - if sampling_metadata.min_p is not None: - logits = self.apply_min_p(logits, sampling_metadata.min_p) + # Apply logits processors. + for processor in sampling_metadata.nongreedy_logits_procs: + logits = processor.apply(logits) # Apply top_k and/or top_p. random_sampled = self.topk_topp_sampler( @@ -184,10 +187,6 @@ def apply_penalties( logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> torch.Tensor: - if sampling_metadata.min_tokens: - apply_min_token_penalties(logits, - sampling_metadata.output_token_ids, - sampling_metadata.min_tokens) if not sampling_metadata.no_penalties: assert sampling_metadata.prompt_token_ids is not None logits = apply_all_penalties( @@ -200,52 +199,6 @@ def apply_penalties( ) return logits - def apply_min_p( - self, - logits: torch.Tensor, - min_p: torch.Tensor, - ) -> torch.Tensor: - """ - Filters logits using adaptive probability thresholding. - """ - # Convert logits to probability distribution - probability_values = torch.nn.functional.softmax(logits, dim=-1) - # Calculate maximum probabilities per sequence - max_probabilities = torch.amax(probability_values, - dim=-1, - keepdim=True) - # Reshape min_p for broadcasting - adjusted_min_p = min_p.unsqueeze(1) * max_probabilities - # Identify valid tokens using threshold comparison - valid_token_mask = probability_values >= adjusted_min_p - # Apply mask using boolean indexing - logits[~valid_token_mask] = -float('inf') - return logits - - def apply_logits_bias( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> torch.Tensor: - # TODO(houseroad): this implementation is extremely inefficient. 
- # One idea is implement this as a PyTorch C++ op, and we may - # even optimize the logit_bias layout. - - # Get vocabulary size from logits - vocab_size = logits.shape[-1] - - for i, logit_bias in enumerate(sampling_metadata.logit_bias): - if logit_bias: - for token_id, bias in logit_bias.items(): - # Check token_id bounds to ensure within vocabulary - if token_id < 0 or token_id >= vocab_size: - raise ValueError( - f"token_id {token_id} in logit_bias contains " - f"out-of-vocab token id. Vocabulary size: " - f"{vocab_size}") - logits[i, token_id] += bias - return logits - def apply_allowed_token_ids( self, logits: torch.Tensor, diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index a64cb97e012..4f04072b96f 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -12,6 +12,10 @@ from vllm.sampling_params import SamplingParams, SamplingType from vllm.utils import swap_dict_values from vllm.v1.outputs import LogprobsTensors +from vllm.v1.sample.logits_processor import (LogitBiasLogitsProcessor, + LogitsProcessor, + MinPLogitsProcessor, + MinTokensLogitsProcessor) from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.utils import copy_slice from vllm.v1.worker.block_table import BlockTable @@ -137,16 +141,6 @@ def __init__( self.top_k_cpu = self.top_k_cpu_tensor.numpy() self.top_k_reqs: set[str] = set() - self.min_p = torch.empty((max_num_reqs, ), - dtype=torch.float32, - device=device) - self.min_p_cpu_tensor = torch.empty((max_num_reqs, ), - dtype=torch.float32, - device="cpu", - pin_memory=pin_memory) - self.min_p_cpu = self.min_p_cpu_tensor.numpy() - self.min_p_reqs: set[str] = set() - # Frequency penalty related data structures self.frequency_penalties = torch.empty((max_num_reqs, ), dtype=torch.float, @@ -185,8 +179,7 @@ def __init__( self.repetition_penalties_cpu_tensor.numpy() self.repetition_penalties_reqs: set[str] = set() - # req_index -> (min_tokens, stop_token_ids) - self.min_tokens: dict[int, tuple[int, set[int]]] = {} + self.prompt_token_ids: Optional[torch.Tensor] = None # lora related self.request_lora_mapping = np.zeros((self.max_num_reqs, ), @@ -207,8 +200,19 @@ def __init__( # To accumulate prompt logprobs tensor chunks across prefill steps. self.in_progress_prompt_logprobs_cpu: dict[str, LogprobsTensors] = {} - self.logit_bias: list[Optional[dict[int, - float]]] = [None] * max_num_reqs + self.logit_procs: list[LogitsProcessor] = [ + MinTokensLogitsProcessor(pin_memory=pin_memory, device=device), + LogitBiasLogitsProcessor(pin_memory=pin_memory, device=device), + ] + self.nongreedy_logits_procs: list[LogitsProcessor] = [ + MinPLogitsProcessor( + pin_memory=pin_memory, + device=device, + # +1 for temporary swap space + max_num_reqs=max_num_reqs + 1) + ] + + # TODO convert this to LogitsProcessor self.has_allowed_token_ids: set[str] = set() # NOTE(lufang): In the mask tensor, if the corresponding token allowed, # the value is False. Since we use masked_fill_ to set -inf. 
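For readers of the RFC: the hunks above replace InputBatch's per-field min_p, logit_bias and min_tokens state with self-contained LogitsProcessor objects that are told about batch membership changes through BatchUpdate. The following minimal sketch (not part of the patch) shows what an out-of-tree processor could look like against the interface added in vllm/v1/sample/logits_processor.py; the class name and the "no immediate repeat" behaviour are hypothetical illustrations, not something this change introduces.

from typing import Optional

import torch

from vllm.v1.sample.logits_processor import BatchUpdate, LogitsProcessor


class NoImmediateRepeatLogitsProcessor(LogitsProcessor):
    """Hypothetical example: mask each request's most recent output token
    so it cannot be sampled twice in a row."""

    def __init__(self):
        # Batch index -> live reference to that request's output token ids.
        self.out_tok_ids: dict[int, list[int]] = {}

    def update_states(self,
                      batch_update: Optional[BatchUpdate] = None) -> None:
        if not batch_update:
            # Batch makeup is unchanged; nothing to do for this processor.
            return
        # Mirror the removed/moved/added handling of the in-tree processors.
        for index in batch_update.removed:
            self.out_tok_ids.pop(index, None)
        for from_index, to_index in batch_update.moved:
            if (entry := self.out_tok_ids.pop(from_index, None)) is not None:
                self.out_tok_ids[to_index] = entry
        for index, _params, output_tok_ids in batch_update.added:
            self.out_tok_ids[index] = output_tok_ids

    def apply(self, logits: torch.Tensor) -> torch.Tensor:
        for index, out_ids in self.out_tok_ids.items():
            if out_ids:
                logits[index, out_ids[-1]] = -float("inf")
        return logits

Keeping the live output_tok_ids list passed in BatchUpdate.added is how a processor can observe newly sampled tokens between steps without extra plumbing; MinTokensLogitsProcessor above relies on the same property.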
@@ -233,7 +237,7 @@ def add_request( self, request: "CachedRequestState", req_index: Optional[int] = None, - ) -> None: + ) -> int: if req_index is None: req_index = self.num_reqs assert req_index < self.max_num_reqs @@ -284,11 +288,8 @@ def add_request( else: top_k = self.vocab_size self.top_k_cpu[req_index] = top_k - self.min_p_cpu[req_index] = sampling_params.min_p self.frequency_penalties_cpu[ req_index] = sampling_params.frequency_penalty - if sampling_params.min_p > _SAMPLING_EPS: - self.min_p_reqs.add(req_id) if sampling_params.frequency_penalty != 0.0: self.frequency_penalties_reqs.add(req_id) self.presence_penalties_cpu[ @@ -299,9 +300,6 @@ def add_request( req_index] = sampling_params.repetition_penalty if sampling_params.repetition_penalty != 1.0: self.repetition_penalties_reqs.add(req_id) - if sampling_params.min_tokens: - self.min_tokens[req_index] = (sampling_params.min_tokens, - sampling_params.all_stop_token_ids) # NOTE(woosuk): self.generators should not include the requests that # do not have their own generator. @@ -312,8 +310,6 @@ def add_request( self.num_logprobs[req_id] = sampling_params.logprobs if sampling_params.prompt_logprobs is not None: self.num_prompt_logprobs[req_id] = sampling_params.prompt_logprobs - if sampling_params.logit_bias is not None: - self.logit_bias[req_index] = sampling_params.logit_bias if sampling_params.allowed_token_ids: self.has_allowed_token_ids.add(req_id) @@ -351,6 +347,8 @@ def add_request( # No LoRA self.request_lora_mapping[req_index] = 0 + return req_index + def remove_request(self, req_id: str) -> Optional[int]: """This method must always be followed by a call to condense().""" @@ -364,8 +362,6 @@ def remove_request(self, req_id: str) -> Optional[int]: self.random_reqs.discard(req_id) self.top_p_reqs.discard(req_id) self.top_k_reqs.discard(req_id) - self.min_p_reqs.discard(req_id) - self.min_tokens.pop(req_index, None) self.frequency_penalties_reqs.discard(req_id) self.presence_penalties_reqs.discard(req_id) self.repetition_penalties_reqs.discard(req_id) @@ -383,7 +379,6 @@ def remove_request(self, req_id: str) -> Optional[int]: self.lora_id_to_lora_request.pop(lora_id) self.request_lora_mapping[req_index] = 0 - self.logit_bias[req_index] = None self.has_allowed_token_ids.discard(req_id) if self.allowed_token_ids_mask_cpu_tensor is not None: # False means we don't fill with -inf. @@ -421,8 +416,6 @@ def swap_states(self, i1: int, i2: int) -> None: self.presence_penalties_cpu[i2], self.presence_penalties_cpu[i1] self.repetition_penalties_cpu[i1], self.repetition_penalties_cpu[i2] =\ self.repetition_penalties_cpu[i2], self.repetition_penalties_cpu[i1] - self.min_p_cpu[i1], self.min_p_cpu[i2] =\ - self.min_p_cpu[i2], self.min_p_cpu[i1] # NOTE: the following is unsafe # self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\ @@ -434,32 +427,33 @@ def swap_states(self, i1: int, i2: int) -> None: self.token_ids_cpu[i2, ...] 
= tmp swap_dict_values(self.generators, i1, i2) - swap_dict_values(self.min_tokens, i1, i2) swap_dict_values(self.bad_words_token_ids, i1, i2) self.request_lora_mapping[i1], self.request_lora_mapping[i2] =\ self.request_lora_mapping[i2], self.request_lora_mapping[i1] - self.logit_bias[i1], self.logit_bias[i2] =\ - self.logit_bias[i2], self.logit_bias[i1] if self.allowed_token_ids_mask_cpu_tensor is not None: self.allowed_token_ids_mask_cpu_tensor[i1], \ self.allowed_token_ids_mask_cpu_tensor[i2] =\ self.allowed_token_ids_mask_cpu_tensor[i2], \ self.allowed_token_ids_mask_cpu_tensor[i1] + + # TODO need to handle LogitsProcessors here + self.block_table.swap_row(i1, i2) - def condense(self, empty_req_indices: list[int]) -> None: + def condense(self, empty_req_indices: list[int]) -> list[tuple[int, int]]: num_reqs = self.num_reqs if num_reqs == 0: # The batched states are empty. self._req_ids.clear() self.req_output_token_ids.clear() - return + return [] # NOTE(woosuk): This function assumes that the empty_req_indices # is sorted in descending order. last_req_index = num_reqs + len(empty_req_indices) - 1 + swaps = [] while empty_req_indices: # Find the largest non-empty index. while last_req_index in empty_req_indices: @@ -471,6 +465,7 @@ def condense(self, empty_req_indices: list[int]) -> None: break # Swap the states. + swaps.append((last_req_index, empty_index)) req_id = self._req_ids[last_req_index] output_token_ids = self.req_output_token_ids[last_req_index] assert req_id is not None @@ -501,20 +496,14 @@ def condense(self, empty_req_indices: list[int]) -> None: empty_index] = self.presence_penalties_cpu[last_req_index] self.repetition_penalties_cpu[ empty_index] = self.repetition_penalties_cpu[last_req_index] - self.min_p_cpu[empty_index] = self.min_p_cpu[last_req_index] generator = self.generators.pop(last_req_index, None) if generator is not None: self.generators[empty_index] = generator - min_token = self.min_tokens.pop(last_req_index, None) - if min_token is not None: - self.min_tokens[empty_index] = min_token - self.request_lora_mapping[empty_index] = self.request_lora_mapping[ last_req_index] - self.logit_bias[empty_index] = self.logit_bias[last_req_index] - + # TODO convert these to LogitsProcessors if self.allowed_token_ids_mask_cpu_tensor is not None: self.allowed_token_ids_mask_cpu_tensor[ empty_index] = self.allowed_token_ids_mask_cpu_tensor[ @@ -524,6 +513,7 @@ def condense(self, empty_req_indices: list[int]) -> None: last_req_index, None) if bad_words_token_ids is not None: self.bad_words_token_ids[empty_index] = bad_words_token_ids + # Decrement last_req_index since it is now empty. 
last_req_index -= 1 @@ -531,6 +521,8 @@ def condense(self, empty_req_indices: list[int]) -> None: del self._req_ids[self.num_reqs:] del self.req_output_token_ids[self.num_reqs:] + return swaps + def refresh_sampling_metadata(self): self.sampling_metadata = self._make_sampling_metadata() @@ -545,8 +537,6 @@ def _make_sampling_metadata(self) -> SamplingMetadata: copy_slice(self.top_p_cpu_tensor, self.top_p, num_reqs) if not self.no_top_k: copy_slice(self.top_k_cpu_tensor, self.top_k, num_reqs) - if not self.no_min_p: - copy_slice(self.min_p_cpu_tensor, self.min_p, num_reqs) if not self.no_penalties: # Since syncing these tensors is expensive only copy them @@ -579,7 +569,6 @@ def _make_sampling_metadata(self) -> SamplingMetadata: all_random=self.all_random, top_p=None if self.no_top_p else self.top_p[:num_reqs], top_k=None if self.no_top_k else self.top_k[:num_reqs], - min_p=None if self.no_min_p else self.min_p[:num_reqs], generators=self.generators, max_num_logprobs=self.max_num_logprobs, prompt_token_ids=prompt_token_ids, @@ -587,11 +576,11 @@ def _make_sampling_metadata(self) -> SamplingMetadata: presence_penalties=self.presence_penalties[:num_reqs], repetition_penalties=self.repetition_penalties[:num_reqs], output_token_ids=cast(list[list[int]], self.req_output_token_ids), - min_tokens=self.min_tokens, no_penalties=self.no_penalties, - logit_bias=self.logit_bias[:num_reqs], allowed_token_ids_mask=allowed_token_ids_mask, bad_words_token_ids=self.bad_words_token_ids, + logits_procs=self.logit_procs, + nongreedy_logits_procs=self.nongreedy_logits_procs, ) def _make_prompt_token_ids_tensor(self) -> torch.Tensor: @@ -655,10 +644,6 @@ def no_top_p(self) -> bool: def no_top_k(self) -> bool: return len(self.top_k_reqs) == 0 - @property - def no_min_p(self) -> bool: - return len(self.min_p_reqs) == 0 - @property def no_penalties(self) -> bool: return (len(self.presence_penalties_reqs) == 0 diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index c3d84ab3773..b38c0cde1c6 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3,6 +3,7 @@ import gc import time import weakref +from itertools import chain from typing import TYPE_CHECKING, Optional, Union import numpy as np @@ -34,6 +35,7 @@ SlidingWindowSpec) from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, LogprobsTensors, ModelRunnerOutput) +from vllm.v1.sample.logits_processor import BatchUpdate from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.sample.rejection_sampler import RejectionSampler from vllm.v1.spec_decode.eagle import EagleProposer @@ -443,6 +445,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: # Add the new or resumed requests to the persistent batch. # The smaller empty indices are filled first. + removed = removed_req_indices + added = [] removed_req_indices = sorted(removed_req_indices, reverse=True) for req_id in req_ids_to_add: req_state = self.requests[req_id] @@ -452,11 +456,35 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: else: # Append to the end. req_index = None - self.input_batch.add_request(req_state, req_index) + req_index = self.input_batch.add_request(req_state, req_index) + added.append((req_index, req_state.sampling_params, + req_state.output_token_ids)) # Condense the batched states if there are empty indices. 
if removed_req_indices: - self.input_batch.condense(removed_req_indices) + moved = self.input_batch.condense(removed_req_indices) + else: + moved = [] + + # Some attention backends (namely MLA) may want to separate requests + # based on if the attention computation will be compute-bound or + # memory-bound. This gives them a hook to do that. + if swaps := self.attn_metadata_builder.reorder_batch( + self.input_batch, scheduler_output): + moved.extend(swaps) + batch_changed = True + + # Update states of logits processors + batch_update = None if not batch_changed else BatchUpdate( + removed=removed, + moved=moved, + added=added, + batch_size=self.input_batch.num_reqs, + ) + + for processor in chain(self.input_batch.logit_procs, + self.input_batch.nongreedy_logits_procs): + processor.update_states(batch_update) if batch_changed: self.input_batch.refresh_sampling_metadata() @@ -471,14 +499,6 @@ def _prepare_inputs( num_reqs = self.input_batch.num_reqs assert num_reqs > 0 - # Some attention backends (namely MLA) may want to separate requests - # based on if the attention computation will be compute-bound or - # memory-bound. This gives them a hook to do that. - modified_batch = self.attn_metadata_builder.reorder_batch( - self.input_batch, scheduler_output) - if modified_batch: - self.input_batch.refresh_sampling_metadata() - # OPTIMIZATION: Start copying the block table first. # This way, we can overlap the copy with the following CPU operations. self.input_batch.block_table.commit(num_reqs) @@ -1468,7 +1488,6 @@ def _dummy_sampler_run( all_random=False, top_p=dummy_tensors(0.9), top_k=dummy_tensors(logits.size(1) - 1), - min_p=None, generators={}, max_num_logprobs=None, no_penalties=True, @@ -1477,10 +1496,10 @@ def _dummy_sampler_run( presence_penalties=dummy_tensors(0.1), repetition_penalties=dummy_tensors(0.1), output_token_ids=[[] for _ in range(num_reqs)], - min_tokens={}, - logit_bias=[None for _ in range(num_reqs)], allowed_token_ids_mask=None, bad_words_token_ids={}, + logits_procs=[], + nongreedy_logits_procs=[], ) try: sampler_output = self.model.sample( diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index c61c449e179..33d43937aa8 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -1021,7 +1021,7 @@ def sample_from_hidden( sampling_metadata: TPUSupportedSamplingMetadata, ) -> torch.Tensor: """ - Sample with xla-friendly function. This function is to be traced + Sample with xla-friendly function. This function is to be traced separately from `forward` for lighter compilation overhead. """ logits = self.model.compute_logits(sample_hidden_states, None) @@ -1059,13 +1059,13 @@ def _get_padded_num_reqs_with_upper_limit(x: int, upper_limit: int) -> int: def _get_token_paddings(min_token_size: int, max_token_size: int, padding_gap: int) -> list[int]: - """Generate a list of padding size, starting from min_token_size, + """Generate a list of padding size, starting from min_token_size, ending with a number that can cover max_token_size - + If padding_gap == 0 then: increase 2X each time (exponential) else: - first increase the size to twice, + first increase the size to twice, then increase the padding size by padding_gap. 
""" # assert min_token_size is power of 2 From 55328d84a7d53d149cedf39fb50f2e793aa299b5 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Fri, 18 Apr 2025 17:25:21 +0000 Subject: [PATCH 002/180] extra_args Signed-off-by: Andrew Feldman --- vllm/entrypoints/openai/protocol.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 4639b4cea06..ed6b9927421 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -242,6 +242,12 @@ class ChatCompletionRequest(OpenAIBaseModel): ChatCompletionNamedToolChoiceParam, ]] = "none" + # Custom args param + extra_args: Optional[dict[str, Any]] = Field( + default=None, + description=("Additional kwargs to pass to sampling."), + ) + # NOTE this will be ignored by vLLM -- the model determines the behavior parallel_tool_calls: Optional[bool] = False user: Optional[str] = None @@ -514,7 +520,8 @@ def to_sampling_params( output_kind=RequestOutputKind.DELTA if self.stream \ else RequestOutputKind.FINAL_ONLY, guided_decoding=guided_decoding, - logit_bias=self.logit_bias) + logit_bias=self.logit_bias, + extra_args=self.extra_args) def _get_guided_json_from_tool( self) -> Optional[Union[str, dict, BaseModel]]: @@ -718,6 +725,12 @@ class CompletionRequest(OpenAIBaseModel): top_p: Optional[float] = None user: Optional[str] = None + # Custom args param + extra_args: Optional[dict[str, Any]] = Field( + default=None, + description=("Additional kwargs to pass to sampling."), + ) + # doc: begin-completion-sampling-params use_beam_search: bool = False top_k: Optional[int] = None @@ -932,7 +945,8 @@ def to_sampling_params( else RequestOutputKind.FINAL_ONLY, guided_decoding=guided_decoding, logit_bias=self.logit_bias, - allowed_token_ids=self.allowed_token_ids) + allowed_token_ids=self.allowed_token_ids, + extra_args=self.extra_args) @model_validator(mode="before") @classmethod @@ -1586,6 +1600,12 @@ class TranscriptionRequest(OpenAIBaseModel): to automatically increase the temperature until certain thresholds are hit. """ + # Custom args param + extra_args: Optional[dict[str, Any]] = Field( + default=None, + description=("Additional kwargs to pass to sampling."), + ) + timestamp_granularities: list[Literal["word", "segment"]] = Field( alias="timestamp_granularities[]", default=[]) """The timestamp granularities to populate for this transcription. 
@@ -1628,7 +1648,8 @@ def to_sampling_params( max_tokens=max_tokens, output_kind=RequestOutputKind.DELTA if self.stream \ - else RequestOutputKind.FINAL_ONLY) + else RequestOutputKind.FINAL_ONLY, + extra_args=self.extra_args) @model_validator(mode="before") @classmethod From 191b9e1aa91550b31bb50fde7d3627fbddbba7d5 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Tue, 22 Apr 2025 06:08:06 +0000 Subject: [PATCH 003/180] rename Signed-off-by: Andrew Feldman --- vllm/entrypoints/openai/protocol.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 6b5d4077562..b3a1e4c34f3 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -242,8 +242,8 @@ class ChatCompletionRequest(OpenAIBaseModel): ChatCompletionNamedToolChoiceParam, ]] = "none" - # Custom args param - extra_args: Optional[dict[str, Any]] = Field( + # Custom sampling params + extra_sampling_params: Optional[dict[str, Any]] = Field( default=None, description=("Additional kwargs to pass to sampling."), ) @@ -521,7 +521,7 @@ def to_sampling_params( else RequestOutputKind.FINAL_ONLY, guided_decoding=guided_decoding, logit_bias=self.logit_bias, - extra_args=self.extra_args) + extra_args=self.extra_sampling_params) def _get_guided_json_from_tool( self) -> Optional[Union[str, dict, BaseModel]]: @@ -726,7 +726,7 @@ class CompletionRequest(OpenAIBaseModel): user: Optional[str] = None # Custom args param - extra_args: Optional[dict[str, Any]] = Field( + extra_sampling_params: Optional[dict[str, Any]] = Field( default=None, description=("Additional kwargs to pass to sampling."), ) @@ -946,7 +946,7 @@ def to_sampling_params( guided_decoding=guided_decoding, logit_bias=self.logit_bias, allowed_token_ids=self.allowed_token_ids, - extra_args=self.extra_args) + extra_args=self.extra_sampling_params) @model_validator(mode="before") @classmethod From 1b658cdf5e03cc1d44add219551d81049f0511b1 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Tue, 22 Apr 2025 06:09:05 +0000 Subject: [PATCH 004/180] rename Signed-off-by: Andrew Feldman --- vllm/entrypoints/openai/protocol.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index b3a1e4c34f3..85838cc97fb 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -1590,14 +1590,14 @@ class TranscriptionRequest(OpenAIBaseModel): `verbose_json`, or `vtt`. """ - ## TODO (varun) : Support if set to 0, certain thresholds are met !! - # Custom args param - extra_args: Optional[dict[str, Any]] = Field( + extra_sampling_params: Optional[dict[str, Any]] = Field( default=None, description=("Additional kwargs to pass to sampling."), ) + ## TODO (varun) : Support if set to 0, certain thresholds are met !! + timestamp_granularities: list[Literal["word", "segment"]] = Field( alias="timestamp_granularities[]", default=[]) """The timestamp granularities to populate for this transcription. 
@@ -1705,7 +1705,7 @@ def to_sampling_params( output_kind=RequestOutputKind.DELTA if self.stream \ else RequestOutputKind.FINAL_ONLY, - extra_args=self.extra_args) + extra_args=self.extra_sampling_params) @model_validator(mode="before") @classmethod From 6a0f87c95b14f40d80f67048f403af643143c908 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Tue, 22 Apr 2025 07:25:53 +0000 Subject: [PATCH 005/180] extra_body Signed-off-by: Andrew Feldman --- vllm/entrypoints/openai/api_server.py | 14 ++++++++++++++ vllm/entrypoints/openai/protocol.py | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 13681958089..1d8ec50692a 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -463,6 +463,17 @@ async def show_version(): return JSONResponse(content=ver) +RequestWithExtraBody = Union[CompletionRequest, ChatCompletionRequest, + TranscriptionRequest] + + +def _merge_extra_body(request: RequestWithExtraBody) -> None: + """Integrate extra body arguments""" + for key, value in request.extra_body.items(): + setattr(request, key, value) + request.extra_body = None + + @router.post("/v1/chat/completions", dependencies=[Depends(validate_json_request)]) @with_cancellation @@ -494,6 +505,9 @@ async def create_completion(request: CompletionRequest, raw_request: Request): if handler is None: return base(raw_request).create_error_response( message="The model does not support Completions API") + if request.extra_body: + # Integrate extra body arguments + _merge_extra_body(request) generator = await handler.create_completion(request, raw_request) if isinstance(generator, ErrorResponse): diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 85838cc97fb..e2eed0b1326 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -248,6 +248,12 @@ class ChatCompletionRequest(OpenAIBaseModel): description=("Additional kwargs to pass to sampling."), ) + # Catch-all for request attributes beyond the OpenAI API spec + extra_body: Optional[dict[str, Any]] = Field( + default=None, + description=("Specify arguments beyond the OpenAI API spec."), + ) + # NOTE this will be ignored by vLLM -- the model determines the behavior parallel_tool_calls: Optional[bool] = False user: Optional[str] = None @@ -731,6 +737,12 @@ class CompletionRequest(OpenAIBaseModel): description=("Additional kwargs to pass to sampling."), ) + # Catch-all for request attributes beyond the OpenAI API spec + extra_body: Optional[dict[str, Any]] = Field( + default=None, + description=("Specify arguments beyond the OpenAI API spec."), + ) + # doc: begin-completion-sampling-params use_beam_search: bool = False top_k: Optional[int] = None @@ -1596,6 +1608,12 @@ class TranscriptionRequest(OpenAIBaseModel): description=("Additional kwargs to pass to sampling."), ) + # Catch-all for request attributes beyond the OpenAI API spec + extra_body: Optional[dict[str, Any]] = Field( + default=None, + description=("Specify arguments beyond the OpenAI API spec."), + ) + ## TODO (varun) : Support if set to 0, certain thresholds are met !! 
timestamp_granularities: list[Literal["word", "segment"]] = Field( From ac57a7f51763ffccb29d71f305c37e96be50b599 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Tue, 22 Apr 2025 08:21:10 +0000 Subject: [PATCH 006/180] completion custom arg unit test Signed-off-by: Andrew Feldman --- .../v1/entrypoints/openai/test_completion.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/v1/entrypoints/openai/test_completion.py b/tests/v1/entrypoints/openai/test_completion.py index 57ca99e1f68..4bdf14d9927 100644 --- a/tests/v1/entrypoints/openai/test_completion.py +++ b/tests/v1/entrypoints/openai/test_completion.py @@ -80,6 +80,38 @@ async def test_single_completion(client: openai.AsyncOpenAI, assert completion.choices[0].prompt_logprobs is None +@pytest.mark.asyncio +@pytest.mark.parametrize( + "model_name", + [MODEL_NAME], +) +async def test_custom_arg(client: openai.AsyncOpenAI, model_name: str) -> None: + """Test that custom arg works and does not break completion. + Issue a request with a contradictory `max_tokens` setting + in `extra_body`; test that the value in `extra_body` was + applied. + """ + completion = await client.completions.create( + model=model_name, + prompt="Hello, my name is", + max_tokens=10, + temperature=0.0, + # Contradictory `max_tokens` + extra_body={ + "max_tokens": 5, + "ignore_eos": True + }) + + # Assert: valid completion with `extra_body["max_tokens"]` tokens + assert completion.id is not None + assert completion.choices is not None and len(completion.choices) == 1 + choice = completion.choices[0] + assert len(choice.text) >= 5 + assert choice.finish_reason == "length" + assert completion.usage == openai.types.CompletionUsage( + completion_tokens=5, prompt_tokens=6, total_tokens=11) + + @pytest.mark.asyncio @pytest.mark.parametrize( "model_name", From 5c436091cc3cbb6c1a16496aff3bda3dfa338159 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Wed, 23 Apr 2025 14:47:35 +0000 Subject: [PATCH 007/180] tweak extra_args; test sampling params extra args via api Signed-off-by: Andrew Feldman --- .../v1/entrypoints/openai/test_completion.py | 42 +++++++++++++++---- vllm/sampling_params.py | 12 +++--- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/tests/v1/entrypoints/openai/test_completion.py b/tests/v1/entrypoints/openai/test_completion.py index 4bdf14d9927..efaf5cc23db 100644 --- a/tests/v1/entrypoints/openai/test_completion.py +++ b/tests/v1/entrypoints/openai/test_completion.py @@ -85,13 +85,16 @@ async def test_single_completion(client: openai.AsyncOpenAI, "model_name", [MODEL_NAME], ) -async def test_custom_arg(client: openai.AsyncOpenAI, model_name: str) -> None: +async def test_completion_custom_arg(client: openai.AsyncOpenAI, + model_name: str) -> None: """Test that custom arg works and does not break completion. - Issue a request with a contradictory `max_tokens` setting + 1. Issue a request with a contradictory `max_tokens` setting in `extra_body`; test that the value in `extra_body` was applied. + 2. Issue a request with a contradictory `max_tokens` setting + in `extra_sampling_params`; test that the value is applied. 
""" - completion = await client.completions.create( + completion_body = await client.completions.create( model=model_name, prompt="Hello, my name is", max_tokens=10, @@ -103,12 +106,37 @@ async def test_custom_arg(client: openai.AsyncOpenAI, model_name: str) -> None: }) # Assert: valid completion with `extra_body["max_tokens"]` tokens - assert completion.id is not None - assert completion.choices is not None and len(completion.choices) == 1 - choice = completion.choices[0] + assert completion_body.id is not None + assert completion_body.choices is not None and len( + completion_body.choices) == 1 + choice = completion_body.choices[0] assert len(choice.text) >= 5 assert choice.finish_reason == "length" - assert completion.usage == openai.types.CompletionUsage( + assert completion_body.usage == openai.types.CompletionUsage( + completion_tokens=5, prompt_tokens=6, total_tokens=11) + + completion_sampling_params = await client.completions.create( + model=model_name, + prompt="Hello, my name is", + temperature=0.0, + # Contradictory `max_tokens` + extra_body={ + "ignore_eos": True, + "extra_sampling_params": { + # Contradictory max_tokens + "max_tokens": 5 + } + }) + + # Assert: valid completion with + # `extra_body["extra_sampling_params"]["max_tokens"]` tokens + assert completion_sampling_params.id is not None + assert (completion_sampling_params.choices is not None + and len(completion_sampling_params.choices) == 1) + choice = completion_sampling_params.choices[0] + assert len(choice.text) >= 5 + assert choice.finish_reason == "length" + assert completion_sampling_params.usage == openai.types.CompletionUsage( completion_tokens=5, prompt_tokens=6, total_tokens=11) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 707a757ca83..319ba42b6c0 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -199,7 +199,7 @@ class SamplingParams( Defaults to None. extra_args: Arbitrary additional args, that can be used by custom sampling implementations. Not used by any in-tree sampling - implementations. + implementations. (Not actually a class member.) 
""" n: int = 1 @@ -242,7 +242,6 @@ class SamplingParams( guided_decoding: Optional[GuidedDecodingParams] = None logit_bias: Optional[dict[int, float]] = None allowed_token_ids: Optional[list[int]] = None - extra_args: Optional[dict[str, Any]] = None # Fields used for bad words bad_words: Optional[list[str]] = None @@ -288,8 +287,7 @@ def from_optional( int(token): min(100.0, max(-100.0, bias)) for token, bias in logit_bias.items() } - - return SamplingParams( + sampling_params = SamplingParams( n=1 if n is None else n, best_of=best_of, presence_penalty=0.0 @@ -321,8 +319,12 @@ def from_optional( guided_decoding=guided_decoding, logit_bias=logit_bias, allowed_token_ids=allowed_token_ids, - extra_args=extra_args, ) + # Custom sampling params + if extra_args: + for attr_name, attr_val in extra_args.items(): + setattr(sampling_params, attr_name, attr_val) + return sampling_params def __post_init__(self) -> None: # how we deal with `best_of``: From 368f907aa05c1c26142378c14e56e5734fa57f29 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Wed, 23 Apr 2025 14:52:23 +0000 Subject: [PATCH 008/180] remove unnecessary extra_body field/breakout Signed-off-by: Andrew Feldman --- vllm/entrypoints/openai/api_server.py | 14 -------------- vllm/entrypoints/openai/protocol.py | 18 ------------------ 2 files changed, 32 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 1d8ec50692a..13681958089 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -463,17 +463,6 @@ async def show_version(): return JSONResponse(content=ver) -RequestWithExtraBody = Union[CompletionRequest, ChatCompletionRequest, - TranscriptionRequest] - - -def _merge_extra_body(request: RequestWithExtraBody) -> None: - """Integrate extra body arguments""" - for key, value in request.extra_body.items(): - setattr(request, key, value) - request.extra_body = None - - @router.post("/v1/chat/completions", dependencies=[Depends(validate_json_request)]) @with_cancellation @@ -505,9 +494,6 @@ async def create_completion(request: CompletionRequest, raw_request: Request): if handler is None: return base(raw_request).create_error_response( message="The model does not support Completions API") - if request.extra_body: - # Integrate extra body arguments - _merge_extra_body(request) generator = await handler.create_completion(request, raw_request) if isinstance(generator, ErrorResponse): diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index e2eed0b1326..85838cc97fb 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -248,12 +248,6 @@ class ChatCompletionRequest(OpenAIBaseModel): description=("Additional kwargs to pass to sampling."), ) - # Catch-all for request attributes beyond the OpenAI API spec - extra_body: Optional[dict[str, Any]] = Field( - default=None, - description=("Specify arguments beyond the OpenAI API spec."), - ) - # NOTE this will be ignored by vLLM -- the model determines the behavior parallel_tool_calls: Optional[bool] = False user: Optional[str] = None @@ -737,12 +731,6 @@ class CompletionRequest(OpenAIBaseModel): description=("Additional kwargs to pass to sampling."), ) - # Catch-all for request attributes beyond the OpenAI API spec - extra_body: Optional[dict[str, Any]] = Field( - default=None, - description=("Specify arguments beyond the OpenAI API spec."), - ) - # doc: begin-completion-sampling-params use_beam_search: bool = False top_k: 
Optional[int] = None @@ -1608,12 +1596,6 @@ class TranscriptionRequest(OpenAIBaseModel): description=("Additional kwargs to pass to sampling."), ) - # Catch-all for request attributes beyond the OpenAI API spec - extra_body: Optional[dict[str, Any]] = Field( - default=None, - description=("Specify arguments beyond the OpenAI API spec."), - ) - ## TODO (varun) : Support if set to 0, certain thresholds are met !! timestamp_granularities: list[Literal["word", "segment"]] = Field( From a90311a94beffcd4ce57e3030c85c6e4df99ca1b Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Wed, 23 Apr 2025 14:57:34 +0000 Subject: [PATCH 009/180] removed transcription scenario Signed-off-by: Andrew Feldman --- vllm/entrypoints/openai/protocol.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 85838cc97fb..59ae529cefc 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -1590,12 +1590,6 @@ class TranscriptionRequest(OpenAIBaseModel): `verbose_json`, or `vtt`. """ - # Custom args param - extra_sampling_params: Optional[dict[str, Any]] = Field( - default=None, - description=("Additional kwargs to pass to sampling."), - ) - ## TODO (varun) : Support if set to 0, certain thresholds are met !! timestamp_granularities: list[Literal["word", "segment"]] = Field( @@ -1704,8 +1698,7 @@ def to_sampling_params( presence_penalty=self.presence_penalty, output_kind=RequestOutputKind.DELTA if self.stream \ - else RequestOutputKind.FINAL_ONLY, - extra_args=self.extra_sampling_params) + else RequestOutputKind.FINAL_ONLY) @model_validator(mode="before") @classmethod From 42b0d31b887b42b4c3897381c28076c8c314900e Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Thu, 1 May 2025 14:23:10 +0000 Subject: [PATCH 010/180] small changes Signed-off-by: Andrew Feldman --- vllm/v1/sample/logits_processor.py | 6 +++--- vllm/v1/sample/metadata.py | 3 +++ vllm/v1/worker/tpu_model_runner.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/vllm/v1/sample/logits_processor.py b/vllm/v1/sample/logits_processor.py index fd168613649..7d9342ac1b3 100644 --- a/vllm/v1/sample/logits_processor.py +++ b/vllm/v1/sample/logits_processor.py @@ -60,11 +60,11 @@ def __init__(self, max_num_reqs: int, pin_memory: bool, pin_memory=pin_memory) self.min_p_cpu = self.min_p_cpu_tensor.numpy() # Pre-allocated device tensor - self.min_p_gpu: torch.Tensor = torch.empty((max_num_reqs, ), + self.min_p_device: torch.Tensor = torch.empty((max_num_reqs, ), dtype=torch.float32, device=device) # Current slice of the device tensor - self.min_p: torch.Tensor = self.min_p_gpu[:0] + self.min_p: torch.Tensor = self.min_p_device[:0] def update_states(self, batch_update: Optional[BatchUpdate] = None): if not batch_update: @@ -96,7 +96,7 @@ def update_states(self, batch_update: Optional[BatchUpdate] = None): size = batch_update.batch_size if self.min_p_count and (needs_update or self.min_p.shape[0] != size): - self.min_p = self.min_p_gpu[:size] + self.min_p = self.min_p_device[:size] self.min_p.copy_(self.min_p_cpu_tensor[:size], non_blocking=True) self.min_p.unsqueeze_(1) diff --git a/vllm/v1/sample/metadata.py b/vllm/v1/sample/metadata.py index e113c3a50c2..0036582d493 100644 --- a/vllm/v1/sample/metadata.py +++ b/vllm/v1/sample/metadata.py @@ -38,5 +38,8 @@ class SamplingMetadata: # req_index -> bad_words_token_ids bad_words_token_ids: dict[int, list[list[int]]] + # Some logits processors don't affect greedy 
decoding (or if they do, + # only due to precision errors); "non-greedy" processors are + # only applied to random-sampled requests in the batch. logits_procs: list[LogitsProcessor] nongreedy_logits_procs: list[LogitsProcessor] diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 33d43937aa8..db91f199f11 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -1065,7 +1065,7 @@ def _get_token_paddings(min_token_size: int, max_token_size: int, If padding_gap == 0 then: increase 2X each time (exponential) else: - first increase the size to twice, + first increase the size to twice, then increase the padding size by padding_gap. """ # assert min_token_size is power of 2 From f1ef8efe02418ebdf048eb650bfbd39154202f53 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Fri, 2 May 2025 14:25:48 +0000 Subject: [PATCH 011/180] spec decode min p Signed-off-by: Andrew Feldman --- vllm/v1/sample/logits_processor.py | 3 +++ vllm/v1/spec_decode/utils.py | 2 +- vllm/v1/worker/gpu_input_batch.py | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/vllm/v1/sample/logits_processor.py b/vllm/v1/sample/logits_processor.py index 7d9342ac1b3..be812c11b76 100644 --- a/vllm/v1/sample/logits_processor.py +++ b/vllm/v1/sample/logits_processor.py @@ -66,6 +66,9 @@ def __init__(self, max_num_reqs: int, pin_memory: bool, # Current slice of the device tensor self.min_p: torch.Tensor = self.min_p_device[:0] + def get_min_p_by_index(self, index: int) -> float: + return float(self.min_p_cpu[index]) + def update_states(self, batch_update: Optional[BatchUpdate] = None): if not batch_update: return diff --git a/vllm/v1/spec_decode/utils.py b/vllm/v1/spec_decode/utils.py index ce81a40ee3a..e9de0086e59 100644 --- a/vllm/v1/spec_decode/utils.py +++ b/vllm/v1/spec_decode/utils.py @@ -3,7 +3,7 @@ def is_spec_decode_supported(req_id: str, input_batch: InputBatch) -> bool: - if req_id in input_batch.min_p_reqs: + if input_batch.get_min_p_by_req_id(req_id): # Spec decode doesn't support min_p sampling. return False elif (req_id in input_batch.frequency_penalties_reqs diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 4f04072b96f..265c09901f0 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -200,16 +200,20 @@ def __init__( # To accumulate prompt logprobs tensor chunks across prefill steps. self.in_progress_prompt_logprobs_cpu: dict[str, LogprobsTensors] = {} + # Define logits processors + # TODO(andy): logits processor list should be extensible via engine + # constructor argument; for now the list is fixed. 
self.logit_procs: list[LogitsProcessor] = [ MinTokensLogitsProcessor(pin_memory=pin_memory, device=device), LogitBiasLogitsProcessor(pin_memory=pin_memory, device=device), ] - self.nongreedy_logits_procs: list[LogitsProcessor] = [ - MinPLogitsProcessor( + self.min_p_logitsproc = MinPLogitsProcessor( pin_memory=pin_memory, device=device, # +1 for temporary swap space max_num_reqs=max_num_reqs + 1) + self.nongreedy_logits_procs: list[LogitsProcessor] = [ + self.min_p_logitsproc ] # TODO convert this to LogitsProcessor @@ -624,6 +628,12 @@ def make_lora_inputs( return prompt_lora_mapping, token_lora_mapping, active_lora_requests + def get_min_p_by_req_id(self, req_id: str) -> float: + assert req_id in self.req_id_to_index + return self.min_p_logitsproc.get_min_p_by_index( + self.req_id_to_index[req_id]) + + @property def num_reqs(self) -> int: return len(self.req_id_to_index) From b270ac443da430002ab1f769f8c267d8e9e3dadf Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Fri, 2 May 2025 14:26:35 +0000 Subject: [PATCH 012/180] spec decode min p Signed-off-by: Andrew Feldman --- vllm/v1/sample/logits_processor.py | 4 ++-- vllm/v1/worker/gpu_input_batch.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/vllm/v1/sample/logits_processor.py b/vllm/v1/sample/logits_processor.py index be812c11b76..22334e687a8 100644 --- a/vllm/v1/sample/logits_processor.py +++ b/vllm/v1/sample/logits_processor.py @@ -61,8 +61,8 @@ def __init__(self, max_num_reqs: int, pin_memory: bool, self.min_p_cpu = self.min_p_cpu_tensor.numpy() # Pre-allocated device tensor self.min_p_device: torch.Tensor = torch.empty((max_num_reqs, ), - dtype=torch.float32, - device=device) + dtype=torch.float32, + device=device) # Current slice of the device tensor self.min_p: torch.Tensor = self.min_p_device[:0] diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 265c09901f0..c118ad92aec 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -208,10 +208,10 @@ def __init__( LogitBiasLogitsProcessor(pin_memory=pin_memory, device=device), ] self.min_p_logitsproc = MinPLogitsProcessor( - pin_memory=pin_memory, - device=device, - # +1 for temporary swap space - max_num_reqs=max_num_reqs + 1) + pin_memory=pin_memory, + device=device, + # +1 for temporary swap space + max_num_reqs=max_num_reqs + 1) self.nongreedy_logits_procs: list[LogitsProcessor] = [ self.min_p_logitsproc ] @@ -633,7 +633,6 @@ def get_min_p_by_req_id(self, req_id: str) -> float: return self.min_p_logitsproc.get_min_p_by_index( self.req_id_to_index[req_id]) - @property def num_reqs(self) -> int: return len(self.req_id_to_index) From 49531cbffff70742394d1e47e871cec8896951a8 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Mon, 5 May 2025 10:14:34 +0000 Subject: [PATCH 013/180] wip TPU fix Signed-off-by: Andrew Feldman --- vllm/v1/sample/tpu/metadata.py | 2 +- vllm/v1/worker/tpu_input_batch.py | 611 +++++++++++++++++++++++++++++ vllm/v1/worker/tpu_model_runner.py | 2 +- 3 files changed, 613 insertions(+), 2 deletions(-) create mode 100644 vllm/v1/worker/tpu_input_batch.py diff --git a/vllm/v1/sample/tpu/metadata.py b/vllm/v1/sample/tpu/metadata.py index 3950fda3e5e..341b38b42a4 100644 --- a/vllm/v1/sample/tpu/metadata.py +++ b/vllm/v1/sample/tpu/metadata.py @@ -4,7 +4,7 @@ import torch -from vllm.v1.worker.gpu_input_batch import InputBatch +from vllm.v1.worker.tpu_input_batch import InputBatch DEFAULT_SAMPLING_PARAMS = dict( temperature=-1.0, diff --git 
a/vllm/v1/worker/tpu_input_batch.py b/vllm/v1/worker/tpu_input_batch.py new file mode 100644 index 00000000000..50dddcd2a30 --- /dev/null +++ b/vllm/v1/worker/tpu_input_batch.py @@ -0,0 +1,611 @@ +# SPDX-License-Identifier: Apache-2.0 +# Datastructures defining an input batch + +from dataclasses import dataclass +from typing import Optional, cast + +import numpy as np +import torch + +from vllm.lora.request import LoRARequest +from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange +from vllm.sampling_params import SamplingParams, SamplingType +from vllm.utils import swap_dict_values +from vllm.v1.outputs import LogprobsTensors +from vllm.v1.sample.tpu.metadata import TPUSupportedSamplingMetadata +from vllm.v1.utils import copy_slice +from vllm.v1.worker.block_table import BlockTable + +_SAMPLING_EPS = 1e-5 + + +@dataclass +class CachedRequestState: + + req_id: str + prompt_token_ids: list[int] + mm_inputs: list[MultiModalKwargs] + mm_positions: list[PlaceholderRange] + sampling_params: SamplingParams + generator: Optional[torch.Generator] + + block_ids: list[int] + num_computed_tokens: int + output_token_ids: list[int] + + mrope_positions: Optional[torch.Tensor] = None + mrope_position_delta: Optional[int] = None + + lora_request: Optional[LoRARequest] = None + + def __post_init__(self): + self.num_prompt_tokens = len(self.prompt_token_ids) + + @property + def num_tokens(self) -> int: + return self.num_prompt_tokens + len(self.output_token_ids) + + def get_token_id(self, idx: int) -> int: + if idx < self.num_prompt_tokens: + return self.prompt_token_ids[idx] + else: + return self.output_token_ids[idx - self.num_prompt_tokens] + + +class InputBatch: + + def __init__( + self, + max_num_reqs: int, + max_model_len: int, + max_num_blocks_per_req: int, + device: torch.device, + pin_memory: bool, + vocab_size: int, + ): + self.max_num_reqs = max_num_reqs + self.max_model_len = max_model_len + self.max_num_blocks_per_req = max_num_blocks_per_req + self.device = device + self.pin_memory = pin_memory + self.vocab_size = vocab_size + + self._req_ids: list[Optional[str]] = [] + self.req_id_to_index: dict[str, int] = {} + + # TODO(woosuk): This buffer could be too large if max_model_len is big. + # Find a way to reduce the CPU memory usage. + # This buffer is not directly transferred to the GPU, so it does not + # need to be pinned. + self.token_ids_cpu_tensor = torch.zeros( + (max_num_reqs, max_model_len), + device="cpu", + dtype=torch.int32, + pin_memory=False, + ) + self.token_ids_cpu = self.token_ids_cpu_tensor.numpy() + self.num_tokens = np.zeros(max_num_reqs, dtype=np.int32) + self.num_tokens_no_spec = np.zeros(max_num_reqs, dtype=np.int32) + self.num_prompt_tokens = np.zeros(max_num_reqs, dtype=np.int32) + self.num_computed_tokens_cpu_tensor = torch.zeros( + (max_num_reqs, ), + device="cpu", + dtype=torch.int32, + pin_memory=pin_memory, + ) + self.num_computed_tokens_cpu = \ + self.num_computed_tokens_cpu_tensor.numpy() + + # Block table. + self.block_table = BlockTable( + max_num_reqs=max_num_reqs, + max_num_blocks_per_req=max_num_blocks_per_req, + pin_memory=pin_memory, + device=device, + ) + + # Sampling-related. 
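For reference, each sampling parameter below follows the same three-part layout: a CPU staging tensor (pinned when possible), a numpy view of it for cheap per-request scalar writes, and a device-side copy consumed by the sampler. A minimal self-contained sketch of that pattern, illustrative only and not part of the patch:

    import torch

    max_num_reqs = 4
    # Pinning requires a CUDA-capable build; fall back to pageable memory otherwise.
    pin = torch.cuda.is_available()
    # Host-side staging buffer; the numpy view allows per-request scalar writes
    # without tensor-indexing overhead.
    temperature_cpu_tensor = torch.empty((max_num_reqs, ),
                                         dtype=torch.float32,
                                         device="cpu",
                                         pin_memory=pin)
    temperature_cpu = temperature_cpu_tensor.numpy()
    temperature_cpu[2] = 0.7  # update a single request's value in place
    # Device-side copy used at sampling time; non_blocking only overlaps the
    # transfer when the source buffer is pinned.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    temperature = torch.empty((max_num_reqs, ), dtype=torch.float32, device=device)
    temperature.copy_(temperature_cpu_tensor, non_blocking=True)
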
+ self.temperature = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device=device) + self.temperature_cpu_tensor = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device="cpu", + pin_memory=pin_memory) + self.temperature_cpu = self.temperature_cpu_tensor.numpy() + self.greedy_reqs: set[str] = set() + self.random_reqs: set[str] = set() + + self.top_p = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device=device) + self.top_p_cpu_tensor = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device="cpu", + pin_memory=pin_memory) + self.top_p_cpu = self.top_p_cpu_tensor.numpy() + self.top_p_reqs: set[str] = set() + + self.top_k = torch.empty((max_num_reqs, ), + dtype=torch.int32, + device=device) + self.top_k_cpu_tensor = torch.empty((max_num_reqs, ), + dtype=torch.int32, + device="cpu", + pin_memory=pin_memory) + self.top_k_cpu = self.top_k_cpu_tensor.numpy() + self.top_k_reqs: set[str] = set() + + self.min_p = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device=device) + self.min_p_cpu_tensor = torch.empty((max_num_reqs, ), + dtype=torch.float32, + device="cpu", + pin_memory=pin_memory) + self.min_p_cpu = self.min_p_cpu_tensor.numpy() + self.min_p_reqs: set[str] = set() + + # Frequency penalty related data structures + self.frequency_penalties = torch.empty((max_num_reqs, ), + dtype=torch.float, + device=device) + self.frequency_penalties_cpu_tensor = torch.empty( + (max_num_reqs, ), + dtype=torch.float, + device="cpu", + pin_memory=pin_memory) + self.frequency_penalties_cpu = \ + self.frequency_penalties_cpu_tensor.numpy() + self.frequency_penalties_reqs: set[str] = set() + + # Presence penalty related data structures + self.presence_penalties = torch.empty((max_num_reqs, ), + dtype=torch.float, + device=device) + self.presence_penalties_cpu_tensor = torch.empty((max_num_reqs, ), + dtype=torch.float, + device="cpu", + pin_memory=pin_memory) + self.presence_penalties_cpu = self.presence_penalties_cpu_tensor.numpy( + ) + self.presence_penalties_reqs: set[str] = set() + + # Repetition penalty related data structures + self.repetition_penalties = torch.empty((max_num_reqs, ), + dtype=torch.float, + device=device) + self.repetition_penalties_cpu_tensor = torch.empty( + (max_num_reqs, ), + dtype=torch.float, + device="cpu", + pin_memory=pin_memory) + self.repetition_penalties_cpu = \ + self.repetition_penalties_cpu_tensor.numpy() + self.repetition_penalties_reqs: set[str] = set() + + # req_index -> (min_tokens, stop_token_ids) + self.min_tokens: dict[int, tuple[int, set[int]]] = {} + + # lora related + self.request_lora_mapping = np.zeros((self.max_num_reqs, ), + dtype=np.int32) + self.lora_id_to_request_ids: dict[int, set[str]] = {} + self.lora_id_to_lora_request: dict[int, LoRARequest] = {} + + # req_index -> generator + # NOTE(woosuk): The indices of the requests that do not have their own + # generator should not be included in the dictionary. + self.generators: dict[int, torch.Generator] = {} + + self.num_logprobs: dict[str, int] = {} + # NOTE(rob): num_prompt_logprobs only includes reqs + # that are currently in the prefill phase. + self.num_prompt_logprobs: dict[str, int] = {} + + # To accumulate prompt logprobs tensor chunks across prefill steps. + self.in_progress_prompt_logprobs_cpu: dict[str, LogprobsTensors] = {} + + self.logit_bias: list[Optional[dict[int, + float]]] = [None] * max_num_reqs + self.has_allowed_token_ids: set[str] = set() + # NOTE(lufang): In the mask tensor, if the corresponding token allowed, + # the value is False. 
Since we use masked_fill_ to set -inf. + self.allowed_token_ids_mask: Optional[torch.Tensor] = None + self.allowed_token_ids_mask_cpu_tensor: Optional[torch.Tensor] = None + + # req_index -> bad_words_token_ids + self.bad_words_token_ids: dict[int, list[list[int]]] = {} + + self.req_output_token_ids: list[Optional[list[int]]] = [] + + @property + def req_ids(self) -> list[str]: + # None elements should only be present transiently + # while performing state updates to the batch. + return cast(list[str], self._req_ids) + + def add_request( + self, + request: "CachedRequestState", + req_index: Optional[int] = None, + ) -> None: + if req_index is None: + req_index = self.num_reqs + assert req_index < self.max_num_reqs + + req_id = request.req_id + if req_index == len(self._req_ids): + self._req_ids.append(req_id) + self.req_output_token_ids.append(request.output_token_ids) + else: + self._req_ids[req_index] = req_id + self.req_output_token_ids[req_index] = request.output_token_ids + + self.req_id_to_index[req_id] = req_index + + # Copy the prompt token ids and output token ids. + num_prompt_tokens = len(request.prompt_token_ids) + self.num_prompt_tokens[req_index] = num_prompt_tokens + self.token_ids_cpu[ + req_index, :num_prompt_tokens] = request.prompt_token_ids + start_idx = num_prompt_tokens + end_idx = start_idx + len(request.output_token_ids) + self.token_ids_cpu[req_index, + start_idx:end_idx] = request.output_token_ids + # Number of token ids in token_ids_cpu. + # NOTE(woosuk): This may include spec decode tokens. + self.num_tokens[req_index] = request.num_tokens + # Number of tokens without spec decode tokens. + self.num_tokens_no_spec[req_index] = request.num_tokens + + self.num_computed_tokens_cpu[req_index] = request.num_computed_tokens + self.block_table.add_row(request.block_ids, req_index) + + sampling_params = request.sampling_params + if sampling_params.sampling_type == SamplingType.GREEDY: + # Avoid later division by zero. + self.temperature_cpu[req_index] = -1.0 + self.greedy_reqs.add(req_id) + else: + self.temperature_cpu[req_index] = sampling_params.temperature + self.random_reqs.add(req_id) + + self.top_p_cpu[req_index] = sampling_params.top_p + if sampling_params.top_p < 1: + self.top_p_reqs.add(req_id) + top_k = sampling_params.top_k + if 0 < top_k < self.vocab_size: + self.top_k_reqs.add(req_id) + else: + top_k = self.vocab_size + self.top_k_cpu[req_index] = top_k + self.min_p_cpu[req_index] = sampling_params.min_p + self.frequency_penalties_cpu[ + req_index] = sampling_params.frequency_penalty + if sampling_params.min_p > _SAMPLING_EPS: + self.min_p_reqs.add(req_id) + if sampling_params.frequency_penalty != 0.0: + self.frequency_penalties_reqs.add(req_id) + self.presence_penalties_cpu[ + req_index] = sampling_params.presence_penalty + if sampling_params.presence_penalty != 0.0: + self.presence_penalties_reqs.add(req_id) + self.repetition_penalties_cpu[ + req_index] = sampling_params.repetition_penalty + if sampling_params.repetition_penalty != 1.0: + self.repetition_penalties_reqs.add(req_id) + if sampling_params.min_tokens: + self.min_tokens[req_index] = (sampling_params.min_tokens, + sampling_params.all_stop_token_ids) + + # NOTE(woosuk): self.generators should not include the requests that + # do not have their own generator. 
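For context, only seeded requests carry their own generator; unseeded requests sample from the global RNG and are deliberately absent from `self.generators`. How the generator is constructed is not shown in this file, so the helper below is a hypothetical sketch assuming the usual torch.Generator plus SamplingParams.seed convention:

    from typing import Optional

    import torch
    from vllm import SamplingParams

    def make_request_generator(params: SamplingParams,
                               device: str) -> Optional[torch.Generator]:
        # Unseeded requests return None and therefore never get an entry
        # in self.generators.
        if params.seed is None:
            return None
        return torch.Generator(device=device).manual_seed(params.seed)
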
+ if request.generator is not None: + self.generators[req_index] = request.generator + + if sampling_params.logprobs is not None: + self.num_logprobs[req_id] = sampling_params.logprobs + if sampling_params.prompt_logprobs is not None: + self.num_prompt_logprobs[req_id] = sampling_params.prompt_logprobs + if sampling_params.logit_bias is not None: + self.logit_bias[req_index] = sampling_params.logit_bias + + if sampling_params.allowed_token_ids: + self.has_allowed_token_ids.add(req_id) + if self.allowed_token_ids_mask_cpu_tensor is None: + # Lazy allocation for this tensor, which can be large. + # False means we don't fill with -inf. + self.allowed_token_ids_mask = torch.zeros(self.max_num_reqs, + self.vocab_size, + dtype=torch.bool, + device=self.device) + self.allowed_token_ids_mask_cpu_tensor = torch.zeros( + self.max_num_reqs, + self.vocab_size, + dtype=torch.bool, + device="cpu") + self.allowed_token_ids_mask_cpu_tensor[req_index] = True + # False means we don't fill with -inf. + self.allowed_token_ids_mask_cpu_tensor[req_index][ + sampling_params.allowed_token_ids] = False + + if sampling_params.bad_words_token_ids: + self.bad_words_token_ids[ + req_index] = sampling_params.bad_words_token_ids + + # Add request lora ID + if request.lora_request: + lora_id = request.lora_request.lora_int_id + if lora_id not in self.lora_id_to_request_ids: + self.lora_id_to_request_ids[lora_id] = set() + + self.request_lora_mapping[req_index] = lora_id + self.lora_id_to_request_ids[lora_id].add(request.req_id) + self.lora_id_to_lora_request[lora_id] = request.lora_request + else: + # No LoRA + self.request_lora_mapping[req_index] = 0 + + def remove_request(self, req_id: str) -> Optional[int]: + """This method must always be followed by a call to condense().""" + + req_index = self.req_id_to_index.pop(req_id, None) + if req_index is None: + return None + self._req_ids[req_index] = None + self.req_output_token_ids[req_index] = None + + self.greedy_reqs.discard(req_id) + self.random_reqs.discard(req_id) + self.top_p_reqs.discard(req_id) + self.top_k_reqs.discard(req_id) + self.min_p_reqs.discard(req_id) + self.min_tokens.pop(req_index, None) + self.frequency_penalties_reqs.discard(req_id) + self.presence_penalties_reqs.discard(req_id) + self.repetition_penalties_reqs.discard(req_id) + self.generators.pop(req_index, None) + self.num_logprobs.pop(req_id, None) + self.num_prompt_logprobs.pop(req_id, None) + self.in_progress_prompt_logprobs_cpu.pop(req_id, None) + + # LoRA + lora_id = self.request_lora_mapping[req_index] + if lora_id != 0: + self.lora_id_to_request_ids[lora_id].discard(req_id) + if len(self.lora_id_to_request_ids[lora_id]) == 0: + self.lora_id_to_request_ids.pop(lora_id) + self.lora_id_to_lora_request.pop(lora_id) + self.request_lora_mapping[req_index] = 0 + + self.logit_bias[req_index] = None + self.has_allowed_token_ids.discard(req_id) + if self.allowed_token_ids_mask_cpu_tensor is not None: + # False means we don't fill with -inf. 
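To make the mask convention concrete, here is a small self-contained illustration (not part of the patch) of how the True/False layout interacts with masked_fill_ when applied to logits: True marks tokens to disallow, so allowed tokens stay False and an all-False row leaves the logits untouched.

    import torch

    vocab_size = 8
    logits = torch.zeros(2, vocab_size)
    mask = torch.ones(2, vocab_size, dtype=torch.bool)
    mask[0, [1, 3]] = False   # request 0 only allows tokens 1 and 3
    mask[1].fill_(False)      # request 1 has no allowed-token restriction
    # Disallowed positions (True) are pushed to -inf before sampling.
    logits.masked_fill_(mask, float("-inf"))
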
+ self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(False) + self.bad_words_token_ids.pop(req_index, None) + return req_index + + def swap_states(self, i1: int, i2: int) -> None: + old_id_i1 = self._req_ids[i1] + old_id_i2 = self._req_ids[i2] + self._req_ids[i1], self._req_ids[i2] =\ + self._req_ids[i2], self._req_ids[i1] # noqa + self.req_output_token_ids[i1], self.req_output_token_ids[i2] =\ + self.req_output_token_ids[i2], self.req_output_token_ids[i1] + assert old_id_i1 is not None and old_id_i2 is not None + self.req_id_to_index[old_id_i1], self.req_id_to_index[old_id_i2] =\ + self.req_id_to_index[old_id_i2], self.req_id_to_index[old_id_i1] + self.num_tokens[i1], self.num_tokens[i2] =\ + self.num_tokens[i2], self.num_tokens[i1] + self.num_tokens_no_spec[i1], self.num_tokens_no_spec[i2] =\ + self.num_tokens_no_spec[i2], self.num_tokens_no_spec[i1] + self.num_prompt_tokens[i1], self.num_prompt_tokens[i2] =\ + self.num_prompt_tokens[i2], self.num_prompt_tokens[i1] + self.num_computed_tokens_cpu[i1], self.num_computed_tokens_cpu[i2] =\ + self.num_computed_tokens_cpu[i2], self.num_computed_tokens_cpu[i1] + self.temperature_cpu[i1], self.temperature_cpu[i2] =\ + self.temperature_cpu[i2], self.temperature_cpu[i1] + self.top_p_cpu[i1], self.top_p_cpu[i2] =\ + self.top_p_cpu[i2], self.top_p_cpu[i1] + self.top_k_cpu[i1], self.top_k_cpu[i2] =\ + self.top_k_cpu[i2], self.top_k_cpu[i1] + self.frequency_penalties_cpu[i1], self.frequency_penalties_cpu[i2] =\ + self.frequency_penalties_cpu[i2], self.frequency_penalties_cpu[i1] + self.presence_penalties_cpu[i1], self.presence_penalties_cpu[i2] =\ + self.presence_penalties_cpu[i2], self.presence_penalties_cpu[i1] + self.repetition_penalties_cpu[i1], self.repetition_penalties_cpu[i2] =\ + self.repetition_penalties_cpu[i2], self.repetition_penalties_cpu[i1] + self.min_p_cpu[i1], self.min_p_cpu[i2] =\ + self.min_p_cpu[i2], self.min_p_cpu[i1] + + # NOTE: the following is unsafe + # self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\ + # self.token_ids_cpu[i2, ...], self.token_ids_cpu[i1, ...] + # instead, we need to temporiarily copy the data for one of the indices + # TODO(lucas): optimize this by only copying valid indices + tmp = self.token_ids_cpu[i1, ...].copy() + self.token_ids_cpu[i1, ...] = self.token_ids_cpu[i2, ...] + self.token_ids_cpu[i2, ...] = tmp + + swap_dict_values(self.generators, i1, i2) + swap_dict_values(self.min_tokens, i1, i2) + swap_dict_values(self.bad_words_token_ids, i1, i2) + + self.request_lora_mapping[i1], self.request_lora_mapping[i2] =\ + self.request_lora_mapping[i2], self.request_lora_mapping[i1] + self.logit_bias[i1], self.logit_bias[i2] =\ + self.logit_bias[i2], self.logit_bias[i1] + + if self.allowed_token_ids_mask_cpu_tensor is not None: + self.allowed_token_ids_mask_cpu_tensor[i1], \ + self.allowed_token_ids_mask_cpu_tensor[i2] =\ + self.allowed_token_ids_mask_cpu_tensor[i2], \ + self.allowed_token_ids_mask_cpu_tensor[i1] + self.block_table.swap_row(i1, i2) + + def condense(self, empty_req_indices: list[int]) -> None: + num_reqs = self.num_reqs + if num_reqs == 0: + # The batched states are empty. + self._req_ids.clear() + self.req_output_token_ids.clear() + return + + # NOTE(woosuk): This function assumes that the empty_req_indices + # is sorted in descending order. + last_req_index = num_reqs + len(empty_req_indices) - 1 + while empty_req_indices: + # Find the largest non-empty index. + while last_req_index in empty_req_indices: + last_req_index -= 1 + + # Find the smallest empty index. 
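As a standalone illustration of the compaction walk below (not part of the patch), the same index logic on a toy batch: it assumes three live requests with empty slots 4 and 1, matching the descending-order precondition noted above.

    # Three live requests occupy slots {0, 2, 3}; slots 4 and 1 are holes.
    num_reqs = 3
    empty_req_indices = [4, 1]   # must be sorted in descending order
    last_req_index = num_reqs + len(empty_req_indices) - 1   # -> 4
    moves = []
    while empty_req_indices:
        # Skip trailing holes to find the largest occupied index.
        while last_req_index in empty_req_indices:
            last_req_index -= 1
        # Smallest empty index is at the end of the descending list.
        empty_index = empty_req_indices.pop()
        if empty_index >= last_req_index:
            break
        moves.append((last_req_index, empty_index))
        last_req_index -= 1
    # moves == [(3, 1)]: slot 3's request moves into slot 1, leaving 0..2 dense.
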
+ empty_index = empty_req_indices.pop() + if empty_index >= last_req_index: + break + + # Swap the states. + req_id = self._req_ids[last_req_index] + output_token_ids = self.req_output_token_ids[last_req_index] + assert req_id is not None + self._req_ids[empty_index] = req_id + self._req_ids[last_req_index] = None + self.req_output_token_ids[empty_index] = output_token_ids + self.req_output_token_ids[last_req_index] = None + self.req_id_to_index[req_id] = empty_index + + num_tokens = self.num_tokens[last_req_index] + self.token_ids_cpu[empty_index, :num_tokens] = self.token_ids_cpu[ + last_req_index, :num_tokens] + self.num_tokens[empty_index] = num_tokens + self.num_tokens_no_spec[empty_index] = self.num_tokens_no_spec[ + last_req_index] + self.num_prompt_tokens[empty_index] = self.num_prompt_tokens[ + last_req_index] + self.num_computed_tokens_cpu[ + empty_index] = self.num_computed_tokens_cpu[last_req_index] + self.block_table.move_row(last_req_index, empty_index) + self.temperature_cpu[empty_index] = self.temperature_cpu[ + last_req_index] + self.top_p_cpu[empty_index] = self.top_p_cpu[last_req_index] + self.top_k_cpu[empty_index] = self.top_k_cpu[last_req_index] + self.frequency_penalties_cpu[ + empty_index] = self.frequency_penalties_cpu[last_req_index] + self.presence_penalties_cpu[ + empty_index] = self.presence_penalties_cpu[last_req_index] + self.repetition_penalties_cpu[ + empty_index] = self.repetition_penalties_cpu[last_req_index] + self.min_p_cpu[empty_index] = self.min_p_cpu[last_req_index] + generator = self.generators.pop(last_req_index, None) + if generator is not None: + self.generators[empty_index] = generator + + min_token = self.min_tokens.pop(last_req_index, None) + if min_token is not None: + self.min_tokens[empty_index] = min_token + + self.request_lora_mapping[empty_index] = self.request_lora_mapping[ + last_req_index] + + self.logit_bias[empty_index] = self.logit_bias[last_req_index] + + if self.allowed_token_ids_mask_cpu_tensor is not None: + self.allowed_token_ids_mask_cpu_tensor[ + empty_index] = self.allowed_token_ids_mask_cpu_tensor[ + last_req_index] + + bad_words_token_ids = self.bad_words_token_ids.pop( + last_req_index, None) + if bad_words_token_ids is not None: + self.bad_words_token_ids[empty_index] = bad_words_token_ids + # Decrement last_req_index since it is now empty. + last_req_index -= 1 + + # Trim lists to the batch size. + del self._req_ids[self.num_reqs:] + del self.req_output_token_ids[self.num_reqs:] + + def _make_prompt_token_ids_tensor(self) -> torch.Tensor: + max_prompt_len = self.num_prompt_tokens[:self.num_reqs].max() + prompt_token_ids_cpu_tensor = torch.empty( + (self.num_reqs, max_prompt_len), + device="cpu", + dtype=torch.int64, + pin_memory=self.pin_memory, + ) + prompt_token_ids = prompt_token_ids_cpu_tensor.numpy() + prompt_token_ids[:] = self.token_ids_cpu[:self. + num_reqs, :max_prompt_len] + # Use the value of vocab_size as a pad since we don't have a + # token_id of this value. + for i in range(self.num_reqs): + prompt_token_ids[i, self.num_prompt_tokens[i]:] = self.vocab_size + return prompt_token_ids_cpu_tensor.to(device=self.device, + non_blocking=True) + + def make_lora_inputs( + self, num_scheduled_tokens: np.ndarray + ) -> tuple[tuple[int, ...], tuple[int, ...], set[LoRARequest]]: + """ + Given the num_scheduled_tokens for each request in the batch, return + datastructures used to activate the current LoRAs. + Returns: + 1. 
prompt_lora_mapping: A tuple of size self.num_reqs where, + prompt_lora_mapping[i] is the LoRA id to use for the ith prompt. + 2. token_lora_mapping: A tuple of size np.sum(num_scheduled_tokens) + where, token_lora_mapping[i] is the LoRA id to use for ith token. + 3. lora_requests: Set of relevant LoRA requests. + """ + + req_lora_mapping = self.request_lora_mapping[:self.num_reqs] + prompt_lora_mapping = tuple(req_lora_mapping) + token_lora_mapping = tuple( + req_lora_mapping.repeat(num_scheduled_tokens)) + active_lora_requests: set[LoRARequest] = set( + self.lora_id_to_lora_request.values()) + + return prompt_lora_mapping, token_lora_mapping, active_lora_requests + + @property + def num_reqs(self) -> int: + return len(self.req_id_to_index) + + @property + def all_greedy(self) -> bool: + return len(self.random_reqs) == 0 + + @property + def all_random(self) -> bool: + return len(self.greedy_reqs) == 0 + + @property + def no_top_p(self) -> bool: + return len(self.top_p_reqs) == 0 + + @property + def no_top_k(self) -> bool: + return len(self.top_k_reqs) == 0 + + @property + def no_min_p(self) -> bool: + return len(self.min_p_reqs) == 0 + + @property + def no_penalties(self) -> bool: + return (len(self.presence_penalties_reqs) == 0 + and len(self.frequency_penalties_reqs) == 0 + and len(self.repetition_penalties_reqs) == 0) + + @property + def max_num_logprobs(self) -> Optional[int]: + return max(self.num_logprobs.values()) if self.num_logprobs else None + + @property + def no_prompt_logprob(self) -> bool: + return not self.num_prompt_logprobs + + @property + def no_allowed_token_ids(self) -> bool: + return len(self.has_allowed_token_ids) == 0 diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index db91f199f11..6b5c0662b80 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -35,7 +35,7 @@ from vllm.v1.sample.tpu.metadata import TPUSupportedSamplingMetadata from vllm.v1.sample.tpu.sampler import Sampler as TPUSampler from vllm.v1.utils import bind_kv_cache -from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch +from vllm.v1.worker.tpu_input_batch import CachedRequestState, InputBatch from .utils import (gather_mm_placeholders, sanity_check_mm_encoder_outputs, scatter_mm_placeholders) From 066761d931d41aa7318cbfb9a285eb170cf3f752 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Mon, 5 May 2025 11:00:28 +0000 Subject: [PATCH 014/180] merge Signed-off-by: Andrew Feldman --- .../configs/DeepSeek-V2-Lite-Chat.yaml | 1 + ...lama-3-70B-Instruct-FBGEMM-nonuniform.yaml | 1 + .../configs/Meta-Llama-3-70B-Instruct.yaml | 1 + ...struct-Channelwise-compressed-tensors.yaml | 1 + ...Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml | 1 + ...-3-8B-Instruct-FP8-compressed-tensors.yaml | 1 + .../configs/Meta-Llama-3-8B-Instruct-FP8.yaml | 1 + ...Instruct-INT8-compressed-tensors-asym.yaml | 1 + ...3-8B-Instruct-INT8-compressed-tensors.yaml | 1 + ...nstruct-nonuniform-compressed-tensors.yaml | 1 + .../configs/Meta-Llama-3-8B-Instruct.yaml | 3 +- .../configs/Meta-Llama-3-8B-QQQ.yaml | 1 + ...2-1B-Instruct-INT8-compressed-tensors.yaml | 1 + .../configs/Minitron-4B-Base-FP8.yaml | 1 + ...xtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml | 1 + .../Mixtral-8x7B-Instruct-v0.1-FP8.yaml | 1 + .../configs/Mixtral-8x7B-Instruct-v0.1.yaml | 3 +- .../Qwen1.5-MoE-W4A16-compressed-tensors.yaml | 5 +- .../configs/Qwen2-1.5B-Instruct-FP8W8.yaml | 1 + ...1.5B-Instruct-INT8-compressed-tensors.yaml | 1 + 
....5B-Instruct-W8A16-compressed-tensors.yaml | 1 + .../configs/Qwen2-57B-A14-Instruct.yaml | 1 + .../SparseLlama3.1_2of4_fp8_compressed.yaml | 1 + .../test_lm_eval_correctness.py | 2 +- .buildkite/release-pipeline.yaml | 27 +- .../scripts/hardware_ci/run-amd-test.sh | 67 +- .../hardware_ci/run-cpu-test-ppc64le.sh | 35 +- .../scripts/hardware_ci/run-tpu-v1-test.sh | 11 +- .buildkite/scripts/upload-wheels.sh | 18 +- .buildkite/test-pipeline.yaml | 160 +- .github/CODEOWNERS | 1 + .github/ISSUE_TEMPLATE/200-installation.yml | 2 +- .github/ISSUE_TEMPLATE/300-usage.yml | 2 +- .github/ISSUE_TEMPLATE/400-bug-report.yml | 6 +- .../700-performance-discussion.yml | 2 +- .github/mergify.yml | 34 +- .github/workflows/lint-and-deploy.yaml | 4 +- .gitignore | 5 +- .pre-commit-config.yaml | 15 +- CMakeLists.txt | 51 +- benchmarks/auto_tune.sh | 212 ++ benchmarks/backend_request_func.py | 109 + benchmarks/benchmark_dataset.py | 134 ++ benchmarks/benchmark_prefix_caching.py | 14 +- benchmarks/benchmark_serving.py | 56 +- .../benchmark_serving_structured_output.py | 23 +- benchmarks/benchmark_throughput.py | 7 + benchmarks/kernels/benchmark_bitblas.py | 236 +++ .../kernels/benchmark_grouped_gemm_cutlass.py | 3 +- benchmarks/kernels/benchmark_lora.py | 10 +- benchmarks/kernels/benchmark_moe.py | 36 +- .../benchmark_moe_permute_unpermute.py | 349 ++++ cmake/external_projects/vllm_flash_attn.cmake | 2 +- csrc/attention/merge_attn_states.cu | 25 +- csrc/attention/mla/cutlass_mla_entry.cu | 38 + csrc/attention/mla/cutlass_mla_kernels.cu | 225 +++ csrc/cache_kernels.cu | 39 +- csrc/core/math.hpp | 19 + csrc/moe/marlin_kernels/marlin_moe_kernel.h | 8 +- csrc/moe/marlin_moe_wna16/marlin_template.h | 8 +- csrc/moe/moe_permute_unpermute_op.cu | 133 ++ csrc/moe/moe_wna16.cu | 10 +- csrc/moe/moe_wna16_utils.h | 16 +- csrc/moe/permute_unpermute_kernels/dispatch.h | 53 + .../moe_permute_unpermute_kernel.cu | 229 +++ .../moe_permute_unpermute_kernel.h | 95 + .../moe_permute_unpermute_kernel.inl | 211 ++ csrc/moe/torch_bindings.cpp | 22 + csrc/ops.h | 9 + csrc/quantization/activation_kernels.cu | 120 ++ .../quantization/cutlass_w8a8/moe/moe_data.cu | 17 +- .../scaled_mm_c2x_sm89_fp8_dispatch.cuh | 2 +- .../scaled_mm_c2x_sm89_int8_dispatch.cuh | 2 +- .../fp4/nvfp4_scaled_mm_kernels.cu | 2 +- ...fused_layernorm_dynamic_per_token_quant.cu | 2 +- .../gptq_allspark/allspark_qgemm_w8a16.cu | 2 +- csrc/quantization/gptq_marlin/gptq_marlin.cu | 16 +- .../marlin/dense/marlin_cuda_kernel.cu | 4 +- .../marlin/qqq/marlin_qqq_gemm_kernel.cu | 4 +- csrc/quantization/marlin/sparse/common/mma.h | 4 +- csrc/rocm/attention.cu | 11 +- csrc/rocm/ops.h | 9 + csrc/rocm/skinny_gemms.cu | 1600 +++++++++++++++ csrc/rocm/torch_bindings.cpp | 18 + csrc/torch_bindings.cpp | 13 +- docker/Dockerfile | 55 +- docker/Dockerfile.cpu | 1 + docker/Dockerfile.nightly_torch | 313 +++ docker/Dockerfile.rocm | 10 +- docker/Dockerfile.rocm_base | 7 +- docker/Dockerfile.s390x | 3 +- docker/Dockerfile.tpu | 2 +- docker/Dockerfile.xpu | 6 - docs/Makefile | 1 + docs/source/api/engine/async_llm_engine.md | 7 - docs/source/api/engine/index.md | 17 - docs/source/api/engine/llm_engine.md | 7 - docs/source/api/inference_params.md | 21 - docs/source/api/model/adapters.md | 9 - docs/source/api/model/index.md | 11 - docs/source/api/model/interfaces.md | 9 - docs/source/api/model/interfaces_base.md | 9 - docs/source/api/multimodal/index.md | 28 - docs/source/api/multimodal/inputs.md | 49 - docs/source/api/multimodal/parse.md | 9 - docs/source/api/multimodal/processing.md | 9 
- docs/source/api/multimodal/profiling.md | 9 - docs/source/api/multimodal/registry.md | 9 - docs/source/api/offline_inference/index.md | 9 - docs/source/api/offline_inference/llm.md | 7 - .../api/offline_inference/llm_inputs.md | 19 - docs/source/api/summary.md | 133 ++ .../deployment/anything-llm-chat-with-doc.png | Bin 0 -> 120834 bytes .../anything-llm-chat-without-doc.png | Bin 0 -> 138979 bytes .../deployment/anything-llm-provider.png | Bin 0 -> 112470 bytes .../deployment/anything-llm-upload-doc.png | Bin 0 -> 114117 bytes docs/source/assets/deployment/open_webui.png | Bin 0 -> 69283 bytes .../assets/deployment/streamlit-chat.png | Bin 0 -> 108553 bytes docs/source/autodoc2_docstring_parser.py | 21 + docs/source/conf.py | 131 +- .../source/contributing/deprecation_policy.md | 87 + docs/source/contributing/model/multimodal.md | 62 +- docs/source/contributing/overview.md | 12 +- docs/source/deployment/docker.md | 12 + .../deployment/frameworks/anything-llm.md | 47 + docs/source/deployment/frameworks/index.md | 3 + .../deployment/frameworks/open-webui.md | 29 + .../source/deployment/frameworks/streamlit.md | 42 + .../integrations/production-stack.md | 2 +- docs/source/deployment/security.md | 58 + docs/source/design/arch_overview.md | 4 +- docs/source/design/mm_processing.md | 2 +- docs/source/design/v1/metrics.md | 18 +- docs/source/design/v1/prefix_caching.md | 20 +- docs/source/design/v1/torch_compile.md | 4 +- docs/source/features/compatibility_matrix.md | 6 +- docs/source/features/disagg_prefill.md | 4 +- docs/source/features/lora.md | 61 +- docs/source/features/quantization/auto_awq.md | 4 +- docs/source/features/quantization/bitblas.md | 48 + docs/source/features/quantization/bnb.md | 2 +- docs/source/features/quantization/fp8.md | 19 +- .../source/features/quantization/gptqmodel.md | 21 +- docs/source/features/quantization/index.md | 2 + docs/source/features/quantization/int4.md | 8 +- docs/source/features/quantization/int8.md | 8 +- docs/source/features/quantization/modelopt.md | 78 + .../quantization/quantized_kvcache.md | 2 +- docs/source/features/quantization/quark.md | 7 + .../quantization/supported_hardware.md | 23 +- docs/source/features/quantization/torchao.md | 3 +- docs/source/features/reasoning_outputs.md | 14 +- docs/source/features/structured_outputs.md | 55 +- docs/source/features/tool_calling.md | 28 +- .../ai_accelerator/hpu-gaudi.inc.md | 34 +- .../installation/ai_accelerator/tpu.inc.md | 10 +- .../getting_started/installation/cpu.md | 2 +- .../installation/cpu/build.inc.md | 8 +- .../installation/gpu/cuda.inc.md | 14 +- .../installation/gpu/rocm.inc.md | 17 +- .../installation/gpu/xpu.inc.md | 11 +- .../source/getting_started/troubleshooting.md | 2 +- docs/source/getting_started/v1_user_guide.md | 7 +- docs/source/index.md | 9 +- .../models/extensions/fastsafetensor.md | 2 +- .../models/extensions/runai_model_streamer.md | 26 + docs/source/models/generative_models.md | 4 +- docs/source/models/pooling_models.md | 76 +- docs/source/models/supported_models.md | 154 +- docs/source/performance/optimization.md | 187 +- docs/source/serving/distributed_serving.md | 4 + docs/source/serving/engine_args.md | 2 + docs/source/serving/multimodal_inputs.md | 2 +- docs/source/serving/offline_inference.md | 85 +- .../serving/openai_compatible_server.md | 27 +- examples/lmcache/README.md | 56 + examples/lmcache/cpu_offload_lmcache.py | 151 ++ .../disagg_prefill_lmcache_v0.py} | 0 .../configs/lmcache-decoder-config.yaml | 13 + .../configs/lmcache-prefiller-config.yaml | 13 + 
.../disagg_example_nixl.sh | 136 ++ .../disagg_proxy_server.py | 193 ++ .../disagg_vllm_launcher.sh | 59 + .../lmcache/kv_cache_sharing_lmcache_v1.py | 130 ++ examples/offline_inference/audio_language.py | 65 +- .../offline_inference/batch_llm_inference.py | 90 + .../offline_inference/cpu_offload_lmcache.py | 65 - .../decode_example.py | 36 + .../prefill_example.py | 43 + .../disaggregated-prefill-v1/run.sh | 5 + examples/offline_inference/distributed.py | 109 - examples/offline_inference/eagle.py | 24 +- .../encoder_decoder_multimodal.py | 3 +- .../offline_inference/llm_engine_example.py | 12 +- examples/offline_inference/mistral-small.py | 5 +- examples/offline_inference/profiling.py | 2 +- .../offline_inference/qwen2_5_omni/README.md | 32 + .../qwen2_5_omni/only_thinker.py | 159 ++ examples/offline_inference/vision_language.py | 79 +- .../vision_language_multi_image.py | 40 +- .../online_serving/chart-helm/values.yaml | 2 +- .../gradio_openai_chatbot_webserver.py | 4 - examples/online_serving/kv_events.sh | 86 + .../online_serving/kv_events_subscriber.py | 114 ++ ...i_chat_completion_client_for_multimodal.py | 20 +- ...penai_chat_completion_client_with_tools.py | 195 +- ...t_completion_client_with_tools_required.py | 58 +- ...enai_chat_completion_structured_outputs.py | 195 +- ...etion_structured_outputs_structural_tag.py | 85 + ...etion_structured_outputs_with_reasoning.py | 160 +- ...at_completion_tool_calls_with_reasoning.py | 160 +- .../openai_chat_completion_with_reasoning.py | 65 +- ...hat_completion_with_reasoning_streaming.py | 84 +- ...ai_chat_embedding_client_for_multimodal.py | 11 +- .../openai_completion_client.py | 58 +- .../openai_cross_encoder_score.py | 23 +- .../online_serving/openai_embedding_client.py | 45 +- .../openai_embedding_matryoshka_fy.py | 36 + .../online_serving/openai_pooling_client.py | 15 +- .../openai_transcription_client.py | 15 +- examples/online_serving/ray_serve_deepseek.py | 48 + .../streamlit_openai_chatbot_webserver.py | 185 ++ examples/tool_chat_template_llama4_json.jinja | 116 ++ examples/tool_chat_template_mistral3.jinja | 119 ++ pyproject.toml | 17 +- requirements/build.txt | 6 +- requirements/common.txt | 6 +- requirements/cpu.txt | 11 +- requirements/cuda.txt | 9 +- requirements/docs.txt | 24 +- requirements/hpu.txt | 4 +- requirements/neuron.txt | 2 + requirements/nightly_torch_test.txt | 33 + requirements/rocm-build.txt | 11 +- requirements/rocm.txt | 3 +- requirements/test.in | 12 +- requirements/test.txt | 168 +- requirements/tpu.txt | 3 +- requirements/xpu.txt | 10 +- setup.py | 29 +- tests/compile/test_basic_correctness.py | 3 +- tests/compile/test_full_graph.py | 6 +- tests/compile/test_functionalization.py | 33 +- tests/compile/test_fusion.py | 9 +- tests/compile/test_pass_manager.py | 9 +- tests/compile/test_sequence_parallelism.py | 190 ++ tests/compile/test_silu_mul_quant_fusion.py | 74 + tests/conftest.py | 122 +- tests/core/block/e2e/test_correctness.py | 6 +- tests/core/test_scheduler.py | 74 +- tests/core/utils.py | 11 +- tests/distributed/conftest.py | 145 ++ tests/distributed/test_comm_ops.py | 31 +- tests/distributed/test_events.py | 193 ++ tests/distributed/test_pipeline_parallel.py | 6 +- tests/distributed/test_sequence_parallel.py | 296 +++ tests/engine/test_arg_utils.py | 172 +- tests/engine/test_options.py | 60 + tests/engine/test_skip_tokenizer_init.py | 29 - tests/entrypoints/llm/test_chat.py | 127 +- tests/entrypoints/llm/test_guided_generate.py | 304 ++- .../test_transcription_api_correctness.py | 1 + 
tests/entrypoints/openai/test_audio.py | 33 +- .../openai/test_chat_with_tool_reasoning.py | 6 +- tests/entrypoints/openai/test_cli_args.py | 14 +- tests/entrypoints/openai/test_embedding.py | 42 +- .../openai/test_embedding_dimensions.py | 140 +- .../entrypoints/openai/test_lora_resolvers.py | 209 ++ .../entrypoints/openai/test_openai_schema.py | 49 + tests/entrypoints/openai/test_serving_chat.py | 40 + .../openai/test_transcription_validation.py | 33 + tests/entrypoints/openai/test_truncation.py | 103 + tests/entrypoints/openai/test_video.py | 33 +- tests/entrypoints/openai/test_vision.py | 34 +- .../openai/test_vision_embedding.py | 4 +- tests/kernels/{ => attention}/conftest.py | 0 .../kernels/{ => attention}/test_attention.py | 3 +- .../attention/test_attention_selector.py | 252 +++ .../test_blocksparse_attention.py | 3 +- tests/kernels/{ => attention}/test_cache.py | 60 +- .../test_cascade_flash_attn.py | 0 .../test_encoder_decoder_attn.py | 0 .../{ => attention}/test_flash_attn.py | 2 +- .../{ => attention}/test_flashinfer.py | 0 .../kernels/{ => attention}/test_flashmla.py | 0 .../{ => attention}/test_lightning_attn.py | 0 .../{ => attention}/test_merge_attn_states.py | 0 .../kernels/{ => attention}/test_mha_attn.py | 0 .../{ => attention}/test_mla_decode_cpu.py | 0 .../{ => attention}/test_prefix_prefill.py | 0 .../attention/test_rocm_attention_selector.py | 61 + .../test_triton_decode_attention.py | 0 tests/kernels/{ => core}/test_activation.py | 3 +- .../{ => core}/test_fused_quant_layernorm.py | 0 tests/kernels/{ => core}/test_layernorm.py | 0 tests/kernels/core/test_opcheck.py | 25 + tests/kernels/{ => core}/test_permute_cols.py | 0 tests/kernels/{ => core}/test_pos_encoding.py | 3 +- .../{ => core}/test_rotary_embedding.py | 0 tests/kernels/{ => core}/test_uva.py | 0 .../kernels/{ => mamba}/test_causal_conv1d.py | 0 .../kernels/{ => mamba}/test_mamba_mixer2.py | 0 tests/kernels/{ => mamba}/test_mamba_ssm.py | 0 .../kernels/{ => mamba}/test_mamba_ssm_ssd.py | 0 tests/kernels/moe/test_cutlass_moe.py | 364 ++++ tests/kernels/{ => moe}/test_moe.py | 3 +- .../kernels/moe/test_moe_permute_unpermute.py | 223 +++ .../{ => moe}/test_triton_moe_ptpc_fp8.py | 0 tests/kernels/quant_utils.py | 60 + .../{ => quantization}/test_allspark_gemm.py | 0 tests/kernels/{ => quantization}/test_aqlm.py | 0 tests/kernels/{ => quantization}/test_awq.py | 0 .../{ => quantization}/test_awq_marlin.py | 3 +- .../{ => quantization}/test_awq_triton.py | 0 .../{ => quantization}/test_block_fp8.py | 9 +- .../{ => quantization}/test_block_int8.py | 3 +- .../test_cutlass_2of4_sparse.py | 3 +- .../test_cutlass_scaled_mm.py} | 4 +- .../{ => quantization}/test_fp8_quant.py | 0 tests/kernels/{ => quantization}/test_ggml.py | 0 tests/kernels/{ => quantization}/test_gguf.py | 0 tests/kernels/{ => quantization}/test_gptq.py | 0 .../{ => quantization}/test_int8_kernel.py | 0 .../{ => quantization}/test_int8_quant.py | 0 .../{ => quantization}/test_machete_mm.py | 0 .../{ => quantization}/test_marlin_gemm.py | 0 .../{ => quantization}/test_nvfp4_quant.py | 0 .../test_nvfp4_scaled_mm.py | 0 .../quantization/test_rocm_skinny_gemms.py | 80 + .../test_triton_scaled_mm.py | 0 tests/kernels/test_attention_selector.py | 136 -- tests/kernels/test_cutlass_mla_decode.py | 93 + tests/kernels/test_cutlass_moe.py | 244 --- tests/kernels/test_fused_quant_activation.py | 69 + tests/kernels/test_rocm_attention_selector.py | 34 - tests/kernels/test_triton_flash_attention.py | 499 +++++ tests/kernels/test_utils.py | 25 - 
tests/kernels/utils_block.py | 63 - tests/kv_transfer/test_disagg.py | 4 +- tests/lora/test_llama_tp.py | 1 + tests/lora/test_lora_manager.py | 30 +- tests/lora/test_resolver.py | 74 + tests/lora/test_tokenizer_group.py | 10 +- tests/lora/test_utils.py | 12 + .../model_executor/test_enabled_custom_ops.py | 7 +- .../model_executor/test_guided_processors.py | 15 +- .../decoder_only/language/test_hybrid.py | 360 ---- .../decoder_only/language/test_mamba.py | 337 ---- tests/models/embedding/utils.py | 39 - tests/models/encoder_decoder/__init__.py | 0 .../audio_language/__init__.py | 0 .../encoder_decoder/language/__init__.py | 0 .../vision_language/__init__.py | 0 .../vision_language/test_broadcast.py | 37 - .../{decoder_only => language}/__init__.py | 0 .../generation}/__init__.py | 0 .../generation}/test_bart.py | 4 - .../generation/test_common.py} | 67 +- .../generation}/test_granite.py | 4 - .../models/language/generation/test_hybrid.py | 315 +++ .../generation}/test_mistral.py | 66 +- .../generation}/test_phimoe.py | 4 - .../language => language/pooling}/__init__.py | 0 .../pooling/test_classification.py} | 6 +- .../pooling}/test_embedding.py | 6 +- .../pooling}/test_gritlm.py | 189 +- .../pooling}/test_jina.py | 46 +- .../pooling}/test_scoring.py | 61 +- .../pooling/test_snowflake_arctic_embed.py | 95 + .../pooling/test_truncation_control.py | 69 + .../generation}/__init__.py | 0 .../generation/test_common.py} | 74 +- .../generation}/test_florence2.py | 21 +- .../generation/test_granite_speech.py | 144 ++ .../generation}/test_interleaved.py | 3 +- .../generation}/test_mllama.py | 50 +- .../generation}/test_phi4mm.py | 6 +- .../generation}/test_pixtral.py | 4 - .../generation}/test_qwen2_vl.py | 2 +- .../generation}/test_ultravox.py | 77 +- .../generation}/test_whisper.py | 58 +- .../generation}/vlm_utils/__init__.py | 0 .../generation}/vlm_utils/builders.py | 10 +- .../generation}/vlm_utils/case_filtering.py | 0 .../generation}/vlm_utils/core.py | 2 +- .../generation}/vlm_utils/custom_inputs.py | 0 .../generation}/vlm_utils/model_utils.py | 65 +- .../generation}/vlm_utils/runners.py | 11 +- .../generation}/vlm_utils/types.py | 6 +- .../pooling}/__init__.py | 0 .../pooling}/test_dse_qwen2_vl.py | 2 +- .../pooling}/test_intern_vit.py | 23 +- .../pooling}/test_llava_next.py | 2 +- .../pooling}/test_phi3v.py | 2 +- .../multimodal/processing/test_common.py | 5 + .../multimodal/processing/test_h2ovl.py | 4 +- .../multimodal/processing/test_idefics3.py | 4 +- .../multimodal/processing/test_internvl.py | 4 +- .../multimodal/processing/test_llama4.py | 4 +- .../processing/test_minimax_vl_01.py | 98 + .../multimodal/processing/test_phi3v.py | 4 +- .../multimodal/processing/test_phi4mm.py | 59 + .../multimodal/processing/test_qwen2_vl.py | 4 +- .../multimodal/processing/test_smolvlm.py | 4 +- .../language => quantization}/__init__.py | 0 .../language => quantization}/test_aqlm.py | 6 - .../test_awq.py | 7 +- tests/models/quantization/test_bitblas.py | 61 + .../language => quantization}/test_fp8.py | 7 +- .../language => quantization}/test_gguf.py | 7 +- .../models/quantization/test_gptq_bitblas.py | 59 + .../test_gptq_marlin.py | 8 +- .../test_gptq_marlin_24.py | 5 +- .../test_modelopt.py | 1 - .../language => quantization}/test_nvfp4.py | 1 - tests/models/registry.py | 94 +- tests/models/test_initialization.py | 5 +- tests/models/test_oot_registration.py | 5 +- tests/models/test_transformers.py | 5 +- tests/models/utils.py | 66 +- tests/multimodal/assets/image1.png | Bin 0 -> 1837 bytes 
tests/multimodal/assets/image2.png | Bin 0 -> 1837 bytes tests/multimodal/test_hasher.py | 61 + .../test_register_quantization_config.py | 4 +- tests/quantization/test_torchao.py | 26 + .../reasoning/test_qwen3_reasoning_parser.py | 141 ++ .../e2e/test_medusa_correctness.py | 2 +- tests/spec_decode/e2e/test_mlp_correctness.py | 4 +- .../e2e/test_multistep_correctness.py | 9 +- .../spec_decode/e2e/test_ngram_correctness.py | 2 +- tests/spec_decode/test_memory_usage.py | 90 + tests/spec_decode/test_scorer.py | 5 +- tests/test_config.py | 82 +- tests/test_utils.py | 54 +- tests/tokenization/test_cached_tokenizer.py | 43 +- tests/tokenization/test_detokenize.py | 209 +- tests/tokenization/test_get_eos.py | 2 +- tests/tokenization/test_tokenizer_group.py | 187 +- tests/tool_use/utils.py | 14 + tests/utils.py | 2 +- tests/v1/core/test_kv_cache_utils.py | 61 +- tests/v1/core/test_prefix_caching.py | 405 +++- tests/v1/core/test_scheduler.py | 539 ++++- tests/v1/core/test_specialized_manager.py | 8 +- tests/v1/e2e/test_cascade_attention.py | 10 +- tests/v1/e2e/test_spec_decode.py | 29 +- tests/v1/engine/conftest.py | 4 +- tests/v1/engine/test_async_llm.py | 33 + tests/v1/engine/test_engine_core.py | 110 +- tests/v1/engine/test_engine_core_client.py | 167 +- tests/v1/engine/test_output_processor.py | 130 +- tests/v1/engine/utils.py | 5 +- tests/v1/entrypoints/conftest.py | 34 +- .../llm/test_struct_output_generate.py | 225 ++- tests/v1/sample/test_sampler.py | 4 +- tests/v1/shutdown/test_delete.py | 97 + tests/v1/shutdown/test_forward_error.py | 129 ++ tests/v1/shutdown/test_processor_error.py | 69 + tests/v1/shutdown/test_startup_error.py | 97 + tests/v1/shutdown/utils.py | 5 + tests/v1/spec_decode/test_max_len.py | 57 + tests/v1/spec_decode/test_ngram.py | 63 +- tests/v1/structured_output/test_utils.py | 55 +- tests/v1/test_async_llm_dp.py | 4 +- tests/v1/test_oracle.py | 5 +- tests/v1/test_serial_utils.py | 208 +- tests/v1/tpu/test_basic.py | 7 +- tests/v1/tpu/test_multimodal.py | 93 + tests/v1/tpu/test_perf.py | 15 +- tests/v1/tpu/test_sampler.py | 22 + tests/v1/tpu/test_topk_topp_sampler.py | 22 +- tests/v1/tpu/worker/test_tpu_model_runner.py | 9 +- tests/v1/worker/test_gpu_input_batch.py | 1 - tests/v1/worker/test_gpu_model_runner.py | 1 - tests/worker/test_model_runner.py | 146 +- vllm/_custom_ops.py | 50 +- vllm/_ipex_ops.py | 20 +- vllm/assets/audio.py | 12 +- vllm/assets/image.py | 4 +- vllm/assets/video.py | 35 +- vllm/attention/backends/abstract.py | 5 + vllm/attention/backends/cpu_mla.py | 8 +- vllm/attention/backends/flash_attn.py | 6 +- vllm/attention/backends/flashinfer.py | 54 +- vllm/attention/backends/flashmla.py | 2 +- vllm/attention/backends/hpu_attn.py | 106 +- vllm/attention/backends/ipex_attn.py | 26 +- vllm/attention/backends/mla/common.py | 266 +-- vllm/attention/backends/rocm_aiter_mla.py | 412 ++++ vllm/attention/backends/rocm_flash_attn.py | 157 +- vllm/attention/backends/triton_mla.py | 2 +- vllm/attention/backends/utils.py | 28 +- vllm/attention/layer.py | 46 +- .../ops/chunked_prefill_paged_decode.py | 2 +- vllm/attention/ops/hpu_paged_attn.py | 1 - vllm/attention/ops/ipex_attn.py | 3 +- vllm/attention/ops/prefix_prefill.py | 1634 +++++++-------- vllm/attention/ops/rocm_aiter_mla.py | 42 + vllm/attention/ops/rocm_aiter_paged_attn.py | 101 + vllm/attention/ops/triton_flash_attention.py | 1762 +++++++++++------ .../utils}/fa_utils.py | 0 .../benchmarks}/__init__.py | 0 collect_env.py => vllm/collect_env.py | 32 +- vllm/compilation/activation_quant_fusion.py | 87 + 
vllm/compilation/backends.py | 41 +- vllm/compilation/compiler_interface.py | 65 +- vllm/compilation/fix_functionalization.py | 17 +- vllm/compilation/fusion.py | 8 +- vllm/compilation/fx_utils.py | 16 + vllm/compilation/inductor_pass.py | 32 + vllm/compilation/pass_manager.py | 31 +- vllm/compilation/sequence_parallelism.py | 266 +++ vllm/compilation/vllm_inductor_pass.py | 13 +- vllm/config.py | 1554 +++++++++------ vllm/connections.py | 2 +- vllm/core/scheduler.py | 33 +- vllm/distributed/communication_op.py | 6 + .../base_device_communicator.py | 34 + .../device_communicators/cuda_communicator.py | 25 + .../device_communicators/shm_broadcast.py | 35 +- vllm/distributed/kv_events.py | 295 +++ vllm/distributed/kv_transfer/__init__.py | 12 + .../kv_transfer/kv_connector/base.py | 4 + .../kv_transfer/kv_connector/factory.py | 57 +- .../kv_connector/mooncake_store_connector.py | 39 +- .../kv_connector/simple_connector.py | 95 +- .../kv_transfer/kv_connector/utils.py | 90 + .../kv_transfer/kv_connector/v1/__init__.py | 8 + .../kv_transfer/kv_connector/v1/base.py | 209 ++ .../kv_connector/v1/lmcache_connector.py | 131 ++ .../v1/shared_storage_connector.py | 383 ++++ ...ransfer_agent.py => kv_connector_agent.py} | 2 +- .../kv_transfer/kv_pipe/mooncake_pipe.py | 21 +- .../kv_transfer/kv_transfer_state.py | 70 + vllm/distributed/parallel_state.py | 93 +- vllm/distributed/utils.py | 22 +- vllm/engine/arg_utils.py | 1554 +++++++-------- vllm/engine/async_llm_engine.py | 131 +- vllm/engine/llm_engine.py | 205 +- vllm/engine/metrics.py | 9 +- vllm/engine/multiprocessing/client.py | 9 +- vllm/engine/multiprocessing/engine.py | 12 +- vllm/engine/output_processor/multi_step.py | 12 +- vllm/engine/output_processor/single_step.py | 15 +- vllm/engine/protocol.py | 7 +- vllm/entrypoints/api_server.py | 2 +- vllm/entrypoints/chat_utils.py | 76 +- vllm/entrypoints/cli/collect_env.py | 35 + vllm/entrypoints/cli/main.py | 2 + vllm/entrypoints/launcher.py | 98 +- vllm/entrypoints/llm.py | 188 +- vllm/entrypoints/openai/api_server.py | 42 +- vllm/entrypoints/openai/cli_args.py | 29 +- vllm/entrypoints/openai/protocol.py | 175 +- vllm/entrypoints/openai/run_batch.py | 4 +- vllm/entrypoints/openai/serving_chat.py | 62 +- vllm/entrypoints/openai/serving_embedding.py | 14 +- vllm/entrypoints/openai/serving_engine.py | 31 +- vllm/entrypoints/openai/serving_models.py | 70 + vllm/entrypoints/openai/serving_pooling.py | 14 +- vllm/entrypoints/openai/serving_score.py | 15 +- .../openai/tool_parsers/llama_tool_parser.py | 1 + .../tool_parsers/mistral_tool_parser.py | 17 + vllm/entrypoints/score_utils.py | 2 +- vllm/entrypoints/utils.py | 24 + vllm/env_override.py | 17 +- vllm/envs.py | 56 +- vllm/executor/executor_base.py | 2 +- vllm/executor/uniproc_executor.py | 4 +- vllm/forward_context.py | 39 +- vllm/inputs/__init__.py | 14 +- vllm/inputs/data.py | 260 +-- vllm/inputs/parse.py | 57 +- vllm/inputs/preprocess.py | 621 +++--- vllm/inputs/registry.py | 335 +--- vllm/logger.py | 23 +- vllm/lora/ops/triton_ops/__init__.py | 4 +- .../{lora_expand.py => lora_expand_op.py} | 0 .../{lora_shrink.py => lora_shrink_op.py} | 0 vllm/lora/punica_wrapper/punica_selector.py | 3 +- vllm/lora/resolver.py | 83 + vllm/lora/utils.py | 13 +- vllm/model_executor/custom_op.py | 6 +- .../guided_decoding/__init__.py | 30 +- .../guided_decoding/guidance_decoding.py | 18 +- .../guided_decoding/guided_fields.py | 11 +- .../outlines_logits_processors.py | 2 +- .../guided_decoding/reasoner/__init__.py | 35 - 
vllm/model_executor/guided_decoding/utils.py | 10 +- .../guided_decoding/xgrammar_decoding.py | 14 +- vllm/model_executor/layers/activation.py | 1 + ...me=AMD_Instinct_MI300X,dtype=fp8_w8a8.json | 164 ++ ...=1024,device_name=AMD_Instinct_MI300X.json | 200 ++ ...192,device_name=NVIDIA_H100_80GB_HBM3.json | 146 ++ .../E=128,N=192,device_name=NVIDIA_H20.json | 146 ++ .../E=128,N=192,device_name=NVIDIA_H200.json | 146 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 146 ++ .../E=128,N=384,device_name=NVIDIA_H20.json | 146 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 146 ++ .../E=128,N=384,device_name=NVIDIA_H200.json | 146 ++ ...512,device_name=NVIDIA_H100_80GB_HBM3.json | 146 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 146 ++ .../E=128,N=768,device_name=NVIDIA_H20.json | 146 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 146 ++ .../E=128,N=768,device_name=NVIDIA_H200.json | 146 ++ .../E=128,N=96,device_name=NVIDIA_H20.json | 146 ++ ...,dtype=fp8_w8a8,block_shape=[128,128].json | 146 ++ .../layers/fused_moe/configs/README | 3 +- .../layers/fused_moe/cutlass_moe.py | 43 +- .../layers/fused_moe/fused_marlin_moe.py | 4 +- .../layers/fused_moe/fused_moe.py | 25 +- vllm/model_executor/layers/fused_moe/layer.py | 25 +- .../layers/fused_moe/moe_permute_unpermute.py | 116 ++ .../layers/fused_moe/rocm_aiter_fused_moe.py | 434 +++- vllm/model_executor/layers/layernorm.py | 8 +- vllm/model_executor/layers/linear.py | 29 +- .../layers/mamba/ops/mamba_ssm.py | 4 +- .../layers/quantization/__init__.py | 20 +- .../layers/quantization/aqlm.py | 3 +- .../model_executor/layers/quantization/awq.py | 3 +- .../layers/quantization/awq_marlin.py | 14 +- .../layers/quantization/base_config.py | 13 +- .../layers/quantization/bitblas.py | 460 +++++ .../layers/quantization/bitsandbytes.py | 3 +- .../compressed_tensors/compressed_tensors.py | 7 +- .../compressed_tensors_moe.py | 171 +- .../layers/quantization/deepspeedfp.py | 5 +- .../layers/quantization/experts_int8.py | 3 +- .../layers/quantization/fbgemm_fp8.py | 3 +- .../model_executor/layers/quantization/fp8.py | 13 +- .../layers/quantization/gguf.py | 3 +- .../layers/quantization/gptq.py | 3 +- .../layers/quantization/gptq_bitblas.py | 444 +++++ .../layers/quantization/gptq_marlin.py | 9 +- .../layers/quantization/gptq_marlin_24.py | 7 +- .../layers/quantization/hqq_marlin.py | 3 +- .../layers/quantization/ipex_quant.py | 7 +- .../kernels/mixed_precision/__init__.py | 5 +- .../kernels/mixed_precision/bitblas.py | 299 +++ .../quantization/kernels/scaled_mm/cutlass.py | 2 +- .../layers/quantization/kv_cache.py | 36 + .../layers/quantization/marlin.py | 7 +- .../layers/quantization/modelopt.py | 7 +- .../layers/quantization/moe_wna16.py | 7 +- .../layers/quantization/neuron_quant.py | 3 +- .../layers/quantization/ptpc_fp8.py | 3 +- .../model_executor/layers/quantization/qqq.py | 3 +- .../layers/quantization/quark/quark.py | 38 +- .../layers/quantization/torchao.py | 3 +- .../layers/quantization/tpu_int8.py | 3 +- .../quantization/utils/bitblas_utils.py | 207 ++ .../layers/quantization/utils/int8_utils.py | 28 +- .../layers/quantization/utils/w8a8_utils.py | 266 ++- .../layers/rejection_sampler.py | 31 +- .../model_executor/layers/rotary_embedding.py | 362 +++- vllm/model_executor/layers/sampler.py | 9 +- .../layers/typical_acceptance_sampler.py | 15 +- vllm/model_executor/layers/utils.py | 51 +- .../layers/vocab_parallel_embedding.py | 3 +- 
vllm/model_executor/model_loader/loader.py | 82 +- vllm/model_executor/model_loader/neuron.py | 1 - vllm/model_executor/model_loader/utils.py | 37 +- .../model_loader/weight_utils.py | 25 +- vllm/model_executor/models/aimv2.py | 322 +++ vllm/model_executor/models/arctic.py | 16 +- vllm/model_executor/models/aria.py | 12 +- vllm/model_executor/models/aya_vision.py | 16 - vllm/model_executor/models/baichuan.py | 10 - vllm/model_executor/models/bamba.py | 10 - vllm/model_executor/models/bart.py | 10 - vllm/model_executor/models/bert.py | 201 +- vllm/model_executor/models/blip2.py | 58 +- vllm/model_executor/models/bloom.py | 10 - vllm/model_executor/models/chameleon.py | 18 +- vllm/model_executor/models/chatglm.py | 10 - vllm/model_executor/models/commandr.py | 23 +- vllm/model_executor/models/dbrx.py | 16 +- vllm/model_executor/models/deepseek.py | 10 - vllm/model_executor/models/deepseek_mtp.py | 11 - vllm/model_executor/models/deepseek_v2.py | 31 +- vllm/model_executor/models/deepseek_vl2.py | 32 +- vllm/model_executor/models/eagle.py | 13 - vllm/model_executor/models/exaone.py | 11 - vllm/model_executor/models/falcon.py | 10 - vllm/model_executor/models/florence2.py | 21 - vllm/model_executor/models/fuyu.py | 13 - vllm/model_executor/models/gemma.py | 10 - vllm/model_executor/models/gemma2.py | 14 +- vllm/model_executor/models/gemma3.py | 14 +- vllm/model_executor/models/gemma3_mm.py | 34 +- vllm/model_executor/models/glm.py | 5 +- vllm/model_executor/models/glm4.py | 18 +- vllm/model_executor/models/gpt2.py | 10 - vllm/model_executor/models/gpt_bigcode.py | 64 +- vllm/model_executor/models/gpt_j.py | 110 +- vllm/model_executor/models/gpt_neox.py | 10 - vllm/model_executor/models/granite.py | 11 - vllm/model_executor/models/granite_speech.py | 777 ++++++++ vllm/model_executor/models/granitemoe.py | 11 - .../model_executor/models/granitemoeshared.py | 11 - vllm/model_executor/models/grok1.py | 10 - vllm/model_executor/models/h2ovl.py | 16 +- vllm/model_executor/models/idefics3.py | 10 - vllm/model_executor/models/interfaces.py | 4 +- vllm/model_executor/models/interfaces_base.py | 9 - vllm/model_executor/models/internlm2.py | 12 +- vllm/model_executor/models/internvl.py | 18 +- vllm/model_executor/models/jais.py | 10 - vllm/model_executor/models/jamba.py | 10 - vllm/model_executor/models/kimi_vl.py | 75 +- vllm/model_executor/models/llama.py | 42 +- vllm/model_executor/models/llama4.py | 8 +- vllm/model_executor/models/llama_eagle.py | 32 +- vllm/model_executor/models/llama_eagle3.py | 242 +++ vllm/model_executor/models/llava.py | 29 +- vllm/model_executor/models/llava_next.py | 23 +- .../model_executor/models/llava_next_video.py | 16 - vllm/model_executor/models/llava_onevision.py | 54 +- vllm/model_executor/models/mamba.py | 51 +- vllm/model_executor/models/mamba2.py | 10 - vllm/model_executor/models/minicpm.py | 10 - vllm/model_executor/models/minicpmo.py | 36 +- vllm/model_executor/models/minicpmv.py | 20 +- vllm/model_executor/models/minimax_text_01.py | 79 +- vllm/model_executor/models/minimax_vl_01.py | 363 ++++ vllm/model_executor/models/mistral3.py | 45 +- vllm/model_executor/models/mixtral.py | 10 - vllm/model_executor/models/mixtral_quant.py | 10 - vllm/model_executor/models/mllama.py | 10 - vllm/model_executor/models/mllama4.py | 19 +- vllm/model_executor/models/modernbert.py | 325 +++ vllm/model_executor/models/molmo.py | 14 +- vllm/model_executor/models/mpt.py | 10 - vllm/model_executor/models/nemotron.py | 11 - vllm/model_executor/models/nemotron_nas.py | 8 - 
vllm/model_executor/models/olmo.py | 10 - vllm/model_executor/models/olmo2.py | 12 +- vllm/model_executor/models/olmoe.py | 113 +- vllm/model_executor/models/opt.py | 98 +- vllm/model_executor/models/orion.py | 101 +- vllm/model_executor/models/ovis2.py | 388 ++++ vllm/model_executor/models/paligemma.py | 14 +- vllm/model_executor/models/persimmon.py | 87 +- vllm/model_executor/models/phi.py | 10 - vllm/model_executor/models/phi3_small.py | 57 +- vllm/model_executor/models/phi3v.py | 18 +- vllm/model_executor/models/phi4mm.py | 1730 ++++++---------- vllm/model_executor/models/phi4mm_audio.py | 75 +- vllm/model_executor/models/phi4mm_utils.py | 4 +- vllm/model_executor/models/phimoe.py | 10 - vllm/model_executor/models/pixtral.py | 40 +- vllm/model_executor/models/plamo2.py | 736 +++++++ vllm/model_executor/models/qwen.py | 10 - vllm/model_executor/models/qwen2.py | 10 - .../models/qwen2_5_omni_thinker.py | 901 +++++++++ vllm/model_executor/models/qwen2_5_vl.py | 103 +- vllm/model_executor/models/qwen2_audio.py | 25 +- vllm/model_executor/models/qwen2_moe.py | 19 +- vllm/model_executor/models/qwen2_vl.py | 27 +- vllm/model_executor/models/qwen3.py | 10 - vllm/model_executor/models/qwen3_moe.py | 19 +- vllm/model_executor/models/qwen_vl.py | 2 +- vllm/model_executor/models/registry.py | 28 +- vllm/model_executor/models/skyworkr1v.py | 18 +- vllm/model_executor/models/solar.py | 11 - vllm/model_executor/models/stablelm.py | 18 +- vllm/model_executor/models/starcoder2.py | 10 - vllm/model_executor/models/transformers.py | 48 +- vllm/model_executor/models/ultravox.py | 16 - vllm/model_executor/models/utils.py | 2 +- vllm/model_executor/models/vision.py | 25 +- vllm/model_executor/models/whisper.py | 10 - vllm/model_executor/models/zamba2.py | 19 - vllm/model_executor/parameter.py | 33 +- vllm/multimodal/__init__.py | 11 +- vllm/multimodal/audio.py | 76 +- vllm/multimodal/base.py | 328 +-- vllm/multimodal/hasher.py | 32 +- vllm/multimodal/image.py | 80 +- vllm/multimodal/inputs.py | 274 ++- vllm/multimodal/parse.py | 39 +- vllm/multimodal/processing.py | 643 +++--- vllm/multimodal/profiling.py | 42 +- vllm/multimodal/registry.py | 305 +-- vllm/multimodal/utils.py | 41 +- vllm/multimodal/video.py | 71 +- vllm/outputs.py | 41 +- vllm/platforms/cpu.py | 2 - vllm/platforms/cuda.py | 18 +- vllm/platforms/interface.py | 26 +- vllm/platforms/neuron.py | 2 +- vllm/platforms/rocm.py | 85 +- vllm/platforms/tpu.py | 29 +- vllm/pooling_params.py | 11 +- vllm/profiler/__init__.py | 7 - vllm/reasoning/__init__.py | 2 + vllm/reasoning/qwen3_reasoning_parser.py | 150 ++ vllm/sampling_params.py | 63 +- vllm/sequence.py | 152 +- vllm/spec_decode/draft_model_runner.py | 19 +- vllm/spec_decode/metrics.py | 11 +- vllm/spec_decode/multi_step_worker.py | 13 +- .../spec_decode/smaller_tp_proposer_worker.py | 3 +- vllm/spec_decode/spec_decode_worker.py | 5 +- vllm/transformers_utils/config.py | 67 +- vllm/transformers_utils/configs/__init__.py | 8 +- vllm/transformers_utils/configs/dbrx.py | 3 +- vllm/transformers_utils/configs/eagle.py | 19 +- vllm/transformers_utils/configs/exaone.py | 44 +- .../configs/minimax_text_01.py | 69 + .../configs/minimax_vl_01.py | 70 + vllm/transformers_utils/configs/olmo2.py | 168 -- vllm/transformers_utils/configs/ovis2.py | 170 ++ vllm/transformers_utils/detokenizer.py | 4 +- vllm/transformers_utils/processor.py | 69 +- .../transformers_utils/processors/__init__.py | 3 +- vllm/transformers_utils/processors/ovis2.py | 399 ++++ vllm/transformers_utils/tokenizer.py | 57 +- 
vllm/transformers_utils/tokenizer_base.py | 36 +- .../{tokenizer_group => }/tokenizer_group.py | 39 +- .../tokenizer_group/__init__.py | 56 - .../tokenizer_group/base_tokenizer_group.py | 68 - .../tokenizer_group/ray_tokenizer_group.py | 244 --- vllm/transformers_utils/tokenizers/mistral.py | 13 +- vllm/triton_utils/__init__.py | 2 +- vllm/triton_utils/importing.py | 34 +- vllm/usage/usage_lib.py | 17 +- vllm/utils.py | 103 +- vllm/v1/attention/backends/flash_attn.py | 139 +- vllm/v1/attention/backends/flashinfer.py | 635 ++++++ vllm/v1/attention/backends/mla/common.py | 173 +- vllm/v1/attention/backends/mla/flashmla.py | 2 +- vllm/v1/attention/backends/mla/triton_mla.py | 2 +- vllm/v1/attention/backends/pallas.py | 15 + vllm/v1/core/block_pool.py | 49 +- vllm/v1/core/kv_cache_manager.py | 78 +- vllm/v1/core/kv_cache_utils.py | 27 +- vllm/v1/core/sched/interface.py | 10 +- vllm/v1/core/sched/output.py | 7 +- vllm/v1/core/sched/scheduler.py | 224 ++- vllm/v1/core/specialized_manager.py | 39 +- vllm/v1/engine/__init__.py | 21 +- vllm/v1/engine/async_llm.py | 277 ++- vllm/v1/engine/core.py | 215 +- vllm/v1/engine/core_client.py | 378 ++-- vllm/v1/engine/detokenizer.py | 253 ++- vllm/v1/engine/exceptions.py | 16 + vllm/v1/engine/llm_engine.py | 53 +- vllm/v1/engine/mm_input_cache.py | 5 +- vllm/v1/engine/output_processor.py | 43 +- vllm/v1/engine/processor.py | 89 +- vllm/v1/executor/abstract.py | 11 +- vllm/v1/executor/multiproc_executor.py | 336 ++-- vllm/v1/metrics/loggers.py | 125 +- vllm/v1/request.py | 6 +- vllm/v1/sample/ops/topk_topp_sampler.py | 27 +- vllm/v1/sample/rejection_sampler.py | 16 +- vllm/v1/sample/tpu/metadata.py | 19 +- vllm/v1/serial_utils.py | 160 +- vllm/v1/spec_decode/eagle.py | 199 +- vllm/v1/spec_decode/metrics.py | 132 +- vllm/v1/spec_decode/ngram_proposer.py | 10 +- vllm/v1/structured_output/__init__.py | 71 +- vllm/v1/structured_output/backend_guidance.py | 105 +- vllm/v1/structured_output/backend_types.py | 31 + vllm/v1/structured_output/backend_xgrammar.py | 177 +- vllm/v1/structured_output/request.py | 2 + vllm/v1/structured_output/utils.py | 120 -- vllm/v1/utils.py | 50 +- vllm/v1/worker/gpu_input_batch.py | 1 - vllm/v1/worker/gpu_model_runner.py | 216 +- vllm/v1/worker/gpu_worker.py | 42 +- vllm/v1/worker/lora_model_runner_mixin.py | 16 +- vllm/v1/worker/tpu_model_runner.py | 485 ++++- vllm/v1/worker/tpu_worker.py | 10 +- vllm/v1/worker/utils.py | 6 +- vllm/worker/cache_engine.py | 23 +- vllm/worker/cpu_enc_dec_model_runner.py | 2 +- vllm/worker/cpu_model_runner.py | 60 +- vllm/worker/enc_dec_model_runner.py | 21 +- vllm/worker/hpu_model_runner.py | 521 ++--- vllm/worker/hpu_worker.py | 8 +- vllm/worker/model_runner.py | 192 +- vllm/worker/multi_step_model_runner.py | 12 +- vllm/worker/neuron_model_runner.py | 20 +- vllm/worker/pooling_model_runner.py | 15 +- vllm/worker/tpu_worker.py | 4 +- vllm/worker/worker.py | 31 +- vllm/worker/xpu_model_runner.py | 21 +- vllm/worker/xpu_worker.py | 9 +- 892 files changed, 44542 insertions(+), 16160 deletions(-) create mode 100644 benchmarks/auto_tune.sh create mode 100644 benchmarks/kernels/benchmark_bitblas.py create mode 100644 benchmarks/kernels/benchmark_moe_permute_unpermute.py create mode 100644 csrc/attention/mla/cutlass_mla_entry.cu create mode 100644 csrc/attention/mla/cutlass_mla_kernels.cu create mode 100644 csrc/moe/moe_permute_unpermute_op.cu create mode 100644 csrc/moe/permute_unpermute_kernels/dispatch.h create mode 100644 csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.cu create mode 
100644 csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.h create mode 100644 csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.inl create mode 100644 csrc/quantization/activation_kernels.cu create mode 100644 csrc/rocm/skinny_gemms.cu create mode 100644 docker/Dockerfile.nightly_torch delete mode 100644 docs/source/api/engine/async_llm_engine.md delete mode 100644 docs/source/api/engine/index.md delete mode 100644 docs/source/api/engine/llm_engine.md delete mode 100644 docs/source/api/inference_params.md delete mode 100644 docs/source/api/model/adapters.md delete mode 100644 docs/source/api/model/index.md delete mode 100644 docs/source/api/model/interfaces.md delete mode 100644 docs/source/api/model/interfaces_base.md delete mode 100644 docs/source/api/multimodal/index.md delete mode 100644 docs/source/api/multimodal/inputs.md delete mode 100644 docs/source/api/multimodal/parse.md delete mode 100644 docs/source/api/multimodal/processing.md delete mode 100644 docs/source/api/multimodal/profiling.md delete mode 100644 docs/source/api/multimodal/registry.md delete mode 100644 docs/source/api/offline_inference/index.md delete mode 100644 docs/source/api/offline_inference/llm.md delete mode 100644 docs/source/api/offline_inference/llm_inputs.md create mode 100644 docs/source/api/summary.md create mode 100644 docs/source/assets/deployment/anything-llm-chat-with-doc.png create mode 100644 docs/source/assets/deployment/anything-llm-chat-without-doc.png create mode 100644 docs/source/assets/deployment/anything-llm-provider.png create mode 100644 docs/source/assets/deployment/anything-llm-upload-doc.png create mode 100644 docs/source/assets/deployment/open_webui.png create mode 100644 docs/source/assets/deployment/streamlit-chat.png create mode 100644 docs/source/autodoc2_docstring_parser.py create mode 100644 docs/source/contributing/deprecation_policy.md create mode 100644 docs/source/deployment/frameworks/anything-llm.md create mode 100644 docs/source/deployment/frameworks/open-webui.md create mode 100644 docs/source/deployment/frameworks/streamlit.md create mode 100644 docs/source/deployment/security.md create mode 100644 docs/source/features/quantization/bitblas.md create mode 100644 docs/source/features/quantization/modelopt.md create mode 100644 examples/lmcache/README.md create mode 100644 examples/lmcache/cpu_offload_lmcache.py rename examples/{offline_inference/disaggregated_prefill_lmcache.py => lmcache/disagg_prefill_lmcache_v0.py} (100%) create mode 100644 examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml create mode 100644 examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml create mode 100644 examples/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh create mode 100644 examples/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py create mode 100644 examples/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh create mode 100644 examples/lmcache/kv_cache_sharing_lmcache_v1.py create mode 100644 examples/offline_inference/batch_llm_inference.py delete mode 100644 examples/offline_inference/cpu_offload_lmcache.py create mode 100644 examples/offline_inference/disaggregated-prefill-v1/decode_example.py create mode 100644 examples/offline_inference/disaggregated-prefill-v1/prefill_example.py create mode 100644 examples/offline_inference/disaggregated-prefill-v1/run.sh delete mode 100644 examples/offline_inference/distributed.py create mode 100644 
examples/offline_inference/qwen2_5_omni/README.md create mode 100644 examples/offline_inference/qwen2_5_omni/only_thinker.py create mode 100644 examples/online_serving/kv_events.sh create mode 100644 examples/online_serving/kv_events_subscriber.py create mode 100644 examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py create mode 100644 examples/online_serving/openai_embedding_matryoshka_fy.py create mode 100644 examples/online_serving/ray_serve_deepseek.py create mode 100644 examples/online_serving/streamlit_openai_chatbot_webserver.py create mode 100644 examples/tool_chat_template_llama4_json.jinja create mode 100644 examples/tool_chat_template_mistral3.jinja create mode 100644 requirements/nightly_torch_test.txt create mode 100644 tests/compile/test_sequence_parallelism.py create mode 100644 tests/compile/test_silu_mul_quant_fusion.py create mode 100644 tests/distributed/conftest.py create mode 100644 tests/distributed/test_events.py create mode 100644 tests/distributed/test_sequence_parallel.py create mode 100644 tests/engine/test_options.py delete mode 100644 tests/engine/test_skip_tokenizer_init.py create mode 100644 tests/entrypoints/openai/test_lora_resolvers.py create mode 100644 tests/entrypoints/openai/test_openai_schema.py create mode 100644 tests/entrypoints/openai/test_truncation.py rename tests/kernels/{ => attention}/conftest.py (100%) rename tests/kernels/{ => attention}/test_attention.py (99%) create mode 100644 tests/kernels/attention/test_attention_selector.py rename tests/kernels/{ => attention}/test_blocksparse_attention.py (99%) rename tests/kernels/{ => attention}/test_cache.py (93%) rename tests/kernels/{ => attention}/test_cascade_flash_attn.py (100%) rename tests/kernels/{ => attention}/test_encoder_decoder_attn.py (100%) rename tests/kernels/{ => attention}/test_flash_attn.py (99%) rename tests/kernels/{ => attention}/test_flashinfer.py (100%) rename tests/kernels/{ => attention}/test_flashmla.py (100%) rename tests/kernels/{ => attention}/test_lightning_attn.py (100%) rename tests/kernels/{ => attention}/test_merge_attn_states.py (100%) rename tests/kernels/{ => attention}/test_mha_attn.py (100%) rename tests/kernels/{ => attention}/test_mla_decode_cpu.py (100%) rename tests/kernels/{ => attention}/test_prefix_prefill.py (100%) create mode 100644 tests/kernels/attention/test_rocm_attention_selector.py rename tests/kernels/{ => attention}/test_triton_decode_attention.py (100%) rename tests/kernels/{ => core}/test_activation.py (97%) rename tests/kernels/{ => core}/test_fused_quant_layernorm.py (100%) rename tests/kernels/{ => core}/test_layernorm.py (100%) create mode 100644 tests/kernels/core/test_opcheck.py rename tests/kernels/{ => core}/test_permute_cols.py (100%) rename tests/kernels/{ => core}/test_pos_encoding.py (99%) rename tests/kernels/{ => core}/test_rotary_embedding.py (100%) rename tests/kernels/{ => core}/test_uva.py (100%) rename tests/kernels/{ => mamba}/test_causal_conv1d.py (100%) rename tests/kernels/{ => mamba}/test_mamba_mixer2.py (100%) rename tests/kernels/{ => mamba}/test_mamba_ssm.py (100%) rename tests/kernels/{ => mamba}/test_mamba_ssm_ssd.py (100%) create mode 100644 tests/kernels/moe/test_cutlass_moe.py rename tests/kernels/{ => moe}/test_moe.py (99%) create mode 100644 tests/kernels/moe/test_moe_permute_unpermute.py rename tests/kernels/{ => moe}/test_triton_moe_ptpc_fp8.py (100%) rename tests/kernels/{ => quantization}/test_allspark_gemm.py (100%) rename tests/kernels/{ => quantization}/test_aqlm.py 
(100%) rename tests/kernels/{ => quantization}/test_awq.py (100%) rename tests/kernels/{ => quantization}/test_awq_marlin.py (98%) rename tests/kernels/{ => quantization}/test_awq_triton.py (100%) rename tests/kernels/{ => quantization}/test_block_fp8.py (98%) rename tests/kernels/{ => quantization}/test_block_int8.py (99%) rename tests/kernels/{ => quantization}/test_cutlass_2of4_sparse.py (99%) rename tests/kernels/{test_cutlass.py => quantization/test_cutlass_scaled_mm.py} (99%) rename tests/kernels/{ => quantization}/test_fp8_quant.py (100%) rename tests/kernels/{ => quantization}/test_ggml.py (100%) rename tests/kernels/{ => quantization}/test_gguf.py (100%) rename tests/kernels/{ => quantization}/test_gptq.py (100%) rename tests/kernels/{ => quantization}/test_int8_kernel.py (100%) rename tests/kernels/{ => quantization}/test_int8_quant.py (100%) rename tests/kernels/{ => quantization}/test_machete_mm.py (100%) rename tests/kernels/{ => quantization}/test_marlin_gemm.py (100%) rename tests/kernels/{ => quantization}/test_nvfp4_quant.py (100%) rename tests/kernels/{ => quantization}/test_nvfp4_scaled_mm.py (100%) create mode 100644 tests/kernels/quantization/test_rocm_skinny_gemms.py rename tests/kernels/{ => quantization}/test_triton_scaled_mm.py (100%) delete mode 100644 tests/kernels/test_attention_selector.py create mode 100644 tests/kernels/test_cutlass_mla_decode.py delete mode 100644 tests/kernels/test_cutlass_moe.py create mode 100644 tests/kernels/test_fused_quant_activation.py delete mode 100644 tests/kernels/test_rocm_attention_selector.py create mode 100644 tests/kernels/test_triton_flash_attention.py delete mode 100644 tests/kernels/test_utils.py delete mode 100644 tests/kernels/utils_block.py create mode 100644 tests/lora/test_resolver.py delete mode 100644 tests/models/decoder_only/language/test_hybrid.py delete mode 100644 tests/models/decoder_only/language/test_mamba.py delete mode 100644 tests/models/embedding/utils.py delete mode 100644 tests/models/encoder_decoder/__init__.py delete mode 100644 tests/models/encoder_decoder/audio_language/__init__.py delete mode 100644 tests/models/encoder_decoder/language/__init__.py delete mode 100644 tests/models/encoder_decoder/vision_language/__init__.py delete mode 100644 tests/models/encoder_decoder/vision_language/test_broadcast.py rename tests/models/{decoder_only => language}/__init__.py (100%) rename tests/models/{decoder_only/audio_language => language/generation}/__init__.py (100%) rename tests/models/{encoder_decoder/language => language/generation}/test_bart.py (98%) rename tests/models/{decoder_only/language/test_models.py => language/generation/test_common.py} (65%) rename tests/models/{decoder_only/language => language/generation}/test_granite.py (89%) create mode 100644 tests/models/language/generation/test_hybrid.py rename tests/models/{decoder_only/language => language/generation}/test_mistral.py (86%) rename tests/models/{decoder_only/language => language/generation}/test_phimoe.py (96%) rename tests/models/{decoder_only/language => language/pooling}/__init__.py (100%) rename tests/models/{embedding/language/test_cls_models.py => language/pooling/test_classification.py} (91%) rename tests/models/{embedding/language => language/pooling}/test_embedding.py (94%) rename tests/models/{embedding/language => language/pooling}/test_gritlm.py (64%) rename tests/models/{embedding/language => language/pooling}/test_jina.py (82%) rename tests/models/{embedding/language => language/pooling}/test_scoring.py (72%) create mode 
100644 tests/models/language/pooling/test_snowflake_arctic_embed.py create mode 100644 tests/models/language/pooling/test_truncation_control.py rename tests/models/{decoder_only/vision_language => multimodal/generation}/__init__.py (100%) rename tests/models/{decoder_only/vision_language/test_models.py => multimodal/generation/test_common.py} (91%) rename tests/models/{encoder_decoder/vision_language => multimodal/generation}/test_florence2.py (87%) create mode 100644 tests/models/multimodal/generation/test_granite_speech.py rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_interleaved.py (96%) rename tests/models/{encoder_decoder/vision_language => multimodal/generation}/test_mllama.py (95%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_phi4mm.py (98%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_pixtral.py (98%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_qwen2_vl.py (99%) rename tests/models/{decoder_only/audio_language => multimodal/generation}/test_ultravox.py (79%) rename tests/models/{encoder_decoder/audio_language => multimodal/generation}/test_whisper.py (84%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/__init__.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/builders.py (97%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/case_filtering.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/core.py (99%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/custom_inputs.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/model_utils.py (91%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/runners.py (94%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/types.py (97%) rename tests/models/{embedding => multimodal/pooling}/__init__.py (100%) rename tests/models/{embedding/vision_language => multimodal/pooling}/test_dse_qwen2_vl.py (99%) rename tests/models/{decoder_only/vision_language => multimodal/pooling}/test_intern_vit.py (84%) rename tests/models/{embedding/vision_language => multimodal/pooling}/test_llava_next.py (99%) rename tests/models/{embedding/vision_language => multimodal/pooling}/test_phi3v.py (98%) create mode 100644 tests/models/multimodal/processing/test_minimax_vl_01.py create mode 100644 tests/models/multimodal/processing/test_phi4mm.py rename tests/models/{embedding/language => quantization}/__init__.py (100%) rename tests/models/{decoder_only/language => quantization}/test_aqlm.py (94%) rename tests/models/{decoder_only/vision_language => quantization}/test_awq.py (96%) create mode 100644 tests/models/quantization/test_bitblas.py rename tests/models/{decoder_only/language => quantization}/test_fp8.py (97%) rename tests/models/{decoder_only/language => quantization}/test_gguf.py (97%) create mode 100644 tests/models/quantization/test_gptq_bitblas.py rename tests/models/{decoder_only/language => quantization}/test_gptq_marlin.py (94%) rename tests/models/{decoder_only/language => quantization}/test_gptq_marlin_24.py (95%) rename tests/models/{decoder_only/language => quantization}/test_modelopt.py (99%) rename tests/models/{decoder_only/language => quantization}/test_nvfp4.py (99%) create mode 100644 
tests/multimodal/assets/image1.png create mode 100644 tests/multimodal/assets/image2.png create mode 100644 tests/multimodal/test_hasher.py create mode 100644 tests/reasoning/test_qwen3_reasoning_parser.py create mode 100644 tests/spec_decode/test_memory_usage.py create mode 100644 tests/v1/shutdown/test_delete.py create mode 100644 tests/v1/shutdown/test_forward_error.py create mode 100644 tests/v1/shutdown/test_processor_error.py create mode 100644 tests/v1/shutdown/test_startup_error.py create mode 100644 tests/v1/shutdown/utils.py create mode 100644 tests/v1/spec_decode/test_max_len.py create mode 100644 tests/v1/tpu/test_multimodal.py create mode 100644 vllm/attention/backends/rocm_aiter_mla.py create mode 100644 vllm/attention/ops/rocm_aiter_mla.py create mode 100644 vllm/attention/ops/rocm_aiter_paged_attn.py rename vllm/{vllm_flash_attn => attention/utils}/fa_utils.py (100%) rename {tests/models/embedding/vision_language => vllm/benchmarks}/__init__.py (100%) rename collect_env.py => vllm/collect_env.py (96%) create mode 100644 vllm/compilation/activation_quant_fusion.py create mode 100644 vllm/compilation/sequence_parallelism.py create mode 100644 vllm/distributed/kv_events.py create mode 100644 vllm/distributed/kv_transfer/kv_connector/utils.py create mode 100644 vllm/distributed/kv_transfer/kv_connector/v1/__init__.py create mode 100644 vllm/distributed/kv_transfer/kv_connector/v1/base.py create mode 100644 vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py create mode 100644 vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py rename vllm/distributed/kv_transfer/{kv_transfer_agent.py => kv_connector_agent.py} (97%) create mode 100644 vllm/distributed/kv_transfer/kv_transfer_state.py create mode 100644 vllm/entrypoints/cli/collect_env.py rename vllm/lora/ops/triton_ops/{lora_expand.py => lora_expand_op.py} (100%) rename vllm/lora/ops/triton_ops/{lora_shrink.py => lora_shrink_op.py} (100%) create mode 100644 vllm/lora/resolver.py delete mode 100644 vllm/model_executor/guided_decoding/reasoner/__init__.py create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json create mode 100644 
vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json create mode 100644 vllm/model_executor/layers/fused_moe/moe_permute_unpermute.py create mode 100644 vllm/model_executor/layers/quantization/bitblas.py create mode 100644 vllm/model_executor/layers/quantization/gptq_bitblas.py create mode 100644 vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py create mode 100644 vllm/model_executor/layers/quantization/utils/bitblas_utils.py create mode 100644 vllm/model_executor/models/aimv2.py create mode 100644 vllm/model_executor/models/granite_speech.py create mode 100644 vllm/model_executor/models/llama_eagle3.py create mode 100644 vllm/model_executor/models/minimax_vl_01.py create mode 100644 vllm/model_executor/models/modernbert.py create mode 100644 vllm/model_executor/models/ovis2.py create mode 100644 vllm/model_executor/models/plamo2.py create mode 100644 vllm/model_executor/models/qwen2_5_omni_thinker.py create mode 100644 vllm/reasoning/qwen3_reasoning_parser.py create mode 100644 vllm/transformers_utils/configs/minimax_text_01.py create mode 100644 vllm/transformers_utils/configs/minimax_vl_01.py delete mode 100644 vllm/transformers_utils/configs/olmo2.py create mode 100644 vllm/transformers_utils/configs/ovis2.py create mode 100644 vllm/transformers_utils/processors/ovis2.py rename vllm/transformers_utils/{tokenizer_group => }/tokenizer_group.py (77%) delete mode 100644 vllm/transformers_utils/tokenizer_group/__init__.py delete mode 100644 vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py delete mode 100644 vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py create mode 100755 vllm/v1/attention/backends/flashinfer.py create mode 100644 vllm/v1/engine/exceptions.py diff --git a/.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml b/.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml index d70ecb2a7e7..d392a5f6406 100644 --- a/.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml +++ b/.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). 
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m deepseek-ai/DeepSeek-V2-Lite-Chat -b "auto" -l 1000 -f 5 -t 2 model_name: "deepseek-ai/DeepSeek-V2-Lite-Chat" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml index 4397effa82c..4b7776b20da 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml @@ -1,3 +1,4 @@ +# For hf script, without -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 model_name: "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml index fa6ea236ef0..05b66175199 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml @@ -1,3 +1,4 @@ +# For hf script, without -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-70B-Instruct -b 32 -l 250 -f 5 model_name: "meta-llama/Meta-Llama-3-70B-Instruct" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml index c513159c6fa..12a87e52901 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors -b auto -l 1000 -f 5 -t 1 model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml index 5e57fcbcf7d..7c7a1ca6edb 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 -t 1 model_name: "nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml index 374171f1f91..1d45c377045 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). 
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test -b 32 -l 1000 -f 5 -t 1 model_name: "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml index dc36b705634..29a145252ef 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 250 -f 5 -t 1 model_name: "neuralmagic/Meta-Llama-3-8B-Instruct-FP8" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml index 0ecfc01ef04..3a5f120b3e7 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test -b "auto" -l 250 -f 5 -t 1 model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml index bc290029859..5ff57bae492 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test -b "auto" -l 250 -f 5 -t 1 model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml index 3964f3be5e8..07fb130464a 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). 
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-nonuniform-test -b auto -l 1000 -f 5 -t 1 model_name: "nm-testing/Meta-Llama-3-8B-Instruct-nonuniform-test" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml index fb4b4915ab9..c27886525bb 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml @@ -1,4 +1,5 @@ -# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-8B-Instruct -b 32 -l 250 -f 5 -t 1 +# For hf script, without -t option (tensor parallel size). +# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-8B-Instruct -b 32 -l 250 -f 5 model_name: "meta-llama/Meta-Llama-3-8B-Instruct" tasks: - name: "gsm8k" diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml index 04245865983..56ec933c9cc 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m HandH1998/QQQ-Llama-3-8b-g128 -b 32 -l 1000 -f 5 -t 1 model_name: "HandH1998/QQQ-Llama-3-8b-g128" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml index 78347f63fa7..83e11f2be77 100644 --- a/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1 model_name: "neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml b/.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml index 4ef8b5c3709..15a836dddbd 100644 --- a/.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml +++ b/.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m mgoin/Minitron-4B-Base-FP8 -b auto -l 1000 -f 5 -t 1 model_name: "mgoin/Minitron-4B-Base-FP8" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml b/.buildkite/lm-eval-harness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml index 75a24e408e7..5633a2d9b82 100644 --- a/.buildkite/lm-eval-harness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml +++ b/.buildkite/lm-eval-harness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). 
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Mixtral-8x22B-Instruct-v0.1-FP8-dynamic -b "auto" -l 250 -f 5 -t 8 model_name: "neuralmagic/Mixtral-8x22B-Instruct-v0.1-FP8-dynamic" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml b/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml index 436ec21924c..b8024c80e8e 100644 --- a/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml +++ b/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash ./run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8 -b "auto" -l 250 -f 5 -t 4 model_name: "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml b/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml index dec9164d1b8..188a112ca3a 100644 --- a/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml +++ b/.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml @@ -1,4 +1,5 @@ -# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1 -b 32 -l 250 -f 5 -t 4 +# For hf script, without -t option (tensor parallel size). +# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1 -b 32 -l 250 -f 5 model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1" tasks: - name: "gsm8k" diff --git a/.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml index 166af81a3f0..099e0f465ba 100644 --- a/.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml @@ -1,11 +1,12 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16 -b auto -l 1319 -f 5 -t 1 model_name: "nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16" tasks: - name: "gsm8k" metrics: - name: "exact_match,strict-match" - value: 0.31 + value: 0.30 - name: "exact_match,flexible-extract" - value: 0.47 + value: 0.465 limit: 1319 num_fewshot: 5 diff --git a/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml b/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml index 42936fbfbe7..426e8ff6987 100644 --- a/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml +++ b/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen2-1.5B-Instruct-FP8W8 -b auto -l 1000 -f 5 -t 1 model_name: "nm-testing/Qwen2-1.5B-Instruct-FP8W8" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml index 43ff2bc5ce3..8d57e9dabd5 100644 --- a/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). 
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1 model_name: "neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml index 259799ba8bf..1bce7e7fdf1 100644 --- a/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml +++ b/.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-W8A16-compressed-tensors.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen2-1.5B-Instruct-W8A16-Channelwise -b "auto" -l 1000 -f 5 -t 1 model_name: "nm-testing/Qwen2-1.5B-Instruct-W8A16-Channelwise" tasks: diff --git a/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct.yaml b/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct.yaml index 45d5efc8860..fc9707d0d6f 100644 --- a/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct.yaml +++ b/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash ./run-lm-eval-gsm-vllm-baseline.sh -m Qwen/Qwen2-57B-A14B-Instruct -b "auto" -l 250 -f 5 -t 4 model_name: "Qwen/Qwen2-57B-A14B-Instruct" tasks: diff --git a/.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml b/.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml index 2928d75ce44..9a9c749748e 100644 --- a/.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml +++ b/.buildkite/lm-eval-harness/configs/SparseLlama3.1_2of4_fp8_compressed.yaml @@ -1,3 +1,4 @@ +# For vllm script, with -t option (tensor parallel size). # bash ./run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM -b "auto" -t 2 model_name: "nm-testing/SparseLlama-3.1-8B-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM" tasks: diff --git a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py index 4ae23eff62f..6015a83e829 100644 --- a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py +++ b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py @@ -16,7 +16,7 @@ import pytest import yaml -RTOL = 0.05 +RTOL = 0.08 TEST_DATA_FILE = os.environ.get( "LM_EVAL_TEST_DATA_FILE", ".buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml") diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 3354ea37002..4cc9c70a6ad 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -1,20 +1,20 @@ steps: - - label: "Build wheel - CUDA 12.4" + - label: "Build wheel - CUDA 12.8" agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." 
- "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/scripts/upload-wheels.sh" env: DOCKER_BUILDKIT: "1" - - label: "Build wheel - CUDA 12.1" + - label: "Build wheel - CUDA 12.6" agents: queue: cpu_queue_postmerge commands: - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.6.3 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'" - "bash .buildkite/scripts/upload-wheels.sh" @@ -48,7 +48,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ." - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT" - label: "Build and publish TPU release image" @@ -57,6 +57,8 @@ steps: agents: queue: tpu_queue_postmerge commands: + - "yes | docker system prune -a" + - "git fetch --all" - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f docker/Dockerfile.tpu ." - "docker push vllm/vllm-tpu:nightly" - "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT" @@ -86,3 +88,18 @@ steps: - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" + + - block: "Build Neuron release image" + key: block-neuron-release-image-build + depends_on: ~ + + - label: "Build and publish Neuron release image" + depends_on: block-neuron-release-image-build + agents: + queue: neuron-postmerge + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:latest --progress plain -f docker/Dockerfile.neuron ." 
+ - "docker push public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version)" + env: + DOCKER_BUILDKIT: "1" diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 469422ddec2..d29903bf497 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -75,30 +75,51 @@ HF_MOUNT="/root/.cache/huggingface" commands=$@ echo "Commands:$commands" #ignore certain kernels tests -if [[ $commands == *" kernels "* ]]; then +if [[ $commands == *" kernels/core"* ]]; then commands="${commands} \ - --ignore=kernels/test_attention_selector.py \ - --ignore=kernels/test_blocksparse_attention.py \ - --ignore=kernels/test_causal_conv1d.py \ - --ignore=kernels/test_cutlass.py \ - --ignore=kernels/test_encoder_decoder_attn.py \ - --ignore=kernels/test_flash_attn.py \ - --ignore=kernels/test_flashinfer.py \ - --ignore=kernels/test_int8_quant.py \ - --ignore=kernels/test_machete_gemm.py \ - --ignore=kernels/test_mamba_ssm.py \ - --ignore=kernels/test_marlin_gemm.py \ - --ignore=kernels/test_moe.py \ - --ignore=kernels/test_prefix_prefill.py \ - --ignore=kernels/test_rand.py \ - --ignore=kernels/test_sampler.py \ - --ignore=kernels/test_cascade_flash_attn.py \ - --ignore=kernels/test_mamba_mixer2.py \ - --ignore=kernels/test_aqlm.py \ - --ignore=kernels/test_machete_mm.py \ - --ignore=kernels/test_mha_attn.py \ - --ignore=kernels/test_block_fp8.py \ - --ignore=kernels/test_permute_cols.py" + --ignore=kernels/core/test_fused_quant_layernorm.py \ + --ignore=kernels/core/test_permute_cols.py" +fi + +if [[ $commands == *" kernels/attention"* ]]; then + commands="${commands} \ + --ignore=kernels/attention/stest_attention_selector.py \ + --ignore=kernels/attention/test_blocksparse_attention.py \ + --ignore=kernels/attention/test_encoder_decoder_attn.py \ + --ignore=kernels/attention/test_attention_selector.py \ + --ignore=kernels/attention/test_flash_attn.py \ + --ignore=kernels/attention/test_flashinfer.py \ + --ignore=kernels/attention/test_prefix_prefill.py \ + --ignore=kernels/attention/test_cascade_flash_attn.py \ + --ignore=kernels/attention/test_mha_attn.py \ + --ignore=kernels/attention/test_lightning_attn.py \ + --ignore=kernels/attention/test_attention.py" +fi + +if [[ $commands == *" kernels/quantization"* ]]; then + commands="${commands} \ + --ignore=kernels/quantization/test_int8_quant.py \ + --ignore=kernels/quantization/test_aqlm.py \ + --ignore=kernels/quantization/test_machete_mm.py \ + --ignore=kernels/quantization/test_block_fp8.py \ + --ignore=kernels/quantization/test_block_int8.py \ + --ignore=kernels/quantization/test_marlin_gemm.py \ + --ignore=kernels/quantization/test_cutlass_scaled_mm.py \ + --ignore=kernels/quantization/test_int8_kernel.py" +fi + +if [[ $commands == *" kernels/mamba"* ]]; then + commands="${commands} \ + --ignore=kernels/mamba/test_mamba_mixer2.py \ + --ignore=kernels/mamba/test_causal_conv1d.py \ + --ignore=kernels/mamba/test_mamba_ssm_ssd.py" +fi + +if [[ $commands == *" kernels/moe"* ]]; then + commands="${commands} \ + --ignore=kernels/moe/test_moe.py \ + --ignore=kernels/moe/test_cutlass_moe.py \ + --ignore=kernels/moe/test_triton_moe_ptpc_fp8.py" fi #ignore certain Entrypoints/openai tests diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh index 9c5cf7cad94..5d863dd82e9 100755 --- a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh +++ 
b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh @@ -5,10 +5,41 @@ set -ex # Setup cleanup -remove_docker_container() { docker rm -f cpu-test || true; docker system prune -f; } +remove_docker_container() { + if [[ -n "$container_id" ]]; then + podman rm -f "$container_id" || true + fi + podman system prune -f +} trap remove_docker_container EXIT remove_docker_container # Try building the docker image -docker build -t cpu-test -f docker/Dockerfile.ppc64le . +podman build -t cpu-test-ubi9-ppc -f docker/Dockerfile.ppc64le . + +# Run the image +container_id=$(podman run -itd --entrypoint /bin/bash -v /tmp/:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN cpu-test-ubi9-ppc) + +function cpu_tests() { + + # offline inference + podman exec -it "$container_id" bash -c " + set -e + python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" + + # Run basic model test + podman exec -it "$container_id" bash -c " + set -e + pip install pytest pytest-asyncio einops peft Pillow soundfile transformers_stream_generator matplotlib + pip install sentence-transformers datamodel_code_generator + pytest -v -s tests/models/embedding/language/test_cls_models.py::test_classification_models[float-jason9693/Qwen2.5-1.5B-apeach] + pytest -v -s tests/models/embedding/language/test_embedding.py::test_models[half-BAAI/bge-base-en-v1.5] + pytest -v -s tests/models/encoder_decoder/language -m cpu_model" +} + +# All of CPU tests are expected to be finished less than 40 mins. + +export container_id +export -f cpu_tests +timeout 40m bash -c cpu_tests diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh index 87f74277cf9..21982b01b9c 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh @@ -17,10 +17,13 @@ source /etc/environment docker run --privileged --net host --shm-size=16G -it \ -e "HF_TOKEN=$HF_TOKEN" --name tpu-test \ vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \ - && python3 -m pip install pytest \ + && python3 -m pip install pytest pytest-asyncio tpu-info \ && python3 -m pip install lm_eval[api]==0.4.4 \ + && export VLLM_XLA_CACHE_PATH= \ && export VLLM_USE_V1=1 \ && export VLLM_XLA_CHECK_RECOMPILATION=1 \ + && echo HARDWARE \ + && tpu-info \ && echo TEST_0 \ && pytest -v -s /workspace/vllm/tests/v1/tpu/test_perf.py \ && echo TEST_1 \ @@ -40,7 +43,11 @@ docker run --privileged --net host --shm-size=16G -it \ && echo TEST_8 \ && pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py \ && echo TEST_9 \ - && pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py" \ + && pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py \ + && echo TEST_10 \ + && pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py \ + && echo TEST_11 \ + && pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py" \ # TODO: This test fails because it uses RANDOM_SEED sampling diff --git a/.buildkite/scripts/upload-wheels.sh b/.buildkite/scripts/upload-wheels.sh index a681f892706..75e3ef26409 100644 --- a/.buildkite/scripts/upload-wheels.sh +++ b/.buildkite/scripts/upload-wheels.sh @@ -50,11 +50,11 @@ aws s3 cp "$normal_wheel" "s3://vllm-wheels/$BUILDKITE_COMMIT/" if [[ $normal_wheel == *"cu118"* ]]; then # if $normal_wheel matches cu118, do not upload the index.html echo "Skipping index files for cu118 wheels" -elif [[ $normal_wheel == *"cu121"* ]]; then - # if $normal_wheel matches 
cu121, do not upload the index.html - echo "Skipping index files for cu121 wheels" +elif [[ $normal_wheel == *"cu126"* ]]; then + # if $normal_wheel matches cu126, do not upload the index.html + echo "Skipping index files for cu126 wheels" else - # only upload index.html for cu124 wheels (default wheels) + # only upload index.html for cu128 wheels (default wheels) aws s3 cp index.html "s3://vllm-wheels/$BUILDKITE_COMMIT/vllm/index.html" aws s3 cp "s3://vllm-wheels/nightly/index.html" "s3://vllm-wheels/$BUILDKITE_COMMIT/index.html" fi @@ -66,12 +66,12 @@ aws s3 cp "$normal_wheel" "s3://vllm-wheels/nightly/" if [[ $normal_wheel == *"cu118"* ]]; then # if $normal_wheel matches cu118, do not upload the index.html echo "Skipping index files for cu118 wheels" -elif [[ $normal_wheel == *"cu121"* ]]; then - # if $normal_wheel matches cu121, do not upload the index.html - echo "Skipping index files for cu121 wheels" +elif [[ $normal_wheel == *"cu126"* ]]; then + # if $normal_wheel matches cu126, do not upload the index.html + echo "Skipping index files for cu126 wheels" else - # only upload index.html for cu124 wheels (default wheels) + # only upload index.html for cu128 wheels (default wheels) aws s3 cp index.html "s3://vllm-wheels/nightly/vllm/index.html" fi -aws s3 cp "$wheel" "s3://vllm-wheels/$version/" \ No newline at end of file +aws s3 cp "$wheel" "s3://vllm-wheels/$version/" diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 38961138c97..b3005b1b4b0 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -8,6 +8,7 @@ # Documentation # label(str): the name of the test. emoji allowed. # fast_check(bool): whether to run this on each commit on fastcheck pipeline. +# torch_nightly(bool): whether to run this on vllm against torch nightly pipeline. # fast_check_only(bool): run this test on fastcheck pipeline only # optional(bool): never run this test by default (i.e. need to unblock manually) unless it's scheduled nightly run. # command(str): the single command to run for tests. incompatible with commands. 
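The `torch_nightly(bool)` attribute documented above opts a step into the pipeline that tests vLLM built against PyTorch nightly (the same change adds docker/Dockerfile.nightly_torch and requirements/nightly_torch_test.txt for that pipeline). A minimal sketch of a step using the flag, mirroring the step layout used elsewhere in this file; the label, dependency path, and command below are illustrative placeholders, not a real step from this pipeline:

- label: Torch Nightly Smoke Test                 # placeholder step name
  torch_nightly: true                             # also run when vLLM is built against torch nightly
  source_file_dependencies:
  - vllm/                                         # placeholder path filter
  commands:
  - pytest -v -s compile/test_pass_manager.py     # placeholder command

Steps that omit the flag presumably keep running only in the existing pipelines; setting it to true only adds the extra nightly-torch run for that step.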
@@ -38,7 +39,7 @@ steps: - pip install -r ../../requirements/docs.txt - SPHINXOPTS=\"-W\" make html # Check API reference (if it fails, you may have missing mock imports) - - grep \"sig sig-object py\" build/html/api/inference_params.html + - grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html - label: Async Engine, Inputs, Utils, Worker Test # 24min source_file_dependencies: @@ -70,6 +71,7 @@ steps: - label: Basic Correctness Test # 30min #mirror_hardwares: [amd] fast_check: true + torch_nightly: true source_file_dependencies: - vllm/ - tests/basic_correctness/test_basic_correctness @@ -104,6 +106,7 @@ steps: - label: Entrypoints Test # 40min working_dir: "/vllm-workspace/tests" fast_check: true + torch_nightly: true #mirror_hardwares: [amd] source_file_dependencies: - vllm/ @@ -118,7 +121,7 @@ steps: - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ + - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py - pytest -v -s entrypoints/test_chat_utils.py - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests @@ -205,6 +208,8 @@ steps: - pytest -v -s v1/sample - pytest -v -s v1/worker - pytest -v -s v1/structured_output + - pytest -v -s v1/spec_decode + - pytest -v -s v1/test_serial_utils.py - pytest -v -s v1/test_stats.py - pytest -v -s v1/test_utils.py - pytest -v -s v1/test_oracle.py @@ -288,14 +293,17 @@ steps: parallelism: 4 - label: PyTorch Compilation Unit Tests + torch_nightly: true source_file_dependencies: - vllm/ - tests/compile commands: - pytest -v -s compile/test_pass_manager.py - pytest -v -s compile/test_fusion.py + - pytest -v -s compile/test_sequence_parallelism.py - label: PyTorch Fullgraph Smoke Test # 9min + torch_nightly: true source_file_dependencies: - vllm/ - tests/compile @@ -306,21 +314,58 @@ steps: - pytest -v -s compile/piecewise/test_toy_llama.py - label: PyTorch Fullgraph Test # 18min + torch_nightly: true source_file_dependencies: - vllm/ - tests/compile commands: - pytest -v -s compile/test_full_graph.py -- label: Kernels Test %N # 1h each - # mirror_hardwares: [amd] +- label: Kernels Core Operation Test + mirror_hardwares: [amd] source_file_dependencies: - csrc/ + - tests/kernels/core + commands: + - pytest -v -s kernels/core + +- label: Kernels Attention Test %N + mirror_hardwares: [amd] + source_file_dependencies: + - csrc/attention/ - vllm/attention - - tests/kernels + - vllm/v1/attention + - tests/kernels/attention commands: - - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT - parallelism: 4 + - pytest -v -s kernels/attention --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + parallelism: 2 + +- label: Kernels Quantization Test %N + mirror_hardwares: [amd] + source_file_dependencies: + - csrc/quantization/ + - vllm/model_executor/layers/quantization + - tests/kernels/quantization + commands: + - pytest -v -s kernels/quantization 
--shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + parallelism: 2 + +- label: Kernels MoE Test + #mirror_hardwares: [amd] + source_file_dependencies: + - csrc/moe/ + - tests/kernels/moe + - vllm/model_executor/layers/fused_moe/ + commands: + - pytest -v -s kernels/moe + +- label: Kernels Mamba Test + #mirror_hardwares: [amd] + source_file_dependencies: + - csrc/mamba/ + - tests/kernels/mamba + commands: + - pytest -v -s kernels/mamba - label: Tensorizer Test # 11min # mirror_hardwares: [amd] @@ -348,12 +393,13 @@ steps: commands: - pytest -v -s benchmarks/ -- label: Quantization Test # 33min +- label: Quantization Test source_file_dependencies: - csrc/ - vllm/model_executor/layers/quantization - tests/quantization - command: VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization + commands: + - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization - label: LM Eval Small Models # 53min working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" @@ -393,83 +439,85 @@ steps: ##### models test ##### - label: Basic Models Test # 24min + torch_nightly: true source_file_dependencies: - vllm/ - tests/models commands: - pytest -v -s models/test_transformers.py - pytest -v -s models/test_registry.py + - pytest -v -s models/test_utils.py + - pytest -v -s models/test_vision.py # V1 Test: https://github.com/vllm-project/vllm/issues/14531 - - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4' + - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2' - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4' + - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2' -- label: Language Models Test (Standard) # 32min +- label: Language Models Test (Standard) #mirror_hardwares: [amd] source_file_dependencies: - vllm/ - - tests/models/decoder_only/language - - tests/models/embedding/language - - tests/models/encoder_decoder/language + - tests/models/language commands: - - pytest -v -s models/decoder_only/language -m 'core_model or quant_model' - - pytest -v -s models/embedding/language -m core_model + # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile. + - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8' + - pytest -v -s models/language -m core_model -- label: Language Models Test (Extended) # 1h10min +- label: Language Models Test (Extended) optional: true source_file_dependencies: - vllm/ - - tests/models/decoder_only/language - - tests/models/embedding/language - - tests/models/encoder_decoder/language + - tests/models/language commands: - - pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model' - - pytest -v -s models/embedding/language -m 'not core_model' + # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile. 
+ - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8' + - pytest -v -s models/language -m 'not core_model' -- label: Multi-Modal Models Test (Standard) # 40min +- label: Multi-Modal Models Test (Standard) #mirror_hardwares: [amd] source_file_dependencies: - vllm/ - - tests/models/decoder_only/audio_language - - tests/models/decoder_only/vision_language - - tests/models/embedding/vision_language - - tests/models/encoder_decoder/audio_language - - tests/models/encoder_decoder/vision_language + - tests/models/multimodal commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/multimodal - - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model' - - pytest -v -s models/decoder_only/vision_language -m 'core_model or quant_model' - - pytest -v -s models/embedding/vision_language -m core_model - - pytest -v -s models/encoder_decoder/audio_language -m core_model - - pytest -v -s models/encoder_decoder/language -m core_model - - pytest -v -s models/encoder_decoder/vision_language -m core_model - - pytest -v -s models/decoder_only/vision_language/test_interleaved.py - -- label: Multi-Modal Models Test (Extended) 1 # 48m + - pytest -v -s models/multimodal/processing + - pytest -v -s --ignore models/multimodal/generation/test_whisper.py models/multimodal -m core_model + - cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work + +- label: Multi-Modal Models Test (Extended) 1 optional: true source_file_dependencies: - vllm/ - - tests/models/decoder_only/audio_language - - tests/models/decoder_only/vision_language - - tests/models/embedding/vision_language - - tests/models/encoder_decoder/vision_language + - tests/models/multimodal commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model' - - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=0) and not core_model and not quant_model' - - pytest -v -s --ignore models/decoder_only/vision_language/test_models.py models/decoder_only/vision_language -m 'not core_model and not quant_model' - - pytest -v -s models/embedding/vision_language -m 'not core_model' - - pytest -v -s models/encoder_decoder/language -m 'not core_model' - - pytest -v -s models/encoder_decoder/vision_language -m 'not core_model' - -- label: Multi-Modal Models Test (Extended) 2 # 38m + - pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model' + +- label: Multi-Modal Models Test (Extended) 2 optional: true source_file_dependencies: - vllm/ - - tests/models/decoder_only/vision_language + - tests/models/multimodal commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=1) and not core_model and not quant_model' + - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model' + +- label: Multi-Modal Models Test (Extended) 3 + optional: true + source_file_dependencies: + - vllm/ + - tests/models/multimodal + commands: + - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git + - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model' + +- label: Quantized Models Test + #mirror_hardwares: [amd] + source_file_dependencies: + - 
vllm/model_executor/layers/quantization + - tests/models/quantization + commands: + - pytest -v -s models/quantization # This test is used only in PR development phase to test individual models and should never run on main - label: Custom Models Test @@ -539,14 +587,16 @@ steps: - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)' # Avoid importing model tests that cause CUDA reinitialization error - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)' - - pytest models/encoder_decoder/language/test_bart.py -v -s -m 'distributed(num_gpus=2)' - - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m 'distributed(num_gpus=2)' - - pytest models/decoder_only/vision_language/test_models.py -v -s -m 'distributed(num_gpus=2)' + - pytest models/language -v -s -m 'distributed(num_gpus=2)' + - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' + # test sequence parallel + - pytest -v -s distributed/test_sequence_parallel.py # this test fails consistently. # TODO: investigate and fix # - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py + - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown - label: Plugin Tests (2 GPUs) # 40min working_dir: "/vllm-workspace/tests" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 860c5c6cd53..76aa5f7a35d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -12,6 +12,7 @@ /vllm/model_executor/layers/quantization @mgoin @robertgshaw2-redhat @tlrmchlsmth /vllm/model_executor/guided_decoding @mgoin @russellb /vllm/multimodal @DarkLight1337 @ywang96 +/vllm/vllm_flash_attn @LucasWilkinson CMakeLists.txt @tlrmchlsmth # vLLM V1 diff --git a/.github/ISSUE_TEMPLATE/200-installation.yml b/.github/ISSUE_TEMPLATE/200-installation.yml index 590e56c1378..34da4019687 100644 --- a/.github/ISSUE_TEMPLATE/200-installation.yml +++ b/.github/ISSUE_TEMPLATE/200-installation.yml @@ -14,7 +14,7 @@ body: description: | Please run the following and paste the output below. ```sh - wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + wget https://raw.githubusercontent.com/vllm-project/vllm/main/vllm/collect_env.py # For security purposes, please feel free to check the contents of collect_env.py before running it. python collect_env.py ``` diff --git a/.github/ISSUE_TEMPLATE/300-usage.yml b/.github/ISSUE_TEMPLATE/300-usage.yml index 004798a388a..c9e4be0e771 100644 --- a/.github/ISSUE_TEMPLATE/300-usage.yml +++ b/.github/ISSUE_TEMPLATE/300-usage.yml @@ -14,7 +14,7 @@ body: description: | Please run the following and paste the output below. ```sh - wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + wget https://raw.githubusercontent.com/vllm-project/vllm/main/vllm/collect_env.py # For security purposes, please feel free to check the contents of collect_env.py before running it. python collect_env.py ``` diff --git a/.github/ISSUE_TEMPLATE/400-bug-report.yml b/.github/ISSUE_TEMPLATE/400-bug-report.yml index d4113da8b5b..637d2dd1145 100644 --- a/.github/ISSUE_TEMPLATE/400-bug-report.yml +++ b/.github/ISSUE_TEMPLATE/400-bug-report.yml @@ -14,19 +14,19 @@ body: description: | Please run the following and paste the output below. 
```sh - wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + wget https://raw.githubusercontent.com/vllm-project/vllm/main/vllm/collect_env.py # For security purposes, please feel free to check the contents of collect_env.py before running it. python collect_env.py ``` It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues. value: |
- The output of `python collect_env.py` + The output of python collect_env.py ```text Your output of `python collect_env.py` here ``` - +
validations: required: true diff --git a/.github/ISSUE_TEMPLATE/700-performance-discussion.yml b/.github/ISSUE_TEMPLATE/700-performance-discussion.yml index 273f50d59cf..3d31c115501 100644 --- a/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +++ b/.github/ISSUE_TEMPLATE/700-performance-discussion.yml @@ -35,7 +35,7 @@ body: description: | Please run the following and paste the output below. ```sh - wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + wget https://raw.githubusercontent.com/vllm-project/vllm/main/vllm/collect_env.py # For security purposes, please feel free to check the contents of collect_env.py before running it. python collect_env.py ``` diff --git a/.github/mergify.yml b/.github/mergify.yml index 3097b994659..15fa3660a87 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -55,11 +55,19 @@ pull_request_rules: description: Automatically apply structured-output label conditions: - or: + - files~=^benchmarks/structured_schemas/ + - files=benchmarks/benchmark_serving_structured_output.py + - files=benchmarks/run_structured_output_benchmark.sh + - files=docs/source/features/structured_outputs.md + - files=examples/offline_inference/structured_outputs.py + - files=examples/online_serving/openai_chat_completion_structured_outputs.py + - files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py - files~=^vllm/model_executor/guided_decoding/ - files=tests/model_executor/test_guided_processors.py - files=tests/entrypoints/llm/test_guided_generate.py - - files=benchmarks/benchmark_serving_guided.py - - files=benchmarks/benchmark_guided.py + - files~=^tests/v1/structured_output/ + - files=tests/v1/entrypoints/llm/test_guided_generate.py + - files~=^vllm/v1/structured_output/ actions: label: add: @@ -118,6 +126,28 @@ pull_request_rules: remove: - tpu +- name: label-tool-calling + description: Automatically add tool-calling label + conditions: + - or: + - files~=^tests/tool_use/ + - files~=^tests/mistral_tool_use/ + - files~=^tests/entrypoints/openai/tool_parsers/ + - files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py + - files~=^vllm/entrypoints/openai/tool_parsers/ + - files=docs/source/features/tool_calling.md + - files=docs/source/getting_started/examples/openai_chat_completion_client_with_tools.md + - files=docs/source/getting_started/examples/chat_with_tools.md + - files~=^examples/tool_chat_* + - files=examples/offline_inference/chat_with_tools.py + - files=examples/online_serving/openai_chat_completion_client_with_tools_required.py + - files=examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py + - files=examples/online_serving/openai_chat_completion_client_with_tools.py + actions: + label: + add: + - tool-calling + - name: ping author on conflicts and add 'needs-rebase' label conditions: - conflict diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml index 7b1d9f69938..dd9b61a6473 100644 --- a/.github/workflows/lint-and-deploy.yaml +++ b/.github/workflows/lint-and-deploy.yaml @@ -66,7 +66,7 @@ jobs: export AWS_SECRET_ACCESS_KEY=minioadmin sleep 30 && kubectl -n ns-vllm logs -f "$(kubectl -n ns-vllm get pods | awk '/deployment/ {print $1;exit}')" & helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/online_serving/chart-helm -f examples/online_serving/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set 
secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set image.env[1].name=VLLM_LOGGING_LEVEL --set-string image.env[0].value="1" --set-string image.env[1].value="DEBUG" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="vllm-cpu-env" - + - name: curl test run: | kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 & @@ -79,4 +79,4 @@ jobs: "max_tokens": 7, "temperature": 0 }'):$CODE" - echo "$CODE" \ No newline at end of file + echo "$CODE" diff --git a/.gitignore b/.gitignore index 6f5cbd0733d..2756c612b82 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ # vllm-flash-attn built from source vllm/vllm_flash_attn/* -!vllm/vllm_flash_attn/fa_utils.py # Byte-compiled / optimized / DLL files __pycache__/ @@ -81,6 +80,7 @@ instance/ # Sphinx documentation docs/_build/ docs/source/getting_started/examples/ +docs/source/api/vllm # PyBuilder .pybuilder/ @@ -203,3 +203,6 @@ benchmarks/**/*.json # Linting actionlint shellcheck*/ + +# Ingore moe/marlin_moe gen code +csrc/moe/marlin_moe_wna16/kernel_* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e921f69925b..5ecd7b70ea5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,31 +11,30 @@ repos: hooks: - id: yapf args: [--in-place, --verbose] - additional_dependencies: [toml] # TODO: Remove when yapf is upgraded - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.3 + rev: v0.11.7 hooks: - id: ruff args: [--output-format, github, --fix] - repo: https://github.com/codespell-project/codespell - rev: v2.4.0 + rev: v2.4.1 hooks: - id: codespell additional_dependencies: ['tomli'] args: ['--toml', 'pyproject.toml'] - repo: https://github.com/PyCQA/isort - rev: 0a0b7a830386ba6a31c2ec8316849ae4d1b8240d # 6.0.0 + rev: 6.0.1 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.7 + rev: v20.1.3 hooks: - id: clang-format exclude: 'csrc/(moe/topk_softmax_kernels.cu|quantization/gguf/(ggml-common.h|dequantize.cuh|vecdotq.cuh|mmq.cuh|mmvq.cuh))|vllm/third_party/.*' types_or: [c++, cuda] args: [--style=file, --verbose] - repo: https://github.com/jackdewinter/pymarkdown - rev: v0.9.27 + rev: v0.9.29 hooks: - id: pymarkdown args: [fix] @@ -44,10 +43,10 @@ repos: hooks: - id: actionlint - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.6.2 + rev: 0.6.17 hooks: - id: pip-compile - args: [requirements/test.in, -o, requirements/test.txt] + args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128] files: ^requirements/test\.(in|txt)$ - repo: local hooks: diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f4b20d3515..8012c233462 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,6 @@ project(vllm_extensions LANGUAGES CXX) # CUDA by default, can be overridden by using -DVLLM_TARGET_DEVICE=... (used by setup.py) set(VLLM_TARGET_DEVICE "cuda" CACHE STRING "Target device backend for vLLM") - message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") message(STATUS "Target device: ${VLLM_TARGET_DEVICE}") @@ -46,8 +45,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1 # requirements.txt files and should be kept consistent. 
The ROCm torch # versions are derived from docker/Dockerfile.rocm # -set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0") -set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0") +set(TORCH_SUPPORTED_VERSION_CUDA "2.7.0") +set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0") # # Try to find python package with an executable that exactly matches @@ -241,6 +240,7 @@ set(VLLM_EXT_SRC "csrc/quantization/fp8/common.cu" "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu" "csrc/quantization/gguf/gguf_kernel.cu" + "csrc/quantization/activation_kernels.cu" "csrc/cuda_utils_kernels.cu" "csrc/prepare_inputs/advance_step.cu" "csrc/custom_all_reduce.cu" @@ -249,9 +249,8 @@ set(VLLM_EXT_SRC if(VLLM_GPU_LANG STREQUAL "CUDA") SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library") - # Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case. - # Please keep this in sync with FetchContent_Declare line below. - set(CUTLASS_REVISION "v3.8.0" CACHE STRING "CUTLASS revision to use") + # Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building. + set(CUTLASS_REVISION "v3.9.2" CACHE STRING "CUTLASS revision to use") # Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR}) @@ -269,7 +268,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") cutlass GIT_REPOSITORY https://github.com/nvidia/cutlass.git # Please keep this in sync with CUTLASS_REVISION line above. - GIT_TAG v3.8.0 + GIT_TAG ${CUTLASS_REVISION} GIT_PROGRESS TRUE # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history. @@ -290,7 +289,8 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") "csrc/quantization/fp4/nvfp4_quant_entry.cu" "csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu" "csrc/sparse/cutlass/sparse_scaled_mm_entry.cu" - "csrc/cutlass_extensions/common.cpp") + "csrc/cutlass_extensions/common.cpp" + "csrc/attention/mla/cutlass_mla_entry.cu") set_gencode_flags_for_srcs( SRCS "${VLLM_EXT_SRC}" @@ -463,7 +463,26 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") set(FP4_ARCHS) endif() - # + # CUTLASS MLA Archs and flags + cuda_archs_loose_intersection(MLA_ARCHS "10.0a" "${CUDA_ARCHS}") + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.8 AND MLA_ARCHS) + set(SRCS + "csrc/attention/mla/cutlass_mla_kernels.cu") + set_gencode_flags_for_srcs( + SRCS "${SRCS}" + CUDA_ARCHS "${MLA_ARCHS}") + list(APPEND VLLM_EXT_SRC "${SRCS}") + list(APPEND VLLM_GPU_FLAGS "-DENABLE_CUTLASS_MLA=1") + # Add MLA-specific include directories only to MLA source files + set_source_files_properties(${SRCS} + PROPERTIES INCLUDE_DIRECTORIES "${CUTLASS_DIR}/examples/77_blackwell_fmha;${CUTLASS_DIR}/examples/common") + message(STATUS "Building CUTLASS MLA for archs: ${MLA_ARCHS}") + else() + message(STATUS "Not building CUTLASS MLA as no compatible archs were found.") + # clear MLA_ARCHS + set(MLA_ARCHS) + endif() + # CUTLASS MoE kernels # The MoE kernel cutlass_moe_mm requires CUDA 12.3 or later (and only works @@ -661,6 +680,17 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") endif() endif() +if(VLLM_GPU_LANG STREQUAL "CUDA") + set(MOE_PERMUTE_SRC + "csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.cu" + "csrc/moe/moe_permute_unpermute_op.cu") + + set_gencode_flags_for_srcs( + SRCS "${MARLIN_PERMUTE_SRC}" + CUDA_ARCHS "${MOE_PERMUTE_ARCHS}") + + list(APPEND VLLM_MOE_EXT_SRC "${MOE_PERMUTE_SRC}") +endif() message(STATUS "Enabling moe extension.") define_gpu_extension_target( _moe_C @@ -669,6 +699,8 @@ define_gpu_extension_target( 
SOURCES ${VLLM_MOE_EXT_SRC} COMPILE_FLAGS ${VLLM_GPU_FLAGS} ARCHITECTURES ${VLLM_GPU_ARCHES} + INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR} + INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR} USE_SABI 3 WITH_SOABI) @@ -678,6 +710,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP") # set(VLLM_ROCM_EXT_SRC "csrc/rocm/torch_bindings.cpp" + "csrc/rocm/skinny_gemms.cu" "csrc/rocm/attention.cu") define_gpu_extension_target( diff --git a/benchmarks/auto_tune.sh b/benchmarks/auto_tune.sh new file mode 100644 index 00000000000..ea63c6f71a6 --- /dev/null +++ b/benchmarks/auto_tune.sh @@ -0,0 +1,212 @@ +#!/bin/bash + +# This script aims to tune the best server parameter combinations to maximize throughput for given requirement. +# The current server parameter combination is max_num_seqs and max_num_batched_tokens +# It also supports additional requirement: e2e latency and prefix cache. + +# Pre-requisite: +# 1. Checkout to your branch, install/ update the correct running env. For TPU, activate conda env and install the corresponding torch, xla version. +# 2. If the model is customized, replace the MODEL's config with the customized config. +# 3. Set variables (ALL REQUIRED) +# BASE: your directory for vllm repo +# MODEL: the model served by vllm +# DOWNLOAD_DIR: directory to download and load model weights. +# INPUT_LEN: request input len +# OUTPUT_LEN: request output len +# MIN_CACHE_HIT_PCT: prefix cache rate +# MAX_LATENCY_ALLOWED_MS: (e2e) latency requirement. If there's no latency requirement, set it to a large number like 1000000000 +# 4. Run the script, it might take a long time, you can use tmux to avoid the script stop if disconnection happens. +# 5. The final result will be saved in RESULT file. + + +# Example use cases +# 1. Given input_len=1800, output_len=20, what's the best max_num_seqs and max_num_batched_tokens to get highest throughput? +# Use INPUT_LEN=1800, OUTPUT_LEN=20, MIN_CACHE_HIT_PCT=0, MAX_LATENCY_ALLOWED_MS=100000000000 +# 2. If we have latency requirement to be lower than 500ms, what's the best server parameter? +# Use INPUT_LEN=1800, OUTPUT_LEN=20, MIN_CACHE_HIT_PCT=0, MAX_LATENCY_ALLOWED_MS=500 +# 3. If we want to reach 60% prefix cache, what's the best server parameter? 
+# Use INPUT_LEN=1800, OUTPUT_LEN=20, MIN_CACHE_HIT_PCT=60, MAX_LATENCY_ALLOWED_MS=500 + +TAG=$(date +"%Y_%m_%d_%H_%M") +BASE="" +MODEL="meta-llama/Llama-3.1-8B-Instruct" +DOWNLOAD_DIR="" +INPUT_LEN=4000 +OUTPUT_LEN=16 +MIN_CACHE_HIT_PCT_PCT=0 +MAX_LATENCY_ALLOWED_MS=100000000000 + +LOG_FOLDER="$BASE/auto-benchmark/$TAG" +RESULT="$LOG_FOLDER/result.txt" + +echo "result file$ $RESULT" +echo "model: $MODEL" +echo + +rm -rf $LOG_FOLDER +mkdir -p $LOG_FOLDER + +cd "$BASE/vllm" +# create sonnet-4x.txt so that we can sample 2048 tokens for input +echo "" > benchmarks/sonnet_4x.txt +for _ in {1..4} +do +cat benchmarks/sonnet.txt >> benchmarks/sonnet_4x.txt +done + +pip install datasets + +current_hash=$(git rev-parse HEAD) +echo "hash:$current_hash" >> "$RESULT" +echo "current_hash: $current_hash" + +best_throughput=0 +best_max_num_seqs=0 +best_num_batched_tokens=0 +best_goodput=0 +run_benchmark() { + local max_num_seqs=$1 + local max_num_batched_tokens=$2 + echo "max_num_seq: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens" + local vllm_log="$LOG_FOLDER/vllm_log_${max_num_seqs}_${max_num_batched_tokens}.txt" + echo "vllm_log: $vllm_log" + echo + rm -f $vllm_log + + # start the server + VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 vllm serve $MODEL \ + --disable-log-requests \ + --port 8004 \ + --gpu-memory-utilization 0.98 \ + --max-num-seqs $max_num_seqs \ + --max-num-batched-tokens $max_num_batched_tokens \ + --tensor-parallel-size 1 \ + --enable-prefix-caching \ + --load-format dummy \ + --download-dir $DOWNLOAD_DIR \ + --max-model-len $(( INPUT_LEN+OUTPUT_LEN )) > "$vllm_log" 2>&1 & + echo "wait for 10 minutes.." + echo + # wait for 10 minutes... + server_started=0 + for i in {1..60}; do + if grep -Fq "Application startup complete" "$vllm_log"; then + echo "Application started" + server_started=1 + break + else + # echo "wait for 10 seconds..." + sleep 10 + fi + done + + if (( ! server_started )); then + echo "server did not start within 10 minutes, terminate the benchmarking. Please check server log at $vllm_log" + echo "pkill -f vllm" + echo + pkill vllm + sleep 10 + return 1 + fi + + echo "run benchmark test..." + echo + meet_latency_requirement=0 + # get a basic qps by using request-rate inf + bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_inf.txt" + prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 )) + python benchmarks/benchmark_serving.py \ + --backend vllm \ + --model $MODEL \ + --dataset-name sonnet \ + --dataset-path benchmarks/sonnet_4x.txt \ + --sonnet-input-len $INPUT_LEN \ + --sonnet-output-len $OUTPUT_LEN \ + --ignore-eos \ + --disable-tqdm \ + --request-rate inf \ + --percentile-metrics ttft,tpot,itl,e2el \ + --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --num-prompts 100 \ + --sonnet-prefix-len $prefix_len \ + --port 8004 > "$bm_log" + through_put=$(grep "Request throughput (req/s):" "$bm_log" | sed 's/[^0-9.]//g') + e2el=$(grep "P99 E2EL (ms):" "$bm_log" | awk '{print $NF}') + goodput=$(grep "Request goodput (req/s):" "$bm_log" | sed 's/[^0-9.]//g') + + if (( $(echo "$e2el <= $MAX_LATENCY_ALLOWED_MS" | bc -l) )); then + meet_latency_requirement=1 + fi + + if (( ! 
meet_latency_requirement )); then + # start from request-rate as int(through_put) + 1 + request_rate=$((${through_put%.*} + 1)) + while ((request_rate > 0)); do + # clear prefix cache + curl -X POST http://0.0.0.0:8004/reset_prefix_cache + sleep 5 + bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt" + python benchmarks/benchmark_serving.py \ + --backend vllm \ + --model $MODEL \ + --dataset-name sonnet \ + --dataset-path benchmarks/sonnet_4x.txt \ + --sonnet-input-len $INPUT_LEN \ + --sonnet-output-len $OUTPUT_LEN \ + --ignore_eos \ + --disable-tqdm \ + --request-rate $request_rate \ + --percentile-metrics ttft,tpot,itl,e2el \ + --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --num-prompts 100 \ + --sonnet-prefix-len $prefix_len \ + --port 8004 > "$bm_log" + through_put=$(grep "Request throughput (req/s):" "$bm_log" | sed 's/[^0-9.]//g') + e2el=$(grep "P99 E2EL (ms):" "$bm_log" | awk '{print $NF}') + goodput=$(grep "Request goodput (req/s):" "$bm_log" | sed 's/[^0-9.]//g') + if (( $(echo "$e2el <= $MAX_LATENCY_ALLOWED_MS" | bc -l) )); then + meet_latency_requirement=1 + break + fi + request_rate=$((request_rate-1)) + done + fi + # write the results and update the best result. + if ((meet_latency_requirement)); then + echo "max_num_seqs: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens, request_rate: $request_rate, e2el: $e2el, through put: $through_put, goodput: $goodput" + echo "max_num_seqs: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens, request_rate: $request_rate, e2el: $e2el, through put: $through_put, goodput: $goodput" >> "$RESULT" + if (( $(echo "$through_put > $best_throughput" | bc -l) )); then + best_throughput=$through_put + best_max_num_seqs=$max_num_seqs + best_num_batched_tokens=$max_num_batched_tokens + best_goodput=$goodput + fi + else + echo "max_num_seqs: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens does not meet latency requirement ${MAX_LATENCY_ALLOWED_MS}" + echo "max_num_seqs: $max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens does not meet latency requirement ${MAX_LATENCY_ALLOWED_MS}" >> "$RESULT" + fi + + echo "best_max_num_seqs: $best_max_num_seqs, best_num_batched_tokens: $best_num_batched_tokens, best_throughput: $best_throughput" + + echo "pkill -f vllm" + echo + pkill vllm + sleep 10 + rm -f $vllm_log + printf '=%.0s' $(seq 1 20) + return 0 +} + + +num_seqs_list="128 256" +num_batched_tokens_list="512 1024 2048 4096" +for num_seqs in $num_seqs_list; do + for num_batched_tokens in $num_batched_tokens_list; do + run_benchmark $num_seqs $num_batched_tokens + exit 0 + done +done +echo "finish permutations" +echo "best_max_num_seqs: $best_max_num_seqs, best_num_batched_tokens: $best_num_batched_tokens, best_throughput: $best_throughput" +echo "best_max_num_seqs: $best_max_num_seqs, best_num_batched_tokens: $best_num_batched_tokens, best_throughput: $best_throughput" >> "$RESULT" + diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 287d500a81d..e6a67fda682 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +import io import json import os import sys @@ -32,6 +33,7 @@ class RequestFuncInput: extra_body: Optional[dict] = None multi_modal_content: Optional[dict] = None ignore_eos: bool = False + language: Optional[str] = None @dataclass @@ -199,6 +201,7 @@ async def async_request_deepspeed_mii( timeout=AIOHTTP_TIMEOUT) as session: 
payload = { + "model": request_func_input.model, "prompt": request_func_input.prompt, "max_tokens": request_func_input.output_len, "temperature": 0.01, # deepspeed-mii does not accept 0.0 temp. @@ -258,6 +261,7 @@ async def async_request_openai_completions( if request_func_input.model_name else request_func_input.model, "prompt": request_func_input.prompt, "temperature": 0.0, + "repetition_penalty": 1.0, "max_tokens": request_func_input.output_len, "logprobs": request_func_input.logprobs, "stream": True, @@ -436,6 +440,110 @@ async def async_request_openai_chat_completions( return output +async def async_request_openai_audio( + request_func_input: RequestFuncInput, + pbar: Optional[tqdm] = None, +) -> RequestFuncOutput: + # Lazy import without PlaceholderModule to avoid vllm dep. + import soundfile + api_url = request_func_input.api_url + assert api_url.endswith( + ("transcriptions", "translations" + )), "OpenAI Chat Completions API URL must end with 'transcriptions' " + "or `translations`." + + async with aiohttp.ClientSession(trust_env=True, + timeout=AIOHTTP_TIMEOUT) as session: + content = [{"type": "text", "text": request_func_input.prompt}] + payload = { + "model": request_func_input.model_name \ + if request_func_input.model_name else request_func_input.model, + "temperature": 0.0, + "max_completion_tokens": request_func_input.output_len, + "stream": True, + "language": "en", + # Flattened due to multipart/form-data + "stream_include_usage": True, + "stream_continuous_usage_stats": True + } + if request_func_input.extra_body: + payload.update(request_func_input.extra_body) + headers = { + "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}", + } + + # Send audio file + def to_bytes(y, sr): + buffer = io.BytesIO() + soundfile.write(buffer, y, sr, format="WAV") + buffer.seek(0) + return buffer + + with to_bytes(*request_func_input.multi_modal_content['audio']) as f: + form = aiohttp.FormData() + form.add_field('file', f, content_type='audio/wav') + for key, value in payload.items(): + form.add_field(key, str(value)) + + output = RequestFuncOutput() + output.prompt_len = request_func_input.prompt_len + + generated_text = "" + ttft = 0.0 + st = time.perf_counter() + most_recent_timestamp = st + try: + async with session.post(url=api_url, + data=form, + headers=headers) as response: + if response.status == 200: + async for chunk_bytes in response.content: + chunk_bytes = chunk_bytes.strip() + if not chunk_bytes: + continue + + chunk = chunk_bytes.decode("utf-8").removeprefix( + "data: ") + if chunk != "[DONE]": + timestamp = time.perf_counter() + data = json.loads(chunk) + + if choices := data.get("choices"): + content = choices[0]["delta"].get( + "content") + # First token + if ttft == 0.0: + ttft = timestamp - st + output.ttft = ttft + + # Decoding phase + else: + output.itl.append( + timestamp - most_recent_timestamp) + + generated_text += content or "" + elif usage := data.get("usage"): + output.output_tokens = usage.get( + "completion_tokens") + + most_recent_timestamp = timestamp + + output.generated_text = generated_text + output.success = True + output.latency = most_recent_timestamp - st + else: + output.error = response.reason or "" + output.success = False + except Exception: + output.success = False + exc_info = sys.exc_info() + output.error = "".join(traceback.format_exception(*exc_info)) + + if pbar: + pbar.update(1) + return output + + def get_model(pretrained_model_name_or_path: str) -> str: if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true': from 
modelscope import snapshot_download @@ -493,6 +601,7 @@ def get_tokenizer( "deepspeed-mii": async_request_deepspeed_mii, "openai": async_request_openai_completions, "openai-chat": async_request_openai_chat_completions, + "openai-audio": async_request_openai_audio, "tensorrt-llm": async_request_trt_llm, "scalellm": async_request_openai_completions, "sglang": async_request_openai_completions, diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py index 63f174275d4..9c614baf1f0 100644 --- a/benchmarks/benchmark_dataset.py +++ b/benchmarks/benchmark_dataset.py @@ -64,6 +64,7 @@ class SampleRequest: class BenchmarkDataset(ABC): DEFAULT_SEED = 0 + IS_MULTIMODAL = False def __init__( self, @@ -621,6 +622,7 @@ class ConversationDataset(HuggingFaceDataset): SUPPORTED_DATASET_PATHS = { 'lmms-lab/LLaVA-OneVision-Data', 'Aeala/ShareGPT_Vicuna_unfiltered' } + IS_MULTIMODAL = True def sample(self, tokenizer: PreTrainedTokenizerBase, @@ -685,6 +687,7 @@ class VisionArenaDataset(HuggingFaceDataset): "lmarena-ai/vision-arena-bench-v0.1": lambda x: x["turns"][0][0]["content"] } + IS_MULTIMODAL = True def sample( self, @@ -768,6 +771,60 @@ def sample(self, return sampled_requests +# ----------------------------------------------------------------------------- +# MT-Bench Dataset Implementation +# ----------------------------------------------------------------------------- + + +class MTBenchDataset(HuggingFaceDataset): + """ + MT-Bench Dataset. + https://huggingface.co/datasets/philschmid/mt-bench + + We create a single turn dataset for MT-Bench. + This is similar to Spec decoding benchmark setup in vLLM + https://github.com/vllm-project/vllm/blob/9d98ab5ec/examples/offline_inference/eagle.py#L14-L18 + """ # noqa: E501 + + DEFAULT_OUTPUT_LEN = 256 # avg len used in SD bench in vLLM + SUPPORTED_DATASET_PATHS = { + "philschmid/mt-bench", + } + + def sample(self, + tokenizer: PreTrainedTokenizerBase, + num_requests: int, + output_len: Optional[int] = None, + enable_multimodal_chat: bool = False, + **kwargs) -> list: + output_len = (output_len + if output_len is not None else self.DEFAULT_OUTPUT_LEN) + sampled_requests = [] + + for item in self.data: + if len(sampled_requests) >= num_requests: + break + prompt = item['turns'][0] + + # apply template + prompt = tokenizer.apply_chat_template([{ + "role": "user", + "content": prompt + }], + add_generation_prompt=True, + tokenize=False) + + prompt_len = len(tokenizer(prompt).input_ids) + sampled_requests.append( + SampleRequest( + prompt=prompt, + prompt_len=prompt_len, + expected_output_len=output_len, + )) + self.maybe_oversample_requests(sampled_requests, num_requests) + return sampled_requests + + # ----------------------------------------------------------------------------- # AIMO Dataset Implementation # ----------------------------------------------------------------------------- @@ -815,3 +872,80 @@ def sample(self, )) self.maybe_oversample_requests(sampled_requests, num_requests) return sampled_requests + + +# ----------------------------------------------------------------------------- +# ASR Dataset Implementation +# ----------------------------------------------------------------------------- + + +class ASRDataset(HuggingFaceDataset): + """ + Dataset class for processing a ASR dataset for transcription. 
+ Tested on the following set: + + +----------------+----------------------------------------+--------------------------+-----------------------------+ + | Dataset | Domain | Speaking Style | hf-subset | + +----------------+----------------------------------------+--------------------------+-----------------------------+ + | TED-LIUM | TED talks | Oratory | release1, release2, release3| + | | | | release3-speaker-adaptation | + | VoxPopuli | European Parliament | Oratory | en, de, it, fr, ... | + | LibriSpeech | Audiobook | Narrated | "LIUM/tedlium" | + | GigaSpeech | Audiobook, podcast, YouTube | Narrated, spontaneous | xs, s, m, l, xl, dev, test | + | SPGISpeech | Financial meetings | Oratory, spontaneous | S, M, L, dev, test | + | AMI | Meetings | Spontaneous | ihm, sdm | + +----------------+----------------------------------------+--------------------------+-----------------------------+ + + """ # noqa: E501 + SUPPORTED_DATASET_PATHS = { + "openslr/librispeech_asr", "facebook/voxpopuli", "LIUM/tedlium", + "edinburghcstr/ami", "speechcolab/gigaspeech", "kensho/spgispeech" + } + + DEFAULT_OUTPUT_LEN = 128 + IS_MULTIMODAL = True + + # TODO Whisper-specific. Abstract interface when more models are supported. + TRANSCRIPTION_PREAMBLE = "<|startoftranscript|><|en|><|transcribe|>"\ + "<|notimestamps|>" + skip_long_audios: bool = True + + def sample( + self, + tokenizer: PreTrainedTokenizerBase, + num_requests: int, + output_len: Optional[int] = None, + **kwargs, + ) -> list: + import librosa + output_len = (output_len + if output_len is not None else self.DEFAULT_OUTPUT_LEN) + prompt = ASRDataset.TRANSCRIPTION_PREAMBLE + prompt_len = len(tokenizer(prompt).input_ids) + sampled_requests = [] + skipped = 0 + for item in self.data: + if len(sampled_requests) >= num_requests: + break + audio = item["audio"] + y, sr = audio["array"], audio["sampling_rate"] + duration_s = librosa.get_duration(y=y, sr=sr) + # Whisper max supported duration + if self.skip_long_audios and duration_s > 30: + skipped += 1 + continue + + mm_content = {"audio": (y, sr)} + sampled_requests.append( + SampleRequest( + prompt=prompt, + prompt_len=prompt_len, + expected_output_len=output_len, + multi_modal_data=mm_content, + )) + if skipped: + logger.warning("%d samples discarded from dataset due to" \ + " their length being greater than" \ + " what Whisper supports.", skipped) + self.maybe_oversample_requests(sampled_requests, num_requests) + return sampled_requests diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index 4fff7a8fc8e..f44da95d321 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -63,14 +63,16 @@ class Request: output_len: int -def sample_tokens(tokenizer: PreTrainedTokenizerBase, length: int) -> str: +def sample_tokens(tokenizer: PreTrainedTokenizerBase, + length: int) -> list[int]: vocab = tokenizer.get_vocab() + all_special_ids = set(tokenizer.all_special_ids) + # Remove the special tokens. 
- vocab = { - k: v - for k, v in vocab.items() if k not in tokenizer.all_special_ids - } - return random.choices(list(vocab.values()), k=length) + return random.choices( + [v for k, v in vocab.items() if k not in all_special_ids], + k=length, + ) def sample_requests_from_dataset( diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index b5bd840d841..c236d64261d 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -50,11 +50,11 @@ except ImportError: from argparse import ArgumentParser as FlexibleArgumentParser -from benchmark_dataset import (AIMODataset, BurstGPTDataset, +from benchmark_dataset import (AIMODataset, ASRDataset, BurstGPTDataset, ConversationDataset, HuggingFaceDataset, - InstructCoderDataset, RandomDataset, - SampleRequest, ShareGPTDataset, SonnetDataset, - VisionArenaDataset) + InstructCoderDataset, MTBenchDataset, + RandomDataset, SampleRequest, ShareGPTDataset, + SonnetDataset, VisionArenaDataset) from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json MILLISECONDS_TO_SECONDS_CONVERSION = 1000 @@ -274,10 +274,6 @@ async def benchmark( input_requests[0].expected_output_len, \ input_requests[0].multi_modal_data - if backend != "openai-chat" and test_mm_content is not None: - # multi-modal benchmark is only available on OpenAI Chat backend. - raise ValueError( - "Multi-modal content is only supported on 'openai-chat' backend.") assert test_mm_content is None or isinstance(test_mm_content, dict) test_input = RequestFuncInput( model=model_id, @@ -599,11 +595,17 @@ def main(args: argparse.Namespace): elif args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS: dataset_class = InstructCoderDataset args.hf_split = "train" + elif args.dataset_path in MTBenchDataset.SUPPORTED_DATASET_PATHS: + dataset_class = MTBenchDataset + args.hf_split = "train" elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS: dataset_class = ConversationDataset elif args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS: dataset_class = AIMODataset args.hf_split = "train" + elif args.dataset_path in ASRDataset.SUPPORTED_DATASET_PATHS: + dataset_class = ASRDataset + args.hf_split = "train" else: supported_datasets = set([ dataset_name for cls in HuggingFaceDataset.__subclasses__() @@ -615,6 +617,13 @@ def main(args: argparse.Namespace): f" from one of following: {supported_datasets}. " "Please consider contributing if you would " "like to add support for additional dataset formats.") + + if (dataset_class.IS_MULTIMODAL and backend not in \ + ["openai-chat", "openai-audio"]): + # multi-modal benchmark is only available on OpenAI Chat backend. + raise ValueError( + "Multi-modal content is only supported on 'openai-chat' and " \ + "'openai-audio' backend.") input_requests = dataset_class( dataset_path=args.dataset_path, dataset_subset=args.hf_subset, @@ -707,7 +716,7 @@ def main(args: argparse.Namespace): )) # Save config and results to json - if args.save_result: + if args.save_result or args.append_result: result_json: dict[str, Any] = {} # Setup @@ -728,6 +737,14 @@ def main(args: argparse.Namespace): raise ValueError( "Invalid metadata format. Please use KEY=VALUE format." 
) + # Traffic + result_json["request_rate"] = (args.request_rate if args.request_rate + < float("inf") else "inf") + result_json["burstiness"] = args.burstiness + result_json["max_concurrency"] = args.max_concurrency + + # Merge with benchmark result + result_json = {**result_json, **benchmark_result} if not args.save_detailed: # Remove fields with too many data points @@ -738,15 +755,6 @@ def main(args: argparse.Namespace): if field in result_json: del result_json[field] - # Traffic - result_json["request_rate"] = (args.request_rate if args.request_rate - < float("inf") else "inf") - result_json["burstiness"] = args.burstiness - result_json["max_concurrency"] = args.max_concurrency - - # Merge with benchmark result - result_json = {**result_json, **benchmark_result} - # Save to file base_model_id = model_id.split("/")[-1] max_concurrency_str = (f"-concurrency{args.max_concurrency}" @@ -756,7 +764,12 @@ def main(args: argparse.Namespace): file_name = args.result_filename if args.result_dir: file_name = os.path.join(args.result_dir, file_name) - with open(file_name, "w", encoding='utf-8') as outfile: + with open(file_name, + mode="a+" if args.append_result else "w", + encoding='utf-8') as outfile: + # Append a newline. + if args.append_result and outfile.tell() != 0: + outfile.write("\n") json.dump(result_json, outfile) save_to_pytorch_benchmark_format(args, result_json, file_name) @@ -888,6 +901,11 @@ def main(args: argparse.Namespace): help="When saving the results, whether to include per request " "information such as response, error, ttfs, tpots, etc.", ) + parser.add_argument( + "--append-result", + action="store_true", + help="Append the benchmark result to the existing json file.", + ) parser.add_argument( "--metadata", metavar="KEY=VALUE", diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py index e52f16a8b12..7c40e39ac81 100644 --- a/benchmarks/benchmark_serving_structured_output.py +++ b/benchmarks/benchmark_serving_structured_output.py @@ -51,7 +51,7 @@ except ImportError: from argparse import ArgumentParser as FlexibleArgumentParser -from vllm.v1.structured_output.utils import ( +from vllm.v1.structured_output.backend_xgrammar import ( has_xgrammar_unsupported_json_features) MILLISECONDS_TO_SECONDS_CONVERSION = 1000 @@ -123,6 +123,8 @@ def sample_requests(tokenizer: PreTrainedTokenizerBase, copy.deepcopy(schema) for _ in range(args.num_prompts) ] for i in range(len(json_schemas)): + if "properties" not in json_schemas[i]: + json_schemas[i]["properties"] = {} json_schemas[i]["properties"][ f"__optional_field_{uuid.uuid4()}"] = { "type": @@ -134,7 +136,7 @@ def sample_requests(tokenizer: PreTrainedTokenizerBase, json_schemas = [schema] * args.num_prompts def gen_prompt(index: int): - return f"Generate an example of a user profile given the following schema: {json.dumps(get_schema(index))}" # noqa: E501 + return f"Generate an example of a brief user profile given the following schema: {json.dumps(get_schema(index))}" # noqa: E501 def get_schema(index: int): return json_schemas[index % len(json_schemas)] @@ -150,17 +152,17 @@ def get_schema(index: int): elif args.dataset == "grammar": schema = """ - ?start: select_statement + root ::= select_statement - ?select_statement: "SELECT " column_list " FROM " table_name + select_statement ::= "SELECT " column " from " table " where " condition - ?column_list: column_name ("," column_name)* + column ::= "col_1 " | "col_2 " - ?table_name: identifier + table ::= "table_1 " | 
"table_2 " - ?column_name: identifier + condition ::= column "= " number - ?identifier: /[a-zA-Z_][a-zA-Z0-9_]*/ + number ::= "1 " | "2 " """ prompt = "Generate an SQL query to show the 'username' \ and 'email' from the 'users' table." @@ -231,7 +233,8 @@ def _filter_func(item): idx -= len_dataset schema = dataset["schema"][idx] prompt = tokenizer.apply_chat_template(dataset["prompt"][idx], - tokenize=False) + tokenize=False, + add_generation_prompt=True) input_len = len(tokenizer(prompt).input_ids) completion = dataset["completion"][idx] @@ -849,7 +852,7 @@ def main(args: argparse.Namespace): 'json', 'json-unique', 'grammar', 'regex', 'choice', 'xgrammar_bench' ]) - parser.add_argument("--json_schema_path", + parser.add_argument("--json-schema-path", type=str, default=None, help="Path to json schema.") diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index 67e509c1f55..1f65277e1bf 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -523,6 +523,13 @@ def validate_args(args): raise ValueError( "Tokenizer must be the same as the model for MII backend.") + # --data-parallel is not supported currently. + # https://github.com/vllm-project/vllm/issues/16222 + if args.data_parallel_size > 1: + raise ValueError( + "Data parallel is not supported in offline benchmark, \ + please use benchmark serving instead") + if __name__ == "__main__": parser = FlexibleArgumentParser(description="Benchmark the throughput.") diff --git a/benchmarks/kernels/benchmark_bitblas.py b/benchmarks/kernels/benchmark_bitblas.py new file mode 100644 index 00000000000..b23b4f3ea68 --- /dev/null +++ b/benchmarks/kernels/benchmark_bitblas.py @@ -0,0 +1,236 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( + MINIMUM_BITBLAS_VERSION) + +try: + import bitblas + if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: + raise ImportError("bitblas version is wrong. Please " + f"install bitblas>={MINIMUM_BITBLAS_VERSION}") +except ImportError as e: + bitblas_import_exception = e + raise ValueError("Trying to use the bitblas backend, but could not import" + f"with the following error: {bitblas_import_exception}. 
" + "Please install bitblas through the following command: " + f"`pip install bitblas>={MINIMUM_BITBLAS_VERSION}`" + ) from bitblas_import_exception + +from bitblas import Matmul, MatmulConfig, auto_detect_nvidia_target + +from vllm.utils import FlexibleArgumentParser + +parser = FlexibleArgumentParser( + description="Benchmark BitBLAS int4 on a specific target.") + +# Add arguments to the parser +parser.add_argument( + "--target", + type=str, + default=auto_detect_nvidia_target(), + help="Specify the target device for benchmarking.", +) +parser.add_argument("--group_size", + type=int, + default=None, + help="Group size for grouped quantization.") +parser.add_argument( + "--A_dtype", + type=str, + default="float16", + choices=["float16", "float32", "float64", "int32", "int8"], + help="Data type of activation A.", +) +parser.add_argument( + "--W_dtype", + type=str, + default="int4", + choices=[ + "float16", + "float32", + "float64", + "int32", + "int8", + "int4", + "int2", + "int1", + "nf4", + "fp4_e2m1", + ], + help="Data type of weight W.", +) +parser.add_argument( + "--accum_dtype", + type=str, + default="float16", + choices=["float16", "int32"], + help="Data type for accumulation.", +) +parser.add_argument( + "--out_dtype", + type=str, + default="float16", + choices=["float16", "float32", "int32", "int8"], + help="Data type for output.", +) +parser.add_argument( + "--layout", + type=str, + default="nt", + choices=["nt", "nn"], + help="Matrix layout, 'nt' for non-transpose A and transpose W.", +) +parser.add_argument("--with_bias", + action="store_true", + help="Include bias in the benchmark.") +parser.add_argument( + "--with_scaling", + action="store_true", + help="Include scaling factor in the quantization.", +) +parser.add_argument("--with_zeros", + action="store_true", + help="Include zeros in the quantization.") +parser.add_argument( + "--zeros_mode", + type=str, + default=None, + choices=["original", "rescale", "quantized"], + help="Specify the mode for calculating zeros.", +) + +# Parse the arguments +args = parser.parse_args() + +# Assign arguments to variables +target = args.target +A_dtype = args.A_dtype +W_dtype = args.W_dtype +accum_dtype = args.accum_dtype +out_dtype = args.out_dtype +layout = args.layout +with_bias = args.with_bias +group_size = args.group_size +with_scaling = args.with_scaling +with_zeros = args.with_zeros +zeros_mode = args.zeros_mode + +# Define a list of shared arguments that repeat in every config +shared_args = [ + A_dtype, + W_dtype, + out_dtype, + accum_dtype, + layout, + with_bias, + group_size, + with_scaling, + with_zeros, + zeros_mode, +] + +# Define just the (M, K, N) shapes in a more compact list +shapes = [ + # square test + (1, 16384, 16384), + # BLOOM-176B + (1, 43008, 14336), + (1, 14336, 14336), + (1, 57344, 14336), + (1, 14336, 57344), + # OPT-65B + (1, 9216, 9216), + (1, 36864, 9216), + (1, 9216, 36864), + (1, 22016, 8192), + # LLAMA-70B/65B + (1, 8192, 22016), + (1, 8192, 8192), + (1, 28672, 8192), + (1, 8192, 28672), + # square test + (16384, 16384, 16384), + # BLOOM-176B + (8192, 43008, 14336), + (8192, 14336, 14336), + (8192, 57344, 14336), + (8192, 14336, 57344), + # OPT-65B + (8192, 9216, 9216), + (8192, 36864, 9216), + (8192, 9216, 36864), + (8192, 22016, 8192), + # LLAMA-70B/65B + (8192, 8192, 22016), + (8192, 8192, 8192), + (8192, 28672, 8192), + (8192, 8192, 28672), +] + +# Build test shapes with all the shared arguments +test_shapes = [(MatmulConfig, Matmul, (*shape, *shared_args)) + for shape in shapes] + +benchmark_sets = 
[] +benchmark_sets.extend(test_shapes) + +benchmark_results = {} +for config_class, operator, input_args in benchmark_sets: + config = config_class(*input_args) + matmul = operator(config, target=target, enable_tuning=True) + kernel_latency = matmul.profile_latency() + + print("Time cost is: {:.3f} ms".format(kernel_latency)) + + profile_config = { + f"{operator.__name__}-{'-'.join([str(i) for i in input_args])}": { + "BitBLAS_top20_latency": kernel_latency, + } + } + + benchmark_results.update(profile_config) + +# Define headers for the table +headers = [ + "PrimFunc", + "Input Arguments", + "BitBLAS Top20 Latency", +] + +# Calculate column widths for pretty printing +col_widths = [0, 0, 0] +for config_key, values in benchmark_results.items(): + args_split = config_key.split("-") + func_name = args_split[0] + input_args_str = "-".join(args_split[1:]) + col_widths[0] = max(col_widths[0], len(func_name) + 2, len(headers[0]) + 2) + col_widths[1] = max(col_widths[1], + len(input_args_str) + 2, + len(headers[1]) + 2) + col_widths[2] = max(col_widths[2], + len(f"{values['BitBLAS_top20_latency']:.3f} ms") + 2, + len(headers[2]) + 2) + # break only if you want to measure widths from a single example; + # otherwise, let it loop over all items. + +# Print header +for i, header in enumerate(headers): + headers[i] = header.ljust(col_widths[i]) +print("".join(headers)) +print("-" * sum(col_widths)) + +# Print rows +for config_key, values in benchmark_results.items(): + args_split = config_key.split("-") + func_name = args_split[0] + input_args_str = "-".join(args_split[1:]) + row = [ + func_name, + input_args_str, + f"{values['BitBLAS_top20_latency']:.3f} ms", + ] + row_str = "".join( + [str(cell).ljust(col_widths[idx]) for idx, cell in enumerate(row)]) + print(row_str) diff --git a/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py b/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py index bcdbf6c7551..c92ea43e826 100644 --- a/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py +++ b/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py @@ -90,7 +90,8 @@ def bench_run(results: list[benchmark.Measurement], model: str, score = torch.randn((m, num_experts), device="cuda", dtype=dtype) - topk_weights, topk_ids = fused_topk(a, score, topk, renormalize=False) + topk_weights, topk_ids, token_expert_indices = fused_topk( + a, score, topk, renormalize=False) def run_triton_moe(a: torch.Tensor, w1: torch.Tensor, w2: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, diff --git a/benchmarks/kernels/benchmark_lora.py b/benchmarks/kernels/benchmark_lora.py index b4b91eda284..d382ede10b4 100644 --- a/benchmarks/kernels/benchmark_lora.py +++ b/benchmarks/kernels/benchmark_lora.py @@ -17,8 +17,14 @@ from utils import ArgPool, Bench, CudaGraphBenchParams from weight_shapes import WEIGHT_SHAPES -from vllm.lora.ops.triton_ops import LoRAKernelMeta, lora_expand, lora_shrink -from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT +from vllm.triton_utils import HAS_TRITON + +if HAS_TRITON: + from vllm.lora.ops.triton_ops import (LoRAKernelMeta, lora_expand, + lora_shrink) + from vllm.lora.ops.triton_ops.utils import (_LORA_A_PTR_DICT, + _LORA_B_PTR_DICT) + from vllm.utils import FlexibleArgumentParser DEFAULT_MODELS = list(WEIGHT_SHAPES.keys()) diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py index afe0b53077a..9407747f784 100644 --- a/benchmarks/kernels/benchmark_moe.py +++ b/benchmarks/kernels/benchmark_moe.py @@ -115,8 +115,8 @@ def run(): 
from vllm.model_executor.layers.fused_moe import override_config with override_config(config): if use_deep_gemm: - topk_weights, topk_ids = fused_topk(x, input_gating, topk, - False) + topk_weights, topk_ids, token_expert_indices = fused_topk( + x, input_gating, topk, False) return fused_experts( x, w1, @@ -442,8 +442,14 @@ def tune( hidden_size, search_space, is_fp16, topk) - with torch.cuda.device(self.device_id) if current_platform.is_rocm( - ) else nullcontext(): + need_device_guard = False + if current_platform.is_rocm(): + visible_device = os.environ.get("ROCR_VISIBLE_DEVICES", None) + if visible_device != f"{self.device_id}": + need_device_guard = True + + with torch.cuda.device( + self.device_id) if need_device_guard else nullcontext(): for config in tqdm(search_space): try: kernel_time = benchmark_config( @@ -527,7 +533,7 @@ def get_weight_block_size_safety(config, default_value=None): def main(args: argparse.Namespace): print(args) - block_quant_shape = None + config = AutoConfig.from_pretrained( args.model, trust_remote_code=args.trust_remote_code) if config.architectures[0] == "DbrxForCausalLM": @@ -546,16 +552,16 @@ def main(args: argparse.Namespace): topk = config.num_experts_per_tok intermediate_size = config.moe_intermediate_size shard_intermediate_size = 2 * intermediate_size // args.tp_size - block_quant_shape = get_weight_block_size_safety(config) - elif config.architectures[0] == "Qwen2MoeForCausalLM": + elif config.architectures[0] in [ + "Qwen2MoeForCausalLM", "Qwen3MoeForCausalLM" + ]: E = config.num_experts topk = config.num_experts_per_tok intermediate_size = config.moe_intermediate_size shard_intermediate_size = 2 * intermediate_size // args.tp_size else: - if not hasattr(config, "hidden_size"): - # Support for llama4 - config = config.text_config + # Support for llama4 + config = config.get_text_config() # Default: Mixtral. E = config.num_local_experts topk = config.num_experts_per_tok @@ -566,6 +572,7 @@ def main(args: argparse.Namespace): dtype = torch.float16 if current_platform.is_rocm() else config.torch_dtype use_fp8_w8a8 = args.dtype == "fp8_w8a8" use_int8_w8a16 = args.dtype == "int8_w8a16" + block_quant_shape = get_weight_block_size_safety(config) if args.batch_size is None: batch_sizes = [ @@ -577,6 +584,15 @@ def main(args: argparse.Namespace): use_deep_gemm = bool(args.use_deep_gemm) + if current_platform.is_rocm() and "HIP_VISIBLE_DEVICES" in os.environ: + # Ray will set ROCR_VISIBLE_DEVICES for device visibility + logger.warning( + "Ray uses ROCR_VISIBLE_DEVICES to control device accessibility." 
+ "Replacing HIP_VISIBLE_DEVICES with ROCR_VISIBLE_DEVICES.") + val = os.environ["HIP_VISIBLE_DEVICES"] + os.environ["ROCR_VISIBLE_DEVICES"] = val + del os.environ["HIP_VISIBLE_DEVICES"] + ray.init() num_gpus = int(ray.available_resources()["GPU"]) workers = [BenchmarkWorker.remote(args.seed) for _ in range(num_gpus)] diff --git a/benchmarks/kernels/benchmark_moe_permute_unpermute.py b/benchmarks/kernels/benchmark_moe_permute_unpermute.py new file mode 100644 index 00000000000..937df962465 --- /dev/null +++ b/benchmarks/kernels/benchmark_moe_permute_unpermute.py @@ -0,0 +1,349 @@ +# SPDX-License-Identifier: Apache-2.0 + +import argparse +from typing import Any, TypedDict + +import ray +import torch +from transformers import AutoConfig + +from vllm.model_executor.layers.fused_moe.deep_gemm_moe import ( + _moe_permute, _moe_unpermute_and_reduce) +from vllm.model_executor.layers.fused_moe.fused_moe import * +from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import * +from vllm.model_executor.layers.fused_moe.utils import _fp8_quantize +from vllm.platforms import current_platform +from vllm.utils import FlexibleArgumentParser + +FP8_DTYPE = current_platform.fp8_dtype() + + +class BenchmarkConfig(TypedDict): + BLOCK_SIZE_M: int + BLOCK_SIZE_N: int + BLOCK_SIZE_K: int + GROUP_SIZE_M: int + num_warps: int + num_stages: int + + +def benchmark_permute(num_tokens: int, + num_experts: int, + hidden_size: int, + topk: int, + dtype: torch.dtype, + use_fp8_w8a8: bool, + use_int8_w8a16: bool, + num_iters: int = 100, + use_customized_permute: bool = False) -> float: + # init_dtype = torch.float16 if use_fp8_w8a8 else dtype + hidden_states = torch.randn(num_tokens, hidden_size, dtype=dtype) + # output_hidden_states = torch.empty_like(hidden_states) + if use_fp8_w8a8: + align_block_size = 128 # deepgemm needs 128 m aligned block + qhidden_states, scale = _fp8_quantize(hidden_states, None, None) + else: + align_block_size = None + qhidden_states = hidden_states + + gating_output = torch.randn(num_iters, + num_tokens, + num_experts, + dtype=torch.float32) + + input_gating = torch.randn(num_tokens, num_experts, dtype=torch.float32) + topk_weights, topk_ids, token_expert_indices = fused_topk( + qhidden_states, input_gating, topk, False) + + def prepare(i: int): + input_gating.copy_(gating_output[i]) + + def run(): + if use_customized_permute: + (permuted_hidden_states, first_token_off, inv_perm_idx, + m_indices) = moe_permute( + qhidden_states, + topk_weights=topk_weights, + topk_ids=topk_ids, + token_expert_indices=token_expert_indices, + topk=topk, + n_expert=num_experts, + n_local_expert=num_experts, + expert_map=None, + align_block_size=align_block_size, + ) + else: + (permuted_hidden_states, a1q_scale, sorted_token_ids, expert_ids, + inv_perm) = _moe_permute(qhidden_states, None, topk_ids, + num_experts, None, align_block_size) + + # JIT compilation & warmup + run() + torch.cuda.synchronize() + + # Capture 10 invocations with CUDA graph + graph = torch.cuda.CUDAGraph() + with torch.cuda.graph(graph): + for _ in range(10): + run() + torch.cuda.synchronize() + + # Warmup + for _ in range(5): + graph.replay() + torch.cuda.synchronize() + + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + latencies: list[float] = [] + for i in range(num_iters): + prepare(i) + torch.cuda.synchronize() + + start_event.record() + graph.replay() + end_event.record() + end_event.synchronize() + latencies.append(start_event.elapsed_time(end_event)) + avg = 
sum(latencies) / (num_iters * 10) * 1000 # us + graph.reset() + return avg + + +def benchmark_unpermute(num_tokens: int, + num_experts: int, + hidden_size: int, + topk: int, + dtype: torch.dtype, + use_fp8_w8a8: bool, + use_int8_w8a16: bool, + num_iters: int = 100, + use_customized_permute: bool = False) -> float: + # init_dtype = torch.float16 if use_fp8_w8a8 else dtype + hidden_states = torch.randn(num_tokens, hidden_size, dtype=dtype) + output_hidden_states = torch.empty_like(hidden_states) + if use_fp8_w8a8: + align_block_size = 128 # deepgemm needs 128 m aligned block + qhidden_states, scale = _fp8_quantize(hidden_states, None, None) + else: + align_block_size = None + qhidden_states = hidden_states + + input_gating = torch.randn(num_tokens, num_experts, dtype=torch.float32) + + topk_weights, topk_ids, token_expert_indices = fused_topk( + qhidden_states, input_gating, topk, False) + + def prepare(): + if use_customized_permute: + (permuted_hidden_states, first_token_off, inv_perm_idx, + m_indices) = moe_permute( + qhidden_states, + topk_weights=topk_weights, + topk_ids=topk_ids, + token_expert_indices=token_expert_indices, + topk=topk, + n_expert=num_experts, + n_local_expert=num_experts, + expert_map=None, + align_block_size=align_block_size, + ) + # convert to fp16/bf16 as gemm output + return (permuted_hidden_states.to(dtype), first_token_off, + inv_perm_idx, m_indices) + else: + (permuted_qhidden_states, a1q_scale, sorted_token_ids, expert_ids, + inv_perm) = _moe_permute(qhidden_states, None, topk_ids, + num_experts, None, align_block_size) + # convert to fp16/bf16 as gemm output + return (permuted_qhidden_states.to(dtype), a1q_scale, + sorted_token_ids, expert_ids, inv_perm) + + def run(input: tuple): + if use_customized_permute: + (permuted_hidden_states, first_token_off, inv_perm_idx, + m_indices) = input + moe_unpermute(permuted_hidden_states, topk_weights, topk_ids, + inv_perm_idx, first_token_off, topk, num_experts, + num_experts) + else: + (permuted_hidden_states, a1q_scale, sorted_token_ids, expert_ids, + inv_perm) = input + _moe_unpermute_and_reduce(output_hidden_states, + permuted_hidden_states, inv_perm, + topk_weights) + + # JIT compilation & warmup + input = prepare() + run(input) + torch.cuda.synchronize() + + # Capture 10 invocations with CUDA graph + graph = torch.cuda.CUDAGraph() + with torch.cuda.graph(graph): + for _ in range(10): + run(input) + torch.cuda.synchronize() + + # Warmup + for _ in range(5): + graph.replay() + torch.cuda.synchronize() + + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + latencies: list[float] = [] + for i in range(num_iters): + torch.cuda.synchronize() + start_event.record() + graph.replay() + end_event.record() + end_event.synchronize() + latencies.append(start_event.elapsed_time(end_event)) + avg = sum(latencies) / (num_iters * 10) * 1000 # us + graph.reset() + return avg + + +@ray.remote(num_gpus=1) +class BenchmarkWorker: + + def __init__(self, seed: int) -> None: + torch.set_default_device("cuda") + current_platform.seed_everything(seed) + self.seed = seed + # Get the device ID to allocate tensors and kernels + # on the respective GPU. This is required for Ray to work + # correctly with multi-GPU tuning on the ROCm platform. 
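+        # Illustrative note: ray.get_gpu_ids() returns the GPU ids Ray
+        # assigned to this actor (e.g. [1] or ["1"] for an actor pinned to
+        # the second GPU, depending on the Ray version), so the int(...)
+        # conversion below yields a plain integer device id.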
+ self.device_id = int(ray.get_gpu_ids()[0]) + + def benchmark( + self, + num_tokens: int, + num_experts: int, + hidden_size: int, + topk: int, + dtype: torch.dtype, + use_fp8_w8a8: bool, + use_int8_w8a16: bool, + use_customized_permute: bool = False, + ) -> tuple[dict[str, int], float]: + current_platform.seed_everything(self.seed) + + permute_time = benchmark_permute( + num_tokens, + num_experts, + hidden_size, + topk, + dtype, + use_fp8_w8a8, + use_int8_w8a16, + num_iters=100, + use_customized_permute=use_customized_permute) + unpermute_time = benchmark_unpermute( + num_tokens, + num_experts, + hidden_size, + topk, + dtype, + use_fp8_w8a8, + use_int8_w8a16, + num_iters=100, + use_customized_permute=use_customized_permute) + return permute_time, unpermute_time + + +def get_weight_block_size_safety(config, default_value=None): + + quantization_config = getattr(config, 'quantization_config', {}) + if isinstance(quantization_config, dict): + return quantization_config.get('weight_block_size', default_value) + return default_value + + +def main(args: argparse.Namespace): + print(args) + + config = AutoConfig.from_pretrained( + args.model, trust_remote_code=args.trust_remote_code) + if config.architectures[0] == "DbrxForCausalLM": + E = config.ffn_config.moe_num_experts + topk = config.ffn_config.moe_top_k + elif config.architectures[0] == "JambaForCausalLM": + E = config.num_experts + topk = config.num_experts_per_tok + elif (config.architectures[0] == "DeepseekV3ForCausalLM" + or config.architectures[0] == "DeepseekV2ForCausalLM"): + E = config.n_routed_experts + topk = config.num_experts_per_tok + elif config.architectures[0] in [ + "Qwen2MoeForCausalLM", "Qwen3MoeForCausalLM" + ]: + E = config.num_experts + topk = config.num_experts_per_tok + + else: + # Support for llama4 + config = config.get_text_config() + # Default: Mixtral. 
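+        # Note (assumed transformers behavior): for multimodal configs such
+        # as Llama-4, get_text_config() above returns the nested text config,
+        # while for plain text models it should return the config itself, so
+        # the Mixtral-style fields below resolve either way.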
+ E = config.num_local_experts + topk = config.num_experts_per_tok + + hidden_size = config.hidden_size + dtype = torch.float16 if current_platform.is_rocm() else config.torch_dtype + use_fp8_w8a8 = args.dtype == "fp8_w8a8" + use_int8_w8a16 = args.dtype == "int8_w8a16" + use_customized_permute = args.use_customized_permute + + if args.batch_size is None: + batch_sizes = [ + 1, 2, 4, 8, 16, 24, 32, 48, 64, 96, 128, 256, 512, 1024, 1536, + 2048, 3072, 4096 + ] + else: + batch_sizes = [args.batch_size] + + ray.init() + num_gpus = int(ray.available_resources()["GPU"]) + workers = [BenchmarkWorker.remote(args.seed) for _ in range(num_gpus)] + + def _distribute(method: str, inputs: list[Any]) -> list[Any]: + outputs = [] + worker_idx = 0 + for input_args in inputs: + worker = workers[worker_idx] + worker_method = getattr(worker, method) + output = worker_method.remote(*input_args) + outputs.append(output) + worker_idx = (worker_idx + 1) % num_gpus + return ray.get(outputs) + + outputs = _distribute( + "benchmark", [(batch_size, E, hidden_size, topk, dtype, use_fp8_w8a8, + use_int8_w8a16, use_customized_permute) + for batch_size in batch_sizes]) + + for batch_size, (permute, unpermute) in zip(batch_sizes, outputs): + print(f"Batch size: {batch_size}") + print(f"Permute time: {permute:.2f} us") + print(f"Unpermute time: {unpermute:.2f} us") + + +if __name__ == "__main__": + parser = FlexibleArgumentParser() + parser.add_argument("--model", + type=str, + default="mistralai/Mixtral-8x7B-Instruct-v0.1") + parser.add_argument("--dtype", + type=str, + choices=["auto", "fp8_w8a8", "int8_w8a16"], + default="auto") + parser.add_argument("--use-customized-permute", action="store_true") + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--batch-size", type=int, required=False) + parser.add_argument("--trust-remote-code", action="store_true") + args = parser.parse_args() + + main(args) diff --git a/cmake/external_projects/vllm_flash_attn.cmake b/cmake/external_projects/vllm_flash_attn.cmake index afd7c47e8ac..b04e4c2d06e 100644 --- a/cmake/external_projects/vllm_flash_attn.cmake +++ b/cmake/external_projects/vllm_flash_attn.cmake @@ -38,7 +38,7 @@ else() FetchContent_Declare( vllm-flash-attn GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git - GIT_TAG dc9d410b3e2d6534a4c70724c2515f4def670a22 + GIT_TAG 8798f27777fb57f447070301bf33a9f9c607f491 GIT_PROGRESS TRUE # Don't share the vllm-flash-attn build between build types BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn diff --git a/csrc/attention/merge_attn_states.cu b/csrc/attention/merge_attn_states.cu index 7af0caceda2..14e5edd7e28 100644 --- a/csrc/attention/merge_attn_states.cu +++ b/csrc/attention/merge_attn_states.cu @@ -107,13 +107,14 @@ __global__ void merge_attn_states_kernel( #define LAUNCH_MERGE_ATTN_STATES(scalar_t, NUM_THREADS) \ { \ - vllm::merge_attn_states_kernel<<>>( \ - reinterpret_cast(output.data_ptr()), output_lse_ptr, \ - reinterpret_cast(prefix_output.data_ptr()), \ - reinterpret_cast(prefix_lse.data_ptr()), \ - reinterpret_cast(suffix_output.data_ptr()), \ - reinterpret_cast(suffix_lse.data_ptr()), num_tokens, \ - num_heads, head_size); \ + vllm::merge_attn_states_kernel \ + <<>>( \ + reinterpret_cast(output.data_ptr()), output_lse_ptr, \ + reinterpret_cast(prefix_output.data_ptr()), \ + reinterpret_cast(prefix_lse.data_ptr()), \ + reinterpret_cast(suffix_output.data_ptr()), \ + reinterpret_cast(suffix_lse.data_ptr()), num_tokens, \ + num_heads, head_size); \ } /*@brief Merges the attention states 
from prefix and suffix @@ -122,10 +123,10 @@ __global__ void merge_attn_states_kernel( * @param output [n,h,d] The output tensor to store the merged attention states. * @param output_lse [h,d] Optional tensor to store the log-sum-exp values. * @param prefix_output [n,h,d] The prefix attention states. - * @param prefix_lse [h,d] The log-sum-exp values for the prefix attention + * @param prefix_lse [h,n] The log-sum-exp values for the prefix attention * states. * @param suffix_output [n,h,d] The suffix attention states. - * @param suffix_lse [h,d] The log-sum-exp values for the suffix attention + * @param suffix_lse [h,n] The log-sum-exp values for the suffix attention * states. */ template @@ -146,13 +147,17 @@ void merge_attn_states_launcher(torch::Tensor& output, if (output_lse.has_value()) { output_lse_ptr = output_lse.value().data_ptr(); } - // process one pack elements per thread. float -> 4, half/bf16 -> 8 + // Process one pack elements per thread. for float, the + // pack_size is 4 for half/bf16, the pack_size is 8. const uint threads_per_head = head_size / pack_size; const uint total_threads = num_tokens * num_heads * threads_per_head; dim3 block(NUM_THREADS); dim3 grid((total_threads + NUM_THREADS - 1) / NUM_THREADS); + const c10::cuda::OptionalCUDAGuard device_guard(prefix_output.device()); + auto stream = at::cuda::getCurrentCUDAStream(); + LAUNCH_MERGE_ATTN_STATES(scalar_t, NUM_THREADS); } diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu new file mode 100644 index 00000000000..0319d1daf30 --- /dev/null +++ b/csrc/attention/mla/cutlass_mla_entry.cu @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#if defined ENABLE_CUTLASS_MLA && ENABLE_CUTLASS_MLA +void cutlass_mla_decode_sm100a(torch::Tensor const& out, + torch::Tensor const& q_nope, + torch::Tensor const& q_pe, + torch::Tensor const& kv_c_and_k_pe_cache, + torch::Tensor const& seq_lens, + torch::Tensor const& page_table, double scale); +#endif + +void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope, + torch::Tensor const& q_pe, + torch::Tensor const& kv_c_and_k_pe_cache, + torch::Tensor const& seq_lens, + torch::Tensor const& page_table, double scale) { +#if defined ENABLE_CUTLASS_MLA && ENABLE_CUTLASS_MLA + return cutlass_mla_decode_sm100a(out, q_nope, q_pe, kv_c_and_k_pe_cache, + seq_lens, page_table, scale); +#endif + TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA"); +} diff --git a/csrc/attention/mla/cutlass_mla_kernels.cu b/csrc/attention/mla/cutlass_mla_kernels.cu new file mode 100644 index 00000000000..6743af0cf2d --- /dev/null +++ b/csrc/attention/mla/cutlass_mla_kernels.cu @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include "cute/tensor.hpp" + +#include "cutlass/cutlass.h" +#include "cutlass/kernel_hardware_info.h" + +#include "cutlass_extensions/common.hpp" + +#include "device/sm100_mla.hpp" +#include "kernel/sm100_mla_tile_scheduler.hpp" + +using namespace cute; +using namespace cutlass::fmha::kernel; + +template +struct MlaSm100 { + using Element = T; + using ElementAcc = float; + using ElementOut = T; + + using TileShape = Shape<_128, _128, Shape<_512, _64>>; + using TileShapeH = cute::tuple_element_t<0, TileShape>; + using TileShapeD = cute::tuple_element_t<2, TileShape>; + + // H K (D_latent D_rope) B + using ProblemShape = cute::tuple; + + using StrideQ = cute::tuple; // H D B + using StrideK = cute::tuple; // K D B + using StrideO = StrideK; // H D B + using StrideLSE = cute::tuple<_1, int>; // H B + + using TileScheduler = + std::conditional_t; + + using FmhaKernel = + cutlass::fmha::kernel::Sm100FmhaMlaKernelTmaWarpspecialized< + TileShape, Element, ElementAcc, ElementOut, ElementAcc, TileScheduler, + /*kIsCpAsync=*/true>; + using Fmha = cutlass::fmha::device::MLA; +}; + +template +typename T::Fmha::Arguments args_from_options( + at::Tensor const& out, at::Tensor const& q_nope, at::Tensor const& q_pe, + at::Tensor const& kv_c_and_k_pe_cache, at::Tensor const& seq_lens, + at::Tensor const& page_table, double scale) { + cutlass::KernelHardwareInfo hw_info; + hw_info.device_id = q_nope.device().index(); + hw_info.sm_count = + cutlass::KernelHardwareInfo::query_device_multiprocessor_count( + hw_info.device_id); + + int batches = q_nope.sizes()[0]; + int page_count_per_seq = page_table.sizes()[1]; + int page_count_total = kv_c_and_k_pe_cache.sizes()[0]; + int page_size = kv_c_and_k_pe_cache.sizes()[1]; + int max_seq_len = page_size * page_count_per_seq; + using TileShapeH = typename T::TileShapeH; + using TileShapeD = typename T::TileShapeD; + auto problem_shape = + cute::make_tuple(TileShapeH{}, max_seq_len, TileShapeD{}, batches); + + auto [H, K, D, B] = problem_shape; + auto [D_latent, D_rope] = D; + + using StrideQ = typename T::StrideQ; + using StrideK = typename T::StrideK; + using StrideO = typename T::StrideO; + using StrideLSE = typename T::StrideLSE; + + StrideQ stride_Q_latent = cute::make_tuple( + static_cast(D_latent), _1{}, static_cast(H * D_latent)); + StrideQ stride_Q_rope = cute::make_tuple(static_cast(D_rope), _1{}, + static_cast(H * D_rope)); + StrideK stride_C = + cute::make_tuple(static_cast(D_latent + D_rope), _1{}, + static_cast(page_size * (D_latent + D_rope))); + StrideLSE stride_PT = cute::make_stride(_1{}, page_count_per_seq); + StrideLSE stride_LSE = cute::make_tuple(_1{}, static_cast(H)); + StrideO stride_O = cute::make_tuple(static_cast(D_latent), _1{}, + static_cast(H * D_latent)); + + using Element = typename T::Element; + using ElementOut = typename T::ElementOut; + using ElementAcc = typename T::ElementAcc; + auto Q_latent_ptr = static_cast(q_nope.data_ptr()); + auto Q_rope_ptr = static_cast(q_pe.data_ptr()); + auto C_ptr = static_cast(kv_c_and_k_pe_cache.data_ptr()); + auto scale_f = static_cast(scale); 
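+  // Note: the latent and rope halves of kv_c_and_k_pe_cache share one
+  // allocation, so the rope pointer passed below is simply C_ptr offset by
+  // D_latent elements and reuses stride_C.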
+ typename T::Fmha::Arguments arguments{ + problem_shape, + {scale_f, Q_latent_ptr, stride_Q_latent, Q_rope_ptr, stride_Q_rope, C_ptr, + stride_C, C_ptr + D_latent, stride_C, + static_cast(seq_lens.data_ptr()), + static_cast(page_table.data_ptr()), stride_PT, page_count_total, + page_size}, + {static_cast(out.data_ptr()), stride_O, + static_cast(nullptr), stride_LSE}, + hw_info, + -1, // split_kv + nullptr, // is_var_split_kv + }; + // TODO(kaixih@nvidia): When split_kv=-1 and is_var_split_kv=false, we compute + // split_kv automatically based on batch size and sequence length to balance + // workload across available SMs. Consider using var_split_kv for manual + // control if needed. + T::Fmha::set_split_kv(arguments); + return arguments; +} + +template +void runMla(at::Tensor const& out, at::Tensor const& q_nope, + at::Tensor const& q_pe, at::Tensor const& kv_c_and_k_pe_cache, + at::Tensor const& seq_lens, at::Tensor const& page_table, + float scale, cudaStream_t stream) { + using MlaSm100Type = MlaSm100; + typename MlaSm100Type::Fmha fmha; + auto arguments = args_from_options( + out, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, page_table, scale); + size_t workspace_size = MlaSm100Type::Fmha::get_workspace_size(arguments); + auto const workspace_options = + torch::TensorOptions().dtype(torch::kUInt8).device(q_nope.device()); + auto workspace = torch::empty(workspace_size, workspace_options); + + CUTLASS_CHECK(fmha.can_implement(arguments)); + + CUTLASS_CHECK(fmha.initialize(arguments, workspace.data_ptr(), stream)); + + CUTLASS_CHECK(fmha.run(arguments, workspace.data_ptr(), stream)); +} + +void cutlass_mla_decode_sm100a(torch::Tensor const& out, + torch::Tensor const& q_nope, + torch::Tensor const& q_pe, + torch::Tensor const& kv_c_and_k_pe_cache, + torch::Tensor const& seq_lens, + torch::Tensor const& page_table, double scale) { + TORCH_CHECK(q_nope.device().is_cuda(), "q_nope must be on CUDA"); + TORCH_CHECK(q_nope.dim() == 3, "q_nope must be a 3D tensor"); + TORCH_CHECK(q_pe.dim() == 3, "q_pe must be a 3D tensor"); + TORCH_CHECK(kv_c_and_k_pe_cache.dim() == 3, + "kv_c_and_k_pe_cache must be a 3D tensor"); + TORCH_CHECK(seq_lens.dim() == 1, "seq_lens must be a 1D tensor"); + TORCH_CHECK(page_table.dim() == 2, "page_table must be a 2D tensor"); + TORCH_CHECK(out.dim() == 3, "out must be a 3D tensor"); + + auto B_q_nope = q_nope.size(0); + auto H_q_nope = q_nope.size(1); + auto D_q_nope = q_nope.size(2); + auto B_q_pe = q_pe.size(0); + auto H_q_pe = q_pe.size(1); + auto D_q_pe = q_pe.size(2); + auto B_pt = page_table.size(0); + auto PAGE_NUM = page_table.size(1); + auto PAGE_SIZE = kv_c_and_k_pe_cache.size(1); + auto D_ckv = kv_c_and_k_pe_cache.size(2); + auto B_o = out.size(0); + auto H_o = out.size(1); + auto D_o = out.size(2); + + TORCH_CHECK(D_q_nope == 512, "D_q_nope must be equal to 512"); + TORCH_CHECK(D_q_pe == 64, "D_q_pe must be equal to 64"); + TORCH_CHECK(D_ckv == 576, "D_ckv must be equal to 576"); + TORCH_CHECK(H_q_nope == H_q_pe && H_q_nope == H_o && H_o == 128, + "H_q_nope, H_q_pe, and H_o must be equal to 128"); + TORCH_CHECK(PAGE_SIZE > 0 && (PAGE_SIZE & (PAGE_SIZE - 1)) == 0, + "PAGE_SIZE must be a power of 2"); + TORCH_CHECK( + B_q_nope == B_q_pe && B_q_nope == B_pt && B_q_nope == B_o, + "Batch dims must be same for page_table, q_nope and q_pe, and out"); + TORCH_CHECK(PAGE_NUM % (128 / PAGE_SIZE) == 0, + "PAGE_NUM must be divisible by 128 / PAGE_SIZE"); + TORCH_CHECK(D_o == 512, "D_o must be equal to 512"); + + TORCH_CHECK(q_nope.dtype() == at::ScalarType::Half || 
+ q_nope.dtype() == at::ScalarType::BFloat16 || + q_nope.dtype() == at::ScalarType::Float8_e4m3fn, + "q_nope must be a half, bfloat16, or float8_e4m3fn tensor"); + TORCH_CHECK(kv_c_and_k_pe_cache.dtype() == q_nope.dtype() && + q_nope.dtype() == q_pe.dtype(), + "kv_c_and_k_pe_cache, q_nope, and q_pe must be the same type"); + TORCH_CHECK(seq_lens.dtype() == torch::kInt32, + "seq_lens must be a 32-bit integer tensor"); + TORCH_CHECK(page_table.dtype() == torch::kInt32, + "page_table must be a 32-bit integer tensor"); + + auto in_dtype = q_nope.dtype(); + at::cuda::CUDAGuard device_guard{(char)q_nope.get_device()}; + const cudaStream_t stream = + at::cuda::getCurrentCUDAStream(q_nope.get_device()); + if (in_dtype == at::ScalarType::Half) { + runMla(out, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, + page_table, scale, stream); + } else if (in_dtype == at::ScalarType::BFloat16) { + runMla(out, q_nope, q_pe, kv_c_and_k_pe_cache, + seq_lens, page_table, scale, stream); + } else if (in_dtype == at::ScalarType::Float8_e4m3fn) { + runMla(out, q_nope, q_pe, kv_c_and_k_pe_cache, + seq_lens, page_table, scale, stream); + } else { + TORCH_CHECK(false, "Unsupported input data type of MLA"); + } +} diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu index 0b3f6fc8c19..88559c8fe71 100644 --- a/csrc/cache_kernels.cu +++ b/csrc/cache_kernels.cu @@ -270,9 +270,10 @@ __global__ void reshape_and_cache_flash_kernel( cache_t* __restrict__ value_cache, // [num_blocks, block_size, num_heads, // head_size] const int64_t* __restrict__ slot_mapping, // [num_tokens] - const int block_stride, const int key_stride, const int value_stride, - const int num_heads, const int head_size, const int block_size, - const float* k_scale, const float* v_scale) { + const int64_t block_stride, const int64_t page_stride, + const int64_t head_stride, const int64_t key_stride, + const int64_t value_stride, const int num_heads, const int head_size, + const int block_size, const float* k_scale, const float* v_scale) { const int64_t token_idx = blockIdx.x; const int64_t slot_idx = slot_mapping[token_idx]; // NOTE: slot_idx can be -1 if the token is padded @@ -288,8 +289,8 @@ __global__ void reshape_and_cache_flash_kernel( const int head_idx = i / head_size; const int head_offset = i % head_size; const int64_t tgt_key_value_idx = block_idx * block_stride + - block_offset * num_heads * head_size + - head_idx * head_size + head_offset; + block_offset * page_stride + + head_idx * head_stride + head_offset; scalar_t tgt_key = key[src_key_idx]; scalar_t tgt_value = value[src_value_idx]; if constexpr (kv_dt == Fp8KVCacheDataType::kAuto) { @@ -396,16 +397,16 @@ void reshape_and_cache( // KV_T is the data type of key and value tensors. // CACHE_T is the stored data type of kv-cache. // KV_DTYPE is the real data type of kv-cache. 
-#define CALL_RESHAPE_AND_CACHE_FLASH(KV_T, CACHE_T, KV_DTYPE) \ - vllm::reshape_and_cache_flash_kernel \ - <<>>( \ - reinterpret_cast(key.data_ptr()), \ - reinterpret_cast(value.data_ptr()), \ - reinterpret_cast(key_cache.data_ptr()), \ - reinterpret_cast(value_cache.data_ptr()), \ - slot_mapping.data_ptr(), block_stride, key_stride, \ - value_stride, num_heads, head_size, block_size, \ - reinterpret_cast(k_scale.data_ptr()), \ +#define CALL_RESHAPE_AND_CACHE_FLASH(KV_T, CACHE_T, KV_DTYPE) \ + vllm::reshape_and_cache_flash_kernel \ + <<>>( \ + reinterpret_cast(key.data_ptr()), \ + reinterpret_cast(value.data_ptr()), \ + reinterpret_cast(key_cache.data_ptr()), \ + reinterpret_cast(value_cache.data_ptr()), \ + slot_mapping.data_ptr(), block_stride, page_stride, \ + head_stride, key_stride, value_stride, num_heads, head_size, \ + block_size, reinterpret_cast(k_scale.data_ptr()), \ reinterpret_cast(v_scale.data_ptr())); void reshape_and_cache_flash( @@ -432,9 +433,11 @@ void reshape_and_cache_flash( int head_size = key.size(2); int block_size = key_cache.size(1); - int key_stride = key.stride(0); - int value_stride = value.stride(0); - int block_stride = key_cache.stride(0); + int64_t key_stride = key.stride(0); + int64_t value_stride = value.stride(0); + int64_t block_stride = key_cache.stride(0); + int64_t page_stride = key_cache.stride(1); + int64_t head_stride = key_cache.stride(2); TORCH_CHECK(key_cache.stride(0) == value_cache.stride(0)); dim3 grid(num_tokens); diff --git a/csrc/core/math.hpp b/csrc/core/math.hpp index b8171133f6a..6764e1fd605 100644 --- a/csrc/core/math.hpp +++ b/csrc/core/math.hpp @@ -7,3 +7,22 @@ inline constexpr uint32_t next_pow_2(uint32_t const num) { if (num <= 1) return num; return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1)); } + +template +static inline constexpr auto div_ceil(A a, B b) { + return (a + b - 1) / b; +} + +// Round a down to the next multiple of b. The caller is responsible for making +// sure that b is non-zero +template +inline constexpr T round_to_previous_multiple_of(T a, T b) { + return a % b == 0 ? a : (a / b) * b; +} + +// Round a up to the next multiple of b. The caller is responsible for making +// sure that b is non-zero +template +inline constexpr T round_to_next_multiple_of(T a, T b) { + return a % b == 0 ? a : ((a / b) + 1) * b; +} diff --git a/csrc/moe/marlin_kernels/marlin_moe_kernel.h b/csrc/moe/marlin_kernels/marlin_moe_kernel.h index 47ecf109d0f..a217401b3d7 100644 --- a/csrc/moe/marlin_kernels/marlin_moe_kernel.h +++ b/csrc/moe/marlin_kernels/marlin_moe_kernel.h @@ -138,8 +138,8 @@ __device__ inline FragB dequant(int q) { const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point // directly into `SUB` and `ADD`. const int SUB = 0x64086408; @@ -182,8 +182,8 @@ __device__ inline FragB dequant(int q) { const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. 
- int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); const int SUB = 0x64006400; const int MUL = 0x2c002c00; diff --git a/csrc/moe/marlin_moe_wna16/marlin_template.h b/csrc/moe/marlin_moe_wna16/marlin_template.h index 205b308fe51..3705216cada 100644 --- a/csrc/moe/marlin_moe_wna16/marlin_template.h +++ b/csrc/moe/marlin_moe_wna16/marlin_template.h @@ -209,8 +209,8 @@ __device__ inline typename ScalarType::FragB dequant( const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point // directly into `SUB` and `ADD`. const int SUB = 0x64086408; @@ -233,9 +233,9 @@ dequant(int q, // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); q >>= 4; - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); static constexpr uint32_t MUL = 0x3F803F80; static constexpr uint32_t ADD = 0xC308C308; diff --git a/csrc/moe/moe_permute_unpermute_op.cu b/csrc/moe/moe_permute_unpermute_op.cu new file mode 100644 index 00000000000..76d5f0eab02 --- /dev/null +++ b/csrc/moe/moe_permute_unpermute_op.cu @@ -0,0 +1,133 @@ +#include +#include +#include +#include "permute_unpermute_kernels/moe_permute_unpermute_kernel.h" +#include "permute_unpermute_kernels/dispatch.h" +#include "core/registration.h" + +void moe_permute( + const torch::Tensor& input, // [n_token, hidden] + const torch::Tensor& topk_weights, //[n_token, topk] + torch::Tensor& topk_ids, // [n_token, topk] + const torch::Tensor& token_expert_indicies, // [n_token, topk] + const std::optional& expert_map, // [n_expert] + int64_t n_expert, int64_t n_local_expert, int64_t topk, + const std::optional& align_block_size, + torch::Tensor& + permuted_input, // [topk * n_token/align_block_size_m, hidden] + torch::Tensor& expert_first_token_offset, // [n_local_expert + 1] + torch::Tensor& src_row_id2dst_row_id_map, // [n_token, topk] + torch::Tensor& m_indices) { // [align_expand_m] + TORCH_CHECK(topk_weights.scalar_type() == at::ScalarType::Float, + "topk_weights must be float32"); + TORCH_CHECK(expert_first_token_offset.scalar_type() == at::ScalarType::Long, + "expert_first_token_offset must be int64"); + TORCH_CHECK(topk_ids.scalar_type() == at::ScalarType::Int, + "topk_ids must be int32"); + TORCH_CHECK(token_expert_indicies.scalar_type() == at::ScalarType::Int, + "token_expert_indicies must be int32"); + TORCH_CHECK(src_row_id2dst_row_id_map.scalar_type() == at::ScalarType::Int, + "src_row_id2dst_row_id_map must be int32"); + TORCH_CHECK(expert_first_token_offset.size(0) == n_local_expert + 1, + "expert_first_token_offset shape != n_local_expert+1") + TORCH_CHECK( + src_row_id2dst_row_id_map.sizes() == token_expert_indicies.sizes(), + "token_expert_indicies shape must be same as src_row_id2dst_row_id_map"); + auto n_token = input.sizes()[0]; + auto n_hidden = input.sizes()[1]; + auto align_block_size_value = + align_block_size.has_value() ? 
align_block_size.value() : -1; + auto stream = at::cuda::getCurrentCUDAStream().stream(); + const long sorter_size = + CubKeyValueSorter::getWorkspaceSize(n_token * topk, n_expert); + auto sort_workspace = torch::empty( + {sorter_size}, + torch::dtype(torch::kInt8).device(torch::kCUDA).requires_grad(false)); + auto permuted_experts_id = torch::empty_like(topk_ids); + auto dst_row_id2src_row_id_map = torch::empty_like(src_row_id2dst_row_id_map); + auto align_expert_first_token_offset = + torch::zeros_like(expert_first_token_offset); + + CubKeyValueSorter sorter{}; + int64_t* valid_num_ptr = nullptr; + // pre-process kernel for expert-parallelism: + // no local expert id plus "n_expert" offset for priority to local expert + // map local expert id [n, .., n+n_local_expert-1] to [0, n_local_expert -1] + // For example, 4 expert with ep_size=2. ep_rank=1 owns global expert id + // [2,3] with expert_map[-1, -1, 0, 1], preprocess_topk_id process topk_ids + // and map global expert id [2, 3] to local_expert id [0, 1] and map global + // expert id [0, 1] ( not in ep rank=1) to [4, 5] by plus n_expert. This map + // operation is to make local expert high priority in following sort topk_ids + // and scan local expert_first_token_offset for each ep rank for next group + // gemm. + if (expert_map.has_value()) { + const int* expert_map_ptr = get_ptr(expert_map.value()); + valid_num_ptr = + get_ptr(expert_first_token_offset) + n_local_expert; + preprocessTopkIdLauncher(get_ptr(topk_ids), n_token * topk, + expert_map_ptr, n_expert, stream); + } + // expert sort topk expert id and scan expert id get expert_first_token_offset + sortAndScanExpert(get_ptr(topk_ids), get_ptr(token_expert_indicies), + get_ptr(permuted_experts_id), + get_ptr(dst_row_id2src_row_id_map), + get_ptr(expert_first_token_offset), n_token, + n_expert, n_local_expert, topk, sorter, + get_ptr(sort_workspace), stream); + + // dispatch expandInputRowsKernelLauncher + MOE_DISPATCH(input.scalar_type(), [&] { + expandInputRowsKernelLauncher( + get_ptr(input), get_ptr(permuted_input), + get_ptr(topk_weights), get_ptr(permuted_experts_id), + get_ptr(dst_row_id2src_row_id_map), + get_ptr(src_row_id2dst_row_id_map), + get_ptr(expert_first_token_offset), n_token, valid_num_ptr, + n_hidden, topk, n_local_expert, align_block_size_value, stream); + }); + + // get m_indices and update expert_first_token_offset with align block + getMIndices(get_ptr(expert_first_token_offset), + get_ptr(align_expert_first_token_offset), + get_ptr(m_indices), n_local_expert, align_block_size_value, + stream); + if (align_block_size.has_value()) { + // update align_expert_first_token_offset + expert_first_token_offset.copy_(align_expert_first_token_offset); + } +} + +void moe_unpermute( + const torch::Tensor& permuted_hidden_states, // [n_token * topk, hidden] + const torch::Tensor& topk_weights, //[n_token, topk] + const torch::Tensor& topk_ids, // [n_token, topk] + const torch::Tensor& src_row_id2dst_row_id_map, // [n_token, topk] + const torch::Tensor& expert_first_token_offset, // [n_local_expert+1] + int64_t n_expert, int64_t n_local_expert, int64_t topk, + torch::Tensor& hidden_states // [n_token, hidden] +) { + TORCH_CHECK(src_row_id2dst_row_id_map.sizes() == topk_ids.sizes(), + "topk_ids shape must be same as src_row_id2dst_row_id_map"); + TORCH_CHECK(topk_ids.scalar_type() == at::ScalarType::Int, + "topk_ids must be int32"); + TORCH_CHECK( + permuted_hidden_states.scalar_type() == hidden_states.scalar_type(), + "topk_ids dtype must be same as 
src_row_id2dst_row_id_map"); + auto n_token = hidden_states.size(0); + auto n_hidden = hidden_states.size(1); + auto stream = at::cuda::getCurrentCUDAStream().stream(); + const int64_t* valid_ptr = + get_ptr(expert_first_token_offset) + n_local_expert; + MOE_DISPATCH(hidden_states.scalar_type(), [&] { + finalizeMoeRoutingKernelLauncher( + get_ptr(permuted_hidden_states), + get_ptr(hidden_states), get_ptr(topk_weights), + get_ptr(src_row_id2dst_row_id_map), get_ptr(topk_ids), + n_token, n_hidden, topk, valid_ptr, stream); + }); +} + +TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) { + m.impl("moe_permute", &moe_permute); + m.impl("moe_unpermute", &moe_unpermute); +} \ No newline at end of file diff --git a/csrc/moe/moe_wna16.cu b/csrc/moe/moe_wna16.cu index 51ae76c1ec8..7b6a111c00a 100644 --- a/csrc/moe/moe_wna16.cu +++ b/csrc/moe/moe_wna16.cu @@ -13,7 +13,6 @@ template __global__ void moe_wna16_gemm_kernel( const scalar_t* __restrict__ input, scalar_t* __restrict__ output, - const uint32_t* __restrict__ qweight, const scalar_t* __restrict__ scales, const uint32_t* __restrict__ qzeros, @@ -54,8 +53,6 @@ __global__ void moe_wna16_gemm_kernel( if (token_index / top_k >= size_m) break; num_valid_tokens = m + 1; - if (blockIdx.z == 0 && offset_n < size_n) - output[token_index * size_n + offset_n] = Dtype::int2num(0); if (expert_id != -1) { int k_per_thread = DIVIDE(BLOCK_SIZE_K, BLOCK_SIZE_N); @@ -284,8 +281,7 @@ torch::Tensor moe_wna16_gemm(torch::Tensor input, torch::Tensor output, int64_t BLOCK_SIZE_M, int64_t BLOCK_SIZE_N, int64_t BLOCK_SIZE_K, int64_t bit) { const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); - auto options = - torch::TensorOptions().dtype(input.dtype()).device(input.device()); + output.zero_(); const int num_experts = b_qweight.size(0); const int size_m = input.size(0); @@ -302,9 +298,9 @@ torch::Tensor moe_wna16_gemm(torch::Tensor input, torch::Tensor output, const uint32_t* b_qzeros_ptr; if (b_qzeros.has_value()) b_qzeros_ptr = (const uint32_t*)b_qzeros.value().data_ptr(); - const float* topk_weights_ptr; + const float* topk_weights_ptr = nullptr; if (topk_weights.has_value()) - topk_weights_ptr = (const float*)topk_weights.value().data_ptr(); + topk_weights_ptr = (const float*)topk_weights.value().data_ptr(); int groups_per_block_row = BLOCK_SIZE_K / group_size; TORCH_CHECK(bit == 4 || bit == 8, "bit must be 4 or 8"); diff --git a/csrc/moe/moe_wna16_utils.h b/csrc/moe/moe_wna16_utils.h index 4396b80240e..8ef03f0e605 100644 --- a/csrc/moe/moe_wna16_utils.h +++ b/csrc/moe/moe_wna16_utils.h @@ -108,11 +108,11 @@ __device__ inline void dequant(int q, half2* res) { const int MUL = 0x2c002c00; const int ADD = 0xd400d400; - int lo0 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi0 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo0 = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi0 = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); q >>= 8; - int lo1 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi1 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo1 = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi1 = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); res[0] = __hsub2(*reinterpret_cast(&lo0), *reinterpret_cast(&SUB)); @@ -149,13 +149,13 @@ __device__ inline void dequant(int q, nv_bfloat162* res) { static constexpr uint32_t MASK = 0x000f000f; static constexpr uint32_t EX = 0x43004300; - int lo0 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int lo0 = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); q >>= 4; - int hi0 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + 
int hi0 = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); q >>= 4; - int lo1 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int lo1 = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); q >>= 4; - int hi1 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int hi1 = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); static constexpr uint32_t MUL = 0x3F803F80; static constexpr uint32_t ADD = 0xC300C300; diff --git a/csrc/moe/permute_unpermute_kernels/dispatch.h b/csrc/moe/permute_unpermute_kernels/dispatch.h new file mode 100644 index 00000000000..41932cdd85b --- /dev/null +++ b/csrc/moe/permute_unpermute_kernels/dispatch.h @@ -0,0 +1,53 @@ +#pragma once +#include +#define MOE_SWITCH(TYPE, ...) \ + at::ScalarType _st = ::detail::scalar_type(TYPE); \ + switch (_st) { \ + __VA_ARGS__ \ + default: \ + TORCH_CHECK(false, "[moe permute]data type dispatch fail!") \ + } + +#define MOE_DISPATCH_CASE(enum_type, ...) \ + case enum_type: { \ + using scalar_t = ScalarType2CudaType::type; \ + __VA_ARGS__(); \ + break; \ + } +#define MOE_DISPATCH_FLOAT_CASE(...) \ + MOE_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ + MOE_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) \ + MOE_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \ + MOE_DISPATCH_CASE(at::ScalarType::Float8_e5m2, __VA_ARGS__) \ + MOE_DISPATCH_CASE(at::ScalarType::Float8_e4m3fn, __VA_ARGS__) + +#define MOE_DISPATCH(TYPE, ...) \ + MOE_SWITCH(TYPE, MOE_DISPATCH_FLOAT_CASE(__VA_ARGS__)) + +template +struct ScalarType2CudaType; + +template <> +struct ScalarType2CudaType { + using type = float; +}; +template <> +struct ScalarType2CudaType { + using type = half; +}; +template <> +struct ScalarType2CudaType { + using type = __nv_bfloat16; +}; + +// #if __CUDA_ARCH__ >= 890 +// fp8 +template <> +struct ScalarType2CudaType { + using type = __nv_fp8_e5m2; +}; +template <> +struct ScalarType2CudaType { + using type = __nv_fp8_e4m3; +}; +// #endif \ No newline at end of file diff --git a/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.cu b/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.cu new file mode 100644 index 00000000000..aa353d0f043 --- /dev/null +++ b/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.cu @@ -0,0 +1,229 @@ + +#include "moe_permute_unpermute_kernel.h" + +// CubKeyValueSorter definition begin +CubKeyValueSorter::CubKeyValueSorter() + : num_experts_(0), num_bits_(sizeof(int) * 8) {} + +int CubKeyValueSorter::expertsToBits(int num_experts) { + // Max value we represent is V = num_experts + (num_experts - 1) = 2 * + // num_experts - 1 The maximum number of bits is therefore floor(log2(V)) + 1 + return static_cast(log2(2 * num_experts - 1)) + 1; +} + +CubKeyValueSorter::CubKeyValueSorter(int const num_experts) + : num_experts_(num_experts), num_bits_(expertsToBits(num_experts)) {} + +void CubKeyValueSorter::updateNumExperts(int const num_experts) { + num_experts_ = num_experts; + num_bits_ = expertsToBits(num_experts); +} + +size_t CubKeyValueSorter::getWorkspaceSize(size_t const num_key_value_pairs, + int const num_experts) { + int num_bits = expertsToBits(num_experts); + size_t required_storage = 0; + int* null_int = nullptr; + cub::DeviceRadixSort::SortPairs(nullptr, required_storage, null_int, null_int, + null_int, null_int, num_key_value_pairs, 0, + num_bits); + + // when num_key_value_pairs, num_experts, num_bits, required_storage = 64, + // 4, 3, 0 The required_storage seems to vary between 0 and 1 for the same + // inputs + if (required_storage == 0) { + required_storage = 1; + } + return 
required_storage; +} + +void CubKeyValueSorter::run(void* workspace, size_t const workspace_size, + int const* keys_in, int* keys_out, + int const* values_in, int* values_out, + size_t const num_key_value_pairs, + cudaStream_t stream) { + size_t expected_ws_size = getWorkspaceSize(num_key_value_pairs, num_experts_); + size_t actual_ws_size = workspace_size; + + TORCH_CHECK(expected_ws_size <= workspace_size, + "[CubKeyValueSorter::run] The allocated workspace is too small " + "to run this problem."); + cub::DeviceRadixSort::SortPairs(workspace, actual_ws_size, keys_in, keys_out, + values_in, values_out, num_key_value_pairs, 0, + num_bits_, stream); +} +// CubKeyValueSorter definition end + +static inline size_t pad_to_multiple_of_16(size_t const& input) { + static constexpr int ALIGNMENT = 16; + return ALIGNMENT * ((input + ALIGNMENT - 1) / ALIGNMENT); +} +template +__device__ inline int64_t findTotalEltsLessThanTarget(T const* sorted_indices, + int64_t const arr_length, + T const target) { + int64_t low = 0, high = arr_length - 1, target_location = -1; + while (low <= high) { + int64_t mid = (low + high) / 2; + + if (sorted_indices[mid] >= target) { + high = mid - 1; + } else { + low = mid + 1; + target_location = mid; + } + } + return target_location + 1; +} + +// Calculates the start offset of the tokens for a given expert. The last +// element is the total number of valid tokens +__global__ void computeExpertFirstTokenOffsetKernel( + int const* sorted_experts, int64_t const sorted_experts_len, + int const num_experts, int64_t* expert_first_token_offset) { + // First, compute the global tid. We only need 1 thread per expert. + int const expert = blockIdx.x * blockDim.x + threadIdx.x; + + // Note that expert goes [0, num_experts] (inclusive) because we want a count + // for the total number of active tokens at the end of the scan. 
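+  // Worked example (illustrative values): with sorted_experts = [0, 0, 1, 3, 3]
+  // and num_experts = 4, the binary search below yields
+  // expert_first_token_offset = [0, 2, 3, 3, 5]; the final entry (index
+  // num_experts) is the total number of valid tokens.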
+ if (expert >= num_experts + 1) { + return; + } + expert_first_token_offset[expert] = + findTotalEltsLessThanTarget(sorted_experts, sorted_experts_len, expert); +} + +void computeExpertFirstTokenOffset(int const* sorted_indices, + int const total_indices, + int const num_experts, + int64_t* expert_first_token_offset, + cudaStream_t stream) { + int const num_entries = num_experts + 1; + int const threads = std::min(1024, num_entries); + int const blocks = (num_entries + threads - 1) / threads; + + computeExpertFirstTokenOffsetKernel<<>>( + sorted_indices, total_indices, num_experts, expert_first_token_offset); +} + +void sortAndScanExpert(int* expert_for_source_row, const int* source_rows, + int* permuted_experts, int* permuted_rows, + int64_t* expert_first_token_offset, int num_rows, + int num_experts, int num_experts_per_node, int k, + CubKeyValueSorter& sorter, void* sorter_ws, + cudaStream_t stream) { + int64_t const expanded_num_rows = static_cast(k) * num_rows; + // We need to use the full num_experts because that is the sentinel value used + // by topk for disabled experts + sorter.updateNumExperts(num_experts); + size_t const sorter_ws_size_bytes = pad_to_multiple_of_16( + sorter.getWorkspaceSize(expanded_num_rows, num_experts)); + sorter.run((void*)sorter_ws, sorter_ws_size_bytes, expert_for_source_row, + permuted_experts, source_rows, permuted_rows, expanded_num_rows, + stream); + computeExpertFirstTokenOffset(permuted_experts, expanded_num_rows, + num_experts_per_node, expert_first_token_offset, + stream); +} + +__global__ void preprocessTopkIdKernel(int* topk_id_ptr, int size, + const int* expert_map_ptr, + int num_experts) { + auto tidx = threadIdx.x; + auto bidx = blockIdx.x; + auto lidx = tidx & 31; + auto widx = tidx >> 5; + auto warp_count = (blockDim.x + 31) >> 5; + auto offset = bidx * blockDim.x; + auto bound = min(offset + blockDim.x, size); + extern __shared__ int smem_expert_map[]; + // store expert_map in smem + for (int i = tidx; i < num_experts; i += blockDim.x) { + smem_expert_map[i] = expert_map_ptr[i]; + } + __syncthreads(); + + // query global expert id in expert map. 
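+  // Worked example (values taken from the moe_permute comment above): with
+  // expert_map = [-1, -1, 0, 1] and num_experts = 4 on ep_rank 1, a topk id
+  // of 2 maps to local expert 0, while a topk id of 0 (not owned by this
+  // rank) becomes 0 + 4 = 4, as described below.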
+ // if global expert id = -1 in exert map, plus n_expert + // else set global expert id = exert map[global expert id] + if (offset + tidx < bound) { + auto topk_id = topk_id_ptr[offset + tidx]; + auto local_expert_idx = smem_expert_map[topk_id]; + if (local_expert_idx == -1) { + topk_id += num_experts; + } else { + topk_id = local_expert_idx; + } + __syncwarp(); + topk_id_ptr[offset + tidx] = topk_id; + } +} +void preprocessTopkIdLauncher(int* topk_id_ptr, int size, + const int* expert_map_ptr, int num_experts, + cudaStream_t stream) { + int block = std::min(size, 1024); + int grid = (size + block - 1) / block; + int smem_size = (num_experts) * sizeof(int); + preprocessTopkIdKernel<<>>( + topk_id_ptr, size, expert_map_ptr, num_experts); +} + +template +__global__ void getMIndicesKernel(int64_t* expert_first_token_offset, + int64_t* align_expert_first_token_offset, + int* m_indices, const int num_local_expert, + const int align_block_size) { + int eidx = blockIdx.x; + int tidx = threadIdx.x; + extern __shared__ int64_t smem_expert_first_token_offset[]; + for (int i = tidx; i <= num_local_expert; i += blockDim.x) { + smem_expert_first_token_offset[tidx] = __ldg(expert_first_token_offset + i); + } + __syncthreads(); + auto last_token_offset = smem_expert_first_token_offset[eidx + 1]; + auto first_token_offset = smem_expert_first_token_offset[eidx]; + int n_token_in_expert = last_token_offset - first_token_offset; + + if constexpr (ALIGN_BLOCK_SIZE) { + n_token_in_expert = (n_token_in_expert + align_block_size - 1) / + align_block_size * align_block_size; + // round up to ALIGN_BLOCK_SIZE + int64_t accumulate_align_offset = 0; + for (int i = 1; i <= eidx + 1; i++) { + int n_token = smem_expert_first_token_offset[i] - + smem_expert_first_token_offset[i - 1]; + accumulate_align_offset = + accumulate_align_offset + (n_token + align_block_size - 1) / + align_block_size * align_block_size; + if (i == eidx) { + first_token_offset = accumulate_align_offset; + } + // last block store align_expert_first_token_offset + if (eidx == num_local_expert - 1 && threadIdx.x == 0) { + align_expert_first_token_offset[i] = accumulate_align_offset; + } + } + } + for (int idx = tidx; idx < n_token_in_expert; idx += blockDim.x) { + // update m_indice with expert id + m_indices[first_token_offset + idx] = eidx; + } +} + +void getMIndices(int64_t* expert_first_token_offset, + int64_t* align_expert_first_token_offset, int* m_indices, + int num_local_expert, const int align_block_size, + cudaStream_t stream) { + int block = 256; + int grid = num_local_expert; + int smem_size = sizeof(int64_t) * (num_local_expert + 1); + if (align_block_size == -1) { + getMIndicesKernel<<>>( + expert_first_token_offset, align_expert_first_token_offset, m_indices, + num_local_expert, align_block_size); + } else { + getMIndicesKernel<<>>( + expert_first_token_offset, align_expert_first_token_offset, m_indices, + num_local_expert, align_block_size); + } +} \ No newline at end of file diff --git a/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.h b/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.h new file mode 100644 index 00000000000..43c29721cd1 --- /dev/null +++ b/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.h @@ -0,0 +1,95 @@ +#pragma once +// reference from tensorrt_llm moe kernel implementation archive in +// https://github.com/BBuf/tensorrt-llm-moe/tree/master + +#include +#include +#include "dispatch.h" +#include +#include +#include +#include "cutlass/numeric_size.h" +#include 
"cutlass/array.h" + +template +inline T* get_ptr(torch::Tensor& t) { + return reinterpret_cast(t.data_ptr()); +} + +template +inline const T* get_ptr(const torch::Tensor& t) { + return reinterpret_cast(t.data_ptr()); +} + +class CubKeyValueSorter { + public: + CubKeyValueSorter(); + + CubKeyValueSorter(int const num_experts); + + void updateNumExperts(int const num_experts); + + static size_t getWorkspaceSize(size_t const num_key_value_pairs, + int const num_experts); + + void run(void* workspace, size_t const workspace_size, int const* keys_in, + int* keys_out, int const* values_in, int* values_out, + size_t const num_key_value_pairs, cudaStream_t stream); + + private: + static int expertsToBits(int experts); + int num_experts_; + int num_bits_; +}; + +void computeExpertFirstTokenOffset(int const* sorted_indices, + int const total_indices, + int const num_experts, + int64_t* expert_first_token_offset, + cudaStream_t stream); + +void sortAndScanExpert(int* expert_for_source_row, const int* source_rows, + int* permuted_experts, int* permuted_rows, + int64_t* expert_first_token_offset, int num_rows, + int num_experts, int num_experts_per_node, int k, + CubKeyValueSorter& sorter, void* sorter_ws, + cudaStream_t stream); + +template +void expandInputRowsKernelLauncher( + T const* unpermuted_input, T* permuted_output, + const float* unpermuted_scales, int* sorted_experts, + int const* expanded_dest_row_to_expanded_source_row, + int* expanded_source_row_to_expanded_dest_row, + int64_t* expert_first_token_offset, int64_t const num_rows, + int64_t const* num_valid_tokens_ptr, int64_t const cols, int const k, + int num_local_experts, const int& align_block_size, cudaStream_t stream); + +// Final kernel to unpermute and scale +// This kernel unpermutes the original data, does the k-way reduction and +// performs the final skip connection. 
+template +__global__ void finalizeMoeRoutingKernel( + T const* expanded_permuted_rows, OutputType* reduced_unpermuted_output, + float const* scales, int const* expanded_source_row_to_expanded_dest_row, + int const* expert_for_source_row, int64_t const orig_cols, int64_t const k, + int64_t const* num_valid_ptr); + +template +void finalizeMoeRoutingKernelLauncher( + T const* expanded_permuted_rows, OutputType* reduced_unpermuted_output, + float const* scales, int const* expanded_source_row_to_expanded_dest_row, + int const* expert_for_source_row, int64_t const num_rows, + int64_t const cols, int64_t const k, int64_t const* num_valid_ptr, + cudaStream_t stream); + +void preprocessTopkIdLauncher(int* topk_id_ptr, int size, + const int* expert_map_ptr, int num_experts, + cudaStream_t stream); + +void getMIndices(int64_t* expert_first_token_offset, + int64_t* align_expert_first_token_offset, int* m_indices, + int num_local_expert, const int align_block_size, + cudaStream_t stream); + +#include "moe_permute_unpermute_kernel.inl" diff --git a/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.inl b/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.inl new file mode 100644 index 00000000000..42441800fb1 --- /dev/null +++ b/csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.inl @@ -0,0 +1,211 @@ +#pragma once + +template +__global__ void expandInputRowsKernel( + T const* unpermuted_input, T* permuted_output, + const float* unpermuted_scales, int* sorted_experts, + int const* expanded_dest_row_to_expanded_source_row, + int* expanded_source_row_to_expanded_dest_row, + int64_t* expert_first_token_offset, int64_t const num_rows, + int64_t const* num_dest_rows, int64_t const cols, int64_t k, + int num_local_experts, int align_block_size) { + // Reverse permutation map. + // I do this so that later, we can use the source -> dest map to do the k-way + // reduction and unpermuting. I need the reverse map for that reduction to + // allow each threadblock to do 1 k-way reduce without atomics later in MoE. 1 + // thread block will be responsible for all k summations. + int64_t expanded_dest_row = blockIdx.x; + int64_t const expanded_source_row = + expanded_dest_row_to_expanded_source_row[expanded_dest_row]; + int expert_id = sorted_experts[expanded_dest_row]; + + extern __shared__ int64_t smem_expert_first_token_offset[]; + int64_t align_expanded_row_accumulate = 0; + if constexpr (ALIGN_BLOCK_SIZE) { + // load g2s + for (int idx = threadIdx.x; idx < num_local_experts + 1; + idx += blockDim.x) { + smem_expert_first_token_offset[idx] = + __ldg(expert_first_token_offset + idx); + } + __syncthreads(); + int lane_idx = threadIdx.x & 31; + + if (lane_idx == 0) { + // set token_offset_in_expert = 0 if this expert is not local expert + int token_offset_in_expert = + expert_id >= num_local_experts + ? 
0 + : expanded_dest_row - smem_expert_first_token_offset[expert_id]; + int64_t accumulate_align_offset = 0; +#pragma unroll 1 + for (int eidx = 1; eidx <= min(expert_id, num_local_experts); eidx++) { + auto n_token_in_expert = smem_expert_first_token_offset[eidx] - + smem_expert_first_token_offset[eidx - 1]; + accumulate_align_offset += (n_token_in_expert + align_block_size - 1) / + align_block_size * align_block_size; + } + expanded_dest_row = accumulate_align_offset + token_offset_in_expert; + } + // lane0 shuffle broadcast align_expanded_dest_row + expanded_dest_row = __shfl_sync(0xffffffff, expanded_dest_row, 0); + } + + if (threadIdx.x == 0) { + assert(expanded_dest_row <= INT32_MAX); + expanded_source_row_to_expanded_dest_row[expanded_source_row] = + static_cast(expanded_dest_row); + } + + if (!CHECK_SKIPPED || blockIdx.x < *num_dest_rows) { + // Load 128-bits per thread + constexpr int64_t ELEM_PER_THREAD = 128 / cutlass::sizeof_bits::value; + using DataElem = cutlass::Array; + + // Duplicate and permute rows + int64_t const source_k_rank = expanded_source_row / num_rows; + int64_t const source_row = expanded_source_row % num_rows; + + auto const* source_row_ptr = + reinterpret_cast(unpermuted_input + source_row * cols); + auto* dest_row_ptr = + reinterpret_cast(permuted_output + expanded_dest_row * cols); + + int64_t const start_offset = threadIdx.x; + int64_t const stride = blockDim.x; + int64_t const num_elems_in_col = cols / ELEM_PER_THREAD; + + for (int elem_index = start_offset; elem_index < num_elems_in_col; + elem_index += stride) { + dest_row_ptr[elem_index] = source_row_ptr[elem_index]; + } + } +} + +template +void expandInputRowsKernelLauncher( + T const* unpermuted_input, T* permuted_output, + const float* unpermuted_scales, int* sorted_experts, + int const* expanded_dest_row_to_expanded_source_row, + int* expanded_source_row_to_expanded_dest_row, + int64_t* expert_first_token_offset, int64_t const num_rows, + int64_t const* num_valid_tokens_ptr, int64_t const cols, int const k, + int num_local_experts, const int& align_block_size, cudaStream_t stream) { + int64_t const blocks = num_rows * k; + int64_t const threads = 256; + using FuncPtr = decltype(&expandInputRowsKernel); + FuncPtr func_map[2][2] = { + {&expandInputRowsKernel, + &expandInputRowsKernel}, + {&expandInputRowsKernel, + &expandInputRowsKernel}, + }; + bool is_check_skip = num_valid_tokens_ptr != nullptr; + bool is_align_block_size = align_block_size != -1; + auto func = func_map[is_check_skip][is_align_block_size]; + + int64_t smem_size = sizeof(int64_t) * (num_local_experts + 1); + + func<<>>( + unpermuted_input, permuted_output, unpermuted_scales, sorted_experts, + expanded_dest_row_to_expanded_source_row, + expanded_source_row_to_expanded_dest_row, expert_first_token_offset, + num_rows, num_valid_tokens_ptr, cols, k, num_local_experts, + align_block_size); +} + +template +__host__ __device__ constexpr static U arrayConvert(T const& input) { + using Type = typename U::Element; + static_assert(T::kElements == U::kElements); + U u; +#pragma unroll + for (int i = 0; i < U::kElements; i++) { + u[i] = static_cast(input[i]); + } + return u; +} + +template +__global__ void finalizeMoeRoutingKernel( + T const* expanded_permuted_rows, OutputType* reduced_unpermuted_output, + float const* scales, int const* expanded_source_row_to_expanded_dest_row, + int const* expert_for_source_row, int64_t const orig_cols, int64_t const k, + int64_t const* num_valid_ptr) { + assert(orig_cols % 4 == 0); + int64_t const 
original_row = blockIdx.x; + int64_t const num_rows = gridDim.x; + auto const offset = original_row * orig_cols; + OutputType* reduced_row_ptr = reduced_unpermuted_output + offset; + int64_t const num_valid = *num_valid_ptr; + + // Load 128-bits per thread, according to the smallest data type we read/write + constexpr int64_t FINALIZE_ELEM_PER_THREAD = + 128 / std::min(cutlass::sizeof_bits::value, + cutlass::sizeof_bits::value); + + int64_t const start_offset = threadIdx.x; + int64_t const stride = blockDim.x; + int64_t const num_elems_in_col = orig_cols / FINALIZE_ELEM_PER_THREAD; + + using InputElem = cutlass::Array; + using OutputElem = cutlass::Array; + using ComputeElem = cutlass::Array; + auto const* expanded_permuted_rows_v = + reinterpret_cast(expanded_permuted_rows); + auto* reduced_row_ptr_v = reinterpret_cast(reduced_row_ptr); + +#pragma unroll + for (int elem_index = start_offset; elem_index < num_elems_in_col; + elem_index += stride) { + ComputeElem thread_output; + thread_output.fill(0); + float row_rescale{0.f}; + for (int k_idx = 0; k_idx < k; ++k_idx) { + int64_t const expanded_original_row = original_row + k_idx * num_rows; + int64_t const expanded_permuted_row = + expanded_source_row_to_expanded_dest_row[expanded_original_row]; + + int64_t const k_offset = original_row * k + k_idx; + float const row_scale = scales[k_offset]; + + // Check after row_rescale has accumulated + if (CHECK_SKIPPED && expanded_permuted_row >= num_valid) { + continue; + } + + auto const* expanded_permuted_rows_row_ptr = + expanded_permuted_rows_v + expanded_permuted_row * num_elems_in_col; + + int64_t const expert_idx = expert_for_source_row[k_offset]; + + ComputeElem expert_result = arrayConvert( + expanded_permuted_rows_row_ptr[elem_index]); + thread_output = thread_output + row_scale * (expert_result); + } + + OutputElem output_elem = + arrayConvert(thread_output); + reduced_row_ptr_v[elem_index] = output_elem; + } +} + +template +void finalizeMoeRoutingKernelLauncher( + T const* expanded_permuted_rows, OutputType* reduced_unpermuted_output, + float const* scales, int const* expanded_source_row_to_expanded_dest_row, + int const* expert_for_source_row, int64_t const num_rows, + int64_t const cols, int64_t const k, int64_t const* num_valid_ptr, + cudaStream_t stream) { + int64_t const blocks = num_rows; + int64_t const threads = 256; + bool const check_finished = num_valid_ptr != nullptr; + using FuncPtr = decltype(&finalizeMoeRoutingKernel); + FuncPtr func_map[2] = {&finalizeMoeRoutingKernel, + &finalizeMoeRoutingKernel}; + auto* const kernel = func_map[check_finished]; + kernel<<>>( + expanded_permuted_rows, reduced_unpermuted_output, scales, + expanded_source_row_to_expanded_dest_row, expert_for_source_row, cols, k, + num_valid_ptr); +} diff --git a/csrc/moe/torch_bindings.cpp b/csrc/moe/torch_bindings.cpp index d0de42251f9..2a8b9bb39ca 100644 --- a/csrc/moe/torch_bindings.cpp +++ b/csrc/moe/torch_bindings.cpp @@ -53,7 +53,29 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) { "int size_m, int size_n, int size_k," "bool is_full_k, bool use_atomic_add," "bool use_fp32_reduce, bool is_zp_float) -> Tensor"); + m.def( + "marlin_gemm_moe(Tensor! a, Tensor! b_q_weights, Tensor! sorted_ids, " + "Tensor! topk_weights, Tensor! topk_ids, Tensor! b_scales, Tensor! " + "b_zeros, Tensor! g_idx, Tensor! perm, Tensor! 
workspace, " + "int b_q_type, SymInt size_m, " + "SymInt size_n, SymInt size_k, bool is_k_full, int num_experts, int " + "topk, " + "int moe_block_size, bool replicate_input, bool apply_weights)" + " -> Tensor"); + + m.def( + "moe_permute(Tensor input, Tensor topk_weight, Tensor! topk_ids," + "Tensor token_expert_indicies, Tensor? expert_map, int n_expert," + "int n_local_expert," + "int topk, int? align_block_size,Tensor! permuted_input, Tensor! " + "expert_first_token_offset, Tensor! src_row_id2dst_row_id_map, Tensor! " + "m_indices)->()"); + m.def( + "moe_unpermute(Tensor permuted_hidden_states, Tensor topk_weights," + "Tensor topk_ids,Tensor src_row_id2dst_row_id_map, Tensor " + "expert_first_token_offset, int n_expert, int n_local_expert,int " + "topk, Tensor! hidden_states)->()"); // conditionally compiled so impl registration is in source file #endif diff --git a/csrc/ops.h b/csrc/ops.h index 86039a26041..59ae0937604 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -97,6 +97,9 @@ void batched_rotary_embedding(torch::Tensor& positions, torch::Tensor& query, void silu_and_mul(torch::Tensor& out, torch::Tensor& input); +void silu_and_mul_quant(torch::Tensor& out, torch::Tensor& input, + torch::Tensor& scale); + void mul_and_silu(torch::Tensor& out, torch::Tensor& input); void gelu_and_mul(torch::Tensor& out, torch::Tensor& input); @@ -128,6 +131,12 @@ void advance_step_flashinfer( torch::Tensor& paged_kv_indices, torch::Tensor& paged_kv_indptr, torch::Tensor& paged_kv_last_page_len, torch::Tensor& block_table_bounds); +void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope, + torch::Tensor const& q_pe, + torch::Tensor const& kv_c_and_k_pe_cache, + torch::Tensor const& seq_lens, + torch::Tensor const& page_table, double scale); + torch::Tensor get_cuda_view_from_cpu_tensor(torch::Tensor& cpu_tensor); #ifndef USE_ROCM diff --git a/csrc/quantization/activation_kernels.cu b/csrc/quantization/activation_kernels.cu new file mode 100644 index 00000000000..acc3d672202 --- /dev/null +++ b/csrc/quantization/activation_kernels.cu @@ -0,0 +1,120 @@ +#include +#include +#include + +#include +#include "core/math.hpp" +#include "cuda_compat.h" +#include "dispatch_utils.h" + +#include "quantization/fp8/common.cuh" + +namespace vllm { + +template +__device__ __forceinline__ T silu_kernel(const T& x) { + // x * sigmoid(x) + return (T)(((float)x) / (1.0f + expf((float)-x))); +} + +// Activation and gating kernel template. +template +__global__ void act_and_mul_quant_kernel( + fp8_type* __restrict__ out, // [..., d] + const scalar_t* __restrict__ input, // [..., 2, d] + const float* scale, const int d) { + const int32_t blocks_per_token = gridDim.y; + + const int32_t elems_per_128bit_load = (128 / 8) / sizeof(scalar_t); + + // We don't expect the hidden dimension to exceed 32 bits so int32 should + // be safe here. + const int32_t tgt_elems_per_block = div_ceil(d, blocks_per_token); + const int32_t elems_per_block = + round_to_next_multiple_of(tgt_elems_per_block, elems_per_128bit_load); + const int32_t block_start = blockIdx.y * elems_per_block; + int32_t block_end = block_start + elems_per_block; + block_end = block_end > d ? 
d : block_end; + + // token_idx is 64 bit to prevent 32 bit overflow when the number of tokens + // is very large + const int64_t token_idx = blockIdx.x; + const scalar_t* __restrict__ x_ptr = input + token_idx * 2 * d; + const scalar_t* __restrict__ y_ptr = input + token_idx * 2 * d + d; + fp8_type* __restrict__ out_ptr = out + token_idx * d; + + // 128-bit vectorized code + const int32_t vec_loop_end = + round_to_previous_multiple_of(elems_per_128bit_load, block_end); + const int32_t vec_end_idx = vec_loop_end / elems_per_128bit_load; + const int32_t vec_start_idx = block_start / elems_per_128bit_load; + + const int4* __restrict__ x_128bit_ptr = reinterpret_cast(x_ptr); + const int4* __restrict__ y_128bit_ptr = reinterpret_cast(y_ptr); + int2* __restrict__ out_128bit_ptr = reinterpret_cast(out_ptr); + + float inverted_scale = 1 / *scale; +#pragma unroll + for (int32_t vec_idx = vec_start_idx + threadIdx.x; vec_idx < vec_end_idx; + vec_idx += blockDim.x) { + const int4 x_128bit = VLLM_LDG(&x_128bit_ptr[vec_idx]); + const int4 y_128bit = VLLM_LDG(&y_128bit_ptr[vec_idx]); + using scalar_128bit_vec_t = std::array; + using scalar_64bit_vec_t = std::array; + + scalar_64bit_vec_t out_vec; + const auto x_vec = reinterpret_cast(x_128bit); + const auto y_vec = reinterpret_cast(y_128bit); + +#pragma unroll + for (int i = 0; i < elems_per_128bit_load; i++) { + out_vec[i] = scaled_fp8_conversion( + ACT_FN(x_vec[i]) * y_vec[i], inverted_scale); + } + + out_128bit_ptr[vec_idx] = reinterpret_cast(out_vec); + } + + // Scalar cleanup code + if (block_end > vec_loop_end) { + for (int64_t idx = vec_loop_end + threadIdx.x; idx < block_end; + idx += blockDim.x) { + const scalar_t x = VLLM_LDG(&x_ptr[idx]); + const scalar_t y = VLLM_LDG(&y_ptr[idx]); + out_ptr[idx] = + scaled_fp8_conversion(ACT_FN(x) * y, inverted_scale); + } + } +} +} // namespace vllm + +// Launch activation, gating, and quantize kernel. +#define LAUNCH_ACTIVATION_GATE_KERNEL(KERNEL) \ + int d = input.size(-1) / 2; \ + int64_t num_tokens = input.numel() / input.size(-1); \ + dim3 grid(num_tokens, num_tokens > 16 ? num_tokens > 32 ? 
1 : 2 : 4); \ + dim3 block(std::min(d, 512)); \ + const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \ + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \ + VLLM_DISPATCH_FLOATING_TYPES( \ + input.scalar_type(), "act_and_mul_kernel", [&] { \ + VLLM_DISPATCH_FP8_TYPES( \ + out.scalar_type(), "fused_add_rms_norm_kernel_fp8_type", [&] { \ + vllm::act_and_mul_quant_kernel, \ + fp8_t> \ + <<>>(out.data_ptr(), \ + input.data_ptr(), \ + scale.data_ptr(), d); \ + }); \ + }); + +void silu_and_mul_quant(torch::Tensor& out, // [..., d] + torch::Tensor& input, // [..., 2 * d] + torch::Tensor& scale) { + TORCH_CHECK(out.dtype() == torch::kFloat8_e4m3fn); + TORCH_CHECK(input.dtype() == torch::kFloat16 || + input.dtype() == torch::kBFloat16); + TORCH_CHECK(input.size(-1) % 2 == 0); + LAUNCH_ACTIVATION_GATE_KERNEL(vllm::silu_kernel); +} diff --git a/csrc/quantization/cutlass_w8a8/moe/moe_data.cu b/csrc/quantization/cutlass_w8a8/moe/moe_data.cu index 2fb0417ce6c..894727383a6 100644 --- a/csrc/quantization/cutlass_w8a8/moe/moe_data.cu +++ b/csrc/quantization/cutlass_w8a8/moe/moe_data.cu @@ -46,14 +46,26 @@ __global__ void compute_expert_offsets( } __global__ void compute_arg_sorts(const int* __restrict__ topk_ids, + const int32_t* __restrict__ expert_offsets, int32_t* input_permutation, int32_t* output_permutation, int32_t* atomic_buffer, const int topk_length, const int topk) { - int expert_id = blockIdx.x; + int const blk_expert_id = blockIdx.x; + int const num_experts = gridDim.x; + int32_t const num_tokens = expert_offsets[num_experts]; for (int i = threadIdx.x; i < topk_length; i += THREADS_PER_EXPERT) { - if (topk_ids[i] == expert_id) { + int const expert_id = topk_ids[i]; + if (expert_id == -1 && blockIdx.x == 0) { + // output_permutation is used to re-order the moe outputs. It is + // used as c2 = c2[c_map], where c2 is a torch.tensor that is the + // output of the cutlass kernels and c_map is the output_permutation. + // c2 is initialized to zeros, therefore by setting the output_permutation + // to num_tokens, we are guaranteed to fill the moe outputs to zero + // for "invalid" topk_ids. 
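+      // Note: every expert block scans the full topk_ids array, so gating this
+      // write on blockIdx.x == 0 merely avoids redundant stores of the same
+      // sentinel index (num_tokens) from the remaining blocks.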
+ output_permutation[i] = num_tokens; + } else if (expert_id == blk_expert_id) { int start = atomicAdd(&atomic_buffer[expert_id], 1); input_permutation[start] = i / topk; output_permutation[i] = start; @@ -83,6 +95,7 @@ void get_cutlass_moe_mm_data_caller( static_cast(atomic_buffer.data_ptr()), num_experts); compute_arg_sorts<<>>( static_cast(topk_ids.data_ptr()), + static_cast(expert_offsets.data_ptr()), static_cast(input_permutation.data_ptr()), static_cast(output_permutation.data_ptr()), static_cast(atomic_buffer.data_ptr()), topk_ids.numel(), diff --git a/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh b/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh index 4e82c99c3af..6082937e7e1 100644 --- a/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh +++ b/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh @@ -336,7 +336,7 @@ inline void cutlass_gemm_sm89_fp8_dispatch(torch::Tensor& out, uint32_t const m = a.size(0); uint32_t const mp2 = - std::max(static_cast(32), next_pow_2(m)); // next power of 2 + std::max(static_cast(16), next_pow_2(m)); // next power of 2 if (mp2 <= 16) { // M in [1, 16] diff --git a/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh b/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh index 95723b31ca3..87be125b2eb 100644 --- a/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh +++ b/csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh @@ -321,7 +321,7 @@ inline void cutlass_gemm_sm89_int8_dispatch(torch::Tensor& out, uint32_t const m = a.size(0); uint32_t const mp2 = - std::max(static_cast(32), next_pow_2(m)); // next power of 2 + std::max(static_cast(16), next_pow_2(m)); // next power of 2 if (mp2 <= 16) { // M in [1, 16] diff --git a/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu b/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu index 6e14de0c780..97c0e0da7b1 100644 --- a/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu +++ b/csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu @@ -134,7 +134,7 @@ typename T::Gemm::Arguments args_from_options( using StrideB = typename T::StrideB; using StrideD = typename T::StrideD; using Sm100BlkScaledConfig = - typename T::Gemm::GemmKernel::CollectiveMainloop::Sm100BlkScaledConfig; + typename T::Gemm::GemmKernel::CollectiveMainloop::Sm1xxBlkScaledConfig; int m = static_cast(M); int n = static_cast(N); diff --git a/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu b/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu index 2b6ab7fcec9..95aa92e25b3 100644 --- a/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu +++ b/csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu @@ -96,7 +96,7 @@ void rms_norm_dynamic_per_token_quant_dispatch( std::optional const& scale_ub, std::optional& residual) { int32_t hidden_size = input.size(-1); - int32_t num_tokens = input.numel() / hidden_size; + auto num_tokens = input.numel() / hidden_size; dim3 grid(num_tokens); dim3 block(std::min(hidden_size, 1024)); diff --git a/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu b/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu index ec0bf2c3cb4..ea3bb429904 100644 --- a/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu +++ b/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu @@ -347,7 +347,7 @@ struct ComputeTile_W8A16_PerC_MtilexNtilex32_multistage_SM8x_SplitK { for (int n_idx = 0; n_idx < WARP_NITER; 
++n_idx) { hmma16816_f32( C_frag[m_idx][n_idx], A_frag[reg_buf_idx][m_idx], - reinterpret_cast(BF_frag[reg_buf_idx][n_idx])); + reinterpret_cast(BF_frag[reg_buf_idx][n_idx])); } } } diff --git a/csrc/quantization/gptq_marlin/gptq_marlin.cu b/csrc/quantization/gptq_marlin/gptq_marlin.cu index 83bbd1e6816..a974c881eb8 100644 --- a/csrc/quantization/gptq_marlin/gptq_marlin.cu +++ b/csrc/quantization/gptq_marlin/gptq_marlin.cu @@ -173,8 +173,8 @@ dequant(int q) { const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point // directly into `SUB` and `ADD`. const int SUB = 0x64086408; @@ -197,9 +197,9 @@ dequant(int q) { // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); q >>= 4; - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); typename ScalarType::FragB frag_b; static constexpr uint32_t MUL = 0x3F803F80; @@ -221,8 +221,8 @@ dequant(int q) { const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); const int SUB = 0x64006400; const int MUL = 0x2c002c00; @@ -244,9 +244,9 @@ dequant(int q) { // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); q >>= 4; - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, MASK, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, MASK, EX); typename ScalarType::FragB frag_b; static constexpr uint32_t MUL = 0x3F803F80; diff --git a/csrc/quantization/marlin/dense/marlin_cuda_kernel.cu b/csrc/quantization/marlin/dense/marlin_cuda_kernel.cu index ba0a2410c03..ea96326ed7e 100644 --- a/csrc/quantization/marlin/dense/marlin_cuda_kernel.cu +++ b/csrc/quantization/marlin/dense/marlin_cuda_kernel.cu @@ -96,8 +96,8 @@ __device__ inline FragB dequant(int q) { const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point // directly into `SUB` and `ADD`. const int SUB = 0x64086408; diff --git a/csrc/quantization/marlin/qqq/marlin_qqq_gemm_kernel.cu b/csrc/quantization/marlin/qqq/marlin_qqq_gemm_kernel.cu index cd1830764cc..c96d68d9b29 100644 --- a/csrc/quantization/marlin/qqq/marlin_qqq_gemm_kernel.cu +++ b/csrc/quantization/marlin/qqq/marlin_qqq_gemm_kernel.cu @@ -141,8 +141,8 @@ __device__ inline FragB dequant_per_group(int q, FragS_GROUP& frag_s, int i) { static constexpr uint32_t HI = 0x00f000f0; static constexpr uint32_t EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. 
- uint32_t t0 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - uint32_t t1 = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + uint32_t t0 = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + uint32_t t1 = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point // directly into `SUB` and `ADD`. static constexpr uint32_t SUB = 0x64086408; diff --git a/csrc/quantization/marlin/sparse/common/mma.h b/csrc/quantization/marlin/sparse/common/mma.h index 49eee4128ee..b26505f771c 100644 --- a/csrc/quantization/marlin/sparse/common/mma.h +++ b/csrc/quantization/marlin/sparse/common/mma.h @@ -127,8 +127,8 @@ __device__ inline FragB dequant_4bit(int q) { const int HI = 0x00f000f0; const int EX = 0x64006400; // Guarantee that the `(a & b) | c` operations are LOP3s. - int lo = lop3 < (0xf0 & 0xcc) | 0xaa > (q, LO, EX); - int hi = lop3 < (0xf0 & 0xcc) | 0xaa > (q, HI, EX); + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point // directly into `SUB` and `ADD`. const int SUB = 0x64086408; diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu index 2c3cae95e7f..29235264916 100644 --- a/csrc/rocm/attention.cu +++ b/csrc/rocm/attention.cu @@ -25,8 +25,9 @@ #include "../attention/dtype_fp8.cuh" #include "../quantization/fp8/amd/quant_utils.cuh" -#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx942__)) - #define __HIP__MI300_MI250__ +#if defined(__HIPCC__) && \ + (defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__)) + #define __HIP__GFX9__ #endif #if defined(NDEBUG) @@ -42,7 +43,7 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) -#if defined(__HIP__MI300_MI250__) // TODO: Add NAVI support +#if defined(__HIP__GFX9__) // TODO: Add NAVI support #define GCN_MFMA_INSTR1 __builtin_amdgcn_mfma_f32_16x16x4f32 #define GCN_MFMA_INSTR __builtin_amdgcn_mfma_f32_4x4x4f16 @@ -1479,7 +1480,7 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel( } } -#else // !defined(__HIP__MI300_MI250__) TODO: Add NAVI support +#else // !defined(__HIP__GFX9__) TODO: Add NAVI support // clang-format off template +torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b, + const int64_t rows_per_block); + +torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, + const int64_t CuCount); + +void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, + at::Tensor& scale_a, at::Tensor& scale_b, const int64_t CuCount); + void paged_attention(torch::Tensor& out, torch::Tensor& exp_sums, torch::Tensor& max_logits, torch::Tensor& tmp_out, torch::Tensor& query, torch::Tensor& key_cache, diff --git a/csrc/rocm/skinny_gemms.cu b/csrc/rocm/skinny_gemms.cu new file mode 100644 index 00000000000..72d2820f2aa --- /dev/null +++ b/csrc/rocm/skinny_gemms.cu @@ -0,0 +1,1600 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "cuda_compat.h" +#include "dispatch_utils.h" +#include "quantization/fp8/common.cuh" + +#if defined(__HIPCC__) && (defined(__gfx90a__) || defined(__gfx942__)) + #define __HIP__MI300_MI250__ +#endif + +#if defined(__HIPCC__) && defined(__gfx942__) + #define __HIP__MI300__ +#endif + +#if defined(NDEBUG) + #undef NDEBUG + #include + #define UNREACHABLE_CODE assert(false); + #define NDEBUG +#else + #define UNREACHABLE_CODE assert(false); +#endif + +template +struct scalar {}; + +template +struct scalar2 {}; + +template 
+__device__ __forceinline__ float2 __s22float2(T v); + +template +__device__ __forceinline__ T __float2s(float v); + +template +__device__ __forceinline__ T __float22s2_rn(float2 v); + +// Definitions and cvt functions for fp16 +template <> +struct scalar { + using type = half; +}; + +template <> +struct scalar2 { + using type = __half2; +}; + +template <> +__device__ __forceinline__ half __float2s(float v) { + return __float2half(v); +} + +template <> +__device__ __forceinline__ float2 __s22float2(__half2 v) { + return __half22float2(v); +} + +template <> +__device__ __forceinline__ __half2 __float22s2_rn(float2 v) { + return __float22half2_rn(v); +} + +// Definitions and cvt functions for bf16 +template <> +struct scalar { + using type = __hip_bfloat16; +}; + +template <> +struct scalar2 { + using type = __hip_bfloat162; +}; + +template <> +__device__ __forceinline__ __hip_bfloat16 __float2s(float v) { + return __float2bfloat16(v); +} + +template <> +__device__ __forceinline__ float2 __s22float2(__hip_bfloat162 v) { + return __bfloat1622float2(v); +} + +template <> +__device__ __forceinline__ __hip_bfloat162 __float22s2_rn(float2 v) { + return __float22bfloat162_rn(v); +} + +template +__device__ __forceinline__ T loadnt(T* addr) { + return __builtin_nontemporal_load(addr); +} + +__device__ __forceinline__ float4 load_ntmprl(const float4* addr) { + auto addr_alias = reinterpret_cast(addr); + auto dat0 = loadnt(addr_alias); + auto dat1 = loadnt(addr_alias + 1); + auto dat2 = loadnt(addr_alias + 2); + auto dat3 = loadnt(addr_alias + 3); + return make_float4(dat0, dat1, dat2, dat3); +} + +// TBlock fetches entire rows of A, and entire col of B (K dimension); assume +// N=1 for time being grid is M/A_NUM_ROWS blocks +template +__global__ void LLGemm1_kernel(const scalar_t* in_a, const scalar_t* in_b, + scalar_t* out_c, const int K) { + using scalar2_t = typename scalar2::type; + auto af4 = reinterpret_cast(in_a); + auto bf4 = reinterpret_cast(in_b); + auto c = reinterpret_cast(out_c); + __shared__ float red_smem[NUM_A_ROWS_PER_BLOCK][WARP_SIZE]; + const int row_addr = blockIdx.x * NUM_A_ROWS_PER_BLOCK * K / 8; + const int threadid = threadIdx.x; + const int warp = threadIdx.x / WARP_SIZE; + const int lane = threadIdx.x % WARP_SIZE; + const int num_warps = blockDim.x / WARP_SIZE; + const int qwarpid = threadid / num_warps; + const int qthreadid = threadid % num_warps; + float4 rowA_elem4[NUM_A_ROWS_PER_BLOCK]; + scalar2_t colB_elem4x, colB_elem4y, colB_elem4z, colB_elem4w; + float acc[NUM_A_ROWS_PER_BLOCK]; + scalar2_t acch2; + scalar2_t oval; + + // As we later use warp shuffle operations, we may have more threads in the + // block than the actual available data, hence the if guard here. + if (threadid * 8 < K) { +#pragma unroll + for (int i = 0; i < NUM_A_ROWS_PER_BLOCK; i++) { + // rowA_elem4[i] holds 8 * half numbers seen as a single float4. + rowA_elem4[i] = load_ntmprl(&af4[row_addr + threadid + K / 8 * i]); + } + } + + colB_elem4x = bf4[threadid * 4 + 0]; + colB_elem4y = bf4[threadid * 4 + 1]; + colB_elem4z = bf4[threadid * 4 + 2]; + colB_elem4w = bf4[threadid * 4 + 3]; + + scalar2_t Af2; + [[maybe_unused]] scalar2_t Bf2; + float2 S; + + auto Ah2ptr = reinterpret_cast(&rowA_elem4); + scalar2_t* ah2lptr; + +#pragma unroll + for (int i = 0; i < NUM_A_ROWS_PER_BLOCK; i++) { + // Multiply-add on 8 scalar_t. 
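+    // rowA_elem4[i] packs 8 scalar_t values (four scalar2_t pairs) into one
+    // float4, so the __hmul2 / __hfma2 chain below covers this thread's whole
+    // 8-element chunk of row i against colB_elem4{x,y,z,w}.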
+ ah2lptr = Ah2ptr + i * 4; + Af2 = *(ah2lptr); + acch2 = __hmul2(Af2, colB_elem4x); + Af2 = *(ah2lptr + 1); + acch2 = __hfma2(Af2, colB_elem4y, acch2); + Af2 = *(ah2lptr + 2); + acch2 = __hfma2(Af2, colB_elem4z, acch2); + Af2 = *(ah2lptr + 3); + acch2 = __hfma2(Af2, colB_elem4w, acch2); + S = __s22float2(acch2); + + // See comment above concerning the if guard. + acc[i] = (threadid * 8 < K ? S.x + S.y : 0.f); + } + +// all reduce across warp. +#pragma unroll + for (int mask = WARP_SIZE / 2; mask >= 1; mask /= 2) { +#pragma unroll + for (int i = 0; i < NUM_A_ROWS_PER_BLOCK; i++) { + acc[i] += __shfl_xor(acc[i], mask); + } + } + + // Warp leaders store the data to shared memory. + if (lane < NUM_A_ROWS_PER_BLOCK) { + red_smem[lane][warp] = acc[lane]; + } + + // Make sure the data is in shared memory. + __syncthreads(); + + if (qwarpid < NUM_A_ROWS_PER_BLOCK) { + acc[qwarpid] = qthreadid < num_warps ? red_smem[qwarpid][qthreadid] : 0.f; + for (int mask = num_warps / 2; mask >= 1; mask /= 2) { + acc[qwarpid] += __shfl_xor(acc[qwarpid], mask); + } + float oval2 = __shfl_xor(acc[qwarpid], num_warps); + + if (lane % (num_warps * 2) == 0) { + oval = __float22s2_rn(make_float2(acc[qwarpid], oval2)); + c[blockIdx.x * NUM_A_ROWS_PER_BLOCK / 2 + qwarpid / 2] = oval; + } + } +} + +torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b, + const int64_t rows_per_block) { + auto M = in_a.size(0); + auto K = in_a.size(1); + auto N = in_b.size(0); + + TORCH_CHECK(N == 1, "Row number of activation tensor must be 1."); + TORCH_CHECK(in_a.dtype() == in_b.dtype()); + TORCH_CHECK(in_b.dtype() == torch::kFloat16 || + in_b.dtype() == torch::kBFloat16); + + auto out_c = torch::empty( + {N, M}, torch::TensorOptions().dtype(in_b.dtype()).device(in_b.device())); + + // NUM_TREADS need to be a multiple of WARP_SIZE, as we are using warp shuffle + // operations. + const int NUM_THREADS = + K * 2 / 16 % WARP_SIZE == 0 + ? K * 2 / 16 + : K * 2 / 16 + (WARP_SIZE - K * 2 / 16 % WARP_SIZE); + + int NUM_BLOCKS = M / rows_per_block; + + const at::cuda::OptionalCUDAGuard device_guard(device_of(in_b)); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + // call the kernel function... 
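+  // NUM_THREADS above is K / 8 (each thread loads one float4 = 8 scalar_t per
+  // row) rounded up to a multiple of WARP_SIZE. Illustrative example with a
+  // 64-lane warp: K = 4000 gives 500 threads, rounded up to 512.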
+ AT_DISPATCH_REDUCED_FLOATING_TYPES(in_b.scalar_type(), "LLGemm1", [&] { + auto a_ptr = in_a.data_ptr(); + auto b_ptr = in_b.data_ptr(); + auto c_ptr = out_c.data_ptr(); + if (rows_per_block == 2) { + LLGemm1_kernel + <<>>(a_ptr, b_ptr, c_ptr, K); + } else if (rows_per_block == 4) { + LLGemm1_kernel + <<>>(a_ptr, b_ptr, c_ptr, K); + } else if (rows_per_block == 8) { + LLGemm1_kernel + <<>>(a_ptr, b_ptr, c_ptr, K); + } else if (rows_per_block == 16) { + LLGemm1_kernel + <<>>(a_ptr, b_ptr, c_ptr, K); + } else { + NUM_BLOCKS = M / 4; + LLGemm1_kernel + <<>>(a_ptr, b_ptr, c_ptr, K); + } + }); + + return out_c; +} + +#define DOT2C(V0, V2, V3) \ + if constexpr (std::is_same_v) { \ + asm("v_dot2c_f32_f16 %0, %2, %3" : "=v"(V0) : "0"(V0), "v"(V2), "v"(V3)); \ + } else if constexpr (std::is_same_v) { \ + float2 s = __bfloat1622float2(*((__hip_bfloat162*)(&(V2)))) * \ + __bfloat1622float2(*((__hip_bfloat162*)(&(V3)))); \ + V0 += (s.x + s.y); \ + } + +#if defined(__HIP__MI300_MI250__) // TODO: Add NAVI support +// This version targets cases where A[] fits LDS capacity +template +__global__ void __launch_bounds__(WvPrGrp* THRDS) + wvSplitK_hf_sml_(const int K, const int M, const scalar_t* B, + const scalar_t* __restrict__ A, scalar_t* C, + const int _WvPrGrp, const int CuCount) { + using scalar8 = + __attribute__((__vector_size__((A_CHUNK / 2) * sizeof(float)))) float; + union bigType { + scalar_t h[A_CHUNK]; + float f[A_CHUNK / 2]; + float2 f2[A_CHUNK / 4]; + double d[A_CHUNK / 4]; + scalar8 h8; + }; + + //---------------------------------------------------- + // Reserving 64 KB of LDS to have 1 WG / CU + // Goal is to bring the activation matrix A to the LDS + // and use it across the lifetime of the work group + // TODO: When activation matrix is larger than 64 KB + // then this is not goint to work! + //---------------------------------------------------- + __shared__ scalar_t s[1024 * 32]; + + //---------------------------------------------------- + // Fetch the activation matrix to LDS + // Loop iteration: + // - Each thread (lane) is fetching 8 elements (A_Chunk) + // - Each wave will fetch 64*8=> 512 elements + // - Each WG will fetch 512 * 16 => 8K elements + // - Then the WG will move to another 8 K elements + // TODO: Logic below will only work when K is multiple of 8 + //---------------------------------------------------- + for (uint32_t k = 0; k < min(K * N, 32 * 1024); + k += THRDS * WvPrGrp * A_CHUNK) { + uint32_t k_in = k + ((threadIdx.y * THRDS + threadIdx.x) * A_CHUNK); + + if (k_in >= min(K * N, 32 * 1024)) break; + + *((bigType*)(&s[k_in])) = *((bigType*)(&A[k_in])); + } + __syncthreads(); + + if (threadIdx.y >= _WvPrGrp) return; + + uint32_t m = (blockIdx.x * _WvPrGrp + (threadIdx.y % _WvPrGrp)) * YTILE; + + float sum[N][YTILE]; + + //---------------------------------------------------- + // Each wave works on a single column of weight matrix. + // There are 16 waves per WG, and hence, each WG is + // working on 16 columns of weight matrix. Moreover, + // we tile in column direction by YTILE, so when YTILE=1 + // the above math is right, however, when YTILE=2 then + // each wave will be working on 2 columns and WG will + // be working on 32 columns. + // + // Top level loop that makes WGs persistent! 
+ // - WGs iterates across columns of weight matrix + // - Each wave within WG works on a given column(s) + // - After completing first set of columns, WGs start + // working on the next set of available columns + //---------------------------------------------------- + while (m < M) { + //---------------------------------------------------- + // 'sum' accumulates the matrix A x B computation + // split across 64 lanes. + // + // YTILE represents how many column of weight matrix + // are being worked on by each wave. + //---------------------------------------------------- + for (int i = 0; i < YTILE; i++) + for (int n = 0; n < N; n++) sum[n][i] = 0; + + bigType bigA[N][UNRL]; + bigType bigB[YTILE][UNRL]; + //---------------------------------------------------- + // Fetch weight matrix B in interleaved K-split! + // - Each thread (lane) is fetching 8 elements (A_Chunk) + // - Each wave will fetch 64*8=> 512 elements (1024B) + // - YTILE represents the number of column being serviced + // by wave + // - Loop for fetching weight matrix (B) are unrolled + // + // Fetch activation matrix A from LDS + // - Loop for fetching activation matrix (A) are unrolled + // + // Finally, do the matrix multiplication in an unrolled + // fashion. This provides lot of food for compiler + // scheduling. + // + // TODO: Logic below will only work when K is multiple of 8 + //---------------------------------------------------- + // for (uint32_t k1 = 0; k1 < K; k1 += THRDS * A_CHUNK * UNRL) { + for (uint32_t k1 = 0; k1 < K; k1 += THRDS * A_CHUNK * UNRL) { + // Fetch the weight matrix from memory! + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + const scalar_t* B_ = &B[(m + 0) * K + k_]; + bigB[0][k2].h8 = (loadnt((scalar8*)(&B_[0 * K]))); + //---------------------------------------------------- + // The following code with YTILE > 1 has to be deleted + //---------------------------------------------------- + if constexpr (YTILE >= 2) + bigB[1][k2].h8 = (loadnt((scalar8*)(&B_[1 * K]))); + if constexpr (YTILE >= 3) + bigB[2][k2].h8 = (loadnt((scalar8*)(&B_[2 * K]))); + if constexpr (YTILE >= 4) + bigB[3][k2].h8 = (loadnt((scalar8*)(&B_[3 * K]))); + if constexpr (YTILE >= 5) + bigB[4][k2].h8 = (loadnt((scalar8*)(&B_[4 * K]))); + if constexpr (YTILE >= 6) + bigB[5][k2].h8 = (loadnt((scalar8*)(&B_[5 * K]))); + if constexpr (YTILE >= 7) + bigB[6][k2].h8 = (loadnt((scalar8*)(&B_[6 * K]))); + if constexpr (YTILE >= 8) + bigB[7][k2].h8 = (loadnt((scalar8*)(&B_[7 * K]))); + } + + // Fetch activation matrix from either just LDS or from both LDS / memory + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + // Fetch A activation matrix in interleaved fashion from LDS or memory + + for (int n = 0; n < N; n++) { + bigA[n][k2] = *((const bigType*)(&(s[k_ + K * n]))); + } + } + + // Do the matrix multiplication in interleaved manner + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + // Do the matrix multiplication of activation and weight matrix + // - Remember the accumulation is happening for K-split of 64! 
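+        //   Each DOT2C consumes one packed pair of scalar_t values per operand
+        //   (a 32-bit float reinterpreted as 2 x fp16/bf16), so the loop over
+        //   A_CHUNK / 2 floats below covers the full 8-element chunk per lane.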
+ #pragma unroll + for (uint32_t n = 0; n < N; n++) { + #pragma unroll + for (uint32_t b = 0; b < A_CHUNK / 2; b++) { + DOT2C(sum[n][0], bigA[n][k2].f[b], bigB[0][k2].f[b]) + //---------------------------------------------------- + // The following code with YTILE > 1 + //---------------------------------------------------- + if constexpr (YTILE >= 2) { + DOT2C(sum[n][1], bigA[n][k2].f[b], bigB[1][k2].f[b]); + } + if constexpr (YTILE >= 3) { + DOT2C(sum[n][2], bigA[n][k2].f[b], bigB[2][k2].f[b]); + } + if constexpr (YTILE >= 4) { + DOT2C(sum[n][3], bigA[n][k2].f[b], bigB[3][k2].f[b]); + } + if constexpr (YTILE >= 5) { + DOT2C(sum[n][4], bigA[n][k2].f[b], bigB[4][k2].f[b]); + } + if constexpr (YTILE >= 6) { + DOT2C(sum[n][5], bigA[n][k2].f[b], bigB[5][k2].f[b]); + } + if constexpr (YTILE >= 7) { + DOT2C(sum[n][6], bigA[n][k2].f[b], bigB[6][k2].f[b]); + } + if constexpr (YTILE >= 8) { + DOT2C(sum[n][7], bigA[n][k2].f[b], bigB[7][k2].f[b]); + } + } + } + } + } + + //---------------------------------------------------- + // Final reduction step using shuffle + //---------------------------------------------------- + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:8 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:4 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:2 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 wave_shr:1 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_bcast:15 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_bcast:31 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + } + } + if (threadIdx.x == 63) { + for (int n = 0; n < N; n++) { + for (int i = 0; i < YTILE; i++) { + // if (commitColumn[i]) C[m + i + n * M] = __float2half(sum[n][i]); + C[m + i + n * M] = __float2s(sum[n][i]); + } + } + } + + m += CuCount * _WvPrGrp * YTILE; + } +} +#else // !defined(__HIP__MI300_MI250__) TODO: Add NAVI support +template +__global__ void wvSplitK_hf_sml_(const int K, const int M, const scalar_t* B, + const scalar_t* __restrict__ A, scalar_t* C, + const int _WvPrGrp, const int CuCount) { + UNREACHABLE_CODE +} +#endif // defined(__HIP__MI300_MI250__) TODO: Add NAVI support + +#if defined(__HIP__MI300_MI250__) // TODO: Add NAVI support +// This version targets cases where A[] marginally exceeds LDS capacity +template +__global__ void __launch_bounds__(WvPrGrp* THRDS) + wvSplitK_hf_(const int K, const int M, const scalar_t* B, + const scalar_t* __restrict__ A, scalar_t* C, + const int _WvPrGrp, const int CuCount) { + using scalar8 = + __attribute__((__vector_size__((A_CHUNK / 2) * sizeof(float)))) float; + union bigType { + scalar_t h[A_CHUNK]; + float f[A_CHUNK / 2]; + float2 f2[A_CHUNK / 4]; + double d[A_CHUNK / 4]; + scalar8 h8; + }; + + //---------------------------------------------------- + // Reserving 64 KB of LDS to have 1 WG / CU + // Goal is to bring the activation matrix A to the LDS + // and use it across the lifetime of the work group + // TODO: When activation matrix is larger than 64 KB + // then this is not goint to work! 
+ //---------------------------------------------------- + __shared__ scalar_t s[1024 * 32]; + + //---------------------------------------------------- + // Computation of columns that need to be committed to memory! + //---------------------------------------------------- + uint32_t commitColumn[YTILE]; + for (uint32_t i = 0; i < YTILE; i++) { + commitColumn[i] = 1; + } + + //---------------------------------------------------- + // Indexing function into the column of weight matrix B + // Algorithm does 64 lane k-splitting / wave and uses + // WG ID and Thread ID to find the index. + //---------------------------------------------------- + // int _WvPrGrp = mindiv(N, CuCount * YTILE, WvPrGrp); + uint32_t m = (blockIdx.x * _WvPrGrp + threadIdx.y) * YTILE; + + // Check whether there will be fragmenation! + // This will happen only for the last wave! + if (m < M && (m + YTILE) >= M) { + uint32_t startColumn = M - YTILE; + for (uint32_t i = 0; i < (m - startColumn); i++) { + commitColumn[i] = 0; + } + m = startColumn; + } + + //---------------------------------------------------- + // Fetch the activation matrix to LDS + // Loop iteration: + // - Each thread (lane) is fetching 8 elements (A_Chunk) + // - Each wave will fetch 64*8=> 512 elements + // - Each WG will fetch 512 * 16 => 8K elements + // - Then the WG will move to another 8 K elements + // TODO: Logic below will only work when K is multiple of 8 + //---------------------------------------------------- + for (uint32_t k = 0; k < min(K * N, 32 * 1024); + k += THRDS * WvPrGrp * A_CHUNK) { + uint32_t k_in = k + ((threadIdx.y * THRDS + threadIdx.x) * A_CHUNK); + + if (k_in >= min(K * N, 32 * 1024)) break; + + *((bigType*)(&s[k_in])) = *((bigType*)(&A[k_in])); + } + + __syncthreads(); + + if (threadIdx.y >= _WvPrGrp) return; + + float sum[N][YTILE]; + + //---------------------------------------------------- + // Each wave works on a single column of weight matrix. + // There are 16 waves per WG, and hence, each WG is + // working on 16 columns of weight matrix. Moreover, + // we tile in column direction by YTILE, so when YTILE=1 + // the above math is right, however, when YTILE=2 then + // each wave will be working on 2 columns and WG will + // be working on 32 columns. + // + // Top level loop that makes WGs persistent! + // - WGs iterates across columns of weight matrix + // - Each wave within WG works on a given column(s) + // - After completing first set of columns, WGs start + // working on the next set of available columns + //---------------------------------------------------- + while (m < M) { + //---------------------------------------------------- + // 'sum' accumulates the matrix A x B computation + // split across 64 lanes. + // + // YTILE represents how many column of weight matrix + // are being worked on by each wave. + //---------------------------------------------------- + for (int i = 0; i < YTILE; i++) + for (int n = 0; n < N; n++) sum[n][i] = 0; + + bigType bigA[N][UNRL]; + bigType bigB[YTILE][UNRL]; + //---------------------------------------------------- + // Fetch weight matrix B in interleaved K-split! 
+ // - Each thread (lane) is fetching 8 elements (A_Chunk) + // - Each wave will fetch 64*8=> 512 elements (1024B) + // - YTILE represents the number of column being serviced + // by wave + // - Loop for fetching weight matrix (B) are unrolled + // + // Fetch activation matrix A from LDS + // - Loop for fetching activation matrix (A) are unrolled + // + // Finally, do the matrix multiplication in an unrolled + // fashion. This provides lot of food for compiler + // scheduling. + // + // TODO: Logic below will only work when K is multiple of 8 + //---------------------------------------------------- + for (uint32_t k1 = 0; k1 < K; k1 += THRDS * A_CHUNK * UNRL) { + // Fetch the weight matrix from memory! + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + const scalar_t* B_ = &B[(m + 0) * K + k_]; + bigB[0][k2].h8 = (loadnt((scalar8*)(&B_[0 * K]))); + //---------------------------------------------------- + // The following code with YTILE > 1 has to be deleted + //---------------------------------------------------- + if constexpr (YTILE >= 2) + bigB[1][k2].h8 = (loadnt((scalar8*)(&B_[1 * K]))); + if constexpr (YTILE >= 3) + bigB[2][k2].h8 = (loadnt((scalar8*)(&B_[2 * K]))); + if constexpr (YTILE >= 4) + bigB[3][k2].h8 = (loadnt((scalar8*)(&B_[3 * K]))); + if constexpr (YTILE >= 5) + bigB[4][k2].h8 = (loadnt((scalar8*)(&B_[4 * K]))); + if constexpr (YTILE >= 6) + bigB[5][k2].h8 = (loadnt((scalar8*)(&B_[5 * K]))); + if constexpr (YTILE >= 7) + bigB[6][k2].h8 = (loadnt((scalar8*)(&B_[6 * K]))); + if constexpr (YTILE >= 8) + bigB[7][k2].h8 = (loadnt((scalar8*)(&B_[7 * K]))); + } + + // Fetch activation matrix from either just LDS or from both LDS / memory + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + // Fetch A activation matrix in interleaved fashion from LDS or memory + + for (int n = 0; n < N; n++) { + if (k_ + K * n < 32 * 1024) + bigA[n][k2] = *((const bigType*)(&(s[k_ + K * n]))); + else + bigA[n][k2] = *((const bigType*)(&(A[k_ + K * n]))); + } + } + + // Do the matrix multiplication in interleaved manner + #pragma unroll + for (uint32_t n = 0; n < N; n++) { + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + // Do the matrix multiplication of activation and weight matrix + // - Remember the accumulation is happening for K-split of 64! 
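+          //   The accumulators sum[n][y] stay in fp32 for the entire K loop;
+          //   the result is converted back to scalar_t only once, at the final
+          //   store into C after the cross-lane reduction.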
+ #pragma unroll + for (uint32_t b = 0; b < A_CHUNK / 2; b++) { + DOT2C(sum[n][0], bigA[n][k2].f[b], bigB[0][k2].f[b]); + //---------------------------------------------------- + // The following code with YTILE > 1 + //---------------------------------------------------- + if constexpr (YTILE >= 2) { + DOT2C(sum[n][1], bigA[n][k2].f[b], bigB[1][k2].f[b]); + } + if constexpr (YTILE >= 3) { + DOT2C(sum[n][2], bigA[n][k2].f[b], bigB[2][k2].f[b]); + } + if constexpr (YTILE >= 4) { + DOT2C(sum[n][3], bigA[n][k2].f[b], bigB[3][k2].f[b]); + } + if constexpr (YTILE >= 5) { + DOT2C(sum[n][4], bigA[n][k2].f[b], bigB[4][k2].f[b]); + } + if constexpr (YTILE >= 6) { + DOT2C(sum[n][5], bigA[n][k2].f[b], bigB[5][k2].f[b]); + } + if constexpr (YTILE >= 7) { + DOT2C(sum[n][6], bigA[n][k2].f[b], bigB[6][k2].f[b]); + } + if constexpr (YTILE >= 8) { + DOT2C(sum[n][7], bigA[n][k2].f[b], bigB[7][k2].f[b]); + } + } + } + } + } + + //---------------------------------------------------- + // Final reduction step using shuffle + //---------------------------------------------------- + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:8 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:4 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:2 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 wave_shr:1 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_bcast:15 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_bcast:31 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + } + } + + if (threadIdx.x == 63) { + for (int n = 0; n < N; n++) { + for (int i = 0; i < YTILE; i++) { + if (commitColumn[i]) + C[m + i + n * M] = __float2s(sum[n][i]); + } + } + } + + m += CuCount * _WvPrGrp * YTILE; + + // Check whether there will be fragmenation! + // This will happen only for the last wave! 
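+    // Illustrative example (hypothetical sizes): M = 10, YTILE = 4, m = 8:
+    // startColumn becomes 6, commitColumn[0..1] are cleared, and m is rewound
+    // to 6, so this wave recomputes columns 6..9 but only commits 8 and 9.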
+ if (m < M && (m + YTILE) >= M) { + uint32_t startColumn = M - YTILE; + for (uint32_t i = 0; i < (m - startColumn); i++) { + commitColumn[i] = 0; + } + m = startColumn; + } + } +} + +#else // !defined(__HIP__MI300_MI250__) TODO: Add NAVI support +template +__global__ void wvSplitK_hf_(const int K, const int M, const scalar_t* B, + const scalar_t* __restrict__ A, scalar_t* C, + const int _WvPrGrp, const int CuCount) { + UNREACHABLE_CODE +} +#endif // defined(__HIP__MI300_MI250__) TODO: Add NAVI support + +#if defined(__HIP__MI300_MI250__) // TODO: Add NAVI support +// This version targets big A[] cases, where it is much larger than LDS capacity +template +__global__ void __launch_bounds__(WvPrGrp* THRDS) + wvSplitK_hf_big_(const int K, const int M, const scalar_t* B, + const scalar_t* __restrict__ A, scalar_t* C, + const int _WvPrGrp, const int CuCount) { + using scalar8 = + __attribute__((__vector_size__((A_CHUNK / 2) * sizeof(float)))) float; + + union bigType { + scalar_t h[A_CHUNK]; + float f[A_CHUNK / 2]; + float2 f2[A_CHUNK / 4]; + double d[A_CHUNK / 4]; + scalar8 h8; + }; + + //---------------------------------------------------- + // Reserving 64 KB of LDS to have 1 WG / CU + // Goal is to bring the activation matrix A to the LDS + // and use it across the lifetime of the work group + // TODO: When activation matrix is larger than 64 KB + // then this is not goint to work! + //---------------------------------------------------- + __shared__ scalar_t s[1024 * 32]; + + //---------------------------------------------------- + // Computation of columns that need to be committed to memory! + //---------------------------------------------------- + uint32_t commitColumn[YTILE]; + for (uint32_t i = 0; i < YTILE; i++) { + commitColumn[i] = 1; + } + + // int _WvPrGrp = mindiv(N, CuCount * YTILE, WvPrGrp); + if (threadIdx.y >= _WvPrGrp) return; + + //---------------------------------------------------- + // Indexing function into the column of weight matrix B + // Algorithm does 64 lane k-splitting / wave and uses + // WG ID and Thread ID to find the index. + //---------------------------------------------------- + uint32_t m = (blockIdx.x * _WvPrGrp + threadIdx.y) * YTILE; + + // Check whether there will be fragmenation! + // This will happen only for the last wave! + if (m < M && (m + YTILE) >= M) { + uint32_t startColumn = M - YTILE; + for (uint32_t i = 0; i < (m - startColumn); i++) { + commitColumn[i] = 0; + } + m = startColumn; + } + + //---------------------------------------------------- + // Fetch the activation matrix to LDS + // Loop iteration: + // - Each thread (lane) is fetching 8 elements (A_Chunk) + // - Each wave will fetch 64*8=> 512 elements + // - Each WG will fetch 512 * 16 => 8K elements + // - Then the WG will move to another 8 K elements + // TODO: Logic below will only work when K is multiple of 8 + //---------------------------------------------------- + #define PCML + #ifndef PCML + for (uint32_t k = 0; k < min(K * N, 32 * 1024); + k += THRDS * WvPrGrp * A_CHUNK) { + uint32_t k_in = k + ((threadIdx.y * THRDS + threadIdx.x) * A_CHUNK); + + if (k_in >= min(K * N, 32 * 1024)) break; + + *((bigType*)(&s[k_in])) = *((bigType*)(&A[k_in])); + } + __syncthreads(); + #endif + + #define TUC (THRDS * UNRL * A_CHUNK) + uint32_t kBase = 0; + // find biggest k size that fits in LDS + uint32_t kFit = (32 * 1024) / N; + // kFit = (kFit%TWC==0) ? kFit : (kFit-kFit%TWC+TWC); //round up to multiple + // of TUC + kFit = (kFit % TUC == 0) + ? 
kFit + : (kFit - kFit % TUC); // round up to multiple of TUC + // if (kFit == 0) kFit = TUC; + kFit = min(kFit, K); + + float sum[N][YTILE]; + + //---------------------------------------------------- + // Each wave works on a single column of weight matrix. + // There are 16 waves per WG, and hence, each WG is + // working on 16 columns of weight matrix. Moreover, + // we tile in column direction by YTILE, so when YTILE=1 + // the above math is right, however, when YTILE=2 then + // each wave will be working on 2 columns and WG will + // be working on 32 columns. + // + // Top level loop that makes WGs persistent! + // - WGs iterates across columns of weight matrix + // - Each wave within WG works on a given column(s) + // - After completing first set of columns, WGs start + // working on the next set of available columns + //---------------------------------------------------- + #ifdef PCML + int YW = (YTILE * _WvPrGrp); + uint32_t Mrndp = (M % YW == 0) ? M : (M - M % YW + YW); + while (m < Mrndp) { + #else + while (m < M) { + #endif + //---------------------------------------------------- + // 'sum' accumulates the matrix A x B computation + // split across 64 lanes. + // + // YTILE represents how many column of weight matrix + // are being worked on by each wave. + //---------------------------------------------------- + for (int i = 0; i < YTILE; i++) + for (int n = 0; n < N; n++) sum[n][i] = 0; + + bigType bigA[N][UNRL]; + bigType bigB[YTILE][UNRL]; + //---------------------------------------------------- + // Fetch weight matrix B in interleaved K-split! + // - Each thread (lane) is fetching 8 elements (A_Chunk) + // - Each wave will fetch 64*8=> 512 elements (1024B) + // - YTILE represents the number of column being serviced + // by wave + // - Loop for fetching weight matrix (B) are unrolled + // + // Fetch activation matrix A from LDS + // - Loop for fetching activation matrix (A) are unrolled + // + // Finally, do the matrix multiplication in an unrolled + // fashion. This provides lot of food for compiler + // scheduling. + // + // TODO: Logic below will only work when K is multiple of 8 + //---------------------------------------------------- + for (uint32_t k1 = 0; k1 < K; k1 += THRDS * A_CHUNK * UNRL) { + #ifdef PCML + if ((k1 == 0) || (k1 == kBase + kFit)) { // load next chunk of A[] to LDS + if (k1 != 0) kBase += kFit; + __syncthreads(); + for (uint32_t k = 0; k < kFit; k += THRDS * _WvPrGrp * A_CHUNK) { + uint32_t kOff = k + ((threadIdx.y * THRDS + threadIdx.x) * A_CHUNK); + if (kBase + kOff >= K) break; + if (kOff >= kFit) break; + for (uint32_t n = 0; n < N; n++) { + uint32_t k_in = kBase + n * K + kOff; + uint32_t k_ot = n * kFit + kOff; + *((bigType*)(&s[k_ot])) = *((bigType*)(&A[k_in])); + } + } + __syncthreads(); + } + if (m >= M) continue; + #endif + + // Fetch the weight matrix from memory! 
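+      // B is read with non-temporal loads (loadnt) below: each weight element
+      // is consumed exactly once by this kernel, so caching it is not useful.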
+ #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + const scalar_t* B_ = &B[(m + 0) * K + k_]; + bigB[0][k2].h8 = (loadnt((scalar8*)(&B_[0 * K]))); + //---------------------------------------------------- + // The following code with YTILE > 1 has to be deleted + //---------------------------------------------------- + if constexpr (YTILE >= 2) + bigB[1][k2].h8 = (loadnt((scalar8*)(&B_[1 * K]))); + if constexpr (YTILE >= 3) + bigB[2][k2].h8 = (loadnt((scalar8*)(&B_[2 * K]))); + if constexpr (YTILE >= 4) + bigB[3][k2].h8 = (loadnt((scalar8*)(&B_[3 * K]))); + if constexpr (YTILE >= 5) + bigB[4][k2].h8 = (loadnt((scalar8*)(&B_[4 * K]))); + if constexpr (YTILE >= 6) + bigB[5][k2].h8 = (loadnt((scalar8*)(&B_[5 * K]))); + if constexpr (YTILE >= 7) + bigB[6][k2].h8 = (loadnt((scalar8*)(&B_[6 * K]))); + if constexpr (YTILE >= 8) + bigB[7][k2].h8 = (loadnt((scalar8*)(&B_[7 * K]))); + } + + // Fetch activation matrix from either just LDS or from both LDS / memory + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + // Fetch A activation matrix in interleaved fashion from LDS or memory + + for (int n = 0; n < N; n++) { + #ifdef PCML + bigA[n][k2] = *((const bigType*)(&(s[k_ - kBase + kFit * n]))); + #else + if (k_ + K * n < 32 * 1024) + bigA[n][k2] = *((const bigType*)(&(s[k_ + K * n]))); + else + bigA[n][k2] = *((const bigType*)(&(A[k_ + K * n]))); + #endif + } + } + + // Do the matrix multiplication in interleaved manner + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + #pragma unroll + for (uint32_t n = 0; n < N; n++) { + // Do the matrix multiplication of activation and weight matrix + // - Remember the accumulation is happening for K-split of 64! 
+ #pragma unroll + for (uint32_t b = 0; b < A_CHUNK / 2; b++) { + DOT2C(sum[n][0], bigA[n][k2].f[b], bigB[0][k2].f[b]); + //---------------------------------------------------- + // The following code with YTILE > 1 + //---------------------------------------------------- + if constexpr (YTILE >= 2) { + DOT2C(sum[n][1], bigA[n][k2].f[b], bigB[1][k2].f[b]); + } + if constexpr (YTILE >= 3) { + DOT2C(sum[n][2], bigA[n][k2].f[b], bigB[2][k2].f[b]); + } + if constexpr (YTILE >= 4) { + DOT2C(sum[n][3], bigA[n][k2].f[b], bigB[3][k2].f[b]); + } + if constexpr (YTILE >= 5) { + DOT2C(sum[n][4], bigA[n][k2].f[b], bigB[4][k2].f[b]); + } + if constexpr (YTILE >= 6) { + DOT2C(sum[n][5], bigA[n][k2].f[b], bigB[5][k2].f[b]); + } + if constexpr (YTILE >= 7) { + DOT2C(sum[n][6], bigA[n][k2].f[b], bigB[6][k2].f[b]); + } + if constexpr (YTILE >= 8) { + DOT2C(sum[n][7], bigA[n][k2].f[b], bigB[7][k2].f[b]); + } + } + } + } + } + + #ifdef PCML + if (m >= M) { + m += CuCount * _WvPrGrp * YTILE; + kBase = 0; + continue; + } + #endif + + //---------------------------------------------------- + // Final reduction step using shuffle + //---------------------------------------------------- + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:8 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:4 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_shr:2 bound_ctrl:0 " + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 wave_shr:1 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_bcast:15 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + asm("s_nop 0\n\tv_add_f32 %0, %2, %3 row_bcast:31 bound_ctrl:0" + : "=v"(sum[n][y]) + : "0"(sum[n][y]), "v"(sum[n][y]), "v"(sum[n][y])); + } + } + + if (threadIdx.x == 63) { + for (int n = 0; n < N; n++) { + for (int i = 0; i < YTILE; i++) { + if (commitColumn[i]) + C[m + i + n * M] = __float2s(sum[n][i]); + } + } + } + + m += CuCount * _WvPrGrp * YTILE; + kBase = 0; + + // Check whether there will be fragmenation! + // This will happen only for the last wave! 
+ if (m < M && (m + YTILE) >= M) { + uint32_t startColumn = M - YTILE; + for (uint32_t i = 0; i < (m - startColumn); i++) { + commitColumn[i] = 0; + } + m = startColumn; + } + } +} +#else // !defined(__HIP__MI300_MI250__) TODO: Add NAVI support +template +__global__ void wvSplitK_hf_big_(const int K, const int M, const scalar_t* B, + const scalar_t* __restrict__ A, scalar_t* C, + const int _WvPrGrp, const int CuCount) { + UNREACHABLE_CODE +} +#endif // defined(__HIP__MI300_MI250__) TODO: Add NAVI support + +int mindiv(int N, int div1, int div2) { + int nPrRnd = div1 * div2; + int rnds0 = N / nPrRnd; + nPrRnd -= div1 * 3; + int rnds3 = N / nPrRnd; + nPrRnd -= div1; + int rnds4 = N / nPrRnd; + nPrRnd -= div1; + int rnds5 = N / nPrRnd; + nPrRnd -= div1; + int rnds6 = N / nPrRnd; + nPrRnd -= div1; + int rnds7 = N / nPrRnd; + nPrRnd -= div1; + int rnds8 = N / nPrRnd; + nPrRnd -= div1; + int rnds9 = N / nPrRnd; + nPrRnd -= div1; + int rtn = div2; + if (rnds0 == rnds3) rtn = div2 - 3; + if (rnds0 == rnds4) rtn = div2 - 4; + if (rnds0 == rnds5) rtn = div2 - 5; + if (rnds0 == rnds6) rtn = div2 - 6; + if (rnds0 == rnds7) rtn = div2 - 7; + if (rnds0 == rnds8) rtn = div2 - 8; + if (rnds0 == rnds9) rtn = div2 - 9; + return rtn; +} + +torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, + const int64_t CuCount) { + auto M_in = in_a.size(0); + auto K_in = in_a.size(1); + auto N_in = in_b.size(0); + + TORCH_CHECK(in_a.dtype() == in_b.dtype()); + TORCH_CHECK(K_in % 8 == 0, "k % 8 == 0"); + TORCH_CHECK(in_a.dtype() == torch::kFloat16 || + in_a.dtype() == torch::kBFloat16); + + auto out_c = torch::empty( + {N_in, M_in}, + torch::TensorOptions().dtype(in_b.dtype()).device(in_b.device())); + + dim3 grid(CuCount); + + const at::cuda::OptionalCUDAGuard device_guard(device_of(in_a)); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + +#define WVSPLITK(_WvPrGrp, _YTILEs, _YTILEm, _YTILEb, _UNRLs, _UNRLm, _UNRLb, \ + _N) \ + { \ + dim3 block(64, _WvPrGrp); \ + if ((K_in * N_in <= 32 * 1024) && (M_in % _YTILEs == 0)) { \ + int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \ + wvSplitK_hf_sml_ \ + <<>>(K_in, M_in, af4, bf4, c, __wvPrGrp, \ + CuCount); \ + } else if (K_in * N_in <= 32 * 1024 * 1.2) { \ + int __wvPrGrp = mindiv(M_in, CuCount * _YTILEm, _WvPrGrp); \ + wvSplitK_hf_ \ + <<>>(K_in, M_in, af4, bf4, c, __wvPrGrp, \ + CuCount); \ + } else { \ + int __wvPrGrp = mindiv(M_in, CuCount * _YTILEb, _WvPrGrp); \ + wvSplitK_hf_big_ \ + <<>>(K_in, M_in, af4, bf4, c, __wvPrGrp, \ + CuCount); \ + } \ + } + + AT_DISPATCH_REDUCED_FLOATING_TYPES(in_b.scalar_type(), "wvSplitK", [&] { + using fptype = typename scalar::type; + fptype* af4 = reinterpret_cast(in_a.data_ptr()); + const fptype* bf4 = reinterpret_cast(in_b.data_ptr()); + fptype* c = reinterpret_cast(out_c.data_ptr()); + switch (N_in) { + case 1: + WVSPLITK(16, 2, 2, 2, 2, 2, 2, 1) + break; + case 2: + WVSPLITK(16, 2, 2, 2, 2, 2, 2, 2) + break; + case 3: + WVSPLITK(16, 4, 7, 7, 1, 1, 1, 3) + break; + case 4: + WVSPLITK(16, 4, 7, 7, 1, 1, 1, 4) + break; + default: + throw std::runtime_error( + "Unsupported N value: " + std::to_string(M_in) + "," + + std::to_string(K_in) + "," + std::to_string(N_in)); + } + }); + return out_c; +} + +#if defined(__HIP__MI300__) // TODO: Add NAVI support +template +__global__ void __launch_bounds__(WvPrGrp* THRDS) + wvSplitKQ_hf_sml_(const int K, const int Kp, const int M, const fp8_t* B, + const fp8_t* __restrict__ A, scalar_t* C, + const float* __restrict__ s_A, + const float* __restrict__ s_B, const int 
_WvPrGrp, + const int CuCount) { + using scalar8 = + __attribute__((__vector_size__((A_CHUNK / 4) * sizeof(float)))) float; + using intx2 = __attribute__((__vector_size__(2 * sizeof(int)))) int; + using intx4 = __attribute__((__vector_size__(4 * sizeof(int)))) int; + union bigType { + char f8[A_CHUNK]; + char2 c2[A_CHUNK / 2]; + scalar_t h[A_CHUNK / 2]; + float f[A_CHUNK / 4]; + int i[A_CHUNK / 4]; + long l[A_CHUNK / 8]; + intx4 l2[A_CHUNK / 16]; + scalar8 h8; + }; + + __shared__ fp8_t s[1024 * 64]; + + for (uint32_t k = (threadIdx.y * THRDS + threadIdx.x) * A_CHUNK; + k < min(K * N, 64 * 1024); k += THRDS * WvPrGrp * A_CHUNK) { + *((bigType*)(&s[k])) = *((bigType*)(&A[k])); + } + __syncthreads(); + + if (threadIdx.y >= _WvPrGrp) return; + + uint32_t m = (blockIdx.x * _WvPrGrp + (threadIdx.y % _WvPrGrp)) * YTILE; + + using floatx16 = __attribute__((__vector_size__(16 * sizeof(float)))) float; + floatx16 sum[N][YTILE]; + float sA = *s_A; + float sB = *s_B; + + while (m < M) { + for (int i = 0; i < YTILE; i++) + for (int n = 0; n < N; n++) sum[n][i] = {0.f}; + + bigType bigA[N][UNRL]; + bigType bigB[YTILE][UNRL]; + + for (uint32_t k1 = 0; k1 < K; k1 += THRDS * A_CHUNK * UNRL) { + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + #pragma unroll + for (uint32_t n = 0; n < N; ++n) bigA[n][k2].h8 = {0.f}; + #pragma unroll + for (uint32_t y = 0; y < YTILE; ++y) bigB[y][k2].h8 = {0.f}; + } + + // Fetch the weight matrix from memory! + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + const fp8_t* B_ = &B[(m + 0) * Kp + k_]; + #pragma unroll + for (uint32_t y = 0; y < YTILE; ++y) { + bigB[y][k2].h8 = (loadnt((scalar8*)(&B_[y * Kp]))); + } + } + + // Fetch activation matrix from either just LDS or from both LDS / memory + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + for (int n = 0; n < N; n++) { + bigA[n][k2] = *((const bigType*)(&(s[k_ + K * n]))); + } + } + + // Do the matrix multiplication in interleaved manner + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + if (k >= K) break; + + for (uint32_t n = 0; n < N; n++) { + for (int i = 0; i < A_CHUNK; i += 8) { + for (int y = 0; y < YTILE; ++y) { + sum[n][y] = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8( + bigA[n][k2].l[i / 8], bigB[y][k2].l[i / 8], sum[n][y], 0, 0, + 0); + } + } + } + } + } + + // Final reduction + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + float accm0 = sum[n][y][0]; + float accm16 = sum[n][y][8]; + asm("v_add_f32 %0, %2, %3 row_shl:1 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][1]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:1 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][9]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:2 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][2]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:2 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][10]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:3 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][3]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:3 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][11]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:8 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][4]), 
"v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:8 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][12]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:9 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][5]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:9 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][13]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:10 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][6]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:10 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][14]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:11 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][7]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:11 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][15]), "v"(accm16)); + accm0 += __shfl(accm0, 36); + accm16 += __shfl(accm16, 52); + sum[n][y][0] = accm0 + __shfl(accm16, 16); + } + } + + if (threadIdx.x == 0) { + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + C[m + y + n * M] = __float2s(sum[n][y][0] * sA * sB); + } + } + } + + m += CuCount * _WvPrGrp * YTILE; + } +} +#else // !defined(__HIP__MI300__) TODO: Add NAVI support +template +__global__ void wvSplitKQ_hf_sml_(const int K, const int Kp, const int M, + const fp8_t* B, const fp8_t* __restrict__ A, + scalar_t* C, const float* __restrict__ s_A, + const float* __restrict__ s_B, + const int _WvPrGrp, const int CuCount) { + UNREACHABLE_CODE +} +#endif // defined(__HIP__MI300__) TODO: Add NAVI support + +#if defined(__HIP__MI300__) // TODO: Add NAVI support +template +__global__ void __launch_bounds__(WvPrGrp* THRDS) + wvSplitKQ_hf_(const int K, const int Kp, const int M, const fp8_t* B, + const fp8_t* __restrict__ A, scalar_t* C, + const float* __restrict__ s_A, const float* __restrict__ s_B, + const int _WvPrGrp, const int CuCount) { + using scalar8 = + __attribute__((__vector_size__((A_CHUNK / 4) * sizeof(float)))) float; + using intx2 = __attribute__((__vector_size__(2 * sizeof(int)))) int; + using intx4 = __attribute__((__vector_size__(4 * sizeof(int)))) int; + union bigType { + char f8[A_CHUNK]; + char2 c2[A_CHUNK / 2]; + scalar_t h[A_CHUNK / 2]; + float f[A_CHUNK / 4]; + int i[A_CHUNK / 4]; + long l[A_CHUNK / 8]; + intx4 l2[A_CHUNK / 16]; + scalar8 h8; + }; + + __shared__ fp8_t s[1024 * 64]; + + for (uint32_t k = (threadIdx.y * THRDS + threadIdx.x) * A_CHUNK; + k < min(K * N, 64 * 1024); k += THRDS * WvPrGrp * A_CHUNK) { + *((bigType*)(&s[k])) = *((bigType*)(&A[k])); + } + __syncthreads(); + + if (threadIdx.y >= _WvPrGrp) return; + + uint32_t m = (blockIdx.x * _WvPrGrp + (threadIdx.y % _WvPrGrp)) * YTILE; + + using floatx16 = __attribute__((__vector_size__(16 * sizeof(float)))) float; + floatx16 sum[N][YTILE]; + float sA = *s_A; + float sB = *s_B; + + while (m < M) { + for (int i = 0; i < YTILE; i++) + for (int n = 0; n < N; n++) sum[n][i] = {0}; + + bigType bigA[N][UNRL]; + bigType bigB[YTILE][UNRL]; + + for (uint32_t k1 = 0; k1 < K; k1 += THRDS * A_CHUNK * UNRL) { + // Fetch the weight matrix from memory! + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + const fp8_t* B_ = &B[(m + 0) * Kp + k_]; + for (int y = 0; y < YTILE; ++y) { + if (y + m >= M) break; // To avoid mem access fault. 
+ bigB[y][k2].h8 = (loadnt((scalar8*)(&B_[y * Kp]))); + } + } + + // Fetch activation matrix from either just LDS or from both LDS / memory + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + for (int n = 0; n < N; n++) { + if (k_ + K * n < 64 * 1024) + bigA[n][k2] = *((const bigType*)(&(s[k_ + K * n]))); + else + bigA[n][k2] = *((const bigType*)(&(A[k_ + K * n]))); + } + } + + // Do the matrix multiplication in interleaved manner + #pragma unroll + for (uint32_t k2 = 0; k2 < UNRL; k2++) { + uint32_t k = k1 + k2 * THRDS * A_CHUNK; + uint32_t k_ = k + threadIdx.x * A_CHUNK; + if (k_ >= K) break; + + for (uint32_t n = 0; n < N; n++) { + for (int i = 0; i < A_CHUNK; i += 8) { + for (int y = 0; y < YTILE; ++y) { + sum[n][y] = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8( + bigA[n][k2].l[i / 8], bigB[y][k2].l[i / 8], sum[n][y], 0, 0, + 0); + } + } + } + } + } + + // Final reduction + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + float accm0 = sum[n][y][0]; + float accm16 = sum[n][y][8]; + asm("v_add_f32 %0, %2, %3 row_shl:1 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][1]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:1 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][9]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:2 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][2]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:2 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][10]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:3 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][3]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:3 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][11]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:8 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][4]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:8 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][12]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:9 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][5]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:9 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][13]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:10 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][6]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:10 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][14]), "v"(accm16)); + asm("v_add_f32 %0, %2, %3 row_shl:11 bound_ctrl:0 " + : "=v"(accm0) + : "0"(accm0), "v"(sum[n][y][7]), "v"(accm0)); + asm("v_add_f32 %0, %2, %3 row_shl:11 bound_ctrl:0 " + : "=v"(accm16) + : "0"(accm16), "v"(sum[n][y][15]), "v"(accm16)); + accm0 += __shfl(accm0, 36); + accm16 += __shfl(accm16, 52); + sum[n][y][0] = accm0 + __shfl(accm16, 16); + } + } + + if (threadIdx.x == 0) { + for (int n = 0; n < N; n++) { + for (int y = 0; y < YTILE; y++) { + if (y + m >= M) break; // To avoid mem access fault. 
+ C[m + y + n * M] = __float2s(sum[n][y][0] * sA * sB); + } + } + } + + m += CuCount * _WvPrGrp * YTILE; + } +} +#else // !defined(__HIP__MI300__) TODO: Add NAVI support +template +__global__ void wvSplitKQ_hf_(const int K, const int Kp, const int M, + const fp8_t* B, const fp8_t* __restrict__ A, + scalar_t* C, const float* __restrict__ s_A, + const float* __restrict__ s_B, const int _WvPrGrp, + const int CuCount) { + UNREACHABLE_CODE +} +#endif // defined(__HIP__MI300__) TODO: Add NAVI support + +void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, + at::Tensor& scale_a, at::Tensor& scale_b, + const int64_t CuCount) { + static c10::ScalarType kFp8Type = is_fp8_ocp() + ? c10::ScalarType::Float8_e4m3fn + : c10::ScalarType::Float8_e4m3fnuz; + auto M_in = in_a.size(0); + auto K_in = in_a.size(1); + auto N_in = in_b.size(0); + auto Kp_in = in_a.stride(0); + TORCH_CHECK(K_in % 16 == 0, "k % 16 == 0"); + TORCH_CHECK(in_a.dtype() == in_b.dtype() && in_a.dtype() == kFp8Type); + TORCH_CHECK(out_c.dtype() == torch::kFloat16 || + out_c.dtype() == torch::kBFloat16); + + dim3 grid(CuCount); + const at::cuda::OptionalCUDAGuard device_guard(device_of(in_a)); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + +#define WVSPLITKQ(_WvPrGrp, _YTILEs, _YTILEm, _YTILEb, _UNRLs, _UNRLm, _UNRLb, \ + _N) \ + { \ + dim3 block(64, _WvPrGrp); \ + if ((K_in * N_in <= 64 * 1024) && (M_in % _YTILEs == 0)) { \ + int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \ + wvSplitKQ_hf_sml_ \ + <<>>(K_in, Kp_in, M_in, a_ptr, b_ptr, c_ptr, \ + s_a, s_b, __wvPrGrp, CuCount); \ + } else { \ + int __wvPrGrp = mindiv(M_in, CuCount * _YTILEm, _WvPrGrp); \ + wvSplitKQ_hf_ \ + <<>>(K_in, Kp_in, M_in, a_ptr, b_ptr, c_ptr, \ + s_a, s_b, __wvPrGrp, CuCount); \ + } \ + } + + AT_DISPATCH_REDUCED_FLOATING_TYPES(out_c.scalar_type(), "wvSplitKQ", [&] { + using fptype = typename scalar::type; + auto c_ptr = reinterpret_cast(out_c.data_ptr()); + auto s_a = scale_a.data_ptr(); + auto s_b = scale_b.data_ptr(); + VLLM_DISPATCH_FP8_TYPES(in_a.scalar_type(), "wvSplitKQ", [&] { + auto a_ptr = in_a.data_ptr(); + auto b_ptr = in_b.data_ptr(); + switch (N_in) { + case 1: + WVSPLITKQ(16, 2, 2, 2, 2, 2, 2, 1) + break; + case 2: + WVSPLITKQ(16, 2, 2, 2, 2, 2, 2, 2) + break; + case 3: + WVSPLITKQ(16, 4, 7, 7, 1, 1, 1, 3) + break; + case 4: + WVSPLITKQ(16, 4, 7, 7, 1, 1, 1, 4) + break; + default: + throw std::runtime_error( + "Unsupported N value: " + std::to_string(M_in) + "," + + std::to_string(K_in) + "," + std::to_string(N_in)); + } + }); + }); +} diff --git a/csrc/rocm/torch_bindings.cpp b/csrc/rocm/torch_bindings.cpp index 537e9357d52..4ac6fd1e994 100644 --- a/csrc/rocm/torch_bindings.cpp +++ b/csrc/rocm/torch_bindings.cpp @@ -14,6 +14,24 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, rocm_ops) { // vLLM custom ops for rocm + // Custom gemm op for matrix-vector multiplication + rocm_ops.def( + "LLMM1(Tensor in_a, Tensor in_b, int rows_per_block) -> " + "Tensor"); + rocm_ops.impl("LLMM1", torch::kCUDA, &LLMM1); + + // Custom gemm op for skinny matrix-matrix multiplication + rocm_ops.def( + "wvSplitK(Tensor in_a, Tensor in_b, int CuCount) -> " + "Tensor"); + rocm_ops.impl("wvSplitK", torch::kCUDA, &wvSplitK); + + // wvSplitK for fp8 + rocm_ops.def( + "wvSplitKQ(Tensor in_a, Tensor in_b, Tensor! 
out_c, Tensor scale_a, " + " Tensor scale_b, int CuCount) -> ()"); + rocm_ops.impl("wvSplitKQ", torch::kCUDA, &wvSplitKQ); + // Custom attention op // Compute the attention between an input query and the cached // keys/values using PagedAttention. diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index b6ff6a006c0..5ed33097672 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -81,9 +81,13 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { // Activation ops // Activation function used in SwiGLU. - ops.def("silu_and_mul(Tensor! out, Tensor input) -> ()"); + ops.def("silu_and_mul(Tensor! result, Tensor input) -> ()"); ops.impl("silu_and_mul", torch::kCUDA, &silu_and_mul); + ops.def( + "silu_and_mul_quant(Tensor! result, Tensor input, Tensor scale) -> ()"); + ops.impl("silu_and_mul_quant", torch::kCUDA, &silu_and_mul_quant); + ops.def("mul_and_silu(Tensor! out, Tensor input) -> ()"); ops.impl("mul_and_silu", torch::kCUDA, &mul_and_silu); @@ -443,6 +447,13 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { ops.def("cutlass_sparse_compress(Tensor a) -> Tensor[]"); ops.impl("cutlass_sparse_compress", &cutlass_sparse_compress); + // CUTLASS MLA decode + ops.def( + "cutlass_mla_decode(Tensor! out, Tensor q_nope, Tensor q_pe," + " Tensor kv_c_and_k_pe_cache, Tensor seq_lens," + " Tensor page_table, float scale) -> ()"); + ops.impl("cutlass_mla_decode", torch::kCUDA, &cutlass_mla_decode); + // Mamba selective scan kernel ops.def( "selective_scan_fwd(Tensor! u, Tensor! delta," diff --git a/docker/Dockerfile b/docker/Dockerfile index d1ecef586d5..17adb7a92dc 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,11 +5,11 @@ # docs/source/contributing/dockerfile/dockerfile.md and # docs/source/assets/contributing/dockerfile-stages-dependency.png -ARG CUDA_VERSION=12.4.1 +ARG CUDA_VERSION=12.8.1 #################### BASE BUILD IMAGE #################### # prepare basic build environment FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base -ARG CUDA_VERSION=12.4.1 +ARG CUDA_VERSION=12.8.1 ARG PYTHON_VERSION=3.12 ARG TARGETPLATFORM ENV DEBIAN_FRONTEND=noninteractive @@ -19,7 +19,10 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ && apt-get update -y \ && apt-get install -y ccache software-properties-common git curl sudo \ - && add-apt-repository ppa:deadsnakes/ppa \ + && for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done \ && apt-get update -y \ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ @@ -34,6 +37,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 +ENV UV_INDEX_STRATEGY="unsafe-best-match" # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519 # as it was causing spam when compiling the CUTLASS kernels @@ -66,7 +70,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \ COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system -r requirements/cuda.txt + uv pip install 
--system -r requirements/cuda.txt \ + --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # cuda arch list used by torch # can be useful for both `dev` and `test` @@ -89,9 +94,11 @@ COPY requirements/build.txt requirements/build.txt # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 +ENV UV_INDEX_STRATEGY="unsafe-best-match" RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system -r requirements/build.txt + uv pip install --system -r requirements/build.txt \ + --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') COPY . . ARG GIT_REPO_CHECK=0 @@ -158,19 +165,25 @@ FROM base as dev # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 +ENV UV_INDEX_STRATEGY="unsafe-best-match" + +# Workaround for #17068 +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4" COPY requirements/lint.txt requirements/lint.txt COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system -r requirements/dev.txt + uv pip install --system -r requirements/dev.txt \ + --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### # image with vLLM installed # TODO: Restore to base image after FlashInfer AOT wheel fixed FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS vllm-base -ARG CUDA_VERSION=12.4.1 +ARG CUDA_VERSION=12.8.1 ARG PYTHON_VERSION=3.12 WORKDIR /vllm-workspace ENV DEBIAN_FRONTEND=noninteractive @@ -185,7 +198,10 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && apt-get update -y \ && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \ && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \ - && add-apt-repository ppa:deadsnakes/ppa \ + && for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done \ && apt-get update -y \ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \ && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ @@ -200,6 +216,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 +ENV UV_INDEX_STRATEGY="unsafe-best-match" # Workaround for https://github.com/openai/triton/issues/2507 and # https://github.com/pytorch/pytorch/issues/107960 -- hopefully @@ -220,7 +237,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Install vllm wheel first, so that torch etc will be installed. 
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system dist/*.whl --verbose + uv pip install --system dist/*.whl --verbose \ + --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # If we need to build FlashInfer wheel before its release: # $ export FLASHINFER_ENABLE_AOT=1 @@ -237,9 +255,17 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \ - uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \ + # TESTING: install FlashInfer from source to test 2.7.0 final RC + FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \ + uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@v0.2.2.post1" ; \ fi COPY examples examples +COPY benchmarks benchmarks +COPY ./vllm/collect_env.py . + +RUN --mount=type=cache,target=/root/.cache/uv \ +. /etc/environment && \ +uv pip list # Although we build Flashinfer with AOT mode, there's still # some issues w.r.t. JIT compilation. Therefore we need to @@ -247,7 +273,8 @@ COPY examples examples # TODO: Remove this once FlashInfer AOT wheel is fixed COPY requirements/build.txt requirements/build.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system -r requirements/build.txt + uv pip install --system -r requirements/build.txt \ + --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') #################### vLLM installation IMAGE #################### @@ -261,6 +288,11 @@ ADD . /vllm-workspace/ # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 ENV UV_HTTP_TIMEOUT=500 +ENV UV_INDEX_STRATEGY="unsafe-best-match" + +# Workaround for #17068 +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.4" # install development dependencies (for testing) RUN --mount=type=cache,target=/root/.cache/uv \ @@ -289,6 +321,7 @@ RUN mv vllm test_docs/ #################### OPENAI API SERVER #################### # base openai image with additional requirements, for any subsequent openai-style images FROM vllm-base AS vllm-openai-base +ARG TARGETPLATFORM # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 54d1ce86d01..c647d9036f4 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -121,6 +121,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ADD ./tests/ ./tests/ ADD ./examples/ ./examples/ ADD ./benchmarks/ ./benchmarks/ +ADD ./vllm/collect_env.py . 
# install development dependencies (for testing) RUN --mount=type=cache,target=/root/.cache/uv \ diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch new file mode 100644 index 00000000000..6989106c429 --- /dev/null +++ b/docker/Dockerfile.nightly_torch @@ -0,0 +1,313 @@ +# The vLLM Dockerfile is used to construct vLLM image against torch nightly that can be directly used for testing + +# for torch nightly, cuda >=12.6 is required, +# use 12.8 due to FlashAttention issue with cuda 12.6 (https://github.com/vllm-project/vllm/issues/15435#issuecomment-2775924628) +ARG CUDA_VERSION=12.8.0 +# +#################### BASE BUILD IMAGE #################### +# prepare basic build environment +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base +ARG CUDA_VERSION=12.8.0 +ARG PYTHON_VERSION=3.12 +ARG TARGETPLATFORM +ENV DEBIAN_FRONTEND=noninteractive +# Install Python and other dependencies +RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ + && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ + && apt-get update -y \ + && apt-get install -y ccache software-properties-common git curl sudo \ + && for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done \ + && apt-get update -y \ + && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ + && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ + && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ + && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ + && python3 --version \ + && python3 -m pip --version +# Install uv for faster pip installs +RUN --mount=type=cache,target=/root/.cache/uv \ + python3 -m pip install uv + +# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out +# Reference: https://github.com/astral-sh/uv/pull/1694 +ENV UV_HTTP_TIMEOUT=500 + +# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519 +# as it was causing spam when compiling the CUTLASS kernels +RUN apt-get install -y gcc-10 g++-10 +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 +RUN < torch_build_versions.txt +RUN cat torch_build_versions.txt + +# cuda arch list used by torch +# can be useful for `test` +# explicitly set the list to avoid issues with torch 2.2 +# see https://github.com/pytorch/pytorch/pull/123243 + +# Override the arch list for flash-attn to reduce the binary size +ARG vllm_fa_cmake_gpu_arches='80-real;90-real' +ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches} +#################### BASE BUILD IMAGE #################### + +#################### WHEEL BUILD IMAGE #################### +FROM base AS build +ARG TARGETPLATFORM + +# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out +# Reference: https://github.com/astral-sh/uv/pull/1694 +ENV UV_HTTP_TIMEOUT=500 + +COPY . . 
+ +RUN python3 use_existing_torch.py + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system -r requirements/build.txt + +ARG GIT_REPO_CHECK=0 +RUN --mount=type=bind,source=.git,target=.git \ + if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi + +# Max jobs used by Ninja to build extensions +ARG max_jobs=16 +ENV MAX_JOBS=${max_jobs} +ARG nvcc_threads=2 +ENV NVCC_THREADS=$nvcc_threads + +ARG USE_SCCACHE +ARG SCCACHE_BUCKET_NAME=vllm-build-sccache +ARG SCCACHE_REGION_NAME=us-west-2 +ARG SCCACHE_S3_NO_CREDENTIALS=0 + +# if USE_SCCACHE is set, use sccache to speed up compilation +RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,source=.git,target=.git \ + if [ "$USE_SCCACHE" = "1" ]; then \ + echo "Installing sccache..." \ + && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \ + && tar -xzf sccache.tar.gz \ + && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ + && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ + && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ + && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ + && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ + && export SCCACHE_IDLE_TIMEOUT=0 \ + && export CMAKE_BUILD_TYPE=Release \ + && sccache --show-stats \ + && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \ + && sccache --show-stats; \ + fi + +ENV CCACHE_DIR=/root/.cache/ccache +RUN --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,source=.git,target=.git \ + if [ "$USE_SCCACHE" != "1" ]; then \ + # Clean any existing CMake artifacts + rm -rf .deps && \ + mkdir -p .deps && \ + python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \ + fi + +#################### WHEEL BUILD IMAGE #################### + +################### VLLM INSTALLED IMAGE #################### +# Setup clean environment for vLLM and its dependencies for test and api server using ubuntu22.04 with AOT flashinfer +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS vllm-base +# prepare for environment starts +ARG CUDA_VERSION=12.8.0 +ARG PYTHON_VERSION=3.12 +WORKDIR /vllm-workspace +ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETPLATFORM + +RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ + echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment + +# Install Python and other dependencies +RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ + && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ + && apt-get update -y \ + && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \ + && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \ + && for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done \ + && apt-get update -y \ + && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ + && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ + && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ + && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ + && python3 --version && python3 -m pip 
--version + +RUN --mount=type=cache,target=/root/.cache/uv \ + python3 -m pip install uv + +# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out +# Reference: https://github.com/astral-sh/uv/pull/1694 +ENV UV_HTTP_TIMEOUT=500 + +# Workaround for https://github.com/openai/triton/issues/2507 and +# https://github.com/pytorch/pytorch/issues/107960 -- hopefully +# this won't be needed for future versions of this docker image +# or future versions of triton. +RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ + +# get the nightly torch version used in the build to make sure the version is the same +COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu128 + +# install the vllm wheel +RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/vllm-dist \ + --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system vllm-dist/*.whl --verbose + +# install xformers again for the new environment +RUN --mount=type=bind,from=base,src=/workspace/xformers-dist,target=/vllm-workspace/xformers-dist \ + --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system /vllm-workspace/xformers-dist/*.whl --verbose + +ARG torch_cuda_arch_list='8.0;8.6;8.9;9.0' + +# install package for build flashinfer +# see issue: https://github.com/flashinfer-ai/flashinfer/issues/738 +RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.post1 + + +# build flashinfer for torch nightly from source around 10 mins +# release version: v0.2.2.post1 +# todo(elainewy): cache flashinfer build result for faster build +ENV CCACHE_DIR=/root/.cache/ccache +RUN --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=cache,target=/root/.cache/uv \ + echo "git clone flashinfer..." \ + && git clone --recursive https://github.com/flashinfer-ai/flashinfer.git \ + && cd flashinfer \ + && git checkout v0.2.2.post1 \ + && git submodule update --init --recursive \ + && echo "finish git clone flashinfer..." \ + && rm -rf build \ + && export TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} \ + && FLASHINFER_ENABLE_AOT=1 python3 setup.py bdist_wheel --dist-dir=../flashinfer-dist --verbose \ + && cd .. \ + && rm -rf flashinfer + +# install flashinfer +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install --system flashinfer-dist/*.whl --verbose + +# install common packages +COPY requirements/common.txt requirements/common.txt +COPY use_existing_torch.py use_existing_torch.py +COPY pyproject.toml pyproject.toml + +COPY examples examples +COPY benchmarks benchmarks +COPY ./vllm/collect_env.py . 
+
+RUN python3 use_existing_torch.py
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system -r requirements/common.txt
+
+################### VLLM INSTALLED IMAGE ####################
+
+
+#################### UNITTEST IMAGE #############################
+FROM vllm-base as test
+COPY tests/ tests/
+
+# install build and runtime dependencies without stable torch version
+COPY requirements/nightly_torch_test.txt requirements/nightly_torch_test.txt
+
+# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
+# Reference: https://github.com/astral-sh/uv/pull/1694
+ENV UV_HTTP_TIMEOUT=500
+
+# install development dependencies (for testing)
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system -e tests/vllm_test_utils
+
+# enable fast downloads from hf (for testing)
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system hf_transfer
+ENV HF_HUB_ENABLE_HF_TRANSFER 1
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install --system -r requirements/nightly_torch_test.txt
+
+#################### UNITTEST IMAGE #############################
+
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index f9ebb10ca87..e60cf5e69a4 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -114,8 +114,16 @@ COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
 ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
 ENV TOKENIZERS_PARALLELISM=false
 
+# ENV that can improve safetensors loading and end-to-end time
+ENV SAFETENSORS_FAST_GPU=1
+
+# User-friendly environment setting for multiprocessing to avoid the RuntimeError below.
+# RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing,
+# you must use the 'spawn' start method
+# See https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+
 # Performance environment variable.
ENV HIP_FORCE_DEV_KERNARG=1 CMD ["/bin/bash"] - diff --git a/docker/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base index b8523fbc2a0..12009b8aa04 100644 --- a/docker/Dockerfile.rocm_base +++ b/docker/Dockerfile.rocm_base @@ -12,7 +12,7 @@ ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git" ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" ARG FA_BRANCH="1a7f4dfa" ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git" -ARG AITER_BRANCH="8970b25b" +ARG AITER_BRANCH="7e1ed08" ARG AITER_REPO="https://github.com/ROCm/aiter.git" FROM ${BASE_IMAGE} AS base @@ -32,7 +32,10 @@ ENV DEBIAN_FRONTEND=noninteractive # Install Python and other dependencies RUN apt-get update -y \ && apt-get install -y software-properties-common git curl sudo vim less libgfortran5 \ - && add-apt-repository ppa:deadsnakes/ppa \ + && for i in 1 2 3; do \ + add-apt-repository -y ppa:deadsnakes/ppa && break || \ + { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ + done \ && apt-get update -y \ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ python${PYTHON_VERSION}-lib2to3 python-is-python3 \ diff --git a/docker/Dockerfile.s390x b/docker/Dockerfile.s390x index 128929ac333..9c10cd56b59 100644 --- a/docker/Dockerfile.s390x +++ b/docker/Dockerfile.s390x @@ -16,7 +16,7 @@ ENV LANG=C.UTF-8 \ RUN microdnf install -y \ which procps findutils tar vim git gcc gcc-gfortran g++ make patch zlib-devel \ libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \ - openssl-devel openblas openblas-devel autoconf automake libtool cmake && \ + openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy && \ microdnf clean all # Python Installation @@ -123,6 +123,7 @@ ENV UV_LINK_MODE=copy ENV CARGO_HOME=/root/.cargo ENV RUSTUP_HOME=/root/.rustup ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH" +ENV GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1 COPY . /workspace/vllm WORKDIR /workspace/vllm diff --git a/docker/Dockerfile.tpu b/docker/Dockerfile.tpu index 50806d8820a..295270d29f7 100644 --- a/docker/Dockerfile.tpu +++ b/docker/Dockerfile.tpu @@ -23,7 +23,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=.git,target=.git \ python3 -m pip install \ -r requirements/tpu.txt -RUN python3 setup.py develop +RUN python3 -m pip install -e . # install development dependencies (for testing) RUN python3 -m pip install -e tests/vllm_test_utils diff --git a/docker/Dockerfile.xpu b/docker/Dockerfile.xpu index ad4abf16b43..681102b9d18 100644 --- a/docker/Dockerfile.xpu +++ b/docker/Dockerfile.xpu @@ -40,12 +40,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=.git,target=.git \ python3 setup.py install -# Please refer xpu doc, we need manually install intel-extension-for-pytorch 2.6.10+xpu due to there are some conflict dependencies with torch 2.6.0+xpu -# FIXME: This will be fix in ipex 2.7. just leave this here for awareness. 
-RUN --mount=type=cache,target=/root/.cache/pip \ - pip install intel-extension-for-pytorch==2.6.10+xpu \ - --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ - CMD ["/bin/bash"] FROM vllm-base AS vllm-openai diff --git a/docs/Makefile b/docs/Makefile index 5b801f79d1f..d3b429dfb92 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -22,3 +22,4 @@ help: clean: @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) rm -rf "$(SOURCEDIR)/getting_started/examples" + rm -rf "$(SOURCEDIR)/api/vllm" diff --git a/docs/source/api/engine/async_llm_engine.md b/docs/source/api/engine/async_llm_engine.md deleted file mode 100644 index 904feaa5051..00000000000 --- a/docs/source/api/engine/async_llm_engine.md +++ /dev/null @@ -1,7 +0,0 @@ -# AsyncLLMEngine - -```{eval-rst} -.. autoclass:: vllm.AsyncLLMEngine - :members: - :show-inheritance: -``` diff --git a/docs/source/api/engine/index.md b/docs/source/api/engine/index.md deleted file mode 100644 index b6544d94afd..00000000000 --- a/docs/source/api/engine/index.md +++ /dev/null @@ -1,17 +0,0 @@ -# vLLM Engine - -```{eval-rst} -.. automodule:: vllm.engine -``` - -```{eval-rst} -.. currentmodule:: vllm.engine -``` - -:::{toctree} -:caption: Engines -:maxdepth: 2 - -llm_engine -async_llm_engine -::: diff --git a/docs/source/api/engine/llm_engine.md b/docs/source/api/engine/llm_engine.md deleted file mode 100644 index d6613ef5562..00000000000 --- a/docs/source/api/engine/llm_engine.md +++ /dev/null @@ -1,7 +0,0 @@ -# LLMEngine - -```{eval-rst} -.. autoclass:: vllm.LLMEngine - :members: - :show-inheritance: -``` diff --git a/docs/source/api/inference_params.md b/docs/source/api/inference_params.md deleted file mode 100644 index 181c30cab9c..00000000000 --- a/docs/source/api/inference_params.md +++ /dev/null @@ -1,21 +0,0 @@ -# Inference Parameters - -Inference parameters for vLLM APIs. - -(sampling-params)= - -## Sampling Parameters - -```{eval-rst} -.. autoclass:: vllm.SamplingParams - :members: -``` - -(pooling-params)= - -## Pooling Parameters - -```{eval-rst} -.. autoclass:: vllm.PoolingParams - :members: -``` diff --git a/docs/source/api/model/adapters.md b/docs/source/api/model/adapters.md deleted file mode 100644 index e103a51d007..00000000000 --- a/docs/source/api/model/adapters.md +++ /dev/null @@ -1,9 +0,0 @@ -# Model Adapters - -## Module Contents - -```{eval-rst} -.. automodule:: vllm.model_executor.models.adapters - :members: - :member-order: bysource -``` diff --git a/docs/source/api/model/index.md b/docs/source/api/model/index.md deleted file mode 100644 index 8fee3a55c93..00000000000 --- a/docs/source/api/model/index.md +++ /dev/null @@ -1,11 +0,0 @@ -# Model Development - -## Submodules - -:::{toctree} -:maxdepth: 1 - -interfaces_base -interfaces -adapters -::: diff --git a/docs/source/api/model/interfaces.md b/docs/source/api/model/interfaces.md deleted file mode 100644 index 55bee57f64f..00000000000 --- a/docs/source/api/model/interfaces.md +++ /dev/null @@ -1,9 +0,0 @@ -# Optional Interfaces - -## Module Contents - -```{eval-rst} -.. automodule:: vllm.model_executor.models.interfaces - :members: - :member-order: bysource -``` diff --git a/docs/source/api/model/interfaces_base.md b/docs/source/api/model/interfaces_base.md deleted file mode 100644 index 75d58d34228..00000000000 --- a/docs/source/api/model/interfaces_base.md +++ /dev/null @@ -1,9 +0,0 @@ -# Base Model Interfaces - -## Module Contents - -```{eval-rst} -.. 
automodule:: vllm.model_executor.models.interfaces_base - :members: - :member-order: bysource -``` diff --git a/docs/source/api/multimodal/index.md b/docs/source/api/multimodal/index.md deleted file mode 100644 index 069ed53e545..00000000000 --- a/docs/source/api/multimodal/index.md +++ /dev/null @@ -1,28 +0,0 @@ -(multi-modality)= - -# Multi-Modality - -vLLM provides experimental support for multi-modal models through the {mod}`vllm.multimodal` package. - -Multi-modal inputs can be passed alongside text and token prompts to [supported models](#supported-mm-models) -via the `multi_modal_data` field in {class}`vllm.inputs.PromptType`. - -Looking to add your own multi-modal model? Please follow the instructions listed [here](#supports-multimodal). - -## Module Contents - -```{eval-rst} -.. autodata:: vllm.multimodal.MULTIMODAL_REGISTRY -``` - -## Submodules - -:::{toctree} -:maxdepth: 1 - -inputs -parse -processing -profiling -registry -::: diff --git a/docs/source/api/multimodal/inputs.md b/docs/source/api/multimodal/inputs.md deleted file mode 100644 index 21bd938be9e..00000000000 --- a/docs/source/api/multimodal/inputs.md +++ /dev/null @@ -1,49 +0,0 @@ -# Input Definitions - -## User-facing inputs - -```{eval-rst} -.. autodata:: vllm.multimodal.inputs.MultiModalDataDict -``` - -## Internal data structures - -```{eval-rst} -.. autoclass:: vllm.multimodal.inputs.PlaceholderRange - :members: - :show-inheritance: -``` - -```{eval-rst} -.. autodata:: vllm.multimodal.inputs.NestedTensors -``` - -```{eval-rst} -.. autoclass:: vllm.multimodal.inputs.MultiModalFieldElem - :members: - :show-inheritance: -``` - -```{eval-rst} -.. autoclass:: vllm.multimodal.inputs.MultiModalFieldConfig - :members: - :show-inheritance: -``` - -```{eval-rst} -.. autoclass:: vllm.multimodal.inputs.MultiModalKwargsItem - :members: - :show-inheritance: -``` - -```{eval-rst} -.. autoclass:: vllm.multimodal.inputs.MultiModalKwargs - :members: - :show-inheritance: -``` - -```{eval-rst} -.. autoclass:: vllm.multimodal.inputs.MultiModalInputs - :members: - :show-inheritance: -``` diff --git a/docs/source/api/multimodal/parse.md b/docs/source/api/multimodal/parse.md deleted file mode 100644 index 4676139efe6..00000000000 --- a/docs/source/api/multimodal/parse.md +++ /dev/null @@ -1,9 +0,0 @@ -# Data Parsing - -## Module Contents - -```{eval-rst} -.. automodule:: vllm.multimodal.parse - :members: - :member-order: bysource -``` diff --git a/docs/source/api/multimodal/processing.md b/docs/source/api/multimodal/processing.md deleted file mode 100644 index 0d81c8d3966..00000000000 --- a/docs/source/api/multimodal/processing.md +++ /dev/null @@ -1,9 +0,0 @@ -# Data Processing - -## Module Contents - -```{eval-rst} -.. automodule:: vllm.multimodal.processing - :members: - :member-order: bysource -``` diff --git a/docs/source/api/multimodal/profiling.md b/docs/source/api/multimodal/profiling.md deleted file mode 100644 index b4551452122..00000000000 --- a/docs/source/api/multimodal/profiling.md +++ /dev/null @@ -1,9 +0,0 @@ -# Memory Profiling - -## Module Contents - -```{eval-rst} -.. automodule:: vllm.multimodal.profiling - :members: - :member-order: bysource -``` diff --git a/docs/source/api/multimodal/registry.md b/docs/source/api/multimodal/registry.md deleted file mode 100644 index 0737a4385cf..00000000000 --- a/docs/source/api/multimodal/registry.md +++ /dev/null @@ -1,9 +0,0 @@ -# Registry - -## Module Contents - -```{eval-rst} -.. 
automodule:: vllm.multimodal.registry - :members: - :member-order: bysource -``` diff --git a/docs/source/api/offline_inference/index.md b/docs/source/api/offline_inference/index.md deleted file mode 100644 index ec2cc599d92..00000000000 --- a/docs/source/api/offline_inference/index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Offline Inference - -:::{toctree} -:caption: Contents -:maxdepth: 1 - -llm -llm_inputs -::: diff --git a/docs/source/api/offline_inference/llm.md b/docs/source/api/offline_inference/llm.md deleted file mode 100644 index 9f129d5e416..00000000000 --- a/docs/source/api/offline_inference/llm.md +++ /dev/null @@ -1,7 +0,0 @@ -# LLM Class - -```{eval-rst} -.. autoclass:: vllm.LLM - :members: - :show-inheritance: -``` diff --git a/docs/source/api/offline_inference/llm_inputs.md b/docs/source/api/offline_inference/llm_inputs.md deleted file mode 100644 index 21f688a12c5..00000000000 --- a/docs/source/api/offline_inference/llm_inputs.md +++ /dev/null @@ -1,19 +0,0 @@ -# LLM Inputs - -```{eval-rst} -.. autodata:: vllm.inputs.PromptType -``` - -```{eval-rst} -.. autoclass:: vllm.inputs.TextPrompt - :show-inheritance: - :members: - :member-order: bysource -``` - -```{eval-rst} -.. autoclass:: vllm.inputs.TokensPrompt - :show-inheritance: - :members: - :member-order: bysource -``` diff --git a/docs/source/api/summary.md b/docs/source/api/summary.md new file mode 100644 index 00000000000..46de545f9de --- /dev/null +++ b/docs/source/api/summary.md @@ -0,0 +1,133 @@ +# Summary + +(configuration)= + +## Configuration + +API documentation for vLLM's configuration classes. + +```{autodoc2-summary} + vllm.config.ModelConfig + vllm.config.CacheConfig + vllm.config.TokenizerPoolConfig + vllm.config.LoadConfig + vllm.config.ParallelConfig + vllm.config.SchedulerConfig + vllm.config.DeviceConfig + vllm.config.SpeculativeConfig + vllm.config.LoRAConfig + vllm.config.PromptAdapterConfig + vllm.config.MultiModalConfig + vllm.config.PoolerConfig + vllm.config.DecodingConfig + vllm.config.ObservabilityConfig + vllm.config.KVTransferConfig + vllm.config.CompilationConfig + vllm.config.VllmConfig +``` + +(offline-inference-api)= + +## Offline Inference + +LLM Class. + +```{autodoc2-summary} + vllm.LLM +``` + +LLM Inputs. + +```{autodoc2-summary} + vllm.inputs.PromptType + vllm.inputs.TextPrompt + vllm.inputs.TokensPrompt +``` + +## vLLM Engines + +Engine classes for offline and online inference. + +```{autodoc2-summary} + vllm.LLMEngine + vllm.AsyncLLMEngine +``` + +## Inference Parameters + +Inference parameters for vLLM APIs. + +(sampling-params)= +(pooling-params)= + +```{autodoc2-summary} + vllm.SamplingParams + vllm.PoolingParams +``` + +(multi-modality)= + +## Multi-Modality + +vLLM provides experimental support for multi-modal models through the {mod}`vllm.multimodal` package. + +Multi-modal inputs can be passed alongside text and token prompts to [supported models](#supported-mm-models) +via the `multi_modal_data` field in {class}`vllm.inputs.PromptType`. + +Looking to add your own multi-modal model? Please follow the instructions listed [here](#supports-multimodal). + +```{autodoc2-summary} + vllm.multimodal.MULTIMODAL_REGISTRY +``` + +### Inputs + +User-facing inputs. + +```{autodoc2-summary} + vllm.multimodal.inputs.MultiModalDataDict +``` + +Internal data structures. 
+ +```{autodoc2-summary} + vllm.multimodal.inputs.PlaceholderRange + vllm.multimodal.inputs.NestedTensors + vllm.multimodal.inputs.MultiModalFieldElem + vllm.multimodal.inputs.MultiModalFieldConfig + vllm.multimodal.inputs.MultiModalKwargsItem + vllm.multimodal.inputs.MultiModalKwargs + vllm.multimodal.inputs.MultiModalInputs +``` + +### Data Parsing + +```{autodoc2-summary} + vllm.multimodal.parse +``` + +### Data Processing + +```{autodoc2-summary} + vllm.multimodal.processing +``` + +### Memory Profiling + +```{autodoc2-summary} + vllm.multimodal.profiling +``` + +### Registry + +```{autodoc2-summary} + vllm.multimodal.registry +``` + +## Model Development + +```{autodoc2-summary} + vllm.model_executor.models.interfaces_base + vllm.model_executor.models.interfaces + vllm.model_executor.models.adapters +``` diff --git a/docs/source/assets/deployment/anything-llm-chat-with-doc.png b/docs/source/assets/deployment/anything-llm-chat-with-doc.png new file mode 100644 index 0000000000000000000000000000000000000000..f9b57f5c3cecc92da660efaddb4e75d8f72160b3 GIT binary patch literal 120834 zcmeFZXIxX;7CnkcL_h=uL_vy;DxlIkO7Fc_MM~(sgeIUMAOa#FHS`vG37v>Y5u^nI z(n9Zq9y;W0&;Oot^?2WVpWe6k0e&HS?^WiSYpglPn42&)6~4fuQ+??NaTj{x{|0r;T-e(><{)A9fQ zhQKEM;@_VyTK>FP58HWwhbMt2FZEQ*>%#gZp|6(qSjRT=YxG6A@^qd!%Zro`-xh&< zo{petJ=W{tBXF2)3*1(pYW?l2SERzyF--Y9*VLbavJc|-({MzdpX4n+e0$0CdfxN& zbzSgp;9!#c+ZPDPCGdX#^_j$O zqBcf?bQARFwe*LV=jZ3i^I1u#gZTd4FwoC+vt^3Si~r-fcb9>O?okI>{*Q+UY`n<+ z$JZ88uOyUj)sgdD{C#{shnsIi`82MIM>xlXZcQ5&Qe9+#vcg&HTGG}z1d|$9w?A=p zjbOX`A|S2{Jlx>wE?nWqYg;D?G4WQcz{K84w(Fdaj8D{V-1#17HEE+a*3A1I{?V$W zaiW0sd$Y568wXcrb>qM70`#k(%2@0#BlEpeZmw%(xOC;}utlhhPN@nVOrtv7QGan! z$*|foQrVzlsKm7D>kTW54%BSBJG(xDd(9w0bXBkEnD=hCVOS-z z1eV|crXH!G!K@ng)K}l_GDJX9?LXBaT}%zYaJJ_CHeJ@bKUOLIE)h?$?zi}oo^pX! 
zh6kBpxAN6hlP(DJmOCgX>z!S?WqIV&TYJWyP+CS5abB@YeQJ^H`l8*(_aZ9R@^c%u z$o2#hKTxJJw4?LT(44}usDi}Az#=(fm1wR@u>l^4TJ8V5%2&it-H7Ve8LMCE?nyIo zs7riPQz{jH``zyz1uVQXk=sPwR*UH1d1p8*WlL# zgZBCcg$>n@5>nNGX0%vpdEMvcEFrCGF;N?X~Bpvo}se0;-$S))IFOq`4jERj46k37Uo2*%o;SxNee^cU#l6I#xqmhu>3Yeiq%nlg@vCv zGDNtb@$@Mb-C9k7NP?_)gaY_P=b$w;oK00S^JBuFo7ciLeNN@^$;YpDr%91@`{PJE zLkzunjUgeCpo5h)R2OMNl9O$ns;_(Tk=6Uy$96lJDgz4U6_s+N$z39an~` zoGA!(3*txY^UD-I{_M1YU)gg(8!$4_d+jnAZx6>0{NJ3)L|3!AHBl%!j%AAZ5wV5G zy8J|c?8C13&!35C?IE`%Q&&fnDg*siN1v7uDtXoKZzb)2O>}_75q!Ysj5I2}g>4Uu z2L?idUe^-m%1xh9wdt7Ub?$}H7WWIlsl=7y5Ngz48gbqEAKyU(OJ9%9 z4<1ehFBVj;ep<{OZ30`sRu{WbEmGzrP75aaex)U40AOIdOs=%bgY%HY_b@kzXM#9N3d;@o?C5qW0iLw^C1Tq znO+PPN1v$o$`eg=CbItjo;b8sZ!<*Yr$#MRFi6$Y>&@eA z`*O%2K{5ThkOz+k1)k|54Mz(r6b-eE%G0d#d_`CPU)8;Z)Iq_&1h5!WZqe-qsUjf1(bY z>;lMgZ*Txx7-qGRG+i_lu}Hu#9tjzdQk)6)PZ}zo4-vE{^)qsFA{}3rnS7U`FJBxgzd928eTyZRAf5mPxkKh()ttK zFH>?#q~|&@?BD(O0fmR3!vtI!$}hP5Ke{yn;-nd|b?JY_aX;e~W#CeQy5_Y%X2qyh zix-&m#qUSNA4mD+k4eBKjjY_ee-1z0=>k6Qo0Br?|1k_+;8Ko^vh*Kr^h#m|pTLIr z;5pa-7={gg94+s}9a9VNhI`4$fVtjSgw25|;-vwT+P0HIgy% zlgB;0JR@}Z9r;UCyZw!)F&!P>Z^Q3Zqy?Eg*FPKVJZ(X5&E0L16r70tEbR5Yf%ED4 z^kad$n~x!WPt&#_bKe+lQ;Rr=Hg7?KPLCKUu)V^ZJEyw|bVov^eqr>~zGCn+$MZgE zHN%;!GaZDo^P)HWjiPI&x7$QHPj|0Ym2cC*sE}=^PA|BMx3g2n&XV@(i#02#Gx4+jO+9 zsuOx9DyAcJiLBSr_Wj}e@g#8^HyuMFvEAE=rUle6q;;H{*Y<1s)*L?W?G@=pxm?d> z%q?}R!=w9sK5O2xT8k#HM-l=|=TF!+U2miz%uaR^%Bs<`&4^tE;%40H<9gjdn#quK zl9S#=-UCRdm--7)H{Q1>t$FcLNvEEn0RAfo91=yUbX4O<8H+H!l3*jadHj)ByG!cNEqI-rQ7smaCiH(KI0Zj1 z{r^w>2Y~)Rutt8$=f}5$fc8IzOF$s?sJ_lJA@@~-o+Kb&thR|asjJ9|Q^08byyKtc zq0(m@N%i&6iMJoC`Zio@YGpfI1dn6a(ev=|P|!zCC`Yd{{Y)S%WYT4Nl7_S8HEpXT z&4Zp;EEe{Y{|cCDcor)k+V)3GBkWp_v8)&FsKs8_A(qAqlmdBxn_;~6V-MX9$Et1Z zF-g4V3Db)`$sFC%Rlf(A65z{P9-_xEono6~a~F(|dyxlbEW?JH!@wHR2c?Fj7BLgp ze0U%)c$rb{?uD~i+kwnDRH1fv!fwYVVtiE__K-h8CE!8Og4wSllS`5?Fi1ib>-Kq| zEvQWw>)qGZM@*NPsy#^qld_FDiJ$!(BxRz*pe31R6=b0+4!+eV&cV#gJTNP8^*7{~ zE0D-hh$|XZ5!OFD#`=}w1YIppV(`A@^UO#n$FXS_J#P)sX@X$idRS}Fol*H#(-xAi zam@(9(;_5LZUIk6zJXXrCO@qROy;)<@pX3J1J;V{0ycZpwa-ILuF`4dnPIaV_(l zIEP*t;|r+CGS}h>rUq&Z`St+5*?f`|#P?JqdTE!dxy=9U_{e;YXgFV_m>??~@!9rj zgjV0@+g1E&vY<+~x)OC*O2pU=c2=_-XGentCT{M^{>JhymTw+{s|NXWNdxs|-V&5% znx0$>>S$~E6L&E--6J=ZH`wTY?=_*-+RW7`9aHJ?VZhYap*wxzIHTDoAJkuk2(S1( zi)Ol&^fB;PAIxDTKh4l@p_v^2p7OZ(D~kDczV~59Xp#Lm{5)6avcxh|gM2KL$AC2T z5$!oM6w<|ETFo>hhnX@zn8dWxI+Ue}`W%}{c}YmPY8tJ-H=Yg=6~XQezPUuAm5)#2 zvr(6aLEz4t-}7?bd~!7Ih)FQ@CqmV6Kv|ZZF}x#s%AO_{FI_3S#clc3P9^vfX;CtX zKd*;>rDadW_FM-w&5yVGYC*!l4zf5|uZ)7;1h4-n=kwf{_;jHZotH9;S*oS=TZoM3 zY59Qf4D6!v2a54OYBuUmS4wQFvJcu!d@vSt!fs`3oVB|P38wXr{SinQ8*w~7kFK%1 z_D9jSslDwEhVjmrdl3O<*Q5Hswj58usExNsIK$dBDSz3jml>m7E=+b0<*T=n1tUdO zvXhlu|N5JWJD2GX-0Hv7*-fW6%@Ae6$FUv!<%H3Y!acx~wd&1iE z!3eIVmlVt@4^a|57YJ{g@lSeXVP=`6>KDgs?a__ik1z5bOa&mwmap6vcx0I_BN}#o zet;MswMt!3K0lel9h>ODCdz70!OZ+6oA7amO8QXxRi7E7S%@L4nj{;G(~!DsBIFB8 z=k@y_$Eq`N+>vToL8t73u9Jfy6R9_`jwdpO=l2}Z%!jI9G;$F(Ali$}QB>d|V4IPM z3?g48-TlT4{XwBBw!*>do1>Yhl8fe}6ZWVYoQyoK0uQd|LH&0~Hzpfuu&b!iAE+HSW_V`qeJ2dD|*1K~UNTf&4UPtH5 zi0baGn9J6}*JQk>#~qoqYC=x(JHj9p58Xx8W^e7sIb|sxeQuAWO)fbz8W~&}R27fB z$(fVJPi^d#L8kWY10_`Auq?;yrq&@mQ$ao5(lC!nIq8_m+CA?}NSpndhlcoXexmmA z)dtwjF$5n^q3T|=h4+&dh7gjvx4=p#)b3)XkkTcKz2!;5GMKU2=fMG- z;!QgKsB4%@x~@$u(9<|s?>N<^WK6GZ6Fd8u^HX8XZI*l3lB3BcXR1ccv(-qq!ar97E)UYcSM9{*uMnTVCrn_F* zL{(-b>&Zz%D5a3mWZ0CLH;b3m)#RSuCxzOKRTZ4AFg@3Kl$+Y^^LFiV_N&Ms4jPs0 zJ62-{mBJ?WpLoC;Qe<&Wso$Ow(#!hePz#EeP7A!&Q*E1vn`j~jXs&Wq`d0dJ8E?zAKFmw;pPHU zVfD&#pozh(U)TX z9JEkUfb3F*{~2UT2HUP~sFl}?*u;*Uq$hMu_Vr-0_6rU^?As>OqlQ;{m&vWT+p#~6 
z+Ck-2#Z@0dnpb)Xzr9g)bEQ5Lv3kF`oh_foE9DSOGOy&~x?}y|G1mHSTtFZYupP%5 z(ejOY9f{lalK&P3G`!kyrpzA@RZS!wBG{raaZ92dcA1ZnJzgl7a&FQQJ6Y#uxJk5g zeE9ie|KFhAGo1Uwh4lG5d1elOE>bQgo%ZtN$$8d~kj;qfa8D>qrYqm1%%b zH>B~}Pk6K6%zAE;m+Fxudc2&4-pfsP*l7YdaL8^Czh9ZWXmV8&a>__GoiSZ2J&4lr zuub_&(Hf_$@_oOJx}{?ial*RKF(Ol5bEFK7gI%fwHX6QsRg=ivBR z`X&7PNfFZQAXbUYxV)qW=jX&q^9(u}Ii(DnBYg*!>U|%Lbhk)LbQMJTYu@22((LRF z8#<2Cc}_ex2rDpd@DRhxmc^P4+~nrqEuN908RNAXVzHeGk|7?lreX&Us34xC(0g)8 zQo8IK;7_q3j|w=koCPy>L$KWZy%s`-AKZN+<^gqRg<>N=;`;?1kzM`w+YKt4WTWVt zsUw+ebc|^@bLa(#-?OmXPq6=B^lEYZTkILO|F-B!9lf$m!zrp^=|N3boQ{82Krvk2 zu%JqdfG6t12QkV2Tia-}#sr58$+TR+?)UfKP_7X6KlNJMgygbBXVSWpF34AThQ1#( z;+C!Te#uiBrc<9d7c$I(x06beX5CDKMfOM2fFyUAv0ekw_dy^^w_Ec%V2#50E^RzH z$#*ghuOc8hEVa≠lXra~M`_xfWVKnHViOP#--V{He~hWB;B z((HqLbw+7X{$nHmIxtJO(nzrMF4Fg4($nAle(`DL7}kH_(LGW+HU|x@!Z_OFEWBvE zA>lb_$-&zXkZl(7Y(5sJjOo`Q0&nto9ZD!c+(FrR&%BV$Acw^D7={454mzJAvfe7E zr!=T67v|bJt>*LNwJph6Nb?xTDkwW$>hP4eW&aC3l^*e?^M{6o+e1tbNN*us7xbGS zq*Hv%OaHJ)IyNkC?=^_ku~`X;c6Z;!HZGzsnop(((oV2xWrsgdbF;2^;q3(fZ+ZtL zO3efcE`6~8=7vlgIh`N*SKsYn;I{5(6c>>b73L{-QsoD{xW?u~5K%ptYa<(OaBcvj zW@X7p6E@h_hY^D0n-o9tghq~CZ@jwSASbg7E@JMU_%PWzNq}I!9nc6RTMQTTTffZz zPBHdspS!4+n~4R7@~1~Fgz0Y0d36^34BqoGYG!#EYc1K&4K3~z@N3`_O!s#&`Ju|r zr2+Z^gk%4v|0D_t6e#o6)jszL&g$%{BaL_rR16fK+v*`hOJpPBo327)eK#Q}>o>W; z88xQL0yB9~S*3IkKP}Je>HAzB%X*l(P)T)o^n8rxLo+=HIL z%o(LN`PIP83>gCjgI1`U7~bj zB7E;ZGspNn- zol((vgnQ9zWKD{lhLu$yJF(^&39W`Qy?vxTC+%|1C-0?W_{gropF+t zW5sIOVgA7qLHq3K8ArxdZhP9g#ug12xS^BY^0q3!_`7{r%Bg%{M%H_wE;66vxhV0| zuT|ZXk8-xM@sO$|{O=&!))v|AoQJ!y$wly5`3v_!7DHD->6A{$>_+0ZubHvr-rXR# zeYftw8c%jfy0yh>b;i3f&y8mmn0Aa5%}hN;Eza`{YaE7$tL5$0H>rx>yO$?V2&Pq) zuHC6VK>JX49;4r0%K)}c8tZf}z4i?27TZHLiZtlbuRH9o>Hamz9unLMRN3XiEo>T* zDh(fxBU*$Zn+RIP-f{L!qdVN*%3v{ z_ap=hs>GD}d67GOfM5V>O+))^ikjvH-I!tLV`a=r6HhkAv(*1_zmZM$={->D{!~Cf z@@j~^w4tCpmgUfG+`i_>+l;bIf30iC+Xl*{T(T0v@a?|bShHCRzG*2O#)&1@pv@Dc z=HM^c#SLJm)%y$K+{0aRLY#6*Q_>xl_0zijlF~~#cwr%S1y-6I{4HBT{OgLlpYv;? zKD^U5JApLRY>P3_;d7PzWSs41{w{Na{XD5&&#r~yDL~Ac{)=i-%~r` z#l16Ox27O?TxHBI+q9}vgzV3|;NZP2ywS*gSMRz>O_vT`7i^zhAhUvplD#LcdbpS` zhmTPH$emNj6a{l78Oz^i`D-*vUcpBr?t#vrhlD37%e-zKxJ#lw98ox=9 zm`PWfE`L6h<7zb++1LwCH&j=Q-Dktgtb+jY>3qWSFx*-(m|92o95P#X2?!c;(`7Z! zpX?xX)L0D~c9vkvrLy(t7c?4r$dB)yD2!ctFZ(-F%)+9UDWTjl3&(LJ^&bLR#{qLD zW5;Y;1m=uPtZZ{v`L5B8zP&&~hCRh}hQ;_<9Y@AuNX~RD1f8GI<;36doxLPf?YXF7 z*R1zIuH1#7hcWbx&wlHoD;5xSe1U9{HZzO%c@HYH+5UEf1%^gKkv%|x|PdqR?x5E{{Lx%r5u>Y zbqbAN`Lh3jPxNg-f%@j0P5CR?`F{?d^sj&drJy8t;SZd8CE-GFdFXyejK-g147^SP zR4qNe1Nw&@{_o@8uH~nsykPljgg`A$Hmb;BrP#B)(DA|Rgh4U9aTA0zcy;wo-+~PW{ zeD}Glb9whP-Y-wGeZX@a(#nSQ)cr)CHcB~|LEEaBv+J{`Nn3@5ZVpzYVgcwvh5+kB z%im5h39$>1q4|q54}Rvg5@`|!O=H7OLlN;hSq4bCRT1O}(hxHrBH5H!^@Q~o#WHZ` zKCt8>#>%}v%_jFj!XPzi*!qjDJR3*FrzAfW?rvVggZnK>a<@!=2};e{`EJI@~z^^V5?nZjr7`Y`1 z3c|g6H{%68(=0gj!Qi;T;x10*VukT|9q-V0ccV&Aeni6Ux}xGMoxm~P-+pmR3F&Kq zW~*MpvlIrn-Pb$O=*9w6W07Z!VCBTBGHr9x3)*%+>KM5zW;3q59>U zV!s1dLtNWeGtKWEg@hRMxN)ASI>bYaHM+45NT2c_@`4*|6sv7eMSdsyM((F-bG?rgvr-_nP$};$6-;}-255wmH@SB^@?nlG^>2BZX$tJ?eh4~*lSp1Zx z^&9S15dyC8Lk1t^gDYN6nr!5uMI;-$i6z|gS)l1YweXMehS7~oA`7)So=okhWf<0! z@VPBd=jWBXtvoSo^l~2ZdocPbEsY+#RmD@2B$4w4fuG{i#R?x(5thDfTIXVM(jo2; z@k2+)wDDwvd^`YG+`8$|xP1wbO5QXb&MJDGA1#ef)VX4kY)e>?8AWLfnE&8i;10Xm zVNKPJ4?5-6Uw$qYSSVV99$Flyf+SX(m0NHaU56MCQS?|)s46%%DT<83lSRPcujds- zA$w&syf%yDNe3>-YB8_16{1qtZ!)PeYXPNA|0EsH((}BGGF1=T(e-$iXAB! 
zNgfjhz1F7Q;;xJTmQ}og_!D-gr821CeEsH)L1Y z8Ig6@LH@r<79nG(NZ!nJhH@pWNjgTrfCV-r2t{4S+Q05)Oq~n*>%&8&riO3~JIs3x zFBkcD@CH~(=s<#JkyEn|_dg#MJPOlekTCjwJv8~Rj25IE_#cVO4l-)LLbpE|qo!Pt z(7H*g9)4kUE`!92y+%~;UkyAk^l(FZq5zJ9d%H~Xfcsi%^rV_6;oLFw*J zs?nskRPSG_8C#9CAS0*M=sgTy#u`D){!rdkZT3^@C*pG!TDw0sblRIoQ6Bw{R8y-ab?K_U&yZ+5Y*7U!TD9 zA+l0&D$D=oa{Vn|`LsYLM;b$+f6L@YxJ>@ecYNHO7c|4`pPd}aq=#5XczPlfz$#4I#OTdh%6ON)X>*;@n;Du&KzkfL-1h==ZZ}#;L+G1DNXM7?e7LbWriyPRDG{k63iRO#xL-&+L>I)P?KyaUq zA3*cts}6d6-@>dCiVE0;*~2njUvzZ<+nEk zo+r36QJYwhUu8YoElf!G4sx_}{UFrv#NdI`X#!^GWMExw2dbj>EM2zTKC5^_J|bfW z-5}qkP7}%nETm6hzGfqBfB*&aadzSWA-g$CsJhjM;|@yJ)SN(eK-Y(gtm8ueAgU`Z z!lsPE=8Yt70-LI1keXW`)8MTO?d&OZd(yc`#?QuOwM`z7M}&5CNZQ}mxb+2*3#0~^ zL85}~`Yj>`kdmUNh#1_9LCiS+t?p28}U9b=Kk&7_bE zdGMvhY{YozOZ$A;8by~}I^l|sQ}mB7eXVd7?QDbgYS-iGru^rW-B1v->j5}31?-E}K1n(H46xWrM1${v3U)Jw8q>q%{Lkl5v$vQzreqWfy zcdPEy;Sc+;G@)nTb>r}e2ni)6rMefAb3N1>39%56i^3-%p<|B-*h}E)F5}?s&KkVq z>4d4QJ+ITfk!+csq80C=ousV&VDzo}y6eUR?jNo6`>T6;i&akD3srkESHxORj@0#@ zslfIzLQ?eVip$MR9siVaJcrR(2C7l#1|a;t_{{q9kDkw%Bu%kV3+O3ubw~HSdBU3% zk#`;O#w@P%B%q?T)PA%Ntq(n_eV%*$OUuS~e`?96<<W+4S1?nn zA-feH_hq(SS_|=rcVFPWSX$U#vpt=(D6eURtEc=mV;lCwQu14LbC?<8?lkMNdo;zg zt5oFJW;~4);aF<-JZc>f+U7}u6Wa1%Kg*Hj$Wg27{^rE7xUIvicB=JI7An+RcQ@v* zY!197Dj@%y4)Jwz|H0^)i=j{XJi+ID61j*jp6@VC3WvKR-Fu0NgG1S}LWd@ykm9a9 z>iqn2&RiYa>k=AVqUZTM)#ki6686($Fa{5HofJKUJa4&I?i}^i`*=spx@h?9y{3uh zoJZ}!J~n-`-F;j-97tmka<8iik+UpapW&G&9-a$6dc#6d07mt}{3K&nzIv`u94g2wj*IUdhlsnX_>3~%P3(| zRvN((3$TOAem?8nxgFOKu^Db?N-ra-@%!sg`IXCuzr)a(MMcHVaCdM`vXya@+1~KP zTb;T35>?yKaU+f_9Fh6V!RUCh+9`%!FiWiZw5T$GyHAbss|%NR>keo0-s@Hv4`LDS z1_g693EI=`vV*yPCn$NJ?aA|3fGDXHO;>|^!fLy{PGQY9dz^U6S3Y!n ze0*a%V7#$EIsmo3;8nmX$~8vpRLVY+=Z%zi+qyPUubFSLG$N}QJGyw)@%78Zlv!@A z`t^uW`psa`#q8gBNIPlH&O`7Cf0>T^h(^B0R>qO@i|GBuhmsT)_T#UF>qxj!!xwZV z+4|Gfc#u8?Fr?i4=_dwYcJ1$;?oC}~%2=^?6~x;&jK*2n$LGdI^gH~I|}xdg4oPc?J{=*?HN zQ`^0asMo%_d~V30-y<^6_!wO8gp^JNMj!4CJlfUmZuT8tE{fjW;E|rr6zCi$vEEZL zh&bGyj{qZUQf4!E&nGt7Y5|3Eb^hDR^e6)TS*ht0dK0(!N#uk&nj${GD-u<+GE6L7 zKf{X8vItSF2iy3k90}{w#%CJ=vgGL+UJ&jt{&!F_b$dfXLh|i9?dx);SW$@%cFVu| zF&UXXvfcE3{{SOKDXH^S-y>s^@$yJc;W2h&4b=X7e%vl+btG(coe^4ED^EJgv1xpE zZ^(;5C;&sK(mP3=oeeN@f_H5z#$^jnzdK$KwjFb*yeFobM`WP=_vKQnR&Hm zn0LogPLD;Saqh}KnS0p(2S`$G4r!Wul`(vAHB=mSi&{MYp6+zwsw&=Qf09 zb!rF6i)h5pcCy`g_=tf{`Q^LA1NnCIy0Nh_7~#Z0@{`rVBs=BVXIi?m^IyKaD9~$F zEC4iJO~q?!A zgGQMLA1^&138G`hgA`OneM9g|-R}F;`nZn$$n=dj%i+g?*U*PZ1QL)s0v;EC|A&aE z?=VQ&K>dT%S4hJ7SIj~7!gDgx1qDyy75<&)3bFdSx<9a&RASL|`OZJoNJH!0a`)e8 zt@+0R<+Ja!=jlC}zhTm!)FYqC6AaU8ITI;rB#p5g`_?G9OcUlzk+%1Gb)WM2u6s*ISxW9s^ zoAAA}?-s3+M?;Mr{R&@;!1uQwlXPsUiff?4?GK2wd#ck@Vp3JtX(VKpWsRP!OeiCM z7uZANR+vEawRJJPV5oqnjij?ZxH@U<4fZn1AnB6btS2wEwQJ$N{p;R(H;22@2UT|J z7FQ96i$~Fnz*&!@tRGG&r>6Wu&v1LCIc|?~uG~jDZ=6D;SlBkGf~3yF7cFu~ude*) z%6p6sqyefJ7pld**UNEEN19KkZ09QdGkE5L+Ifd?wbv5<8E5;_z|jK!uDv_i+3_Xx z;av9FCUe;u6~^PkdwX`wUlA z#)HHWtd&!_#k{8eVvdnOnsdt5=`+Gy)5EgtK|D6o_p;n0@-nezPt^pi-4*hTb^qau zzx7zg?BbJdh+er9S{u+SX{~aR08~j%7xq<6z0y3}9B5+qA`Zh?XL(TEsFUvzU2xtUq7GeXbV^>xvV|+cW;)bOZE2>orChN^!>%#W)Z( zCv_u*w|T4*v_>Hitd{m_UW$3RN5{O4rh&K|G#qNVDxw!FBrLLRN2&T%URH&=K=s3( zJbCM8TAP0rp;~NOTJsL+T1ibcc2s_5UYy}Y3iKuz^onfm??1&So|ClE@6K={KS%)h$nC|Wo5QNCXO zx^Z8TNoO>V!m3I_Gy(O44=)f3DS8a-MumuCUJ+ID$0B-?Gjr>Dv%b4Uv9+J=^)dgl zJ(LTkN?XiIXhn5v|D`(3=7^vV`Cb@PfS?mKWs_fe?G$}Ti7Uu?G8J@ zX4gSO61^?0q_Ug&q|ZJz9brRu=|}P9A-#fFhP;zd)G=an*6=xH6hMc<%SWu|y`e<( zOKbf@Ln@nD6|6x*_9JNxKk5#bNHry$zugy}uy*lN-9Ys`Kn`EQt9pgijgEdR3|b8R zc%Iy#C2aLOtwEp!84xH(E^(WsfCRR8^6MkAa&n_)E_8!D11Be^IDjsQ2us{<2L4X^ zP6t)m{RG0^=uKmym`##Of!J`wYI|Suq~5Wss;a8u)?x)l=H_-3NRim`axs;cDJstg 
zh~e%+7|41o;;x2@5NX(b*1tgJ8f=cDG=guAuf=9lt(nFGdb ziUt~Zg;-AYHDnB}^aV?C2tmN}e*ky>&wvUyDLq(U_{Y@W@~skd>63#i(=O-Plb%gb zLLD266#Q`A;@_<0bIJsv^YaUo8t>g}&ih|hU2cZDuy2AWV}0-A^_8bG#T#c8UAX~V z%%mk$QU1Sfd;Rk%nxan+pF?;6$a1jd{E;nFe>T|ft{3-}7in*S-RumF#;iMU)8F5d zOTTgRW~Q3ooEri2Oi)IbH)$@&mX3X_@iTi~QIwQsfUynmO5?sQg_c_rfJfx4sAl|M zuF%)T$ICnO?(X|#k!NR^YWL}!WQO;vLE*GwWyAh$7q|AUt32g9F-bp})x`bQo;$lX z06PX(7CL->@&9{rJ$urs$I-jXcFqCbCy{71W3`%E=qC8WHPDEC6vvBMZr9Dc{zWan zQ{~dN=f`a?-;8YS?K0n-85%{nGBzaNeld6EpZkqFW?j5~JsUWf{&Lf7>-@Nwo0Gt& z20*7BfTQR;8ul-{Xxg`~RLgTBlPPe(Y8{Vj$11~kS5@AGc%W++HO!r!^(iA?9O$)k zm$vQQcvZ%x;=-2g+h_lLUzXrZhe_03gF3D?Lzi(5f zPDJscPIaSnU(*FAQI7md628!(9J~96LhrgvE!j^`w67bj-23WQKpLsSn|9-l=H~sv?_#3OgXjgCS+6+9lewFe9g|i*9pG>?D3Q5nn zDnAc+F5Y&l)~6@F+P<%yKhygAjg6+Du`~9ZpS64Ux-rh(p>8Ut-64*e-aQfP;GuVKO^ToSh?`o~{MhjF$S|b_Qz~oLl#P1NOm=VFUz)#uuKlA}>*+0Rj)EIHoq!#VD-jOB z`B>36o8LXVcJJOhttAirX4&kRW}BI%NjWG-|D|i>!W|Zy7_g} zMXom2O1XK0EA}?FVdQ&MBb@0PxFJmjD0& literal 0 HcmV?d00001 diff --git a/docs/source/assets/deployment/anything-llm-chat-without-doc.png b/docs/source/assets/deployment/anything-llm-chat-without-doc.png new file mode 100644 index 0000000000000000000000000000000000000000..952a43bcd677d23b8d78cdc23375c6a2c8621e8d GIT binary patch literal 138979 zcmZ^LbzGBc*gqf>1PP_21QqExA`Ag(L288bXl3Lm=@=;>A|O)2=&sR>EC->y;N`by`I4+|Abe{-Bs%%l{l zN(gVM-%51xE%fQBzITl^2KqShZEn8RdfpX)m;9LYO~K=k=W4l36|=g+%IU;%E9ji6 z^_3^i?)pWVQDYX}PdwBv4_y+Hpgk`$7}Rfl1<`)^+?q=dC!Bt;DZoa)(=c1t#^U$* zZgQ%`{$nwmM5yrp=Cuk@530iz&iKoQ)r&uzLE2(?dCuR&63_% zvBzYaigXaWjnA!=yR1!F-&h6duD@<2Wc4+^d2b&7vwGx9RHMu2?h{CyP)2qVc>1e| z3N6klJD2ToTtgYYp1$4GkT;Nn-^9w{>Abxy(Kh{)p8V^_&wZ8>zt^GYvULD83_hJ- zSxj_IT;5#Ucf<(%2}E1Y6a>Oz$6VuMVFy`Y;bN|^F@H3eKP)Vq6r6wF!G)$=`{(+a z(~l2@W^%l-u*9+Co=K>?VsA|08AVB-w(Xb+0PyPxlIWjEw+80Cz5nuv(@fX{rPlje zdvjNq7q#wjvA;Vqg_F>V{(cpG~-JRQCdI5h9-`9#mFv_W;1BGN}a=S_e zg2-lwmBN2r(2wdI#vR-L~DV&T{nE6j=o7VzOt;Qdt z3^vj=Ot-3^TTdvs-FNYS4fexVXH>|KwDdS;fh>nQL>dS>N0;L#X4pJ z5YAoZ)|p0KVMf2l=?G@ADub|ato0#FMf6p6MmVg8Qyx;*x&)66be+bI$>j$8A!9DN z6tlKsF6~_-M+C71DBJjMKYl09MzJ^m>JyR-u%!a%z2feKi%Erv7`o z8_cM7xiW9$3zP~-I*y-UWMi^1-Dz`Nv_ymoe+_AnicVla(uO}0KAFzcjOTv*wIbUH zG=Zqm4k}E%J zHR~LL$0j(eEJOcXMPCz6+vd=Cm~(i1$D(pU@%VM?P3p#uq;tE9#A*-P^=U7VV`hik z(9%gbr}Aix?hKCbYZa?AcbkI;BonAz~w#M3bd_ zP{me~`zT|4cw?e4l;Ju>vW^We&h;l}z>LH}Q@6~cU;FcFR5wy~S13pMH$z2u!Y@)$7F%I-@P*4bvsDT3v_+MFc+-T#obbNQ-n#g3X$ z-}GHmCofesb1J{b{eZb4p;0RW4*mkFeG!z_q#zJ0oxYJ^0_pkqjgr3$iD!5S=P4NUp4-=2f#XrIQ!=tTn z7H2#$<E`iR(oKacYe^N+q_<9foL;&+9QX|6qTmvXC={Jj8dkE9X6D*UEHrOYC{U&;d{6VR@GT;E(CVYZ|R72RBC`CbzQ@1B#M#`^RI-nrF8Jt#vhU zDFdwyR#>jx70x`wj`FJCwf66E^RkIgxyb9C8V~3u!r8SC619m^7Iv8~7Ir!t;l~B$ z0be8^ZJNAuQzsmKuUp*_Qg!=n$0*l7C_Aw;Aq)B;kf z>*E?kbj7JJMoo=ts~Wc}F;sUc)tJT6+n?QrZ!XoItV?{;ikBZ&(y!!DdO->LXuP*j zx_H=p4nUgp&o4;Ox?~)Jbc*}b1@zrs$_Il^EUUQ;v~S@K1pCh$){2$ecAz>z7?t~= zD{VQvtNyjDHFBj+5@H8Yb6OH`!kgEd08QWd6 zJT}NMELtnmcCZ>@cGL(T4L^2RJ!lHuLx=Jxa9oqTB_JucH&t#gkq}w~JlCrR3q4)H zp&RCf%1=|xz0ah?J zLZ-?_)nosJ10bH1oAHdY>2oU0A6TswqV>u4-QO#0ugozMTp~I@sA4S+_YNxe-=?E| zn*69d)bL=nyU0@1xq`NSxKQI|RYptvoC=4Q05m~DkeguS)0y{xT+nj#^NodMFJy~W zLHWf5efVtuJki`1uIF-&0_d}$-f=iJ_ubxLqxLyWSvIO4zWbaY~*-7|^4>14B zS4^bjbJ$dF1wHyMdq;sBRe@@~vc5`o6`yHs?THn07wet`t+ld-`j(b1)i?u#;avXy z8aeXPW;#K5ra<&|w&hf1BezoSo2oytfs|q@yfSU6%bMN6`7^}u`jZ34(qjR~jr)FFIUz69) z0&dkpq0-wdW7TCf-2kf&{db@owdJ0SG!u=OuYcg46hZugEnWvoP8YS}XEZF117&8) zXKg!pP{E{o&aeRkVD^6lk3B)d##57po zHr&qFgYP)*3HG0l@)4K8xz$+c^2rAz16G&(-4kHr(nv7!h|PR<=lfj!ro{1B1f1MA&ed-6g(1MI;XP^$fXH zESLWb@Q1dFr#u4OR}HD=grD2!*A@vPWaO$@!;;6D3t6myL6Z($=n)6$;G*P(suWGi 
zu>2>s=O=!m8D7NIVP7YG-Id(+*+>vM$*!Iq9rnVvd)1~m3iG_rE zbE?#x z?|#<9$=`cx54|*sVl5T?TrywfjayZ=P4-4mqC>Ad3#N1j^9g2;y-@1Hn?1*tJ8gg~ zdfBI<(+J?S1P8?h;j{)UOw9XB!||R5LjMAmSU(lKzI&uK8+!tK*O=6L2A>qJ`ubxS z{tHh$r;iL;)hM*mQ(MoAwIhMD?|$IiWYqNX@m^*j!$xppV@0hY#d2_?axjWMA$6@R zOjmd0&;yNZ)#5h{4^uW6V4#|r=}D{syXja+H<>nnKKV9b!tq}=pMHa!d?`-)fsL%X zpu;F%yb#NOSpEQhYe%PEAcz_p&Zo2?^B=AO#7I6!2Q0WfY(ATS{~xBwl43;E>7K3j zQtZ!3^u5KoS1GUlgyxm3`h$1}_CJk}G1F&cYv4l;zQbq+-%Gszctj2k822e0BrmDM0`y9RbF{jBY28{P_ggholbQ(6AjwEqdPgel#cVP4e|#Os_XFX`!s4SK z5Lust{C~)yT6`2>b>AkFHSpQPBjf+r(7qL%h#b=rV`8{W(DIxA@NvApDwAK&t59{% zFuV9#l97i8lMHS`(GxDkX=%ujJPnS{&R&;CroX!HzG563Y3q=v*on2_d_j>0fjnkHd9W#{;KJ2FqJ_SIq zLiOcjlj^H`x%I?IP3;y7K?dh1v0krBw?YIvw~b14G2HP94aON;s6c3*WBI-Z04L2~ zt&f#yK`tmbG)Lbpc3v0;W@ct;Zr=Ybzest6OJ2an$;o%|qFVp#aHLRQn4wB_+jMhM z@UZ2mjHZy5TPTWwn5@|1i7{9P%U2P1pK3M>es~yRHGH)b!~h)|$fR_*1^G0F(p)1yXHc$mT%WQ$s$h(?HwC#4*M(pqtIpDCWDSvg&G+Zau+RI0oVo zYYi}gqufA+7|s$wu?rJbu?Irpzif;6CjJUlhr*Og%wlJZPLbjHXCbFG6J2OYCMfwb znt}R5car5LO{|gHnGZ-p!q*8sJ`Aq3N$g_)HKfnH<8hF>^Wwr(ug%%DI#f6LM^74dKFUaa*hPK^Oct64!4797L|c(^JeXp7hJ@{`Lk$! z|8Wtza%Q^c$%W&DO)Xln8J@yAx;MUFG(jXVIUC;1WS)Ll{`&p9CcZZ5S6!oY`+`$t zeqiHR-io2>9NXM?I9fDR*&6m89UYbh@VwAdXl(jsz-cOVu~j_FGhbk)Z*JHcGsk$* zx@v7+wpAY6D@hdMEa&?`qS)98cpNDTI~_XvXx8;fbqiS9?-MAoy8Oln1mv^0_Op@E zs4*S~l=%TqZT3#eb9IDaUS3Hdw}vpb;%(r8I9NyZdRG(U+&V3-pLSm#%gs(&YdoHD z-0QX(%U_I69KP)KK2fM$`Aleiyget%dW+#sUqNSf^((X5IZ}a{*9xH$*ZA|WS}1j) z#=@Ib>0&`IK4y7t`@;6R1)`Us9aIEf3EtCgnhS;N4QHEW7K2%t8%nX<`p{xRzXLlj zL(iQ6P{NavuUOAJ0@te2UACr?XynA{ji_m_Gi$h@gK0+-YA%R=37uWbbDjq;Fu%_X zSU5dITkS3PR+3D@PQFc{Or*uAbZzS^h(;|rIP4^n1g2eELb1DK#bZYPL>mbXy%mXi z+p9?)TNQGV%>0&3rrC+kr6D{u?LbE|mPD7!6QI>dK@>JTzp$nQ&sipvCYskMy(?|z zd`ZB0vZT#Da>BN8%1ocjsbw)Q#<0rXJJ!HGmuLd!k{s<&)7kK%De#59(G2W}AM%xT z^fo!bzWf3?VM7SaglWP6tMR4gtm?%yQA-7>zXH$oJfDU^b5UAYn@+Kjmu8?0TkTh( zvD^zfAb||4@X}(eA(Ga%<{R8nyR*sn8fZ-5+Vz{o>6284xoX+tC_`_wu-EZi(1h0+ zdU-tzHh<#^pe7NPvQc(4Eozir(KASq=uIX-XSHHedy+>Zn(1Z_95W$X?@RT6NO&{D zs*L-Le`9&0tf#bIf94>-ZRl0_@aiPZ^^BY35G1D^HCx z_B%L=m~N)OVD8DyPME)@QJP`GOe&VZcIPfdBbPo0!6=`Tj%|HX10k3wFKg~KXiB-~ z*D`MgS)U6Htg?Si^|Wwrs`7y0B>mv^p58}U#B#CRhz?|uHDsZ%c5|1S?38!98v^#+ zVrCv8WrgTDZcR^_;XOT`^%s5cP)9fpIvDE#Q$^1-Hj;c`olR|ON|Eq~a#W(HZjiCV zRqdX)aPuk4>~us^zNRWj)g8|&NC0rdUsBX}bn%XF)) zpo?Lujq;PvWZb#L>a}2r55}nUy8-F$yISIeRvcsY5@NLTIx-!|ljaL7 zjsQ%&{ z_ue9mgIKRs)5HOO)?dZBUsO9M79aA=VZw8FHQS3jGE(Vj|7o?cI*p`LugKA8EH|ra z^1v6OM4H(pk^b1r++>)kq4z;F{Zi=@2o9umA9ZLQ9E1m_?ac`IPZHiBanz9($pI$V1T7;Um#bGnfE&MBpjwr;Jp_j*N(aB~n#8*=8o9{I#*N z?283VVL`=ILCZxTRwAP*)Q_9SG$Yk~T%d)%NK`TDso=d=5Lz={lXg-u7sX1OsSPuM zg^fsM(Va1_4w?AH!lN%#hWieIxTxX2-iHq#UXs_=-cw$y3=FmK(*aEIos>h#vD8SF zP%;*mIEj;H&t1S0{Yz1xk@GD&t+zj8uw2~zDVZ7jsHdr0PYLz)QbjTCgGWc*Y*yH` zPBMn=(Guxta%L0-)K6WDlc}WhpIf|~JvDJ%QMd@CwKi!v8g23JznX*`JUtjv6IDq+ zaX77VU_UOgo0FWu16?|t+qa#33wCk7xU-^}`-1d2nsqiEE5r{#@^stB$*v{u^0-ax zRQ!2$XAQOB>kM0|1HRr)NUZ)+qmkvuAyIChvh>cC8-6!yYs9~i3W$YkK6`;w^lXxm z@^ke3ppz!UudLV*62taahIqGqVR$ngu&8#pG^#%R#LbcG%a<<=;T=7yhK@@5S8oZq zx0`t?YU|-=&o9(~k%w0Bq+wKVLcdLqXKbE^irllm1n0`%5*Zmu$gY{abWxu>)Jg3X%WVLAiV)>xbp2M`>UoYQ??Dt8D z?n#gFfD1j9sgA395t|vTaitbf#t)7KX52f?+B}b@T)f)*vGM4P;I&);z7q0)%28-cMC@$RVMimsUh^>2PuaG5ug4({2%B=^U`?Mh ze16w0<6>FtYA1`z(6gkM_(ju7#Php55-boQi-SxNeNz=u?L23{>(kqzaB=M;{jhku z(>G*F+dAS>dRTHvn7CL>GH;Q|iUTC_eEd4&1Ou!aSn~B4KmJGO6;|3#%oasO z$8LS(Un?zesfk3|&P?+K7vm6gn5tgy`W`N}>5sn}2C6h{j-+=UZN_6Pe{+OO#oDjO z1=f-Ey^T<>h@ch7!jc}IqXhGwozmKDW9&w~pGlOehBG?MW|JE5iI04(|2acNCr{&K z`Rl zI{Dsjpj!{uZ|h03OVX*HmO(=ZMO8J^Bv^HfgWxvnnUs_PAN5viFqw5mr@E@RwBaa! 
zYj)w-T_Gih{tfg}Tz?%4nA)s{_I0{=dmH!Lv=eAlMQSRL;GH^GhflxCLjWGRCpB8r z>BWs&XA1m*zVu4HS!s-ud`}4Uv7joADJ5E5^{+=Cb%4I@p^HN%`k&Wv)V=p8$cg5kiLVSFP z_%=PuagSQ^I=hQKA)%T5fH6Ql>YM#YoLPKemFFfen9sJmjlp|EOKm`2qEip}K9Ipe zR@FKrO}&2V10%o>Hx(M{T?M^O0Xbl0%n)VuNo5QO6#WvrXv0kb;P*=tm-Os!7_)Ih z+hedH-sPH>sBBp-;mFEU%OjWos-t$Rf!14ArYN?W5u`$%@HTmddDLLA3`cQ?nze)< zl_HvJ&KhBm-;>7go0uX3h~+C%QKLGTyA@ks9!CpCmce_MC5|3%BjIXh(Yh7y21;=U zf>?E9@U%jHcF^4ljI=XupSk-n^;KzzH8Cwwx8O-e>WFS zSo!Cb9UuJ&PInC^zS&klk($CU>?Gr!g1N!@L;&eAX7>2+GO!!dcxU?0(sv^5S9<_7&@d>V3oh$uLi(t2_iz67LJjx?OSM*}$>e2<`lbzpb zK44d<$J(JpbJBQqa^f0guQqX(Oo$R^_{R(W;}6Zvea)FHRRwr+l(1<5c;^XBHU|t8 z-5TkToQgNdK#uxZUcGWX=MVb3!JVc3@08BA8XJ>cHFcN`&E2;6C89JpkK^Cm>~D|%l%oIb=#A*9@S|gNO*C}S*MI~_N4q#)ylPHzgil5q zqc8K&a32$w0xTS^2=tO2sIDZ}^toOGx>Mbx6D;-K3IFv&- z{qDzCN=II6!up@JRImwEF6H%GzVLRAB(b^^ z6KDuq^ixRoI<=5g8!rqB8*2fb=BLWfcwf~|*)J!CeiXU=giZXgO@X_lcGAtG`r+qZ zOcE-Vp}?Vt*z$WKZDX;g*mKr#04@9NS0cmK+4EF;JuzgLiV~^cC!2Ri#s+3B3uQbH)NiB$J0n#5S4ivBfA{-B7 zNDyfmM?diHEnKl7?{4MvBZ6t%=9|_ic{P$Mib6i7HPK4WlSS`K2gyFB%SlV!WflDc zLy6lJnfJi=P$LADLeZymQF+Z!m*52kd zb-BC~pHDdaYqjGVjB;CQQ(6_NH!_?qYBZ-%$0^`ahw>roGjwHgFK_49gBj-uYtiEP zP^9I=*A#k=KvgA&W|7|_eFE+&$(>mqv)znfrv}bYlbR}9f%P$@6Z$dKQ4>JB1}5*q z_j?eBOg)V&U?<0wn4i#4VbW10z4wVpTjH(FRLxisiC`<96rWhQpJ1e(X3uwS4p#hK zCadSmiZRlqtwjCun(AUtiVoMu^9~>310M<-G)Lw^`iO-;HT12=%T!ZT@l_Y$+>7rt zn{D#-vaq#QKxVjtmh;?`6gPdtRQuT4?efNDhK~riAFgW+yK`M!Jc$*_N)0^V8Z%FC zfX%x8rU64Z0BM^iQFn9m%L_!}kB{^63wNmyMs0O&dmGiu$(71^iok>rb}d7PP(vXY zN#&c$QvY)K7(bPcR@IS?K^x-uGyShBpg8VoSN(l9RUadTG`y`}qK@wpYhRXvFvr@8 z42rKUB)^lNqBd3dRI;F2Z~y7*1zCn-9V$P%jvB$ks_z?rRlcvh3!8e%vR1i-OC#c5 zUcV6|uEk9BCn4drZb}~j3U;zqZjz~#lS*oEilo^BI-1+eRM5C*W16QwR`qAYUHds8 z?NMh)0H^)ZtkXVt1^6FDdOF;fi>e{f(lm25`TRY;J^b?-k8i!L610@c%*2Jpf4f-V{Y7R!tn)`ytG*2C z=Cve`db%Ph!QU-%h z_O}qw`D{(#>f;bkfwq^KHj`Yx?2RQCX;{rI)l6=4we<&rf6q4sa^p>l<`Y++ipEnq z!lSjM{qa5ECIoRfp8MKxbv@F0F_c921Xadfxr6ibA!;w-&Y^{F zc(anOPWD+*9>4h9wnWzRv8Z0_lZOI23)yZJHvZRs5~6u9PA0^L7%ux>E=YI2dpZ6V zrr__0llmEF-R?WX8hJsmZLLud7oyY3MQ7dLIeeze>SxoCz4u`&=3NI%3v&Q5{Kr=* zIZ4+>dukfo!UM+3JK`nJyu*v@rW@^AGVXo@!};F)UoaMTAb%@!Uvnwuz?2xim6p1p3&3`HU=qvCY9P{$^E>F4E&;;0I&+O-3q?ZpZ|s{Jn1WPyB*AMO*FoT zs08@P{ZhMNgy$u~vN?XXY4;?=`b%Vy&dz$IABZiPFbHaO0uzw0ZcKIA$JD{!y?aM< z%2)nVVNVusBD^w@b|0D>$DDeikF0W!!R;sH76-HCF zU2wq+4Q8i3HEve&qL=J25v>QMCe_ifJ=k~+f|$-SRWrb1onJcOO9<7(=KweA%b-hjka&xtNNdr3JfP$5a~2 zHBdtLG81Zaj=y)7wB4n;ib4H6y?F7sGe~Yg`psZVK6|Af3b~LY-W*duc*waFLs9ZL=P7tU!hNylgse#R?UnUdS7%5KE2pi<0p>GD`@|j3RB8S z+T)P_EPzv(8hDB}Ssy_lii`Vp_x89jeYx<;j@QW+#29tEN9v?HF3a?CadA--47S9e zg5gqvwf?6;h3PnRJ@k{_H+BaOMJH4B$Axw%cE`GwZc^JJ`?slLk}YcthA? zx*3tIie`!)ue9Cr=Wph|`091AHms?sx#S&EG_$0YNXD4c`4|VEuh6G-mb_T?5RL;x zfGc*X?Fo=;sVUvNi=DKUY6)B{xRMA0-nHlL7w6vf@Y5|~8Y}xl16=Jy`PB07D8v7rxBh#LIN9x*^1xQnIR?MSV zbh<7$A94U_4@%SO@pQc^mA*M^O;GCB6^tXlj;nydnr?Z+`3f9{dp~(>-z;)`ss&vy zV+k}4@Kh3&tHyFAt-NQ%sZ;uf#|7j@B`AP%K&tXV1Dgh5yI-wIip0#|(gBe|oC(2y zw#3lA~T3y?7N>; z-vwI$RjebC6kY9YX!evz{_i$o1-!s9_$;mML-x{T`safRTx@_L6}R4s*bEJOX75Rw zbO`=ejK?&^%L*GwpH!#uE-s9ba6plnyCj0>#y@*dU3U`8td4guPN&VxI| zPT4QHiZJouA2T-Z$9d#(P*SYh{*CMANBbt3mK44>l6ym(&T;%0;hT%%5HxTb)Qw_V zNE^$Q{mc3ibn=~~tnjA@O>ElSHxm>(##g6B+vs3yb6q9{T1!?DYb#Jc|* zMLZ=HSJF5H%v$CXx>Vg~7i@G#UFsm%`IdN!1%R)s#0ph-xmVaLLUv04ePLcu7TX{? 
zHcnUTEt4I>ZD^67tyEbWC#G$3`7(r}Xtxo&-JvC8*SjRCV%+lq#|la~9eKYnG{-8P zt>f~H;e#ExyN$Au>(WV5dip2af?*P;+I>lTWbqj4msvZ^aB)4!ScS;b%>1q|GkM># zt^^zdZ%=~XR%}$_R75Rtshozv!@mNQcinA2?Mil1aUwN}>df+)z2eKmmde;JPK7zV zzQa1#$H(3UqK&Kri50;DquQP`Wz7mnKM7deDAcF4A$^mivdX1fI=?Mvr7|3B9Ed4V zcI%2`G|5ukuh)C8;box`&KHuWvA5F}roy2+{8u3Af?Rhalew3=qK6c^o}CXU)T5kjgs-0QtFwBd0K|jQd_S)=amlcFg}ju zSgc{A(SN-bCK&N8HHHu0DYlrZsR&}X&zH2gVT4IA%5P%Brumzz7MMqsp6{ergj~(5HbUyTC#l8XYwiBu2yhEg85i9dEz6isRUkY5R;cZ!8<%0Oo}2!Sc=3A`udRb|@-L6dzlbKsRh1-d?T#Z$G_)>J zX%0)MvWLV!;o+1M0`n(PF$ZCpgqZM8v6C%Ezy;veeSF#qr_?PkXaLzi{mHv`gPTI%@N%snZo+ATo&gBJTt769OTniX z)>$+2tq>@5;qmyWea1;QpAP#>2PhA#4x=l$D1lxzEznh&MLnCfg%=C=g=Jl6l-3}XBcyQ~@fni@zX z#AMl*BH`TC|9qk^Rm%E%sDN|i65q;dyibJM{SR;)40gb@z&U?3;6;Qiqd=I(n}D!< z(NC)Q!!6#%?MQ#b;jkwK)W%&rT`w$CQxbeQmr;i?*sr%#?8-5;gv-?4x%P+$=+7bC zmI;v#l>aX2Z!!7`5x$vP;7|R)-$e&})%O0Lw%W38i!Z*3p|}JsjQdya2^LN@7is7h zy{0VerD9dn?(Ml2hqJX+dK@kl@ni*<=pT1;kUHphx;M!rB?U3EhC2^_596$VG^m=x<0)YHZu(Fzp%2nbQy0G#$oo0-za@Jg%!ikn!xc? zayYx}6aINs=3_W>{AI+L=2m)UEoQ9$?_KeY%rt~%E;_bc4;3*Io1~m|h@UtLn&w9| z%{Kd6$Le1<8w$QbHjQcDd+?;!{7kZCRP*H(@C{|Y%l!Fc?wy0bw?zxBS1%5|$g*m=qcg>p@N_4R$By_oP%{yryP zoR6l&JmYC?Br&SuG2JhM_+QO4S~4vz(Jn~99L&1&_YA;=KW1R=kGG|FBPCUdBD`Y4 zFEP$!8rR9~Cl+8zAx^|@PG@oOt>Rn`#FfVMpyzc#opo58nV67F;`Qxzh z&$-GNCflbDYAGSbhC_`8uK!p>Qw>K-u>WAUjv7Ci*;GB)J?vkr_|S=S4^^$rX$pbd zS}M*(ht{kv>28dES1Ldo5B=n@{y`9y$j5;|S`mE*BK@U+@GsDT{R@ zs~wh^$69{mkN@2MKUa3?seI_aFo)sJRYwbYDV>jRJ=bBnEw}R3pF{*Yel)&r%LMH` zq^&60(}Q(iJ>>gwlJ5-deKJ9DV`8h?C1XZ(t)zy>>}NT}fs5UA5Df@{4-0#nkPBR(h``(J5o!Kgxg zg!0z(4gGf0WWT`27LWB49nYK!9i0ZyB&UBvZ*O0tYFh+lX7Ib8&5!SmPeV}PWHBo* zlt!|W*!t3|H2`U;A=mN1tmpje<#wBb^}4l7v4k*DN#iA2Ubn%sx=3vPa|n7?inwfE zBmdlks6=S((|2@hq%z{BET?`=7pi(?Ug37nBj{1;kB zCq2Y$Q?0Hx$}FPMt6wlZZR_kr`xx6-vHC&P_Y6_v7-}TPdi#kOYHGGOzrpJE>a38z zyln%2Lun}NNVsqF!f?I#<}sP%~DMCv1Y#v)td@NBPFVc28<-C;N8njX3_wwxpY89zVSRmdM!%j)cJ zOET*4IvK`3n>Ys$Zlne=lwfFPV=HZUOS*wmLl{Pupa(`_t&{7IO2eoA%4fuSgx=$U z=3nZLAMeBQ_h5&}fXm3S+;dyO9i}BffNReJLd!W{; zzyQ6FqwJHTmA1CgnMyj@^&U1jqqciThm2lCP<*M&tCAJMGBbOJ+>o8rj%ftOQ zhmKu5XNNnei*YO4(G^t^-9|6n(F8${%dZ{I^fp{wdYsL)pJC!i>rsAr z`B0DGk*At?mRWBSvaHr?N)1qT*JJ-4EuT4FvRsBw_+hf^XM5l`0HW}uP0V^O?aN_X z2#;db^*?MClTuL?Ji-Cy%29acqTi`hAd1SCtDC+?Yn^v9*9r9FQ6AEXD#FP++9QsB z6e7?OK@8j2*U{&lhF3NuV!R#G&Jyt#hf{4fb*q#ftC?D!O-oo zVS5vY6E0gFF!z!v;EN(oqfEU-N{`ts4V=&cEBmoV;aBqG7!Ge=rUuB9XH)wA>$cNv=na=-QNH9M)MM0Dfur4Z{7(BEWWU&0B9h z$V=p6;m-PWJ-oL^aDRK|6Z0$4z4wD4sc)G0+UJSOH)PTEsYW?#+LNj3_P-7oLm9@e zHUedEN(O@R;1TR<%yEzK4U%-9NtxzPzZoI(Yz3}B`HZoJ%n zS+Lt{xm1f8l%z*ptg6C-PjK>XljPv!Xj4do`;L7B$-85L&F}}|Pr5L)SP9zFiMz-v zue0%QEQv;NYotZ-gjn+H-Qei0Xz6sU@YC#6BlJXNr9X*KUy|FQpvTI+*nkDz_v9M{ z9T{>IagvQr)WR6z0JQPK`EZHmR5>`Tpn@dhXak4xcK4J|%i`{Y9aa*X$!852C3I`UyS%uF?OnYPdGr5m_JT2Gwbd*MzWMNB^q z$1uEJnsl3`L+?=Nx81{I(00=w&q`Hgx7JHx*XjR|n}Yk*ouO`XC|9i*j%mP|U&o1$ z4ZQIN-WYkEJ0`qitF545+wluipeDeyFyzv+$29nVcf;VOj*o(dd>TNUn; zKq~1@*ZncfUgL(EK zU7WF6iI(pp>E27*#Y8L(rKo~$fo<>IKHH*-a~&a{Yi-BAQ0dQJ;}WPz34O;bGxFv$ zfuhLXKH1?~C(kMRNNoFq_XtBgocT^x<`F@kc>q_p^dN?WPx}~|MsihqWaDw%e714E z*K;6ocUWW0UPXH5tLTpUm+ME_ej@Sjb;vk4{AG0{z(ngG#SHXT>yHdt&;^f`i(=4f zWB5G(Na3FZCnwuyBwz$A`sj$UB`fLE*i*fhNE=6~bT6mABC>dqX;4%C4fk-7OQzTW zhmVH*78#x5s@wBZl{4EC*NJEcp4HLRq<7s0(5ds~mMjHOKe6-B^}aB;fxd3br&GkM zHIx*nd9czywZ+cKSuS#TXNhHQI8Z`Z9oN#murldlsr`XxGO^e$s~D348NHSE#~#GV zD@%5o&lx~3kFOSolY2bJ!jf zO*Fulis2gx-)_x=a zuWh*~j)mE^$c)JN#kbzxO7E-ntMm1cE4yH)&BFvn?!~j3XW<&*Z@1p6KP=ZWfOX@Al%K{d zT?($GyrU?O`sNr7$FFgPlw7ROpiYcJmv9IAI1ygbzn=xhl*-BrTv*9VRkC{$DeC0F ze0| zEU=5je@^E2%-@-2o`uzGt~&eT_HrpcZ*X3c!_LQtG{?(+;=yBrI|m&J-_yQ?Af6v> 
zUbc`n^0*@8-cD3iyYuC~c3@Aeb^NyS+Vadryha8Ot^G)BpgdP?Py_Po;i6)t8~Y4@ zH66M9Xew`3HuxuwHO4wwpo^<^^8pCkcKQTav^- z&SUw;u&Qj19B?~1)Z8w%2zE)f`Y9LtpM1u5lH5)~>PwnT6SKe;as7WiVpc{j!a*Rr>L3Pt+&Cci^*Z~-aaN)$Xm z2RtDf23;^4IzZ?g_WOa3D=HJN-$S{?8k?0+DzOr^_yv(T;2Pb02@5}C7%}zr3Z-co zpRz3RE7GA>Fe?SzFGJ~a*TO$K~oR%JKa&aEObBhO#j{3 zS-?w-Rz<}=ZwgGt`(<1H-c3d-+)2k4N{8^$AIERGCrMBL0TDkb2CFt%=oe1%m$Kwy zL_fX#zw5^wWUAt$r7FAE!tAn%FY>!yW3nFZ*5p^LMETDa`#`B2e+<%B3V3NVZoE`%F(!6RPvj3lEX0eqFOF2E?zD1#c*0QT<a-Gt+;3rCwjrYb57S{!xe@fE8JJs{y#A{# zU{>L)M}}WbY$3Om;~^0UDx9J}_5XJd{uzVoMH6BL9Vo?SF4dGJeTik)|Hnco$ZKPFFANNce}Sz-i(``i`u0$aAQnXAm)cfpW6w2*OH>uojZ@1848!^vkwVa_X5s zHtj0LH2?TvK`XzV`4)eT9c!$%$qOx|DON4UobhtLuM(M%fWYUA^YP0T!DJY|L%~~( z%6(biHgQ>gU8p5x-&5d{-w#99j^j5O9=h3UfE)kfKZ|kIY^43A(}h+#M5E?+$p8Pf zI6X#koCDwcRk0KqfNQTzyVzFKM^*U-n#Unlmr z9+f`mBc%q8tx_X?GtD0_xzxllzZ#?xvs`Qu7p2Glhl(&_exK$OyqS63Vbmnss{B{V z_TSGdSHuQbP4WS|j0bsb#k77g*}pfknw7NE);iS>hzotav((4(hq;-@;mm1TbE%KU z_<%a=F8*T{3u)pxDHU-Tr`G$Wr=JCP26TEC8fg!L?l$YyJP5XgOKi_q+{rYA-;Fkb=O@#zu$id&b;rv_uO;O zJ?D9zGneOBW4FjURO4ig9Iq+?{IX|&9+qLPI|duetC#{TtLK`vujEy_rC$8Utfc9` z)55W;((78Jdwp3d#`}UFo@|ibi;DS}@K zfpjJ6&>zC1nDJ~21(Za7&G(>Zq*H{NsH;To;W+0lnCH<>V%^N5uA4rZQ{tJgCk=Q1 zHd3^w!pg{490WtOs^n`JmKcFzW7(QNl?NQukcVpANrk8}dzf{!-p{89y)8Gq+fUn? zgaxXud5zE9#DajA)e(+OqICL?V@|q-$*JtusYVhipG1o}gc0Ua=srq+=?Y$H!|7gO zxNuU(`7YhLM{DTqQIiCiMx{{bAOTezmd+LLxMKo_n}oVVVfu%PwV1!w9l;J>K)ZmN z+wwzTIBo1EAD}idRD7xfS{RJ%8jK5J0S&ZJ%P>>L#1E^lEEPlPF-Jn>6{+-tMgM&) zs#4$Juqzqi>?DU(bv*9AmenA1O(|PS3cKCpjpF@pl9m`_KKqi2{WgU*)vIVuEWyX9E9NMQ@q8DwBF0;!6;ju~Z~;(cO?oeTZ^P^NvWiir8`I z4C&a2Virg0P?n2OEuwe5e>4mC(2bVsO~&X6kQwe_>HlZ$W72b zKNKa}epmXV{?5|fkSI#*9t_c3G1Ie~?y$NEFzGYsC*mHc55<{y;XwYjhTBw_zqVi55 z0Dnel{n_>mKzHnD@&SG=|FCBFan1J-AJzxlBS-*9khOiXEe&#dW%0Hv*&bXBgY-;H zCE&U}##*(jFcsm$^ZGKbq{n^D>rd~7S_s5&`rlg}+G;~&VIr6%vpW7A=l~O7^Kled$uCF9OIY2S>}sa*ey6#iYt`+>nC>Lg<<#<+dUOA08rvrN7z_R+w;k81VbIRr|)_+ zAGsFwB6#c8{U&&N=6D<|I>O|hg1T}v7bB<`g^%5NiDKg3hIlhVgl*E>7c?4h5%TxN znnGqHuQHf~ly=b+>hBmcsz&V? 
zPej6tiJt-RbR=uQ&h!%hf9y5s2$Na2{FCHFT+MB6b?fH_ch^Z>3!*L1zY(qQpj$(GGMPf`U|cL?PL@or&q3Sp2j~ieIAd> z-_Xl{>!R)5<f`vZfm~@c}1{UBTuE9;W0jTLePejLHTo52~-Vbzi%sGFM?3{i8ef=HV+-dfIzL;*}2Lt zd>4q7YNP(5yB>}#zJvO78VBkm@uwo5q-DQkZAf9P6qBOix`im z;l5gh#k~HV?{0DAX$}_{jLwa(Wr$!ZVba%#B`RV=9EeQ@bHW`QJi8B8s}dYGCR=y` zqRZD83czJX06SE6w6i!mUL`p8n50p+&c#7nw&mq79s~r@HagzE+t?*!aP@MFXlb`| zcVvGizIW^mRO)Bn#S||HS9_j3{Lg2VUB~o&Jd~@-hUPG9>lu}GMso~94HppT`UwEC z-FPXe>umMV0zluiFJuD3xqV4~`N{@_Cx5jCsCQ^2);G@HS_8rgGq_)yySTl zziY5`JntZ{{gR$LYQJc@l?4+8*%yi6Z#R5|D91oEla)unYzDO1dO^^wE)= zw;AN<$hmwrm@lT-Ji%?7f$B}R3Pa~*=%(P|cM%jwMO%znR)C7j*f(0~7=!uwXKwL+ z!4v;XAd4Ecg2r=|YaQ!pEi5A+rO?I8m>?m*vtNa`;5qH|eyebrikAcWijVx*tp|b; ziZy15|B6G~pm*jCZ!qXeH`4OpYl02r9l?;K*Iwx9{-$oHYYgV@;z*aJ;%cV4Mp$YZ zEvWlTfGGro(eQ8K`NnpKX2CW0bKpIlDRGH|zJ+~K3YWYco+Q=Qj!WOvH0w37r&xY4 zVPSC*_ciZQ((Bdyib?iPk^XCOo*_8VWr`rxV+Y7}1TPV#O`n>ne=keAzM%Ur` zHwO@89ZVQ~SewogbOYP_Og1U$HG*e{AJa&v0zc=i+%^4N8KjK7vN4-;xU*c#dtR+| zyoX#Y8MepWDjc3SPuXA*(c9_tG^OBk{D$~kZZ_BsLztxIm5Gb|Sjkc)Z#CwXv(t%Z z5E#oItu1ZH%>g85}{i*VE^tcHMQJlw$vr8D|d(6(Rm>{ zbFCSNEz1#6?~XzM3;lkHZO}1e#O8EwcoZlBoK7ANGvv@&OakqLlcOAuwn5ju7l2VA zn^nkd9Pg09e=uG$SYFscW>vR$gN$d}GsqE3Rofw*|77!ZkYl|Tf3L&v=~r2Z&eR!$ zoE@-7grY3Jsndi*=K0aM=lSW%81vU3ZmH)v^rRs=$goax75~SD7z2Whtwe#Hp7a*N z9AgZKcVAb(CVIfBJnixMYrej@ewV<9&5Q|i=+XX2R@&a63ScRJU8-&KAwUfG(2Tmt zOxuW-%f2v7X-myWA%^7KFuVsn&8xQ7b}zh_YjBX7Twpwur@r6GJ!Ld-^1X?#2CMk1 zfl@^>Q@rbXRmOLfg784r>K@pHy$`nU)7$ck)ksf3uvJ zW<^OBp_CJH=PSB`imdeV;Ly>iMu6j6Wlb`rH;SKqMZUq^!BoBZ%&uFod(!!UB9`4* za^4Y2)rC4eg4y&?=W6JVb&N|I0vPC1OxRK_Z^LmBSjF~o+bv%Rjj*F!`7sHG1rd42 zp28H;x^>oktQ>;VCRyz0hfB5)k<2EkQl~QZp-emWvi(UH--bz>&#N*^x{2CKv`sunSkLyokCUX zWjc#SX)6^Kh6T`U;V@%UPLJu@0qEXNnB&1ln4(PnBe&fp`Ln=XuHlmwYemOTLEMvi z=Z9aX&Fa22r=(Y11tDhWAoKs#kstu3d1E&7FsgcE?G zUi$#Zea<@zs8f)%50j2&|7<94x#(k?Z`tGnbzDx599+@SwQ0g66L7;n*zkpD7Co;5 z^zC8Q{VkG)L*G-R?r^nh|DdF#qylk6u6%S z;~ul%h=khK7rJ)wG?ajn+wdv#VvTtek|OMMuiNu1(Xwt27CxME+ij&m5=a$jiM35Q zcCb_+z%jF4v5b!k<=y!<<+AZPFmePeumNhlrqE$!piL#8u^g?IQc$&U&iLB1n&kX+ zgMS~?fK|#LIIiBTIa-KiiZ6TdE$S=NqT~l;mVD}JuekV;r}*hSo2TRHI07jCSON*5 z^2|h!p^ZX|p1K|`JhJkdGV|d{Tu-fOKQ?}fhOq`2!_(o0v(MeqcE`)c_sCQAEkO3i zMsPNh&H^Zx0Pq9|2nfsn#=G_597=~DAzC;_3-ZGA#M^d^ESf7yS zy6daRoz)%C9=?kmWbh&2^;FEek^T(2_uwo!NyzP%fLo>M&Gwnkfop(*x#VZX-$Wu^i`<L*`D{_Xi13vs;ZzS&j4%3S^}_yZNM0ceB>4KHsS4Ya+8>?i+>PVT3RW$Hyda zb@vFMR%hw50MOuy7EC4jbj{jxujwvDrk=dKyk-qB6!-mz6-v*Om-P}*X_(W+&mgt& z2^UFb>3+Psb&-5cX;T`X_>^p}@e+P%8>{9jS^!1(I@ASGYB~#fU_BD4>#>Zi17@%f z%Djie$0ALcGc(GL1M=d^R{eo@gC90qtHKw6C)q|tGON#AqVLa?t+*}XSoSIvlq}R2 zL3s?(Q$_qefUR2qm50B(Oro{Yv6$$&PPbaWHxzqX7YDP9;Rkzref+bPT3CV@aeN2HBy-LK=#c@{qS=#0&WGaCL)d>XJj{?NdtKZK(?liaI*m_V+A;*S-;;X_ zgmf}_Fg6u8{-5MgAv13|ph^b77ZMy6yZ9GQXZLzU@Ey)?q8>U}li7sO){Hlv-Zdyj z;p$m-Tqd*l-tZ1B)=|w1p8ZP9Hz-r@AV1S5fF{VZxgk{UzUtk z>OZr(nva8^=M&q>9AB=do^trQ<`MkXvYgC>u4byX&{ia7z5W!2(%UvTpBJ3-;Lux$ zxM%-0E548DVr;rpT2jspF0co%JD(&gu4J(AvfyVn2+m#UIi;t#+^4irw1jV{M%gt zNF0Kx%w$l_`lAKSNZjo(>+`jWDMxjdbFNONROUpPcwR)F`qXGix4BjEdmn zu)@_qa@gc6!h;g~3jQMl-}fsM>jM7Oe*Tm$BL^bh%+OiZhLi3yd(vWizycaga9vk- zK90$oa@%34X*3*+e>H=}s&+)@`3@qGs~UOF;37kk9bmT>gG=xbw7($FB9`Q@2wINM zJGA4tlgVVT6qJUvc{dm`XH_WcBQ@tNcAmTrR`#qcp5&G10Z_ z+;111Z`Ag?+P2%1wnxNCiuABM`YwWhH1&lx|C1Q{kiCdenpWudA&rs7V?8G798z%M zo!A3Ydc}uYMTn1nG2`F;TC*nWoF??o?R3eB|4{J~Pk+w(E^Dfg$KnHVykM*p4jrF( zu_ftbN}lgw3fnG7?9@)BT_rRWSbDipcc7%wA8D(tI#utkaJi;bSHvj6?mYiZVyyAX zK=xVSDR4}SV>o_Kpxxng>N}6snMiF3boVD+t;Zb4-sl zYMgT8km)n^s6TYjKrKcj6EfwFUO!W+SCB_QvQV<0d47Wjn_!%H5CfB_sK|RffkVAW zbpY=4h?!$ISTFdtA}7`Zk~GU_?puGfoq}a70(x7h*_rcKTc2Ngj39t=LP#P83S`ur 
zE{5>U@hL0^*{Hg)4pamFG++j*$;Gvf^FVbnseHg7BBRExE z0IrPIh4sf8ZXa3I?4r9jZ%z9NEBuuhNb-R6nxvWfKaqB9@Wnm5z4I2uG6b}{G#i-A z(o)aCxqhq{1YvHlvx6DbEO9$jd@yf`_jYAZ2idTFJ}f)y zxGd8|2K9}WkK0O7?aEP02w#N~B;(H z@dDK@Nb9L;U7*>*sa-L@sVEobtr%;&R(8JGLP^s_F-2&axEIx zaXoI7zW>doJq&+bi{1G68)@f8ioW7kay$Ez>9ex+jqBp%%dhdVv9&#p<{iN|spjhx zUd`M5njh?&1ui54!8dNHoU$n zCFPD&!`Sg~N2IEOD0DHKG)vkzU+AL%p+C9p8=Qv}3m*07iFK$8HaU{0`)9P#kJ)^D zm6`QKJ_WyhuI{j=4KJ*7n${%7*=POCy6=a1XTR7{8_J|6CqzUnA9kltU!#6Uumv~z z8T-duMt%m{aT5>PI_nk_P&E>*?si?S&Fd$sS0T%&j#%q@X`SW^(V9PMGiD6e9GYU- z58@wab*v#RN(9_d$GT*D={pkunWSw}xbGeNy~84YEensSFX|AAbK`LG{@#NysoAOU zrJ_2iW(jo4W<^nmq+eL?&ZtL3HBR5BTZyF!KKK2pT;ouv3eWlt9^+%(No7@MS@L0G zV|95p^1=6vda0H(Y%8zVRQB^BOp?X9w_!kbPJLCDpJCwjou_%RdGQDpXDJ_?BRz_} zYI%GL+>i3nybd{13N_Y!>eW7$In~lmf`zr)IfBL@CXeojRa`aNJsQ&SJjJE@Brl7P zt1qv;C0Zv>6eb@^S5Lo`EYt*IuJ5k}S6|n&N|#Pch#M`6t;N)F8SM7 zZddTg=eot^5)m1H)RV5*eYs^^mDDj^z^(W`-o7uuBOJs#-B(08$(KPmv+W(*g1)YO@wgAh_1ZjkU3~+@u5l?xGS3O zw4p-`8w08sC$3N0a+SkuH2C#>`if>X7!J}8;(gME<#q1yWY=vO0%?{V;fM2OSbn*t zPqo~5HXAQJ$ImSpdW)^b*qXgYl-4iTMD6S-*9(LdH+C*SL#qC&#dssIo=n)Rf0#*U z64XmwaB>W|@S5!*DP?+N=hcJHdckD)35TjREi6-#{I8S_*jQD+$tq@ycb@oby$&AmxIu?rE{^uj-;ujtelrz)|CH&m^%id8tk z?;?nlN~6C`eQi~dpJyQW88fL_@p4A&u^W}-cgzzMGt8T)FT%&Fe{<7TgmK;R{Sl4(U(oV>+3OOb}+BI_U{?G06bdsgIQO-J1=m=ab_eJVnW3;WzbD8!%Q52w{5mrgK1uc`&*q;y!S2d>K)EmfJ zJh_&utg1aoBTspqIe0?gcrpldc&R5?;lQxYjlJ5(nTAhmT}DGC_!{N=3X(O^`V8~EHKo= zp=Ko~_oVXF`b)@x?yJETG2P0&`2+Dtz{(5HOaK+X2MAmowHE2A9QB6@ZZlYXMFU zfP%*;7k|sq{fGkwEjwFNv?4oo&AqJ{ zV7N8&+3K`pf@q=Q*feLaR_9WCF5%V0nETUwR9SNH$KBcD;`i?{WU*~xQbrCFp3@(> z#|KWgh##s)BzU+)>6+G}oE~QBmfUA3%J*23`=Ct>m07PwvU=*)d2qh|kin%YFZaCB z>*87cS?Yd{38_t|LWV(OW}NL?-3)_f;f?no<8X8 z&Y;S!Q?P-TGjONI);L!DOFu8ZTAn(WBx$V~Ufl03mvkdPue(=Yk5*ONyT|*Dsvq3j zSB$<@x6_0Pbj}~%TC&4h<_ii|^o}F5u4O1UO%ym^BoLfh@qwXGae}=2y{UrR-c)sh zxg~gvA3Aszt;PJNHMoZmarSSNm*P8`B64$b@YaC+iU15Kej}aYg6VweaLtk zla<1TAFbsx_gcAZ%ar7*0$Y^S#Kb4<}9}2G5d{mD)YlWhhZJ_!Ya0wCX%Q^ZHnWR`edB- zM<+cZ8TZ~(%(}E+by{2QP4$-rTBwF;S!nNPdSwBdXZ*Sx$xpVM*v&3o>D;&%0Kb|!};Z>{-O&9-u{t` z7oE?RllPn7ObamNu@VhWRR(I?@9ovc9PV*1^$k|rBaJl9=ZzCzB5O>sC*XLNsVTP^ zKvF9h`rxGb>Pd*t4(UP~&SA`ry2#ynse?Mz{^QqsNOM6_NIJ&R3hsxS&w3h~j;}xN zrny|}{R;I%(qGEjXU36Nya~NHuL3CQ}QD?y8JAiju zldww8xESO|Lt)?3BoOu9LJ(O{6U^r~^_<^}V7M10S1z1Hzjbu@m6x_mp2yxNg(}TH zIEPoJZgEK6*pehGR%7{5PQpsw;1{|?x8_AgY@;*m{ce`OidIn$4pZjJ0;p2A%bkd* zC6v}OqxSd#;o|k<1{h3e>aM7wD2H^`lGN}+Zdw8QV}b!t z-~X9W>ccxi?Wy2Y&WVoV@N2+aNHi{dm{oK8@>g(~w=y~^bsPbi9r|?r`qX-ix(m!AdK@>?S((om+8 z%MFnTI*s^8`n9M>S8FV34=1_i^%J)lxj21znKYV@XnXgpKIAAB#tW7?$AksQ5c>Z#c|5k|4JP@$dl zl>!|uVV@ZbK0o`h+(&#Yn&tQ8KC<3K{@^geeITC;{H5v1+uo_CsBEhBA>81xM-tlJ z;(#q8)x|wxRX2abwS`9g-Ntgt*49U@B+&rBL{i>?D`sIRF~y)ML4&95r_A=wSW3aX z?KI=|Ro0WvC`&;WCwLRkx1QU`Aqh6}axwaZDcJFN(`{xAkcQkU%sc6Ao1i3zk9Y%+ zn}42%Oy0}=nW%fK0d2j+Z{!Q3Pux!Dzn((*%_FXumlg=f5`O}b^B%0@5)6^}p}`zI zI(ea^HBwi&*rMoEB7y67!`Y*h&L6o6gFvCDQ@P;>4>U<@Q`X$ZI*ob`&d<7?Y6Mq% zMAn{ilZDqCn2H4+79R+p4t3bkGU50G^zubR0UIv~8|v3}myO={kwkme=z(pPW{Fgz;vSN;diUcI6-}#Knvs0y5|lR1 zQpo~KWWPB&DI-_rbAR4bpX!~wkeKI+v*MZRWC^WDDz#1O;_E#{uw14M2*ICw=g$q) z4sB*tBYa!AFjS&&8CmWvm9KJ!fpiL6UePz`t%O~V*&Z^S&M?@wvv zvKGzS1eh$7)VRJ1KH$XrI)JJl`4yswds|Mmxp&_V9_* z8MJErRe@(NR7j#%fr*8+z0*q?FWwlB7^m7|O5Jp-Uh zuqS?|pya7FG*%(7&pywch9CucB?CTSX|Vby*i%^kb#Pi_IKSetEc%-PGVfkKNL8wJ3bTgZ8y zY#D2yjA(v1l0VnkRwF5IpgRpHufil3a&JX}@yXSBO|lVT^byO28ZPx@=85>@mG5-% zX}*{8g1H}4!Cd#5WBHsCSk#L@0RaRSf&jITw9gww!6k=E(bV-Be3E%&7teV4XG>ivu2{gmZez?}NS zsn6UBg&(xLm+mNO@BA0$h~8Z)I8UUahxl@3se^vw9D9q=$Sr)pi>%D9YYAG(dU*4f z>Hh1YKphBe*Meu{fgVdY5+BF>4nGc({tmyi58oTs%-ouTn_AGlnf(tM1Qh$n2}cZC 
zWXgAf#~$2cJdoiR{~f;{%_z6(C(?PiBkM9^u!5DM{mX3qFyv5I2qCRZX<`NeyseI- z`}#jt_LrdxeZfB22*gq$vT6#=ddDxq`Fl?lTS@Aoi(+GAjTo$|r|AFwGIo&nCvG^tj{#zK{hM9m@Bg6+Vwiq|g{7-Y zJtc{6X6Id(=ba!Dd}KS`)}{yCFG97tbYG1Gf`WC zm-M1ce4b&h3X?@@l5{dMdS^C_fk}&m`0pqE^Kl0nIfc@JpnKTHvUp^BdD($_(igI- zmHtaJK<}1TB5%2S-OWeP(d%&eu9z6S*OFH>9CEDw2NEh23z7_vrD6O)bCHbZWh3^H+De1uJG|X5Pwf17Am)F9b5!0C0|z zuUgn*`7P>!x@#OZ?;2dh#XA%Daf7`gqcw??>7NrIboT~k)eD@eKU#W=i&vlW=4IFP zvw-w)oG#admvptmIglSe1tpGh_hzJMKWk1I8+)BGZtZbIq&;xP|rGZvl{L!sBr3tKT>m>hT5d@O$@d=iY>u zE>=%jPSulSD4X`g(#O75G^L}0u(V2p*4jS?m@S9iUbpK9DAaCl$0sxez8!X3|LQAMG#~p zfgkI)m|gqTMCm1K2~%NhUjhYJdqxc<_h2}j%t>XT`a!;lyli+^@#Pa!)slQJ@cD(C=|N2t>TU1od&!VRbi#m{uJx#C_E0nbSLy7e zdMI`*ti&gyvjp^(V!TRo6VG-C1BoHwe+8HPCi8X$;5t^O#|~#S`@O_Bs=Dy4Y*^b^ zb$>(G)IZqOww(`m zZi1>Mkn;}Ym}h>t*>Jp)TW-IsaDY6#pb0=eTI{_#jv)7mnw9%*wku9G3RKsPt!uaZ zq@y18y3{O!g2&4Do=On!h}{c~l2dC*472A}?%k0TBEK50Kz&bfrw)VP*Pa>sJ`Iy_ z662^H^DifI5-Sw+Z<@>4>*`qdlGgS~(;Nm8C2KcHU#>#1pYj_0R8KP#jI@fc> z`wAueHmgY=z_S2#fc{mFgGw~H9z+LYSE$_dTvvEIl^f6eM#TTY|U|L?kzPo1q~9PutG3n1J-)qFkm5c3X&M9Lx5S_vAi7mJREa z0H^fXI+a|2U;Bb2NdE4Zfa0clV!itsT7|!(pl)9S)d$jas2@@+*j*;&G!sTc(Z{g~>j>UR62RFsqqGHbqfR#?3X8*12W z#*q&21%)Ul92d+9^%k#A3+{UoO)*K&9P7+EC!E}Fm08fM@~)jcUsZDjHSaZ25-)^X zNv8#FMT~nlX2rG#gQoI4v*S|g zHbA2sQo?{>fDhG|-9tjz5zu+Yimv)AnFZWC+XJ^DM?&J6RUYhKgkHQoB72CAZUDt%=}3hlAi-@6+C zo<_~}3CtWuz0o*MfUkECDf*N`8x@P>2m(X2;9(eNhToXx#gAyo?iUe?eRLZ~) zltd<}{nAL%`|>q<79F$c+LP5N8pbGv8UZbpv~>}DzHgmDIsy|;JuqCXzQVSH3D-i| z0oN>?akt%R|NTFZqJ%^=JA?47K^5&m+3N-K_yW73WhTAN`0>?DRw~(gvDNkY1@Suz zRS)b~i{Q4PGsx~PT;D@YsVCT2i!}~iPEt?ldbIN_QOkKmQFe5H?Bne1`!hGZzuc2c z5PB85!H_!1j@@i>Ieo=QMqx5C=L4Z}?EBz~M~TuQ7UsNb|1eKl3A~Fx+zE^@Y73=@ za)$}h#HWlVnJFkaS#|)L{8~n=b-72R7D4Xg5>8Wz zOT!D=j-xcN&lQv;BqU>0RtU@aR0(CyCk*OXete$=NvNK^Z76gA{>rFD;qlz{7Jn4u zV8_Ch$kxgU#@xfNI3%p4lzgjt)|Ck`;Vx2jfs@S^k0>>Xw~og_M}UuKkS^rLc56bF zT1=QMTuH)}c*&GmwCOPElO{$7@MM1nk4dZ;1J2DUhBwUm%Pl{%-g#NZ)SLI#2a~CU z-qbpjFTG$aS?fXjEk!EP62|Pa9LW8>qN$i6b!EQ68nGsJng=qKOS`3&7%d6*|gsk1;AbYe-z{v)`V2h3%+;3tWs-xlFO{O%C>u z2?~Iy3G=RFK)uO{X2!hPj}v*tYVH}O0+~^<6xr$QitcT zDVo9j!elvzJ|RZ*<)Qc)HQ8$}E{QiA=?L@hO zZm_-R3fk9Q4}e4;Sd1(&A>j;*tDMSqYLEWmxrKg)B!?n_G-fD2zXim6@2&h~G*E7c z`7m*#QiuCCcX@73%B>Hauix9|n`rt8E2n?9`{|1RcFOQ#jDZ%~ndcPh?^|fEOI#%J zVGUq>^vm@8um$ftk~e>_e9$$qvbFvnL9mw)E#@2v%;91z1Okh;VK@07Epn;7eQ2dw z3aJB)0%l783H<&J)PuB>l1z72J3-e+S-h{m%VJg0B{78G=tgxkzkljTOVL%Q_~n0G zmR^v&slKFje^HhM-gZ|Hd=>0}eAVb(xXHQ&PE~XY@Gq`iw^)9$_`k+0#&)e3K-U^G zdJ~QDKp^+eJ?hV&iefS1sL(cX_6$sMDfQoK6%iwXW5p-CL1-l?-#1kByT?g!$&KS%ZV zGO5#F+bzDW_tBRwg!cE>pC!U{83lxpxW&lS-zmGyL`nYAHxf8wemO<|dO+1}A^h0! 
zhoI{C+k_f;3)zoGfdq-VO|i+H-~K%3CuH*HpQ~Qh^ZeedvlZf@wZnqs1`cnlm~5?PapLP{MUr8wK8~T zy|;guDf95kzuK?-`H*Oq?;r7MKDF$^+g5j;Ab(%TD^Sdlx$hE-p}*MxH3u|hzW+&Y z3oVB%e*W*PFizA%F8_@nJa#c5_4kc_l~}G_9|F7OVrz2aw*k$iczwXj4Z-CBgxXcR zZ@*8+dq{`E$TfIdYl%qe@9#_p)w)8)0b<=YyKdp%jps7GM;HZt3hO^x5)9Hgd1 z_0$)D(M8)nHu;^%HCHP{zxtR<9}MaQ=OP+~|HDWBd`K&`w_L0A8bx{SRWpJg=jFfu z2Z$h&-~oJR^cQ2}Jcw;Je@^+|zAaQ9A{|o!H!)=ZhQrIm`7b5XKOYCaLOt+?iiXmY zM%qkWDEY5isz1KYAWTT&Wf4+e5omp+c3}too-T~Lp;m12UbOjoq+`@Cx1xXE;yOfB z>V~F-1QyzS$^NJLf>0yDcVb`?EW-TXQ~$Sb#x!^%q}G!B7@Q8F{lXmoohUFXCACiz ze3$fFr2dzMk$Z=VcDAR0Psb@fmiq0})Wgu`j8Js*X)Vv&HYdN&q6B0`U(v`pdf*=7 ze~fRgSO|2odZPW!4+vW{U_W&HeIajphnh=Bmvb9;W|cNVXb{4tXy}&3C8h z`7LaZ!yB7^_alB-PuyDlZp+_t)G)#OZ7+=3`oixb?cvS*<$?UFyIscRHk)!xM`w{E?Cbxm8OUanLJZ}Z zm`#ATC@!N4+Hh=Ds6A_>^E+P*4;>!jI3lf%na)o5Vm-IkRF22qkq$Rq{$=*(QW^!% zt3FDf?g1Ins#n`QM(8o|ddmsdm(|Wgn8scGS5)%1k4`LMMaq+u&v(u_Mj~o=J;IUO z_kNw7sI}sq;GtqeCNP{IvZIXHe`r*|9XI$*k~4gX6V<%MS*+_C@5B$=x1=R#cx#Lh z{oI2f8~%k&PDRBCuop2bNYAquFu}Rxy$gNJ6044qO?c}0nlx{8_1$E~5f;g$Oi#vT zi@dl0SpEwL=pmhvB>nJfaBRz;!PrxpYgn3Ni&xTJ%4H(B`ARB?*N01FR#N_vssqWj zSfN%5Ir_A7QhtkC$oZc;4!*ERW%-^^Tj`dAXdp4>Jz3|fn0=x`Y-#zTZ_PkqR)LJ=EDa9FroG{JSt#9sl$Z~*0}OU zP5skdc*|=)b?%oQwiK1W+KntStS*`Bw|0YPNOb4tjTTHWJ&IiQ&cIA`-5zX9cz)}b zIpzT}yJGp3l~K3lc}^L>e@D-eA%vw+FjFJ2Z`5uMn)6LN?99Fi8}eX9ATE5<^TYA@%ycJ@#)-INA^LnT;ka6 zV12@UqRzEuu9u6xW^G>(0kMRuj!l?Xidh@=*r}H@K=Z}PwnKtY9BBgW*PT1#;IDy zCUafxec?*RWoFRV631m;-Wm1a?y?@M$q{@e1XGI3Q(Wh2b~@fRf`Fl`63yVRBL}lPr$f0 z=T&Dd&wPA;vF;owwxg!U%UH9{4^zf!ZM)Ig2?eEERaSir@AvpJN;HP@2n}{atTzEp z#Y{RD7N|>9a}JW}Rn*N`MPHlZ4fZg)xjwA{MuDn6H|TKr9w6UJn~M$b`%Im*^FLag zg9_%-*l3Dq$4GxXpHn&D&W-_E0n+CM#~Q#VZ*s8NBg;)KDE{#!w`jiGjy!pc4&NLU zOs)dMgA-tEb1!SH>7=F`4i*qLJRVI+-@;d%?4Ej_8rD5H3|c83Rf0@IdeI8^2oQUl z_FbH~AMO6UBbaBwL{S!Zk6B*buG16fT8Ny6CK)GEn4X_37I-*sCgQNwHUeBU)OBxA zbv4@94}jw>7Zab2eD*)#6R@=V!bCG3fK|8*nl3*=c`SqVb!VBJ)i7;!t zf?^rpdP!f3DFHNZA?jI-W=qZJD$}Z5rfC(H9L@CRo%a!Ku3<`QZTzic_NSjj_s$Lh z$S(w{R9b>2pYSIW>xCzoE$_@o zf5cJ1A@nfj_al@m9dMXd5j^>6EjjnC>)!InDXiV=d%341J2u?v zz^3eg+MK<$tk7$MkUWc?aS2;B036>)-2!TVz{EgN4U95B1>7=UXJew;W8J%F?= z!`WrtV6X?7Wc5Owy78*zRl9DbtozeVe_#%i|j_7L5Z4+%o8lg;?hIJ3SgDZ?jU5)uz~gE`hpDv=RMmsA?G5s(b1 z53(ureSb78WxDxY*rT=U68`u`WG?qp>9;GV^O1>bBQHYA03E4kFNLSC)5<;sw4qNV z&H+^H_Zj3YWR|7LRK2M9W4$MiWl@^K6FOV%#YZclb1qf3+}fqC^$!_Y2<7uDxMZ3v=X%v8XTNI#=3e*CQyDb*0d<90*#023$8rNt^;Xx7=YTI zzd{qwM4XH&b*SuKbb{VB!4t#r?A#d-=|OkD*Bu{ii_5Gg5C`m_`SErFThk6#760tJ z61Sv#f`U_kCGe2yLf`4(HrV|rN-mJHFI{Um_h9Y0UJia{_6X;#I^XSYIqgXXXLLdP zaoD9LfW9vRoU}rYrO|B)#7_ed2>$Kkbm) zxpv;?+coM2d;7%YIHJu%1~?==qt+x$;}$L0-O77Ob12;$m+HxcemHICxP6cCl}a6! 
zhC?0l*)Q(PuOFJ{5B5&FAJmoZ=j*ithP@R);6(rmngr`qJ2i=Y*3CZ@x*mqF7>O%{ zQ0(ROUdshy50hQINDK4P;&2ML&8B(cPp7IpY9J_)B7u)i9W-M3-T9oaK%`$ z(9)X8T*6D(7)xWdxpQVM}F}mhuv~AGr0!l zrpF7i4yiQQR_YkjTPt-!*k|VF+jOw1YvQs^g!w3CmU=ADv%5kSl zpw@lh0K%1*R3rTt5-aH#~EyRCM?MmfQr{TfnHpk4Dt zMMXP?K(k+FqojM%YGR(_ktxk!YRV(d7Jqyver3EMX{dlS4vXI~Q zM(t*E;FjzWN^*cqm$Re)wtv`xjeKFe6svl|h$@mFM4N*P*;p%zUX1l74!6sdRFu02x%N3cz+k~$Ntz7*8e1^TIc zUW`LS*||?vboe%EciaY7)QJwEsJa&bmmn@ufi{O8*1ro9-lb42 zE5W@kyfV->Y`dbzuna>nVn|gLSxVLcT1vqcFjkH^l2GMisM$lu(ZYGA`J@q38gHKj zO%3D~l&VI8h4AQ!Z~1O)o65;-p$e~EuqX*jxyi%l*SQ0gCS!6nr^U=QJtDi>VELta z1A|+`9`huV;R)Z0p_1^LNf+d3scF9g-t9hK=@`xjojFR{z06W80jZNrqY5gz?hf4Z zuj)^ak)@2W%yiJ_J`DXI^J@Vc;9D`X(^?hu_cOUImf2GKm^1R)z_-fNLl!5?EUBxK}?i_nK%_QSZ++7FsN%8tYl)~HE947r>b{lMuT6!m&AytBX#3|`zD7`#Z+zx4N@SK?yAu)2-8M9ht@8W|jh>9NWYCkz zWd%NtbFm&TIIa~k_?i|E=azY7^?}=9Kvo1zWU!XmLJ#oDjA&(Acmb+SyNzH?3qV(<#TE_In%Q2kKhD&2`IXjBX za9{nPPrL%p^|gq)_0bZZ-EaO%Ds1kB#CR*t6OY|vGk%R?6>z=7qdU#0b@syCDg3FUthRfIs{@xz3ktx^} z@a?MR1wAn|QS3JMN2*fEKV?pX)hISY7)emnp8^?H#WemQ5)$z7VmP=D;aRa&^)-p= zDz@dLZy}bif{nXO2Afm$XjD6j+=mT$-~OZIP7#IB1ZKrX)9lci&{OXhb=eV2aDy;g6vyS!S7;N#j03B6~94h zpfOVj3^iY}{O=GCg)A{FCsAi4sOcGdw{iUWga7zxSEQ#Kma3rkcu|}oeV3j6U3t3u zE4dYJQ)6k7{YKz{e=Z9-@juIJ|M?a>recP-OI{puU3r(AIavSw`Cng`%mL>x>r@Sn z4f@N)IcQ{4|MD$RN=$!IQ*a&vfd@ll^M4fU?k>xi$`W*Hp7#(uR3hf9_^k|lHw+Xc z=<4k~J9k=amN^gau7rXY!TESyhj+W5s-QKLjIXsdh%kHAggdo!&FR;V|1p-HlPG0oQ==TShE%}K2;u#I@pAtd@8m)RX}+BYCNsIXuyGLI zj9-|R=Qh?lX!(P#m$>FP5^XBp{BTj-f6v2b;br&FHI`g)2b1G5r~!Y`3J=p?_-;Sf zJ)C49U;jmb0O3OzE`r@6Ja5OS&Oa+=c2t< zsu*$py*xR9nj2LYLp+*PM)1EZA!($_S@zvq0B`t*_Xz!BZU1wE1>Yil`bjy~i^*qN zx@bD^^0!6Sj^+8qM;JhX%k2}Z%)bs)n}UK>riu~STuNSxJmY9ee}CU{v4^inl_PMK zC2>QuT>o`B?7$q7`sr&$;7Rq@>PzOo*R)V?mfJHEJhVs;ukm9xd zEnED>3U0Z$qf$ZG`?uv!@dF{E9$;eN+D^7S((VF@^*ce41o04(uSe)Ge_z&nUk@*K z#$a$+-CU$)zW_4r4@f~|32uzBvH8`{LwsES9vHTVuya4D!PtL|mq!1Eh9%>{b-odS zTTp8#`8%2m)*^x4nt|t94wdgt1pkP`|1RE@NY14n{LYd>lF~8L_}5@)oRY_WI_um zN%DR}9_}`OS^hVQ5tMRJ1j*X}83gcO!C(YoO4KTWF!V8%5VYT>5yle&JhACSQi?Ig znIGrwy8c_3$}aJEqlpcWMqQlge=MXw@A7S-U|CtS)Rt!Fm+Z)oMB}`FJs*EX(_#&Z zJD8A@BY=#fORDmBss&QMEl3!UK!IRL@UB4czf)li%PA4z^AiiC6BPgQb~n~%wZ|mX z+Tq+ZpMLdL{MSGT7NUN7ge5WWJh9AkukF7zFn=x7_W^=WBMd$ivN**IQcV7B1pC)a zFV}c%>4-E?uW2FtV`=|qkqc5IM?`*;*^&>&U)OrqagjePOzA7SavbR4 z{TI*Z&l~aVekhAU9Ay(t!U56-L$u$<4**$gYMG8`-$hpr13muOwf{Lro)54sVTl6= ze^USKMd|r%iag`-K0^$H1IItv5PAOo^Pm9ssv+13$;dzhU@v$s09Yot@L}z{Dgntq2yK z_KJ82(yS6kk}t21^H`!~X+eBi$o-i2Ks=cdiCf{(Dc$r=}@Shn$O+hZ=VO zC3vmCVc{182dgfje|_8}Jr9rBck9h#vuh$JFi-v^x>3k7f>Ar2_$YH%yhR@0PyOeT z`_C8w9KDav{s(%VgSOh9Bsb;1Q5cYX9)acxR`BTJ{JG-)9$`;Otf%dL-xn8k|35IE zLObP)PZQ<;|8ZbrS6ce~|NS^7x0VcE$!&W0JVWT;?!Vm*7R^8Q>gW3~^tB6A&!vBP{;{4~ z{YbQB5*37|zKACHuhJ)XB0>M=z(MMP&Ly)_jmitNB z?~Dj*O$yMt_F>n3LBntU>|d6QB#5HJk5Yb-QT}x%+imzvo9<%pUoK1kn8=_zy;nTz zYZHQpkt|uCeE%B1KOYznhDDpO0+*}aaa5r<{(c8AaD-Izhhy1M;0}Vgss0U$*QDrM zDFW6wV6wKD?uGn$B>sC@Jr|)^j>SUvdqxxtV8|c^;onImfK-r*F8DL$(N`CMwchCd z%TQ3*+z2eZcdC?O*l- z4T|kl_H!qYTOCBf`u)Bk#XL?{O%+aI2H|icy&v!QfM@3U8FhSlCHyD#4?qq1=P~h* z!3NZj<|ibK-@)nT`DT9q@BL`)MxLdz!-vCN)LFP(j(0!cNra4pho@F;zhfHA{`LnD zz392q9z;Mm2h4X<(B)YU??q}+Gw`4M#iQd zzw1mmTJ3oQDfHq1yyRo|`dj`Ppi4t+0@RPqiW@Hg*nC^|W7fIrwB{9r1@)iL0tGdu zJ>Q+SCDM4aXyvb57!$j_D{vfqW-Al>!Guj8-yS?m3C&PV+I%m*R!}(rnI`bmP0c|Y zbt%xP90qceL;0Q~e=kbRb}`TFt(14-g9{ojpd@#q?6Q%9Lzz-LKrYYb+)#f~f;_lT zshGOX4O%0|mn#F!b^8mhzS}Jx-wZ0^Y27PYi z)4O#zjUziRs^H)bgsfh{MjSI7Fl_`_p<<6=khK*oo@V{(GnYp$Ga2h0)pSTul=)r* zivY3R&;@{`NdsSIeV(Mb`h~<-ZWBBloN;ktv-UNc)%b^M;NG~Yi^~jio@uH9N>y7g zuixFis`zldKgC-rnmJ|AR2HQ)A~G`LYUs#6BiD?LJ!KQMVu%TGvgo9~v=ew@=oi(V 
zRX9S6rJDUvw?n`08PgQgvi8?K;lk*G^X+pnTBL<(}~;4YOoDd zU@I*8a*b3;V`q&zyv8IE|)aF(V(ADV+l;YfpElY4N*gOXS zAdM@(I$omtjnhdJYTp#-2a?+JJChIZt2o=)JaXIT&UAg>v?lCIW>PT%I`-18>cgF! zoXX9eMdF8=TQ^*50CVFB<0=ar8eMVo6VgQ9P%;j*fZpD_`IN*N2c3j;pxIki3>I%C zhLA}dSmoj)3Ol(*Ncg#kV1vk zvXQZYr~o9?P~F}9Fx*@Dua8-8O`J*Ci${X~|{g6uw z``BEhjDI@9La+K!$-ypj!Ll7XsXB)KV&CJn)If{YyLWX(B1K~Wb#E(hNWNYozs)W) zw$WBA3zd8&AXs$wJ`NpF)pxK#gozQym-ZddAnR7l;`J`z0C|w~-x&nxh!^9jIm~3 z`u+;;U$?Be`^5IK->9k%CptSvo9#H8F+8KI$Y|M>`LVkff7DyPyDOafqF90d{cHRm zYY;8{@PeY1DO=@+%;6nf{HsP`jdi$VX|U7~2( zM(U7#JPp7ZBhcUUd6ThsmUQ3 z3=h6n^$^0WlpRHb$i@tfHv zGC8(3%M0$*5#Fu~?f1Fw8iS8o`Ox`2sG8#onFNrnhE5fk;+mY1H;>vr=w+I~RX&zc zjbnuH+Nt%i#pknp+k8ip)BDD5O6k-^`;PkZnr~}(D<@Z8X{IA_Yr;n2)@`&dEb{%- zWA&oUu8LX63lBFyH<@_E$@|LejcYkm2_Kjs|Gtr9D@|9%VcUANS~t73Ovu`h*&cXo zb>3(+4d3mYKm-z^Iaj$^A@F!84}!RZj*1f ziJF%;M%*ZEI(KPa@GC)l+o7!6ug70-f>akJoX6tb7kp0Emt@AN ziOt@BeK~&XZM`~SFhyvPxbkD8hGFgVCfA-9>JA*|vf-9l#XG!t+Pz?Gmz;r{k&rp} zTg?$p74Gd~5!a9Al?&nm!)w`TA!R_Fq+;;qu0i+?2$KBy)ob<%U=x*>k8T=ndVYT% zkSRX*Axd-mPWuZM2W?<|;(Jyw8Q-M+c#5?pttzpd(Cx+6EmVf~X-2dCCj0T+?Kh?& zY$7!!H8oxEg@syspi9z*)x~wg7kkD4Qtx>1Fo{deGZk#4;{cQ09UHPlL35~H4s)~L zD)@1xMP~uo(7%npVRhUWG&s*gBjIXi0yv=SokIBGxg3tDGmwJbN%0HAa0pN;^>Vl~ zyaBkl5AT|CHJKhhL{LFt&PK96|9+9wjxc@!pfm)l|F{}obRgdQnv`VmW7_)rUQT-8 zCvvxFQ+%!IK7^KUtrm;$qMI#gR}_T;9(QyB^V&eO)}WXY=*4M$6M_a$Rf;A}>o+46 z04C2YZTpQ=$JrkAC4Jm=s+)34;>QOalrkq>5}{;EQ5ysmb8fr>WhfewO-;5Wib8^;f8apJ07VF}FQNsKVsc0d>agZ>pUJU3mB|YU%nZE?v(!tIVd(QKta2tpQDn#MF;`c1rq1$<=h16qMXxU)`&8)qZwQxT7ZB>CvB!b{GNgUuaPm*q3_{-P=NMM_og* z_yrnufc54??YrBhcYP-3V_SezxUJ=qf zwM^OxGz0XtUBtkoo!gtDqB}~`H8IPadEGhQ0#ZcI z#aLUGRBln9yC-$E^J8fSbFu5{y4lVtNRS8a;#B;W+IL7b#_m@pN|fcHZxqY-@olH~ zQLXW`kH$MdPd!4t5^GENF=q~ML9BgiqqzzV^>-ePcfKIB-y}3Rd&eaKSrGa9GErS% zt{}-kuQll1s7Ti14Y#t}NEY>B5Wn;Ru`cdbHk!3mhsi+6-P z20{at>=WxR^48ycoT@O>JGGNL8`HAc_NPg-6!Dvd;s-b%0*yl({37xTs4ZpQbxYLY zhrDyKyWYs^a6D4wct7!N;nPbs>hKUhf% z3FV-Ad2Cg{x;)UAN{PDCo{q)#fMYbr;4)goyRmC z&K8{J+%5|?_%Z?^fVI{e!gms_%|5mZM;ml_8GgfW$<3|S$i+zFdKvzt2 zF3=qC9i@JD*e>M4cm>P~z$4D;0|lnyHcd=5Ap*_!7=+dqE|IvqTf)+?g&@**YIT!Z zw^MQe4aCc{r9(l<;!5MhKuR zF>$;&;X5T8U5bs!4N*=)biO9Fm946NXXEVJEVq7NOE&UUP7t+;GGV3K_QAa?Lr-hoc%vAUDf0i?A?-j*J!G z83fl$kE)qS2?RYGG4PtT7;)`2o(dryJMq-4l^MQN@~j1lZKH_OxG(H1l*s4JW^1Zq z181oMq^Pqz??p)CF%6VEMnHugwW7J^r(BcF8M6BJf(TkZS9=rI>c`VfMfqN&0AuON@*j5+^kX;EHhooOILdL1 z1YYl88_jXQ3?fuJ)87-rKiee4x{@bhuxOv^VybV_{KAL#rsg9s;JOHGx1CJZ;#acO zA;59%81~7(u@%Y>q}7xJ9m<>WY@5hfx2Sc_ONKAMIwPm6A8)Por--=9P$Kw>2RXKC zHq>Z?>@2V=?LZDPGHlETU7P?Trm+{w?{;}7hMguG=S~R!AMl2%Im=0*wL*P4om- z&8F2Az88yA1jA{kk#KZCu@J6yUEl$iv+45^BW@C<^v;~AAL$-joPTR{7Tud^rlwBSG_6Klj{*Fa z!&z3Rx~(!eE^lB@UpC9_UTBCXbc9L>F1f*T_1B(mxnrAj;C$Xu8Nw0S;yy<8~-_RktnvI!7r4j z=l6P0I495F8H0o`>*AdAu_Cm=*j2p4ySdys_&ul7F9I@t@yFXZ;;4e+Oz zO2_myol}g=S?>@4b?xYTXAVs(@NHO0dUKLwJC)lYt2?lj4eYh}v+*rbD0wazCak-b z`5s_z~3k{e&zoNHA4$yUlS8LZ1pvluh1MyaMpbw$8 z)k`}#mP*g{UEz;e$gZ4I4DaclN-_1!QJQwF?`kU|EiT1Nyq6*F^%+Opd8huP3IqK@ z(8OV@dtyI1pnK`gVoFNKd_sKqZ6T!i>QZK^X5}NgeDK~hpb#)LUr6qp+jnz8w-JB0 z22Wjcmy~3czT+FejC_}L!1p$Mr04m)+=sHE`qu#8eeJ(?SIZD*R3fZz`%?=bc>xQe z@>=OzC6mPkYCxUhri@m*?MINPoUaMk*RS($yHSosVQVJe+cM%sxUVRYOU5zje#IQD ztp#<;Ti`HW0zD2hV$i3zS%D%w#wjcQV)bX6f%(>N)}us7hk!ENlfu#YP&?Qn)Ui9N zt@!0=lq;!$?$`+^K1{7q%FPSm=RwvBbJyRv?sQt~A^5VTn_NSAFSYc@ot6j2#YA>C z3n$(xXcS?h1#bdspZmLTGty-!kfR@aFLU`}GbRJo6`DWbljU3Wlu6!2PJ4bpg1iD8 zXvPpS&FwsKHI`eL#+Z&) zYPY{JNsWqY*5paZ>#7!Or)c3kc+(G$MvD^QOr4?raBED=K-TND+PS;smKKReZiXY*tOlvU+>^XMfIWHc(RvI(|je0A6 zwyqo|mlS?4ulU(&@)#K-@_64N=nUVlyubT7KmGi*iDY+*JrwX}! 
zC1-khJJ-0^8c5DP&3js&rDv#)&~230kTnVa%cZ#=jVBkJ+$GKRbA#vcQZ7mBR*MEM zJmjpe+lx4IqqO$-OqfaNtXf;nH~^71=}Cf3tm*AWjZCEaYwp&FP=kx(xog>CEqAHw zJ+ad3;s>6hu-8)qr61n-iau%jr8U|91XdGfBTy+fxo5CaTdF3o?mem7w$<+5TpgnH zoK3H-mgteICgkN%)Zu(!bX`Pllyt5~@Cms)C0>2fMP-rJHV|J~ahtfVXex?8dG}Oj z&zw;x+fD8b1H@wpr}|=LA3q9_Y6AbT*^Fqh|(qnhCNW;!}p)6#5-{KQgcC~3csS`sF(6?G00&BmhC22X1mw)b+% zC%FOfie6;69it-*PlM6~vw{FM>Q`BD$s~ zwx%`INDm{gDMyA2EtX|UJ)3<34nRql4``3}hBh}UJ#osyp_o9FJ+oJ)LI^KO27Wc%kxcXlgS=8ES--<9q<<+S%lIXCU7=pH;<&JU$_M zm&tLT4`IkjAmf50&uB`ap#D)3DmX+QuCXC~p``$twJWs)VLMD9Fp<6D-5DP0F|KrD znET>vo&Vz<4SogIuE! zJyKd=wn)f}Np$YTrvg;NLyVyTUW}b)Hqmh?HW6_vRCN&$t&MM$T6lr-SI(L4Uxq_` z={iS}_4*l@{rTY8;>9{%h9j8zVvsMmvnU=;fJClZ*v30>_m@m{4G@I9S$ta2Fe9fc z*3+CGh_yoTnF;bGfQd*y)qM99q9GJpMH{Wg>z1&&TeQ>KZ;~xn?-tdnUpdHua(`We zF2@Ws{}g%ODJwTb>X*G5?WxOkz0YSI!Th!soo)HG<}842RFaZ}@QXnCP{T*-2sdA$ zX$%x{hvrVU#`?7OmCKXnt1=ml%v0kSjlILpG_;9cB<&W2(VIA`EEe?~Ol^T$xY-of zUfhiA=OoQ9xe1&hS`y#$v!6z@fD*>7T0eqhZtB&OQUy8T>DUO+S+vN6X{VEwC`81O zqSxPIB{%SFR;{rrdAQ81(LVN-xThrAsT0WFt|gzxE@Vr^Rg{p(H9|VfBjb-ids#Us zXO6D4IX0b^4t^BHcW^GO3$zY=Kwt^Hd`>mZ*9WyvRlga4roWH^FqAjYqUG6gS`@D> z8*y0O>(`S7Q<;pIYKhgNPGfx0(Y^l8X-}VKo^A(JDDidR zJWv{(q)BufJT>Kp4u`uYziyngeN)=U@x;4u)6pAZhcyQZ2y=Q21kcmSkcsqp%%{h- z*##bdhx`hCl4lzI7zCZl$H21TOfRz9%ld#dsRiABBEhz|55)R40Rda|`ZM8oastir zLW8-T0WQ6nxyl`<*tgFV_9Sap=WJf(d8@*B@8C9!8i=%om>>iW)|qA7#EvK1&Sp)D z-`a1s(IF&4AuqP_S*Q1l>~D^kOQgxDz7%@Q%AoiH%t|>3`fKxink?#3p#CG^$&};^ z(VmAZbr=_@GOmQm7y)S2rn-m_Bae|^0`!+K!Hm}E_OS8r&ZOpB7Q-#(b%)lcD;X}l zb3a>0!s+~iPm1i>6bD*L8TGNf*(d7{eb<%u&&VDLRS#DS*MHj#ObcUb#dj5wx@yYT zQh?%XwFdJJ(xfV-4xgo(F;%7O$^?Se#>?dhpjVb3f%@2O>iwx*p&}b`P}t}bbk{)} zforxF(DYjLddPtOr6nj1WNd3<9>>d*9-Kl|oL;brifVPAP8e8>sjodvdM_{X={PU% zCgS_kvk}#)=?!agO}mlEn?>=^kv!4H)U$^qGFQ)Mlx1>6e}HD8y=MxMM!I|*WQeuT zjz7FAia88S>omE4A3zbFNjopwQd3b+VBtHlm+z|^MYrAK{22N$cXRJ{f)}G9Dfcuq z>2bzWA{4^N30XRM96907MTDgWv>!i&C)660{=jr|&tQA0_$W0jpY3HpXZLMQVyI0* zA5?X&h>375r|6@=TH<-U)!q?*-x0~APo^>#E5WDrT%|l?+{>h6f0;{34(oC!-A!$Y zXdb)Rc;Qb}I`M)`>J^en9wVyW@J0Tiq=S~Pi3AC8OlFunijkQ;(@c169hJyCi2EPF z5(pPB;}DCaY__wm>eM; zFHkGTY4BY$@+t%$09gAv06$; zylS44J6L=o`jh?}WYXn}m(2A~M>vWi*%YrJcrTC#W9-aBVzLhOkA|d8=KR#A=#+9v zN@}V3>|lyndJT~gw^O7mVvHD${L4~COHg$*=kL{Z$FMq4klMhpzEU^o3c{a+Z5b2@ ziUU56IY34|K%YPlt>Ez}FKb16SCGYS%BRBbH#}ics>DvA=W`!C3WlFV)IW=#7|T~x zxQKOHE247pOFJ6Kx+#s3DCt%hTzE;GwFKPGBTZGtRJ6EP+P*Fc3UfE;KRwzAP|Q(K zFM`Z(2Px1l?eLuCT&{4i$9o{o8My_3V1NYRo2?Yu~Bggv96CjgcL8ha|DD@zG`R(|Vp+^d&sY zREQ?|Lf%Y^VyH`iO^9rJ8*hWoTu&oOP{i&i?h9@F+W$KFCRE!{@_oDC%!l~311#bI zm|8Bo0E}f+f3B(wl@bFsBqeTYR<23^806~w1*D@CU<|1Q150)FDez;(0@4FRJ9AcXl%Jrm2==EV0X zr0wpUvyhyZ*WUW0)q9K>MA!gO$9)D_?NJxatq5vCwMqz9*+5;^DRX+wt~z$m@xx@y z$e;)RB~+8WmkM!rV6MD>k{x#K44F2TJ=ur9F>KE^OD7x?9l{m@fA@+)LWFtE9C!fF z!G`f-C68$=$JAjiFOgOOU5pr^*-O8NGmaw>&ZOH&j^;_e!DU6R*xIhpK}R54o6J3H z3N852eFn41speC}(~w?r#POiUVH5$PXr?FBuqWpTLS@FLJw)zK2P69XSpxX=gjw_ldkDBq`YWv}^k#|m0m%-wLx5f^7T&Vv?* zT3w{ELxOmtfbOv?yueSgsLq|GgRK>kkaQNEqwS{cu2@PO%EJsdJe0BIMOSLqMP0rV z_iS;C((;kw!-?9qAkR=cw)+8*&-OCT&5y*<`X3J%aeNH%cS080zFGSDlcZ?pi8bdR z<9_!qJ|*J5->2}4GoH%e@*J*n^MO>K6YR*>VOP*G${l)x8kqf()rCcFbT0IGUi8P` ze5L#sTSk)9L`Gpb)(VR>dcjxl0fK|gfP;=Cn&8aCXZZD@=pOijao9Bbnl_Wp-)6*M z@cSy>-mW^qEK^ERxUI8A3ny^+Ij^T6LU$Yc4&LI2=IAhnEqVOs)$pVfw!6;}!|%CS zjcD9<>?cR7<|k0?mqoxOsuN zSoZZuDxR*H<>xK##Lf-V)*--M6H}xN9S<+Iu7|Hr*PaXXRpK#fs92@5D;CdVUyL6l z3Rw|xm?nolEh5IT_v5F`{m{iKvrDV6%I&J?aFATN;5NUtHKIJ#@ycYC-|299A@vQ{ zWFTM?raBZ<{1UPFE$3k(`94;#HeJKU*RZ=?aUobd8kh*eKirwF8X|-eGSAzoMo~ph zrHa|ON*;?m^f{$9am*Ir4V|t1=(FMktd$hOyx?4$cF#zutx~yo)A)V{7-jP3HhG)n z7g--#{J5Z769#XZX;-b;3RyLzTdA35)<45R#Z3b`S>SXJE)#g 
zq&&8mh-1OT^)8f(G?_d&Oz$LX4?F4A)dOTJ`CS8ffHTJb^I$8%VKxN?-%Y;u#`!k+MUvZvk=@z8PC1|tV9f$Ac=|Rrhu!HX zPss!@vjN-8YNy#g9#Jz5wPk9#R}X$JAZX^mX6wDbD?*-s|lG) zjHFuSb=Wm7l@$xX!Lw}ScT85y34^;C*Ler$>ixHGO1EY{8Fun3BH%X#GJY2GmuMfZ#UNZb&G75^TW*;gTw&}`K4s8W7s-qrgTE) zrE>&i<(s-!VjQ_2#%)^Xw`Vj06YJR^r*e8cCU~2mNcj9c5=s)MMPfc-cQr6DZD(yL z0y-KS-1|9K`i#Scns0am>V!O4x_jJjUF+lf*SjaZ#y8wlNHDd+CPUNVO&KyP)&E$q zT`_5qQivZLx9bXiJmt55RwU~#Ayq>sOV;d z1bI&u$R=|br#|{(0~qsTH45JPu$4<0u^JseVcp^#BOWr z0%Is^Ge`+&#t*?kjf4oKeBlrdO*|QsVOy*w#R;hQfaH5Fz$Qd$udMY7zB0db1x>NJ zuOmC*0+L~mz-c{)Qp7h=4oy<+x4)+7&>i}(HHGt$PW`wn5&Ql01-&Y zZfLzym9=Miu0bDJDogsX5#mKN0AT@rWjCu4b>}NM^F%x&rkQwRTLVMV+|rT*4&BJb z$K9-AN_8&hlDH$94l}f|j7Ed$a(Cz=;`sZXSfxogiZ&GzF3aRmHRIfJPwJPGLf-e0 zxb|C#u~$w(w5xs5BLxVRWvjC0M+${G@)>x^UPB}x;iA`W-k$glUG0E}dvJYo3EM&m zg9!C!1Lm#;&H#29@q~A8w+YAE+XfW5*KTlhEQMB|d0a0PvBes4fQr9!eHFL$a`8Xb@6=wbMltkGE=?BgXQb7=_Z?8QM5MC9 ztF5y`_3ygpn+c4`(=~aTPwC*P@}1^pGoz7qaXowdIGC1+ ze@u1-hfeV&s#0V1uzh06;CSECNAfprCC$i6=E6fUHNZV1&v-c7$nCx+c9?_k;5}U9 zYpy9b6eWB~r172*gmje1*<8>bh^BEea7(ZVGCUneDc+KSkRaFA*6j($X^N(kPg9 z`&cqsdY*)|z3dO;ygkkW>j~Cf^ zf?el!CgPs~tS&f)G}RLO9fMTba2CD$W&zM#L&QjW{49h)HaNus)$gP4DU}<-G^%#T zlA9cXRrzkRfuZgLl5s7zC~B!#hQ6D#B^fA?Zcdt}GKFUb>t@dwY~>H{-wyC-J4|3U zy?{!_9@7md3UW!LdGXYpsL2daKiUOCGm5}N5t{kak2=R8f#20P_64^rB2&ME_v#xB z4OJ#K$|VF@WbDU+uxt$N@T~2Cv-GOYgRN{0cI$Afi10p+=tqQ2 z;C2SA>c7;4PyMi9vg$Xk+NmfbdVi)FcZARo^}eHUFjkUQEDcjCL)ZtU^N8DK)jtMv zejWsqq6)K1p~DX@Lu8U)=@gMX^bZAZkKAuh-Q@+!oX}XvV3r@iYRp*vgdW-~wMrQn z3dJ~ze>6Q__xPq5_E>q{%0}Mf$tvvX=OA1pZR8z>Rbjv`*>bx4W@x~d(?|(Z0!U@; z@9Fi`NutcLZ351BF%W~&lE#X(n21=Fauo+)lvHW`pxUe5szlQ>46205J-uTyyy%}+ zJjg60LP;Gwq*U%#A^wqGM$sa|C@?DvBgA0%9IDXw`;8wu zBXV-zcNoa*@Q_7?_DCl{+9isO=*eBShf8hj7n`ll2>=!yH(BpqFDg{2s~n4A&Hs=E z(Qqg;0f`t`b(|>f(^`O@(+h7a-|a*Dtx=^t$yLk+Q{XURywK!>!h3nRLf^Sdb@0P< zB2{4WyFZ;%nmAKHCVKT52-ty0&*M1EhUvz3_)@4-Y4> zA}kF9_L_;;Fc2&+d)t61(_l23pRuy*~?a!A$9K%>hvh?6)L9bRSZd9#SpkNh7BEnZFQyRaoH;mMyu@GsDN zuwY_o6{wA73nI#U85v+sjEp3)lW_&Lp_?Q3UN`|pJ6i*%>Zsr@NNka>$#S2Vcn|dI zlHonOC`)ha&UU+oJQp}n_zsda3N4N}PK$x9fp?mIOV)H$CRsDe`-sJhK*tHMUeNFj z%l#d@``;g3JrshKN})g2*v#)4KS9-oUvHm==|+|S$DdJZ)h8<~d#|S$=rxF%20pdnj zCXh7-SLY6>zO2{1XKK0k1zU(3gpff-dYBu>YJlFSf%Yr`4gsb8694vPkjZGWuydOC zIokO(o1BFQpF{|J3YUjC$()4rPih4p!zu!50FdHe&_rauCtIGVWOOt?Ot z$Zj`f34y+j<#ls@YsSZjKRt-S*71;i1<0wRGPkthdFccbm5chsU?|#0VUS%Z@MrHk zd=YSW!*VnkE2OV6>`$)SHUx$dO7=r^Z}Tb0y67U3~c-Nw6$hSa2y_6<4bF>yzXJ;+U0&XMj73o>i3IW4zirEb>Q z?+}&iwMjdfA4W<-1(w5k?2JJk1cMVzC5R+5OgOJIlv%X06q}%7j~d0~u4PXL8*A7H zP+A(JGZk?d7)4mN#w3^rr=iCDtGq}I>3)X9)!iPnE{=1SB6J7U zf^&*_c+4$^GmuC_Ztd6m=W7+4A=dq}1vQw!Ts7T^$k!ZG=zq1B>@uEfL zJ=`bPUd)U$$LT@zs5)Hf@>hkc(_;S0@@c=#{0HSE@InYR;(s=#xB6w+sr`!(TvH6! 
z4ZYrj*`0}_w0(3TqykSMq9cwfOWwtJ4Z2&hfKRx>roIb%Kcff5J(Uae#wOWHw?&}2 zGe_Y;+ps_tx{T|g4^AhkP`n&Qq{*&t&%#Sk96t#f3O-gaPcQjdi6#*=qw~{uJ?#za zmc0w37j!8UzyGKh5=~E0mSzMS{T?Rnkr+zg{wICJz8K|wU(nT0t6G{qTW~9CUvt3g z?eR9_HTyZ5pq~X+gYfPv6Ks9%d`;he{$h5%aTWyo)ehnoTDT zgI|Ewqw?s|;z9!59mp=knq5X&TGdF9(M$_*yURqcO?2trbnKjt3|lb@;^h z*jw7w&<#=Ks7)TaEnB*vCXeJPlcV7{3{W)B5h*6od!WJ4CZp@*DJPAJr_`UrS8fhv zh3>yU*;aMdkbL;857e|#`wUG=bX&YyzbmIGaC(?@glXn^`k0q~i)F_s9>d&29Q^`w z7A@{|K-;Wx3_nGUrd)OeV_f>aBX_rQN%~T5_~R-T1^x>!M}g-_=p;z_9r^yiG;!$Y zNk*Z_{LFG+gi}v!BV@K%hf4JlRvyL-5G|;-h?l1OL208UzZe`*AayZN6@skGN6B)> z&Db(XT-}-X=R+vVqG2|f-4|V(_}*W{o<7W!|1ljVex}n$9PU@mgWdppQgW|*#6)Lb zz4M^$KNU4lS6hUkK+U~E8g)q~;d2(<>%Y$T33g3Q?P)_6zE3W{9hBDO0$99TXFvQc z`~0@>weqn&n>$auDiJ<6rnex4>my>>F!O&YiIIR3&Y`;qiXizQxr0zS=+ⅈNHN> z{|;-jKVab}mT>zj6*eV`J3gity4oASK+qLx*5kyb{7HRJTK!DKQl<7nK_*#bs}X7c z{78U$CG|rI;IMbxAKCK9p8ImR?Lsv(G&WUnO+kL!Eb^Iv(>&jU4r$wv5OzHJ1QT(= z0k{L{N;g_GdL@KNY zMRzarQNDH~&HJEa+%!=Pw7Xri!~0sfnrklGu=umOo~c*jp~vF+Y#P{hNp3mXbHz>DnXE9Jd~bAu!K z^r%4^hC))3f@BLWvG{GjxJZ8AelPk`OpN!m-s*6w7=4Uhvju2l3Bs4g`-u~Be0CzF za9@`{<-o(EOYLaVWaf!9C>^z=e?9}gii2wIMXy|L1@bjvM1u@2h|xquJm4|-BGfx+ z9`vtWpDMRUy932CDYmLdIPi1?0*~&kj;cvVLtrDuVbCa4zcHwaR^Ln1?@N?2k(s4# z5SPsy4F#!#bcwKGTOXEd=r^O?>8jdzC(dL(Us{4mN6e#A5x)~%=T-}k98dVmE$CX= zZJGx`+tB*G$|N2;x`EF^m-puAPiNz#XQ}Lsr8)E-fU~z0-1^(XtC};X#e|S^iudqw zFzkH-TCbYWitiVrr5Is{^baqFo_aeu#8gJNu!N$q?9K;EeLT5ZFDTl_HzO=Fr6^-D zg5TSIre0SP-cfl^Qn$c;B{GwUq+YmAj+q?(Mb-7m@u3?f*3!4~XnWQuKtX+`7kG+? z?vvzRS89mWlgKC0I(oRW*TIQyG+?`ovB(SC|HT3MlgJUMdv26x6ncEGQI63Idi6(! za5MAjT1(zlJ_6oU*s9&<&uX|M$@~(6XSl$<2mBZPUX^=w8l~`lW|zQeZaK|qZ&phz z{C=ZvXdi0KHzxNvc#~`z7$8f1^(-s2cquU1$5Q(Te58f_TDRXRD8sX54@$+6^0b5JG=T&$P8Xnd{#jgAUJUZ^_nP{{PfVbQHCFy9k!#dm?>c-590Px3Ve*ZNFAmr z1s`~@2$HJ7VG2rtmHi%{*qsf@7|8xL z*g6ZSD8K#P&(I;QAV{Yoor83PbR!~-4AP=>3?ZF@Fod*pGlYPGfFRwe(jg_?bvNgn z-~asAy?42mvD6{vefPWf_xU`}XJ3&_BuupI(IPdnpX_|?! 
[... base85-encoded binary image data omitted ...]

literal 0
HcmV?d00001

diff --git a/docs/source/assets/deployment/anything-llm-provider.png b/docs/source/assets/deployment/anything-llm-provider.png
new file mode 100644
index 0000000000000000000000000000000000000000..bb699f7571f4034f4c26f42c96df213017d0eb9e
GIT binary patch
literal 112470
[... base85-encoded PNG data omitted ...]
zg9ZOnJAdq3-aCK?;T&(Zwqsh!E&`XOHO$oDG;-Ge@w90x-3YM87&z|A8f)<|cP295 zep;3+-me{x*aV-A47;AW`9V!a2CMoX2DP14Xdc;DPs7+{xq@Ch*mc1I>!AMVBV2s@ zl}3K}dU=xZjSgK#6gq_VfyOt`zjU)<$s?3!CpWg5(co-UHT6aINC7}GKqF~$T8f z@2f@2zvcs1g1PFIkFSc%oiOb|0G!N-?r9ki48dFybzCIWy+m}8iE97^E>T+k z$%rO_qBf?uENeaR5)Vn4El{nQAm}K#(`%JCWKU52_m*;Hq}G{7zL5}#Dw6x5snX+A z5)H65d=*Yu?6uwsUugVI{5~!`zN9m$+zTFi)~fc7CCS|O*~vn_ZCYDb*i|WFE)OM$7A? z_D356lKyKQt@YDTmbUlMK3L`K#lc^J@M|R2Vl-Y;xelv zYooapMXSct%TJ&R66Q2&U$50=lK7lyRh_nTDCN{G$JIUtln5&y#QOC!0G&j#^BvU2 zf)w_E{vQ``Nu(L&`I8fr%w^j)zx8PO$50L^-32Wy3y1;KkLNq%GC`Q)ARv|SBtBbv zA#lC&G2djcbtsr7{p)qH=$s$IGON&i)Sa;Jf_;B)=@&AuW4JRw8-4{pcvk5GyO`D( z%x-30*0OCqz*nz*qmrEaMhLbHbe>=6?J0O&Z2uX*C<5xej;Eajh}XH;(zn`&%XVoY z)d4b)4k{B}782d0 zJNqL@_vWPU0Pl&btCcUHEsO&-xq+-?ru?;?FkkmeS{>ek$Wg(&QfnjGZ#HAr?XXK( zk)G~9w-Pcy|J&_ETfW0Y z1*3&4lkMMGfP!SJy}T{tb$4}S>vM1KUvv9)qA+?7Ib>SU`*y~jbu^77h}=hA+P+7< zmlq0n2soX0Cs@^V-2*{yVxd8lL?>HE6Gh`MwkX{R8P!+%uGV@(7a#1}K1~OFdZ8F@ zUXF1LXfX|oH>CHlZY=2ANZ4GRrOusFnjEu2!eg0s)7U=HEqQLTlBnlnG8&061f0S` zwe)AY3H~5{iD;Avl5vl~_%C6nI|I>}70*5AO_VDD2X519CF` zQjd0?&jtMJB6<%7Y-%FD47{|02lXudIp0T^k-LH*64k8FNI{0r?d+mfjE?2kk0W&{ zmSX1JH=)(m6NL(!Alo1P#W`UBi%$mf6xUR`zTOP^>mZ=bA?3DYfACj88;W|P*L>O} zYoE3~ci~g=oqGmQh4b?7wSnyWMXqYn02XJ-=WrIb^5+ zY%i!h2;AvbRp1$bkSa5_y( z^+_GKMOkS>i!U=C@B7o(`C~JU?{D|A%8QMs1(p+5s)5OF0yvf(;rR=vQI6nK_9fg! zazbY9xMTOBX#(fgRc+IuW2^YXnlP5glP=h7OB#R@7sH7TsQ`moq$k-)A11(ZtONlT zfAt!#t>XJ=lkxP!rz`vNQ3a#r297`MFQ7?xAEws}w|bm147>z12{HY=G?wsXW0i~l zf-rn?I5K&+_aEK60uKNB0(ISHM*^_9mWg!T{s=5IST?kK&vO6fAapb$J9PU6An0@s z=+Sn4gYEKCKF_bELLpa{e*R+^(EJYHqcF+vzSKB`10~wy;No3{s0)hnm9eU=xt_-An$~* z)-YausPulE5U9;|TkS+xH3&lXuV{am^E&@S5nH-^oaf6=0jRR%ci%Xd=o-F~#}o@R z1{{DL1zf{EM+^yk2Bd+kJG9YIug3=jZH?m+RVJE|e%5{`yO08)%Vtad0-2ji_UM5Iv;RMjnF}v!N*-IHq$P;gr1lCCVHxEt}J!@6BNO7eXO7DRht~B1`nCo z-H!EZrgc3xSVOn`io`+*^8wa2<*~_yvXIWzYPn$5CuG^~k zbBx=W7vzHc4lfWj?7(gcFd)ef^BsYq_y)9yIJ#q40L!~t89G^|0=d|Mqdys?dAHx6 zhLSjzjwf^m2suIz0io0h`tmRgE8?VXTK8i$ztMtq?c;E|z#1W?APmumdNZ^2P3pF7;YEQ2f4jQT_2`+peq*hUcd zGpDl5UQmRH@EWB!zU)P0X__FtPurdue^B0vJu<2vY=3;1*OB9O+SWIE$-zl$qEOGQ zUin@wI*eqb2%yF-L`1q?#5wQ9<(Ga7>G~T$1~I!DsgEiRx)(2k&$ZuX_tg(p?g+(g z#ocruT(v;YftW>d?VpMzWx?$vQTw}`!_pwm;R_rj43@3#Am6=11X^z#h$yifIq}D1 zkvARo!ZD-_jC6DJp78g-2J6s}aub#Bcx(asYaf|AtS~LI{CqAHH^dUbD0okd`+>+L zGfZkC@tFJ5o49T8N0IM}kU>E6J3I{b;$G=S@~xC|A(h_ZMjA!eQ=4D|!oH>Lmd#6x zIHT0DOxQeJajm$d+~Ea}i!Y$v4G|>QR!Ta-R$j#oa2%I4+_reo$fa8WHkK_Y&eq5L ztF%JT>?1Q}(tW7l!%2Szci_8nn}@&QPwq0aI&yFdXt?%QBJq<^@a1o2LFT{=Ja2L(f$nSGT1I~OJoJjM@r-}T zDTo{{j^wEIDn0pwd5m|F_p~>U4S)l##<~Jtv+Sc2G1n%%h7JMYwb@2HCUt5Lg~DMW zy$BimlDdd95IG;~|jMKTw zBWAt31?->;QWt=eVtcn|tOI=txz-wXi*cD}8MYU&9OR6o_g&ZPgzBoJ6J4CwWQ-7> z3+#nO$~&Y{&41DpJ2kgTpjYc@{|Fd17YC)QeR7Tgr(gW)Aa4Q{EtZ>lH;$%^`TzbF z040LNWO}O6c^ltnF!zBCH212K=KY3JsT@(`FkikSr0j9 z9_4XQIbvB(PzV)-9`9@T^xRf^MfHIn47lt)6E`#I?xs>@*+JQ&B{Vb;!Re`5z4y64 zv9WoLII5C?MfEv*PXs@}sqX`eG;NpxoY5o5QZm~}?r)J_6*N5~F`-q(1B1uO@Iw{+ zDfJ<7MK=3D-)WZrW?kvBx1oV=`OBz!9^<9G;HiJmG3AfIKEOe|c`M6X8ukB1Z~Nwy z=rFn@Q6C*k1lN*V!18{=08rE&M&}1)Ss{aig$g-~&fhJWrnTeB1eqX2zCQeS`;PTX z1c%)3_~Zr#`=9lZzTj>E8fEUTq^?K>Rm%lN$@7~MBk&V?$UImtS-_uetA?5q-{x^L zITFT27qaA{M11TssXoILW0JHQ(fEGX3pj}vnjcvvo!_TH*GJLkVx_bDWV81ys~sBE z9EQM1?-GpfH#MJG;$!OK>Ikv5K6iN3Y;g&19T|1G;(&~9w#DwJzf?iKSYIG&b0#b+ zb=3R9QNF4P!DPb6$8^@8SB0bKTYzAsyK^Y!r7se*A?M<&zwlWg@}?PA z$M)U_|9FRg@cMk=t()meI-D9KKJY^h?q{n(2JA>Y_U$seZYWiL*Y8(%T}Ac9qbWvaNsLR)7s=`h9aGjCpyW?DvaX%~XeNuKR2a7W z;_|+gt6CO0ble&VPV?0cDsL{1RocwtUR^kyo>JfZEt$q!H_Y1q+9)2h+-UDV_B`-H 
z@NS>f(db-bj7PWgEMd?1*rV^7QLinPWrThmI=To1@SsL@9TP*TQ6C2cErLmv@^~!=w{*Pe@DFnz1lTXTDckO6Z#4k4hhATftXX0BCRWUfTT)`+3dcrgzc;dnwnOcHRjk07u?cXD1BwDcTJ=>TPg=71LT2 zF1~85V$ZC_aha6S@o|ahzHs|Wiz*7*o#TX=z*3riZ>hO`-T2h?XdAY@Vo_MkLhN zv^Gm#XEIm48AS4A{@LG=y~%oRvJa@V)0fi^+0xjWmMAXvj$Ym2{%Q*Qq!Sc#0b=2P z$m$``f1ICf7IBv-5D>JcmNM-VS?w95VKKT+6GzoMZgFZF>`hzs!@Xi_3wv{Yc^30C z5HUd`EEr&0w8cq{*8E|sxSbdC%b>0oUeDWT4G)*f7awd)iD&Sn;5+9~#X=JA$vpLB z?FOp4V^B(7f9E_~`{95n@nLFl}rn-&;yVt$s+2AzwF?VC3Ly=Ju_kt>Q>Wb-)ySNAAo@T?+Xj5h< zznAS7XHv#(DE=Wrdro-;?K5;yAO}}Uuij=1b40nAcrMfZ6jb;jbQxSOqTZyRS2fHT zKMeHNH}F}<^JSa~op%H)?BsyoC&Ea*V~|`{o*pi|LpYFej)bgx#O`bSXpSnhKdA70 zAg&4tH~OwZ4PW8(j^N4(f>JQx!YFpZ>a~nIowpqLk>X0pU^Onoz6kJ`H#XhmgR}+n zz4oWdKp=QFZEV)JGh2bN0INT&%pZ?bZyy5c1Vp+wEiFicAks(*($d`>($X#69ljSc&&+R}@&DyLj{RXL-1n8| zxz4rL#Sda0XS&|2rM}}@#|^A0s-9LIW(K(+>3Qploy?-M@29Y&vC;24@uqWcJ1epb z#HJ#?^VH+OqxW=lo`U&afgXHTYb?VLs^E>DUtfd>m0&cn`p0u;i?6I=Zuw8|SSs+r z?hAEXc2lQr>+vDtZ;{4cTn6XW2`UC}33&t>Z2MSMe%a6yxkC=vcm zx!r5%lrR#EtPzcEy&oi6!QF#@ky8JPW;z~sW)P1#7LJbsYp|u(5_Y61PAjqMCx+4o zggAp5IS=Xy1lsyvMwUmks}14Wml zXhd=6&TzcyYnlODL!=kJ=~69x^RiUNP3wl-qh5?TKh2Vz;!xtG3#e_JVk`>~dJzj1 zLCRiIbGiAm#>>v*_9!*K0L!3SH5+auaH8>Y(*keZN9fBOoS#?55YO3+L-IG@D?uIn zJ>!nhuS{qoGp?f*R)j9rM&j=aG5i9ITUnMg?fQHlp*AGFoOqB*Rn<56A-l2rk4<9o zp#n>I*3V_(xp(tx@m}t>337I#JmKtRmWu;z_~J+ofJRfjjLAS^ z46ISviFmCDB{V(-0EhiHh)8MhMyR$W){ZE)-YG7}=MC_FTkPmM&of&#$1LYy$Sd?q zAfXnBRnlNZ3QrcBn0R@?HWmNlGOC~5cB*uB_q*cF)SOFKB8YPe{K|tV#;T~em?q9} z>D75$p7u-u9UNOt{IY#uFQ&B+U>7yidS%vFOU#mftNvpj;HGeoJJ_&oe>gW^;`KD` zkiaZy7w3Z}cCyDI?JiGyS@_TW)g$T`*-I%jsq-X6S6VfX3%gemwJ6_y*OqE#S0-Ff%zf$4eW{90Yt zBtq%hxTjB1kq#M#O~asH{lpWFV*Q3zk?GGbVl;4&Y^DqXS*DI#Ft4mMs>qE8db|7L z{S!_D(o1flwK^{y^*m3;I7m8F0}~Wx@dhmwO18IpenI&8W;Xs{fHz?TV=>((4Em*3c zoLg|_wsz#Zl1k^7J@E^*yPHb|Tp0xjQi|^17-Nc-s^o2T#jDmZTB&8g=4Nq`GY+}U z{S@vfEYlqkQ5_)|W9{xye~W+M4!p|%m0*0}F+BbKY0=caYK?LL+VsB>BWbW-UkRhM zJ9|)|qx9`!i})b0&bEHVg3N~=vuEMH`EtiAPON#30^z$1ZT&@OFN3cUV!No)g0E|- zMo$y|ygTeIjf#gC2_jPQCLA6;jAEtvNQV|)K&Mb-z+g=648F0j+{aRrUao-eK!3SG za?341LlF~pGE=%VQB{e-xqjFSaafM3ugi7WI$g>3ukNG~Fbh}1(^uvW@*T%%i#1|s zdEf(AqE<&wx=tGM;P%}9W4LWWF;mUOS`xZcY;ZenClkXl?eGa%Qrvt(_=YXPpA%N| zQK4_2=~BP};A5Osr_vbvEmZ~$Mj3qE=ehsQV!U8+fb^6_4<7AtdG{BJTQ2PxmgdJx z8Sj|v*BRG|JBR6;q;W{5Ew7quY%|1!gM)455Gi`kB}*zDH&7I`w$4oK>#kk5kmrt5 z*M(A{Uaip)6^DuUvTh03r%I5I7QZ(;HJ!1K-5R*JY&CPRp^%+DHNr@zSR!c-Vj(?h zUzZ=lOC5lyCcEgMyGn=)%l3s9XVyG4b<;~nyxJ<_R@lHR=q07E24O6LT){4HIh%(fsvdf)Q`^}){pz!Tm&+gD3;_O()eIZz zqXK5>b=Ls9Dr?(@;!$7g!egHzHdFpl9iOYBio>JyC?5$_NWITf&(DR*6#ci;mx^ji*4d{N8 z4hj6D${koFVm`5b^SYk&)pvpziZ+qJ!`n0OpX)e44{8@6+aK&>f<{f(f+B!^IicMS z=Zo7TW6|S&#(!2df9*2;rSjuO9qdo zgE)KJ<#|IMO`=$pxxAJr**I;(E+No)$P0vJVZ`k)=r=SzB&ALn{%)5EW=q$57b1Q? z1$NSPU9>D7$<>tfNmFu8J>#7!YROz(@=r1)l|wI8Wj?e$PCz@3w|1>NC$?+DJZ+g| z$66Ysm;r+ByaDkWWMr4qs<~UFTn}F7hfwP-YGAfT~NBZp07fPg$?^<6{-pi z^a>@n&n7|_8glRUU<*%ByEC?aro4J?m3Jwh>big?V0l%?Fk-IAT(7@ggSq;-8LtS` zy*W1Q(opq!xb%+J*J&y}tGxg{yvMqTVSjo=i`7g0%ca=}ik0KTo{ll{AA`H9RLg_! 
z9{2en2g!BbQ+;K;^z-(Lw2IDfIIEYKX%_{(R1Yi6sZsDX5@&}|jd2ny)KxfL+IFJ2 z31o&O3#~0B*$ug#i{-%D&!sS=a@M()x3DjFQGTY?ytOe#7rn*&B6;Tnl})MfAV3GP z%|k@+{ibailU1*?v`Lpy^BV$C@Wk~-GCDX+917XsGX~!0*0AyGDVr3Bo!S>teCC`g z@kK}O7IRhIQ5>aetXKXNc=|wBH>cQ?aQ!}{4Ypu?IRQ_P%5`JqlUZp4@Igt~t~)YU zVN}&cK-uIP4&R@YcJ`DX+s(a6(78+T;rC2dvUa9sTzpskuOcn==gNckerlH6LeRaE z4lj-zW@YErZej$}&)E(o0yOcqV61IgyEsne9UZHuJsQQF>#~sgQO}JK89!Vp?yG35 z*!3L%KycPCQB^@+&&u1qg783`_Nv?C6ojZ#Ge7a$ z>3xv9HioLsC_Nl0wO=Go3Gin06mxAuT4!H>u&q%jJ+3q8eLya)EaT;BpxzrWa7E}~ zCrURh;I;^dN~u}XIlMli0*m}ks5HJt41Kw-7~W4-;wz)_TyDALGKvh{B2mBCH`=3A zZ*}eVuFdig`LVs{o(ehq%4oqVGm#I))jux_@sJ3ya4J$Lqvr&F3#_Cj6a>&syqIP5 z!FqaBW7Jpl-;~@Iv1BU^J*k2)M1KYt3TTS-P_Wpz8B~XWzIO zE5+SfqEHSgEtV&A=;^btilI^D>16xBIn5#u7Ha;hGH$9B`d5o}iq5xUW1Zab+cTk$ zr(;{IQ+{|ve5M{*##Mu#er3Zt*By@Q5xpw^RZuzc+BsWf?Yf!Y@nDIPX+dC_2?!O; zwX!#5nHjgb2WoAmb*>Q&k<5+)*p*HIfO7*BdjW(aFYng`A6CnQkENxw%UCxf4Gc|79tVHQMgX zNsO`8cs{3lP?N69VWd*B_ZTTwLD2jli-^TOQ>%gY%@Rnah^Hd;3$D=7p=~CMKG?!+ zI}a+wO_43dwsd;E+EdIJjk;RX)_lh3u|2+ts~d;Rlj^%N_l8AZCi+4i%BH2c0?UB@ z7V&rXWiJBY>>l}g&t;HvMzPF$rhZBmH6Ohl5;YcxamC>@kHcf&oj&nhXnx*!n}K?z zrmh{PMQ@FC!T~3+lCPxsF4-#a{htn!WGwfW!ubxnK8@opqMxy+(1u+k_fO2Vea)N` zMa9NZoZUfq{}OVNCo7Ze8}Xp+GU{zpoDe`6A^ms7u9U>j&!;+S20V;&Y%0c=$qoCi z){BOiZu;K5DI_ImqD#vZclD-8qsqs}q``nm3|K|`-UGn@;hC4q3`rJ#yKBnonmF}t zC#CmMcxwn)5jSg_y<8~<&{PYe1>Y}R`m6Q+khi0wdQllxakaPSk>eRDrr!Yy_S?Q2 z6|cwWI3B#6@5Cyrl56;A5F&YRsTdm1+(^8I(`RY7$I}fj!4GH+eTw5C;BkT=zg8My z9E?w#H{sxWOZY8KSiwME?vmqskwh=%KVj`)WKd+(RZKO+7=y72~%nmGwQ4 zKNwq)N%tU*Guk1ldakQuPx4dTY2r$-FegRefnDtJ4j=F#zV3LlD*tH33Kr+Lemx$0)YCAQ;xHf zG>(UNiWb&lnjmG#%m!(#4~a=_Sx7J=VYOlKehzbcGI~3XYDUb@Fs~bsQ2fS&%pf0R z39O|Aju(JOD|alUc-@W_OzO=VCS5%^{XDfHF*LX`BKJ~Gps_*7J4zP z@G~qejm}!SZP_^gxi12nae(o~9~K(J1qoUf%a7BJm$R+HyYyZ4&FOSMP;Se&BjfhncdkG+ z8U$t%!_;9qyB3TuZ4=yP6yQigI~r!4r>hCF9=@6wfM+H8NDVP*1j6ez0A9SC4$4?O zlDDdBA9%s;FwVgF1qy77XP?hg+z!3yhYFM1fimn>YRM|IHnlnj;%)iz_VJYF(Tv#_ zC8h7zRu5Q7Uq8TmI&W3_%8)<#P)cJc`YnPm6M}XnGY%`~G(JZeLj)*r^czTzHn9o| zk6(=?O~T7n&h^nbd?>Mq$d>Pib>DU_yPu}qWVf9s`r8qR?bEN|+S=UQ{)%)y60JZM*IKOW&Ufb>#}JD;so}vi;ysMuZyoe!#6*(EUZ_ zw^Z+eY6+l8rm(<>feK8@|C^B^UW}`1RdZvBDMngHou1Nu&13 zw7>3S*z;%&kG_cZ??CnFixv+W;~~FO z(Kgw_ZTiBh8Nk5Zu{&8=N9d^l06Gy3ZAMM(Up?o(}In`blyeML z$xt{iXbHYu)sWUYqr+YKn0H3w=Ks8^eENNa${|B*L-t>l?+BKM!uK@ra30j9JptIL zOJVTb(kfu+F$0pUJyQ&g%+>LR@hts10mD^c-668sm!{r5Ag;kayBgQFaj?swscmq& zT82A0oA<6fD5>dWkODM4&8^o{G zdWl0ZN_6;F*xk3EB*L6`I*nd-M^iFEFF=HE1@si_6-V<()I>H)1<#91nxsZPNuu*U zn7^L%QvWew60f`2)8To?PKc;h{c2eq&kgjFXzGviHHtXycJbOgZ7Wud3#>*@q$2F)v7^5o1Xym9c{#}8^Co*#`4FT4=0A$n~=KwAkX%^64fp_wXAFqRBc5{XPc1O}x^rr!WyG??hQ630NnTQ#j=c-eT1?^X8@}Khz z(b7+LmrTueNVGB{Q{vsE%_@&mm&lB08dmUsZ!H1h35Vp2&}E#sizB=bcO*9sHVGw> zI|H$swf59TBy1`k@q|6as?deKp7)h3U?(ZHv!&vts!DXD0=&M8osAzbbFCI_Z=>K> zy8sn)Ay)gCl;GJ9KrqCfZT_pMZ7~M$e(<8tV$Y z<9JqQK?wDuL$lWTYWiqTC|+=wJH&)(JvZLjIU08JBbQzDeC9tC$@p4xuQ2ZT;ZCr? 
zC+J6kUU9Sm6{kPhlIA#>=1kqyuKR$I-#m~da@^I83(&lN@|(ePF_?F0d#fs0{Y=CY zsyNx1p6D--xCJI=2{DQS32F{a_YpqOs%G<0qBJUt4UxnOYl&c5oL27UN%@8#CBRv>z7xvZIb8&Sm8S$qD?{!%bA=3s=K7y1<6> zH~V$ZjaH9_vk4AC(MEJ+bO6@nd@)FCRr04MCQWb`Z)?XMnZnpzEvi5uTX7JnTvkNy z)y6uc@%r_b&|S`|N9j5TkjlDiBUisHm>X(mf}Y1!P;!h?jkV{6d^Fj%M^{?k;sGyj zo}^NGHzNM*5#y<2r=PK3RtyB&87x_9ig^Z@1Vs?-ikbZu?+ya9s=7Nqj;cZc%H{3V zULUBCFEi8>H?&|O=i|q^*a{^86^eI28RZQcb}WO`!x+_B9^*jcLmsdnXQs3y)>rcB1BK*`q#$KuIwBncxPnT9gU-Z* zMz@p46YSo*rC|r-K)O`IB~)c-oZjXP3GQi)M%k)J;dkd2r;H(aW>phZ7T5ZjewZ7q za;bM(%VyeLCH0J4N-yjU%Kr;(7l5p2S^5vIxBhY7mr$njQ<*01 z4FMa~Sr%?Q5)P84C9vhnqWi#kp181r2>D?s%+DRgdV;cGnwS>s##DvI$E< zBY)lT1yGQ6bDDmml0)WC|r%^T#`IWx&c9_t0sWsTD+jrXvGw@1{kC#8+SK0pZYBC5H&506QufdLx zJO$mFQWFO^Q-xf!vho!WBF2?y3YQaF1p7vVvD6+*9o&=ISkr~uu|pt?c_k=H zlr7+?CmqYUymPcZ=(6kH99p|ulmAXPhSpeT4nIfll{-)L4)knYxnkY0*owxt?wh|u zidSFCjXQI&b}GMnTrJc2kzW`Q_F-C^A+hxHIiLbdYVY5vT&LslGpX)Fx+P>W@>g{( z)GGU~b#s17B~{q}cN1!p`>ZIAX_I3(X#qKJFSXDNzMftYV0$6qTMIF1%K*u++k98uboK<#P>j&~EPW^#t73&(%O( znlz1Svh0Pe{;l|?lhoT$ofzmcD^=fO$ojvjYh#h7(oDcOYNrqF3hIrH&=AqnW439$ z;aLA3(A)Q8_>(a*TF<%(uExX61}EMpZt9Ym8)>vfrPov3d*66twI2`L~-7sQPs{*(y}29#PRq!J~EscP@=wQCbOS%XK3ahrU-DngwaoA2ql&kTcn< z3y*jAnK|{!Pc2u7-*n|sQ$yI$h84dDs022o!5W*AL1?JXk6I3dm2_-%>I}k!>tf|h zuFnq&(HgI#u_q8Mp2ChN$Qn_>CQzLY9(4ZfeQ~+H7n0pCyp^KtT)_&Xa#w|O+?#u9 zcoNEv6Lgy-bP$_gCp`m}AYQcYHNIox9vA<4%^XZpq%9{0rnyqN-H;7ReO*Y8_r=erPGtP+t~%|<)V3!u?9qJ67)JfZKP zLzFQo@qf};%lEJ{cUIK5dGrrZJ-$eMz`Vnticc_Uepm=?ad1OE#8BbP9#>U)@4rut zA1*fVX`XP?`&=3Y&Nb4c59o<1l-IPgY`k%H&E7sep|HO;lzSAf z{BhEzo(44yKl%Q%83Kuh!3Vt;~hwrgfb&l`FM!TVw z!1}{M(zt{59}-3};Bi3}5>oN!F*GNlDnfF$G z#fI}An*9*)oWIgF;v-@db4Q6VC$8Z*x;CJJc-47VXSUWwQMP&W$oy|LY_St?9v>Zd zsvFbBq)~#=_MLw21EXyC_d5K)Cy{T2yk@eWO+Sap{r(48`QKjvPSQSiUM`C?xoH3W z?Czb&H{f%mrdR)wmi~za0KXiSxmf>CX&irJ1HU{Gf&rgX-23!@Tf{H_MhWXq{Erpx z`~6>V-d`_~z~?ddLOFixB=c5C`pzO1lsf4=vB&k@nU$Nqu&O#+ekkbd)cg9l@%Qg^Ys zp1L~U8{eM~RPw&@j26?`VezUaEWi83JtTjAn5fnl@#F(S1sBC0uCzM2k(gGt4*}Ed zz0=mXG&!SavOtZ((_gOguPgE?Rv_yO=EN`~8!gbS9j~*!KmPfZpA)`6;b{5=WhAXC zmFw}3C?YP~rag(c-&`;F?=mokRF=pYs3M;hoR*(KCp8!0S264@@o`UGUu*&17g8^e z+sfNisz7u?(+Xi!yN`^Ov7 z{k!JzXM!M1tpjc3%RS|3cs+t#jw+L>RDX=LlI4=iR0 z4iwWERQM^sWrJ#^!#{X#riqL7STKy;`^h+fSF$pGLLq6g zKrxX=?cw%3L^hrw%6lL=213Zrx@yyKkzJtoRc7GlQG`wn6ogxI48Tm1W4hsd35ENU zh>9OPT3?N)=rb4PKLllo?m2$J1VUE_&;M`X!0orRU4tdOf?D{hk1#(H8%d!P-IhtFN)Z#YI<<# z2|kJ&AMY>KSPyoa^gn{3d{kO-q=F=~s9 zrB(T`llr-H(B%Pw%#+#Mi3%$TzRzjvIK0l56SrLBzNuc(&+=4z!EU<(R35C0hKaY= zBWeg&z$0iDN^Uh(Do!Hk#qd-zlzGvR&1~S=2?p|=Kzc&pt`X*7iLQv#Y_Sg~{3ZW0 z`W7fKsO;w^u>+~m#2(v}Nt2iNJjTKkluwfaAz0`nOax9}-!r2XDUi~K9xzW88~Rzz z%@2BQF50g4;09w;hp+U!mwh4`&VW6`r@tW2W^?N?G5``Z$`|FYz7=}96R^LIrKQ&^ z_|TWgsR;iNpN!jnEf&mM0z0@l$j@Athb^TB{BEbuQU@w`XKM)$@^ULmY~}^Zkc=MI z*%h38yBR71rY7C$xYh=ufv8d6Q@!Y%wnn3ZW2h2X5jl$TsvM8jlqr18L-&~OZq{=Dr%=tr}_WO)1t8Gbz= zt(WTZ+ZWjS?R;?*B${~4XG7L@ojb>m*w~V98ur+N1vtO>|RH^;> z5cx*Y`g8375~6pfOGsS&wlp(!=UdGr|6WvuY%vkpQaJZBOH2a@=r@Hd-$X9sKrMB1 znv8p|ul$+s6S8p7h0!d2m?>hCB{O_)Ur~qRq zQs<*-9Pl`a4?b)6f$Bj%J-I2DiU>= zTED@A%5v+Y+ilb4XyFc4dU1oI!_LZhB$K~N@(>?y@!j4+(Y>4sB~B3OB&>5{K6->j z%{ja=nEyQQiE(FW)cEGuNV&PHq&!oD6wK~?7~T{fNg}M&n_nLbQ%L?u2kC#l0ne5?8 z_bRaaI#WY4)L}F0i#+MUT70p;+br;k@WP8PK*VGqNsBIT9bGI5cxBgO;XZ9d5Bv41 z|GbHX3k*c;B{YRuWcK~0lHhmB*7cU#&auK(t{=6sW#VYa&-Ugy%Xrg?a||V@5Xo@a zb$)1}N=MG3U0A(-jrfg5K0K6}0>93TS?vO($+}}v^YD7Cc}{`3DdA%6yH6AnE37E9 z2O>|DPc|hYg&!a|y|C#cF+n2{3v0MmfzL%8$q+|T^-vVej3n$t=G^scM^)u^Jf($h zg?dG6UJNa^N_3U%9LD_a(#fDmVs8l{bKY8(ev{5XCDu;@c4?HQ6h-}6w}r|5CyAN( zi^}_5C5vhbEbAK)j}G0b*{N}XS z0>_pgmm0Al<9r=c#@CpZ06P_F;J34l`Z1(a2Zk!{5lf>}srtm=VhM 
ztjBFqsJwY}VXh@_`!@aN2nfELzl--h!HqQBJV<-Y=lY5QLmf$|I5XClT z<*)bjztGx8N%+l@d~Ox$Htc`GfIr$7N+opA!G;(2`_Zni<Q!-e|N z|22f}`jP~OeD_pjk?L5-TT<+%((yaU7;>)MCAi`ZtGW|gVPi5HRhyWT^76itG6MfJ{Y(*Kb(#ZXASt4izVO8 zj}*J~Pa6N)t3LbkWMb`!`(e>$lD}`VW<}7U`)nrf{`L3&_Y%Vrq)D)ZG8|(6|9{S< zWE7VS_HXI!$r9o$fpUgGyxtSI_s zseeT2s8~_tq>tdJ6}^ygf8ov=#3C8x_ieEM4733s8aw1FmQGCu&@)Uc22wc{F;v9= zyXoG|6CX2jp@TK8AKa(9bz61B-aP;4b@O$5 zu8jJFwy6X7b+h!sXdbrx<+1d}bY*3yDY={huO0~Ejx^2M-YypMtoC~UC{d_`9<2Nr zZq1$xC!W3@wW~0U2x>^Tjv$%68TFon1ECzdCoywcV`ETH)YF_$LeB9zJ?|SQ?=AbM zzs`iFGE4&rUG!_@M>GcZ6O;1TR0_}DVLlPtSMxd?rJbs>`}D;T*%QHlr7=F!L2n^Wy?uU++>*836|!P_TlkA{#_*g@$rc2FRno`5wo zK0@F#<`C12!mqu75ssu(XQ!uw=Hd&ou7SGk?3=5zL~+zyjE5$`=F_6ot`)v4M2WU& zZJwiu*X{vQq>$#5H&d%#x2`@oG)hBbJHWJ1rX$p*X8cCGi7itsI67MfIBjNuWFwl( z?g_0RU;q!k@xIrnfEI%MS9LtnR{;qsD zzjTg-eXDJ!eOQ71IZGO(-W(cW=CsiJfO*#AGLII!)~_uUPQ;yQw+wmU`5D9J_Vy|* zFJcg=+&|;Z^Ofo4sfY*T(#HWMAse^rVI~oeqf`hUq;OGFl6_r{TQ$e96<#G@U0(WT zg}@n9*AjB|{IIDNSf?kxXg>|cqQrVTyxObr%mkXq?NGDQm!NukbEc1mP7mB}pK}-u zgShF=u{3**(B|dPerdzuxV9GTg_V-a&Wlv5!oU;;kLv@;Z@J2%()oi8U>x?ezl+bb zm?(5(LXktWJlf-%nO^4kbv(R5Y$u$9yBoy&p|E6iI`wX9OC4ePIJ!&wydUoxpRC1(?*@)R-0rtgVd=WOP?d){I!8B zKop!vPgU=JraZj%2J<<{M8A;m2g^h8_}q|j>0Vw|dtN!03ATJ)5cj_^*oS#h2aPHk zfILI&F0?bwqHR!(L2CYr&3ILf}sz?3e7VRtb4@p0Rp(A&uaPHj=SCp58OcX8yEnf z8wYY|X+T8LZRu}+hnHp2tNyLR>Waze<6m9??!ubX1=2Ac(fsZlPZuUR05@RmyD41u zT>aJe{sw`?1Y?KOz7&4#*As>G=Z>7v6@Zub}J^$hrGJu3qi)0c5O ziAQ{v1cLusk*~4R*2DCv816PuA2RI8N}E)-n&W7&Bv+mCL-!-gEhhBZ;{|n0ZW*iG z%Y>@TL6vV|#2he4qQUZ&_{5!Wl=ek=)xo`%Krpd-04^n;3}io($IG!)THxT0EVuqx zP8dbNQ0RB|e`Phj=0a8QYB&Qdozim2;Dj+X&;-_T;DkJ;qK3yBGVx3%;|Hg=SF1WH z67fvB(f1!*CRnKx`En`(Xi(>*WPuM|pRh{g*U7!`4#Y_MTz|f;sA#a#rL2-IXWpI2 zS%mcdv(jSY46Bbht`Yk7<%vb9`G{$PfM;O0&GHQ1lQ3LsB~Z|FF1z@a)yuR94+rAl z7(z~KM3H<@jkeefCFWJAaoi|q>CIQEcVmyiARq)p64Q#>#;HiUSjda=$%!?S-na`a zn~+D0o8MEY6-g#a9Tvkl=WM^!qy||$!28d@0TB@b63X4%3mZrVFZS3i)VnA2Igco_ zVMm{|g)X(vBjtWf-3fX~%rW=uV5yxQ$#?i6YPy1yJ?eC=OX>xYpqK9fsxMw&n(%xA z>nJJDv3uF}ZH3d8x(25GDwkQBh=^%UvjakRxFIm%iKW*le2cu za{S-sxFC3ct?nQdy%L0Y{thrwtf5n}NksrqI_KE}w3CbP;Wb?cOWnJ3_4z4Ga4%Z} z!%{d9+!K=9k$n3=o$oak+NVa4D?10U^e3=&5}hr!29*M8iN5DK@D79XJbfwE5{g%# zko>(v8zBS36dcC5Y^+~9>6KOt2!yOQa~{GCsldC!;;lL%n;&jV-d&XR&xbIGv{E?F z1MsA|&B6QW@o!FM5bOC-=Td-kr$o*)({lbWsFGD>Pj&=)-CR>0?F^-VD)Q7D?E!d8 zmi6p+)7OUpC+LY|O!ROvIoV{Vn=EjCPtL*;O{5LHFB)}^&oBBp`%Px5O4g>(Y>2l? 
zmh}imf)s&;#9O`(>96&*BlY^r8w+6BYkbhgfQBvyM7 zp4*L1?GP!i)G2kl2mN2Mq7cSg3{E{h^wwaZ?L0LAl)k3>?jvMy8UnZr^)@gMLY$-) z;4t1`*a9$Fi1fld<^C;uibiXgfxGk0VbSBt3(w8V>>T8J zzYeiKU#1}WziKfW)HR^}b))(_&wRrPg4Vhy!~cf(e|*Is<}MFx7B~3!l>grf&_oCl zy}7R9q~E7XBKPm|z=bEp|BtA3Aq=c>HZL{*71#WAC+TnQ@<3b6Nw{A}!Cz_7K8ORu zI9VB}|9az>;);)w5C{`#st~tfet+{&5X5!}Q-R*bqE;Z13?6*aqmrYrlXESO)Aklq z6l{~mdH@XdDAWaXJI*_Bckl$?^0&9zMQG4`CH5WwQ&g<8uwd^B)( z$prW{oC#ipQ=|3w=M^^7rh2u`yRou(kdGiRpmQzQ(?dMC!;$gUUdkNC&dfy?4b1TKBbYkfuD_e`o10_Xdp z8`DLpVxjoTfP6tM-+0-hs-|zkU*i)0MZ4mhZW;M^%_H=K610w;)g?NXgJ_#y&t7lIYTT8(9fhn)FDTF{Ar|e{^KeTVRsQBtotv& z*HG?y?4O<`8~4HamfJrXaT#I1zd6)UKP!$A4tF0( zPxMFUP`aq@bk$#D?w1w^sdYOcLc}tiPLdi+y4%;+g(?i6P^WoZHea?yWc9{LgWY@u zIM1-ONPyTXQ^*H~74ZFnAG3cBB^A_DNa3Spd{G_=f`J}b-A0d#(wTa94b%P<#+<>V zC{PHI%~$`jl7Wm5Qmr_Ubo4GX`>nQ8w@7D*1yhJ#9+&EiCzFc?R$VTeFAjf_WHlQg z5|+Y&%>cWR>|C9zNzZ5=t7ivlcRVEZcy~*tNjR2B>h>J4B8erby!)t7j61L(Xm zsSTd1r^&n>?>b2ktW103031nKMw1eSC`=egX7MI$-FKc zzG~Da@KJdn4hY2br-YI%RvEnI5WHEzHV5U!sR)~2+Uc*xYXI0rea}}gDhbk?sU9eN ze3pkw#5MFLPrI@(Nj>867Only<_8;NWjW7j^!0!qlL?(vAU3Jmz8g3+%6US^$#77% zR>5J6r3KWApmrpueQ^l1wY82rbRZjzx^tDo&2YMHffI&4H`rcC7tMYV*O!pxb#uuo zQ|}&5!cW&8f*YgwdW_XbK^FI2F+Y$K7kU}wu)2jl@czk|;G_+TOj%XCG327~6NbGS zxn$tX(;Tg!Glf|mr%+pVP|~PQTN(iooQYU6FsGDL$&oSH6dv-`t+p3`y?HQ9B}Q@# z=d0uwh!)oLRRS&(#GqYfbTGnv8UMvyK(cp!hct1b6I|Y$br{2^urNd0(oh4H$m(Vs zCSF39(VluFDMvh}pGos7nyWup9LA!QqhTAR?|58if0`GM){I?-4WmUy2vfI1jzH(^4q^m)m{eZE(rLh7Zx8JWJ3>@s2E!xOVx^fUY()0=5P*y#%nFAU@p;6M(z-rneXUsc z+LAyF$KusU0t8{z+>yr~QAn6w&Mthv5M?v(L5azdKa#yusrk5OT)(KW6=^Vzyd(DpVgGsYH+> zYQb*#Il&Ewd}8pFm{;z7-$(kcZsssI%HjX*P``q7ATfY0#|MWJuyw4w6xNGl`PkX< z*nXC*vLCFUv>WWCXd(rX<@hI5@+l0!)IY}LMb67$EY{a6Qd;G<>12G!6(BB9E%JA3 zPa`=TB#ZCcH*A%=Nqq*3#HKBbffT9+Qr6d_x#Y%P=fWHoM#j)_1LD9_um(n)eu;4V z*`(d|{Ufz%iuY7*)JDX~$AqFz$z06|CS6Ol3F>HSjnN_6Bw1ehkWgEumHQ#2kd*G_ zGBNDOqCZ4l#j;aKcN}k4TFKlb^8u7kX2APdj_GMt2kALS*N?(t^ zR(^OMCwiS((zpcn6;I67T!W|fLn1DNQ$G!izO}4o)|L+#JWl%y!n7*oG$C@)3pzZa zV-_MR~PL*r%r^apiK;fhJvW6Lc8nSfw*cd7 zV6~f_Oi?44t)X8i5|`{uyza|^Ndg~*@C3J|#sI^2A>Cnfl(uZ3vRIU?D)qkW5iT=z z`VYyrqYrl=s$PXx$nd8pGE8mi&2@b`qD}N?-`Nvmhd= z)JqQv#0#sL>ErV7%%(hcRFsx*YCo5UeW#e79if_YA_ZhELTSRpIq5&8tWoVcDpqI0 zZ!nkwwZ6`v`95l?otbtl>O2{eB?I-Vv)y^zw>kf;==6-CaCf#{-6Yo2+C0j`!{%LCM~b&VCXP+ciXaC$-Ry0ey53 z$Zez7$G)r#vS>Z#`_LIakMn)FGvAWm!r7SUd!C3Q?-kdL~jbg5P&WRnBc8>KuvT&mUiymZAMg#X$J$UHDX zU8A9pdN2_DQCLK>PWe|~*T*W$%|~QF&LpyBF6hu`)b^GfoJpXn%=C?|vK{`HC77%V zfR#wr8I}>?_cPc~^`b<;vXP3VQKoP^{(%JoB?Da4l&>}e@9;o0u2uIc>JLWA%}M>I2)1o>oNdVH(bVpOg;Im{T!nRHn%}3rv62RR$J_ub@vSktYI*Z>5*fzbDZE{^| zLkcdOqYcKUj0Qoybap7g>W)Z&2;4@e;?%oTJcdXRE_|pT%zxz|Bc6o1F(T^=i}dm` zLa^t}{Rf$5MS6-;mt{NZISRzV_`~aG?*T-fuw@JC+h!KA?*E*^{#ti)eLKsz^A1ma zeBYnsJ#?M;B29-I6W5+euL=w_@%PVZReP9DUSF(E=$jm^eZe9(f~|Hr1Ch9cWP7qC zWaU{9Al}0IO;_5;R%}nLX1Mt@4gIWkSpR(Qy|hdm!(D)QHYSMp@cs#R8qa;JN(4|D z`Q!l<@b)k2O=lR>IBZGOtL_f155?#HHr56Nu~{nZAuvTrQJ*BbO-{V*D&KsUyfFio z;?7nMUBE1x5c1_x--En05Zz@De&U~qogNkpYNDGo(=$a-*IK+aYN+*=f!BHz92ZrK7q{>iIb*4qbP-Fw3x#7$4G5u zUQoj)MF2drc}kLE){K^!DVEJPP)bLOsnb36k55NO7Vu0xlH zpk-Bo=0OnAgc1dGf~>Lo#{Fz{(Ju%KqQ@6Z!dUZCeK5}LL_n1#!v{18uNWK=#cqJ@ z0f2QrQxBO67Jsj>FsoCs#8(#C-KB3^k=P(5s5EQ@0YT~R?i7$N zDJdy|O*bNqba!`ybV-NO4N^*Oy5U{SJTvpmy#E6}?1K;NU#@k>b)9!~Gw|J1epnH% zN>&$p5=hu9W+?oPl_B&^Fdj!~!Ps$8S$16Qm#+S)QXLj!XS>bk$B#BwtH%r8CQOti zvO7F)jGg<#)nT`N%NWSad8;Ou_@Q14y_3r0`dr-0Cl2Rz>8asr0LE7xzL@0_dlfM% zzXTuvSEw`@GT)kZgof#~dMrR#;qQe%l3QShL#Ii&-PFbJ;JyX-=|Jb;e4Du$_sBg^||b-4kj?E$gxo+g{QS=k%JEe*PfLmL4J`8<>)7{hCPT zNlCX9U4|W1x4HfVQug~vh8@yYCqdrjpy;uL|Iie~ zfnSx766R;mln5RDua+CVAGwntXK0jJc%(F4944?Ys?bmJgUqms(_-@V!$*+!AF@D3 
zR*UwpsyuWMho<;$go6BzLE!GS`u+Bg7Pk2WTkyW`C8p9nl;zUNmNYP%tp4Iw`s*>y%Ey^+6R@1MsfSlnTQ7aE zL5}%xz{&S>Z~hNMqwaoH92S$;EcybUewPPv*c;WhEdNldAD#Oiyx!n=DYY7Jg&I50 z#C9H<#1V>(z_jLrMuUwDRd4&b-O%1LLCgyI+_4P`-i*dbX&{DQj|d=W-$rH!8Te6x zus=2g4_g_IuKUgC%M4kDLan>$^po4sDpG-5Z)$7oK1DKJfhZ~k_8iIE_ssVp@l5uK17jIzaLXFo%wqx+N)>h@QY*PBuD!F!Bb zhR-v`Xq*Oc;mQF9zX!O_dCK$Na$N4t)pAL@JwF(DJ%8l;K3=-6=r%H3?}u-v%cuR1 zx9L4)lc~j^BJfkoR7~iF5(pLAf?qx`$O7ippZV=~gKcgrzq$`U_TktaeaCQ4&rs?P zeij1+V*~i^@}RdZkGq%3#GB#Rv4!?xi)mkU_IsXZ0LXFQ;IN+q1gm_^bU0pNa}Eqm*Di9} zU$B3(^i#BV@_i9?NDT{SWH;#6Gto>%!C#u*k~ET}*`}_A9BfFf76%(w`|^tFeR#19 z5{hruVCcGLe5JcW7lBrLfZ>dTD=9F+6v|loNQ7TGt24Zu^3d#=^+mzj$%Ha7?UBVl z`>`XwCMGh0Pbehh`*2<=u2oWn z5)FVX%eo_h5jSQ`)9kIWg_*-?`Hli&N!l^zc50w5Xu^C-5dl3g^<6Gkg1g zj-sf^mEj+`iw(?9{Vja9f0P@DC^S}t{1svgvpsZLL4J>+JBcrBECr83Y{CVdIu!PE z!x`nT!((~!s2=`9my=o>;g=wANX(Az{o{rGMJ&?lo8+xv3D$>qSc`nkV zygBTXAg6D$e`j{<)|cZ<2`@SUMSyI(^?oYvm$ej;)Q6;9pu_J+J3lIvK877TA;V_{ zA7JwEU&y%PDGAt^{{1s2m{K}k5&Okxu9en>lBrp{7fxEE<9;l0q6$moo&tEUAb^^p zZ|Y7Lyf#7E89csXL8C7Q z(fv)$r7`+CSbi$UPl%on3+KPOjrWuEE2o7GH0AoB<2>n%2!t)QxVK=28g;>ieK-`^ z<%Eg3?2Npfp5Q#i`oSy4>A{tPM*1aK;m;kx2eJ`Y%G|v|7S#*W-flyYQM8tuTGWSC zCv(+i6jmpytE=HTOHm1xJM4dZJ(i{FD|FehZGe!Uf0ge-fRi;1>q;DfAxN+b&tZAg zZ9a%QeJj7Ub~bM4<-L@VW&KSDlKgLmpAchKbi<;JO2J`J@*}L5$8g4iquhcS zi6QkYc3qGmZ>26vy-Z(j^pD~nW*{VRC;^Q}gZ~QFC)tc(forCh)|4c~&@^AR>q2x6 z&$YldhB*+PAH`thDP{;WyG=8)$i2Y{Or0Qprru*o-J9a=jffl^mA2+z{tqP~kZTH- zd!3;m#sN9C%Tr97r<6<{GGeJ3_9imnsm&o%s7+SP#%^N}u2t5I-H42(0X_^i7djb} zla`AfNW;itXIqWeJS$|Zi%bn3=&XZ#oI8bQnTj@YLDFCe^{AfQeBR;D==wBO38L3= z)Z@~nFHmi`dk|$T`x(O`o%AIvRv5pddKLR4G*IQO5>bN%+=tKQE!n`Hu^ekSc9$8D zs`Mv}K{y~69b^IVGkpt+iyC4M$3&!JFies9cK!*0%igT%N{cCail8idQCApE%nyQc zee7!K7<#KNm$id-=VdNFy|mqT^%a{IS9?2ADaL5K=~(ha1$PSiOVzaWT+M-&uEXE7 z|H|LJN5LHBFuCHDJ6%wjL?5833b4fn*5LeA9(gCKBsm0AlP)dqh4y zm-YM|{$fchzajEdBe67gt3-V-FM(O6?S9a3$VvV-)d}A|P6G~rmnT6&I%_3ff{!}M z<~}>P*9;3E77WY!e8Y#Ms~sRu${no=PD`byGGCp<>0V1G$$75$8(_m!qBJ`?k$nsP zaC17Vi8L_(Uj9yXr(L#pDOrwMbQM6WNLmp0Yh!t!#c=#H$?b0?=+8j?aJUV*kx?3 zH&SP|&Wb`g?>#64BE%G_ zf@GaMNa8YY>E+@T~9O0x={FKt0gaBP=%BI-Y@G!^FrVSm`M zP`CL`9!*&SLU$^ALqB|QDT~GAKSf_kK_z5TPWf$SSRVgosO-+V(1pC=v*jvKAzQY=a+b3X#Ch|&mD zN1Xu%z!LW#=O>Q{U9?Uyr6w;rTN@%|G^mr_t*M zd+(8f<6*Sf;b*GmH)^#Z*@`1XYlhx1A5%0${RtB@a@l1+{zh{6yyGCwvksl*mA}_a zbl-zKpI6&$%~CLx=t}soFR#Lf+i*}Y#T|(y;k8bsXatMt*Np@rQWW&>vvtka+jJu}tdoC~h=~y|v1t&5NN%#bI za5XKY=EI`)>FxquynHg~_rtGIlX)F}!g|3S!b_WqGNPR?)vMFhtTv}es<)dgQ&IDzb*z{2Ao$kAKcAq}2XXFTE3nO>5m;4AF z2FiulPP$PeV}CzMNoY1Iqh-GkajRP=qZUMWa`{D?I-icdP@hh@8dB7K7be0Fr!^mo zh<~1POq8cLqv~y5Yf5onxB-6g#28{P<9N?&rFg5t+KnC|*mGXTOg_{;;Zox3%SA9L zB!LBFUlzWG8rCVGwjnJ(bmo;%S`0_z5T$%naxU6dv1yLk%K;K*{g*&|YnbxuA`qOp zCa_m!ld@Rc{%hz(4PZqiNRrka{~em0XZK7^dg z{v%U7Dv)+|*|}@qx?Go@{lB%H8L1!;7ub~X8>Irxjsd(UpvZM3!`+cH|3*5RhDrk2 zg!I5fGW`qVE9*=glh0fQRqvb=X{WT3UQ>Iw;Q(RQel`e@y;*MxzfiR;81k-T&z{F^ zs0iH>t_ch27GCO&vnUH#&(-R9;Pc4;{NOY9n!5_n2Oq87*zHTHrkEAToROM5&kDFJ zDkF6iGhTM`HCnjDSi~yUN|oq%?}^nL`k#&-c0Vekfh|p|AcNFdd^zV+y$^x3i+)6( zh@`>}4vo(J7X((2ug<5~YbmCX!NvexLj}#HC0HL6b&ofN_S<-4PPXPm|IbzhYmItX zWE0MuDSPG~pGzV^erAy=NiHH@&$TBuEK?$!O6(JIVcp~Bj8%%e(-&^0ztEiD^1k?6 z77psxf`W+goa9j9e~KR!gqF*x@cHeI^C_j1d)G=5I6PZ`tyek^C5<70*(91J@oa0` zx+St(CN?p<3)om9`k&GDgLG@?2NWRn&19eGf^t!V$7=3yNBJ-%g=f&74c>nWyg*;g zs9K+5JEcXf_!->5<&+2Zi!js9H}@9@K8_exXj5g_BmXMv*Fd}PXVxbdXk2+?AFxQ+ z2mO@zVCRZB79Z(k?{tbxFr=bsBCjttsV=Nm_I)CxC|@bW(B|yjmF~>fXx;Z;wQ6rq z#&1@_{&UsgFTi4M<{L+P-FdivBa4cuv>BJ?>F7|Ru+&m&Ob-gf*AhS$?D@L>A3EoD z@dr}j2ojQ6% zmoAYA!D&!rqO#QGI86li9UXf*tEx@z2V!>&D<#?@+xnub!f-N|VJ>7Ok}pa7o12D5 
zGQUXfNc<}>NHAJ5@`V)8RD6o9KclotWTpCYkl8Jxd!CRhbN0;zp<7iU(@Hlgs}(LE zu!1QXN~l_CJo0LDI5kq4d&o^vYcRfo(k0}#-NnzBdY|Q!IlpK(!N})z>|J$#Wc5U7 zHyriOvK6_d)ag9LyQ`gZAwMC|FwF@j6Yx$JxErzOTPw0%?ROuV0Up<(O_$`j=7Vgh zsDi*W=~_#GDpy+84aZ>%(r&g}_doL|fwUq*r`7if>6Vu*tq5XMGBGFbWm+wufPFCJ zYjJ9yaqn)010M(U&}Y7lko&*=i%D8IFQv47zR?XG;Sf z^3NVE;WNFb+28i4ZO-@MwAGE_ziEjA1AUbFN=XTau}J6RA}LzpkOYPvS9}b`XPZQS zA>h^;J>TL!=P>_j%tY$QIpbz{{> z$=9ZX(Hf>nSBn;&zC~Cf0b$Wro>r0Mz9`6QQSFQJAFlaP)+x2BE$Q}St_0<`Ez=#$ zJ&B`rd~@hi{XsQyGC^`b{&b7u10@){84D0>h%#4}eu%tJ?sW)HOpDbG1*~+?IARe6 zq}q7y^+Gc?kMG}c#K+!XfN-B%iY;31s@T!H!nMY?zf#^Gc5kK)0IloV;zA>p&_SPa zp7F+@#v#zV6mSf`sTv7}S&Wu{Q7Ji%luhE6G8suUxb0<7u1#jW%FPMz>W+1^(rOC) zc;PC0{gv;eXXb6r&nu6PLG$rzl|oMyWvHSba_2ItJJ65>v05rBV3J(Vt#JzrB)Lt< z+CyK)Rv3!&os|j;N=LrP;q|zdEL1HckD?BBBEcPC=)mor@YYv{e^Qt}0^@oe7>UW( z5;>iJ)t7A_HHGmy^ow#8>FQ_92+y5yHIimYV>oU&hC z%8V_(O?hZ*jgM#Mi-b<(yoWfCMC>Z~*EPD=5?yb0mi+acks8w4xNB8@DzKbuk!!XL zl4iz;@hP;|Ia;xts}OPAUpx}#(~4ju=onJGS-*+Xa}%W&Jl~y4Gd?R#q3RkC{xjKJ zw>BK5hJEQ9Q7%t1*Kd2m>cfGG48ddVqS&?g6Cxqe+QKIn800xO`tiU?T6&dAPiDUU zV9>+2b(@;*-8qb04D>{sRgtA9i)U1K0$)=rBuB89B*}cumNFh)Udh#BF}*@8_Z#8peQ)$dt-Ab+R zgew1xguc!GU~4}NO8l&wmYt@c`%UG3h52>u)mIBGEaR2^*8Wj)eqCJO&%j=bZT7P% z_e*6~|6;Riw#L}^+&fMx^<3f=-U`Rf38`cXcPQzf3gYQjJl%ZL?SI_*s@Hs)Oz6RO<7O$J<+; zsVM2zOEO&){^6jC-db#KJY21QC2rYW>vcI>r6zDQEFSglO{=(r(A~z(aU=SbM}!+( z2eQvlG0QU^T9!AGE=y+Nb8UQh>$)#}i-2^pIA{`|#d6C> z)XaA_Cz6eqHx@q_R*-Bt+Oyg&V&egU({jvUSg8)4CraqNv?|P;Ya4^ zd>;PJ1K3$)oa+A3$;<}VSU<}Y6u#gIc)M}H;)w+uRT%ZXLkalaZj2;Z#U9j3qVNYl zzcCT@62vUp|h)!uP){v^}F(Wq}EIKOO8S4 zC>lp;|2IbKJxiAI{z#TMgOh$b+idj%9A?{V{69sT#9HB3ozqe#HlKygunehQnO+Ai zs#Aqr2`&7jOKeFWst4lkBx{qeOadXJ-a8NalDUFeZ~IN#!gh05&De!katrFjGP}O8 zZN6oGN)$}MlG`;J$HCGw>5(qresvu21^4?FwA{;Wtf^*E;ED0IpP9+?8X?-2P*OHw z=HE3WIgcdp_a3gz)VpuiezIKBL<-a5#+{>K8PM#(v^sGiW+j)&Cz3GL9mpmZIl!fi zzP)ituzVxTjW__CN9dd3N_7nTHK{HXcYPEO%jp`Ne`_13< z@Zl=xG1~Ys&XropJfq$usKC-Skbj&X=6NOsme~-PH}@p`+RKSdbm@KfB9o6)_@2(% zuCUm#>q%tKzCGbHW0%uCYu*`dCv$7RjWYonYf*i$bV#SIU=Z+ImWNF3ODb8_IV53< zhjcg*ohgo)3|Zj;zEX1@HaFTnel@BAVkTbG27*1W!cybz+$|2TuRcuPzWGIDQvq>cESLt@-}aE7 z<#L%+Hoc0h1Hpye#dR+slza}S{o`_ z2y*qqIm z3(7z45+3Qe(L|K2L`CNgrJZ?IL=4p~hi=N~78+CrHjZp|GAz;I259&h*DK z435s4fZe6mDPj~-8T|8KEDl@Mw~M$YJ=E5V)7<->cY88)gqd7xPrRw8n4aNodn##> z7Qj%+AE&;DM9tJ$mvL=yjUkxJY_`ADh2&pb zs2;o8?an%QOCrvd8>Ae_&wVqls{T0FzU|a|>d^Pg|7Vk!MS*%jq05&W8o-ifauVFS;SA6kl6QFL`a9JEd`kueXHoDwfyNYQI)z)Z3aUF>boD zVW{^^TS@Ul9DoSI`_kU z+>tmC&=QUCj34cPOIH5D5Ec}C$~o-#$b~(#H8s`MfR7_K>S5e{gBw~6w2`=)W$0I8 zAic?jv?;IGO#PI;b-PfhKi5L(wsrWPUtOuwnGPTw#@a^*be@CwOvdY56NirN2zFPK zR$Td{WMx}rspk{N9T$(eDO5EyYTx@equ*qw&?%|7@vN`iWQNc z|M{iNeDTa<%c>3B3~Wj0<6+NYbzmP8`K*oPtt}Z(lW{Szph5CT7>2`zvGN!IM_8s% z$mt=kg3}ifCK)B`9OaOZVD5rV^`HJ^j^FG&rV!4NOnB>Z`rEQ7VWbL()sac!U74>d zLsy}!kdCL8CtIqeK&hjYK`(1m=@pW5NcOr{tpTE0Lw)z z|9QHPhQuvNpu*lS3=?gj+8kS6kD^o4-R;4M0{{?!3u2!*+<2qkv7OwP#Z=$f#-p(4 zX7ZhKlh5WjAFGGrGgi_Kxvmgn)h3qENP;EdA~rnPtKRlq=Ls?vi9u6UIaY^a@}@Q6 z@#YYbR$gAT=UGnGAEB+3iDG9uiuXQ99T2@KMn?4>>qhRPL}QpyJ2tzC)n#?xTRD-* zO0#@j<7Z~@6z;sUVCAOvX8YfxOOAL)W}VImb5kH}ecYt%abxT)*z1cD-6%9)aOUg| zh#-}8+RR^r(Eqt9p!!Kz2M`ND<}9yOSC2`TYk>Q^Nd9Fyxa4EFsIL{m;Q8t1FCvKbohA zcXkiVxx^p}vB6d9PCQ5bWucqYQK=>3z=|!665sdI>?@@SExcxXlAr>`*CCJ*+i7$- zben@!Mbxudz+=hpVV9!N1LeR~tgAU-<-x(I5WK}H7Qi~#IR2vy63{zBc6MKd&2>>s z2znc}9Iv(!ycvp?gfR@xXRqL&nO@1yo>P22`F~?4ZMM;rMgW?qa zu(7O80mjyx@rXaKIrq^t*rvFGv$f6^sjRW=#smWe`NP`qk-H!bUNHZgV(PC8c)vtC z#pOblA1umS%CuhHRg`T)^#Z#3s;g2`lpF}g*n9KMG0xQu>{&>hpe7Cw`tDhBwx z;1W3|)#SdQBDyZGd=)7dByPKK3c@1oUSQzpNr&ra_3X{Nd5wi7V{6M4o(m^E?rp2L 
zW%*eBF_Xk>Vj-rF?P{y77Utd52*=Cl>T@TPEiDQ7$={OjxODl+uV~7qvJN-QycU^u zzi}W_>izhs;%t-0X!{rLZWun>a3G7*af1&rqkL!cgYeiT&#Ue=MW6KAPZyr#{P{%j z+$gGwaL2TCIegwWOHL|Y>=HSA4sr2Ne5!Rim8ZO}^7)M1PJ6E#XHJ}dt)XJL_pfR8 zAT+5%>t|iZroIanw^gB%e*lE;w4MW~_AlpK`7d8f=d})01%ont1YSem4SOJOeB<2a ziF)#tZvEdJ2QS{&8uwl3Th5e7S5 z-tzAXLpxwP=5kvLbY7U?W@2jNd%QkfiuC#!|DeSd{NI zH_u4&(>OhZmk$1N!Hz@!0#cm9r(yggN-{GQFQr!Y#Ov zan6me=j0dIJbSiHM#J2&v14NCFGh%ZYVoE=mOiCJ%1GCAybKS=(?WnGfdc%}492e;HKA)ncJ_!k{9 z;>og9mFt;2MH>|H(Q5=lz*J(HLhX~c$`7y?pkxV<=#ICI8fq|rVvPiqF1REyG9?9f z?D=N*!Tp3GLlOHD`I|o}jvn)Mh>V|!fBUp;Y=t^QZNEV`{u_RVviu__h{P~n7qj11I+;l88LGd}8S+;)3W6gSxlbXkE?vD98{Fgt z%yzSFooi-IIT4(MQl_@I^`|L~r;KL^mjU(x)JT3rYcF1%-F_b4Bf=t8IP)u63oTg) zQvd;h7M8A{{l?7;?Cwc(k1Su85pq-17P@+C*V8TQOVAi{zr;LVePcY=zDe9HU`HWk zAKgn87j91!i??-#XyA;k)|M;U_m=9F9;5?|zHdx)PyILpyReZqZ;q2tmEA~te0!@MwoUAb zVY}q>cJt>2;@J{YH-_sT<_~r7cpGi+>h%<=ai>kQu9zDB>7fIi8&Qwj&&yBlTP#4c z3(M~_cp|>c3V9_FMm%AY#${3t+h24a>aNf$)om=ikgqMux9{=e-Cb3d@hclO+ll@r$BKgT^@T$GZ;ELsFEVOm$(wO+Djd={y0eVxj}wE> ziNaC9#;naGIsJtf|6oEUU+1&@PhEe^WFle8=eM^-7G$pn$Mh})N)Nsy*djkt9~?BI zPcPHtr`sU5v7NBF8BP>qr`*aQuO<^@TJ+#QqIT_wM@MuPQmTCH^G$<@bI4bCxN zde3CJoRI1x*W{xdm>%aFn555$nre1fD%;0!p{G8(Q{Kmo&vR|doA2@MQ8|3Y7~IB4Ul$K)b`XX!H%;(VzNu-v8Y<-O!~U=d}cZ2g{2vay5JWJDE0mW3du;|+&s zHBOo$zkGdlM$+MEF%V17d=Z(f+%;zfNtg$*@skA`%lRZ274OfhADa62L2Ek6+Df~V zeqZIphW3Gus0Twb+2Oiu5`Pad=B{i4Z~N;`-@Xg?0Q?{~W{mNq5;uS8fM=pppthKB zxo54=l3V(o80#)MKy*%OT%=fLN_WH6-*x@-4~oGvEYu0#wY^ZCf3X1UcFUlVDkgU?KWk5qOElm-96`@~QFG zeOz+OJVYf>a*t(9!~Z{GS2vFqExV8-J$ZjtJJK!-?s~``gxDcMRr$( zo+tN+coaYyRg$Q zLitA8^wCGFzP+6Zdd5mqN8&4Kx8kFh(e=@DR^i%ZX9#Ln|Cpqf9@%OU`Q!P?q8Yw+ z<7Tq$#~x0X{^iYN%B*UA(moRfHQt^}CU@kc6j&ix9fC?D#p$TF zk43F;Upj?eH&T!43RH5^;(SQIm41A89#GOJ3r&=suAiiQ2?vC zcuJdF9B<8f3HpeI^%P5e_~%yx+wQjluS&=h@y6}3-eaaJ3@0RCg9RHCEZ9y@#v42p z!&c|R2kuMzf(RqwAei0kiG2gagnotyb*z5S+>K0U&4<+WKLqQA(mpVx3MN*UnZ5rr z_w$=EizkwQ+wpOB0Q*qtvwF%tZ!?D1lkGe2AyS#ZlS2*98fFbJT8=4FVOf-gpopB~ z|I5K42-FWvSYSg)O>7$=WM4-Nz%DdB=&-FlVI9D>+AA{cJ%R(k+pP`Re;T3S?g0-8 zVOjsc6m@`Hv)fd%sqDQi0wTzoVS^)0kKFj*c&n9YuYE4E&OcIz)&JGp|ChT$5?Xwp zUcM_MzLL8?_5b~azXjN=)%wABdUh~h#6uS-fbGZ8TEt}SwZ45a`$8t*5kdIoZFV5g z=5+4(7AP^Pnn#GZPSi*q2&7|ef>HtE+pV;{BF zR+BxG`ae)e_dcF1jh<(7%@%Q%OSMBIj}ZpO)bAkxfCo$Jl)~;%2x_6SCd2XMz&P?1 zcuOvyaPscu``3h(3ngL#_Or+ep>Pw=f>kBXk;EH)+VnzNm?(cPcCJJa47O@pAfIuNp()LFD3yv7H z7@d@NdIS@j9V~9t_lV!AvAj4;67F6t>*k!X_TfWij&vW;dKdsIaoHNN;OBj0VJ(+a zpS@-smKJL))K(k5@+gpAi1_D&e#J zuRy0i5L2YK4y?^41l#xBVsBdhfB4LcpcPIdqyQ%qd{F2u@#UG$va$Vf?-LF9n1bEL zv6;8z!?JKgiE!?)7hfrD-&eJm>}oaur!_F8yp?XbDjhFY9msRLx4~?tA*Bj(l7`JG z_>(HU%MeDo2e|&BYcgD(vD@3x0Jev^DSj9?UUlfvSKbT9HGo=NLnq5}@)d`T)6&Rfk13GOVtih7nO7nFcG7|-UGcvAE6#qCVL#sO zZ6okL{b0P5MxwHCFrSKTov4CXbyA>_zE#ijXF!O2aqt7LrIhC0iuJD-tO<8{$pds& zWv0p7p2$K=RX62RvUQtXDnHFI14&#ks4A~TUtcUnQ0!?GT3@#RKD)OHt z$sqW_-RWbkso>@lS!m_v`&e$&@e(aH`6B8ezzUE%a0gZIlbu)3`s3nicV|(X^Z-Xv zzt#Zm`5SIvw%F9r*NR|GI&MoSiUAvtRZ~LARLBW>W}LSXa8WOg0XoDijDb8eY_FJ_ zJoL5dEF-xcwj|XU8cd*~(Nnkg_2)!(EA!KXanKw@;^FhSRzJj}S>`S^lU%x2$m_6X zPRI@!f=OdU1*%oKO<9>J!rA)oK5-SJBUIvUDf!t4G?)Iuh)4Who_6xqWi^z;Z8wfh zi~Ry1QHM@wl`6BRHrfmt0^-RPhS{6N$$nqB=QF^N)>W8>>^{dmvqSySf%g#N45##M zlm7d92{aLK4l-(b)YhppV{R6gMZ{m`omv$B0chlD=(^Y+^$Aosz1(wkxfo&1pSBkl zUZWg40;B$`f?vY4DR0c-5#-`tK+E7~!2z37fB4Y9h^;KNZmp*@1)&0av|;|6!@!j7 z4@#4jEkL<@l7oGF+)q~)Kbpf{qBUTEfm_vqvGuEIxk35&Ze7JznMMX$Ojr>BJ85$c z-^hP}#n}rCa{7NhB58ik&O4w??U=8Q{K!<@5~ILHr?Vb=Z+$K7TA~^*1vQWUfS|zi zx78>@@SM`C^#q(+C}PJWAWrbE4}yfx0&*Lr+QXiI8r!eVI9$)%UTi>#^8nknMN>3C zvXPt`-K54xh(^#O&WaWXsu0Qsp^_iS%n+`#UUGcytbO{4*HlAr^H+hAA7%yy>=1eY 
z&sWNl{f*OaZV`s>#FO3d9>A4ole^dNaK1NboD%-XuTW?~Q>TT$EE4 zAYv_}yf5dA8XOsU;!@B^u&%Bzj!?|?-k~KSW{QP=)Uz) zni{$gV7Tmx5xtE?Ae25{kJFQONg*+xtu~`7U_eA(>*$MN$vakjxLHs2gM{BQp6-BN zxa4qifO>{Cy2s43zy^$iBGhPcYtRp%^OJARL5&E{#|y5Rt}q<<+8r!M`kWGH7{I8C z$2C%~Szuq%6(su~lcR1dg6FndYYsx^Q!}p353%ac?ue<$)pIw-(@CpgYkG`;f%Dku zCw49vJ)c-!2_`3M|1!VDzvF#B3hBtSBqgujr$=bOTr)6;M>B*Q3FFV!k?;_fkz-Bp zJ{Jn{odORnzW{e>MNj{0bPhzgh3xSMrjWqS1W3n_@%k5_D*5MclOM1v6N?gETSWpy@2 ztxK=oGeCIxkO&YT-Zt17M?I}K$P)KyV%>9xJJL%IZ(EMlhAOP>Ug(&b`k)M(%W+!{ zYmH!JD7|5`WfCnapmkK|Nv~oNiWX+x1aS55PqTJ0h}e52$UJv6ok+0;97zm545%`S zkdjA}M-P0opA-?<+m07Pd#7Pq+fl4~#42Rh>Tx5RVD*S@*tjw5s4GN$fwfFA@`YGP zV6~p)^s7o96zpiX=xB_$_O3H=Z zdKP~zDrZv;J3>elM);&7$;MZt)1q78@rTAdcc4k$$eV7)Qn06&>_uOpr3r9P4Il`! z-H~EmNhjXd*b>g}%X%?{lY&f|p%ov?D;|P>*o~*hD^oT9f0@KnzkvANwByXpQG7V| zhwkR^^sACPp#Lwt(ZBu^)LRkYaOG%PIR74H{rg1#9~7Hb`2PQh z`yYSFh`%Q+x!F#${e21f&*yYlr|bWJH2?UEFcVlUGtPgN|3eS=fBgT0fU|$SjekEH z@Y(NP*DQR{Z7}t}=lR!n{leJ)_v!uj2S_)#ZhpUlJ4^q^H2<%!0072E|C?O=^Wd<9 z?hWYOc%OCt>xuu*{}1A5LixWB2>KK-e{q8tFdP3{)cfx*dTI^B`qvTsk9?H|Fkq;j z>JdBsD;@jmE0*teQc&Wbis1$Xqf0-HfJ!0uL0(8POEN}?B2q$VgeXy8Kw5(|6tN%n z1sYLOzm!52vcr@HKS9uova$wU(=*5Y)8+cP=Bq8|Q^(xAyvEIx2S-+_Y=3<}xa(~H z$6dJ(a~|G5Fb~p*G5_a~{&lBm7CB430V72kV8uyZIz3m4eJK;GxSooX9|hEnBfnfV z6=Gi}s1&O7l{w;4Ns-b79i&;9@WVyL&e6o6N)R^L^+ z)V$WK9aOS2we3D!uQXrpzyE^!c<9|5Etls@CoVvtw+7D;VRq<$%H?>RuTrW#W2#jG zY~NAT^1T2%V04r#SLwOL|4K34YvfX?!r;8z;9ff@A*Qwizz6?2nSXzH{~A27Vafp4 zroCG8mD|Bw?Nuz;U}-@(X&ZB)8P92(S$r(Ac2Lu5deKAXP6xpMl7%Fvo@fa)o#D|A44=NaMuMFxgSLf}J-wwW#0TXa>E zV7#_kfzIB|dR%VtRpWlY_s!LMa?q=Ll$Kf&tL;Pym^eHzhm;|+?Qx@ePc*ofs*y5b zYv`W`lp`n*Sn5@Oh=lU?kAd`*9E%t3rQPT_$G@{kb#SJQv7|ofs5HLmrdr zEPg-B!~MAaek@1ub9&aBjHL!m6ol{^V6*B4A^wmp37?%Pn5q7(Q3{ymUJ`LwMpT&& z_kIz$1Dchq##)1inLjgpQHEOJ=gwddr}iA#P^wNCF~4Q4koMv8*p0%$|C7w-tq5LH z&Isankt+(%sC7%Vrc{HBqYbq3KuQ7>UUuSQAsHf{`*-K+a`aj~0E5y&nM_y)Fzw|$ z2>mpK+7dX;z5+FK$6Drv$2g6$^@fOF7txv1vxKpzN0fC$8yUNuJWmr>kB0ucVbn- z4&J{~EsVY{&U}JCu!KoWb5D){8ui{Z+C6;|12r&JEB)SKK?(+>{+X%Do2j$S5(n}d zwJL_LVs>tN7$o%Ps&62x;~>7!Ep)omY^)zlFMI(;8%Gd_^HHsS4`W-u7D4zSw|st( z_3Pqdw~p^;(G;E%KPxXCV=S14;@qmIw11zD@J!B()X7LKNy=pT3SFtyDL+5Z?F}Zfb;e8$PbL7Yt-V^+&Tb1Smz|%P{Zh-4 zww2#(vZ+!sUsuA4r?oqgM+?CbPx(R^R2=N&$V<2K;`!a641M$DoP-7Ly z3|S^khLQ&Gnq8#qZ}>A)xpN$@8DTVRmeY#z`412~YcIDZ9=XbPC;cAoG(_$Ypu`q_ za5nexMBVqLOhDIKLXJ{afZf~X^~=*6?(BEt?%iKjwG_hf_FJv$6|f5Hh(s48NS#HY zx34v;6;s^~U#+_1IZ&7=faOvi0r=@{OKPp=#5LjjhUa!;kshp4kKS4EzEI#h ziN3wlPk8#qW%KTNtuAZ{EK<=RQ%LlzBVzqPqgJYoYK`~t-!L7>dxX1Y48#K=(T&k7 zK-asaKx80tgMam7R@Xey_r?pY0gpoeM~oC7i7#XQg%RF2fMa2GKZ-k*F-O(NyBUm; z|DJ`U`vxRk?-)Cn{bB&_yC45VMI*;YO6TN(PYM`Ud9qXnkg0Mfoz4+BvL>psZ&%ssQ z%JpL-sKM}a^#8fhcxSf;18f%lEC!#0iN|n(_9=U^d-wFb9>RGYt60!ep9oWRq?DT< zNnm{U#C$7kBbr|4`}pTNS}@xO;GU^~?MguTW;I-ZiEE$t3}ZiGO(Sjl-#f3j9CW8H zvW@i0onT?f^UliSOPNMiz;cyDH-X>Fc6bVd9)F!j)zv7t(i%yxe)xXI!Ddw zA|Iam1&0!GBxZtlU68qu(+BG-y6J@8@Ji165|a{5@dw^&3JxSFO&>v`W`4L7g3r|P zj%6uyd<=;T%(vj;MFnX;^pT2go?T_}oSf5{!H z`@3e>+TWhc2F@q-yRZt3Reyt40C`9>z7`Vje~jl?V(*RU^W{O81gJYo>2!W)m;)$- zcOzAug#6BmfU>+9n`PeSb?bsf!Zg+I3Y34lGrB{8qKYdAcdt1e7u3#id= zef?jxj->FI9{vDSXPwj^ejWa-M&>y#N5wi--%CQ5SoJV&aVHz>O(|x1(;H0n0R^3v z%RD(o?SlsiC4x965o|{=-YoM=^a_FBZE*(KTBp$YRO7<8zE?xGk4EBuBxWJs^$}V<-_MULxfLW@DBX(cAQm8Uy17QDZ{Il4=Tyt$JJA|;S5#&5J;-RS)g zW(9)9d8CKeNti_JmVN^=Q)t{RiN4fyP1*)PpLVD^q#CB!2e9=CcaPJ{osXt;=E|`r zSx`t_Jnu>b#`EJ8#BKyaYD|U_6TUpnmB-kfu-JzGdlC)w616M*mK<>r^1TrM$hdds zt)a?v`Q*kB$4jt~y#*yZ79w2HP=J<<&+e?n2ckPs zOurZH*x!C!*Gvso-W(NB(TPuw-?4g^|3)#uDJx`r6lwJR_|M_-n8E;m$_ZLP?RLv` zC6t`GwsX`3U;xzvD_Tp|9eLr5G-@rpg^3B}cH1Fu;fED0V&qTFzn%K7R`mFkzrjZ& 
zgJHJoj@jlZJ$zjCQHWFcW`zH4zUzqM6FQki>UM4N5+$|{A~fP%z9PTM83^83jq4RA z^UJ^4t<&PZMLPI&ce})<@<%?O(oVGvz(h;?2A3iMfRkwD7%T@d0Cyw>vfs51Fui%( z^~oxmTBQ-{GtoTBxmxRfj-_LctHmKQN(sFte1x%-WDfY#j%E%i-0XXYVrHr!z}uV}+p=@!wpZgJKM?v`_7;ohELlUYFS>`SSe1#pmS8plfKsdD!$OZZbOg`v) zS+_SR=K0E6QOP_YfZDjNLxC?iWqrECQysKQXD}Zb085)gL&g(x07fz|Tp?YmFicwL zc3Vviz(!O$`H0@y^8cv&3a_ZQwrx^|9zZ08p%qZNK~h2qkrt#=8l+1alm=2#!| zyF=*)=^k48+jD%5=bYy`?;r539fPxP(I(O0BwtiZ?F4WAh}zZRX5iKE%h)=eK$J89sUG+xRd zlw69N9jvM{Tae|Ae)oH2WF+QXci|dSbEqk&WOw;-w|wpUk}bUdV2!6o;Oh7G4C5}? z2e>)fA!u1t=G(M*_(MY~wxE^>>~2_s`%5OH;ZV-&i5bH>IZfQzKY!@-^HfATi(nL& z9J!1vabsiPpwoOQ*X{g>$zR9dO5akEFqtr=c%iu7vS@!GD-~pzQwfPWy2n9bJ6V-HQd* zw&F9eDr!g`Ii_%#d?m60pvCq?A1JgzMQ9jL0$k;l{bGa`M?BK_?JL6tbjxk{=ywWr z1(e9+b?%TUm3;&Z;DzfzRhdgFJ=C51l$O5u;=wQf85 zZLiv9K~aCtr^gpWmSt~)%sGYw2348%p3~pOL#*_ROo7;DuD809pH01y&fMD=utau) zx}odL4-&LVY{iX(pzd>1MC#i?bi)NY%{A$Kc2gZT6@}^p-a_tNy6|N(Qq{z-#4fbr zm!U+O(Cx6r=`?;@PMUk}ozk~VW9e1CO5L>41!m)%^+ANxf(?rwB`k+NMBjVHA5C*% zuzg2&*0%zL5rEp#*xW~&_;FfAlZQ?DHpqTxv+xgYv?mT@L_-w#gNpJAC9ivHp+?U( zhGYe(yzG`@OTWWcM9k_X6XKOHmbxdFuFDzod1piM! z$Nx+b9Plv(XBwzRA$n*~c_0`!x(6ckh=Z=^KB5Fb3cQX*I-6n*t|^4@XE`n~ z2MpW}yH{K{(b6+fZrb@cN#b#x_x**)Ks_}81M)ZFn}2#X@Yd+oEn1(0QqXX0S#0dg z6ssn(i;2eykMugezKh{Ha+c?&VP9J=gMFP~NWEBxAPBTJ-gcbMQYk<_-c{Zco>Km*2;Q z!U$zC%TvEn!n)pwnx*n)W^;_tA zgb#(NXVb)X6)mQt3cVh<1Yr(8X3@0wpPvYWDQ^k%60Xmzi=N z{x^(2j1>*C`yf-a3Z|EL|5c(J-=v0mt_f&xDL zmq$VXNSH{Q6E>i}^A}__Md}YCPCRP z_}AV(nA{%#z$xK}xyA31_m2lqE`SHpx4kO+7vvMbI4}@mRulesYV%JQq2G|suLtzV zIG`5yQvSaV0gi_m~?M})tdQ%Bb?{Y zlkxXw!6hUI?@qMPI`NMq`VG86KLIF2=C+;d{{b8lF+jzQhjsp_-Hd7cLE+ySnKrrv zl5w2QjtZ0AUWn2YL%7i#f3&|*uOmK=^1(y7Vh3j$=fdWn* zy$k@?(6!nkD3|~lAu3Mf6^n9u4A@SFutpaJ0I&8A(Ci0uG#e3(6f!cf5sNkfG_!e{ z&YZA_!@@C&p4PJp*iD6nhqjr}`@dyEAv$cs_qLMU+u@U^p6kTXD|=e^A58Cb*lv9&mkHF! 
zMPGs6S<>OgNYzM{1X^iI!?5+$kXz}_< zGTD%)A0Z2-F9KFD?IZ$;m%@&IEhBKgdt57v4|f5kfl*Js)eAcQkkJ9e4+HcRalbmo zhpFmL8@mlzDrwB2gtSN22AcH#$%-iTF|#!uJj}H8(SW6dhdPyCX@NeB9@-yI?wq?T zVT|f8;g{JGpw=J_+)OGTq8-#YzL>1EiU+1!`giqeEcMq*BKprg_DwT9^Ql$8nXJVz znMdC<@L^)%O9x?CaJ1-*0v-6*F1M|z+mjWxJd;N|lPoHvw~fo^&l)Z-vb=XP#n4JS zCSH|UciWMM56sZEM|WR&9&o0yYUaRLcw4Lh|HbKa==N>xlD9(zI%>d55#~DCfQsI8 z&jNVI;|tHE3puCya2+ARCb0A5v3+|55IlvgCuKGj-!G=!rrQpl#WNS#wnyLOxp;r2 z#-h^o({Vu$>2u$x2v#A(9Cgk&Qc+?zh+kk4Y)?z@+*3i@if|BRgx>R)@W~7kcS17f z!g|5o`_u6Vbeq|bd8cBmmEOP=5bfP3;4ARib?eF6Xz?4vvxM7fiVt%KKXMs8=#b-i zzWPWZPoHBjTSZ{9Br08a2e`L`q6H|c+vI_>JxG}%<~DADC?s>f^FE$-10-tIwW%sN zGH5CeECp&T1~WC>vKzE&l1u(rG@MJWy3HZ@X24b!Ejk_eX?6q0dM4n7$^8KM@NcJg zj`HZVyPZdiV=Q>Bz6sb~OMn!f#$kASklm_iB=U$n@G4 zvI4f0<81&W>cNOfPX>b%>WzW6JA>cCg*ltD6s7=M$41OVdMa2rwv7@y+Iy zbO2b((=wI(xl#o&7a##=0_KSE?5wHU&BYE(4-$Fnp&(;wRu)O=2~XidN9zQULyK0^ zJK-#uoJ|kf32sJYtIV^!NvH?3UtaX+WU!8P@w{eDIgGNeWr}A}DSuk&6HP7jct(@x z8o%Rr#uO2Ys3DKt`xLzHB|`%P#Qe)2GFY(vf4pzke#AmTi>TlK(ve<<-&G+gt1GaX|) zRRVTTW&6~yk^#J+04C8-C9c9FgKrIyrWCb|p;jYIp;c%1$5I65)|=)$xCt&>Ar%C8 z`s|3T#)RyB-%|xB|6z&eX2V{dRxj;3sYx(Zg3r;(irvOTsiv}$-oZIX6I(!bk>zDo zD=)aScaou5>i|NQ=O>~l%3L=pq0h{=j%WQUUOA~Ycg8U)Z$4Q;!v-`VWh|Nki>QUD zt^Zt^)}xJ}b``Ws=v?gvA=3!xE%Df*qfM*zk?%5X9d78wv{c4r z_p?dmqNo@JWPNXDUwovIkbiN$)!?5so5Eub$+{6L`0r7E5InRVkA-#*iA+lG0-w{h zY;NzL4&&K$_ixEPHVotBv=Gy!ot1wyRsK|Dtr$C+i;C3d>q{ntZ%Wpn+luG8;~KE_ zRm>0`w$UvxdMr;LvDm@c?`o@kquhvfpM$ptC&ZW1^1aE1*r0*Mdr#H<*|*Un_BHb( z1frpjbD>E7Pdqh6%5iM}wJN6qp|eJE(RQPa-QQTm-@7n#m(Z|yZjHPv^>D~h>^&b( zcsn*e)=S$E2NuQ7$QYKzm*HV%-v&O$3m=wOTmbo(T^z(~a(Nsp=q4juUv3jPh36(g zU1=flQXhh6oenR4;&bf^(e{zrdgZ)V1IC znVUMcH&LP!02^v%cm^=&cdKTnac(82QqvkND~4?9V6{6{TzHnZ{;ov-UL5#$Fg2tL z#CG#M(4G9^0gpJn^lE&*_!|fK$4iqzTTBW1Mk~n1WqqjA`lkkz_C}~O8tVPGAXh97 ztlTds2W#<|+fAZ_ALwt}aDZS70JBqC5GB2q0F4t&qr!QLCM%+sl$mBd$=`}Ma7Jm? 
zzZ*0AgH%RHQ!3KE$)Qb*SKP$rz@p;AW5HolS}jrgTsEF7$9^lyCk!fVE}A`iBw zbpdM&2`Xa`U&D#fzIidXpB9by7Rw-;ymi^rwo(?4ayuxdOxl_A=1{sJvfC7&x&XE| z&+I+~YV+t@tcmQdy!pI6YJen8el|XF>WrZa)DEl(k_EbTNS_MU%3k?p=)KPcD_FE04>@WXcQz^pZB9r)APcz5ZA7h+ z(_?{gV(b_1>^|4f=xp1uN>xN@NK!`-HpzrW#SU;2O-V}Wf4(ecuP+Rx;q0CU4IQ(T zm*gWf-^aK5GQ{n*0)Kb`yPZ8mGa=RXnMOm3m~~>rh{N!6I_xt?sJLZ@1!xd1_hnee zb!ZhK>5GPk;`IU33|Gqkg{rKlk2OFdP8ly=pnedtc5*2wPAK$AicybY; zRsS%`pNUnlAYe7DkAuAZ=U7nBEvPselmnB|qHko=wJ`f#o;z)haart&ag2hbM_{IM z$3j$J+vWs#OJKg~C;{&1XLy~W`I%xr)xR0M6uVR8Nva1@Bt5ncN6kQ)ZI@=fj^_at zzH3mosYx@<<}uuCndEe6K31-WnFBnil6%Fxj(E}VYvLU4B0fLOeatjoZkxty_q}Dv z-|+C*z)wNzg0Bv=7P&QRoeV*zBU1r%#r1^9CN=W4aD(cQHKoh&0&0kvo^_c9%49)R zuc7$mK|J;MB)Kp1N;RdUFILKoTwTw{L3K(2{}A>{RsOBw+V?gdlh&@+d*J4-^qhfP zL^mujReQRBR4YY%^t2bkmqOd_EMcz^p0};A<#_sa=x)gAQAOk<5n&wq5BR(Sz*|@@ z|LTsc<{5P82di69j`X8L^A(MVHfI#`%wE=8VrQFh*&%`*{V!W$dmRjqHEK^_HqwYc za~)oAfK}Q1Kbc}dP|KmpQe+BH&GSaw_aR@`O+xtHjX=rRHYC;2Q%uRdA5~XU{4I&Q zMgmrIZHbgsllQ8$eUHq*+emL?_tiFNwwtAeEDq&ssq|Tq*hM3F6uYfP-(1?=z@raj(AF~Uo_I+42^;!mgNlzIvC zC7|S$r{uG-40?Tk-4@a;rv9XrYcc$5A^jN<>F3g7GfWjJh$+M(NDxOwhO=F;c07Ll z{(dx0??d&TXEOyUiyHko!0N{%f1NdO1vrdNIbVheKraof1|*7}Rx^JP6J2kj2&|;5IQ}q-ybr_CKlfKKYo36LysYMT=cH_Bs3b z;-y!I7v{r){*OVdeGBfY zyPiQTQ_ifZFLs4*GHlC1ZYd?u_Rz@5lG?#u{-BZVD`sB5v(lq#a@NQX(e~oY9=(0D zy?!EFyvnk`a{Eq8SGjHd!kTs(pY5}tX)cqtN*QJwLi-v)EqCR)8lCRE8A)gzr};0s zu0#)`M1NYchrWk8xa_g=DA=C5YaFSZ=btCE3|6`Jp)#o(Amh|m$v&IoaAeL=RZ05g zoY*{jyQ}-y^#Oyht>Q*RUrNsQ6{4Kt?Ti?$??LRzSUDon;3ci!xg$mKg;1|n6+EiK zY;o+Z2MY=vKCLxy^W=Wz9vgU!j>n-1{oz31sDJq#dK)O<`GNU4Pg)F=vRt9oM$|Tx znX#>#Uf;n;`{)Sw3Rsr+xPB=E+hh2q2mblnDx11*-c%i*0u5G-`h#Z!f;N z#qz=$62dq%0*mAQJ|Jw3&~M~HqhUj=2ViwWNh7!Rf4s#JcNxaL_P%ZBls#Ze-5$EA z8_+n3-hW*$A5oW^V+(6>9LiBOKa(YWkynohSkzFpD1X^I0!>`mN)5Ci!0WwJxz7mb;DmqA+e0QO?BCsrCKw}J|Ttf%X>lsJU% zfH&x_u_&UW^j)FKGn(5FYEEjbqgI|t&))GB9vPOfOyy>tn{lzV{YUw$nC8$>o3os& zu$LNR2DCVFnzbiTePl>m63ZvJQoq^a!@wMzW?w;-k#h=;qtF!=(Ne=tLmQAQnsLs% zlQ-&rfqupW07v6m(c6;;d#!B}|2NX2wgczUr*Rm&%c{Lf^SzVwHFOQS?R8=Aw!IJQ z8aaC98F6>p(hgT=rmy>*j$7EJ`Mh4h^aq9}zb`A=~uM_N_J+-seOFedFXvO&V=yc>q>!~%(I#Cm zyr8CrKAsTFQj%T$CYKsUxk)$E@G#qdxjY+lyuY*ES-{$5 zsTZ?GZiYzwJffOiUuP*+br4ML`fVY_-(}Ic^cq)dsxt-0VF}elHXOei&#{TJ$k{$* zil@TcX1{=t%L~{XF)QL`0^n=@lgZxMPY-tGUCga!k*vi*6)H*GnOhw~=FY2Qmyz!8 z*pyYL?BKHq^T2VFiY09~9NAlPW6xL*1JH>es>;m~kB%~Bh0M3%`YL^%m+U*LyO=gd zxpO0mAd9TnV0MXaso$OGK`2ERv@cj@xAI9%FZXJ{w)U_5F_<8dtq&O6svvX#;}+`K z_dqhp$}6nPg`Hk>K)MnxGOZN+2;e?*{c~IGz!jsq9C}nI?t_a{>k6g1E>-NAo1oN8 zNIEn_IpId+7Zh97Pg5?vq(TkjG7QDYQ8v0(pZo?VMVUfhr}A3eXvuZP&*dRCjJJ8G zTA0;ZUonZ&#l(2#FjJH@QTB;T(IVokLH1gA?xVnc%YCi6CsNy#H8vAT>IS1IN0Q&a z^9wWaHCfkqD2&zpyjg|;{efXbP^Btb%Lt_jvbMGjv*_d^wS=KaO_laj=taxR+ZMkn zk&U*-Gv<-XwXwusU-%j{W?B27=^1(^52CJ7(58}TUA!;=M+OhlKN3SufwqrVsR9d=f5AlS_m4&ZMnJP6GY7keFKY{3-mGq9VI(7f;gxy zA@d|n7G|hqvZLqiCN=EvKHH9ug?<&YN9WDWkdv3tJWLK6cf1oGeNkNK`{0(Y%DYfk zOb+EgR~e>;7A&sgsl5&`550fU&`-ycab7G(7@yYW8h(|$z=XnK+K18NPL!H`!ql}9 zaradd>!nnFsGbDtzAiQm444&vB^i^%Vqp(O76 zVfH+>Z*{Q9R+l8N?1`8g6erNx^iF{_%3wIdN3{AmuaL>{i<6s&7%&=%N0N1MU?ZMl z+6)Pm7NprToj03W*3l^(#csjsc^)(+3w=~_UbAhzzY^qz0V1Z8dcM+%mc~?H8*Q;( zCkou?rc^Nn;S*iw z$S@15CtVo17=#NvmZ9u*R)r*F?lg?y!x7K#{46W>v z%2a`L`wI~6oil&)wdL-IN$oTc{T z=v};dP5zG^J0{o^6;`lCLK$pC)Af~1^LFfLYIlkok9TMxf}CM=ng?x;u#dPi68o(} z3V(V5K=^{@1?QH`o4zW7ew>KtO1c?cT10yv6FwihTy|*mv~TgOd%2XLYz~8rv0vLO zJ(*-BymyY!)&Ia-Txk;2O6AL2v$pk76U^0tUi?UN>La4CO$&){#!PrE8K<=CXI4qY z=SdrMpHCIu4sQJ*Q(Ji`IH6p~QvwUc)?e+=C--7P8LJf*4?KU%Cy zHQ%@DL1JV&<&PxFOOd>g3#lM;TsbK2t8F4deIyVVf|%2C&2}Kstb$*x>eaok-&`OW zzShVn`VGTG_-0%&GyOvItDNE_RSS{K6Q1=`D@A=KQ}PV 
zrAUm9$}^axy7?6u7+idyo?(*@qRFJg8(KEo(-h#2YSpbSXRlc}I2N?=`gXBpTLaBO zaH*ASppc1}%S3GNe)jt{gwK%H*B2%rav31X4Us6mhanY_^&@kywQ4jEHMYyRaRGj- zkIuFCW$!J>t3~T5d@bhX%AjfTY_Iw-YcHQill~996+#ZM3AAN(2N)=CxH2TNdQwbI z>Gpp90{FLi{5zx4?;lsO@&DmYzzCJV5S&y;$-pD{Bh%;CtN#8|6tt#y8JqqU{{{UC z81mCrO&xCiDyIJX=NwWD!9s+j!?w7CCNJU}*@6l;A*U=cq{T}B+J2oufizla2KbvX zUrfYL1=vpr0-yP3AB7x{@H+rdwEdw&^c@E`(A-{y{ra2R!hQH>jQ)lU4PF9cedO&A z0bINrG78uY5(NxU0XsdU0>41Sj)oH(g9mjAUg?J3`%@JA3s2K7P*7r^9eNO7cIY7E zCizJqHCLFe%kV}MB*B3j4V#294j_2GFRIgR1|)527L$wej{CEYMV;3KPjO*^nDf z>4;~E22$CFN88g>8c#p}&9C9F_wgrJhFP|x*{$D454^R&OC@a9nhq@cQhHbVGn_SF zA@<~ewq35n=(&ruqK`FN}ac?BpPL`j* zC&h-5!hF5cl{a3`0!7;IqG=ADOBG*~^vrr1l{MP__t=m?`z8&uG*9F143#D(Y@WPk zOwbk6e~p&?LyzZ=x0`msX&@g{;q+i_snWBlVa9W#@RpE>kkfJkXv^MLFxX^VhBmm^ zw^G>4Eh()YXhxg;?+5rRz;3lr)uam-ehHD4EO7LsGNdrZC%z$ary%Kxh1iNyHnemZ z^mEDFo7H`nDOSwkQw2Ia$mmC8ULMt%R82+t z1>o_}#UU&kWPEYDQ5+}eXuMX)F$?y-d}r^(+%Fn7FULYI=;$>H9!Z7}Qkj?U6Jl$l z)nQNr*K+88Hu1l24|$OGigCY>N~NXN^O#`CC{UO`p$qOSGvfSg^)aAlUSNYVJ^yw@|w;xIft zPAH#7r0m&9f+|QF%~wiIz7#!f^}N|z3kTj8$j&>pu=^uc&4SfJ%5GyJ)Q#ZfgU9gW z!08*^M7xMzXv5#Ws3kVWC-*?aEk$&Kezc8Pljcw99C?u9mpLxmL!F!4d(}HV0zvVB zL-Rn0^qFC2JWCSjsda?lQ|UF~cU4JtBY|>7kb`;%S@Ca#}0U8!* zwnv05WZ87ryLFcJTu0#DnUmkJfV!RD5R-by@os1tVa5M4p)mN7;~C0~sX`?Ze?5_I zXw0h7pIjfH0wroelZRnp0v#&o#i>B=+^vl7_&HHqIx&;=vCKr7EgK*25Vk&N>q}A| zeL+7E*P9zMz{0jScJWgQ?VIF__e#$nSsrj{fc%ZIG;F!|n}&fGu~c z`?p?`y44Kh;+g^JUGK8pP8}PdM7ZX^r1%6PQjEIVqHeO?(JfE7lUm>oIzw?>CbAxv zCo9wI{{e>o-fRBjF@2!KKo7rJtxZJC0#|+lNl>qU;O8(;nEH3qhS6e!BoHwtoW|Sz zFp)hLcvB`bHb1R)iFaIRPaFRP`%!~v!u@PNd#}o2KEPVMKZa%WS~OY(e(o7+!d%>8q zeQhvsz|^Y(o;D2339RQ61(Il7uOv*x(O=%EUYIU-K08Xr1cqb(Jay0w>_xUm4V>Eb zcD6w}8285#_H)u_csm>yQcB3*NCjpnHKa1<(%1=0?(gTL3}b#Ez_+HoP5p|M#7dznKG!zep;AqQBGp{(3B7h-4#b zuLOSh7Xd1A$rS+aKIP!>f0T4+NLplrFfBFyznvKr>`L(N3n77Q|NhmW+JG{JF>*3z z`S-*9>oKTZTd4%7_5S^<`8R;napsaftpC31f8Jeo;N2_d=l<+7|8>xRTo{ZS;G+1u z+!Lhy_w(w2lizH8DzwjR z2*uA!u5;)s0%7R8N0R}iFd!#hcK^|KxU&nIZ$e5JAjuJHVcrRJEB-cap$w3p$MwhL zjyadKGRJPSuaPoazymNnD$qgl{3!UIsY`?lHizQJa{&3v*4sE1mAjMWcCUdgZ}(8H z+EU9KzXw;B7t`xOWgtzp{82!(#rVh_-H#gGiAH?Qwv13-j5wx;%s|f^^FfXf^nD8w zBdJEa=bN$25Ob16XSENLD$Gs~tiBiM44%2_iMZOt14$^-{io)L%VqsK+-I=^L?g+4 zbU6R%xa>{?Xvp=hRDA0qYT_nscy@)G`*XX+wIg*mrM_vQQDHYvE&E8aQp9$XR3IDQFvz-~xIgXaEJw5#N!V_O9 zT~L)$;2Gmhk>?P%x2>k z|KY&;;r$&^#b-k~cR@uP_C?z3c-l_(`m@QSUe3w$*{iRidzZNy`B@3q$K{{E|omG0bl7PIq8$0FM* zr=_?^Dt_{Q8kPR!*KC>{pzK=gm~a04yptHN@}{^BsccX7b+wtj3klydgKSi~Y$UR4 z&95a#z3WZ0@4_9em#3%wGn)CS6%bqs^{X-PoreW;K;@ z#lE_^ZM9<(1Ae)I@A#y9>eDKx`#3k)8JQj=sB^?MeXrcjH-|tt@*chO&Y2G)vs<+F zDto38c`NAb4tt;jug4yv7AZs#h=oSWwrv}A{W1^Fa|3##V zSZ5$;^0yv^be_k(0gx0 zBUT&`#w~)4g}{Q>*;d72>q(Ra>gn6qh70BwNf#?`{AQl373-r5<!G+4d9*c|(hC_?1kBuuyzihr#u`vWdf>J?fT~ksm}%M> zH(t06oDd2#ow_4O37Z-pwez^)YXf1dp6{7$G=+Gb?Uh|*aB0A!+xAjP)0jU-Ci!&C{%AAB&IGIZWZ#c_vXrn7+sXf*q)Qkx zunk-g%-4~sdE&Y?vBWCoy&AJQThV`LwR^hpy>2>#EWi8|-{?~>m3{Ill&(N2nNxCR z1i=>c^l7c0qsB8_gwGuvV#MK+CrpO0&wU0$BqTV~+l4}x*YdP*C+v8k;Xp&A%_EBzKViVpa8>N~s za_4UTjiZZ`6@z4##c_}QUVdghoHL|+rNXngc0|wfc-yRrk>lxUyG>xsSJO^o5R?+H zBPX>VPSQ?AgyCa6m4i+wojorIQ>4Rq)}L9sOfg)xA>z4H?xwk4bq>cVD*=Xm9!lm> zjuO;T^(^YEZQ7hsLeA!C7qAC`$QXl&6_~bUP{1k%*R_6mZ6b!SXs+(tk3T*twFxKc zLCSzLwFFrN_1!Q2>#AoVYb?PJpwS>;tr^yU(-J?ij=H~Q@g~K#K0hWfNw8*>{Tm24 z{ti2bgn09e_+DMORa>?sFs?g327Zxh{N@LiaN48R;{Mx1YORzt<^`V~CMjRbPwL?! 
z)$&c|?R2kxJ6nQBFP~ZCq12ZKF2RGRy_17rcds?eQF{=VZ-2(-lBMXTf7JABbd=xy zMk7IP2j;`PnfnOgCfsbbm`3e_Mz+blFhJ z#yXhsKHr^MMwh&%?+)B4`7>_X9;h*1lj-NsAtho@@oow$ zf-M~b*f#7=EPYfEhh$14Le^$3o~nUg)ybwiY!7^B&v?{)b+IGXnIRHw+MW0mCM^9i zSc|qmtGK_HJds@|4^5eXXB)&5CWDQjR5J`@FgJ~{X?V)Ej(VCqyPtU2Pyv2CUsb@s zF`)k1mTsJV42dezfy=fJ>YlN(0%pGey;2l)bk(}yJeW)<_#saN8_)x@jermKYaYO;=?1X~_2Xf-7K+NjVxW&!FVEw80Jd%xj>m1E}7oXSJ zRc7#vw4e?#)y$EX!jX9t8RIzv3N_q+@=;6`q}iX@7;;Tz%fzp zI7v#mREnf&UXi}w2#j8*nPUSn;2-*p6@NoY3h2y6<^`~)RM5_jcLaL9(kQ32>Tz#O z5y2cU0G86JE=|B%;0I>ly`B22XJhk}q|C?FO-T+{vO|>oHk*4t{aJcO4UV0y>CbS|lF4*YH+4RwJ906vL}AHk)Sc`cP?(A^+@x&yo)3E5CvF?U$R zxW)IDaJRiHo9VB;qG^s(C7;c9g5>Df@l4vXtp-J^Uefp+e2a>*c3<;{&4K+xC71L#A+b)$Gjewiwf zE)-AyAZC%w2)!dXFt;`~uq{n2f%Wwop*|a*^-#>-QkO^mGOI|ILZGUBOr3la9EJY6 zzANkj-4_%N)oIYTOR>o0IuVY6c@$pK6Ew4n2wsK&sXN$o-|OxPakzvr;?^(v71L=$ z2AdK#07d@!R&Y>9zWSUb`B)ecgQAf8fIKV@QlR6^qDVNelzVV*pEn5Mt{*RNV}sw1 zY7Nwms;X{Po}o2{dyj7lIt;icj%^WLT!N)?F;ZY6*{j}eI!SsflfLz)ir7YmG28jY zF%vwQ(+F)J-yeg<0s1;U#3F`o<@r3s>N9Pa?%7Z`6no%;D@+l9c)S{5V=!Ni8>G8f znYy}nI`|ZuWQNEQfd^7na#-4IgP%>_{U;U%V@BGvI^Mv_ zLwqYWkz3f^s|tA#IRa&2yd`^=e6iABX)vhHz^aaN5q@%COb$p|Y>0QHa!QuH*BWuJ z--F5{=mVz+=T0Bi)xQ#HzZ_O(fG9S1E zUI;0U<1xjbSuYLoAd{zS?Xh%3aow#6GD>x1 zd?>4YvxeHQ*r#c3TwPAO;t?TQUuCJuQj90J(ohK}CAJJHY8VH#PRAP){K2S&6k}VE9csQilslN}P#Nd>B|%^|U+d~U z&nlnO(t7D(^Qo-%b*XMq{3)xW+O zgg!RIf2WedyT)lA&{W*jm&CJ}=SUfMPVdN-H5$?|zu64Gd(>P!++h`HFjY$EE97?h z1b?2nzAr3z{4JDu-a}8mWJ&{*Ht3=DKD#qb8V@$tx=01x&b2033=f1~(?jZ@kzJEB z$mWxB#zc*tu^umR*fX@=)Y>exzGCy`?29u$fn#!W>E>E&Y5lCFZY}cY)RH#y9iyIG znC~!XTrw5>0YYZ6_eDaxmHk1XZiQpoe3$N-G9g9(R=oPA%}0Hwc$O%7f7Xr7}A%!UUwJff*3-YOfB#h9YlrCY&sJGziWJdKfUKN==PE|bJC~qYPX%%t|_wE zQxg#;PwPsX?XX}&tB+C_g?lzO{&e%+OxGsA!EEyQdNZMTLap$Y&^^fQ#xL{Afulsh zR4hby<(Naj}t@{#tfo-Vu43Q@;qpm@GPFyL+PhXI9*^o`i?*d z$o1Y5oWbW$5baV-j3JerF6+Dtjsb{wiYb>5?d&LVPSB%y{R*r{-FnrR$=RB!Ih(Ib zk;dALh2ZxlbEyHpjCc11%YnB96(P)he5zE$=QW3cszu6#NsGEWiBx5yXW%t_B|ed? zG=VD--xcyL*90Ji@9O!!G!>;&&3Wx0ny~}%*eOKbV_{=zc9NS|ih+nXlNOQC#Jglk zmML@Hu7=2j;%|$nLUN3bEAejSLrA`9i5jL!8iasn&S;tJ})C03b-9h{OYlboycy-1}N^V}@@8p5Izf=6L`yghA4W1ksm^uXExuH-RtXmL}> z4G@AY&CB*RXnE!vc?f^J!ls&x^T$RJK$gH{`?dH>5Diwm$9$Rkf2 zcR$oo?XAk_{wLOP3uDiU5zpr0YZ(y@acdfcVSxa=AW=e**nX4if2Bd&SpHr__T>C7 z;(d@wAj0{pmG%3vzdRBai<9_F@o&!p-3~2)#ij`JY~ug1?)`?j=#fT^@@SprzklH$`KZ_TYsK!&?*IxEuCHg(qw)RqF3#!0cLWdL4sH9(rA)ODD>-*R6nO; z0Jiq92_rn<)Lqd1&p!oMUkmMAXhR>{32+5~XxZU#qW-#Fz|p4TTxl>02K7|_-y~Om zC|X;V;r%8o{@RKtGyHSyX^R;Fx|0^w6D~I&f+xdJ)&n7$Bpretk!;@{wkd_PpkD ze;2~2dGS@h_%ZmD;kC={YNw+f3Y|KelzHc0@L|Hk77Ac}QKMI37u22(+zo+tN;>Em z3_XY9za*~iJJ)7~7Q%ktr}Uu!jmByRHdw$H@W-I-5Eik@!d zk;RkMoh(y;3ElUPqLY|1v>QH+$Vf`KW`F#S0 zd0*EB)HPREfTusV@06gH(8N@oZQ2_8q?gZb@OX$x&+eE>+})?b@L9F{FCk6eMskly zqDU6jztvXf-@F?mVM;vrd@fcY-ZoZZh-j{s%B5hOxYUxRIJmo}*)vvMjUX!XVSi=Xv;sggJY-=~0Gbte%BY30icbj;o* zHo}}T|DT%5xXHpkVFsPrpd>YkMTtb*kVe>(QA!Md|;k?|T2cs{vQ4;axv-IFYK zJmri;?BKQ&L%9KnoO?c}9j^G2)^wM$?c#Peg!^ycKQ_+<=mY-@lB?`(`| z7VF=!&zHd1VfjVh{N0}V<3Wo#+OEldqZf@xjm_mFqIcdWdmwy?SOM4{dJopugX&1? 
[... base85 binary patch data omitted ...]

diff --git a/docs/source/assets/deployment/anything-llm-upload-doc.png b/docs/source/assets/deployment/anything-llm-upload-doc.png
new file mode 100644
index 0000000000000000000000000000000000000000..00c70e9c01f672cf4bc83fc4277b7f0c60ff3e55
GIT binary patch
literal 114117
[... base85 binary patch data omitted ...]

diff --git a/docs/source/autodoc2_docstring_parser.py b/docs/source/autodoc2_docstring_parser.py
new file mode 100644
index 00000000000..41c49ed1c54
--- /dev/null
+++ b/docs/source/autodoc2_docstring_parser.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+from docutils import nodes
+from myst_parser.parsers.sphinx_ import MystParser
+from sphinx.ext.napoleon import docstring
+
+
+class NapoleonParser(MystParser):
+
+    def parse(self, input_string: str, document: nodes.document) -> None:
+        # Get the Sphinx configuration
+        config = document.settings.env.config
+
+        parsed_content = str(
+            docstring.GoogleDocstring(
+                str(docstring.NumpyDocstring(input_string, config)),
+                config,
+            ))
+        return super().parse(parsed_content, document)
+
+
+Parser = NapoleonParser
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a83ad764125..060649e43b9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,16 +13,17 @@
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 
 import datetime
-import inspect
 import logging
 import os
+import re
 import sys
+from pathlib import Path
 
 import requests
-from sphinx.ext import autodoc
 
 logger = logging.getLogger(__name__)
 
-sys.path.append(os.path.abspath("../.."))
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+sys.path.append(os.path.abspath(REPO_ROOT))
 
 # -- Project information -----------------------------------------------------
 
@@ -40,8 +41,7 @@
     "sphinx.ext.linkcode",
     "sphinx.ext.intersphinx",
     "sphinx_copybutton",
-    "sphinx.ext.autodoc",
-    "sphinx.ext.autosummary",
+    "autodoc2",
     "myst_parser",
     "sphinxarg.ext",
     "sphinx_design",
@@ -49,7 +49,22 @@
 ]
 myst_enable_extensions = [
     "colon_fence",
+    "fieldlist",
 ]
+autodoc2_packages = [
+    {
+        "path": "../../vllm",
+        "exclude_dirs": ["__pycache__", "third_party"],
+    },
+]
+autodoc2_output_dir = "api"
+autodoc2_render_plugin = "myst"
+autodoc2_hidden_objects = ["dunder", "private", "inherited"]
+autodoc2_docstring_parser_regexes = [
+    (".*", "docs.source.autodoc2_docstring_parser"),
+]
+autodoc2_sort_names = True
+autodoc2_index_template = None
 
 # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] @@ -77,6 +92,11 @@ 'repository_url': 'https://github.com/vllm-project/vllm', 'use_repository_button': True, 'use_edit_page_button': True, + # Prevents the full API being added to the left sidebar of every page. + # Reduces build time by 2.5x and reduces build size from ~225MB to ~95MB. + 'collapse_navbar': True, + # Makes API visible in the right sidebar on API reference pages. + 'show_toc_level': 3, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -164,68 +184,64 @@ def linkcode_resolve(domain, info): return None if not info['module']: return None - filename = info['module'].replace('.', '/') - module = info['module'] - - # try to determine the correct file and line number to link to - obj = sys.modules[module] - - # get as specific as we can - lineno: int = 0 - filename: str = "" - try: - for part in info['fullname'].split('.'): - obj = getattr(obj, part) - - if not (inspect.isclass(obj) or inspect.isfunction(obj) - or inspect.ismethod(obj)): - obj = obj.__class__ # Get the class of the instance - - lineno = inspect.getsourcelines(obj)[1] - filename = (inspect.getsourcefile(obj) - or f"{filename}.py").split("vllm/", 1)[1] - except Exception: - # For some things, like a class member, won't work, so - # we'll use the line number of the parent (the class) - pass - - if filename.startswith("checkouts/"): + + # Get path from module name + file = Path(f"{info['module'].replace('.', '/')}.py") + path = REPO_ROOT / file + if not path.exists(): + path = REPO_ROOT / file.with_suffix("") / "__init__.py" + if not path.exists(): + return None + + # Get the line number of the object + with open(path) as f: + lines = f.readlines() + name = info['fullname'].split(".")[-1] + pattern = fr"^( {{4}})*((def|class) )?{name}\b.*" + for lineno, line in enumerate(lines, 1): + if not line or line.startswith("#"): + continue + if re.match(pattern, line): + break + + # If the line number is not found, return None + if lineno == len(lines): + return None + + # If the line number is found, create the URL + filename = path.relative_to(REPO_ROOT) + if "checkouts" in path.parts: # a PR build on readthedocs - pr_number = filename.split("/")[1] - filename = filename.split("/", 2)[2] + pr_number = REPO_ROOT.name base, branch = get_repo_base_and_branch(pr_number) if base and branch: return f"https://github.com/{base}/blob/{branch}/{filename}#L{lineno}" - # Otherwise, link to the source file on the main branch return f"https://github.com/vllm-project/vllm/blob/main/{filename}#L{lineno}" -# Mock out external dependencies here, otherwise the autodoc pages may be blank. +# Mock out external dependencies here, otherwise sphinx-argparse won't work. 
autodoc_mock_imports = [ + "huggingface_hub", + "pydantic", + "zmq", + "cloudpickle", + "aiohttp", + "starlette", "blake3", - "compressed_tensors", "cpuinfo", - "cv2", - "torch", "transformers", "psutil", - "prometheus_client", - "sentencepiece", "vllm._C", "PIL", "numpy", - 'triton', "tqdm", - "tensorizer", - "pynvml", - "outlines", - "xgrammar", - "librosa", - "soundfile", - "gguf", - "lark", - "decord", + # The mocks below are required by + # docs/source/serving/openai_compatible_server.md's + # vllm.entrypoints.openai.cli_args + "openai", + "fastapi", + "partial_json_parser", ] for mock_target in autodoc_mock_imports: @@ -236,18 +252,6 @@ def linkcode_resolve(domain, info): "been loaded into sys.modules when the sphinx build starts.", mock_target) - -class MockedClassDocumenter(autodoc.ClassDocumenter): - """Remove note about base class when a class is derived from object.""" - - def add_line(self, line: str, source: str, *lineno: int) -> None: - if line == " Bases: :py:class:`object`": - return - super().add_line(line, source, *lineno) - - -autodoc.ClassDocumenter = MockedClassDocumenter - intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "typing_extensions": @@ -259,7 +263,4 @@ def add_line(self, line: str, source: str, *lineno: int) -> None: "psutil": ("https://psutil.readthedocs.io/en/stable", None), } -autodoc_preserve_defaults = True -autodoc_warningiserror = True - navigation_with_keys = False diff --git a/docs/source/contributing/deprecation_policy.md b/docs/source/contributing/deprecation_policy.md new file mode 100644 index 00000000000..598f1612d3a --- /dev/null +++ b/docs/source/contributing/deprecation_policy.md @@ -0,0 +1,87 @@ +# Deprecation Policy + +This document outlines the official policy and process for deprecating features +in the vLLM project. + +## Overview + +vLLM uses a structured "deprecation pipeline" to guide the lifecycle of +deprecated features. This policy ensures that users are given clear and +sufficient notice when a feature is deprecated and that deprecations proceed in +a consistent and predictable manner. + +We aim to strike a balance between continued innovation and respecting users’ +reliance on existing functionality. Deprecations are tied to our **minor (Y) +releases** following semantic versioning (X.Y.Z), where: + +- **X** is a major version (rare) +- **Y** is a minor version (used for significant changes, including deprecations/removals) +- **Z** is a patch version (used for fixes and safer enhancements) + +Features that fall under this policy include (at a minimum) the following: + +- CLI flags +- Environment variables +- Configuration files +- APIs in the OpenAI-compatible API server +- Public Python APIs for the `vllm` library + +## Deprecation Pipeline + +The deprecation process consists of several clearly defined stages that span +multiple Y releases: + +**1. Deprecated (Still On By Default)** + +- **Action**: Feature is marked as deprecated. +- **Timeline**: A removal version is explicitly stated in the deprecation +warning (e.g., "This will be removed in v0.10.0"). 
+- **Communication**: Deprecation is noted in the following, as applicable: + - Help strings + - Log output + - API responses + - `/metrics` output (for metrics features) + - User-facing documentation + - Release notes + - GitHub Issue (RFC) for feedback + - Documentation and use of the `@typing_extensions.deprecated` decorator for Python APIs + +**2.Deprecated (Off By Default)** + +- **Action**: Feature is disabled by default, but can still be re-enabled via a +CLI flag or environment variable. Feature throws an error when used without +re-enabling. +- **Purpose**: Allows users who missed earlier warnings a temporary escape hatch +while signaling imminent removal. Ensures any remaining usage is clearly +surfaced and blocks silent breakage before full removal. + +**3. Removed** + +- **Action**: Feature is completely removed from the codebase. +- **Note**: Only features that have passed through the previous deprecation +stages will be removed. + +## Example Timeline + +Assume a feature is deprecated in `v0.9.0`. + +| Release | Status | +|---------------|-------------------------------------------------------------------------------------------------| +| `v0.9.0` | Feature is deprecated with clear removal version listed. | +| `v0.10.0` | Feature is now off by default, throws an error when used, and can be re-enabled for legacy use. | +| `v0.11.0` | Feature is removed. | + +## Important Guidelines + +- **No Removals in Patch Releases**: Removing deprecated features in patch +(`.Z`) releases is disallowed to avoid surprising users. +- **Grace Period for Existing Deprecations**: Any feature deprecated **before +this policy** will have its grace period start **now**, not retroactively. +- **Documentation is Critical**: Ensure every stage of the pipeline is +documented clearly for users. + +## Final Notes + +This policy is a living document and may evolve as the needs of the project and +its users change. Community feedback is welcome and encouraged as we refine the +process. diff --git a/docs/source/contributing/model/multimodal.md b/docs/source/contributing/model/multimodal.md index 03d830fe90f..b42536f054d 100644 --- a/docs/source/contributing/model/multimodal.md +++ b/docs/source/contributing/model/multimodal.md @@ -128,11 +128,9 @@ HF processing as well as memory profiling. ### For memory profiling -Override the abstract method {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_processor_inputs` -to construct dummy inputs for memory profiling. This dummy input should result in the worst-case memory usage of -the model so that vLLM can reserve the correct amount of memory for it. +Override the abstract methods {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_text` and {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_mm_data` to construct dummy inputs for memory profiling. These dummy inputs should result in the worst-case memory usage of the model so that vLLM can reserve the correct amount of memory for it. -Assuming that the memory usage increases with the number of tokens, the dummy input can be constructed to maximize the number of output embeddings, which is the same number as placeholder feature tokens. +Assuming that the memory usage increases with the number of tokens, the dummy inputs can be constructed to maximize the number of output embeddings, which is the same number as placeholder feature tokens. 
::::{tab-set} :::{tab-item} Basic example: LLaVA @@ -244,38 +242,45 @@ def get_num_image_tokens( ``` Notice that the number of image tokens doesn't depend on the image width and height. -We can simply use a dummy `image_size`: +We can simply use a dummy `image_size` to calculate the multimodal profiling data: ```python +# NOTE: In actuality, this is usually implemented as part of the +# model's subclass of `BaseProcessingInfo`, but we show it as is +# here for simplicity. def get_image_size_with_most_features(self) -> ImageSize: hf_config = self.get_hf_config() width = height = hf_config.image_size return ImageSize(width=width, height=height) -def get_dummy_processor_inputs( +def get_dummy_mm_data( self, seq_len: int, mm_counts: Mapping[str, int], -) -> ProcessorInputs: +) -> MultiModalDataDict: num_images = mm_counts.get("image", 0) - processor = self.info.get_hf_processor() - image_token = processor.image_token - - hf_config = self.get_hf_config() - target_width, target_height = self.info.get_image_size_with_most_features() + target_width, target_height = \ + self.info.get_image_size_with_most_features() - mm_data = { + return { "image": self._get_dummy_images(width=target_width, height=target_height, num_images=num_images) } +``` - return ProcessorInputs( - prompt_text=image_token * num_images, - mm_data=mm_data, - ) +For the text, we simply expand the multimodal image token from the model config to match the desired number of images. + +```python +def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str: + num_images = mm_counts.get("image", 0) + + processor = self.info.get_hf_processor() + image_token = processor.image_token + + return image_token * num_images ``` ::: @@ -412,29 +417,30 @@ def get_image_size_with_most_features(self) -> ImageSize: Fuyu does not expect image placeholders in the inputs to HF processor, so the dummy prompt text is empty regardless of the number of images. -Otherwise, the logic of this method is very similar to LLaVA: ```python -def get_dummy_processor_inputs( +def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str: + return "" +``` + +For the multimodal image profiling data, the logic is very similar to LLaVA: + +```python +def get_dummy_mm_data( self, seq_len: int, mm_counts: Mapping[str, int], -) -> ProcessorInputs: +) -> MultiModalDataDict: target_width, target_height = \ self.info.get_image_size_with_most_features() num_images = mm_counts.get("image", 0) - mm_data = { + return { "image": self._get_dummy_images(width=target_width, - height=target_height, - num_images=num_images) + height=target_height, + num_images=num_images) } - - return ProcessorInputs( - prompt_text="", - mm_data=mm_data, - ) ``` ::: diff --git a/docs/source/contributing/overview.md b/docs/source/contributing/overview.md index 31c7059fda3..89b31f0311e 100644 --- a/docs/source/contributing/overview.md +++ b/docs/source/contributing/overview.md @@ -17,7 +17,7 @@ Unsure on where to start? 
Check out the following links for tasks to work on: - [Good first issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22) - [Selected onboarding tasks](gh-project:6) -- [New model requests](https://github.com/vllm-project/vllm/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22new%20model%22) +- [New model requests](https://github.com/vllm-project/vllm/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22new-model%22) - [Models with multi-modal capabilities](gh-project:10) ## License @@ -40,6 +40,10 @@ pre-commit install --hook-type pre-commit --hook-type commit-msg # You can manually run pre-commit with pre-commit run --all-files +# To manually run something from CI that does not run +# locally by default, you can run: +pre-commit run mypy-3.9 --hook-stage manual --all-files + # Unit tests pytest tests/ ``` @@ -54,6 +58,12 @@ Therefore, we recommend developing with Python 3.12 to minimise the chance of yo Currently, the repository is not fully checked by `mypy`. ::: +:::{note} +Currently, not all unit tests pass when run on CPU platforms. If you don't have access to a GPU +platform to run unit tests locally, rely on the continuous integration system to run the tests for +now. +::: + ## Issues If you encounter a bug or have a feature request, please [search existing issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue) first to see if it has already been reported. If not, please [file a new issue](https://github.com/vllm-project/vllm/issues/new/choose), providing as much relevant information as possible. diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md index 6b794db656c..ca56710bc2e 100644 --- a/docs/source/deployment/docker.md +++ b/docs/source/deployment/docker.md @@ -19,6 +19,18 @@ $ docker run --runtime nvidia --gpus all \ --model mistralai/Mistral-7B-v0.1 ``` +This image can also be used with other container engines such as [Podman](https://podman.io/). + +```console +$ podman run --gpus all \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + -p 8000:8000 \ + --ipc=host \ + vllm/vllm-openai:latest \ + --model mistralai/Mistral-7B-v0.1 +``` + You can add any other you need after the image tag (`vllm/vllm-openai:latest`). :::{note} diff --git a/docs/source/deployment/frameworks/anything-llm.md b/docs/source/deployment/frameworks/anything-llm.md new file mode 100644 index 00000000000..d430c170ef5 --- /dev/null +++ b/docs/source/deployment/frameworks/anything-llm.md @@ -0,0 +1,47 @@ +(deployment-anything-llm)= + +# Anything LLM + +[Anything LLM](https://github.com/Mintplex-Labs/anything-llm) is a full-stack application that enables you to turn any document, resource, or piece of content into context that any LLM can use as references during chatting. + +It allows you to deploy a large language model (LLM) server with vLLM as the backend, which exposes OpenAI-compatible endpoints. + +## Prerequisites + +- Setup vLLM environment + +## Deploy + +- Start the vLLM server with the supported chat completion model, e.g. + +```console +vllm serve Qwen/Qwen1.5-32B-Chat-AWQ --max-model-len 4096 +``` + +- Download and install [Anything LLM desktop](https://anythingllm.com/desktop). 
+ +- On the bottom left of open settings, AI Prooviders --> LLM: + - LLM Provider: Generic OpenAI + - Base URL: http://{vllm server host}:{vllm server port}/v1 + - Chat Model Name: `Qwen/Qwen1.5-32B-Chat-AWQ` + +:::{image} /assets/deployment/anything-llm-provider.png +::: + +- Back to home page, New Workspace --> create `vllm` workspace, and start to chat: + +:::{image} /assets/deployment/anything-llm-chat-without-doc.png +::: + +- Click the upload button: + - upload the doc + - select the doc and move to the workspace + - save and embed + +:::{image} /assets/deployment/anything-llm-upload-doc.png +::: + +- Chat again: + +:::{image} /assets/deployment/anything-llm-chat-with-doc.png +::: diff --git a/docs/source/deployment/frameworks/index.md b/docs/source/deployment/frameworks/index.md index cb758d3e6d2..683fa8217a8 100644 --- a/docs/source/deployment/frameworks/index.md +++ b/docs/source/deployment/frameworks/index.md @@ -3,12 +3,15 @@ :::{toctree} :maxdepth: 1 +anything-llm bentoml cerebrium dstack helm lws modal +open-webui skypilot +streamlit triton ::: diff --git a/docs/source/deployment/frameworks/open-webui.md b/docs/source/deployment/frameworks/open-webui.md new file mode 100644 index 00000000000..83e5303a00e --- /dev/null +++ b/docs/source/deployment/frameworks/open-webui.md @@ -0,0 +1,29 @@ +(deployment-open-webui)= + +# Open WebUI + +1. Install the [Docker](https://docs.docker.com/engine/install/) + +2. Start the vLLM server with the supported chat completion model, e.g. + +```console +vllm serve qwen/Qwen1.5-0.5B-Chat +``` + +1. Start the [Open WebUI](https://github.com/open-webui/open-webui) docker container (replace the vllm serve host and vllm serve port): + +```console +docker run -d -p 3000:8080 \ +--name open-webui \ +-v open-webui:/app/backend/data \ +-e OPENAI_API_BASE_URL=http://:/v1 \ +--restart always \ +ghcr.io/open-webui/open-webui:main +``` + +1. Open it in the browser: + +On the top of the web page, you can see the model `qwen/Qwen1.5-0.5B-Chat`. + +:::{image} /assets/deployment/open_webui.png +::: diff --git a/docs/source/deployment/frameworks/streamlit.md b/docs/source/deployment/frameworks/streamlit.md new file mode 100644 index 00000000000..084550ec991 --- /dev/null +++ b/docs/source/deployment/frameworks/streamlit.md @@ -0,0 +1,42 @@ +(deployment-streamlit)= + +# Streamlit + +[Streamlit](https://github.com/streamlit/streamlit) lets you transform Python scripts into interactive web apps in minutes, instead of weeks. Build dashboards, generate reports, or create chat apps. + +It can be quickly integrated with vLLM as a backend API server, enabling powerful LLM inference via API calls. + +## Prerequisites + +- Setup vLLM environment + +## Deploy + +- Start the vLLM server with the supported chat completion model, e.g. 
+ +```console +vllm serve qwen/Qwen1.5-0.5B-Chat +``` + +- Install streamlit and openai: + +```console +pip install streamlit openai +``` + +- Use the script: + +- Start the streamlit web UI and start to chat: + +```console +streamlit run streamlit_openai_chatbot_webserver.py + +# or specify the VLLM_API_BASE or VLLM_API_KEY +VLLM_API_BASE="http://vllm-server-host:vllm-server-port/v1" streamlit run streamlit_openai_chatbot_webserver.py + +# start with debug mode to view more details +streamlit run streamlit_openai_chatbot_webserver.py --logger.level=debug +``` + +:::{image} /assets/deployment/streamlit-chat.png +::: diff --git a/docs/source/deployment/integrations/production-stack.md b/docs/source/deployment/integrations/production-stack.md index e66e8e6a16b..05f1568306c 100644 --- a/docs/source/deployment/integrations/production-stack.md +++ b/docs/source/deployment/integrations/production-stack.md @@ -16,7 +16,7 @@ Ensure that you have a running Kubernetes environment with GPU (you can follow [ ## Deployment using vLLM production stack -The standard vLLM production stack install uses a Helm chart. You can run this [bash script](https://github.com/vllm-project/production-stack/blob/main/tutorials/install-helm.sh) to install Helm on your GPU server. +The standard vLLM production stack is installed using a Helm chart. You can run this [bash script](https://github.com/vllm-project/production-stack/blob/main/utils/install-helm.sh) to install Helm on your GPU server. To install the vLLM production stack, run the following commands on your desktop: diff --git a/docs/source/deployment/security.md b/docs/source/deployment/security.md new file mode 100644 index 00000000000..e2ef8196c16 --- /dev/null +++ b/docs/source/deployment/security.md @@ -0,0 +1,58 @@ +# Security Guide + +## Inter-Node Communication + +All communications between nodes in a multi-node vLLM deployment are **insecure by default** and must be protected by placing the nodes on an isolated network. This includes: + +1. PyTorch Distributed communications +2. KV cache transfer communications +3. Tensor, Pipeline, and Data parallel communications + +### Configuration Options for Inter-Node Communications + +The following options control inter-node communications in vLLM: + +1. **Environment Variables:** + - `VLLM_HOST_IP`: Sets the IP address for vLLM processes to communicate on + +2. **KV Cache Transfer Configuration:** + - `--kv-ip`: The IP address for KV cache transfer communications (default: 127.0.0.1) + - `--kv-port`: The port for KV cache transfer communications (default: 14579) + +3. **Data Parallel Configuration:** + - `data_parallel_master_ip`: IP of the data parallel master (default: 127.0.0.1) + - `data_parallel_master_port`: Port of the data parallel master (default: 29500) + +### Notes on PyTorch Distributed + +vLLM uses PyTorch's distributed features for some inter-node communication. For +detailed information about PyTorch Distributed security considerations, please +refer to the [PyTorch Security +Guide](https://github.com/pytorch/pytorch/security/policy#using-distributed-features). + +Key points from the PyTorch security guide: +- PyTorch Distributed features are intended for internal communication only +- They are not built for use in untrusted environments or networks +- No authorization protocol is included for performance reasons +- Messages are sent unencrypted +- Connections are accepted from anywhere without checks + +### Security Recommendations + +1. 
**Network Isolation:** + - Deploy vLLM nodes on a dedicated, isolated network + - Use network segmentation to prevent unauthorized access + - Implement appropriate firewall rules + +2. **Configuration Best Practices:** + - Always set `VLLM_HOST_IP` to a specific IP address rather than using defaults + - Configure firewalls to only allow necessary ports between nodes + +3. **Access Control:** + - Restrict physical and network access to the deployment environment + - Implement proper authentication and authorization for management interfaces + - Follow the principle of least privilege for all system components + +## Reporting Security Vulnerabilities + +If you believe you have found a security vulnerability in vLLM, please report it following the project's security policy. For more information on how to report security issues and the project's security policy, please see the [vLLM Security Policy](https://github.com/vllm-project/vllm/blob/main/SECURITY.md). diff --git a/docs/source/design/arch_overview.md b/docs/source/design/arch_overview.md index 7bed0a001d6..94bda8b5c58 100644 --- a/docs/source/design/arch_overview.md +++ b/docs/source/design/arch_overview.md @@ -52,8 +52,8 @@ for output in outputs: print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") ``` -More API details can be found in the {doc}`Offline Inference -` section of the API docs. +More API details can be found in the [Offline Inference] +(#offline-inference-api) section of the API docs. The code for the `LLM` class can be found in . diff --git a/docs/source/design/mm_processing.md b/docs/source/design/mm_processing.md index 0947c1da1e5..dc92a3c2c51 100644 --- a/docs/source/design/mm_processing.md +++ b/docs/source/design/mm_processing.md @@ -47,7 +47,7 @@ Moreover, since the tokenized text has not passed through the HF processor, we h ### Dummy text -We work around the first issue by requiring each model to define how to generate dummy text based on the number of multi-modal inputs, via {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_processor_inputs`. This lets us generate dummy text corresponding to the multi-modal inputs and input them together to obtain the processed multi-modal data. +We work around the first issue by requiring each model to define how to generate dummy text based on the number of multi-modal inputs, via {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_text`. This lets us generate dummy text corresponding to the multi-modal inputs and input them together to obtain the processed multi-modal data. (mm-automatic-prompt-updating)= diff --git a/docs/source/design/v1/metrics.md b/docs/source/design/v1/metrics.md index b3981b2dc24..7e7c8b925e2 100644 --- a/docs/source/design/v1/metrics.md +++ b/docs/source/design/v1/metrics.md @@ -66,8 +66,8 @@ vLLM also provides [a reference example](https://docs.vllm.ai/en/latest/getting_ The subset of metrics exposed in the Grafana dashboard gives us an indication of which metrics are especially important: - `vllm:e2e_request_latency_seconds_bucket` - End to end request latency measured in seconds -- `vllm:prompt_tokens_total` - Prompt Tokens/Sec -- `vllm:generation_tokens_total` - Generation Tokens/Sec +- `vllm:prompt_tokens_total` - Prompt Tokens +- `vllm:generation_tokens_total` - Generation Tokens - `vllm:time_per_output_token_seconds` - Inter token latency (Time Per Output Token, TPOT) in second. - `vllm:time_to_first_token_seconds` - Time to First Token (TTFT) latency in seconds. 
- `vllm:num_requests_running` (also, `_swapped` and `_waiting`) - Number of requests in RUNNING, WAITING, and SWAPPED state @@ -86,6 +86,17 @@ See [the PR which added this Dashboard](gh-pr:2316) for interesting and useful b Prometheus support was initially added [using the aioprometheus library](gh-pr:1890), but a switch was made quickly to [prometheus_client](gh-pr:2730). The rationale is discussed in both linked PRs. +With the switch to `aioprometheus`, we lost a `MetricsMiddleware` to track HTTP metrics, but this was reinstated [using prometheus_fastapi_instrumentator](gh-pr:15657): + +```bash +$ curl http://0.0.0.0:8000/metrics 2>/dev/null | grep -P '^http_(?!.*(_bucket|_created|_sum)).*' +http_requests_total{handler="/v1/completions",method="POST",status="2xx"} 201.0 +http_request_size_bytes_count{handler="/v1/completions"} 201.0 +http_response_size_bytes_count{handler="/v1/completions"} 201.0 +http_request_duration_highr_seconds_count 201.0 +http_request_duration_seconds_count{handler="/v1/completions",method="POST"} 201.0 +``` + ### Multi-process Mode In v0, metrics are collected in the engine core process and we use multi-process mode to make them available in the API server process. See . @@ -456,6 +467,9 @@ In general: hatch](https://kubernetes.io/docs/concepts/cluster-administration/system-metrics/#show-hidden-metrics) for some time before deleting them. +See the [deprecation policy](project:../../contributing/deprecation_policy.md) for +the project-wide deprecation policy. + ### Unimplemented - `vllm:tokens_total` Added by , but apparently never implemented. This can just be diff --git a/docs/source/design/v1/prefix_caching.md b/docs/source/design/v1/prefix_caching.md index ec1f3cb8d64..ec661d8ec64 100644 --- a/docs/source/design/v1/prefix_caching.md +++ b/docs/source/design/v1/prefix_caching.md @@ -16,7 +16,7 @@ In the example above, the KV cache in the first block can be uniquely identified * Parent hash value: The hash value of the parent hash block. * Block tokens: A tuple of tokens in this block. The reason to include the exact tokens is to reduce potential hash value collision. -* Extra hashes: Other values required to make this block unique, such as LoRA IDs and multi-modality input hashes (see the example below). +* Extra hashes: Other values required to make this block unique, such as LoRA IDs, multi-modality input hashes (see the example below), and cache salts to isolate caches in multi-tenant environments. > **Note 1:** We only cache full blocks. @@ -76,6 +76,24 @@ Block 3 In the rest of this document, we first introduce the data structure used for prefix caching in vLLM v1, followed by the prefix caching workflow of major KV cache operators (e.g., allocate, append, free, eviction). Finally, we use an example to illustrate the end to end prefix caching workflow. +**Cache Isolation for Security** +To improve privacy in shared environments, vLLM supports isolating prefix cache reuse through optional per-request salting. By including a `cache_salt` in the request, this value is injected into the hash of the first block, ensuring that only requests with the same salt can reuse cached KV blocks. This prevents timing-based attacks where an adversary could infer cached content by observing latency differences. This offers protection without compromising performance. 
+ +```json +{ + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Here is a document with details about the world series: ..."}, + {"role": "user", "content": "Who won the world series in 2020?"} + ], + "cache_salt": "Z3V2bmV3aGxza3ZubGFoZ3Zud3V3ZWZ2bmd0b3V2bnZmc2xpZ3RoZ2x2aQ==" +} +``` + +With this setup, cache sharing is limited to users or requests that explicitly agree on a common salt, enabling cache reuse within a trust group while isolating others. + +> **Note:** Cache isolation is not supported in engine V0. + ## Data Structure The prefix caching in vLLM v1 is implemented in the KV cache manager. The basic building block is the “Block” data class (simplified): diff --git a/docs/source/design/v1/torch_compile.md b/docs/source/design/v1/torch_compile.md index 57dba680b97..7920131643c 100644 --- a/docs/source/design/v1/torch_compile.md +++ b/docs/source/design/v1/torch_compile.md @@ -99,7 +99,7 @@ This time, Inductor compilation is completely bypassed, and we will load from di The above example just uses Inductor to compile for a general shape (i.e. symbolic shape). We can also use Inductor to compile for some of the specific shapes, for example: -`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'compile_sizes': [1, 2, 4, 8]}"` +`vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'compile_sizes': [1, 2, 4, 8]}"` Then it will also compile a specific kernel just for batch size `1, 2, 4, 8`. At this time, all of the shapes in the computation graph are static and known, and we will turn on auto-tuning to tune for max performance. This can be slow when you run it for the first time, but the next time you run it, we can directly bypass the tuning and run the tuned kernel. @@ -134,6 +134,6 @@ The cudagraphs are captured and managed by the compiler backend, and replayed wh By default, vLLM will try to determine a set of sizes to capture cudagraph. You can also override it using the config `cudagraph_capture_sizes`: -`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"` +`vllm serve meta-llama/Llama-3.2-1B --compilation-config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"` Then it will only capture cudagraph for the specified sizes. It can be useful to have fine-grained control over the cudagraph capture. diff --git a/docs/source/features/compatibility_matrix.md b/docs/source/features/compatibility_matrix.md index 6056ca0d366..8865d26deae 100644 --- a/docs/source/features/compatibility_matrix.md +++ b/docs/source/features/compatibility_matrix.md @@ -42,7 +42,7 @@ Check the ❌ or 🟠 with links to see tracking issue for unsupported feature/h * [APC](#automatic-prefix-caching) * [LoRA](#lora-adapter) * prmpt adptr - * [SD](#spec_decode) + * [SD](#spec-decode) * CUDA graph * pooling * enc-dec @@ -122,7 +122,7 @@ Check the ❌ or 🟠 with links to see tracking issue for unsupported feature/h * * * -- * [SD](#spec_decode) +- * [SD](#spec-decode) * ✅ * ✅ * ❌ @@ -377,7 +377,7 @@ Check the ❌ or 🟠 with links to see tracking issue for unsupported feature/h * ✅ * [❌](gh-issue:8475) * ✅ -- * [SD](#spec_decode) +- * [SD](#spec-decode) * ✅ * ✅ * ✅ diff --git a/docs/source/features/disagg_prefill.md b/docs/source/features/disagg_prefill.md index 52d253b9c2b..2fa20140c08 100644 --- a/docs/source/features/disagg_prefill.md +++ b/docs/source/features/disagg_prefill.md @@ -21,11 +21,11 @@ Disaggregated prefill DOES NOT improve throughput. 
## Usage example -Please refer to `examples/online_serving/disaggregated_prefill.sh` for the example usage of disaggregated prefilling. +Please refer to for the example usage of disaggregated prefilling. ## Benchmarks -Please refer to `benchmarks/disagg_benchmarks/` for disaggregated prefilling benchmarks. +Please refer to for disaggregated prefilling benchmarks. ## Development diff --git a/docs/source/features/lora.md b/docs/source/features/lora.md index a71da72e436..b5b51095b3a 100644 --- a/docs/source/features/lora.md +++ b/docs/source/features/lora.md @@ -106,19 +106,18 @@ curl http://localhost:8000/v1/completions \ ## Dynamically serving LoRA Adapters -In addition to serving LoRA adapters at server startup, the vLLM server now supports dynamically loading and unloading -LoRA adapters at runtime through dedicated API endpoints. This feature can be particularly useful when the flexibility -to change models on-the-fly is needed. +In addition to serving LoRA adapters at server startup, the vLLM server supports dynamically configuring LoRA adapters at runtime through dedicated API endpoints and plugins. This feature can be particularly useful when the flexibility to change models on-the-fly is needed. Note: Enabling this feature in production environments is risky as users may participate in model adapter management. -To enable dynamic LoRA loading and unloading, ensure that the environment variable `VLLM_ALLOW_RUNTIME_LORA_UPDATING` -is set to `True`. When this option is enabled, the API server will log a warning to indicate that dynamic loading is active. +To enable dynamic LoRA configuration, ensure that the environment variable `VLLM_ALLOW_RUNTIME_LORA_UPDATING` +is set to `True`. ```bash export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True ``` +### Using API Endpoints Loading a LoRA Adapter: To dynamically load a LoRA adapter, send a POST request to the `/v1/load_lora_adapter` endpoint with the necessary @@ -153,6 +152,58 @@ curl -X POST http://localhost:8000/v1/unload_lora_adapter \ }' ``` +### Using Plugins +Alternatively, you can use the LoRAResolver plugin to dynamically load LoRA adapters. LoRAResolver plugins enable you to load LoRA adapters from both local and remote sources such as local file system and S3. On every request, when there's a new model name that hasn't been loaded yet, the LoRAResolver will try to resolve and load the corresponding LoRA adapter. + +You can set up multiple LoRAResolver plugins if you want to load LoRA adapters from different sources. For example, you might have one resolver for local files and another for S3 storage. vLLM will load the first LoRA adapter that it finds. + +You can either install existing plugins or implement your own. + +Steps to implement your own LoRAResolver plugin: +1. Implement the LoRAResolver interface. 
+ + Example of a simple S3 LoRAResolver implementation: + + ```python + import os + import s3fs + from vllm.lora.request import LoRARequest + from vllm.lora.resolver import LoRAResolver + + class S3LoRAResolver(LoRAResolver): + def __init__(self): + self.s3 = s3fs.S3FileSystem() + self.s3_path_format = os.getenv("S3_PATH_TEMPLATE") + self.local_path_format = os.getenv("LOCAL_PATH_TEMPLATE") + + async def resolve_lora(self, base_model_name, lora_name): + s3_path = self.s3_path_format.format(base_model_name=base_model_name, lora_name=lora_name) + local_path = self.local_path_format.format(base_model_name=base_model_name, lora_name=lora_name) + + # Download the LoRA from S3 to the local path + await self.s3._get( + s3_path, local_path, recursive=True, maxdepth=1 + ) + + lora_request = LoRARequest( + lora_name=lora_name, + lora_path=local_path, + lora_int_id=abs(hash(lora_name)) + ) + return lora_request + ``` + +2. Register LoRAResolver plugin. + + ```python + from vllm.lora.resolver import LoRAResolverRegistry + + s3_resolver = S3LoRAResolver() + LoRAResolverRegistry.register_resolver("s3_resolver", s3_resolver) + ``` + + For more details, refer to the [vLLM's Plugins System](../design/plugin_system.md). + ## New format for `--lora-modules` In the previous version, users would provide LoRA modules via the following format, either as a key-value pair or in JSON format. For example: diff --git a/docs/source/features/quantization/auto_awq.md b/docs/source/features/quantization/auto_awq.md index b703d019531..b4ac597f5a7 100644 --- a/docs/source/features/quantization/auto_awq.md +++ b/docs/source/features/quantization/auto_awq.md @@ -6,13 +6,13 @@ To create a new 4-bit quantized model, you can leverage [AutoAWQ](https://github Quantization reduces the model's precision from BF16/FP16 to INT4 which effectively reduces the total model memory footprint. The main benefits are lower latency and memory usage. -You can quantize your own models by installing AutoAWQ or picking one of the [6500+ models on Huggingface](https://huggingface.co/models?sort=trending&search=awq). +You can quantize your own models by installing AutoAWQ or picking one of the [6500+ models on Huggingface](https://huggingface.co/models?search=awq). ```console pip install autoawq ``` -After installing AutoAWQ, you are ready to quantize a model. Please refer to the `AutoAWQ documentation `_ for further details. Here is an example of how to quantize `mistralai/Mistral-7B-Instruct-v0.2`: +After installing AutoAWQ, you are ready to quantize a model. Please refer to the [AutoAWQ documentation](https://casper-hansen.github.io/AutoAWQ/examples/#basic-quantization) for further details. Here is an example of how to quantize `mistralai/Mistral-7B-Instruct-v0.2`: ```python from awq import AutoAWQForCausalLM diff --git a/docs/source/features/quantization/bitblas.md b/docs/source/features/quantization/bitblas.md new file mode 100644 index 00000000000..d0b2bf858c9 --- /dev/null +++ b/docs/source/features/quantization/bitblas.md @@ -0,0 +1,48 @@ +(bitblas)= + +# BitBLAS + +vLLM now supports [BitBLAS](https://github.com/microsoft/BitBLAS) for more efficient and flexible model inference. Compared to other quantization frameworks, BitBLAS provides more precision combinations. + +:::{note} +Ensure your hardware supports the selected `dtype` (`torch.bfloat16` or `torch.float16`). +Most recent NVIDIA GPUs support `float16`, while `bfloat16` is more common on newer architectures like Ampere or Hopper. 
+For details see [supported hardware](https://docs.vllm.ai/en/latest/features/quantization/supported_hardware.html). +::: + +Below are the steps to utilize BitBLAS with vLLM. + +```console +pip install bitblas>=0.1.0 +``` + +vLLM reads the model's config file and supports pre-quantized checkpoints. + +You can find pre-quantized models on: + +- [Hugging Face (BitBLAS)](https://huggingface.co/models?search=bitblas) +- [Hugging Face (GPTQ)](https://huggingface.co/models?search=gptq) + +Usually, these repositories have a `quantize_config.json` file that includes a `quantization_config` section. + +## Read bitblas format checkpoint + +```python +from vllm import LLM +import torch + +# "hxbgsyxh/llama-13b-4bit-g-1-bitblas" is a pre-quantized checkpoint. +model_id = "hxbgsyxh/llama-13b-4bit-g-1-bitblas" +llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, quantization="bitblas") +``` + +## Read gptq format checkpoint + +```python +from vllm import LLM +import torch + +# "hxbgsyxh/llama-13b-4bit-g-1" is a pre-quantized checkpoint. +model_id = "hxbgsyxh/llama-13b-4bit-g-1" +llm = LLM(model=model_id, dtype=torch.float16, trust_remote_code=True, quantization="bitblas", max_model_len=1024) +``` diff --git a/docs/source/features/quantization/bnb.md b/docs/source/features/quantization/bnb.md index e356b99d85c..1843a33a3df 100644 --- a/docs/source/features/quantization/bnb.md +++ b/docs/source/features/quantization/bnb.md @@ -14,7 +14,7 @@ pip install bitsandbytes>=0.45.3 vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint. -You can find bitsandbytes quantized models on . +You can find bitsandbytes quantized models on . And usually, these repositories have a config.json file that includes a quantization_config section. ## Read quantized checkpoint diff --git a/docs/source/features/quantization/fp8.md b/docs/source/features/quantization/fp8.md index a62e0124b77..95e105357bd 100644 --- a/docs/source/features/quantization/fp8.md +++ b/docs/source/features/quantization/fp8.md @@ -30,6 +30,7 @@ from vllm import LLM model = LLM("facebook/opt-125m", quantization="fp8") # INFO 06-10 17:55:42 model_runner.py:157] Loading model weights took 0.1550 GB result = model.generate("Hello, my name is") +print(result[0].outputs[0].text) ``` :::{warning} @@ -44,6 +45,12 @@ To produce performant FP8 quantized models with vLLM, you'll need to install the pip install llmcompressor ``` +Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: + +```console +pip install vllm lm-eval==0.4.4 +``` + ## Quantization Process The quantization process involves three main steps: @@ -86,7 +93,7 @@ recipe = QuantizationModifier( # Apply the quantization algorithm. oneshot(model=model, recipe=recipe) -# Save the model. +# Save the model: Meta-Llama-3-8B-Instruct-FP8-Dynamic SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic" model.save_pretrained(SAVE_DIR) tokenizer.save_pretrained(SAVE_DIR) @@ -94,18 +101,13 @@ tokenizer.save_pretrained(SAVE_DIR) ### 3. 
Evaluating Accuracy -Install `vllm` and `lm-evaluation-harness`: - -```console -pip install vllm lm-eval==0.4.4 -``` - Load and run the model in `vllm`: ```python from vllm import LLM model = LLM("./Meta-Llama-3-8B-Instruct-FP8-Dynamic") -model.generate("Hello my name is") +result = model.generate("Hello my name is") +print(result[0].outputs[0].text) ``` Evaluate accuracy with `lm_eval` (for example on 250 samples of `gsm8k`): @@ -188,4 +190,5 @@ from vllm import LLM model = LLM(model="Meta-Llama-3-8B-Instruct-FP8/") # INFO 06-10 21:15:41 model_runner.py:159] Loading model weights took 8.4596 GB result = model.generate("Hello, my name is") +print(result[0].outputs[0].text) ``` diff --git a/docs/source/features/quantization/gptqmodel.md b/docs/source/features/quantization/gptqmodel.md index 34adf6512b7..9771d5a4fe9 100644 --- a/docs/source/features/quantization/gptqmodel.md +++ b/docs/source/features/quantization/gptqmodel.md @@ -16,12 +16,16 @@ GPTQModel is one of the few quantization toolkits in the world that allows `Dyna is fully integrated into vLLM and backed up by support from the ModelCloud.AI team. Please refer to [GPTQModel readme](https://github.com/ModelCloud/GPTQModel?tab=readme-ov-file#dynamic-quantization-per-module-quantizeconfig-override) for more details on this and other advanced features. -You can quantize your own models by installing [GPTQModel](https://github.com/ModelCloud/GPTQModel) or picking one of the [5000+ models on Huggingface](https://huggingface.co/models?sort=trending&search=gptq). +## Installation + +You can quantize your own models by installing [GPTQModel](https://github.com/ModelCloud/GPTQModel) or picking one of the [5000+ models on Huggingface](https://huggingface.co/models?search=gptq). ```console pip install -U gptqmodel --no-build-isolation -v ``` +## Quantizing a model + After installing GPTQModel, you are ready to quantize a model. Please refer to the [GPTQModel readme](https://github.com/ModelCloud/GPTQModel/?tab=readme-ov-file#quantization) for further details. Here is an example of how to quantize `meta-llama/Llama-3.2-1B-Instruct`: @@ -49,12 +53,16 @@ model.quantize(calibration_dataset, batch_size=2) model.save(quant_path) ``` +## Running a quantized model with vLLM + To run an GPTQModel quantized model with vLLM, you can use [DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2](https://huggingface.co/ModelCloud/DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2) with the following command: ```console -python examples/offline_inference/llm_engine_example.py --model DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2 +python examples/offline_inference/llm_engine_example.py --model ModelCloud/DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2 ``` +## Using GPTQModel with vLLM's Python API + GPTQModel quantized models are also supported directly through the LLM entrypoint: ```python @@ -67,17 +75,22 @@ prompts = [ "The capital of France is", "The future of AI is", ] + # Create a sampling params object. sampling_params = SamplingParams(temperature=0.6, top_p=0.9) # Create an LLM. -llm = LLM(model="DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2") +llm = LLM(model="ModelCloud/DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2") + # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) + # Print the outputs. 
+print("-"*50) for output in outputs: prompt = output.prompt generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}") + print("-"*50) ``` diff --git a/docs/source/features/quantization/index.md b/docs/source/features/quantization/index.md index 6f539f6e3f4..7ad46b7094e 100644 --- a/docs/source/features/quantization/index.md +++ b/docs/source/features/quantization/index.md @@ -11,11 +11,13 @@ Quantization trades off model precision for smaller memory footprint, allowing l supported_hardware auto_awq bnb +bitblas gguf gptqmodel int4 int8 fp8 +modelopt quark quantized_kvcache torchao diff --git a/docs/source/features/quantization/int4.md b/docs/source/features/quantization/int4.md index f8939e5bf01..be48788a4ef 100644 --- a/docs/source/features/quantization/int4.md +++ b/docs/source/features/quantization/int4.md @@ -18,6 +18,12 @@ To use INT4 quantization with vLLM, you'll need to install the [llm-compressor]( pip install llmcompressor ``` +Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: + +```console +pip install vllm lm-eval==0.4.4 +``` + ## Quantization Process The quantization process involves four main steps: @@ -87,7 +93,7 @@ oneshot( num_calibration_samples=NUM_CALIBRATION_SAMPLES, ) -# Save the compressed model +# Save the compressed model: Meta-Llama-3-8B-Instruct-W4A16-G128 SAVE_DIR = MODEL_ID.split("/")[1] + "-W4A16-G128" model.save_pretrained(SAVE_DIR, save_compressed=True) tokenizer.save_pretrained(SAVE_DIR) diff --git a/docs/source/features/quantization/int8.md b/docs/source/features/quantization/int8.md index b381f34bccd..d6ddca18e26 100644 --- a/docs/source/features/quantization/int8.md +++ b/docs/source/features/quantization/int8.md @@ -19,6 +19,12 @@ To use INT8 quantization with vLLM, you'll need to install the [llm-compressor]( pip install llmcompressor ``` +Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: + +```console +pip install vllm lm-eval==0.4.4 +``` + ## Quantization Process The quantization process involves four main steps: @@ -91,7 +97,7 @@ oneshot( num_calibration_samples=NUM_CALIBRATION_SAMPLES, ) -# Save the compressed model +# Save the compressed model: Meta-Llama-3-8B-Instruct-W8A8-Dynamic-Per-Token SAVE_DIR = MODEL_ID.split("/")[1] + "-W8A8-Dynamic-Per-Token" model.save_pretrained(SAVE_DIR, save_compressed=True) tokenizer.save_pretrained(SAVE_DIR) diff --git a/docs/source/features/quantization/modelopt.md b/docs/source/features/quantization/modelopt.md new file mode 100644 index 00000000000..001d18657da --- /dev/null +++ b/docs/source/features/quantization/modelopt.md @@ -0,0 +1,78 @@ +# NVIDIA TensorRT Model Optimizer + +The [NVIDIA TensorRT Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer) is a library designed to optimize models for inference with NVIDIA GPUs. It includes tools for Post-Training Quantization (PTQ) and Quantization Aware Training (QAT) of Large Language Models (LLMs), Vision Language Models (VLMs), and diffusion models. + +We recommend installing the library with: + +```console +pip install nvidia-modelopt +``` + +## Quantizing HuggingFace Models with PTQ + +You can quantize HuggingFace models using the example scripts provided in the TensorRT Model Optimizer repository. The primary script for LLM PTQ is typically found within the `examples/llm_ptq` directory. 
+ +Below is an example showing how to quantize a model using modelopt's PTQ API: + +```python +import modelopt.torch.quantization as mtq +from transformers import AutoModelForCausalLM + +# Load the model from HuggingFace +model = AutoModelForCausalLM.from_pretrained("") + +# Select the quantization config, for example, FP8 +config = mtq.FP8_DEFAULT_CFG + +# Define a forward loop function for calibration +def forward_loop(model): + for data in calib_set: + model(data) + +# PTQ with in-place replacement of quantized modules +model = mtq.quantize(model, config, forward_loop) +``` + +After the model is quantized, you can export it to a quantized checkpoint using the export API: + +```python +import torch +from modelopt.torch.export import export_hf_checkpoint + +with torch.inference_mode(): + export_hf_checkpoint( + model, # The quantized model. + export_dir, # The directory where the exported files will be stored. + ) +``` + +The quantized checkpoint can then be deployed with vLLM. As an example, the following code shows how to deploy `nvidia/Llama-3.1-8B-Instruct-FP8`, which is the FP8 quantized checkpoint derived from `meta-llama/Llama-3.1-8B-Instruct`, using vLLM: + +```python +from vllm import LLM, SamplingParams + +def main(): + + model_id = "nvidia/Llama-3.1-8B-Instruct-FP8" + # Ensure you specify quantization='modelopt' when loading the modelopt checkpoint + llm = LLM(model=model_id, quantization="modelopt", trust_remote_code=True) + + sampling_params = SamplingParams(temperature=0.8, top_p=0.9) + + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + + outputs = llm.generate(prompts, sampling_params) + + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + +if __name__ == "__main__": + main() +``` diff --git a/docs/source/features/quantization/quantized_kvcache.md b/docs/source/features/quantization/quantized_kvcache.md index 9f36c2949e0..86e6354ec82 100644 --- a/docs/source/features/quantization/quantized_kvcache.md +++ b/docs/source/features/quantization/quantized_kvcache.md @@ -126,7 +126,7 @@ oneshot( num_calibration_samples=NUM_CALIBRATION_SAMPLES, ) -# Save quantized model +# Save quantized model: Llama-3.1-8B-Instruct-FP8-KV SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-KV" model.save_pretrained(SAVE_DIR, save_compressed=True) tokenizer.save_pretrained(SAVE_DIR) diff --git a/docs/source/features/quantization/quark.md b/docs/source/features/quantization/quark.md index 935ee37a815..955890dbc75 100644 --- a/docs/source/features/quantization/quark.md +++ b/docs/source/features/quantization/quark.md @@ -19,6 +19,12 @@ pip install amd-quark You can refer to [Quark installation guide](https://quark.docs.amd.com/latest/install.html) for more installation details. +Additionally, install `vllm` and `lm-evaluation-harness` for evaluation: + +```console +pip install vllm lm-eval==0.4.4 +``` + ## Quantization Process After installing Quark, we will use an example to illustrate how to use Quark. 
@@ -150,6 +156,7 @@ LLAMA_KV_CACHE_GROUP = ["*k_proj", "*v_proj"] export_config = ExporterConfig(json_export_config=JsonExporterConfig()) export_config.json_export_config.kv_cache_group = LLAMA_KV_CACHE_GROUP +# Model: Llama-2-70b-chat-hf-w-fp8-a-fp8-kvcache-fp8-pertensor-autosmoothquant EXPORT_DIR = MODEL_ID.split("/")[1] + "-w-fp8-a-fp8-kvcache-fp8-pertensor-autosmoothquant" exporter = ModelExporter(config=export_config, export_dir=EXPORT_DIR) with torch.no_grad(): diff --git a/docs/source/features/quantization/supported_hardware.md b/docs/source/features/quantization/supported_hardware.md index 2cbe8779dd8..f8af1ba60b1 100644 --- a/docs/source/features/quantization/supported_hardware.md +++ b/docs/source/features/quantization/supported_hardware.md @@ -74,6 +74,17 @@ The table below shows the compatibility of various quantization implementations * ❌ * ❌ * ❌ +- * BitBLAS (GPTQ) + * ✅︎ + * ✅︎ + * ✅︎ + * ✅︎ + * ✅︎ + * ❌ + * ❌ + * ❌ + * ❌ + * ❌ - * AQLM * ✅︎ * ✅︎ @@ -118,7 +129,17 @@ The table below shows the compatibility of various quantization implementations * ❌ * ❌ * ❌ - +- * modelopt + * ✅︎ + * ✅︎ + * ✅︎ + * ✅︎ + * ✅︎︎ + * ❌ + * ❌ + * ❌ + * ❌ + * ❌ ::: - Volta refers to SM 7.0, Turing to SM 7.5, Ampere to SM 8.0/8.6, Ada to SM 8.9, and Hopper to SM 9.0. diff --git a/docs/source/features/quantization/torchao.md b/docs/source/features/quantization/torchao.md index 9a85f0bab9e..82100c6ddca 100644 --- a/docs/source/features/quantization/torchao.md +++ b/docs/source/features/quantization/torchao.md @@ -30,5 +30,4 @@ tokenizer.push_to_hub(hub_repo) quantized_model.push_to_hub(hub_repo, safe_serialization=False) ``` -Alternatively, you can use the TorchAO Quantization space for quantizing models with a simple UI. -See: https://huggingface.co/spaces/medmekk/TorchAO_Quantization +Alternatively, you can use the [TorchAO Quantization space](https://huggingface.co/spaces/medmekk/TorchAO_Quantization) for quantizing models with a simple UI. diff --git a/docs/source/features/reasoning_outputs.md b/docs/source/features/reasoning_outputs.md index 3a0be69f8e1..a079eb8b77e 100644 --- a/docs/source/features/reasoning_outputs.md +++ b/docs/source/features/reasoning_outputs.md @@ -15,16 +15,16 @@ vLLM currently supports the following reasoning models: | [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `guided_json`, `guided_regex` | ❌ | | [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `guided_json`, `guided_regex` | ✅ | | [IBM Granite 3.2 language models](https://huggingface.co/collections/ibm-granite/granite-32-language-models-67b3bc8c13508f6d064cff9a) | `granite` | ❌ | ❌ | +| [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `guided_json`, `guided_regex` | ✅ | - IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`. ## Quickstart -To use reasoning models, you need to specify the `--enable-reasoning` and `--reasoning-parser` flags when making a request to the chat completion endpoint. The `--reasoning-parser` flag specifies the reasoning parser to use for extracting reasoning content from the model output. +To use reasoning models, you need to specify the `--reasoning-parser` flags when making a request to the chat completion endpoint. The `--reasoning-parser` flag specifies the reasoning parser to use for extracting reasoning content from the model output. 
```bash -vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ - --enable-reasoning --reasoning-parser deepseek_r1 +vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --reasoning-parser deepseek_r1 ``` Next, make a request to the model that should return the reasoning content in the response. @@ -139,8 +139,7 @@ Remember to check whether the `reasoning_content` exists in the response before The reasoning content is also available in the structured output. The structured output engine like `xgrammar` will use the reasoning content to generate structured output. It is only supported in v0 engine now. ```bash -VLLM_USE_V1=0 vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ - --enable-reasoning --reasoning-parser deepseek_r1 +VLLM_USE_V1=0 vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --reasoning-parser deepseek_r1 ``` Please note that the `VLLM_USE_V1` environment variable must be set to `0` to use the v0 engine. @@ -315,9 +314,8 @@ class DeepSeekReasoner(Reasoner): The structured output engine like `xgrammar` will use `end_token_id` to check if the reasoning content is present in the model output and skip the structured output if it is the case. -Finally, you can enable reasoning for the model by using the `--enable-reasoning` and `--reasoning-parser` flags. +Finally, you can enable reasoning for the model by using the `--reasoning-parser` flags. ```bash -vllm serve \ - --enable-reasoning --reasoning-parser example +vllm serve --reasoning-parser example ``` diff --git a/docs/source/features/structured_outputs.md b/docs/source/features/structured_outputs.md index de3c5bf5e7a..03119ec7441 100644 --- a/docs/source/features/structured_outputs.md +++ b/docs/source/features/structured_outputs.md @@ -2,8 +2,11 @@ # Structured Outputs -vLLM supports the generation of structured outputs using [outlines](https://github.com/dottxt-ai/outlines), [lm-format-enforcer](https://github.com/noamgat/lm-format-enforcer), or [xgrammar](https://github.com/mlc-ai/xgrammar) as backends for the guided decoding. -This document shows you some examples of the different options that are available to generate structured outputs. +vLLM supports the generation of structured outputs using +[xgrammar](https://github.com/mlc-ai/xgrammar) or +[guidance](https://github.com/guidance-ai/llguidance) as backends. +This document shows you some examples of the different options that are +available to generate structured outputs. ## Online Serving (OpenAI API) @@ -15,10 +18,17 @@ The following parameters are supported, which must be added as extra parameters: - `guided_regex`: the output will follow the regex pattern. - `guided_json`: the output will follow the JSON schema. - `guided_grammar`: the output will follow the context free grammar. -- `guided_whitespace_pattern`: used to override the default whitespace pattern for guided json decoding. -- `guided_decoding_backend`: used to select the guided decoding backend to use. Additional backend-specific options can be supplied in a comma separated list following a colon after the backend name. For example `"xgrammar:no-fallback"` will not allow vLLM to fallback to a different backend on error. +- `structural_tag`: Follow a JSON schema within a set of specified tags within the generated text. -You can see the complete list of supported parameters on the [OpenAI-Compatible Server](#openai-compatible-server)page. +You can see the complete list of supported parameters on the [OpenAI-Compatible Server](#openai-compatible-server) page. 
+ +Structured outputs are supported by default in the OpenAI-Compatible Server. You +may choose to specify the backend to use by setting the +`--guided-decoding-backend` flag to `vllm serve`. The default backend is `auto`, +which will try to choose an appropriate backend based on the details of the +request. You may also choose a specific backend, along with +some options. A full set of options is available in the `vllm serve --help` +text. Now let´s see an example for each of the cases, starting with the `guided_choice`, as it´s the easiest one: @@ -50,7 +60,7 @@ completion = client.chat.completions.create( "content": "Generate an example email address for Alan Turing, who works in Enigma. End in .com and new line. Example result: alan.turing@enigma.com\n", } ], - extra_body={"guided_regex": "\w+@\w+\.com\n", "stop": ["\n"]}, + extra_body={"guided_regex": r"\w+@\w+\.com\n", "stop": ["\n"]}, ) print(completion.choices[0].message.content) ``` @@ -96,26 +106,29 @@ print(completion.choices[0].message.content) ``` :::{tip} -While not strictly necessary, normally it´s better to indicate in the prompt that a JSON needs to be generated and which fields and how should the LLM fill them. -This can improve the results notably in most cases. +While not strictly necessary, normally it´s better to indicate in the prompt the +JSON schema and how the fields should be populated. This can improve the +results notably in most cases. ::: -Finally we have the `guided_grammar`, which probably is the most difficult one to use but it´s really powerful, as it allows us to define complete languages like SQL queries. -It works by using a context free EBNF grammar, which for example we can use to define a specific format of simplified SQL queries, like in the example below: +Finally we have the `guided_grammar` option, which is probably the most +difficult to use, but it´s really powerful. It allows us to define complete +languages like SQL queries. It works by using a context free EBNF grammar. +As an example, we can use to define a specific format of simplified SQL queries: ```python simplified_sql_grammar = """ - ?start: select_statement + root ::= select_statement - ?select_statement: "SELECT " column_list " FROM " table_name + select_statement ::= "SELECT " column " from " table " where " condition - ?column_list: column_name ("," column_name)* + column ::= "col_1 " | "col_2 " - ?table_name: identifier + table ::= "table_1 " | "table_2 " - ?column_name: identifier + condition ::= column "= " number - ?identifier: /[a-zA-Z_][a-zA-Z0-9_]*/ + number ::= "1 " | "2 " """ completion = client.chat.completions.create( @@ -226,6 +239,8 @@ Step #2: explanation="Next, let's isolate 'x' by dividing both sides of the equa Answer: x = -29/8 ``` +An example of using `structural_tag` can be found here: + ## Offline Inference Offline inference allows for the same types of guided decoding. @@ -236,11 +251,11 @@ The main available options inside `GuidedDecodingParams` are: - `regex` - `choice` - `grammar` -- `backend` -- `whitespace_pattern` +- `structural_tag` -These parameters can be used in the same way as the parameters from the Online Serving examples above. -One example for the usage of the `choices` parameter is shown below: +These parameters can be used in the same way as the parameters from the Online +Serving examples above. 
One example for the usage of the `choice` parameter is
+shown below:
 
 ```python
 from vllm import LLM, SamplingParams
diff --git a/docs/source/features/tool_calling.md b/docs/source/features/tool_calling.md
index 8b8bbd28d34..f98ec6108ce 100644
--- a/docs/source/features/tool_calling.md
+++ b/docs/source/features/tool_calling.md
@@ -152,12 +152,14 @@ Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_cha
 
 Supported models:
 
-* `meta-llama/Meta-Llama-3.1-8B-Instruct`
-* `meta-llama/Meta-Llama-3.1-70B-Instruct`
-* `meta-llama/Meta-Llama-3.1-405B-Instruct`
-* `meta-llama/Meta-Llama-3.1-405B-Instruct-FP8`
+All Llama 3.1, 3.2 and 4 models should be supported.
+
+* `meta-llama/Llama-3.1-*`
+* `meta-llama/Llama-3.2-*`
+* `meta-llama/Llama-4-*`
+
+The supported tool calling format is [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) introduced by the Llama-3.2 models, see the `pythonic` tool parser below.
 
-The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) in Llama-3.2 models, see the `pythonic` tool parser below.
 Other tool calling formats like the built in python tool calling or custom tool calling are not supported.
 
 Known issues:
@@ -166,10 +168,20 @@ Known issues:
 2. The model can generate parameters with a wrong format, such as generating
    an array serialized as string instead of an array.
 
-The `tool_chat_template_llama3_json.jinja` file contains the "official" Llama chat template, but tweaked so that
-it works better with vLLM.
+vLLM provides two JSON based chat templates for Llama 3.1 and 3.2:
+
+* `examples/tool_chat_template_llama3.1_json.jinja` - this is the "official" chat template for the Llama 3.1
+models, but tweaked so that it works better with vLLM.
+* `examples/tool_chat_template_llama3.2_json.jinja` - this extends the Llama 3.1 chat template by adding support for
+images.
+
+Recommended flags: `--tool-call-parser llama3_json --chat-template {see_above}`
+
+vLLM also provides a JSON based chat template for Llama 4:
+* `examples/tool_chat_template_llama4_json.jinja` - this is based on the "official" chat template for the Llama 4
+models, but tweaked so that it works better with vLLM.
 
-Recommended flags: `--tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3_json.jinja`
+For Llama 4, use `--tool-call-parser llama4_json --chat-template examples/tool_chat_template_llama4_json.jinja`.
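+
+As a quick sanity check, the sketch below shows what a request against such a server can look like with the OpenAI Python client. It is only a sketch: the tool definition, the model name, and the local URL are illustrative and not part of vLLM itself; the server is assumed to have been started with `--enable-auto-tool-choice` plus one of the parser/template combinations above.
+
+```python
+from openai import OpenAI
+
+# Assumes a running server, e.g.:
+#   vllm serve meta-llama/Llama-3.1-8B-Instruct --enable-auto-tool-choice \
+#       --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3.1_json.jinja
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
+
+tools = [{
+    "type": "function",
+    "function": {
+        "name": "get_weather",  # illustrative tool, not provided by vLLM
+        "description": "Get the current weather for a city.",
+        "parameters": {
+            "type": "object",
+            "properties": {"city": {"type": "string"}},
+            "required": ["city"],
+        },
+    },
+}]
+
+response = client.chat.completions.create(
+    model="meta-llama/Llama-3.1-8B-Instruct",
+    messages=[{"role": "user", "content": "What is the weather like in Berlin?"}],
+    tools=tools,
+    tool_choice="auto",
+)
+
+# The configured parser surfaces the model's call in the standard tool_calls field.
+print(response.choices[0].message.tool_calls)
+```
+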
#### IBM Granite diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md index e3046f35ee1..78938de317c 100644 --- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md +++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md @@ -13,11 +13,11 @@ There are no pre-built wheels or images for this device, so you must build vLLM - Intel Gaudi accelerator - Intel Gaudi software version 1.18.0 -Please follow the instructions provided in the [Gaudi Installation -Guide](https://docs.habana.ai/en/latest/Installation_Guide/index.html) +Please follow the instructions provided in the +[Gaudi Installation Guide](https://docs.habana.ai/en/latest/Installation_Guide/index.html) to set up the execution environment. To achieve the best performance, -please follow the methods outlined in the [Optimizing Training Platform -Guide](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Optimization_in_Training_Platform.html). +please follow the methods outlined in the +[Optimizing Training Platform Guide](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Optimization_in_Training_Platform.html). ## Configure a new environment @@ -32,15 +32,13 @@ pip list | grep habana # verify that habana-torch-plugin, habana-torch-dataloade pip list | grep neural # verify that neural_compressor is installed ``` -Refer to [Intel Gaudi Software Stack -Verification](https://docs.habana.ai/en/latest/Installation_Guide/SW_Verification.html#platform-upgrade) +Refer to [Intel Gaudi Software Stack Verification](https://docs.habana.ai/en/latest/Installation_Guide/SW_Verification.html#platform-upgrade) for more details. ### Run Docker Image It is highly recommended to use the latest Docker image from Intel Gaudi -vault. Refer to the [Intel Gaudi -documentation](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#pull-prebuilt-containers) +vault. Refer to the [Intel Gaudi documentation](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#pull-prebuilt-containers) for more details. Use the following commands to run a Docker image: @@ -278,8 +276,9 @@ Lower value corresponds to less usable graph memory reserved for prefill stage, ::: User can also configure the strategy for capturing HPU Graphs for prompt and decode stages separately. Strategy affects the order of capturing graphs. There are two strategies implemented: -\- `max_bs` - graph capture queue will sorted in descending order by their batch sizes. Buckets with equal batch sizes are sorted by sequence length in ascending order (e.g. `(64, 128)`, `(64, 256)`, `(32, 128)`, `(32, 256)`, `(1, 128)`, `(1,256)`), default strategy for decode -\- `min_tokens` - graph capture queue will be sorted in ascending order by the number of tokens each graph processes (`batch_size*sequence_length`), default strategy for prompt + +- `max_bs` - graph capture queue will sorted in descending order by their batch sizes. Buckets with equal batch sizes are sorted by sequence length in ascending order (e.g. 
`(64, 128)`, `(64, 256)`, `(32, 128)`, `(32, 256)`, `(1, 128)`, `(1,256)`), default strategy for decode +- `min_tokens` - graph capture queue will be sorted in ascending order by the number of tokens each graph processes (`batch_size*sequence_length`), default strategy for prompt When there's large amount of requests pending, vLLM scheduler will attempt to fill the maximum batch size for decode as soon as possible. When a request is finished, decode batch size decreases. When that happens, vLLM will attempt to schedule a prefill iteration for requests in the waiting queue, to fill the decode batch size to its previous state. This means that in a full load scenario, decode batch size is often at its maximum, which makes large batch size HPU Graphs crucial to capture, as reflected by `max_bs` strategy. On the other hand, prefills will be executed most frequently with very low batch sizes (1-4), which is reflected in `min_tokens` strategy. @@ -326,8 +325,7 @@ INFO 08-02 17:38:43 hpu_executor.py:91] init_cache_engine took 37.92 GiB of devi - We recommend running inference on Gaudi 2 with `block_size` of 128 for BF16 data type. Using default values (16, 32) might lead to sub-optimal performance due to Matrix Multiplication Engine - under-utilization (see [Gaudi - Architecture](https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html)). + under-utilization (see [Gaudi Architecture](https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html)). - For max throughput on Llama 7B, we recommend running with batch size of 128 or 256 and max context length of 2048 with HPU Graphs enabled. If you encounter out-of-memory issues, see troubleshooting section. @@ -336,11 +334,11 @@ INFO 08-02 17:38:43 hpu_executor.py:91] init_cache_engine took 37.92 GiB of devi **Diagnostic and profiling knobs:** -- `VLLM_PROFILER_ENABLED`: if `true`, high level profiler will be enabled. Resulting JSON traces can be viewed in [perfetto.habana.ai](https://perfetto.habana.ai/#!/viewer). Disabled by default. -- `VLLM_HPU_LOG_STEP_GRAPH_COMPILATION`: if `true`, will log graph compilations per each vLLM engine step, only when there was any - highly recommended to use alongside `PT_HPU_METRICS_GC_DETAILS=1`. Disabled by default. -- `VLLM_HPU_LOG_STEP_GRAPH_COMPILATION_ALL`: if `true`, will log graph compilations per each vLLM engine step, always, even if there were none. Disabled by default. -- `VLLM_HPU_LOG_STEP_CPU_FALLBACKS`: if `true`, will log cpu fallbacks per each vLLM engine step, only when there was any. Disabled by default. -- `VLLM_HPU_LOG_STEP_CPU_FALLBACKS_ALL`: if `true`, will log cpu fallbacks per each vLLM engine step, always, even if there were none. Disabled by default. +- `VLLM_PROFILER_ENABLED`: If `true`, enable the high level profiler. Resulting JSON traces can be viewed in [perfetto.habana.ai](https://perfetto.habana.ai/#!/viewer). `false` by default. +- `VLLM_HPU_LOG_STEP_GRAPH_COMPILATION`: If `true`, log graph compilations for each vLLM engine step when any occurs. Highly recommended to use with `PT_HPU_METRICS_GC_DETAILS=1`. `false` by default. +- `VLLM_HPU_LOG_STEP_GRAPH_COMPILATION_ALL`: If `true`, always log graph compilations for each vLLM engine step even if none occurred. `false` by default. +- `VLLM_HPU_LOG_STEP_CPU_FALLBACKS`: If `true`, log CPU fallbacks for each vLLM engine step when any occurs. `false` by default. +- `VLLM_HPU_LOG_STEP_CPU_FALLBACKS_ALL`: if `true`, always log CPU fallbacks for each vLLM engine step even if none occurred. `false` by default. 
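+
+If you launch vLLM from a Python script rather than a shell, the same knobs can be set through `os.environ` before vLLM is imported, as in the minimal sketch below. The variable names are exactly the ones listed above; the model name is an arbitrary example, and exporting the variables in the shell before starting vLLM is equivalent.
+
+```python
+import os
+
+# Diagnostic knobs from the list above; values are plain strings.
+os.environ["VLLM_PROFILER_ENABLED"] = "true"
+os.environ["VLLM_HPU_LOG_STEP_GRAPH_COMPILATION"] = "true"
+os.environ["PT_HPU_METRICS_GC_DETAILS"] = "1"
+
+from vllm import LLM  # import after the knobs are set
+
+llm = LLM(model="meta-llama/Llama-2-7b-hf")  # arbitrary example model
+print(llm.generate("Hello, my name is")[0].outputs[0].text)
+```
+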
**Performance tuning knobs:**
 
@@ -381,7 +379,7 @@ INFO 08-02 17:38:43 hpu_executor.py:91] init_cache_engine took 37.92 GiB of devi
 
 Additionally, there are HPU PyTorch Bridge environment variables impacting vLLM execution:
 
-- `PT_HPU_LAZY_MODE`: if `0`, PyTorch Eager backend for Gaudi will be used, if `1` PyTorch Lazy backend for Gaudi will be used, `1` is default
+- `PT_HPU_LAZY_MODE`: if `0`, the PyTorch Eager backend for Gaudi will be used; if `1`, the PyTorch Lazy backend for Gaudi will be used. `1` is the default.
 - `PT_HPU_ENABLE_LAZY_COLLECTIVES`: required to be `true` for tensor parallel inference with HPU Graphs
 
 ## Troubleshooting: tweaking HPU graphs
diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
index beb803cf059..4459cc61e1c 100644
--- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
@@ -44,7 +44,7 @@ There are no pre-built wheels for this device, so you must either use the pre-bu
 
 You can provision Cloud TPUs using the [Cloud TPU API](https://cloud.google.com/tpu/docs/reference/rest)
 or the [queued resources](https://cloud.google.com/tpu/docs/queued-resources)
-API. This section shows how to create TPUs using the queued resource API. For
+API (preferred). This section shows how to create TPUs using the queued resource API. For
 more information about using the Cloud TPU API, see [Create a Cloud TPU using the Create Node API](https://cloud.google.com/tpu/docs/managing-tpus-tpu-vm#create-node-api).
 Queued resources enable you to request Cloud TPU resources in a queued manner. When you request queued resources, the request is added to a queue maintained by
@@ -97,10 +97,10 @@ gcloud alpha compute tpus queued-resources create QUEUED_RESOURCE_ID \
     `TPU regions and zones `_
 - * ACCELERATOR_TYPE
   * The TPU version you want to use. Specify the TPU version, for example
-    `v5litepod-4` specifies a v5e TPU with 4 cores. For more information,
-    see `TPU versions `_.
+    `v5litepod-4` specifies a v5e TPU with 4 cores, and `v6e-1` specifies a v6e TPU with 1 core. For more information,
+    see [TPU versions](https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions).
 - * RUNTIME_VERSION
-  * The TPU VM runtime version to use. For more information see `TPU VM images `_.
+  * The TPU VM runtime version to use. For example, use `v2-alpha-tpuv6e` for a VM loaded with one or more v6e TPU(s). For more information see [TPU VM images](https://cloud.google.com/tpu/docs/runtimes).
 - * SERVICE_ACCOUNT
   * The email address for your service account. You can find it in the IAM
     Cloud Console under *Service Accounts*. For example:
@@ -158,7 +158,7 @@ sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev
 Run the setup script:
 
 ```bash
-VLLM_TARGET_DEVICE="tpu" python setup.py develop
+VLLM_TARGET_DEVICE="tpu" python -m pip install -e .
 ```
 
 ## Set up using Docker
diff --git a/docs/source/getting_started/installation/cpu.md b/docs/source/getting_started/installation/cpu.md
index db22ef79c92..2c0ec60d710 100644
--- a/docs/source/getting_started/installation/cpu.md
+++ b/docs/source/getting_started/installation/cpu.md
@@ -272,7 +272,7 @@ $ python examples/offline_inference/basic/basic.py
 
 - Decouple the HTTP serving components from the inference components.
In a GPU backend configuration, the HTTP serving and tokenization tasks operate on the CPU, while inference runs on the GPU, which typically does not pose a problem. However, in a CPU-based setup, the HTTP serving and tokenization can cause significant context switching and reduced cache efficiency. Therefore, it is strongly recommended to segregate these two components for improved performance. -- On CPU based setup with NUMA enabled, the memory access performance may be largely impacted by the [topology](https://github.com/intel/intel-extension-for-pytorch/blob/main/docs/tutorials/performance_tuning/tuning_guide.inc.md#non-uniform-memory-access-numa). For NUMA architecture, Tensor Parallel is a option for better performance. +- On CPU based setup with NUMA enabled, the memory access performance may be largely impacted by the [topology](https://github.com/intel/intel-extension-for-pytorch/blob/main/docs/tutorials/performance_tuning/tuning_guide.md#non-uniform-memory-access-numa). For NUMA architecture, Tensor Parallel is a option for better performance. - Tensor Parallel is supported for serving and offline inferencing. In general each NUMA node is treated as one GPU card. Below is the example script to enable Tensor Parallel = 2 for serving: diff --git a/docs/source/getting_started/installation/cpu/build.inc.md b/docs/source/getting_started/installation/cpu/build.inc.md index 39d9dfbd2b2..f385f3d5b19 100644 --- a/docs/source/getting_started/installation/cpu/build.inc.md +++ b/docs/source/getting_started/installation/cpu/build.inc.md @@ -2,7 +2,7 @@ First, install recommended compiler. We recommend to use `gcc/g++ >= 12.3.0` as ```console sudo apt-get update -y -sudo apt-get install -y gcc-12 g++-12 libnuma-dev +sudo apt-get install -y gcc-12 g++-12 libnuma-dev python3-dev sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 ``` @@ -26,3 +26,9 @@ Finally, build and install vLLM CPU backend: ```console VLLM_TARGET_DEVICE=cpu python setup.py install ``` + +If you want to develop vllm, install it in editable mode instead. + +```console +VLLM_TARGET_DEVICE=cpu python setup.py develop +``` diff --git a/docs/source/getting_started/installation/gpu/cuda.inc.md b/docs/source/getting_started/installation/gpu/cuda.inc.md index d3e375aec10..06915f09dd5 100644 --- a/docs/source/getting_started/installation/gpu/cuda.inc.md +++ b/docs/source/getting_started/installation/gpu/cuda.inc.md @@ -1,6 +1,6 @@ # Installation -vLLM contains pre-compiled C++ and CUDA (12.1) binaries. +vLLM contains pre-compiled C++ and CUDA (12.6) binaries. ## Requirements @@ -23,12 +23,12 @@ Therefore, it is recommended to install vLLM with a **fresh new** environment. I You can install vLLM using either `pip` or `uv pip`: ```console -# Install vLLM with CUDA 12.4. +# Install vLLM with CUDA 12.6. pip install vllm # If you are using pip. uv pip install vllm # If you are using uv. ``` -As of now, vLLM's binaries are compiled with CUDA 12.4 and public PyTorch release versions by default. We also provide vLLM binaries compiled with CUDA 12.1, 11.8, and public PyTorch release versions: +As of now, vLLM's binaries are compiled with CUDA 12.6 and public PyTorch release versions by default. We also provide vLLM binaries compiled with CUDA 12.8, 11.8, and public PyTorch release versions: ```console # Install vLLM with CUDA 11.8. 
@@ -46,7 +46,7 @@ LLM inference is a fast-evolving field, and the latest code may contain bug fixe ##### Install the latest code using `pip` ```console -pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly +pip install -U vllm --pre --extra-index-url https://wheels.vllm.ai/nightly ``` `--pre` is required for `pip` to consider pre-released versions. @@ -65,9 +65,11 @@ Note that the wheels are built with Python 3.8 ABI (see [PEP 425](https://peps.p Another way to install the latest code is to use `uv`: ```console -uv pip install vllm --extra-index-url https://wheels.vllm.ai/nightly +uv pip install -U vllm --extra-index-url https://wheels.vllm.ai/nightly ``` +##### Install specific revisions using `uv` + If you want to access the wheels for previous commits (e.g. to bisect the behavior change, performance regression), you can specify the commit hash in the URL: ```console @@ -151,7 +153,7 @@ git clone https://github.com/vllm-project/vllm.git cd vllm python use_existing_torch.py pip install -r requirements/build.txt -pip install -e . --no-build-isolation +pip install --no-build-isolation -e . ``` ##### Use the local cutlass for compilation diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md index 21c8d7d01ad..dc74368fe2c 100644 --- a/docs/source/getting_started/installation/gpu/rocm.inc.md +++ b/docs/source/getting_started/installation/gpu/rocm.inc.md @@ -73,7 +73,22 @@ Currently, there are no pre-built ROCm wheels. You might need to downgrade the "ninja" version to 1.10 it is not used when compiling flash-attention-2 (e.g. `pip install ninja==1.10.2.4`) ::: -3. Build vLLM. For example, vLLM on ROCM 6.3 can be built with the following steps: +3. If you choose to build AITER yourself to use a certain branch or commit, you can build AITER using the following steps: + + ```console + python3 -m pip uninstall -y aiter + git clone --recursive https://github.com/ROCm/aiter.git + cd aiter + git checkout $AITER_BRANCH_OR_COMMIT + git submodule sync; git submodule update --init --recursive + python3 setup.py develop + ``` + + :::{note} + You will need to config the `$AITER_BRANCH_OR_COMMIT` for your purpose. + ::: + +4. Build vLLM. For example, vLLM on ROCM 6.3 can be built with the following steps: ```bash $ pip install --upgrade pip diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md index c41905f250f..4ab41a21c2a 100644 --- a/docs/source/getting_started/installation/gpu/xpu.inc.md +++ b/docs/source/getting_started/installation/gpu/xpu.inc.md @@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels. - Second, install Python packages for vLLM XPU backend building: ```console +git clone https://github.com/vllm-project/vllm.git +cd vllm pip install --upgrade pip pip install -v -r requirements/xpu.txt ``` @@ -33,13 +35,6 @@ pip install -v -r requirements/xpu.txt VLLM_TARGET_DEVICE=xpu python setup.py install ``` -- Finally, due to a known issue of conflict dependency(oneapi related) in torch-xpu 2.6 and ipex-xpu 2.6, we install ipex here. This will be fixed in the ipex-xpu 2.7. - -```console -pip install intel-extension-for-pytorch==2.6.10+xpu \ - --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -``` - :::{note} - FP16 is the default data type in the current XPU backend. The BF16 data type is supported on Intel Data Center GPU, not supported on Intel Arc GPU yet. 
@@ -79,5 +74,3 @@ python -m vllm.entrypoints.openai.api_server \ ``` By default, a ray instance will be launched automatically if no existing one is detected in the system, with `num-gpus` equals to `parallel_config.world_size`. We recommend properly starting a ray cluster before execution, referring to the helper script. - -There are some new features coming with ipex-xpu 2.6, e.g. **chunked prefill**, **V1 engine support**, **lora**, **MoE**, etc. diff --git a/docs/source/getting_started/troubleshooting.md b/docs/source/getting_started/troubleshooting.md index 87fa442e9a4..a4744827f22 100644 --- a/docs/source/getting_started/troubleshooting.md +++ b/docs/source/getting_started/troubleshooting.md @@ -24,7 +24,7 @@ To isolate the model downloading and loading issue, you can use the `--load-form ## Out of memory -If the model is too large to fit in a single GPU, you will get an out-of-memory (OOM) error. Consider [using tensor parallelism](#distributed-serving) to split the model across multiple GPUs. In that case, every process will read the whole model and split it into chunks, which makes the disk reading time even longer (proportional to the size of tensor parallelism). You can convert the model checkpoint to a sharded checkpoint using . The conversion process might take some time, but later you can load the sharded checkpoint much faster. The model loading time should remain constant regardless of the size of tensor parallelism. +If the model is too large to fit in a single GPU, you will get an out-of-memory (OOM) error. Consider adopting [these options](#reducing-memory-usage) to reduce the memory consumption. ## Generation quality changed diff --git a/docs/source/getting_started/v1_user_guide.md b/docs/source/getting_started/v1_user_guide.md index a87484c3bb0..de90b8a7851 100644 --- a/docs/source/getting_started/v1_user_guide.md +++ b/docs/source/getting_started/v1_user_guide.md @@ -44,8 +44,8 @@ This living user guide outlines a few known **important changes and limitations* |-----------------|-----------------------------------------------------------------------------------| | **Prefix Caching** | 🚀 Optimized | | **Chunked Prefill** | 🚀 Optimized | +| **LoRA** | 🚀 Optimized | | **Logprobs Calculation** | 🟢 Functional | -| **LoRA** | 🟢 Functional ([PR #13096](https://github.com/vllm-project/vllm/pull/13096))| | **Multimodal Models** | 🟢 Functional | | **FP8 KV Cache** | 🟢 Functional on Hopper devices ([PR #15191](https://github.com/vllm-project/vllm/pull/15191))| | **Spec Decode** | 🚧 WIP ([PR #13933](https://github.com/vllm-project/vllm/pull/13933))| @@ -121,11 +121,6 @@ Although we have re-implemented and partially optimized many features and models These features are already supported in vLLM V1, but their optimization is still in progress. -- **LoRA**: LoRA is functionally working on vLLM V1 but its performance is - inferior to that of V0. The team is actively working on improving its - performance -(e.g., see [PR #13096](https://github.com/vllm-project/vllm/pull/13096)). - - **Spec Decode**: Currently, only ngram-based spec decode is supported in V1. There will be follow-up work to support other types of spec decode (e.g., see [PR #13933](https://github.com/vllm-project/vllm/pull/13933)). We will prioritize the support for Eagle, MTP compared to draft model based spec decode. 
diff --git a/docs/source/index.md b/docs/source/index.md index 28dc0f67d77..bbff7361f75 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -132,6 +132,7 @@ serving/integrations/index :caption: Deployment :maxdepth: 1 +deployment/security deployment/docker deployment/k8s deployment/nginx @@ -180,6 +181,7 @@ design/v1/metrics :maxdepth: 2 contributing/overview +contributing/deprecation_policy contributing/profiling/profiling_index contributing/dockerfile/dockerfile contributing/model/index @@ -192,11 +194,8 @@ contributing/vulnerability_management :caption: API Reference :maxdepth: 2 -api/offline_inference/index -api/engine/index -api/inference_params -api/multimodal/index -api/model/index +api/summary +api/vllm/vllm ::: % Latest news and acknowledgements diff --git a/docs/source/models/extensions/fastsafetensor.md b/docs/source/models/extensions/fastsafetensor.md index 66cd710c97e..531d5869001 100644 --- a/docs/source/models/extensions/fastsafetensor.md +++ b/docs/source/models/extensions/fastsafetensor.md @@ -1,5 +1,5 @@ Loading Model weights with fastsafetensors =================================================================== -Using fastsafetensor library enables loading model weights to GPU memory by leveraging GPU direct storage. See https://github.com/foundation-model-stack/fastsafetensors for more details. +Using fastsafetensors library enables loading model weights to GPU memory by leveraging GPU direct storage. See [their GitHub repository](https://github.com/foundation-model-stack/fastsafetensors) for more details. For enabling this feature, set the environment variable ``USE_FASTSAFETENSOR`` to ``true`` diff --git a/docs/source/models/extensions/runai_model_streamer.md b/docs/source/models/extensions/runai_model_streamer.md index 99c37876a01..e0daa6f86dd 100644 --- a/docs/source/models/extensions/runai_model_streamer.md +++ b/docs/source/models/extensions/runai_model_streamer.md @@ -51,3 +51,29 @@ vllm serve /home/meta-llama/Llama-3.2-3B-Instruct --load-format runai_streamer - :::{note} For further instructions about tunable parameters and additional parameters configurable through environment variables, read the [Environment Variables Documentation](https://github.com/run-ai/runai-model-streamer/blob/master/docs/src/env-vars.md). ::: + +## Sharded Model Loading + +vLLM also supports loading sharded models using Run:ai Model Streamer. This is particularly useful for large models that are split across multiple files. To use this feature, use the `--load-format runai_streamer_sharded` flag: + +```console +vllm serve /path/to/sharded/model --load-format runai_streamer_sharded +``` + +The sharded loader expects model files to follow the same naming pattern as the regular sharded state loader: `model-rank-{rank}-part-{part}.safetensors`. You can customize this pattern using the `pattern` parameter in `--model-loader-extra-config`: + +```console +vllm serve /path/to/sharded/model --load-format runai_streamer_sharded --model-loader-extra-config '{"pattern":"custom-model-rank-{rank}-part-{part}.safetensors"}' +``` + +To create sharded model files, you can use the script provided in . This script demonstrates how to save a model in the sharded format that is compatible with the Run:ai Model Streamer sharded loader. + +The sharded loader supports all the same tunable parameters as the regular Run:ai Model Streamer, including `concurrency` and `memory_limit`. 
These can be configured in the same way: + +```console +vllm serve /path/to/sharded/model --load-format runai_streamer_sharded --model-loader-extra-config '{"concurrency":16, "memory_limit":5368709120}' +``` + +:::{note} +The sharded loader is particularly efficient for tensor or pipeline parallel models where each worker only needs to read its own shard rather than the entire checkpoint. +::: diff --git a/docs/source/models/generative_models.md b/docs/source/models/generative_models.md index 63fc53b0e7c..dd765e4a976 100644 --- a/docs/source/models/generative_models.md +++ b/docs/source/models/generative_models.md @@ -14,7 +14,7 @@ Usually, this is automatically inferred so you don't have to specify it. ## Offline Inference The {class}`~vllm.LLM` class provides various methods for offline inference. -See [Engine Arguments](#engine-args) for a list of options when initializing the model. +See for a list of options when initializing the model. ### `LLM.generate` @@ -59,7 +59,7 @@ A code example can be found here: for a list of options when initializing the model. ### `LLM.encode` @@ -141,3 +141,77 @@ Our [OpenAI-Compatible Server](#openai-compatible-server) provides endpoints tha - [Pooling API](#pooling-api) is similar to `LLM.encode`, being applicable to all types of pooling models. - [Embeddings API](#embeddings-api) is similar to `LLM.embed`, accepting both text and [multi-modal inputs](#multimodal-inputs) for embedding models. - [Score API](#score-api) is similar to `LLM.score` for cross-encoder models. + +## Matryoshka Embeddings + +[Matryoshka Embeddings](https://sbert.net/examples/sentence_transformer/training/matryoshka/README.html#matryoshka-embeddings) or [Matryoshka Representation Learning (MRL)](https://arxiv.org/abs/2205.13147) is a technique used in training embedding models. It allows user to trade off between performance and cost. + +:::{warning} +Not all embedding models are trained using Matryoshka Representation Learning. To avoid misuse of the `dimensions` parameter, vLLM returns an error for requests that attempt to change the output dimension of models that do not support Matryoshka Embeddings. + +For example, setting `dimensions` parameter while using the `BAAI/bge-m3` model will result in the following error. + +```json +{"object":"error","message":"Model \"BAAI/bge-m3\" does not support matryoshka representation, changing output dimensions will lead to poor results.","type":"BadRequestError","param":null,"code":400} +``` + +::: + +### Manually enable Matryoshka Embeddings + +There is currently no official interface for specifying support for Matryoshka Embeddings. In vLLM, if `is_matryoshka` is `True` in `config.json,` it is allowed to change the output to arbitrary dimensions. Using `matryoshka_dimensions` can control the allowed output dimensions. + +For models that support Matryoshka Embeddings but not recognized by vLLM, please manually override the config using `hf_overrides={"is_matryoshka": True}`, `hf_overrides={"matryoshka_dimensions": []}` (offline) or `--hf_overrides '{"is_matryoshka": true}'`, `--hf_overrides '{"matryoshka_dimensions": []}'`(online). + +Here is an example to serve a model with Matryoshka Embeddings enabled. + +```text +vllm serve Snowflake/snowflake-arctic-embed-m-v1.5 --hf_overrides '{"matryoshka_dimensions":[256]}' +``` + +### Offline Inference + +You can change the output dimensions of embedding models that support Matryoshka Embeddings by using the dimensions parameter in {class}`~vllm.PoolingParams`. 
+ +```python +from vllm import LLM, PoolingParams + +model = LLM(model="jinaai/jina-embeddings-v3", + task="embed", + trust_remote_code=True) +outputs = model.embed(["Follow the white rabbit."], + pooling_params=PoolingParams(dimensions=32)) +print(outputs[0].outputs) +``` + +A code example can be found here: + +### Online Inference + +Use the following command to start vllm server. + +```text +vllm serve jinaai/jina-embeddings-v3 --trust-remote-code +``` + +You can change the output dimensions of embedding models that support Matryoshka Embeddings by using the dimensions parameter. + +```text +curl http://127.0.0.1:8000/v1/embeddings \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "input": "Follow the white rabbit.", + "model": "jinaai/jina-embeddings-v3", + "encoding_format": "float", + "dimensions": 32 + }' +``` + +Expected output: + +```json +{"id":"embd-5c21fc9a5c9d4384a1b021daccaf9f64","object":"list","created":1745476417,"model":"jinaai/jina-embeddings-v3","data":[{"index":0,"object":"embedding","embedding":[-0.3828125,-0.1357421875,0.03759765625,0.125,0.21875,0.09521484375,-0.003662109375,0.1591796875,-0.130859375,-0.0869140625,-0.1982421875,0.1689453125,-0.220703125,0.1728515625,-0.2275390625,-0.0712890625,-0.162109375,-0.283203125,-0.055419921875,-0.0693359375,0.031982421875,-0.04052734375,-0.2734375,0.1826171875,-0.091796875,0.220703125,0.37890625,-0.0888671875,-0.12890625,-0.021484375,-0.0091552734375,0.23046875]}],"usage":{"prompt_tokens":8,"total_tokens":8,"completion_tokens":0,"prompt_tokens_details":null}} +``` + +A openai client example can be found here: diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md index b6fef2f43b8..831f9a86d1d 100644 --- a/docs/source/models/supported_models.md +++ b/docs/source/models/supported_models.md @@ -40,29 +40,37 @@ You can force the use of `TransformersForCausalLM` by setting `model_impl="trans vLLM may not fully optimise the Transformers implementation so you may see degraded performance if comparing a native model to a Transformers model in vLLM. ::: -#### Supported features +#### Custom models -The Transformers modeling backend explicitly supports the following features: +If a model is neither supported natively by vLLM or Transformers, it can still be used in vLLM! -- (except GGUF) -- -- +For a model to be compatible with the Transformers backend for vLLM it must: -#### Remote Code +- be a Transformers compatible custom model (see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)): + * The model directory must have the correct structure (e.g. `config.json` is present). + * `config.json` must contain `auto_map.AutoModel`. +- be a Transformers backend for vLLM compatible model (see ): + * Customisation should be done in the base model (e.g. in `MyModel`, not `MyModelForCausalLM`). -If your model is neither supported natively by vLLM or Transformers, you can still run it in vLLM! +If the compatible model is: -Simply set `trust_remote_code=True` and vLLM will run any model on the Model Hub that is compatible with Transformers. -Provided that the model writer implements their model in a compatible way, this means that you can run new models before they are officially supported in Transformers or vLLM! +- on the Hugging Face Model Hub, simply set `trust_remote_code=True` for or `--trust-remode-code` for the . +- in a local directory, simply pass directory path to `model=` for or `vllm serve ` for the . 
-```python -from vllm import LLM -llm = LLM(model=..., task="generate", trust_remote_code=True) # Name or path of your model -llm.apply_model(lambda model: print(model.__class__)) -``` +This means that, with the Transformers backend for vLLM, new models can be used before they are officially supported in Transformers or vLLM! + +(writing-custom-models)= + +#### Writing custom models + +This section details the necessary modifications to make to a Transformers compatible custom model that make it compatible with the Transformers backend for vLLM. (We assume that a Transformers compatible custom model has already been created, see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)). To make your model compatible with the Transformers backend, it needs: +1. `kwargs` passed down through all modules from `MyModel` to `MyAttention`. +2. `MyAttention` must use `ALL_ATTENTION_FUNCTIONS` to call attention. +3. `MyModel` must contain `_supports_attention_backend = True`. + ```{code-block} python :caption: modeling_my_model.py @@ -71,7 +79,7 @@ from torch import nn class MyAttention(nn.Module): - def forward(self, hidden_states, **kwargs): # <- kwargs are required + def forward(self, hidden_states, **kwargs): ... attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation] attn_output, attn_weights = attention_interface( @@ -87,11 +95,11 @@ class MyModel(PreTrainedModel): _supports_attention_backend = True ``` -Here is what happens in the background: +Here is what happens in the background when this model is loaded: -1. The config is loaded -2. `MyModel` Python class is loaded from the `auto_map`, and we check that the model `_supports_attention_backend`. -3. The `TransformersForCausalLM` backend is used. See , which leverage `self.config._attn_implementation = "vllm"`, thus the need to use `ALL_ATTENTION_FUNCTION`. +1. The config is loaded. +2. `MyModel` Python class is loaded from the `auto_map` in config, and we check that the model `is_backend_compatible()`. +3. `MyModel` is loaded into `TransformersForCausalLM` (see ) which sets `self.config._attn_implementation = "vllm"` so that vLLM's attention layer is used. That's it! @@ -129,7 +137,7 @@ class MyConfig(PretrainedConfig): ### Hugging Face Hub -By default, vLLM loads models from [Hugging Face (HF) Hub](https://huggingface.co/models). +By default, vLLM loads models from [Hugging Face (HF) Hub](https://huggingface.co/models). To change the download path for models, you can set the `HF_HOME` environment variable; for more details, refer to [their official documentation](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhome). To determine whether a given model is natively supported, you can check the `config.json` file inside the HF repository. If the `"architectures"` field contains a model architecture listed below, then it should be natively supported. @@ -213,6 +221,16 @@ output = llm.encode("Hello, my name is") print(output) ``` +(feature-status-legend)= + +## Feature Status Legend + +- ✅︎ indicates that the feature is supported for the model. + +- 🚧 indicates that the feature is planned but not yet supported for the model. + +- ⚠️ indicates that the feature is available but may have known issues or limitations. 
+ (supported-text-models)= ## List of Text-only Language Models @@ -314,7 +332,7 @@ See [this page](#generative-models) for more information on how to use generativ * ✅︎ - * `GemmaForCausalLM` * Gemma - * `google/gemma-2b`, `google/gemma-7b`, etc. + * `google/gemma-2b`, `google/gemma-1.1-2b-it`, etc. * ✅︎ * ✅︎ - * `Gemma2ForCausalLM` @@ -334,7 +352,7 @@ See [this page](#generative-models) for more information on how to use generativ * ✅︎ - * `Glm4ForCausalLM` * GLM-4-0414 - * `THUDM/GLM-4-32B-Chat-0414`, etc. + * `THUDM/GLM-4-32B-0414`, etc. * ✅︎ * ✅︎ - * `GPT2LMHeadModel` @@ -497,6 +515,11 @@ See [this page](#generative-models) for more information on how to use generativ * `adept/persimmon-8b-base`, `adept/persimmon-8b-chat`, etc. * * ✅︎ +- * `Plamo2ForCausalLM` + * PLaMo2 + * `pfnet/plamo-2-1b`, `pfnet/plamo-2-8b`, etc. + * + * - * `QWenLMHeadModel` * Qwen * `Qwen/Qwen-7B`, `Qwen/Qwen-7B-Chat`, etc. @@ -519,8 +542,8 @@ See [this page](#generative-models) for more information on how to use generativ * ✅︎ - * `Qwen3MoeForCausalLM` * Qwen3MoE - * `Qwen/Qwen3-MoE-15B-A2B`, etc. - * ✅︎ + * `Qwen/Qwen3-30B-A3B`, etc. + * * ✅︎ - * `StableLmForCausalLM` * StableLM @@ -735,6 +758,11 @@ If your model is not in the above list, we will try to automatically convert the * `BAAI/bge-reranker-v2-m3`, etc. * * +- * `ModernBertForSequenceClassification` + * ModernBert-based + * `Alibaba-NLP/gte-reranker-modernbert-base`, etc. + * + * ::: (supported-mm-models)= @@ -765,6 +793,8 @@ or `--limit-mm-per-prompt` (online serving). For example, to enable passing up t Offline inference: ```python +from vllm import LLM + llm = LLM( model="Qwen/Qwen2-VL-7B-Instruct", limit_mm_per_prompt={"image": 4}, @@ -774,7 +804,7 @@ llm = LLM( Online serving: ```bash -vllm serve Qwen/Qwen2-VL-7B-Instruct --limit-mm-per-prompt image=4 +vllm serve Qwen/Qwen2-VL-7B-Instruct --limit-mm-per-prompt '{"image":4}' ``` **This is no longer required if you are using vLLM V1.** @@ -865,6 +895,13 @@ See [this page](#generative-models) for more information on how to use generativ * ✅︎ * ✅︎ * ✅︎ +- * `GraniteSpeechForConditionalGeneration` + * Granite Speech + * T + A + * `ibm-granite/granite-speech-3.3-8b` + * ✅︎ + * ✅︎ + * ✅︎ - * `H2OVLChatModel` * H2OVL * T + IE+ @@ -942,11 +979,18 @@ See [this page](#generative-models) for more information on how to use generativ * ✅︎ * ✅︎ * ✅︎ +- * `MiniMaxVL01ForConditionalGeneration` + * MiniMax-VL + * T + IE+ + * `MiniMaxAI/MiniMax-VL-01`, etc. + * + * ✅︎ + * ✅︎ - * `Mistral3ForConditionalGeneration` * Mistral3 * T + I+ * `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, etc. - * + * ✅︎ * ✅︎ * ✅︎ - * `MllamaForConditionalGeneration` @@ -970,6 +1014,13 @@ See [this page](#generative-models) for more information on how to use generativ * * ✅︎ * ✅︎ +- * `Ovis2ForConditionalGeneration`^ + * Ovis2 + * T + I+ + * `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis2-2B`, etc. + * + * + * ✅︎ - * `PaliGemmaForConditionalGeneration` * PaliGemma, PaliGemma 2 * T + IE @@ -990,7 +1041,7 @@ See [this page](#generative-models) for more information on how to use generativ * `microsoft/Phi-4-multimodal-instruct`, etc. 
* ✅︎ * - * + * ✅︎ - * `PixtralForConditionalGeneration` * Pixtral * T + I+ @@ -1026,6 +1077,13 @@ See [this page](#generative-models) for more information on how to use generativ * ✅︎ * ✅︎ * ✅︎ +- * `Qwen2_5OmniThinkerForConditionalGeneration` + * Qwen2.5-Omni + * T + IE+ + VE+ + A+ + * `Qwen/Qwen2.5-Omni-7B` + * + * ✅︎ + * ✅︎\* - * `SkyworkR1VChatModel` * Skywork-R1V-38B * T + I @@ -1057,7 +1115,7 @@ See [this page](#generative-models) for more information on how to use generativ :::{important} Pan-and-scan image pre-processing is currently supported on V0 (but not V1). -You can enable it by passing `--mm-processor-kwargs '{"do_pan_and_scan": True}'`. +You can enable it by passing `--mm-processor-kwargs '{"do_pan_and_scan": true}'`. ::: :::{warning} @@ -1072,7 +1130,7 @@ V0 correctly implements the model's attention pattern: V1 currently uses a simplified attention pattern: - Uses causal attention for all tokens, including image tokens -- Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": True}` +- Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": true}` - Will be updated in the future to support the correct behavior This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends. @@ -1086,6 +1144,36 @@ This limitation exists because the model's mixed attention pattern (bidirectiona To use `TIGER-Lab/Mantis-8B-siglip-llama3`, you have to pass `--hf_overrides '{"architectures": ["MantisForConditionalGeneration"]}'` when running vLLM. ::: +:::{warning} +The output quality of `AllenAI/Molmo-7B-D-0924` (especially in object localization tasks) has deteriorated in recent updates. + +For the best results, we recommend using the following dependency versions (tested on A10 and L40): + +```text +# Core vLLM-compatible dependencies with Molmo accuracy setup (tested on L40) +torch==2.5.1 +torchvision==0.20.1 +transformers==4.48.1 +tokenizers==0.21.0 +tiktoken==0.7.0 +vllm==0.7.0 + +# Optional but recommended for improved performance and stability +triton==3.1.0 +xformers==0.0.28.post3 +uvloop==0.21.0 +protobuf==5.29.3 +openai==1.60.2 +opencv-python-headless==4.11.0.86 +pillow==10.4.0 + +# Installed FlashAttention (for float16 only) +flash-attn>=2.5.6 # Not used in float32, but should be documented +``` + +**Note:** Make sure you understand the security implications of using outdated packages. +::: + :::{note} The official `openbmb/MiniCPM-V-2` doesn't work yet, so we need to use a fork (`HwwwH/MiniCPM-V-2`) for now. For more details, please see: @@ -1095,6 +1183,14 @@ For more details, please see: Our PaliGemma implementations have the same problem as Gemma 3 (see above) for both V0 and V1. ::: +:::{note} +To use Qwen2.5-Omni, you have to install Hugging Face Transformers library from source via +`pip install git+https://github.com/huggingface/transformers.git`. + +Read audio from video pre-processing is currently supported on V0 (but not V1), because overlapping modalities is not yet supported in V1. +`--mm-processor-kwargs '{"use_audio_in_video": true}'`. +::: + ### Pooling Models See [this page](pooling-models) for more information on how to use pooling models. 
diff --git a/docs/source/performance/optimization.md b/docs/source/performance/optimization.md index ccbe8a36706..4160f078496 100644 --- a/docs/source/performance/optimization.md +++ b/docs/source/performance/optimization.md @@ -2,65 +2,188 @@ # Optimization and Tuning +This guide covers optimization strategies and performance tuning for vLLM V1. + ## Preemption Due to the auto-regressive nature of transformer architecture, there are times when KV cache space is insufficient to handle all batched requests. -The vLLM can preempt requests to free up KV cache space for other requests. Preempted requests are recomputed when sufficient KV cache space becomes -available again. When this occurs, the following warning is printed: +In such cases, vLLM can preempt requests to free up KV cache space for other requests. Preempted requests are recomputed when sufficient KV cache space becomes +available again. When this occurs, you may see the following warning: ```text -WARNING 05-09 00:49:33 scheduler.py:1057 Sequence group 0 is preempted by PreemptionMode.SWAP mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_cumulative_preemption_cnt=1 +WARNING 05-09 00:49:33 scheduler.py:1057 Sequence group 0 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_cumulative_preemption_cnt=1 ``` While this mechanism ensures system robustness, preemption and recomputation can adversely affect end-to-end latency. -If you frequently encounter preemptions from the vLLM engine, consider the following actions: +If you frequently encounter preemptions, consider the following actions: + +- Increase `gpu_memory_utilization`. vLLM pre-allocates GPU cache using this percentage of memory. By increasing utilization, you can provide more KV cache space. +- Decrease `max_num_seqs` or `max_num_batched_tokens`. This reduces the number of concurrent requests in a batch, thereby requiring less KV cache space. +- Increase `tensor_parallel_size`. This shards model weights across GPUs, allowing each GPU to have more memory available for KV cache. However, increasing this value may cause excessive synchronization overhead. +- Increase `pipeline_parallel_size`. This distributes model layers across GPUs, reducing the memory needed for model weights on each GPU, indirectly leaving more memory available for KV cache. However, increasing this value may cause latency penalties. -- Increase `gpu_memory_utilization`. The vLLM pre-allocates GPU cache by using gpu_memory_utilization% of memory. By increasing this utilization, you can provide more KV cache space. -- Decrease `max_num_seqs` or `max_num_batched_tokens`. This can reduce the number of concurrent requests in a batch, thereby requiring less KV cache space. -- Increase `tensor_parallel_size`. This approach shards model weights, so each GPU has more memory available for KV cache. -- Increase `pipeline_parallel_size`. This approach distributes model layers across GPUs, reducing the memory needed for model weights on each GPU, which indirectly leaves more memory available for KV cache. +You can monitor the number of preemption requests through Prometheus metrics exposed by vLLM. Additionally, you can log the cumulative number of preemption requests by setting `disable_log_stats=False`. 
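+
+As a concrete illustration, the sketch below applies a few of the knobs above when constructing an offline engine and keeps the preemption statistics visible in the logs. The model name and the specific values are examples only; the right settings depend on your hardware and workload.
+
+```python
+from vllm import LLM
+
+llm = LLM(
+    model="meta-llama/Llama-3.1-8B-Instruct",  # example model
+    gpu_memory_utilization=0.95,  # give the KV cache more headroom
+    max_num_seqs=128,             # lower this if preemptions persist
+    disable_log_stats=False,      # keep cumulative preemption counts in the logs
+)
+```
+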
-You can also monitor the number of preemption requests through Prometheus metrics exposed by the vLLM. Additionally, you can log the cumulative number of preemption requests by setting disable_log_stats=False. +In vLLM V1, the default preemption mode is `RECOMPUTE` rather than `SWAP`, as recomputation has lower overhead in the V1 architecture. (chunked-prefill)= ## Chunked Prefill -vLLM supports an experimental feature chunked prefill. Chunked prefill allows to chunk large prefills into smaller chunks and batch them together with decode requests. +Chunked prefill allows vLLM to process large prefills in smaller chunks and batch them together with decode requests. This feature helps improve both throughput and latency by better balancing compute-bound (prefill) and memory-bound (decode) operations. + +In vLLM V1, **chunked prefill is always enabled by default**. This is different from vLLM V0, where it was conditionally enabled based on model characteristics. + +With chunked prefill enabled, the scheduling policy prioritizes decode requests. It batches all pending decode requests before scheduling any prefill operations. When there are available tokens in the `max_num_batched_tokens` budget, it schedules pending prefills. If a pending prefill request cannot fit into `max_num_batched_tokens`, it automatically chunks it. + +This policy has two benefits: + +- It improves ITL and generation decode because decode requests are prioritized. +- It helps achieve better GPU utilization by locating compute-bound (prefill) and memory-bound (decode) requests to the same batch. -You can enable the feature by specifying `--enable-chunked-prefill` in the command line or setting `enable_chunked_prefill=True` in the LLM constructor. +### Performance Tuning with Chunked Prefill + +You can tune the performance by adjusting `max_num_batched_tokens`: + +- Smaller values (e.g., 2048) achieve better inter-token latency (ITL) because there are fewer prefills slowing down decodes. +- Higher values achieve better time to first token (TTFT) as you can process more prefill tokens in a batch. +- For optimal throughput, we recommend setting `max_num_batched_tokens > 8096` especially for smaller models on large GPUs. +- If `max_num_batched_tokens` is the same as `max_model_len`, that's almost the equivalent to the V0 default scheduling policy (except that it still prioritizes decodes). ```python from vllm import LLM -llm = LLM(model="meta-llama/Llama-2-7b-hf", enable_chunked_prefill=True) -# Set max_num_batched_tokens to tune performance. -# NOTE: 2048 is the default max_num_batched_tokens for chunked prefill. -# llm = LLM(model="meta-llama/Llama-2-7b-hf", enable_chunked_prefill=True, max_num_batched_tokens=2048) +# Set max_num_batched_tokens to tune performance +llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct", max_num_batched_tokens=16384) ``` -By default, vLLM scheduler prioritizes prefills and doesn't batch prefill and decode to the same batch. -This policy optimizes the TTFT (time to the first token), but incurs slower ITL (inter token latency) and inefficient GPU utilization. +See related papers for more details ( or ). -Once chunked prefill is enabled, the policy is changed to prioritize decode requests. -It batches all pending decode requests to the batch before scheduling any prefill. -When there are available token_budget (`max_num_batched_tokens`), it schedules pending prefills. -If a last pending prefill request cannot fit into `max_num_batched_tokens`, it chunks it. 
+## Parallelism Strategies
 
-This policy has two benefits:
+vLLM supports multiple parallelism strategies that can be combined to optimize performance across different hardware configurations.
 
-- It improves ITL and generation decode because decode requests are prioritized.
-- It helps achieve better GPU utilization by locating compute-bound (prefill) and memory-bound (decode) requests to the same batch.
+### Tensor Parallelism (TP)
 
-You can tune the performance by changing `max_num_batched_tokens`. By default, it is set to 2048.
-Smaller `max_num_batched_tokens` achieves better ITL because there are fewer prefills interrupting decodes.
-Higher `max_num_batched_tokens` achieves better TTFT as you can put more prefill to the batch.
+Tensor parallelism shards model parameters across multiple GPUs within each model layer. This is the most common strategy for large model inference within a single node.
 
-- If `max_num_batched_tokens` is the same as `max_model_len`, that's almost the equivalent to the default scheduling policy (except that it still prioritizes decodes).
-- Note that the default value (2048) of `max_num_batched_tokens` is optimized for ITL, and it may have lower throughput than the default scheduler.
+**When to use:**
 
-We recommend you set `max_num_batched_tokens > 2048` for throughput.
+- When the model is too large to fit on a single GPU
+- When you need to reduce memory pressure per GPU to allow more KV cache space for higher throughput
 
-See related papers for more details ( or ).
+```python
+from vllm import LLM
+
+# Split model across 4 GPUs
+llm = LLM(model="meta-llama/Llama-3.3-70B-Instruct", tensor_parallel_size=4)
+```
+
+For models that are too large to fit on a single GPU (like 70B parameter models), tensor parallelism is essential.
+
+### Pipeline Parallelism (PP)
+
+Pipeline parallelism distributes model layers across multiple GPUs. Each GPU processes different parts of the model in sequence.
+
+**When to use:**
+
+- When you've already maxed out efficient tensor parallelism but need to distribute the model further, or across nodes
+- For very deep and narrow models where layer distribution is more efficient than tensor sharding
+
+Pipeline parallelism can be combined with tensor parallelism for very large models:
+
+```python
+from vllm import LLM
+
+# Combine pipeline and tensor parallelism
+llm = LLM(
+    model="meta-llama/Llama-3.3-70B-Instruct",
+    tensor_parallel_size=4,
+    pipeline_parallel_size=2
+)
+```
+
+### Expert Parallelism (EP)
+
+Expert parallelism is a specialized form of parallelism for Mixture of Experts (MoE) models, where different expert networks are distributed across GPUs.
+
+**When to use:**
-
+- Specifically for MoE models (like DeepSeekV3, Qwen3MoE, Llama-4)
+- When you want to balance the expert computation load across GPUs
+
+Expert parallelism is enabled by setting `enable_expert_parallel=True`, which will use expert parallelism instead of tensor parallelism for MoE layers.
+It will use the same degree of parallelism as what you have set for tensor parallelism.
+
+### Data Parallelism (DP)
+
+Data parallelism replicates the entire model across multiple GPU sets and processes different batches of requests in parallel.
+ +**When to use:** + +- When you have enough GPUs to replicate the entire model +- When you need to scale throughput rather than model size +- In multi-user environments where isolation between request batches is beneficial + +Data parallelism can be combined with the other parallelism strategies and is set by `data_parallel_size=N`. +Note that MoE layers will be sharded according to the product of the tensor parallel size and data parallel size. + +## Reducing Memory Usage + +If you encounter out-of-memory issues, consider these strategies: + +### Context Length and Batch Size + +You can reduce memory usage by limiting the context length and batch size: + +```python +from vllm import LLM + +llm = LLM( + model="meta-llama/Llama-3.1-8B-Instruct", + max_model_len=2048, # Limit context window + max_num_seqs=4 # Limit batch size +) +``` + +### Adjust CUDA Graph Compilation + +CUDA graph compilation in V1 uses more memory than in V0. You can reduce memory usage by adjusting the compilation level: + +```python +from vllm import LLM +from vllm.config import CompilationConfig, CompilationLevel + +llm = LLM( + model="meta-llama/Llama-3.1-8B-Instruct", + compilation_config=CompilationConfig( + level=CompilationLevel.PIECEWISE, + cudagraph_capture_sizes=[1, 2, 4, 8] # Capture fewer batch sizes + ) +) +``` + +Or, if you are not concerned about latency or overall performance, disable CUDA graph compilation entirely with `enforce_eager=True`: + +```python +from vllm import LLM + +llm = LLM( + model="meta-llama/Llama-3.1-8B-Instruct", + enforce_eager=True # Disable CUDA graph compilation +) +``` + +### Multimodal Models + +For multi-modal models, you can reduce memory usage by limiting the number of images/videos per request: + +```python +from vllm import LLM + +# Accept up to 2 images per prompt +llm = LLM( + model="Qwen/Qwen2.5-VL-3B-Instruct", + limit_mm_per_prompt={"image": 2} +) +``` diff --git a/docs/source/serving/distributed_serving.md b/docs/source/serving/distributed_serving.md index 591acc2c9b7..c285ef3e8e1 100644 --- a/docs/source/serving/distributed_serving.md +++ b/docs/source/serving/distributed_serving.md @@ -77,6 +77,10 @@ bash run_cluster.sh \ Then you get a ray cluster of **containers**. Note that you need to keep the shells running these commands alive to hold the cluster. Any shell disconnect will terminate the cluster. In addition, please note that the argument `ip_of_head_node` should be the IP address of the head node, which is accessible by all the worker nodes. The IP addresses of each worker node should be specified in the `VLLM_HOST_IP` environment variable, and should be different for each worker node. Please check the network configuration of your cluster to make sure the nodes can communicate with each other through the specified IP addresses. +:::{warning} +It is considered best practice to set `VLLM_HOST_IP` to an address on a private network segment for the vLLM cluster. The traffic sent here is not encrypted. The endpoints are also exchanging data in a format that could be exploited to execute arbitrary code should a malicious party gain access to the network. Please ensure that this network is not reachable by any untrusted parties. +::: + :::{warning} Since this is a ray cluster of **containers**, all the following commands should be executed in the **containers**, otherwise you are executing the commands on the host machine, which is not connected to the ray cluster. To enter the container, you can use `docker exec -it node /bin/bash`. 
::: diff --git a/docs/source/serving/engine_args.md b/docs/source/serving/engine_args.md index e9943571a40..97ea01cd3b2 100644 --- a/docs/source/serving/engine_args.md +++ b/docs/source/serving/engine_args.md @@ -16,6 +16,7 @@ Below, you can find an explanation of every engine argument: :func: _engine_args_parser :prog: vllm serve :nodefaultconst: + :markdownhelp: ``` ## Async Engine Arguments @@ -29,4 +30,5 @@ Additional arguments are available to the asynchronous engine which is used for :func: _async_engine_args_parser :prog: vllm serve :nodefaultconst: + :markdownhelp: ``` diff --git a/docs/source/serving/multimodal_inputs.md b/docs/source/serving/multimodal_inputs.md index f45d36c3cca..d9a093e8d14 100644 --- a/docs/source/serving/multimodal_inputs.md +++ b/docs/source/serving/multimodal_inputs.md @@ -228,7 +228,7 @@ First, launch the OpenAI-compatible server: ```bash vllm serve microsoft/Phi-3.5-vision-instruct --task generate \ - --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt image=2 + --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}' ``` Then, you can use the OpenAI client as follows: diff --git a/docs/source/serving/offline_inference.md b/docs/source/serving/offline_inference.md index 85f2cafacdd..2621eda3254 100644 --- a/docs/source/serving/offline_inference.md +++ b/docs/source/serving/offline_inference.md @@ -25,13 +25,15 @@ The available APIs depend on the type of model that is being run: Please refer to the above pages for more details about each API. :::{seealso} -[API Reference](/api/offline_inference/index) +[API Reference](#offline-inference-api) ::: +(configuration-options)= + ## Configuration Options This section lists the most common options for running the vLLM engine. -For a full list, refer to the [Engine Arguments](#engine-args) page. +For a full list, refer to the page. (model-resolution)= @@ -59,6 +61,8 @@ model = LLM( Our [list of supported models](#supported-models) shows the model architectures that are recognized by vLLM. +(reducing-memory-usage)= + ### Reducing memory usage Large models might cause your machine to run out of memory (OOM). Here are some options that help alleviate this problem. @@ -81,6 +85,12 @@ before initializing vLLM. Otherwise, you may run into an error like `RuntimeErro To control which devices are used, please instead set the `CUDA_VISIBLE_DEVICES` environment variable. ::: +:::{note} +With tensor parallelism enabled, each process will read the whole model and split it into chunks, which makes the disk reading time even longer (proportional to the size of tensor parallelism). + +You can convert the model checkpoint to a sharded checkpoint using . The conversion process might take some time, but later you can load the sharded checkpoint much faster. The model loading time should remain constant regardless of the size of tensor parallelism. +::: + #### Quantization Quantized models take less memory at the cost of lower precision. @@ -103,6 +113,39 @@ llm = LLM(model="adept/fuyu-8b", max_num_seqs=2) ``` +#### Reduce CUDA Graphs + +By default, we optimize model inference using CUDA graphs which take up extra memory in the GPU. + +:::{important} +CUDA graph capture takes up more memory in V1 than in V0. 
+::: + +You can adjust `compilation_config` to achieve a better balance between inference speed and memory usage: + +```python +from vllm import LLM +from vllm.config import CompilationConfig, CompilationLevel + +llm = LLM( + model="meta-llama/Llama-3.1-8B-Instruct", + compilation_config=CompilationConfig( + level=CompilationLevel.PIECEWISE, + # By default, it goes up to max_num_seqs + cudagraph_capture_sizes=[1, 2, 4, 8, 16], + ), +) +``` + +You can disable graph capturing completely via the `enforce_eager` flag: + +```python +from vllm import LLM + +llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct", + enforce_eager=True) +``` + #### Adjust cache size If you run out of CPU RAM, try the following options: @@ -110,16 +153,25 @@ If you run out of CPU RAM, try the following options: - (Multi-modal models only) you can set the size of multi-modal input cache using `VLLM_MM_INPUT_CACHE_GIB` environment variable (default 4 GiB). - (CPU backend only) you can set the size of KV cache using `VLLM_CPU_KVCACHE_SPACE` environment variable (default 4 GiB). -#### Disable unused modalities +#### Multi-modal input limits -You can disable unused modalities (except for text) by setting its limit to zero. +You can allow a smaller number of multi-modal items per prompt to reduce the memory footprint of the model: + +```python +from vllm import LLM + +# Accept up to 3 images and 1 video per prompt +llm = LLM(model="Qwen/Qwen2.5-VL-3B-Instruct", + limit_mm_per_prompt={"image": 3, "video": 1}) +``` +You can go a step further and disable unused modalities completely by setting its limit to zero. For example, if your application only accepts image input, there is no need to allocate any memory for videos. ```python from vllm import LLM -# Accept images but not videos +# Accept any number of images but no videos llm = LLM(model="Qwen/Qwen2.5-VL-3B-Instruct", limit_mm_per_prompt={"video": 0}) ``` @@ -134,6 +186,29 @@ llm = LLM(model="google/gemma-3-27b-it", limit_mm_per_prompt={"image": 0}) ``` +#### Multi-modal processor arguments + +For certain models, you can adjust the multi-modal processor arguments to +reduce the size of the processed multi-modal inputs, which in turn saves memory. + +Here are some examples: + +```python +from vllm import LLM + +# Available for Qwen2-VL series models +llm = LLM(model="Qwen/Qwen2.5-VL-3B-Instruct", + mm_processor_kwargs={ + "max_pixels": 768 * 768, # Default is 1280 * 28 * 28 + }) + +# Available for InternVL series models +llm = LLM(model="OpenGVLab/InternVL2-2B", + mm_processor_kwargs={ + "max_dynamic_patch": 4, # Default is 12 + }) +``` + ### Performance optimization and tuning You can potentially improve the performance of vLLM by finetuning various options. diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md index 11ca571c684..34382c87a48 100644 --- a/docs/source/serving/openai_compatible_server.md +++ b/docs/source/serving/openai_compatible_server.md @@ -33,11 +33,13 @@ print(completion.choices[0].message) vLLM supports some parameters that are not supported by OpenAI, `top_k` for example. You can pass these parameters to vLLM using the OpenAI client in the `extra_body` parameter of your requests, i.e. `extra_body={"top_k": 50}` for `top_k`. ::: + :::{important} By default, the server applies `generation_config.json` from the Hugging Face model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator. 
To disable this behavior, please pass `--generation-config vllm` when launching the server. ::: + ## Supported APIs We currently support the following OpenAI APIs: @@ -172,6 +174,12 @@ print(completion._request_id) The `vllm serve` command is used to launch the OpenAI-compatible server. +:::{tip} +The vast majority of command-line arguments are based on those for offline inference. + +See [here](configuration-options) for some common options. +::: + :::{argparse} :module: vllm.entrypoints.openai.cli_args :func: create_parser_for_docs @@ -394,9 +402,26 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`. ::: +Code example: -Code example: +#### Extra Parameters + +The following [sampling parameters](#sampling-params) are supported. + +:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py +:language: python +:start-after: begin-transcription-sampling-params +:end-before: end-transcription-sampling-params +::: + +The following extra parameters are supported: + +:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py +:language: python +:start-after: begin-transcription-extra-params +:end-before: end-transcription-extra-params +::: (tokenizer-api)= diff --git a/examples/lmcache/README.md b/examples/lmcache/README.md new file mode 100644 index 00000000000..95a6bf995b2 --- /dev/null +++ b/examples/lmcache/README.md @@ -0,0 +1,56 @@ +# LMCache Examples + +This folder demonstrates how to use LMCache for disaggregated prefilling, CPU offloading and KV cache sharing. + +## 1. Disaggregated Prefill in vLLM v1 + +This example demonstrates how to run LMCache with disaggregated prefill using NIXL on a single node. + +### Prerequisites + +- Install [LMCache](https://github.com/LMCache/LMCache). You can simply run `pip install lmcache`. +- Install [NIXL](https://github.com/ai-dynamo/nixl). +- At least 2 GPUs +- Valid Hugging Face token (HF_TOKEN) for Llama 3.1 8B Instruct. + +### Usage + +Run +`cd disagg_prefill_lmcache_v1` +to get into `disagg_prefill_lmcache_v1` folder, and then run + +```bash +bash disagg_example_nixl.sh +``` + +to run disaggregated prefill and benchmark the performance. + +### Components + +#### Server Scripts +- `disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh` - Launches individual vLLM servers for prefill/decode, and also launches the proxy server. +- `disagg_prefill_lmcache_v1/disagg_proxy_server.py` - FastAPI proxy server that coordinates between prefiller and decoder +- `disagg_prefill_lmcache_v1/disagg_example_nixl.sh` - Main script to run the example + +#### Configuration +- `disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml` - Configuration for prefiller server +- `disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml` - Configuration for decoder server + +#### Log Files +The main script generates several log files: +- `prefiller.log` - Logs from the prefill server +- `decoder.log` - Logs from the decode server +- `proxy.log` - Logs from the proxy server + +## 2. CPU Offload Examples + +- `python cpu_offload_lmcache.py -v v0` - CPU offloading implementation for vLLM v0 +- `python cpu_offload_lmcache.py -v v1` - CPU offloading implementation for vLLM v1 + +## 3. KV Cache Sharing + +The `kv_cache_sharing_lmcache_v1.py` example demonstrates how to share KV caches between vLLM v1 instances. + +## 4. 
Disaggregated Prefill in vLLM v0 + +The `disaggregated_prefill_lmcache_v0.py` provides an example of how to run disaggregated prefill in vLLM v0. diff --git a/examples/lmcache/cpu_offload_lmcache.py b/examples/lmcache/cpu_offload_lmcache.py new file mode 100644 index 00000000000..bf191960b08 --- /dev/null +++ b/examples/lmcache/cpu_offload_lmcache.py @@ -0,0 +1,151 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +This file demonstrates the example usage of cpu offloading +with LMCache in vLLM v1 or v0. + +Usage: + + Specify vLLM version + + -v v0 : Use LMCacheConnector + model = mistralai/Mistral-7B-Instruct-v0.2 + (Includes enable_chunked_prefill = True) + + -v v1 : Use LMCacheConnectorV1 (default) + model = meta-llama/Meta-Llama-3.1-8B-Instruct + (Without enable_chunked_prefill) + +Note that `lmcache` is needed to run this example. +Requirements: Linux, Python: 3.10 or higher, CUDA: 12.1 +Learn more about LMCache environment setup, please refer to: +https://docs.lmcache.ai/getting_started/installation.html +""" +import argparse +import contextlib +import os +import time +from dataclasses import asdict + +from lmcache.experimental.cache_engine import LMCacheEngineBuilder +from lmcache.integration.vllm.utils import ENGINE_NAME + +from vllm import LLM, SamplingParams +from vllm.config import KVTransferConfig +from vllm.engine.arg_utils import EngineArgs + + +def setup_environment_variables(): + # LMCache-related environment variables + # Use experimental features in LMCache + os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True" + # LMCache is set to use 256 tokens per chunk + os.environ["LMCACHE_CHUNK_SIZE"] = "256" + # Enable local CPU backend in LMCache + os.environ["LMCACHE_LOCAL_CPU"] = "True" + # Set local CPU memory limit to 5.0 GB + os.environ["LMCACHE_MAX_LOCAL_CPU_SIZE"] = "5.0" + + +@contextlib.contextmanager +def build_llm_with_lmcache(lmcache_connector: str, model: str, + vllm_version: str): + ktc = KVTransferConfig( + kv_connector=lmcache_connector, + kv_role="kv_both", + ) + # Set GPU memory utilization to 0.8 for an A40 GPU with 40GB + # memory. Reduce the value if your GPU has less memory. + # Note: LMCache supports chunked prefill (see vLLM#14505, LMCache#392). + if vllm_version == "v0": + llm_args = EngineArgs( + model=model, + kv_transfer_config=ktc, + max_model_len=8000, + gpu_memory_utilization=0.8, + enable_chunked_prefill=True, # Only in v0 + ) + else: + llm_args = EngineArgs( + model=model, + kv_transfer_config=ktc, + max_model_len=8000, + gpu_memory_utilization=0.8, + ) + + llm = LLM(**asdict(llm_args)) + try: + yield llm + finally: + # Clean up lmcache backend + LMCacheEngineBuilder.destroy(ENGINE_NAME) + + +def print_output( + llm: LLM, + prompt: list[str], + sampling_params: SamplingParams, + req_str: str, +): + # Should be able to see logs like the following: + # `LMCache INFO: Storing KV cache for 6006 out of 6006 tokens for request 0` + # This indicates that the KV cache has been stored in LMCache. 
+ start = time.time() + outputs = llm.generate(prompt, sampling_params) + print("-" * 50) + for output in outputs: + generated_text = output.outputs[0].text + print(f"Generated text: {generated_text!r}") + print(f"Generation took {time.time() - start:.2f} seconds, " + f"{req_str} request done.") + print("-" * 50) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-v", + "--version", + choices=["v0", "v1"], + default="v1", + help="Specify vLLM version (default: v1)") + return parser.parse_args() + + +def main(): + args = parse_args() + + if args.version == "v0": + lmcache_connector = "LMCacheConnector" + model = "mistralai/Mistral-7B-Instruct-v0.2" + else: + lmcache_connector = "LMCacheConnectorV1" + model = "meta-llama/Meta-Llama-3.1-8B-Instruct" + + setup_environment_variables() + + with build_llm_with_lmcache(lmcache_connector, model, args.version) as llm: + + # This example script runs two requests with a shared prefix. + # Define the shared prompt and specific prompts + shared_prompt = "Hello, how are you?" * 1000 + first_prompt = [ + shared_prompt + "Hello, my name is", + ] + second_prompt = [ + shared_prompt + "Tell me a very long story", + ] + + sampling_params = SamplingParams(temperature=0, + top_p=0.95, + max_tokens=10) + + # Print the first output + print_output(llm, first_prompt, sampling_params, "first") + + time.sleep(1) + + # print the second output + print_output(llm, second_prompt, sampling_params, "second") + + +if __name__ == "__main__": + main() diff --git a/examples/offline_inference/disaggregated_prefill_lmcache.py b/examples/lmcache/disagg_prefill_lmcache_v0.py similarity index 100% rename from examples/offline_inference/disaggregated_prefill_lmcache.py rename to examples/lmcache/disagg_prefill_lmcache_v0.py diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml b/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml new file mode 100644 index 00000000000..c3f5a0ae69c --- /dev/null +++ b/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-decoder-config.yaml @@ -0,0 +1,13 @@ +local_cpu: False +max_local_cpu_size: 0 +#local_disk: +max_local_disk_size: 0 +remote_serde: NULL + +enable_nixl: True +nixl_role: "receiver" +nixl_peer_host: "localhost" +nixl_peer_port: 55555 +nixl_buffer_size: 1073741824 # 1GB +nixl_buffer_device: "cuda" +nixl_enable_gc: True diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml b/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml new file mode 100644 index 00000000000..8b0e82958a6 --- /dev/null +++ b/examples/lmcache/disagg_prefill_lmcache_v1/configs/lmcache-prefiller-config.yaml @@ -0,0 +1,13 @@ +local_cpu: False +max_local_cpu_size: 0 +#local_disk: +max_local_disk_size: 0 +remote_serde: NULL + +enable_nixl: True +nixl_role: "sender" +nixl_peer_host: "localhost" +nixl_peer_port: 55555 +nixl_buffer_size: 1073741824 # 1GB +nixl_buffer_device: "cuda" +nixl_enable_gc: True diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh b/examples/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh new file mode 100644 index 00000000000..df8a4129350 --- /dev/null +++ b/examples/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh @@ -0,0 +1,136 @@ +#!/bin/bash + +echo "Warning: LMCache disaggregated prefill support for vLLM v1 is experimental and subject to change." 
+ + +PIDS=() + +# Switch to the directory of the current script +cd "$(dirname "${BASH_SOURCE[0]}")" + +check_hf_token() { + if [ -z "$HF_TOKEN" ]; then + echo "HF_TOKEN is not set. Please set it to your Hugging Face token." + exit 1 + fi + if [[ "$HF_TOKEN" != hf_* ]]; then + echo "HF_TOKEN is not a valid Hugging Face token. Please set it to your Hugging Face token." + exit 1 + fi + echo "HF_TOKEN is set and valid." +} + +check_num_gpus() { + # can you check if the number of GPUs are >=2 via nvidia-smi? + num_gpus=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) + if [ "$num_gpus" -lt 2 ]; then + echo "You need at least 2 GPUs to run disaggregated prefill." + exit 1 + else + echo "Found $num_gpus GPUs." + fi +} + +ensure_python_library_installed() { + echo "Checking if $1 is installed..." + python -c "import $1" > /dev/null 2>&1 + if [ $? -ne 0 ]; then + if [ "$1" == "nixl" ]; then + echo "$1 is not installed. Please refer to https://github.com/ai-dynamo/nixl for installation." + else + echo "$1 is not installed. Please install it via pip install $1." + fi + exit 1 + else + echo "$1 is installed." + fi +} + +cleanup() { + echo "Stopping everything…" + trap - INT TERM # prevent re-entrancy + kill -- -$$ # negative PID == “this whole process-group” + wait # reap children so we don't leave zombies + exit 0 +} + +wait_for_server() { + local port=$1 + local timeout_seconds=1200 + local start_time=$(date +%s) + + echo "Waiting for server on port $port..." + + while true; do + if curl -s "localhost:${port}/v1/completions" > /dev/null; then + return 0 + fi + + local now=$(date +%s) + if (( now - start_time >= timeout_seconds )); then + echo "Timeout waiting for server" + return 1 + fi + + sleep 1 + done +} + + +main() { + check_hf_token + check_num_gpus + ensure_python_library_installed lmcache + ensure_python_library_installed nixl + ensure_python_library_installed pandas + ensure_python_library_installed datasets + ensure_python_library_installed vllm + + trap cleanup INT + trap cleanup USR1 + trap cleanup TERM + + echo "Launching prefiller, decoder and proxy..." + echo "Please check prefiller.log, decoder.log and proxy.log for logs." + + bash disagg_vllm_launcher.sh prefiller \ + > >(tee prefiller.log) 2>&1 & + prefiller_pid=$! + PIDS+=($prefiller_pid) + + bash disagg_vllm_launcher.sh decoder \ + > >(tee decoder.log) 2>&1 & + decoder_pid=$! + PIDS+=($decoder_pid) + + python3 disagg_proxy_server.py \ + --host localhost \ + --port 9000 \ + --prefiller-host localhost \ + --prefiller-port 8100 \ + --decoder-host localhost \ + --decoder-port 8200 \ + > >(tee proxy.log) 2>&1 & + proxy_pid=$! + PIDS+=($proxy_pid) + + wait_for_server 8100 + wait_for_server 8200 + wait_for_server 9000 + + echo "All servers are up. Starting benchmark..." + + # begin benchmark + cd ../../../benchmarks/ + python benchmark_serving.py --port 9000 --seed $(date +%s) \ + --model meta-llama/Llama-3.1-8B-Instruct \ + --dataset-name random --random-input-len 7500 --random-output-len 200 \ + --num-prompts 200 --burstiness 100 --request-rate 3.6 | tee benchmark.log + + echo "Benchmarking done. Cleaning up..." 
+ + cleanup + +} + +main \ No newline at end of file diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py b/examples/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py new file mode 100644 index 00000000000..8db93bc8931 --- /dev/null +++ b/examples/lmcache/disagg_prefill_lmcache_v1/disagg_proxy_server.py @@ -0,0 +1,193 @@ +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import os +import time +from contextlib import asynccontextmanager + +import httpx +import numpy as np +from fastapi import FastAPI, Request +from fastapi.responses import StreamingResponse + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Lifespan context manager to handle startup and shutdown events. + """ + # Startup: Initialize clients + prefiller_base_url = f'http://{global_args.prefiller_host}:{global_args.prefiller_port}/v1' + decoder_base_url = f'http://{global_args.decoder_host}:{global_args.decoder_port}/v1' + + app.state.prefill_client = httpx.AsyncClient(timeout=None, + base_url=prefiller_base_url) + app.state.decode_client = httpx.AsyncClient(timeout=None, + base_url=decoder_base_url) + + yield + + # Shutdown: Close clients + await app.state.prefill_client.aclose() + await app.state.decode_client.aclose() + + +# Update FastAPI app initialization to use lifespan +app = FastAPI(lifespan=lifespan) + + +class StatsCalculator: + + def __init__(self): + self._stats = [] + self._last_log_time = time.time() + + def add(self, value): + self._stats.append(value) + if time.time() - self._last_log_time > 5: + self._log_stats() + self._last_log_time = time.time() + + def _log_stats(self): + # Print average, median, and 99th percentile + np_arr = np.array(self._stats) + output_str = f"\nNum requests: {len(self._stats)}" + \ + "\nPrefill node TTFT stats:" + \ + f"\n - Average (ms): {np.mean(np_arr)}" + \ + f"\n - Median (ms): {np.median(np_arr)}" + \ + f"\n - 99th Percentile (ms): {np.percentile(np_arr, 99)}\n" + print("===============================", output_str, + "===============================") + + +stats_calculator = StatsCalculator() +counter = 0 + + +def parse_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--host", type=str, default="localhost") + parser.add_argument("--prefiller-host", type=str, default="localhost") + parser.add_argument("--prefiller-port", type=int, default=8100) + parser.add_argument("--decoder-host", type=str, default="localhost") + parser.add_argument("--decoder-port", type=int, default=8200) + args = parser.parse_args() + return args + + +# Initialize variables to hold the persistent clients +app.state.prefill_client = None +app.state.decode_client = None + + +async def send_request_to_service(client: httpx.AsyncClient, endpoint: str, + req_data: dict): + """ + Send a request to a service using a persistent client. + """ + req_data = req_data.copy() + req_data['max_tokens'] = 1 + if 'max_completion_tokens' in req_data: + req_data['max_completion_tokens'] = 1 + + headers = {"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"} + response = await client.post(endpoint, json=req_data, headers=headers) + response.raise_for_status() + return response + + +async def stream_service_response(client: httpx.AsyncClient, endpoint: str, + req_data: dict): + """ + Asynchronously stream the response from a service using a persistent client. 
+ """ + headers = {"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"} + async with client.stream("POST", endpoint, json=req_data, + headers=headers) as response: + response.raise_for_status() + async for chunk in response.aiter_bytes(): + yield chunk + + +@app.post("/v1/completions") +async def handle_completions(request: Request): + global counter, stats_calculator + counter += 1 + + st = time.time() + try: + req_data = await request.json() + + # Send request to prefill service, ignore the response + await send_request_to_service(app.state.prefill_client, "/completions", + req_data) + + et = time.time() + stats_calculator.add(et - st) + + # Stream response from decode service + async def generate_stream(): + async for chunk in stream_service_response(app.state.decode_client, + "/completions", + req_data): + yield chunk + + return StreamingResponse(generate_stream(), + media_type="application/json") + + except Exception as e: + import sys + import traceback + exc_info = sys.exc_info() + print("Error occurred in disagg prefill proxy server" + " - completions endpoint") + print(e) + print("".join(traceback.format_exception(*exc_info))) + raise + + +@app.post("/v1/chat/completions") +async def handle_chat_completions(request: Request): + global counter, stats_calculator + counter += 1 + + st = time.time() + try: + req_data = await request.json() + + # Send request to prefill service, ignore the response + await send_request_to_service(app.state.prefill_client, + "/chat/completions", req_data) + + et = time.time() + stats_calculator.add(et - st) + + # Stream response from decode service + async def generate_stream(): + async for chunk in stream_service_response(app.state.decode_client, + "/chat/completions", + req_data): + yield chunk + + return StreamingResponse(generate_stream(), + media_type="application/json") + + except Exception as e: + import sys + import traceback + exc_info = sys.exc_info() + print("Error occurred in disagg prefill proxy server " + " - chat completions endpoint") + print(e) + print("".join(traceback.format_exception(*exc_info))) + raise + + +if __name__ == '__main__': + global global_args + global_args = parse_args() + + import uvicorn + uvicorn.run(app, host=global_args.host, port=global_args.port) diff --git a/examples/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh b/examples/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh new file mode 100644 index 00000000000..831ef0bb574 --- /dev/null +++ b/examples/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 [model]" + exit 1 +fi + +if [[ $# -eq 1 ]]; then + echo "Using default model: meta-llama/Llama-3.1-8B-Instruct" + MODEL="meta-llama/Llama-3.1-8B-Instruct" +else + echo "Using model: $2" + MODEL=$2 +fi + + +if [[ $1 == "prefiller" ]]; then + # Prefiller listens on port 8100 + prefill_config_file=$SCRIPT_DIR/configs/lmcache-prefiller-config.yaml + + UCX_TLS=cuda_ipc,cuda_copy,tcp \ + LMCACHE_CONFIG_FILE=$prefill_config_file \ + LMCACHE_USE_EXPERIMENTAL=True \ + VLLM_ENABLE_V1_MULTIPROCESSING=1 \ + VLLM_WORKER_MULTIPROC_METHOD=spawn \ + CUDA_VISIBLE_DEVICES=0 \ + vllm serve $MODEL \ + --port 8100 \ + --disable-log-requests \ + --enforce-eager \ + --kv-transfer-config \ + '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_producer","kv_connector_extra_config": {"discard_partial_chunks": false, "lmcache_rpc_port": "producer1"}}' + + +elif [[ $1 == 
"decoder" ]]; then + # Decoder listens on port 8200 + decode_config_file=$SCRIPT_DIR/configs/lmcache-decoder-config.yaml + + UCX_TLS=cuda_ipc,cuda_copy,tcp \ + LMCACHE_CONFIG_FILE=$decode_config_file \ + LMCACHE_USE_EXPERIMENTAL=True \ + VLLM_ENABLE_V1_MULTIPROCESSING=1 \ + VLLM_WORKER_MULTIPROC_METHOD=spawn \ + CUDA_VISIBLE_DEVICES=1 \ + vllm serve $MODEL \ + --port 8200 \ + --disable-log-requests \ + --enforce-eager \ + --kv-transfer-config \ + '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_consumer","kv_connector_extra_config": {"discard_partial_chunks": false, "lmcache_rpc_port": "consumer1"}}' + + +else + echo "Invalid role: $1" + echo "Should be either prefill, decode" + exit 1 +fi diff --git a/examples/lmcache/kv_cache_sharing_lmcache_v1.py b/examples/lmcache/kv_cache_sharing_lmcache_v1.py new file mode 100644 index 00000000000..af1b4351dd5 --- /dev/null +++ b/examples/lmcache/kv_cache_sharing_lmcache_v1.py @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +This file demonstrates the example usage of remote KV cache sharing +with LMCache. +We will launch 2 vllm instances, and launch an additional LMCache server. +KV cache is transferred in the following manner: +(1) vLLM instance 1 -> LMCache server (KV cache store). +(2) LMCache server -> vLLM instance 2 (KV cache reuse/retrieve). + +Note that lmcache needs to be installed to run this example. +Learn more about LMCache in https://github.com/LMCache/LMCache. +""" +import os +import subprocess +import time +from multiprocessing import Event, Process + +from lmcache.experimental.cache_engine import LMCacheEngineBuilder +from lmcache.integration.vllm.utils import ENGINE_NAME + +from vllm import LLM, SamplingParams +from vllm.config import KVTransferConfig + +# LMCache-related environment variables +# The port to start LMCache server +port = 8100 +# Use experimental features in LMCache +os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True" +# LMCache is set to use 256 tokens per chunk +os.environ["LMCACHE_CHUNK_SIZE"] = "256" +# Disable local CPU backend in LMCache +os.environ["LMCACHE_LOCAL_CPU"] = "False" +# Set local CPU memory buffer limit to 5.0 GB +os.environ["LMCACHE_MAX_LOCAL_CPU_SIZE"] = "5.0" +# Set the remote URL for LMCache server +os.environ["LMCACHE_REMOTE_URL"] = f"lm://localhost:{port}" +# Set the serializer/deserializer between vllm and LMCache server +# `naive` indicates using raw bytes of the tensor without any compression +os.environ["LMCACHE_REMOTE_SERDE"] = "naive" + +prompts = [ + "Hello, how are you?" * 1000, +] + + +def run_store(store_done, prompts): + # We use GPU 0 for KV cache store process. + os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10) + + ktc = KVTransferConfig.from_cli( + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}') + # Set GPU memory utilization to 0.8 for an A40 GPU with 40GB + # memory. Reduce the value if your GPU has less memory. + llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.2", + kv_transfer_config=ktc, + max_model_len=8000, + gpu_memory_utilization=0.8, + enforce_eager=True) + + outputs = llm.generate(prompts, sampling_params) + for output in outputs: + generated_text = output.outputs[0].text + print(f"Generated text: {generated_text!r}") + print("KV cache store is finished.") + store_done.set() + + # Clean up lmcache backend + LMCacheEngineBuilder.destroy(ENGINE_NAME) + + +def run_retrieve(store_done, prompts, timeout=1): + # We use GPU 1 for KV cache retrieve process. 
+ os.environ["CUDA_VISIBLE_DEVICES"] = "1" + + sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10) + + ktc = KVTransferConfig.from_cli( + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}') + # Set GPU memory utilization to 0.8 for an A40 GPU with 40GB + # of memory. Reduce the value if your GPU has less memory. + llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.2", + kv_transfer_config=ktc, + max_model_len=8000, + gpu_memory_utilization=0.8, + enforce_eager=True) + + print("Waiting for KV cache store to finish...") + store_done.wait() + time.sleep(timeout) + + outputs = llm.generate(prompts, sampling_params) + for output in outputs: + generated_text = output.outputs[0].text + print(f"Generated text: {generated_text!r}") + + # Clean up lmcache backend + LMCacheEngineBuilder.destroy(ENGINE_NAME) + + +def run_lmcache_server(port): + server_proc = subprocess.Popen([ + "python", "-m", "lmcache.experimental.server", "localhost", + str(port) + ]) + return server_proc + + +def main(): + store_done = Event() + store_process = Process(target=run_store, args=(store_done, prompts)) + retrieve_process = Process(target=run_retrieve, args=(store_done, prompts)) + lmcache_server_process = run_lmcache_server(port) + + # Start KV cache store process + store_process.start() + + # Start KV cache retrieve process + retrieve_process.start() + + # Clean up the processes + store_process.join() + retrieve_process.terminate() + lmcache_server_process.terminate() + lmcache_server_process.wait() + + +if __name__ == "__main__": + main() diff --git a/examples/offline_inference/audio_language.py b/examples/offline_inference/audio_language.py index 8f6779088e8..bab41c915c3 100644 --- a/examples/offline_inference/audio_language.py +++ b/examples/offline_inference/audio_language.py @@ -38,6 +38,37 @@ class ModelRequestData(NamedTuple): # Unless specified, these settings have been tested to work on a single L4. +# Granite Speech +def run_granite_speech(question: str, audio_count: int) -> ModelRequestData: + # NOTE - the setting in this example are somehat different than what is + # optimal for granite speech, and it is generally recommended to use beam + # search. Check the model README for suggested settings. + # https://huggingface.co/ibm-granite/granite-speech-3.3-8b + model_name = "ibm-granite/granite-speech-3.3-8b" + + engine_args = EngineArgs( + model=model_name, + trust_remote_code=True, + max_model_len=2048, + max_num_seqs=2, + enable_lora=True, + max_lora_rank=64, + limit_mm_per_prompt={"audio": audio_count}, + ) + + # The model has an audio-specific lora directly in its model dir; + # it should be enabled whenever you pass audio inputs to the model. + speech_lora_path = model_name + audio_placeholder = "<|audio|>" * audio_count + prompts = f"<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. 
You are a helpful AI assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>{audio_placeholder}{question}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>" # noqa: E501 + + return ModelRequestData( + engine_args=engine_args, + prompt=prompts, + lora_requests=[LoRARequest("speech", 1, speech_lora_path)], + ) + + # MiniCPM-O def run_minicpmo(question: str, audio_count: int) -> ModelRequestData: model_name = "openbmb/MiniCPM-o-2_6" @@ -89,7 +120,7 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData: engine_args = EngineArgs( model=model_path, trust_remote_code=True, - max_model_len=4096, + max_model_len=12800, max_num_seqs=2, enable_lora=True, max_lora_rank=320, @@ -130,6 +161,36 @@ def run_qwen2_audio(question: str, audio_count: int) -> ModelRequestData: ) +# Qwen2.5-Omni +def run_qwen2_5_omni(question: str, audio_count: int): + model_name = "Qwen/Qwen2.5-Omni-7B" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=5, + limit_mm_per_prompt={"audio": audio_count}, + ) + + audio_in_prompt = "".join([ + "<|audio_bos|><|AUDIO|><|audio_eos|>\n" for idx in range(audio_count) + ]) + + default_system = ( + "You are Qwen, a virtual human developed by the Qwen Team, Alibaba " + "Group, capable of perceiving auditory and visual inputs, as well as " + "generating text and speech.") + + prompt = (f"<|im_start|>system\n{default_system}<|im_end|>\n" + "<|im_start|>user\n" + f"{audio_in_prompt}{question}<|im_end|>\n" + "<|im_start|>assistant\n") + return ModelRequestData( + engine_args=engine_args, + prompt=prompt, + ) + + # Ultravox 0.5-1B def run_ultravox(question: str, audio_count: int) -> ModelRequestData: model_name = "fixie-ai/ultravox-v0_5-llama-3_2-1b" @@ -179,9 +240,11 @@ def run_whisper(question: str, audio_count: int) -> ModelRequestData: model_example_map = { + "granite_speech": run_granite_speech, "minicpmo": run_minicpmo, "phi4_mm": run_phi4mm, "qwen2_audio": run_qwen2_audio, + "qwen2_5_omni": run_qwen2_5_omni, "ultravox": run_ultravox, "whisper": run_whisper, } diff --git a/examples/offline_inference/batch_llm_inference.py b/examples/offline_inference/batch_llm_inference.py new file mode 100644 index 00000000000..6548857b6d1 --- /dev/null +++ b/examples/offline_inference/batch_llm_inference.py @@ -0,0 +1,90 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +This example shows how to use Ray Data for data parallel batch inference. + +Ray Data is a data processing framework that can handle large datasets +and integrates tightly with vLLM for data-parallel inference. + +As of Ray 2.44, Ray Data has a native integration with +vLLM (under ray.data.llm). + +Ray Data provides functionality for: +* Reading and writing to cloud storage (S3, GCS, etc.) +* Automatic sharding and load-balancing across a cluster +* Optimized configuration of vLLM using continuous batching +* Compatible with tensor/pipeline parallel inference as well. + +Learn more about Ray Data's LLM integration: +https://docs.ray.io/en/latest/data/working-with-llms.html +""" +import ray +from packaging.version import Version +from ray.data.llm import build_llm_processor, vLLMEngineProcessorConfig + +assert Version(ray.__version__) >= Version( + "2.44.1"), "Ray version must be at least 2.44.1" + +# Uncomment to reduce clutter in stdout +# ray.init(log_to_driver=False) +# ray.data.DataContext.get_current().enable_progress_bars = False + +# Read one text file from S3. 
Ray Data supports reading multiple files +# from cloud storage (such as JSONL, Parquet, CSV, binary format). +ds = ray.data.read_text("s3://anonymous@air-example-data/prompts.txt") +print(ds.schema()) + +size = ds.count() +print(f"Size of dataset: {size} prompts") + +# Configure vLLM engine. +config = vLLMEngineProcessorConfig( + model_source="unsloth/Llama-3.1-8B-Instruct", + engine_kwargs={ + "enable_chunked_prefill": True, + "max_num_batched_tokens": 4096, + "max_model_len": 16384, + }, + concurrency=1, # set the number of parallel vLLM replicas + batch_size=64, +) + +# Create a Processor object, which will be used to +# do batch inference on the dataset +vllm_processor = build_llm_processor( + config, + preprocess=lambda row: dict( + messages=[{ + "role": "system", + "content": "You are a bot that responds with haikus." + }, { + "role": "user", + "content": row["text"] + }], + sampling_params=dict( + temperature=0.3, + max_tokens=250, + )), + postprocess=lambda row: dict( + answer=row["generated_text"], + **row # This will return all the original columns in the dataset. + ), +) + +ds = vllm_processor(ds) + +# Peek first 10 results. +# NOTE: This is for local testing and debugging. For production use case, +# one should write full result out as shown below. +outputs = ds.take(limit=10) + +for output in outputs: + prompt = output["prompt"] + generated_text = output["generated_text"] + print(f"Prompt: {prompt!r}") + print(f"Generated text: {generated_text!r}") + +# Write inference output data out as Parquet files to S3. +# Multiple files would be written to the output destination, +# and each task would write one or more files separately. +# +# ds.write_parquet("s3://") diff --git a/examples/offline_inference/cpu_offload_lmcache.py b/examples/offline_inference/cpu_offload_lmcache.py deleted file mode 100644 index 8211629b24e..00000000000 --- a/examples/offline_inference/cpu_offload_lmcache.py +++ /dev/null @@ -1,65 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -""" -This file demonstrates the example usage of cpu offloading -with LMCache. - -Note that `pip install lmcache` is needed to run this example. -Learn more about LMCache in https://github.com/LMCache/LMCache. -""" -import os -import time - -from lmcache.experimental.cache_engine import LMCacheEngineBuilder -from lmcache.integration.vllm.utils import ENGINE_NAME - -from vllm import LLM, SamplingParams -from vllm.config import KVTransferConfig - -# LMCache-related environment variables -# Use experimental features in LMCache -os.environ["LMCACHE_USE_EXPERIMENTAL"] = "True" -# LMCache is set to use 256 tokens per chunk -os.environ["LMCACHE_CHUNK_SIZE"] = "256" -# Enable local CPU backend in LMCache -os.environ["LMCACHE_LOCAL_CPU"] = "True" -# Set local CPU memory limit to 5.0 GB -os.environ["LMCACHE_MAX_LOCAL_CPU_SIZE"] = "5.0" - -# This example script runs two requests with a shared prefix. -shared_prompt = "Hello, how are you?" * 1000 -first_prompt = [ - shared_prompt + "Hello, my name is", -] -second_prompt = [ - shared_prompt + "Tell me a very long story", -] - -sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10) - -ktc = KVTransferConfig.from_cli( - '{"kv_connector":"LMCacheConnector", "kv_role":"kv_both"}') -# Set GPU memory utilization to 0.8 for an A40 GPU with 40GB -# memory. Reduce the value if your GPU has less memory. -# Note that LMCache is not compatible with chunked prefill for now. 
-llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.2", - kv_transfer_config=ktc, - max_model_len=8000, - enable_chunked_prefill=False, - gpu_memory_utilization=0.8) - -outputs = llm.generate(first_prompt, sampling_params) -for output in outputs: - generated_text = output.outputs[0].text - print(f"Generated text: {generated_text!r}") -print("First request done.") - -time.sleep(1) - -outputs = llm.generate(second_prompt, sampling_params) -for output in outputs: - generated_text = output.outputs[0].text - print(f"Generated text: {generated_text!r}") -print("Second request done.") - -# Clean up lmcache backend -LMCacheEngineBuilder.destroy(ENGINE_NAME) diff --git a/examples/offline_inference/disaggregated-prefill-v1/decode_example.py b/examples/offline_inference/disaggregated-prefill-v1/decode_example.py new file mode 100644 index 00000000000..66efbc0c9de --- /dev/null +++ b/examples/offline_inference/disaggregated-prefill-v1/decode_example.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 + +from vllm import LLM, SamplingParams +from vllm.config import KVTransferConfig + +# Read prompts from output.txt +prompts = [] +try: + with open("output.txt") as f: + for line in f: + prompts.append(line.strip()) + print(f"Loaded {len(prompts)} prompts from output.txt") +except FileNotFoundError: + print("Error: output.txt file not found") + exit(-1) + +sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10) + +llm = LLM( + model="meta-llama/Llama-3.2-1B-Instruct", + enforce_eager=True, + gpu_memory_utilization=0.8, + max_num_batched_tokens=64, + max_num_seqs=16, + kv_transfer_config=KVTransferConfig.from_cli( + '{"kv_connector":"SharedStorageConnector","kv_role":"kv_both",' + '"kv_connector_extra_config": {"shared_storage_path": "local_storage"}}' + )) #, max_model_len=2048, max_num_batched_tokens=2048) + +# 1ST generation (prefill instance) +outputs = llm.generate(prompts, sampling_params) + +for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") diff --git a/examples/offline_inference/disaggregated-prefill-v1/prefill_example.py b/examples/offline_inference/disaggregated-prefill-v1/prefill_example.py new file mode 100644 index 00000000000..f7cbf6557d5 --- /dev/null +++ b/examples/offline_inference/disaggregated-prefill-v1/prefill_example.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: Apache-2.0 + +from vllm import LLM, SamplingParams +from vllm.config import KVTransferConfig + +context = "Hi " * 1000 +context2 = "Hey " * 500 +prompts = [ + context + "Hello, my name is", + context + "The capital of France is", + context2 + "Your name is", + context2 + "The capital of China is", +] + +sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=1) + +llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct", + enforce_eager=True, + gpu_memory_utilization=0.8, + kv_transfer_config=KVTransferConfig.from_cli( + '{"kv_connector":"SharedStorageConnector","kv_role":"kv_both", ' + '"kv_connector_extra_config": ' + '{"shared_storage_path": "local_storage"}}') + ) #, max_model_len=2048, max_num_batched_tokens=2048) + +# 1ST generation (prefill instance) +outputs = llm.generate( + prompts, + sampling_params, +) + +new_prompts = [] +for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + new_prompts.append(prompt + generated_text) + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + +# Write new_prompts to output.txt +with 
open("output.txt", "w") as f: + for prompt in new_prompts: + f.write(prompt + "\n") +print(f"Saved {len(new_prompts)} prompts to output.txt") diff --git a/examples/offline_inference/disaggregated-prefill-v1/run.sh b/examples/offline_inference/disaggregated-prefill-v1/run.sh new file mode 100644 index 00000000000..0ebf45a1586 --- /dev/null +++ b/examples/offline_inference/disaggregated-prefill-v1/run.sh @@ -0,0 +1,5 @@ +rm -rf local_storage/ +rm output.txt + +VLLM_ENABLE_V1_MULTIPROCESSING=0 CUDA_VISIBLE_DEVICES=0 python3 prefill_example.py +VLLM_ENABLE_V1_MULTIPROCESSING=0 CUDA_VISIBLE_DEVICES=0 python3 decode_example.py diff --git a/examples/offline_inference/distributed.py b/examples/offline_inference/distributed.py deleted file mode 100644 index e890c6dad8b..00000000000 --- a/examples/offline_inference/distributed.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -""" -This example shows how to use Ray Data for running offline batch inference -distributively on a multi-nodes cluster. - -Learn more about Ray Data in https://docs.ray.io/en/latest/data/data.html -""" - -from typing import Any - -import numpy as np -import ray -from packaging.version import Version -from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy - -from vllm import LLM, SamplingParams - -assert Version(ray.__version__) >= Version( - "2.22.0"), "Ray version must be at least 2.22.0" - -# Create a sampling params object. -sampling_params = SamplingParams(temperature=0.8, top_p=0.95) - -# Set tensor parallelism per instance. -tensor_parallel_size = 1 - -# Set number of instances. Each instance will use tensor_parallel_size GPUs. -num_instances = 1 - - -# Create a class to do batch inference. -class LLMPredictor: - - def __init__(self): - # Create an LLM. - self.llm = LLM(model="meta-llama/Llama-2-7b-chat-hf", - tensor_parallel_size=tensor_parallel_size) - - def __call__(self, batch: dict[str, np.ndarray]) -> dict[str, list]: - # Generate texts from the prompts. - # The output is a list of RequestOutput objects that contain the prompt, - # generated text, and other information. - outputs = self.llm.generate(batch["text"], sampling_params) - prompt: list[str] = [] - generated_text: list[str] = [] - for output in outputs: - prompt.append(output.prompt) - generated_text.append(' '.join([o.text for o in output.outputs])) - return { - "prompt": prompt, - "generated_text": generated_text, - } - - -# Read one text file from S3. Ray Data supports reading multiple files -# from cloud storage (such as JSONL, Parquet, CSV, binary format). -ds = ray.data.read_text("s3://anonymous@air-example-data/prompts.txt") - - -# For tensor_parallel_size > 1, we need to create placement groups for vLLM -# to use. Every actor has to have its own placement group. -def scheduling_strategy_fn(): - # One bundle per tensor parallel worker - pg = ray.util.placement_group( - [{ - "GPU": 1, - "CPU": 1 - }] * tensor_parallel_size, - strategy="STRICT_PACK", - ) - return dict(scheduling_strategy=PlacementGroupSchedulingStrategy( - pg, placement_group_capture_child_tasks=True)) - - -resources_kwarg: dict[str, Any] = {} -if tensor_parallel_size == 1: - # For tensor_parallel_size == 1, we simply set num_gpus=1. - resources_kwarg["num_gpus"] = 1 -else: - # Otherwise, we have to set num_gpus=0 and provide - # a function that will create a placement group for - # each instance. 
- resources_kwarg["num_gpus"] = 0 - resources_kwarg["ray_remote_args_fn"] = scheduling_strategy_fn - -# Apply batch inference for all input data. -ds = ds.map_batches( - LLMPredictor, - # Set the concurrency to the number of LLM instances. - concurrency=num_instances, - # Specify the batch size for inference. - batch_size=32, - **resources_kwarg, -) - -# Peek first 10 results. -# NOTE: This is for local testing and debugging. For production use case, -# one should write full result out as shown below. -outputs = ds.take(limit=10) -for output in outputs: - prompt = output["prompt"] - generated_text = output["generated_text"] - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - -# Write inference output data out as Parquet files to S3. -# Multiple files would be written to the output destination, -# and each task would write one or more files separately. -# -# ds.write_parquet("s3://") diff --git a/examples/offline_inference/eagle.py b/examples/offline_inference/eagle.py index c7b4368c9b1..91e2f68ecff 100644 --- a/examples/offline_inference/eagle.py +++ b/examples/offline_inference/eagle.py @@ -36,6 +36,10 @@ def parse_args(): help="downloaded from the eagle repo " \ "https://github.com/SafeAILab/EAGLE/blob/main/eagle/data/" ) + parser.add_argument("--method", + type=str, + default='eagle', + choices=['eagle', 'eagle3']) parser.add_argument("--max_num_seqs", type=int, default=8) parser.add_argument("--num_prompts", type=int, default=80) parser.add_argument("--num_spec_tokens", type=int, default=2) @@ -52,8 +56,14 @@ def main(): args = parse_args() - model_dir = "meta-llama/Meta-Llama-3-8B-Instruct" - eagle_dir = "abhigoyal/EAGLE-LLaMA3-Instruct-8B-vllm" + model_dir = "meta-llama/Llama-3.1-8B-Instruct" + + if args.method == 'eagle': + eagle_dir = "yuhuili/EAGLE-LLaMA3.1-Instruct-8B" + elif args.method == 'eagle3': + eagle_dir = "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B" + else: + raise ValueError(f"unknown method: {args.method}") max_model_len = 2048 @@ -81,7 +91,7 @@ def main(): max_num_seqs=args.max_num_seqs, gpu_memory_utilization=0.8, speculative_config={ - "method": "eagle", + "method": args.method, "model": eagle_dir, "num_speculative_tokens": args.num_spec_tokens, "draft_tensor_parallel_size": args.draft_tp, @@ -95,6 +105,9 @@ def main(): outputs = llm.generate(prompt_token_ids=prompt_ids, sampling_params=sampling_params) + if not hasattr(outputs, "metrics") or outputs.metrics is None: + return + # calculate the average number of accepted tokens per forward pass, +1 is # to account for the token from the target model that's always going to be # accepted @@ -109,6 +122,11 @@ def main(): {sum(acceptance_counts) / acceptance_counts[0]:.2f}") print("-" * 50) + # print acceptance at each token position + for i in range(len(acceptance_counts)): + print(f"acceptance at token {i}:" + f"{acceptance_counts[i] / (acceptance_counts[0]):.2f}") + if __name__ == "__main__": main() diff --git a/examples/offline_inference/encoder_decoder_multimodal.py b/examples/offline_inference/encoder_decoder_multimodal.py index 61e5f5eae4e..2883c37ca23 100644 --- a/examples/offline_inference/encoder_decoder_multimodal.py +++ b/examples/offline_inference/encoder_decoder_multimodal.py @@ -22,7 +22,7 @@ class ModelRequestData(NamedTuple): def run_florence2(): engine_args = EngineArgs( model="microsoft/Florence-2-large", - tokenizer="facebook/bart-large", + tokenizer="Isotr0py/Florence-2-tokenizer", max_num_seqs=8, trust_remote_code=True, limit_mm_per_prompt={"image": 1}, @@ -165,6 +165,7 @@ def main(args): 
temperature=0, top_p=1.0, max_tokens=64, + skip_special_tokens=False, ) start = time.time() diff --git a/examples/offline_inference/llm_engine_example.py b/examples/offline_inference/llm_engine_example.py index abff90d1c0c..d84cd9ee9f5 100644 --- a/examples/offline_inference/llm_engine_example.py +++ b/examples/offline_inference/llm_engine_example.py @@ -50,6 +50,13 @@ def initialize_engine(args: argparse.Namespace) -> LLMEngine: return LLMEngine.from_engine_args(engine_args) +def parse_args(): + parser = FlexibleArgumentParser( + description='Demo on using the LLMEngine class directly') + parser = EngineArgs.add_cli_args(parser) + return parser.parse_args() + + def main(args: argparse.Namespace): """Main function that sets up and runs the prompt processing.""" engine = initialize_engine(args) @@ -58,8 +65,5 @@ def main(args: argparse.Namespace): if __name__ == '__main__': - parser = FlexibleArgumentParser( - description='Demo on using the LLMEngine class directly') - parser = EngineArgs.add_cli_args(parser) - args = parser.parse_args() + args = parse_args() main(args) diff --git a/examples/offline_inference/mistral-small.py b/examples/offline_inference/mistral-small.py index 9bb66fdbc45..37c3181dc5f 100644 --- a/examples/offline_inference/mistral-small.py +++ b/examples/offline_inference/mistral-small.py @@ -16,11 +16,11 @@ # # Mistral format # vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \ # --tokenizer-mode mistral --config-format mistral --load-format mistral \ -# --limit-mm-per-prompt 'image=4' --max-model-len 16384 +# --limit-mm-per-prompt '{"image":4}' --max-model-len 16384 # # # HF format # vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \ -# --limit-mm-per-prompt 'image=4' --max-model-len 16384 +# --limit-mm-per-prompt '{"image":4}' --max-model-len 16384 # ``` # # - Client: @@ -62,6 +62,7 @@ def run_simple_demo(args: argparse.Namespace): tokenizer_mode="mistral" if args.format == "mistral" else "auto", config_format="mistral" if args.format == "mistral" else "auto", load_format="mistral" if args.format == "mistral" else "auto", + limit_mm_per_prompt={"image": 1}, max_model_len=4096, max_num_seqs=2, tensor_parallel_size=2, diff --git a/examples/offline_inference/profiling.py b/examples/offline_inference/profiling.py index 9c818d07573..99303950d39 100644 --- a/examples/offline_inference/profiling.py +++ b/examples/offline_inference/profiling.py @@ -14,7 +14,7 @@ from vllm import LLM, SamplingParams from vllm.engine.arg_utils import EngineArgs -from vllm.profiler import layerwise_profile +from vllm.profiler.layerwise_profile import layerwise_profile from vllm.utils import FlexibleArgumentParser BATCH_SIZE_DEFAULT = 1 diff --git a/examples/offline_inference/qwen2_5_omni/README.md b/examples/offline_inference/qwen2_5_omni/README.md new file mode 100644 index 00000000000..c30541a598c --- /dev/null +++ b/examples/offline_inference/qwen2_5_omni/README.md @@ -0,0 +1,32 @@ +# Qwen2.5-Omni Offline Inference Examples + +This folder provides several example scripts on how to inference Qwen2.5-Omni offline. + +## Thinker Only + +```bash +# Audio + image + video +python examples/offline_inference/qwen2_5_omni/only_thinker.py -q mixed_modalities + +# Read vision and audio inputs from a single video file +# NOTE: V1 engine does not support interleaved modalities yet. 
+VLLM_USE_V1=0 python examples/offline_inference/qwen2_5_omni/only_thinker.py -q use_audio_in_video + +# Multiple audios +VLLM_USE_V1=0 python examples/offline_inference/qwen2_5_omni/only_thinker.py -q multi_audios +``` + +This script will run the thinker part of Qwen2.5-Omni, and generate text response. + +You can also test Qwen2.5-Omni on a single modality: + +```bash +# Process audio inputs +python examples/offline_inference/audio_language.py --model-type qwen2_5_omni + +# Process image inputs +python examples/offline_inference/vision_language.py --modality image --model-type qwen2_5_omni + +# Process video inputs +python examples/offline_inference/vision_language.py --modality video --model-type qwen2_5_omni +``` diff --git a/examples/offline_inference/qwen2_5_omni/only_thinker.py b/examples/offline_inference/qwen2_5_omni/only_thinker.py new file mode 100644 index 00000000000..c2c28d5ae6a --- /dev/null +++ b/examples/offline_inference/qwen2_5_omni/only_thinker.py @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +This example shows how to use vLLM for running offline inference +with the correct prompt format on Qwen2.5-Omni (thinker only). +""" + +from typing import NamedTuple + +import vllm.envs as envs +from vllm import LLM, SamplingParams +from vllm.assets.audio import AudioAsset +from vllm.assets.image import ImageAsset +from vllm.assets.video import VideoAsset +from vllm.utils import FlexibleArgumentParser + + +class QueryResult(NamedTuple): + inputs: dict + limit_mm_per_prompt: dict[str, int] + + +# NOTE: The default `max_num_seqs` and `max_model_len` may result in OOM on +# lower-end GPUs. +# Unless specified, these settings have been tested to work on a single L4. + +default_system = ( + "You are Qwen, a virtual human developed by the Qwen Team, Alibaba " + "Group, capable of perceiving auditory and visual inputs, as well as " + "generating text and speech.") + + +def get_mixed_modalities_query() -> QueryResult: + question = ("What is recited in the audio? " + "What is the content of this image? Why is this video funny?") + prompt = (f"<|im_start|>system\n{default_system}<|im_end|>\n" + "<|im_start|>user\n<|audio_bos|><|AUDIO|><|audio_eos|>" + "<|vision_bos|><|IMAGE|><|vision_eos|>" + "<|vision_bos|><|VIDEO|><|vision_eos|>" + f"{question}<|im_end|>\n" + f"<|im_start|>assistant\n") + return QueryResult( + inputs={ + "prompt": prompt, + "multi_modal_data": { + "audio": + AudioAsset("mary_had_lamb").audio_and_sample_rate, + "image": + ImageAsset("cherry_blossom").pil_image.convert("RGB"), + "video": + VideoAsset(name="baby_reading", num_frames=16).np_ndarrays, + }, + }, + limit_mm_per_prompt={ + "audio": 1, + "image": 1, + "video": 1 + }, + ) + + +def get_use_audio_in_video_query() -> QueryResult: + question = ("Describe the content of the video, " + "then convert what the baby say into text.") + prompt = (f"<|im_start|>system\n{default_system}<|im_end|>\n" + "<|im_start|>user\n<|vision_bos|><|VIDEO|><|vision_eos|>" + f"{question}<|im_end|>\n" + f"<|im_start|>assistant\n") + asset = VideoAsset(name="baby_reading", num_frames=16) + audio = asset.get_audio(sampling_rate=16000) + assert not envs.VLLM_USE_V1, ("V1 does not support use_audio_in_video. 
" + "Please launch this example with " + "`VLLM_USE_V1=0`.") + return QueryResult( + inputs={ + "prompt": prompt, + "multi_modal_data": { + "video": asset.np_ndarrays, + "audio": audio, + }, + "mm_processor_kwargs": { + "use_audio_in_video": True, + }, + }, + limit_mm_per_prompt={ + "audio": 1, + "video": 1 + }, + ) + + +def get_multi_audios_query() -> QueryResult: + question = "Are these two audio clips the same?" + prompt = (f"<|im_start|>system\n{default_system}<|im_end|>\n" + "<|im_start|>user\n<|audio_bos|><|AUDIO|><|audio_eos|>" + "<|audio_bos|><|AUDIO|><|audio_eos|>" + f"{question}<|im_end|>\n" + f"<|im_start|>assistant\n") + return QueryResult( + inputs={ + "prompt": prompt, + "multi_modal_data": { + "audio": [ + AudioAsset("winning_call").audio_and_sample_rate, + AudioAsset("mary_had_lamb").audio_and_sample_rate, + ], + }, + }, + limit_mm_per_prompt={ + "audio": 2, + }, + ) + + +query_map = { + "mixed_modalities": get_mixed_modalities_query, + "use_audio_in_video": get_use_audio_in_video_query, + "multi_audios": get_multi_audios_query, +} + + +def main(args): + model_name = "Qwen/Qwen2.5-Omni-7B" + query_result = query_map[args.query_type]() + + llm = LLM(model=model_name, + max_model_len=5632, + max_num_seqs=5, + limit_mm_per_prompt=query_result.limit_mm_per_prompt, + seed=args.seed) + + # We set temperature to 0.2 so that outputs can be different + # even when all prompts are identical when running batch inference. + sampling_params = SamplingParams(temperature=0.2, max_tokens=64) + + outputs = llm.generate(query_result.inputs, + sampling_params=sampling_params) + + for o in outputs: + generated_text = o.outputs[0].text + print(generated_text) + + +if __name__ == "__main__": + parser = FlexibleArgumentParser( + description='Demo on using vLLM for offline inference with ' + 'audio language models') + parser.add_argument('--query-type', + '-q', + type=str, + default="mixed_modalities", + choices=query_map.keys(), + help='Query type.') + parser.add_argument("--seed", + type=int, + default=None, + help="Set the seed when initializing `vllm.LLM`.") + + args = parser.parse_args() + main(args) diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py index 6b533346ac3..aca11f5c50b 100644 --- a/examples/offline_inference/vision_language.py +++ b/examples/offline_inference/vision_language.py @@ -150,7 +150,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData: engine_args = EngineArgs( model="microsoft/Florence-2-large", - tokenizer="facebook/bart-large", + tokenizer="Isotr0py/Florence-2-tokenizer", max_model_len=4096, max_num_seqs=2, trust_remote_code=True, @@ -376,9 +376,9 @@ def run_kimi_vl(questions: list[str], modality: str) -> ModelRequestData: engine_args = EngineArgs( model="moonshotai/Kimi-VL-A3B-Instruct", - max_model_len=4096, - disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache, trust_remote_code=True, + max_model_len=4096, + limit_mm_per_prompt={"image": 1}, ) return ModelRequestData( @@ -725,6 +725,34 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData: ) +# Ovis2 +def run_ovis2(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + model_name = "AIDC-AI/Ovis2-1B" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=2, + trust_remote_code=True, + dtype="half", + hf_overrides={"architectures": ["Ovis2ForConditionalGeneration"]}, + limit_mm_per_prompt={"image": 1}, + ) + + placeholder = "\n" + 
prompts = [("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n{placeholder}" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n") for question in questions] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + # PaliGemma def run_paligemma(questions: list[str], modality: str) -> ModelRequestData: assert modality == "image" @@ -814,10 +842,13 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData: engine_args = EngineArgs( model=model_path, trust_remote_code=True, - max_model_len=4096, + max_model_len=5120, max_num_seqs=2, + max_num_batched_tokens=12800, enable_lora=True, max_lora_rank=320, + # Note - mm_processor_kwargs can also be passed to generate/chat calls + mm_processor_kwargs={"dynamic_hd": 16}, limit_mm_per_prompt={"image": 1}, ) @@ -941,6 +972,42 @@ def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData: ) +# Qwen2.5-Omni +def run_qwen2_5_omni(questions: list[str], modality: str): + model_name = "Qwen/Qwen2.5-Omni-7B" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=5, + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + "fps": [1], + }, + limit_mm_per_prompt={"image": 1}, + ) + + if modality == "image": + placeholder = "<|IMAGE|>" + elif modality == "video": + placeholder = "<|VIDEO|>" + + default_system = ( + "You are Qwen, a virtual human developed by the Qwen Team, Alibaba " + "Group, capable of perceiving auditory and visual inputs, as well as " + "generating text and speech.") + + prompts = [(f"<|im_start|>system\n{default_system}<|im_end|>\n" + f"<|im_start|>user\n<|vision_bos|>{placeholder}<|vision_eos|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n") for question in questions] + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + # SkyworkR1V def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData: assert modality == "image" @@ -1002,6 +1069,7 @@ def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData: "llama4": run_llama4, "molmo": run_molmo, "NVLM_D": run_nvlm_d, + "ovis2": run_ovis2, "paligemma": run_paligemma, "paligemma2": run_paligemma2, "phi3_v": run_phi3v, @@ -1010,6 +1078,7 @@ def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData: "qwen_vl": run_qwen_vl, "qwen2_vl": run_qwen2_vl, "qwen2_5_vl": run_qwen2_5_vl, + "qwen2_5_omni": run_qwen2_5_omni, "skywork_chat": run_skyworkr1v, "smolvlm": run_smolvlm, } @@ -1040,7 +1109,7 @@ def get_multi_modal_input(args): if args.modality == "video": # Input video and question - video = VideoAsset(name="sample_demo_1.mp4", + video = VideoAsset(name="baby_reading", num_frames=args.num_frames).np_ndarrays vid_questions = ["Why is this video funny?"] diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py index 52e93896706..48d590b05b0 100644 --- a/examples/offline_inference/vision_language_multi_image.py +++ b/examples/offline_inference/vision_language_multi_image.py @@ -331,11 +331,10 @@ def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData: engine_args = EngineArgs( model=model_name, + trust_remote_code=True, max_model_len=4096, max_num_seqs=4, - tensor_parallel_size=1, limit_mm_per_prompt={"image": len(image_urls)}, - trust_remote_code=True, ) placeholders = [{"type": "image", "image": url} for url in image_urls] @@ -437,6 +436,34 @@ def load_nvlm_d(question: 
str, image_urls: list[str]) -> ModelRequestData: ) +# Ovis2 +def load_ovis2(question: str, image_urls: list[str]) -> ModelRequestData: + model_name = "AIDC-AI/Ovis2-1B" + + engine_args = EngineArgs( + model=model_name, + max_model_len=8192, + max_num_seqs=2, + trust_remote_code=True, + dtype="half", + limit_mm_per_prompt={"image": len(image_urls)}, + hf_overrides={"architectures": ["Ovis2ForConditionalGeneration"]}, + ) + + placeholder = '\n'.join( + [f'Image {i+1}: ' for i in range(len(image_urls))]) + '\n' + prompt = ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n{placeholder}" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n") + + return ModelRequestData( + engine_args=engine_args, + prompt=prompt, + image_data=[fetch_image(url) for url in image_urls], + ) + + def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData: model_name = "mistral-community/pixtral-12b" @@ -504,11 +531,13 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData: engine_args = EngineArgs( model=model_path, trust_remote_code=True, - max_model_len=10000, + max_model_len=4096, max_num_seqs=2, limit_mm_per_prompt={"image": len(image_urls)}, enable_lora=True, max_lora_rank=320, + # Note - mm_processor_kwargs can also be passed to generate/chat calls + mm_processor_kwargs={"dynamic_hd": 4}, ) placeholders = "".join(f"<|image_{i}|>" @@ -684,6 +713,7 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData: "mistral3": load_mistral3, "mllama": load_mllama, "NVLM_D": load_nvlm_d, + "ovis2": load_ovis2, "phi3_v": load_phi3v, "phi4_mm": load_phi4mm, "pixtral_hf": load_pixtral_hf, @@ -790,7 +820,9 @@ def parse_args(): parser.add_argument( "--num-images", "-n", - choices=list(range(1, 13)), # 12 is the max number of images + type=int, + choices=list(range(1, + len(IMAGE_URLS) + 1)), # the max number of images default=2, help="Number of images to use for the demo.") return parser.parse_args() diff --git a/examples/online_serving/chart-helm/values.yaml b/examples/online_serving/chart-helm/values.yaml index 9c48e7d061b..28dba9a6f68 100644 --- a/examples/online_serving/chart-helm/values.yaml +++ b/examples/online_serving/chart-helm/values.yaml @@ -8,7 +8,7 @@ image: # -- Image tag tag: "latest" # -- Container launch command - command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--dtype", "bfloat16", "--host", "0.0.0.0", "--port", "8000"] + command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--dtype", "float32", "--block-size", "16", "--host", "0.0.0.0", "--port", "8000"] # -- Container port containerPort: 8000 diff --git a/examples/online_serving/gradio_openai_chatbot_webserver.py b/examples/online_serving/gradio_openai_chatbot_webserver.py index 13331609eb0..314f1c5b739 100644 --- a/examples/online_serving/gradio_openai_chatbot_webserver.py +++ b/examples/online_serving/gradio_openai_chatbot_webserver.py @@ -23,10 +23,6 @@ from openai import OpenAI -def create_openai_client(api_key, base_url): - return OpenAI(api_key=api_key, base_url=base_url) - - def format_history_to_openai(history): history_openai_format = [{ "role": "system", diff --git a/examples/online_serving/kv_events.sh b/examples/online_serving/kv_events.sh new file mode 100644 index 00000000000..a111db2179f --- /dev/null +++ b/examples/online_serving/kv_events.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# This file demonstrates the KV cache event publishing +# We will launch a vllm instances configured to publish KV cache +# 
events and launch a simple subscriber to log those events. + +set -xe + +echo "🚧🚧 Warning: The usage of KV cache events is experimental and subject to change 🚧🚧" +sleep 1 + +MODEL_NAME=${HF_MODEL_NAME:-meta-llama/Meta-Llama-3.1-8B-Instruct} + +# Trap the SIGINT signal (triggered by Ctrl+C) +trap 'cleanup' INT + +# Cleanup function +cleanup() { + echo "Caught Ctrl+C, cleaning up..." + # Cleanup commands + pgrep python | xargs kill -9 + pkill -f python + echo "Cleanup complete. Exiting." + exit 0 +} + +export VLLM_HOST_IP=$(hostname -I | awk '{print $1}') + +# a function that waits vLLM server to start +wait_for_server() { + local port=$1 + timeout 1200 bash -c " + until curl -s localhost:${port}/v1/completions > /dev/null; do + sleep 1 + done" && return 0 || return 1 +} + +vllm serve $MODEL_NAME \ + --port 8100 \ + --max-model-len 100 \ + --enforce-eager \ + --gpu-memory-utilization 0.8 \ + --trust-remote-code \ + --kv-events-config \ + '{"enable_kv_cache_events": true, "publisher": "zmq", "topic": "kv-events"}' & + +wait_for_server 8100 + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +python3 "$SCRIPT_DIR/kv_events_subscriber.py" & +sleep 1 + +# serve two example requests +output1=$(curl -X POST -s http://localhost:8100/v1/completions \ +-H "Content-Type: application/json" \ +-d '{ +"model": "'"$MODEL_NAME"'", +"prompt": "Explain quantum computing in simple terms a 5-year-old could understand.", +"max_tokens": 80, +"temperature": 0 +}') + +output2=$(curl -X POST -s http://localhost:8100/v1/completions \ +-H "Content-Type: application/json" \ +-d '{ +"model": "'"$MODEL_NAME"'", +"prompt": "Explain quantum computing in simple terms a 50-year-old could understand.", +"max_tokens": 80, +"temperature": 0 +}') + +# Cleanup commands +pkill -9 -u "$USER" -f python +pkill -9 -u "$USER" -f vllm + +sleep 1 + +echo "Cleaned up" + +# Print the outputs of the curl requests +echo "" +echo "Output of first request: $output1" +echo "Output of second request: $output2" + +echo "🎉🎉 Successfully finished 2 test requests! 
🎉🎉" +echo "" diff --git a/examples/online_serving/kv_events_subscriber.py b/examples/online_serving/kv_events_subscriber.py new file mode 100644 index 00000000000..88bbbebd747 --- /dev/null +++ b/examples/online_serving/kv_events_subscriber.py @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Optional, Union + +import msgspec +import zmq +from msgspec.msgpack import Decoder + + +# +# Types copied from vllm.distributed.kv_events +# +class EventBatch(msgspec.Struct, array_like=True, omit_defaults=True, + gc=False): + ts: float + events: list[Any] + + +class KVCacheEvent(msgspec.Struct, + array_like=True, + omit_defaults=True, + gc=False, + tag=True): + """Base class for all KV cache-related events""" + + +class BlockStored(KVCacheEvent): + block_hashes: list[int] + parent_block_hash: Optional[int] + token_ids: list[int] + block_size: int + lora_id: Optional[int] + + +class BlockRemoved(KVCacheEvent): + block_hashes: list[int] + + +class AllBlocksCleared(KVCacheEvent): + pass + + +class KVEventBatch(EventBatch): + events: list[Union[BlockStored, BlockRemoved, AllBlocksCleared]] + + +def process_event(event_batch): + print(f"Received event batch at {event_batch.ts}:") + for event in event_batch.events: + print(f" - {event}") + + +def main(): + decoder = Decoder(type=KVEventBatch) + last_seq = -1 + + context = zmq.Context() + + # Set up the main subscription socket + sub = context.socket(zmq.SUB) + sub.connect("tcp://localhost:5557") + topic = "kv-events" + sub.setsockopt_string(zmq.SUBSCRIBE, topic) + + # Initialize replay socket + replay = context.socket(zmq.REQ) + replay.connect("tcp://localhost:5558") + poller = zmq.Poller() + poller.register(replay, zmq.POLLIN) + + print("Listening for KV cache events on topic:", topic) + + while True: + try: + if sub.poll(50): + _, seq_bytes, payload = sub.recv_multipart() + seq = int.from_bytes(seq_bytes, "big") + + if last_seq >= 0 and seq > last_seq + 1: + missed = seq - last_seq - 1 + print(f"Missed {missed} messages" + f" (last: {last_seq}, current: {seq})") + + replay.send((last_seq + 1).to_bytes(8, "big")) + + while poller.poll(timeout=200): + seq_bytes, replay_payload = replay.recv_multipart() + if not replay_payload: + # End of replay marker is sent as an empty frame + # for the payload + break + + replay_seq = int.from_bytes(seq_bytes, "big") + + if replay_seq > last_seq: + event_batch = decoder.decode(replay_payload) + process_event(event_batch) + last_seq = replay_seq + if replay_seq >= seq - 1: + break + + event_batch = decoder.decode(payload) + process_event(event_batch) + + # ... do other periodic work or check for shutdown ... 
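+            # NOTE: the gap handling above assumes the publisher tags each
+            # message with a monotonically increasing sequence number and
+            # serves missed messages over the REQ/REP replay socket
+            # (port 5558 here); an empty payload frame marks end of replay.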
+ + except KeyboardInterrupt: + print("Interrupted") + break + except Exception as e: + print("Error decoding message:", e) + + +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_chat_completion_client_for_multimodal.py b/examples/online_serving/openai_chat_completion_client_for_multimodal.py index ecfcf05a90d..70db4d95e64 100644 --- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py +++ b/examples/online_serving/openai_chat_completion_client_for_multimodal.py @@ -9,7 +9,7 @@ (multi-image inference with Phi-3.5-vision-instruct) vllm serve microsoft/Phi-3.5-vision-instruct --task generate \ - --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt image=2 + --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}' (audio inference with Ultravox) vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096 @@ -303,12 +303,7 @@ def run_audio() -> None: } -def main(args) -> None: - chat_type = args.chat_type - example_function_map[chat_type]() - - -if __name__ == "__main__": +def parse_args(): parser = FlexibleArgumentParser( description='Demo on using OpenAI client for online serving with ' 'multimodal language models served with vLLM.') @@ -318,5 +313,14 @@ def main(args) -> None: default="single-image", choices=list(example_function_map.keys()), help='Conversation type with multimodal data.') - args = parser.parse_args() + return parser.parse_args() + + +def main(args) -> None: + chat_type = args.chat_type + example_function_map[chat_type]() + + +if __name__ == "__main__": + args = parse_args() main(args) diff --git a/examples/online_serving/openai_chat_completion_client_with_tools.py b/examples/online_serving/openai_chat_completion_client_with_tools.py index 416fb61ca8b..c25203860ff 100644 --- a/examples/online_serving/openai_chat_completion_client_with_tools.py +++ b/examples/online_serving/openai_chat_completion_client_with_tools.py @@ -17,6 +17,7 @@ --enable-auto-tool-choice --tool-call-parser hermes """ import json +from typing import Any from openai import OpenAI @@ -24,15 +25,6 @@ openai_api_key = "EMPTY" openai_api_base = "http://localhost:8000/v1" -client = OpenAI( - # defaults to os.environ.get("OPENAI_API_KEY") - api_key=openai_api_key, - base_url=openai_api_base, -) - -models = client.models.list() -model = models.data[0].id - tools = [{ "type": "function", "function": { @@ -78,86 +70,123 @@ "Can you tell me what the temperate will be in Dallas, in fahrenheit?" 
}] -chat_completion = client.chat.completions.create(messages=messages, - model=model, - tools=tools) - -print("Chat completion results:") -print(chat_completion) -print("\n\n") - -tool_calls_stream = client.chat.completions.create(messages=messages, - model=model, - tools=tools, - stream=True) - -chunks = [] -for chunk in tool_calls_stream: - chunks.append(chunk) - if chunk.choices[0].delta.tool_calls: - print(chunk.choices[0].delta.tool_calls[0]) - else: - print(chunk.choices[0].delta) - -arguments = [] -tool_call_idx = -1 -for chunk in chunks: - - if chunk.choices[0].delta.tool_calls: - tool_call = chunk.choices[0].delta.tool_calls[0] - - if tool_call.index != tool_call_idx: - if tool_call_idx >= 0: - print( - f"streamed tool call arguments: {arguments[tool_call_idx]}" - ) - tool_call_idx = chunk.choices[0].delta.tool_calls[0].index - arguments.append("") - if tool_call.id: - print(f"streamed tool call id: {tool_call.id} ") - - if tool_call.function: - if tool_call.function.name: - print(f"streamed tool call name: {tool_call.function.name}") - - if tool_call.function.arguments: - arguments[tool_call_idx] += tool_call.function.arguments - -if len(arguments): - print(f"streamed tool call arguments: {arguments[-1]}") - -print("\n\n") - -messages.append({ - "role": "assistant", - "tool_calls": chat_completion.choices[0].message.tool_calls -}) - -# Now, simulate a tool call def get_current_weather(city: str, state: str, unit: 'str'): return ("The weather in Dallas, Texas is 85 degrees fahrenheit. It is " "partly cloudly, with highs in the 90's.") -available_tools = {"get_current_weather": get_current_weather} - -completion_tool_calls = chat_completion.choices[0].message.tool_calls -for call in completion_tool_calls: - tool_to_call = available_tools[call.function.name] - args = json.loads(call.function.arguments) - result = tool_to_call(**args) - print(result) +def handle_tool_calls_stream( + client: OpenAI, + messages: list[dict[str, str]], + model: str, + tools: list[dict[str, Any]], +) -> list[Any]: + tool_calls_stream = client.chat.completions.create(messages=messages, + model=model, + tools=tools, + stream=True) + chunks = [] + print("chunks: ") + for chunk in tool_calls_stream: + chunks.append(chunk) + if chunk.choices[0].delta.tool_calls: + print(chunk.choices[0].delta.tool_calls[0]) + else: + print(chunk.choices[0].delta) + return chunks + + +def handle_tool_calls_arguments(chunks: list[Any]) -> list[str]: + arguments = [] + tool_call_idx = -1 + print("arguments: ") + for chunk in chunks: + if chunk.choices[0].delta.tool_calls: + tool_call = chunk.choices[0].delta.tool_calls[0] + if tool_call.index != tool_call_idx: + if tool_call_idx >= 0: + print(f"streamed tool call arguments: " + f"{arguments[tool_call_idx]}") + tool_call_idx = chunk.choices[0].delta.tool_calls[0].index + arguments.append("") + if tool_call.id: + print(f"streamed tool call id: {tool_call.id} ") + + if tool_call.function: + if tool_call.function.name: + print( + f"streamed tool call name: {tool_call.function.name}") + + if tool_call.function.arguments: + arguments[tool_call_idx] += tool_call.function.arguments + + return arguments + + +def main(): + # Initialize OpenAI client + client = OpenAI( + # defaults to os.environ.get("OPENAI_API_KEY") + api_key=openai_api_key, + base_url=openai_api_base, + ) + + # Get available models and select one + models = client.models.list() + model = models.data[0].id + + chat_completion = client.chat.completions.create(messages=messages, + model=model, + tools=tools) + + print("-" * 
70) + print("Chat completion results:") + print(chat_completion) + print("-" * 70) + + # Stream tool calls + chunks = handle_tool_calls_stream(client, messages, model, tools) + print("-" * 70) + + # Handle arguments from streamed tool calls + arguments = handle_tool_calls_arguments(chunks) + + if len(arguments): + print(f"streamed tool call arguments: {arguments[-1]}\n") + + print("-" * 70) + + # Add tool call results to the conversation messages.append({ - "role": "tool", - "content": result, - "tool_call_id": call.id, - "name": call.function.name + "role": "assistant", + "tool_calls": chat_completion.choices[0].message.tool_calls }) -chat_completion_2 = client.chat.completions.create(messages=messages, - model=model, - tools=tools, - stream=False) -print("\n\n") -print(chat_completion_2) + # Now, simulate a tool call + available_tools = {"get_current_weather": get_current_weather} + + completion_tool_calls = chat_completion.choices[0].message.tool_calls + for call in completion_tool_calls: + tool_to_call = available_tools[call.function.name] + args = json.loads(call.function.arguments) + result = tool_to_call(**args) + print("tool_to_call result: ", result) + messages.append({ + "role": "tool", + "content": result, + "tool_call_id": call.id, + "name": call.function.name + }) + + chat_completion_2 = client.chat.completions.create(messages=messages, + model=model, + tools=tools, + stream=False) + print("Chat completion2 results:") + print(chat_completion_2) + print("-" * 70) + + +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_chat_completion_client_with_tools_required.py b/examples/online_serving/openai_chat_completion_client_with_tools_required.py index 779369d1634..97d900bb75f 100644 --- a/examples/online_serving/openai_chat_completion_client_with_tools_required.py +++ b/examples/online_serving/openai_chat_completion_client_with_tools_required.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 """ -To run this example, you can start the vLLM server +To run this example, you can start the vLLM server without any specific flags: ```bash @@ -8,7 +8,7 @@ --guided-decoding-backend outlines ``` -This example demonstrates how to generate chat completions +This example demonstrates how to generate chat completions using the OpenAI Python client library. 
""" @@ -18,15 +18,6 @@ openai_api_key = "EMPTY" openai_api_base = "http://localhost:8000/v1" -client = OpenAI( - # defaults to os.environ.get("OPENAI_API_KEY") - api_key=openai_api_key, - base_url=openai_api_base, -) - -models = client.models.list() -model = models.data[0].id - tools = [ { "type": "function", @@ -116,21 +107,36 @@ }, ] -chat_completion = client.chat.completions.create( - messages=messages, - model=model, - tools=tools, - tool_choice="required", - stream=True # Enable streaming response -) -for chunk in chat_completion: - if chunk.choices and chunk.choices[0].delta.tool_calls: - print(chunk.choices[0].delta.tool_calls) +def main(): + client = OpenAI( + # defaults to os.environ.get("OPENAI_API_KEY") + api_key=openai_api_key, + base_url=openai_api_base, + ) + + models = client.models.list() + model = models.data[0].id + + chat_completion = client.chat.completions.create( + messages=messages, + model=model, + tools=tools, + tool_choice="required", + stream=True # Enable streaming response + ) + + for chunk in chat_completion: + if chunk.choices and chunk.choices[0].delta.tool_calls: + print(chunk.choices[0].delta.tool_calls) + + chat_completion = client.chat.completions.create(messages=messages, + model=model, + tools=tools, + tool_choice="required") + + print(chat_completion.choices[0].message.tool_calls) -chat_completion = client.chat.completions.create(messages=messages, - model=model, - tools=tools, - tool_choice="required") -print(chat_completion.choices[0].message.tool_calls) +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_chat_completion_structured_outputs.py b/examples/online_serving/openai_chat_completion_structured_outputs.py index 986ff500e58..9c57af1c158 100644 --- a/examples/online_serving/openai_chat_completion_structured_outputs.py +++ b/examples/online_serving/openai_chat_completion_structured_outputs.py @@ -1,43 +1,49 @@ # SPDX-License-Identifier: Apache-2.0 +""" +To run this example, you need to start the vLLM server: + +```bash +vllm serve Qwen/Qwen2.5-3B-Instruct +``` +""" from enum import Enum from openai import BadRequestError, OpenAI from pydantic import BaseModel -client = OpenAI( - base_url="http://localhost:8000/v1", - api_key="-", -) # Guided decoding by Choice (list of possible options) -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": "Classify this sentiment: vLLM is wonderful!" - }], - extra_body={"guided_choice": ["positive", "negative"]}, -) -print(completion.choices[0].message.content) +def guided_choice_completion(client: OpenAI, model: str): + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": "Classify this sentiment: vLLM is wonderful!" + }], + extra_body={"guided_choice": ["positive", "negative"]}, + ) + return completion.choices[0].message.content + # Guided decoding by Regex -prompt = ("Generate an email address for Alan Turing, who works in Enigma." - "End in .com and new line. Example result:" - "alan.turing@enigma.com\n") - -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={ - "guided_regex": "\w+@\w+\.com\n", - "stop": ["\n"] - }, -) -print(completion.choices[0].message.content) +def guided_regex_completion(client: OpenAI, model: str): + prompt = ("Generate an email address for Alan Turing, who works in Enigma." + "End in .com and new line. 
Example result:" + "alan.turing@enigma.com\n") + + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={ + "guided_regex": r"\w+@\w+\.com\n", + "stop": ["\n"] + }, + ) + return completion.choices[0].message.content # Guided decoding by JSON using Pydantic schema @@ -54,66 +60,101 @@ class CarDescription(BaseModel): car_type: CarType -json_schema = CarDescription.model_json_schema() - -prompt = ("Generate a JSON with the brand, model and car_type of" - "the most iconic car from the 90's") -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_json": json_schema}, -) -print(completion.choices[0].message.content) +def guided_json_completion(client: OpenAI, model: str): + json_schema = CarDescription.model_json_schema() -# Guided decoding by Grammar -simplified_sql_grammar = """ - ?start: select_statement + prompt = ("Generate a JSON with the brand, model and car_type of" + "the most iconic car from the 90's") + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={"guided_json": json_schema}, + ) + return completion.choices[0].message.content - ?select_statement: "SELECT " column_list " FROM " table_name - ?column_list: column_name ("," column_name)* +# Guided decoding by Grammar +def guided_grammar_completion(client: OpenAI, model: str): + simplified_sql_grammar = """ + root ::= select_statement - ?table_name: identifier + select_statement ::= "SELECT " column " from " table " where " condition - ?column_name: identifier + column ::= "col_1 " | "col_2 " - ?identifier: /[a-zA-Z_][a-zA-Z0-9_]*/ -""" + table ::= "table_1 " | "table_2 " -prompt = ("Generate an SQL query to show the 'username' and 'email'" - "from the 'users' table.") -completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", - messages=[{ - "role": "user", - "content": prompt, - }], - extra_body={"guided_grammar": simplified_sql_grammar}, -) -print(completion.choices[0].message.content) + condition ::= column "= " number -# Extra backend options -prompt = ("Generate an email address for Alan Turing, who works in Enigma." - "End in .com and new line. Example result:" - "alan.turing@enigma.com\n") + number ::= "1 " | "2 " + """ -try: - # The no-fallback option forces vLLM to use xgrammar, so when it fails - # you get a 400 with the reason why + prompt = ("Generate an SQL query to show the 'username' and 'email'" + "from the 'users' table.") completion = client.chat.completions.create( - model="Qwen/Qwen2.5-3B-Instruct", + model=model, messages=[{ "role": "user", "content": prompt, }], - extra_body={ - "guided_regex": "\w+@\w+\.com\n", - "stop": ["\n"], - "guided_decoding_backend": "xgrammar:no-fallback" - }, + extra_body={"guided_grammar": simplified_sql_grammar}, ) -except BadRequestError as e: - print("This error is expected:", e) + return completion.choices[0].message.content + + +# Extra backend options +def extra_backend_options_completion(client: OpenAI, model: str): + prompt = ("Generate an email address for Alan Turing, who works in Enigma." + "End in .com and new line. 
Example result:" + "alan.turing@enigma.com\n") + + try: + # The guided_decoding_disable_fallback option forces vLLM to use + # xgrammar, so when it fails you get a 400 with the reason why + completion = client.chat.completions.create( + model=model, + messages=[{ + "role": "user", + "content": prompt, + }], + extra_body={ + "guided_regex": r"\w+@\w+\.com\n", + "stop": ["\n"], + "guided_decoding_backend": "xgrammar", + "guided_decoding_disable_fallback": True, + }, + ) + return completion.choices[0].message.content + except BadRequestError as e: + print("This error is expected:", e) + + +def main(): + client: OpenAI = OpenAI( + base_url="http://localhost:8000/v1", + api_key="-", + ) + + model = "Qwen/Qwen2.5-3B-Instruct" + + print("Guided Choice Completion:") + print(guided_choice_completion(client, model)) + + print("\nGuided Regex Completion:") + print(guided_regex_completion(client, model)) + + print("\nGuided JSON Completion:") + print(guided_json_completion(client, model)) + + print("\nGuided Grammar Completion:") + print(guided_grammar_completion(client, model)) + + print("\nExtra Backend Options Completion:") + print(extra_backend_options_completion(client, model)) + + +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py new file mode 100644 index 00000000000..b807bc54052 --- /dev/null +++ b/examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: Apache-2.0 +from openai import OpenAI + +# This example demonstrates the `structural_tag` response format. +# It can be used to specify a structured output format that occurs between +# specific tags in the response. This example shows how it could be used +# to enforce the format of a tool call response, but it could be used for +# any structured output within a subset of the response. + + +def main(): + client = OpenAI( + base_url="http://localhost:8000/v1", + api_key="-", + ) + + messages = [{ + "role": + "user", + "content": + """ +You have access to the following function to retrieve the weather in a city: + + { + "name": "get_weather", + "parameters": { + "city": { + "param_type": "string", + "description": "The city to get the weather for", + "required": True + } + } + } + +If a you choose to call a function ONLY reply in the following format: +<{start_tag}={function_name}>{parameters}{end_tag} +where + +start_tag => ` a JSON dict with the function argument name as key and function + argument value as value. +end_tag => `` + +Here is an example, +{"example_name": "example_value"} + +Reminder: +- Function calls MUST follow the specified format +- Required parameters MUST be specified +- Only call one function at a time +- Put the entire function call reply on one line +- Always add your sources when using search results to answer the user query + +You are a helpful assistant. + +Given the previous instructions, what is the weather in New York City, Boston, +and San Francisco? 
+""" + }] + + response = client.chat.completions.create( + model="meta-llama/Llama-3.1-8B-Instruct", + messages=messages, + response_format={ + "type": + "structural_tag", + "structures": [{ + "begin": "", + "schema": { + "type": "object", + "properties": { + "city": { + "type": "string" + } + } + }, + "end": "" + }], + "triggers": [" requests.Response: return response -if __name__ == "__main__": +def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="localhost") parser.add_argument("--port", type=int, default=8000) parser.add_argument("--model", type=str, default="BAAI/bge-reranker-v2-m3") + return parser.parse_args() + - args = parser.parse_args() +def main(args): api_url = f"http://{args.host}:{args.port}/score" model_name = args.model @@ -30,9 +32,9 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response: text_2 = "The capital of Brazil is Brasilia." prompt = {"model": model_name, "text_1": text_1, "text_2": text_2} score_response = post_http_request(prompt=prompt, api_url=api_url) - print("Prompt when text_1 and text_2 are both strings:") + print("\nPrompt when text_1 and text_2 are both strings:") pprint.pprint(prompt) - print("Score Response:") + print("\nScore Response:") pprint.pprint(score_response.json()) text_1 = "What is the capital of France?" @@ -41,9 +43,9 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response: ] prompt = {"model": model_name, "text_1": text_1, "text_2": text_2} score_response = post_http_request(prompt=prompt, api_url=api_url) - print("Prompt when text_1 is string and text_2 is a list:") + print("\nPrompt when text_1 is string and text_2 is a list:") pprint.pprint(prompt) - print("Score Response:") + print("\nScore Response:") pprint.pprint(score_response.json()) text_1 = [ @@ -54,7 +56,12 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response: ] prompt = {"model": model_name, "text_1": text_1, "text_2": text_2} score_response = post_http_request(prompt=prompt, api_url=api_url) - print("Prompt when text_1 and text_2 are both lists:") + print("\nPrompt when text_1 and text_2 are both lists:") pprint.pprint(prompt) - print("Score Response:") + print("\nScore Response:") pprint.pprint(score_response.json()) + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/examples/online_serving/openai_embedding_client.py b/examples/online_serving/openai_embedding_client.py index b7c5651e3ba..bc217f7ca7a 100644 --- a/examples/online_serving/openai_embedding_client.py +++ b/examples/online_serving/openai_embedding_client.py @@ -6,22 +6,29 @@ openai_api_key = "EMPTY" openai_api_base = "http://localhost:8000/v1" -client = OpenAI( - # defaults to os.environ.get("OPENAI_API_KEY") - api_key=openai_api_key, - base_url=openai_api_base, -) - -models = client.models.list() -model = models.data[0].id - -responses = client.embeddings.create( - input=[ - "Hello my name is", - "The best thing about vLLM is that it supports many different models" - ], - model=model, -) - -for data in responses.data: - print(data.embedding) # List of float of len 4096 + +def main(): + client = OpenAI( + # defaults to os.environ.get("OPENAI_API_KEY") + api_key=openai_api_key, + base_url=openai_api_base, + ) + + models = client.models.list() + model = models.data[0].id + + responses = client.embeddings.create( + # ruff: noqa: E501 + input=[ + "Hello my name is", + "The best thing about vLLM is that it supports many different models" + ], + model=model, + ) + + for data in 
responses.data: + print(data.embedding) # List of float of len 4096 + + +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_embedding_matryoshka_fy.py b/examples/online_serving/openai_embedding_matryoshka_fy.py new file mode 100644 index 00000000000..4544dcfb5ab --- /dev/null +++ b/examples/online_serving/openai_embedding_matryoshka_fy.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Example Python client for embedding API dimensions using vLLM API server +NOTE: + start a supported Matryoshka Embeddings model server with `vllm serve`, e.g. + vllm serve jinaai/jina-embeddings-v3 --trust-remote-code +""" + +from openai import OpenAI + +# Modify OpenAI's API key and API base to use vLLM's API server. +openai_api_key = "EMPTY" +openai_api_base = "http://localhost:8000/v1" + + +def main(): + client = OpenAI( + # defaults to os.environ.get("OPENAI_API_KEY") + api_key=openai_api_key, + base_url=openai_api_base, + ) + + models = client.models.list() + model = models.data[0].id + + responses = client.embeddings.create( + input=["Follow the white rabbit."], + model=model, + dimensions=32, + ) + + for data in responses.data: + print(data.embedding) # List of float of len 32 + + +if __name__ == "__main__": + main() diff --git a/examples/online_serving/openai_pooling_client.py b/examples/online_serving/openai_pooling_client.py index e17f9c5efd6..abcfe27c276 100644 --- a/examples/online_serving/openai_pooling_client.py +++ b/examples/online_serving/openai_pooling_client.py @@ -17,7 +17,7 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response: return response -if __name__ == "__main__": +def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="localhost") parser.add_argument("--port", type=int, default=8000) @@ -25,15 +25,20 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response: type=str, default="jason9693/Qwen2.5-1.5B-apeach") - args = parser.parse_args() + return parser.parse_args() + + +def main(args): api_url = f"http://{args.host}:{args.port}/pooling" model_name = args.model # Input like Completions API prompt = {"model": model_name, "input": "vLLM is great!"} pooling_response = post_http_request(prompt=prompt, api_url=api_url) + print("-" * 50) print("Pooling Response:") pprint.pprint(pooling_response.json()) + print("-" * 50) # Input like Chat API prompt = { @@ -50,3 +55,9 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response: pooling_response = post_http_request(prompt=prompt, api_url=api_url) print("Pooling Response:") pprint.pprint(pooling_response.json()) + print("-" * 50) + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/examples/online_serving/openai_transcription_client.py b/examples/online_serving/openai_transcription_client.py index 062868dd8ad..66e622672ef 100644 --- a/examples/online_serving/openai_transcription_client.py +++ b/examples/online_serving/openai_transcription_client.py @@ -26,7 +26,12 @@ def sync_openai(): model="openai/whisper-large-v3", language="en", response_format="json", - temperature=0.0) + temperature=0.0, + # Additional sampling params not provided by OpenAI API. 
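+            # `extra_body` fields are merged into the request JSON, which is
+            # how the client forwards vLLM-specific sampling options that the
+            # OpenAI SDK has no keyword arguments for.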
+ extra_body=dict( + seed=4419, + repetition_penalty=1.3, + )) print("transcription result:", transcription.text) @@ -41,11 +46,15 @@ async def stream_openai_response(): "model": "openai/whisper-large-v3", } url = openai_api_base + "/audio/transcriptions" + headers = {"Authorization": f"Bearer {openai_api_key}"} print("transcription result:", end=' ') async with httpx.AsyncClient() as client: with open(str(winning_call), "rb") as f: - async with client.stream('POST', url, files={'file': f}, - data=data) as response: + async with client.stream('POST', + url, + files={'file': f}, + data=data, + headers=headers) as response: async for line in response.aiter_lines(): # Each line is a JSON object prefixed with 'data: ' if line: diff --git a/examples/online_serving/ray_serve_deepseek.py b/examples/online_serving/ray_serve_deepseek.py new file mode 100644 index 00000000000..f9ef3e2da1a --- /dev/null +++ b/examples/online_serving/ray_serve_deepseek.py @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +Example to deploy DeepSeek R1 or V3 with Ray Serve LLM. +See Ray Serve LLM documentation at: +https://docs.ray.io/en/latest/serve/llm/serving-llms.html + +Run `python3 ray_serve_deepseek.py` to deploy the model. +""" + +from ray import serve +from ray.serve.llm import LLMConfig, build_openai_app + +llm_config = LLMConfig( + model_loading_config={ + "model_id": "deepseek", + # Since DeepSeek model is huge, it is recommended to pre-download + # the model to local disk, say /path/to/the/model and specify: + # model_source="/path/to/the/model" + "model_source": "deepseek-ai/DeepSeek-R1", + }, + deployment_config={ + "autoscaling_config": { + "min_replicas": 1, + "max_replicas": 1, + } + }, + # Change to the accelerator type of the node + accelerator_type="H100", + runtime_env={"env_vars": { + "VLLM_USE_V1": "1" + }}, + # Customize engine arguments as needed (e.g. vLLM engine kwargs) + engine_kwargs={ + "tensor_parallel_size": 8, + "pipeline_parallel_size": 2, + "gpu_memory_utilization": 0.92, + "dtype": "auto", + "max_num_seqs": 40, + "max_model_len": 16384, + "enable_chunked_prefill": True, + "enable_prefix_caching": True, + "trust_remote_code": True, + }, +) + +# Deploy the application +llm_app = build_openai_app({"llm_configs": [llm_config]}) +serve.run(llm_app) diff --git a/examples/online_serving/streamlit_openai_chatbot_webserver.py b/examples/online_serving/streamlit_openai_chatbot_webserver.py new file mode 100644 index 00000000000..d8a0f211d44 --- /dev/null +++ b/examples/online_serving/streamlit_openai_chatbot_webserver.py @@ -0,0 +1,185 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +vLLM Chat Assistant - A Streamlit Web Interface + +A streamlined chat interface that quickly integrates +with vLLM API server. 
+ +Features: +- Multiple chat sessions management +- Streaming response display +- Configurable API endpoint +- Real-time chat history + +Requirements: + pip install streamlit openai + +Usage: + # Start the app with default settings + streamlit run streamlit_openai_chatbot_webserver.py + + # Start with custom vLLM API endpoint + VLLM_API_BASE="http://your-server:8000/v1" \ + streamlit run streamlit_openai_chatbot_webserver.py + + # Enable debug mode + streamlit run streamlit_openai_chatbot_webserver.py \ + --logger.level=debug +""" +import os +from datetime import datetime + +import streamlit as st +from openai import OpenAI + +# Get command line arguments from environment variables +openai_api_key = os.getenv('VLLM_API_KEY', "EMPTY") +openai_api_base = os.getenv('VLLM_API_BASE', "http://localhost:8000/v1") + +# Initialize session states for managing chat sessions +if "sessions" not in st.session_state: + st.session_state.sessions = {} + +if "current_session" not in st.session_state: + st.session_state.current_session = None + +if "messages" not in st.session_state: + st.session_state.messages = [] + +if "active_session" not in st.session_state: + st.session_state.active_session = None + +# Initialize session state for API base URL +if "api_base_url" not in st.session_state: + st.session_state.api_base_url = openai_api_base + + +def create_new_chat_session(): + """Create a new chat session with timestamp as ID""" + session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + st.session_state.sessions[session_id] = [] + st.session_state.current_session = session_id + st.session_state.active_session = session_id + st.session_state.messages = [] + + +def switch_to_chat_session(session_id): + """Switch to a different chat session""" + st.session_state.current_session = session_id + st.session_state.active_session = session_id + st.session_state.messages = st.session_state.sessions[session_id] + + +def get_llm_response(messages, model): + """Get streaming response from llm + + Args: + messages: List of message dictionaries + model: Name of model + + Returns: + Streaming response object or error message string + """ + try: + response = client.chat.completions.create(model=model, + messages=messages, + stream=True) + return response + except Exception as e: + st.error(f"Error details: {str(e)}") + return f"Error: {str(e)}" + + +# Sidebar - API Settings first +st.sidebar.title("API Settings") +new_api_base = st.sidebar.text_input("API Base URL:", + value=st.session_state.api_base_url) +if new_api_base != st.session_state.api_base_url: + st.session_state.api_base_url = new_api_base + st.rerun() + +st.sidebar.divider() + +# Sidebar - Session Management +st.sidebar.title("Chat Sessions") +if st.sidebar.button("New Session"): + create_new_chat_session() + +# Display all sessions in reverse chronological order +for session_id in sorted(st.session_state.sessions.keys(), reverse=True): + # Mark the active session with a pinned button + if session_id == st.session_state.active_session: + st.sidebar.button(f"📍 {session_id}", + key=session_id, + type="primary", + on_click=switch_to_chat_session, + args=(session_id, )) + else: + st.sidebar.button(f"Session {session_id}", + key=session_id, + on_click=switch_to_chat_session, + args=(session_id, )) + +# Main interface +st.title("vLLM Chat Assistant") + +# Initialize OpenAI client with API settings +client = OpenAI(api_key=openai_api_key, base_url=st.session_state.api_base_url) + +# Get and display current model id +models = client.models.list() +model = 
models.data[0].id +st.markdown(f"**Model**: {model}") + +# Initialize first session if none exists +if st.session_state.current_session is None: + create_new_chat_session() + st.session_state.active_session = st.session_state.current_session + +# Display chat history for current session +for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.write(message["content"]) + +# Handle user input and generate llm response +if prompt := st.chat_input("Type your message here..."): + # Save user message to session + st.session_state.messages.append({"role": "user", "content": prompt}) + st.session_state.sessions[ + st.session_state.current_session] = st.session_state.messages + + # Display user message + with st.chat_message("user"): + st.write(prompt) + + # Prepare messages for llm + messages_for_llm = [{ + "role": m["role"], + "content": m["content"] + } for m in st.session_state.messages] + + # Generate and display llm response + with st.chat_message("assistant"): + message_placeholder = st.empty() + full_response = "" + + # Get streaming response from llm + response = get_llm_response(messages_for_llm, model) + if isinstance(response, str): + message_placeholder.markdown(response) + full_response = response + else: + for chunk in response: + if hasattr(chunk.choices[0].delta, "content"): + content = chunk.choices[0].delta.content + if content: + full_response += content + message_placeholder.markdown(full_response + "▌") + + message_placeholder.markdown(full_response) + + # Save llm response to session history + st.session_state.messages.append({ + "role": "assistant", + "content": full_response + }) diff --git a/examples/tool_chat_template_llama4_json.jinja b/examples/tool_chat_template_llama4_json.jinja new file mode 100644 index 00000000000..759f1655443 --- /dev/null +++ b/examples/tool_chat_template_llama4_json.jinja @@ -0,0 +1,116 @@ +{%- macro is_array_of_type_objects(var) -%} + {%- if var is iterable and var is not string -%} + {%- set valid = true -%} + {%- for item in var -%} + {%- if 'type' not in item -%} + {%- set valid = false -%} + {%- break -%} + {%- endif -%} + {%- endfor -%} + {{ valid }} + {%- else -%} + {{ false }} + {%- endif -%} +{%- endmacro %} + +{%- macro render_message(message) %} + {%- if message['content'] is string %} + {{- message['content']|trim }} + {%- elif is_array_of_type_objects(data) == 'True' %} + {%- for content in message['content'] %} + {%- if content['type'] == 'image' %} + {{- '<|image|>' }} + {%- elif content['type'] == 'text' %} + {{- content['text']|trim }} + {%- endif %} + {%- endfor %} + {%- else %} + {{- message['content']|tojson }} + {%- endif %} +{%- endmacro %} + +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0] %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = ({ "content": "You are a helpful assistant with tool calling " + "capabilities. Only reply with a tool call if the function exists in the " + "library provided by the user. If it doesn't exist, just reply directly in " + "natural language. 
When you receive a tool call response, use the output to " + "format an answer to the original user question."}) %} +{%- endif %} + +{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one ' + 'or more function/tool calls to fulfill the task. \n' + 'If none are needed, then proceed to the response.\n\n' + 'Tool Call Syntax: You can call tools using the following syntax:\n' + '{"name": function name, "parameters": dictionary of argument name and its value}.\n' + 'Separate multiple function calls by "; ". Do not use variables.\n' + 'Do not include anything else when calling the tools with the syntax above.\n\n' + 'Here is a list of functions in JSON format that you can invoke.\n' %} + +{{- "<|header_start|>system<|header_end|>\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- tool_lib_preamble }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- render_message(system_message) }} +{{ "<|eot|>\n" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0] %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} + {%- endif %} + {{- '<|header_start|>user<|header_end|>\n\n' }} + {{- tool_lib_preamble }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- render_message(first_user_message) + "\n<|eot|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }} + {{- render_message(message) }} + {{- "\n<|eot|>" }} + {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %} + {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}} + {{- render_message(message) }} + {%- for tool_call in message.tool_calls %} + {{- '{"name": "' + tool_call.function.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.function.arguments | tojson }} + {{- "}" }} + {%- endfor %} + {{- "\n<|eot|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "\n<|header_start|>ipython<|header_end|>\n\n" }} + {{- render_message(message) }} + {{- "\n<|eom|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '\n<|header_start|>assistant<|header_end|>\n\n' }} +{%- endif %} diff --git a/examples/tool_chat_template_mistral3.jinja b/examples/tool_chat_template_mistral3.jinja new file mode 100644 index 00000000000..2b2f94d7ef5 --- /dev/null +++ b/examples/tool_chat_template_mistral3.jinja @@ -0,0 +1,119 @@ +{%- set today = strftime_now("%Y-%m-%d") %} +{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. 
\"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %} + +{{- bos_token }} + +{%- if messages[0]['role'] == 'system' %} + {%- if messages[0]['content'] is string %} + {%- set system_message = messages[0]['content'] %} + {%- set loop_messages = messages[1:] %} + {%- else %} + {%- set system_message = messages[0]['content'][0]['text'] %} + {%- set loop_messages = messages[1:] %} + {%- endif %} +{%- else %} + {%- set system_message = default_system_message %} + {%- set loop_messages = messages %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- elif tools is not none %} + {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. If you call one or more tools, format them in a single JSON array or objects, where each object is a tool call, not as separate objects outside of an array or multiple arrays. Use the format [{\"name\": tool call name, \"arguments\": tool call arguments}, additional tool calls] if you call more than one tool. If you call tools, do not attempt to interpret them or otherwise provide a response until you receive a tool call result that you can interpret for the user." %} + {%- if system_message is defined %} + {%- set system_message = parallel_tool_prompt + "\n\n" + system_message %} + {%- else %} + {%- set system_message = parallel_tool_prompt %} + {%- endif %} +{%- endif %} +{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }} + +{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %} + +{%- for message in loop_messages | rejectattr("role", "equalto", "tool") | rejectattr("role", "equalto", "tool_results") | selectattr("tool_calls", "undefined") %} + {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %} + {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }} + {%- endif %} +{%- endfor %} + +{%- for message in loop_messages %} + {%- if message["role"] == "user" %} + {%- if tools is not none and (message == user_messages[-1]) %} + {{- "[AVAILABLE_TOOLS] [" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- '{"type": "function", "function": {' }} + {%- for key, val in tool.items() if key != "return" %} + {%- if val is string %} + {{- '"' + key + '": "' + val + '"' }} + {%- else %} + {{- '"' + key + '": ' + val|tojson }} + {%- endif %} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- "}}" }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" }} + {%- endif %} + {%- endfor %} + {{- "[/AVAILABLE_TOOLS]" }} + {%- endif %} + {%- if message['content'] is string %} + {{- '[INST]' + message['content'] + '[/INST]' }} + {%- else %} + {{- '[INST]' }} + {%- for block in message['content'] %} + {%- if block['type'] == 'text' %} + {{- block['text'] }} + {%- elif block['type'] == 'image' or block['type'] == 'image_url' %} + {{- '[IMG]' }} + {%- else %} + {{- raise_exception('Only text and image blocks are supported in message content!') }} + {%- endif %} + {%- endfor %} + {{- '[/INST]' }} + {%- endif %} + {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %} + {%- if message.tool_calls is defined %} + {%- set tool_calls = message.tool_calls %} + {%- else %} + {%- set tool_calls = message.content %} + {%- endif %} + {{- "[TOOL_CALLS] [" }} + {%- for tool_call in tool_calls %} + {%- set out = tool_call.function|tojson %} + {{- out[:-1] }} + 
{%- if not tool_call.id is defined or tool_call.id|length < 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (1)" + tool_call.id) }} + {%- endif %} + {{- ', "id": "' + tool_call.id[-9:] + '"}' }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" + eos_token }} + {%- endif %} + {%- endfor %} + {%- elif message['role'] == 'assistant' %} + {%- if message['content'] is string %} + {{- message['content'] + eos_token }} + {%- else %} + {{- message['content'][0]['text'] + eos_token }} + {%- endif %} + {%- elif message["role"] == "tool_results" or message["role"] == "tool" %} + {%- if message.content is defined and message.content.content is defined %} + {%- set content = message.content.content %} + {%- else %} + {%- set content = message.content %} + {%- endif %} + {{- '[TOOL_RESULTS] {"content": ' + content|string + ", " }} + {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length >= 9! (2)" + message.tool_call_id) }} + {%- endif %} + {{- '"call_id": "' + message.tool_call_id[-9:] + '"}[/TOOL_RESULTS]' }} + {%- else %} + {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }} + {%- endif %} +{%- endfor %} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 167e975c70f..069e295bfb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,10 +3,10 @@ requires = [ "cmake>=3.26", "ninja", - "packaging", - "setuptools>=61", + "packaging>=24.2", + "setuptools>=77.0.3,<80.0.0", "setuptools-scm>=8.0", - "torch == 2.6.0", + "torch == 2.7.0", "wheel", "jinja2", ] @@ -15,7 +15,8 @@ build-backend = "setuptools.build_meta" [project] name = "vllm" authors = [{name = "vLLM Team"}] -license = { "file"= "LICENSE" } +license = "Apache-2.0" +license-files = ["LICENSE"] readme = "README.md" description = "A high-throughput and memory-efficient inference and serving engine for LLMs" classifiers = [ @@ -23,7 +24,6 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "License :: OSI Approved :: Apache Software License", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", @@ -46,8 +46,7 @@ vllm = "vllm.entrypoints.cli.main:main" [tool.setuptools.packages.find] where = ["."] -exclude = ["benchmarks", "csrc", "docs", "examples", "tests*"] -namespaces = false +include = ["vllm*"] [tool.yapfignore] ignore_patterns = [ @@ -59,7 +58,8 @@ ignore_patterns = [ line-length = 80 exclude = [ # External file, leaving license intact - "examples/other/fp8/quantizer/quantize.py" + "examples/other/fp8/quantizer/quantize.py", + "vllm/vllm_flash_attn/flash_attn_interface.pyi" ] [tool.ruff.lint.per-file-ignores] @@ -158,7 +158,6 @@ markers = [ "skip_global_cleanup", "core_model: enable this model test in each PR instead of only nightly", "cpu_model: enable this model test in CPU tests", - "quant_model: run this model test under Quantized category", "split: run this test as part of a split", "distributed: run this test only in distributed GPU tests", "skip_v1: do not run this test with v1", diff --git a/requirements/build.txt b/requirements/build.txt index 13d643bcaff..5edc593b927 100644 --- a/requirements/build.txt +++ b/requirements/build.txt @@ -1,9 +1,9 @@ # Should be mirrored in pyproject.toml cmake>=3.26 ninja 
-packaging -setuptools>=61 +packaging>=24.2 +setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 -torch==2.6.0 +torch==2.7.0 wheel jinja2>=3.1.6 diff --git a/requirements/common.txt b/requirements/common.txt index 4df32460c2d..7ea27753eab 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -8,7 +8,7 @@ blake3 py-cpuinfo transformers >= 4.51.1 huggingface-hub[hf_xet] >= 0.30.0 # Required for Xet downloads. -tokenizers >= 0.19.1 # Required for Llama 3. +tokenizers >= 0.21.1 # Required for fast incremental detokenization. protobuf # Required by LlamaTokenizer. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. aiohttp @@ -34,9 +34,9 @@ mistral_common[opencv] >= 1.5.4 opencv-python-headless >= 4.11.0 # required for video IO pyyaml six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 -setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 +setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 einops # Required for Qwen2-VL. -compressed-tensors == 0.9.3 # required for compressed-tensors +compressed-tensors == 0.9.4 # required for compressed-tensors depyf==0.18.0 # required for profiling and debugging with compilation config cloudpickle # allows pickling lambda functions in model_executor/models/registry.py watchfiles # required for http server to monitor the updates of TLS files diff --git a/requirements/cpu.txt b/requirements/cpu.txt index 69f732c2417..752931158a0 100644 --- a/requirements/cpu.txt +++ b/requirements/cpu.txt @@ -2,18 +2,19 @@ -r common.txt # Dependencies for CPUs -torch==2.6.0+cpu; platform_machine == "x86_64" -torch==2.6.0; platform_system == "Darwin" -torch==2.6.0; platform_machine == "ppc64le" or platform_machine == "aarch64" +--extra-index-url https://download.pytorch.org/whl/cpu +torch==2.7.0+cpu; platform_machine == "x86_64" +torch==2.7.0; platform_system == "Darwin" +torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64" torch==2.7.0.dev20250304; platform_machine == "s390x" # required for the image processor of minicpm-o-2_6, this must be updated alongside torch torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x" -torchaudio==2.6.0; platform_machine == "ppc64le" +torchaudio==2.7.0; platform_machine == "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch torchvision; platform_machine != "ppc64le" and platform_machine != "s390x" -torchvision==0.21.0; platform_machine == "ppc64le" +torchvision==0.22.0; platform_machine == "ppc64le" datasets # for benchmark scripts # cpu cannot use triton 3.3.0 diff --git a/requirements/cuda.txt b/requirements/cuda.txt index cdc6ee75afb..a71d9728f38 100644 --- a/requirements/cuda.txt +++ b/requirements/cuda.txt @@ -6,8 +6,9 @@ numba == 0.61.2; python_version > '3.9' # Dependencies for NVIDIA GPUs ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1. -torch==2.6.0 -torchaudio==2.6.0 +torch==2.7.0 +torchaudio==2.7.0 # These must be updated alongside torch -torchvision==0.21.0 # Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version -xformers==0.0.29.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.6.0 +torchvision==0.22.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version +# https://github.com/facebookresearch/xformers/releases/tag/v0.0.30 +xformers==0.0.30; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7 diff --git a/requirements/docs.txt b/requirements/docs.txt index 416ca503b36..401f714ae9f 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,25 +1,15 @@ -sphinx==6.2.1 -sphinx-argparse==0.4.0 -sphinx-book-theme==1.0.1 +sphinx==8.2.3 +sphinx-argparse==0.5.2 +sphinx-autodoc2==0.5.0 +sphinx-book-theme==1.1.4 sphinx-copybutton==0.5.2 sphinx-design==0.6.1 sphinx-togglebutton==0.3.2 -myst-parser==3.0.1 +myst-parser==4.0.1 msgspec -cloudpickle +commonmark # Required by sphinx-argparse when using :markdownhelp: # packages to install to build the documentation cachetools -pydantic >= 2.8 -f https://download.pytorch.org/whl/cpu -torch -py-cpuinfo -transformers -mistral_common >= 1.5.4 -aiohttp -starlette -openai # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args -fastapi # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args -partial-json-parser # Required by docs/source/serving/openai_compatible_server.md's vllm.entrypoints.openai.cli_args -requests -zmq +torch \ No newline at end of file diff --git a/requirements/hpu.txt b/requirements/hpu.txt index 830f6ef3f50..a88777268a3 100644 --- a/requirements/hpu.txt +++ b/requirements/hpu.txt @@ -7,6 +7,6 @@ triton==3.1.0 pandas numpy==1.26.4 tabulate -setuptools>=61 +setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 -vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@4312768 +vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@f1f6624 diff --git a/requirements/neuron.txt b/requirements/neuron.txt index 5f25bd0546e..f8e3030834e 100644 --- a/requirements/neuron.txt +++ b/requirements/neuron.txt @@ -2,5 +2,7 @@ -r common.txt # Dependencies for Neuron devices +packaging>=24.2 +setuptools>=77.0.3,<80.0.0 torch-neuronx >= 2.5.0 neuronx-cc diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt new file mode 100644 index 00000000000..e2711354ac1 --- /dev/null +++ b/requirements/nightly_torch_test.txt @@ -0,0 +1,33 @@ +# Dependency that able to run entrypoints test +# pytest and its extensions +pytest +pytest-asyncio +pytest-forked +pytest-mock +pytest-rerunfailures +pytest-shard +pytest-timeout + + +librosa # required by audio tests in entrypoints/openai +sentence-transformers +numba == 0.61.2; python_version > '3.9' +# testing utils +boto3 +botocore +datasets +ray >= 2.10.0 +peft +runai-model-streamer==0.11.0 +runai-model-streamer-s3==0.11.0 +tensorizer>=2.9.0 +lm-eval==0.4.8 +buildkite-test-collector==0.1.9 +lm-eval[api]==0.4.8 # required for model evaluation test + +# required for quantization test +bitsandbytes>=0.45.3 + +# required for minicpmo_26 test +vector_quantize_pytorch +vocos diff --git a/requirements/rocm-build.txt b/requirements/rocm-build.txt index 29d5647807b..981b90632c1 100644 --- a/requirements/rocm-build.txt +++ b/requirements/rocm-build.txt @@ -2,13 +2,14 @@ -r common.txt --extra-index-url https://download.pytorch.org/whl/rocm6.2.4 
-torch==2.6.0 -torchvision==0.21.0 -torchaudio==2.6.0 +torch==2.7.0 +torchvision==0.22.0 +torchaudio==2.7.0 +triton==3.2 cmake>=3.26,<4 -packaging -setuptools>=61 +packaging>=24.2 +setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 wheel jinja2>=3.1.6 diff --git a/requirements/rocm.txt b/requirements/rocm.txt index 4df92aab374..8a84f2ff1ed 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -5,11 +5,10 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req numba == 0.61.2; python_version > '3.9' # Dependencies for AMD GPUs -awscli boto3 botocore datasets -ray >= 2.10.0 +ray>=2.10.0,<2.45.0 peft pytest-asyncio tensorizer>=2.9.0 diff --git a/requirements/test.in b/requirements/test.in index b9b3df0651b..cdc7c563f08 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -8,7 +8,6 @@ pytest-shard pytest-timeout # testing utils -awscli backoff # required for phi4mm test blobfile # required for kimi-vl test einops # required for MPT, qwen-vl and Mamba @@ -23,18 +22,21 @@ sentence-transformers # required for embedding tests soundfile # required for audio tests jiwer # required for audio tests timm # required for internvl test -torch==2.6.0 -torchaudio==2.6.0 -torchvision==0.21.0 +torch==2.7.0 +torchaudio==2.7.0 +torchvision==0.22.0 transformers_stream_generator # required for qwen-vl test +mamba_ssm # required for plamo2 test matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.4 # required for pixtral test num2words # required for smolvlm test opencv-python-headless >= 4.11.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.8 # required for model evaluation test -transformers==4.51.1 +transformers==4.51.3 +tokenizers==0.21.1 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads. +schemathesis>=3.39.15 # Required for openai schema test. 
# quantization bitsandbytes>=0.45.3 buildkite-test-collector==0.1.9 diff --git a/requirements/test.txt b/requirements/test.txt index a5c062b0b1f..9a15d9a0d82 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements/test.in -o requirements/test.txt +# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu128 absl-py==2.1.0 # via rouge-score accelerate==1.0.1 @@ -20,21 +20,27 @@ aiosignal==1.3.1 annotated-types==0.7.0 # via pydantic anyio==4.6.2.post1 - # via httpx + # via + # httpx + # starlette argcomplete==3.5.1 # via datamodel-code-generator +arrow==1.3.0 + # via isoduration attrs==24.2.0 # via # aiohttp + # hypothesis # jsonlines # jsonschema + # pytest-subtests # referencing audioread==3.0.1 # via librosa -awscli==1.35.23 - # via -r requirements/test.in backoff==2.2.1 - # via -r requirements/test.in + # via + # -r requirements/test.in + # schemathesis bitsandbytes==0.45.3 # via -r requirements/test.in black==24.10.0 @@ -45,7 +51,6 @@ boto3==1.35.57 # via tensorizer botocore==1.35.57 # via - # awscli # boto3 # s3transfer bounded-pool-executor==0.0.3 @@ -69,11 +74,12 @@ click==8.1.7 # jiwer # nltk # ray + # schemathesis # typer colorama==0.4.6 # via - # awscli # sacrebleu + # schemathesis # tqdm-multiprocess contourpy==1.3.0 # via matplotlib @@ -105,12 +111,11 @@ dnspython==2.7.0 # via email-validator docopt==0.6.2 # via num2words -docutils==0.16 - # via awscli einops==0.8.0 # via # -r requirements/test.in # encodec + # mamba-ssm # vector-quantize-pytorch # vocos einx==0.3.0 @@ -137,6 +142,8 @@ filelock==3.16.1 # transformers fonttools==4.54.1 # via matplotlib +fqdn==1.5.1 + # via jsonschema frozendict==2.4.6 # via einx frozenlist==1.5.0 @@ -155,8 +162,12 @@ genai-perf==0.0.8 # via -r requirements/test.in genson==1.3.0 # via datamodel-code-generator +graphql-core==3.2.6 + # via hypothesis-graphql h11==0.14.0 # via httpcore +harfile==0.3.0 + # via schemathesis hf-xet==0.1.4 # via huggingface-hub hiredis==3.0.0 @@ -164,7 +175,9 @@ hiredis==3.0.0 httpcore==1.0.6 # via httpx httpx==0.27.2 - # via -r requirements/test.in + # via + # -r requirements/test.in + # schemathesis huggingface-hub==0.30.1 # via # -r requirements/test.in @@ -179,17 +192,29 @@ huggingface-hub==0.30.1 # vocos humanize==4.11.0 # via runai-model-streamer +hypothesis==6.131.0 + # via + # hypothesis-graphql + # hypothesis-jsonschema + # schemathesis +hypothesis-graphql==0.11.1 + # via schemathesis +hypothesis-jsonschema==0.23.1 + # via schemathesis idna==3.10 # via # anyio # email-validator # httpx + # jsonschema # requests # yarl inflect==5.6.2 # via datamodel-code-generator iniconfig==2.0.0 # via pytest +isoduration==20.11.0 + # via jsonschema isort==5.13.2 # via datamodel-code-generator jinja2==3.1.6 @@ -209,12 +234,18 @@ joblib==1.4.2 # scikit-learn jsonlines==4.0.0 # via lm-eval +jsonpointer==3.0.0 + # via jsonschema jsonschema==4.23.0 # via + # hypothesis-jsonschema # mistral-common # ray + # schemathesis jsonschema-specifications==2024.10.1 # via jsonschema +junit-xml==1.9 + # via schemathesis kaleido==0.2.1 # via genai-perf kiwisolver==1.4.7 @@ -233,10 +264,14 @@ lxml==5.3.0 # via # blobfile # sacrebleu +mamba-ssm==2.2.4 + # via -r requirements/test.in markdown-it-py==3.0.0 # via rich -markupsafe==3.0.2 - # via jinja2 +markupsafe==3.0.1 + # via + # jinja2 + # werkzeug matplotlib==3.9.2 # via -r requirements/test.in mbstrdecoder==1.1.3 @@ -268,6 
+303,8 @@ mypy-extensions==1.0.0 # via black networkx==3.2.1 # via torch +ninja==1.11.1.3 + # via mamba-ssm nltk==3.9.1 # via rouge-score num2words==0.5.14 @@ -312,45 +349,48 @@ numpy==1.26.4 # transformers # tritonclient # vocos -nvidia-cublas-cu12==12.4.5.8 +nvidia-cublas-cu12==12.8.3.14 # via # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch -nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cuda-cupti-cu12==12.8.57 # via torch -nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.8.61 # via torch -nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cuda-runtime-cu12==12.8.57 # via torch -nvidia-cudnn-cu12==9.1.0.70 +nvidia-cudnn-cu12==9.7.1.26 # via torch -nvidia-cufft-cu12==11.2.1.3 +nvidia-cufft-cu12==11.3.3.41 # via torch -nvidia-curand-cu12==10.3.5.147 +nvidia-cufile-cu12==1.13.0.11 # via torch -nvidia-cusolver-cu12==11.6.1.9 +nvidia-curand-cu12==10.3.9.55 # via torch -nvidia-cusparse-cu12==12.3.1.170 +nvidia-cusolver-cu12==11.7.2.55 + # via torch +nvidia-cusparse-cu12==12.5.7.53 # via # nvidia-cusolver-cu12 # torch -nvidia-cusparselt-cu12==0.6.2 +nvidia-cusparselt-cu12==0.6.3 # via torch -nvidia-nccl-cu12==2.21.5 +nvidia-nccl-cu12==2.26.2 # via torch -nvidia-nvjitlink-cu12==12.4.127 +nvidia-nvjitlink-cu12==12.8.61 # via + # nvidia-cufft-cu12 # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 # torch -nvidia-nvtx-cu12==12.4.127 +nvidia-nvtx-cu12==12.8.55 # via torch opencv-python-headless==4.11.0.86 # via # -r requirements/test.in # mistral-common -packaging==24.1 +packaging==24.2 # via # accelerate # black @@ -360,6 +400,7 @@ packaging==24.1 # fastparquet # huggingface-hub # lazy-loader + # mamba-ssm # matplotlib # peft # plotly @@ -425,8 +466,6 @@ pyarrow==18.0.0 # via # datasets # genai-perf -pyasn1==0.6.1 - # via rsa pybind11==2.13.6 # via lm-eval pycparser==2.22 @@ -443,6 +482,8 @@ pygments==2.18.0 # via rich pyparsing==3.2.0 # via matplotlib +pyrate-limiter==3.7.0 + # via schemathesis pytablewriter==1.2.0 # via lm-eval pytest==8.3.3 @@ -455,7 +496,9 @@ pytest==8.3.3 # pytest-mock # pytest-rerunfailures # pytest-shard + # pytest-subtests # pytest-timeout + # schemathesis pytest-asyncio==0.24.0 # via -r requirements/test.in pytest-forked==1.6.0 @@ -466,10 +509,13 @@ pytest-rerunfailures==14.0 # via -r requirements/test.in pytest-shard==0.1.2 # via -r requirements/test.in +pytest-subtests==0.14.1 + # via schemathesis pytest-timeout==2.3.1 # via -r requirements/test.in python-dateutil==2.9.0.post0 # via + # arrow # botocore # matplotlib # pandas @@ -483,7 +529,6 @@ pytz==2024.2 pyyaml==6.0.2 # via # accelerate - # awscli # datamodel-code-generator # datasets # genai-perf @@ -491,6 +536,7 @@ pyyaml==6.0.2 # peft # ray # responses + # schemathesis # timm # transformers # vocos @@ -521,10 +567,16 @@ requests==2.32.3 # pooch # ray # responses + # schemathesis + # starlette-testclient # tiktoken # transformers responses==0.25.3 # via genai-perf +rfc3339-validator==0.1.4 + # via jsonschema +rfc3987==1.3.8 + # via jsonschema rich==13.9.4 # via # genai-perf @@ -535,16 +587,12 @@ rpds-py==0.20.1 # via # jsonschema # referencing -rsa==4.7.2 - # via awscli runai-model-streamer==0.11.0 # via -r requirements/test.in runai-model-streamer-s3==0.11.0 # via -r requirements/test.in s3transfer==0.10.3 - # via - # awscli - # boto3 + # via boto3 sacrebleu==2.4.3 # via lm-eval safetensors==0.4.5 @@ -553,6 +601,8 @@ safetensors==0.4.5 # peft # timm # transformers +schemathesis==3.39.15 + # via -r requirements/test.in scikit-learn==1.5.2 # via # librosa @@ -569,20 +619,26 @@ sentence-transformers==3.2.1 # via -r 
requirements/test.in sentencepiece==0.2.0 # via mistral-common -setuptools==75.8.0 +setuptools==77.0.3 # via + # mamba-ssm # pytablewriter # torch + # triton shellingham==1.5.4 # via typer six==1.16.0 # via + # junit-xml # python-dateutil + # rfc3339-validator # rouge-score sniffio==1.3.1 # via # anyio # httpx +sortedcontainers==2.4.0 + # via hypothesis soundfile==0.12.1 # via # -r requirements/test.in @@ -591,9 +647,15 @@ soxr==0.5.0.post1 # via librosa sqlitedict==2.1.0 # via lm-eval +starlette==0.46.2 + # via + # schemathesis + # starlette-testclient +starlette-testclient==0.4.1 + # via schemathesis statsmodels==0.14.4 # via genai-perf -sympy==1.13.1 +sympy==1.13.3 # via # einx # torch @@ -617,9 +679,15 @@ tiktoken==0.7.0 # mistral-common timm==1.0.11 # via -r requirements/test.in -tokenizers==0.21.0 - # via transformers -torch==2.6.0 +tokenizers==0.21.1 + # via + # -r requirements/test.in + # transformers +tomli==2.2.1 + # via schemathesis +tomli-w==1.2.0 + # via schemathesis +torch==2.7.0+cu128 # via # -r requirements/test.in # accelerate @@ -627,6 +695,7 @@ torch==2.6.0 # encodec # fastsafetensors # lm-eval + # mamba-ssm # peft # runai-model-streamer # sentence-transformers @@ -636,12 +705,12 @@ torch==2.6.0 # torchvision # vector-quantize-pytorch # vocos -torchaudio==2.6.0 +torchaudio==2.7.0+cu128 # via # -r requirements/test.in # encodec # vocos -torchvision==0.21.0 +torchvision==0.22.0+cu128 # via # -r requirements/test.in # timm @@ -659,17 +728,18 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.51.1 +transformers==4.51.3 # via # -r requirements/test.in # genai-perf # lm-eval + # mamba-ssm # peft # sentence-transformers # transformers-stream-generator transformers-stream-generator==0.0.5 # via -r requirements/test.in -triton==3.2.0 +triton==3.3.0 # via torch tritonclient==2.51.0 # via @@ -682,6 +752,8 @@ typepy==1.3.2 # tabledata typer==0.15.2 # via fastsafetensors +types-python-dateutil==2.9.0.20241206 + # via arrow typing-extensions==4.12.2 # via # huggingface-hub @@ -694,6 +766,8 @@ typing-extensions==4.12.2 # typer tzdata==2024.2 # via pandas +uri-template==1.3.0 + # via jsonschema urllib3==2.2.3 # via # blobfile @@ -705,6 +779,10 @@ vector-quantize-pytorch==1.21.2 # via -r requirements/test.in vocos==0.1.0 # via -r requirements/test.in +webcolors==24.11.1 + # via jsonschema +werkzeug==3.1.3 + # via schemathesis word2number==1.1 # via lm-eval xxhash==3.5.0 @@ -712,6 +790,8 @@ xxhash==3.5.0 # datasets # evaluate yarl==1.17.1 - # via aiohttp + # via + # aiohttp + # schemathesis zstandard==0.23.0 # via lm-eval diff --git a/requirements/tpu.txt b/requirements/tpu.txt index b63993ba1ee..17d57058bfa 100644 --- a/requirements/tpu.txt +++ b/requirements/tpu.txt @@ -3,12 +3,13 @@ # Dependencies for TPU cmake>=3.26 -packaging +packaging>=24.2 setuptools-scm>=8 wheel jinja2>=3.1.6 ray[default] ray[data] +setuptools==78.1.0 # Install torch_xla --pre diff --git a/requirements/xpu.txt b/requirements/xpu.txt index fa09004d0a9..04c4d4ff85a 100644 --- a/requirements/xpu.txt +++ b/requirements/xpu.txt @@ -3,14 +3,14 @@ ray>=2.9 cmake>=3.26 -packaging +packaging>=24.2 setuptools-scm>=8 -setuptools>=75.8.0 +setuptools>=77.0.3,<80.0.0 wheel jinja2>=3.1.6 datasets # for benchmark scripts -torch==2.6.0+xpu +torch==2.7.0+xpu torchaudio torchvision pytorch-triton-xpu @@ -18,6 +18,6 @@ pytorch-triton-xpu # Please refer xpu doc, we need manually install intel-extension-for-pytorch 2.6.10+xpu due to there are some conflict dependencies with torch 2.6.0+xpu # FIXME: 
This will be fix in ipex 2.7. just leave this here for awareness. -# intel-extension-for-pytorch==2.6.10+xpu -oneccl_bind_pt==2.6.0+xpu +intel-extension-for-pytorch==2.7.10+xpu +oneccl_bind_pt==2.7.0+xpu --extra-index-url=https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ diff --git a/setup.py b/setup.py index b0cc2f48163..7675fbdf3ef 100755 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def load_module_from_path(module_name, path): # fallback to cpu VLLM_TARGET_DEVICE = "cpu" -MAIN_CUDA_VERSION = "12.4" +MAIN_CUDA_VERSION = "12.8" def is_sccache_available() -> bool: @@ -269,15 +269,17 @@ def run(self): # First, run the standard build_ext command to compile the extensions super().run() - # copy vllm/vllm_flash_attn/*.py from self.build_lib to current + # copy vllm/vllm_flash_attn/**/*.py from self.build_lib to current # directory so that they can be included in the editable build import glob - files = glob.glob( - os.path.join(self.build_lib, "vllm", "vllm_flash_attn", "*.py")) + files = glob.glob(os.path.join(self.build_lib, "vllm", + "vllm_flash_attn", "**", "*.py"), + recursive=True) for file in files: dst_file = os.path.join("vllm/vllm_flash_attn", - os.path.basename(file)) + file.split("vllm/vllm_flash_attn/")[-1]) print(f"Copying {file} to {dst_file}") + os.makedirs(os.path.dirname(dst_file), exist_ok=True) self.copy_file(file, dst_file) @@ -377,13 +379,22 @@ def run(self) -> None: "vllm/_flashmla_C.abi3.so", "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so", "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so", - "vllm/vllm_flash_attn/flash_attn_interface.py", - "vllm/vllm_flash_attn/__init__.py", "vllm/cumem_allocator.abi3.so", # "vllm/_version.py", # not available in nightly wheels yet ] - file_members = filter(lambda x: x.filename in files_to_copy, - wheel.filelist) + + file_members = list( + filter(lambda x: x.filename in files_to_copy, wheel.filelist)) + + # vllm_flash_attn python code: + # Regex from + # `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)` + import re + compiled_regex = re.compile( + r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py") + file_members += list( + filter(lambda x: compiled_regex.match(x.filename), + wheel.filelist)) for file in file_members: print(f"Extracting and including {file.filename} " diff --git a/tests/compile/test_basic_correctness.py b/tests/compile/test_basic_correctness.py index 0b76779b3a7..b6b45d1cbe8 100644 --- a/tests/compile/test_basic_correctness.py +++ b/tests/compile/test_basic_correctness.py @@ -103,7 +103,8 @@ def test_compile_correctness( method = test_setting.method fullgraph = test_setting.fullgraph if cuda_device_count_stateless() != pp_size * tp_size: - pytest.skip("Not correct CUDA devices for the test.") + pytest.skip(f"Need exactly {pp_size}*{tp_size} CUDA gpus but got " + f"{cuda_device_count_stateless()}") with monkeypatch.context() as m: m.setenv("VLLM_ATTENTION_BACKEND", attn_backend) diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py index 579133ec0c3..c0940638598 100644 --- a/tests/compile/test_full_graph.py +++ b/tests/compile/test_full_graph.py @@ -20,15 +20,11 @@ def models_list(*, all: bool = True, keywords: Optional[list[str]] = None): ("facebook/opt-125m", {}), ("nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change", { "dtype": torch.float16, - "quantization": "compressed-tensors" }), ("neuralmagic/Llama-3.2-1B-Instruct-FP8-dynamic", { "dtype": torch.float16, - "quantization": "compressed-tensors" - }), - ("neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8", 
{ - "quantization": "compressed-tensors" }), + ("neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8", {}), ("meta-llama/Llama-3.2-1B-Instruct", {}), ] diff --git a/tests/compile/test_functionalization.py b/tests/compile/test_functionalization.py index 9f9b2d06b22..1e1364ce7bf 100644 --- a/tests/compile/test_functionalization.py +++ b/tests/compile/test_functionalization.py @@ -5,19 +5,19 @@ import vllm.envs as envs from vllm import LLM, SamplingParams +from vllm.compilation.activation_quant_fusion import ActivationQuantFusionPass from vllm.compilation.fix_functionalization import FixFunctionalizationPass from vllm.compilation.fusion import (FUSED_OPS, FusionPass, QuantKey, kFp8DynamicTokenSym, kFp8StaticTensorSym) from vllm.compilation.fx_utils import find_auto_fn, find_auto_fn_maybe, is_func from vllm.compilation.noop_elimination import NoOpEliminationPass -from vllm.config import CompilationConfig +from vllm.config import CompilationConfig, VllmConfig from .backend import TestBackend OPS_IN_MODEL = [ torch.ops._C.rotary_embedding.default, torch.ops._C.fused_add_rms_norm.default, - torch.ops._C.silu_and_mul.default, ] RMS_OP = torch.ops._C.rms_norm.default @@ -29,6 +29,9 @@ ], } +SILU_MUL_OP = torch.ops._C.silu_and_mul.default + +SILU_MUL_QUANT_OP = torch.ops._C.silu_and_mul_quant.default prompts = [ "Hello, my name is", "The president of the United States is", @@ -49,13 +52,17 @@ def test_fix_functionalization(model: str, quant_key: QuantKey, do_fusion: bool): torch.set_default_device("cuda") - config = CompilationConfig.PassConfig(enable_fusion=do_fusion, - enable_noop=True) - noop_pass = NoOpEliminationPass(config) - fusion_pass = FusionPass.instance(config) - - passes = [noop_pass, fusion_pass] if do_fusion else [noop_pass] - func_pass = FixFunctionalizationPass(config) + vllm_config = VllmConfig() + vllm_config.compilation_config = CompilationConfig(pass_config= \ + CompilationConfig.PassConfig(enable_fusion=do_fusion, + enable_noop=True)) + noop_pass = NoOpEliminationPass(vllm_config) + fusion_pass = FusionPass.instance(vllm_config) + act_quant_fusion_pass = ActivationQuantFusionPass(vllm_config) + + passes = [noop_pass, fusion_pass, act_quant_fusion_pass + ] if do_fusion else [noop_pass] + func_pass = FixFunctionalizationPass(vllm_config) backend_func = TestBackend(*passes, func_pass) backend_no_func = TestBackend(*passes) @@ -77,6 +84,7 @@ def test_fix_functionalization(model: str, quant_key: QuantKey, model_runner.model = torch.compile(orig_model, fullgraph=True, backend=backend_no_func) + gen_no_func = llm.generate(prompts, sampling_params) for output_func, output_no_func in zip(gen_func, gen_no_func): @@ -86,7 +94,12 @@ def test_fix_functionalization(model: str, quant_key: QuantKey, # and replaced by fused quantized ops in RMS_QUANT_OPS. 
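+    # With fusion enabled, silu_and_mul should additionally be fused into
+    # silu_and_mul_quant for the static per-tensor fp8 case; otherwise the
+    # plain silu_and_mul op is expected to remain (see silu_mul_ops below).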
rms_ops = [FUSED_OPS[(quant_key, True)], FUSED_OPS[(quant_key, False)] ] if do_fusion else [RMS_OP] - ops = OPS_IN_MODEL + rms_ops + silu_mul_ops = [SILU_MUL_QUANT_OP] if do_fusion and \ + quant_key == kFp8StaticTensorSym else [ + SILU_MUL_OP + ] + + ops = OPS_IN_MODEL + rms_ops + silu_mul_ops for op in ops: find_auto_fn(backend_no_func.graph_post_pass.nodes, op) diff --git a/tests/compile/test_fusion.py b/tests/compile/test_fusion.py index efebf05b6b0..6a696fe0226 100644 --- a/tests/compile/test_fusion.py +++ b/tests/compile/test_fusion.py @@ -77,12 +77,13 @@ def test_fusion_rmsnorm_quant(dtype, hidden_size, num_tokens, eps, static, vllm_config = VllmConfig(compilation_config=CompilationConfig( level=CompilationLevel.PIECEWISE, custom_ops=["+rms_norm"])) + vllm_config.compilation_config.pass_config = \ + CompilationConfig.PassConfig(enable_fusion=True, + enable_noop=True) with vllm.config.set_current_vllm_config(vllm_config): # Reshape pass is needed for the fusion pass to work - config = CompilationConfig.PassConfig(enable_fusion=True, - enable_noop=True) - noop_pass = NoOpEliminationPass(config) - fusion_pass = FusionPass.instance(config) + noop_pass = NoOpEliminationPass(vllm_config) + fusion_pass = FusionPass.instance(vllm_config) backend = TestBackend(noop_pass, fusion_pass) model = TestModel(hidden_size, eps, static, cutlass_fp8_enabled) diff --git a/tests/compile/test_pass_manager.py b/tests/compile/test_pass_manager.py index 2c1ee4dc748..673ebe8b6fd 100644 --- a/tests/compile/test_pass_manager.py +++ b/tests/compile/test_pass_manager.py @@ -6,7 +6,7 @@ from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass from vllm.compilation.pass_manager import PostGradPassManager -from vllm.config import CompilationConfig +from vllm.config import VllmConfig # dummy custom pass that doesn't inherit @@ -16,7 +16,7 @@ def simple_callable(graph: torch.fx.Graph): # Should fail to add directly to the pass manager def test_bad_callable(): - config = CompilationConfig().pass_config + config = VllmConfig() pass_manager = PostGradPassManager() pass_manager.configure(config) @@ -43,7 +43,7 @@ def __call__(self, graph: torch.fx.graph.Graph) -> None: ], ) def test_pass_manager_uuid(callable): - config = CompilationConfig().pass_config + config = VllmConfig() pass_manager = PostGradPassManager() pass_manager.configure(config) @@ -64,7 +64,8 @@ def test_pass_manager_uuid(callable): # UUID should be different due to config change config2 = copy.deepcopy(config) - config2.enable_fusion = not config2.enable_fusion + config2.compilation_config.pass_config.enable_fusion = not \ + config2.compilation_config.pass_config.enable_fusion pass_manager3 = PostGradPassManager() pass_manager3.configure(config2) pass_manager3.add(callable) diff --git a/tests/compile/test_sequence_parallelism.py b/tests/compile/test_sequence_parallelism.py new file mode 100644 index 00000000000..79f5486dadc --- /dev/null +++ b/tests/compile/test_sequence_parallelism.py @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: Apache-2.0 + +import pytest +import torch + +import vllm.envs as envs +from vllm.compilation.fix_functionalization import FixFunctionalizationPass +from vllm.compilation.fx_utils import (find_auto_fn, find_auto_fn_maybe, + find_specified_fn, + find_specified_fn_maybe, is_func) +from vllm.compilation.sequence_parallelism import SequenceParallelismPass +from vllm.config import (CompilationConfig, DeviceConfig, ModelConfig, + VllmConfig) +from vllm.distributed import tensor_model_parallel_all_reduce +from 
vllm.distributed.parallel_state import (init_distributed_environment, + initialize_model_parallel) +from vllm.model_executor.layers.layernorm import RMSNorm +from vllm.platforms import current_platform +from vllm.utils import update_environment_variables + +from ..utils import multi_gpu_test +from .backend import TestBackend + +OPS_IN_MODEL_BEFORE = [ + torch.ops.vllm.all_reduce.default, +] + +OPS_IN_MODEL_AFTER = [ + torch.ops.vllm.reduce_scatter.default, + torch.ops.vllm.all_gather.default, +] + +OPS_IN_MODEL = [torch.ops._C.fused_add_rms_norm.default] + +prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", +] + + +class TestModel(torch.nn.Module): + + def __init__(self, hidden_size=16, intermediate_size=32): + super().__init__() + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.gate_proj = torch.nn.Parameter( + torch.empty((intermediate_size, hidden_size))) + self.norm = RMSNorm(hidden_size, 1e-05) + # Initialize weights + torch.nn.init.normal_(self.gate_proj, std=0.02) + + def forward(self, hidden_states, residual): + """ + Forward pass implementing the operations in the FX graph + + Args: + hidden_states: Input tensor + residual: Residual tensor from previous layer + + Returns: + Tuple containing the output tensor + """ + # Reshape input + view = hidden_states.reshape(-1, self.hidden_size) + + #matrix multiplication + permute = self.gate_proj.permute(1, 0) + mm = torch.mm(view, permute) + + # Tensor parallel all-reduce + all_reduce = tensor_model_parallel_all_reduce(mm) + + # layer normalization + norm_output, residual_output = self.norm(all_reduce, residual) + + return norm_output, residual_output + + +@multi_gpu_test(num_gpus=2) +@pytest.mark.parametrize("batch_size", [8]) +@pytest.mark.parametrize("seq_len", [16]) +@pytest.mark.parametrize("hidden_size", [16]) +@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16]) +@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda"], + reason="Only test on CUDA") +def test_sequence_parallelism_pass(batch_size: int, seq_len: int, + hidden_size: int, dtype: torch.dtype): + num_processes = 2 + + def run_torch_spawn(fn, nprocs): + # need to use torch.mp.spawn otherwise will have problems with + # torch.distributed and cuda + torch.multiprocessing.spawn(fn, + args=(num_processes, batch_size, seq_len, + hidden_size, dtype), + nprocs=nprocs) + + run_torch_spawn(sequence_parallelism_pass_on_test_model, num_processes) + + +def sequence_parallelism_pass_on_test_model(local_rank: int, world_size: int, + batch_size: int, seq_len: int, + hidden_size: int, + dtype: torch.dtype): + current_platform.seed_everything(0) + + device = torch.device(f"cuda:{local_rank}") + torch.cuda.set_device(device) + torch.set_default_device(device) + torch.set_default_dtype(dtype) + + update_environment_variables({ + 'RANK': str(local_rank), + 'LOCAL_RANK': str(local_rank), + 'WORLD_SIZE': str(world_size), + 'MASTER_ADDR': 'localhost', + 'MASTER_PORT': '12345', + }) + + # initialize distributed + init_distributed_environment() + initialize_model_parallel(tensor_model_parallel_size=world_size) + + # configure vllm config for SequenceParallelismPass + vllm_config = VllmConfig() + vllm_config.compilation_config = CompilationConfig( + pass_config=CompilationConfig.PassConfig( + enable_sequence_parallelism=True, ), ) + vllm_config.device_config = DeviceConfig(device=torch.device("cuda")) + + # this is a fake model name to construct the model config + # 
in the vllm_config, it's not really used. + model = "nm-testing/TinyLlama-1.1B-Chat-v1.0-FP8-e2e" + vllm_config.model_config = ModelConfig(model=model, + task="auto", + tokenizer=model, + tokenizer_mode="auto", + trust_remote_code=True, + dtype=dtype, + seed=42) + + sequence_parallelism_pass = SequenceParallelismPass(vllm_config) + backend_no_func = TestBackend(sequence_parallelism_pass) + func_pass = FixFunctionalizationPass(vllm_config) + backend_func = TestBackend(sequence_parallelism_pass, func_pass) + + model = TestModel(hidden_size, hidden_size * 2) + hidden_states = torch.randn((batch_size * seq_len, hidden_size), + dtype=dtype) + residual = torch.randn((batch_size * seq_len, hidden_size), dtype=dtype) + + compiled_model_no_func = torch.compile(model, backend=backend_no_func) + compiled_model_no_func(hidden_states, residual) + compiled_model_func = torch.compile(model, backend=backend_func) + compiled_model_func(hidden_states, residual) + + # Check substitution worked + pre_nodes = backend_no_func.graph_pre_pass.nodes + post_nodes = backend_no_func.graph_post_pass.nodes + + # In pre-nodes, all reduce should be there, + # reduce scatter and all gather should not + for op in OPS_IN_MODEL_BEFORE: + find_specified_fn(pre_nodes, op) + for op in OPS_IN_MODEL_AFTER: + assert find_specified_fn_maybe(pre_nodes, op) is None + + # In post-nodes, reduce scatter and all gather should be there, + # all reduce should not + for op in OPS_IN_MODEL_AFTER: + find_specified_fn(post_nodes, op) + for op in OPS_IN_MODEL_BEFORE: + assert find_specified_fn_maybe(post_nodes, op) is None + + # check if the functionalization pass is applied + for op in OPS_IN_MODEL: + find_auto_fn(backend_no_func.graph_post_pass.nodes, op) + assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, + op) is None # noqa: E501 + + # make sure the ops were all de-functionalized + found = dict() + for node in backend_func.graph_post_pass.nodes: + for op in OPS_IN_MODEL: + if is_func(node, op): + found[op] = True + assert all(found[op] for op in OPS_IN_MODEL) diff --git a/tests/compile/test_silu_mul_quant_fusion.py b/tests/compile/test_silu_mul_quant_fusion.py new file mode 100644 index 00000000000..313848372e0 --- /dev/null +++ b/tests/compile/test_silu_mul_quant_fusion.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest +import torch + +import vllm.envs as envs +from vllm._custom_ops import scaled_fp8_quant +from vllm.compilation.activation_quant_fusion import ActivationQuantFusionPass +from vllm.compilation.fx_utils import find_auto_fn, find_auto_fn_maybe +from vllm.config import CompilationConfig, VllmConfig +from vllm.model_executor.layers.activation import SiluAndMul + +from .backend import TestBackend + + +class TestModel(torch.nn.Module): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.silu_and_mul = SiluAndMul() + self.scale = torch.rand(1, dtype=torch.float32) + + def forward(self, x): + y = self.silu_and_mul(x) + x2 = scaled_fp8_quant(y, self.scale) + return x2 + + +@pytest.mark.parametrize("num_tokens", [256]) +@pytest.mark.parametrize("hidden_size", [64]) +@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE != "cuda", + reason="Only test on CUDA") +def test_fusion_silu_and_mul_quant(num_tokens, hidden_size): + torch.set_default_device("cuda") + torch.set_default_dtype(torch.float16) + + # Reshape pass is needed for the fusion pass to work + config = VllmConfig() + config.compilation_config = CompilationConfig( + 
pass_config=CompilationConfig.PassConfig(enable_fusion=True, + enable_reshape=True)) + fusion_pass = ActivationQuantFusionPass(config) + + backend = TestBackend(fusion_pass) + model = TestModel() + + # First dimension dynamic + x = torch.rand(num_tokens, hidden_size) + torch._dynamo.mark_dynamic(x, 0) + + result = model(x) + + model2 = torch.compile(model, backend=backend) + result2 = model2(x) + + # Check that it gives the same answer + torch.testing.assert_close(result[0].to(dtype=torch.float16), + result2[0].to(dtype=torch.float16), + atol=1e-3, + rtol=1e-3) + + # Check substitution worked + pre_nodes = backend.graph_pre_pass.nodes + post_nodes = backend.graph_post_pass.nodes + + silu_and_mul_quant = torch.ops._C.silu_and_mul_quant.default + fp8_quant = torch.ops._C.static_scaled_fp8_quant.default + + # In pre-nodes, fp8 quant should be present and fused kernels should not + assert find_auto_fn_maybe(pre_nodes, silu_and_mul_quant) is None + find_auto_fn(pre_nodes, fp8_quant) + + # In post-nodes, fused kernels should be present and fp8 quant should not + find_auto_fn(post_nodes, silu_and_mul_quant) + assert find_auto_fn_maybe(post_nodes, fp8_quant) is None diff --git a/tests/conftest.py b/tests/conftest.py index d272f448f61..fa979f1093b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 - import json import os import tempfile -from collections import UserList from enum import Enum from typing import Any, Callable, Optional, TypedDict, TypeVar, Union @@ -21,9 +19,10 @@ from tests.models.utils import (TokensTextLogprobs, TokensTextLogprobsPromptLogprobs) from vllm import LLM, SamplingParams +from vllm.assets.audio import AudioAsset from vllm.assets.image import ImageAsset from vllm.assets.video import VideoAsset -from vllm.config import TaskOption, TokenizerPoolConfig, _get_and_verify_dtype +from vllm.config import TaskOption, _get_and_verify_dtype from vllm.connections import global_http_connection from vllm.distributed import (cleanup_dist_env_and_memory, init_distributed_environment, @@ -57,16 +56,12 @@ def _read_prompts(filename: str) -> list[str]: return prompts -class _ImageAssetPrompts(TypedDict): +class ImageAssetPrompts(TypedDict): stop_sign: str cherry_blossom: str -class _ImageAssetsBase(UserList[ImageAsset]): - pass - - -class _ImageAssets(_ImageAssetsBase): +class ImageTestAssets(list[ImageAsset]): def __init__(self) -> None: super().__init__([ @@ -74,7 +69,7 @@ def __init__(self) -> None: ImageAsset("cherry_blossom"), ]) - def prompts(self, prompts: _ImageAssetPrompts) -> list[str]: + def prompts(self, prompts: ImageAssetPrompts) -> list[str]: """ Convenience method to define the prompt for each test image. 
@@ -84,29 +79,44 @@ def prompts(self, prompts: _ImageAssetPrompts) -> list[str]: return [prompts["stop_sign"], prompts["cherry_blossom"]] -class _VideoAssetPrompts(TypedDict): - sample_demo_1: str +class VideoAssetPrompts(TypedDict): + baby_reading: str + + +class VideoTestAssets(list[VideoAsset]): + + def __init__(self) -> None: + super().__init__([ + VideoAsset("baby_reading"), + ]) + + def prompts(self, prompts: VideoAssetPrompts) -> list[str]: + return [prompts["baby_reading"]] -class _VideoAssetsBase(UserList[VideoAsset]): - pass +class AudioAssetPrompts(TypedDict): + mary_had_lamb: str + winning_call: str -class _VideoAssets(_VideoAssetsBase): +class AudioTestAssets(list[AudioAsset]): def __init__(self) -> None: super().__init__([ - VideoAsset("sample_demo_1.mp4"), + AudioAsset("mary_had_lamb"), + AudioAsset("winning_call"), ]) - def prompts(self, prompts: _VideoAssetPrompts) -> list[str]: - return [prompts["sample_demo_1"]] + def prompts(self, prompts: AudioAssetPrompts) -> list[str]: + return [prompts["mary_had_lamb"], prompts["winning_call"]] -IMAGE_ASSETS = _ImageAssets() -"""Singleton instance of :class:`_ImageAssets`.""" -VIDEO_ASSETS = _VideoAssets() -"""Singleton instance of :class:`_VideoAssets`.""" +IMAGE_ASSETS = ImageTestAssets() +"""Singleton instance of {class}`ImageTestAssets`.""" +VIDEO_ASSETS = VideoTestAssets() +"""Singleton instance of {class}`VideoTestAssets`.""" +AUDIO_ASSETS = AudioTestAssets() +"""Singleton instance of {class}`AudioTestAssets`.""" @pytest.fixture(scope="function", autouse=True) @@ -254,15 +264,20 @@ def example_long_prompts() -> list[str]: @pytest.fixture(scope="session") -def image_assets() -> _ImageAssets: +def image_assets() -> ImageTestAssets: return IMAGE_ASSETS @pytest.fixture(scope="session") -def video_assets() -> _VideoAssets: +def video_assets() -> VideoTestAssets: return VIDEO_ASSETS +@pytest.fixture(scope="session") +def audio_assets() -> AudioTestAssets: + return AUDIO_ASSETS + + _T = TypeVar("_T", nn.Module, torch.Tensor, BatchEncoding, BatchFeature, dict) _R = TypeVar("_R") @@ -272,7 +287,8 @@ class HfRunner: def get_default_device(self): from vllm.platforms import current_platform - return ("cpu" if current_platform.is_cpu() else "cuda") + return ("cpu" + if current_platform.is_cpu() else current_platform.device_type) def wrap_device(self, x: _T, device: Optional[str] = None) -> _T: if x is None or isinstance(x, (bool, )): @@ -390,10 +406,15 @@ def get_inputs( processor_kwargs["images"] = image if videos is not None and (video := videos[i]) is not None: processor_kwargs["videos"] = video - if audios is not None and (audio_tuple := audios[i]) is not None: - audio, sr = audio_tuple - processor_kwargs["audio"] = audio - processor_kwargs["sampling_rate"] = sr + if audios is not None and (audio_inputs := audios[i]) is not None: + # HACK - not all processors take sampling_rate; we should + # clean this up in the future. 
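+                # A 2-tuple is assumed to be (audio, sampling_rate); any other
+                # audio input is forwarded to the processor unchanged.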
+ if len(audio_inputs) == 2: + audio, sr = audio_inputs + processor_kwargs["audio"] = audio + processor_kwargs["sampling_rate"] = sr + else: + processor_kwargs["audio"] = audio_inputs inputs = self.processor(**processor_kwargs) if isinstance(inputs, BatchFeature): @@ -531,7 +552,10 @@ def _hidden_states_to_seq_logprobs( for _, hidden_state in enumerate(hidden_states): last_hidden_states = hidden_state[-1][0] logits = torch.matmul( - last_hidden_states.to(output_embeddings.weight.device), + last_hidden_states.to( + device=output_embeddings.weight.device, + dtype=output_embeddings.weight.dtype, + ), output_embeddings.weight.t(), ) if getattr(output_embeddings, "bias", None) is not None: @@ -700,7 +724,7 @@ def hf_runner(): class VllmRunner: """ The default value of some arguments have been modified from - :class:`~vllm.LLM` as follows: + {class}`~vllm.LLM` as follows: - `trust_remote_code`: Set to `True` instead of `False` for convenience. - `seed`: Set to `0` instead of `None` for test reproducibility. @@ -708,7 +732,7 @@ class VllmRunner: - `block_size`: Set to `16` instead of `None` to reduce memory usage. - `enable_chunked_prefill`: Set to `False` instead of `None` for test reproducibility. - - `enforce_eager`: Set to `False` instead of `None` to test CUDA graph. + - `enforce_eager`: Set to `False` to test CUDA graph. """ def __init__( @@ -749,7 +773,7 @@ def __init__( def get_inputs( self, - prompts: list[str], + prompts: Union[list[str], list[torch.Tensor]], images: Optional[PromptImageInput] = None, videos: Optional[PromptVideoInput] = None, audios: Optional[PromptAudioInput] = None, @@ -771,16 +795,18 @@ def get_inputs( if audios is not None and (audio := audios[i]) is not None: multi_modal_data["audio"] = audio - inputs.append( - TextPrompt(prompt=prompt, - multi_modal_data=multi_modal_data - if multi_modal_data else None)) + text_prompt_kwargs = { + ("prompt" if isinstance(prompt, str) else "prompt_embeds"): + prompt, + "multi_modal_data": multi_modal_data or None + } + inputs.append(TextPrompt(**text_prompt_kwargs)) return inputs def generate( self, - prompts: list[str], + prompts: Union[list[str], list[torch.Tensor]], sampling_params: SamplingParams, images: Optional[PromptImageInput] = None, videos: Optional[PromptVideoInput] = None, @@ -806,7 +832,7 @@ def generate( output_str = sample.text output_ids = list(sample.token_ids) req_sample_output_ids.append(prompt_ids + output_ids) - req_sample_output_strs.append(prompt_str + output_str) + req_sample_output_strs.append((prompt_str or "") + output_str) outputs.append((req_sample_output_ids, req_sample_output_strs)) return outputs @@ -873,7 +899,7 @@ def generate_encoder_decoder_w_logprobs( def generate_greedy( self, - prompts: list[str], + prompts: Union[list[str], list[torch.Tensor]], max_tokens: int, images: Optional[PromptImageInput] = None, videos: Optional[PromptVideoInput] = None, @@ -925,6 +951,7 @@ def generate_encoder_decoder_greedy_logprobs( max_tokens: int, num_logprobs: int, num_prompt_logprobs: Optional[int] = None, + skip_special_tokens: bool = True, ) -> Union[list[TokensTextLogprobs], list[TokensTextLogprobsPromptLogprobs]]: greedy_logprobs_params = SamplingParams( @@ -932,6 +959,7 @@ def generate_encoder_decoder_greedy_logprobs( max_tokens=max_tokens, logprobs=num_logprobs, prompt_logprobs=(num_prompt_logprobs), + skip_special_tokens=skip_special_tokens, ) ''' Greedy logprobs generation for vLLM encoder/decoder models @@ -1008,20 +1036,6 @@ def vllm_runner(): return VllmRunner -def 
get_tokenizer_pool_config(tokenizer_group_type): - if tokenizer_group_type is None: - return None - if tokenizer_group_type == "ray": - return TokenizerPoolConfig(pool_size=1, - pool_type="ray", - extra_config={}) - if isinstance(tokenizer_group_type, type): - return TokenizerPoolConfig(pool_size=1, - pool_type=tokenizer_group_type, - extra_config={}) - raise ValueError(f"Unknown tokenizer_group_type: {tokenizer_group_type}") - - @pytest.fixture() def temporary_enable_log_propagate(): import logging diff --git a/tests/core/block/e2e/test_correctness.py b/tests/core/block/e2e/test_correctness.py index e9b537ed515..9e8e315d87b 100644 --- a/tests/core/block/e2e/test_correctness.py +++ b/tests/core/block/e2e/test_correctness.py @@ -195,15 +195,15 @@ def test_lookahead_greedy_equality_with_preemption(baseline_llm_generator, ]) @pytest.mark.parametrize("per_test_common_llm_kwargs", [{ - "block_size": 8, + "block_size": 16, "max_num_batched_tokens": 2, "max_num_seqs": 2, }, { - "block_size": 8, + "block_size": 16, "max_num_batched_tokens": 3, "max_num_seqs": 2, }, { - "block_size": 8, + "block_size": 16, "max_num_batched_tokens": 256, "max_num_seqs": 10, }]) diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index 8bd64923fe2..a5ba16898d8 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -2,16 +2,18 @@ import time from collections import deque +from typing import Optional from unittest.mock import MagicMock import pytest # noqa +import torch from torch import Use # noqa from vllm.config import CacheConfig, LoRAConfig, SchedulerConfig from vllm.core.interfaces import AllocStatus from vllm.core.scheduler import Scheduler, SchedulingBudget from vllm.lora.request import LoRARequest -from vllm.sequence import SequenceGroup +from vllm.sequence import SequenceGroup, SequenceStatus from .utils import (append_new_token, append_new_token_seq, append_new_token_seq_group, create_dummy_prompt, @@ -968,3 +970,73 @@ def test_no_multiple_partial_prefills_with_chunked_prefill_and_prefix_caching( ), "A partial prefix of C (4 tokens) should be prefilled, with the " "remaining tokens fit into 3 token budget (4-1 from the seqA). It will " "then be rounded down to 2 tokens on block size, thus 6 tokens in total." + + +def test_no_batches_mixed_with_prompt_tokens_and_prompt_embeds(): + """ + Test that the scheduler does not schedule batches with prompt tokens and + prompt embeddings co-mingled. 
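+    Odd-indexed requests are fed as prompt token ids and even-indexed requests
+    as prompt embeddings; every batch produced by the scheduler must contain
+    only one of the two input kinds.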
+    """
+    block_size = 2
+    max_seq_group = 3
+    scheduler = initialize_scheduler(
+        block_size=block_size,
+        num_cpu_blocks=16,
+        num_gpu_blocks=16,
+        max_num_seqs=max_seq_group,
+        max_model_len=100,
+        enable_prefix_caching=True,
+    )
+
+    # the even indexed inputs should be passed in via embeddings,
+    # odds via token_ids
+    seq_length = 7
+    embedding_size = 5
+    num_seqs = 11
+    seq_tokens: list[list[int]] = []
+    seq_embeds: list[Optional[torch.Tensor]] = []
+    for i in range(num_seqs):
+        if i % 2:
+            seq_tokens.append(list(range(seq_length)))
+            seq_embeds.append(None)
+        else:
+            seq_tokens.append([0] * seq_length)
+            seq_embeds.append(torch.rand(embedding_size))
+
+    seq_and_seq_groups = [
+        create_dummy_prompt(f"{i}",
+                            prompt_tokens=seq_tokens[i],
+                            prompt_embeds=seq_embeds[i],
+                            block_size=block_size)
+        for i in range(len(seq_tokens))
+    ]
+
+    for _, seq_group in seq_and_seq_groups:
+        scheduler.add_seq_group(seq_group)
+
+    while not all(seq.is_finished() for seq, _ in seq_and_seq_groups):
+        unfinished_seq_groups = [
+            seq_group for _, seq_group in seq_and_seq_groups
+            if not seq_group.is_finished()
+        ]
+        _, out = schedule_and_update_computed_tokens(scheduler)
+        assert len(out.scheduled_seq_groups) > 0
+        batch_is_prompt_embeds = out.scheduled_seq_groups[
+            0].seq_group.uses_prompt_embeds()
+        expected_scheduled_seq_groups = [
+            seq_group for seq_group in unfinished_seq_groups
+            if seq_group.uses_prompt_embeds() == batch_is_prompt_embeds
+        ]
+
+        # We should have as many scheduled groups as possible, without mixing
+        assert len(out.scheduled_seq_groups) == min(
+            max_seq_group, len(expected_scheduled_seq_groups))
+        assert all(scheduled_seq_group.seq_group.uses_prompt_embeds() ==
+                   batch_is_prompt_embeds
+                   for scheduled_seq_group in out.scheduled_seq_groups)
+
+        # Finish the scheduled groups
+        for scheduled_seq_group in out.scheduled_seq_groups:
+            for seq in scheduled_seq_group.seq_group.seqs:
+                seq.status = SequenceStatus.FINISHED_STOPPED
+        scheduler.free_finished_seq_groups()
diff --git a/tests/core/utils.py b/tests/core/utils.py
index ea18b879a31..84b0426b470 100644
--- a/tests/core/utils.py
+++ b/tests/core/utils.py
@@ -5,9 +5,11 @@
 from collections.abc import Sequence as GenericSequence
 from typing import Any, Optional
 
+import torch
+
 from vllm import SamplingParams
 from vllm.core.scheduler import Scheduler, SchedulerOutputs
-from vllm.inputs import EncoderDecoderInputs, token_inputs
+from vllm.inputs import EncoderDecoderInputs, embeds_inputs, token_inputs
 from vllm.lora.request import LoRARequest
 from vllm.sequence import (Logprob, Sequence, SequenceGroup,
                            SequenceGroupMetadata)
@@ -19,6 +21,7 @@ def create_dummy_prompt(
     block_size: Optional[int] = None,
     lora_request: Optional[LoRARequest] = None,
     prompt_tokens: Optional[list[int]] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
     min_tokens: int = 0,
     max_tokens: int = 16,
 ) -> tuple[Sequence, SequenceGroup]:
@@ -31,9 +34,13 @@ def create_dummy_prompt(
         prompt_tokens = list(range(prompt_length))
 
     prompt_str = " ".join([str(t) for t in prompt_tokens])
+    inputs = token_inputs(
+        prompt_token_ids=prompt_tokens,
+        prompt=prompt_str) if prompt_embeds is None else embeds_inputs(
+            prompt_embeds=prompt_embeds)
     prompt = Sequence(
         int(request_id),
-        inputs=token_inputs(prompt_tokens, prompt=prompt_str),
+        inputs=inputs,
         block_size=block_size,
     )
     seq_group = SequenceGroup(
diff --git a/tests/distributed/conftest.py b/tests/distributed/conftest.py
new file mode 100644
index 00000000000..ee8f2097933
--- /dev/null
+++ b/tests/distributed/conftest.py
@@
-0,0 +1,145 @@ +# SPDX-License-Identifier: Apache-2.0 +import random +from typing import Optional, Union + +import msgspec +import msgspec.msgpack +import pytest +import zmq + +from vllm.config import KVEventsConfig +from vllm.distributed.kv_events import EventPublisherFactory + +from .test_events import SampleBatch + + +@pytest.fixture +def random_port(): + """Generate a random port number for testing""" + return random.randint(10000, 60000) + + +@pytest.fixture +def publisher_config(random_port, request): + """Create a publisher config with inproc transport""" + how = request.param if hasattr(request, "param") else "inproc" + + if how == "inproc": + endpoint = f"inproc://test-{random_port}" + replay_endpoint = endpoint + "-replay" + else: + endpoint = f"tcp://*:{random_port}" + replay_endpoint = f"tcp://*:{random_port + 1}" + + return KVEventsConfig(enable_kv_cache_events=True, + publisher="zmq", + endpoint=endpoint, + replay_endpoint=replay_endpoint, + buffer_steps=100, + hwm=1000, + topic="test") + + +@pytest.fixture +def publisher(publisher_config): + """Create and return a publisher instance""" + pub = EventPublisherFactory.create(publisher_config) + yield pub + pub.shutdown() + + +@pytest.fixture +def subscriber(publisher_config): + """Create and return a subscriber for testing""" + endpoint = publisher_config.endpoint + replay_endpoint = publisher_config.replay_endpoint + + if endpoint.startswith("tcp://*"): + endpoint = endpoint.replace("*", "127.0.0.1") + if replay_endpoint and replay_endpoint.startswith("tcp://*"): + replay_endpoint = replay_endpoint.replace("*", "127.0.0.1") + + sub = MockSubscriber(endpoint, replay_endpoint, publisher_config.topic) + yield sub + sub.close() + + +class MockSubscriber: + """Helper class to receive and verify published events""" + + def __init__(self, + pub_endpoint: str, + replay_endpoint: Optional[str] = None, + topic: str = "", + decode_type=SampleBatch): + self.ctx = zmq.Context.instance() + + # Set up subscriber socket + self.sub = self.ctx.socket(zmq.SUB) + self.sub.setsockopt(zmq.SUBSCRIBE, topic.encode('utf-8')) + self.sub.connect(pub_endpoint) + + # Set up replay socket if provided + self.replay = None + if replay_endpoint: + self.replay = self.ctx.socket(zmq.REQ) + self.replay.connect(replay_endpoint) + + self.topic = topic + self.topic_bytes = topic.encode('utf-8') + self.received_msgs: list[tuple[int, SampleBatch]] = [] + self.last_seq = -1 + self.decoder = msgspec.msgpack.Decoder(type=decode_type) + + def receive_one(self, + timeout=1000) -> Union[tuple[int, SampleBatch], None]: + """Receive a single message with timeout""" + if not self.sub.poll(timeout): + return None + + topic_bytes, seq_bytes, payload = self.sub.recv_multipart() + assert topic_bytes == self.topic_bytes + + seq = int.from_bytes(seq_bytes, "big") + data = self.decoder.decode(payload) + self.last_seq = seq + self.received_msgs.append((seq, data)) + return seq, data + + def request_replay(self, start_seq: int) -> None: + """Request replay of messages starting from start_seq""" + if not self.replay: + raise ValueError("Replay socket not initialized") + + self.replay.send(start_seq.to_bytes(8, "big")) + + def receive_replay(self) -> list[tuple[int, SampleBatch]]: + """Receive replayed messages""" + if not self.replay: + raise ValueError("Replay socket not initialized") + + replayed: list[tuple[int, SampleBatch]] = [] + while True: + try: + if not self.replay.poll(1000): + break + + frames = self.replay.recv_multipart() + if not frames or not frames[-1]: + # End of 
replay marker + break + + seq_bytes, payload = frames + seq = int.from_bytes(seq_bytes, "big") + data = self.decoder.decode(payload) + replayed.append((seq, data)) + except zmq.ZMQError as _: + break + + return replayed + + def close(self): + """Clean up resources""" + self.sub.close() + if self.replay: + self.replay.close() diff --git a/tests/distributed/test_comm_ops.py b/tests/distributed/test_comm_ops.py index ac6d6aae300..8f4c3537e15 100644 --- a/tests/distributed/test_comm_ops.py +++ b/tests/distributed/test_comm_ops.py @@ -14,7 +14,8 @@ from vllm.distributed import (broadcast_tensor_dict, get_pp_group, tensor_model_parallel_all_gather, - tensor_model_parallel_all_reduce) + tensor_model_parallel_all_reduce, + tensor_model_parallel_reduce_scatter) from ..utils import init_test_distributed_environment, multi_process_parallel @@ -47,6 +48,34 @@ def all_reduce_test_worker( torch.testing.assert_close(t, expected) +@ray.remote(num_gpus=1, max_calls=1) +def reduce_scatter_test_worker(monkeypatch: pytest.MonkeyPatch, tp_size: int, + pp_size: int, rank: int, + distributed_init_port: str): + # it is important to delete the CUDA_VISIBLE_DEVICES environment variable + # so that each worker can see all the GPUs + # they will be able to set the device to the correct GPU + monkeypatch.delenv("CUDA_VISIBLE_DEVICES", raising=False) + device = torch.device(f"cuda:{rank}") + torch.cuda.set_device(device) + init_test_distributed_environment(tp_size, pp_size, rank, + distributed_init_port) + + num_elements = 8 + all_tensors = [ + torch.arange(num_elements, dtype=torch.float32, device="cuda") * + (r + 1) for r in range(tp_size) + ] + + index = rank % tp_size + partition_size = num_elements // tp_size + all_reduce = torch.sum(torch.stack(all_tensors, dim=0), dim=0) + expected = all_reduce[index * partition_size:(index + 1) * partition_size] + t = all_tensors[index] + t = tensor_model_parallel_reduce_scatter(t, 0) + torch.testing.assert_close(t, expected) + + @ray.remote(num_gpus=1, max_calls=1) def all_gather_test_worker( monkeypatch: pytest.MonkeyPatch, diff --git a/tests/distributed/test_events.py b/tests/distributed/test_events.py new file mode 100644 index 00000000000..15bcfdb8555 --- /dev/null +++ b/tests/distributed/test_events.py @@ -0,0 +1,193 @@ +# SPDX-License-Identifier: Apache-2.0 +import threading +import time + +import msgspec +import pytest + +from vllm.distributed.kv_events import (EventBatch, EventPublisherFactory, + NullEventPublisher) + + +class EventSample( + msgspec.Struct, + tag=True, # type: ignore + array_like=True # type: ignore +): + """Test event for publisher testing""" + id: int + value: str + + +class SampleBatch(EventBatch): + """Test event batch for publisher testing""" + events: list[EventSample] + + +def create_test_events(count: int) -> SampleBatch: + """Create a batch of test events""" + events = [EventSample(id=i, value=f"test-{i}") for i in range(count)] + return SampleBatch(ts=time.time(), events=events) + + +def test_basic_publishing(publisher, subscriber): + """Test basic event publishing works""" + + test_batch = create_test_events(5) + publisher.publish(test_batch) + + result = subscriber.receive_one(timeout=1000) + assert result is not None, "No message received" + + seq, received = result + assert seq == 0, "Sequence number mismatch" + assert received.ts == pytest.approx(test_batch.ts, + abs=0.1), ("Timestamp mismatch") + assert len(received.events) == len( + test_batch.events), ("Number of events mismatch") + + for i, event in enumerate(received.events): + 
assert event.id == i, "Event id mismatch" + assert event.value == f"test-{i}", "Event value mismatch" + + +def test_multiple_events(publisher, subscriber): + """Test publishing and receiving multiple event batches""" + for _ in range(10): + batch = create_test_events(2) + publisher.publish(batch) + + received = [] + for _ in range(10): + data = subscriber.receive_one(timeout=100) + if data: + received.append(data) + + assert len(received) == 10, "Number of messages mismatch" + seqs = [seq for seq, _ in received] + assert seqs == list(range(10)), "Sequence numbers mismatch" + + +def test_replay_mechanism(publisher, subscriber): + """Test the replay mechanism works correctly""" + for _ in range(19): + batch = create_test_events(1) + publisher.publish(batch) + + time.sleep(0.5) # Need publisher to process above requests + subscriber.request_replay(10) + + batch = create_test_events(1) + publisher.publish(batch) # 20th message + + replayed = subscriber.receive_replay() + + assert len(replayed) > 0, "No replayed messages received" + seqs = [seq for seq, _ in replayed] + assert all(seq >= 10 for seq in seqs), "Replayed messages not in order" + assert seqs == list(range(min(seqs), + max(seqs) + + 1)), ("Replayed messages not consecutive") + + +def test_buffer_limit(publisher, subscriber, publisher_config): + """Test buffer limit behavior""" + buffer_size = publisher_config.buffer_steps + + # Publish more events than the buffer can hold + for i in range(buffer_size + 10): + batch = create_test_events(1) + publisher.publish(batch) + + time.sleep(0.5) # Need publisher to process above requests + subscriber.request_replay(0) + + batch = create_test_events(1) + publisher.publish(batch) + + replayed = subscriber.receive_replay() + + assert len(replayed) <= buffer_size, "Can't replay more than buffer size" + + oldest_seq = min(seq for seq, _ in replayed) + assert oldest_seq >= 10, "The oldest sequence should be at least 10" + + +def test_topic_filtering(publisher_config): + """ + Test that a subscriber only receives messages matching its topic filter + """ + publisher_config.replay_endpoint = None + + cfg = publisher_config.model_copy() + cfg.topic = "foo" + pub = EventPublisherFactory.create(cfg) + + from .conftest import MockSubscriber + sub_foo = MockSubscriber(cfg.endpoint, None, "foo") + sub_bar = MockSubscriber(cfg.endpoint, None, "bar") + + try: + time.sleep(0.1) + + for _ in range(3): + pub.publish(create_test_events(1)) + + foo_received = [sub_foo.receive_one(timeout=200) for _ in range(3)] + assert all(msg is not None for msg in foo_received), ( + "Subscriber with matching topic should receive messages") + + bar_received = [sub_bar.receive_one(timeout=200) for _ in range(3)] + assert all(msg is None for msg in bar_received), ( + "Subscriber with non-matching topic should receive no messages") + finally: + pub.shutdown() + sub_foo.close() + sub_bar.close() + + +def test_high_volume(publisher, subscriber): + """Test publishing and receiving a high volume of events""" + num_batches = 10_000 + events_per_batch = 100 + + # Publish events in a separate thread to not block + def publish_events(): + for i in range(num_batches): + batch = create_test_events(events_per_batch) + publisher.publish(batch) + # Small delay to avoid overwhelming + if i % 100 == 0: + time.sleep(0.01) + + received: list[tuple[int, SampleBatch]] = [] + + publisher_thread = threading.Thread(target=publish_events) + publisher_thread.start() + + start_time = time.time() + while len(received) < num_batches: + if time.time() - 
start_time > 10: # Timeout after 10 seconds + break + + result = subscriber.receive_one(timeout=100) + if result: + received.append(result) + + publisher_thread.join() + + assert len(received) >= num_batches * 0.9, ( + "We should have received most messages") + + seqs = [seq for seq, _ in received] + assert sorted(seqs) == seqs, "Sequence numbers should be in order" + + +def test_null_publisher(): + """Test that NullEventPublisher can be used without errors""" + publisher = NullEventPublisher() + + # This should not raise any errors + batch = create_test_events(5) + publisher.publish(batch) + publisher.shutdown() diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index 05e30f855ce..03de8d9b92b 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -161,12 +161,12 @@ def iter_params(self, model_id: str): "deepseek-ai/DeepSeek-V2-Lite-Chat": PPTestSettings.fast(), "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct": PPTestSettings.fast(), "tiiuae/falcon-7b": PPTestSettings.fast(), - "google/gemma-2b": PPTestSettings.fast(), + "google/gemma-1.1-2b-it": PPTestSettings.fast(), "google/gemma-2-9b": PPTestSettings.fast(), "gpt2": PPTestSettings.fast(), "bigcode/starcoder": PPTestSettings.fast(), "EleutherAI/gpt-j-6b": PPTestSettings.fast(), - "EleutherAI/pythia-12b": PPTestSettings.fast(), + "EleutherAI/pythia-1.4b": PPTestSettings.fast(), "ibm/PowerLM-3b": PPTestSettings.fast(), "ibm/PowerMoE-3b": PPTestSettings.fast(), # Uses Llama @@ -195,7 +195,7 @@ def iter_params(self, model_id: str): "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(), "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(multi_node_only=True, load_format="dummy"), # noqa: E501 "Qwen/Qwen-7B-Chat": PPTestSettings.fast(), - "Qwen/Qwen2-7B-Instruct": PPTestSettings.fast(), + "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(), "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(), "stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(), "bigcode/starcoder2-3b": PPTestSettings.fast(), diff --git a/tests/distributed/test_sequence_parallel.py b/tests/distributed/test_sequence_parallel.py new file mode 100644 index 00000000000..19497ad9c14 --- /dev/null +++ b/tests/distributed/test_sequence_parallel.py @@ -0,0 +1,296 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +WARNING: This test runs in both single-node (4 GPUs) and multi-node + (2 node with 2 GPUs each) modes. If the test only uses 2 GPUs, it is + important to set the distributed backend to "mp" to avoid Ray scheduling + all workers in a node other than the head node, which can cause the test + to fail. 
+""" +import json +import os +from dataclasses import dataclass +from typing import Literal, NamedTuple, Optional + +import pytest + +from vllm.config import TaskOption +from vllm.logger import init_logger + +from ..models.registry import HF_EXAMPLE_MODELS +from ..utils import compare_two_settings, create_new_process_for_each_test + +logger = init_logger("test_sequence_parallel") + +VLLM_MULTI_NODE = os.getenv("VLLM_MULTI_NODE", "0") == "1" + + +class ParallelSetup(NamedTuple): + tp_size: int + sp_enabled: bool + eager_mode: bool + chunked_prefill: bool + + +class SPTestOptions(NamedTuple): + multi_node_only: bool + load_format: Optional[str] = None + + +@dataclass +class SPTestSettings: + parallel_setups: list[ParallelSetup] + # NOTE: the length of distributed_backends and + # vllm_major_versions should be the same, and they + # are first zipped together to iterate over all + # test settings. + distributed_backends: list[str] + # vllm major version: "0" for V0, "1" for V1 + vllm_major_versions: list[str] + task: TaskOption + test_options: SPTestOptions + + def __post_init__(self): + if len(self.distributed_backends) != len(self.vllm_major_versions): + raise ValueError( + f"Length mismatch: distributed_backends " + f"({len(self.distributed_backends)}) != " + f"vllm_major_versions ({len(self.vllm_major_versions)})") + + @staticmethod + def detailed( + *, + tp_base: int = 2, + multi_node_only: bool = False, + task: TaskOption = "auto", + load_format: Optional[str] = None, + ): + return SPTestSettings( + parallel_setups=[ + ParallelSetup(tp_size=tp_base, + sp_enabled=True, + eager_mode=False, + chunked_prefill=False), + ParallelSetup(tp_size=tp_base, + sp_enabled=True, + eager_mode=False, + chunked_prefill=True), + ParallelSetup(tp_size=tp_base, + sp_enabled=True, + eager_mode=True, + chunked_prefill=False), + ParallelSetup(tp_size=tp_base, + sp_enabled=True, + eager_mode=True, + chunked_prefill=True) + ], + distributed_backends=["mp", "ray"], + vllm_major_versions=["1", "1"], + task=task, + test_options=SPTestOptions(multi_node_only=multi_node_only, + load_format=load_format), + ) + + @staticmethod + def fast( + *, + tp_base: int = 2, + task: TaskOption = "auto", + multi_node_only: bool = False, + load_format: Optional[str] = None, + ): + return SPTestSettings( + parallel_setups=[ + ParallelSetup(tp_size=tp_base, + sp_enabled=True, + eager_mode=False, + chunked_prefill=False), + ], + distributed_backends=["mp", "ray"], + vllm_major_versions=["1", "1"], + task=task, + test_options=SPTestOptions(multi_node_only=multi_node_only, + load_format=load_format), + ) + + def iter_params(self, model_id: str): + opts = self.test_options + + for parallel_setup in self.parallel_setups: + for backend, vllm_major_version in zip(self.distributed_backends, + self.vllm_major_versions): + yield (model_id, parallel_setup, backend, vllm_major_version, + self.task, opts) + + +def _compare_sp( + model_id: str, + parallel_setup: ParallelSetup, + distributed_backend: str, + vllm_major_version: str, + task: TaskOption, + test_options: SPTestOptions, + num_gpus_available: int, + *, + method: Literal["generate", "encode"], + is_multimodal: bool, +): + ( + tp_size, + sp_enabled, + eager_mode, + chunked_prefill, + ) = parallel_setup + + multi_node_only, load_format = test_options + + model_info = HF_EXAMPLE_MODELS.find_hf_info(model_id) + model_info.check_transformers_version(on_fail="skip") + + trust_remote_code = model_info.trust_remote_code + tokenizer_mode = model_info.tokenizer_mode + hf_overrides = 
model_info.hf_overrides + + if load_format == "dummy": + # Avoid OOM + text_overrides = { + "num_hidden_layers": 4, + "hidden_size": 512, + "intermediate_size": 800, + "num_attention_heads": 4, + "num_key_value_heads": 1, + } + + if is_multimodal: + hf_overrides.update({"text_config": text_overrides}) + else: + hf_overrides.update(text_overrides) + else: + model_info.check_available_online(on_fail="skip") + + pp_size = 1 + if num_gpus_available < tp_size * pp_size: + pytest.skip(f"Need at least {tp_size} x {pp_size} GPUs") + if VLLM_MULTI_NODE and distributed_backend == "mp": + pytest.skip("Skipping multi-node pipeline parallel test for " + "multiprocessing distributed backend") + if multi_node_only and not VLLM_MULTI_NODE: + pytest.skip("Not in multi-node setting") + + common_args = [ + # use half precision for speed and memory savings in CI environment + "--dtype", + "float16", + "--max-model-len", + "2048", + "--max-num-seqs", + "8", + ] + if chunked_prefill: + common_args.append("--enable-chunked-prefill") + if eager_mode: + common_args.append("--enforce-eager") + if task != "auto": + common_args.extend(["--task", task]) + if trust_remote_code: + common_args.append("--trust-remote-code") + if tokenizer_mode: + common_args.extend(["--tokenizer-mode", tokenizer_mode]) + if load_format: + common_args.extend(["--load-format", load_format]) + if hf_overrides: + common_args.extend(["--hf-overrides", json.dumps(hf_overrides)]) + + compilation_config = { + 'level': 3, + 'custom_ops': ["+rms_norm"], + 'compile_sizes': [4, 8], + 'splitting_ops': [], + 'pass_config': { + 'enable_sequence_parallism': sp_enabled, + 'enable_noop': True, + 'enable_fusion': True, + }, + } + + tp_sp_env = tp_env = { + "VLLM_USE_V1": vllm_major_version, + } + + tp_sp_args = [ + *common_args, + "--tensor-parallel-size", + str(tp_size), + "--distributed-executor-backend", + distributed_backend, + "--compilation_config", + str(compilation_config), + ] + + tp_env = { + "VLLM_USE_V1": vllm_major_version, + } + tp_args = [ + *common_args, + "--tensor-parallel-size", + str(tp_size), + "--distributed-executor-backend", + "mp", + ] + + try: + compare_two_settings(model_id, + tp_sp_args, + tp_args, + tp_sp_env, + tp_env, + method=method) + except Exception: + testing_ray_compiled_graph = tp_sp_env is not None + if testing_ray_compiled_graph and vllm_major_version == "0": + # Ray Compiled Graph tests are flaky for V0, + # so we don't want to fail the test + logger.exception("Ray Compiled Graph tests failed") + else: + raise + + +SP_TEXT_GENERATION_MODELS = { + # [Decoder-only] + "meta-llama/Llama-3.2-1B-Instruct": SPTestSettings.detailed(), +} + +SP_TEST_MODELS = [ + # TODO support other models + # [LANGUAGE GENERATION] + "meta-llama/Llama-3.2-1B-Instruct", +] + + +@pytest.mark.parametrize( + ("model_id", "parallel_setup", "distributed_backend", "vllm_major_version", + "task", "test_options"), + [ + params for model_id, settings in SP_TEXT_GENERATION_MODELS.items() + for params in settings.iter_params(model_id) + if model_id in SP_TEST_MODELS + ], +) +@create_new_process_for_each_test() +def test_tp_sp_generation( + model_id: str, + parallel_setup: ParallelSetup, + distributed_backend: str, + vllm_major_version: str, + task: TaskOption, + test_options: SPTestOptions, + num_gpus_available, +): + _compare_sp(model_id, + parallel_setup, + distributed_backend, + vllm_major_version, + task, + test_options, + num_gpus_available, + method="generate", + is_multimodal=False) diff --git a/tests/engine/test_arg_utils.py 
b/tests/engine/test_arg_utils.py index 92387b46425..65471cb3af3 100644 --- a/tests/engine/test_arg_utils.py +++ b/tests/engine/test_arg_utils.py @@ -1,16 +1,151 @@ # SPDX-License-Identifier: Apache-2.0 +import json from argparse import ArgumentError, ArgumentTypeError +from contextlib import nullcontext +from dataclasses import dataclass, field +from typing import Literal, Optional import pytest -from vllm.config import PoolerConfig -from vllm.engine.arg_utils import EngineArgs, nullable_kvs +from vllm.config import config +from vllm.engine.arg_utils import (EngineArgs, contains_type, get_kwargs, + get_type, is_not_builtin, is_type, + literal_to_kwargs, nullable_kvs, + optional_type) from vllm.utils import FlexibleArgumentParser +@pytest.mark.parametrize(("type", "value", "expected"), [ + (int, "42", 42), + (int, "None", None), + (float, "3.14", 3.14), + (float, "None", None), + (str, "Hello World!", "Hello World!"), + (str, "None", None), + (json.loads, '{"foo":1,"bar":2}', { + "foo": 1, + "bar": 2 + }), + (json.loads, "foo=1,bar=2", { + "foo": 1, + "bar": 2 + }), + (json.loads, "None", None), +]) +def test_optional_type(type, value, expected): + optional_type_func = optional_type(type) + context = nullcontext() + if value == "foo=1,bar=2": + context = pytest.warns(DeprecationWarning) + with context: + assert optional_type_func(value) == expected + + +@pytest.mark.parametrize(("type_hint", "type", "expected"), [ + (int, int, True), + (int, float, False), + (list[int], list, True), + (list[int], tuple, False), + (Literal[0, 1], Literal, True), +]) +def test_is_type(type_hint, type, expected): + assert is_type(type_hint, type) == expected + + +@pytest.mark.parametrize(("type_hints", "type", "expected"), [ + ({float, int}, int, True), + ({int, tuple[int]}, int, True), + ({int, tuple[int]}, float, False), + ({str, Literal["x", "y"]}, Literal, True), +]) +def test_contains_type(type_hints, type, expected): + assert contains_type(type_hints, type) == expected + + +@pytest.mark.parametrize(("type_hints", "type", "expected"), [ + ({int, float}, int, int), + ({int, float}, str, None), + ({str, Literal["x", "y"]}, Literal, Literal["x", "y"]), +]) +def test_get_type(type_hints, type, expected): + assert get_type(type_hints, type) == expected + + +@pytest.mark.parametrize(("type_hints", "expected"), [ + ({Literal[1, 2]}, { + "type": int, + "choices": [1, 2] + }), + ({Literal[1, "a"]}, Exception), +]) +def test_literal_to_kwargs(type_hints, expected): + context = nullcontext() + if expected is Exception: + context = pytest.raises(expected) + with context: + assert literal_to_kwargs(type_hints) == expected + + +@config +@dataclass +class DummyConfigClass: + regular_bool: bool = True + """Regular bool with default True""" + optional_bool: Optional[bool] = None + """Optional bool with default None""" + optional_literal: Optional[Literal["x", "y"]] = None + """Optional literal with default None""" + tuple_n: tuple[int, ...] 
= field(default_factory=lambda: (1, 2, 3)) + """Tuple with variable length""" + tuple_2: tuple[int, int] = field(default_factory=lambda: (1, 2)) + """Tuple with fixed length""" + list_n: list[int] = field(default_factory=lambda: [1, 2, 3]) + """List with variable length""" + list_literal: list[Literal[1, 2]] = field(default_factory=list) + """List with literal choices""" + literal_literal: Literal[Literal[1], Literal[2]] = 1 + """Literal of literals with default 1""" + json_tip: dict = field(default_factory=dict) + """Dict which will be JSON in CLI""" + + +@pytest.mark.parametrize(("type_hint", "expected"), [ + (int, False), + (DummyConfigClass, True), +]) +def test_is_not_builtin(type_hint, expected): + assert is_not_builtin(type_hint) == expected + + +def test_get_kwargs(): + kwargs = get_kwargs(DummyConfigClass) + print(kwargs) + + # bools should not have their type set + assert kwargs["regular_bool"].get("type") is None + assert kwargs["optional_bool"].get("type") is None + # optional literals should have None as a choice + assert kwargs["optional_literal"]["choices"] == ["x", "y", "None"] + # tuples should have the correct nargs + assert kwargs["tuple_n"]["nargs"] == "+" + assert kwargs["tuple_2"]["nargs"] == 2 + # lists should work + assert kwargs["list_n"]["type"] is int + assert kwargs["list_n"]["nargs"] == "+" + # lists with literals should have the correct choices + assert kwargs["list_literal"]["type"] is int + assert kwargs["list_literal"]["nargs"] == "+" + assert kwargs["list_literal"]["choices"] == [1, 2] + # literals of literals should have merged choices + assert kwargs["literal_literal"]["choices"] == [1, 2] + # dict should have json tip in help + json_tip = "\n\nShould be a valid JSON string." + assert kwargs["json_tip"]["help"].endswith(json_tip) + + @pytest.mark.parametrize(("arg", "expected"), [ - (None, None), + (None, dict()), ("image=16", { "image": 16 }), @@ -24,6 +159,10 @@ }), ]) def test_limit_mm_per_prompt_parser(arg, expected): + """This functionality is deprecated and will be removed in the future. + This argument should be passed as JSON string instead. 
+ + TODO: Remove with nullable_kvs.""" parser = EngineArgs.add_cli_args(FlexibleArgumentParser()) if arg is None: args = parser.parse_args([]) @@ -53,12 +192,20 @@ def test_compilation_config(): assert args.compilation_config.level == 3 # set to string form of a dict - args = parser.parse_args(["--compilation-config", "{'level': 3}"]) - assert args.compilation_config.level == 3 + args = parser.parse_args([ + "--compilation-config", + "{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}", + ]) + assert (args.compilation_config.level == 3 and + args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8]) # set to string form of a dict - args = parser.parse_args(["--compilation-config={'level': 3}"]) - assert args.compilation_config.level == 3 + args = parser.parse_args([ + "--compilation-config=" + "{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}", + ]) + assert (args.compilation_config.level == 3 and + args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8]) def test_prefix_cache_default(): @@ -80,17 +227,6 @@ def test_prefix_cache_default(): assert not engine_args.enable_prefix_caching -def test_valid_pooling_config(): - parser = EngineArgs.add_cli_args(FlexibleArgumentParser()) - args = parser.parse_args([ - '--override-pooler-config', - '{"pooling_type": "MEAN"}', - ]) - engine_args = EngineArgs.from_cli_args(args=args) - assert engine_args.override_pooler_config == PoolerConfig( - pooling_type="MEAN", ) - - @pytest.mark.parametrize( ("arg"), [ diff --git a/tests/engine/test_options.py b/tests/engine/test_options.py new file mode 100644 index 00000000000..0cf4f69d56a --- /dev/null +++ b/tests/engine/test_options.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: Apache-2.0 +from contextlib import nullcontext + +import pytest + +from vllm.entrypoints.llm import LLM +from vllm.sampling_params import SamplingParams + + +@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) +def test_skip_tokenizer_initialization(model: str): + # This test checks if the flag skip_tokenizer_init skips the initialization + # of tokenizer and detokenizer. The generated output is expected to contain + # token ids. + llm = LLM( + model=model, + skip_tokenizer_init=True, + enforce_eager=True, + ) + sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True) + + with pytest.raises(ValueError, match="cannot pass text prompts when"): + llm.generate("abc", sampling_params) + + outputs = llm.generate({"prompt_token_ids": [1, 2, 3]}, + sampling_params=sampling_params) + assert len(outputs) > 0 + completions = outputs[0].outputs + assert len(completions) > 0 + assert completions[0].text == "" + assert completions[0].token_ids + + +@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) +@pytest.mark.parametrize("enable_prompt_embeds", [True, False]) +def test_enable_prompt_embeds(hf_runner, model: str, + enable_prompt_embeds: bool): + prompt = "abc" + + with hf_runner(model) as hf_model: + token_ids = hf_model.tokenizer(prompt, return_tensors="pt").input_ids + token_ids = token_ids.to(hf_model.model.device) + + embed_layer = hf_model.model.get_input_embeddings() + prompt_embeds = embed_layer(token_ids).squeeze(0) + + ctx = (nullcontext() if enable_prompt_embeds else pytest.raises( + ValueError, match="set `--enable-prompt-embeds`")) + + # This test checks if the flag skip_tokenizer_init skips the initialization + # of tokenizer and detokenizer. The generated output is expected to contain + # token ids. 
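+    # Build the engine with the flag under test: generating from
+    # prompt_embeds should only be accepted when --enable-prompt-embeds
+    # is set, otherwise the ValueError asserted above is expected.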
+ llm = LLM( + model=model, + enable_prompt_embeds=enable_prompt_embeds, + enforce_eager=True, + ) + + with ctx: + llm.generate({"prompt_embeds": prompt_embeds}) diff --git a/tests/engine/test_skip_tokenizer_init.py b/tests/engine/test_skip_tokenizer_init.py deleted file mode 100644 index 5e197f5ffe5..00000000000 --- a/tests/engine/test_skip_tokenizer_init.py +++ /dev/null @@ -1,29 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from vllm.entrypoints.llm import LLM -from vllm.sampling_params import SamplingParams - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -def test_skip_tokenizer_initialization(model: str): - # This test checks if the flag skip_tokenizer_init skips the initialization - # of tokenizer and detokenizer. The generated output is expected to contain - # token ids. - llm = LLM( - model=model, - skip_tokenizer_init=True, - ) - sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True) - - with pytest.raises(ValueError, match="cannot pass text prompts when"): - llm.generate("abc", sampling_params) - - outputs = llm.generate({"prompt_token_ids": [1, 2, 3]}, - sampling_params=sampling_params) - assert len(outputs) > 0 - completions = outputs[0].outputs - assert len(completions) > 0 - assert completions[0].text == "" - assert completions[0].token_ids diff --git a/tests/entrypoints/llm/test_chat.py b/tests/entrypoints/llm/test_chat.py index e96081c167e..742a6668344 100644 --- a/tests/entrypoints/llm/test_chat.py +++ b/tests/entrypoints/llm/test_chat.py @@ -1,15 +1,31 @@ # SPDX-License-Identifier: Apache-2.0 +import weakref import pytest from vllm import LLM +from vllm.distributed import cleanup_dist_env_and_memory from ..openai.test_vision import TEST_IMAGE_URLS -def test_chat(): - llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct") +@pytest.fixture(scope="function") +def text_llm(): + # pytest caches the fixture so we use weakref.proxy to + # enable garbage collection + llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct", + enforce_eager=True, + seed=0) + with llm.deprecate_legacy_api(): + yield weakref.proxy(llm) + + del llm + + cleanup_dist_env_and_memory() + + +def test_chat(text_llm): prompt1 = "Explain the concept of entropy." messages = [ { @@ -21,13 +37,11 @@ def test_chat(): "content": prompt1 }, ] - outputs = llm.chat(messages) + outputs = text_llm.chat(messages) assert len(outputs) == 1 -def test_multi_chat(): - llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct") - +def test_multi_chat(text_llm): prompt1 = "Explain the concept of entropy." prompt2 = "Explain what among us is." 
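The fixtures introduced above move engine construction out of the individual tests and tear the engine down deterministically. A minimal sketch of that teardown pattern in isolation (the fixture name and model below are placeholders, not part of this change): the fixture yields a weakref.proxy so it keeps the only strong reference to the LLM, then deletes it and calls cleanup_dist_env_and_memory() so distributed state and GPU memory are actually released between tests.

import weakref

import pytest

from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory


@pytest.fixture(scope="function")
def small_llm():
    # Placeholder fixture mirroring the text_llm/vision_llm fixtures above.
    llm = LLM(model="distilbert/distilgpt2", enforce_eager=True, seed=0)
    # Yield a proxy so tests never hold a strong reference to the engine.
    yield weakref.proxy(llm)
    # Drop the last strong reference, then free distributed state and memory.
    del llm
    cleanup_dist_env_and_memory()
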
@@ -55,13 +69,14 @@ def test_multi_chat(): messages = [conversation1, conversation2] - outputs = llm.chat(messages) + outputs = text_llm.chat(messages) assert len(outputs) == 2 -@pytest.mark.parametrize("image_urls", - [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]]) -def test_chat_multi_image(image_urls: list[str]): +@pytest.fixture(scope="function") +def vision_llm(): + # pytest caches the fixture so we use weakref.proxy to + # enable garbage collection llm = LLM( model="microsoft/Phi-3.5-vision-instruct", max_model_len=4096, @@ -69,8 +84,20 @@ def test_chat_multi_image(image_urls: list[str]): enforce_eager=True, trust_remote_code=True, limit_mm_per_prompt={"image": 2}, + seed=0, ) + with llm.deprecate_legacy_api(): + yield weakref.proxy(llm) + + del llm + + cleanup_dist_env_and_memory() + + +@pytest.mark.parametrize("image_urls", + [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]]) +def test_chat_multi_image(vision_llm, image_urls: list[str]): messages = [{ "role": "user", @@ -87,5 +114,83 @@ def test_chat_multi_image(image_urls: list[str]): }, ], }] - outputs = llm.chat(messages) + outputs = vision_llm.chat(messages) assert len(outputs) >= 0 + + +def test_llm_chat_tokenization_no_double_bos(text_llm): + """ + LLM.chat() should not add special tokens when using chat templates. + Check we get a single BOS token for llama chat. + """ + messages = [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Hello!" + }, + ] + outputs = text_llm.chat(messages) + assert len(outputs) == 1 + + prompt_token_ids = outputs[0].prompt_token_ids + assert prompt_token_ids is not None + + bos_token = text_llm.get_tokenizer().bos_token_id + + # Ensure we have a single BOS + assert prompt_token_ids[0] == bos_token + assert prompt_token_ids[1] != bos_token, "Double BOS" + + +@pytest.fixture(scope="function") +def thinking_llm(): + # pytest caches the fixture so we use weakref.proxy to + # enable garbage collection + llm = LLM( + model="Qwen/Qwen3-0.6B", + max_model_len=4096, + enforce_eager=True, + seed=0, + ) + + with llm.deprecate_legacy_api(): + yield weakref.proxy(llm) + + del llm + + cleanup_dist_env_and_memory() + + +@pytest.mark.parametrize("enable_thinking", [True, False]) +def test_chat_extra_kwargs(thinking_llm, enable_thinking): + messages = [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is 1+1?" 
+ }, + ] + + outputs = thinking_llm.chat( + messages, + chat_template_kwargs={"enable_thinking": enable_thinking}, + ) + assert len(outputs) == 1 + + prompt_token_ids = outputs[0].prompt_token_ids + assert prompt_token_ids is not None + + think_id = thinking_llm.get_tokenizer().get_vocab()[""] + + if enable_thinking: + assert think_id not in prompt_token_ids + else: + # The chat template includes dummy thinking process + assert think_id in prompt_token_ids diff --git a/tests/entrypoints/llm/test_guided_generate.py b/tests/entrypoints/llm/test_guided_generate.py index e43e9826e8f..fdbdccd4654 100644 --- a/tests/entrypoints/llm/test_guided_generate.py +++ b/tests/entrypoints/llm/test_guided_generate.py @@ -16,10 +16,11 @@ MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct" GUIDED_DECODING_BACKENDS = [ - "outlines", - "lm-format-enforcer", - "xgrammar:disable-any-whitespace", - "guidance:disable-any-whitespace", + # (backend, disable_any_whitespace), + ("outlines", False), + ("lm-format-enforcer", False), + ("xgrammar", True), + ("guidance", True), ] @@ -36,13 +37,17 @@ def llm(): @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) -def test_guided_regex(sample_regex, llm, guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=0.8, - top_p=0.95, - guided_decoding=GuidedDecodingParams( - regex=sample_regex, - backend=guided_decoding_backend)) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) +def test_guided_regex(sample_regex, llm, guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=0.8, + top_p=0.95, + guided_decoding=GuidedDecodingParams( + regex=sample_regex, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate(prompts=[ f"Give an example IPv4 address with this regex: {sample_regex}" ] * 2, @@ -62,14 +67,18 @@ def test_guided_regex(sample_regex, llm, guided_decoding_backend: str): @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) def test_guided_json_completion(sample_json_schema, llm, - guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=1.0, - max_tokens=1000, - guided_decoding=GuidedDecodingParams( - json=sample_json_schema, - backend=guided_decoding_backend)) + guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=sample_json_schema, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate(prompts=[ f"Give an example JSON for an employee profile " f"that fits this schema: {sample_json_schema}" @@ -92,14 +101,18 @@ def test_guided_json_completion(sample_json_schema, llm, @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) def test_guided_complex_json_completion(sample_complex_json_schema, llm, - guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=1.0, - max_tokens=1000, - guided_decoding=GuidedDecodingParams( - json=sample_complex_json_schema, - backend=guided_decoding_backend)) + 
guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=sample_complex_json_schema, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate(prompts=[ f"Give an example JSON for an assignment grade " f"that fits this schema: {sample_complex_json_schema}" @@ -123,14 +136,18 @@ def test_guided_complex_json_completion(sample_complex_json_schema, llm, @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) def test_guided_definition_json_completion(sample_definition_json_schema, llm, - guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=1.0, - max_tokens=1000, - guided_decoding=GuidedDecodingParams( - json=sample_definition_json_schema, - backend=guided_decoding_backend)) + guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=sample_definition_json_schema, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate(prompts=[ f"Give an example JSON for solving 8x + 7 = -23 " f"that fits this schema: {sample_definition_json_schema}" @@ -154,14 +171,18 @@ def test_guided_definition_json_completion(sample_definition_json_schema, llm, @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) def test_guided_enum_json_completion(sample_enum_json_schema, llm, - guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=1.0, - max_tokens=1000, - guided_decoding=GuidedDecodingParams( - json=sample_enum_json_schema, - backend=guided_decoding_backend)) + guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=sample_enum_json_schema, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate(prompts=[ "Create a bug report JSON that fits this schema: " f"{sample_enum_json_schema}. Make it for a high priority critical bug." 
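As the hunks above show, per-backend options are no longer encoded in the backend string (e.g. "xgrammar:disable-any-whitespace"); they are passed as explicit GuidedDecodingParams fields. A minimal sketch of the new call shape, using a toy schema that is only for illustration:

from vllm.sampling_params import GuidedDecodingParams, SamplingParams

# Toy schema, for illustration only.
schema = {"type": "object", "properties": {"name": {"type": "string"}}}

sampling_params = SamplingParams(
    temperature=1.0,
    max_tokens=256,
    guided_decoding=GuidedDecodingParams(
        json=schema,
        backend="xgrammar",
        # Formerly spelled as the "xgrammar:disable-any-whitespace" backend string.
        disable_any_whitespace=True,
    ),
)
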
@@ -195,14 +216,18 @@ def test_guided_enum_json_completion(sample_enum_json_schema, llm, @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) def test_guided_choice_completion(sample_guided_choice, llm, - guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=0.8, - top_p=0.95, - guided_decoding=GuidedDecodingParams( - choice=sample_guided_choice, - backend=guided_decoding_backend)) + guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=0.8, + top_p=0.95, + guided_decoding=GuidedDecodingParams( + choice=sample_guided_choice, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate( prompts="The best language for type-safe systems programming is ", sampling_params=sampling_params, @@ -221,15 +246,19 @@ def test_guided_choice_completion(sample_guided_choice, llm, @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) def test_guided_grammar(sample_sql_statements, llm, - guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=0.8, - top_p=0.95, - max_tokens=1000, - guided_decoding=GuidedDecodingParams( - grammar=sample_sql_statements, - backend=guided_decoding_backend)) + guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=0.8, + top_p=0.95, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + grammar=sample_sql_statements, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate( prompts=("Generate a sql state that select col_1 from " "table_1 where it is equals to 1"), @@ -300,26 +329,31 @@ def test_disable_guided_decoding_fallback(sample_regex, llm): top_p=0.95, guided_decoding=GuidedDecodingParams( json=unsupported_json, - backend="xgrammar:no-fallback")) + backend="xgrammar", + disable_fallback=True)) with pytest.raises( ValueError, match="xgrammar does not support advanced JSON schema features " - "like enums, patterns or numeric ranges."): + "like string length, item limits, or property bounds."): llm.generate(prompts="This should fail", sampling_params=sampling_params, use_tqdm=True) @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) -def test_guided_json_object(llm, guided_decoding_backend: str): - sampling_params = SamplingParams(temperature=1.0, - max_tokens=100, - n=2, - guided_decoding=GuidedDecodingParams( - json_object=True, - backend=guided_decoding_backend)) +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) +def test_guided_json_object(llm, guided_decoding_backend: str, + disable_any_whitespace: bool): + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=100, + n=2, + guided_decoding=GuidedDecodingParams( + json_object=True, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate( prompts=("Generate a JSON object with curly braces for a person with " @@ -337,7 +371,7 @@ def test_guided_json_object(llm, guided_decoding_backend: str): print(generated_text) assert generated_text is not None - if 
'disable-any-whitespace' in guided_decoding_backend: + if disable_any_whitespace: assert "\n" not in generated_text # Parse to verify it is valid JSON @@ -359,14 +393,18 @@ class CarDescription(BaseModel): @pytest.mark.skip_global_cleanup -@pytest.mark.parametrize("guided_decoding_backend", GUIDED_DECODING_BACKENDS) -def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str): +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) +def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str, + disable_any_whitespace: bool): json_schema = CarDescription.model_json_schema() - sampling_params = SamplingParams(temperature=1.0, - max_tokens=1000, - guided_decoding=GuidedDecodingParams( - json=json_schema, - backend=guided_decoding_backend)) + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=json_schema, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace)) outputs = llm.generate( prompts="Generate a JSON with the brand, model and car_type of" "the most iconic car from the 90's", @@ -383,4 +421,124 @@ def test_guided_json_completion_with_enum(llm, guided_decoding_backend: str): assert generated_text is not None print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") output_json = json.loads(generated_text) - jsonschema.validate(instance=output_json, schema=json_schema) \ No newline at end of file + jsonschema.validate(instance=output_json, schema=json_schema) + + +@pytest.mark.skip_global_cleanup +@pytest.mark.parametrize("guided_decoding_backend,disable_any_whitespace", + GUIDED_DECODING_BACKENDS) +def test_guided_number_range_json_completion(llm, guided_decoding_backend: str, + disable_any_whitespace: bool): + sample_output_schema = { + "type": "object", + "properties": { + "age": { + "type": "integer", + "minimum": 18, + "maximum": 99 + }, + "score": { + "type": "number", + "minimum": 0.0, + "maximum": 100.0 + }, + "zipcode": { + "type": "string", + "pattern": r"^\d{5}(-\d{4})?$" + }, + }, + "required": ["age", "score", "zipcode"], + } + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams( + json=sample_output_schema, + backend=guided_decoding_backend, + disable_any_whitespace=disable_any_whitespace), + ) + outputs = llm.generate( + prompts=[ + "Create a JSON object for a user with age, score, and zipcode." + ] * 2, + sampling_params=sampling_params, + use_tqdm=True, + ) + + assert outputs is not None + + for output in outputs: + assert output is not None + assert isinstance(output, RequestOutput) + prompt = output.prompt + + generated_text = output.outputs[0].text + assert generated_text is not None + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + output_json = json.loads(generated_text) + jsonschema.validate(instance=output_json, schema=sample_output_schema) + assert 18 <= output_json["age"] <= 99 + assert 0.0 <= output_json["score"] <= 100.0 + assert (re.fullmatch(r"^\d{5}(-\d{4})?$", output_json["zipcode"]) + is not None) + + +@pytest.mark.skip_global_cleanup +def test_guidance_no_additional_properties(llm): + schema = { + 'type': 'object', + 'properties': { + 'a1': { + 'type': 'string' + }, + 'a2': { + 'type': 'string' + }, + 'a3': { + 'type': 'string' + } + }, + 'required': ['a1', 'a2', 'a3'], + } + + prompt = ( + "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a " + "helpful assistant.<|im_end|>\n<|im_start|>user\nPlease generate a " + "large JSON object with key-value pairs a1=b1, a2=b2, ..., a20=b20" + "<|im_end|>\n<|im_start|>assistant\n") + + def generate_with_backend(backend, disable_additional_properties): + guided_params = GuidedDecodingParams( + json=schema, + backend=backend, + disable_any_whitespace=True, + disable_additional_properties=disable_additional_properties) + sampling_params = SamplingParams(temperature=0, + max_tokens=256, + guided_decoding=guided_params) + + outputs = llm.generate(prompts=prompt, sampling_params=sampling_params) + assert outputs is not None + generated_text = outputs[0].outputs[0].text + assert generated_text is not None + parsed_json = json.loads(generated_text) + assert isinstance(parsed_json, dict) + jsonschema.validate(instance=parsed_json, schema=schema) + return parsed_json + + base_generated = generate_with_backend("guidance", False) + assert "a1" in base_generated + assert "a2" in base_generated + assert "a3" in base_generated + # by default additional keys are generated + assert "a4" in base_generated + assert "a5" in base_generated + assert "a6" in base_generated + + generated = generate_with_backend("guidance", True) + assert "a1" in generated + assert "a2" in generated + assert "a3" in generated + assert "a4" not in generated + assert "a5" not in generated + assert "a6" not in generated diff --git a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py index eca5d184f5d..642c204b9ff 100644 --- a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py +++ b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py @@ -150,6 +150,7 @@ def test_wer_correctness(model_name, expected_wer, n_examples=-1, max_concurrent_request=None): + # TODO refactor to use `ASRDataset` with RemoteOpenAIServer(model_name, ['--enforce-eager']) as remote_server: dataset = load_hf_dataset(dataset_repo) diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index b13002a5b68..72e61665677 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import openai import pytest import pytest_asyncio @@ -27,7 +29,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - f"audio={MAXIMUM_AUDIOS}", + json.dumps({"audio": MAXIMUM_AUDIOS}), ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: @@ -102,6 +104,35 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI, assert message.content is not None and len(message.content) >= 0 +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("audio_url", [TEST_AUDIO_URLS[0]]) +async def test_error_on_invalid_audio_url_type(client: openai.AsyncOpenAI, + model_name: str, + audio_url: str): + messages = [{ + "role": + "user", + "content": [ + { + "type": "audio_url", + "audio_url": audio_url + }, + { + "type": "text", + "text": "What's happening in this audio?" 
+ }, + ], + }] + + # audio_url should be a dict {"url": "some url"}, not directly a string + with pytest.raises(openai.BadRequestError): + _ = await client.chat.completions.create(model=model_name, + messages=messages, + max_completion_tokens=10, + temperature=0.0) + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("audio_url", [TEST_AUDIO_URLS[0]]) diff --git a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py index 53df1d9241b..e00f001ef73 100644 --- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py +++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py @@ -13,9 +13,9 @@ @pytest.fixture(scope="module") def server(): # noqa: F811 args = [ - "--max-model-len", "8192", "--enforce-eager", "--enable-reasoning", - "--reasoning-parser", "deepseek_r1", "--enable-auto-tool-choice", - "--tool-call-parser", "hermes" + "--max-model-len", "8192", "--enforce-eager", "--reasoning-parser", + "deepseek_r1", "--enable-auto-tool-choice", "--tool-call-parser", + "hermes" ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: diff --git a/tests/entrypoints/openai/test_cli_args.py b/tests/entrypoints/openai/test_cli_args.py index e0285b5e556..8d1abe28a02 100644 --- a/tests/entrypoints/openai/test_cli_args.py +++ b/tests/entrypoints/openai/test_cli_args.py @@ -122,31 +122,23 @@ def test_enable_auto_choice_fails_with_enable_reasoning(serve_parser): """Ensure validation fails if reasoning is enabled with auto tool choice""" args = serve_parser.parse_args(args=[ "--enable-auto-tool-choice", - "--enable-reasoning", + "--reasoning-parser", + "deepseek_r1", ]) with pytest.raises(TypeError): validate_parsed_serve_args(args) -def test_enable_reasoning_passes_with_reasoning_parser(serve_parser): +def test_passes_with_reasoning_parser(serve_parser): """Ensure validation passes if reasoning is enabled with a reasoning parser""" args = serve_parser.parse_args(args=[ - "--enable-reasoning", "--reasoning-parser", "deepseek_r1", ]) validate_parsed_serve_args(args) -def test_enable_reasoning_fails_without_reasoning_parser(serve_parser): - """Ensure validation fails if reasoning is enabled - without a reasoning parser""" - args = serve_parser.parse_args(args=["--enable-reasoning"]) - with pytest.raises(TypeError): - validate_parsed_serve_args(args) - - def test_chat_template_validation_for_happy_paths(serve_parser): """Ensure validation passes if the chat template exists""" args = serve_parser.parse_args( diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py index 2cdeb684f75..1019bfd5893 100644 --- a/tests/entrypoints/openai/test_embedding.py +++ b/tests/entrypoints/openai/test_embedding.py @@ -11,11 +11,12 @@ from vllm.entrypoints.openai.protocol import EmbeddingResponse from vllm.transformers_utils.tokenizer import get_tokenizer -from ...models.embedding.utils import check_embeddings_close +from ...models.utils import run_embedding_correctness_test from ...utils import RemoteOpenAIServer MODEL_NAME = "intfloat/multilingual-e5-small" DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501 +DTYPE = "bfloat16" @pytest.fixture(scope="module") @@ -25,7 +26,7 @@ def server(): "embed", # use half precision for speed and memory savings in CI environment "--dtype", - "bfloat16", + DTYPE, "--enforce-eager", "--max-model-len", "512", @@ -43,9 +44,17 @@ async def 
client(server): yield async_client +@pytest.fixture(scope="module") +def hf_model(hf_runner): + with hf_runner(MODEL_NAME, dtype=DTYPE, + is_sentence_transformer=True) as hf_model: + yield hf_model + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str): +async def test_single_embedding(hf_model, client: openai.AsyncOpenAI, + model_name: str): input_texts = [ "The chef prepared a delicious meal.", ] @@ -66,6 +75,9 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str): assert embeddings.usage.prompt_tokens == 11 assert embeddings.usage.total_tokens == 11 + vllm_outputs = [d.embedding for d in embeddings.data] + run_embedding_correctness_test(hf_model, input_texts, vllm_outputs) + # test using token IDs input_tokens = [1, 1, 1, 1, 1] embedding_response = await client.embeddings.create( @@ -86,7 +98,8 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str): @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str): +async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI, + model_name: str): # test list[str] input_texts = [ "The cat sat on the mat.", "A feline was resting on a rug.", @@ -107,6 +120,9 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str): assert embeddings.usage.prompt_tokens == 33 assert embeddings.usage.total_tokens == 33 + vllm_outputs = [d.embedding for d in embeddings.data] + run_embedding_correctness_test(hf_model, input_texts, vllm_outputs) + # test list[list[int]] input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24], [25, 32, 64, 77]] @@ -181,7 +197,7 @@ async def test_conversation_embedding(server: RemoteOpenAIServer, @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) -async def test_batch_base64_embedding(client: openai.AsyncOpenAI, +async def test_batch_base64_embedding(hf_model, client: openai.AsyncOpenAI, model_name: str): input_texts = [ "Hello my name is", @@ -192,6 +208,7 @@ async def test_batch_base64_embedding(client: openai.AsyncOpenAI, model=model_name, encoding_format="float") float_data = [d.embedding for d in responses_float.data] + run_embedding_correctness_test(hf_model, input_texts, float_data) responses_base64 = await client.embeddings.create(input=input_texts, model=model_name, @@ -202,24 +219,13 @@ async def test_batch_base64_embedding(client: openai.AsyncOpenAI, np.frombuffer(base64.b64decode(data.embedding), dtype="float32").tolist()) - check_embeddings_close( - embeddings_0_lst=float_data, - embeddings_1_lst=base64_data, - name_0="float", - name_1="base64", - ) + run_embedding_correctness_test(hf_model, input_texts, base64_data) # Default response is float32 decoded from base64 by OpenAI Client responses_default = await client.embeddings.create(input=input_texts, model=model_name) default_data = [d.embedding for d in responses_default.data] - - check_embeddings_close( - embeddings_0_lst=float_data, - embeddings_1_lst=default_data, - name_0="float", - name_1="default", - ) + run_embedding_correctness_test(hf_model, input_texts, default_data) @pytest.mark.asyncio diff --git a/tests/entrypoints/openai/test_embedding_dimensions.py b/tests/entrypoints/openai/test_embedding_dimensions.py index 79d43a2231f..332fa332a4a 100644 --- a/tests/entrypoints/openai/test_embedding_dimensions.py +++ 
b/tests/entrypoints/openai/test_embedding_dimensions.py @@ -3,80 +3,122 @@ Run `pytest tests/entrypoints/openai/test_embedding_dimensions.py`. """ -from typing import NamedTuple +from typing import Optional import openai import pytest from vllm.entrypoints.openai.protocol import EmbeddingResponse +from ...conftest import HfRunner +from ...models.utils import EmbedModelInfo, run_embedding_correctness_test from ...utils import RemoteOpenAIServer - -class ModelInfo(NamedTuple): - name: str - is_matryoshka: bool - - MODELS = [ - ModelInfo(name="BAAI/bge-m3", is_matryoshka=False), - ModelInfo(name="jinaai/jina-embeddings-v3", is_matryoshka=True), + EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5", + is_matryoshka=True, + matryoshka_dimensions=[256]), ] input_texts = [ "The chef prepared a delicious meal.", -] * 3 +] -@pytest.mark.asyncio -@pytest.mark.parametrize("model", MODELS) -async def test_validating_dimensions(model: ModelInfo): +@pytest.fixture(scope="module", params=MODELS) +def model_info(request): + return request.param + + +@pytest.fixture(scope="module", params=["bfloat16"]) +def dtype(request): + return request.param + + +@pytest.fixture(scope="module") +def server(model_info, dtype: str): args = [ "--task", "embed", # use half precision for speed and memory savings in CI environment "--dtype", - "bfloat16", + dtype, "--enforce-eager", "--max-model-len", - "512", - "--trust_remote_code" + "512" ] - with RemoteOpenAIServer(model.name, args) as remote_server: - client = remote_server.get_async_client() - - async def make_request(dimensions): - embedding_response = await client.embeddings.create( - model=model.name, - input=input_texts, - dimensions=dimensions, - encoding_format="float", - ) - embeddings = EmbeddingResponse.model_validate( - embedding_response.model_dump(mode="json")) - - assert embeddings.id is not None - assert len(embeddings.data) == 3 - assert len(embeddings.data[0].embedding) > 0 - assert embeddings.usage.completion_tokens == 0 - assert embeddings.usage.prompt_tokens > 0 - assert embeddings.usage.total_tokens > 0 - - if dimensions is not None: - assert len(embeddings.data[0].embedding) == dimensions - - if model.is_matryoshka: - for dimensions in [None, 16]: - await make_request(dimensions) + if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5": + # Manually enable Matryoshka Embeddings + args.extend([ + "--trust_remote_code", "--hf_overrides", + '{"matryoshka_dimensions":[256]}' + ]) + + with RemoteOpenAIServer(model_info.name, args) as remote_server: + yield remote_server + + +@pytest.fixture(scope="module") +def hf_model(hf_runner, model_info, dtype: str): + with hf_runner(model_info.name, dtype=dtype, + is_sentence_transformer=True) as hf_model: + yield hf_model + + +@pytest.mark.asyncio +async def test_matryoshka(model_info: EmbedModelInfo, + server: RemoteOpenAIServer, hf_model: HfRunner): + client = server.get_async_client() + + async def make_request_and_correctness_test(dimensions): + prompts = input_texts * 3 + + embedding_response = await client.embeddings.create( + model=model_info.name, + input=prompts, + dimensions=dimensions, + encoding_format="float", + ) + embeddings = EmbeddingResponse.model_validate( + embedding_response.model_dump(mode="json")) + + assert embeddings.id is not None + assert len(embeddings.data) == 3 + assert len(embeddings.data[0].embedding) > 0 + assert embeddings.usage.completion_tokens == 0 + assert embeddings.usage.prompt_tokens > 0 
+ assert embeddings.usage.total_tokens > 0 + + if dimensions is not None: + assert len(embeddings.data[0].embedding) == dimensions + + vllm_outputs = [d.embedding for d in embeddings.data] + run_embedding_correctness_test(hf_model, prompts, vllm_outputs, + dimensions) + + if model_info.is_matryoshka: + valid_dimensions: list[Optional[int]] = [None] + if model_info.matryoshka_dimensions is not None: + valid_dimensions += model_info.matryoshka_dimensions[:2] + + for dimensions in valid_dimensions: + await make_request_and_correctness_test(dimensions) + + invalid_dimensions: list[Optional[int]] = [-1] + if model_info.matryoshka_dimensions is not None: + assert 5 not in model_info.matryoshka_dimensions + invalid_dimensions.append(5) + + for dimensions in invalid_dimensions: with pytest.raises(openai.BadRequestError): - for dimensions in [-1]: - await make_request(dimensions) + await make_request_and_correctness_test(dimensions) - else: - for dimensions in [None]: - await make_request(dimensions) + else: + for dimensions in [None]: + await make_request_and_correctness_test(dimensions) + for dimensions in [-1, 16]: with pytest.raises(openai.BadRequestError): - for dimensions in [-1, 16]: - await make_request(dimensions) + await make_request_and_correctness_test(dimensions) diff --git a/tests/entrypoints/openai/test_lora_resolvers.py b/tests/entrypoints/openai/test_lora_resolvers.py new file mode 100644 index 00000000000..c96151349eb --- /dev/null +++ b/tests/entrypoints/openai/test_lora_resolvers.py @@ -0,0 +1,209 @@ +# SPDX-License-Identifier: Apache-2.0 + +from contextlib import suppress +from dataclasses import dataclass, field +from http import HTTPStatus +from typing import Optional +from unittest.mock import MagicMock + +import pytest + +from vllm.config import MultiModalConfig +from vllm.engine.multiprocessing.client import MQLLMEngineClient +from vllm.entrypoints.openai.protocol import CompletionRequest, ErrorResponse +from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion +from vllm.entrypoints.openai.serving_models import (BaseModelPath, + OpenAIServingModels) +from vllm.lora.request import LoRARequest +from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry +from vllm.transformers_utils.tokenizer import get_tokenizer + +MODEL_NAME = "openai-community/gpt2" +BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)] + +MOCK_RESOLVER_NAME = "mock_test_resolver" + + +@dataclass +class MockHFConfig: + model_type: str = "any" + + +@dataclass +class MockModelConfig: + """Minimal mock ModelConfig for testing.""" + model: str = MODEL_NAME + tokenizer: str = MODEL_NAME + trust_remote_code: bool = False + tokenizer_mode: str = "auto" + max_model_len: int = 100 + tokenizer_revision: Optional[str] = None + multimodal_config: MultiModalConfig = field( + default_factory=MultiModalConfig) + hf_config: MockHFConfig = field(default_factory=MockHFConfig) + logits_processor_pattern: Optional[str] = None + diff_sampling_param: Optional[dict] = None + allowed_local_media_path: str = "" + encoder_config = None + generation_config: str = "auto" + + def get_diff_sampling_param(self): + return self.diff_sampling_param or {} + + +class MockLoRAResolver(LoRAResolver): + + async def resolve_lora(self, base_model_name: str, + lora_name: str) -> Optional[LoRARequest]: + if lora_name == "test-lora": + return LoRARequest(lora_name="test-lora", + lora_int_id=1, + lora_local_path="/fake/path/test-lora") + elif lora_name == "invalid-lora": + return 
LoRARequest(lora_name="invalid-lora", + lora_int_id=2, + lora_local_path="/fake/path/invalid-lora") + return None + + +@pytest.fixture(autouse=True) +def register_mock_resolver(): + """Fixture to register and unregister the mock LoRA resolver.""" + resolver = MockLoRAResolver() + LoRAResolverRegistry.register_resolver(MOCK_RESOLVER_NAME, resolver) + yield + # Cleanup: remove the resolver after the test runs + if MOCK_RESOLVER_NAME in LoRAResolverRegistry.resolvers: + del LoRAResolverRegistry.resolvers[MOCK_RESOLVER_NAME] + + +@pytest.fixture +def mock_serving_setup(): + """Provides a mocked engine and serving completion instance.""" + mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) + mock_engine.errored = False + + def mock_add_lora_side_effect(lora_request: LoRARequest): + """Simulate engine behavior when adding LoRAs.""" + if lora_request.lora_name == "test-lora": + # Simulate successful addition + return + elif lora_request.lora_name == "invalid-lora": + # Simulate failure during addition (e.g. invalid format) + raise ValueError(f"Simulated failure adding LoRA: " + f"{lora_request.lora_name}") + + mock_engine.add_lora.side_effect = mock_add_lora_side_effect + mock_engine.generate.reset_mock() + mock_engine.add_lora.reset_mock() + + mock_model_config = MockModelConfig() + models = OpenAIServingModels(engine_client=mock_engine, + base_model_paths=BASE_MODEL_PATHS, + model_config=mock_model_config) + + serving_completion = OpenAIServingCompletion(mock_engine, + mock_model_config, + models, + request_logger=None) + + return mock_engine, serving_completion + + +@pytest.mark.asyncio +async def test_serving_completion_with_lora_resolver(mock_serving_setup, + monkeypatch): + monkeypatch.setenv("VLLM_ALLOW_RUNTIME_LORA_UPDATING", "true") + + mock_engine, serving_completion = mock_serving_setup + + lora_model_name = "test-lora" + req_found = CompletionRequest( + model=lora_model_name, + prompt="Generate with LoRA", + ) + + # Suppress potential errors during the mocked generate call, + # as we are primarily checking for add_lora and generate calls + with suppress(Exception): + await serving_completion.create_completion(req_found) + + mock_engine.add_lora.assert_called_once() + called_lora_request = mock_engine.add_lora.call_args[0][0] + assert isinstance(called_lora_request, LoRARequest) + assert called_lora_request.lora_name == lora_model_name + + mock_engine.generate.assert_called_once() + called_lora_request = mock_engine.generate.call_args[1]['lora_request'] + assert isinstance(called_lora_request, LoRARequest) + assert called_lora_request.lora_name == lora_model_name + + +@pytest.mark.asyncio +async def test_serving_completion_resolver_not_found(mock_serving_setup, + monkeypatch): + monkeypatch.setenv("VLLM_ALLOW_RUNTIME_LORA_UPDATING", "true") + + mock_engine, serving_completion = mock_serving_setup + + non_existent_model = "non-existent-lora-adapter" + req = CompletionRequest( + model=non_existent_model, + prompt="what is 1+1?", + ) + + response = await serving_completion.create_completion(req) + + mock_engine.add_lora.assert_not_called() + mock_engine.generate.assert_not_called() + + assert isinstance(response, ErrorResponse) + assert response.code == HTTPStatus.NOT_FOUND.value + assert non_existent_model in response.message + + +@pytest.mark.asyncio +async def test_serving_completion_resolver_add_lora_fails( + mock_serving_setup, monkeypatch): + monkeypatch.setenv("VLLM_ALLOW_RUNTIME_LORA_UPDATING", "true") + + 
mock_engine, serving_completion = mock_serving_setup + + invalid_model = "invalid-lora" + req = CompletionRequest( + model=invalid_model, + prompt="what is 1+1?", + ) + + response = await serving_completion.create_completion(req) + + # Assert add_lora was called before the failure + mock_engine.add_lora.assert_called_once() + called_lora_request = mock_engine.add_lora.call_args[0][0] + assert isinstance(called_lora_request, LoRARequest) + assert called_lora_request.lora_name == invalid_model + + # Assert generate was *not* called due to the failure + mock_engine.generate.assert_not_called() + + # Assert the correct error response + assert isinstance(response, ErrorResponse) + assert response.code == HTTPStatus.BAD_REQUEST.value + assert invalid_model in response.message + + +@pytest.mark.asyncio +async def test_serving_completion_flag_not_set(mock_serving_setup): + mock_engine, serving_completion = mock_serving_setup + + lora_model_name = "test-lora" + req_found = CompletionRequest( + model=lora_model_name, + prompt="Generate with LoRA", + ) + + await serving_completion.create_completion(req_found) + + mock_engine.add_lora.assert_not_called() + mock_engine.generate.assert_not_called() diff --git a/tests/entrypoints/openai/test_openai_schema.py b/tests/entrypoints/openai/test_openai_schema.py new file mode 100644 index 00000000000..1ccb803a328 --- /dev/null +++ b/tests/entrypoints/openai/test_openai_schema.py @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest +import schemathesis +from schemathesis import GenerationConfig + +from ...utils import RemoteOpenAIServer + +schemathesis.experimental.OPEN_API_3_1.enable() + +MODEL_NAME = "HuggingFaceTB/SmolVLM-256M-Instruct" +MAXIMUM_IMAGES = 2 + + +@pytest.fixture(scope="module") +def server(): + args = [ + "--task", + "generate", + "--max-model-len", + "2048", + "--max-num-seqs", + "5", + "--enforce-eager", + "--trust-remote-code", + "--limit-mm-per-prompt", + f"image={MAXIMUM_IMAGES}", + ] + + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest.fixture(scope="module") +def get_schema(server): + # avoid generating null (\x00) bytes in strings during test case generation + return schemathesis.openapi.from_uri( + f"{server.url_root}/openapi.json", + generation_config=GenerationConfig(allow_x00=False), + ) + + +schema = schemathesis.from_pytest_fixture("get_schema") + + +@schema.parametrize() +@schema.override(headers={"Content-Type": "application/json"}) +async def test_openapi_stateless(case): + #No need to verify SSL certificate for localhost + await case.call_and_validate(verify=False) diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index 19d16713b20..5e11af8cf89 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -272,3 +272,43 @@ def test_serving_chat_could_load_correct_generation_config(): assert mock_engine.generate.call_args.args[1].temperature == 0.0 assert mock_engine.generate.call_args.args[1].repetition_penalty == 1.05 + + +def test_serving_chat_did_set_correct_cache_salt(): + mock_model_config = MockModelConfig() + + mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) + mock_engine.errored = False + + # Initialize the serving chat + models = OpenAIServingModels(engine_client=mock_engine, + base_model_paths=BASE_MODEL_PATHS, + model_config=mock_model_config) + serving_chat = 
OpenAIServingChat(mock_engine, + mock_model_config, + models, + response_role="assistant", + chat_template=CHAT_TEMPLATE, + chat_template_content_format="auto", + request_logger=None) + + # Test cache_salt + req = ChatCompletionRequest( + model=MODEL_NAME, + messages=[{ + "role": "user", + "content": "what is 1+1?" + }], + ) + + # By default cache_salt in the engine prompt is not set + with suppress(Exception): + asyncio.run(serving_chat.create_chat_completion(req)) + assert "cache_salt" not in mock_engine.generate.call_args.args[0] + + # Test with certain cache_salt + req.cache_salt = "test_salt" + with suppress(Exception): + asyncio.run(serving_chat.create_chat_completion(req)) + assert mock_engine.generate.call_args.args[0]["cache_salt"] == "test_salt" diff --git a/tests/entrypoints/openai/test_transcription_validation.py b/tests/entrypoints/openai/test_transcription_validation.py index 29571bcd764..5c48df3cebb 100644 --- a/tests/entrypoints/openai/test_transcription_validation.py +++ b/tests/entrypoints/openai/test_transcription_validation.py @@ -192,3 +192,36 @@ async def post_with_stream(*args, **kwargs): else: continuous = continuous and hasattr(chunk, 'usage') assert final and continuous + + +@pytest.mark.asyncio +async def test_sampling_params(mary_had_lamb): + """ + Compare sampling with params and greedy sampling to assert results + are different when extreme sampling parameters values are picked. + """ + model_name = "openai/whisper-small" + server_args = ["--enforce-eager"] + with RemoteOpenAIServer(model_name, server_args) as remote_server: + client = remote_server.get_async_client() + transcription = await client.audio.transcriptions.create( + model=model_name, + file=mary_had_lamb, + language="en", + temperature=0.8, + extra_body=dict(seed=42, + repetition_penalty=1.9, + top_k=12, + top_p=0.4, + min_p=0.5, + frequency_penalty=1.8, + presence_penalty=2.0)) + + greedy_transcription = await client.audio.transcriptions.create( + model=model_name, + file=mary_had_lamb, + language="en", + temperature=0.0, + extra_body=dict(seed=42)) + + assert greedy_transcription.text != transcription.text diff --git a/tests/entrypoints/openai/test_truncation.py b/tests/entrypoints/openai/test_truncation.py new file mode 100644 index 00000000000..137ed9db858 --- /dev/null +++ b/tests/entrypoints/openai/test_truncation.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: Apache-2.0 +from typing import Any + +import openai +import pytest +import pytest_asyncio + +from tests.utils import RemoteOpenAIServer + +MODEL_NAME = "sentence-transformers/all-MiniLM-L12-v2" +max_model_len = 128 + +input = """Immerse yourself in the enchanting chronicle of calculus, a + mathematical domain that has radically transformed our comprehension of + change and motion. Despite its roots in ancient civilizations, the + formal birth of calculus predominantly occurred in the 17th century, + primarily under the influential guidance of Sir Isaac Newton and Gottfried + Wilhelm Leibniz. The earliest traces of calculus concepts are found in + ancient Greek mathematics,most notably in the works of Eudoxus and + Archimedes, around 300 BCE. They utilized the 'method of exhaustion'—a + technique for computing areas and volumes through the use of finite sums. + This methodology laid crucial foundational work for integral calculus. 
+ In the 17th century, both Newton and Leibniz independently pioneered + calculus, each contributing unique perspectives that would shape this new + field.""" + + +@pytest.fixture(scope="module") +def server(): + args = [ + "--task", + "embed", + "--dtype", + "bfloat16", + "--enforce-eager", + "--max-model-len", + str(max_model_len), + ] + + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest_asyncio.fixture +async def client(server): + async with server.get_async_client() as async_client: + yield async_client + + +@pytest.mark.asyncio +async def test_smaller_truncation_size(client: openai.AsyncOpenAI): + truncation_size = 10 + kwargs: dict[str, Any] = { + "model": MODEL_NAME, + "input": input, + "truncate_prompt_tokens": truncation_size + } + + response = await client.post(path="embeddings", + cast_to=object, + body={**kwargs}) + + assert response["usage"]["prompt_tokens"] == truncation_size + + +@pytest.mark.asyncio +async def test_bigger_truncation_size(client: openai.AsyncOpenAI): + truncation_size = max_model_len + 1 + kwargs: dict[str, Any] = { + "model": MODEL_NAME, + "input": input, + "truncate_prompt_tokens": truncation_size + } + + with pytest.raises(openai.BadRequestError) as err: + err = await client.post(path="embeddings", + cast_to=object, + body={**kwargs}) + + assert str(err) == f"""openai.BadRequestError: + Error code: 400 - {{'object': 'error', + 'message': 'truncate_prompt_tokens value + ({truncation_size}) + is greater than max_model_len ({max_model_len}). + Please, select a smaller truncation size.', + 'type': 'BadRequestError', + 'param': None, 'code': 400}}""" + + +@pytest.mark.asyncio +async def test_max_truncation_size(client: openai.AsyncOpenAI): + truncation_size = -1 + kwargs: dict[str, Any] = { + "model": MODEL_NAME, + "input": input, + "truncate_prompt_tokens": truncation_size + } + + response = await client.post(path="embeddings", + cast_to=object, + body={**kwargs}) + + assert response["usage"]["prompt_tokens"] == max_model_len diff --git a/tests/entrypoints/openai/test_video.py b/tests/entrypoints/openai/test_video.py index f9ccce9c1c3..53f057a294c 100644 --- a/tests/entrypoints/openai/test_video.py +++ b/tests/entrypoints/openai/test_video.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import openai import pytest import pytest_asyncio @@ -31,7 +33,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - f"video={MAXIMUM_VIDEOS}", + json.dumps({"video": MAXIMUM_VIDEOS}), ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: @@ -106,6 +108,35 @@ async def test_single_chat_session_video(client: openai.AsyncOpenAI, assert message.content is not None and len(message.content) >= 0 +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS) +async def test_error_on_invalid_video_url_type(client: openai.AsyncOpenAI, + model_name: str, + video_url: str): + messages = [{ + "role": + "user", + "content": [ + { + "type": "video_url", + "video_url": video_url + }, + { + "type": "text", + "text": "What's in this video?" 
+ }, + ], + }] + + # video_url should be a dict {"url": "some url"}, not directly a string + with pytest.raises(openai.BadRequestError): + _ = await client.chat.completions.create(model=model_name, + messages=messages, + max_completion_tokens=10, + temperature=0.0) + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("video_url", TEST_VIDEO_URLS) diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index 4b9029ded41..1ab50b41c7e 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import openai import pytest import pytest_asyncio @@ -35,7 +37,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - f"image={MAXIMUM_IMAGES}", + json.dumps({"image": MAXIMUM_IMAGES}), ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: @@ -135,6 +137,36 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI, assert message.content is not None and len(message.content) >= 0 +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS) +async def test_error_on_invalid_image_url_type(client: openai.AsyncOpenAI, + model_name: str, + image_url: str): + content_text = "What's in this image?" + messages = [{ + "role": + "user", + "content": [ + { + "type": "image_url", + "image_url": image_url + }, + { + "type": "text", + "text": content_text + }, + ], + }] + + # image_url should be a dict {"url": "some url"}, not directly a string + with pytest.raises(openai.BadRequestError): + _ = await client.chat.completions.create(model=model_name, + messages=messages, + max_completion_tokens=10, + temperature=0.0) + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("image_url", TEST_IMAGE_URLS) diff --git a/tests/entrypoints/openai/test_vision_embedding.py b/tests/entrypoints/openai/test_vision_embedding.py index 3e6f13e10ac..26c68e06c19 100644 --- a/tests/entrypoints/openai/test_vision_embedding.py +++ b/tests/entrypoints/openai/test_vision_embedding.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import pytest import requests from PIL import Image @@ -37,7 +39,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - f"image={MAXIMUM_IMAGES}", + json.dumps({"image": MAXIMUM_IMAGES}), "--chat-template", str(vlm2vec_jinja_path), ] diff --git a/tests/kernels/conftest.py b/tests/kernels/attention/conftest.py similarity index 100% rename from tests/kernels/conftest.py rename to tests/kernels/attention/conftest.py diff --git a/tests/kernels/test_attention.py b/tests/kernels/attention/test_attention.py similarity index 99% rename from tests/kernels/test_attention.py rename to tests/kernels/attention/test_attention.py index 0d7898a900e..e5650136f25 100644 --- a/tests/kernels/test_attention.py +++ b/tests/kernels/attention/test_attention.py @@ -6,13 +6,12 @@ import pytest import torch +from tests.kernels.allclose_default import get_default_atol, get_default_rtol from tests.kernels.utils import opcheck from vllm import _custom_ops as ops from vllm.platforms import current_platform from vllm.utils import get_max_shared_memory_bytes -from .allclose_default import get_default_atol, get_default_rtol - if not current_platform.is_rocm(): from xformers import ops as xops from xformers.ops.fmha.attn_bias import 
BlockDiagonalCausalMask diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py new file mode 100644 index 00000000000..b0414244c21 --- /dev/null +++ b/tests/kernels/attention/test_attention_selector.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: Apache-2.0 + +from unittest.mock import patch + +import pytest +import torch + +from vllm.attention.selector import _cached_get_attn_backend, get_attn_backend +from vllm.platforms.cpu import CpuPlatform +from vllm.platforms.cuda import CudaPlatform +from vllm.platforms.rocm import RocmPlatform +from vllm.utils import STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL, STR_INVALID_VAL + + +@pytest.fixture(autouse=True) +def clear_cache(): + """Clear lru cache to ensure each test case runs without caching. + """ + _cached_get_attn_backend.cache_clear() + + +# Define MLA and non-MLA backends separately +DEVICE_MLA_BACKENDS = { + "cuda": ["TRITON_MLA", "FLASHMLA"], + "hip": ["TRITON_MLA", "ROCM_AITER_MLA"], + "cpu": [], +} + +DEVICE_REGULAR_ATTN_BACKENDS = { + "cuda": ["XFORMERS", "FLASHINFER"], + "hip": ["ROCM_FLASH"], + "cpu": ["TORCH_SDPA"], +} + +DEVICE_MLA_BLOCK_SIZES = { + "cuda": [16, 64], # CUDA supports both standard and extended block sizes + "hip": [16, 1], # HIP requires special handling for block_size=1 + "cpu": [16] # CPU uses fixed block size from test cases +} + + +def generate_params(): + params = [] + for use_mla in [True, False]: + for device in ["cuda", "hip", "cpu"]: + backends = DEVICE_MLA_BACKENDS[ + device] if use_mla else DEVICE_REGULAR_ATTN_BACKENDS[device] + for name in backends: + block_sizes = DEVICE_MLA_BLOCK_SIZES[device] if use_mla else [ + 16 + ] + for block_size in block_sizes: + params.append( + pytest.param( + device, + name, + use_mla, + block_size, + id= + f"{device}_{name}_mla_{str(use_mla)[0]}_blks{block_size}" + )) + return params + + +@pytest.mark.parametrize("device, name, use_mla, block_size", + generate_params()) +@pytest.mark.parametrize("use_v1", [True, False]) +def test_env( + device: str, + name: str, + use_mla: bool, + block_size: int, + use_v1: bool, + monkeypatch: pytest.MonkeyPatch, +): + """Test attention backend selection with valid device-backend pairs.""" + with monkeypatch.context() as m: + m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") + m.setenv(STR_BACKEND_ENV_VAR, name) + m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0") + + if device == "cpu": + with patch("vllm.attention.selector.current_platform", + CpuPlatform()): + backend = get_attn_backend(16, torch.float16, torch.float16, + block_size, False) + assert backend.get_name() == "TORCH_SDPA" + + elif device == "hip": + with patch("vllm.attention.selector.current_platform", + RocmPlatform()): + if use_mla: + # Validate HIP MLA backend-block_size combinations + valid_combination = ( + (name == "TRITON_MLA" and block_size != 1) + or (name == "ROCM_AITER_MLA" and block_size == 1)) + + if valid_combination: + backend = get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + assert backend.get_name() == name + else: + with pytest.raises(ValueError) as exc_info: + get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + assert f"The selected backend, {name}" in str( + exc_info.value) + else: + backend = get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + expected = "TRITON_ATTN_VLLM_V1" if use_v1 else "ROCM_FLASH" + assert backend.get_name() == expected + + 
elif device == "cuda": + with patch("vllm.attention.selector.current_platform", + CudaPlatform()): + if use_mla: + if name == "FLASHMLA" and block_size == 64: + from vllm.attention.backends.flashmla import ( + is_flashmla_supported) + + # only on cuda platforms with specific capability. + is_supported, _ = is_flashmla_supported() + + if not is_supported: + # if platform is not supported then skip this case. + pytest.skip() + else: + backend = get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + expected = f"{name}_VLLM_V1" if use_v1 else name + assert backend.get_name() == expected + else: + backend = get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + expected = ("TRITON_MLA_VLLM_V1" + if use_v1 else "TRITON_MLA") + assert backend.get_name() == expected + elif name == "FLASHINFER": + backend = get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + expected = "FLASHINFER_VLLM_V1" if use_v1 else name + assert backend.get_name() == expected + else: + backend = get_attn_backend(16, + torch.float16, + torch.float16, + block_size, + False, + use_mla=use_mla) + expected = "FLASH_ATTN_VLLM_V1" if use_v1 else name + assert backend.get_name() == expected + + +def test_flash_attn(monkeypatch: pytest.MonkeyPatch): + """Test FlashAttn validation.""" + # TODO: When testing for v1, pipe in `use_v1` as an argument to + # get_attn_backend + + with monkeypatch.context() as m: + m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL) + + # Unsupported CUDA arch + monkeypatch.setattr(torch.cuda, "get_device_capability", lambda: + (7, 5)) + backend = get_attn_backend(16, torch.float16, None, 16, False) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + # Reset the monkeypatch for subsequent tests + monkeypatch.undo() + + # Unsupported data type + backend = get_attn_backend(16, torch.float8_e4m3fn, None, 16, False) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + # Unsupported kv cache data type + backend = get_attn_backend(16, torch.float16, "fp8", 16, False) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + # Unsupported block size + backend = get_attn_backend(16, torch.float16, None, 8, False) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + # flash-attn is not installed + import sys + original_module = sys.modules.get('vllm_flash_attn') + monkeypatch.setitem(sys.modules, 'vllm_flash_attn', None) + backend = get_attn_backend(16, torch.float16, None, 16, False) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + # Restore the original module if it existed + if original_module is not None: + monkeypatch.setitem(sys.modules, 'vllm_flash_attn', + original_module) + else: + monkeypatch.delitem(sys.modules, 'vllm_flash_attn', raising=False) + + # Unsupported head size + backend = get_attn_backend(17, torch.float16, None, 16, False) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + # Attention-free models should bypass env and use PlaceholderAttention + backend = get_attn_backend(16, torch.float16, torch.float16, 16, True) + assert backend.get_name() != STR_FLASH_ATTN_VAL + + +@pytest.mark.parametrize("use_v1", [True, False]) +def test_invalid_env(use_v1: bool, monkeypatch: pytest.MonkeyPatch): + + with monkeypatch.context() as m, patch( + "vllm.attention.selector.current_platform", CudaPlatform()): + m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") + m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL) + + # Test with head size 32 + backend = get_attn_backend(32, 
torch.float16, None, 16, False) + EXPECTED = "FLASH_ATTN_VLLM_V1" if use_v1 else "FLASH_ATTN" + assert backend.get_name() == EXPECTED + + # when block size == 16, backend will fall back to XFORMERS + # this behavior is not yet supported on V1. + if use_v1: + # TODO: support fallback on V1! + # https://github.com/vllm-project/vllm/issues/14524 + pass + else: + backend = get_attn_backend(16, torch.float16, None, 16, False) + assert backend.get_name() == "XFORMERS" diff --git a/tests/kernels/test_blocksparse_attention.py b/tests/kernels/attention/test_blocksparse_attention.py similarity index 99% rename from tests/kernels/test_blocksparse_attention.py rename to tests/kernels/attention/test_blocksparse_attention.py index 3025ae0f921..82d03825757 100644 --- a/tests/kernels/test_blocksparse_attention.py +++ b/tests/kernels/attention/test_blocksparse_attention.py @@ -6,14 +6,13 @@ import pytest import torch +from tests.kernels.allclose_default import get_default_atol, get_default_rtol from vllm import _custom_ops as ops from vllm.attention.ops.blocksparse_attention.interface import ( LocalStridedBlockSparseAttn) from vllm.platforms import current_platform from vllm.utils import get_max_shared_memory_bytes -from .allclose_default import get_default_atol, get_default_rtol - FLOAT32_BYTES = torch.finfo(torch.float).bits // 8 # This will change depending on the compute capability. # - 512 as a buffer diff --git a/tests/kernels/test_cache.py b/tests/kernels/attention/test_cache.py similarity index 93% rename from tests/kernels/test_cache.py rename to tests/kernels/attention/test_cache.py index 899122818e0..2f2212dd2b0 100644 --- a/tests/kernels/test_cache.py +++ b/tests/kernels/attention/test_cache.py @@ -16,6 +16,7 @@ NUM_HEADS = [8] # Arbitrary values for testing HEAD_SIZES = [64, 80, 120, 256] BLOCK_SIZES = [8, 16, 32] +CACHE_LAYOUTS = ["NHD", "HND"] # Parameters for MLA tests. KV_LORA_RANKS = [512] @@ -220,6 +221,7 @@ def test_reshape_and_cache( @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @pytest.mark.parametrize("kv_cache_dtype", KV_CACHE_DTYPE) +@pytest.mark.parametrize("kv_cache_layout", CACHE_LAYOUTS) @torch.inference_mode() def test_reshape_and_cache_flash( kv_cache_factory_flashinfer, @@ -232,17 +234,21 @@ def test_reshape_and_cache_flash( seed: int, device: str, kv_cache_dtype: str, + kv_cache_layout: str, ) -> None: current_platform.seed_everything(seed) torch.set_default_device(device) + # fp8 conversion requires continugous memory buffer. Reduce the number of + # blocks and tokens to consume less memory. + num_tokens = num_tokens // 2 + num_blocks = num_blocks // 2 # Create a random slot mapping. 
num_slots = block_size * num_blocks slot_mapping_lst = random.sample(range(num_slots), num_tokens) slot_mapping = torch.tensor(slot_mapping_lst, dtype=torch.long, device=device) - qkv = torch.randn(num_tokens, 3, num_heads, @@ -261,27 +267,35 @@ def test_reshape_and_cache_flash( kv_cache_dtype, dtype, device=device, + cache_layout=kv_cache_layout, ) - key_cache, value_cache = key_caches[0].contiguous( - ), value_caches[0].contiguous() + key_cache, value_cache = key_caches[0], value_caches[0] del key_caches del value_caches k_scale = (key.amax() / 64.0).to(torch.float32) v_scale = (value.amax() / 64.0).to(torch.float32) + def permute_and_compact(x): + y = x if kv_cache_layout == "NHD" else x.permute(0, 2, 1, 3) + return y.contiguous() + + key_cache_compact = permute_and_compact(key_cache) + value_cache_compact = permute_and_compact(value_cache) + # Clone the KV caches. if kv_cache_dtype == "fp8": - cloned_key_cache = torch.empty_like(key_cache, dtype=torch.float16) - ops.convert_fp8(cloned_key_cache, key_cache, k_scale.item(), - kv_cache_dtype) - cloned_value_cache = torch.empty_like(value_cache, dtype=torch.float16) - ops.convert_fp8(cloned_value_cache, value_cache, v_scale.item(), + cloned_key_cache = torch.empty_like(key_cache_compact, + dtype=torch.float16) + ops.convert_fp8(cloned_key_cache, key_cache_compact, k_scale.item(), kv_cache_dtype) + cloned_value_cache = torch.empty_like(value_cache_compact, + dtype=torch.float16) + ops.convert_fp8(cloned_value_cache, value_cache_compact, + v_scale.item(), kv_cache_dtype) else: - cloned_key_cache = key_cache.clone() - cloned_value_cache = value_cache.clone() - + cloned_key_cache = key_cache_compact.clone() + cloned_value_cache = value_cache_compact.clone() # Call the reshape_and_cache kernel. opcheck(torch.ops._C_cache_ops.reshape_and_cache_flash, (key, value, key_cache, value_cache, slot_mapping, kv_cache_dtype, @@ -289,16 +303,20 @@ def test_reshape_and_cache_flash( cond=(head_size == HEAD_SIZES[0])) ops.reshape_and_cache_flash(key, value, key_cache, value_cache, slot_mapping, kv_cache_dtype, k_scale, v_scale) + key_cache_compact = permute_and_compact(key_cache) + value_cache_compact = permute_and_compact(value_cache) if kv_cache_dtype == "fp8": - result_key_cache = torch.empty_like(key_cache, dtype=torch.float16) + result_key_cache = torch.empty_like(key_cache_compact, + dtype=torch.float16) ops.convert_fp8(result_key_cache, - key_cache, + key_cache_compact, k_scale.item(), kv_dtype=kv_cache_dtype) - result_value_cache = torch.empty_like(value_cache, dtype=torch.float16) + result_value_cache = torch.empty_like(value_cache_compact, + dtype=torch.float16) ops.convert_fp8(result_value_cache, - value_cache, + value_cache_compact, v_scale.item(), kv_dtype=kv_cache_dtype) @@ -310,8 +328,12 @@ def test_reshape_and_cache_flash( for i in range(num_tokens): block_idx = block_indicies_lst[i] block_offset = block_offsets_lst[i] - cloned_key_cache[block_idx, block_offset, :, :] = key[i] - cloned_value_cache[block_idx, block_offset, :, :] = value[i] + if kv_cache_layout == "NHD": + cloned_key_cache[block_idx, block_offset, :, :] = key[i] + cloned_value_cache[block_idx, block_offset, :, :] = value[i] + else: + cloned_key_cache[block_idx, :, block_offset, :] = key[i] + cloned_value_cache[block_idx, :, block_offset, :] = value[i] if kv_cache_dtype == "fp8": torch.testing.assert_close(result_key_cache, @@ -323,8 +345,8 @@ def test_reshape_and_cache_flash( atol=0.001, rtol=0.1) else: - torch.testing.assert_close(key_cache, cloned_key_cache) - 
torch.testing.assert_close(value_cache, cloned_value_cache) + torch.testing.assert_close(key_cache_compact, cloned_key_cache) + torch.testing.assert_close(value_cache_compact, cloned_value_cache) @pytest.mark.parametrize("direction", COPYING_DIRECTION) diff --git a/tests/kernels/test_cascade_flash_attn.py b/tests/kernels/attention/test_cascade_flash_attn.py similarity index 100% rename from tests/kernels/test_cascade_flash_attn.py rename to tests/kernels/attention/test_cascade_flash_attn.py diff --git a/tests/kernels/test_encoder_decoder_attn.py b/tests/kernels/attention/test_encoder_decoder_attn.py similarity index 100% rename from tests/kernels/test_encoder_decoder_attn.py rename to tests/kernels/attention/test_encoder_decoder_attn.py diff --git a/tests/kernels/test_flash_attn.py b/tests/kernels/attention/test_flash_attn.py similarity index 99% rename from tests/kernels/test_flash_attn.py rename to tests/kernels/attention/test_flash_attn.py index 572563c0bd8..88516b75cde 100644 --- a/tests/kernels/test_flash_attn.py +++ b/tests/kernels/attention/test_flash_attn.py @@ -145,7 +145,7 @@ def test_flash_attn_with_paged_kv( v_descale = None if q_dtype is not None: # QKV are drawn from N(0, 1): no need for a fp8 scaling factor - maybe_quantized_query = query.to(q_dtype) + maybe_quantized_query = q.to(q_dtype) maybe_quantized_key_cache = key_cache.to(q_dtype) maybe_quantized_value_cache = value_cache.to(q_dtype) diff --git a/tests/kernels/test_flashinfer.py b/tests/kernels/attention/test_flashinfer.py similarity index 100% rename from tests/kernels/test_flashinfer.py rename to tests/kernels/attention/test_flashinfer.py diff --git a/tests/kernels/test_flashmla.py b/tests/kernels/attention/test_flashmla.py similarity index 100% rename from tests/kernels/test_flashmla.py rename to tests/kernels/attention/test_flashmla.py diff --git a/tests/kernels/test_lightning_attn.py b/tests/kernels/attention/test_lightning_attn.py similarity index 100% rename from tests/kernels/test_lightning_attn.py rename to tests/kernels/attention/test_lightning_attn.py diff --git a/tests/kernels/test_merge_attn_states.py b/tests/kernels/attention/test_merge_attn_states.py similarity index 100% rename from tests/kernels/test_merge_attn_states.py rename to tests/kernels/attention/test_merge_attn_states.py diff --git a/tests/kernels/test_mha_attn.py b/tests/kernels/attention/test_mha_attn.py similarity index 100% rename from tests/kernels/test_mha_attn.py rename to tests/kernels/attention/test_mha_attn.py diff --git a/tests/kernels/test_mla_decode_cpu.py b/tests/kernels/attention/test_mla_decode_cpu.py similarity index 100% rename from tests/kernels/test_mla_decode_cpu.py rename to tests/kernels/attention/test_mla_decode_cpu.py diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/attention/test_prefix_prefill.py similarity index 100% rename from tests/kernels/test_prefix_prefill.py rename to tests/kernels/attention/test_prefix_prefill.py diff --git a/tests/kernels/attention/test_rocm_attention_selector.py b/tests/kernels/attention/test_rocm_attention_selector.py new file mode 100644 index 00000000000..4cf7bcb01d4 --- /dev/null +++ b/tests/kernels/attention/test_rocm_attention_selector.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: Apache-2.0 + +import pytest +import torch + +from vllm.attention.selector import _cached_get_attn_backend, get_attn_backend +from vllm.platforms.rocm import RocmPlatform +from vllm.utils import STR_BACKEND_ENV_VAR + + +@pytest.fixture(autouse=True) +def clear_cache(): + """Clear lru 
cache to ensure each test case runs without caching. + """ + _cached_get_attn_backend.cache_clear() + + +def test_selector(monkeypatch: pytest.MonkeyPatch): + with monkeypatch.context() as m: + m.setenv(STR_BACKEND_ENV_VAR, "ROCM_FLASH") + + # Set the current platform to ROCm using monkeypatch + monkeypatch.setattr("vllm.attention.selector.current_platform", + RocmPlatform()) + + # Test standard ROCm attention + backend = get_attn_backend(16, torch.float16, torch.float16, 16, False) + assert (backend.get_name() == "ROCM_FLASH" + or backend.get_name() == "TRITON_ATTN_VLLM_V1") + + # MLA test for deepseek related + + # change the attention backend to triton MLA + m.setenv(STR_BACKEND_ENV_VAR, "TRITON_MLA") + backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False, + False, True) + assert backend.get_name() == "TRITON_MLA" + + # If attention backend is None + # If use_mla is true + # The selected backend is triton MLA + m.setenv(STR_BACKEND_ENV_VAR, None) + backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False, + False, True) + assert backend.get_name() == "TRITON_MLA" + + # change the attention backend to AITER MLA + m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA") + backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False, + False, True) + assert backend.get_name() == "ROCM_AITER_MLA" + + # If attention backend is None + # If use_mla is true + # If VLLM_ROCM_USE_AITER is enabled + # The selected backend is ROCM_AITER_MLA + m.setenv(STR_BACKEND_ENV_VAR, None) + m.setenv("VLLM_ROCM_USE_AITER", "1") + backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False, + False, True) + assert backend.get_name() == "ROCM_AITER_MLA" diff --git a/tests/kernels/test_triton_decode_attention.py b/tests/kernels/attention/test_triton_decode_attention.py similarity index 100% rename from tests/kernels/test_triton_decode_attention.py rename to tests/kernels/attention/test_triton_decode_attention.py diff --git a/tests/kernels/test_activation.py b/tests/kernels/core/test_activation.py similarity index 97% rename from tests/kernels/test_activation.py rename to tests/kernels/core/test_activation.py index cf0f21ce065..79f838a954e 100644 --- a/tests/kernels/test_activation.py +++ b/tests/kernels/core/test_activation.py @@ -5,6 +5,7 @@ import pytest import torch +from tests.kernels.allclose_default import get_default_atol, get_default_rtol from tests.kernels.utils import opcheck from vllm.model_executor.layers.activation import (FastGELU, FatreluAndMul, GeluAndMul, MulAndSilu, @@ -12,8 +13,6 @@ SiluAndMul) from vllm.platforms import current_platform -from .allclose_default import get_default_atol, get_default_rtol - DTYPES = [torch.half, torch.bfloat16, torch.float] NUM_TOKENS = [7, 83, 2048] # Arbitrary values for testing D = [512, 13824] # Arbitrary values for testing diff --git a/tests/kernels/test_fused_quant_layernorm.py b/tests/kernels/core/test_fused_quant_layernorm.py similarity index 100% rename from tests/kernels/test_fused_quant_layernorm.py rename to tests/kernels/core/test_fused_quant_layernorm.py diff --git a/tests/kernels/test_layernorm.py b/tests/kernels/core/test_layernorm.py similarity index 100% rename from tests/kernels/test_layernorm.py rename to tests/kernels/core/test_layernorm.py diff --git a/tests/kernels/core/test_opcheck.py b/tests/kernels/core/test_opcheck.py new file mode 100644 index 00000000000..c9a9679c5d8 --- /dev/null +++ b/tests/kernels/core/test_opcheck.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: Apache-2.0 +""" +Tests for miscellaneous 
utilities +""" + +import torch + +from tests.kernels.utils import opcheck + + +def test_convert_fp8_opcheck(): + data = torch.randn((256, 256), dtype=torch.float32, device="cuda") + result = torch.empty_like(data, dtype=torch.float8_e4m3fn) + opcheck(torch.ops._C_cache_ops.convert_fp8, (result, data, 1.0, "fp8")) + + +# TODO: Add this back, currently fails with +# csrc/cuda_utils_kernels.cu:15 'invalid argument' +# @pytest.mark.skipif(not current_platform.is_cuda(), +# reason="Only supported for CUDA") +# def test_cuda_utils_opcheck(): +# opcheck(torch.ops._C_cuda_utils.get_device_attribute, (0, 0)) +# opcheck( +# torch.ops._C_cuda_utils. +# get_max_shared_memory_per_block_device_attribute, (0, )) diff --git a/tests/kernels/test_permute_cols.py b/tests/kernels/core/test_permute_cols.py similarity index 100% rename from tests/kernels/test_permute_cols.py rename to tests/kernels/core/test_permute_cols.py diff --git a/tests/kernels/test_pos_encoding.py b/tests/kernels/core/test_pos_encoding.py similarity index 99% rename from tests/kernels/test_pos_encoding.py rename to tests/kernels/core/test_pos_encoding.py index eb83b4d612c..2b7bf755ec2 100644 --- a/tests/kernels/test_pos_encoding.py +++ b/tests/kernels/core/test_pos_encoding.py @@ -6,11 +6,10 @@ import pytest import torch +from tests.kernels.allclose_default import get_default_atol, get_default_rtol from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.platforms import current_platform -from .allclose_default import get_default_atol, get_default_rtol - IS_NEOX_STYLE = [True, False] DTYPES = [torch.half, torch.bfloat16, torch.float] HEAD_SIZES = [64, 80, 112, 120, 256] diff --git a/tests/kernels/test_rotary_embedding.py b/tests/kernels/core/test_rotary_embedding.py similarity index 100% rename from tests/kernels/test_rotary_embedding.py rename to tests/kernels/core/test_rotary_embedding.py diff --git a/tests/kernels/test_uva.py b/tests/kernels/core/test_uva.py similarity index 100% rename from tests/kernels/test_uva.py rename to tests/kernels/core/test_uva.py diff --git a/tests/kernels/test_causal_conv1d.py b/tests/kernels/mamba/test_causal_conv1d.py similarity index 100% rename from tests/kernels/test_causal_conv1d.py rename to tests/kernels/mamba/test_causal_conv1d.py diff --git a/tests/kernels/test_mamba_mixer2.py b/tests/kernels/mamba/test_mamba_mixer2.py similarity index 100% rename from tests/kernels/test_mamba_mixer2.py rename to tests/kernels/mamba/test_mamba_mixer2.py diff --git a/tests/kernels/test_mamba_ssm.py b/tests/kernels/mamba/test_mamba_ssm.py similarity index 100% rename from tests/kernels/test_mamba_ssm.py rename to tests/kernels/mamba/test_mamba_ssm.py diff --git a/tests/kernels/test_mamba_ssm_ssd.py b/tests/kernels/mamba/test_mamba_ssm_ssd.py similarity index 100% rename from tests/kernels/test_mamba_ssm_ssd.py rename to tests/kernels/mamba/test_mamba_ssm_ssd.py diff --git a/tests/kernels/moe/test_cutlass_moe.py b/tests/kernels/moe/test_cutlass_moe.py new file mode 100644 index 00000000000..975cd418a17 --- /dev/null +++ b/tests/kernels/moe/test_cutlass_moe.py @@ -0,0 +1,364 @@ +# SPDX-License-Identifier: Apache-2.0 +import dataclasses +from typing import Optional + +import pytest +import torch + +from vllm import _custom_ops as ops +from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8 +from vllm.model_executor.layers.fused_moe.fused_moe import (fused_experts, + fused_topk) +from vllm.platforms 
import current_platform + +NUM_EXPERTS = [40, 64] +TOP_KS = [6, 8] + +MNK_FACTORS = [ + (2, 1024, 1024), + (2, 1024, 1536), + (2, 3072, 1024), + (2, 3072, 1536), + (64, 1024, 1024), + (64, 1024, 1536), + (64, 3072, 1024), + (64, 3072, 1536), + (224, 1024, 1024), + (224, 1024, 1536), + (224, 3072, 1024), + (224, 3072, 1536), +] + + +@dataclasses.dataclass +class MOETensors: + a: torch.Tensor + w1: torch.Tensor + w2: torch.Tensor + ab_strides1: torch.Tensor + c_strides1: torch.Tensor + ab_strides2: torch.Tensor + c_strides2: torch.Tensor + + @staticmethod + def make_moe_tensors(m: int, k: int, n: int, e: int, + dtype: torch.dtype) -> "MOETensors": + a = torch.randn((m, k), device="cuda", dtype=dtype) / 10 + w1 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 10 + w2 = torch.randn((e, k, n), device="cuda", dtype=dtype) / 10 + ab_strides1 = torch.full((e, ), k, device="cuda", dtype=torch.int64) + c_strides1 = torch.full((e, ), 2 * n, device="cuda", dtype=torch.int64) + ab_strides2 = torch.full((e, ), n, device="cuda", dtype=torch.int64) + c_strides2 = torch.full((e, ), k, device="cuda", dtype=torch.int64) + return MOETensors(a=a, + w1=w1, + w2=w2, + ab_strides1=ab_strides1, + c_strides1=c_strides1, + ab_strides2=ab_strides2, + c_strides2=c_strides2) + + +@dataclasses.dataclass +class MOETensors8Bit(MOETensors): + # quantized + a_q: Optional[torch.Tensor] = None # a -> a_q + w1_q: Optional[torch.Tensor] = None # w1 -> w1_q + w2_q: Optional[torch.Tensor] = None # w2 -> w2_q + a_scale: Optional[torch.Tensor] = None + w1_scale: Optional[torch.Tensor] = None + w2_scale: Optional[torch.Tensor] = None + # dequantized + a_d: Optional[torch.Tensor] = None # a -> a_q -> a_d + w1_d: Optional[torch.Tensor] = None # w1 -> w1_q -> w1_d + w2_d: Optional[torch.Tensor] = None # w2 -> w2_q -> w2_d + + @staticmethod + def make_moe_tensors_8bit(m: int, k: int, n: int, e: int, + per_act_token: bool, + per_out_channel: bool) -> "MOETensors8Bit": + dtype = torch.half + q_dtype = torch.float8_e4m3fn + + moe_tensors_fp16 = MOETensors.make_moe_tensors(m, k, n, e, dtype) + + # a -> a_q, w1 -> w1_q, w2 -> w2_q + n_b_scales = 2 * n if per_out_channel else 1 + k_b_scales = k if per_out_channel else 1 + # Get the right scale for tests. 
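+        # Note: the first scaled_fp8_quant call only derives a_scale
+        # dynamically; the second call re-quantizes with that fixed scale so
+        # a_q and a_scale stay consistent for the dequantized references below.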
+ _, a_scale = ops.scaled_fp8_quant( + moe_tensors_fp16.a, use_per_token_if_dynamic=per_act_token) + a_q, _ = ops.scaled_fp8_quant(moe_tensors_fp16.a, + a_scale, + use_per_token_if_dynamic=per_act_token) + w1_q = torch.empty((e, 2 * n, k), device="cuda", dtype=q_dtype) + w2_q = torch.empty((e, k, n), device="cuda", dtype=q_dtype) + + w1_scale = torch.empty((e, n_b_scales, 1), + device="cuda", + dtype=torch.float32) + w2_scale = torch.empty((e, k_b_scales, 1), + device="cuda", + dtype=torch.float32) + for expert in range(e): + w1_q[expert], w1_scale[expert] = ops.scaled_fp8_quant( + moe_tensors_fp16.w1[expert], + use_per_token_if_dynamic=per_out_channel) + w2_q[expert], w2_scale[expert] = ops.scaled_fp8_quant( + moe_tensors_fp16.w2[expert], + use_per_token_if_dynamic=per_out_channel) + + # a_q -> a_d, w1_q -> w1_d, w2_q -> w2_d + a_d = a_q.float().mul(a_scale).to(dtype) + w1_d = torch.empty_like(moe_tensors_fp16.w1) + w2_d = torch.empty_like(moe_tensors_fp16.w2) + for expert in range(e): + w1_d[expert] = (w1_q[expert].float() * w1_scale[expert]).half() + w2_d[expert] = (w2_q[expert].float() * w2_scale[expert]).half() + + return MOETensors8Bit(a=moe_tensors_fp16.a, + w1=moe_tensors_fp16.w1, + w2=moe_tensors_fp16.w2, + ab_strides1=moe_tensors_fp16.ab_strides1, + c_strides1=moe_tensors_fp16.c_strides1, + ab_strides2=moe_tensors_fp16.ab_strides2, + c_strides2=moe_tensors_fp16.c_strides2, + a_q=a_q, + w1_q=w1_q, + w2_q=w2_q, + a_scale=a_scale, + w1_scale=w1_scale, + w2_scale=w2_scale, + a_d=a_d, + w1_d=w1_d, + w2_d=w2_d) + + +def run_with_expert_maps(num_experts: int, num_local_experts: int, + **cutlass_moe_kwargs): + + def slice_experts(): + slice_params = [ + "w1_q", "w2_q", "ab_strides1", "ab_strides2", "c_strides1", + "c_strides2", "w1_scale", "w2_scale" + ] + full_tensors = { + k: v + for k, v in cutlass_moe_kwargs.items() + if k in slice_params and k in cutlass_moe_kwargs + } + + for i in range(0, num_experts, num_local_experts): + s, e = i, i + num_local_experts + + # make expert map + expert_map = [-1] * num_experts + expert_map[s:e] = list(range(num_local_experts)) + expert_map = torch.tensor(expert_map, + dtype=torch.int32, + device="cuda") + + # update cutlass moe arg with expert_map + cutlass_moe_kwargs["expert_map"] = expert_map + # update cutlass moe arg tensors + for k, t in full_tensors.items(): + cutlass_moe_kwargs[k] = t[s:e] + + yield cutlass_moe_kwargs + + out_tensor = torch.zeros_like(cutlass_moe_kwargs["a"]) + for kwargs in slice_experts(): + out_tensor = out_tensor + cutlass_moe_fp8(**kwargs) + + return out_tensor + + +def run_8_bit(moe_tensors: MOETensors8Bit, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + num_local_experts: Optional[int] = None) -> torch.Tensor: + assert not any([ + t is None for t in [ + moe_tensors.w1_q, moe_tensors.w2_q, moe_tensors.w1_scale, + moe_tensors.w2_scale, moe_tensors.a_scale + ] + ]) + + kwargs = { + 'a': moe_tensors.a, + 'w1_q': moe_tensors.w1_q.transpose(1, 2), # type: ignore[union-attr] + 'w2_q': moe_tensors.w2_q.transpose(1, 2), # type: ignore[union-attr] + 'topk_weights': topk_weights, + 'topk_ids_': topk_ids, + 'ab_strides1': moe_tensors.ab_strides1, + 'c_strides1': moe_tensors.c_strides1, + 'ab_strides2': moe_tensors.ab_strides2, + 'c_strides2': moe_tensors.c_strides2, + 'w1_scale': moe_tensors.w1_scale, + 'w2_scale': moe_tensors.w2_scale, + 'a1_scale': moe_tensors.a_scale + } + + num_experts = moe_tensors.w1.size(0) + with_ep = num_local_experts is not None or num_local_experts == num_experts + if not with_ep: + return 
cutlass_moe_fp8(**kwargs) + + assert num_local_experts is not None + return run_with_expert_maps( + num_experts, + num_local_experts, # type: ignore[arg-type] + **kwargs) + + +@pytest.mark.parametrize("m,n,k", MNK_FACTORS) +@pytest.mark.parametrize("e", NUM_EXPERTS) +@pytest.mark.parametrize("topk", TOP_KS) +@pytest.mark.parametrize("per_act_token", [True, False]) +@pytest.mark.parametrize("per_out_ch", [True, False]) +@pytest.mark.skipif( + (lambda x: x is None or not ops.cutlass_group_gemm_supported(x.to_int()))( + current_platform.get_device_capability()), + reason="Grouped gemm is not supported on this GPU type.") +def test_cutlass_moe_8_bit_no_graph( + m: int, + n: int, + k: int, + e: int, + topk: int, + per_act_token: bool, + per_out_ch: bool, +): + current_platform.seed_everything(7) + with set_current_vllm_config( + VllmConfig(parallel_config=ParallelConfig( + pipeline_parallel_size=1))): + + mt = MOETensors8Bit.make_moe_tensors_8bit(m, k, n, e, per_act_token, + per_out_ch) + + score = torch.randn((m, e), device="cuda", dtype=torch.half) + topk_weights, topk_ids = fused_topk(mt.a, + score, + topk, + renormalize=False) + + # Note that we are using the dequantized versions of the tensors. + # Using a, w1 and w2 directly results in minor output differences. + triton_output = fused_experts(mt.a_d, mt.w1_d, mt.w2_d, topk_weights, + topk_ids) + + cutlass_output = run_8_bit(mt, topk_weights, topk_ids) + + torch.testing.assert_close(triton_output, + cutlass_output, + atol=5e-2, + rtol=1e-2) + + +@pytest.mark.parametrize("m,n,k", MNK_FACTORS) +@pytest.mark.parametrize("e", NUM_EXPERTS) +@pytest.mark.parametrize("topk", TOP_KS) +@pytest.mark.parametrize("per_act_token", [True, False]) +@pytest.mark.parametrize("per_out_ch", [True, False]) +@pytest.mark.skipif( + (lambda x: x is None or not ops.cutlass_group_gemm_supported(x.to_int()))( + current_platform.get_device_capability()), + reason="Grouped gemm is not supported on this GPU type.") +def test_cutlass_moe_8_bit_cuda_graph( + m: int, + n: int, + k: int, + e: int, + topk: int, + per_act_token: bool, + per_out_ch: bool, +): + current_platform.seed_everything(7) + with set_current_vllm_config( + VllmConfig(parallel_config=ParallelConfig( + pipeline_parallel_size=1))): + + dtype = torch.half + + mt = MOETensors8Bit.make_moe_tensors_8bit(m, k, n, e, per_act_token, + per_out_ch) + + score = torch.randn((m, e), device="cuda", dtype=dtype) + topk_weights, topk_ids = fused_topk(mt.a, + score, + topk, + renormalize=False) + + # Note that we are using the dequantized versions of the tensors. + # Using a, w1 and w2 directly results in minor output differences. 
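+        # fused_experts on the dequantized tensors is the reference output;
+        # the cutlass path is captured and replayed inside a CUDA graph below.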
+ triton_output = fused_experts(mt.a_d, mt.w1_d, mt.w2_d, topk_weights, + topk_ids) + + stream = torch.cuda.Stream() + graph = torch.cuda.CUDAGraph() + with torch.cuda.graph(graph, stream=stream): + cutlass_output = run_8_bit(mt, topk_weights, topk_ids) + + torch.cuda.synchronize() + graph.replay() + torch.cuda.synchronize() + + torch.testing.assert_close(triton_output, + cutlass_output, + atol=9e-2, + rtol=1e-2) + + +@pytest.mark.parametrize("m", [64]) +@pytest.mark.parametrize("n", [1024]) +@pytest.mark.parametrize("k", [4096]) +@pytest.mark.parametrize("e", [16]) +@pytest.mark.parametrize("topk", [1, 8]) +@pytest.mark.parametrize("per_act_token", [True]) +@pytest.mark.parametrize("per_out_channel", [True]) +@pytest.mark.parametrize("ep_size", [1, 2, 4, 8, 16]) +@pytest.mark.skipif( + (lambda x: x is None or not ops.cutlass_group_gemm_supported(x.to_int()))( + current_platform.get_device_capability()), + reason="Grouped gemm is not supported on this GPU type.") +def test_cutlass_moe_8_bit_EP( + m: int, + n: int, + k: int, + e: int, + topk: int, + per_act_token: bool, + per_out_channel: bool, + ep_size: int, +): + current_platform.seed_everything(7) + with set_current_vllm_config( + VllmConfig(parallel_config=ParallelConfig( + pipeline_parallel_size=1))): + + mt = MOETensors8Bit.make_moe_tensors_8bit(m, k, n, e, per_act_token, + per_out_channel) + + score = torch.randn((m, e), device="cuda", dtype=torch.half) + topk_weights, topk_ids = fused_topk(mt.a, + score, + topk, + renormalize=False) + + # Note that we are using the dequantized versions of the tensors. + # Using a, w1 and w2 directly results in minor output differences. + triton_output = fused_experts(mt.a_d, mt.w1_d, mt.w2_d, topk_weights, + topk_ids) + + assert e % ep_size == 0, "Cannot distribute experts evenly" + cutlass_output = run_8_bit(mt, + topk_weights, + topk_ids, + num_local_experts=e // ep_size) + + torch.testing.assert_close(triton_output, + cutlass_output, + atol=5e-2, + rtol=1e-2) diff --git a/tests/kernels/test_moe.py b/tests/kernels/moe/test_moe.py similarity index 99% rename from tests/kernels/test_moe.py rename to tests/kernels/moe/test_moe.py index 425f36984a3..f2cca65ae42 100644 --- a/tests/kernels/test_moe.py +++ b/tests/kernels/moe/test_moe.py @@ -420,7 +420,8 @@ def test_fused_marlin_moe( score = torch.randn((m, e), device="cuda", dtype=dtype) - topk_weights, topk_ids = fused_topk(a, score, topk, False) + topk_weights, topk_ids, token_expert_indices = fused_topk( + a, score, topk, False) torch_output = torch_moe(a, w_ref1, w_ref2, score, topk, e_map) diff --git a/tests/kernels/moe/test_moe_permute_unpermute.py b/tests/kernels/moe/test_moe_permute_unpermute.py new file mode 100644 index 00000000000..dfcd61f7758 --- /dev/null +++ b/tests/kernels/moe/test_moe_permute_unpermute.py @@ -0,0 +1,223 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Tests for the MOE permute/unpermute kernel + +Run `pytest tests/kernels/test_moe_permute_unpermute.py`. 
+""" + +from typing import Optional + +import numpy as np +import pytest +import torch + +from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk +from vllm.model_executor.layers.fused_moe.layer import determine_expert_map +from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import ( + moe_permute, moe_unpermute) +from vllm.platforms import current_platform + +NUM_EXPERTS = [16, 64] +TOP_KS = [2, 4, 6, 8] +EP_SIZE = [1, 4, 16] +current_platform.seed_everything(0) + + +def torch_permute(hidden_states: torch.Tensor, + topk_ids: torch.Tensor, + token_expert_indices: torch.Tensor, + topk: int, + n_expert: int, + n_local_expert: int, + start_expert: int, + expert_map: Optional[torch.Tensor] = None, + align_block_size: Optional[int] = None, + fill_invalid_expert: int = -1) -> list[torch.Tensor]: + n_token, n_hidden = hidden_states.shape[0], hidden_states.shape[1] + if expert_map is not None: + is_local_expert = (expert_map[topk_ids] != -1) + not_local_expert = (expert_map[topk_ids] == -1) + topk_ids = is_local_expert * ( + topk_ids - start_expert) + not_local_expert * (topk_ids + n_expert) + + sorted_topk_ids, sorted_indices = torch.sort(topk_ids.flatten(), + stable=True) + dst_row_id2src_row_id_map = token_expert_indices.flatten()[sorted_indices] + + expert_first_token_offset = torch.zeros(n_local_expert + 1, + dtype=torch.int64, + device="cuda") + idx = 0 + for i in range(0, n_local_expert): + cnt = 0 + while idx < sorted_topk_ids.numel() and sorted_topk_ids[idx] == i: + cnt += 1 + idx += 1 + expert_first_token_offset[i + 1] = expert_first_token_offset[i] + cnt + + _, src2dst_idx = torch.sort(dst_row_id2src_row_id_map) + valid_row_idx = [] + if align_block_size is None: + + permuted_hidden_states = hidden_states[dst_row_id2src_row_id_map % + n_token, ...] 
+ permuted_row_size = permuted_hidden_states.shape[0] + m_indices = torch.empty(permuted_row_size, + device="cuda", + dtype=torch.int32).fill_(fill_invalid_expert) + for i in range(1, n_local_expert + 1): + first_token_offset = expert_first_token_offset[i - 1] + last_token_offset = expert_first_token_offset[i] + m_indices[first_token_offset:last_token_offset] = i - 1 + src_row_id2dst_row_id_map = torch.arange( + 0, n_token * topk, device="cuda", + dtype=torch.int32)[src2dst_idx].reshape((n_token, topk)) + valid_row_idx += [i for i in range(expert_first_token_offset[-1])] + return [ + permuted_hidden_states, expert_first_token_offset, + src_row_id2dst_row_id_map, m_indices, valid_row_idx + ] + else: + permuted_row_size = (topk * n_token + n_expert * + (align_block_size - 1) + align_block_size - + 1) // align_block_size * align_block_size + permuted_hidden_states = torch.empty((permuted_row_size, n_hidden), + device="cuda", + dtype=hidden_states.dtype) + align_src_row_id2dst_row_id = torch.empty(n_token * topk, + device="cuda", + dtype=torch.int32) + align_expert_first_token_offset = torch.zeros_like( + expert_first_token_offset) + m_indices = torch.empty(permuted_row_size, + device="cuda", + dtype=torch.int32).fill_(fill_invalid_expert) + # get align_permuted_hidden_states, + # valid row_idx and align_expert_first_token_offset + for i in range(1, n_local_expert + 1): + first_token_offset = expert_first_token_offset[i - 1] + last_token_offset = expert_first_token_offset[i] + n_token_in_expert = last_token_offset - first_token_offset + align_expert_first_token_offset[ + i] = align_expert_first_token_offset[ + i - 1] + (n_token_in_expert + align_block_size - + 1) // align_block_size * align_block_size + align_first_token_offset = align_expert_first_token_offset[i - 1] + align_last_token_offset = align_expert_first_token_offset[i] + dst_row_id2src_row_id_in_expert = dst_row_id2src_row_id_map[ + first_token_offset:first_token_offset + + n_token_in_expert] % n_token + # store token in current expert with align_first_token_offset + permuted_hidden_states[align_first_token_offset:\ + align_first_token_offset+n_token_in_expert,\ + ...] = hidden_states[\ + dst_row_id2src_row_id_in_expert, ...] 
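            # Worked example (illustrative): with align_block_size=128, an
            # expert that receives 130 tokens occupies ceil(130/128)*128 = 256
            # rows of the permuted buffer. Only the first 130 rows are added to
            # valid_row_idx below; the trailing padding rows still get this
            # expert's id in m_indices so block-aligned grouped GEMM can run on
            # whole tiles, and their contents are simply never compared.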
+ # set current expert m_indices + m_indices[align_first_token_offset:align_last_token_offset] = i - 1 + valid_row_idx += [ + i for i in range(align_first_token_offset, + align_first_token_offset + n_token_in_expert) + ] + # get align_src_row_id2dst_row_id + for i in range(n_token * topk): + eid = sorted_topk_ids[i] + if (eid >= n_local_expert): + # check token not in local expert + align_src_row_id2dst_row_id[ + i] = align_expert_first_token_offset[-1] + continue + first_token_offset = expert_first_token_offset[eid] + align_first_token_offset = align_expert_first_token_offset[eid] + token_offset = i - first_token_offset + align_src_row_id2dst_row_id[ + i] = align_first_token_offset + token_offset + align_src_row_id2dst_row_id = align_src_row_id2dst_row_id[\ + src2dst_idx].reshape((n_token, topk)) + return [ + permuted_hidden_states, align_expert_first_token_offset, + align_src_row_id2dst_row_id, m_indices, valid_row_idx + ] + + +def torch_unpermute(permuted_hidden_states: torch.Tensor, + topk_weights: torch.Tensor, topk_ids: torch.Tensor, + token_expert_indices: torch.Tensor, + src_row_id2dst_row_id_map: torch.Tensor, + valid_row_idx: torch.Tensor, topk: int, + n_expert: int) -> torch.Tensor: + # ignore invalid row + mask = torch.zeros(permuted_hidden_states.shape[0], + dtype=bool, + device="cuda") + mask[valid_row_idx] = True + permuted_hidden_states[~mask] = 0 + idx = src_row_id2dst_row_id_map.flatten()[ + token_expert_indices.flatten()].reshape(token_expert_indices.shape) + output = permuted_hidden_states[idx, ...] * topk_weights[..., None] + output = output.sum(dim=1).to(permuted_hidden_states.dtype) + return output + + +@pytest.mark.parametrize("n_token", [1, 33, 64, 222, 1024, 2048, 3000, 5000]) +@pytest.mark.parametrize("n_hidden", [2048, 4096, 7168]) +@pytest.mark.parametrize("n_expert", NUM_EXPERTS) +@pytest.mark.parametrize("topk", TOP_KS) +@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16]) +@pytest.mark.parametrize("ep_size", EP_SIZE) +@pytest.mark.parametrize("align_block_size", [None, 128]) +def test_moe_permute_unpermute(n_token: int, n_hidden: int, topk: int, + n_expert: int, ep_size: int, dtype: torch.dtype, + align_block_size: Optional[int]): + fill_invalid_expert = 0 + ep_rank = np.random.randint(0, ep_size) + expert_map = None + n_local_expert = n_expert + if (ep_size != 1): + n_local_expert, expert_map = determine_expert_map( + ep_size, ep_rank, n_expert) + expert_map = expert_map.cuda() + start_expert = n_local_expert * ep_rank + current_platform.seed_everything(0) + hidden_states = torch.randn((n_token, n_hidden), device="cuda").to(dtype) + gating_output = torch.randn((n_token, n_expert), device="cuda").to(dtype) + topk_weights, topk_ids, token_expert_indices = fused_topk( + hidden_states, gating_output, topk, False) + gold0, gold1, gold2, gold3, valid_row_idx = torch_permute( + hidden_states, + topk_ids, + token_expert_indices, + topk, + n_expert, + n_local_expert, + start_expert, + expert_map=expert_map, + align_block_size=align_block_size, + fill_invalid_expert=fill_invalid_expert) + + result0, result1, result2, result3 = moe_permute( + hidden_states, topk_weights, topk_ids, token_expert_indices, topk, + n_expert, n_local_expert, expert_map, align_block_size, + fill_invalid_expert) + + # check expert_first_token_offset + torch.testing.assert_close(gold1, result1, atol=0, rtol=0) + # check src_row_id2dst_row_id_map + torch.testing.assert_close(gold2, result2, atol=0, rtol=0) + # check mindice + torch.testing.assert_close(gold3, result3, atol=0, 
rtol=0) + # check permuted_hidden_states, only valid token + torch.testing.assert_close(gold0[valid_row_idx], + result0[valid_row_idx], + atol=0, + rtol=0) + + # add a random tensor to simulate group gemm + result0 = 0.5 * result0 + torch.randn_like(result0) + + result4 = moe_unpermute(result0, topk_weights, topk_ids, result2, result1, + topk, n_expert, n_local_expert) + gold4 = torch_unpermute(result0, topk_weights, topk_ids, + token_expert_indices, result2, valid_row_idx, topk, + n_local_expert) + + # check unpermuted hidden + torch.testing.assert_close(result4, gold4, atol=2e-2, rtol=0) diff --git a/tests/kernels/test_triton_moe_ptpc_fp8.py b/tests/kernels/moe/test_triton_moe_ptpc_fp8.py similarity index 100% rename from tests/kernels/test_triton_moe_ptpc_fp8.py rename to tests/kernels/moe/test_triton_moe_ptpc_fp8.py diff --git a/tests/kernels/quant_utils.py b/tests/kernels/quant_utils.py index 498da6001ae..764924f2678 100644 --- a/tests/kernels/quant_utils.py +++ b/tests/kernels/quant_utils.py @@ -87,3 +87,63 @@ def ref_dynamic_per_tensor_fp8_quant(x: torch.tensor) \ ref_out = (as_float32_tensor(x) * ref_iscale).clamp( fp8_traits_min, fp8_traits_max).to(FP8_DTYPE) return ref_out, ref_scale.view((1, )) + + +def native_w8a8_block_matmul(A: torch.Tensor, B: torch.Tensor, + As: torch.Tensor, Bs: torch.Tensor, block_size, + output_dtype): + """This function performs matrix multiplication with block-wise + quantization using native torch. + It is agnostic to the input data type and can be used for both int8 and + fp8 data types. + + It takes two input tensors `A` and `B` (int8) with scales `As` and + `Bs` (float32). + The output is returned in the specified `output_dtype`. + """ + A = A.to(torch.float32) + B = B.to(torch.float32) + assert A.shape[-1] == B.shape[-1] + assert B.ndim == 2 and B.is_contiguous() and Bs.ndim == 2 + assert len(block_size) == 2 + block_n, block_k = block_size[0], block_size[1] + assert (A.shape[-1] + block_k - 1) // block_k == As.shape[-1] + assert A.shape[:-1] == As.shape[:-1] + + M = A.numel() // A.shape[-1] + N, K = B.shape + origin_C_shape = A.shape[:-1] + (N, ) + A = A.reshape(M, A.shape[-1]) + As = As.reshape(M, As.shape[-1]) + n_tiles = (N + block_n - 1) // block_n + k_tiles = (K + block_k - 1) // block_k + assert n_tiles == Bs.shape[0] + assert k_tiles == Bs.shape[1] + + C_shape = (M, N) + C = torch.zeros(C_shape, dtype=torch.float32, device=A.device) + + A_tiles = [ + A[:, i * block_k:min((i + 1) * block_k, K)] for i in range(k_tiles) + ] + B_tiles = [[ + B[ + j * block_n:min((j + 1) * block_n, N), + i * block_k:min((i + 1) * block_k, K), + ] for i in range(k_tiles) + ] for j in range(n_tiles)] + C_tiles = [ + C[:, j * block_n:min((j + 1) * block_n, N)] for j in range(n_tiles) + ] + As_tiles = [As[:, i:i + 1] for i in range(k_tiles)] + + for i in range(k_tiles): + for j in range(n_tiles): + a = A_tiles[i] + b = B_tiles[j][i] + c = C_tiles[j] + s = As_tiles[i] * Bs[j][i] + c[:, :] += torch.matmul(a, b.t()) * s + + C = C.reshape(origin_C_shape).to(output_dtype) + return C diff --git a/tests/kernels/test_allspark_gemm.py b/tests/kernels/quantization/test_allspark_gemm.py similarity index 100% rename from tests/kernels/test_allspark_gemm.py rename to tests/kernels/quantization/test_allspark_gemm.py diff --git a/tests/kernels/test_aqlm.py b/tests/kernels/quantization/test_aqlm.py similarity index 100% rename from tests/kernels/test_aqlm.py rename to tests/kernels/quantization/test_aqlm.py diff --git a/tests/kernels/test_awq.py 
b/tests/kernels/quantization/test_awq.py similarity index 100% rename from tests/kernels/test_awq.py rename to tests/kernels/quantization/test_awq.py diff --git a/tests/kernels/test_awq_marlin.py b/tests/kernels/quantization/test_awq_marlin.py similarity index 98% rename from tests/kernels/test_awq_marlin.py rename to tests/kernels/quantization/test_awq_marlin.py index 939b0e7157b..c30fe60becd 100644 --- a/tests/kernels/test_awq_marlin.py +++ b/tests/kernels/quantization/test_awq_marlin.py @@ -84,7 +84,8 @@ def test_fused_marlin_moe_awq( score = torch.randn((m, e), device="cuda", dtype=dtype) - topk_weights, topk_ids = fused_topk(a, score, topk, False) + topk_weights, topk_ids, token_expert_indices = fused_topk( + a, score, topk, False) marlin_output = torch.ops.vllm.fused_marlin_moe( a, qweight1, diff --git a/tests/kernels/test_awq_triton.py b/tests/kernels/quantization/test_awq_triton.py similarity index 100% rename from tests/kernels/test_awq_triton.py rename to tests/kernels/quantization/test_awq_triton.py diff --git a/tests/kernels/test_block_fp8.py b/tests/kernels/quantization/test_block_fp8.py similarity index 98% rename from tests/kernels/test_block_fp8.py rename to tests/kernels/quantization/test_block_fp8.py index c450048bf66..38c7e461bb9 100644 --- a/tests/kernels/test_block_fp8.py +++ b/tests/kernels/quantization/test_block_fp8.py @@ -6,6 +6,7 @@ import pytest import torch +from tests.kernels.quant_utils import native_w8a8_block_matmul from vllm.config import VllmConfig, set_current_vllm_config from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.fused_moe import fused_moe @@ -18,8 +19,6 @@ per_token_group_quant_fp8, w8a8_block_fp8_matmul) from vllm.platforms import current_platform -from .utils_block import native_w8a8_block_matmul - dg_available = False try: import deep_gemm @@ -339,7 +338,8 @@ def deep_gemm_w8a8_block_fp8_moe(M, K, a, w1, w2, w1_s, w2_s, score, topk, M, K = a.shape N = w2.shape[-1] - topk_weight, topk_ids = fused_topk(a, score.float(), topk, False) + topk_weight, topk_ids, token_expert_indices = fused_topk( + a, score.float(), topk, False) block_m = deep_gemm.get_m_alignment_for_contiguous_layout() @@ -436,7 +436,8 @@ def test_w8a8_block_fp8_deep_gemm_fused_moe(M, N, K, E, topk, seed): ref_out = torch_w8a8_block_fp8_moe(a, w1, w2, w1_s, w2_s, score, topk, block_size) - topk_weights, topk_ids = fused_topk(a, score.float(), topk, False) + topk_weights, topk_ids, token_expert_indices = fused_topk( + a, score.float(), topk, False) out = deep_gemm_moe_fp8(a, w1, w2, w1_s, w2_s, topk_weights, topk_ids) diff --git a/tests/kernels/test_block_int8.py b/tests/kernels/quantization/test_block_int8.py similarity index 99% rename from tests/kernels/test_block_int8.py rename to tests/kernels/quantization/test_block_int8.py index 9447f9d6916..104f23fd7cd 100644 --- a/tests/kernels/test_block_int8.py +++ b/tests/kernels/quantization/test_block_int8.py @@ -6,6 +6,7 @@ import pytest import torch +from tests.kernels.quant_utils import native_w8a8_block_matmul from vllm.config import VllmConfig, set_current_vllm_config from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.fused_moe import fused_moe @@ -13,8 +14,6 @@ w8a8_block_int8_matmul) from vllm.platforms import current_platform -from .utils_block import native_w8a8_block_matmul - if current_platform.get_device_capability() < (7, 0): pytest.skip("INT8 Triton requires CUDA 7.0 or higher", allow_module_level=True) diff --git 
a/tests/kernels/test_cutlass_2of4_sparse.py b/tests/kernels/quantization/test_cutlass_2of4_sparse.py similarity index 99% rename from tests/kernels/test_cutlass_2of4_sparse.py rename to tests/kernels/quantization/test_cutlass_2of4_sparse.py index 2890e15d6cb..d67d2dbb899 100644 --- a/tests/kernels/test_cutlass_2of4_sparse.py +++ b/tests/kernels/quantization/test_cutlass_2of4_sparse.py @@ -7,13 +7,12 @@ import pytest import torch +from tests.kernels.utils import baseline_scaled_mm, to_fp8, to_int8 from vllm import _custom_ops as ops from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( sparse_cutlass_supported) from vllm.platforms import current_platform -from .utils import baseline_scaled_mm, to_fp8, to_int8 - CUDA_DEVICES = [ f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2) ] diff --git a/tests/kernels/test_cutlass.py b/tests/kernels/quantization/test_cutlass_scaled_mm.py similarity index 99% rename from tests/kernels/test_cutlass.py rename to tests/kernels/quantization/test_cutlass_scaled_mm.py index f11ce6f45a9..8084d9bf2c2 100644 --- a/tests/kernels/test_cutlass.py +++ b/tests/kernels/quantization/test_cutlass_scaled_mm.py @@ -8,13 +8,11 @@ import pytest import torch -from tests.kernels.utils import opcheck +from tests.kernels.utils import baseline_scaled_mm, opcheck, to_fp8, to_int8 from vllm import _custom_ops as ops from vllm.platforms import current_platform from vllm.utils import cdiv -from .utils import baseline_scaled_mm, to_fp8, to_int8 - MNK_FACTORS = [ (1, 256, 128), (1, 16384, 1024), diff --git a/tests/kernels/test_fp8_quant.py b/tests/kernels/quantization/test_fp8_quant.py similarity index 100% rename from tests/kernels/test_fp8_quant.py rename to tests/kernels/quantization/test_fp8_quant.py diff --git a/tests/kernels/test_ggml.py b/tests/kernels/quantization/test_ggml.py similarity index 100% rename from tests/kernels/test_ggml.py rename to tests/kernels/quantization/test_ggml.py diff --git a/tests/kernels/test_gguf.py b/tests/kernels/quantization/test_gguf.py similarity index 100% rename from tests/kernels/test_gguf.py rename to tests/kernels/quantization/test_gguf.py diff --git a/tests/kernels/test_gptq.py b/tests/kernels/quantization/test_gptq.py similarity index 100% rename from tests/kernels/test_gptq.py rename to tests/kernels/quantization/test_gptq.py diff --git a/tests/kernels/test_int8_kernel.py b/tests/kernels/quantization/test_int8_kernel.py similarity index 100% rename from tests/kernels/test_int8_kernel.py rename to tests/kernels/quantization/test_int8_kernel.py diff --git a/tests/kernels/test_int8_quant.py b/tests/kernels/quantization/test_int8_quant.py similarity index 100% rename from tests/kernels/test_int8_quant.py rename to tests/kernels/quantization/test_int8_quant.py diff --git a/tests/kernels/test_machete_mm.py b/tests/kernels/quantization/test_machete_mm.py similarity index 100% rename from tests/kernels/test_machete_mm.py rename to tests/kernels/quantization/test_machete_mm.py diff --git a/tests/kernels/test_marlin_gemm.py b/tests/kernels/quantization/test_marlin_gemm.py similarity index 100% rename from tests/kernels/test_marlin_gemm.py rename to tests/kernels/quantization/test_marlin_gemm.py diff --git a/tests/kernels/test_nvfp4_quant.py b/tests/kernels/quantization/test_nvfp4_quant.py similarity index 100% rename from tests/kernels/test_nvfp4_quant.py rename to tests/kernels/quantization/test_nvfp4_quant.py diff --git a/tests/kernels/test_nvfp4_scaled_mm.py 
b/tests/kernels/quantization/test_nvfp4_scaled_mm.py similarity index 100% rename from tests/kernels/test_nvfp4_scaled_mm.py rename to tests/kernels/quantization/test_nvfp4_scaled_mm.py diff --git a/tests/kernels/quantization/test_rocm_skinny_gemms.py b/tests/kernels/quantization/test_rocm_skinny_gemms.py new file mode 100644 index 00000000000..622079c3944 --- /dev/null +++ b/tests/kernels/quantization/test_rocm_skinny_gemms.py @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest +import torch + +import vllm._custom_ops as ops +from tests.kernels.quant_utils import ref_dynamic_per_tensor_fp8_quant +from vllm.platforms import current_platform + +DTYPES = [torch.bfloat16, torch.float16] +M = [16, 32, 64, 128, 256, 512, 1024, 4096, 8192] +K = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192] # k % 8 == 0 +N = [1, 2, 3, 4] +SEEDS = [0] + + +@pytest.mark.parametrize("n", [1]) # only test for batch size 1 +@pytest.mark.parametrize("k", K) +@pytest.mark.parametrize("m", M) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("rows_per_block", [2, 4, 8, 16]) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.skipif(not current_platform.is_rocm(), + reason="only test for rocm") +@torch.inference_mode() +def test_rocm_llmm1_kernel(n, k, m, dtype, rows_per_block, seed): + torch.manual_seed(seed) + A = torch.rand(n, k, dtype=dtype, device="cuda") + B = torch.rand(m, k, dtype=dtype, device="cuda") + + ref_out = torch.matmul(A, B.t()) + out = ops.LLMM1(B, A, rows_per_block) + + assert torch.allclose(out, ref_out, rtol=0.01) + + +@pytest.mark.parametrize("n", N) # only test for batch size <= 4 +@pytest.mark.parametrize("k", K + [9216, 10240, 16384]) +@pytest.mark.parametrize("m", [8] + M) # m >= 8 +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.skipif(not current_platform.is_rocm(), + reason="only test for rocm") +def test_rocm_wvsplitk_kernel(n, k, m, dtype, seed): + torch.manual_seed(seed) + cu_count = current_platform.get_cu_count() + + A = torch.rand(n, k, dtype=dtype, device="cuda") + B = torch.rand(m, k, dtype=dtype, device="cuda") + + ref_out = torch.matmul(A, B.t()) + out = ops.wvSplitK(B, A, cu_count) + + assert torch.allclose(out, ref_out, rtol=0.01) + + +@pytest.mark.parametrize("n", N) # only test for batch size <= 4 +@pytest.mark.parametrize("k", K[1:] + [14336, 24576, 32768]) # k % 16 == 0 +@pytest.mark.parametrize("m", M + [28672]) # m >= 16 +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.skipif(not current_platform.is_rocm(), + reason="only test for rocm") +def test_rocm_wvsplitk_fp8_kernel(n, k, m, dtype, seed): + torch.manual_seed(seed) + + A = torch.rand(n, k, device="cuda") + B = torch.rand(m, k, device="cuda") + + A, scale_a = ref_dynamic_per_tensor_fp8_quant(A) + B, scale_b = ref_dynamic_per_tensor_fp8_quant(B) + + ref_out = torch._scaled_mm(A, + B.t(), + out_dtype=dtype, + scale_a=scale_a, + scale_b=scale_b) + out = ops.wvSplitKQ(B, A, dtype, scale_a, scale_b, + current_platform.get_cu_count()) + + assert torch.allclose(out, ref_out, rtol=0.01) diff --git a/tests/kernels/test_triton_scaled_mm.py b/tests/kernels/quantization/test_triton_scaled_mm.py similarity index 100% rename from tests/kernels/test_triton_scaled_mm.py rename to tests/kernels/quantization/test_triton_scaled_mm.py diff --git a/tests/kernels/test_attention_selector.py b/tests/kernels/test_attention_selector.py deleted file mode 100644 index a51e70d45ee..00000000000 --- 
a/tests/kernels/test_attention_selector.py +++ /dev/null @@ -1,136 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -from unittest.mock import patch - -import pytest -import torch - -from vllm.attention.selector import _cached_get_attn_backend, get_attn_backend -from vllm.platforms.cpu import CpuPlatform -from vllm.platforms.cuda import CudaPlatform -from vllm.platforms.rocm import RocmPlatform -from vllm.utils import STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL, STR_INVALID_VAL - - -@pytest.fixture(autouse=True) -def clear_cache(): - """Clear lru cache to ensure each test case runs without caching. - """ - _cached_get_attn_backend.cache_clear() - - -@pytest.mark.parametrize( - "name", ["TORCH_SDPA", "ROCM_FLASH", "XFORMERS", "FLASHINFER"]) -@pytest.mark.parametrize("use_v1", [True, False]) -@pytest.mark.parametrize("device", ["cpu", "hip", "cuda"]) -def test_env( - name: str, - use_v1: bool, - device: str, - monkeypatch: pytest.MonkeyPatch, -): - """Test that the attention selector can be set via environment variable. - Note that we do not test FlashAttn because it is the default backend. - """ - - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") - m.setenv(STR_BACKEND_ENV_VAR, name) - - if device == "cpu": - with patch("vllm.attention.selector.current_platform", - CpuPlatform()): - backend = get_attn_backend(16, torch.float16, torch.float16, - 16, False) - assert backend.get_name() == "TORCH_SDPA" - elif device == "hip": - with patch("vllm.attention.selector.current_platform", - RocmPlatform()): - backend = get_attn_backend(16, torch.float16, torch.float16, - 16, False) - EXPECTED = "TRITON_ATTN_VLLM_V1" if use_v1 else "ROCM_FLASH" - assert backend.get_name() == EXPECTED - else: - if name in ["XFORMERS", "FLASHINFER"]: - with patch("vllm.attention.selector.current_platform", - CudaPlatform()): - backend = get_attn_backend(16, torch.float16, - torch.float16, 16, False) - EXPECTED = "FLASH_ATTN_VLLM_V1" if use_v1 else name - assert backend.get_name() == EXPECTED - - -def test_flash_attn(monkeypatch: pytest.MonkeyPatch): - """Test FlashAttn validation.""" - # TODO: When testing for v1, pipe in `use_v1` as an argument to - # get_attn_backend - - with monkeypatch.context() as m: - m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL) - - # Unsupported CUDA arch - monkeypatch.setattr(torch.cuda, "get_device_capability", lambda: - (7, 5)) - backend = get_attn_backend(16, torch.float16, None, 16, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # Reset the monkeypatch for subsequent tests - monkeypatch.undo() - - # Unsupported data type - backend = get_attn_backend(16, torch.float8_e4m3fn, None, 16, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # Unsupported kv cache data type - backend = get_attn_backend(16, torch.float16, "fp8", 16, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # Unsupported block size - backend = get_attn_backend(16, torch.float16, None, 8, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # flash-attn is not installed - import sys - original_module = sys.modules.get('vllm_flash_attn') - monkeypatch.setitem(sys.modules, 'vllm_flash_attn', None) - backend = get_attn_backend(16, torch.float16, None, 16, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # Restore the original module if it existed - if original_module is not None: - monkeypatch.setitem(sys.modules, 'vllm_flash_attn', - original_module) - else: - monkeypatch.delitem(sys.modules, 'vllm_flash_attn', raising=False) - - # 
Unsupported head size - backend = get_attn_backend(17, torch.float16, None, 16, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # Attention-free models should bypass env and use PlaceholderAttention - backend = get_attn_backend(16, torch.float16, torch.float16, 16, True) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - -@pytest.mark.parametrize("use_v1", [True, False]) -def test_invalid_env(use_v1: bool, monkeypatch: pytest.MonkeyPatch): - - with monkeypatch.context() as m, patch( - "vllm.attention.selector.current_platform", CudaPlatform()): - m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") - m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL) - - # Test with head size 32 - backend = get_attn_backend(32, torch.float16, None, 16, False) - EXPECTED = "FLASH_ATTN_VLLM_V1" if use_v1 else "FLASH_ATTN" - assert backend.get_name() == EXPECTED - - # when block size == 16, backend will fall back to XFORMERS - # this behavior is not yet supported on V1. - if use_v1: - # TODO: support fallback on V1! - # https://github.com/vllm-project/vllm/issues/14524 - pass - else: - backend = get_attn_backend(16, torch.float16, None, 16, False) - assert backend.get_name() == "XFORMERS" diff --git a/tests/kernels/test_cutlass_mla_decode.py b/tests/kernels/test_cutlass_mla_decode.py new file mode 100644 index 00000000000..87e4bd4b096 --- /dev/null +++ b/tests/kernels/test_cutlass_mla_decode.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest +import torch +import torch.nn.functional as F +from torch import Tensor + +import vllm._custom_ops as ops +from vllm.platforms import current_platform + +if not current_platform.has_device_capability(100): + pytest.skip( + reason="Cutlass MLA Requires compute capability of 10 or above.", + allow_module_level=True) + + +def ref_mla( + out: Tensor, # (bs, num_heads, v_head_dim) + query: Tensor, # (bs, num_heads, head_dim) + kv_cache: Tensor, # (num_blocks, block_size, head_dim) + scale: float, + block_tables: Tensor, # (bs, max_num_blocks) + seq_lens: Tensor, # (bs,) +): + bs, num_heads, v_head_dim = out.shape + head_dim = query.shape[2] + + for i in range(bs): + # gather and flatten KV-cache + kv = kv_cache[ + block_tables[i]] # (max_num_blocks, block_size, head_dim) + kv = kv.view(1, -1, + head_dim)[:, :seq_lens[i]] # (1, seq_len, head_dim) + v = kv[:, :, :v_head_dim] + + q = query[i].view(num_heads, 1, head_dim) + o = F.scaled_dot_product_attention(q, + kv, + v, + scale=scale, + enable_gqa=True) + out[i] = o.view(num_heads, v_head_dim) + + return out + + +@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) +@pytest.mark.parametrize("mean_seq_len", [128, 1024, 4096]) +@pytest.mark.parametrize("bs", [1, 2, 4]) +@pytest.mark.parametrize("varlen", [False, True]) +@pytest.mark.parametrize("block_size", [16, 64, 128]) +def test_cutlass_mla_decode(dtype: torch.dtype, mean_seq_len: int, bs: int, + varlen: bool, block_size: int): + torch.set_default_dtype(dtype) + torch.set_default_device('cuda') + torch.manual_seed(42) + + d = 576 + h_q = 128 + dv = 512 + + q_nope_dim = 128 + q_pe_dim = 64 + scale = (q_nope_dim + q_pe_dim)**(-0.5) + if varlen: + seq_lens = torch.empty(bs).normal_(mean_seq_len, mean_seq_len / 2) + seq_lens = seq_lens.clip(2).to(torch.int32) + else: + seq_lens = torch.full((bs, ), mean_seq_len, dtype=torch.int32) + max_seq_len = seq_lens.max().item() + block_num = (max_seq_len + block_size - 1) // block_size + + # Pad block_num so that small blocks can be packed into full 128-sized + # CUTLASS tiles. 
One 128-wide tile can hold (128 // block_size) small + # blocks. + pack_factor = 128 // block_size + block_num = ((block_num + pack_factor - 1) // pack_factor) * pack_factor + + q = torch.randn(bs, h_q, d) + block_table = torch.randint(0, + bs * block_num, (bs, block_num), + dtype=torch.int32) + + kv_cache = torch.randn(block_table.numel(), block_size, d) + + out_ref = q.new_zeros(bs, h_q, dv) + ref_mla(out_ref, q, kv_cache, scale, block_table, seq_lens) + out_ans = torch.zeros_like(out_ref) + q_nope = q[:, :, :dv].clone() + q_pe = q[:, :, dv:].clone() + ops.cutlass_mla_decode(out_ans, q_nope, q_pe, kv_cache, seq_lens, + block_table, scale) + + torch.testing.assert_close(out_ans, out_ref, atol=1e-2, rtol=1e-2) diff --git a/tests/kernels/test_cutlass_moe.py b/tests/kernels/test_cutlass_moe.py deleted file mode 100644 index 3cfed6ae853..00000000000 --- a/tests/kernels/test_cutlass_moe.py +++ /dev/null @@ -1,244 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -import pytest -import torch - -from vllm import _custom_ops as ops -from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config -from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8 -from vllm.model_executor.layers.fused_moe.fused_moe import (fused_experts, - fused_topk) -from vllm.platforms import current_platform - -NUM_EXPERTS = [40, 64] -TOP_KS = [6, 8] - - -def run(a: torch.Tensor, a_scale: torch.Tensor, w1_q: torch.Tensor, - w2_q: torch.Tensor, w1_scale: torch.Tensor, w2_scale: torch.Tensor, - topk_weights: torch.Tensor, topk_ids: torch.Tensor, - ab_strides1: torch.Tensor, c_strides1: torch.Tensor, - ab_strides2: torch.Tensor, c_strides2: torch.Tensor): - with set_current_vllm_config( - VllmConfig(parallel_config=ParallelConfig( - pipeline_parallel_size=1))): - return cutlass_moe_fp8(a, - w1_q, - w2_q, - w1_scale, - w2_scale, - topk_weights, - topk_ids, - ab_strides1, - c_strides1, - ab_strides2, - c_strides2, - a1_scale=a_scale) - - -@pytest.mark.parametrize("m", [2, 64, 224]) -@pytest.mark.parametrize("n", [1024, 3072]) -@pytest.mark.parametrize("k", [1024, 1536]) -@pytest.mark.parametrize("e", NUM_EXPERTS) -@pytest.mark.parametrize("topk", TOP_KS) -@pytest.mark.parametrize("per_act_token", [True, False]) -@pytest.mark.parametrize("per_out_ch", [True, False]) -@pytest.mark.skipif( - (lambda x: x is None or not ops.cutlass_group_gemm_supported(x.to_int()))( - current_platform.get_device_capability()), - reason="Grouped gemm is not supported on this GPU type.") -def test_cutlass_moe_no_graph( - m: int, - n: int, - k: int, - e: int, - topk: int, - per_act_token: bool, - per_out_ch: bool, -): - current_platform.seed_everything(7) - with set_current_vllm_config( - VllmConfig(parallel_config=ParallelConfig( - pipeline_parallel_size=1))): - - dtype = torch.half - - a = torch.randn((m, k), device="cuda", dtype=dtype) / 10 - w1 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 10 - w2 = torch.randn((e, k, n), device="cuda", dtype=dtype) / 10 - - # Get the right scale for tests. 
- _, a_scale1 = ops.scaled_fp8_quant( - a, use_per_token_if_dynamic=per_act_token) - a_q, _ = ops.scaled_fp8_quant(a, - a_scale1, - use_per_token_if_dynamic=per_act_token) - - a_d = a_q.float().mul(a_scale1).to(dtype) - - n_b_scales = 2 * n if per_out_ch else 1 - k_b_scales = k if per_out_ch else 1 - - w1_q = torch.empty((e, 2 * n, k), - device="cuda", - dtype=torch.float8_e4m3fn) - w2_q = torch.empty((e, k, n), device="cuda", dtype=torch.float8_e4m3fn) - w1_scale = torch.empty((e, n_b_scales, 1), - device="cuda", - dtype=torch.float32) - w2_scale = torch.empty((e, k_b_scales, 1), - device="cuda", - dtype=torch.float32) - - ab_strides1 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - c_strides1 = torch.full((e, ), 2 * n, device="cuda", dtype=torch.int64) - ab_strides2 = torch.full((e, ), n, device="cuda", dtype=torch.int64) - c_strides2 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - - for expert in range(e): - w1_q[expert], w1_scale[expert] = ops.scaled_fp8_quant( - w1[expert], use_per_token_if_dynamic=per_out_ch) - w2_q[expert], w2_scale[expert] = ops.scaled_fp8_quant( - w2[expert], use_per_token_if_dynamic=per_out_ch) - w1_q = w1_q.transpose(1, 2) - w2_q = w2_q.transpose(1, 2) - - ab_strides1 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - c_strides1 = torch.full((e, ), 2 * n, device="cuda", dtype=torch.int64) - ab_strides2 = torch.full((e, ), n, device="cuda", dtype=torch.int64) - c_strides2 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - - w1_d = torch.empty_like(w1) - w2_d = torch.empty_like(w2) - for expert in range(e): - w1_d[expert] = (w1_q[expert].t().float() * w1_scale[expert]).half() - w2_d[expert] = (w2_q[expert].t().float() * w2_scale[expert]).half() - - score = torch.randn((m, e), device="cuda", dtype=dtype) - topk_weights, topk_ids = fused_topk(a, score, topk, renormalize=False) - - triton_output = fused_experts(a_d, w1_d, w2_d, topk_weights, topk_ids) - - cutlass_output = cutlass_moe_fp8(a, - w1_q, - w2_q, - w1_scale, - w2_scale, - topk_weights, - topk_ids, - ab_strides1, - c_strides1, - ab_strides2, - c_strides2, - a1_scale=a_scale1) - - #print(triton_output) - #print(cutlass_output) - #print("*") - - torch.testing.assert_close(triton_output, - cutlass_output, - atol=5e-2, - rtol=1e-2) - - -@pytest.mark.parametrize("m", [2, 64, 224]) -@pytest.mark.parametrize("n", [1024, 3072]) -@pytest.mark.parametrize("k", [1024, 1536]) -@pytest.mark.parametrize("e", NUM_EXPERTS) -@pytest.mark.parametrize("topk", TOP_KS) -@pytest.mark.parametrize("per_act_token", [True, False]) -@pytest.mark.parametrize("per_out_ch", [True, False]) -@pytest.mark.skipif( - (lambda x: x is None or not ops.cutlass_group_gemm_supported(x.to_int()))( - current_platform.get_device_capability()), - reason="Grouped gemm is not supported on this GPU type.") -def test_cutlass_moe_cuda_graph( - m: int, - n: int, - k: int, - e: int, - topk: int, - per_act_token: bool, - per_out_ch: bool, -): - current_platform.seed_everything(7) - with set_current_vllm_config( - VllmConfig(parallel_config=ParallelConfig( - pipeline_parallel_size=1))): - - dtype = torch.half - - a = torch.randn((m, k), device="cuda", dtype=dtype) / 10 - w1 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 10 - w2 = torch.randn((e, k, n), device="cuda", dtype=dtype) / 10 - - # Get the right scale for tests. 
- _, a_scale1 = ops.scaled_fp8_quant( - a, use_per_token_if_dynamic=per_act_token) - a_q, _ = ops.scaled_fp8_quant(a, - a_scale1, - use_per_token_if_dynamic=per_act_token) - - a_d = a_q.float().mul(a_scale1).to(dtype) - - n_b_scales = 2 * n if per_out_ch else 1 - k_b_scales = k if per_out_ch else 1 - - w1_q = torch.empty((e, 2 * n, k), - device="cuda", - dtype=torch.float8_e4m3fn) - w2_q = torch.empty((e, k, n), device="cuda", dtype=torch.float8_e4m3fn) - w1_scale = torch.empty((e, n_b_scales, 1), - device="cuda", - dtype=torch.float32) - w2_scale = torch.empty((e, k_b_scales, 1), - device="cuda", - dtype=torch.float32) - - ab_strides1 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - c_strides1 = torch.full((e, ), 2 * n, device="cuda", dtype=torch.int64) - ab_strides2 = torch.full((e, ), n, device="cuda", dtype=torch.int64) - c_strides2 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - - for expert in range(e): - w1_q[expert], w1_scale[expert] = ops.scaled_fp8_quant( - w1[expert], use_per_token_if_dynamic=per_out_ch) - w2_q[expert], w2_scale[expert] = ops.scaled_fp8_quant( - w2[expert], use_per_token_if_dynamic=per_out_ch) - w1_q = w1_q.transpose(1, 2) - w2_q = w2_q.transpose(1, 2) - - ab_strides1 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - c_strides1 = torch.full((e, ), 2 * n, device="cuda", dtype=torch.int64) - ab_strides2 = torch.full((e, ), n, device="cuda", dtype=torch.int64) - c_strides2 = torch.full((e, ), k, device="cuda", dtype=torch.int64) - - w1_d = torch.empty_like(w1) - w2_d = torch.empty_like(w2) - for expert in range(e): - w1_d[expert] = (w1_q[expert].t().float() * w1_scale[expert]).half() - w2_d[expert] = (w2_q[expert].t().float() * w2_scale[expert]).half() - - score = torch.randn((m, e), device="cuda", dtype=dtype) - topk_weights, topk_ids = fused_topk(a, score, topk, renormalize=False) - - triton_output = fused_experts(a_d, w1_d, w2_d, topk_weights, topk_ids) - - stream = torch.cuda.Stream() - graph = torch.cuda.CUDAGraph() - with torch.cuda.graph(graph, stream=stream): - cutlass_output = run(a, a_scale1, w1_q, w2_q, w1_scale, w2_scale, - topk_weights, topk_ids, ab_strides1, - c_strides1, ab_strides2, c_strides2) - torch.cuda.synchronize() - graph.replay() - torch.cuda.synchronize() - - #print(triton_output) - #print(cutlass_output) - #print("*") - - torch.testing.assert_close(triton_output, - cutlass_output, - atol=9e-2, - rtol=1e-2) diff --git a/tests/kernels/test_fused_quant_activation.py b/tests/kernels/test_fused_quant_activation.py new file mode 100644 index 00000000000..fa84ad74cd8 --- /dev/null +++ b/tests/kernels/test_fused_quant_activation.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest +import torch + +import vllm._custom_ops as ops +from tests.kernels.utils import opcheck +from vllm.model_executor.layers.activation import SiluAndMul + +DTYPES = [torch.bfloat16, torch.float16] +QUANT_DTYPES = [torch.float8_e4m3fn] +NUM_TOKENS = [1, 17, 86, 1234, 3045] # Arbitrary values for testing +HIDDEN_SIZES = [16, 48, 128, 1562, 4096] # Arbitrary values for testing +SEEDS = [0] +CUDA_DEVICES = [ + f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2) +] + + +def ref_impl(silu_and_mul: SiluAndMul, x: torch.Tensor, + scale: torch.Tensor) -> torch.Tensor: + silu_and_mul_out = silu_and_mul.forward_native(x) + out, scales = ops.scaled_fp8_quant(silu_and_mul_out, scale) + return out + + +def ops_impl(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor: + out_shape = (x.shape[0], x.shape[1] // 2) + out 
= torch.empty(out_shape, + dtype=torch.torch.float8_e4m3fn, + device=x.device) + torch.ops._C.silu_and_mul_quant(out, x, scale) + return out + + +@pytest.mark.parametrize("num_tokens", NUM_TOKENS) +@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("quant_dtype", QUANT_DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("device", CUDA_DEVICES) +@torch.inference_mode() +def test_silu_and_mul( + num_tokens: int, + hidden_size: int, + dtype: torch.dtype, + quant_dtype: torch.dtype, + seed: int, + device: str, +) -> None: + torch.random.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.set_default_device(device) + + layer = SiluAndMul() + + # Make inputs + scale = (torch.randn((1), device=device, dtype=torch.float32)) + x = torch.randn(num_tokens, hidden_size, dtype=dtype) + + ref_out = ref_impl(layer, x, scale) + ops_out = ops_impl(x, scale) + + assert ref_out.dtype == quant_dtype + assert ops_out.dtype == quant_dtype + assert ref_out.shape == ops_out.shape + assert torch.allclose(ref_out.to(dtype=torch.float32), + ops_out.to(dtype=torch.float32)) + opcheck(torch.ops._C.silu_and_mul_quant, (ops_out, x, scale)) diff --git a/tests/kernels/test_rocm_attention_selector.py b/tests/kernels/test_rocm_attention_selector.py deleted file mode 100644 index 90b483b4a41..00000000000 --- a/tests/kernels/test_rocm_attention_selector.py +++ /dev/null @@ -1,34 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -import pytest -import torch - -from vllm.attention.selector import _cached_get_attn_backend, get_attn_backend -from vllm.platforms.rocm import RocmPlatform -from vllm.utils import STR_BACKEND_ENV_VAR - - -@pytest.fixture(autouse=True) -def clear_cache(): - """Clear lru cache to ensure each test case runs without caching. - """ - _cached_get_attn_backend.cache_clear() - - -def test_selector(monkeypatch: pytest.MonkeyPatch): - with monkeypatch.context() as m: - m.setenv(STR_BACKEND_ENV_VAR, "ROCM_FLASH") - - # Set the current platform to ROCm using monkeypatch - monkeypatch.setattr("vllm.attention.selector.current_platform", - RocmPlatform()) - - # Test standard ROCm attention - backend = get_attn_backend(16, torch.float16, torch.float16, 16, False) - assert (backend.get_name() == "ROCM_FLASH" - or backend.get_name() == "TRITON_ATTN_VLLM_V1") - - # mla test for deepseek related - backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False, - False, True) - assert backend.get_name() == "TRITON_MLA" diff --git a/tests/kernels/test_triton_flash_attention.py b/tests/kernels/test_triton_flash_attention.py new file mode 100644 index 00000000000..cf2bdc908e4 --- /dev/null +++ b/tests/kernels/test_triton_flash_attention.py @@ -0,0 +1,499 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Tests for the triton_flash_attention kernel + +Run `pytest tests/kernels/test_triton_flash_attention.py`. 
+""" +import pytest +import torch + +from vllm.attention.ops.triton_flash_attention import (SUPPORTED_LAYOUTS, + MetaData, + compute_alibi_tensor, + scale_fp8, + triton_attention_rocm) +from vllm.platforms import current_platform + + +class ReferenceAttention: + + def __init__(self, Z, HQ, HK, N_CTX_Q, N_CTX_K, D_HEAD, use_alibi, dtype, + input_metadata): + self.Z = Z + self.HQ = HQ + self.HK = HK + self.N_CTX_Q = N_CTX_Q + self.N_CTX_K = N_CTX_K + self.D_HEAD = D_HEAD + self.use_alibi = use_alibi + self.dtype = dtype + self.input_metadata = input_metadata + + def fwd(self, q, k, v): + scores = torch.einsum('bhqd,bhkd->bhqk', q, + k).float() * self.input_metadata.sm_scale + if self.input_metadata.causal: + mask = torch.tril(torch.ones(self.N_CTX_Q, + self.N_CTX_K, + device="cuda"), + diagonal=self.N_CTX_K - self.N_CTX_Q) + scores[:, :, mask == 0] = float("-inf") + + if self.input_metadata.bias is not None: + scores += self.input_metadata.bias + + if self.use_alibi: + scores += compute_alibi_tensor(self.input_metadata.alibi_slopes, + self.N_CTX_Q, self.N_CTX_K) + + p = torch.softmax(scores, dim=-1) + if self.input_metadata.causal: + # If N_CTX_Q > N_CTX_K, there's at least one row of all -infs going + # into softmax. This creates a row of NaNs as -inf - -inf == NaN. + # So we fix this by converting the NaNs to 0s, which is what they + # should be out of the softmax. + nan_mask = torch.isnan(p) + p[nan_mask == 1] = 0 + ref_out = torch.einsum('bhqk,bhkd->bhqd', p.to(self.dtype), v) + # compare + if self.input_metadata.layout == 'bshd': + ref_out = ref_out.transpose(1, 2).clone() + return ref_out + + def fwd_fp8(self, q_quantized, k_quantized, v_quantized): + q = (q_quantized.to(torch.float16) * self.input_metadata.q_descale).to( + self.dtype) + k = (k_quantized.to(torch.float16) * self.input_metadata.k_descale).to( + self.dtype) + v = (v_quantized.to(torch.float16) * self.input_metadata.v_descale).to( + self.dtype) + result = self.fwd(q, k, v) + if self.input_metadata.o_scale is not None: + result, _ = scale_fp8(result, self.input_metadata.o_scale) + return result + + def fwd_fp8_kv(self, q, k_quantized, v_quantized): + k_descale, v_descale = (self.input_metadata.k_descale, + self.input_metadata.v_descale) + k_dequantized = (k_quantized.to(torch.float32) * + k_descale.to(torch.float32)).to(self.dtype) + v_dequantized = (v_quantized.to(torch.float32) * + v_descale.to(torch.float32)).to(self.dtype) + return self.fwd(q, k_dequantized, v_dequantized) + + def varlen_fwd(self, q, k, v, is_mqa=False): + ref_out = torch.empty_like(q) + if is_mqa: + # Make KV look like HQ/HK "groups" of HK. Later, we will reshape so + # the size aligns with Q. 
+ k_ref = k.view(k.shape[0], k.shape[1], 1, + k.shape[2]).expand(-1, -1, self.HQ // self.HK, -1) + v_ref = v.view(v.shape[0], v.shape[1], 1, + v.shape[2]).expand(-1, -1, self.HQ // self.HK, -1) + else: + k_ref = k + v_ref = v + + for i in range(0, self.input_metadata.num_contexts): + start_q, start_k = self.input_metadata.cu_seqlens_q[ + i], self.input_metadata.cu_seqlens_k[i] + end_q, end_k = self.input_metadata.cu_seqlens_q[ + i + 1], self.input_metadata.cu_seqlens_k[i + 1] + k_curr = k_ref[start_k:end_k] + v_curr = v_ref[start_k:end_k] + if is_mqa: + k_curr = k_curr.reshape(k_curr.shape[0], -1, k_curr.shape[3]) + v_curr = v_curr.reshape(v_curr.shape[0], -1, v_curr.shape[3]) + scores = torch.einsum('qhd,khd->qhk', q[start_q:end_q], + k_curr).float() + p = torch.softmax(scores * self.input_metadata.sm_scale, + dim=-1).half() + ref_out[start_q:end_q] = torch.einsum('qhk,khd->qhd', p, v_curr) + return ref_out + + +def quantize_input(q, k, v, fp8_kv=False, use_o_scale=False): + q_descale = None + if not fp8_kv: + q, q_descale = scale_fp8(q) + k, k_descale = scale_fp8(k) + v, v_descale = scale_fp8(v) + + # In real world use case, the p scale would be a parameter trained by the + # model. + p_scale = None + + o_scale = torch.rand(1, device="cuda", + requires_grad=False) if use_o_scale else None + + return q, k, v, q_descale, k_descale, v_descale, p_scale, o_scale + + +def input_helper( + Z, + HQ, + HK, + N_CTX_Q, + N_CTX_K, + D_HEAD, + dtype, + layout=None, + use_alibi=None, + causal=None, + is_fp8=False, + fp8_kv=False, + use_o_scale=False, + use_bias=False, +): + assert layout in SUPPORTED_LAYOUTS, "Got unsupported layout." + + current_platform.seed_everything(0) + + # Initialize q, k, v + if layout == 'bhsd': + q_tensor_shape = (Z, HQ, N_CTX_Q, D_HEAD) + k_tensor_shape = (Z, HK, N_CTX_K, D_HEAD) + elif layout == 'bshd': + q_tensor_shape = (Z, N_CTX_Q, HQ, D_HEAD) + k_tensor_shape = (Z, N_CTX_K, HK, D_HEAD) + + if use_alibi: + # for n heads the set of slopes is the geometric sequence that starts + # 2^(-8/n) + alibi_slopes = torch.tensor( + [2**(-8 / HQ * i) for i in range(1, HQ + 1)], + dtype=torch.float32, + device="cuda").repeat(Z, 1) + else: + alibi_slopes = None + + if use_bias: + bias = torch.randn((1, HQ, N_CTX_Q, N_CTX_K), + dtype=dtype, + device="cuda", + requires_grad=False) + else: + bias = None + + q = torch.randn(q_tensor_shape, + dtype=dtype, + device="cuda", + requires_grad=False) + k = torch.randn(k_tensor_shape, + dtype=dtype, + device="cuda", + requires_grad=False) + v = torch.randn(k_tensor_shape, + dtype=dtype, + device="cuda", + requires_grad=False) + + if is_fp8: + (q, k, v, q_descale, k_descale, v_descale, p_scale, + o_scale) = quantize_input(q, + k, + v, + use_o_scale=use_o_scale, + fp8_kv=fp8_kv) + else: + q_descale = k_descale = v_descale = p_scale = o_scale = None + + input_metadata = MetaData(sm_scale=D_HEAD**-0.5, + max_seqlens_q=N_CTX_Q, + max_seqlens_k=N_CTX_K, + layout=layout, + alibi_slopes=alibi_slopes, + alibi_batch=Z, + alibi_nheads=HQ, + q_descale=q_descale, + k_descale=k_descale, + v_descale=v_descale, + p_scale=p_scale, + o_scale=o_scale, + bias=bias, + seqlen_q=N_CTX_Q, + seqlen_k=N_CTX_K) + return q, k, v, input_metadata + + +def varlen_input_helper(Z, + HQ, + HK, + N_CTX_Q, + N_CTX_K, + D_HEAD, + dtype, + equal_seqlens=False): + current_platform.seed_everything(0) + + # Random sequence lengths. 
Using N_CTX as kind of max of sum of individual + # seqs + if not equal_seqlens: + max_seqlens_q = N_CTX_Q // Z + max_seqlens_k = N_CTX_K // Z + seqlens_q = torch.randint(1, + max_seqlens_q + 1, (Z, ), + dtype=torch.int32) + seqlens_k = torch.randint(1, + max_seqlens_k + 1, (Z, ), + dtype=torch.int32) + else: + seqlens_q = torch.full((Z, ), N_CTX_Q // Z) + seqlens_k = torch.full((Z, ), N_CTX_K // Z) + + # Calculate cumulative sequence lengths + cu_seqlens_q = torch.cat([ + torch.tensor([0], dtype=torch.int32), + seqlens_q.cumsum(dim=0, dtype=torch.int32) + ]) + cu_seqlens_k = torch.cat([ + torch.tensor([0], dtype=torch.int32), + seqlens_k.cumsum(dim=0, dtype=torch.int32) + ]) + cu_seqlens_q = cu_seqlens_q.to(device="cuda") + cu_seqlens_k = cu_seqlens_k.to(device="cuda") + + # Initialize q, k, v with variable lengths + total_q = cu_seqlens_q[-1].item() + total_k = cu_seqlens_k[-1].item() + q = torch.randn((total_q, HQ, D_HEAD), dtype=dtype, + device="cuda").normal_(mean=0., std=0.5).requires_grad_() + k = torch.randn((total_k, HK, D_HEAD), dtype=dtype, + device="cuda").normal_(mean=0., std=0.5).requires_grad_() + v = torch.randn((total_k, HK, D_HEAD), dtype=dtype, + device="cuda").normal_(mean=0., std=0.5).requires_grad_() + sm_scale = D_HEAD**-0.5 + input_metadata = MetaData(sm_scale=sm_scale) + input_metadata.set_varlen_params(cu_seqlens_q, cu_seqlens_k) + return q, k, v, input_metadata + + +@pytest.mark.parametrize('Z, HQ, HK, N_CTX_Q, N_CTX_K, D_HEAD', [ + (1, 48, 12, 1, 1, 64), + (4, 4, 4, 128, 128, 65), + (16, 48, 48, 1, 1, 128), + (64, 48, 24, 3, 3, 128), + (4, 4, 4, 113, 123, 1), +]) +@pytest.mark.parametrize('causal', [True, False]) +@pytest.mark.parametrize('use_alibi', [True, False]) +@pytest.mark.parametrize('layout', ['bshd']) +def test_op_fwd(Z, + HQ, + HK, + N_CTX_Q, + N_CTX_K, + D_HEAD, + causal, + use_alibi, + layout, + dtype=torch.float16): + current_platform.seed_everything(0) + q, k, v, input_metadata = input_helper(Z, HQ, HK, N_CTX_Q, N_CTX_K, D_HEAD, + dtype, layout, use_alibi, causal) + + o = torch.empty_like(q) + + # triton implementation + tri_out, _ = triton_attention_rocm(q, k, v, o, input_metadata) + + # Transpose here if layout is bshd so we have same reference code for all + # layouts + if layout == 'bshd': + q = q.transpose(1, 2).clone() + k = k.transpose(1, 2).clone() + v = v.transpose(1, 2).clone() + # Replicate K and V if using MQA/GQA + if HQ != HK: + k = k.view(k.shape[0], k.shape[1], -1, k.shape[2], + k.shape[3]).expand(-1, -1, HQ // HK, -1, + -1).reshape(k.shape[0], -1, k.shape[2], + k.shape[3]) + v = v.view(v.shape[0], v.shape[1], -1, v.shape[2], + v.shape[3]).expand(-1, -1, HQ // HK, -1, + -1).reshape(v.shape[0], -1, v.shape[2], + v.shape[3]) + + ref_impl = ReferenceAttention(Z, HQ, HK, N_CTX_Q, N_CTX_K, D_HEAD, + use_alibi, dtype, input_metadata) + ref_out = ref_impl.fwd(q, k, v) + + torch.testing.assert_close(ref_out, tri_out, atol=2e-2, rtol=2e-2) + + +@pytest.mark.parametrize('Z, H, N_CTX_Q, N_CTX_K, D_HEAD', [ + (4, 48, 1, 1, 64), + (4, 48, 1, 1, 128), + (4, 48, 3, 3, 128), + (4, 4, 128, 128, 65), +]) +@pytest.mark.parametrize('causal', [True, False]) +@pytest.mark.parametrize('layout', ['bhsd']) +@pytest.mark.parametrize('use_o_scale', [True, False]) +@pytest.mark.skipif(torch.cuda.get_device_capability() < (9, 0), + reason="Triton FP8 requires CUDA 9.0 or higher") +def test_op_fwd_fp8(Z, + H, + N_CTX_Q, + N_CTX_K, + D_HEAD, + causal, + layout, + use_o_scale, + dtype=torch.float32): + current_platform.seed_everything(0) + + # Disable grad to 
save memory it won't run into OOM on CI machine. + # q, k, v, input_metadata = input_helper(Z, H, H, N_CTX_Q, N_CTX_K, D_HEAD, + # dtype, layout) + + q_quantized, k_quantized, v_quantized, input_metadata = input_helper( + Z, + H, + H, + N_CTX_Q, + N_CTX_K, + D_HEAD, + dtype, + causal=causal, + layout=layout, + is_fp8=True, + use_o_scale=use_o_scale) + + o = torch.empty_like(q_quantized) if use_o_scale else None + + tri_out, _ = triton_attention_rocm(q_quantized, k_quantized, v_quantized, + o, input_metadata) + + ref_impl = ReferenceAttention(Z, H, H, N_CTX_Q, N_CTX_K, D_HEAD, False, + dtype, input_metadata) + ref_out = ref_impl.fwd_fp8(q_quantized, k_quantized, v_quantized) + + # compare + torch.testing.assert_close(ref_out.to(torch.float32), + tri_out.to(torch.float32), + atol=7e-2, + rtol=2e-1) + + +@pytest.mark.parametrize('Z, H, N_CTX_Q, N_CTX_K, D_HEAD', [ + (4, 48, 1, 1, 64), + (4, 48, 1, 1, 128), + (4, 48, 3, 3, 128), + (4, 4, 128, 128, 65), + (4, 4, 113, 123, 1), +]) +@pytest.mark.parametrize('causal', [True, False]) +@pytest.mark.parametrize('layout', ['bhsd']) +def test_op_fwd_fp8_kv(Z, + H, + N_CTX_Q, + N_CTX_K, + D_HEAD, + causal, + layout, + dtype=torch.float32): + current_platform.seed_everything(0) + + q, k_quantized, v_quantized, input_metadata = input_helper(Z, + H, + H, + N_CTX_Q, + N_CTX_K, + D_HEAD, + dtype, + causal=causal, + layout=layout, + is_fp8=True, + fp8_kv=True) + + o = torch.empty_like(q) + + tri_out, _ = triton_attention_rocm(q, k_quantized, v_quantized, o, + input_metadata) + + ref_impl = ReferenceAttention(Z, H, H, N_CTX_Q, N_CTX_K, D_HEAD, False, + dtype, input_metadata) + ref_out = ref_impl.fwd_fp8_kv(q, k_quantized, v_quantized) + + torch.testing.assert_close(ref_out, tri_out, atol=3e-2, rtol=8e-1) + + +@pytest.mark.parametrize('Z, H, N_CTX_Q, N_CTX_K, D_HEAD', [ + (4, 48, 1, 1, 64), + (4, 48, 1, 1, 128), + (4, 48, 3, 3, 128), + (4, 4, 128, 128, 65), +]) +@pytest.mark.parametrize('causal', [True, False]) +@pytest.mark.parametrize('use_bias', [True]) +@pytest.mark.parametrize('dtype', [torch.bfloat16]) +def test_op_fwd_bias(Z, H, N_CTX_Q, N_CTX_K, D_HEAD, causal, use_bias, dtype): + current_platform.seed_everything(0) + q, k, v, input_metadata = input_helper(Z, + H, + H, + N_CTX_Q, + N_CTX_K, + D_HEAD, + dtype, + layout='bhsd', + causal=causal, + use_bias=use_bias) + o = torch.empty_like(q) + + # triton implementation + tri_out, _ = triton_attention_rocm(q, k, v, o, input_metadata) + + ref_impl = ReferenceAttention(Z, H, H, N_CTX_Q, N_CTX_K, D_HEAD, False, + dtype, input_metadata) + ref_out = ref_impl.fwd(q, k, v) + + # compare + torch.testing.assert_close(ref_out, tri_out, atol=2e-2, rtol=2e-2) + + +# NOTE: Uses thd layout, so also tests thd. +@pytest.mark.parametrize('Z, H, N_CTX, D_HEAD', [(1, 48, 256, 64), + (4, 48, 512, 64), + (16, 48, 512, 64), + (64, 48, 128, 128)]) +@pytest.mark.parametrize('causal', [True, False]) +def test_op_varlen_fwd(Z, H, N_CTX, D_HEAD, causal, dtype=torch.float16): + + q, k, v, input_metadata = varlen_input_helper(Z, H, H, N_CTX, N_CTX, + D_HEAD, dtype) + + tri_out = torch.empty_like(q) + triton_attention_rocm(q, k, v, tri_out, input_metadata) + + ref_impl = ReferenceAttention(Z, H, H, N_CTX, N_CTX, D_HEAD, False, dtype, + input_metadata) + ref_out = ref_impl.varlen_fwd(q, k, v, is_mqa=False) + + torch.testing.assert_close(ref_out, tri_out, atol=2e-2, rtol=2e-2) + + +# NOTE: Uses thd layout, so also tests thd. 
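# Layout reminder (illustrative): in the packed "thd" layout there is no batch
# dimension; q/k/v are (total_tokens, n_heads, head_dim) and sequence i of the
# batch occupies rows cu_seqlens[i]:cu_seqlens[i + 1], which is exactly how
# varlen_fwd above slices per-sequence blocks out of q, k and v.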
+@pytest.mark.parametrize('Z, HQ, HK, N_CTX, D_HEAD', [(2, 48, 24, 128, 64), + (4, 48, 12, 256, 64), + (4, 48, 4, 512, 64), + (4, 64, 16, 128, 128)]) +@pytest.mark.parametrize('causal', [False]) +def test_op_varlen_mqa_fwd(Z, + HQ, + HK, + N_CTX, + D_HEAD, + causal, + dtype=torch.float16): + q, k, v, input_metadata = varlen_input_helper(Z, HQ, HK, N_CTX, N_CTX, + D_HEAD, dtype) + + tri_out = torch.empty_like(q) + triton_attention_rocm(q, k, v, tri_out, input_metadata) + + ref_impl = ReferenceAttention(Z, HQ, HK, N_CTX, N_CTX, D_HEAD, False, + dtype, input_metadata) + ref_out = ref_impl.varlen_fwd(q, k, v, is_mqa=True) + + torch.testing.assert_close(ref_out, tri_out, atol=2e-2, rtol=2e-2) diff --git a/tests/kernels/test_utils.py b/tests/kernels/test_utils.py deleted file mode 100644 index d3f03200265..00000000000 --- a/tests/kernels/test_utils.py +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -""" -Tests for miscellaneous utilities -""" - -import pytest -import torch - -from tests.kernels.utils import opcheck -from vllm.platforms import current_platform - - -def test_convert_fp8_opcheck(): - data = torch.randn((256, 256), dtype=torch.float32, device="cuda") - result = torch.empty_like(data, dtype=torch.float8_e4m3fn) - opcheck(torch.ops._C_cache_ops.convert_fp8, (result, data, 1.0, "fp8")) - - -@pytest.mark.skipif(not current_platform.is_cuda(), - reason="Only supported for CUDA") -def test_cuda_utils_opcheck(): - opcheck(torch.ops._C_cuda_utils.get_device_attribute, (0, 0)) - opcheck( - torch.ops._C_cuda_utils. - get_max_shared_memory_per_block_device_attribute, (0, )) diff --git a/tests/kernels/utils_block.py b/tests/kernels/utils_block.py deleted file mode 100644 index c16cba50967..00000000000 --- a/tests/kernels/utils_block.py +++ /dev/null @@ -1,63 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -import torch - - -def native_w8a8_block_matmul(A: torch.Tensor, B: torch.Tensor, - As: torch.Tensor, Bs: torch.Tensor, block_size, - output_dtype): - """This function performs matrix multiplication with block-wise - quantization using native torch. - It is agnostic to the input data type and can be used for both int8 and - fp8 data types. - - It takes two input tensors `A` and `B` (int8) with scales `As` and - `Bs` (float32). - The output is returned in the specified `output_dtype`. 
- """ - A = A.to(torch.float32) - B = B.to(torch.float32) - assert A.shape[-1] == B.shape[-1] - assert B.ndim == 2 and B.is_contiguous() and Bs.ndim == 2 - assert len(block_size) == 2 - block_n, block_k = block_size[0], block_size[1] - assert (A.shape[-1] + block_k - 1) // block_k == As.shape[-1] - assert A.shape[:-1] == As.shape[:-1] - - M = A.numel() // A.shape[-1] - N, K = B.shape - origin_C_shape = A.shape[:-1] + (N, ) - A = A.reshape(M, A.shape[-1]) - As = As.reshape(M, As.shape[-1]) - n_tiles = (N + block_n - 1) // block_n - k_tiles = (K + block_k - 1) // block_k - assert n_tiles == Bs.shape[0] - assert k_tiles == Bs.shape[1] - - C_shape = (M, N) - C = torch.zeros(C_shape, dtype=torch.float32, device=A.device) - - A_tiles = [ - A[:, i * block_k:min((i + 1) * block_k, K)] for i in range(k_tiles) - ] - B_tiles = [[ - B[ - j * block_n:min((j + 1) * block_n, N), - i * block_k:min((i + 1) * block_k, K), - ] for i in range(k_tiles) - ] for j in range(n_tiles)] - C_tiles = [ - C[:, j * block_n:min((j + 1) * block_n, N)] for j in range(n_tiles) - ] - As_tiles = [As[:, i:i + 1] for i in range(k_tiles)] - - for i in range(k_tiles): - for j in range(n_tiles): - a = A_tiles[i] - b = B_tiles[j][i] - c = C_tiles[j] - s = As_tiles[i] * Bs[j][i] - c[:, :] += torch.matmul(a, b.t()) * s - - C = C.reshape(origin_C_shape).to(output_dtype) - return C diff --git a/tests/kv_transfer/test_disagg.py b/tests/kv_transfer/test_disagg.py index 5b9ea6dba40..dc948a48bf3 100644 --- a/tests/kv_transfer/test_disagg.py +++ b/tests/kv_transfer/test_disagg.py @@ -14,8 +14,8 @@ # Fixture to set up environment variables and teardown servers after tests @pytest.fixture(scope="module", autouse=True) def setup_servers(): - if torch.cuda.device_count() < 4: - pytest.skip("Skipping test: fewer than 4 GPUs available") + if torch.cuda.device_count() < 2: + pytest.skip("Skipping test: fewer than 2 GPUs available") # Set up environment variables VLLM_HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py index cdb8c893b8b..e3a054bd620 100644 --- a/tests/lora/test_llama_tp.py +++ b/tests/lora/test_llama_tp.py @@ -47,6 +47,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]: ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=256, + skip_special_tokens=False, stop=["[/assistant]"]) outputs = llm.generate( prompts, diff --git a/tests/lora/test_lora_manager.py b/tests/lora/test_lora_manager.py index 576d95a4715..52b0834cacb 100644 --- a/tests/lora/test_lora_manager.py +++ b/tests/lora/test_lora_manager.py @@ -31,6 +31,8 @@ f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2) ] if current_platform.is_cuda_alike() else ["cpu"]) +DEFAULT_DTYPE = torch.get_default_dtype() + @pytest.fixture(scope="function", autouse=True) def use_v0_only(monkeypatch: pytest.MonkeyPatch): @@ -125,8 +127,10 @@ def test_replace_submodules(dist_init, dummy_model): model = dummy_model manager = LoRAModelManager( model, 1, 1, 1, - LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8), - torch.device(DEVICES[0])) + LoRAConfig(max_lora_rank=8, + max_cpu_loras=8, + max_loras=8, + lora_dtype=DEFAULT_DTYPE), torch.device(DEVICES[0])) model = manager.model assert isinstance(model.get_submodule("dense1"), ColumnParallelLinearWithLoRA) @@ -155,7 +159,8 @@ def test_lora_model_manager(dist_init, dummy_model, device): 2, LoRAConfig(max_lora_rank=8, max_cpu_loras=3, - max_loras=2), + max_loras=2, + lora_dtype=DEFAULT_DTYPE), 
device=device) assert all(x is None for x in manager.lora_index_to_id) assert manager.add_adapter(model_lora1) @@ -221,7 +226,8 @@ def test_lora_lru_cache_model_manager(dist_init, dummy_model, device): 2, LoRAConfig(max_lora_rank=8, max_cpu_loras=3, - max_loras=2), + max_loras=2, + lora_dtype=DEFAULT_DTYPE), device=device) assert all(x is None for x in manager.lora_index_to_id) assert manager.add_adapter(model_lora1) @@ -316,7 +322,8 @@ def test_lru_lora_model_manager(dist_init, dummy_model, device): 2, LoRAConfig(max_lora_rank=8, max_cpu_loras=2, - max_loras=2), + max_loras=2, + lora_dtype=DEFAULT_DTYPE), device=device) assert all(x is None for x in manager.lora_index_to_id) @@ -424,7 +431,10 @@ def test_lru_lora_model_manager(dist_init, dummy_model, device): @pytest.mark.parametrize("device", DEVICES) def test_lru_cache_worker_adapter_manager(llama_2_7b_model_extra_embeddings, sql_lora_files, device): - lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4) + lora_config = LoRAConfig(max_lora_rank=8, + max_cpu_loras=4, + max_loras=4, + lora_dtype=DEFAULT_DTYPE) worker_adapter_manager = LRUCacheWorkerLoRAManager( 4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size - lora_config.lora_extra_vocab_size, lora_config, device, @@ -504,7 +514,10 @@ def test_lru_cache_worker_adapter_manager(llama_2_7b_model_extra_embeddings, def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings, sql_lora_files, device): # Should remove every LoRA not specified in the request. - lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4) + lora_config = LoRAConfig(max_lora_rank=8, + max_cpu_loras=4, + max_loras=4, + lora_dtype=DEFAULT_DTYPE) worker_adapter_manager = WorkerLoRAManager( 4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size - lora_config.lora_extra_vocab_size, lora_config, device, @@ -600,7 +613,8 @@ def test_packed_loras(dist_init, dummy_model_gate_up, device): 2, LoRAConfig(max_lora_rank=8, max_cpu_loras=2, - max_loras=2), + max_loras=2, + lora_dtype=DEFAULT_DTYPE), device=device) model = manager.model diff --git a/tests/lora/test_resolver.py b/tests/lora/test_resolver.py new file mode 100644 index 00000000000..8ebc2ae98fc --- /dev/null +++ b/tests/lora/test_resolver.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: Apache-2.0 + +from typing import Optional + +import pytest + +from vllm.lora.request import LoRARequest +from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry + + +class DummyLoRAResolver(LoRAResolver): + """A dummy LoRA resolver for testing.""" + + async def resolve_lora(self, base_model_name: str, + lora_name: str) -> Optional[LoRARequest]: + if lora_name == "test_lora": + return LoRARequest( + lora_name=lora_name, + lora_path=f"/dummy/path/{base_model_name}/{lora_name}", + lora_int_id=abs(hash(lora_name))) + return None + + +def test_resolver_registry_registration(): + """Test basic resolver registration functionality.""" + registry = LoRAResolverRegistry + resolver = DummyLoRAResolver() + + # Register a new resolver + registry.register_resolver("dummy", resolver) + assert "dummy" in registry.get_supported_resolvers() + + # Get registered resolver + retrieved_resolver = registry.get_resolver("dummy") + assert retrieved_resolver is resolver + + +def test_resolver_registry_duplicate_registration(): + """Test registering a resolver with an existing name.""" + registry = LoRAResolverRegistry + resolver1 = DummyLoRAResolver() + resolver2 = DummyLoRAResolver() + + registry.register_resolver("dummy", resolver1) + 
registry.register_resolver("dummy", resolver2) + + assert registry.get_resolver("dummy") is resolver2 + + +def test_resolver_registry_unknown_resolver(): + """Test getting a non-existent resolver.""" + registry = LoRAResolverRegistry + + with pytest.raises(KeyError, match="not found"): + registry.get_resolver("unknown_resolver") + + +@pytest.mark.asyncio +async def test_dummy_resolver_resolve(): + """Test the dummy resolver's resolve functionality.""" + dummy_resolver = DummyLoRAResolver() + base_model_name = "base_model_test" + lora_name = "test_lora" + + # Test successful resolution + result = await dummy_resolver.resolve_lora(base_model_name, lora_name) + assert isinstance(result, LoRARequest) + assert result.lora_name == lora_name + assert result.lora_path == f"/dummy/path/{base_model_name}/{lora_name}" + + # Test failed resolution + result = await dummy_resolver.resolve_lora(base_model_name, + "nonexistent_lora") + assert result is None diff --git a/tests/lora/test_tokenizer_group.py b/tests/lora/test_tokenizer_group.py index d605ab73468..8845eb33d20 100644 --- a/tests/lora/test_tokenizer_group.py +++ b/tests/lora/test_tokenizer_group.py @@ -5,17 +5,14 @@ from vllm.lora.request import LoRARequest from vllm.transformers_utils.tokenizer import get_lora_tokenizer -from vllm.transformers_utils.tokenizer_group import get_tokenizer_group - -from ..conftest import get_tokenizer_pool_config +from vllm.transformers_utils.tokenizer_group import TokenizerGroup @pytest.mark.asyncio @pytest.mark.parametrize("tokenizer_group_type", [None, "ray"]) async def test_tokenizer_group_lora(sql_lora_files, tokenizer_group_type): reference_tokenizer = AutoTokenizer.from_pretrained(sql_lora_files) - tokenizer_group = get_tokenizer_group( - get_tokenizer_pool_config(tokenizer_group_type), + tokenizer_group = TokenizerGroup( tokenizer_id="gpt2", enable_lora=True, max_num_seqs=1, @@ -60,8 +57,7 @@ def test_get_lora_tokenizer(sql_lora_files, tmp_path): @pytest.mark.parametrize("max_num_seqs", [1, 2]) @pytest.mark.parametrize("max_loras", [1, 2]) def test_lora_tokenizers(enable_lora, max_num_seqs, max_loras): - tokenizer_group = get_tokenizer_group( - get_tokenizer_pool_config(None), + tokenizer_group = TokenizerGroup( tokenizer_id="gpt2", enable_lora=enable_lora, max_num_seqs=max_num_seqs, diff --git a/tests/lora/test_utils.py b/tests/lora/test_utils.py index 1c90cedf1a1..67f3866beff 100644 --- a/tests/lora/test_utils.py +++ b/tests/lora/test_utils.py @@ -39,6 +39,18 @@ def test_parse_fine_tuned_lora_name_valid(): False, False, ), + ( + "language_model.layers.9.mlp.down_proj.lora_A.weight", + "language_model.layers.9.mlp.down_proj", + True, + False, + ), + ( + "language_model.layers.9.mlp.down_proj.lora_B.weight", + "language_model.layers.9.mlp.down_proj", + False, + False, + ), } for name, module_name, is_lora_a, is_bias in fixture: assert (module_name, is_lora_a, diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py index ac2e0f3542e..2d9cf1d48fd 100644 --- a/tests/model_executor/test_enabled_custom_ops.py +++ b/tests/model_executor/test_enabled_custom_ops.py @@ -11,6 +11,8 @@ dispatch_fused_experts_func, dispatch_topk_func, torch_vllm_inplace_fused_experts, torch_vllm_outplace_fused_experts, vllm_topk_softmax) +from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( + is_rocm_aiter_moe_enabled) from vllm.model_executor.layers.layernorm import ( RMSNorm, dispatch_cuda_rmsnorm_func, fused_add_rms_norm, rms_norm, 
rocm_aiter_fused_add_rms_norm, rocm_aiter_rms_norm) @@ -100,11 +102,10 @@ def test_enabled_ops_invalid(env: str): def test_topk_dispatch(use_rocm_aiter: str, monkeypatch): monkeypatch.setenv("VLLM_ROCM_USE_AITER", use_rocm_aiter) topk_func = dispatch_topk_func() - + is_rocm_aiter_moe_enabled.cache_clear() if current_platform.is_rocm() and int(use_rocm_aiter): from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( rocm_aiter_topk_softmax) - assert topk_func == rocm_aiter_topk_softmax else: assert topk_func == vllm_topk_softmax @@ -116,11 +117,11 @@ def test_fused_experts_dispatch(use_rocm_aiter: str, inplace: bool, monkeypatch): monkeypatch.setenv("VLLM_ROCM_USE_AITER", use_rocm_aiter) + is_rocm_aiter_moe_enabled.cache_clear() fused_experts_func = dispatch_fused_experts_func(inplace) if current_platform.is_rocm() and int(use_rocm_aiter): from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( rocm_aiter_fused_experts) - assert fused_experts_func == rocm_aiter_fused_experts elif inplace: assert fused_experts_func == torch_vllm_inplace_fused_experts diff --git a/tests/model_executor/test_guided_processors.py b/tests/model_executor/test_guided_processors.py index 59da575e37b..6cd966f8480 100644 --- a/tests/model_executor/test_guided_processors.py +++ b/tests/model_executor/test_guided_processors.py @@ -202,12 +202,15 @@ def test_multiple_guided_options_not_allowed(sample_json_schema, sample_regex): def test_guided_decoding_backend_options(): """Test backend-specific options""" - params = GuidedDecodingParams( - backend="xgrammar:option-1,option-2,option-3") - assert params.backend_options() == ["option-1", "option-2", "option-3"] - - no_fallback = GuidedDecodingParams(backend="xgrammar:option-1,no-fallback") - assert no_fallback.no_fallback() + with pytest.warns(DeprecationWarning): + guided_decoding_params = GuidedDecodingParams( + backend= + "xgrammar:no-fallback,disable-any-whitespace,no-additional-properties" + ) + assert guided_decoding_params.backend == "xgrammar" + assert guided_decoding_params.disable_fallback + assert guided_decoding_params.disable_any_whitespace + assert guided_decoding_params.disable_additional_properties def test_pickle_xgrammar_tokenizer_data(): diff --git a/tests/models/decoder_only/language/test_hybrid.py b/tests/models/decoder_only/language/test_hybrid.py deleted file mode 100644 index 60eb3830c6d..00000000000 --- a/tests/models/decoder_only/language/test_hybrid.py +++ /dev/null @@ -1,360 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from tests.utils import multi_gpu_test -from vllm.engine.arg_utils import EngineArgs -from vllm.sampling_params import SamplingParams - -from ...utils import check_outputs_equal - -# This test is for the hybrid models -MODELS = ["ai21labs/Jamba-tiny-dev", "Zyphra/Zamba2-1.2B-instruct"] -# Bamba at Fp32 is too big for the CI (L4 GPU). 
-# MODELS = ["ai21labs/Jamba-tiny-dev", "ibm-ai-platform/Bamba-9B"] - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [96]) -def test_models( - hf_runner, - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - - # numeric error produces different generation - if "Bamba" in model: - example_prompts.pop(3) - - model_kwargs = { - "use_mamba_kernels": False, # mamba kernels are not installed so HF - # don't use them - } - if "Zamba2" in model: - # Zamba2 HF implementation automatically checks if mamba kernels are - # installed - model_kwargs = {} - - with hf_runner(model, dtype=dtype, model_kwargs=model_kwargs) as hf_model: - hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) - - with vllm_runner(model, dtype=dtype) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - - for i in range(len(example_prompts)): - hf_output_ids, hf_output_str = hf_outputs[i] - vllm_output_ids, vllm_output_str = vllm_outputs[i] - assert hf_output_str == vllm_output_str, ( - f"Test{i}:\nHF: {hf_output_str!r}\nvLLM: {vllm_output_str!r}") - assert hf_output_ids == vllm_output_ids, ( - f"Test{i}:\nHF: {hf_output_ids}\nvLLM: {vllm_output_ids}") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [96]) -def test_batching( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # To pass the small model tests, we need full precision. - for_loop_outputs = [] - with vllm_runner(model, dtype=dtype) as vllm_model: - for prompt in example_prompts: - for_loop_outputs.append( - vllm_model.generate_greedy([prompt], max_tokens)[0]) - - batched_outputs = vllm_model.generate_greedy(example_prompts, - max_tokens) - - check_outputs_equal( - outputs_0_lst=for_loop_outputs, - outputs_1_lst=batched_outputs, - name_0="for_loop_vllm", - name_1="batched_vllm", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float16"]) -@pytest.mark.parametrize("max_tokens", [10]) -def test_mamba_prefill_chunking_with_parallel_sampling( - hf_runner, vllm_runner, example_prompts, model: str, dtype: str, - max_tokens: int) -> None: - # Tests prefill chunking in conjunction with n>1, in this case, - # prefill is populated with decoding tokens and we test that it - # doesn't fail This test might fail if cache is not allocated - # correctly for n > 1 decoding steps inside a - # chunked prefill forward pass (where we have both prefills - # and decoding together ) - sampling_params = SamplingParams(n=3, - temperature=1, - seed=0, - max_tokens=max_tokens) - with vllm_runner( - model, - dtype=dtype, - enable_chunked_prefill=True, - max_num_batched_tokens=30, - max_num_seqs=10 # forces prefill chunks with decoding - ) as vllm_model: - vllm_model.generate(example_prompts, sampling_params) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["bfloat16"]) -@pytest.mark.parametrize("max_tokens", [7]) -def test_mamba_prefill_chunking(hf_runner, vllm_runner, example_prompts, - model: str, dtype: str, - max_tokens: int) -> None: - # numeric error during prefill chunking produces different generation - # compared to w/o prefill chunking for those examples, removed them for now - if "Jamba" in model: - example_prompts.pop(7) - example_prompts.pop(2) - example_prompts.pop(1) - elif "Bamba" in model: - 
example_prompts.pop(6) - example_prompts.pop(3) - example_prompts.pop(2) - dtype = "half" # use a different dtype for Bamba - elif "Zamba2" in model: - example_prompts.pop(7) - dtype = "half" - - model_kwargs = { - "use_mamba_kernels": False, # mamba kernels are not installed so HF - # don't use them - } - if "Zamba2" in model: - # Zamba2 HF implementation automatically checks if mamba kernels are - # installed - model_kwargs = {} - - with hf_runner(model, dtype=dtype, model_kwargs=model_kwargs) as hf_model: - non_chunked = hf_model.generate_greedy(example_prompts, max_tokens) - - with vllm_runner(model, - dtype=dtype, - enable_chunked_prefill=True, - max_num_batched_tokens=5, - max_num_seqs=2) as vllm_model: - chunked = vllm_model.generate_greedy(example_prompts, - max_tokens=max_tokens) - - check_outputs_equal( - outputs_0_lst=chunked, - outputs_1_lst=non_chunked, - name_0="chunked", - name_1="non_chunked", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [15]) -def test_parallel_sampling( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - - with vllm_runner(model, dtype=dtype) as vllm_model: - for_loop_outputs = [] - for _ in range(10): - for_loop_outputs.append( - # using example_prompts index 1 instead of 0 since with 0 the - # logprobs get really close and the test doesn't pass - vllm_model.generate_greedy([example_prompts[1]], max_tokens) - [0]) - sampling_params = SamplingParams(n=10, - temperature=0.001, - seed=0, - max_tokens=max_tokens) - n_lt_1_outputs = vllm_model.generate([example_prompts[1]], - sampling_params) - token_ids, texts = n_lt_1_outputs[0] - n_lt_1_outputs = [(token_id, text) - for token_id, text in zip(token_ids, texts)] - - check_outputs_equal( - outputs_0_lst=n_lt_1_outputs, - outputs_1_lst=for_loop_outputs, - name_0="vllm_n_lt_1_outputs", - name_1="vllm", - ) - - -@pytest.mark.skip(reason="RE-ENABLE: test is currently failing on main.") -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["bfloat16"]) -@pytest.mark.parametrize("max_tokens", [20]) -def test_mamba_cache_cg_padding( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # This test is for verifying that mamba cache is padded to CG captured - # batch size. If it's not, a torch RuntimeError will be raised because - # tensor dimensions aren't compatible - vllm_config = EngineArgs(model=model).create_engine_config() - while len(example_prompts) == vllm_config.pad_for_cudagraph( - len(example_prompts)): - example_prompts.append(example_prompts[0]) - - try: - with vllm_runner(model, dtype=dtype) as vllm_model: - vllm_model.generate_greedy(example_prompts, max_tokens) - except RuntimeError: - pytest.fail( - "Couldn't run batch size which is not equal to a Cuda Graph " - "captured batch size. 
" - "Could be related to mamba cache not padded correctly") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [20]) -def test_models_preemption_recompute( - hf_runner, - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # Tests that outputs are identical with and w/o preemtions (recompute) - assert dtype == "float" - - with vllm_runner(model, dtype=dtype) as vllm_model: - vllm_model.model.llm_engine.scheduler[ - 0].ENABLE_ARTIFICIAL_PREEMPT = True - preempt_vllm_outputs = vllm_model.generate_greedy( - example_prompts, max_tokens) - - vllm_model.model.llm_engine.scheduler[ - 0].ENABLE_ARTIFICIAL_PREEMPT = False - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - - check_outputs_equal( - outputs_0_lst=preempt_vllm_outputs, - outputs_1_lst=vllm_outputs, - name_0="vllm_preepmtions", - name_1="vllm", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -def test_fail_upon_inc_requests_and_finished_requests_lt_available_blocks( - vllm_runner, - model: str, - dtype: str, - example_prompts, -) -> None: - # This test is for verifying that the hybrid inner state management doesn't - # collapse in case where the number of incoming requests and - # finished_requests_ids is larger than the maximum mamba block capacity. - # This could generally happen due to the fact that hybrid does support - # statelessness mechanism where it can cleanup new incoming requests in - # a single step. - try: - with vllm_runner(model, dtype=dtype, max_num_seqs=10) as vllm_model: - vllm_model.generate_greedy([example_prompts[0]] * 100, 10) - except ValueError: - pytest.fail("Hybrid inner state wasn't cleaned up properly between" - "steps finished requests registered unnecessarily ") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -def test_state_cleanup( - vllm_runner, - model: str, - dtype: str, - example_prompts, -) -> None: - # This test is for verifying that the Hybrid state is cleaned up between - # steps, If its not cleaned, an error would be expected. 
- try: - with vllm_runner(model, dtype=dtype) as vllm_model: - for _ in range(10): - vllm_model.generate_greedy([example_prompts[0]] * 100, 1) - except ValueError: - pytest.fail("Hybrid inner state wasn't cleaned up between states, " - "could be related to finished_requests_ids") - - -@pytest.mark.skip(reason="RE-ENABLE: test is currently failing on main.") -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -def test_multistep( - vllm_runner, - model: str, - dtype: str, - example_prompts, -) -> None: - # This test is verifying that multistep works correctly - #on mamba-like models - with vllm_runner(model, num_scheduler_steps=8, - max_num_seqs=2) as vllm_model: - vllm_model.generate_greedy([example_prompts[0]] * 10, 1) - - -@pytest.mark.skip(reason="RE-ENABLE: test is currently failing on main.") -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [64]) -def test_multistep_correctness(vllm_runner, model: str, dtype: str, - max_tokens: int, example_prompts) -> None: - with vllm_runner(model, num_scheduler_steps=8, - max_num_seqs=2) as vllm_model: - vllm_outputs_multistep = vllm_model.generate_greedy( - example_prompts, max_tokens) - - with vllm_runner(model, num_scheduler_steps=1, - max_num_seqs=2) as vllm_model: - vllm_outputs_single_step = vllm_model.generate_greedy( - example_prompts, max_tokens) - - check_outputs_equal( - outputs_0_lst=vllm_outputs_multistep, - outputs_1_lst=vllm_outputs_single_step, - name_0="vllm_outputs_multistep", - name_1="vllm_outputs_single_step", - ) - - -@multi_gpu_test(num_gpus=2) -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [64]) -def test_hybrid_distributed_produces_identical_generation( - vllm_runner, model: str, dtype: str, max_tokens: int, - example_prompts) -> None: - - with vllm_runner(model, dtype=dtype, tensor_parallel_size=2) as vllm_model: - vllm_outputs_tp_2 = vllm_model.generate_greedy(example_prompts, - max_tokens) - - with vllm_runner(model, dtype=dtype, tensor_parallel_size=1) as vllm_model: - vllm_outputs_tp_1 = vllm_model.generate_greedy(example_prompts, - max_tokens) - - check_outputs_equal( - outputs_0_lst=vllm_outputs_tp_1, - outputs_1_lst=vllm_outputs_tp_2, - name_0="vllm_tp_1", - name_1="vllm_tp_2", - ) diff --git a/tests/models/decoder_only/language/test_mamba.py b/tests/models/decoder_only/language/test_mamba.py deleted file mode 100644 index 47b9c0f69c3..00000000000 --- a/tests/models/decoder_only/language/test_mamba.py +++ /dev/null @@ -1,337 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -"""Compare the outputs of HF and vLLM when using greedy sampling for Mamba. - -Run `pytest tests/models/test_mamba.py`. -""" -import pytest -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer - -from vllm.engine.arg_utils import EngineArgs -from vllm.sampling_params import SamplingParams - -from ...utils import check_outputs_equal - -MODELS = [ - "state-spaces/mamba-130m-hf", - "tiiuae/falcon-mamba-tiny-dev", - # TODO: Compare to a Mamba2 model. The HF transformers implementation of - # Mamba2 is buggy for Codestral as it doesn't handle n_groups. - # See https://github.com/huggingface/transformers/pull/35943 - # "mistralai/Mamba-Codestral-7B-v0.1", -] - - -# Use lower-level interfaces to create this greedy generator, as mamba will -# choke on the model_kwarg 'attention_mask' if hf_model.generate_greedy is used. 
-def generate_greedy(model_name, example_prompts, max_tokens): - # Create a text generation pipeline - tokenizer = AutoTokenizer.from_pretrained(model_name) - model = AutoModelForCausalLM.from_pretrained(model_name) - - # Set the device (GPU if available, else CPU) - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model.to(device) - - # Generate texts from the prompts - outputs = [] - for prompt in example_prompts: - # Tokenize the input prompt with truncation - inputs = tokenizer(prompt, return_tensors="pt", truncation=True) - input_ids = inputs["input_ids"].to(model.device) - - # Generate text using the model's generate method directly - generated_ids = model.generate(input_ids, - max_new_tokens=max_tokens, - do_sample=False) - generated_text = tokenizer.decode(generated_ids[0], - skip_special_tokens=True) - - outputs.append((generated_ids[0].tolist(), generated_text)) - - return outputs - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [96]) -def test_models( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - hf_outputs = generate_greedy(model, example_prompts, max_tokens) - - # Set max_num_seqs to keep Codestral from going OOM at fp32 - with vllm_runner(model, dtype=dtype, max_num_seqs=16) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - - for i in range(len(example_prompts)): - hf_output_ids, hf_output_str = hf_outputs[i] - vllm_output_ids, vllm_output_str = vllm_outputs[i] - assert hf_output_str == vllm_output_str, ( - f"Test{i}:\nHF: {hf_output_str!r}\nvLLM: {vllm_output_str!r}") - assert hf_output_ids == vllm_output_ids, ( - f"Test{i}:\nHF: {hf_output_ids}\nvLLM: {vllm_output_ids}") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [96]) -def test_batching( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # To pass the small model tests, we need full precision. - for_loop_outputs = [] - with vllm_runner(model, dtype=dtype, max_num_seqs=16) as vllm_model: - for prompt in example_prompts: - for_loop_outputs.append( - vllm_model.generate_greedy([prompt], max_tokens)[0]) - - batched_outputs = vllm_model.generate_greedy(example_prompts, - max_tokens) - - check_outputs_equal( - outputs_0_lst=for_loop_outputs, - outputs_1_lst=batched_outputs, - name_0="for_loop_vllm", - name_1="batched_vllm", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [10]) -def test_chunked_prefill_with_parallel_sampling(vllm_runner, example_prompts, - model: str, dtype: str, - max_tokens: int) -> None: - # Tests chunked prefill in conjunction with n>1. In this case, prefill is - # populated with decoding tokens and we test that it doesn't fail. 
- # This test might fail if cache is not allocated correctly for n > 1 - # decoding steps inside a chunked prefill forward pass (where we have both - # prefill and decode together ) - sampling_params = SamplingParams(n=3, - temperature=1, - seed=0, - max_tokens=max_tokens) - with vllm_runner( - model, - dtype=dtype, - enable_chunked_prefill=True, - max_num_batched_tokens=30, - max_num_seqs=10 # forces prefill chunks with decoding - ) as vllm_model: - vllm_model.generate(example_prompts, sampling_params) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [32]) -@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16]) -def test_chunked_prefill(vllm_runner, example_prompts, model: str, dtype: str, - max_tokens: int, - chunked_prefill_token_size: int) -> None: - """ - Checks exact match decode between huggingface model and vllm runner with - chunked prefill. - """ - max_num_seqs = chunked_prefill_token_size - max_num_batched_tokens = chunked_prefill_token_size - - non_chunked = generate_greedy(model, example_prompts, max_tokens) - - with vllm_runner(model, - dtype=dtype, - enable_chunked_prefill=True, - max_num_batched_tokens=max_num_batched_tokens, - max_num_seqs=max_num_seqs) as vllm_model: - chunked = vllm_model.generate_greedy(example_prompts, - max_tokens=max_tokens) - - check_outputs_equal( - outputs_0_lst=chunked, - outputs_1_lst=non_chunked, - name_0="chunked", - name_1="non_chunked", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [15]) -def test_parallel_sampling( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - - # Numerical differences produce slightly different output for these - if 'state-spaces' in model: - example_prompts.pop(0) - example_prompts.pop(0) - example_prompts.pop(0) - - with vllm_runner(model, dtype=dtype, max_num_seqs=16) as vllm_model: - for_loop_outputs = [] - for _ in range(10): - for_loop_outputs.append( - vllm_model.generate_greedy(example_prompts, max_tokens)[0]) - sampling_params = SamplingParams(n=10, - temperature=0.001, - seed=0, - max_tokens=max_tokens) - n_lt_1_outputs = vllm_model.generate(example_prompts, sampling_params) - token_ids, texts = n_lt_1_outputs[0] - n_lt_1_outputs = [(token_id, text) - for token_id, text in zip(token_ids, texts)] - - check_outputs_equal( - outputs_0_lst=n_lt_1_outputs, - outputs_1_lst=for_loop_outputs, - name_0="vllm_n_lt_1_outputs", - name_1="vllm", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["bfloat16"]) -@pytest.mark.parametrize("max_tokens", [20]) -def test_mamba_cache_cg_padding( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # This test is for verifying that mamba cache is padded to CG captured - # batch size. If it's not, a torch RuntimeError will be raised because - # tensor dimensions aren't compatible - vllm_config = EngineArgs(model=model).create_engine_config() - while len(example_prompts) == vllm_config.pad_for_cudagraph( - len(example_prompts)): - example_prompts.append(example_prompts[0]) - - try: - with vllm_runner(model, dtype=dtype) as vllm_model: - vllm_model.generate_greedy(example_prompts, max_tokens) - except RuntimeError: - pytest.fail( - "Couldn't run batch size which is not equal to a Cuda Graph " - "captured batch size. 
" - "Could be related to mamba cache not padded correctly") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [20]) -def test_models_preemption_recompute( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # Tests that outputs are identical with and w/o preemtions (recompute) - assert dtype == "float" - - with vllm_runner(model, dtype=dtype, max_num_seqs=16) as vllm_model: - vllm_model.model.llm_engine.scheduler[ - 0].ENABLE_ARTIFICIAL_PREEMPT = True - preempt_vllm_outputs = vllm_model.generate_greedy( - example_prompts, max_tokens) - - vllm_model.model.llm_engine.scheduler[ - 0].ENABLE_ARTIFICIAL_PREEMPT = False - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - - check_outputs_equal( - outputs_0_lst=preempt_vllm_outputs, - outputs_1_lst=vllm_outputs, - name_0="vllm_preepmtions", - name_1="vllm", - ) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -def test_fail_upon_inc_requests_and_finished_requests_lt_available_blocks( - vllm_runner, - model: str, - dtype: str, - example_prompts, -) -> None: - # This test is for verifying that the Mamba inner state management doesn't - # collapse in case where the number of incoming requests and - # finished_requests_ids is larger than the maximum Mamba block capacity. - # This could generally happen due to the fact that Mamba does support - # statelessness mechanism where it can cleanup new incoming requests in - # a single step. - try: - with vllm_runner(model, dtype=dtype, max_num_seqs=10) as vllm_model: - vllm_model.generate_greedy([example_prompts[0]] * 100, 10) - except ValueError: - pytest.fail("Mamba inner state wasn't cleaned up properly between" - "steps finished requests registered unnecessarily ") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -def test_state_cleanup( - vllm_runner, - model: str, - dtype: str, - example_prompts, -) -> None: - # This test is for verifying that the Mamba state is cleaned up between - # steps, If its not cleaned, an error would be expected. 
- try: - with vllm_runner(model, dtype=dtype, max_num_seqs=16) as vllm_model: - for _ in range(10): - vllm_model.generate_greedy([example_prompts[0]] * 100, 1) - except ValueError: - pytest.fail("Mamba inner state wasn't cleaned up between states, " - "could be related to finished_requests_ids") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -def test_multistep( - vllm_runner, - model: str, - dtype: str, - example_prompts, -) -> None: - with vllm_runner(model, num_scheduler_steps=8, - max_num_seqs=2) as vllm_model: - vllm_model.generate_greedy([example_prompts[0]] * 10, 1) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [64]) -def test_multistep_correctness(vllm_runner, model: str, dtype: str, - max_tokens: int, example_prompts) -> None: - with vllm_runner(model, num_scheduler_steps=8, - max_num_seqs=2) as vllm_model: - vllm_outputs_multistep = vllm_model.generate_greedy( - example_prompts, max_tokens) - - with vllm_runner(model, num_scheduler_steps=1, - max_num_seqs=2) as vllm_model: - vllm_outputs_single_step = vllm_model.generate_greedy( - example_prompts, max_tokens) - - check_outputs_equal( - outputs_0_lst=vllm_outputs_multistep, - outputs_1_lst=vllm_outputs_single_step, - name_0="vllm_outputs_multistep", - name_1="vllm_outputs_single_step", - ) diff --git a/tests/models/embedding/utils.py b/tests/models/embedding/utils.py deleted file mode 100644 index 5aeeb517854..00000000000 --- a/tests/models/embedding/utils.py +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -from collections.abc import Sequence - -import torch -import torch.nn.functional as F - - -def check_embeddings_close( - *, - embeddings_0_lst: Sequence[list[float]], - embeddings_1_lst: Sequence[list[float]], - name_0: str, - name_1: str, - tol: float = 1e-3, -) -> None: - assert len(embeddings_0_lst) == len(embeddings_1_lst) - - for prompt_idx, (embeddings_0, embeddings_1) in enumerate( - zip(embeddings_0_lst, embeddings_1_lst)): - assert len(embeddings_0) == len(embeddings_1), ( - f"Length mismatch: {len(embeddings_0)} vs. 
{len(embeddings_1)}") - - sim = F.cosine_similarity(torch.tensor(embeddings_0), - torch.tensor(embeddings_1), - dim=0) - - fail_msg = (f"Test{prompt_idx}:" - f"\n{name_0}:\t{embeddings_0[:16]!r}" - f"\n{name_1}:\t{embeddings_1[:16]!r}") - - assert sim >= 1 - tol, fail_msg - - -def matryoshka_fy(tensor, dimensions): - tensor = torch.tensor(tensor) - tensor = tensor[..., :dimensions] - tensor = F.normalize(tensor, p=2, dim=1) - return tensor diff --git a/tests/models/encoder_decoder/__init__.py b/tests/models/encoder_decoder/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/audio_language/__init__.py b/tests/models/encoder_decoder/audio_language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/language/__init__.py b/tests/models/encoder_decoder/language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/vision_language/__init__.py b/tests/models/encoder_decoder/vision_language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/vision_language/test_broadcast.py b/tests/models/encoder_decoder/vision_language/test_broadcast.py deleted file mode 100644 index 8d986414eec..00000000000 --- a/tests/models/encoder_decoder/vision_language/test_broadcast.py +++ /dev/null @@ -1,37 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -import pytest - -from ....utils import multi_gpu_test - - -@multi_gpu_test(num_gpus=2) -@pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"]) -@pytest.mark.parametrize("model", [ - "meta-llama/Llama-3.2-11B-Vision-Instruct", -]) -def test_models(hf_runner, vllm_runner, image_assets, - distributed_executor_backend, model) -> None: - - dtype = "half" - max_tokens = 5 - num_logprobs = 5 - tensor_parallel_size = 2 - - if model.startswith("meta-llama/Llama-3.2-11B-Vision-Instruct"): - from .test_mllama import models, run_test - else: - raise NotImplementedError(f"Unsupported model: {model}") - - run_test( - hf_runner, - vllm_runner, - image_assets, - model=models[0], - size_factors=[0.25, 0.5, 1.0], - dtype=dtype, - max_tokens=max_tokens, - num_logprobs=num_logprobs, - tensor_parallel_size=tensor_parallel_size, - distributed_executor_backend=distributed_executor_backend, - ) diff --git a/tests/models/decoder_only/__init__.py b/tests/models/language/__init__.py similarity index 100% rename from tests/models/decoder_only/__init__.py rename to tests/models/language/__init__.py diff --git a/tests/models/decoder_only/audio_language/__init__.py b/tests/models/language/generation/__init__.py similarity index 100% rename from tests/models/decoder_only/audio_language/__init__.py rename to tests/models/language/generation/__init__.py diff --git a/tests/models/encoder_decoder/language/test_bart.py b/tests/models/language/generation/test_bart.py similarity index 98% rename from tests/models/encoder_decoder/language/test_bart.py rename to tests/models/language/generation/test_bart.py index e8070d28bef..8ab0167dc77 100644 --- a/tests/models/encoder_decoder/language/test_bart.py +++ b/tests/models/language/generation/test_bart.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the outputs of HF and vLLM for BART models using greedy sampling. - -Run `pytest tests/models/encoder_decoder/language/test_bart.py`. 
-""" from typing import Optional import pytest diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/language/generation/test_common.py similarity index 65% rename from tests/models/decoder_only/language/test_models.py rename to tests/models/language/generation/test_common.py index 79fa3fa9977..c755593c9ac 100644 --- a/tests/models/decoder_only/language/test_models.py +++ b/tests/models/language/generation/test_common.py @@ -1,14 +1,14 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the outputs of HF and vLLM when using greedy sampling. - -Run `pytest tests/models/test_models.py`. -""" +import os +from typing import Optional import pytest import torch from vllm.platforms import current_platform +from ....utils import large_gpu_mark +from ...registry import HF_EXAMPLE_MODELS from ...utils import check_logprobs_close # These have unsupported head_dim for FA. We do not @@ -25,9 +25,9 @@ AITER_MODEL_LIST = [ "meta-llama/Llama-3.2-1B-Instruct", "openbmb/MiniCPM3-4B", - "Qwen/Qwen-7B", + "Qwen/Qwen-7B-Chat", "Qwen/Qwen2.5-0.5B-Instruct", - "ehristoforu/Falcon3-MoE-2x7B-Insruct", + "TitanML/tiny-mixtral", ] @@ -60,7 +60,8 @@ pytest.param( "openbmb/MiniCPM3-4B", # fused_moe not supported on CPU - marks=[pytest.mark.core_model], + marks=[pytest.mark.core_model, + large_gpu_mark(min_gb=32)], ), pytest.param( "facebook/opt-125m", # opt @@ -71,7 +72,7 @@ marks=[pytest.mark.core_model], ), pytest.param( - "Qwen/Qwen-7B", # qwen (text-only) + "Qwen/Qwen-7B-Chat", # qwen (text-only) ), pytest.param( "Qwen/Qwen2.5-0.5B-Instruct", # qwen2 @@ -80,18 +81,21 @@ pytest.param("stabilityai/stablelm-3b-4e1t"), # stablelm pytest.param("bigcode/starcoder2-3b"), # starcoder2 pytest.param( - "ehristoforu/Falcon3-MoE-2x7B-Insruct", # mixtral + "TitanML/tiny-mixtral", # mixtral marks=[pytest.mark.cpu_model], ) ]) -@pytest.mark.parametrize("dtype", ["half"]) @pytest.mark.parametrize("max_tokens", [32]) @pytest.mark.parametrize("num_logprobs", [5]) @pytest.mark.parametrize( "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False]) def test_models(hf_runner, vllm_runner, example_prompts, model: str, - dtype: str, max_tokens: int, num_logprobs: int, - use_rocm_aiter: bool, monkeypatch) -> None: + max_tokens: int, num_logprobs: int, use_rocm_aiter: bool, + monkeypatch) -> None: + + model_info = HF_EXAMPLE_MODELS.find_hf_info(model) + model_info.check_available_online(on_fail="skip") + model_info.check_transformers_version(on_fail="skip") if model in REQUIRES_V0: monkeypatch.setenv("VLLM_USE_V1", "0") @@ -105,17 +109,38 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str, # in parts of the operators pytest.skip(f"Skipping '{model}' model test with AITER kernel.") - with hf_runner(model, dtype=dtype) as hf_model: - if model.startswith("THUDM/chatglm3"): - hf_model.model.get_output_embeddings = lambda: \ - hf_model.model.transformer.output_layer + use_prompt_embeds = os.getenv("VLLM_USE_V1") == "0" + with hf_runner(model) as hf_model: hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs) - with vllm_runner(model, dtype=dtype) as vllm_model: + prompt_embeds: Optional[list[torch.Tensor]] = ([] if use_prompt_embeds + else None) + + prompt_token_ids = [] + for prompt in example_prompts: + token_ids = hf_model.tokenizer(prompt, + return_tensors="pt").input_ids.to( + hf_model.model.device) + prompt_token_ids.append(token_ids) + if prompt_embeds is not None: + prompt_embeds.append(hf_model.model.get_input_embeddings()( + 
token_ids).squeeze(0)) + + with vllm_runner( + model, + tokenizer_name=model_info.tokenizer or model, + tokenizer_mode=model_info.tokenizer_mode, + trust_remote_code=model_info.trust_remote_code, + max_num_seqs=2, + enable_prompt_embeds=use_prompt_embeds, + ) as vllm_model: vllm_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) + if prompt_embeds is not None: + vllm_outputs_from_embeds = vllm_model.generate_greedy_logprobs( + prompt_embeds, max_tokens, num_logprobs) check_logprobs_close( outputs_0_lst=hf_outputs, @@ -123,6 +148,14 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str, name_0="hf", name_1="vllm", ) + if prompt_embeds is not None: + check_logprobs_close( + outputs_0_lst=vllm_outputs, + outputs_1_lst=vllm_outputs_from_embeds, + name_0="vllm", + name_1="vllm_from_embeds", + ) + if use_rocm_aiter: # this is to ensure that vllm engine # has deallocated the memory before running the next diff --git a/tests/models/decoder_only/language/test_granite.py b/tests/models/language/generation/test_granite.py similarity index 89% rename from tests/models/decoder_only/language/test_granite.py rename to tests/models/language/generation/test_granite.py index 119b79d64c9..f381c34f44b 100644 --- a/tests/models/decoder_only/language/test_granite.py +++ b/tests/models/language/generation/test_granite.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the outputs of HF and vLLM for Granite models using greedy sampling. - -Run `pytest tests/models/test_granite.py`. -""" import pytest from ...utils import check_logprobs_close diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py new file mode 100644 index 00000000000..880967b4aed --- /dev/null +++ b/tests/models/language/generation/test_hybrid.py @@ -0,0 +1,315 @@ +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from tests.utils import multi_gpu_test +from vllm.engine.arg_utils import EngineArgs +from vllm.sampling_params import SamplingParams + +from ...utils import check_logprobs_close, check_outputs_equal + +# NOTE: The first model in each list is taken as the primary model, +# meaning that it will be used in all tests in this file +# The rest of the models will only be tested by test_models + +SSM_MODELS = [ + "state-spaces/mamba-130m-hf", + "tiiuae/falcon-mamba-tiny-dev", + # TODO: Compare to a Mamba2 model. The HF transformers implementation of + # Mamba2 is buggy for Codestral as it doesn't handle n_groups. + # See https://github.com/huggingface/transformers/pull/35943 + # "mistralai/Mamba-Codestral-7B-v0.1", +] + +HYBRID_MODELS = [ + "ai21labs/Jamba-tiny-dev", + # NOTE: Running Plamo2 in transformers implementation requires to install + # causal-conv1d package, which is not listed as a test dependency as it's + # not compatible with pip-compile. 
+ "pfnet/plamo-2-1b", + "Zyphra/Zamba2-1.2B-instruct", + "hmellor/bamba-tiny-random", +] + +# Avoid OOM +MAX_NUM_SEQS = 4 + + +@pytest.mark.parametrize("model", SSM_MODELS + HYBRID_MODELS) +@pytest.mark.parametrize("max_tokens", [64]) +@pytest.mark.parametrize("num_logprobs", [5]) +def test_models( + hf_runner, + vllm_runner, + example_prompts, + model: str, + max_tokens: int, + num_logprobs: int, +) -> None: + with hf_runner(model) as hf_model: + hf_outputs = hf_model.generate_greedy_logprobs_limit( + example_prompts, max_tokens, num_logprobs) + + with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: + vllm_outputs = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) + + check_logprobs_close( + outputs_0_lst=hf_outputs, + outputs_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + ) + + +@pytest.mark.parametrize("model", SSM_MODELS + HYBRID_MODELS) +@pytest.mark.parametrize("max_tokens", [64]) +@pytest.mark.parametrize("num_logprobs", [5]) +def test_batching( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, + num_logprobs: int, +) -> None: + for_loop_outputs = [] + with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: + for prompt in example_prompts: + single_output, = vllm_model.generate_greedy_logprobs([prompt], + max_tokens, + num_logprobs) + for_loop_outputs.append(single_output) + + batched_outputs = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) + + check_logprobs_close( + outputs_0_lst=for_loop_outputs, + outputs_1_lst=batched_outputs, + name_0="for_loop_vllm", + name_1="batched_vllm", + ) + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +@pytest.mark.parametrize("max_tokens", [32]) +@pytest.mark.parametrize("num_logprobs", [5]) +@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16]) +def test_chunked_prefill( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, + num_logprobs: int, + chunked_prefill_token_size: int, +) -> None: + max_num_seqs = chunked_prefill_token_size + max_num_batched_tokens = chunked_prefill_token_size + + with vllm_runner(model, + enable_chunked_prefill=True, + max_num_batched_tokens=max_num_batched_tokens, + max_num_seqs=max_num_seqs) as vllm_model: + chunked = vllm_model.generate_greedy_logprobs(example_prompts, + max_tokens, num_logprobs) + + with vllm_runner(model, + enable_chunked_prefill=False, + max_num_seqs=max_num_seqs) as vllm_model: + non_chunked = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) + + check_logprobs_close( + outputs_0_lst=chunked, + outputs_1_lst=non_chunked, + name_0="chunked", + name_1="non_chunked", + ) + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +@pytest.mark.parametrize("max_tokens", [10]) +def test_chunked_prefill_with_parallel_sampling( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, +) -> None: + """ + Tests chunked prefill in conjunction with n > 1. + + In this case, prefill is populated with decoding tokens and + we test that it doesn't fail. 
+ + This test might fail if cache is not allocated correctly for n > 1 + decoding steps inside a chunked prefill forward pass + (where we have both prefill and decode together) + """ + sampling_params = SamplingParams(n=3, + temperature=1, + seed=0, + max_tokens=max_tokens) + with vllm_runner( + model, + enable_chunked_prefill=True, + # forces prefill chunks with decoding + max_num_batched_tokens=MAX_NUM_SEQS * 3, + max_num_seqs=MAX_NUM_SEQS, + ) as vllm_model: + vllm_model.generate(example_prompts, sampling_params) + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +@pytest.mark.parametrize("max_tokens", [20]) +def test_mamba_cache_cg_padding( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, +) -> None: + """ + This test is for verifying that mamba cache is padded to CG captured + batch size. If it's not, a torch RuntimeError will be raised because + tensor dimensions aren't compatible. + """ + vllm_config = EngineArgs(model=model, + trust_remote_code=True).create_engine_config() + while len(example_prompts) == vllm_config.pad_for_cudagraph( + len(example_prompts)): + example_prompts.append(example_prompts[0]) + + try: + with vllm_runner(model) as vllm_model: + vllm_model.generate_greedy(example_prompts, max_tokens) + except RuntimeError: + pytest.fail( + "Couldn't run batch size which is not equal to a Cuda Graph " + "captured batch size. " + "Could be related to mamba cache not padded correctly") + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +@pytest.mark.parametrize("max_tokens", [20]) +def test_models_preemption_recompute( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, +) -> None: + """ + Tests that outputs are identical with and w/o preemptions (recompute). + """ + with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: + scheduler = vllm_model.model.llm_engine.scheduler[0] + scheduler.ENABLE_ARTIFICIAL_PREEMPT = True + preempt_vllm_outputs = vllm_model.generate_greedy( + example_prompts, max_tokens) + + scheduler.ENABLE_ARTIFICIAL_PREEMPT = False + vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) + + check_outputs_equal( + outputs_0_lst=preempt_vllm_outputs, + outputs_1_lst=vllm_outputs, + name_0="vllm_preepmtions", + name_1="vllm", + ) + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +def test_fail_upon_inc_requests_and_finished_requests_lt_available_blocks( + vllm_runner, + example_prompts, + model: str, +) -> None: + """ + This test is for verifying that the hybrid inner state management doesn't + collapse in case where the number of incoming requests and + finished_requests_ids is larger than the maximum mamba block capacity. + + This could generally happen due to the fact that hybrid does support + statelessness mechanism where it can cleanup new incoming requests in + a single step. + """ + try: + with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: + vllm_model.generate_greedy([example_prompts[0]] * 100, 10) + except ValueError: + pytest.fail("Hybrid inner state wasn't cleaned up properly between" + "steps finished requests registered unnecessarily ") + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +def test_state_cleanup( + vllm_runner, + example_prompts, + model: str, +) -> None: + """ + This test is for verifying that the Hybrid state is cleaned up between + steps. + + If its not cleaned, an error would be expected. 
+ """ + try: + with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: + for _ in range(10): + vllm_model.generate_greedy([example_prompts[0]] * 100, 1) + except ValueError: + pytest.fail("Hybrid inner state wasn't cleaned up between states, " + "could be related to finished_requests_ids") + + +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +@pytest.mark.parametrize("max_tokens", [64]) +def test_multistep_correctness( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, +) -> None: + with vllm_runner(model, num_scheduler_steps=8, + max_num_seqs=2) as vllm_model: + vllm_outputs_multistep = vllm_model.generate_greedy( + example_prompts, max_tokens) + + with vllm_runner(model, num_scheduler_steps=1, + max_num_seqs=2) as vllm_model: + vllm_outputs_single_step = vllm_model.generate_greedy( + example_prompts, max_tokens) + + check_outputs_equal( + outputs_0_lst=vllm_outputs_multistep, + outputs_1_lst=vllm_outputs_single_step, + name_0="vllm_outputs_multistep", + name_1="vllm_outputs_single_step", + ) + + +@multi_gpu_test(num_gpus=2) +@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) +@pytest.mark.parametrize("max_tokens", [64]) +@pytest.mark.parametrize("num_logprobs", [5]) +def test_distributed_correctness( + vllm_runner, + example_prompts, + model: str, + max_tokens: int, + num_logprobs: int, +) -> None: + with vllm_runner(model, tensor_parallel_size=1, + max_num_seqs=2) as vllm_model: + vllm_outputs_tp_1 = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) + + with vllm_runner(model, tensor_parallel_size=2, + max_num_seqs=2) as vllm_model: + vllm_outputs_tp_2 = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) + + check_logprobs_close( + outputs_0_lst=vllm_outputs_tp_1, + outputs_1_lst=vllm_outputs_tp_2, + name_0="vllm_tp_1", + name_1="vllm_tp_2", + ) diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/language/generation/test_mistral.py similarity index 86% rename from tests/models/decoder_only/language/test_mistral.py rename to tests/models/language/generation/test_mistral.py index ec885386dd9..c1b612ae213 100644 --- a/tests/models/decoder_only/language/test_mistral.py +++ b/tests/models/language/generation/test_mistral.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling. - -Run `pytest tests/models/test_mistral.py`. 
-""" import copy import json @@ -10,8 +6,8 @@ import jsonschema.exceptions import pytest -from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import ( # noqa - MistralToolParser) +from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import ( + MistralToolCall, MistralToolParser) from vllm.sampling_params import GuidedDecodingParams, SamplingParams from ...utils import check_logprobs_close @@ -194,7 +190,6 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str, ) -@pytest.mark.skip("RE-ENABLE: test is currently failing on main.") @pytest.mark.parametrize("model", MISTRAL_FORMAT_MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("max_tokens", [64]) @@ -246,10 +241,8 @@ def test_mistral_symbolic_languages(vllm_runner, model: str, assert "�" not in outputs[0].outputs[0].text.strip() -@pytest.mark.skip("RE-ENABLE: test is currently failing on main.") +@pytest.mark.parametrize("model", MISTRAL_FORMAT_MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) -@pytest.mark.parametrize("model", - MISTRAL_FORMAT_MODELS) # v1 can't do func calling def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None: with vllm_runner(model, dtype=dtype, @@ -270,7 +263,8 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None: parsed_message = tool_parser.extract_tool_calls(model_output, None) assert parsed_message.tools_called - assert parsed_message.tool_calls[0].id == "0UAqFzWsD" + + assert MistralToolCall.is_valid_id(parsed_message.tool_calls[0].id) assert parsed_message.tool_calls[ 0].function.name == "get_current_weather" assert parsed_message.tool_calls[ @@ -281,28 +275,38 @@ def test_mistral_function_calling(vllm_runner, model: str, dtype: str) -> None: @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("guided_backend", ["outlines", "lm-format-enforcer", "xgrammar"]) -def test_mistral_guided_decoding(vllm_runner, model: str, - guided_backend: str) -> None: - with vllm_runner(model, dtype='bfloat16', - tokenizer_mode="mistral") as vllm_model: +def test_mistral_guided_decoding( + monkeypatch: pytest.MonkeyPatch, + vllm_runner, + model: str, + guided_backend: str, +) -> None: + with monkeypatch.context() as m: + # Guided JSON not supported in xgrammar + V1 yet + m.setenv("VLLM_USE_V1", "0") - guided_decoding = GuidedDecodingParams(json=SAMPLE_JSON_SCHEMA, - backend=guided_backend) - params = SamplingParams(max_tokens=512, - temperature=0.7, - guided_decoding=guided_decoding) + with vllm_runner( + model, + dtype='bfloat16', + tokenizer_mode="mistral", + guided_decoding_backend=guided_backend, + ) as vllm_model: + guided_decoding = GuidedDecodingParams(json=SAMPLE_JSON_SCHEMA) + params = SamplingParams(max_tokens=512, + temperature=0.7, + guided_decoding=guided_decoding) - messages = [{ - "role": "system", - "content": "you are a helpful assistant" - }, { - "role": - "user", - "content": - f"Give an example JSON for an employee profile that " - f"fits this schema: {SAMPLE_JSON_SCHEMA}" - }] - outputs = vllm_model.model.chat(messages, sampling_params=params) + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": + "user", + "content": + f"Give an example JSON for an employee profile that " + f"fits this schema: {SAMPLE_JSON_SCHEMA}" + }] + outputs = vllm_model.model.chat(messages, sampling_params=params) generated_text = outputs[0].outputs[0].text json_response = json.loads(generated_text) diff --git 
a/tests/models/decoder_only/language/test_phimoe.py b/tests/models/language/generation/test_phimoe.py similarity index 96% rename from tests/models/decoder_only/language/test_phimoe.py rename to tests/models/language/generation/test_phimoe.py index f9757d6ac29..603ca1cb12a 100644 --- a/tests/models/decoder_only/language/test_phimoe.py +++ b/tests/models/language/generation/test_phimoe.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the outputs of HF and vLLM for moe models using greedy sampling. - -Run `pytest tests/models/test_phimoe.py`. -""" import pytest import torch diff --git a/tests/models/decoder_only/language/__init__.py b/tests/models/language/pooling/__init__.py similarity index 100% rename from tests/models/decoder_only/language/__init__.py rename to tests/models/language/pooling/__init__.py diff --git a/tests/models/embedding/language/test_cls_models.py b/tests/models/language/pooling/test_classification.py similarity index 91% rename from tests/models/embedding/language/test_cls_models.py rename to tests/models/language/pooling/test_classification.py index 6a3cd8a5c59..44af3df08a8 100644 --- a/tests/models/embedding/language/test_cls_models.py +++ b/tests/models/language/pooling/test_classification.py @@ -1,8 +1,4 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the classification outputs of HF and vLLM models. - -Run `pytest tests/models/test_cls_models.py`. -""" import pytest import torch from transformers import AutoModelForSequenceClassification @@ -19,7 +15,7 @@ ) @pytest.mark.parametrize("dtype", ["half"] if current_platform.is_rocm() else ["float"]) -def test_classification_models( +def test_models( hf_runner, vllm_runner, example_prompts, diff --git a/tests/models/embedding/language/test_embedding.py b/tests/models/language/pooling/test_embedding.py similarity index 94% rename from tests/models/embedding/language/test_embedding.py rename to tests/models/language/pooling/test_embedding.py index 5deb35fa321..9db385e77bd 100644 --- a/tests/models/embedding/language/test_embedding.py +++ b/tests/models/language/pooling/test_embedding.py @@ -1,14 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the embedding outputs of HF and vLLM models. - -Run `pytest tests/models/embedding/language/test_embedding.py`. -""" import pytest from vllm.config import PoolerConfig from vllm.platforms import current_platform -from ..utils import check_embeddings_close +from ...utils import check_embeddings_close @pytest.mark.parametrize( diff --git a/tests/models/embedding/language/test_gritlm.py b/tests/models/language/pooling/test_gritlm.py similarity index 64% rename from tests/models/embedding/language/test_gritlm.py rename to tests/models/language/pooling/test_gritlm.py index 87a1dde9381..3ad6e719094 100644 --- a/tests/models/embedding/language/test_gritlm.py +++ b/tests/models/language/pooling/test_gritlm.py @@ -7,11 +7,10 @@ import openai import pytest -import pytest_asyncio from scipy.spatial.distance import cosine -import vllm -import vllm.config +from vllm import LLM, SamplingParams +from vllm.config import ModelConfig from vllm.utils import STR_BACKEND_ENV_VAR from ....utils import RemoteOpenAIServer @@ -31,73 +30,45 @@ def _arr(arr): return array("i", arr) -def test_find_array(monkeypatch: pytest.MonkeyPatch): - # GritLM embedding implementation is only supported by XFormers backend. 
- with monkeypatch.context() as m: - m.setenv(STR_BACKEND_ENV_VAR, "XFORMERS") - - from vllm.model_executor.models.gritlm import GritLMPooler - - # Create an LLM object to get the model config. - llm = vllm.LLM(MODEL_NAME, task="embed", max_model_len=MAX_MODEL_LEN) - pooler = GritLMPooler(model_config=llm.llm_engine.model_config) - - arr = _arr([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - - assert pooler._find_array(arr, _arr([3, 4, 5]), start_idx=0) == 3 - assert pooler._find_array(arr, _arr([3, 4, 5]), start_idx=1) == 3 - assert pooler._find_array(arr, _arr([3, 4, 5]), start_idx=5) == -1 - assert pooler._find_array(arr, _arr([3, 5]), start_idx=0) == -1 - - with pytest.raises(ValueError): - pooler._find_array(arr, _arr([3, 4, 5]), start_idx=-1) - - -@pytest.fixture(scope="module") -def server_embedding(): - # GritLM embedding implementation is only supported by XFormers backend. - args = ["--task", "embed", "--max_model_len", str(MAX_MODEL_LEN)] - with pytest.MonkeyPatch.context() as m: - m.setenv(STR_BACKEND_ENV_VAR, "XFORMERS") - with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: - yield remote_server - - -@pytest.fixture(scope="module") -def server_generate(): - args = ["--task", "generate", "--max_model_len", str(MAX_MODEL_LEN)] - with pytest.MonkeyPatch.context() as m: - m.setenv(STR_BACKEND_ENV_VAR, "XFORMERS") - with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: - yield remote_server +def test_find_array(): + from vllm.model_executor.models.gritlm import GritLMPooler + model_config = ModelConfig( + MODEL_NAME, + task="embed", + tokenizer=MODEL_NAME, + tokenizer_mode="auto", + trust_remote_code=False, + dtype="bfloat16", + seed=0, + ) + pooler = GritLMPooler(model_config=model_config) -@pytest_asyncio.fixture -async def client_embedding(server_embedding: RemoteOpenAIServer): - async with server_embedding.get_async_client() as async_client: - yield async_client + arr = _arr([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + assert pooler._find_array(arr, _arr([3, 4, 5]), start_idx=0) == 3 + assert pooler._find_array(arr, _arr([3, 4, 5]), start_idx=1) == 3 + assert pooler._find_array(arr, _arr([3, 4, 5]), start_idx=5) == -1 + assert pooler._find_array(arr, _arr([3, 5]), start_idx=0) == -1 -@pytest_asyncio.fixture -async def client_generate(server_generate: RemoteOpenAIServer): - async with server_generate.get_async_client() as async_client: - yield async_client + with pytest.raises(ValueError): + pooler._find_array(arr, _arr([3, 4, 5]), start_idx=-1) def run_llm_encode( - llm: vllm.LLM, + llm: LLM, queries: list[str], instruction: str, -) -> list[float]: - outputs = llm.encode([instruction + q for q in queries], ) +) -> list[list[float]]: + outputs = llm.embed([instruction + q for q in queries]) return [output.outputs.embedding for output in outputs] async def run_client_embeddings( - client: vllm.LLM, + client: openai.AsyncOpenAI, queries: list[str], instruction: str, -) -> list[float]: +) -> list[list[float]]: outputs = await client.embeddings.create( model=MODEL_NAME, input=[instruction + q for q in queries], @@ -132,7 +103,7 @@ def get_test_data(): return queries, q_instruction, documents, d_instruction -def validate_embed_output(q_rep: list[float], d_rep: list[float]): +def validate_embed_output(q_rep: list[list[float]], d_rep: list[list[float]]): cosine_sim_q0_d0 = 1 - cosine(q_rep[0], d_rep[0]) assert math.isclose(cosine_sim_q0_d0, 0.609, abs_tol=0.001) @@ -143,70 +114,100 @@ def validate_embed_output(q_rep: list[float], d_rep: list[float]): assert math.isclose(cosine_sim_q1_d0, 0.120, 
abs_tol=0.001) cosine_sim_q1_d1 = 1 - cosine(q_rep[1], d_rep[1]) - assert math.isclose(cosine_sim_q1_d1, 0.532, abs_tol=0.001) + assert math.isclose(cosine_sim_q1_d1, 0.534, abs_tol=0.001) -def test_gritlm_offline_embedding(monkeypatch: pytest.MonkeyPatch): +def test_gritlm_offline_embedding(monkeypatch: pytest.MonkeyPatch, + vllm_runner): # GritLM embedding implementation is only supported by XFormers backend. with monkeypatch.context() as m: m.setenv(STR_BACKEND_ENV_VAR, "XFORMERS") queries, q_instruction, documents, d_instruction = get_test_data() - llm = vllm.LLM(MODEL_NAME, task="embed", max_model_len=MAX_MODEL_LEN) + with vllm_runner( + MODEL_NAME, + task="embed", + max_model_len=MAX_MODEL_LEN, + ) as vllm_model: + llm = vllm_model.model + + d_rep = run_llm_encode( + llm, + documents, + d_instruction, + ) + q_rep = run_llm_encode( + llm, + queries, + q_instruction, + ) + + validate_embed_output(q_rep, d_rep) + + +@pytest.mark.asyncio +async def test_gritlm_api_server_embedding(): + queries, q_instruction, documents, d_instruction = get_test_data() + + # GritLM embedding implementation is only supported by XFormers backend. + args = ["--task", "embed", "--max_model_len", str(MAX_MODEL_LEN)] + env_dict = {STR_BACKEND_ENV_VAR: "XFORMERS"} + + with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as server: + client_embedding = server.get_async_client() - d_rep = run_llm_encode( - llm, + d_rep = await run_client_embeddings( + client_embedding, documents, d_instruction, ) - q_rep = run_llm_encode( - llm, + q_rep = await run_client_embeddings( + client_embedding, queries, q_instruction, ) - validate_embed_output(q_rep, d_rep) - - -@pytest.mark.asyncio -async def test_gritlm_api_server_embedding( - client_embedding: openai.AsyncOpenAI, ): - queries, q_instruction, documents, d_instruction = get_test_data() + validate_embed_output(q_rep, d_rep) - d_rep = await run_client_embeddings( - client_embedding, - documents, - d_instruction, - ) - q_rep = await run_client_embeddings( - client_embedding, - queries, - q_instruction, - ) - validate_embed_output(q_rep, d_rep) +def test_gritlm_offline_generate(monkeypatch: pytest.MonkeyPatch, vllm_runner): + # GritLM embedding implementation is only supported by XFormers backend. + with monkeypatch.context() as m: + m.setenv("VLLM_USE_V1", "0") + m.setenv(STR_BACKEND_ENV_VAR, "XFORMERS") + input = "<|user|>\nWhat is the capital of France?\n<|assistant|>\n" -def test_gritlm_offline_gen(): - input = "<|user|>\nWhat is the capital of France?\n<|assistant|>\n" + with vllm_runner( + MODEL_NAME, + task="generate", + max_model_len=MAX_MODEL_LEN, + ) as vllm_model: + llm = vllm_model.model - llm = vllm.LLM(MODEL_NAME, max_model_len=MAX_MODEL_LEN) - sampling_params = vllm.SamplingParams(temperature=0.0, max_tokens=256) - outputs = llm.generate(input, sampling_params=sampling_params) + sampling_params = SamplingParams(temperature=0.0, max_tokens=256) + outputs = llm.generate(input, sampling_params=sampling_params) - assert outputs[0].outputs[0].text == "The capital of France is Paris." + assert outputs[0].outputs[0].text == "The capital of France is Paris." 
@pytest.mark.asyncio -async def test_gritlm_api_server_gen(client_generate: openai.AsyncOpenAI): +async def test_gritlm_api_server_generate(): input = "<|user|>\nWhat is the capital of France?\n<|assistant|>\n" - outputs = await client_generate.completions.create( - model=MODEL_NAME, - prompt=input, - max_tokens=256, - temperature=0.0, - ) + # GritLM embedding implementation is only supported by XFormers backend. + args = ["--task", "generate", "--max_model_len", str(MAX_MODEL_LEN)] + env_dict = {"VLLM_USE_V1": "0", STR_BACKEND_ENV_VAR: "XFORMERS"} + + with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as server: + client_generate = server.get_async_client() + + outputs = await client_generate.completions.create( + model=MODEL_NAME, + prompt=input, + max_tokens=256, + temperature=0.0, + ) assert outputs.choices[0].text == "The capital of France is Paris." diff --git a/tests/models/embedding/language/test_jina.py b/tests/models/language/pooling/test_jina.py similarity index 82% rename from tests/models/embedding/language/test_jina.py rename to tests/models/language/pooling/test_jina.py index 881d0a75b15..5287ca37c0f 100644 --- a/tests/models/embedding/language/test_jina.py +++ b/tests/models/language/pooling/test_jina.py @@ -1,16 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 -# ruff: noqa: E501 -"""Compare the scoring outputs of HF and vLLM models. - -Run `pytest tests/models/embedding/language/test_jina.py`. -""" import math import pytest -from tests.models.embedding.utils import check_embeddings_close, matryoshka_fy from vllm import PoolingParams +from ...utils import check_embeddings_close, matryoshka_fy + SCORING_MODELS = [ "jinaai/jina-reranker-v2-base-multilingual", # Roberta ] @@ -21,9 +17,9 @@ "Organic skincare for sensitive skin with aloe vera and chamomile.", "New makeup trends focus on bold colors and innovative techniques", "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille", - "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken", - "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla", - "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras", + "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken", # noqa: E501 + "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla", # noqa: E501 + "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras", # noqa: E501 "针对敏感肌专门设计的天然有机护肤产品", "新的化妆趋势注重鲜艳的颜色和创新的技巧", "敏感肌のために特別に設計された天然有機スキンケア製品", @@ -153,14 +149,24 @@ def test_matryoshka( with vllm_runner(model, task="embed", dtype=dtype, max_model_len=None) as vllm_model: - vllm_outputs = vllm_model.encode( - example_prompts, - pooling_params=PoolingParams(dimensions=dimensions)) - - check_embeddings_close( - embeddings_0_lst=hf_outputs, - embeddings_1_lst=vllm_outputs, - name_0="hf", - name_1="vllm", - tol=1e-2, - ) + matryoshka_dimensions = ( + vllm_model.model.llm_engine.model_config.matryoshka_dimensions) + assert matryoshka_dimensions is not None + + if dimensions not in matryoshka_dimensions: + with pytest.raises(ValueError): + vllm_model.encode( + example_prompts, + pooling_params=PoolingParams(dimensions=dimensions)) + else: + vllm_outputs = vllm_model.encode( + example_prompts, + pooling_params=PoolingParams(dimensions=dimensions)) + + check_embeddings_close( + embeddings_0_lst=hf_outputs, + embeddings_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + tol=1e-2, + ) diff --git 
a/tests/models/embedding/language/test_scoring.py b/tests/models/language/pooling/test_scoring.py similarity index 72% rename from tests/models/embedding/language/test_scoring.py rename to tests/models/language/pooling/test_scoring.py index d6408258ffc..e9527700c3c 100644 --- a/tests/models/embedding/language/test_scoring.py +++ b/tests/models/language/pooling/test_scoring.py @@ -1,15 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -"""Compare the scoring outputs of HF and vLLM models. - -Run `pytest tests/models/embedding/language/test_scoring.py`. -""" import math import pytest import torch import torch.nn.functional as F -MODELS = [ +CROSS_ENCODER_MODELS = [ "cross-encoder/ms-marco-MiniLM-L-6-v2", # Bert "BAAI/bge-reranker-v2-m3", # Roberta ] @@ -28,21 +24,21 @@ "The capital of Germany is Berlin.", ] +DTYPE = "half" + -@pytest.fixture(scope="module", params=MODELS) +@pytest.fixture(scope="module", params=CROSS_ENCODER_MODELS) def model_name(request): yield request.param -@pytest.mark.parametrize("dtype", ["half"]) -def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str): - +def test_cross_encoder_1_to_1(vllm_runner, hf_runner, model_name): text_pair = [TEXTS_1[0], TEXTS_2[0]] - with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model: + with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model: hf_outputs = hf_model.predict([text_pair]).tolist() - with vllm_runner(model_name, task="score", dtype=dtype, + with vllm_runner(model_name, task="score", dtype=DTYPE, max_model_len=None) as vllm_model: vllm_outputs = vllm_model.score(text_pair[0], text_pair[1]) @@ -52,18 +48,16 @@ def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str): assert math.isclose(hf_outputs[0], vllm_outputs[0], rel_tol=0.01) -@pytest.mark.parametrize("dtype", ["half"]) -def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str): - +def test_cross_encoder_1_to_N(vllm_runner, hf_runner, model_name): text_pairs = [ [TEXTS_1[0], TEXTS_2[0]], [TEXTS_1[0], TEXTS_2[1]], ] - with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model: + with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model: hf_outputs = hf_model.predict(text_pairs).tolist() - with vllm_runner(model_name, task="score", dtype=dtype, + with vllm_runner(model_name, task="score", dtype=DTYPE, max_model_len=None) as vllm_model: vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2) @@ -74,18 +68,16 @@ def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str): assert math.isclose(hf_outputs[1], vllm_outputs[1], rel_tol=0.01) -@pytest.mark.parametrize("dtype", ["half"]) -def test_llm_N_to_N(vllm_runner, hf_runner, model_name, dtype: str): - +def test_cross_encoder_N_to_N(vllm_runner, hf_runner, model_name): text_pairs = [ [TEXTS_1[0], TEXTS_2[0]], [TEXTS_1[1], TEXTS_2[1]], ] - with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model: + with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model: hf_outputs = hf_model.predict(text_pairs).tolist() - with vllm_runner(model_name, task="score", dtype=dtype, + with vllm_runner(model_name, task="score", dtype=DTYPE, max_model_len=None) as vllm_model: vllm_outputs = vllm_model.score(TEXTS_1, TEXTS_2) @@ -101,13 +93,10 @@ def emb_model_name(request): yield request.param -@pytest.mark.parametrize("dtype", ["half"]) -def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name, - dtype: str): - +def test_embedding_1_to_1(vllm_runner, hf_runner, emb_model_name): text_pair = 
[TEXTS_1[0], TEXTS_2[0]] - with hf_runner(emb_model_name, dtype=dtype, + with hf_runner(emb_model_name, dtype=DTYPE, is_sentence_transformer=True) as hf_model: hf_embeddings = hf_model.encode(text_pair) hf_outputs = [ @@ -116,7 +105,7 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name, with vllm_runner(emb_model_name, task="embed", - dtype=dtype, + dtype=DTYPE, max_model_len=None) as vllm_model: vllm_outputs = vllm_model.score(text_pair[0], text_pair[1]) @@ -126,16 +115,13 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name, assert math.isclose(hf_outputs[0], vllm_outputs[0], rel_tol=0.01) -@pytest.mark.parametrize("dtype", ["half"]) -def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name, - dtype: str): - +def test_embedding_1_to_N(vllm_runner, hf_runner, emb_model_name): text_pairs = [ [TEXTS_1[0], TEXTS_2[0]], [TEXTS_1[0], TEXTS_2[1]], ] - with hf_runner(emb_model_name, dtype=dtype, + with hf_runner(emb_model_name, dtype=DTYPE, is_sentence_transformer=True) as hf_model: hf_embeddings = [ hf_model.encode(text_pair) for text_pair in text_pairs @@ -147,7 +133,7 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name, with vllm_runner(emb_model_name, task="embed", - dtype=dtype, + dtype=DTYPE, max_model_len=None) as vllm_model: vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2) @@ -158,16 +144,13 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name, assert math.isclose(hf_outputs[1], vllm_outputs[1], rel_tol=0.01) -@pytest.mark.parametrize("dtype", ["half"]) -def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name, - dtype: str): - +def test_embedding_N_to_N(vllm_runner, hf_runner, emb_model_name): text_pairs = [ [TEXTS_1[0], TEXTS_2[0]], [TEXTS_1[1], TEXTS_2[1]], ] - with hf_runner(emb_model_name, dtype=dtype, + with hf_runner(emb_model_name, dtype=DTYPE, is_sentence_transformer=True) as hf_model: hf_embeddings = [ hf_model.encode(text_pair) for text_pair in text_pairs @@ -179,7 +162,7 @@ def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name, with vllm_runner(emb_model_name, task="embed", - dtype=dtype, + dtype=DTYPE, max_model_len=None) as vllm_model: vllm_outputs = vllm_model.score(TEXTS_1, TEXTS_2) diff --git a/tests/models/language/pooling/test_snowflake_arctic_embed.py b/tests/models/language/pooling/test_snowflake_arctic_embed.py new file mode 100644 index 00000000000..c050b35b76b --- /dev/null +++ b/tests/models/language/pooling/test_snowflake_arctic_embed.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest + +from ...utils import EmbedModelInfo, check_embeddings_close + +EMBEDDING_PROMPTS = [ + 'what is snowflake?', 'Where can I get the best tacos?', 'The Data Cloud!', + 'Mexico City of Course!' 
+] + +MODELS = [ + EmbedModelInfo("Snowflake/snowflake-arctic-embed-xs", + is_matryoshka=False, + architecture="BertModel", + enable_test=True), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-s", + is_matryoshka=False, + architecture="BertModel", + enable_test=False), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-m", + is_matryoshka=False, + architecture="BertModel", + enable_test=False), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-long", + is_matryoshka=False, + architecture="NomicBertModel", + enable_test=True), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-l", + is_matryoshka=False, + architecture="BertModel", + enable_test=False), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v1.5", + is_matryoshka=True, + architecture="BertModel", + enable_test=True), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-l-v2.0", + is_matryoshka=True, + architecture="XLMRobertaModel", + enable_test=True), + EmbedModelInfo("Snowflake/snowflake-arctic-embed-m-v2.0", + is_matryoshka=True, + architecture="GteModel", + enable_test=True), +] + + +@pytest.mark.parametrize("model_info", MODELS) +@pytest.mark.parametrize("dtype", ["half"]) +def test_models( + hf_runner, + vllm_runner, + example_prompts, + model_info: EmbedModelInfo, + dtype: str, + monkeypatch, +) -> None: + if not model_info.enable_test: + # A model family has many models with the same architecture, + # and we don't need to test each one. + pytest.skip("Skipping test.") + + example_prompts = example_prompts + EMBEDDING_PROMPTS + + vllm_extra_kwargs = { + "hf_overrides": { + "is_matryoshka": model_info.is_matryoshka + } + } + + with hf_runner(model_info.name, dtype=dtype, + is_sentence_transformer=True) as hf_model: + hf_outputs = hf_model.encode(example_prompts) + + with vllm_runner(model_info.name, + task="embed", + dtype=dtype, + max_model_len=None, + **vllm_extra_kwargs) as vllm_model: + + assert (vllm_model.model.llm_engine.model_config.is_matryoshka == + model_info.is_matryoshka) + + if model_info.architecture: + assert (model_info.architecture + in vllm_model.model.llm_engine.model_config.architectures) + + vllm_outputs = vllm_model.encode(example_prompts) + + check_embeddings_close( + embeddings_0_lst=hf_outputs, + embeddings_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + tol=1e-2, + ) diff --git a/tests/models/language/pooling/test_truncation_control.py b/tests/models/language/pooling/test_truncation_control.py new file mode 100644 index 00000000000..1b8ac395ed1 --- /dev/null +++ b/tests/models/language/pooling/test_truncation_control.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: Apache-2.0 +import pytest + +MODEL_NAME = "sentence-transformers/all-MiniLM-L12-v2" +max_model_len = 128 + +input_str = """Immerse yourself in the enchanting chronicle of calculus, a +mathematical domain that has radically transformed our comprehension of +change and motion. Despite its roots in ancient civilizations, the +formal birth of calculus predominantly occurred in the 17th century, +primarily under the influential guidance of Sir Isaac Newton and Gottfried +Wilhelm Leibniz. The earliest traces of calculus concepts are found in +ancient Greek mathematics,most notably in the works of Eudoxus and +Archimedes, around 300 BCE. They utilized the 'method of exhaustion'—a +technique for computing areas and volumes through the use of finite sums. +This methodology laid crucial foundational work for integral calculus. 
+In the 17th century, both Newton and Leibniz independently pioneered +calculus, each contributing unique perspectives that would shape this new +field.""" + + +def test_smaller_truncation_size(vllm_runner, + model_name=MODEL_NAME, + input_str=input_str): + + truncate_prompt_tokens = 10 + + with vllm_runner(model_name, task="embed", + max_model_len=max_model_len) as vllm_model: + vllm_output = vllm_model.model.encode( + input_str, truncate_prompt_tokens=truncate_prompt_tokens) + + prompt_tokens = vllm_output[0].prompt_token_ids + + assert len(prompt_tokens) == truncate_prompt_tokens + + +def test_max_truncation_size(vllm_runner, + model_name=MODEL_NAME, + input_str=input_str): + truncate_prompt_tokens = -1 + + with vllm_runner(model_name, task="embed", + max_model_len=max_model_len) as vllm_model: + vllm_output = vllm_model.model.encode( + input_str, truncate_prompt_tokens=truncate_prompt_tokens) + + prompt_tokens = vllm_output[0].prompt_token_ids + + assert len(prompt_tokens) == max_model_len + + +def test_bigger_truncation_size(vllm_runner, + model_name=MODEL_NAME, + input_str=input_str): + + truncate_prompt_tokens = max_model_len + 1 + + with pytest.raises(ValueError), vllm_runner( + model_name, task="embed", + max_model_len=max_model_len) as vllm_model: + + llm_output = vllm_model.model.encode( + input_str, truncate_prompt_tokens=truncate_prompt_tokens) + + assert llm_output == f"""truncate_prompt_tokens value + ({truncate_prompt_tokens}) is greater than + max_model_len ({max_model_len}). Please, select + a smaller truncation size.""" diff --git a/tests/models/decoder_only/vision_language/__init__.py b/tests/models/multimodal/generation/__init__.py similarity index 100% rename from tests/models/decoder_only/vision_language/__init__.py rename to tests/models/multimodal/generation/__init__.py diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/multimodal/generation/test_common.py similarity index 91% rename from tests/models/decoder_only/vision_language/test_models.py rename to tests/models/multimodal/generation/test_common.py index 5c87cefcd8e..6e915a9f600 100644 --- a/tests/models/decoder_only/vision_language/test_models.py +++ b/tests/models/multimodal/generation/test_common.py @@ -8,13 +8,14 @@ from pathlib import PosixPath import pytest -from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq +from transformers import (AutoModelForImageTextToText, + AutoModelForTextToWaveform, AutoModelForVision2Seq) from vllm.platforms import current_platform from vllm.utils import identity -from ....conftest import (IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets, - _VideoAssets) +from ....conftest import (IMAGE_ASSETS, HfRunner, ImageTestAssets, + VideoTestAssets, VllmRunner) from ....utils import (create_new_process_for_each_test, large_gpu_mark, multi_gpu_marks) from ...utils import check_outputs_equal @@ -139,6 +140,24 @@ image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), + "qwen2_5_omni": VLMTestInfo( + models=["Qwen/Qwen2.5-Omni-3B"], + test_type=( + VLMTestType.IMAGE, + VLMTestType.MULTI_IMAGE, + VLMTestType.VIDEO + ), + prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", # noqa: E501 + video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501 + max_model_len=4096, + max_num_seqs=2, + 
auto_cls=AutoModelForTextToWaveform, + vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, + patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner, + image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], + marks=[pytest.mark.core_model, pytest.mark.cpu_model], + ), #### Extended model tests "aria": VLMTestInfo( models=["rhymes-ai/Aria"], @@ -250,6 +269,7 @@ multi_image_prompt="Describe the two images in detail.", # noqa: E501 max_model_len=4096, max_num_seqs=2, + dtype="bfloat16", auto_cls=AutoModelForImageTextToText, vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}}, patch_hf_runner=model_utils.gemma3_patch_hf_runner, @@ -406,6 +426,8 @@ get_stop_token_ids=lambda tok: [tok.eos_id, tok.eot_id], hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmv_25_patch_hf_runner, + # FIXME: https://huggingface.co/openbmb/MiniCPM-V-2_6/discussions/55 + marks=[pytest.mark.skip("HF import fails")], ), "minicpmo_26": VLMTestInfo( models=["openbmb/MiniCPM-o-2_6"], @@ -417,6 +439,8 @@ get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501 hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner, + # FIXME: https://huggingface.co/openbmb/MiniCPM-V-2_6/discussions/55 + marks=[pytest.mark.skip("HF import fails")], ), "minicpmv_26": VLMTestInfo( models=["openbmb/MiniCPM-V-2_6"], @@ -428,6 +452,21 @@ get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501 hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner, + # FIXME: https://huggingface.co/openbmb/MiniCPM-V-2_6/discussions/55 + marks=[pytest.mark.skip("HF import fails")], + ), + "minimax_vl_01": VLMTestInfo( + models=["MiniMaxAI/MiniMax-VL-01"], + prompt_formatter=lambda img_prompt: f"user: {img_prompt} assistant:", # noqa: E501 + img_idx_to_prompt=lambda _: "", + test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), + max_model_len=8192, + max_num_seqs=4, + dtype="bfloat16", + hf_output_post_proc=model_utils.minimax_vl_01_hf_output, + patch_hf_runner=model_utils.minimax_vl_01_patch_hf_runner, + auto_cls=AutoModelForImageTextToText, + marks=[large_gpu_mark(min_gb=80)], ), "molmo": VLMTestInfo( models=["allenai/Molmo-7B-D-0924"], @@ -437,6 +476,18 @@ max_num_seqs=2, patch_hf_runner=model_utils.molmo_patch_hf_runner, ), + "ovis2": VLMTestInfo( + models=["AIDC-AI/Ovis2-1B"], + test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), + prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + max_model_len=4096, + max_num_seqs=2, + dtype="half", + # use sdpa mode for hf runner since ovis2 didn't work with flash_attn + hf_model_kwargs={"llm_attn_implementation": "sdpa"}, + patch_hf_runner=model_utils.ovis2_patch_hf_runner, + ), "phi3v": VLMTestInfo( models=["microsoft/Phi-3.5-vision-instruct"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), @@ -642,7 +693,7 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, monkeypatch): + image_assets: ImageTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") 
model_test_info = VLM_TEST_SETTINGS[model_type] @@ -667,7 +718,7 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, monkeypatch): + image_assets: ImageTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] @@ -692,7 +743,7 @@ def test_image_embedding_models(model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, monkeypatch): + image_assets: ImageTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] @@ -714,7 +765,7 @@ def test_image_embedding_models(model_type: str, )) def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - video_assets: _VideoAssets, monkeypatch): + video_assets: VideoTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] @@ -765,7 +816,7 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, monkeypatch): + image_assets: ImageTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] @@ -791,7 +842,7 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, monkeypatch): + image_assets: ImageTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] @@ -817,7 +868,8 @@ def test_image_embedding_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, monkeypatch): + image_assets: ImageTestAssets, + monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] @@ -840,7 +892,7 @@ def test_image_embedding_models_heavy(model_type: str, def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - video_assets: _VideoAssets, monkeypatch): + video_assets: VideoTestAssets, monkeypatch): if model_type in REQUIRES_V0_MODELS: monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] diff --git a/tests/models/encoder_decoder/vision_language/test_florence2.py b/tests/models/multimodal/generation/test_florence2.py similarity index 87% rename from tests/models/encoder_decoder/vision_language/test_florence2.py rename to tests/models/multimodal/generation/test_florence2.py index a6ec333e2e9..b8225f5f124 100644 --- a/tests/models/encoder_decoder/vision_language/test_florence2.py +++ b/tests/models/multimodal/generation/test_florence2.py @@ -9,16 +9,16 @@ from vllm.multimodal.image import rescale_image_size from vllm.sequence import SampleLogprobs -from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets +from ....conftest import IMAGE_ASSETS, HfRunner, ImageTestAssets, 
VllmRunner from ...utils import check_logprobs_close MODELS = ["microsoft/Florence-2-base"] -# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer -# Therefore, we borrow the BartTokenizer from the original Bart model -TOKENIZER = "facebook/bart-base" +# Florence-2 model repo's tokenizer config is missing some special tokens. +# Therefore, we use a converted tokenizer from a forked repo +TOKENIZER = "Isotr0py/Florence-2-tokenizer" HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ "stop_sign": - "", # special task token + "", # special task token which will output special tokens "cherry_blossom": "Describe in detail what is shown in the image.", }) @@ -45,7 +45,6 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str, output_ids, output_str, out_logprobs = hf_output output_str = output_str.replace("", "").replace("", "") - output_ids = [ids for ids in output_ids if ids not in [0, 2]] return output_ids, output_str, out_logprobs @@ -71,8 +70,11 @@ def run_test( enforce_eager=True) as vllm_model: vllm_outputs_per_case = [ vllm_model.generate_encoder_decoder_greedy_logprobs( - prompts, max_tokens, num_logprobs=num_logprobs) - for prompts in inputs + prompts, + max_tokens, + num_logprobs=num_logprobs, + skip_special_tokens=False, + ) for prompts in inputs ] hf_inputs = [get_hf_images_prompts(prompts) for prompts in inputs] @@ -93,6 +95,7 @@ def run_test( outputs_1_lst=vllm_outputs, name_0="hf", name_1="vllm", + num_outputs_0_skip_tokens=1, ) @@ -115,7 +118,7 @@ def run_test( @pytest.mark.parametrize("max_tokens", [64]) @pytest.mark.parametrize("num_logprobs", [5]) def test_models(hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - image_assets: _ImageAssets, model: str, + image_assets: ImageTestAssets, model: str, size_factors: list[int], dtype: str, max_tokens: int, num_logprobs: int) -> None: images = [asset.pil_image for asset in image_assets] diff --git a/tests/models/multimodal/generation/test_granite_speech.py b/tests/models/multimodal/generation/test_granite_speech.py new file mode 100644 index 00000000000..96c444441e3 --- /dev/null +++ b/tests/models/multimodal/generation/test_granite_speech.py @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Sequence +from typing import Optional + +import pytest +from transformers import AutoModelForSpeechSeq2Seq + +from vllm.lora.request import LoRARequest +from vllm.sequence import SampleLogprobs + +from ....conftest import (AudioTestAssets, HfRunner, PromptAudioInput, + VllmRunner) +from ...registry import HF_EXAMPLE_MODELS +from ...utils import check_logprobs_close + +HF_AUDIO_PROMPT = "<|start_of_role|>system<|end_of_role|>Knowledge Cutoff Date: April 2024.\nToday's Date: December 19, 2024.\nYou are Granite, developed by IBM. You are a helpful AI assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|><|audio|>can you transcribe the speech into a written format?<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>" # noqa: E501 + + +def vllm_to_hf_output( + vllm_output: tuple[list[int], str, Optional[SampleLogprobs]], +) -> tuple[list[int], str, Optional[SampleLogprobs]]: + """Sanitize hf output to be comparable with vllm output.""" + output_ids, output_str, out_logprobs = vllm_output + + hf_output_str = output_str + "<|end_of_text|>" + + return output_ids, hf_output_str, out_logprobs + + +MODEL_NAME = "ibm-granite/granite-speech-3.3-8b" +# Audio lora co-exists directly in the model directory, but +# currently still needs to be passed directly to vLLM. 
+audio_lora_path = MODEL_NAME +models = [MODEL_NAME] + + +def run_test( + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + inputs: Sequence[tuple[list[str], PromptAudioInput]], + model: str, + *, + max_model_len: int, + dtype: str, + max_tokens: int, + num_logprobs: int, + tensor_parallel_size: int, + distributed_executor_backend: Optional[str] = None, +): + """Inference result should be the same between hf and vllm. + + All the audio fixtures for the test are from AUDIO_ASSETS. + For huggingface runner, we provide the audio as input. + For vllm runner, we provide MultiModalDataDict objects + and corresponding MultiModalConfig as input. + Note, the text input is also adjusted to abide by vllm contract. + The text output is sanitized to be able to compare with hf. + """ + # NOTE: take care of the order. run vLLM first, and then run HF. + # vLLM needs a fresh new process without cuda initialization. + # if we run HF first, the cuda initialization will be done and it + # will hurt multiprocessing backend with fork method (the default method). + # max_model_len should be greater than image_feature_size + with vllm_runner( + model, + task="generate", + max_model_len=max_model_len, + max_num_seqs=1, + dtype=dtype, + limit_mm_per_prompt={"audio": 1}, + tensor_parallel_size=tensor_parallel_size, + distributed_executor_backend=distributed_executor_backend, + enable_lora=True, + max_lora_rank=64, + enforce_eager=True, + ) as vllm_model: + lora_request = LoRARequest("audio", 1, audio_lora_path) + vllm_outputs_per_case = [ + vllm_model.generate_greedy_logprobs(prompts, + max_tokens, + num_logprobs=num_logprobs, + audios=audios, + lora_request=lora_request) + for prompts, audios in inputs + ] + + with hf_runner(model, dtype=dtype, + auto_cls=AutoModelForSpeechSeq2Seq) as hf_model: + + hf_processor = hf_model.processor + eos_token_id = hf_processor.tokenizer.eos_token_id + + hf_outputs_per_case = [ + hf_model.generate_greedy_logprobs_limit(prompts, + max_tokens, + num_logprobs=num_logprobs, + audios=[audios], + eos_token_id=eos_token_id) + for prompts, audios in inputs + ] + + for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, + vllm_outputs_per_case): + check_logprobs_close( + outputs_0_lst=hf_outputs, + outputs_1_lst=[ + vllm_to_hf_output(output) for output in vllm_outputs + ], + name_0="hf", + name_1="vllm", + ) + + +@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("dtype", ["bfloat16"]) +@pytest.mark.parametrize("max_model_len", [2048]) +@pytest.mark.parametrize("max_tokens", [128]) +@pytest.mark.parametrize("num_logprobs", [10]) +def test_models(hf_runner, vllm_runner, model: str, + audio_assets: AudioTestAssets, dtype: str, max_model_len: int, + max_tokens: int, num_logprobs: int) -> None: + model_info = HF_EXAMPLE_MODELS.find_hf_info(model) + model_info.check_available_online(on_fail="skip") + model_info.check_transformers_version(on_fail="skip") + + audio, sr = audio_assets[0].audio_and_sample_rate + # This model expects 16k sample rate, which our test audio + # already is; if this changes, it may break this test, + # so we check it directly + assert sr == 16000 + run_test( + hf_runner, + vllm_runner, + [ + ([HF_AUDIO_PROMPT], [audio]), + ], + model, + dtype=dtype, + max_model_len=max_model_len, + max_tokens=max_tokens, + num_logprobs=num_logprobs, + tensor_parallel_size=1, + ) diff --git a/tests/models/decoder_only/vision_language/test_interleaved.py b/tests/models/multimodal/generation/test_interleaved.py similarity index 96% rename from 
tests/models/decoder_only/vision_language/test_interleaved.py rename to tests/models/multimodal/generation/test_interleaved.py index 8804497ae61..eec84751e45 100644 --- a/tests/models/decoder_only/vision_language/test_interleaved.py +++ b/tests/models/multimodal/generation/test_interleaved.py @@ -16,6 +16,7 @@ def base_prompt(modalities_str: str) -> str: NONINTERLEAVED_PROMPT = base_prompt("

zT3lI3knnT_1kB4~#v$ye;3%M5x<4T;9APmgJi>&ZAh{VRA@GkS+kXVC=y$>!J4`Yv zU%V5V!6h%r`>b{iRANDA{&zxx1k%r6X}RJppf6-Sh0`8&Ry+^?|B2lcEG!^9QTZGe z#O>lA8LWkfxac0ZB-{J!$K`TJOtL{8bT1XxG{ZXb4rTFQI-UPsWS|DsKfcS~BKlN< zg2dX6u;a3s>;vg~3hn<0fSZFgJ7%yO4aO_0Kox;#7Q4GYOxZs$;{Wf}ODcw%rq>ym z{WiU%HJ)ZusY~;JMU{RhCKoJBrRwUd)=2Ac)g=p8M{^!SV5qyxLVE`_16wGh5g>Q)jiTbYU3v@kh;F~g5VuM6#`pQu? z)MnWgjZ~-aNhVsP!_=;yiu9$sz2Pk&T0?#_EwfwtZ(9IRCiNep#mRPJaiBKQi4Z&m z_F$Tt%CZ&~=F1!H!|ebjHuuM?}EwuES)MZ9X^FR+I4&l2Yr zWR}^B8-^_V1nq0>d+eoc&M(pu9{pNW;r;Jg@g{-?Uf%#GXqZHFVkx4BqInC!P#-8~ zIgF)jV!%8gh&C}XHmj-R*U-7Kck%;6EVE~9Y@86f+H6Gz$6~vh-KUYS$p&ekm(lVm zDb|2tXtG1Ud@$DR#ylSgPgG@LoK3tu|3&@|QyeN5MZ#>dGKL!{p+V>d2Gjj_3MMr` zdv2T($oREz0f_ej5fMcHwg8vLbh?tu>*AEFuMf=T?UkUE_u35Y;N1DKWf_IE~>jP1lu|PsH2t+1{E5&|?!!vD(%9>sK zf_4i}^Q;jRsbQ;TYYgf-E5)^ZS@;-d%9*KKT#?6rGF{_S`a+;s_!+J1#$g-v??a#u zf1oG;*GL^DC^GK)-;oW#;-|o5gpw`Fmp;M?2oobEes1s;ff_bL6mZ)jvih5p)$2t> z;PM(Rp06D1(xcf&jrx+DX>~$hV>#%`wBnu)F|}YD2B&A{W3pn3VzRd$IJojB5;Lr? zL~%z&aGupT;*IeIDeBZxsbNeictyO_-fwKiKwp|usN0-miKH!HX87a8)|f^6$puI| z+}-2(0Cn=Oxx?yFpE)2)yXL(Y}qY4aw{MLL(;8>zvvqRI?clIwk5M z3vl~J;%HvvK-lL=Gq3)lZX1)$>8m#;htdo$)+BHQ<0Ih(0cbC~{|bAaDh?~^5Ud+kr` zG%fxQOL)vUEQlmoV~_NliZmElvU8 zKsG`5B=A~^S|8fej8>}3uxdgY7gHrgAlR)0`})=Q&S;m|pD=dW3_yyZrprxCs%l&Ni4fI@!F45D4U9Jd=WhMLqUQ^kf zld9xysVId%K2TCbJ~D&><*=$1vYQ@v@^2=y_V67cJ&ki5EM4P&sR-FcZ;HJB6yV|B zrBpL#5_C@M7R+goNuW-^Zg&ex4WavNBlY&>)3uS<-%7ec?arUNU?#7m-Iy)&L`men zY{6mL1%6med|h9%<&{h#c2v@t71YNe#<;Wuwk`XenW4+RI9{!6UEOym*n)VLEe{U0 zqqhxKF~a`^@ZKkY`hV?~*8oD0?g~FQN>|y=hi~(`!05@p3m5$`Os>o@qp$W@?Gjid z_Iy)5{&+Q|j%;pjZzamN#pK{aizKdL z&C_SDEetDS?|KxBoRBQn5m?pMHt4!9hV4rT#xbZ6`|P&3>SPyCZhrLlZl&|aYo34E zn<}@T2~(o7UHWqsT!c51N_$XEe?#KU^Gcz1+>HDGN?HJGkq72A=HVtQh_)E*`+Jf; z^7A%Ik4bn56sKwIY}BXu2)G5g5N~N_=>$VbXun}IxK2E76JhHuzzXHl-WFMWr*`@( zvS9Vq$X3Yms}qgyY53|E0zVbgIL_YoPEi|Pb7zS*Pt?l@k^t*StCJ$*>HxaUB{A%? zV*=0p3&U_D$fJZHe)`|1nu7G(^I!H~t1x~?R{ARp$9B>X*#0~1KyEeTqT=DbG|d|A zc=)7p80XLCu#z)lc|x-@T7{pZsL`gRKr8Hg+85hHbeAh(EJ&dJ3g>m_GA8 zmH5sMn2sG`pbJ#I6|4K!8y^%ilflJ^JG&mKGs8@tmQRLSrR(FaT!quey8qG5d2!LRNhsd1dh% zqL-?0$2=9Wte0{xAum5P(FC^&2x7iut&E8g6U+}J*%%|3WAV4MppYQZrde7VOXM;B z;({r~VSJYmeWBx^%wq7-Biji7Buz0qm9jjsBnHR4JE}Gvg(^Gk{Lm!D6qTOHbmvEB+A>+ozZ;P!0-% z2jVVrjdPY6!dE3FM=)^7>e`p6{L$(*zI+6(dlBo3TJaAigw4Na8<^q z@M6WJIgLQLr0D)xO84)H_Dq0nY7q2h!{Ryb#%Iz0>n%VwH{)6~m&m1aBjny+DhF2)J`?QsUNUG4wR+glO zmBMzv(q9kQ$yT~hrQ0B)luchlt%*6Q8ai89X?I?!utvKOTCLiQ?1v)T_z~-WwHy8? 
z(!uTqyjmADKC3sUQ*M4yU1Rjj*-B6iQK>(!Wrl0CCf0^Qwb*L4GheA3r&0cS9gk4I z66&w!mW^t1{wmJo(Eak{&&JnJfhGk`2$It_*a->V_3kAswp>7z(wfezWs|>ru{jCL?V4oQqG^X1MX23E7&cFq7}2Edd;s)UR8Pj-yf}t z##q52-4kxym*v)K9{f){1+S!?-dl!6xqfk@iYTB+3@9KmFtjL0 zGju5_t)Qd~I?^%HC82=CFoZOypn#M}OA1Jb3eq*q0Ma?c0K<9Md+S#J`#m4dw{uN`Av$a^JGW@^A*l#qFI3(frlI6*%56F=2&CZkfa zK1qB3@0+E6v-*(j+8inLGs>wiL-v)(KkUC|Os1JE;@Q>-2dV^l2s7QM@XR#$88bHI za9o3vK{Gd~-|GHo0Muy#uq2y3tA54h>!~mK5vq(?UH_bF2eNvi0nc^EB}IR@bwG!_ z*mSB06wf6<&R!?q{5$_)EqTQrJAdN0|BklA3Sbg99mAx5#scbma9)GoEU?^Px6!hl zGsuL<{h<9f;*M~hP7vHc&gSP>H!cKxu|@@1=u5T?m4UavN4q?LZmhXns%N={yc_v* z$AO0?^mHWWbX@NDpJCyPVX_E*KuQ=9!+dmHQ!mq69xS*k9W0ptDe<;4C$qYsYOj z$-^Y}4Q)}X$zOF-{z!rYe;!|n2#Kb#j7&4Gbg*vkF_}{t&(*8)2zuP~vv3oKA|2&gegXGi69If}~VtP~S*(tBal8M*-p{^`WW z!e@Z$ByKl8qwZ@lBp3c<+CPTwFZ}X=Bmf1-<9R`DI;XqF#`=7NZf?3E*N7RS#y;KR zLSxg7#)kqpwdf#PaQH%3pG7>4r55(f)|{SqZDEzgw=LZ5)BoA84Tydi$l6>8717LA zELCW>R%%mdMzf9`m{u0snuJOyWF21==qfo)-TKucQn;l#wUKRYD6+MP>lt>BZoP`t z?6rScsSF5*d2?gb3mezUTT!91>ojh#@nYW9cw64ich~9*j`D(~w4&}Wjw7To^$NO&m_a#R1gnlLB56?kL8ezi5 z9im&_=FG=*#o6T^{GxMHGjaZ-vuq!giQnkOUsa@1d~nx`38EVQ1aYnG>^e{$!^sT{ zQNJT{xj94HGL(=14R$`9<7l*{7XJioUoM0^)XkLFZ7T8w zI{cDw+OvnGXR)oKd&9n=igjzw=|J_zgp}5E8mz$_U!R|*sm9@{CqL14DAEe~(>+=1 zW%i4dEMj*p^YYrWx6h4ar+Z%`-I-4npWmgGa(Uq6h#WI>U8pdw;d<4$sPpPu0qP1S zBP~O|npWZWlKOHBEq9gcxO971Zv5*`{$(92(KoV)hiutY!c-zlIb73I$cIybBz=XN zOY*x7t`LXBoPbAedBNBgoYzLy*1u z-8;eM<**BnNtGG`fs2<>-f;(&JCb(}a#C8e`8}~VXo6k7W^kcV6R8}1sXl-2W9i(G zUtGn%WS755#IE1QOGZLU`lKypzZ!gVHoZwXSaFSBYTy%7AoZ=0WhSCP?F`5rejbC) zlt;T5#TO4tq`5T{qq!o$tF~LL{ax2SW{q!Dbk!bE@k{3O^KT@Eeo@+LdlI48F(u3Q zh6g$N-n=y2HGm4$a%ZpqUTPSW4O?juoCqTskPxOyd84f@9B~dO|L9ee!~&_Xdlh)+ zi<#vU34{KoM(Vc^Q&ORiBJ_KT{iJ4YtZz`$@3E$YSym}?elJf%q{%UBb0sg-ZCF6|?5~l*&1P6L5-Dqw(@k=J=Oo>55SypTXJPFq)b}@z^ z*+vMwhKvWe<>Ri{{}BY2v;jBTwA#8Y2b&cDfPWinBpAe0{ zi8R}lgXJZ~uo)E@ll=D^#U1G6_fV&-mu9lKj%oIMi?pChr90`AGeguf9*tv2JPq>T zA<6W)C>LLQh?Kr#1)*3v)1)XN{xqF3(-1qB%H6AnL?Ogw`O~2c?7yB~eoWe>Xkm2@ zD#A_tSOPOMQN>9CQRHW3Eq8K4Ds?%XWJ4lfJPt+or{JUl{w(NIp#jci4 z{CAytimH&;kz}jiX6|1IjEbj=l4LcSl6Isqr4pesZq$2y1#7_^M5TAp*5r!N%TvWc zUe_g@LnlAp75BVEcV}tjI*6}tv)BWssTqc6t5F_iu8LNw!y@a?LEVrT$$Wc)ocAh` z{k>nFVT09&nxBUncqE(~UIqE9Cq-AYMefj8 zi-c>)5OGxNMqk=e;wVeIa*oJKcb8Q?T0)h+F5oP6-^5MfiP-Q{)=fT%qb(j>&k@1o zb8Q^6af;F;_MS}7Y|naD+(0$exVK79zh|FY;%Ul(&%j&u*nEui&8#$!++=#fCH{+k zhFVikFF!#H6vOj%qy@WEqy9)s#LqO@8>JBIA2*^8_s!!JBOngQ_4aa#z(v^`Y z?%9tLaTldpiYW?nSHZ2X92qG;2Bjv?bnLs0n7|uD{JPwcuvEBrMUP#beg3*i3wi?b zC#ylx4eg|3PLOQg#3ZYSUtzTLy5eo8&w_#*R}y@x`>B(Jw;f--x^9Xm!CVhy)!4ne z$^~l?b_rUvw`#Xrl4U%+JLxRjmD|n{+-Vdow-rfi?K&=lDL0jX$z`cFt9OP)Fu;uW zJ-XkvgFPFQT2uAtx!<2&SLC0qmT4&)T2GGIbLl?Nkq^B*D`7tAF+L!d;=ee&Zk4La zHSoD*+79(oCV#M7b?QB_R()HNS|As3C5r~}$!Y@~vD(K|AN+>}SG}!QeHc!w(=fv_ zE=~%S^>8u@X)RdWbeA!b=r&@ML!XHy=ssLKf72wEvI<#b;eO-^hw`V!ntuMyQP_-0Gf_MxwFkFyH(@)pb_>Qrz) z{lEtDFopIKtgRJhb=6NSWc@*m9*o(Sr+FKWFI6dca5xGdfE=*>2i=8;vW>A3AlCbS z*T?hgc!PC0O3QWql!1cJ?@nlz%O;CzJUi;uQXu9cv&s38r;c{V) zfU)Y1P|#bEzq;@Cpl7Mg`ju_IV<0@j-j;q}zjwDQerV~+bTUrlHkNgg$CEy2Xa604 zVN<*j-szefMtk2QI)i<6{hsN?Kdi+uXZUYUJ$jb9jwJZVcvvX~D^zC`cm`FjUlLRp zme-4bXPx4l4nmAV12t7MifeH*vMP(WaG^9K^C8jl(^Pc4jiN|qwr4{pgRu`iTVzS2 zYxd84{CE&8Jd9GBSRU45bGdSnxZRYFpIizP;1iZL%+V^r@>(rk&~dhZqEC20_@Zm+ zDiZXjw#`EX@^YAG)de<00n zE_6o9MaaK}OU@+8n+A%x8_G?&+yzSyDT}htuju`Fms;1p8|IT@n_|J<8Grp(d;YiX z1e_+yzKdn0k%&G)8tleO+p^e{Wb{xkWZM~i)ABP3wZcn3>F<*pT?@$utl;I6nlLs8 zvj@wW?bc8+6*gVo%iNZkY7x%-OSjb&9%rT7xV=M#jNo7prr9)cD* zv=^lYgzu)z8?_EG$hby~ZuB9AT0}3FtXn$*ap?OTGVXzEos={iCZ!Z#uNAF76oegI z7c61wN;_Z*yMO6F*+3&P4uPA{O%t6As8O(Cdh~S~LM_y?tYQ*OW*M+Joa6(PNq2hi 
zD$7(~j%Bk^rm6*@ObpYHLE0+c5YWrP)P*JAVZ3GE74cC5X54v0^IXmc6Zp8jR4!LHqHcJ(j9qwn!&p!b&hdStOX~IR^GqDy?phlL%|}wyQb8l^%H66+c&dqK2qqW zp{|erWky|yvJJ7Uv=BNfB8+IvnLT+0vb9KcM3QiLnlP#gqT4+o z0#>>`V9mw!03mO;CY#&^AdgEf6a`OEY%Fv^!@?p#quHj z6Q#3(C&K#NVp$>4RMgZk9h!5Gv`KofmymlH1L3ef7V}Gp($=V!Z7%MM__7sjwr_FQ z!bRf^$gHf7;Dhx#r2SxPM1rXz-6#HBBgq@HGU-iu+6r<4Xi`w+eAIU0=PUe5W{>Jj_7y{_nvHVm zM=J~^kg*XJc7>aGtr%wRlBnkKQ=3xlmm>bxFN~2My9I_%@@_AcD@E9XLK#jgDfe6_ zg0J_nYrA7V)u*CpZr*URuS#rCt4bAFnWoPF-%eC5IGpg;DZuQnPFoA7dp11V!HQJT zpp-P#>%D_NXz{1}Bkm0w`W*WbtvwyVN zJEkF$(?!unp`q*aO=9h77qd|9GWdVkW{!92#15LMl8ASW4;UC0Ip4Vv%c5ymgr&D$ zQZi2vw&m!}j_`w&;XAe@&8L4KDm$>jD=bxi9VMYgraE<(%H!iXvun}|x?s_oz3z(h z+CJ0S6yZCAmK0HcIJhZwGCMPC5)9}6V9Vp#7gC5?1`}TWNr6UV8>|1dN=Y+0h_jB%ymT!StYkvZnM?J(hD)>R?)c z(CDp5`x4|v|?n#pp_gUTX>s@;ze%Q12ykbo30AF-8vX;f|JM!LO;P>@{ zc3)8IV0}L8B&!mEj7}33`ONTzc#kidHPx>AuA=km@p-fQUfU%Z-xhL?29NOEbGS}^ zmhE>TWxKrxUA^?O)9kXALt|rhM05JGGy2J&JvUh|`AVqo9rozXEATm{pTckC>fQ%DsPsK~;rO{;C^yNS`W>>ean)#bY_6Kj? zR+xWzWRCxuKmoiun{a;BZ>M_haAL2=ox&XdS{rP({XU^WQn|M;eY@1Nn(hxWuaFTv zOZ5(X3*}@DSX7@Hty|u+KUiK~Mu*pZKNr%wm)gtHlP)v5P=}GK!#taPTC-Q}zq3jy zw<%o2Qn!~%&eD;{v z;_8gc@$(j=Fe66B59?4`f#2el0Po<({;p%}MmH}5-8&K$d0Zz;K_0XrnOHUxTTAB; zu-b%vT>5j&AB?UvCZ>hDpUX%CXmE>jt!C~p7QME%*0xoLGtThFPB8wVNZFB(A#h@VZ*oxd8<)l5CvtX#zx z`b*Z&PET3XkAqoQN;hX#jHBg#TNJPoI1wY#2g^=*{E|YfHVg(EqR$kUK$=%{CN;8? zhmerC7CyVZN(qRHD;ZF5n!Buh+;87LPf88W^{-etQ3r&`9Z(mnL^>XHhmYg~s&_j? z9>JX{KFoATMzXUTd$=xlWpy>O)x&}R39mrSCClN)5S9Aeq`~N!~ zwJRF(`AfM2UK5Lni-!P(Av`7ASH_&Smy+&zdwTCv(Q#;a znEBY>0M7>Rp{J&%^i1ty0bu4dehfYKRi=O@0iDzmh}kYDpk#x)^osBSTI$HjWA4bK z^<8!KqQKpRQ}F5^9Vc)$`14G=M*`Jv>YAEZw$V+;*N|COZR>4K%1cfj+oZx%fN^wv zyb^t!Zy_54{52y`Il-)Hbj4$FZJ=V@ZzJl4`VW*BKtp9gLQ(B3LHLfG81`HB_^!kv zmMV7?fCk=2-ysvCKL+_zBcL^A=-~5;Of`8TZjJ_j2Jf{XuyoDQsZ}sxzrfL^UNh}o z-o2j&aq2A@tJCgy(4@kyY4D`lmE#|7hXJ-VUiSPP(4h!X&8ayBN-v$!C!6UzyXK7t z@E@MMp^Up>m34`9)*a+0UVLo9jV=JD_0}FoY6vmMXVNwET9(}g5{mkx*-2aD)KCA$ zAcb@%St_QFn>oXKaCllKz#CEY?2wou%Oa$H>c* zuAM%2;Ly0r^H)HaCl46wvl)R1VA7XPn%Nzwtm4_-r*~Hs{t0sf;vt{Sf~5&jjjNac zYB87sxLz-RqA0+rh!EKs&W`5#a7P`n{_wL#*NF~}|Ff)R*|Ur&L`;Jh;7Rfa=ay|a z-@PYZL1gTg)156`B2G*E!DUVx%z6O<1tMHhzR~K7Ozh@1Aqe>a4f33P40CtzxvB`J0AvXp8^j$O&=|^ zY_5w>`fR?{?DPELDV@%ft^L}~v(%rxAm)0`yH9w|9r)eiuR}A|-P=Wh3FwnA`x5*K zhm&&BxLob!yqX^&hs90O|5HaL@ZS8T!N?35Z?|3UUhfH?LbD&kzndW5m2~lQ|BLUH zX2j)1gAGx90Gn8e%PT5Enwo;f3j6nDRy}*y1i_nvO>=%La};vhFgdp`W3mTLay}ja zF{ONlLVSnbi4!b$Hk?U%cYJzxN-HyV!!iKgFPEdg0D!our>AEvW_7;~tt~xG-4vT~ zfE^%UVEgWVbMEDfA<|g3tG!Ju_|M1c>+vnmOw?!poSD-XkJ~>c>g4-7omSea%&^RW zz$MZEh9KFzXyg<4;3Js4$PbqnQokooXjywfBJ=&#{QB;NYBxg9U3x>_;z)hsde zU>JP5$I$Pa;rYq^m;~|++;E1B4`I%qa2<2!ry}9F-*wbioA^ui07_Ju4Op8M>&DrH z$HZDP0ggZgmA^oL-Qj5c;j1#APli69)=INw>^(Y-CxphfXT>YF<#55=rJElKgtf-i z-6(W||6T$~u=c3+W}lqQ!I-VNRL%G60+%Lmh3@B%I%CsCqzUUtM*(fZi0&DnG#GPJ zFJY;d;%Mf)BR!#_e#4OeQt#Q{Fsnr^StYkqt(?Chpf{=Z$^0CFfW!@TOsyYRj*zEWQcmExCN=N_w(U9fgs1y>w2HEq<$;k3Q8QTjvngWwEGAHAkgtej>>(?1aJ84#dN^3w(j*SG$Ue6vx;T6=E-xkquUb}%ADt@4{L1NXcID5|aN5I&R?y6#cP+r@nFVXaGer>!U6bzc^efM=`r z2i^|W%!)1x=DF8kn3rWFee?e!$`6wT&_|4AuT6zz!=;-hVZHv+=48rVQ4b?#;OJm* zZkwcImtI~f)((w~rpia z#ncAts?P4YxObic?>2C`O#nt{V%%UG5Xip$W@Sp% z`v5wE$%(qtA_&2T+`z`5_^{j%xW3Fyx@D7%|H((K@492C&5yfQU4Z20iw3wO1+Ee- z;@yFlX~6jgv%hCf9OQ}W*yqk1=$OX*FtxwXR-6lPoRND~A1?nUr+-Th(DYd<DI z+Al4Be2ygoQHA*I+``*m2>9c(@+6>p4h@vwI05hZqs3240t4w_#}YI0@?!osTQha$ z*z}*2Gf3e0-+-hcQcWI(fcX^`s{c=~tO1PCfKU4Wy3kSasbmv?LQZGNq=9E1WuBC{ zb^MrkK6}6>xbj_8r&~ARo1Reto~(@oEyD3`h1d^6IVnOO!v3Tw$n7oo`b@OfzuCFU zr^I4nA3nA$x>XCxe!gbrgRKRB5%mJ%^ka0PgEcXphytR?E9}>f?pq^<=in)laxZZlmo-A1Snwqi6S|MH-fD)CTjx`?awT`@r3 
z$~Qo(LE4HSfPq$a_qHA^_$8roK(|iDDa)3Jd3nnSdhX(G5T-+3J55_^_dx za(|WdG1nY9JL+;9=GUY654!?Hxjwh2thhu5R6YVGCV5N+m0gOf02Qdd`wO}l_Yg5T z{zz(7IuPS0bqhy1n2dnt@&JI?6#xK<(M=*kV`*|mc*(z0Kju^+Ux1Hjmup&Z&>y)7 z;J0JBG)V`_awaX=x}Ay1<3rIciaz> ziozaZc8)MWs>>BP)iPFti>f%kPqJS?=-S?1izbn?s$4Fp0EaE^*X_%!vP^i-?MIa& zl)BQum553wbmMhsfMaO*2HH){ zmb=f7(2BvFcRYwhD>-^LuT5zmWIPq{8!!jUV)_U^Gz#4D|3R{Q%`=gr2x_fWP#6*qVt!zkml9{MMdKil@qWI{@JpRBZ6| z<)q$%`i&LIWJw{LCQ(GOkI{dAYQ zSllE~w~{c+mT^Fav_)#I@6xu?`Sj?|+M91iz$q#ww!NDukNWwZo|B?`3YW%d%-Oe^ z*6i1=GWe~QPs(jb3{uu^%rq@KdUvj69xa7D&GuU{x3@RnbM0ku+i^o?RrX7RJo>6q z&P?oY0t*1wO&#H5)|sW;DT?rPa$?dJK9p3*M~AbkHO{+7={u%ZeQXi7yA^U9$yK5F z*;PDu6L5pvoTa?N=asm$%&)d#%7%285%*1}1PO36fCjWXq~hDPhBvHtkPu%9RPb2S zo2ENz*y-_R4@zGfYVBV2Jh@))Gt)V8m@T*7_tbHB+tFrFZhzI!&tt@VuMz!3!1Igr zc4fHx0n(kE((`jjcM3&F6B<#s#M@MK*e`H?l7ItFwV@}Es?jRuxDNDa`eyegJwHE( z6kPAeTL7X+_5jTJ)k<`lbWP3PBw;NtMBvEV9C{FPbO3H0tCy%*mKz^&n;(PI%7R@c z0RrIFG9=!?LN{jDpL9>|*b6ue5`MRFg;9^C{$V13Izvx1V&MMsb0M{dwf7JR#BPZ{ zPN3)088WlA+DTFAX>H%FgWVzRqpasUC9)WSNThrkGmC`ZCau)tA=GAQh|ED~7Sw?V z>4jA}e>79!zo|+iATIPx@u2P3~6LkM~q# z#1YF&D-^0sW*Z0TJGDD)cRjz9{OeDSO>(6-;C9@R8MVh=062S+GU+n9CVI3p$;h%f z$0E1(y)V3e-fA@rJ<}2j9IWp+f*#=3QE%mT;Ig%~Tf4)(yKi4(4)+9S6(|orqu~It zO=vf#6Mzkh%KBY7@cu0<-D!WS%h787OR_um8<*#09mdkEeuqpeEC47P$G{zroiI1o zntS~ovhN{+7G|Y?>iD^v?($+ zla3bGHvA7Wmk&jK!F5*`IXJ8ERX)?L<6rAeFADhLV?zpbu}my1EGY{X#eOw%{)2nY z{MrmGht$YReshy`_dLe64;t5=OHB;0bW4hb#s5CjFj70|4HQzk1`{QvMLQNKCiXOP z-7;u%H(UmOwtfm6<$tXdajVqJA6QtP#?*7;1>Y;jXxcRm3sgZ2Mb~H4AAaaz3-Vt! z)SOh0|8Re9u6B2?>DVFP?;hwSUE8>Cwvp<-NP6RMeBm@g=F{9w3BHZq;g+adk5rkA zbRQX(^d}p8NSrLiP)jB?2CZyOSpqLn?t5bd(^E9=p~H^?9+8o|W3QZql@W8iODFc* zldE%(q4T3G#7DW;n1&%JH$;J7{>BM__wk}$TWlR%+;l4g{wx|8}GBliPw&2xjE#L!B$VgEHiT3wVonLkqpJ{N@;b%s1psHnOim`w|e76^^|o*1ZRJopyqDw_bjzUump~YamGjMhc4R z5B@RFzoZhtU*`itBmGJM==Yak#L&cqO%;GhMt4iK5m}Lr%hL3t zEp%U_4omngD}i$YRa(9+cIPhn>u9%EjYr7E2+Z|+KW~@;oE5)Ho7*2W;8Q=r`z8E@ zl|odezU&9#kiDl4R=V7n;Jtg|wwwH$8SbZp39M6x81DCiVUOlCC`5{%y zD=T8wvAEa}F+6;^H7zWY(m55{$OQ^b?02?D{l85T#71>5dYorWA3`6^dh3nX!VS1^ z0y*`0{ftmjj^>Hvm%V|X5eNh9-xwEVOX4?cPvLUiqDe6R?T-N z!)>v@Fj@F2xN~egl9l(1zLdj4uc^y@S(83GR1*`GLaDvusv9JPS!vY1hcvy5EhdyH zw-b+U>tS)BPFN3x-Hc!;R%Tuf<4hNtv8f}cQdHX4%uKon_VcH2d%63#zk9z?hLc9`tyEo`}cXT z7c_FNRTLC>t&$h#hXm@hQ_x#NRBc%LQbX^X-BKT8b-`Dbx;!Anbrl4k-?^6gny8ZN z4IPw}VKy=7;p}yj0c~bqkIA0ll0NT%(0d8mV2w#1v8|F{1I`~-^Eh@2xWE<7%%Vvx zD6c@OQ`uceJz|q^bGbRmQeCH?Kgny3-1ni%3sz1Dy-VSk=tWP45Mh_6j)5bid><9e zqe`!1shZetaN5vkj9(L-d&X@((Hhj83%!wi4SA`eg(b6ke;&W<ieo0CW;lWdtOPO<+Xc6_cKkDv$o1zEeCgi);L0p@Mi8v5rKj z1;apy=U0+>lx&$T+^f{)?%m~g3VKu;Y`vx7uUT-W^)VSH%o zmtXZdH7y*I|E9YFu7``y14}_d-3qUQB**~otj5Fe$IiILX);zesuL*gzJ0j5g%!6d zR#hzt?`5~}-T3_*v;GtlQqTyBH{#>qv(=RcDioIYLC|@9q(*#nXmjY}lcqB%#9B3{ z5m^Go{qfEBG>JQPQA&)EP@^8~6}8~Xx2+{@)a{Lx%>&*ZHWZ>1G0#K?ylwEs?^AfH zJ-phRe7kb9BEo8c!m8Ygz0NYF+{jcL5+D%`qFg@ z^_k1gA7zljy5FU`#ic@o=#+atNx6L2-z56>IPmD635RyeTxyVYcDk!z zcK#D#`%tE{pr=7G<(9&=r?{EI*bN2w>h3A^OdD6nxVgFAz2f^Y6FcT`z!ty>;C+n- zBOnFz503Z?X<2+D12zwzY`4kJIsnE|DlGckLpi2Tk$>4zu@k=yc1jGV4z|`Ov!9R%{eZu8wAZZvC`gAo=K2fPXwpMpa6T)4sy-~;z ze7f3IJ&RQ?>KTO#hardhx#UmIreCcjX%g6rIR&F;wI?G?J#FCKmj0Hx8uzp~Z>dLL zDHlfyyQr$wVW(1=?wEfX*iA{{MB)9qiZXy_`QPpweHPz)-}%&IAzETD;!WkejC_)Q zXz=2Tc_6fS?3wLz`tsJvW| z5vJ3iyvTz3h_jqO-TX>|t=w^SAAp<8zDG7HXK=MqW;C&`UqKYEz>X?L_0ZC61yMS+ zt|IgH7AWGwg8rO!A?_jF4ij(tEuXySwpeX9tj=ibmvq642+krn*%rWRne#|B zwlJ@kv+HI;;BKz=;5D-cc0EiC+`jldj@gG#d0Q=gU9Jwy`CZDi0fEtRRbP_JKS8tV zdXsE>L646!lq7#}O^Y``y0D2|Hq-B+%ZWRE_2Fm2bapt^yqrJFaS;c4>8^9=d#%3< z<@pMk!**kC#5;nN!vie^-SImZ4z9UatY&)ta;0pollscr6t}@F@O1V>D~p)uC=P<(8vc5Ut 
zBCv?d`302*=D{X{>6Q;(rVjuj1TT>Dqx&@t2V@T(5bXd5%Ub5OL00`ImXZF-CrF4+ zgk2*P#fK6(KrWV7EM9M4l0;MI*C>U^Qu8(&@)uracpo@q&E(=xQKT@J_Zf8=8xc44 zK>*vSaL945&D{2~n%-bPr|jb5YfWMej-FQ{=*2GK$xDtU-J*~;^xsD4LmHuaT#Hue zhm!lwAjT+PaH5@KhV|I~7-^ah|5lL-c9+RW0NL>kFPtqV=$|hNnC-j%2mEILB%S|O z0Euqm72j2FWFH)^)=o(nQh}_=GWCoNI#AW!33DuY8zYG&(hKy?I%TeJw)!v0 zVWt6pp?VG#STOK0YlIKHCzTnfB3p6Jch?5Nadxm>(5&3n<#Ph)rJf>*r!Eqr>u>O$ zHCo-^zKP{tdEp4PLA5i)h@Q`d!-O8`Hk(wLA1gKsB{1#Bhb|08`@wtP_kaw$id%TA zK1ncyL%;SIZOwmTuxA<;qoHptf8(_C5h2;&p|d1w(VVnae5JGL6$oic;_vP5}tXosSCq z__#;tl=te`&vIXboIQMUX4`8L;oZf`T4ZS;XhfueBoksVQ)9mMl6sp{da#y&vY7ZH zW8IVjg&i|!o z_5+X0`xZ59zeA#$;{8G00%s8PQ*?2=={k;q^(}oxONRsJw5 zj>JQw-|@H{i2q__k3Xm4K=M?qq7VtAGu5{isM*~*`lC`m6q)jIIxVbcj=a`aJc$Oi z2kEdk`qeh-#PN^FOx*UOgVH#=!Z!^rgFT0+^``?r z)b&daSLyp!!K3=vwaPL>_Vm@;uzB6Nd6_Mik)9lRSyULUSGg{0f!{S@l7m_=*8;W4 zG8dGdU*?Qcmlj;LuE7jc)<#Z^I4&)gFSdEi1M&-*LW(&SeOOsg2XXA%N z&`;-44t}$h>}$D;ueuc>$4Ldi$LO^y|9Y~&h}=*8om1r|3$K%bbX-HuIFwWOp?2<& znt`&8vKv)`4<|Pl%2`CaRw|o?6Y_@m1|E5*t-txk#!RZ1B`>GYLWJh!FS@q({iI*U zd%CP3J3b#>5sWRC*d#WvQY$E#&QLUtvEzM1g zpATdop=z#mf5hDCGa4ruao_T?Ki(DE&U8or<_jCFxf{UyZJ{xAWu2MCr~D68bXeJp zth0z#KPx<@GXKgZr5Jton_~<$wIEP?V}ivz&vdno4dzzlbaOHWq#7Cn%FU}$W;5{U zf15?>_mqtx!?~jTjY@v#8xEC1cIeQaKIn;ihg}pDVroM^ZJluz2hHjB(Rk|IJN8Bh zch&D(_90WYv1>g0Ai1GR%N;`Cv}Qr#I{j^#XSXIdL}i}GZ>;A^*PR+ngr>gwvN zLS9i|@5EpS1;z4N2RDZW=c*KY zaO8Dd`Qp)Z7usgJ*cl$t+EJ`yIOmoBj9G}YD&~|zpWfCK&1x9omNCGC1peysC`|qD;tU*}h&y3<`rb$JTk&lQuhzcnV^82vtVdvzKLO7FVNacr6)&JviRf@NVGYj zjpwD_h9&;mWka_e&DF#(6E;8pjIqSrk;BFwCb#cQEDqgC%Acw9Rmd(3h?hlP&3yfm z9}?#JZY8uZ69lg>i{wj0DwzixPTx_Rb>;sErN+n8 zbZOUe^vKQ<){vU=8_EWs>#C)E4|jq5-%{SEVcnpuMEp%(50l*T71pQpws3fM`_m~! zA02f~D?`d^eJWw^f=K$7q9RU#mq86|?lOgh^ z&7786RKZo{o^2D4cL!UU+U+6M73b|$&$wZn($>Escclj|#Pz@KAL2BG_{YT7n9wva zpaZvFIx&CL#W+MD(pfS)=-U!OcVnC9F*k(RW2Y6VzZUCZEZC76VufS$~-;> zISqd?-v4 zz_v-GdZY9)-@G5@2;8AXH&up|m|l)GSwL)UX8A84+`EZ0y0D{qQGHfLOC77dKrLee z_iJ_nbBztsr(jauM(I)4`?;fp4_hWMNoHk)YuW_aI{3LJ)$HWaD{8MdvWb04JN!@m zHeCSA@C8(_NXjLgu9fCF*$@>Cq8?+Xa$(}WZ4vE98rorK^|sDMH;7n4nboW z9ubiu48A8z@{#m zDV9E7S7}!4=||jzUXokpQ8NY-&uiaiYIa_)4z^mcK)Lznuxj2b*4pHAUEJ^mDkT51L+_hU>4ddMDhug>$I^4NZ8x@}M-jg#RoUD?4 z&XQ~3&#o^Jdkn8E<-V;_4Wn=K+&T|Sa!bmRoy#-*{+_@f0D5u|*1c!bTJzO!2kT)N zv#)Mh^eD2W2n^s|yzgw`@rjRrgV#z0(qqKWr$Bp!C0wJcFuEDH>~1KcJmNGv^}5cp zSfI+&*|B4sna^`&9x-5mVLGB8oM!quSDNqaOqj@^0N_L4Iuxq$o4b%FqN6@O&TP~w zF2#0ffiX6k6#BMlYa*e4vvJCV29f$$)fA|P*|D8*=RUYel%zTsxFa+N;TGUtxlb{^ zUpFS45%;OQt`@1`J)vZsBkBb07;vdS9y) z<4S3Oyur5i=e=LIq2-BeHKyLA^h6Q`JE9b)w_M&>X_2LaINDB7OjDf+cVS%)P}3B+ zh<7X8-!?%^jEr0YPKJ&qnbbr>eWBS|lsndPg$0h>{FXVr*EKh;rc;}erwBe=#q4iq z;$rRYGV$#|;a$%?w%IY9N0y@^NHH5*EoSlg3_%x%0D_S%K%6qdu4&D02GA*)ELF-~ ze?RDG%EF(PiPHvhYUpJXf8wDpF=gwg#%yBZuTwb^Ie0JS%RW?O)Hm9~fczl%Wk+Ia z2|LNl*smH~mJ46_g4NaHu`8PoD6nmn`62kWRP0=8O+<=Hbx!6M2BGbpk+IIMewh+v z8~gc*klIT>3WgMmrzTe2A96CSmQrNs z!OJ+mjP9HG+x{*(LWA>yV#FHF2JJ7O*Q(A^CZ=6uFehalM@)SR*Vd z<|=k{Qh-;Pf26#E8rrw-^nNpYgW_g7hc=cXhV#`EPqaI|)T_V`-8VP1ASAf~UBFQg z>&@{YiOXU(<0(x^!tmKvU~77Cg`k+gW#SOb4I$Or+d-jyqn$YVYK?KbDXvZ*~1JU~-irtU-ac zpIy;T_FmB0;w3^3;-defIWi%3L1kt2Yi4dcac#W3O|IaODV~=fJvgt>Gb+#wD}ooI z!54a6yB$I75xx?1oT76hC-nn~b5W*Z@%u}|!kIbHJ`OcCs*RF#K^JoPd3cXqg(vIF zXX6|(p_zrVw+82pA=y@1!Rozl2TZDy-xFetRlxDDBK;zIeQauRs`RiDv5~NF=}zg^ zusZ{(KsKZM=wzkOZzf=B3p9oFCQPYFGBNeT6!XvQ=|Tb!R?W`H6E~6EY_}Ien|aq! 
zX{?-Pmq}nw$l$h>t!XVUN0PrQ zNW*lD;La8yRF?CaDTZY?^-ka5)J{Y!kFl|Q5qV(U)dqD=8H%poxL~`XLr1{!%s2M% zKy33n#=Xi9hVL{>dcE;miuq%6buQ#qo@`O z>sRr*m+Lm(pL^;}foEoUp3|Or3---uvfjWm2CiC~&(|1&vv(Nqu~z!<%_rP+R?Jxf zf9H!-_n2qj3FmL>?A}13HWJI;IPPwZHOA#g|nlnYol{ zbw)SB(Dv=PME^}rXHID${`Dk8x7631Xh2X__o{VXOzc)`Fis#DTY@L^&H0uXuxFeMEoBbkSf1?iFZEW5xOo|K3F-7 z7JDjiYRV$!;vKJMg)kEAO)@GMHa$1@ot9J_4^=VgQ`K`DH+-UtX7#1+Ebx6n$SkL` z(IM~LAH%+_imi4TfT_l|DWvPD^WXn}*n97HIJ>TWJVY2Zqa;f7E;=Db@1oa4?=3nb zBx3d`xnO?2d>$B?N!co zuC>=ybK{mer|yHCJh&1!Y}n|QsM#_#ASV|jAl zYV%%RT8iyEX+N=;wc%i%cgdgR5)ADR0!-Mw=>eVbF{=K30q1XdsW%ab*;FI1-nX>d zF2_YT;!3iC>J`(lW_fW0n;Zv;Yjz7U2L&sX(=L+!INoA)yp|#gKap_eNqSK?%kw%J zK|-nnMI8-g@R<{1QB5I|T$Do`8;Y8GSuQvdFjZTM6q2zO(hZd!NR7)(L>qGe+< zDAeyWh4&l7WB7e{Rmd(HH&aBUYwz1VhcwDMn7lB26(>9Mpt`-*Zx&YJJ+-A2T+@RT zeF~J10Gi0_|I$Q2Po(8K(~kvAisfw%QjK6{D$WH3%K41pl+hyd6q6A%x)^AR|wD-q2IA6~n0%ny~`X3$( z3fB5LeIj=s&*}Jf1C_0IkSjY@y?BKEt?(<(d(Zz+_^4h2S?}(VXbPO^y_}*$TjY(t z#_0#Z>RmF%(m;4vmK&Vm(>-ilBJ2hgljM@)i4?Zv$%>OcsD`{EYRGi3+0X$&!}v1^ z>~DCfTf2TZotfh2TYFH};7Z2vXge^F{Lu>~GXEv5b+)e7)+cZ8yo&#%*kaFZeG8UU z_@1oOsvzB>(vOQ%Dr5LvEwh8p=0}P%d-QK!;Aei=mA~r>bNIBzDscBe+mo3Rw%28= z(h?t4b&sT$=ial}1eI)ZA0>;Y+7__m7mo^L8^~`jZM_tICQ-{~hz-4<`=SXnXpDy* z2b}xYPDei+p-(Nd(;O&%aZq+*mqkCK7Q{3prq(0w&z|>w@SWQ+SMM z-@1@6Ay$)PLh|qYCd<3?-|+rHmP5jSmxFGk6=w~o4fL-a#_~QER?n6#&Ac|U(8lE@ zNN=FFWcTZ*Y(NqvIZwsTI_%b6A9$Gw-n|tO^mp5YP^+(*UqXN0H|{81q8vN^I(4k; zKC2`(hv7CtZ&zdm((~GHYDuFnvRl(oTUyHlrJs(lfHqd|JMWKPK21ZNuw1${$EPSY z3Hr8iE4pL!7!)HMvho#MZu0dAC3!Rwl7H0xy8BI!tU!h&EaAft(_@B074!VM9@$zZ zzm`q9x=dAzt>Iv7ux+~UivN;{%hXvr&CYBh5V$U6eT^@ZUrWq*N%)z!Gbye#t8gc+ z8#Kh*vuFJ5u(~-t$0fd%kbf?F;#xQIE_rEWI#K_q)Dih{x~*)gWU@d`y87{kLv=## zl%p`$so2S#y++}i=K}Ffp%(yiHM=kR_}$-uH-N$E!aI41KgzKd4FyuzCjSiSxju_2 zfm&Ch+1JbDwu;>9EY)4)1dLbwoA;?c4QaLf0|#cKP005=s)`(h8vbCfRSQ3FO|?2b z>ij7ExNLu8^Kr!zSFSVEds1w@^yy?~PCR-?4LI@8{0`pBx!G)%eDT{gS|53atyB%X zH3;lnytIp#vs^IKS4%YGt7DJa))2gOs9hkvYSXCW_Rz?=Q|(Z$XnsaO>)!Lu9|G`q z!D2&2yRo*GGHseQzQSdvJ4(P8$>z84UM`k-3gLp%-Zkzf{#sX7{)N_02El##Huv_~ z*oJ{-ml@bNmvy#wFjRXz^Lv3u<+|r>#L>*=ty?*2D2kNG;P{Jj&5G`vBd@{H?_=1H zf)~D}0`@4ld>bpLT%I*lr(r{qbpnWa)nM5m(50z=J$>c1IYTTq`GM zda3V)sM_HSeVNa&`Y-zNx-C_yAEGx;N75zt3|9nxG-Tkxep+9cv3g&=paZ1wT;V{k zwExrZO`>=%`YXjzM)Fsf#i7cq#j=OjcmQO*f=5EZ2_d^qPO0{|3_PZjjba}H@F`M( zVIAs3a;PlJY&5JAq_L?md5;!ZuJ%ZIFsW#>xLwB@>+F75RX?Z8Xb7ny!OA^cgd#tt;6iL>-F{TL3Tz>keVv!z^NX?PP- ziYCG#5wUZ2c1~ENuRF~t;7;xpNTN{WVB3}yVP%aG?Yy6*gf>&yW`=Tpr64CKB&Suj zBEryTKi*iP-yuW345aO+K{>%V3^eFel7QsOv$b^>EO{I1I_}a&wq){n(0P}hqcn@f zEykrK`D$|i9@}QpnSM-`DM~dMkGw-u;_VByDzc$24rs5!(!h1`7JFwBci-J93-j3+ zz)6J2x)}pA8vd4sFWKOEzW!Ruc@vBe0^#*dPB!0)&J_h+_1}X-B8Nw${K#^+3u^Fm z<03&(6BEdEN)`n+Q7Y)@xUMT9{yp9>6tgoCraH29H<+g|Jgw5l zrA&aeOnFo%mFFJ&rl@*~D8HW!(Ys(*i_L*>X`Zu|xkC^4-AQGHQ!3vuv%}b(p?jr8 zQqYHWXPzxnVGtFNFcUKVDr?zZTU)ks+#S0{0Zxm2r43hNny1R+-G@B-N@Sb87Q;#whaWN^E=yy`t0U@5gj+HJTaB z6HcN4)=Q!XMMOqY;4RJVB?cZ7e)jPW3$xkMWLHKXu6Fw;CendKK^tmVDys8GmDfA5a1TV0@Iw$R*iZ( zOgpUxUkGbkD3>z}Ydlc)G|EpQ!n7OoP?oYQt#AoKCOva06vheE_3er_$GTXRhWppG z6LcTGo1XOB5ESkoiw#KBBFsjq45V4O;y=u2^M8jB$WUK3Vp4vf&atUriT;^=$bR=| z^YtFDe~M5SLD}IkZuzFc(bj8!RGMd zjek3Y3zTKsWrwg$cpOpKvqv3GG8_)Nr*4V9ojrhyM&0hvhZZw(y!3yQrpF8&%?(Dd zIGF8x-hCB6CXlu(KGNoxK5V8yW5V`pke)JA?6f$4CiXT(wZiCEW9wq9y8Qb%MN{i@FOoxS#y=wnQGb4f^EUwq5& zE8Sy>pHwcVA9rza@Q9zH=4#WhgWP*n%(9&NU{j&lJG2x$N#8*!qcO4NYOh$;C8pa3 zR5lpY@?Kc8r9?g+7xLWrS~7me^M$394_M%=m1|Hw7-5p0DxGa!tj?~77TD&{bh!R8 zl0o{Mf37)tJlw1J@~V7UX-rTG-DPX~iZ!kF%+qcQIC1Ci5YOpd8(i zu6H#|Nw!%9T-`UP>`j7mCH8Ucq>CqxX=WuFOqT2 z*6eG=x7+9Pa4>p2|IEoMfSuRv-mm8sLxU!24VAg-1MlV5Pj|w+ly5n8HEi6H72FJD!m9}M$J1km_GpGU#5+`qX-s0 
zIbNp4YC%{9Zd(&Rf9b|pQ^_;8)FGfALG$?o$K-A+PjZt~w{o&T>#hDUak1 zNAKSL%7N?ghTWDdzJuESof;)h=Q+7JH)i8D^A?HB4j_Wt=61rDwc3F*a zfHHx1AzJ4xFDmgTG4=QUr^vVA+^cR~Y$PV)iUq~;Xxy**?6q=g%vu*PqV!|kptYgG zA?Twy4J4HFRgb4kC0izSdZ?9)a$pjh8bIHQHFQj7zXtMd*t$qi!IYJvQ)CuKN}j59Y!8%_$N*^zp?xFy8j;+MP4WaHLS_RHRk;U zNY=#%>wQB7MQ=S;)|+#Tw)NFAA(TThb0NV4@9ea&P^$-Dq3zGE+h!4`MX&bf>cl*1rgw+zf{-wC^Zd;A1!K~w|f04%xX=qhZ}s8_I-Etu}8-wEsnlr*-xOHQ_IFD zf`2+K{H{dC8OufB*I_lDi9Ocj^BHR4AH^<^{lHR(db_A;V}hA13XSM zf62O1Mwm8dC|2UhTS3nft5J(aE?=A7YQZD*ywKf*oX`syzYB$)>aXqeAuASJU0L~9 zAaWRFo-}X3gDnZ!*4*(~m>n zRgEO&N{2#WZlokTks9*jWXFIK+EZQT!T|W_6h+u1w zAkOcGh{t~;p8nIi0rbZh9CP$t%?4fzHwH(jmAq#j zDp{p16}*A63UN0JTj3eJ=$XN8zUv;oF~|}vo$xkGY_M4qsG^_6v1GK1{g_U-bK(~WHlRRtt>C^4MU4YDhE(Y5x~NE ze=l6oCi*fI_&qUZYcki(P=Ul@j$m#$3CP7-*T-qu2b+BfAPPZ*so3W`6y&?nJmLBj zC2PYmS;OJnuVkdmycrD5(FPU%jAM%4MnQw91dGD!9aXvW6kH=J^k{NEgzpQZkx9B* zO_B8R&o2$uHl8FK8dNZL*nLVCLaBwSNMP0v?mdvBmJq(_H*KaFt%BDefI>`?`KWCe z=M>Aa16}A;5M#ng&%eii01DLJ4F`H?;K=NhPyDeI{yXnOfuTbBFVlt< zH583xI(1?N!&6Vkl@FDz9OkqE7_KPAy0L2GCQXz0BbND4M-im2=sq*2Lk;zI;6r8w zsTZi{DfcQ(=A*9)q-Mi)Yk~Jqo|6H;tjxwnTFeH+f}59qnRX|psYmf~~)(oaihwDr1HE%8uW52aw^?!_iE zyg-psd;_^7e`SU&AE8nywET!!`it37)hFo@eN0nDTRUovgm`v1`~3EOT}#58m)23# ze&dm18Nrp7a^qrarGqwB$#w&?aFd`xPU8`{G-Jl;PsH7aEelgyVmWzSgci|2f3--M z`(;qYUjG^oOchJz(9dpy?6nVw2g=H0DjRrHMU0kF{=H!5qK(Iji zs>7M@yStzBBD%`^G+&T&bL8CSz2-*HfGW%_!n?uhY$rKV|B{&X#R+N>#xzJkX~@aN zapZLuxWeTKcw&L8!2!bNGEvrn0GZ$eRE(l^xl!0%(Tqe#=fQ zM7uL$`;!7NNIN*{hFcZG-Q8NA?04edBpbt%IcgU~mWKncB@2gF2Vh0@`=e%sM!?&SYGE=&5RB(*}n%%brwaayhM*`+_ zP)g{am>Iy=?%f(tic-Q;L1qW8WH7LWgjiA8uZgQzv4w3fGxp>Y=U zV#~*0C_|?6yYH7;ilV;Qw>vQ`+)ssYmLg4cZ@I|UAq>qs12_w(x8zCQKJsCRD}0Jf zNrf0-gVxSv`FoitBv=5{O{hTow}G3#v4hPFNEyXU1HPITdt;AUMF&~yz(5X$`{qhR zxR_3P;?Pr>qY5jan^3+JP0K+GdftP9Tz1YgZ0HaMJrwRTn-m&vUy^IV6T)zS`h0I? 
zARwbx9k2`?#Sxl6UFf|l$&uA@RKrw33VKky4q-2kIdbv}OMe}U$pOq607v;|%brfU zOm#pJFgm-^K>wj;{yRsK$kG#aEZ-!X@JMC@B|%QnG?$lIW^UOTxIrSbd(Ovh8ybTB zX{EU;0gw;t%erk>vVokQwygW0V)H>rV)qfJ{l^|_uS3i_|K?`*tfEl^ ztMvw7;k4PHgdJr*hKq?J;ou4N^)34239+rEw)kgEF4pBI4{$Sq8fyn1>(xJ(!5Kar zrY@uqIK24)C2TOM{$uz{Jr)m4$umUwA~pgZ_r}DXnMwo2d?!|$ zgo2=K)J*hqS*Q;EY!yZ`t_o&ZN)x=yX@D+edSOQ)JnQBKoM#3 zSt2Nd0T`fdn?`lUS&cP2ii~{joZyqz;H91sn}M<;Av-KRPKgx9BV*nA-JGgmyW$Gz9imp)XV*@tX8pYNK61Hf z$((_878C|#ghS{XOo5QqdT*5-04-dF4hffNH{gOOd)P5|M>t#|C5@ZMM@VKlbttTy zi%i%s#ja{3-KzKa@T$I1qGW>N>@CPN3LQuk9nM0AAmW!BN1&1OUA?Rc%{?jzFk9)?m;^COgR zn|gXaZN3<{h3)=isU4s`tn$D1!QWdiF;rM*mSBHkBE^y;ycM>4$dW;`sX5}eZC(N( zaazFLj9qC2W+nfPic9LPq_80%7P(3;G(zSy77`a>wdt+Rm%?%wUONm%VimQX(lfI1 zC4mFFxD`tnAHN*5zdJemB&p5;#Oiop-q_kYpKNfX>@HN-N)kA+X#kXd8#x+pAB`?0 zD{>@q;-t=xR~=@4dx;fwn((D*T#U|@Nb)wk zJ8RobwV94-C3<>qRRoZACXPeXAF{sB5ymZ#r^iXF*T}lYj+ut#q!4h3S3X54D=WQY zy$z=0aHmKtFK?%7+vcJbM%%eeznn<4=y3jld>bkBxq%ZZr0d_O?CqT&d6J^AxDFBI zgW=r{Xl#p-uNdm!R99K1;Gylw)EJKyPU7XrYYd8!OV1X1RW@M!WHzvhDTG&``u26q zfMeMd4PuRkZ69bI1Maup24q{+=fw3#1gTP9EkB;0w>c&1D=t=7kNkS8mX*0urjZqj zdmtS5;svSy7;|vr^7FcLx=aQxju4tUMzQDL3yh<{%aOS<)9qqIDLgGz_#cvk$Jtm* zib0+B8Yn$Ebt*;|J2Ej{hwF(j@}{1|ahImmArsdh2;m2?S;><^YX?KivRZ8Go%QRR zb4{qcQ&4(>olfc_yDpbGy#~(f7rbhXff>Ce03Z>V7<%x>nt3&^;3TWX2D0?}se?;l zdaDfu$Qg<=J#=&Wu0nm3-OyKUS#K!FMf`5I(q=ebobqj!z9%CLE{3Pa6{@qDYDJB*fQ6L$8;Y&# zVpZA^n1SSj=}ZPyj^Zz7OPaDX5;x4(Vy7N%Yj&yNa=cd{8#i&K@@98@y z94U=6+Yg(cViZ3=x7UeHnV7gZYfq*}#dB9Y+`Q)=SLUK4COF&P@D}TOO45-UFWe}w zDRLxeZ&}kebv@=bN@1;ju?%ad8^m~R?1w~*nk<1`N2$>A9c$n!?ph)*&^Xs&=ivW$ zdjq;~>gn*FxOfu2Ou4o}?0Q!x3KtsAz`y(Qvpy4^vi}QkPfwyrwiUaUCEm~zF{{k) zBalun4H&}SgyZpH=*upW?C#)cO2rtp1dUZxa!JbFg!}g1n+=s|2sVYr47#A64W(~- ztVR^6-|rQoK5Ox)4rfbNz7cyN_uO)Y%O|!d&>}(DZP`{K_%M1*;-kdAi-*`0#lvJ< zTCO5sg2q2WAse7iU!BZB2OVX*rG_R_xarNz@KCAL;jyMNuI4L&5MNa*W`QS5agR}2 zkTM2HY+UT~dsJ&F=c{3Lf`S1C0mF$eZ!(!Mfmgp28VH0E}q#98_K|X7EYRvg@+qOn*}L2~Y{S;O9OoX}^j zPiy*$kzR?F)Szh{V7^3=y8ZMMA!jS1uDdG?%K76QJNJGexUM@Z6Rs8KGabY~>mEw4 z_I0iMB#|97Yhs^Ce_LNkcGfCUeI~2A>4_ukxQIVs-lMrGa(E$kj^VE04!bwxx5FiG zwEV;2KBrjya65`LKo7TY*|s0^Yic?7%Hr%SCfC}z{j{4BS5V|m>~oB3VFb+$ z4{~#U`?WBhw={Y}6wy*<;0AE(q~Kyvgt5P-Dw==(dN$nfE3Zv%TBY z(Q%{hYb-nxX-;$;gL@95LG4~$cfCfXCy(>jkJV~oa4cAQoS0pN+a^yzb#gH&LUt2t zF?l#F=bF?XEQ!ly=``jtHy7x(kftQHgI2}3wYw!XHEca7lFq|AkLAq`F6Ey+`sH%( zUEucQw`V2r#x8hMp2*>ieY=jmPkX6DThcp@Ot@9jvWcJR8(`oV*`wfBOI%~#wY@%g zBv4qIbrnl0a1wK4Cs&RBufvXFev zme|=bZ`eQX5cYdS#P+;ZV>orgka4VK$;GmIETl2-ViO&D6af$LD6N(j*g8*_^9pIr zu|N7wvOqJU9M(TvL9znP4{+?z36BI8LH5xn=f4a1y%g{0YoO~QX~S1ou13JEb&r~M z`PjxBmOb5oK|LhC^O=r}M&qGfbt|g~S&5E}Az-{u#>{N)SV!(zm!DqjwDJeNr#~?TU0Zl+mgQDMEfAH5a{O3Sc$BDj4XOQTN@G%QL zy!xz2Tz1TMl3(%6;ufq-%StOPWWIYh6+IHbnCgfMTyA<{ENks#)dNz1-qFnXFljnj zQNueYJQWBH^5C*UTlHt~#PeKaJIkQ4V7~J+rSL*re2CJ9w9bFe{`Z&Cn67^0==Kk; zX^vP8Gk=^V@`%R^%j8ksTS>wQ%_o6?yhZdrhxefhB@xgwdc#NbEm9S$9jRCg3#ru924E_mZjI%hYoJ+u=s1! 
z5~OD@LYHSVce0$#k$&iI0bU{HUS{XmmqUNofiPTW`PIXXG4j`lG0El(N73TZpRsC< zFvnaxFm{Vo3P1quPKEbKtpwgR>^O$N;DyyBB3VakCRQGw7F0KzBcXG1w7I?&?yn}K zq;^v6=kqo;;D@C{lqkdp1xLrT6Q1q54dnFZCWAHE zVGJ%Yh0w4mxcc4gzZp;%Zv4&m;4_U5(zt?fd6q~T)hUP76c%k#z%|^PW{%Kvk~Dbs zt#o<0*(!oowbt8>a%eNH$do%%Uo=y|5=MR}(=I_j|-YC@k#)cy( zD4Mu}BZ<+eQ}kLL5uTeB$PLLMG7$u<8p zIJj9%+S!>y9_rw2(0_A#ExpL{p%lg`OL0Y{nCC^D2W|VeJnT$q+#w5laV0FguGXW(Vd_u?*h9%f_d!wl`mLxvvTr+KxJnV#SlCWheiF z2m!RMHN*63YVagkA;{4|Z#r72y39g#PpIxK@%dcAGi;+w+5}?CowV;Z9_N z=_GT^dJ_p*G5z+4IHGlKwG`hRVd>G9)SKmR zl}Ncknoo4gmA=$BHZs;>yJ-7jB~wadsiT=oq8ZIYY@i-l`jPqcANsj-TIMsiRuReE zVymkUskyvuyy_#ue zoRJwaP!erAQyeX0>YTez>g_!uZZw9xj(vGO-6Sk?3?X&g`CA|Y zoY75YJ+)dy48hztHZP@@vhD555AG|$UWy@lV+x!eKMw4?Pb#!6-j{;*fPGF6S0@#n z_i`?SAz9ut->EFr3pvpu)por?p_XA2RqLzuDKUW2Q0%EGmN?Vjo%0m0G>xQNpeo7Q^tJTlX zH&m-Zh=aqlb{v{DNzJxpgVa#q0@Tlzak}yMgOabDBo$-3XHInbU(r#_U}!03yi1oo zRnDR`>5Z-D4@Sj5{4^Un4xTxg&f^o^sm17?KKkdA?u`r&Ak)IEQV0GtwD*uXjVPU=(Os(3GfcY z;G74`xUFW$ybb+O{*0&O?Kl7AX1IrBE|C%yDU<5Vt3EO8_Qzu%*?jg&b^5D z4x)XDUz;A%d{7)tDSG^Uwj_{*Wq5gO7Hpu^e*+h+NZOZ37l9J_Ot9!$kgA~Wp+p+v z`lT8d;s5j_kQ(3kzCIgcitANpJvO$uOdo?lOERzE*4XMv{;bWCYRzv5RC4 zZ;;$Cmy3K(a@S<jq{YQFyp9e75cj}|oHIfeCIsTRFTfeYRq4-Clnu+0-Z;bJe8 z&iG2nW`7tyIIRy6ds0zI(H4AAV-|di+M3epDobCb8CP5KOtNNL2Ia=l%jMSlk!*^k z?$+)CvMo#7YUdzdZmGpwM@eIw4}c`&Z?*6 zD71=d3^D_ClTSN$%s^bi5oI%b0u1!KEZp z$lHWW&{1aA716D%llG-D8P8=|`keFT#l=Qw{!-$pR>#v$uiC00tB8djk=iL8@oC>_{td8dM-?Lhi+YWfnEqkVI>`^~J z`ENhWT$D7GyOp%t<`uN0;MjVuav(g@YA7HWkpnlG#5hg`%6l%0I`>2(1K+pGT*G+5 zIst@aQPvv@Z0LNX{W2z!Ai$-i1FO=DH|*~!#V*#fjlF`4Q)Tl9om_-s82x%KVEL9S zt##277=Nzq!Z&FD*%nMwO^BSi|9Pmt+;W>^+j--X=hz0k&9P;y$?>~Yco7i*i|>zK zT#jI(obueM(_O1!9vyiQ!R}mF505u^lI~m-MQ$LYJIz%$S?c1td<7K{;_dK{ro~`~=H>ByU$|XyZ4{zf4 zK0Dl?Xnn7t7)EkU1U#I~0+}kXr97_%aK%d{Z5`M*#TPtZ2WST>u(#clpUW z`;!WsYejOddJ6C5C6bGjo=i2r9>q%9MJbwu;Q-RRrEl>S zh9)PM>$&=gHX)9d04&{B1L>=gYI5g<@Z|XWN}04~ubWyg%929AxSb@2m`tAf{h}jt zG%t@tH>sk#P05@V#9MXe_G?lx$8L0Ke&lH$e!<$oCm~1yMR@MWNezFuMxOZWPyLM4 ze_r{gM}|xf5Z@f=48U2{$_2d-GXrnkClzxAy%jIb&87Q+@GgvIqKyUT36ZZMgkqDP zC50zBN>)0G&zIXOlWL13v5ec;t&r$(r-EbjRmKB!A1Z^Cf%Yt3o-$bfE2cyOh7)+N1(T= zpXy*vw)r(Rl55WjB0R+2XXCSpDOV_fZSg3>l|yC%P1LR!UVTpYvtCAtL$yfgpqoGc zuvB3((0zTL&H*7G^FaXffa9cuKT4dovi?OyhUrcXj;`e#KIg(FcJ(txMs;Ct0qXX( zB-%i2(fOnXJ>^WQtw!Oa?BR7%hOEzfjr1q?{;*F6oB8%UkX!C58_d_B;D*q!i~YcH&+QhaIJ5&`cSP8;lLlSVFxUA$aiq0U($|Bf(zG^n@=|X;+k2lf zMMQTiH}131yS{O`N82*#^2TH)sRCb8-*p9mEs}S)n|jneG5;?FRyUt08-0BX^rTTcU%nyd25mh;zpBLxOy8#g8Q!a;j~gpe(T0( zOfaj!dH+c2e5RlTpJ;};|CyQUMQKT(`2*5K>>Ji6g=D34R?XLyofy#ekFK*BZz~t* zTW^%#lZ7@D?PO=o>C0e3NOea`=T*H$EjQf|giw&td?G8&55?@=9E^dLbVJ$hoUtKs zoH#}IH@gtDSFXL*LUq-~OsnJh7pX;|i3{#Ki=D5Zo7a>wT2B#r)NUnS8I7Son3j01O=-Dct9K% zR#Wp8Xj_>GRE(yBA(&(A30*-lG&n~2=P5X(YG_jVhm=FpKtBLkA4y;GSEZyK*`-pW z(`;D%XULtF;LRw`g=KeZe6^MYm+}Ey7sH*JaFTEx&#}fP4_Z)5k&ml+`Qw@>v92u% zcTnXQf+;)=-BG)s8vBwbH8tdT9}w zN(cg70f9ggAeslp0Z%+HUhk3k09@N}d7Hz5kX;|W6()ndRdfdG|0)5k4#A^vFAF|# zo>Z1d#UTwR#R$#3b`5fUs965NlkIzXp=$8?_hOQWWD-a27<**5gB9A5KvFTG<4Mb9Lv>>gCo>i+0T`@wB{dA=MnZBH#gEl@t=A@V}|Lo3j z1&%g<4@|z&mXOWHHB0Giy0OYM&$D$S9@_N6y)0L3e&M!k*A)6|wnyO)I8Yq%ySVcM zAe+|VR1#VMXC=ha#(RkdayGKDw=aqU-AVMeajS~Pg8*;;A_$6v)4Qa6R>J3`mG$Op zI~VdAt5DD)4RT7%-l$M`_s9cA#9H>SA4a0tC=0xVM|kU<(W2r?X`9mw8c3992SU?K z$41_lguZ#aJebCUSZ+M>Z!~+Fp$#4|TA6bk-E>>*cyT)f8gvqKx*KB()4Q$dXo(Lo z9J`}#eB$?yV#Vo;^B-L;hz9xH0gX+^c{|W)fhB}C;2R{0o9XH20($?^_bBQs3EP&+5Vsc%gRi^t>-Vjr(%-HC~VxUBZzJOnT@Fv+*K?SDOv1!T{ zlNs@nX*d4_Gf$V)plR+OZ97po$G7mG!JLRa|eKF!WT%vJaDkECn)6F2D5eNcd7cQ`ZkKNdQ1 zX|w8tJ2qd8`JtCv;BLfYuQwT{8TT9i0pkqe(o(&Dwoo7D;c>;siSp{IxplYBi82D4T*bz#xgUs@m$te6Pu8b8tLA< 
z{X~7)!5?-l@{dTovbH2o#xT|E=~GW#PHlURL~`f+^Qo;DA+=!&lkGoeYcDN;Rdx^4^h5AzLxIXMh8&3OGRqZaOL&e3# zUnyWtD(Sz9c0FUP2*c&#w@LE-_$uQM)lBK#7|Y~ha{@hl;-rHQSrSo6Qlc?b%K7SH zVR;Vlypx6n&KUJs8SR9}*pMZWy9#tmAX zj@-&Ux7FN|e(MC8yE;^wy4P`jM>PF#^m3BDtx%UIK^4y0=E z;!?-U2Ux}}-wa2eY#-* zKm`v2O2n3brONmc?u#b+<_^aO=gEPdulJ*1M`s8^iUKL!bBWoN}5S=U)C7bZk z)U@KWwyal~ELRz9w_+pH;o-wn86W|vH3W1^5=X}3QYfX-4D-seW7od53?4wlT`2?7 z(vx!Rs(oH$>dK=TSRczkA^j$VB1WhTAL6(8XhF!i+Sl_9u*>hXe@8+9D6P0+O*vV9 z3pW$(!T1Q?rkGB!ng)*k#t})snK1BKVUW++rm5vVlRv$WwtL;w01IlQAST4r-#|Y6 z08o)ZIfB+po28;*?=^llw^-#GrX-|Po?58+rsX+v%xdW}ql#X?aFlq$@`!moD4Zr) z>7kPRfULVO@T4XVKs{O~fmeoHdKQdNK4}^TA0ZbX&n{KaVQ@^{53nMmB|YEZ&Xwj| zp+wcg1(kBy@^3yiviV&PBHn26zO1i4xE@K<_>$;Ydfco4L9=(E9^X^X$(P3k+IDWv zv1vY#5AXk7Fr$QaUZTXkRj+OH8?^Y)E;|&YYER0S0vYhQ?=R`@Px+3XZ|!Dly%HY- z&_?HXaplpQ8tqF*BO#2VIx?<9J3L-nS2(_lq(hC@=CzkbHsOPwhKH@+mg!a+&zN{}=z}KiG zGUP+j&Kk&koG)$vf4(7H7UYpMU{hdu1U_@8$T)!;#|L_%q0O3Td^hxrPC;87l z!*KvnUwtpFd}-JJ*$#~ofVN5ec8o6V?LW`M2Rsvr{1LF&%NwWCvmpUe3t>c!;s4L` zn1Q%*WkTuy{Ez&gm1U}!8U~NP@9;V zcKlT2`)onH>)*-n%lEYez#~v^Su7gdi69yThilqHaBZnc{EnWOH`yCGq!-{c0JlaEfJAj+0Jo>gT+0ovTd_@%Ru;e?>Ix z@7#_BeR$Wy9toM&GD;l$0z8}7Emh~O8KISWcj{E!KxC?q!$e9P+tT*@@bgEHLgNsZjh2tE{ zHhy>iz>XdY{(a*>kP>LTqM*$*tJMb&y;i7W`ZzfxqkJFO^dk}U^FN+@jrz;gT=lH} zVfxanGw)pupXH}?DEO)9!}C8Go-DTa(GUphkdQMO0%Go%@j8!3HH$kz$ep*we%#hs#w~;BX}JeEy`T}r|@^gX<#l~dPepoe96wv0j|zZqn_H2RS6BtU()kj<&+qlNPieUEf1(} zxHn_z-xUa;?XKW*yk_Z*o7>)YCG};!!@|4pX#wr;|M)e<7v*xL0#S{uY4~lVl>e=l zo9$1ufM?i8bX?{mfwBURp6ZJ7LqkP)%Zz7;#~btbv(ZskLQUX}c&~v06rRpOW%!ch zyJM^2ojIRmVEx+B=GnO{H?xCGhh0Y z$5kaDCp`rkSo$~qaX`^Jxab`ZfWP#|tRF-N)UEE`4@&5LbQ32obr=AG9#6QoS@p>b ze5|tl)4$Ah&U)vf-mM`H&M0GfmGDE(rA zd-B%r_9$o`TwzVYn;4wE0;uYh)01r$j_P;Rw`}JUuxF4lwDis>OB*FK8Fp=rd^!u zyrTt+9kk75;r(h#tkf~xn%ZSQHc5W_pQ8&@9~_Zv7(eGP-S#o9bM)C7z)o7=^D@AT zA13`T3l_xkt@lju{iNad_c5qaL7sE?JGlWyrovekn2#0`@wOff6P9@aN#Q4&TSSev z5*@W@&mqm8*~W_Dz|nxAjRUj({q1Mo@AKE+B;dwbzNIKN^+&B&cUZ+VH^eyKG1XBE zA1?HhE*hM&{1g+j^eD)+muR&dyVJ`*QxsEpmI68J&+>5JKJDbo2KqNa?6(2kv21Yp zuQiBOFbA};Lj)74#s!mZtSUo()B%M{&uiRS6-6^_G*wuk&T2S6Y|!R*?-Fo+TyEm; z2l>Kv76LTHpP0ltr}~Ttl#I|srWC0=pZ`rW3})p%B3E%UY50?Bi;c4WVMkMVm-on( zrZSO=E4}-2kmq*xXC0F92AZ9OX{6stC;ElyME6NGN^JvFBTE28uxzCM3-ACWBWnCw zMYDEaJ^yBnHs9FFHOt{UD!Z~5(3~D~oaj`6i4`|dR&-_33#(Z}LWzGtz+OhH31GqV?*=%N4eqc@G1W4c`^YSKu6Q&?;~J`*@uXdCD#w0Tcz? z`4DkWU8BEEMW-^=3#x#5*?^6%8(04{P`m&9eocDZh{7<>K_Ocp$ai#f%}WzL6Bh-NGK->Q>rCj47*0TcflJEx1K-z4y|;+RQP?aiyL zSMN2nq)!_LH#Dk)*q{G&AAx3;|CS#M`xC&0+i=9UsgoRfyAr=@kd18~2KNk9{sh>e zyy+bOk(1n+qb+B0*v|RvA3io_l5bnB2Ppdd`uc1>_N(J_BPq`&tKz$^I%lr;%o9er}t+(EhsxFU%F7@t5@`rCd$ zZv~TVwuw!bfsGuy3OuMHeE!z?kjT2;T&o&jTBFO_DEnUzG?sT@MVq^}M)Rj6oPk1) z=#!pb05qJhlC~80xS~7TngkQZlD<@@uZk}s;N8YKGmXApt;narh(<)FXoyJL%_;t@Qh8VEc;`sv80#{MNvolem<(XIzROGWWfY0Mzlior++LP! z+r+7F+vT5XnY3Dn!nIdr&Mr-o&`@uE!7IT6!iS|Wj(B$PtQzrbOB%~6aAia*tc+4| z6*VOKQvafxr@zEr7e}*u^ygo}n_K~`XLF=c9r&p`j z@!HNu@H<>ye*U7d#ba@;Y4UoW`EwI}r#uQ@VIgX#%cOFC5(933@5>HmeagO_Qbhf- z{DsJ&x59ws_I7FzzZ12WZ0zmrmo2UO_ovK<{G|`Y?0$abHTn7Z9?s6CHBYF$OTVr1 zT+-I2mm1@_I^nL%U`O4A&WDQFz44m1`4erW z2md~ISfIug2$ur(w#-7&nq`U-wVZL96yR+9@4&B;b;N1%u?qV^Z>4E?PCpRjQb&e; z{#xm0m;(HxT}S4+z=QRp$0WxpED!yE3XD4-qObff+&;ub$a>cDkq~Y0ZEcP3!-{z! 
zSrJ?FF)zS0H^hwm*Z79WaNi3qE#c=f-LcO|VU?}{6xJ9!?<4wKhD=YjGjLCAtNh0^ zHy;aQyApBHLqG~Y*Y>EQWQf4!>oi0p{t;6VGmzrTR(@8}J+l@W0*}I*6btK`4#RdK zm%=fl+fRL!WNAL-W<4btnWg#e^M4t=pYOysM7`1G&fR$+8k+npUig`Rrc06)4Fp%S zuF#q=&FA18WHsLXnUHeJL#6kw;!n1#hTUT$w>MUPGxdpk@4f{Z(}m>h%$6$s^G&Dn zIh;5wh2P!1?LYhdUBKO?K4riQ)z{G0)+PcG6LV;$I=^68wgc?iuc}60TO8!8)YLbX z2r#LS4In8IbWbiyyhXzK=gNks#j#^aTTssjlRGvm9QcWUMi*m}ECpxg7jN=SHF5&Z zkAjBntVbp6qCZemQ4%{T98+3c+)YGmF5MVeS~B`VeB=Tu@ZKf~i|BZn=MdFj)U9!V z-N#;zccOP}>6{$fGG+B>m@2lFDw;SyEm#x`eC>-ic!WPDQ$F(QuQT>X79X;r#NiJz{k^B7Xb(}c<(N#aXMbO-KdsZ5 zlJiydCmJJ}0d*?+hbnH}t%8B;(F!WH{Ntuy_U!CQN;3X_<^Iw?2ItBr)N}JWQI^s_ z#)tEQEb%R%byV$PKk#FhJ_=6KhjHa zxY7J?yFhO}-^#DE8bikk8FjGzVDIC{ynU_(Tx?5oJe>1_jKsV7wcYV+(^0=4QF`@8 z8;x!H)w)K|lu&NZYTglQTJPi(Z)GSpd*7>(R} zU|uiU*OZA9W3U;0uy`+;)=-^*UN8e&(Oy15nr^R`OO(z3OU2AtLQ`R^MIgA0j^eGfu-xW;r>A;);ji!LgFkL6xSD#FbQVbp7q3G?p6bMfT{BxGE zVu~+1X~6P6nSz#}Z;jm8k}Zqe+_O2gJP7K`=N~f|F`5~S@ov}8OypGH~ zc%j|U4;7*dMnAb*ft-D>Ym9}?5gM?B6S{ITUs6k5H)e0{3iyKm=_}W*vQIoZjr6AU z2i2!m>M0U%AN_A6l6L0re`9IwCRbRAF@tOqWs+rD#@)QT zM`{CPP2ozWd-|{ZzXo--W`Nb(#|c(&S<|p{F8IJtkA90k$o(c-tZkM$%Y6H-CZxwC zeQP79Fx)R$o)UiRT#dvsZQniEOaxt?693$wW*3umKdF{-e|T`gqMWdGlF)sOp4Joo z{UxpwU9omvZRlHdKoa405I*1uLN8GMA{sqe&soGT{%tK1IeO(c`k!%f9qfS4!g$;a zZbQX>5HSr!_)+UP5GL@I8Q9hjyNtJWTU&nAtqS^G>L)A4L+Tyn?ad`$)ap=z;%G_o zrQqC@T^;l^OEX_vQEb_JyS7Hi_hfD%EpcwTYPMgc`dCN{`D?TG>ajSn+R&g#J#p^Y zwjCZ=CGX)^YffMWuZhw4#_;{8A-caW2o&-&8YM8BYa9`Us7W4Mo!>x8c@^ud+S5BR zx5#FW3l_3%ukD8TXj7TyN_9SPvkZ30)tV>C){lSg%x7 zc|PZ;LRA??$1M22qPJ>gkABZt@XS+ok47e{-$y;LXHJ+$w^uo7w--N(Yk9a8zGW`4 z?l(6)jKpt$(r47aS1!x67X&6wwz;M|?_j+}5p$HpyKRt<>pkV&zj6S{=40Bmw2wrJ z-ds1aP-nS{SW6>={Qa@yPBPOqr6|{LHjI7y?&4Po>MAi?Z+X3DnI4kk%bX-DFCu3k zK_sqzC-19L=19XrruCiLXD>WIOzg&^ipaO0p^9R>?!QCQyy?w%3PiH0-fo9rk2uS| zl!!4MM20J0JJckw`mo_q@fUN}CX%CwTfLr>ggT_au;%@9N*|Q(R{yw*3#zI)f_k`) z`%*bN&8n?bRy8m>-pi)`Ba@sMwH9D68~RL2pxI(tPnq!=`lzN)xm4L=|M9Z8uE>uY zakZBMuFKMza`)K3QzMIByc8!#AT#r7*=`(p{GA>teKnVc{GPJb8-z$TqQ8Tb70;T@ zLTFu?-}8=#dU>cmRzQL72bvW_$XLf-Qv^Lsb7T2N(6@rd+@QV*HDky_>bGx372cbV zz#28Ha0MBkSG+lgd}gST8JXqzN`HnAMFy;9%uc z^GU-Zd2@uhwXmd(o0k{M{J6Yb4#HRpHJnO7nNYAK$c&7b?`ENzE> z6iSc=$6uAiexsx?cLNcqe8K+xrUyi2i~N=YNi-4(h~mN*HHf43iPMHB)>{*W@Ciq` z?H&yC3vCK}C2|LqZw^!44QqK=SjRh)6s)R6JS($mR=_+TJJdhxgP+PtS)`6BKOY5JJK6d|A{Mv8y5pya+lmX3ujaJ*_#aZtMX4h1IN8 zhccD^1{-`T^oP~x5tb53rqH>r8_^;P*zJyA3wHPiNX zyqkTytg4Eevy;Y#uV`YCo#B3LjI~nzn5g zhu(h02R1hew$^*YN=xzzi(23W997;d&hdT2+ZP)p-gh25?d>0(Y)x-5u3lQI0fl_M z_v30^#b(NN#bzDh1X-#OP+S{!1WTxU+BGeE0dVCwbvU^mC}-bLpS0(Bpo~&k@s>Ox zxhI!(Ng|*~LgxHZEF-Tm7a1{UUMsrO-G};4&&p4*+K4LJkGgQyZX*tt%y9EaE-&f0 z$@+32g;B&qnR$dxkMr_y8gjXLb->4CO{yi&{xRyqN)2DJkI*>ocgko&Q{L-$Rx{KP z)t`W1aBJx~>#0#rxDenrw&BevZL;7et?&1>jfh@!VBC&FaOaw~Zokq(T7Rl-A>YKA z3s4wT-)&>#nl zk*LFMjPs-j4TQtdx}h{z0_te?XAs&m>b3ieNPrx;&DGM zfOsJ^&Ytbc3HFGLe|lX(cq;Ku>>K3~pkzqj6eF`6|? 
z;*8(LEXHkNllH~xZDo7U$Yh?jBdZ--7WQo{JKC;5ho=EK+(V#2iMC(PjzcS3K#>X{ zO6k_iAqhvu)S94Cgj9X}AJ{eILX#vxO82j}1RoSC#}fLHw-=h1$i|0wE8{U( zP0XSSRBW76s25A&IX0XsplBP@|44DYQ$OsSjV$fEpdI-5)TNNaG!1u!=F$Rth`ib; z<+N6Gi{m|xidkOXA|4bA?*y+~m#Hjk9PH|?g#`V>!!shN?joiZ%{UXZX^?4cXN`Jb zP14Al70ABj(Gf4R`%R0r^q|%Q1p&m^$jXGD!mi1_X8R{Dq!|&4d;{7740-25+lxTn zmuRnwz>yaNMfD{wr8enpJdK1S`!u6!*ldc;(o84TEw8TS{F$k^DqmM&vFhLqVi%NJ zSxfoymKGur^iBFjd54eO`LW;a1u*)DnGY~ohx(;;W&)s~+NMuDu!*7PBx;5Fq{jh< zV+y(Q^Qot3xxULG2zI<@e9z;DX8XPuE-S<5YZ$DR3qtKDMI)o8VcwlhK5YBhSo0_Z zZdAY;4{x^4vC}|1$n%wT-nIcy-eL;j7`YjT-O9@emNA9vkW)Tprfe2>v>Y0$=M?nF zua*xL1;Qd^{b8WQU0d$y%4;2;_dKvmVQP&#m)sQSajH>Q%U^1#bwv99KseE7+p|y2 zDY6(m{QmSw!Op_o+|4eM}Og5 zQ?TH@ApzGZ2VYBpUCT&8%sOMYMZHHk^EgK}m4onYmqg2(0^et(3M_R=?3(MvdE2!^ zRm&}=FK>lwW&9a&Sj&cR=*>8{Ral(1$XsY2NPz{k9UJ_m?4K8@vae=miK9;VDzYO#uOO|et7wd?6G`$FOQxw|hnB*~y z4U(*s;v6${Fu(B1`u;>{8YGyF(-i={GpnDdh|O2!zko!O<18ik6Q7ot_K{(OXI6}p zy9rm#dE0^E4a(!jWhuk&1IiCBLd6!97Nh*W+sDeBk^9P`Ze{0#@`Df<)nE?fc4D`3<=q7J6%gT1yME z8+Y*D0El)$Vj!jNHFm~xpmUzYbH; zbjAXq7%obw-jLst7eGmUE9+t;9^aMNh}m_*P{NBo9rCtTLRA?L55!UT-r=K$iJbiF zK~0C~@=4CEX>)VZZvDBYZ(?(HEUf)mGm^7E#t2#*{m1Jf_KvyH-Y z$m?<1Onp2O8p}P)kgg6p=KZqc+1YkBG20nv{B@^O@bIZ^{|qybr}Y^E+%@%gV@W zkF4cQv(D7-!c1m&s7gor1PoE+X@Wj_(4c~2=bG}Jh1HV?4rMAJJQr-tRWosPA3IEl0BVlYeIQ&ioR1&w z+MCAcFpxVhp>{BhO#_O{}izu9RGc>l_VW+ERGlbLAu%0_yp&Nrf!z!{yvt7XgdsaaYkXMtbe^{t| zb;3Eb@E})ZD)h0Doe4=)==+FW6DJ1_lYX7Y)?AG>RJGih**j3qzpQrFA#gAp@}Rf_ zN&d{^S3X`u-vS5SKqg!4s+RhcM|#OKSsn0=>as(bj{BuJdfrNOdV}lb;uN&iAtho_ zb3E?tR+j1XH`f9$L=Q;O>d_3op(34!C=0u#bh4`b1Js9f^|h)fKW~v+&ow1_gRbe& z+!~>ts&mg>LJdRFoI*R(`8*_FE9y&5A9d*gc}Gih>681jO^>UW7$#A$zrlh(vR0QI z)H~$+ofLXp)kjlYQ@5Yt>^d>#O~#7%qqX7&&iZSI)&UW2NdhAHrDr?4udOx3UEB8N z`e(w(T>ZLnmDrad-$hGr!x<&CNSNlBpf@HI9(sk~a`= z=T3>$JaVOlS7^{S0&0DxDivQnF>pB$MpU6BOwRHkW+^TRMQC8#Oibk% zNK|sBrCUD1dH0DAYP6&S$HwK~+`?lN)KwUCnArHon!2XNp9wdB=oC~B3c5hWQNI*o1?`7td%cF;E0Rb;r0s7SO4#payKl`F^ft1z zYy7d3V%#6+&(ccb4_nXN;Pg6oj?3r%lbeNQ_bruH79aLk)D_rnzaet*F3$ua~IEC>NzfXEB<<6Y~SjiC)jwRQE+JijwSX1r)v9Vt-Hl zVJwWuD0^n*p|I>s3lX3Z=6PJYzbUE>H9XU!kN1hN{x)Q6em=gT|D(WT-s4jdqPt1T zyZGF&^WHjNxm>Ort+qUQQyevC#??CGiG3iHef>)1*O^x{3b>$lf%dYbt)#-q*iDn@ z`l=&z7m3}DMz~`b_c$leK2ojW?*~!X zy^}K5MbI&ZyV~fbM-r}Yg&4kH8=O1TuAWTWETtv?w)YpE10=n&Y0vZ}Qc+B563JLD zo@vkA@<8?>qE>}=&imc?)MxnRDR06ah`qufghwF9OBrzhPF)-YkS2^(;v?N zwaVZaZZhnp`23nv23Y7@wA)xcqrv+tfn8N<-*9CY#j%;~SgFw7kxs+MRYkD?GxM#% zU~TP;l2AvnJsy}bFnpsAsvV~lx3;wbij`?>^?hg7egA@dDC>}E@Yj|jtfIKSYBn(y z(2uM`3kfnhSK=>{CX=#Za=Bp^#rDHnhMVD9oRHKW^$W+0I8>72<%bevtY?<*-83yr zi(CQff+=DuuU@R*Zi|?d3-=0Rz}&W=s&SJ(V8JW!=3Tz#sj5qv{3V7c*r|CMZv2Ih zjnJZ0(J3IB^OnBU9{lLL<&c5c^<#^Q7I>TH7k-&*>ht{*z6soyBsF|M$F3Qy=S*zs7C1VP*4h^zfC?(*04*LU`^B;7CgGaVM6+UqTug&9@=C1~L(-tfIl@YV; z)&1D%TSN^+k3cm@ceWK6wk$#1z6))6jCPJIU5+oGQt^bh->G3TNGM&>lD7TLK*3u} zI5F?1tXEHT*`G_~j)W~18z5>T?tZjtqkN{&4Hqy`L)KPCJ!dGPui}Hb=PCnmY97#? 
zE)Iyb-AG@(mWP=W8;2udA&T4Gercmj({&rI*~S+M0Hc6=qi^N^U$2WYRIS@4%|N8W`4+K zQ|aE16~5cJ?yPAh5v{)F#CA0h({}pif&isxcPUc6r%*hR|9K@m0%%CObofZq9 zk&zPGnrvpxwn=vZ4|41`B8|2l457HwF?Va1#AySCk9d|DO|{#vmW~!~CoC;9mhy=rq<}yw zckRkLxo=BXTWs}^dlo(gw-Hxef!pkShXd)YGG}lr+_kR*7hPRN7;DPRH(1Xo?9b#O zX*)*fG4R^cuV~w?{SH~sqxvxW{Zf|r;f@f^INFmy%E{KAJB0lJKwMQU|(ji+@>L_t?I?==&n3At?q9C+elwzer>PF1O<*G zC(*})_{st1LSOfWt0R4OeoT~aH))^BPy`AS#g8!z+zCqZgB_HJNR>EC*6V;0x_zmX z*Dr{F5q>#(zUe!%gw~z&d)e1a2?5`FYEV1MwdK)2;6pvgqCLBH z9|+aHO-OQ&Y|pqg61r(7oBaYj4O|K7Z8PGmS%r+bW?$wigmL~S|0FkX&Z0Iaush1` z2cok-=o(c3vYMxeRE2yH_uHL`!n-4Y13mQzh)(uMr(Hw^86ASOVg>K~o`#-EE67zD%GF+%nWb&r`eQmyFK zGZzti&Jz}yyNOG+jDGut(H$V2O$eHfOOBrzBru-oN_7aKR)tdT{J=2xDEm>Va)2$^2#mk*aEqEGct$!tbNd? zT7we2TW=JqU7FvMlV(r@6&XMkweR{>&02UkW9UlyJRt_67Jc4ss6Dr%;W)@^$RI&A z=k-8E$F56X1>*goGSF|Y%NNOYNZMOv?DEqjb?lqyXIr3QiS-5NE%)#E5_43dU622e zNNx$jow=rizK?pLODSY7kSXz7FOcR?cp_^=*%^F9tHOn2LBEE2!S5-P-b!WcjXs)bZ z?_(6$w^2vAqg~=fnsb5tx|~8u>_Ek$*@>HhaqpG%IP+p$c0UU&1EJP}^UlR#Pi0zh z$Bzo1??Q8;xe#>vdtEY1{pTbfR5?9Wv4??N{!ZcBZJ->Yt_?=%c`sQyg_(1`D?xMy zc~D*@y@Svh@64jKAlEXKe)V##W3YN6*V8BaGL4%)#D_OMcm--)yb>29Iev?nf*7<) z3>T|iboW%2@kLH*Aa(0g;uK^q%Pj&hh849RMekP`nkMp{*U4f7n?3nnk#c6nhOir{ zdWC|d)_6A0yC?^?zGVrMuw~QP8|R4|e#>wxVZ43<%K_XTWuIa;gXke-*Hdw8#@B?p zFb*NL2`~r;pMT9Ev|cosxs#UJNo+?;f*nGSp96<_kBui&riFZk0I zJyXLpv6NIR+}bS_X^KU_oq8!F`d*(W{XU$;aA*Wf)<%t^-yW`vT$xw*oohqLkUS@s z+fhct}T+5~!HRdu^hcjeS8VeJzyvf%2q;iH(~35V!B` zURH~GW6+9$FvkYeELDKkt-RZq`1yPCpzk}TZ$&FIZ}QkyW|*eTW)3=~oP@X&r`nFp zOk%9;WJ7l1=lYO@HN6b&UdBSteyhKDes(M#A`^nr^`b`kL9_};RbZP1E+_gid@UjK zrcuO-?JHy5x$CRwRPAcgPKJcXW-TMNvzD^?Pn&v8>br)i8Edg)_X89pyJ6m`9ft_uC5Y{N?da>CD?e>@epV+;5pgM~_#jqSiG~B7}h*s|0%0vlk`8 zoxk^7pKC=>I=P~@rK6j5rWM-UEvpFb6+}z%>bCP4%hbsy4ji0&R6RDr3Kc6%aO>$w%eRC=^jx-*`ml z*0ese|Du zBJ^-u%bMWPi9~#2tY{5nlV|=jBFmFf3@L+eVW((7k76|ha?OYGSEr`w13?7p9!V@4 zhGbGv%AwCtx5P#FzL03kS1Z@+g>YVL?EFEwy$HZtd4YMfY?`i?tSx|vD>aErIu12AFVLxem^KgKpfLjS(}by%*3^6JX)2U5vtv$ zUMXs5c$pUPo^VZrcqBU=nM>O+V{Y)T6?Ig=$ra&;X4|{AFs+koDbAF2<H7okan9f8nFneS-5_VmwwzB}b0 zkE$p20u1U>CUd<|X*eKaO(Up5V_`5(k|MV`m49>bYxjt%E`63DM_7jMP1fRQfG75@ zWa*mc6ALC!@;WNVQWT)MJRCgnbCx-C@pv0S+ z!P_iA+3t3mpRAfnBXB2in{&oa#KM$@V_!BrnT!5O(8I+}m+VUzE~(U7KIMy63k}|E z(|anXGoK-KQ5q`bJx#-!LyQAZBp2^u?L@_<~a z8xg)aD$cX!!iJBZP0*Vam`u(`%?yQe5t4oQH_c^bd@6kvJcZKpDQ8DXPv-N9rz=Km zF{jJy5hHzm0Q)!*dStPJ@NkoSR?nHXsnSoWu>8V6F>V#Wklk$el{khZISyPzm#&7i z=+t}WYb^wAgtXl|!M!YVa2z3RdV#O)mB6&g1=ezbp+uWQzHKz5SUSA6>!UJ>VUnZ^ zvEWYmm5mlxwYlZqSUE+fp!VY|G{Gc)j09+~at5CO{UiNmh*YsA~VTD-x$V?|00$*C#l!+nLv z8iSS^ac%>iqn_!33(}U~IcRE0#AWQ%69VB!+sVK4gCXQ2azcvjgXaQ$E6-OsZK`=` z{`h^dctrm&C~D=Yu;aB_+fd;PXzUn7vPHf*Spor8RCBS}B+k;yAQtmw4%cc%u$fCi zSFR4my6j*x_|tp;X8RvFDB!ixbVeiY#+V!D!K>rRke1fc9y^E90+hXaMSB%WjdU@v z1=r~qb+-&6U{4@~)&I@@|COXuQ!+1boHWMA4Ua?#OuaP`XiU7WRmFG>_vz5Di7Cl{ zm`)ca9~8Bn1z4_61Lyxem*;$la7d8l@ZcpkVvwW?$n~si^HbFanenj)d`RAGmi1zd7C$6{P)^NxvXbGK@8zeF%nsved zAAQUmUa}TGA!qu|D+&+Mo0uDzqy2yO0F*_R<7O_lJVYFTm{zuqsXn}zyg<- zbft~W-C0W-Z}`;P=(>JQ(21HsqW@T`BB1oC3WPup>Q4l|X7aS>62L#(pG46Yx*ELloI_;YMARea zR5o@(&A0FUS|J^pf4$r|gj>$*TD%&UcZ8udP+@iF({8^#7=Bx@IjtT<0ED zY&oELH8op5Ox^vW+7Eeo0Vz}FDRz-*Q9}Qhqnal}jCCf|u3YmUS?#m<)ctE#j3y!^ zTz8L;n4&kWP(#?uBP@w;zmA*j>+3VE0WA*s*f~8vAn)5Kvgu&e2qAHT+jP?oXrP^#&+20JS}IJRoZS`n_ht!LYsT z==xa+x1ehg7h`W+_jKl}B0$06T4EdY;%m2;|J)p4XhUSU zycU1zz~q6%mFJJ2GY$J*@|?YI8lMn{akR@p^SDx1B*XWwq!@J!%TC#!2&@;~ibTNo zsaovJ+G{r8Q;q*gEDdJ;QnyiW&JR>BU3`5)nh$n{P$-x=a-sLR=Gg**K|w`E!&5Yl zPq>R0AJ3iwm-v3~gT>O(`3AJ^AxM^~t#WAdFIW6m;R)rsLm-rv*TxeJOYJ%Aie~4*xV=JhcrgJLCA^h7dP1*Co`&fV}7QB+@gX?VB 
z@gRM?E$R;aiGPUR>&W|Cj-8s7v!$3fQl>YlPvQmMJs!SQe}u)9?JPJ?m`w{lg{Ni^ z198)^dX8Uf{JB`b2?qnbj1);A6`)vhUOswI?d1j-Z+iB@JCpbx4)yKbL+pJVWq6DSE427s@HEUtjb*2SMPLe;8PmcI>t}&PZ^LA9N(S4c|Mkn8EEb2 z^11WKzu~s-gs3?>(^#d1-I3{#x6BIt zLsowa4fthUFk?;tT_!2WQk_whOTw12Mq^B>P#4QcUwwKbW2JKwSwS*xu5Wlnrh@<)|_ zpRtIC@zcklV}iPOjJe}7~d#9MVOy5*xY%S7k>gc})`{QhqrlHrA4 z!}`_Sz15tZ>y&{Gcyfy|fq3=l@*fYJzvAku?$BPk%Dl8PcB5|TqH9nz8_ zARr*!?|He_{k&`4w>;n9@5lRtZJTZ4;=0Z_j{Vs8{W!yQwN*)p7>Tg3ut?R_l=QK% z&cU&;aCKnkz&q?$-Knv#ur2Hq6?N4W71?y}J6qd1-o?V=iFc2y*TCpfwAJ?|2GXvu z1?uJ`APjjouB$q<`2VwgTPxmd5vN1<0^_)s--_ za_Kd3` zqn~F|Qz$-czL}Db4(;D)T=depbLhUucle=>{YI&mJjZdip_uiR02(gsdq~Z7TGtva z4azkl2~%0B4A=df8=~nElqq4yH?(uB`Z-*A3>KK&GcFI87c)2S$bL;rXJ5@GVM8MW zQ=5_cId+rpOr#Jeuz`~kx z_r*}uri{z26Fh6=T|!-{_mN=Ck#~*Nt+lkU_`quz7Ivr|79Mzo4gMIx9~Kr)HqKw~ zoP%fM{`DGH2m0`x;F|(0EEJZylDwfO_R2GYR72&R50N}7QAIGK2osTocY%@?rtV*^ zbrv`bi7&uC%-p&UF1LvbSt|=$b*Jjmhz8!-`!4X|phoPgsEmwEujppx!qU?C<-s>z zYpcQw*B3I3n6G$bz{LpFvRkMYd!H*tGS}dy1mIm@!zNTi;ZVi>*GoRtZ1?%JQqDho z^dCRC!H10>n(Q^t{Naqt{QhosARY~z4>pkZAHV$POM#?-Kt+w(UokiTelP#|U0rz` zBC_*Ab_RdD-~ap$8#?(as_}X5tw(JFe|omRzFWWs?w+WrocDiwNR$R=_H?g)@yEI3 z{~DmbF7oeTGmJxx6II;(UvCSW&`<=uFBS4`(sH#ajktn5@}WJQR98ROj>lem_{`&( zIZQxLYcRJQoV9NM{mEn7FC|uOpSIgC*KSKe}W!f>(h-hXaW`( zx$NF;dZ`Dt1N33Fi71ZWQ_E3tZ{Vx}LWxtLY)72mFkp5%x(xLn{)8WSR~`lTZBwzN z%@w6yP0_8Chd4G-Owa*wHdbwH%N&?Y`OJV62ZtI-^zxTeWtm(iu| zV(fpsjrsa;ikU#pb+&nLqIx@#hFj~^b3-equ_H>pz|Ea-?#!^|BM!7ZS8tu~q08d) z&=6%>lM6n(Umwos_#YT}ZhyIsIe*<$&*Q^+_-B;`^ciDN;Du9D)Gr9HZ*(O6v9NTk z8kBC%Vs67^;5EBEQCs~`%@R}TI)`J48>mn>(f~7VvpU+_K>JeiAdM1G;?%aoZ_V}& zr%qW~Lcnv&z#mSy&dkNikr4}&>xw_Dd39Lv#pou!@z0Vr7Ue_9g$y^^uiryv@vPIG zDjHrEnwTB`{POxb@_VXPQ!A76#9)ZEKj%O9U}_BB`n4XBHu&>vuZ;%E>69dBsF=9u zc*}JiWt@7A%#3zf9tj9zsx?vsA311vt9RjL97S7fc3*L(A#06cW?G#4#&{`%m|H-_ z5O-N0{Ef6q#mGM}b}VU`Epn|8zq>qH>%Dih*^|$R!<%PhayQB4WPc&F@1?5F`>*E* zLQz`GNdeSlH$U7nfZeG-IdG`l|DKKsBBY*`(K;w@p);5Exc}9CIJ(;u!5rq&`di-YTx4JyG!-s;yhsa^6*gH=DvJIqn}`AIG7* z=>4z6mwZqmqz^k*KWl<(wm*@dvmdE!yhKS9QxLtv-<6^e*||{ z@!*0e5cu`*GOuuMVM^$KnB2pgeab)70C$1E?=D6|CXTB1h!DBTf_{^ z^wE40Zqa8-xZ3<<4%e5_5`4>hMC4T_Vxew(LDbr?vd&8yA8&;bf1+f!<^e zm0c^gFa7EER- z^Tqs;WZy;`S0~?8ix7s{pC4J1Z+R`PJU8GUIG6R<;cb=mbmT`tf)i{4IECEHO&hxP zs=6`z*DVBDPg6|qf1+6^93)2Y?P<2`krcnKPm(HL{bz!^&|8U@Z00clEt^ z^0kA>3frq?eGk4_ll$+_r_6vj8C^DC^~hCat!Az6Jf)XQr68EI^GjvXNAlSY!*8+M z02-K}%6#$1ZWszC+I&b zeZN}M%IMJC9xqh4hv=T<_QJE^mdrE<%f&eEFh9#L;%Ilu|J3u1i!E4tqa`j63BL;1 zrHmasokErP?!z1UZpFBgTjnexyx69$R%Pa7UbV!;u`41tltL18WF_w(j!&H!y`GLc z2v8%L@|la2n92kpV0~*qXTeD5<9d??h?0abfoE49l3gr%r{KBKLgzNVNjKtOT)Xqx zY;%xBkHO*1d!wI>Yh^vM=(W1tNpGS0@V)9p@46A$&92MljS{cz?(Y8l`ZBC*HQ6EpI@IE{E!|G$L#V`1#wj^ufA{IkRDm+*ANo$_Jq+6n>{oe0a1Xs z0&NgoK!6}!q$`vY1Y?2PB3M#bB-f)F0ehM$>FvuHDL!G`bP4qZ`LXybYad02PpTF& znNrlZ6~><3;ePf&|AEmONXG^+GMi`aM#kQ&@2v(27Py}g;gEJak0EMd+Sf8PQp`L| zD#5P9DU?N?!-!~)Ctc-!axhMnH9+#<=co_H#CvZhRCQx<%%PYxjI`Ekn+SPNPyV%* zBU_+#?e^7Mo`Zh>522!xny`Ny*zoXN(Bt>s~%xL09b9AT8f{M%@uY5l|X?kj7W?}c#s zL4sz|L5CTpPd6;wt^N)onE8>ZhVPsg#}B>B_;a$|L}W_A6n%}}PYyG@Gkc~_59?1g zw3=yb;XP63T2*P_zTZWU-Vfi33_PbN{;7bw$+ye9FpY$gfP*>fjFlHHjHdF(^0PTI z+*4mCoql+m>h5R#_$f%^ZBuQCI6r7>VFf zVF&s~^diX%+DZRY-)cB%K7zbE{XEl1`<|=x_u&aP0a|UjjIY9ytK|c;{N_Ov&14xd zFiK>sYt+VfHU$pJ?*+bSH!TNgoNx8qM4{w}<`6V4fl3>frJ}fsM26qNl0go~TxRo3 z7Kx2Oyq-^iB{J|+Y|lYC+1<;8WI2(RGViH{hsvBU{glTGNiw>@JDEZ*v!!boBpITb zX4iAF(&5{E7@v6ilUmhdy#*`|A|k~wCALqOn~CtM{7(<3T&dUQ>=~)uZJ$uq>ajMm ze-U77CU1>aTzl=l7$Mo-ox&lUV#+6b(#<3JsZcbU4oJn=u=kN|p&j&J-Cf4J zi=U^^mOrQ&b)Hgu>fXnVR!`Q=Selsi2t3~$dLM6CO$^!f+2E3_Wsd*s|6_*>BcWu;VEywWD@gzBf?w7#nO 
zRDBA&f19~(_39X?4~lcEKKX|cQnv=XvQ99!k`<_5zV%X2^T6FUwXla&d{!|%%|}E0 zxNxJ2?$X%X)aih-oQG?Z!QBQ6Qi)7(wj_!EBNus>@9UjWHW|n;Qe>7V|GNg9zQS%t zyNr3{_<*uz6K)TwIzvzx>Kk^Mf$Qu-Pm!`(z?4mvFYPlgm+V#?(GR?fN9!1J)jlj$ ze~E~or!(|D$TA+&^e96*`i1VxttPh8VXz#@#>R-ac=#I_QNca+MYU~i7Zk#}sQSmN zl6Mj@6-SAB`wKhn`*?eCjjrm(*Vs$2ZICfD_97XW!X3i0j&Ng?G~0Q@p{1@vxi0Ai z76q5~?g(=L=w2q!F5JKWbGA}hOv4g)jncX&VJc>H9(L*D6a~CAVW}n%|x}Y>urCl{`>pOow?=p=HHd?hI zT&1176y-hli=tHr_QpDUEm%2jZ0sH_CE0$q>%j#P_O8!1S>=NMr&sG*8KPc>j}GSy zvz^zt1W%-o=}x?dM~P!)B(ZeEWo7r3dS85ocFveQ1uCv#Dd|~Drz_H^WCb}FapS~= zg`beFkbe1|xZORMw=*l4r%Be6L;0&Ww3qX!U@p;M80-ZROlsqg=iE{#Hc=e347s95NKfW>f5j@y zG(Y41Rnt5H{ow{8n@3kUX)X^Pu~VTL1?YME_0z=gi27%9MwbfTk~FDZb>8l}Jk>lG zacm(W9f%*ZVb5q#a*|vsOGvh&Q7y<~$_<|jQ-Y=WST{;$i3j4uUZ8jRT9$l#zei4@ z#yrC1HuG>AkGK*7FStu$oZrRF<(&Y3#q z)*tUgz8(!tH1&ze@6r0{PNcj&MFScs{6p8B+(j_#|}uZ!e{CyXle-x2qmi@tncw&KW2!}qIQavD#t)SEo#1c&C> z@VTaMuHsKsxQ3N(m1jw^1P=`v>@ae3bhmfdmS?fk;<uWVA-G*3kWn)+fh=_mbamtA zw)BuoVT#3aCr-`P3Ohd>FsSfNCtncT5_dS{5bdD78f!0{(L}%YeC3c^aVfPgVT!K8f8_anD)KaUb=gE{E-l6htOxt!3AW_;yL}) z-1guc&-}T~EC7v$D*3{Fzg)^Y@*7$ZFIx-8ncKXOOFJu?G$`kz{p1NzqeYL*E=kQw zX$Otg+&b4QPQu$hlhy*SzuCxTl^(w9ti{;ScDqG9+>{JPp9qBy@|tulNMccORs=QMUL{Icx5yXgFfeg)Ga5PO|%>N?LtiV6~B-m-Tn z5sHw#mA{0C7%F*VI#-*4Shju4Bi+eJM)}zPpDBZrvLU4<2cr;D&o^Zg%*uZL$-VZ| z?cnaqU#inij~zZf65r1n58YV%%GmmGOEozxyxxb_+IS;dxa!Ni_}v_0)ytY+j3y(z zh{+zcU2oAK`uL=Sxs6g=@&42)s8+(ubX=Q-Z)QCZL^h{xM~LFvSWq$0<5}X1?%Tb7 z9+Hi6waI8TQ8a8wok=4K-dP&pmWq6muw8$8?3~fny<&F|s7TW`N_4S^;!sm$qG+bU z%0uwb{6U{`RoeL##d4f(z5WYd?uH3>cF(J{QEJxY6smWBtB!f-jUk-o=Ts&s%dPsU z?ug>Y^rgV7M`buSs|y~2F$#r?^_3% zwd2Gm5rONU!n!S^9Ci9kmd0{?J++s0=SzGVxt48JQG9ol*x7_PP@ZZpctn>dY1BrO zg7Q~3!YE-KCL9Mrj8tJg@KD=y=dGs>F>!6;0)rk>A(s6|$m#u+vL5Z%b2fK=Dx>iy z>y+Gyi-sK72!i$obV)|0h6&xn zx6`a#&a|5yd0>xUZJ_IDR*q1cOK;4NUt8wOzll-RCV6Iak!oIdu2|8{R$Q^I*DLRl z!tzsx9S5^HOuDc{`P26fT3)#o2K(EkzS*mmMxa!Mp)Gm&k%V>^T;jAfFd{kyn)g?P zZzd53tp{)pRIL5F<0vRfERA$A; zmoNarGd|=9L~lvbck5bLN!I#(Q4NhdI29&H$NmBR2I_W6egb=W@_a7a+v*H<^GSM`+E<@6Ca$RRImo7^5%J+YpZvO5Tq_9M<>^yLsb!Is|L@7e=%fuJQ)-%lc$)j4J-%uTXGe1e~97@g{BA|UUvd0CwLO|`NPkz zbR0K(dd=sast3{_gm?KVFAa99@0v=HFDERf$)5ICRA=r>hv2udzs)@iK9mVJa(}P) z!^7B3wr;$VRs0|9GfIOiZkzN1S?*hn!|OjJFT$s6^)I(^Jz zg4Wx#aF@@6Mq-Q$etriLmfXdIUj5`VQEl>>FKA(MjWaqYj)MG4=y!=8;ag%f9x4yi zKrE{G;ebp;>gad%Z+uI!;d1v-xw!v+i4Lw#sSr=`Ya-vIGFE2C_^lj_3`PQjAy-F? 
[binary patch payload omitted]
literal 0
HcmV?d00001

diff --git a/docs/source/assets/deployment/streamlit-chat.png b/docs/source/assets/deployment/streamlit-chat.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e37b9d70e15df2d253319dcd0ebeb123ee719a0
GIT binary patch
literal 108553
[base85-encoded PNG payload omitted]
z9=K}6(DPv9|M@zUX}%w{y*EVD-TC-MWOX~wx}!VKYORjcpdWAE^wk;9@h)<%Tvp|? zI@}%=>v^;w0pBEUX9e{{j+Y`syt-%SJLmb$w#bl$7IcrB30UoR1v4ny8?|UdV!ok~ z>x^}7jpPI~G`M*`wK`}(iW^$RDluQfOony+#BdsF?#9O5=xPuWDIMh(RD{+ z*yUo0;%KOY0P69qJObWD6g>fm2y5O+Mb`AdeZ4x>{ch~}LDRsB99&`W_ILrwLV>Dj z+x>A{4L#?ecnGEm9gfZ5!)R%+rGLFpR>7Cdrov;WOS+)yl-n*6zRCV>m zLJ`tP(g!z#;!9T4=@|{>C9~u#g$`2=ugY0l&9uaKXtfl%L|Y>%Wa4?w1N+BMqgOXW z8-vdG=V%>7N|);v%#iHk@l3_L8CIrqzO2Oz4{KR0UBJoLk9iqu%yO=vMSeS9sEX&J zL*BQF_F1&3sZ2g4e>gUKlKCQVkryjrXq;-#H>nRV52!8hk3ZFRe#1h!5}N~9Pm}H@ z-M)QQB;NvmgCmp7AL_P zSGlmBFe?jr3we9RO+n~#SRyNVesG`Zh_XGp1;=jJn*n{6R-=A~GhVTsb_c*ctOig0qdqL228=5S)nT;EqPH-K% z(|Ps;d$`A!@C6feG|*GQA^6$k0ks3N57=_$jJCcV`ZJphu}b>|p0Xh0l<_^8p>$T~ zz_K0pd;H_Y-8t+546NV}i1>zVHoJo?3TE#X1~G-Pzl1W=OhM8ix}HsGqa8HkJjwTj z-x!0}Z)i^Qio>w%8(8-w)7K8d=2jssVFbL6MCRe|z+< zZ%Z-U_WLjl(grN#IZ4Bw;EyrgwDfIR%!kHDVa$l-0|D;;SdMEO#{fgblp6hB7YU%W z(l{Vj-lz~(JFnq#yiSXiKI=5r{iLD4G3WgpY^sk#p-8It(WoM79`7=K#l1Yl8l!E) z4s~OY3eTvta)sMyVy}IZy+|Cj{}_37ax&1YBz|pPT5VR^(YE${mQ*4BE&(r=r$POi zw;v)%G&V5}i9Ohz4lj+6kjIL2Ih*e?glo5%+a3G1c8%5AOoe+^*6C#1oQoKb-Dy-~ zSa&K0880Gh83jE7H6@(*6ARLVx#$VN3ufQT&afbfMLoOKf_%5oSSH(Vzc(yK_ZaLb z7h3Vd@^wK~wjtk~D!J$(Ar8e7$(tk+(80k~YlMUWHORHnM2Q*M4BLFgZAf;#>N95& zJPwfUIyHV>v_7|?+VajOC_#kF;{(EWyqcqp-J|U?-7YJ`(M>S(Bl-;AF7ET!^b7Pa z0qw}z_Z&8;5s5U8%rg!)i%FAy`qAaa0faEhJ{P%jNFJo=F$R`QFE2xR@0s>nbRA? zor6IjMLjxbWRwV|z}D6aI_9)&#bU0n*2dP*?{|?`k7pVK(seEE5|nDSQSmMGV&@g+ z3+;5x5LCKNIJ{OH{TMkny`Dpa4*tS?heR(^Y|Yj=zMCp--W~>z1XUR=Fbv7$R*43* zR2H6|kF#6$35@tEo~;wQvoJC`pGp$gi_j5p4ewjFbgoWsqXI1YR;2<45YiI{%x&Y_ zlOLrZO*e=DjRPn2s8IUb{<&sPHe9q`#44l4+@RZM@9;{X`GXyuT$-2V{g*mZ@uxu_ z4o)19l0POZeO{1%v+%JDYoc)SF1x){BDYjgXMBn4WNU72lsjvjF>BO#DMzDI9s;_{ zb^;?Tf%!As379uD5-aq3`*q3K`p*Yxr1+kzZo=ZVF{PvrC;$8Atb`3gGj`!c-Wv-E zTmDf#0}K}`pw=xS#R$Ku#S$eGv(QOXv$#~_#o7HPZ>Ct}htI0_!gB*m;ZI*AilX(w z&_jb>RqMQVD$_4mYk^5LOqh;PIzEc-;~;+Ig(*e-b?nf?iuJabJqGL}pQj6p|ILB> z`#ui0prRV`BKx|7?oUIsiJFZE1Fj==)D8Uv&6M^zAO{B31GcrMd+D8B{~W0>Z7vHH z$D416!*Ya;1=kc<4CGQNMg*OY(K~5J@EJ)07TgkR#cO-;oaS?S84dcyuWSfbL{yLK zpQ*4!=9J;vaW}|tnlzB8wfZrr>aZ++!~;va^>2C$F*bH8zdu_OEC5j^q{+vx3Xg>+ z9E|e}sBCR4cdGVYnkk(7!XPhVxmwpg;=UY_TE-(0OT)UK8-OA44lSSz{YQp_&jP$8 z2u2kCWviBLgTrRWB8+;rvq8hR7Ii;e~?N-m2)eMMS}o(Y*PeMY{Hd8BrHc7%AG4dDOS!b*~Z!NC`EeeLOf$z zthm(bX@JFQw22+`G?j&Xy#%Y_essj!=9P-{<-3-dZ5Fm1s=|uKP@SiCay@JFIgtsm zWn6DEAkllpJqT76TnUrUuBhk1+~bHAz70{>*v;!=-z^`5g>E2zH%DNFKvOB=z%*tG z`&==&-_3xtvX$vK=xql)YPA@%PQDthxD=#;!F7KRsjaR?$tRcOaaRClP$)1(?)7TW+_U? 
zjpTDZ&vE0yV+ier^43L7k-Hh7-z5iW+@&T|_XWSV22;DQY=$~=8*H(c67;x(hiP0e z*yYxSM(-0JJdU#FJA=`otEu$pTM@hw(r{kYv(?6FMrz_hN9`z{)lWJ&naKk2)J~~l zVUGwc-avU8L$;+h^Fn?@)&#+mLOq@b!NvtUz=rZ%`PF|Ou^veJmNWvi>xkDJpl-UA zn^SBseF8&=Gi7(#!}!*X1;F^wMsnJGfcO!giAeCsYlSVjU+%CC&vWT@)6o=7hhJ8D zOrNh@wMF_>;&m9#`8aDKy2-aV1o#QUIPSx^kRTzP=ig?}vf6{aIEhVgFK&t@pfxAC zp1x2fw%J!-e3S5!s5Ny6if|g37r1rAI0}l82+32_X z&F6Em?Ty3;I6UvVO=nM*-qP@IY;|sZqz#J7>QRWR)#B9!QkfqP_Q5{|_0zQ#;Kq|T z9$OMErhgHqq@e>^sqm<5zcXQ`dxWV1tzXKgP^7f67dRO@G<63VGz4QoFeEZaK-a;Z zbamD1VG6kd_PJBYsoZjzy2#%=RjG1U#nTzvVso%0&*XCNT;emb88Ae2-QCj7&_l-z3`h;lyZ!#odCytz`&-LDaIs)* zX7A_O_jBLZ=em5!z?{&ivj7+og0QK#rc9ZG1&ZFmdMK$qkS zp5w%$+_6BQSuSo*Uw!Z*nW6*I$>^jwBvMT0s=0ao$a6Wjnx_2KrCB9C#zG}s*xL&7 z6`NPxP;38@MgUTPtp(kn#zm6*pRB>|Rvn~P_mDG^l<#{Q#1@V*7uFvFM1vtx*tqph z^gsQpZndiTgb<-6g*#+fmT&vc`|c_1#Ho8+!fPuB_jCppU;f<#^#o8QZTem$qXmr# z8p;A|yy*K%n*nu%IS<0ZA=b**8I={`}71lNDX^+NsW}s`FSr& zNbYxuhK+)tkL@&utn8GoJ>N*vE~dF4>O}+c7Eb&LSYNlxUhd~kFj>*HUB;J#JDGuZ z$22my?RnUa7j{ZO)^ucd-*wdw6ys#1|8Z0zwY4Uhj{ zQzmf}0uJ;*J<$o)@QcPZ1VmHZ^6lFM0;F6w%aYEcRNm5|ylwPQ;VcUt z*8q)Cayz#br}W@^VRe(qT8{wPE3|7F;q^tdNHlcVz9tAY>EhU{qWmOVm zF8ff2n@)b@Wu(IG26~-jXpMBvWP`q?#`B{D2^2tBO<67IHH!K|X?U01inK&Td2>_P zefFK{*Y-JiefbR?--REhpW1)2$5X6k?lU(YyvkX0Xrl|gASD^Bvs!x(7?p%A#3`{TTEhbD2No8DPeREOLzAbqg=J4SyXn%vmr zbN9}V8AmhHfIqiqqC9)m)^X_0m7?yQ{O*BFpVNLb@y4GYu3g9S^&In)JQjQ(F_yxR zWA+{NRIt>QYBq)ZFfu#1c~8yFu}>R$rl~j)eU?JLGnk**(BX#JZ*2o&4C(&-`|OHzN@rMEV7 zls1HWSxbmkyaD{MF^)nwyuX(rC~7j^``LHT zrV#FTv|jC4G2(dR`tIUS5*?w;a-kFD7MYZxP#)Z))eUA>5ey&*@=4>Aqr8_uGSD@1 z4+%N4KDUzq#sgRQ8~z1fVnl>YJim!dkGWGCAM2PyUT0gC-H!p$fGeSiNgQ!EYhl?w zSnvJp^(|s*FpE@*G~&%qg*3i29)3N4-`^07TivyykIZMSm*)X%L3Wkxe7)O5`=(6~ zxSHj9AJL1tse@UZGRt0b&ib+~w6mypJ}rdw7*zo_IFU~8M)i8H?B4qaJ-k}DXI9p| zx5Lv=8X=Odl+BD$-rZ)LsXI3O>F4YHd$N0i#8eo)_4Buh+K91W`M=fj4{>!>K4QFL z{n=B@em8F4pKa1IkSz?8;;07e#b4iwvF>hq$ADK*R?>Vqb^IGBpViUQ1u}vI2sz@? zy!y*Qm1rFE<+7gyqiDUqit+s{b5S=(j3S)Me^#KTg zFe{6q2z*0vE&V1L#Nl%#Kd_1my(y1M4C)HSrRstKosCK;-ob*?i~NL^*+3Y@Qwy#2 z9E++O`uFP)i<=%FIAFN>2W;qHo?>uQ)ZuXfog3@(>5^jdi&udd(a3a>d^-6R!52*_ zG&PIob8<=w7Ou4zd*h)J_Mr2RCR$&^VEa3r*>eI{Ur%;(C?=ZPE$NAl_v5!u8p0TwPQz9 zOq!n4{48>J@{99D%!i<7Y}4)av-f)=)L^No zawH_8Wys*DPe)~NjvSs~uFX}1D948l0dhGLVVMbE_e9Ujmg5d@^E9%6UG^(&=v?WG zI=PJo47lnI0%m>MQ%e#5!Na1@Q@`1{e(?c#*^hp85LU~n6el=5)|oU6m2~R1x)G`? zdpUJX9$n`vn>nL0L^Pblxrg7axN&2;{q;57?Uy;Gky9e(dWa8Ww17#hz=$8>h;(2- zWWk!+@4WcHWu`!p$CnEJ!I-C73zw5Z%J z_5I0htNocXDg&v1{!yfTutRiYTkIeqsqK2%*P|d@uM{PSPLaeeImCtaU8NJGV