From 9afdbe2eed3eb9fc0078677cad1ecb927cd984d6 Mon Sep 17 00:00:00 2001 From: taozi555 Date: Fri, 23 Aug 2024 10:24:01 +0800 Subject: [PATCH 01/88] update --- docs/en/sampling_params.md | 6 + python/sglang/api.py | 31 +++++ python/sglang/lang/compiler.py | 22 ++- python/sglang/lang/interpreter.py | 5 + python/sglang/lang/ir.py | 63 +++++++++ python/sglang/srt/layers/sampler.py | 1 - python/sglang/srt/openai_api/adapter.py | 12 ++ python/sglang/srt/openai_api/protocol.py | 14 +- .../srt/sampling/penaltylib/__init__.py | 2 + .../penaltylib/penalizers/dry_penalty.py | 129 ++++++++++++++++++ .../penalizers/frequency_penalty.py | 4 +- .../srt/sampling/sampling_batch_info.py | 3 +- python/sglang/srt/sampling/sampling_params.py | 25 ++++ 13 files changed, 309 insertions(+), 8 deletions(-) create mode 100644 python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md index 7d866e69295..f6fd18771aa 100644 --- a/docs/en/sampling_params.md +++ b/docs/en/sampling_params.md @@ -47,6 +47,12 @@ top_p: float = 1.0, top_k: int = -1, # Min-p sampling min_p: float = 0.0, +# DRY sampling +dry_multiplier: float = 0.0, +dry_base: float = 0.0, +dry_allowed_length: int = 2, +dry_penalty_last_n: int = 0, +dry_sequence_breakers: Optional[List[str]] = [], # Whether to ignore EOS token. ignore_eos: bool = False, # Whether to skip the special tokens during detokenization. diff --git a/python/sglang/api.py b/python/sglang/api.py index 3a2f747bec2..fb10d049fdc 100644 --- a/python/sglang/api.py +++ b/python/sglang/api.py @@ -69,6 +69,12 @@ def gen( min_p: Optional[float] = None, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, + # DRY sampling + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: Optional[bool] = None, return_logprob: Optional[bool] = None, logprob_start_len: Optional[int] = None, @@ -107,6 +113,11 @@ def gen( min_p, frequency_penalty, presence_penalty, + dry_multiplier, + dry_base, + dry_allowed_length, + dry_penalty_last_n, + dry_sequence_breakers, ignore_eos, return_logprob, logprob_start_len, @@ -128,6 +139,11 @@ def gen_int( min_p: Optional[float] = None, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: Optional[bool] = None, return_logprob: Optional[bool] = None, logprob_start_len: Optional[int] = None, @@ -145,6 +161,11 @@ def gen_int( min_p, frequency_penalty, presence_penalty, + dry_multiplier, + dry_base, + dry_allowed_length, + dry_penalty_last_n, + dry_sequence_breakers, ignore_eos, return_logprob, logprob_start_len, @@ -166,6 +187,11 @@ def gen_string( min_p: Optional[float] = None, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: Optional[bool] = None, return_logprob: Optional[bool] = None, logprob_start_len: Optional[int] = None, @@ -183,6 +209,11 @@ def gen_string( min_p, frequency_penalty, presence_penalty, + dry_multiplier, + dry_base, + dry_allowed_length, + dry_penalty_last_n, + dry_sequence_breakers, ignore_eos, return_logprob, 
logprob_start_len, diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py index 5e1b411fc29..cdc78f5afbf 100644 --- a/python/sglang/lang/compiler.py +++ b/python/sglang/lang/compiler.py @@ -1,7 +1,7 @@ import multiprocessing from concurrent.futures import ThreadPoolExecutor from queue import Queue -from typing import List, Union +from typing import List, Union, Optional from sglang.global_config import global_config from sglang.lang.interpreter import ProgramState, StreamExecutor, cache_program @@ -133,6 +133,11 @@ def run( min_p: float = 0.0, frequency_penalty: float = 0.0, presence_penalty: float = 0.0, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], backend=None, **kwargs, ): @@ -149,6 +154,11 @@ def run( min_p=min_p, frequency_penalty=frequency_penalty, presence_penalty=presence_penalty, + dry_multiplier=dry_multiplier, + dry_base=dry_base, + dry_allowed_length=dry_allowed_length, + dry_penalty_last_n=dry_penalty_last_n, + dry_sequence_breakers=dry_sequence_breakers, ) return self.run_internal(backend, kwargs, default_sampling_para) @@ -165,6 +175,11 @@ def run_batch( min_p: float = 0.0, frequency_penalty: float = 0.0, presence_penalty: float = 0.0, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], backend=None, num_threads: Union[str, int] = "auto", ): @@ -184,6 +199,11 @@ def run_batch( min_p=min_p, frequency_penalty=frequency_penalty, presence_penalty=presence_penalty, + dry_multiplier=dry_multiplier, + dry_base=dry_base, + dry_allowed_length=dry_allowed_length, + dry_penalty_last_n=dry_penalty_last_n, + dry_sequence_breakers=dry_sequence_breakers, ) # Extract prefix by tracing and cache it diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py index 306d280c7f0..23b9b7a939a 100644 --- a/python/sglang/lang/interpreter.py +++ b/python/sglang/lang/interpreter.py @@ -666,6 +666,11 @@ def _resolve_sampling_params(self, sampling_params): "min_p", "frequency_penalty", "presence_penalty", + "dry_multiplier", + "dry_base", + "dry_allowed_length", + "dry_penalty_last_n", + "dry_sequence_breakers", "ignore_eos", "return_logprob", "logprob_start_len", diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py index 199a7ac7a4e..4d618b9cbee 100644 --- a/python/sglang/lang/ir.py +++ b/python/sglang/lang/ir.py @@ -25,6 +25,11 @@ class SglSamplingParams: min_p: float = 0.0 frequency_penalty: float = 0.0 presence_penalty: float = 0.0 + dry_multiplier: float = 0.0 + dry_base: float = 0.0 + dry_allowed_length: int = 2 + dry_penalty_last_n: int = 0 + dry_sequence_breakers: Optional[List[str]] = () ignore_eos: bool = False return_logprob: Optional[bool] = None logprob_start_len: Optional[int] = (None,) @@ -46,6 +51,11 @@ def clone(self): self.min_p, self.frequency_penalty, self.presence_penalty, + self.dry_multiplier, + self.dry_base, + self.dry_allowed_length, + self.dry_penalty_last_n, + self.dry_sequence_breakers, self.ignore_eos, self.return_logprob, self.logprob_start_len, @@ -62,8 +72,15 @@ def to_openai_kwargs(self): "stop": self.stop or None, "temperature": self.temperature, "top_p": self.top_p, + "min_p": self.min_p, + "top_k": self.top_k, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, + "dry_multiplier": self.dry_multiplier, + "dry_base": self.dry_base, + 
"dry_allowed_length": self.dry_allowed_length, + "dry_penalty_last_n": self.dry_penalty_last_n, + "dry_sequence_breakers": self.dry_sequence_breakers, } def to_vertexai_kwargs(self): @@ -78,6 +95,12 @@ def to_vertexai_kwargs(self): "temperature": self.temperature, "top_p": self.top_p, "top_k": self.top_k if self.top_k > 0 else None, + "min_p": self.min_p, + "dry_multiplier": self.dry_multiplier, + "dry_base": self.dry_base, + "dry_allowed_length": self.dry_allowed_length, + "dry_penalty_last_n": self.dry_penalty_last_n, + "dry_sequence_breakers": self.dry_sequence_breakers, } def to_anthropic_kwargs(self): @@ -106,6 +129,11 @@ def to_litellm_kwargs(self): "top_p": self.top_p, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, + "dry_multiplier": self.dry_multiplier, + "dry_base": self.dry_base, + "dry_allowed_length": self.dry_allowed_length, + "dry_penalty_last_n": self.dry_penalty_last_n, + "dry_sequence_breakers": self.dry_sequence_breakers, } def to_srt_kwargs(self): @@ -119,6 +147,11 @@ def to_srt_kwargs(self): "min_p": self.min_p, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, + "dry_multiplier": self.dry_multiplier, + "dry_base": self.dry_base, + "dry_allowed_length": self.dry_allowed_length, + "dry_penalty_last_n": self.dry_penalty_last_n, + "dry_sequence_breakers": self.dry_sequence_breakers, "ignore_eos": self.ignore_eos, "regex": self.regex, } @@ -155,6 +188,11 @@ def run( min_p: float = 0.0, frequency_penalty: float = 0.0, presence_penalty: float = 0.0, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: bool = False, return_logprob: Optional[bool] = None, logprob_start_len: Optional[int] = None, @@ -176,6 +214,11 @@ def run( min_p=min_p, frequency_penalty=frequency_penalty, presence_penalty=presence_penalty, + dry_multiplier=dry_multiplier, + dry_base=dry_base, + dry_allowed_length=dry_allowed_length, + dry_penalty_last_n=dry_penalty_last_n, + dry_sequence_breakers=dry_sequence_breakers, ignore_eos=ignore_eos, return_logprob=return_logprob, logprob_start_len=logprob_start_len, @@ -198,6 +241,11 @@ def run_batch( min_p: float = 0.0, frequency_penalty: float = 0.0, presence_penalty: float = 0.0, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: bool = False, return_logprob: Optional[bool] = None, logprob_start_len: Optional[int] = None, @@ -237,6 +285,11 @@ def run_batch( min_p=min_p, frequency_penalty=frequency_penalty, presence_penalty=presence_penalty, + dry_multiplier=dry_multiplier, + dry_base=dry_base, + dry_allowed_length=dry_allowed_length, + dry_penalty_last_n=dry_penalty_last_n, + dry_sequence_breakers=dry_sequence_breakers, ignore_eos=ignore_eos, return_logprob=return_logprob, logprob_start_len=logprob_start_len, @@ -418,6 +471,11 @@ def __init__( min_p: Optional[float] = None, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: Optional[bool] = None, return_logprob: Optional[bool] = None, logprob_start_len: Optional[int] = None, @@ -439,6 +497,11 @@ def __init__( min_p=min_p, frequency_penalty=frequency_penalty, 
presence_penalty=presence_penalty, + dry_multiplier=dry_multiplier, + dry_base=dry_base, + dry_allowed_length=dry_allowed_length, + dry_penalty_last_n=dry_penalty_last_n, + dry_sequence_breakers=dry_sequence_breakers, ignore_eos=ignore_eos, return_logprob=return_logprob, logprob_start_len=logprob_start_len, diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py index 3006e765c88..51dfc7c1ee7 100644 --- a/python/sglang/srt/layers/sampler.py +++ b/python/sglang/srt/layers/sampler.py @@ -29,7 +29,6 @@ def forward_cuda(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): if sampling_info.vocab_mask is not None: logits = logits.masked_fill(~sampling_info.vocab_mask, float("-inf")) - logits = sampling_info.penalizer_orchestrator.apply(logits) probs = torch.softmax(logits, dim=-1) diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 582457ae049..ff48855a076 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -433,6 +433,12 @@ def v1_generate_request(all_requests: List[CompletionRequest]): "presence_penalty": request.presence_penalty, "frequency_penalty": request.frequency_penalty, "repetition_penalty": request.repetition_penalty, + "min_p": request.min_p, + "dry_multiplier": request.dry_multiplier, + "dry_base": request.dry_base, + "dry_allowed_length": request.dry_allowed_length, + "dry_penalty_last_n": request.dry_penalty_last_n, + "dry_sequence_breakers": request.dry_sequence_breakers, "regex": request.regex, "n": request.n, "ignore_eos": request.ignore_eos, @@ -798,9 +804,15 @@ def v1_chat_generate_request( "stop": stop, "stop_token_ids": request.stop_token_ids, "top_p": request.top_p, + "min_p": request.min_p, "presence_penalty": request.presence_penalty, "frequency_penalty": request.frequency_penalty, "repetition_penalty": request.repetition_penalty, + "dry_multiplier": request.dry_multiplier, + "dry_base": request.dry_base, + "dry_allowed_length": request.dry_allowed_length, + "dry_penalty_last_n": request.dry_penalty_last_n, + "dry_sequence_breakers": request.dry_sequence_breakers, "regex": request.regex, "n": request.n, } diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py index 758e48edefb..0b49a8dd757 100644 --- a/python/sglang/srt/openai_api/protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -165,7 +165,12 @@ class CompletionRequest(BaseModel): min_tokens: Optional[int] = 0 repetition_penalty: Optional[float] = 1.0 stop_token_ids: Optional[List[int]] = Field(default_factory=list) - + min_p: float = 0.0 + dry_multiplier: float = 0.0 + dry_base: float = 0.0 + dry_allowed_length: int = 2 + dry_penalty_last_n: int = 0 + dry_sequence_breakers: Optional[List[str]] = [] class CompletionResponseChoice(BaseModel): index: int @@ -265,7 +270,12 @@ class ChatCompletionRequest(BaseModel): min_tokens: Optional[int] = 0 repetition_penalty: Optional[float] = 1.0 stop_token_ids: Optional[List[int]] = Field(default_factory=list) - + min_p: float = 0.0 + dry_multiplier: float = 0.0 + dry_base: float = 0.0 + dry_allowed_length: int = 2 + dry_penalty_last_n: int = 0 + dry_sequence_breakers: Optional[List[str]] = [] class ChatMessage(BaseModel): role: Optional[str] = None diff --git a/python/sglang/srt/sampling/penaltylib/__init__.py b/python/sglang/srt/sampling/penaltylib/__init__.py index 43fff0fca44..5ab2a7dc2f5 100644 --- a/python/sglang/srt/sampling/penaltylib/__init__.py +++ 
b/python/sglang/srt/sampling/penaltylib/__init__.py @@ -3,6 +3,7 @@ from .penalizers.min_new_tokens import BatchedMinNewTokensPenalizer from .penalizers.presence_penalty import BatchedPresencePenalizer from .penalizers.repetition_penalty import BatchedRepetitionPenalizer +from .penalizers.dry_penalty import BatchedDryPenalizer __all__ = [ "BatchedFrequencyPenalizer", @@ -10,4 +11,5 @@ "BatchedPresencePenalizer", "BatchedRepetitionPenalizer", "BatchedPenalizerOrchestrator", + "BatchedDryPenalizer", ] diff --git a/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py b/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py new file mode 100644 index 00000000000..fc454b5a86c --- /dev/null +++ b/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py @@ -0,0 +1,128 @@ +import typing +import torch + +from ..orchestrator import _BatchedPenalizer, _TokenIDs + +class BatchedDryPenalizer(_BatchedPenalizer): + """ + DRY (Don't Repeat Yourself) penalizer: penalizes tokens that would extend token sequences already repeated in the recent context. + """ + + multipliers: torch.Tensor = None + bases: torch.Tensor = None + allowed_lengths: torch.Tensor = None + sequence_breakers: typing.List[typing.Set[int]] = None + ranges: torch.Tensor = None + input_ids: torch.Tensor = None + output_ids: torch.Tensor = None + + def _is_required(self) -> bool: + return any( + req.sampling_params.dry_multiplier != 0.0 + for req in self.orchestrator.reqs() + ) + + def _prepare(self): + self.multipliers = torch.tensor( + [req.sampling_params.dry_multiplier for req in self.orchestrator.reqs()], + dtype=torch.float32, + device=self.orchestrator.device + ) + self.bases = torch.tensor( + [req.sampling_params.dry_base for req in self.orchestrator.reqs()], + dtype=torch.float32, + device=self.orchestrator.device + ) + self.allowed_lengths = torch.tensor( + [req.sampling_params.dry_allowed_length for req in self.orchestrator.reqs()], + dtype=torch.float32, # Ensure this is float to match other tensors + device=self.orchestrator.device + ) + self.sequence_breakers = [ + set(token_id for prompt in req.sampling_params.dry_sequence_breakers + for token_id in req.tokenizer.encode(prompt, add_special_tokens=False)) + for req in self.orchestrator.reqs() + ] + self.ranges = torch.tensor( + [req.sampling_params.dry_penalty_last_n for req in self.orchestrator.reqs()], + dtype=torch.int64, + device=self.orchestrator.device + ) + + def _teardown(self): + del self.multipliers + del self.bases + del self.allowed_lengths + del self.sequence_breakers + del self.ranges + + self.multipliers = None + self.bases = None + self.allowed_lengths = None + self.sequence_breakers = None + self.ranges = None + + def _cumulate_input_tokens(self, input_ids: _TokenIDs): + self.input_ids = input_ids.token_ids + + def _cumulate_output_tokens(self, output_ids: _TokenIDs): + self.output_ids = output_ids.token_ids + + def _apply(self, logits: torch.Tensor) -> torch.Tensor: + batch_size = logits.shape[0] + for i in range(batch_size): + if self.output_ids is not None: + input_ids = self.input_ids[i] = torch.cat( + [self.input_ids[i], self.output_ids], dim=0 + ) + else: + input_ids = self.input_ids[i] + if self.ranges[i] > 0: + input_ids_row = input_ids[-self.ranges[i]:] + else: + input_ids_row = input_ids + last_token = input_ids_row[-1].item() + if last_token in self.sequence_breakers[i]: + continue + + match_indices = (input_ids_row[:-1] == last_token).nonzero() + match_lengths = {} + + for idx in
match_indices: + idx = idx.item() + next_token = input_ids_row[idx+1].item() + + if next_token in self.sequence_breakers[i]: + continue + + match_length = 1 + while idx - match_length >= 0: + previous_token = input_ids_row[-(match_length+1)].item() + if input_ids_row[idx - match_length] != previous_token or previous_token in self.sequence_breakers[i]: + break + match_length += 1 + + match_lengths[next_token] = max(match_length, match_lengths.get(next_token, 0)) + + for token, match_length in match_lengths.items(): + if match_length >= self.allowed_lengths[i]: + penalty = self.multipliers[i] * self.bases[i] ** (match_length - self.allowed_lengths[i]) + logits[i, token] -= penalty + + return logits + + def _filter( + self, indices_to_keep: typing.List[int], indices_tensor_to_keep: torch.Tensor + ): + self.multipliers = self.multipliers[indices_tensor_to_keep] + self.bases = self.bases[indices_tensor_to_keep] + self.allowed_lengths = self.allowed_lengths[indices_tensor_to_keep] + self.sequence_breakers = [self.sequence_breakers[i] for i in indices_to_keep] + self.ranges = self.ranges[indices_tensor_to_keep] + + def _merge(self, their: "BatchedDryPenalizer"): + self.multipliers = torch.cat([self.multipliers, their.multipliers], dim=0) + self.bases = torch.cat([self.bases, their.bases], dim=0) + self.allowed_lengths = torch.cat([self.allowed_lengths, their.allowed_lengths], dim=0) + self.sequence_breakers.extend(their.sequence_breakers) + self.ranges = torch.cat([self.ranges, their.ranges], dim=0) \ No newline at end of file diff --git a/python/sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py b/python/sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py index 178cb54b24c..749a3b0bf4d 100644 --- a/python/sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py +++ b/python/sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py @@ -54,9 +54,7 @@ def _cumulate_input_tokens(self, input_ids: _TokenIDs): pass def _cumulate_output_tokens(self, output_ids: _TokenIDs): - self.cumulated_frequency_penalties += ( - self.frequency_penalties * output_ids.occurrence_count() - ) + pass def _apply(self, logits: torch.Tensor) -> torch.Tensor: logits -= self.cumulated_frequency_penalties diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py index bc70a9018ed..4a6cf8bce9d 100644 --- a/python/sglang/srt/sampling/sampling_batch_info.py +++ b/python/sglang/srt/sampling/sampling_batch_info.py @@ -30,7 +30,7 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): device = "cuda" reqs = batch.reqs ret = cls(vocab_size=vocab_size) - + print(vars(reqs[0].tokenizer)) ret.temperatures = torch.tensor( [r.sampling_params.temperature for r in reqs], dtype=torch.float, @@ -62,6 +62,7 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): penaltylib.BatchedMinNewTokensPenalizer, penaltylib.BatchedPresencePenalizer, penaltylib.BatchedRepetitionPenalizer, + penaltylib.BatchedDryPenalizer, }, ) diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py index c30717dd7cb..5564e828b8e 100644 --- a/python/sglang/srt/sampling/sampling_params.py +++ b/python/sglang/srt/sampling/sampling_params.py @@ -34,6 +34,11 @@ def __init__( frequency_penalty: float = 0.0, presence_penalty: float = 0.0, repetition_penalty: float = 1.0, + dry_multiplier: float = 0.0, + dry_base: float = 0.0, + dry_allowed_length: int = 2, + dry_penalty_last_n: int = 0, + 
dry_sequence_breakers: Optional[List[str]] = [], ignore_eos: bool = False, skip_special_tokens: bool = True, spaces_between_special_tokens: bool = True, @@ -47,6 +52,11 @@ self.frequency_penalty = frequency_penalty self.presence_penalty = presence_penalty self.repetition_penalty = repetition_penalty + self.dry_multiplier = dry_multiplier + self.dry_base = dry_base + self.dry_allowed_length = dry_allowed_length + self.dry_penalty_last_n = dry_penalty_last_n + self.dry_sequence_breakers = dry_sequence_breakers self.stop_strs = stop self.stop_token_ids = {*stop_token_ids} self.max_new_tokens = max_new_tokens @@ -106,6 +116,14 @@ def verify(self): f"min_new_tokens must be in (0, max_new_tokens({self.max_new_tokens})], got " f"{self.min_new_tokens}." ) + if self.dry_multiplier < 0: + raise ValueError( + f"dry_multiplier must be at least 0, got {self.dry_multiplier}." + ) + if self.dry_allowed_length < 0: + raise ValueError( + f"dry_allowed_length must be at least 0, got {self.dry_allowed_length}." + ) def normalize(self, tokenizer): # Process stop strings @@ -136,8 +155,14 @@ def to_srt_kwargs(self): "temperature": self.temperature, "top_p": self.top_p, "top_k": self.top_k, + "min_p": self.min_p, "frequency_penalty": self.frequency_penalty, "presence_penalty": self.presence_penalty, + "dry_multiplier": self.dry_multiplier, + "dry_base": self.dry_base, + "dry_allowed_length": self.dry_allowed_length, + "dry_penalty_last_n": self.dry_penalty_last_n, + "dry_sequence_breakers": self.dry_sequence_breakers, "ignore_eos": self.ignore_eos, "regex": self.regex, } From 30556ace10cf2e00b8875df53f6ab23136872f4c Mon Sep 17 00:00:00 2001 From: taozi555 Date: Fri, 23 Aug 2024 10:28:00 +0800 Subject: [PATCH 02/88] update --- python/sglang/srt/sampling/sampling_batch_info.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py index 4a6cf8bce9d..f686316e375 100644 --- a/python/sglang/srt/sampling/sampling_batch_info.py +++ b/python/sglang/srt/sampling/sampling_batch_info.py @@ -30,7 +30,6 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): device = "cuda" reqs = batch.reqs ret = cls(vocab_size=vocab_size) - print(vars(reqs[0].tokenizer)) ret.temperatures = torch.tensor( [r.sampling_params.temperature for r in reqs], dtype=torch.float,
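
A standalone sketch of the matching rule that `BatchedDryPenalizer._apply` implements above — illustrative only, not part of the patch. `token_ids` is a 1-D tensor holding one sequence's context and `logits` a 1-D tensor over the vocabulary; the 0.8/1.75 values in the trailing comment are conventional DRY settings rather than this patch's defaults (the patch leaves `dry_base` at 0.0, which disables the exponential term).

```python
import torch

def dry_penalize(logits, token_ids, multiplier, base, allowed_length, sequence_breakers):
    # Single-sequence version of the DRY update in dry_penalty.py (hypothetical helper).
    last_token = token_ids[-1].item()
    if last_token in sequence_breakers:
        return logits
    match_lengths = {}
    # Every earlier occurrence of the last token starts a candidate repeat.
    for idx in (token_ids[:-1] == last_token).nonzero().flatten().tolist():
        next_token = token_ids[idx + 1].item()
        if next_token in sequence_breakers:
            continue
        # Walk backwards while the history keeps agreeing with the current suffix.
        length = 1
        while idx - length >= 0:
            prev = token_ids[-(length + 1)].item()
            if token_ids[idx - length].item() != prev or prev in sequence_breakers:
                break
            length += 1
        match_lengths[next_token] = max(length, match_lengths.get(next_token, 0))
    # Tokens that would extend a repeat of length >= allowed_length are docked
    # multiplier * base ** (length - allowed_length), growing with the repeat.
    for token, length in match_lengths.items():
        if length >= allowed_length:
            logits[token] -= multiplier * base ** (length - allowed_length)
    return logits

# e.g. logits = dry_penalize(logits, ids, 0.8, 1.75, 2, sequence_breakers={198})
```
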
From a5b14ad04337a3371ca2513ef95a5add28b3f34d Mon Sep 17 00:00:00 2001 From: Kaichen Zhang - NTU Date: Sat, 24 Aug 2024 05:11:16 +0800 Subject: [PATCH 03/88] [Feat/WIP] add llava-onevision, with support for (1) siglip encoder, (2) qwen2 decoder (3) openai api compatible server. (#1123) Co-authored-by: Bo Li --- README.md | 5 + .../usage/llava/http_llava_onevision_test.py | 211 ++++++++++++++++ .../usage/llava_video/srt_example_llava_v.py | 16 +- .../llava_video/videos/Q98Z4OTh8RwmDonc.mp4 | Bin 316390 -> 0 bytes python/pyproject.toml | 2 +- python/sglang/lang/chat_template.py | 11 +- python/sglang/srt/conversation.py | 51 +++- .../sglang/srt/managers/tokenizer_manager.py | 50 +++- python/sglang/srt/managers/tp_worker.py | 13 +- python/sglang/srt/mm_utils.py | 86 ++++++- .../srt/model_executor/forward_batch_info.py | 21 +- python/sglang/srt/models/llava.py | 235 +++++++++++++----- test/srt/test_vision_openai_server.py | 93 ++++++- 13 files changed, 701 insertions(+), 93 deletions(-) create mode 100644 examples/usage/llava/http_llava_onevision_test.py delete mode 100644 examples/usage/llava_video/videos/Q98Z4OTh8RwmDonc.mp4 diff --git a/README.md b/README.md index c7d47d67866..c118d6a1a0f 100644 --- a/README.md +++ b/README.md @@ -231,8 +231,13 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000` - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.6-vicuna-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000` - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.6-34b --tokenizer-path liuhaotian/llava-v1.6-34b-tokenizer --port 30000` + - `python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000 --host=127.0.0.1 --tp-size=1 --chat-template=llava_llama_3` + - `python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --host="127.0.0.1" --tp-size=8 --chat-template=chatml-llava` - LLaVA-NeXT-Video - see [examples/usage/llava_video](examples/usage/llava_video) +- [LLaVA-OneVision](https://arxiv.org/abs/2408.03326) + - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384` + - see [test/srt/test_llava_onevision_openai_server.py](test/srt/test_llava_onevision_openai_server.py) - Yi-VL - see [srt_example_yi_vl.py](examples/quick_start/srt_example_yi_vl.py).
- StableLM diff --git a/examples/usage/llava/http_llava_onevision_test.py b/examples/usage/llava/http_llava_onevision_test.py new file mode 100644 index 00000000000..c32d529819d --- /dev/null +++ b/examples/usage/llava/http_llava_onevision_test.py @@ -0,0 +1,211 @@ +import base64 +import io +import os +import sys +import time + +import numpy as np +import openai +import requests +from decord import VideoReader, cpu +from PIL import Image + +# pip install httpx==0.23.3 +# pip install decord +# pip install protobuf==3.20.0 + + +def download_video(url, cache_dir): + file_path = os.path.join(cache_dir, "jobs.mp4") + os.makedirs(cache_dir, exist_ok=True) + + response = requests.get(url) + response.raise_for_status() + + with open(file_path, "wb") as f: + f.write(response.content) + + print(f"File downloaded and saved to: {file_path}") + return file_path + + +def create_openai_client(base_url): + return openai.Client(api_key="EMPTY", base_url=base_url) + + +def image_stream_request_test(client): + print("----------------------Image Stream Request Test----------------------") + stream_request = client.chat.completions.create( + model="default", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" + }, + }, + { + "type": "text", + "text": "Please describe this image. Please list the benchmarks and the models.", + }, + ], + }, + ], + temperature=0.7, + max_tokens=1024, + stream=True, + ) + stream_response = "" + + for chunk in stream_request: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + stream_response += content + sys.stdout.write(content) + sys.stdout.flush() + + print("-" * 30) + + +def video_stream_request_test(client, video_path): + print("------------------------Video Stream Request Test----------------------") + messages = prepare_video_messages(video_path) + + start_time = time.time() + video_request = client.chat.completions.create( + model="default", + messages=messages, + temperature=0, + max_tokens=1024, + stream=True, + ) + print("-" * 30) + video_response = "" + + for chunk in video_request: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + video_response += content + sys.stdout.write(content) + sys.stdout.flush() + print("-" * 30) + + +def image_speed_test(client): + print("----------------------Image Speed Test----------------------") + start_time = time.time() + request = client.chat.completions.create( + model="default", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" + }, + }, + { + "type": "text", + "text": "Please describe this image. 
Please list the benchmarks and the models.", + }, + ], + }, + ], + temperature=0, + max_tokens=1024, + ) + end_time = time.time() + response = request.choices[0].message.content + print(response) + print("-" * 30) + print_speed_test_results(request, start_time, end_time) + + +def video_speed_test(client, video_path): + print("------------------------Video Speed Test------------------------") + messages = prepare_video_messages(video_path) + + start_time = time.time() + video_request = client.chat.completions.create( + model="default", + messages=messages, + temperature=0, + max_tokens=1024, + ) + end_time = time.time() + video_response = video_request.choices[0].message.content + print(video_response) + print("-" * 30) + print_speed_test_results(video_request, start_time, end_time) + + +def prepare_video_messages(video_path): + max_frames_num = 32 + vr = VideoReader(video_path, ctx=cpu(0)) + total_frame_num = len(vr) + uniform_sampled_frames = np.linspace( + 0, total_frame_num - 1, max_frames_num, dtype=int + ) + frame_idx = uniform_sampled_frames.tolist() + frames = vr.get_batch(frame_idx).asnumpy() + + base64_frames = [] + for frame in frames: + pil_img = Image.fromarray(frame) + buff = io.BytesIO() + pil_img.save(buff, format="JPEG") + base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_frames.append(base64_str) + + messages = [{"role": "user", "content": []}] + + # Build a fresh dict for every frame; appending a shallow frame_format.copy() + # would leave all frames sharing (and overwriting) one nested image_url dict. + for base64_frame in base64_frames: + frame_format = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,{}".format(base64_frame)}, + } + messages[0]["content"].append(frame_format) + + prompt = {"type": "text", "text": "Please describe the video in detail."} + messages[0]["content"].append(prompt) + + return messages + + +def print_speed_test_results(request, start_time, end_time): + total_tokens = request.usage.total_tokens + completion_tokens = request.usage.completion_tokens + prompt_tokens = request.usage.prompt_tokens + + print(f"Total tokens: {total_tokens}") + print(f"Completion tokens: {completion_tokens}") + print(f"Prompt tokens: {prompt_tokens}") + print(f"Time taken: {end_time - start_time} seconds") + print(f"Tokens per second: {total_tokens / (end_time - start_time)}") + print(f"Completion tokens per second: {completion_tokens / (end_time - start_time)}") + print(f"Prompt tokens per second: {prompt_tokens / (end_time - start_time)}") + + +def main(): + url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4" + cache_dir = os.path.expanduser("~/.cache") + video_path = download_video(url, cache_dir) + + client = create_openai_client("http://127.0.0.1:30000/v1") + + image_stream_request_test(client) + video_stream_request_test(client, video_path) + image_speed_test(client) + video_speed_test(client, video_path) + + +if __name__ == "__main__": + main() diff --git a/examples/usage/llava_video/srt_example_llava_v.py b/examples/usage/llava_video/srt_example_llava_v.py index 27ba862d30d..7421dfcdfb8 100644 --- a/examples/usage/llava_video/srt_example_llava_v.py +++ b/examples/usage/llava_video/srt_example_llava_v.py @@ -121,6 +121,20 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= if __name__ == "__main__": + url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4" + + cache_dir = os.path.expanduser("~/.cache") + file_path = os.path.join(cache_dir, "jobs.mp4") + +
os.makedirs(cache_dir, exist_ok=True) + + response = requests.get(url) + response.raise_for_status() # Raise an exception for bad responses + + with open(file_path, "wb") as f: + f.write(response.content) + + print(f"File downloaded and saved to: {file_path}") # Create the parser parser = argparse.ArgumentParser( description="Run video processing with specified port." ) @@ -148,7 +162,7 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= parser.add_argument( "--video-dir", type=str, - default="./videos/Q98Z4OTh8RwmDonc.mp4", + default=os.path.expanduser("~/.cache/jobs.mp4"), help="The directory or path for the processed video files.", ) parser.add_argument( diff --git a/examples/usage/llava_video/videos/Q98Z4OTh8RwmDonc.mp4 b/examples/usage/llava_video/videos/Q98Z4OTh8RwmDonc.mp4 deleted file mode 100644 index 32d912dbfa17c8426906ca1163b2e5e30fa84fef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 [316390 bytes of base85-encoded binary data for the deleted mp4 omitted]
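
A condensed sketch of the image request flow that the new `examples/usage/llava/http_llava_onevision_test.py` exercises, assuming a server launched with the llava-onevision command added to the README above; the host, port, and the `"default"` model name follow that example and are assumptions about the local deployment:

```python
import openai

# Assumes a local server, e.g. the README command:
#   python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov \
#       --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava
client = openai.Client(api_key="EMPTY", base_url="http://127.0.0.1:30000/v1")

response = client.chat.completions.create(
    model="default",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
                    },
                },
                {"type": "text", "text": "Please describe this image."},
            ],
        }
    ],
    temperature=0,
    max_tokens=256,
)
print(response.choices[0].message.content)
```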
zVdyDVnIR(PEizF#aTE0a#l^H-)N_Hs(5*V_C6SthT|*pus!YQC~&Fa*yHzH5CZGYfRYML!a!`QVu)qmT2`=Uia)xEMd| z9WOiiJWQ}zb`S7S+96|%NcR%yRyfWd2`LLOD;t$cRf3WA)c9^ggmwUcG0?Ipp zqxh&A=P3fG_vzEAhIan|`mLfe?-Ad6@GE3;Zk?0}RpH{$3m|aXVfQ|6nfZVNvB|>E zEf9a{No+z6;FA+nKJ~6Be4eTJE~F4MJUw!z#un(=%0zpaQHT8>J2B>8HKb3Rs)Atc zL#IAoL7|0hE(AgYfF7D>w$b@t$@39$z^pAz| zmRDvMBTZT<+|JVHVZcu(#*~>Cqs-j(5xE|dgGmqb96GZ^=1(;>0b;qL&-_qX1?my- zk0CCXGnl(hx6tnm-Jag(oCc_Lc6nlfG`8Fj++z0$U0h-GI}sR=(l9J#(Ryop zNVg)FHag0LEv`jRs9TbAxxKbaUAN#DMrPm8i6Vwy8wIp&T=Rs(@`UQj18NET)FOVa z^Yq@kPcixlCbfL}2n@w6JyocTtx@BUfN-l!NkeN=I4R-9f&_7z*T&tXxKbZkc-THL zH`G_%YaT7w@%WQu_og&S_e~t3INh13B&*xuz#na^dg!QUt|sE5hVpW)cTg`#(H|90m;s$@p|qe0g<3#*MEdx2m-K8JO|0$ifJ+f zV*T+7VxHFOQ$30vuWwCi&{f{z`w>;NN=)9yg8T?xR>P*qQc9ZQ1d801#5hW(q#n-_ z0k>7pzgGg1CGnr{QQ$onm*N%=MZ_s(INcU@hY{3{B)>=zuiXp@PGRG)dEI$-r$ zf3I?$$id%{Tmeb=f9jZjw`r#*jV8nglnMLu41fCX#XHZc8paP+G5Y#PxCd!#(-%s= z{;9h&5bhj>OD$^yRjvHrXNk3XW;MZgsk)0frDG$$EqZjk7opCn75xFav~)1&u*cSaZxPmGg9GURz-~hN^nsgAGMf%XEi+iZuuLb z0jp-gktbuVMZPFpIm2c7#^xSnLkG*f!k*fdb+ePObglnGTr4vanx<3#LU-fiYgDI|3r z=DrqxM}*`w_$Bd9XN{t+D2Vy&=0SK!r2|BFHfg6WXrb)$+5t~KovW<~mC{qFl>!-+ zgqFbm_7ZehTBoN?J>gPX!BFat;zf1`wC%D~!f)R|0001*L7rn|lvGS5{{xeJgg}t0GLTS?7b-gu51xFKbf-lB5R-9Kdmer7Xn+>4L-eD4BwB!<`%c$;GHMT2gA^odk7nsjA zg9MJdUA_c-6HU^N5RWmX4>+6nPg=dp)M$`qZ8u&xqfs*^kvQnnoX81O>?yUQi9{~cg$uPL5QEXpWXbZ1$HiHO*D zdExrLB3%8=4+S0@gB!lJ#ma-0YfhCR|1euuf>I-ezqYVyZ#5;rxN7d zqhOba!d3@<(bAt5G2R4I!7um~IIay}fHXxriqdAF;+m1EFpf8^>CG$PEcUqj7gH@^ zHH>IiBbZ(7w_H(}CEEc1SBU4JI-vjn4d_9dhcyT-sWO-ffBynir=EKz!7E|yc~7P8y6ErA36)e%_@HPmT`TqI z(!>CQ-MTPfq$5KCCki-kcm7F}1r*!ccoz4j-VO=7VI<$xq>Zf3mV&%OR+Uq$d`!E{ z@NA2*Y{-zxU;u+4`RhslPX&5n9Mm@Six_=>C7#Eo}=RQ9Z+PR_PZBDWZx9iVoycPauj+ zl-xB@O&lir1xUCuYsGon0n(DzAcLheRMU*G(5!;l^?9qSt`UqFey||hu7Mh@ee!pq zda=iCv2$KMY9`+m(1roDOA^X?9$w#8bImca=fN#+wWc*F_3<%FG7EXv+zqDAfGr)`Zcjy7>{2xC?vC?fp19rAs? 
zV#IM;Iryxh_nczRb0ACu`W^x5!yK}?>&!XhZ_o6vD~4W){uhx`#(E!9h~~-5*(~%y_U@O z^2zbY*iDqz7q0U7Y~LZ&W(K~xH;9ACD=k=w3Jfi5S4+?v_1X(8O=Pb=W)S&5M(}Iw zj(9Kp(dfLKw~=m^b}wRCnrec~&&;fD>^l!7ALZt5<|(pLpQi;=-U1a0SWsU0H+`P8kOn`QZ&w26YgteMiip^HNiXY z9gF`nM*69r$z#IprwmPtwD@Lk!gw2WqU=WJY1nquaUw?eS7->Z!p7)7;w6Y|H*g8s zo!oWju@RX$Mt+9Dv>V049`)8EomYdk6?TuB-I?d?k4z8M5Edt9bcX78)j#ziM&s&D zqRIwmXH_VgQEc^a#s{7>E75#{cucut)&igga0uU@#~GR%`ny z@Xwaa%%GOa5{bd=OLDr3slwr{DP>siNn~U%d`75R&+6$+Xv$p4Z;{lfzx|=;W6J_K zzG;BaZ2U`+Ln2#^&uUw4P#t~W8Ur{E&fPH4qNIfbcmiiv$3ipRHQE>0t0DE+dJPz+ z4_8c%%p8sAER$v@I1zP8ADztu{ddMNFH2_ZfK(N+vCE-cV*{T}e%U}$|KF77GI*(6LH7?ec+j)yDjF^S?9Y3)rW`S`!6s?paQk@YLIu^QUJ1d z*GM;Gv6v3^zyeYfX{L^!*{%-y%_=2Q09bmsJvFq6lD{`6N@T0FZu7&4=`MWcqDo#s z8zu{KR~4mMFiRRX+^z9xsKs5W=9QZPGiYs8H7Sji`4x2G$U)uz?zoy`chqOVjj8FT zlmJxK)RolE`&uUaStKT_4CnsdEqYJL8K`F3R2!yCb|RFce1~KoRh8 zIM16o3W&Q7& zsA6}RbkY}UT1qu;U^$r*;r!R{6o5w9S8PUy@i(v4-%}j|ZT0?lME*>B_<}N^MR4uJ z6-KpSUcEnO(_D4BiNcDvWuUCl^G%<5o;lcOZ}cI?T~=><a0WH2O;g_y-9rp zyO$K5=fQWHdgRjDjU!usE!)#8I{)wwi;!Jd| zw(qpnm;tIyeI-lb#?4$7lBq)jJKpJGlf-Z6O=0rC8TbkQwMDrLcU=BmadDb;B;F<% zV3ehV38zdt3C(_6W{b-J%4)`PnfFI@98rgCT?*=;fgc6Hz{xS!+P8iG>k$XFtWxB4n|Y{Zjn&FPp8V5t8^@i`;D!lq*N~!?`Yj70^YyDvGubbf zqB~DC^SHmF?%E#$9Z(*{I<0$i#%z^F=Vp*+Te;lh=NXz0c!VV>VOCE+NM;>6mo#q| z%?hoj-El^gTAkGz8I0ECDE@38?Ky^g&EWPUdna~r(LcNi%mF;5cB^L0TNwbHpZFp9 z4|PvG&l3|7I~_D)=Yf`AV>W@noHBW|UF#bX{i)I?fdJyVl!U=`s)`UyY+ zFdP_l$|M;)KL{x{j0Fsi1bvam-gxCEw*WJwJlfv-J!F5%e<1xs@P3_9U~IqYg+#2W z45`&!<%Y&ojpe%B2+6ht2>54`c(+aZ_hcqFwyR8PmHkB;{$Cdp&M!R0Wp7Z#TD>e4 zbX4A6#kD^TS8;gWm}`+lod97Uh-oGkUWzs<{rG-ykqBvo((Ot)%O+7Hh@&tj&iUt!gazS`S;>N4hs% z)rv1WyDW;*{JM*kR&*a0Mus%5?#`hPJHu;Hl4Zo(7J6)bNxFV1Q`FQbmcPBSbdIp@ zO`FnFL9(j*Lg+=%k<4|I6}MHsU%g)^xF(9JvKZX9G!T#(-GS^Jky8|>%}H(Y1zlZW zvwwTDM}RUq6bq;d;a|SU1S9WXxQW!a9bI;Iy^@egra|5LhGyEqKd>Y z+i3DeLniU(zf#O@|8h;hr*#6dBuIFl96PIX261X`WSK&iE((lmk!wO+Acjn=Lp_Nv zXm%W#U-SGR2g2bz{zlzbHExgECo6B)ynJCd;r}M4#_X@iGU|08A?d(@_Z_hnbU4# z8pTmAjgA%+JM+Y5!%XPk1m})oy_$ucB{HorBWKd;BM&JK3zIrda&l7ge%a?XY+{^d zeUnwE6bnZ2}!2AC_ir+E7ECX5sy-r3?&u|pS{j_<=$FjYCY)#l_ffK$7 zI=f0zWenB7sjxJvzUQKtDvwtYC(M9MdRv<=idaaSUDuz{l>2jIeU32DZt)q0Eubdo z2YqdYDAH^jyH(&MY8AVKA5GDuO;+52x= zUw%$Hx{Fv(Tig;I0!sgg2>leoIdsjjaYz-zG)tc{{I|GbOL8Hh^G>VQGi{71A?F5t zZdeBbj5ZK=wo*+Dw_b6zQqGzxFR(ca>+`d6V$n8MO3BDE&1mKkLL-@d@n4eLIn&=! zAZTDhRLoOBwyDY=+;HiggsNXm<6ArfvVsI;AOZgX01Bx=ny5S>OJvGmA1sxyY-c5X zIh_3GnhHMCk{q-&Om21NA!&Ge1Xh#Iq~9ZKd056)!Z&|BSVtG1+f*)(>y_7ltRp{vp+nM!AH~`rI)Q)SQW*m~ZbYC?m=ibl-su zNt`)lJAy=1CYS&KZ}mDKd4^K()P|v$6C&BZ{tO{~hdX$A2X~W63g|ao zM{hUrT~b!}0;_e}IqM)CIfI0!D7CONQSton|2AeA3NT&%nr4@Q$Y(0pRe<)oUbsVA zFn%0zr>OsK6-&0FOc_FA_{)je-A2pXvjI!t4%1@*(`t8Fa=Hj=r68_QhhAlVF#Fjg zpOU;lh{FaZ273J~EuJnLi;GMBhs8IXlvf+qg8b-CuPVjPBSS6wDZd85tZmV4M_x=t zN6`FHjHl)57m?S@2Q3=TAEIcs5i@Hv`YLvihZd zw>`a*IQ5aqizUnnEIKP3{=d0f07uRpW{hg^xL-l+ofg20))dsp@I(MVLBRV-2y$); zdD-UI`w0z>(q%by$<$!5^ha^C^1!D$SD}6|Pgl#$wY~?eDl~ZVx>b}vt4ghsU-DyU z1o4_qyT52IfeO(e2aUGw-B~~eP$wT-s&^qpSVSEpto(|-30QIY6lNa7&8%&sUg{Wd zvrSb&2`J7YrYd5V!i9LNKQ1>BZXcVadW)gJ2Lx!;JJsjfS5d{heEgcu%?>mQqO=wQ{2rH>c};+f0!!N{wR2=!!<0N3X464+xE_Bpl1gu0ckGqCH|G zy=M!_=1%Zv6cI{Yh1mT1&bwu=bYznG`QvjzN|Kx1j`H1!o@tpX@Co|{^nZVd3**{D zSTM2D9t7qZiHg9{eV|@rN@q8Z1-{jhPfBb$h5xy+GZ8%u%B5DY`Jv=UVCI8WheGgl^ zeIt_LIWSQrW;Nchq*gFHU2!k>I*bHTZKfCLL%2d)UqOXMqI0qUEnrafC)9VBn7y~? 
zJ~wbD5S3RN?~D%G<_f4XBjA`x_@*NBt6Fi&wp_M5GL-N(kgv=HZs*mIg^gjLkqY!< zn`6BOuO7R=sV*jt+0ByF?Lznr)Y$d1JkcU%pJgGBMNz0ay;w$x9pEob;nzz6U5&;N zMO$q0iChY$Wn3K^8aZ-JZcPL-5H?d|FO&+8(dSF(sUFUPqdpMpX#U|j`Eq3CM?K>R zu(f0Vu(eZmB42UmgmsCDRqy`5sqXz+0X@qBJh_Bn^<0+W2CFfm<9zu<3%nT3Vp)6F z!LXbcyEM2zHe3Ua&1kh}=!Q|d#Yq&xW2fb{y>C^yP>%;AR*7wrlxLFSjV2n*WaZ3b zxZCCY3{h!ZSs7QPSssqpDG2*em%LP`Y%0OuLaSpR3c(bQ&lo~KXjPgG>uot;`c zv`Y_E?mcPT%lq&pEQ%n7#OPi*fDc1En^eTCYuF;706M<+9;I2w&Ta9KwHHmkL%(kw zB=81}&^$?xkGGjbSn??SY@LFXyg$Eqhm{3UKp2FFFy=h$UFWlj9}G$`3b~xvunHfQ z_M;v&2aNl{x#aiMBdku@LQO(HaBnzjnZiC}0SyJ1oez@hsWC)Ts|8%Rin0~gSfQa` zQvb+ymA*D5pRi-Q;cvOpdB8acX#Elthx+gq#ijdjxrG@0a$7M{&^-oWJlR`0i&SQp zWOFWCc~o`w!oxA47~UNxnMZWccEFcm=OmrT04yK^E&RaUkv1+o?Z3Bu;Pvhi7Mzpa zK1Ww5*@?iSZV;WzjN?7bd55VV917`tgkCTGdkUq|hUbxC4c}&ZhjmGXIM?)-$Xd=X zmt8;XTX{)bbd#3sYALzOYwrDcZn~a5UL92o+usV6BZR_F;q z>pZcZV~u^cWF&{Cd^(1O>zJm;Y~CrpbeBNJHn}zGj+Y))G6AhHRuJoL7ID7C>mU&) zuH_I(RBgPso397BLsF+jK5jwXn>jHkWQmU!mRMa)UzZeiCw539JmntiHB=*41 z;Au~((mJHC6r8l?Ey{N0zn_fidWYjBz(D%Lj*=25rTV2p<2hzwT8yZ{g$^1z1o1A< zCLh`K_#{L5q|S5f^b&*4K^BT+P&_Q_*tzE*4Q97YK?lip?U-G#|62BX<3YLcQcHsc zM@eG}b(tzkUO+W-LMsLyFzgvfEVq32X@;nsjZw zNXAA>1HpaFO+*Wh!@A>+Y(Rw|5v2`kz|(D^&5fi}tvM|WOk(A>)!}Kslr#*L`A6YP z-y>$yIp8^8I3)B=pmU%$Q4@$Bqr?{Ey8J0f06?^FuxtG8e=V|m=PP~X<@fQA+#m;c zp6e-J`;%V*<%2@WmocQ8*R+G>rV{|KK(yMa{v@;CYph$(@!AfGu~OO4=~Psn9p;f| zemURaetg_YF`tek#!2ON+T_wdYPReEwnO`daGF%I0qA>(&P}BD&v><+2cQGhI4){a zAxY1Th2{2=-GeoaKN(?F#YP}@qO(Y)ao)jO!~u5`3ZT*QOvZJ(kPswFKZ)CL*tHLo zE5cC<=8eYeiUq8uFRShV+}XaVkQvDtW^P|Bd^gnMivpB&fY<`#41DmTkZpWegr|W6!XefE1?2GM^DZg=^c)qq6HFWt%=jQ1Lr&8njWheO;DS3w!&Pz^->1nfaOu_S_SbLIkMk4LLJ;vN=2;+{t>_%bLSYcg zET5TM$O3)sjm?)s;DaP63&?iNV_iZPFC{^_$1BVLl5SxMvHowD@rLZ%q4R@|8cXj1 zlOOW(p#Kcz=A*X2HF9M&2p##eb8Iz;4EKn;Je#M4;is6P+$xiW6r6saiRk37SUFNv zO}O)82P4&EKz9j4)NN6AIT{yRI=sFDGWJC}xi%N&tsM^`9nELu5nBh~&+aXWRwS9H z(!|VNFoQnl>(*$T=gYS_jTnC;K=D#hpGfhn#hTpXHwo8R57%ZXR@>&dI-4ic&J?; z;U}V=BR3%w%@9O9)y!M9g+L@a^el=I9bXZZ6tVjax3E-wgMuJ{lHAy~ZQHhO+dH;r z$F^;;ybWlVmhj({1~@Jo z81`?P_|`ZhY-s=Z^!}td*=v(8XNcdGP{K#wXjID)*|t-Dv+pd*1bargkZvm74_FI~ zda6T~M_z=R9I&!eOVG)bhV*SsgF=dJb%jCXy|!YG%GkQRIkREW)V$!G@3rY;|OzxSS&;kHc z7q$7E5wd3j*gI4@6_nU1ItyHY&$rB;%&~!u)+>R+C8*fdyMi`)`cmwRBBxpM?1qcQFe9!)c@8;4?BVafsW_M$ejKn(%=2C5-YKU;% zrq)3$5Dh~mqLQf?6A3LmAUHY3pynXJL=@X~_?QXJDuSP@-QR)X1YumBp0>WQHl5Xa z9r=PWbd6+~`rmLcB~EvCZrpb)w%Vt4@@>2wFU_lQ(8(A2N^XTOc6h=pGFjNwk9lS< z=`7VJYu~>y1xvc7*TgjZAQrRLT`Hdl;M(*MSCwv{yQ(7HA2|{h;sz#4t|CX5Zd5K; znL2GlB2geM!aDOOtYCEw1w)0sx?kaml}?e#TywSXTmmwlN7N8b*L`i>q9sz5%Ww!) z%vZNUg?Olgp9FD3S)UZqQhF5Z!L^%f!}~7tzvvnTeydN7?M#O}7S!0SsA4>;h?)Dh??s(3BD?(St)g$1f53n| z7+_g(Hg~75zXYS0oB=i&G1Lq*B@y1;PBzv(M)as|C<{pt(^tgsC4l!&;)1F$}#q+ah{yl!B=qAwA^83h>!Pm4I`s&w+mBKKqg`M)kT5XcR^h-~nQ(T~ z$tKwcDMMXYnWf^|A!(d^6Nd zkqnj`5+c};+_6`q`z0Mw-kHVLt(d!XBSY7d<-3i#ee6tC}pFqwR~fpSOQ`i~~u-&9mPmXEBQhOl!-N zHSV=fr#5mz;BI$=VEiVpj(;{LJKlf7fhGL zk#xY>pl&2#p|YFG*_#0%i=q5kZc(R0ONK(6jC0#JL{d|ka?~t~?cgoM1SUy@1hK(} zl^v9i+EC)bDb#^H^DO^oc7jHIAJSuC>w0-~Wc&sS^SP(?3 zso8wn3L74yVEY$G3n$OOOqe{X4iG0GU<~G_*l1`WC7|MYsNu;d;;;ln-Q>UkAOGtU zRI`U4G5B%tH|46vCH9|Y0UJe~kW5K5kYSnP1J>)FiBIWYCmbV~|Eo^7!Pg2Rvw z8FbEPA2#ux99ckD`kCM~u+{EUFB;l2!O8wl1=s*vayF$kxx8OVOD+3xHW|-J;yrW! 
z81P470}-!J{;)J%b7@87){6nDh&0eG(n%q)b6J_r>3JqZQXwKcITyG%53hJ2POT@=s;&syLuz%;smLh$@IxLrpxC&JB_lfe?FHz!c0%RazDgE zZ|Cz!LmxLXGUUO*+Y+nSRqPPNBa>O#TYe-jBh-Lr2Ua@{%E>mP-1HlWsbtJjZnko@ zgNz}(76!30pIP(WZY$%jdj*i!qzB-B=1VKY&|Ah8`^u|>sDpI*28I*GP87m?9{yyz{il*>_U|cr6|GStjiZX5C1H-7l-mVQ?Qy6waLSuVu!-Gm9u9C zT6oEXDlRE3EaF*SIFpf7mQKHmdW~d2mn6VuC>}N5yRdf@yB9`->CFU;TI5J0*eaE$ zDdGAkt@H?2r3q`3y785GzNL7DP%{DC*zMEorx1BBQri(mQlif+kthPt_2Sq?c?;yB z8O+Q<1q=Gx`lD2W^-IL>jQ#6yqk@^V|F*^lQwHE)mF5~rS4{|Zr!%b*gB2KV8Gv40rqBHOP>$mJo#qbMjM{r{HU|mCdH~Ic>B8 zyQ(ahNLIQN@J31GRs*#anL#RCL~OD9-=pM6;IBQ`6>`qr0su##P@(HCL-N55cHPV&>AS?Ls8mVC)>NjJ1Q4mUxe8C;@hnQEcGG;^o^P}{| zhG(1)`~^jR3114mNAw!Wsix|VJQB6!&w+buF-Kge*NS@ZnYX@?+loGD`nN*FrTvA2 z_XOa8V;Tz3N?7r=qI_+rySsVFhRKs-w-Q;BNH8T41t9>~Hja@-R^bWH#Zf0Eezm0T z!2sUaD45_D)Db!1fL$2jLrZJV%N;yHyB;^4-co9ZZgJ0`C^#)E--c`1 zEzCNDa4%c~ehpJ$cP=`1>Q#mja)EPxrM>tnTdqtWqZ0^5{w~ry+6%wzly}dy4LVKE z8}e*>3Gw#z4{TT}TN%_T4wl=D^At!gPRVf|2@Jz+b&8;M2oFL%cAarboKY8m$T4bV zR|h#ab`441dz_sM&jI;D0uTBqQI5lOM8UKnj85XcUo0y<7??^RevxGvi2@!mk>cNT ztQg4cjXx^uLgExP8nN9S(Vwo^SZjj_7n4`&qTpVgpSP+n~`+{LF~I#m9!Ite0tbuA!PWHx5pSkH02HxMNXS zaBWK!U4^X(fnsF|k`S3$`k=rx*d?PMrR0VX02BvLa-`jC{k&{fv;7$G2bjN(4LhUS z{RaS*1T*FSQ_Acc0dpXM3HU#!wtA4&Lh~>u;TIR|w9*d9X|_R%5q2P5WrFaxl8XJm zz=%JieIj^Kw3Pnm0ub=gQyhYAl$>yz#u2%e`Tk4%D}v`*d<6D72a+&SC*TYX!~K?x zI$BkculFfXq4PkIqbd1nw1Q=U*kKmluRPY16lg^L19ZB_fU8`JoA5B#N95N98#Hbf zU#i}evH?B#u?Uj1)7BAY5{#KS3aVz1f|HS_Is21aV&kh%7QFzyOH)Fl z(55TSy4W#x!O&6&w8h(g9^E6=KOfwT+!YONY9U$f8^)lK$nwu4K!BS8KsFJYxYK>W zBR&`#$TyN^jT~j1^!<~upx6B|G*NR_7l`G_g=fWhv)zcgW~5%LOdsLFEOP_vCAM$w zXG*tXC=^s2seZn_K(Blkj008AmNlTEK9|za<=?B(4{|pwFP)gfuY_}h%b}|*#0&l6 zmJKS|8o*J8B$-$U-Bl-^6^K#_sBL7Md&C|Diq6gr2X$o%_z~jRo6>I?opHo!uw~P0(i_<;j^nALQkM{At98g0pF`XFG>=^K5J=%aqm=S z@=WdvZ0wWyCP_)>&430^`R$LjlM%XJOe;o+=ad_(FYX17Z(1U9 zSKq|HVeNtI0td}wU7_IMN;*==H0e1HNWOhBXhE5SFJIoL$~W^&-YzLj9A(y$_}dl% z3%`>jM!$&F_Q($8dd$f$0cR3OK!2ECVl3t%HNEYc<|wdBOjoJ{mq+l*@I4SKdR-`J}Z_M$_x;zx4Jeny6hg(0d zZpS$(yAv9op#QS&WA@rR(ytR?w?;2ZzdB1rfnFl3HJHItL!NsVcWJi;ble??)!T)1kh}!# z6k=s1SrV`ayTCwCGCy>KtJJPyp=CBmAlJLdH^a1e4FKl|RPG-(dxr=qf|4-`XOeZe zatB$3WD{yTayq-^ZyrGe6^d!-#c_Ce+d9M`Mt? z5g!k4rf~1evwN+o9-M-Pg@eg0TIg@k#~9b3wjkaqPCkwth|R%Nd3823>j+NhE}|mO z)l1#pHnkEvz+SQwoz(B)^A?nITvxi8Aw^Azg3NL3_EdWGXzB&-kqbvluht`Qw`x~1 z=#$p~{kaU4VOXgMsT3aH*&KsSCMJSAuJcJQkB{yf9Iva3KFK|%-$G)ah(W#m!I*QA zg#LJyCOiV2v`x{goON~QMEuGe+dW?&4k9& zg+Lra61%k=^$!dJZwhAG{imx|?JA)(JeW3jwc-CukOt$uPAB@aou8#?(A3}%z@o53 zS(Qi)MvtnbbD(<=Y}Y?=iSF8Qna=Yv!=^dnBx&OIFRoCZqo$kYZ8iWmMAM;06gz0! zK1fDJ<5;-=uy}wW8M;sE7V4%EUlf1Doa26j_U2;E@Nj}?Aqm& zwtQY(Ia4Hc1^%K=Vv`ekpW;mW#EN2%A&f{;l>LBbm=wj6L}M8v>|0p>JFyd8RWVu} z@N`y%&l!fU#iA9lI*UlVxsW(!^bw5m@-ePKqdPcad1p5hEMZsvU;Ot=JTUgVAVMeC zsWIE)QPzh9E&)h4XdE$R*p#iy!CmV#kU8d?2`fTcnspzxWkdKP-lg@dg3|o9p5$9g zxZki$>n#A$j5E0<>Xg@67=c=0nS*%c$Ctm{J;V~`?np1}9cEy+rZZ0aw}aXS!Io2Y z#Z^hgx8OQFZEWv+9#sU8PF5PnxH$K^!`ce~ehRI+Jwk+X`gtV*RE^K|5ooVHX!0&J z%5uIZt2p2)SvhmmVg$TAKV4&8lRXwpMB!#GG=)qhc6S525&XfT#d|a$9Co zMoYh~cZZII`daXBx%KaLje@?ppft=0R4aPg9yO%f6xkn-I3?Y^8IpcwF^A%>sZB)Y zdf!R-5;%Ze*?Q#!1|GBfyZpN@{JwYQfufZ}kQJT%$cogah0JBVe&%bxpnPagZW|IR zj%!L=TV5A^bKIK+38%@1SqrSASfV|TvM~XP#|(-Nz|NhUSw&1)A22tbw8qrd?idJ1 zFFgAx{gZtg4}>-wb6D8G>a3`DtOg1!hgf`#pLkfHCcL$EFHCZuZwdRMN>a_TXl%&W zb!3%TM5tEbkn|}h&vojrv-8rOTcBz(zdSY*HVqhRs;iOxdj5%EZlJkxOaY?>c`Gz!C$$iM07M zz5BgqG)0P2_~(4r)Tvc_mh`+>T}wu!)}o-UD(lPSH~Wb2!z%m*dv(D5=Qw}4y7eBM z=&BM)x(8?f{l$S%31-IsS9yb?BL9v>k>C2Ca@E8OD@U={@O4c@j>xG{#%xXStEG6? zU4bF!8_Vd};l%}VwH=T<7rj6X*q6`FTY#<^Nk(Hv#Q$OramhnNw{@(y-GzWO_S-A? 
zg)R%#d^&ax#1rFDIHAknYIZN>99r35?2sJ~)Hgl9t)D+d9!yBjVgdFFtmV~Z+pyX* z*XeM#hN2xc<-B}2ZUARki~>aBXUpkv;LM8>FbLOAO;h!zjh5Tn-RRe4Npcyx6Vq@r zlm)X5WxIi7<-qGv2hjGQ)BF|IWiwnWa6DYrqAv3AXO|4jb00Om*rZgD%7ND@SeM<% zq1=`IK=>YLssc+)Z)=`-{+kbca>YjVH$PU8U4Bc3FiHBehm%#m%0)$Zfq8NZfnd*a z0vsCbxSnCxU$1Vm!Mj{tC8C(~Ro(R9Wx*_W1D%*)kSxREv^JXiU_=SP7DFVzz7xSY z2X2tEh^gr0T837*=EE#rL=bn&T4g=1KG1@YMt@WHw;i6o4CaorwdS8CWi5^-V1)7G zx@yXGo$OB8`29$)1?X1V<-yq0aPswYr%+PuXugv9+p6=z(-$E;*4x*Nx`%7ppq?$R zDr!*}>!@E9kghY*?I)4{(sGA~BYX7%XO-tQ$^HCy@-oKcJ{( zH~-Pfi{e0;G+}%PLz!nbgm!WrJS5Epnj0$JhY+gMfRLNt{>1H%&OwOrlETr`Hc1UZ zzbmbfL}SVPm;t^ek~Xi^0?It1J?wh$CrGj2V}IoLD`-FdBMm@m&M;BxZ@NGfYV}ps z%BU>lO1Wb~jnuLcq-EcuJBD+W6`!U`9nH^Sw@PIVyK3W^v8n4UYUw*wi@YK)paC%X zUO36rI9wupGY+s5VKmSUqqxXj&~D#Efp*~nD>YdW9K-91o^6?HI?ks&5iAADo%of? zkF$U>#4onRg0gZtDiux_3nban^jRKq!mdl0`P_><53NMp(8rLS`#-;~lz&2&7%SHj z>yL_)*r2HPiYF5ZggK|cwL8$KgI9&VvVlu6)|M`yG}@v z%i;pS>{ncL?uGaS69SzaVMzWkdvqO$F7%hdF0MET_Vb9;Wn+(p%`vX}Mj?+0-8Z@0 zPdPVUMOf{Yv0a%iwY&OC104Iuuu#FOGk2R9{y5qNT;-k{1uv8ZaDk^~!V`ZdC(>r& zWVF!=Ctz-yS;hFaNu3Y$$;`+WDf`I!>G{FvmmZ>3eE-Da+Y{7$GGlmY8U<-9uw%SF zfS|{#OG_)K*h7=FfakhoB;qS|pz?eK-*A{)))>+Nk^L=MU>%9hW&I@FbDDJmj*q&U zAIH|&{j4^Xsx3CjGxxm^*O0(gqp;T`XKmC$m^nKVh5$#0eB)6^Z(&zE+Bng{t&=Y4 zDqQ}+mNRrQ(JOeRHJkX;pdemCrdpXC<{H_1G8TcyKQ-A7E(NADD^6!~|3biwI4@P5 z{DP#hus0lqfNUd+6z9D$L7(@cC$>u{G~YRByG}~S+pUC(vb#)V2J4oMoI1~F8o)4v zz#W=hl(J#SLdQbD0ZtS?#9Arfi?ajx4sUjBCZ~-Unmj?YR2BqhchwUVE;Z_?dGzE2 z-d&Z^DdcELL{0#=`{%q8%Oq{&|RpS*shr(V=szR<7DCxg!@e+4D$Hhy1dI{|D7VR{P`c^c^P`AZ1Yfsp$kPC)o~D_W7hWqQCp(XS}c7fbQh4n z#L>-~&BX$fQbeR&HA^K2i#|6?ZK%{pn&Hss8kp1{9%w`-+@nNqsa^iwQh{=3Fd~G( zNEP6uZ$Paf2bjGCa)S3xu*B|tu;`A(BHfaw|Ac`5tqj!tf6BnIU}nSrRR;V&4Yb9k z4};Anv+D%i&S-7ujTYDYzjkMpfI2> zK<`ME^Jb@6aw%L1&{s${6pCCNv)YvgOF-jEuEZ!`*Dm zOLHL^i;6>gl=$+)4CMCKU-OH;EYW!7Sb7)Huw;oLaHfb#I}vN2qrL|oYZNhj?lcSU+*IE z(i6PyIUABHK^XL&bQqyfHxBC0%}eAkj z4AETlUn%5k6)L@m)Vl7Y?FoUn)(<&!zh%0iEdfX9r_^7Ro%+;u0ZuRyglsCqZ2#9; z24GNK=4=2}l^~eRTe35uZL1q)9~C)W6YgP2Vb_ZR+IPe~yvOxJ$s;hxO&zmoe@*o` zhwO#tRYKjqDw)+7$rK?)!U`h2l=-i@NMev!@ z*?d)Ze+W)0!7yxuvd8b}Kc>11t_?&vTuC4_1lr&|Mg}u1TX`62XF9QM3~5ATI9KrM zm@!*S<)O0&(UNc@g~?zpe#ESulS3_pg!|T4Vr4#j+(^5X8C^Tgni2oOU;@F+#s9!y zFw$hGGEgM^j9*TG6M?s}*thyeyYo5Q9FU#JK=SYQdR3zzz*Mq&OtR_{M|akXifg0A z#_+XT7vPg$t>coguam-UR zm&#vY`e2X{uezZOvEp&cjBk*i88x8j6pjT08D|S4u*7m|bK81u)Sam`rq=aAsZe-K z%}4Nb>*1C5aJ*b91>G%+&wOm#mz!c>Db6l`tuvMUc&;TmZZ>r4E)(@q+0D|0c~U$9 zyTF2{1p4O8(pE>3`KUOmZuOm?_k_@{pg_N#Mf>ULu1&#Z(>2RooM3+2G08rlJ{dnt z4bomFfAEFVZ$xtL!792ZHkvA#gCR!!$R^d(8Xd+NmB+ZM0I6H zUSHhQvrm-ejZ6&I)#nx`Lv}p|9qVKV2MLU~#W4yD&pb%=zK7NTS%&MbD(-&DBiwH6 zeV^!Xj?L!`q?uiIC9I?o{P5=?udjZoRz59fQry?Cra8YVIs(au%oTn3~Pwb8}hZ8gp}(U+92Y|A!@^4M2LvahM==uJR zf1rF?xdz8NG3f#FyCLydfxq1?wx|dIu0z&*`9%Lw6e37_srDktr=v8}4O8#MyMKB3 zD6~!|Zft6IM3+!8FScQnuWL(wWw8r280l#sLY0jM6N%OkX^a}?2Y-a6({rPZvVNfm+(9DIpD%rz!o>W6?@}%$G3h|ML&TSGlhqj9L&`$_<-#&JoPzgfXQI;g> z)`C~8#3^#*wxE`B#DI^KyY6zn1hDWyPpKevHUoTQ1>gK?r=5;YJVCeF{=lLJcTfl9 zt?ULnW$k3<;kTqYyipQ!mFWB7m$93VgbBO=*{!;J-_Mgj;>~?%^SM*;pOfS0fUBLrtlfSIG zvVdM@BWAvs8ZmS!R>iu&`-4hN{(XDnF20(@4dIegWjoC8wWS8s!rtVH0FGI6QYFu1 z>Zyl)5z6mtcW^;ydsHruREmzd``!dLuff=PPnCQN;?;xCY7;@*Jp~#3^_)t5GMh9H zJz*cL&iSIXxvswQa}Uwm_5R$+694yNykZyD`KgZs^`cHC68bPFfO18&yO* z`i2{Hu*_FDd11#uwDlwhTe5`F!MQ0wJ$R)*?77+*rUNQ#CiQ}P_rs)`2wwYR(XvP_ zr%cfWY7zzGQFxsC@}*Z}C&AIPD&w&y_)-(}oYJ-xzF?|l9HkH2M&Q;h#!X>h0-vxS zR*5#wJYn71dR;heD5J=bIZ?05szlsOcP!<*nI^`f0Ey6wf;EYTMrf7MNUXEKXZ{-9o6+f^rcStncaYa*bd~c z;8?h0TTy@WlC2?Pdj}<(hEB=@9i?8s`pwyqvlv6y%K211aPuEuk)y)}8VYGCV@^?L z7YDe@Q~vy1hIT|Lvkv_kl#9x6%Q5uHAcDgs#DL8cS!l;&BmG(@gZ~4oa!~#oRuTSV 
z@h+OWAYMjBzOv6T3^&S{Nq6pK&Ph`|4O6KukT&nDTK7I68ndG z3o3kA?P6q8P-r5~cq!`7`<^G`kv4l+_CvLa(;wRgo>$pfkeXfj;Pph_oVb+zn<>|z zIPVZrwNpN{=}vq(c{8ZX6S972?_S+|2)VXemZgx=awe1CbYMStg+rpZa9<90FEV3b z;_4Ll@iUZE_+KFG7kZo}%Z~``W{{^f8vuvDkNihqFOzgW#55~vYrem)k~piB1am2q z=Fj@osKgdRMbSqFf}^wnz;ozU-x{iCu4v)$*a$sKU1l`{9m30ukk2D(hi!0M<7-Gr zzoUp|iY&lDt8<9=r0f;Tn1qKWtjs$%=(+yap(9%5_w!pRV0B*~*Q{jjp64@tg-tVW z%AkIj9PgKDShp^h;o^!%)J`2Tc>Is`yr)IOicEnzgovH4O@Qc%TCBwYkhXv}DFyHw z(TJV1_7SSgLWk)Mo|T}IheY>R->^9ZTN>Yz8Uq^mbWBn@&_mEUVl zaqP7%Y6XpYfLzKZA^A;t;3$_6s!Z{>yT^TJaJ`JO7x*A-g+!==*H-ad*TWre7banx zUF+VZ+pk{V8g?_m!dpWv(b%Az%_RB_bEG=8u=F7iIj|9egd&|0m8fQR3hSib!DkTx zX52P}=`w=4w*x4Yb-HvA1x0x_!sl10cwj;6ZYa7k0~?`3sjKST@S5K)L?$0=z|8VZ zSj6yfXWsAd2c1`oArApBGS?yoIc}G-Xv!XNkVRz@;ULTsRP_9WT_`2c^8O&$TGZ(3 zQ)FuOp&Q1bxwdhoURo_E!uJ%Od)}iX0w&D5*X;X z;Cksv^@2I}D(({d#qIOg#vVI=o+R_W58$b<2d|h<`o$MIO7VxBOZYkwtLqJp#qJrG!$1E?)vb=M_ zK}VIUiC9H3U0Dt!T~u~S-wotf&x6@OLbYBRE^5_WFR9lM{n3GfYgqUAUX8s>bd$(# z#l3ioh)IGdGM^NeSJQ_qQJwxMVhV;=1nS5=;mfzXxP6OBRdu(ui;PqYy-z)hV%tzo zN9QRspDdP*{i%VLS=E>=7ubJ&|HI+7eq4b8%3TgS0wbI~) zQQ$}VdT?JJAa$gPTc+$dv!|D(fkz4;3VXsB;taqE6x!SDp2r41Z#yc6Kw(U{P*1`Qt40cn+ zj-^XQ_+A~vh-qZ3fQOHk0QHD~X)N2ksP`4acrO@R7zr1-gq?F3q||W$dXkEulPjGr zF=NiHfYnR8@&?g;-^7ObCV?*X^u@?oH*w$k$Jps4!rl=Zz4r)8HPCC_h7YIbNQNmB za-1T6dv%yDRYbaMBKR2irTGX;hxo)OOwM%J=5ZB`Pw2;;TuG4Mvubg7?QlTR$>y}n zI;>1w7C3`Au7~X9gx*imu+CF8pFe97)93E*WaGquGNMt5ydK!^8eJZfx{+RlX8{*@ z@WrL7qE(Nb@Vah&^#&rfx`EKURLF{5nlZy<@6>1mV%o~vTy9|VC2A)NVrYtm#sj4_ zwB2{p6yekK_a9Od@vDR#dsNaZI+tuGOUh+_@XO@*#HTVW0N*JBFyU*tx4^28h zMTMX=F)-=OnIqf7D?;wlJZP|fRArpYXTx<~bs;s&9NTV?uW?_Q5xv4as%ks-;5Nba z;Mt(VUO=loL7`LF?P9F79(Q~)*MmD{j6`oulqv%D-@VL$6Ni3#ivJoAZ2xmWfJg~u zaQIL7MT9U~3Tt(Q6nTY{oW@s5 z31yct+jIv+mH0*Z_vf0q3@lxQa};JB&oH(VG(a4LsH56$3>u*}q^RS`@x0uWe4NE7 zWg$&Y_oHQa4Y0&|==#WaTp@j9=+>5<7T)h__(1@I*!N}rHmKb9u5Jekyteug^^yiv z0;&whDxwO1fdqqsMVlNVW>6XDLxSvD0N`CE?~7gf4c}JCQ?6s_{>(=mh7HWXwAlr8 zFs#da*{$8Qd!Ii!UHltx@Nlk`o&S#joPQt$fd-k)W(JyproBp`2(dlBN4k0B6Gp}) z)K+*vnmj)J^e^-V9+O}hkZivOs_ztS&bjL1m^!%{{P=94=}k5bZdmk28ElM(6Eb`j zoL$DT4GZLQ$mrG_)hcw=cNsA^pjJ)j+xr*Ol)0#tv2>>jxb!|=9dk!X!Z{A9IsVR8 zm}!1wmd4Xp*t-Nu&GJabiE zexZF%euJiqgI1yNY}D9^^8IKF2=x5Vrr8&Yb_K zcNo6$DpKWeIlNVZ#pGk!{s{}luaYFIc>Wl2L|7rlQjYQ8jd4M?9@rOWCIpfJ_VKLN z$JPo-oBt}ab`|g*?L?TJc=x4C+~zJzbx=HuPmQo*1lXRg+gX=*9x9Vhonm2oA+YQP zF|kH%rleA_o{TiKXG&dU%N$n`4fxy4HHR8CYN-vedy_B-*Ebr-H=nLlnddBtDy~vc zMKXc*X80-3-?Pfguy953#n$^34PVS)r{ybK<_KV)-MT)(hu|81{sR?hLeW72vS&Mew3h&?&?VfheRi{Cx?R&hnv*% zFC+CIt)HXu-|dCqAKR5Bn4$fj_5w%>WJ&}}9ji(5{O6r`x#>1|KnG?SQ&HoZ&(YSz z7i^r@bXu4^BqiEXEI_mjpwKQSgjj=?*^)r*Y4+%^BvZzqL$)D!PmW+zP*Oms7R0ty z`+lf;L`mTG=Gqu^xe41#eNKCE;B(VX>IBQjdRS!;H-BZoEp^YS&msQB&+C_?k{`HP z#V9i%cB%A`(Wds@v;x8+pdil;+iXnTLJe!;?v-EM4bw^#>_bGzP~egEo2aYBR_Wpd zIz&gqz?8wcCg8V+%+d*Y>psdhY5sS8OIbwvglO zNKrEIAJ8V*n$6J#QS@#h=mf0AmC&vOf8qu)Wo7(&T@H>OIe=1G5GGZ|(2*&pGBySIow{e0 zW#_rhcLbAB-@kHfyb6nx1Hut#6EVi^rd}0N4m5GTnj~IId^)-#@=1)7oC9hi>!88N zRR@9|bzl``X8rk$w0EP^cYF9ejNh>hp(F?h)8{kc+_w;A(DTmz0>s;Ri)j0Qvl4u3+N34+0`HYJXFo0zAG_ce;X;p zreA>#BpGcy1G(B)0?9%2&njq~aEi(T&D$(&P!E5l?yZHtyj^!xEB0Qnkejt+;7(`F zjt1+r6i_hc%!F9@?rWi8lkk24jr|vky11tI+#;l^WKSb(tg}Og&#!{OIXATp4!_zmgc2YJ2)PIivsvXI6Ix;fVC!SRc1K zQbo_>53wQTKmqiHV-LVbKG2k(JWJZXKT&;}g_(RF8!kEa$-R8BlLnK$O1yJkwrtQI zaUAC4J7Yn!s&~!D-43~u$=ur-1bk`7&K~A(6?zx^X1h@nIFx5-mZ9dey1m&^t2!@qU+18IS7@~j(TsSi)Ak#~Zy;1^gr4pDKQ5&Jt-eb})qI{q#FYZxI zYjIC^$K-?t?y2M#u5ejJ2#MiE{0dE3WZ43C_JJ)g6|1P+M5elbH&rWKf@%PVD>M2R zF6o76_S-e4U%bNNpEm@W(^&ST=87We-FvBr*&V(K@JHS_%1M zQmuihaT>0LOVYk;XZ1b&x!FV^N%{v~t1sxuFzpl^4LeoOA2nyEB48KWvTD*NTA8*y 
za(N43=kNGYJx}q1W9dJL+{XWTy>_lTkGxFdHC$3YpNs=Umyk2GnGm6!X8{kQ z{&N;JR?zt;qcSvn*!(Z5-YHC!sK)|r)brZQHhO+qP}nwr$(EJ@en0JNLEf zrM@I9*}Iagg@K1NUtmiaC=KsFZ(apmXea zPn6}8PI20?fKbD(%pS_opD}}3Sr-_BP#Oa}PqABksrWwECn_1zNDgqixs`PMdXQCq zG19MASY8cFC0`~E&kDr93wun<21jLw=gO1=i~9ayb-cRLw_-5&eFlk;&IX`1u>^>$ ziVQ#?vFJL0U4vUn&#u+?k{yAMlib=L@bk%{6sy>+$?qo^)Gwid0p1t<)GExfeDryU zY4qz76cNk+Dt2Lr0(WsBgYX#S6L~d@ij{|?r&<$Gy7QKwYRp!5>sJZ1|9af18AFyl zC}p@m>0d2D$5?TbQ2uV~Aiw(j;c?!w-QGW5PAD-a$uKdOr;CQ;n&Bmh+cz-LSfDle zmIXj6u|J14DYq+m>&UXUxYqzY+^74?p2XF)s0T|~LqkEoNa{AN(g&hS$Q)USW z`|_Enr0_adi!*OAh!PwGVLd=?4}bO5rD0z1Jh1>ZLhs#H5G*%9-+80zbLniznOy0*;Y=)TjTLKu&prCmTdAM|slXWWM~{|s{v=>x zQ+1(VdtN2p(%RhwcR0(`guCKkvCV@sxfR-bXht2l~Z1nM6KGY@b{td0vOAq0|>RXDK}e;29|N5cm|mo=~>)Mi8iw*U-Jw}p+B&Y)&I z3CEH@VWawAh-->5N``MJ4=@I2gtDsMb~g7OJt%2;%$)?&mL{OG`L<@J+c`3uXi2M) zCqskpi%Ji0-6`Ijku#{GTwln=0NXZ_E=d{UZYRk}+r&UBw?v36OV%ldNGjfLdjy-) zIUMbOeE;RzFt|(!c9ulGfl^sAq-nwFDDTCUik#|Dc?%~(;_7v5K%D22O-*p+>;$5A zPH53Ul`s*5P7$OO2BWh_zzG6CN=mv zhGWyGra7@z={1D~re*zYm|U$~=5RQ^ggJFEVy|2xiFvTwgIGY5#c{oD7oO2?=TrB6 z^Vi^74-5(p1!VxU*WhH69Bmd(iOg+6#5OQkWDNmYN3#@O>xtTT{3W2n37nEnMm#Vn z!3~x4vZ9i`Xb?a+42PdFNBbr_#dt`fsRXXqBaJFO$UyB8UQ1GN$~p&g`av6EQ>VPAQ;Is zI1pgQ&Yt@ktQzOYQzojj$Ngd30!s)t?&4FY6ivlp5gIsxUXY3!)y9}bO`t`&@gbw( zF<*0^SlKI2vYLj^7GLvT4_P84uP%rl;%8hbzJNmL|mjM5O<@cXzw@ zT32?Tfl1_TbT2cy(jpXoW1FJ_8q+5Z+DTebLAj@osXRrh104Y#?{lw-B{|ni0gY~K z&EVrAwv2M^22^p=0US+mC4pkt(^VAAW9x8&FJb)7K4~8zg;bRGE*m$ArGP6HMo(;+ zeC41Ck;YPULRFa;1MVT4L!RU87uk3=faijqLsy<4Vs#W6?AWHaTqr5oexU6WEGZ2&M&!uX z&M(Qe-qFqN^N68HO4;#I%*e6(Wx5ohxIX`!Q3Oq?tXp=SFo)(Cdn734o=KCKR=_Nl zgk{R;k0n%n$~z1+2YakALbSz(t!l)i0gIOO<&t5BwUF-QWhm?%y1R45Yb{Db9E^GI z-xmrd(n_LCu?Tsu%3L^4!^v`jt$=b=T+653yCBisYi8ZRn-o9?e?(Vp6xXld*@*}O zEcR)|bD=_3H)TXvIT7oF<*p@3)?1U^m;<44x@ zbaYb>c=np*yf^5D-B4D$eKTTq)t*jOS2Q3e8g0qxIEg5c8YaPZv8+6yjsX)3u+hwL z@=@M7d&qZNa50#o`xaUzBBSeua^$OA z)2y0~N!1taIY7r1tvvdun=!tFbpp3vw?I>hD5!X@MGAhUi6RL+N0Y$QE5Ane&i@?0 zgi{*p`_ElB^MCKU|J58F;Y-i{@4GJe4^k9gb^z|b#Ak-R0=D5pK(UjE%%wvChl7cH zC7r*%X2p0=b&B@S?Nj(B`TztTvxRdspp6_z@Q#|#+|Utk$yvbQ-{UEBxi zoO!SJhFLrzZun0kLT?uuq=}eU?YocQsOr>_!V$k1f=6g8lW`X}J?+$uef9X_pTFfT zYdtal$;D`JNfJpZ&4162P#FC@hB{8LMlujuo31i=?-tikSK7<4F$GsW&95(DTAf;o z$dPT=hmrnvO)>zp+uAr4Cw3o#ut&9$AbU$BXo&*qxL6ycs1hBAex!lPh*ECLS&;C| zlfl9#=U1%N8AP~E+qYn0Fvds{R0=@O88$^iZCin*%fdyWyYH-3?GR9+#YYITQ*+`M zK^NVsW(KGD*1it&EeBrE^e#)Ba8EJC7c^f3SxiPBy$X1tTAL%wZYob(a^SpC`Z-?< zTQ(3|C7>I#qIaiys!Mdwt!~+U*qs;ru#9S ze~aBMP25#*FB^+)ycDa7J|=3$I2pxBWZEE?(hvDJ!m5Vnyg-ryePDlWUSV*x+J6E; z8?>@N;i#rGB!mj<85r*a;e5`!Yy`bG%4^MaKEqd2 z_oI0II}?w4dn(;2mDeY`rho~BWQs0n&gM$&P1{T@`pL2_TUG}h8GWu&=PLpRM^0R~ z37`X4RnBCzAV*87)2=42fEBArU8Xxv%2)<_;c1uGUnP0$nmi_5h9Nq!5Eo!)6#iX) zB1nC*Gz}u$%?|D-<`z8Vw*iHmjLXG=4&$U-s1}E8Q0%%GE_WINq0CGxoi>cD;Rtl_ zeg&`^Dlm|uPaayEBj`Lhf0Hj}%&r#|@6TfaI^E#5_=5St8E$0u#8n9Oa2!uxdqyDT zg-kbVUajTl)-fxbY3D`PLS+A#&L!CiDQ#0)`1&ZbSnyx{f3Kk z08r%Vti7)wGaRX0ln#$KKKaQYPs(sU8^g=|mL43cBi{M)@FTkBmcdY0$tvN&#fq`7 z0FI!sY+ZU6HLEeXtLlxcxi;xF=jrs<*zL!5o_$HFe}`i(V*9LWr^9R6o2ovkoX3H6KeX-a~I2=Hoi321`BDxs z1`INNwkm5>gAWY#?N_wR!CC|=KzSU9ci z@ppTajkSL0Xf!f+cVL=riaQ#k9e=W>)XFCh7EMHZQ}`*5kv{|QjBQ7+%AU*&`0H;0 zSSvDnjW-oD-m04i?52MC*IQkfw-OFMo7- zb}o8j;VG`tc}N8?Lvbj^Y|$uRPdD6Lpj~b51<#vaIIgah@EnBuL)&4&+KWp+YBmHz zM9}5{Emf!)X)B*tDp%ejAqy;uy(};x8AK?zIy5O=eI;JP-YQ{$qygPWt)4!f!5X<@ z;fHIr9PKL3##9*P7d2lfuNWlk0R7{#3w4_-T!1C;_UkPCC02^zOV9oPCMHL#2-SDr z5E>Zr6VP3Wj9SAdJGR}VPuS;a0vuOoXGMs+oC05e?z^Q@8yC6`eJfFcVgq?#8VzPt zcDBKb*g=dq2T7qvRlc;i>*DNAhYpj9jRjpPnc0g7sBmLLXAV%%_PV!vRWbu07 zfFcAVKSf&+B*ZwGBzm>ak%v1nx%aE3OYXq7$~GTByJP+T?AhMs`Wj*LAdMHgX7@OM 
zCVwKD29j#Y_f9W-JNeL)@>+!bASA?J6#z)ncW`r0dCr~CwLgP5wULa8RSnn*7GHh? zQhm6Bm<5ny{qZ1BFq6_Ow=twKiwV{K^R6+d{hXmGflBjema&JwjV}%s#O!cHZFzja z;oqA`SW}1Y+_#s$NTX;Hc$uM06Ah6-;;4pDLbQ?)EJ|_z=iLkh2WKha$m0#F-|$g6 z-}dmld`2e>g1=s}!s+rlP%+J`7&42H)ot|Fj6OVhk|Z0nTDzgSZTM|$^qBC#DnVGS zURsMYlZ=~q9z8g$*i-Edlt$^kOqd;bY$trz_@Ucz)uC&H?v1?@S)$cRhAd% z@^$5A{daqlh&a;l;*Il=b5UNwg0|~cD5mALqX5K4lzk)s-04XPpwXFjm>zq!KpmqP zwG25K(r z)yC?7(Njoj41)$nxbu-6hwWytIau^k@>g0@xpq@9uWS1U0;b<2WeTEVx=2COub@>8 zlN%+&*3&DIk9de2G)>7-7vT;dRmk=mo$^kIERW3~%nFhfUr(boV8A?riUn(EOaj)2 zmi4i>%NxBLA`| z)RTYxV{r?N{q%D8Wfw4k_fG*y()2<)3kk*lx>49ih zig&E|mz3zgd%-*Z_qYxE%9q~%KdmJ)eLG5pGFajSNLZUVZPOuv6$1Q!c-JD zNy|^R>ceOyi&`y?9o_m~Z?1B<>D0~17+mgN0{6c&!iiVwqRH5^>Z1lYuo+Vld83Ck z0cjQg3I`otoe5gh-w@p)?M9AOTG%Tgnc+Q0e7Ib*MW%0UUB}!+*N;nc!Pjl_ww%Xw zlc}8V;jnCFOit?SdjY0C^EmERc^zPPW?MlT|5D}Ij}|HuxcDezMbXDF7VPv8X%qRrnqd-3Io=8oL$>a6v!Ry0(+KH(9^7LjzYy*Yg zn-NTd32I5VOBKJjXbUr9O4CC-8J>Y=MN{Wt4{EF&T`Wki>6)(+ZRCftF1X3x@J%6( zfKcuswuuB{D3F6i6RE6@qi>g7+`lQtxuF@3OSVE**9Ocib8z*Q?S#yztb}j#iW_eX z=$+?jbk6zl7a!@DF5xPrA?*@sCgT?P$2j$Lsa!e<dxC(IlONPW0$HD-L32dv)wP?a}!1Q8dg?_J+s2Tt8YWGT-gX?AZD}|?0Myf z&!yvjI_RXno@U>c{U|OLK};(A@gOC%xzW&sZY%RWY&)rPDq1wmFpx8Bzd5J6c&$=>oQ(Xx~u3l)L7CZ>-agV*b ziz}E@qcINbRcN{D$sb%HRf4k@yQ-;R2}>2}dJq0q(&XQ#K*4z{@)dpNMPz7B-3R+Z zIxWZ50?sdd`gO!I5Sm61@ZZhva1It z#0!hd*SUErPjx_S#^>PomOY{ClP7FMXzE$P7yI1RYgO8(4^wMCg54x;3aljsxM8(g zDqF8FyT-|wg^T&>I|!I^(}i^*IzsM|RBhsT7*^$vl=`Iq=TCfD25~Mq8_m;azUrwB5?TNj9eM3a$#(Ylu&txDn>ospO z#M48p-i0vKmxGSetbDi<0HnApXy7JZE)n)8OE9Pbrjn7(MR^>R0^7(cO zBI+Hh(m%U?{fM|lgPBkaehSxI3qC{}G0mh05nHT>prVJy<4%%eEZOv9%>Ku&WdN3O&j*0;{Z-1&slrTk1{7pA^EjqdHFOXhR%fli zyz$bai3n}rT)VpzR2D=dv_4oaBi4wm=OCnVF(ykXvU=%04~#lbL^whs-pWZEisW!c zvhB4!$My5(h?qJvfuR#6*iY48jy9KriP!N|G+reIaG|mkkjs0e4;r#ltQ@$0OUm)D+%sPb6ET_mXDfuotkd1YmEJzx{FA- zRvtks;Z(Xq%L{3>g7artt94iw30fNkmyon5d#@>A$ zKB!Gr7+YO(d4LL5#9I+b0%O+ya0U46<#0@3yEsY0Kfoap)UOo9 z4kD{x*N`!+Agzc7Y|A)?y|$=j_*hLrRNuQ!&CQJzE-#SW6hN?_!k0%iWnrEyaS(6W zm|)sdPM^<0mS=GiF`y~x%Zq?X_%uiqL%6jNKBU=DnzE8x!bZzw+|nipV(XQwRrePX z^;s?Ej73uQ%{m()TW-dZXq5Fewor>ZY2gMXsS8pb$nD)@Dhlg3&e7a)KT37MnMR*> z-6KLbBD{z$B@gyg^Ek~a>){~u4g?*l7rv1wz2mJ3{o})=V>@!zih0s$^)VA{CxeuL z5qt!|e9j0p(|w}H4JGPb{YRKB&NFt}UMtj;5>L?cj zO$C=G!~iD|#Y9F;ZdIga!;4@%`@o@&b-19L0RpgYa~;BV)Sdo^Oz)5BEJ3s~d}3BI zPxG3+@eQ^(?IWM^@-Z@_&7RHF!EV9QR}N8E?#yfQ#6FF(dc0I3vG?;;sqH)g%E!9= zIe3Goee=LO%gFGfUC6B<)}m4=1edKHUC+#{JD#5~&farktVYr?EQiIK%Sy0xpUzuI zec0{&*Dzq5GA?%2+@9v)1@UNUw*Ae8a-(o+93C0NJMzS%+^HbFDksIniLisEA;Xf;CT9c#1~`H8>E&f zg{+W2X7hy>^-`5B;c!SUcg1`)Qi}XmAeBJ(qEmZ;R;ZX{`JME$Y=)#PP6%K4r%JXW zuR6}ccA%+S;|nlGL6C~Xo~R1yU2%4)S+C|wXJPVz$d)*RTdq57tcshNUZM`a)b{#D zi1eAyku_pg7T07~eo|4KR4NAkk_v9XGc+Tz_8I{&6yEamykE%qL0jpTClCY}P`!P7 z%{OO!HdCq~Q{yX+>L^*@H9U3+X51yS@w@wPB5_YKFKJl;WRO@2AK&D>PibJR(-PVC z7`Q?8r%U)K*9q(}vy>j8bo)Y<6wuHb@@JD{B9jd=`VMV6Ii|#xUQCCH3)koeqrcH5 z0#SLFt{t`=fpwP7^STnN2}ut4?qMfc$~LpAjjt7TNm)N;GnR0r@G#krZ{#D6dkZbw zH$A%+bK(91qS{>wNF8Ab+DaRzO?e(`bXw>B+R=z0aCllA8qO+$I^k;l z+p8*~a3M1#CmP2x+xl{E?Cjai6F7}r_S+2RtDg*1z`|~@;M1{lOX&o9oU0QZsYgMn zi_(5C*LnU^bMQcuLdhfv@t@ts03m)pJ+`<9y6BIq@CrF7{h8hw>$F|PF8cCo$mxCZ zPJ47*6k9F+GmDAWgr5P)WaAnj#j*``zz#qzNIYdo&6eF>T!QN_pZuIx6!mWOKu!d@ z+jqN;jwBdBbGpT<-&Rw>Oq*C-{G)r#EzZY9cIW+WO%?*wXCfOO{UGe&wITsonf-CP z%ZoTuGUcJM;rDiMoPG>L_MK~ znA~|U)bdy&;KuzsUg+qB`^|>`C|EgrEii|#YO2Xz|{|@IxznAwaVBJd#(5__h=b+b!9tc<;`?IY z);vFTS_IpkA@E+kcOSKv09UL+l6FHJ^M&8`C-={N?30LcZ{TsrjXam9b}Hv06)wmx zBy*swTQ4MPhqL^e3$)SLIE1R%QJoG?`h83y*IH{%%36e0UoEkZ8Wf!v9W6yVvl_Qd zXfMb^rm9cB?X^?XF=b#TXyM%E^HCB1BqOg;(Ch$k?|&=JBQ{Q6y7D#Z$wg4z9hzlP 
zjfIuVWVS0);XK`BdoWwe#|^0*Njt)GtJrWYj)yJpAUck;08H|BoemO8{i?1cE*!ZU zx!)QVg0jU5W>OF|VZ^ReP^9U@I%$~|nNBdPNziNu4#_7m)}ChO8%pBh zY<4aR3T;g)?bs&T>lmj4LDFaN#YGK2;DAPrJiLpp+u2YOzhO5wwn$F4T>Q;sK8m4$ z8dcDC2aChPd0!bdI`hyfRDuKrI=~+BnK^UHX1rmfp7pcIC1se+uun=7M}DMMQ7K5`M+pRNM&Y?tqr3#*L}r*aXClqm<1O+UZUho;o=wI zl{0@Kr$UDTZ~D*f5rm(SA8W82&_@#;6MZOeendrwf?Csnn?Z5Uh&l_MgwU=M81>W8 z5x^$8iQc>6qeQZV^E;3%j+M@ay8(F60sK28aN`cFx`c;fM~O~%y&DzBH^i}6H{`5c z=^2va+vZ1S!x6j4(i#1 zEq1i0K9VCPSQ=RR*vmS%zFuvL1(*E}GMOUsYjrAd-(;BQJY)An&oHkNkJXnD7A3=_ z@1&_rd19LP>f{|4Ghya9imaYTMkCumVhlU33DQa*F&Zv-cFTtuz{ZJtx8#Gjhg-{F z2DbiWF(DH!xv22Sq85Ai))@0Gx=o3n-o}4#jfo!Q*sY$B!*k-Kp34bOv_CEok%>Wg zSZ<8uTM<#7e=&pf(18y$jZJigjBhry@r|usH=NiATyZS~5RY3@Wg`~xB#I>uODASL;vV7{hyn@+23hF*DWHJy%_|xA62XWh z`Q)~gPDNdOP;4tHczt5h_PdU;!#A5=hSw!(F*@7%*_^IXWNM&J->uXh)e`i(Qt&D@yM4?Ad-zKqI;QJA}v{LC&xhTk8n`;_DOihpYs`U?7eDl@G zRKsb=Y?ANhp^(^V8QqEmw&v^0Z2GN}fe8lJQu>U`_Id9=uon4WSmXaLD^T!di2pCF z8>`>x-*jfZWETRKRmmVv!486k-xpL(K#d0m;0yiPIcCS1EG8HxJUHgV%*U zj=K#$uu|1SUZOM%oT2f%*BaHi2U9LtZ|GQ(44BUDEE#X~an)YJq1`v}-4sg%G8``o zL@ypcfCxx(eH6xa9=yYPWy(HsQWYDm%`ZW@GpXU)@3wV>A{CAP>09`;ZFK*JbY3re z;wZWyiG-C=_9cV5LL+T9NDh4JTkfv+#}HhLzF~y$K(Jc|1d#jAc)hfpWU|kjF|)Aa zVHOMjv1)qxP+gQ@EdiUid}jReq5Pr|FbuJ>51l&43DI=!uK-~PfOZ|?W21>%OKK>~ zO6b2WeuvdsTqd_8K2G(I-k{qoj6pg*mL8#p zCcmL+Q|Z)eFe~d46?YQBq+)7J{)SIMta{nlA(h6e<_vs9(PvpW6t*+k%k zc-%Z|d}2q*sy{E5AN?XzPE{)Yi4W|gBbT8P`JKlP%LTT^=b&4-Jqy(dk6B0_q;Tr} z_}E(E547ES@&Vf`*@iPRwZ$3-=gvA6l{CSjU2>S8`o|yI7}a~2hf%}Iw-A@%yYFpU zk(-rgQML>MUd?DN6T(m+0r4O7nEfw$ey1sbNcb`={}(-I9_zVc&TvJM#qnCgD6(eJ z7lYR_Ptd;qQfW}04UB#zH4jZ$-th@Fd!tF~;D1nRJJCqdGu3u=}hB$k7I19!~M= zcQyzLQw~3i&+v1rH4y*U@3)#QrRQ5*t{}1o?NLm^m8sZ(x^+gk*~S7l~J3rThL6obTyV+3$%cP0{f8K03`wU)&fLpD!|)+d8O#6c;M<- z9}Ys#V@H=J@K<3A+#@ zk4PA32sAG{z8_;{`WO|kfDlV(+q5Z0E|pRBA*LdF_-{R`ppgPzGTDX~Vrjw_J&-;> zzwc#1N((ymOk9w31AIBb3L1Z?+df~dko{1owcEfpuT(k_?0M@*^yyoNJmTcWMQm8V zS%Qx)G*kwDgCOOqtSev3{J|${t0mq=3S++og>g(pMH38LC)@5{V=N`z` z6tjY2Vak>Y(Mwa$DHWB575eUcV2CBcZ)d8c^g>aRvy!V-V!UI@$nsYi=B8+?5RI1x z{y&iu@xPJtn|Fa^_%dSuJ95B(fCD~6j~FqV$GU^9a4St`37Ak^M)gQUt=t3f0&?x%3(?FkBS z?xOhx>N5KOf=v0@Mf`;_mnf4FV1NWSpoEGc;9QEcpLj^Lpw@|6M%C`F^C}w!y*YS* zv{~OS?A^>#j1#De&%U0921QjEL|Ius(RWcXWDe-Qs#oll>7!8%q$)9;er(R#3TS4d z?-LSX9MSi5G^|(G6YXpFj{|xTn-Tis3RrCsQbXe`bV}6^k{tAk_UPk_Ac{B8JIyE= zMJ$SvtbMg4s_4J%KHorLrpQe)xrqppF%X1DmQ=v!GUEREL6yJx_!n&oo^TV5(-Vx| z{V$q``SlVIm)yNsOVCJXPQkjB|O*tZTIGIhY_$||DbudMLyz6lbqjjW)q5Pr~J zbLg|9NCduNG%nzs6Da&BmMjI;t;U{YE#@^VcCb2sqZj%PF~y3PR_cw9HUlfugH zGN;TxZavpvVZEfR$smT5tYoSX#(P^#=abW&t}zO)Y$2o+KFcuM6Rpx?fmza_Y5j#^ zB4TTKHK-2nE<<$$VN(#2ppPA8OKP$}$G*29KVcpG)n;qaltaFAnKk+)dg-u16H0u| zK)4HQ>i_^m|8KpY zGK2p;(Epbj(Qn53ub&b#|2Gf;gZMHQnIUNYOZFV#&!u(kjdX4t!wY$6zM861**;1b zS3B4MtbxPz@Yzn29+=8 z{{M9!AZ#$2F>RgJLN4&ifBP&O;eVZo|DOj)|34?v@TH^wIX-`f_B(ok!@IOh0GZGxJb-F!uZ1P(otvI1(=Y^(}~;yE-O2Gw=bS8Ig7a zZyx9qEbM0KNJgP@2^8sBo*WJklp*|76B1#@OanfQeMnksVr8n#DIf{tJ7PYpPq&on#C@0WyA#1aQjV+ozl>X0T8 zn#+9Qq7mDl^pOgHmZi7s-AJ{IUFP1e_w(4 z1uE1e*74tVF=TUL4uA3vv8^81XrgR`GBMWfDQho^=GlO}Of>WMH@tThkw#xsS`M(~d z@TIH$A2YK`D%gS>rHu7AuV@srV8!q>1W_JSyLATp&yh?Td!vMl1@G>j+Yo z1XIdL3Cwq~68YUYHp=tSq9q+3sbBk@@7t2=x+$JL1^@^XB6sKg{WLTaDPG*m-d6Ja zTi^deyYIO!0C)uI)bA@0(=(6X|B*4pMxp)yJZl4jJD7Q+E$Xg-FM-)XI`G4lt&xJO zPSxR0Lo+>?xqvqSJRRkG0l0Wx8(okNQ_m+}#8Od*WcnaWdxojC%5b=0*@v9Mx+M0G z*Cmn5yKCFiidJ;VD;}Y`&Nv5$3a*Z8&pD^!CBv>;zN9%iW=?l^C#a;i&_AxrVLrE&Z&n|c(J zla(~%A25QvsXKXHa9Osjzs$01$5d<+xyncHX@03Kc#JMS)X1Q>QMLlU&Log_F;cgN zT8tdX(u*sqvR2q~+m>FkhVdbXSc+_0kLGy>XrCCxiD3!at*TZqx(6}443PcbHpOh# 
zEGM2n8Us$wmwecQK|&NBk=N+Xfzc^~v;Zo+sw;RU0iLkNuMEI&FM#JX>U|ApNZwJ9g4fV1F`Tw9oDNgck^BK1mKuoRp?v z&I#U#^fUCx$v${>%i!loS)J6=-J9bGURWls z*__y_Iz;8M**yOGgfayeW&miGBj`<_+RkBYM_tX*NEGFxk~ zBFOa*L#n}A9#7oj>r5X8R-?5?t-z&gKLW>2&ksNGmZ%^$2RM#Xqh0cE@Y*$L4Z^b| z-Kvhrullr#_`(EFo5oIy=iOkUYBi-xHYxH#Ng01SAIg@d=3?!SBP+42*sv+aW&_sR zf`|{EO7<*$_%TkKTk1_DoORnHl4C4X@%ta*naqQgf!p6H6I9yNv$J%Ay4Ze@c;9^< zx0PyGs(qO_oKzihQJ`ji*-OHQ(J=!dRe3%%~QcCe@c??yM0ocl9+Ov(x@SBc3LAt2_HKPA=P&Z2;W%KDy?Iv6DENEy{sID zEl5{Up`OANVDw2SovB#dXba`|%gq>48;Cd8=D6#nNTpO>AOjJZYVZ%2P1jwU!#L9O z#o_j^kKMsw^Dkz;dgfGl`q_%YM)>Wi9W79%ju6=$sx(>eS$Rp=P8BM+S_VGus=t638d$IKMFZ_K{EG zhu3MN3k#q!ig2v4h~K%I)kmVU#SC@6Z^2&{ka2 z1>!R}<;4<9#F}L1LNPlhW2dqnXIU8eFgl{?;ThItA}jRD&ls_UAS(zkkr7JKc-6C=%`v=N?%mqLPnOV)Z1h__-&Z9~ZB82bJ5Uqv}pPuw!1BEF%G8 zK;;ZHwq?$V}Iy(pIuPO34AvTpM2#E0UB91ztf^_ZXwbpyNJDP3a1j1ytr)!6 z;N8_{@y*PaE0gvW`A&$oZUVpqakLb#FWZU-4R5X0JwO@+h8(k>=no~++phX}Mr>P1 z{0h9P0+qz1Hq1)!y2zY3RVyqu`9ko2&q@j32#!(vjY6=8r6;oN*k(3S5Ce>W`aELw z5EV*%HLxemLc;D{)A<$fdypxl7i9r0Mw&P`oztm*pBkKTVdtxjn#zk-z!;u?2xEM5 zzV2x%rr!523gp`8cqwAKzg+oT(L5S1%dqU^(zN6i!d#!4#4R7+>xLu=%$&S<@2IfHg5%=$) zKHma_VQ7|8zT&Pz3Ul^V-J_3C8uF1oIH58(Rf^QJ{}c5dDWIK^bwsR>;4~2CEJByq zxr{$%glSGAf#1l_<(&G9$R8w4g|PJE{JsRYKqr(F2Z?8NME*J)y?pK+hm zn}HgKoJs}(w%GJk7&M*@7zeWjkV3^&Pr&JO{Og7bcqVCdtp5hABX!Dh0eGilmx)^u zX10^Sq>JaByXC~BH7iTkTUML&%1%o zn$H=rjHZN?87pM|-TW~%aGrJ^5hKd|$#*TM%y=u^G*UMQ5*{|$muiHj_QEz2xkdfp zq+a<0WJoJAeFm6bLz}?O`N`yYn4va>)w=#V*u6U>UKUlTJ`J;N9LMsq+VC2|nuJHE zhT8wStpDJeZKd;aPeyYVkGg`|zNJKnbhTWFg51u*i!}g~#PCQmc z$t<*K_|TN&o1N89`x7IEjL`)WWw6q20uB>#Kl*Wi|I;I1{y|$48gra|*xnYr1KTJ>r z+0WyM-t!fSn}6c4Z1=fzX8C4&ZhY~VvROyf_r;R`XscF$9-0^+-AX<=0SnZrPKc`J z0&l(1!twBoU(#I-ZKDLqthZ8M@+<^|@zWEW-ws)#AfWC#>3?dvo zWf79scA+Y`qGKF>Bjw>V;MtOxg!2j5A;C&` z*ixJFjiRrD)ot$1T*5$J1-<)2vlci%vd?u^3xi_y{Gm)Z6m4Lv9C3!N>;$D)1~21T%XX zO2n>r%;zlkCCzS}*>rC&eU>5ftYRst8aM%OXaUjwNE5ZtlKu?u=pB)IRCkh;4y|aHQu;C9610&uxS-I8f^4<(Wvf zh~3APxgR#)1&@s1%rJ9NQH=0vfW%3Ir6ePNvTahn{wG#~Z4Pi`XI69pn|Xvfu<*S{ zm$Fe8vo-$%B`Z6cL2g6ZCI-If&(?XZ!+-c#Db{L4Lo{_gj)us8SkEh%wCD}{vNDCa zsAgF%kFiZ3d-gEEgiu~F#LG4;w8lKPDvSzH`McyvRSnG>Is9AArO^z662Ea+4w4{_ zxI@C2*b@5G&&1vfF=OX6Xc^VU=pKF5jew%~y{V6k``B2~5!NH>|V zr8cMtxGz9KmzW#sIsb?OUnz za#k-a-o4T2v%Iv*rkHlswULMzYIb?&-h_#{hPpHpiqn`#;JRXQtlZv7pc{@=NuwM~ zhOBAL_#Au0Os4@UI>*xV*{>S1i1SRq#*fS!g743k(w*s>^-vp=Cj0Ek%1!S{V7Bm$ z#4;Klp(uf~hvmd8{m^0_ygQw|219=OMeH+O4Ew(v`Dn&*sxKl%vp-^ z?UbP0ijix1n>~{y6PqBX?rcPFqihohG6s_A=|KYuw2-%_a50y!K*Zyv&yBFYED2mU zVe^$Bfy@DDOhTD)wSmo*Gk7zAPGX#buuB|nSM!d;p}FL*(* zyF}|21U7DMlwi_kybY%$YHKE%l#sMsajQY2E!N|6XRWE~d&dg_>+s&>!ZcU;*bC85 z=!T5hgkoCyq6GKiww}S4)!)J;9CRyZ7u}nbkP3^7g8y_v8XX{@`-eUEAeWkBemE)n=uDL!WfG`!vS zi~@;EKeK-V5ELhyY@qzr$dcgTb2zLy_(Q@Mwvg$@A!FlSkk!jYa>55Cx*HKU1zvoq z!Szu6LjpIv)F%i97G)lTcLrcU%kTUmA!@vPfzMY#+8GoeSUxTKPQ9PnJ>Z$G5XCq? 
zuvYFPu`{GqcLVbwXY!8$q#W#8Nx+@zt&JK#rZP&#W%B>c&EcF8!R@ZSx0(@hTE}8@ zZyCd9BrQTa8b)ejH=mA#sO~itxuJSg%k^F6>_$KB`R1DCj088aGd9hoM0UK6>SKr7 zShh@oJerlTo4D5O`ICL`-idUjmql>=`D0vQ#doSxOnqE;&}BP4eEMh`;xP3dXQ4ce zj8|qmvU#l3xdr)b$LP+e9AmzHrqi_3BjmzcAoWqWu>_QNyNg>PFW>G56?aoO>-c|^ z`$*w8UDj-HT$C|;nb1ji^N@za2f9PCwFKU2g%FR{l;$C&ER~os1Jn)Pvx9YrS6U2u zMyBoO-SXc;z9xef&`CZSL3-eM!mB2NaoN-4(w|dAsIS#*QvK3Sk~+Q$>#ZA_VP4rH z_3APwS?CcZVVrK)xEW{E3x;1o{mKs9dOMaMW~D~|v!yoH9OGl#J#H`uTw0*L@d@RD z9BPYXi#R0%NFIA93p8@2qZkIN@WDkO?W))2T`V$aD#u0KfotzkCwqNhT8YKeXL;~X z6VNpb=8VX@(rI3hn;0Nfzb9LeEPYRO@d@?=y`K(rkf?+AjnM*`09H_B zGyb(wlz=dzB;!B?-SGSU!Rz*m9R>M8a*|ZLEoE9ruErCYnQk=ZzW{#k$^oWFe_^iS zQjHiHb@~yrq^39)0DGy11}?F-#%kfU;C9&@R0J?BeZt2}M%05pZt^A$O_i1;%pf_S z*ko$W5q%MTjBDj#^HpA&O8m%R$=oo^U7eXkp(Hv5Jxtf8# z%1zp!u8U08jY9QnJ5xdNzJR8$;VRZnJEwv0rMqv-lLeTz5d$|Q>cfUz8izlXW>xRA zJ*`tR=$M57rQL%U|4a`c@{R0WB<`&cF#l|{GW_EaJV#gf2EH8R_!GOf-r43XC}jA6 z&)Jxob>DrITTZ@~RL@I}9*#!VPY~*!nIM4wBlVViZ+tbs8oZu+0H*vU?^MfYLi+5w7 zt$@`GI@h`vxqbe`b&a|$K0IXg+0}i5L<}y4&DpeXogGyFDvsjWqZ$i*%PY#jG|RL` z2Kw8r5Bi^Lh5hNw<;c4W{;u#_mhyUBqM*vESnG}lR`>MC&1Q&! z{hIrD4c0uM{cPuWQG$E8p{)%Fk_d!Kbm}dBIOyC7`VZ}1q@AcFb7rnwn!qLD@%O6i zfv}y<9lAeo*5P?25=hEi$^f7|183j$+EtOtdFo~ju!J<&W-L(?nKa z4v>v2y^O28>cSjJMH!Q7vm0u;2cfz~@f}7X@~P?N)uBGIsJh}(SZUD$0{Gu2?-l7W zi7_O|7b7sRoz`I_)P?p?UBtv+amum9{~joBP0gMbgM;8Wp+YpchnOuyH* zf=5ZKjK;l>Q>XM*uNA(0qd$J<4j5kNCW(s^`@ioQ*l})aRqT{H!mg7(JHM#L4|Ubr zmo!q3IYXrP>foV_Tq3X$vVmZ65QW;I;E=xyno7@Kn>&V-=N#3 z>tdr*c3D-~WAoQ1VAbd`u&CmvDNO5?VJdC!91amruxQhUDy4@P6Qug*+eTj(0eI~j zpO0IqX+>3XKVeWtMY1S^O)UQDW-x&Yhow;JT0b$$aei-GtsqXgyp55HdcOL;G!cM} zq3!creI#(ssI*IEQR^-x0&b1Py(GzG&L`q|)O+9rNngG{&q!j9%FFE9$`Go;E_`Ol z@6CuC`t;eGQF%nZTk;rtTTX9jewp%|bA+!Essz2ZXS4+ixK6Es~)&5cMM_RTSDBDkOFQt+XgSH?&E$toE&xCEM>Ng3NiF|7VTXCmC)0+hhIDM zjB;G1^=VD>W$>DhdZU%bf$0S8BU5?sq*QWys#?JYYZT7cw*V$(0HkNx((vHmA7!FD z*iHf~^quFRx~+d)FhND8Uy2UJ$bXxNjxshbE+{AHHuB*6<&ZdWmm^YoqVn@qN)375 z1e`6+Cc5jFVtgb*X_!h!_RvKy*QYAkVVBWo9zJ#BQuFqlPWuT2s8Y zE`mlJz!}pzOumd;dV9N(RN(pH@XDFVV6F5*=9Bw_S;Ro%$Gh`%T_lMQ!qQ&SqRshh9yl!hKd`z~@45 z2SXAV@}>2aV!^KNZfCrQPzX+=2gM@A;%BBnMXz=Y*`qT+^v*2x- zuo=Gg@KDo`WMVS0Oze1v*}~K{_jR0JVMA8&<}E`17um}gHo;12pym3w=tYRM-8#j9bs@wQt)o*oKHsy3y z-g|KrFRCX?(A%zr`({x#?Y5F z`Gpejva&seD!2u^o$^-E8Y%SN-;VL5l31%R83+DidPg{$@BQ>WY-n59c)rFrkifML21OEm5 zvs5G07$`3iaF*mlP-8ggzjzZJtNGep6j5DHucqkI&hZ6pI;-jnf_MT5J~@$#Q6Z=89a1_F6fAe9hvFEtnGD2YCdE1Gr_R0=EFME39AV&n{BT_7GcSl%nNx!PTf z3~NaZr=9z0C!g&c>`oO-aQ;)ckDc|b!uv|l$>f_Y!2_~f|MCG5%(v&V&zg^9l-^GalU7i8gVj~KK3}pd z-ZgQUM>J6c{?_M4CznDxY2m@*3%3|6~S}3l9G$91e#6(;X}gkAN&R zLz!LMJYQqn9t9J%|L;M3T|mU~qgYg(@fwD=ju}O}&pj z;5=rQkK&Um3v++`IFU0i9vSz`*B2ksDREtCza#7?39Meimg=!dY=uJ<+IZzWesU-R zqFr%JoN(`t;Tx@NI<}9>oou6!cP*R)Ohi-XsU}%Uqy;dFTs0y^wQ1eksm4#7rblf! 
zos?akA;oL2)*PIkk}b?eSDF#&@GUZl@NU$m8$id(`FqP?7}w0h+1H_U%wy2mLPtDI z1~yY7pKtO$8>kIpjJ&hKlq5mp{)cw;tiiP+wTU1Q=3oIAO6F(wKDXmC+*J&%!QpQg zF?s{4jULauhwz4{#}aXPDp}wdQxxDPRXn^2Nu9NsLJt)Kw~xiEgCVd!IvEt929DJB zdNP^vksS|@I3Df9b@alKw{|Qm;#{0|84K8iB31~**j+aDh*Hg1jeX)|vFBsR$!=q@ z=4*#`b)hSzV|Mfb+ z(uHu_WySq_iRENYdxs8Dv0tNcBSW=~M5hMh2#WOhM*<-Gc)`;Zu$fWV4AMQx#@nUN z2B+mY{}0=Qtt}4|Pe5iPes|_4P^IEwaKoiS-=60_ndJ|=BQ29TKm4Tpck zn#M!LFd5{LTa>aAW|JW z{FXy#VY~}QHTfPzuGoA~f=&H;an$Ygy+_1hi3KwtAr@@_KaD8#*Sr&{4Ktt#G#E;D zYq&XV;Fe9BK4<{h=Q z0gtb{)avz&DR#?lM3qTI%ILd1cRqO+9p6)BQQS)KkCvm~nQ7VL7+`1b@ z%G{=*Ohd%!SdO8{K-~3`P$}Zr+BhUiK;Q!BmNHUN2Xm7r!@gU&3z~#{JFEx3E6y4j7TPW-ZK#UI+%63R7L& zb{dI<_v9r$MicCX0~9ow$FXY1vR65qVc;p@#3~uMT@VKF z`C}E^Z?9+oxgTa%kBqWZXjgeBG8UI(b%J_=)w#eZJoQa_^2e&gY_!~V{X4WdMjf~g zvA~*BC|!547j{QF{BBO|UvAF!aLIvN(Ol3qp`2H`F?L?!-Uy1$%)dES37PeXDmS&-9}1wVv;LS>RvPBy7uH&PQk61@n<%H8}#+NZXxCn=ly6JnB6rE%Xdp z#3X+C%pH3rVa@g+CS_XfY$+qIW_WL17#3`!R1$CvM^p91LK-#hA1S~tln_;SdSE+o zk6@DLV&4TwwrmErrcqK=C@Hg_=8NNS*4b*vDyz(N5)uX(bnv5TSzuhl8`s5*I6q;n ziyyF>5K2k5&2*a`H-D)QnF9y2!RQUl$vk52VBQ%*nL|wvL)-1e;#yA6Z+4qE!5~$9 zm%hfGtA3NI6W{nM@2~or$sAzQE1Qlc<;9i;s-{j#kv13OSp@whSL%(^eh>#9w9xW#fKB&IqQNyVM=kzP5IQ__SGGqPs%vBcY67Gs^%l~N74O?)n#5S zyX32d;t|}X?t>IxcAC|Pt!C`(5XHKh{sFFY=#5Za7{k?EA1?MAiJLdOv!TY$Z96Q0n%KjI1HjqXxul5!wcx(vCpNcJO(?l{N@2P)$ zo%;To2`*mqP0}z+`>TfFu@i(MFpA!-CrW0z&J5AF|4ns#cLBQ_KQi{K86OU7?+7^0 z1L`E%T9Pg48GJ*fNJDTJxvYL7=qwYw}%e-grH%#I;JeG95*yDkw^bH()r+*qy<)z zx={mB&|>HGki{nv1QVu?FU(54%hLa*N=Z$A{y{2BDXpchn{qA4Vh^xPBL9RrgoYN; zC>TG$woJ?tGOf>w{se(8A?`v6rf-7{3|p{Q-k@ppX!x0Yl02VRlA}rjTwjTkd6(M8 zEDAbS!OwR`!RJE`48Q>YXB)8Xzin?4VR-!ok+RUD99f7Xg!i5|K>V$O=)-3vL$D05 zugzx^p|pgyG2Msyuu@DR!#u0+!qAVq(hBYe93Te*f6vh1=Wb00y{*unz*e`m)e{Hd ztY&9|Wwl@E3IJAS?0&I6^}RUY7iUcYPy+GSEkj!P5&2DEC)_nrPpf(O4nHBH+QPD! 
z6inOcH5BHw0`LBxfGYNKHw|lWKyaK)p+xC7SpQL>#66++VDB6_8* z@DP<>rmUp^SmKdsrBf&XAo(NNZK66@c5KOWYh_Pz*59+_qzc>2&b3T)S22s#%m_z} zVG<`Zt@U=K{6-nLUkzu0+5In&B_Uo4-N^aptpbDmZ5`p-gyo&GWM_{rTeYanP3x-& zO>nF6K^eM2u5e!kSGTScX1!oM_lRwJ_e3>{ApcBx)(Dy-3x)#-G&;WCWCfj-R(-Lt z4EYeG4@1f)@QS|dbcY82t$t2dkBKfd2X#*;)JUP%Y98e`z7!`ZUh)i?X+L8Kh*xVE zq*~pbkdpY4-`$1>2oD5wFeHpJ{}4y))T1>}pgHY98}ssjtp6iU6+HI$soPCpeI6)4_88hX`o)u*+<=xJ(D7MvvbQl6eJ%0j7>1I%AfyETA*93N zLO7%OwhVDh^TLxY6RunBUQb%#gkyzbQ{nzQ7F|;w!qKHIK$>K2;ukzO>&3pn{$AlhMjcyfA=pM)|-czQv3tM0~FA{pSA6FaE zl0=dVIm=VOn1!vb0qnb=%mGJDJG(Ar0!cM0-h9q)bpK1OV59TCY{~NoTz@x^tS|y;QpxTakXYvaK4F#cAZgM)oFB3juHC zFT+7+J99`q{+WBVjXPF4x0R%JAPNUk?RUdDXcPbtE1x`_u;mVF%Ce#8d)(joWupwg zRR|4Ax|GNh=^m}k%gOVhY5rH==3YbW=r1inZ z1S}BN|Dz5S=x1EjtVNm&qlMiR4r3s#fi!8bwfzh_%uSIlzydbmt)_+X?t)CtuS$ex zpC%9eLjYx1b>BVW3}6BARm9)RN%H{(NgF}X?+i6Q1Q$g z`@(XZgn(skZoXy!(8CwF;KzAVK}vNb~8rOw95*#?XWB!ryq{{ zn!;^(DW>Fk+r#a%X>E;o=XSa*`yKMg!v zya7vhv*C@)wmK1^IF@-~Sz%+pHa8<;owv;W~q4w`;)z6LA@cH z*AHP)&IZiAe75eCZVZYl9HvPgn~`ma9%^Rb9)~Ibdnzaorg=(N^9r}hfOP$=O^8BS zW9ut&m4BvLU~`ABz`LjyW@^o{vsWFT=1xmH4eW!+1l$ga6Gk_}r;OIkB?_acWESjoFhvj2#z;v$gXD`+6aR4V2yot{N5-A;V_&`Ob(xt zz0jb8Dk4MWAf^c?2bgQ2?PW*$zhUp4fDP+>a?A^GTB}SAjFJle#$X>mHW@KC+Po)Hv4+(Bd5%@@Q+mT0D-&uNSw5I`$5Fh0J ziyR~7AJiN3$2srO#xSF4m~nxBOx(>abeAzT4PaIGn$an^aQCd402{gQ*99wi=8Kcl zeM{Q_F(jt6Vi$sHQuhnqg&GPK0KGZKx!>BiA@|L)$jym~24fPmBH117CLUb548{N& z#db^c8oH>KrZUtH&OZ#&Z_NH6;3L4P9nUhW<6t*N&Uz24_0#&MJ^!i#Zv1(Sl$9w> zy(=-THKa%%EKqJR=5g~E#%o3Cfxk?j!qz2A6xVJh?dwb{pE6w=MyR5hk0@Wm2MlH(lr<6Md^v4d8md3f61mq+- zn++o;SArX*oNUMo@gXq;xnS_9>J^zMbWNm7;T@tccOVd*M!9l*0m5r>gz3?r>uXg- zT@^0~?j{TTtN$A%k9-2MB;RCO6-R}fK?Y~ZYO%uH-o{pWVq)PG1TYP@5z^X%6N-q$ z37b*JnTEIHJC;3Lbs{=8(#0k_@q}^vUK_Cmg(=->-8t~Lj!3t4FskTp_&prQM;qa% zg7C1_UB8EN?E6GqM)dImK$wC9!`ol|&embjyoSu5F zHsLMS#%Fs@(&3}bT^@#ExMXdW560bZQ zq}E17`&Sl^>t)Sf)cnhE!y!^H@H(-;>nC<2;cgMWPT%|y#lP`LbVh%E9WHr}01~G2 z(ltk5Ko}|fK!qTHSTTv2#h;>KN*!=BUNn?8OU1fCT*8N~)^9sAHR69=8Y2+=ul0iG zdNNsZhf$e70?#ibXNhs)wBF11pRSk{6X}Z(=;9lE!mUhacb+hF|BLZ2kWc$6J_|2- zi?*t{H6DL(znN?0Cyntqn~nL~1|8RIygqsqhB9NoSX zXeHlGl;@hhyMKAIdC@h9u(|_rAi|J#GY&B>W^WeeGX^vwkXpumcOfO%9 zl_`pn1rN^!id!Av_NJL|l*UjKs-uy8cd-yZ97OPVoK@7P7ZWd=Q?xPA$l?jIJ%AQx z?uAKXtZog+Sy&BB)y_nm*F2)!pDXZ0&S{ZLhCY3IxyQ`KBx0tDm}cZ3XaV8!_i4!L zcr$9;lOjKs*iIzWcz^|Y9Ojg862ZaQ_TQ-33S&z(!C;r>0Aw?aYS;Cq&qVg=KI+-%$auH}T#ZZCgn@1xd{?9k$Qmb4*q@FpUm z32Q0H$|Np*GwT-XVvAr$ry}RBozZL#b2Eegme1pslIkj{Rk7gw1C{YBQ%5*HypM>O zc($Ivcl)*v67DiA{E$uFgKgZTTnLq(=`3bnS>f66;Sij3KW#DGJu%BP`-+3bzL*)SR-Yt3^ywd-vikg-wT#h8sJZ9$P#} zApo4bSAT+>C`md&DZ9qbV}lJX4sQh(feZ6E3RamLDk!vVgF&@6+#dPil`l+Z$CPx% zcB*Sylp}`gvh3SFa z*jo_|PRQu$C7JiQLw$27?zFQ2JeEYtl$$ZKT?TH{i5CCzVcd})!>?^>mY7{HM5StYWs^Pb?32*yE zfvhB2J&7-?ud??rm*u>wrfMlh1lt*IUrSm%QO*P5N2l`e|IY`P_*jFkw$ML301teZ zfuL9z%lu3|RV&E}i2vZFbeyn1ZX|(iw*bP8YDXk0gOjsRHq#VRsTo(#xo_8>ICRh#7+heG7N3J3N%UTTQYG|B@7nz$UOF0Qtopw@to zT+d<*@4qC+CTQiPdcg?9A{*1>4!>cX8TW!i__eJb`)hrSK?~{bd(av8OvJDdP4w3Q z?{_S(1T#-xlfO<0`J?RMmWLP-peq4*->1|9_`&mwQzpa=yWc=&3Wm>}i&}G!;T9v|SXq-fTWbC3*PDh}rFhu@j@nho13AnE?oa3NGF?O+ zMQ9m>)0@Y&zzW5kO_c9xovL4;hZ(DF7xOMKhv{fH@Nki$`P=Bf;;qg+A8NN9l=}_n zO2PcWT@RnxPUK)_Uhs5KW7Vy8miO6W5IbYx!5D@AOczrE&E^s}>su}Lc?giOui_D2 z;9%(E?;OtjyZ&=3Nap-t)szPDH8j2=x;U0wKLBV1z3y@Tr02iE8~n6JB+{Eu%{c0R zpcQ=vG^FNB10qq0Xf|-R{{9tx16=k_^lyGrPD?Mgo$erHZyW3*)7fZOjH*QKb1;@o zV6Fij@bRg+??16swm+QB>ruoh(cR^KqDaY^0c*q=mm)*s#dHn}6nm$#&Z?Hsiy9az zU3XP1^JMTo>@{P}Tz<@g)|1dG3J@fGh7)vo>qIx8*cZ{-RWB0O88#xlUwm3&rNT2m zXzhlD42+OQDl)OaWdbZnl#WT>d^o?Saligc)`LAmcF5K`=H^vnPL7xVW%{Z3cQ@a! 
ztg&-ssW>vpxg!@*uoxQ#0L#a4XaMaB)3uErm;H=H=)rS)1Ptr@13xMr@8&9?!yjyIh7f$QhsHr_*HYJzdal3M7DKJ3?=WeEi^i&5Z_6L^ z^;|-t$R<QMqE136vShBWIsFU;=UdVSs3 z16Xl)*8>b0+XWe*uVJ%80L1QkP((Ra(62!u@2Z+1mYXBd6AW|bcoGXD%!n{Y}7>_k6l|U=XrVhMd|Kr z5%!0$Irj@8NhyLaek}3yu_vVEXClY<1C~}*OQ^)g+HOQ#9Va`hv#l+WJ7N9hrr=5g z@1sAq#@@}d6CVTl$Q_|kPvHnQQ)$e7F%=RE>sarBlks0};+OT<+rb+~O4?BC`rIL)s-GSbaA@?kc+k+d4w?j-AD8;_|4a}@@E2YWNxs@U& z%6+>lCyuM9Y`mE)Oc`Pvh57d;Hty04)R$3$>e~arn&y%4W~Y` zwHGpZ_z_63w(!jS8liegzx|y2`0n8h`qho-TAXlt;PZp2(E~Y8h=aDBW0JHaDyOxK z3Yb<2l-_FbQZ_qXLE!JL-!#OVpDSI4)n;1Eof;2J074v+a;bRI=`XQED^eb!_2Fne zFzbqoj2S80!%n4TQuSVnN&47wHC{}pVzEnvv6wr&r_n$E4GL^X3>&{gB4c z%vX8XxB^H!xyMdr1au{Ns`2MLlt=6L?PtBluvHco2z5WDau&zFH&ba!Kt_tnBJbeb z!P&{Xwsa6#tPC?-BH<2z>8dkU&i}N;Qnjr9x^H3+E6BPstsYvUAs!$>ew|}OnVOnJ zMA#WoAdFfW%~3-aeo`;FbQ!%5nAaEsCZhO9^&y+ey}W4n-3(yDstb*N*_y);C{I5e z-#LAPrn^;Y@KE6L825}96hA>{Yvx)lF;*AERwuRL*I6ZBpbWBXzVf^V4@M}rcxpm9 zI>r~*Nx<&z^?skb*p~j-G6#@R%6}*#{|{XM%C4pY%f>z4y@3fXTO+wip~|qCz+Bu` zmwik*r}Y)|Nx6v^s#1(Zfr8I~;3{X6?_}JZrE=aagrxW^1o~bNqF@k*N=&|1 z>`0HCtZo&6#@7kw6)JfA>9W*mnVY`_w-EE}>UZIi|$wo=_~&TNDBd7Ie`Qilic zQ%X5U(J5%BJSYMV`23x#zu@dukEdX@J!M+r(BEr@A%ts-{Qms#P#4g0O64aw+R(Xs zaPk(EvyMlyZ`4Ivgv$-+Y zPv1IQ4g?+^DNL2q5cae3cn-^k6vriTcV}I@5+I~gWhm=Cyz3|(f3Y~9#JAQDCWyQS z`Z7`4L@rWDUeAD>VG9D@Wt+!To!6p=L)REcsq)+q5PxiYV)w%oW*>Eb;HwC|L;^w-^Se}7CL!zABZ-OafPz$v zIw7V1qL+C=e7BqL=C0N5EOk5sGTo>#-3h{O1V5n0O)^UcP0B_{kZbYuW%nzca0&O1 zRw#ep4rvy@@~38tL0cho-bw55O-K2%%|g+gdI1yVio=$=hAc9aU@S5%0l;x-#L6Nu zBn)x8#x>UW`0lT<)e}>MF+4)1{RpaCu z{O~&+zVR4=pz+_`-A$h_djg==OUt&dH#4}Ew9?`RxCWyR<7Vj!RiPfHR8HllH*i!ZB1oSgu zTM6Fk&I^RQLu)Fl8JdDJCG%*%P1d+}htmVflBVjldIKax6+)r%8(T*8)rYYUY z_OJc5`Y!mwDGsPA5xsG3SB;4BE`{rQ?Y3Z|kp|n&fSEIMc8uICQAU``m~h>DjmA8+ zpvEeEUq|(X|0fBD&3DL$Np{e==h9EJdVyp}II$+EIbPOB$vg=VjUkEsYZM58qvHzd zC^GTAi@gs4;U$R(OXhWVvX|5vQS`%?DG!-|%nlFbVP79Pjw8|l1$-|ICQTwGeg>cq z#URTvzr!lib*5!x`?+IkwUdkg+kej`Ou7v-2G}oN^@yKu%+aaTvyk>kzN!*}3$`uv@_6(nNgCeuTsP@{>a)e;BZ6@3U#4h7kNf32|LK|&#>67PT z*)9voJqp@Y%Q{AHh&H9G6fUpyji-YDb$v@|wwKE`;`Ch~_%QJd;a6cUE)|e178DhN zd2i2^_?#kJlzz%~r2b=Ve2W?%s7DVc4VO!_&M*q~z;RM_9Q$5kOm^#ct+k>1aG}l} z8*t9kWR1VYDC@F__}#n_s88d-rOVv99`d_2)9E99DnAwFo1>pAF^nWk&#}RAD^H)b zDBZ6i=sV#^)obvI@D^FeN=zI41#LIAYdu1!c-W49-+%8@@FuQWfAk5V)8eFH6Ewh~ z&DdA9(~2$L-aB*?_^-M8YruWy2+;pcapo0%t~qVr|WiHATr5pY=4hiW!DD zU1~><`dC`58=>O?VJOWHQ_C~E`_ajF_0j`=pfv}X?Gj*niNi=F5I#j2eau)x_QP#V zO;xAyhk?(kn2T6~Q?5B@PLay$+i;+@(#_XaRV+|Pd&xgZfQ$l($e8v4g6*<$$?*O@!d*RW_q$PVufw%SDFGCjx zRYKkerOn3WKaxcB5i^D-RanBxO7F?mcw53%xG;cR;&5KQ4bKG@Y#Y#%c+s$KZ=j)^ zM7@*Ejv~H^(V0aNY${k(3;&*@x~?4Ry0a${U2!*r9pusvsheIr`367Wf?1xxoYs%v z7J}(_zFzZhM0hQ)RQ-Ke`21lo?1EFQr!fE(vvAca2hSzM&Zp|ke_z{0=QObDJ-Sb79_Lc`+v?xb#NykTx2yACO3z`}S zxnX%keMzzBDp}Rr_oeCMZ2yO`IRx<_9vg{^0oELRppI-}3-e=soSr+=?UNZ~~sytvHJt5~b86^+hTD39OL@zjWe9HXHuwsr@wntDbO$L%~Fk{&@dl z(-U0{LKHGFy0r8_3~dactBdCY1_iJyzTV}Cbh)v%Q65dK-Ryad0mB+afP)M7u>U3UjDwYck6Na{xq>nvpw9SGkw(YqMpVY88DCJr zn2!^;p2dN6fiq_5ppM4N)o@8vP-Pt-(H;nc40{5(LJVB04&eP)l$q9S= zOK&Wu4M$OGmWag$DL>s7!2ej)XmQ9v606-^k1szZFM{ZpP?4)nIy(wHv#Ab~^BJXG zwz94^GEI2;wu1OvE5To!F>LeoA-G0$%yonzYk_tZYL!$8!ZTLaylLa4)XwGghlptd}c_7#5r`y#?I9k{l!+O97Oz^~81@rsl` zC7fpI7S%819W|e`bhY|}nj13@s69H2!snQJXefXR^&Y0O^-=7T*5cv{FRdMA+dwPk zi!{l0dRw6!HaaOlJs3bFow;j%~uc%Eu5vB#Q_hm zez&BF5Bfc{j_CKhy9coS;S;WSk=Jl6Yr6Y zF~w*gQ;r>t$Gy7AXPM;EoP3e#fG|Q%Awu#KV}tL+L7xHXb1M`t^1xg{puuuDCngMR z95@I)Hk2mY5rvRpKL$J$Xx)$0k*v;&Faq(&WD}aTR#!L?UIjQk^ni!JslMVdEV}#J z@J=cs{h{5@UZYGq$%RA-AVK|%OHlY5@;dQ8$^bbdfX1K2bsvn(XKCn1wQ026D`D8u z9#0_Zd>?7Llq~&9ns%gMfU;`A?V|OaA0))X9}jgr8|-g-LMJHP>mLJa#Tnxw6h{lF 
z+g51^<^>NX-^bc9FHw^GZ{)Po#VoboJ^V9FE4t~P7VsrZg+&6>ym`6ap3wWa*8I)! zL(0&o4NHV%NsFTt6sELn7KdUu?eGp|wCKe2N;JVaI;)%E49li(%9Ns)tR>_&MS4h| z{Z~lb?@e3P50r2(%MyicI=ukR4n@I&>x`%Td(0CdCV9zz2Z`fCByRldwl^nSm{vpdf z5fZP==7pA)flaGJgnzvPcEFCsF!oI@pjmw@9?3IX&=G?+cL z^vM?wp<2%)ZKaqEfCAoIA@Z<9QHdKX79bG%Su6_$!v5gVIxThR-A33sEYcW_p1v#I4}RN zY^e=2W~VC`y*VM5zknzpwCigtQNBL4_JjzHbI|Y#;GRiWm!*fW5^;@@+XPCQ%d@0vepmQk9CwGp_{HXIQ$_=V;oP-7tGUg^-A`Qbs7 zFF5?44yZukXBRmjkUZTXSQ|VAxuiSC=u|UL$BU&1^GRM@1|yvR84oTI-x# zQ$gc3pj<<9j$yJUx8?n^i1@PUxl$Iwd$kE8&57>8B;XO=7o@@K1QR%idx-6UYQ(OZ zEnRPAX!%2hX%&pWd|B6cb*0xlq*4TAv}{jTwXb__>qZ?hr}IBS*})B~D#?(ZRdhqr zilOekJz7V2b0}*R9$TCHaR027Yg)V3sZwdu*5Q~MfGnS{x_>E|znI>o6$)9(T#xam zj++^)vA>ZJ`iuu}`P5{vbTM;0t%6UbUwfmMv9%6Y@#$$>+QEZz7Hr1e?6 z#K?N+a6ZFn`P}s8_IOyh6R7+KnvQ=*YQRjCO8*Eppg_Xq+aZntB~M|`ikd}2Kqv5* zxson}xuaAzQ`?lK5??tr5I_C0!)KI5d|IP-HDoaj;w# zJ4f2zz&&IXy`nj(9;i_Ns)3~(R^FFT6&V7bB@H~CX;FQB?w?uLBT+gXO@+rdR5-bj#M_x^Lu|7Ge4$GxR}GY`~2?F<4jz$(Z<|E z4LVx4Pg&hEtiVg${H&n?-VP9Xg^n2Fn_Xz>a;e|Yeo1(h*iMqD!79T4WpZlvq*Yd@ z$zaE5wV9_qm!59d5an~lJAH&)DeU$dF2R=h4Nb0utK!cLW>Z!&PRU;^L-?GCV?Mwt zX3{hXn8=M|_!Ss@&-h(6W@>AR9!hwPw!3=Ah-Nr$uR-wX`rIaIL>M`p#KeG8RkQ!x{PUNFM)6}B`MkpR zk(&~s(rNOZC1Cr_JFSNI_|m+CHJ>ra3fZ8tXLjulh3kWrNj-%d4ji=o4}Sc%tELjL zt(h)KQ5EJDeXvRS?tH)Pu<4V)UrCF6)dYmD>Bhr$hX%oxKNmZ79uEp_sRTpTZ?Tai z;cbUGviQ%8mVHbH^d$Yyz37CeJq`Z%UR;Twn23$>R@+gR&0Slhpfa|ufQsA2X@ju~ zNFG879CXI8Sm$4CapmRAS*^To&8Lkv205+W)R-1f8~)mDwI}dUy*gj=2HZkZ0?L zuob-JmdSN3>C31&sK@3nMLZYHMr>1+{o8&EgY)=832GOh>Yw*WvO0?6iAueuJ4n35 zq~Y+Fn|e8;Tq9R&9z)>JSoiTFp<7wrt)%Wv=QISSpqG!F5h4?uOLe4Abi3f1vzIKY zKQYLarQ5r~4){lFui@b_8OQ{@c5CgkxdN4!N*Swhi_MZQ;&%WkoN>~wBO7L=8-7>Q z5sj6(_hWA)ng@!+evB(j{V~GA99ed6DC@qEv@UCYbmJAj$Pk+Gr1UMuf9wk#)8k$kXotrgm`(0N7y% zx)X&atf(xe{7OeltF8`t1kU7AE;dd#(;RQeO(};MsajTwY?u+Z_+w%52O3!9rH&bC zbxHH|nFN#aD+RmkgsnO=G^r>$+-xKG1`EX!<(yGU?JMrvxav_#7B<;aY2}gYu_HZU zuk5*6CTYrXg%2XUlJKt$`FONygIz}~QfWd~>N0C(PD36!p~j9Xs!vA5m>=5t%nIHj zmL?z4`joU=))1IaZ%7^iDZtq*OK7PbrrPlF2a59-(NG)_T94fvr^xZm&ehezm4s_K zxkT(E7Wq?~L#xWyveNaK*m>m7)kMG9V#viqhvfO)lC@3i$5*DLJ+C*V^-SDU|DxR|>o;Z$tNa`*WtzJA zj|i3NKm$qYN(Z<3>YCTFdFP}gSx65)i{!IgH|im)EDFmij^KkLlUb)8(O-=kzl8RO zDJUzW4_+_fQ4d&nns=O@BQ@6#WZ!>+Txcfz-s)GKlM@@12Yhj@ilYF|YCA(h2Z%UQ z1bWeq&YA6rQP0EiWesHNMH3%pn1!B_ciBmuYL5xUbK1fZRI#U--M18Zx#dx#RiR>W zbL*;Em`k_WZAu+ULjYVo<}Ri~Iy`w;o%0Otbm_N2LJ1B%!$7wHc#CMPJpViIpYJ))_xxnR@?w7H2`Ie?HeBvpr-Ncz##wyZp?*ZYM}bpM zvAk9;9~LXV-w-_HZOX$P9Mm-7?XZD`0Bi0=RXxuJk@YYX3N#$B(Jx9ZIx6Vw!FW%3 zR>KSG)ViMTZ|^2|VE*Qtd8)5U2gU9{lotZ?aW~X`T!!c}1SE|nVXA>A$Zostky1y` zmQImXz!~wIi}`tKs3@US!tK6m$%mlp*Pz&+?*cGEN^xfpM@aOzlNZX zZE@=3+Qp@6%G(5|C53rOO%~x*1&H0XrZtd7>O&R&<3vXd-W+lF5(0pi0MWee!PwAz zn6!toya~~>?CvY1xYCKO?XpcA77pc8cFzJU;(iCe`QF%^FP^RZT={Q6M^?RF_B=ySM*MHM zB3|+z8!QP@=K)8we8;p+bl{+Ti~NRkrpfHWA-0rDXa3yc zgjnZn5wj}Zue=FN1sdaXE^*hIpsXN)N~I~v2~QoBLw`+@h6*_o%qS50_wJ&t0@j$| zMrR*XDV-V077{3aEb{c8i4#e~`K83t-L0gGPLqNLS7zJF`Av3(V{t6C75pLf9NqQA zTV$%P-_}~}apm4`$?Y|>!=E}!Ntp>Rp?o7=o`1UiAr5|4FY?uaJ`sW>rl-1+DNAPQ z$nX#7=k_aS4%S2pd!d#tzQ7mJ&mBA-d5fKEQ4NghB*vRRD{;DL4mM>?=&76jrVTu- z&c;YBiU=erJ5T0bb$>QI(JR9q1Ug{HERZ20IN9v1BJ;^cxRL=USmII(6 zJ>>*ja&nmoH%p5{OL@IIem9tt?Y({-hFb(XOCLIg0cAuke`{Y`yvtkVsf`yUIZkg+1@^B=F!n5N(6bfxRVU)-uqyhwM-D1kL4h60 z_tqb!V^eP+u&vtU{feoX;PY$G`-phvm7+hV(Nc)g>otYZ{yRy*Luml{Czp&`8pVB@ zTK$hoj62UX)Ce<_ra~@z-bRB& zOL%=S&M)Ob^;Wn9g;Yv{#Yli^Jdga&G|A8h`BelQP=52oP=EV2_1RUI%jVhxkyDR* z$zmD$*|TaXnh|>j2qsQ7x7CYUEymzzNqKoMP!eILIFVv||4I^b09n5uibO^Bhzyyk z4WdW$580~i4m^D(ZhQf+7z2?r@zpOhu*0+FczKC(reEMyCGMwqD) zTh(ySj6o5hQN~zt1CMZQbyxuoeu;Us 
zNVXgtM0NWrtg`Muw_)5o1my$a7#Vg%2&bqvXqOLWv@A*2B?!2WKM|-rv&SH$gmxf9 z8dnnyN~NUs4){DDbn10b@+7JCa-`)CV-7ZeBV^%@)JeoGM5q-_HmlPc6yLVKQ)7|i zpXP_hYD`LT^GzUL=+e4#XY${eBHAjY9Mb4@*!LLz&{lTUF(5|^_LVF%&JG_kz8U-H zRM@z6RiPmcJVR2V#GWHs

zz*sYpfUrhpPTrj5SScVqS%%+dN?GAgWIf+Lp|pos30_zSgr3<~E4>|eomg(mC!33O zFNJn$2RC>hq&?Q9`0o{XTSbv16s&PDGVpd#L5{dt8#9t^QF_2@zP&3$obJKSF~ZGD zAY)$Mhm~kVupY29ZNJWdq9=RpO^OUSA;95K1IVTUVn^!n?szNdjW{8%D|edp|HT<^L4%RlS1=>zN5lMb z6tXq|12(UlI$a8?OD9%~PQnjjZ~FC18KV}A-!b6~B(}Ql zGJsAQlqAHzBYm{Wg=2P&4M^mT7MG*Wv7Q@P-(^ac;k*|&@_wjp<$Wz^`H~!0ImlXy zp3$Ga6ofA7bCu6FCIE}Sk$JmBG9F{WzX}B`gQzd1sep}!F7&+f?;yY_ve?RocGlx}JH$g&T)Op{JBxQ$H&Y$8S$$9hZh zzh6-U)Gl4Ff!|zFn0i`_Xr$ReqTa~a-lHImo~nGv%H!9OK$}69MhK@5@9h_ZtEBpX z3mg-gaj3*BKv`ldzsoTZ&YoYWcd7_OZNf`K47!M8z9G<&d4C1E4yeR(%n1=)X|}@W z@?Lxa8eCgnN;Trv4+qf@Unxs@DQU3DQy8a=U5fL}`|sj1*jfYau+WDEDWbp5FQ_Zo zHnY(igK>tm-Su{z(W@svOnkFeEP9f~AqEXMtb1mL+wBdz@ht$+hfHk^V*yUX@ao66 z;ePD!a^5g-^?w#6uriO(9*JL1(~@)otc+ z6;0LxmTR{BxB>BOG^|GW5-t^eQP1&$R=H~u$xM=LOf^Ii2= zjD+q-EHSO27~pVCyjJfHaY%H{q_KUo#1!f~5OMDS4u%CX!>vLTlQf+L&^eD!KBeKP zv4Q}`CfaGfkKt+aDqwb$!;@2)RGej;KptZb+Wc#>M!tW2)net1W4+NP8-X_DAJ1!0 zWsn?@O?gN2&wl=Xm9`bfjzyscVf2$!1M*03AgZC3rR-x?#uu2xaL zcw(OC_0)%yx{~~6g27nQp2dp?<49TGq!<*$UJ&!PhNO}g=JYP7HnA9gJV)eDLRpaE zU0w_)7=)hqsmfd!5Pu}$Dm(s*YV6bJ?!)Gr28*G(5QjPL3qz-O$}=%%EIeJl9#>Al zDq|L#D8U-M;Z-zk>PEVcBW`Ef;0%vcWM8do-XN_jYR0wsut&`{6{(P!wg}HEUn70Q zzIembPW~b)&Fy-4yRNZuRdM!6AZWIFgN5I4m7}nFpq+srfpFGm8@#*G?Cj6sz7^!l zts4pcKcpa>Y;kqhrQt*0rf0>h{NBltt&#dKo##W=sQoKb%fYD1zTSNRBchkNqH{rM zXsE;QGQn{znHlVEh>pzTlAay-UuFwB6ofbvO)x0na#|7{T#ZHRvDu1pUfOhF5FGG~ z;0KgrAQ`hj=|Lk9Iv=QPv3gg>Go_1S4%>>#-mgyA74tR4?iu3)2!h=s$Nj(R!_X&+ z7Q!nwIdjEl^4_l{PTadr5xgV1qa?f(RUS8;=zkU*uVzw&?e_KQWCUA9z3au>K5B@k z={WEYOWOAteC1KGk+>(@zVX69;H8r)B2A%u%xI`C`e(imP!@wE4|{p$w1q$gt&b|^ zFkWy{`fu>6^=dchs<+%&A1)=?c@Uy4jD}xXtW`VS?o2=0Vy5pchul3b&b3wwmM^$) z@O8jJsUG8gYV!J`vda+z@fe5)lK!qzob=S;0l#h%xwt4){FUUvmImT9(Hk&L+=UTk z8X8L3%deSq1{$*`KbVp_p<+U5nW7b%d%fN!9w*b)82WWRs^EPi_IDS9?Y&`7qIWQ8yH-bi53i%wun=(Tju;)wlh zptxzHcMCk*#ml0Jq~YC8zvV=+o;A)F!EvK~ohAxXXc`V~V2<*(_n}*2Eg=OTY`DLV z@5q*EPxPWlgp{2F$yHCz{MF5u2lB+jhjqpn{%}&&aTO@fpao;p8_F#s{5QcpXra8r zr&>PVyFEs1H80u2Ue&~h*t$)$FQ&6%4Q4*u+y6!waDFmTCbYb>p~}-_z2eO`pl?EZ zb(1J?A#tveZAb)NT2;~hsu6~9COF_Tn`&~LK3b5A9=%GDWe-jS4RpdbNz{ETTd6&z zNjj*Sx%PECSaBchY#&ciO-X`DIZH+$Nsa@pCzM|C&=V?bogx{rRtk;*u-}@>Vxegd z=yD{U0w;VREM3X%11JdNxdRM@rt}CTj080qO>X6)g@djNK{H_|HCfHO4Mqlh)A8AX zd%UTyufmPST1w?TR!pq|a;s?dYwydyHi)`Id4$&rc@iZ+)lJ8rsd{ocRq5UQZ{v)G zO_i)n++@PWE4!<&^JXBsLT!2N-i+N&k4M87=jxov z&&*Mz5zY0Y*ywKh1OKwj)U{2vah^i;+p;g>yDN1_tJbNR$mzmpD+LJW!YvAUX8GjW zpb36_@r@9hHf0sx%P6f<>hdu%>_Z09O|{9I4s&KDJSg9XgFO(Y;Wfv4w}1bUs3ibx^wnkPjXtJ2swUNnY7cLt;xlxFn!Yv%oQE7iEh4r6 zWBac2b^}$9x40N|JF-<621J+*Fl{-Ni`cArDqIJQ;N>Alb7AWS*}n!Br~q^rY|MFy zu#AfcuvnA_UAi`BlWj!YEFb(JgL+Ajv;_Egd^IivdaE){GRgRl!@^PcYvax`#lf*RY7Gpvgo+DtAaa0_>K5+S<2D8D0s5!HXht&v|rkkD;7^4G2 zPHGkFwqHL3yRGHZsYP&^03l2ceM)cPf;^}e#`M|L-43MZ;n`P?5=@)=v~n?=>#97W z3~%ahy%syf0~$41K3`>q1Th#qI{?9r$MM^VUmz9%1F}|~fe7Ccm!1BaUaW0tgEHva8Gbx3O2kk@Iu>K- zm9(L&pExwrC!8-$ObYE7ifqGXG0NNaKCXDKSK+!zLbp)lc%K}Vrrn2BK31eR)xQ_* ztY8b1)#$&K^i@yxlR#T30t1~f&ZEzH{9d6?a%yk`gQ4d{0wbqod6;}5Jf0dYtfDU~ zy^3pi;tToexAJ?c)HBJ_eo5=bTl5GO=bk_NDd@~06~meQwB?R((&xA&7dK?`@t^#h zGsb_mNeoo@9RZgPf-fZ;;rJxqCFrnu-wv@Aoj4Y znA;KTTt{A=s=5jB#|dBX_=c!;R(crEwoFs9hlgdjE0U^7AqN+tS%V|+6Ck^E1v@jYO8k`rDpOAz}S%%eQml@yjd9rNXW^{{4NO3g2WhVJcSbr_js><3Ce9QWQ zW<4erewF@h+!={41si0s13!EM^=?pboU|b%v94*-+0wc!)>BI^4TE8qtQOo`px10@ z!&cDrj<4RKjQbb@nE725V&fh}Jlu!@0eO}v7h*8lhE%}xp(2P^R31}FxaaEk-{nIQ z$6x36-W1db00zc=Fzg-&3X1$(!mLhBxye8x3j-lTf7UDW&}o_a6u6tL zjhk+>xiS^))F>Sk)doTtteQz64Ow|U)w35k_P#5ZYtcHPr57~vqtWSxayAnkb#_TW zuSzFa)3MWRHQrTMZ9MfE%b4i?=vP27Z@S$SCk0p`=hkJ3_uh12d%>eC%_Ow?(}7)X 
zcB?Em!$@n35W4H=YuN`;I=h!vo;QoOFoFwtv(MW>KCt|k*@+v`J!{B*JjbOLHQH9*=?UG0!BG zn^4vC2FB}9hhMt&eO32w46Smx2&krniqLW{0#n(9nKIBr4~A`e@BG(H&MNz znKFG|_|u^bqe3p*Il)pP zc(6f5c*9z#OcaM-$>q*pM~-!Fp0;ql^vEO7tas9t33@03^a;Z2Kg63W!?$Mw&HllD z)Xh6xu0u>(K-zQyLsUL#=LpD*MtuonN<_Em;VYD~S;KF(LGr}nFk0e1Lx_CW=(Q~` z{TeF#uSAcxjCqIlGSF|SIJHFc28fN8-2y}bnc-b|$LaW5yaZDFgS-XN@LR%=j#j2z z2siMeoU#b|12BR_7Bx6l0v09EyZgg9#XHGUgLwI?Q}W2iA{w zM1bB@wMpR1H-B}W3{MQDS!{Zx_B(;PRcn4y!!PBNN7`k#(;-kl0c(*-P(ojqDRdVeA8L;ghf|ednnUGQTC- zF$%Wu6EAXW&;kY)Um6#8A=}RvBwkf~^@h`d?hwP;4atkgx5{D;6vvy85>2lxZc0l@ zeP=Y%NVd3*M9V3ulIR$el?^M7H2?>e?g@09l!Ws#$rCOL2YHmC}VUgg?=1kRky6?6?Y|Z*c^N zzwCBKha*wI7}z|h;+WPt0uc$F^Ig`1ow1b#U@1ues*aKLP>wkY!O~5aI%Zy`O5GfQ z|C|osNT$$Kt;O#gY?kVH?1*@u5!VRWztAm8qw=%WqhV9k{F0Jg4#V-c_Pr#I)FlYR z#5@X}{6Kf@mnnyztOg;ExG^~+V`gefD1LvYmUe9=v{7bX!#=qx+v66Acz8^+cgfE% zcwdeR5Vtw0Wp10H?N`YmNInEIQga`>7H&7X0L`-6ZLIa5Qnt0+eaC%i%^;$ieUKxi zaF41i3PbYf+n8EmOf&)NZfHuy7Bo4(e^hH~` zAyx++nip0%Vi_!|h32Vw;g0roS1B1mR?5q@=SiLb;c6JW?T4Dd*f)Zzp1loaSIwNJ zQJ9XWE8v8xi%pCL5gQ_!$_~(0vJ08Eaj?&6MM0p*6!Sq)Rmd66u|iMLOx!!3wie5= zv->_vJiYjRN&Tu;i6Q|<#)v#-_C|Y&)`?UN8%~rY|DrIdC+c>=6y&9oil|va)Bb7> zxgJrpMYq?V)@?rq5B#OL5Moi+hU-s)B$D7*(AQ z3SGxaOTjx^`>?(hKrE1(H5<9@03^>^uYk~zvHz}_C%Vy~yC7g}h2T_yGD$yHv*e#^ zaS1JsL@)22)?1f8PW#Y@3wr~ch!SYT;9_wR2Q`9d=+d!;jw`>v2YJDE~`j*e;53@To*1loOII_08{`ww?xh7zH?BKepajSq~PPAKAP#hG=bBIdj!ekbY?sq zWlg?XoBcF=fYsVk?vB3^!GE%bI=j8E;a+o4lk;2`2B@5OhtFVwA2Yf0RB>e|rt`rt zui0k1852)slJ>gSC9xD;SWE_IrCN%%X=XVp;vBPzC0r%`Zjle5P4Swwhx8aZ+He5{ z^k3nOb;BY!p~+eciYicG9z0&CcI! zZh$JlV`@%32%?f3oD$^V=VI=aWx#qJJU@^nCRCz%&%PfbjDM)Z-+#|)E#0Cgbgf&0FCxR74YZ6_Rr?3OppA-2@;Eg1uQi{b%^83CpcXK`X(;>g^ z{Acug|KKN? z!U*CxeiQW1*dTt4MHbahD|XkA$wB|R&iB62W@gah>92=(*;*VbSa-8l4%?|3zS8}bKGeShMQkzLQBT@*WX55 zD+m57rv<~Zq{CgG#uwOGh7@B~Q@ zn*gKYq2{BVd8yy+@(-aG>aE-P>5X+K<|NY2kKM8&$^-fjUfrNo51AI7g7eS|>VV5% zU;O@ojQsx(WNiL7WbFO}nID@7>@Jw$^l!*?ddZtYTbt5fZlU{Oz`{b{zvouxmKzNr zsiT6R@ig9a8LBQ^CLUIlc(X8ZnKsL7l2mTu+=!gN@0Yuo>xv4sV;QADPYlltzn1f- z)h!$f8k!caZ{PBACJmBAliM!EJx}Bi+tBm>rVi(A2n96L-5n@(6#3Wz01P{DVxin0 z-jv)3^ghC-VX!^`rOOy6H3Bp5SZd7Q;wG(SM|RhGcr!PIflvu67~GJI(E@hrcKYQH z;xw#~WM(^Ds^Gm#>Z(J!TtI@|0YBt54 zs@YJS*ecgZeiAnmjJ!9iOtiUHCH#=T7}S5NNPI%jr9;q((#^0<{QeKT^~iW@!5y*< z+=6fxS+Krj*8gGa9iubdx^2Jm+qP}nwr$(ClZtIsY}={i0FZnCAmP>b>>vov^xe%^{RXF~v ztrJSKf(>%kym8b@H#S=Gruu5WSAm!*BZG6x@gP0!>fi;_$3YdxHzIMp7g;aW`W=#p33GX zwiB77_4WEUEVZ!XuHIznaz)tYyXkKfPe2?7L@{&5yNM~uS8l!Dn!((ymXqcnMv51w z`=gW2#$oVqeRnyQyWs^h&R*s~`AX9TjK@#OiPAA}+tpEL*X*$5|wK={NOT#l99c9qsl9$7>CvqIVnH@X}b*#qPzjv z?-k-oRsx=;z7EkVQ#IPp_WfJx^`eOXa5m|Ga(3_M3>Gl2@n6pV*a81JX#SV8KLBNnxE@5y7+K5QF zLDF|A--A}gH!;8XWdqF~+RaB!m{o5S_LJUB4|+UnR7~U->*X#pSD1whQU|}jc_^$s zRFsbWuUztq^~M#tR8>m)9g`E55}{n!8sTtl@rvP}EKZT*&$HP%i3QI*$064?8P`Pe z7$E9Jm^#52Y3{^EB{?fDVOu~0T5ITx=(+BF7e@{A2xY-!skON|6L;9@3Rtn<(Hccl zeAqRr=?5`Zz(TbY^p6a0lzglFc!2WNvA+a#Kw4jhz%k8&M4Sjea#~MM-OZIYA5fpu zbmu6t5vsDqnsfRb6@4OznXA;>M)0K)Cr6;Xe=UMi6+6;Tq6?$jA?7OGm}O2)_4vBZ zFZ|M-FRgB>GzoC!bDgh991*fOSh$g9!<@y~rFe^%-da22@USG&kONEh3Z~1OMFdW- z|I`Uuu~FE{Ve_9c*U}73bzNR4&-$%B2dolQ20^6-J~0LR*+2C_IFUTd2EL7dNZ%9p?w*IoF`ZMG_v+IvD>>(5HkTJ>mDdB zY6FCcE@$zv==#*OUK=z@-xK|p;r;KgfEt@A-Y-r0v<4PBWo=N5a4=4OYA z#ngN{9)8xb|4`wlxM}r&(qs1@Ha!1tdc6KikN-Sr0s>io|C=6wBtTI9Fu`MLx|nIB z%^|lCO=qg7{&o8?!geF=D(5T^-3+a<(?*e&ISh+%d6qrxrTQM`!}gaB6HFkBM6BwD zW@0y}<+Mj5=X4gi3zX64URgrNHy5XLM$3?_zorFV)U075as>>rf3F_;FDI zkg{?-XWtTMJuh62A!zjN`lxrLDaq28nwFq`F~WmXXOU!h*Fq5}^Vc9G@1RjTdB7L> zNV*y57rn0P+*&-?4vV{l)NUez8`mHMBKZKDKL^AntlNLRUgAC`i!5t_y{F|S<7ss9ltynxQIGZBd&LN35}daj 
zVk4XCWURKE!JQ!E#^9Zjb#Sp*3)w&tNTq4l3L{^Z&#|MZ(wa(n7W&?88L}WA>3y3d zkPS3Vc5y?XiXx(V&cb^_j6NN#HXK&AVtaL>0#~x4RV7FI8{4>U_h%9)v3ZY6Sz+#y zexY;kAm0`;xe~6qwXiz1m}1>;>*&;BM)0oIsEtlCQlIfMBpI_e3tK=4U|w)UjrLdT z!uU)Qbjyzg9#6N+@TSdC9=H5C4!QYL$r zHw%{+9G%!cvx+#UK7V3in`?89V%gV~A}Vnv?pVB+cLONp6*q)TpT zp@)FRhQsxdlQnvld^18BLC|a9hg=v2i^$LWHE6{cKO+sU!3tLQ&W9 zEKLi%gf<6N*u;f@U@M{rbeEcDYb?c~`2&j5?3kZ*vb?yW$PCRBq?v|14TvpCT=A|% zdcWH|jY~!1iV^iwI_#~7!!9JVNSGPK6x#^Rcp@_n3^}OwyGzm&Br#uSFKUAeNH{Ls zZ$(=E`WH?etKhr3EJ)8ZxBY!po9N3GQ{T`9oO7S^d6!>mSLRk5ZS30`@`IN87*quf z&Mg|RPSWaM03cl|hPa0_aMlV;wz z^BybcTbuIvg=dC1y*;lHfx!GTy55vC7Lnm(GD4jEA8G`F-%h|PS_x2oRB()kl?-B5 zIqea4`$W%``9N_|fW5k^3%UC>ljz5`Lef-MZ17f3uq_qXy3%nE-Ja9_a8;oAFqRMi z!?ystp!yt+CqG6ybNW76`Lm68%wRsoIi8kaAXDj215r(f$KTCqP;AM>=^jR_b}xeY zbo2E=P(Z94zAM}+3(%lDV@5f_QTmbofUw*8qJi)1@pq(nn zg2G*RHXhG$hv;@u*3*L&y6FSfGk7c|F?;SqR&`o7YvC2Eq`t8ZW;*7DQ~_=8`fD<# zBoO|4ybdn-AZ8(8E7Rki_phE(9=|sz9Fiov@;23VOm^SzdUVR&`nX5g(H<;RElW@z zar|1S^l_&Wtr1bza3T0iCfp{cZ^uzmYo{VsDpE5|!%tdNYAeXo^%vx=nXxQ}`MAc0 z#wx?4wkIsoXlm|vcuhRd>AaCVpkA^}@Z`V}w2fEO$AQjz!f({?>P0m3c@A2E9&?9o}?Ct5xb_v&b3^gr%b>5|8Y% zx>iA2QxsBS;in(rRIRL6*viNY-dM(4_15H9xlHqjHX$767Mvau;=O~eQznZK<}7HZ zNPTavss|hI*Wo-8qnaacrV8x`J^yN1&@DEMn4;8+C$m3xXN48o$? z+!2Oi-D4y7)h+iS&BXrKv0l3F?(v`_LQFP_Y(H89$c@Mop#>g zxvcZiTHI$4Z^Ps)5=x*{8=NIhL&b;-0lyql6rH@LU@L>7Skp^CDUGXfE2>&u7u-kuKt)U1P|Txyk)@Wy6I6ASvvY)}^+8st@% zTk)dfFMu%pi%JJnT{NAU7_g`4A$Ms?4*l0ObdnLHwX}FaN>?z`WJvby)0$Zve6#4` z1wHB>{j2t1wq({i4;ey9r>V z9slfi<;0X-BA*v#G5&`EG29k^2e*Z5Z(YnV0AuEmtBvGUvh?Fa4SvQqw&OPxXR{K; zuce&{aPgYRPTt00(rzd)a!XQ9VmD9SO9A@X6R>fj6L+kJHaqQ`TI!$zQSs2;k!!TZ z@~njZXZSHRb_amINUo(;&s==PF+d5pue0S=NrneJmF=#~f;lk%4kWAEr_;spC_v*_ z;g+R7Gux3uYm6xqnd9?@%dss_NV{GF&AT!jtNrScLC~R8dR@~SAt0bI5;)xr$x(m* z>hf8fJEu*EUcvI6cR1|oTtgCk4q#151WWR-0E*gMCnI<5-|&;y-_!CC)X4-)t+GSsp%Y)_0%0qi*vJnWo6vuu$G?>=YO0GS0OlzG)_`+E z!At@J0;#cMkWKnSb&sWNAUFhgTq}T+JlpaGG#D?TrzEZIp#0FoU_)KUeL}eyMpcqr z5)o8Z_3>u_;>j-nJXE8f6nhd3VM0Ez6Pm@=BD`aH9>_Qwv!@da{A84Hv;p-gA1*0V zA75~5t8)uSZ|#;JJC*8?f2ws*kV_8U?zuGjT;exA&L|;XD2MmascSG^w-3KZg5-PL zk%T$|<5ygCmq9rXwR2iD2CNGOJvQzOkGtW~*JOcbX6>rEnZuDBoefgoBvrS3VHl(A zDa*kB`eNNw0XzY>^NpH6r}p?U_)I>q&=J760Y@JK?Uhz<>A@yoCAtIs$2Rz>;?@11 z&prseK&B8kfz*GdWB@2hugG;Ca$FAL2PvLr$mWGKI}>k?=L%|4En<3hVwQ}e;6~otVl^?QK}l_NNLttJeB#s0S8@-^%;3<6?TwB;r!(D0+& zF@(Zq!!95Y8tm~mpTv3G?U18?7MPdUPvvc8aRNqi6-7)?@e7^~Qrmy5YZ)q%Oz)K* z-rcKvS^L?reev~Sww|>2S{TtttiRwEA}I_vy`<8UeHA)?@w)mq5&&1O?vv6eF&WSg zN!H3Fin-U)i9A>Q4R1?e|C1x_W$)$~-H9d&arJlyxP~@iuRF8Sw=DJUBc>{G0oKl)av{pRMPAt zj*K#mkC9Hld`0WmCQ<=1FQ_*X-u&_r_&n0$^kS>Xg=REaT!Cf=VuGFFtBXWM{j*;z z8({~H(nb~C8)Uge-bAHcGVUOpbc)9p79P7x`}W)Ot5kwx+=Y#JqUdsWr4 zW9^iHnCzh5CduOb@8+{CPX;L%sQQ-Y4l*LZvXSS@*Et@hWITQv+L4NiRz}g4f~_?y zoFlaPlzOe$^G{VvQ=yQBkNjuLumlKjhLH(P78JHiavboNU`pei!1WH38J5czvIV@o z1C`A=INYVf@Pg$`tOPA)zt!$xH>;}7m(V<=CCi8Hz!%rb586sV#ZPY*GRdhlN(qTq&DFp4_y%CKsUr*T*V~TZ`ts#MP=y zp7I2^OLUoDHCixLTll_hHR>VzuW<9?7iekEn0U@Xh6ho2r+dCKAXa?_Y`@TdM>`UW ztG9pqqa)x2at(}=jc>#Lt0$Z2|8XDu|F`=P_^%!z`1zxaKxWvF`=G#>4(f*wppVPI zN8OHjtHv=kVLftB5|-sv#@^z08X3I$7caE1(!W|uLLYS@r;JI0MpA0>UF;rimTnO|1c-N6fodqiY5)+ddJ zm#jG&GK&q?PF@GwhxH_&<2@oY#_-e8M_9a;e50UELX!&aK$t#l>b>6^mWb2I;-KK#|Yn{+#o;y87u z?TRu+;7emPTH12F9w%x6beZ~|o~TUDA05lDhAR+~u6K3h@+l4BuFm{R!s(1$Hm?y< zekAnG{LSJlCWMM1ITIvDeR*MjRgH&%VD%F$pWpMoj8eZ66d5gWLZ&0Sk{wh}B&jFC z*mu1ehux?&YKW}vgx&njYkqPm5%4=!pCC9QF!@G_cn!azJ zJYflZH9`1F?Puug<^;%w6)Jw-AcDlbYs#%d$n8-L@TrS`4+X_~|M^P|PTD4A|iIdfIt=)hA!n6iKc{jif;62;_#wiC(Lae>%VH05k&29oF>u1?;a1)&i$kBMl@6uBBxlt z&xlZ;_Ddxk{aEL^3Ow%t*7^9(&)e_jW&VqC%UY}Zmk>Rok_Dou7wBg%eBjvgPgHGZ 
zCo#5ywD3-`tA~@JPRMCujbQy=o0=IUb*pc2JI^shWhTqp-3mVHud9;%CHYul)Q?t? zvs6csu=xh0|1w=vGx10%DoMym5&ZHqd_<-V9sV2qG)LDr_H<-;gw`YhEvexdhA-II z{hGT15hsB+wmh^SdSA>q(n(LX_JFBz!Og5#_{#?`@3yhN37N6iWfPAWA~cfYo}i&5 zNXnSYE%a<-n;@A|zEtx;U;=FBV9RVuuDY{FY5svVU3ccd8RJ(TS5a8uZ&~)+6b=}> z9@CSp1?baEUXs#?MJG@GgRpH;lh>1623R$2P5a zn=0Ta7sN5zArlX#mKAFiJN=3np`(%m^QMg+jP$m!bbTQ8qI>2PNV^R&+Mo4b$nO=V z04iY5kI?}|k^-ILtURSSm?p&=_0b+iM+Mx?ak^8)3T7e#JdLd-T`rI*MBO)GaL*C} zIdPVSDaX}q5WHS$eLaa*4{Cvl;I=tg9R5~CZ5Q1$@+X~K*wDu~*4iD~08UidzWAtn zuUITfrF4uJ7*W%-*to&MKRQp?jduf}kO5QIQ~TQkXHu!egjO7rYsLI(Pb{-;H@A?F z21n)v5%QI?ZeNOXH$84Lv97+593sLmj%4@L{=90Zr=%9fPU3u7v&iMmad>9|R-yN=i z|FJet?{DWP;cyMJ;=9P@2|xN}XX?7{V73W22?KPK+xAhz6;^Rk#FCMbSA13%eJ}6k z7(q-998?0Ajhxt zxO+h8xZGg304VOol!9b8W8)TLm{%G>)rL6SU8#+AzAn?=Lp48LIfcp%83h za$De_1-FMkh8qo&ir$Re`0=aoYdF=2xPNYZFf~HgYbAhpq+2IGLa;r7Nk*%8(f9!) zj^22BRUblvMTZvE3-vB|Ljo+pesQa$pV{piR3D_7X?IjvMgJCa7M;8!gPq&ABA6jk z=`|vqkcrO}NIzRpSgp0$M>Lo!%$AS&j5d=45>WIy9{OS=9nVqb8L(CphP}QUO06## zme)bL7r~sWmn&5WbWe7>eC&g;Dcm$afN1LzzybuDVyH9LQceB%B9Idg&${YI8TM`= zyt3i>_P%)>_=gau{|S5jckhJ3f8jq5^8doWs6ZCnzxW48?rl+-v|LhXb0q|WBH;>Zs(Uz_tf&`R6?ha)T z^31kExMS^S;ConIPosZr*QhJ{>`8x|yuF7`>i;w;Gl)2u!uEhS-%qmdNO&y6rO}a$ z?B3N~#BWA6xCP5cg>SAL2)9~hitFSvj@o;2N`G}RV*+I7>?-t59N%G05VvfUQ{IeE zbZa{gP+*M$KB5-XieHnQ_1U&u^*}|pX%7!f1a-CQ^T2v_Y|=S3{UusF^M~tF!{O#) zflvaus-{>KQNWfc70%~SB5c+{WV+1kL`iyXU^A1J|p9X2}atiSc1vRq5is2a z2xInOMXqz?Rc}LDCE1mEb!Ez2`$Ktm9V$v{zuP-m!Aus|C*Na1!HHMl8-ZCePPor+ zngkFsJzQ?D)@raAwLLW@nk!*)hkd5mY^Qa$TmV6ZYL-X4mnm!9PpvxTMW-gseU21F zYbA7v2B!oQq=<2#IB+OC`gc+;2Oom6JG5c9oXa0f{QnDXdDQ>kC~Kb7zxo&dAN11hmpZaYYNVYDbR8UvlLv8bVE{kI zLypYMoO+e~cDNJ2U4S|SIkw+k%gn{M$)ge9dPk9N_`=k2)_8G}LHYxbeQfbY*^_w? zzOxR%a{UD&kLRLhBsL}I_>G@FwAf7L@r8$J4N`KXCr7fLs0@-@O;rEp5>F$$V%0$^ z>*TM_mfo9M{5Qy94KeNFE)Hb8x2Cl5MCnk6)b4M=w?<7o_t$hem<5R}RnTTUwTuI< zq1)t+)R@Uu_8XiOkDx2r>)2QKqkm3V+1_VlU9l)(Y}1LVVnM1lFXw=f>M!g3l>)k0 z26kDU(S!4iauFB;B1T8ANS5XMU(O*&eG}a0`8~iv=@{wjA56~$-Uvv|2$uaeWRZGC zKyfr_F#DS<`MJ>ri`B)+rQcH4*1QuaH1PZDEw5CC%WITeHvUs5kfHNVWS^y~IA(Au zPBc`_zninWat>MIX$Gpg?@zV>*p3o?IAUg<}_Daf49Dh1G0V+x!!>1*pOq+iTSEp+{L#rmv?S|x6}nFVoQIv5OT@= zZrG3nsNYH=>%R+4h5E?t(VDHJt^T8G!-Xq?HOZi@H*=0749qvuh$4dF9$LaLCfYs7 z@G{Z@Z_Jwo);^o_Sf@h!$Lju#yAL&K4;s9Yh%>wdyBRqO#!PLEsv#Al&$aOR{Kx$sYUeab8VwwTtkgh4q-B#pmQ*Nwwj&pSF`73 z%D~a1H}A=4T(aE4>K2hVdiSp@8-`k#bTALU<+O1f`gO^S=$GE7)*J?bXJ(*jqj$uI zaBAkdg*ZC70h@j*ZUU!$#}UmJ3Uu#OR_(a4Qxp+oA&zlk5Uz#OJ9>=_@;zf>U#~M6BkbPJm3K)w>i@-supm4RX zRl?MWi#ag*TFRH9GPI;=2N`IK5G(In+S6djmTlkb^U$@C(Iy)J(Xue+1tAKLx(~k9-&Z z6*&Gs2h0DLz~Kb4*8UMVl;;m@XdxQIINQ_}5#_U#U? 
zKI-8Xv^BUS`qB(B0)fWJW8h#HtC=Ve?Ho%LLhQI}%WFOH9}z!_$<_k@l-#+@@LMFV ziVDv+?MH^fM1rD?L07ze&vG(Yhe>~^!ZBfaY>In3vIv(Ac_ig81_c)OPsv12V%%Jv z3lr4?AacTQMl_b_-4JOwHBwu9zD!8We@^y3yEugUdl}Tug~8(}Nw?zBO44^pVm?R8 zDk@;vI4F^-jc^Nog^bnQ0-4PuVi(w#jjS^t5J8Z~Q80X|`BOW%5XEirXt^^0ckY+D zW1Ye6q@w2?<^f!lz-R+j{BuPw|6b95?o3A@>+PRF8S2jsVd2e?D)<^Q4>8zqwF5kfl1dS#Uxq734yDpiBmaLN9GudC36PGW_kV&l%U z+%9C#`I$uB@rHE$#*%S1g0p-K0FE5@+u;v&)Io=OxVjKWnV7e7WaS$UpLru0y*cCc zXJU9167*2SBPQ3CYV|F(&3z5B)O!RF1ZAdBXT~FpR&3LsH?T|@Sb1NO`8(U(G*Frq zXo?cAPhVSsd0S@R$c#c_?+F?NjiiIul#iL}_9)ul-pqAcfN-%yd=%3PM%cR?XR;6; z8$8#VKt#U_0XL&L#zoir`IdjCqZSwm>*2^XKqtIVl10gF4PgtmYp(4PE27dM+qvrD z9aFeZ|Dn_MTdv)UbiPzQX`@3I7(oVF)DAM{nhyMROix7|oqEqd0JoP5V?t`Qo~%j; z;&5rjYq6~#HjYeab4c87+wcspsJ)K5EqB8kTTQFp+@ibPAN6#qw zi71~*s^YbmAo-M4znx5_mT@&{-yp4600dmb7Dp(_;$cWZ>@s4jpZIG&R3cO$MT=fg zw2#BiOZA$xE@Nyvh%yzD8pRNfTj`oaiJgMShg{OMPpBR?wjR1CcAwLp;MEK7q*HM$%g^ zm20GKsW;L*i}d*tck@*|QN=?V-N)IqkJdtYW&d(E#Qp69@584~=;e1Z_ynwfLs}Tn zEV1Uel1pSHG50|fZ`nM&T_9Vvn4|ii_#xquLfv=hL}d(^#9l!d!+5h$AaGP?RUdK~%yh{LB*dk`jXmp63R zV7=Z1vFBlV@uJp=*tZm)Mzl7-!c^XSGW8;I#4piYJZ`1=Yugg3!YzZ~ZwR)>+nwO8 z(S{O~P(c3eNIRCk?A!b~!&^hVphi8M*lR_k@ooKT#TT70$b`gXPd@i|Y?kz8LPGa# zv*yPg^0zIv4~@F}l_k@_eLw!Qqh7gj`*v`T3f{3^*Q2$y{l?agr2!2=&8dC{=T4}@ z)|~fkr#qv^!xIzeI7ZpXT7=*c{&h9G>Q(|d;XMj>l9!eZ?Iz7fXf^4Jxy!%i^>RiAz7P`K zoI2H4Hvo7(PTmI+lNRFS}$ToFN^dJ<5-E4 z%!yC)XhF(e+d=D8u2&@3MfH~{Y`?V?wL_e!t%g5pDQQ$0^(8=ILxzZEnTY_ z`lc3au_+%`^P7wmL?}|UbZRkBeQU0wu#4F1{ zB5Oj&pyhKU@We&0S!)2EHw+r0KWauQGpoS4OL}&a1AF27@JK$=HQJUc6;FJ4?=K=n zxKLi8=k3*v8(ZeP#-SN?yAe8 z+SM=b7}FIRy%anKPgoDgmHQ4uNJwjrBn$>jDlj66X!-1}+il%KtRM@l7HrSfwlU)i zs;KC;2C80OncqU$rAO#gR@p2KsKwc8BP*E!=}X_-Tn`FXvjQ6sR&;2U)eK#+)-<1` zqIYcEI~760FV&L;35EK9(@2cXs=#ymPgb<*e?lEN2#+PV>!yY$Y&v|#j|O0xS7>Z> z5Cpr87G%$OxAKUJ`bX|yr9(EBJ)`&RMh)yU%!R0)=K`c5vXin>Qu1p+bS zC*GZI3yd;@I)=!K!4_fPH4dv9dzA)nn&y@bi4o%n5{^W2E>VTU6rLs!NBP=3Rt7r= z9%zD%vMIpI2!}D{8`RMK?6=PM&wvQ_?i=pp_EB-_wC6X*wnjK6gf+v|JLA_}d$nk$ zJ`((E_Hj3qf@zRcb6yvm18Lt@-UioQjWF}2#u9mP2(xxX|#9Cal~l`6R`PYo)blF+qwXIN!MpsDy@_^*O~)je`ncKcvDgW_o-6E zw)pGLh+)1ygyyyQwoX8m)B&a)Izt~t08)1d2Qz`4oeKF?Xlc5kQdMuao@8EJb=o^N zb4;_TiYQo(p$J|uq3{wA-;xN1D;hqB0q|?Wc}`ES>CrA?O8-mLv4e=~rTgXUqj0ue zRzQBWas@m1TX$9MHsKuNgrnM;q0t_^F%{V7Wp+U1rGLmhzJvs zpIY9H4}uBrtT6q-mlxbFWWdV4bB9#4%;TQl;u0s?jA1)_*^ytvOj%$w{dcye_~gQ$ zE-08uNi;+02l2xLDm4e53!7TF;iaFv0A~+Eh@vB};nACz@lsLRO1&rlzLdE=Ocy{- z62v0kz6Ztkh?{iw7}2f>*+nGa&@_1rIyzRAzfPz#os<4WZBG>xMu37%oKI`BlLhxa zT7r0BO+2p`x%NZc)@_0~H#rPmhX4zW?mq6yo=)?ant=q{NZl3U#!**FP-TdlY?a(EP6*@$2 zFDpS*b*1~hww%N9>;>3;n$FFQT{N#Lbjcaos z**|&n5S?G^@3uoHY^#>X+^|i1$b)xwBiko~95yCD+3kz-6to#Id=1{{0_XFW1a%Ic z9aPO#cKNQ#u$f|8dtOdKyhlFuiUOwL2-ZU!w}yp!9onM>jt?v%+)aJ>r4edWnn;dI z6`$I3+Od=hRJWUfm>52*lQre@=^BoQ@drQJ5NnDKQ26+G5w(QE&(uyz@-NF!tX6;d1KKj%+X|YDf(APK zZFmD#a@};nypcY2i{1L^Eyle%e8ShiNUxX2eq+DL_gM!gxz~qzqRvi~Um&zo$rfp| zPmPoQTsc1_C;_#stAW&evc4bZhRYB#am`1n3lCcj3pcCJj{Jr38;$1_1f@kG@W-=x zu`$RLe4mc8{HqqYFVKb$UW~psBgLfHipXcF2ZpMcqQSkeXtnd9<#RLSH~P)6scsUo7`S zmB`5R(ba-yJJM@i6S_Yrgg@nrj_aU_Dk9U;@|9<<-_VA9RexQ&DxgP7a<87(*U@8R-sqaZB=nSh3FR;Pv z(lsWF$}9k-i484{6bZH-Nf~d4cMJ z5GlP782cj)d#0}uQ=O~(5-vwTGa5W;S6=OE@?ux!7OeO;it_%9A*qVCHHF-jqw%ht zx^uVl3*7a$kJNhrgD}q?p7mKRlG$_Z^|I}89iDdU`k9>xEz3!D<-EREc20bP_>@4N9QGtic=+9WqW^KIiXO3> zJ{h@Co#?wI(mDYNk&-}5SytHPNhPBvKVAquH-^jnrrACGlZS%CiZGaOW>iQLSOsgT zK-Gv?61<--$%BLf>1F1*6HTLK7w=@kS)5YPZO!F3P@yKy+IcolS5L%MnmseynKIV4;E0miKKnyAC4(!Gtbtr6d*To9g8~hAm=0D7L}pKTy=i%ZW@dc ziPow|>h57zbdX-fuS+j2^O~JPQf(jf;lULVY_!xwL4Pi)Zhm8tje!H@4={Yfsw#@f zDZvK)tw0IA;(Tg+Dv(6xbc`p2J~zD1e=G^hSH{h9!2(bPt9aqE8qflGomGYd6lB1G 
zL61FF(o&y~kl+OP81lq;4KllD?iwDLQu$_;D7PCLA_543v|z@MNB4o1!d8qy41$uR zeooBP$#8Fqya%QUyuZ(9)S>{a?#5ow)2k%>Rxb?pGlOM?2>Hl{wE?jIkD!a{YTKrP zHc8J*cwY(vI!Lnx9G==INMxP);Cgz9yaJkygz-@u;S!AgQd8Tj>e)3QSbnPxd%!0O z#3XMYNB2OH$R!YprXRnjt;Ot2j23*IJP9ht}vwW5_`V(LbNq10AykO$s!Mw;SJc*1hICL1N{nnRaCGz=4JcPWlH&#dM0C+0`@qEf zUN!lUX}tdMYvf>Bcs1Sg6!JmPKpIk?n|f5mL*>Bea}V)0O%ct_TyDRa}=$&DXQeWR=`ieGoAsy97 z9=~-9&7ziSFU@|jA`+ytbw0&Na8{WYB}!L$H6hD zekx_c7#x|OA?^tWby}#7PZgH$U|zN22=uN9pi0h_HlXV~_%^cA(36d4 z*KJp-9rE$id(nCr6qPWQXz4!H^1JanR1}|=eYBreK-6wC`dL21sE-Qkw$Gxe>$tu6 zrjIc65RW-hQM@D9Yeb=&J>z>xoPaBpOPk5de)eu9bw&dBo z{YZRLs9>u!B+0i_Eqh%09!Z3NB!BshC_*BBC`BOHx zs|#q(Q773fo?V^6TD%M`-w_K;=5cWPbV4c8w7njTB_I(#8dwF!n08l?B`>zf&C~)L z+P%QQ)D`ZM$w`m$`C9RLs;fj5o`hE1AGoTg?}^y>NWqx!OgbFRn@}>zTRzoLpLxD? zKAjCa4zkV|FqKNJ+*>Jm#E#nX&($4;dL^%`vhd#9BDltx@M zqAtyK`$gM-v+<3yfX?7@H$QEtl8y7h`j|HPQ3=W?&JGzCQCNT;aOnLKPEU^~gmqW4 zm*6IF*r`w??kTvzpJ`!&rk6`(Ot$(iK#y#@nDn!`@L&NS_sEC{E4C*>X|1cUDHd31 z>1;vP?veP-vk$x5=!K@$Zs{+3#G;Ie9@HSBBzT5)jA}6@Tofp+452uRs?vGi3q~;^ z3q>R(XNL^fAApBe7(Q86sr6@*@Pv#K>3BGlRZ}T-Y7)H-oR?#d)7NUdMhFxfXOJoq z?HjvSa}kGdE6Hinu8O^UjID31Gfy4_X(Ya@xUhgytG&lHHOy?fWUEy@ufFijLLADM zmNVWV4hG$P2%fR;_F_0*MTTu}-*7!09a>1t?HlHt{<58SeTRDM1yo#Y?n(z>zm z#WkuWig#ICmh4I|LP9y+>An0LbuR2U!fpp%f!R8}>eeB2=-=`) z=B6D4jpg#Tmy+ng1xhc$gn#ekuMMdVQ7lGjR3JXPS(T`a<{Xy)USGk~tM`mTWNUnA zr0C5U;we{cDy23v!{kQtVV=+RkPD&tSx&FzdjaPZ9R?DxF_ZfGprE%R$m2j(BBj9_ z^!$o%XkAie-jT#H=K(Abl`r&g)Cg&(U|#$ zsB6mKj=c>HxhT2pZ&M!I}+s>`$FkM>KJc%R>d3v8IZFGrC;_!g$lx={1wvXiVg2mr3 zH5b+S`#N}`lzl7R*h;ilb6er%V^w!9nDZtXM$;aJln2jww6_P@#P##f!OC5K-{IWu zf01qGeV3}$d!Z3Zx1XowgJueCKpC2%W;CJu)QppjA5ug|%Bz;-0e(9MS+o0=sUD6h z#^0wwcNb@oiFKzh{GAHdqzq1yy5nr(R&j~c$myV(eGcSK2NI+_zT+`s0CcHroe?lL zDD>Q4>wN+tjK}*zz@QhpeN}neOBlk^nIDkWBwy-v>$yAd6pZR(y}_RG4W+(KOx@vl zFm?n@!pCGVA456qSqVjNF7IbY84FqzK?gz`x#<@SWu(wdxC^oURn#`ZTOs+J$m${&p7+nhsLaA}O>d>P}EN%@fYw#4YWM?xamuW$s*}oO@ zJA8cc58$7;ldiptEX;XN92f~mi^yunGwj?E8U-RGeqQ?(9Vu9i;wQvzQLg4)Li%n>!u?wg!UpL z2kam#3SGd{C>*A}RG2t)y-qB_X0hotI%GM$gjeQ+VzAJ=WQSrxxH7e&T+GzQTJ6_Q z=1W%zVKKsY?iQn2qm$wmIB;i|hm%J^)f8-F@Y4|e+EzKD>kqQ^ ziF|CfLUF@WHhY}wXjmm4op3XC+B@wMlSMx$ctW=!#u|OyoF><%+zV&N1E7!wA^W60 z2io1-&!ixe{G5WuXX)P=5>Irw4N<`Cxu)fw#+tTI&WbVyG-|66Q16e#E7S4Qxqw&f zZ~ZgDU3|#1VLVleu9VxGUF=nCf34W;A0``5 zuXH{p2V>GOmtZ5;aTS{-QD-GP)+ox8V^(lB+KpatCwnRN?&v#U2M^ zu%<3PAhXVH5PjPu{-keKOvI42mbchQeQl5Rh%v@-q}gx?-O`Y>{vQB0K*+z|BF8S~ z#E!&&y8fT~L1L_*i|4iDQO=c`-7C9_czQZU>ra@<2Mo4D&za!8RmhwlavG??O~WMt zlWXr=VNXQ?B7;|EF(>XrLYu`}cA#+Fy6G20M~h@nVvn$7>?sdg>Bc>jo_!LN&OJnD zAj+pVLkfm%D%LLk$A4tq&!V5b~cotJiw-G0+4 zItp|iVpug%{GYJ4r<0XZdSVd#yh1r%HxrV*vKz%8+4zD&z6IJ%rIL7lSP0Mx4pBro zV-wQ|Se45(wK8&O%NQm>HK94mqZ?8LBdiEMdGacERNe$W_tuJVZIwNwQFU)91Jn&9 zW(|@=4AM!xXg>HP^E}My6pL89zN2nQg-#Qqw|+R3*+_sl2^~TyVEI?draPh(KT_Nu z-i9AmH&ndVf|3F)KU4~-^#-8lfX6TA*pdr|t-E&eR>?Wn*1C-VGvJ_EMa?p>%zDrQ zO64Z~e*zn>l;76a7XUioJt{9eVsCe{eg8#yJn23E2<<-_Lt_$$DN9xicR87sfux_O z+07Lo!_Td|lsW5)?DxLQ<@Hb_i{fH_mp#0oyntQY#9Eman*ks|^to(H z+pW@teBwD_zL|w6_$GGw5$un*C~sW=i>L=T3B4d~o>E5?RvXIg$edPh~qRy<3KC z5iG;B_D>C(K%$jr!&k-k>JeogG>iF52{a1)mxVSZ7oO;R+P&SX+rZqV?O+(tN!&BO z%R9brgvVe#hd3$Vs5}uA5(TTZh?4&ozc<-a&GHk$a`#an<$@{y6$<;BCqRpQMooF; zg`hcq4sM8F+>Co)4Z<7)7$FbZ7t2x~rJ3Fe0@o@D8fB-Jo%pXi2Na4+6BNdsI+>{< zi8K>R>_lKJJQoP;7fZ11h{HNJ?PLUtN2cs*=SKH$?k>d7am5hAGYY^TOOhZ>7*7i2 zTc+nhR;2FjC6EvKk%yo0b))E%lk?~>8E#R-7#%CnRe7%v9To# zo3FB82kFri+cE@`o6AAqoP}{%eqc<-yF)H=-KgFlzls`Op61F79u;eYzXNMHfusvh zc~MMxjqP>t z_5N$spcqj?V(1oBtH_XOn^TAAbV&`CsOyMbl6D6b3HC^bRqr1JK$vy0Odu$iEJoz; zq_(VsJz6_QL$PbW!}YAV#>&0j{d7-$5@5C@!)`3Y~83^HnS{y5&z2|oD9KEVo% 
z*Vq-MMDpZNLpPJ;?@r;aZ1{VC`FVV!_08GN)6?gqfKS~;O*G9P;!EIA;~=mtbDnj? zl@a9my^n?`b>u=_&ExcT3IyEt?2nV#^spQ30+Nw-MLEWI~_(D#a&Y>Yx<1ySMajJFZ!W)|=74_SJJ>)ac3p2yucN*taK0DgejHbi3SlhyIf^~S_%{aBg_6vz9c zh_G%f?9$NLPizfwMIH}RS{GET75WVd#mk046DXDgEnT2+5@vx63Di#@0OCB@V+A(E`UryE5ab4Holr zX8QQgW!TLf24MR=R4z8x6I2Ig{BqmK+Y2^M{g-Yv_6Pib)w5J!(6hi_6+qhrSuv5% zb2RQjo8iC+^!Mdf!A*AtKbhdo|#4rqI7Y<*dV69jLB!6uFG>lqseLqI{sj1{;B*Z36K~Pvm`uka-bizz*aalU8l# z79?3OtpiSUFhMErVXk)wDE4rNa#EUGFS=8A`P=3HUeoGt1H8exakw@EvmCEwS@2nv zw9r69KD0n1R><2@qg#+7SDEFxSL%gE9t?{PJbJozGckSr8uwm-&qoExK`vPqx|TH^9FAr|GRwO{s8Sa7e?d;WhKT% zeCh_JM5Ldq!b^!fN6#Z(^#%|T#@X>s#hNvb+tGVvf|y6g=0j{x2R-3~*JewPI@p24 z{*Rw_g`fsBD`Wo`STu$YypMZCdm6ysd*2~(j6Cb2g#&y)KJQ*3fS$JI31%nWKV|c| zfP>mLI39-WKa>RUQ5on|TB+}c{%Oj8HmxAUJ-#2+T2VF+q{b(5+Y!_GA7@EfDcB%1 zb3$1I>@kxZJTc7T8~LM{1qY(lQ5vHMFnjvHUj8ht-STKI6 zM-{)`)GGe3@mB2cj(B4qSZazlg4_Fc=^O5NPP~P=ptpv+^_x-! z7Djy zPMyo8s5)*H^~5oNW!hb(Pc9_=z?hO1J3p#yDXX$sl|hFem4dTSih;Ao5&2RIvC#}s zgsICX#!6W5!7_cAj3Uox@?d(6z!4C7g$b1f$>z_8Q@SfFG4X|Jl+)#Jy}Cg|4prct67CkJzw3h;!QdvQpLQAvPjpRTAKPNp0ci20yCm!DgU-62XV zcy3igIm(Mh=MAe1;j3^oLqd_=-Ks$bv$ijI7kcz#&~xKjz7XZ7yU|`}rN&jBG6#ErOGF{ylD3tot+E8oDWr-hrl52>K`_K$DE1w69%E z7wT|2>`(iFDrJmfHHj#taKnGJ2U=GQS6n&r1&xyF_?@EMHB4@KSo|kZ; zT~`kTxSHlEkN$Jz8|*aZXWK}=l8D~lA5T|PpriO@p_iF93dnREEXQ@5)a8f)FP#o~;|l9{zfVh1`+= z&sZ?vP~O2dMr`;ejvNtwPP_R&_Mmd7Rdu&#k-acJ^7LN;BF)ihyg5Rfo`6}XDc0MySZnlpp={@)A|e5 zdpqk4nSF0cTfFU z2=Pn&YIl&MR8}X!Q6tFx*_3#m2+~1*6;J!i_WrtaRr9{FeBa`GAAskN{VlTJiIo1C zrB(p;bD_Qz9er1z?;uMFiDLe_Loe1z6j7^KAQcJ>LArE7f8W>FIdq~8sdh4+5fwL3 z{A0gm@@;z+zS7x;_abtBcHXjlrIryT*XWjH)gPUqAQ7d~I96NE*whk46D{PLWxiJ| zUEiNIqJHRo4@ccvW7c0u`C^HI_R^bj&xB8jyxvO5LEB_*B2Zp&a}LNkDNV0Qxz$Tq zGyV?>UUOpxe|?ehCGKJ+px2*g1J9y5Ixp;e+;!x^aZEHruH>>aDLba5t-%gFhJ6dY zdZ0IBVdipG!X%w}m<+^&j586!xp9?vg6+%v%P4rIyqP z0;PA{3R>|(Mskc}3FJMa+-Rl7G{>_}NC^gP|NbcAW+4f3W-YpY`w=<5aRnh?ba{2E zCK-|jy$q8{h+u8v7f4KMexQa4aS{T$pS`=lXPMbA0F|T!NNM2$-4!xnnT|LxRh0L< zk>`FbEi4UVzOYU1Xlqf4g4yyzYN6#ZU=uL}9x!yEicz37#K=mA_OBiry_a-mc+w)5 z>!>rn@-+%5r&)<*^jMOIT%W(~p)41w5D)L0}qm^{Nve~&RjXa!J3FQi5Sr}7m!=to9XnU7j zejLqFm0ea_=&m6R54&D#l+iu({9vpoXtftd)99r^7y@o1|CVSX8^W_xRW)~dxhwVj zV~>3KS1Y2E5j+{aWVl-N)8`VPOs~rQ1ziC(T{e_K|s}(IdH5vSB(Kqy0p@e$K zvw2E!ZT3mM;nL zog`Y+C81KgkyZN?vkMNt-^w^LmjPT$4d|ZqlZS(pyWW8k4hTY(3Iv(vTnpnlwno`p zcEh~7#6?%|CbYBUXNe62v_7k}C<{9~cYoXJ*QQs>F?4?y=dIkN?lS+u=^pt-;4;Vj zAIAK6(CKAz@jG%w%0qG#9WlRJP%1%>FM^!%+g&%DU1R;QW84fe6}r4?wc)1*+Y7%} z-#}}4E$TmbLz$k^+oYQy?Yj8B;Q-CW=EGcGHEV1>%>?WNGyE;Rw(B$flf!T*IiKl6 zDw2@nwOEt;x}9T)knbHnfJ;5m0BFRusce+cO%dD$HsSx@T+c65Q1C-zX2oT`a29JaJLCYu`ILqw4SHeNyVvbz6Exs`0wrs$uDa2BDVL{mTL?p|;B|qgQ>0laW zf@H*LFr&5PV0ZlTR-Cpu(pjwsFGPqq1A7^kKTR z=SDsMX$PxnUDl4aUST)1RdiKvgw{2OA)1C8UbW#2j&f>4J@?lj=Od~QWt;~86VWuv zFLFPlc)0084EuaXxj-o3otOp=qRe6wjKr^Lwb5i0=b7s%va(~>jU<3+?bl~-#9m|G zw4SF{A4sjC+-D%qqVYehlKV(`QZp7)#oRuvkayg~`|i-)RU=M?GvWaF2VZ8_irn@GV~BcH+%}H^VM0)S$!x%g=(#OwY=#ZkXoq z?W2y(TK2%O(&}2nmLuhE5;W5nT@QoLgw(~q!icpTir&eGq*nd+Bc!YyN=T9ug;961 z6rfKky9t@e!@6#hQ-#oc9~Rei#m+Z1m;0qD3!-`1e3e~wTr6A7g#kV-tmnqjltj@$ zbrPX>ZhQT)dem(#{hEcUn|jCmXq$mog9eH96nmxMY@)>;dzWX`h^hYfO$-;jhCx;n zdC^e3#3Q-1fQAl`W_;PKQ_uu`;&5@nN$$1}qcW+f!rd=bv~Zq15HiS=_f(|2wHxt? 
zWw5I>8I}m0*HTYG-~3dR-{b{d75n|NFzWq_{CPytM8k`YJT89igB{n$e=jDetLIhw zpPP}c4;mDW_%~N=6-IX*J_Z>Ga_x;o%)?Ap3Na ztSarYZxkRUv@%-raaP6Y)@9_5)cwB_t-AFQC)$iNJzgNGz82j`>QVZnOiw68#1TQ;bHW^!+|ddr0?7w3Pz&&$Gxh$xZm#Um2AA3Arz*!~xGV zk`GPO5P{9lC4+^j$d|Bj{DUt+MI{skfgRNt)VZ0nB>u=ZI!; zq649<8GAG2)L%(fB}61DdZK=_Ze1y-SG)XnvMlYxMr2jtzN&hC_`tysM0HEp8%?B9 zIxaUL3ei$Nt+)^|K85vukBzBg#mQ}zj4avUwF?6k)5kWZ+Yg15?ajIa9&rtpMrJ_ z`{_z?w5ls@-x7`C8l#b&%z@lqS}F#~ugPsjA-T5oYdo3(K>I=eaz{!_Z%1aA@&f%TB7{ zE6M#m55RD+Vh~A00kLKSH|Gx1Ta24(Tt5VEd$cbWBYa^WOqZwB+~^eWAnW^$n7zB$ z{5J%pP8HkN7%-Wb#Zlq-3K3WYc}*G!P%vs2Tbq4^7E*f?MLgCpxkUHO1=C-q+j1@N zl`tOw?!`fitb4mG1*z1#XtJP_!kV6&vz&Zs*O^uTavrY6i`EUc?5C!V=gox0K&vB= z-a3pq4AJCqV?J2WM39L1E)vQ&!l<5|aCK^6(T#!Le0zmImPv{ zWncb?W=*!Hjg7;sbg7?okzp8~JeGfhr!Op(&>N6@=niMiR!nQyK_dJJ(yvC>FgE`W zs#Xh!p7r6}DauH??$jggklSB7Y~KgRsyO%9Zn6ZHT7?KAx8$VZ#YEZ*(7OL+DlP`+ z6VNFeCO7bLWnAUtE5Q<#W5wpAdHDb^d%sy?iaKEC(f%*J;lEDM^MKAf+zjPZ!Twi= zx20-$mP+%*vCq*v1)Bf?amV(Z(uev>oCWKNua;Zd!&sIuPnccUM}y+tG*=L>%XNJ% zn85|_uD(xw_){Hhf7Y=J2?Q8@F6=3k|8Lkw3N=(Jh-IhuPF#sH+rbVj`j+~51tY{k zT9UenJ?Few?2?z!cMEq@ZY=MeH5asXV=iUk+|IZh_r=;(A)pYa3EWb*K_4?@MqO+l zLt0>>?)r~X;=(P4SVq%E=*PMtei69_ENVZHdxl>I;0iY{xMMkr)7So)hV`&@_o7RY zKEC4y!L4Bz&@u@ED#A2B!R%KQu{?VyYAm%lCg}YtjF|Yve@^N*V=mvh^~Vwywmq)# zScSa;f9hBzcty?K{zk(sVPfC9*a)mY8SS? z&n~#zUdPC*Koo49Br6Tg`CN~>(V~5YxkpU=LH}dnwST>QLleE9VRrv=TtYV+CI$Gw zu61y1*0026pM;1sTBjIU21g|Y(|hCb^Y+TO5s>P~IXfErDWPBJ65(1iC0;pYpR6V8 z*7Ud~${saEaI7Box!t{X{z_X|u?Z4p`_0WI-9;OqIbuX+t*VH%kQ!6C%>7y0vKdqF z8r_&nzImGG3Xa`q=CefWyNeO@)Wgw#iCja*Q~?*9JZd6J>AdOc)8v#P=eo^v14!oh z!s@$Z*yZ6H;zp^?&CI7NasE}@ZeEN$TkX`-VcY2l&0U!XSB}LXbBn=1-7qIbIWzo4 z0G=>GZ9JXV5kpr}Fc-pTVtu-NAx&K2A+x2F5B{=+mm9#nY7E=6{l4)k$*@F><*l0l z4(-`>Q-aYaay4sB7T(Yo&%3D(Och2+*)+Yz#f zgh4uF4YwBWZF}Et_;g?o$`zmUAf?m^!=qI~Q|JdeJ^0AczDL@uwXs#zE%N7mg+&a~ zcJ;r3*VCGgQ&ktnIR5cs^zgyx*!Bbzda3Qjhjo~wZ^&|j4pQG7l5-5&Sgya*$qrB6 zn|}MNTjzey3G>Tt-pGw60k-7?Tfy;L1+=fJ5*NMq7&`EeuFU%vG!)?afb=Zbcba4( zI)J+c*>e7REp6J)gORZu(IO1vG%O$ZZIMRgTty59FveCX;q5Ev_2k`;c-hwR6V|nY zH~#GjoH8$=t%%>D&$snhFz3)B{}&Jd0`JH6)yLQ?NKD9$C|mNNP^fWdg=?5IHi}Wc zIJuJcas*&S6s|DI!%axSg}bfN#uH80i!~W#teemOWQjo3k9mS2R6_Q+TAcn-#*_$ofdOM032bGMPwF z-BKF2t^+8>4u!}oi(qhc7avBigLiSWMGQVKxcO!z!>VmE$rg;EU>r z%HcWqfA)}n3=mk3=ZmsgSrJkq&$3(sU+Aj+Aupp~tJ0>L;a)0$eAqG?|3RtCp8Vqb zPbpB$lq{62IH=Totzs*47T$uk$|KY%5vgMFNx18j`Bt}NeyO1hSO-n#0xzL&5W)Xr z>Db%m@SrLwA0d+-s%pZGCy*QGm{d_1iXedY*n{+i`MIxNU4yM%*o0#gkv9{g@_OjD z72GWMxF(vNu_Z2AEPm-W=}uwao8CqV4eo=a#gu zD6YTPvCYJli+W3@|4^pbPI^C!uzwM^=gmid(zrD0>6bCB6}Q{W_1VUwW-;&GZ(oO- zs;y&k%>SC$A)PBk{keGap2+v;ke)eBwNR%g>EL^@Mv_?g71a_I!h|-4BRqv2lv2w| zv~}3e`{!gUlm=4-G1)oGSY%$q{7I%jep|Ea=(HllQP9r+=|!T;oF3+!*Ntz2(1+@e zn5Q0=DdXd^HuN6?M`Nbuxb4nl;#;~-77M?W^Y?ZVKX_n=M~j=|Y*cG=*z|udgD}crMcx)I&y9N9P}mh2JBxoE+@(L(Dk>mm#inL#hVO z(sE0rkjTeiDMF1e;SX0Npng;A9PrSCbIWT~kv3*^I#TS*0_p5k=B7KPmo){F?QIE@I%oQ&^ zNuuL${DQMy^N8nLTF_b-Hq6w{&eCJse9=^~aGCD@2||Z*C$vG9YjO|a#)sY9Ne4FR zJM?#yk75B4I2`sAIydFOxNJy+Y=&vTyG-d;==dm0rhHet1a&Z_z`XXHzn zm?yI_HE-aVFlo^$1X7sg_vjkhzts6l?lD@bS^iRbF_vTiUPOWL5u^mk+Cf=i%W~@U z;MEg%W1TqD;C%|%NjiA zO2+;nC9wE_SH}UE852*erzIfFrfTqasMNf__o=;)fkU?o#WnJp*Znf;yj6P`gxEBd zK(HyMm?&FrQpIKQQJ>)rP!gBNk#c)2$QQG7-vqfmVQJVhb<;wQL92R}nVnbccLYCN zkGa|akZV^Y^2Ggvvp0ol$9$WkBddGQD_$lz_}N#}OFvYgnx72PSb0;tA3o6g=F3Wg7+7cl9VI;bBG*SY;DE1YLOQH)X$zaw7D5Js{}UP+`((b3v$p% zlpsouizyIirABZj-8dDlrgU%io%Hi}YDz2TF~z2vRGo3b*`Ql~cW2vcC4P{&?d_AO zX!6uB${ytVZ_%fbU;w#Ap8fknYJ_sGgY236IeJdfL=frX1|DDwN6&i{Qu36`d!P-_ zZY&h(S$fY|@>Pt+Uz(t9D)7h)3_<5fjswq;gUH_*O^5O^<-*wuyZiCzx`Y#;ZZCRo 
zTAZ_nHDkX@kSc3pbL{%FXW0hYZ!}@l!ylw8|BNgqdnYeym(}CfXQUlcT2F{rO?*5# zQlho*a_Ew#+CpMyKY2in@XBLFuYWS^fKXjp3u3Y4tIsN(5^F5D+yMcTK+vJ3I$JXS z6>e?e)-TFCp3d&wqwc!df!z2!FyY0a%|AL~VpZgv+=s4(wG-Uqo|wW9*Sk@t2=@15 zj`&DkS^7Qy!2bUBEuj-Yv*=l?QPg2G)?*xC*~Ln6VT_!)>9PfzsIjvxq{dDr2U?0<4{rmUx zYnVG&3N83=UX18`e9gysi%{f5JD${x&@=L}CN_^Gw-jYBJ<%cAv<3}f< zDm?lL-mp=Vyg<;?FC@9w0dTNdGYw$que(bX)Vt3mX7a0QDcA5K<7GZ8rZf#+Vu0kH zt>5hn`*0XhLz^y!NAu}S)5?7u{73;2rL>sZqAK&ayvbr4$7*LC3WnfItPG|M(gL57 zq|85!PyazX7zC@cid+m`yFob0w60leNSqM_%xZD=17q6HUWwEllP&>F{kw`FJd8lGgaYk5=^SAa-Wd`#T!8d=re>6TzR2 z_=%;NYJoW6{FMppIn-d-e6oh#0B8XGeB*_v7>h@zmKAkXm*UVsXM1vWAh+>#DJ($KUA)= zrQ$(7KB8hR3L?<)feJhpla@oA^he*&>IG(wj(jaaJqq!%WulVdu!!swb(A5(M5fLm3v9!s}A#!}|?=`bCC z$m0d1E0Ss5r6D=3q{41m8NS{_q1E(lM_N`OdpVD-`=;+_{)wrG>WK6p;-Evb3kM41 zKe+dV*3{L@m#sh)h6`4+e%=O-&0m>txzGI?+0Rcikt?@e^qia9;m!_r>RFHPp05 zsT>`h#XGl(BTQ#YraUS)nAAQxptUBl%q?#|5W~LMz|!%&rRWSw{!;b6ALMgdI%XZ)qvscTHJ;&PIxJAv{RH=l<@6RY9Tb-4WmSvKPclpl||>Es0i&6cfk&EUJ0 zItt>lLP{I7PEHLmYumq?<99b4NeE#}MqZ`m(ebF z7K}Vk&{36?$DZ~Jj~5Ec=KqQ_vRsT#ttBX$yBd~kPGK8%ou4pX;xbLUhyP%qgv^PY zHLP}hhA4W$Y`H`v@`E_3MP~8_KfE*rY%E!T?G1NgSY}=b6{y(*nUF&6vem+uviE!n zK+kz6bFP8suUAN@u4yAJ4v@NFVEuEdwq|B7Wf2+TTh}1b}j%_ zeZ_ae-EuVa8_5<=cZnZb9BfX?e+M|V;TJZCp``H=9W|xYYSEW@*8U5AgnH)dw*AIu z4I?z0wb0;0X1n)p+q#u)h4*GlX3)`c456?20s_1QwrpfA$S=pHt>(pN<#LH%bN`;# zLwNW4xmq=`@ z^+P|Y!fZlTgpkf|5a(7d?;AcV!{HR9#~R<-LSr*Z%5&^6XCO*L(PFrcH%w@JeWFUU@4Ht_WL|Cbh zDg-c6ytl;WeayRc>*_e@GVHC8M8HzR+bO*m0J6wgE>J!>pwFXp{1td(&=C`IdfCIn z6R`Dc$F#BbLP$5lJL<}gLrHo&?~6z~OOwJgZp;G#y)>Ipr@pJVy*ag_PUCvbZ@j$9 zd2_qXeRLl(jyn#y)pZRKQoweeMdj|sfu1Vxk`p2_qgCy5pQa27$l`TJHsXwRs-RbQ z-gGVKkz=t4Kv>U+r%*}eg28?|W&cFk#aBevO|2;>3mvDB8munDY_@asGt?zpDxL4F zpFfrA7xc$^=TO#*Y3}sCyVlKX`gf#z`FD-cuBgNaD3^SM(X&W?f2)VR_f>nq6ZURr z$-T_KvP&_;-yTa!*G3(J)>KViL!T2#buinjbig^OB3M5Ug{zW3BFW3J=M&@=f<$THukCQawY}lSNG`EBE}mc;D^%&dbYj>D{yBU%$pRq1hK5(ibp$~UG}On|ClB^ ze?Oo6G2;j3)ncMC#oUjEyB+DFoqUA-O?L0U_Hy|&RwjVARHPQaR!>elI?^5e7`RFW zlHWtJ0AVFQ@tZ#fM{(@S>j2J@1MUv-zaiP8ztMwQ%IW&g<$18?NEV#i**Pc~(4?us zPljg+^(t{Jc@w&Ozb0bm+spaYoiFWI!|KYvM?zS^#WhQC;RIL!9wLRnaUZBGb8uVz zpbAbX%opeRU9m&WEg6YndA4_>$Q&W$xh&c^Ek5OH@qLsOx1_RLueeg(eW+puR%x-_-h~CqNqv_cRb_0397rThPmoI|7 zJulDoBOO+E&Snj(CO~$8No%dV@j?xtlf@JS$;!!U#2VL~C>bXVGKfqzY@I zDy>HWX{Ou-dbL`|q~Vav8haBs_GVM>^byy+SiVLc1&1fAS~4OR+idzwcx8^sic98E zeToqdmC|9*qrElWor?1lXHf^nDk=OAw1%+7f8hmSP%Ey}`;Z=*9ZcfSVrhA-WwbC* z)Ro+DknZER24ZyJ1pAn5N=?D#VZ%EJ3pmzEc47mx-am3mjP1vG+x*FOi z3)*m8#@Xt2p!Yyx``wH>X13)<93e!A5;}BtI}2&?cqT&zY8SgF?0aBRH540{GK!?= z^$8HJDB(V_0_~2gK0XimwuZg z{dADiY8>7!F#wEIs;52$RxnX3<`f#|;J$f2tB7ykG&6ucp$rRN83E9}Nrl{&^nXfq zL{x5wDKLHmLPO5u%rFC__WVnZXpT9q=7SFb7hUE_u3ef$1!ix>y3$2dkswpnS3sPO9aODqN6LUOT z(>;?;7eD|yw^Yd1>CD_pU?i1K={At&)7`~{KcT0k_(`Rv7EcRrrHS*R%464CDMLd0J%jE8?x@C$18YLDAHT8x$+y9qL?&thf$F(7q{rN2Z-5^)+Y>QH+jkm z2*W_xEtzC+|I0Xj_FWC#*|4)J4CxF|C=jz1^7B!KdcbL>imf9Vlrf5G$` zy!Z8GukFfCJ+9=!5d>Z>5TVIC9E((E*A;M7S%VQl_3PufOrC+TVi^UUu!wW*C*}_ z8bWe*9`6i4_ODQh!NUlb!!02* zn_J%|K^J?t@=9K6>v-R^xF{r>RwF7_?F?%mJNKtz`ge_bTNnsHLX;p`ZimuLJ(zwL zaR2~UK&ZdzAr)_moW!)Yo|S44z26R_TPjOFEBnFp(8J{&fUFO0Bk?v6L9R-xSMi4s z11czrLmZn9c@;Ox!o@qom}MzqX7JtNzINOMCKXMne2q?=T!MJxv}tmcLnaV_uJptq zF5k*uXuH}#Pvm)<%^mdYs?GiWR+GYJ5@u&^vT>Lj5SOu@tdUnPu0_Vuv43e+j5 z2hv0Ef5;}+79NE9EH&+-9K17^lCq5TgOkjT&fFO*NGwKmt1!13VJ0A@*yEe=G^iTB zEPF3TsQ$MQ`2^8%gMZxja6H{PJ2P=ahj@78mGCD1nkVM9=Da2nk)d1mg2!p%`*K?P zzoJprnW3&^N%~c0tT7II;@I@ilF#Z3DHRvC6mrd@| z1(uwSTSPQehsP!*m0Vx49ewO(G#=?*wGGA$qfGKC_^*2FIYIJnq?A!~CJ|4&z&ppo zhXoYs>JuFu#JwOEroK$qOOsu-q&yL*0aYQn$JDbNEI4blgz^@n)1|-@#GrXbGirEz 
zIl1IrUEqUWGS*robzoR<4DV|o=u7gLsETjV1#*`3^T}yFqwddQqqL_!c%a9AtP?${ zCrXi$`q;Uk$cGfsT|eH!MEiuxkdWBJfti?~_xFyZ!0sVv^J}zw^oq5g&M6=Q0_fZq z^jzYjVnQgi48=x*JmwBWP%XvQPxU5Jm+HLz;C}r8kI*;)nx3?k4&)!fbj{4N; zW=VLN0-YnTcL$$gQNvh6T96i^H0QNCSxrulhECQBm6a`hJh|(IUh%kpoC&2_=3%ms-c{?)3f1&bgPqvJg*{!yt5e1UeSa|05Q00)N%~Y zmv9%i_t4MLGUZV+o4+Am!##BiTJE4;KdELx7q*|Bo+d%#4eUgn^P};#O?qHtIMM@M z8&1BW(MWxCdXnE5FbwFj4_$aPc70*YgrG-`&Y6M6ozpwCa`&upK!?S{5PLbq%#_xY|6Yd773FyXBcyxu4wBEEi7bU#d6Ps~) zA{^Gcj^mryu1ynau;in7EA3nCV4~DmaMPe|$0Hg-Q&ec(wBBS4|Rb!=l>66IwM9 z{P`FGnYzgW3qL(OeirC6J@c~YoC$|qf5O1QMBT+TNQ@(ohf-mme$$C@yh6YZe~)?PB~TOk4YXF({Q3t`*91QKi$?($_zTn$YOcZ_f{RB-&ZY_Serj zrbJ;UZk@t^mLb!NxLPHFw3m9szBT6Nf;i+R$JO$Lxpj`@kt|C>k4F!WBLK-&^Dxh9!a42P#(Mf1c&#GY0)o zP{p>UA2@kxySqXcTdi5#z}iH24WrIJTbrZ~Qep`e=> zFMq0cOn1pOJX|Et+9EP8zy2HW0+c~)PK~fxlbku&9szC&-D_#RwTG!@IT}UBplX9> zsh9R$B>QJzEc68Ox(X>EEN0YO_LlM_0#-vJu-m^Ymh5EQl%0uu8*CcnF7QyF#^D;P zlpABL6sD}+Z>^U@aoB5EivYXmS~@7OS?NMNgKh8rC57v(_g0)QMrS4#R4Z^GTFU@J zL^B(!(eWj~;d3O4<7VI2VR${)Hy!m+Nc35Bd$OiH`*C*lCzUx#jk*gW?md@SD5!zf z<+Rrn+pB$Jq{^2neQ4Y~i-DUGZtZ6o@NkXl>Y@Rs1MK(Yuz(4%DQj|3$A+w*$$!cV zKzix|Ud%K_Fu)W;(4Ni5bhNstAM>aQ8fY8ltyE}$f;n;3nQd2wE?7x_YqpUC7F8tb znExQuPICaC+~%afd9TM+G3=nFpiPF(ia#lqPXH>)|_k=F+H}+k6ou3?^N96c9s7+F~ zFG_jt8$qXhfIW79%p@%XKAV1slYleN+-jy22QSt(6g^S4vyd22w$nyMri72k{gzux z57qrB@NIe28Ms1rfL=)KxMu`a4>ZQ^aB~NZZ#v{pp4Up%?t5P&s*%ZXs)bPvS5tMQ z+SH|R#flUuyExz&=P8}YjpzLq{t>IaT!*l;ik6wnCFNQZUV~JY#Cslv|1Cd+n%prztw|(xMGD}JYT;q z%f@$7kE;9)MK%iG6!>ILi+sZzh#wFmP*qOFwsgV_W!`xVaKGzj)@y;HGS{K-7f|O& zn<0E!Z(yT%SSY6mIBvWe9+i9nfV8;jCriEM(WYdP&MjG>DSGSRG)6FU@}OczIE!tA zT?qSx=KZ5XcC6I#QzOU7GCOdQof~jMfRwaaWJ?MX93EUaOAn#_)YImZcD(28jpM+a z^k7uto^d|Wmi>-la!$#%yC>z-EW+@AtxJzxLd~lDqX#_9VMTleU--o!Fx=`KX6S&& zh+G?y2|y-WQ(~hN=2(0tcIkf63IccV-_a!kYa3ShYp zVEv}Z*1+O0%Hf>j?<2cP`(@*QPpP*9#%eBs(`g=(4}wV|2<7AM@;bGzBY489t}kos z5zr26q5 z>F9TdTbfX=5c_fS#8?uF8SvLc<1L-lMBO9bUa_{?A7W?KHAA({4yesx4Vfa`n}l$; zBH{$}pB+5wIuj){XbbT%*<9nN#eBt`u0CHm@Ag~&%_P1j%+-63$h35l#LOh8$lrk@ zf`zXxt+g1vh1uqo2NVxNv@Br9J_++EuB1xD`6C|!xFHOT*>pE>0Px0Ka7wF&dUqP; zO5Bc~dYbqkIoqnYOdeRgN;~w6ro}nBY%|&#B4j_h^Q^9FhXuB1iOdZ+_40gUzCg8Z zMZvWe8h5BJUGN?i)$+2=_9uj@`CU}?bB?Vswy+S{M@n9li7PoXpKG>HJAgr`ADAc0 zh!h!-#9k_PQyq$dLtke&-?3WTeeTh=7kcErP+&2wG11b<>0~i2sw*e9BH~1gRh8R+ zI=?;lK*`lg)LMk(BN&NF!iq*g-Lf(^*6cAy%UAL06FKbWC z-omAjv+tbtVHc92-Y(!qY+-%Bx43Z4f(T%?t*?>8TVItc@;oOFET`5+14C5TY(fz; z!`H7z1>QWySRxZp2BmKo&L%CX)>M(fTCeV=be?GKM-Jr>L(D5i0vglh#_F#xyeO7S z>p(ln)~`OyC4?nZ+bPd~_5mU()jhMf@X=vtXXc{Br}}DkJ-g-w5M?D4rDc0aHNAp# zbqBHI)Lr(E%ktDn+h}C9GSBUR^)RL3Jb9TbyU=SOMGK55!TI2fBM&>cpI8+|p)b3{ zGLHqg-~)V(WWs+w=VEsriPdN_+m^4JjdDguqG`Yl*<^onVr3KLT;Kt#Eu1q2gW_=T z>p5oxVA7@c<|nHG#r#~$D<_@Xjd@lue? 
zWm@t6-3HsMZeju<1;)BFm42R5J0@gYX*l0R!Rm&^QWX7NvS!Ed^buR~Ty31s;&7+% zzc=nnADgojLhbli&nshVTn2qf3D*XWN=M5~Z-IG2ahi@S%wK+Q=Pd#eJrk5oVs06( zbEDjn(Iyj84;0v-#B*i%3MJn{uIE~?S%R4`{8bDq&R*)%br}94ae#Sk!Mf|Ha$}Ei z+7wTh#}k>|z{$#~M{N=g&VaxxadXq^3axPg`!ge^+RT~i2=gE&k1IZf$<&$^-RvtZvSI5uJbQ>uAEf>IBc%&yDrP+Cs{8C>H$Gfil7VpcAgKt9 zU1PnH8w6f@pAp)}$(l@59nRqxDM#V*F(b6UJ5(mT8Si%Pm4WOFq~>q|RJH_CqmJ=} z%(o-t?rBDEr79SaN69E=YRq(pZLO^$`v2z^RXqmv@D9;^=J)@b@^hZ)Abjq9wGUH$ zt&ck-lmdlaNnUIv`V;@MlH|v`d^Z=jnSsxU3MLU!uME%$>pYF7br)VUQ%UYby{bYt z(`+;UEl!L#xIgDLtJ<5ah6DmICP5IMPwEUGg|`X?x`*ztw0?#Jbk zVQI}ML$v320B%*ku0?=CTPEBZ8>pE$DLQwClI5^OG1&R*UW}+x``+S`HWp683M3|< zick?N-%XBiRv}Gy`T-MQZ zcK%dAhZ3lWb5j}9I%*)i(PzO-@)$s0bG3JkP805|P$WX{2iE)KenVj?w_Ey{=WboS zk}-=y?jecpBCzwJ+h>Mz4le-umpxhj)NbSceCh1wTioaq?acE}!KKcVH@)$eKH~7_ z$nH$S-rB*khz%nafvyL30i2vmktmjduAK*VrCRmxjHrcR6!@Z_mQh)}c90)F?K#dX zeo66R>w%qrU(|0Y+DelM?_SmOE(oKW2va8(`v<<#EdnGl)E%7h;ad-YHsD@(!mHC# z@}9{Ta+fpap=0@YCvY?nCzxd{(Xb3J^9$*A?9LyCMeihWKVIW_y!zJ@MIb1fQL~B` zJU2TWk)*}gS?QLnW7w+OIHm)4L~Rue zZ+nZrcfNkX+KD4BvuCIrOcxx!5K=3m}iAB7XF}`WuHOLtzHK0_2Zp}GCq07=L zF<4u#%?pp3sK>J8jQ|WM>%j9Qs`XHuhuC!^%TTlpXIVP4r-Q}7%369EN`nhpFewHi zOY~^CUR>HIT+@M--ad-?HDu(K0dX=YU7wy6->EHKr2C+1gikE^-@(8>J2rvpBwak% zZV`u4P0AGfvrZ^s&yAZ~d5MIax%mv5rX36gRme!Lg>Q6bRn~3#r0o*odX9D-LNBI9 zBHHs6?r1V~1|{wqGQ8jD{N`&Ht47!jI`OMm=u{~W%gAV9a_*vL?N90`{C*AGYeA2Y zl9K~-zqTyCL1g=4?U(rCTqF;V|9ZRTfke}PKP9|~HT&t&=lptQfVC|f&SeC=`qpgPlj&pAFC z;262SEl3`0{$!E8tUv?Dk#g+B>+2=Qt$jV~K1w^`@S7$`(BJ{velR=l;8#6r9hZ}U zUXspuXSsm9qWa4`fM?kv_T*WCVPvl2X{EwLJC^09gKMc?w1xU}PxmBnp(eJ5;XI1s zamb@DueI_Nv;xZ;uWdg$SuMgLnTM~)*1su2-Vi%J4B%3FG>6At2U{x&m7QJ7En9DA zU2Ex3Q$%E2xjg~e3y&c`(A^3B)9S*E06E;(5nWr1xg#-86|bbqb#qzL5DW;<4f1w# z7D8ALM2a^~@{%Knc7$v;nh(viPszpLYpo@pcB~{7R@fdF{&NjNW|4krh&XXyjGq7U z5=+%kSl=; z*}b?lb1pl0Dzo?J*Mm%n2o%9025rZR)Wys32OTgo*UR)p#bMJzR z`TI^=)9W#G&QuN(|)0=3$St zX1%3z4%U}sT?IO9?vle`Y*$>^iWNdo?qr!Gd@e`68x@>5wRky{jKcHo)Vv5KnJd@H zxX3rnEAE4ZQZaN2n*y(bZ4VZ+V^7{}2=7+^mno}Oqj!d2N3Uk?kt{HwmlSq16-z&9 zK7AytjIn5$KyTOCM9zZcuY&D<6UfOJ@iI}lwqw`1Q%oyWvnko^X@z;UqGG9K#x54g zH3(^CTMc+zhhxOHPV;nE2Yl>k0z1!=!^S!tvBM%`uFdWXYoU{(b|8!6fQ@B;s=>yt zl3*q|W#GF~KVYZ=U{GYMj+Zcy7r2SkGW1DIMf&XQ%0|UjDq`q#V9lHroxhxiq;Sc@ z2**d&{4>)zw%N_bSqqSg#a8;YeN@kK7veTdo*jjt%f|0|QmuJM#l?+B6>tzMtU(ig zolAF&RA|;4tU7c?K78%)oj684r-WZ9c zdHu;mB4AdBw7`$#QHMW$x~8_@iFowd##|Z6HF=poZ$%|u_*($u9y+HKRs}C|D12NF zc*lP7%6l}b(a$RMqlK$pk$VZ%ZCt7_d)6dwVN=i*qRc9IcV-}sF_ z#xTTOEF7H#As;!3$I&qZ!CC{JraF9UnO@Ln{MNl;CaM9!VcWc_b)KI3c61&}4ml31 z*gXkJV6!K_%`F#wCYF#CsQ*JWP0_KXEB8#dl&LG%p)CBQcDrlFS|TY{u387Iqd7FI zQq&WS;VLGdJlETi@jy@-O_8-f|1rhIZT{(0{LxJns2L!k;jo z#&yvhN?4rZ!~5L-7p-Nl@>HE1$j{wHf9;Aa$Arw1Gw_RdSNqS3^-1o%qWp$ke#hb5 z!P)T_Wou3^JB^)putx=CvDt??vvJB9NfA;$IvU0!Wwfjz-VmY>1$zEeSgn=AJg8h(>lz(Z+ZmLe;64@T5N zmEd6h|KuZ>rpgOB)+Zn+Dg|>)m}N1^UR)fUH}@wrqsgB^UL!+9BCLD(^m5M2u6}otM&J#$lqEhRSpa~y6t2g^RJqWXTC2%jnZ=Hjp)BkU7A9-$Wc%kv9HWK$Uo7`1fJb$xQnIid#DE6Qc~cJa_AR}-8@u3$Yxt`=LH%Bh z*obWc8d{I*pN%>Po??Ah5~9~#Gr_j^E&jA1Vdjlx9hkhy(RmO+;=kuwNJXF~?5 z48VRRP3PCIqW1Ab6J!MW=$=-i7bVy`+9hWnRepkV+kijv7^Ip5fgOQOUeq1`&m?JE zD0+FrmP(=lC?7PHWb`l>;ZaEls2>jKAu|zKI1)L34SdNsw*&m{4|uQ-n|>d+Y=o-D z&Tb?t;C$WsO}#X*1=&N*5h%2b3j?M(OkNNpAw1PigtWszya14ZZ%mTAqzuQ1(@(mq z3ZYXnLIGHI6UD@Z=T>8JaS$yF@#i|{wlR^r8|9~6>{3mfq!!(Wb51w0`<)I8x_QwG z>FXP~Dg}StX)}=iyPEC!`nvcgpjIu-Gw_0A9F`~42;nNnM1=>)mO00|2?Ogzf4e02 z_k-&TO%Rt0mu)#VdTK*D>(d*ZEe~PFnfwQB61if*(f0ohZ?~z^3w4Ro-tB#wu?pGO zBX=xx=3V-*Z_o*k?y1rI%2xw3&>pv1TZVDh?Ythn>||sKXkl*a^oJ_pCQ@-1_la`?B{La8~!75`r-7 z3S0SH+_SkS-Ev>lKydnARu61^nYp$%Cj@+hM)-lBX?D@e9{lU)n7-HjSpYX7_zuX; 
z=(y2Wh4m#hY5Fo5kQviPxV}8gs0AxEPjU2oRM>~mH$m@OqaAR`HMp22z@Ug(fHLOr zusJK*wAyGiHJ&RV_G|?azXPR*|2#{Pec0vGpFhR0g1|LKcv1X#+A1ZM-oMRw1Y(6FCPksj@gA%GIjq8Vn-tBa4O}!1gt0KUo~4u{n`SJNM1!7#pSEo_c<{Z%9*$`f}=c?OR( zZ=cbMN@?~VX>xmpqHgMFF4>(%K#CViRUfUxdcU;eC;LddyEg8PB0%q0ALD{EQ;O~C zKGW6c`#YR?^QieBTaAQ^8hNKDv8=8d$%$BT;x|{2gxxN1FSyl{lCr#tzJdaIWGWF- z7D9ZsH2){8YTe2&t~#2m88F`U%%~sr8S@ypUtDXWvg)2qFIQGe9ZTLKS$Z0+PE7=a zO=*Rb0@^fSFv~I=phNZZP3P~(b5s;y(h$ciH`q}OTSE8aXOy%D6GXX_jdE(vcE(7E zY1>!UGIzt?y5x@b0HnmT9%{Et1Sg&JrC(GswB!)6^8Pk7J?$bw4uuRyfB4Nr#*OF8 z=PZC5g89T_wwnzQRH#GCrCY9jwM2MoYT9^N$2A!Rz-sNIE<2Sj4p_jVJL+qIg3S;W zIj%9I+6I%Z9928Xel!9*zI~2r_<;tY`|?ud21T@LLvcOu3@@ne|eYoTm!B&S%uXn#%>`U%PZX&~AaJUygbgzs+C<$~zMQ zal*V6gF@SFF8+MbY|uUZvPv+5mEeRgJ8S!2qojh?|8XBub?-ho$R`SoeIguIReNFg z7d;CFw{E^|MgZ{kxN44>8c>65c~Y&rA02aYXAQ@;x8JenPrAi?^2T|NBKRnct3mrI z1+Cifh1gq~z_glHZ0Ec1szPe*E;kimUMHi$&n#u&pK25he!A1Wt4qX(P}9+e=*bD7m=H3h}rS3dui`G^ z>;CZ{a)9T*XQO^7wevJzCfpxhjmtpwk&;~lx5cRuG+}gcl@+b$e^}+ZjiiYvqCPb4 zW#G2$0yJi!WJfF_@RWvBE0_ZhVd>`-5QZ*z zpZ$Dq^b!}O62fBF;~o~yQ}*`HqHd+ZK;ehv%cVydO)&g>Qzn${7^X|kLuB}OVRV&x z@&rzllbURan2w$xV1`z#uuI2&H_!5C>1D#f2C8m93g<&I?SGglkv}!Jkl&yX#S9~@ zDhrtfRTyJwen+xFVwb%=eUPY!Yrbi2U^T3uZ@WN$(9srGA`YJj=;CC*qo-^m%GQH_ zg!{E;ARP_~hc_@*HefWMtlL#*j~QDQ9eeNrBjXuik}yq=2J3jUsiQCQqf;Z8mmvT} z1G>HU{7i4x%f=z8OXB@876AgSHF&Ds(~T!2%q#^U+FlR>LRebP{#|p1-W1Q~nVpzV z1^d|Vt=Be-?9LcNmm1u!+~4vTLZGP9-&rj%yWQ8HJu z4@ou0x)ozztrx#xWmy~d>*={sE{woP(E`f=p(d5#A9N$J7SN_4s>S7{NR zbOzMeC1x=>2!pWgi%qL5VSw(nsmfLZb(I zl50cM<~ut}7+TksL3bQ|s7UXQRS)fUwu`H+_qgf7saMm{h~-_>0l1xE1P;^UFsuy- zo~9u9w=BQjcAKwi-@g_TjVtHLNxgbhC-2v2nZgP>dfNELHK^C``pVqhG$azTYN zZ!+VN1v z05+>k{~kAcGR>`|A8Ms|>5`)IfYFkaV55h$`Lym-V9kWA76HJp5>o1NaHz8Ld%W4W z;%l<9O|a!_MB9LeOy=mS$AH4^zv+#UHCwNfYY9Y`ABRZDA0d@1vzvVPi)g9o&9h9u zNr{5S2jU*Y;KFEM#YtScx@Scr&a5F%96^R#253ld7ShG!1NhHWK`1iFV&(!R<4|p{Jefq%6tEA+zfy7KWv>Or4B7uOkHPexh!vRjBA$r`-!+hoR zHS9lg9{vM}5K4b8DSNkuG+#Mktm+N-dz=486#fBO+Mgnb8V`p452+jR%NWk4l)DDV zuzaZj6EGSVyp~DV;zM(8#C2V{RoX@)n0u`iUJciQKw;y)!X70 z;45bJoSxGbdZ{i9Y~>5u@a-lMgy`?6%(uH~(gPlbf!w3Y15KgUvqrDcniLLz!+XR!9))7b* zBIkqyX}Lk-Zta!=RKDNgO~%cJP)e$|I}Eml*&!Mcb!IBqmGkAS4_HnEODhfzUhj{} ztJ>=2)-r)ofiR#tTkD3~8;5wh`{`(xp&Wcl((#V`*AY?cxx8~Kes_#gO0pd7=|P~e zX2#}84hx6S;br_dgd9(T8+I{oHLVzA+94>?@XH$O2~;P=#u!iLLd2N`Q&7;AvnVMx z$t8cZLo=yuNWKHZhd_UHEG1uaP3neYIOgR6* zg`Djbcwcps(5K`l`>3A-9CGMD=fvu4@i;l#QoOu=nlhTZJ?L8eO4hS}4wdg0D9 zUVRw%Hs)gCw>w~@>P`0CzrSy8YninM6n!(=(rBYs_}!zJiIh(oIWaXfXNgdat-B&8 zc;*bM7dluUkpuf)9cHibF_V9xV&E?J5)%&zrrI135P0o7I?)o>*pF7v9C3L>?sQTY zf@tn$cmjUy7~xld?{tN%+U&iZ?KxPS&K198bhCBfk%IYRKY5&R>)UdxA!INeG<5!3 zqMFN;Dym6d00%Dd?VzW^;G@-LdZjCW0Moc2{TfMOUm?AR>qC<)Q*RF)4&XQZM+XZ$aF`a-np`ILLR;6C$I7R!e zu@uAqaZK3hqnp+O6|~2mc3Bq~AFq|teY`JK>z>>I8vzdDL_#D8S(&f%4%6~^GGza zpGFpYJK@IG4v5&}$HY78eq-5y!WzqJ++?hYGbb8h+X!g2*;#Z9M}P%eus~|{`jX`S`hWc1 zL(?@&_F0JrXtyLVB)-JCAK`Qkfo;TR#vO(L| z^u1U^A-oZAaPD9IH}o#i&^{_~)Qx}=YBy9P-CQey6^b~0Jy(Us0hRq;vhlqC7>fb( z@QD2ct6h>3g5+@6B0X*?QnfapSZ@jD0LXxA_)c+O0Gbm20P?{r6{KYh~x-Vw7O(O`oeDX6jaxUo{ z?9mzI4wf(DMdI?_=s)C5kPLLPmKcVjzQNu}u-)-JD?@N^bpKxoUxw{j16YTgkX1Vb zal_iewNEp(wWB2|(Q}=MTt6RYLZHz~m<11bF6=?P!IIhLWDkkhalE8Q`;qnvO5Vz& z$Bf*TqvsSM>S1!#CX7;9Mz1Y26|h6U>oN^yP>!Or04Lvf^F(FiUy;tDU9W%yG6HuU z+t0{TBuqn-ruIY{E0>i`Z&T`0jfsTYJV#A@WH?->@hx@g(2YMx;B7zquBH(@E4v~P z6jwfY>(0vCYK~0At)#&8Wlu?hmBXrFi5rWNvlOH(kf5Y(Oiyv=5Z;~?MN^Nbd0g=% z8^Z@}h&6VMI+mqBp4U>SQME!eb@_oM%XmakaKj{s98LlTH8?^@PQE+ngFygY2?1x) znGk2}qW)VD4&;7c*?`CsWc0(&OF}L^O{Bm!Q{qPdR*QvhIOQngZsVe`n}E{5ezPR&FVm^6q@h)VSmKuR{z7N7uGEP8UDn1 zg=#V<_2ASB=!VTRP@GX2ijP*nHpVxm-!HKm+IU^t)u#J2Wm%w3Wf2?kC*HnG{N3m+ 
z>n{3_FQ>=JL~cIF=gqZ%VAbPjZnv*B_F?B;(|6og@nG=kO2iutd#%|1gc;8`Ra}%i z67?-trT?q*DY?$Rw)Dn=mT{Ky1+`XYnhR)aE_^i+fe|T>M~?w;vLsWln}B9;g*JTr z_$D;Ir152}B|8Ly7kq)<8F14&KrrQj42mjAhFkWf@AN1SZ!HppJ2X74xH(OQjnrR} zGI}VZ+`+3aFvNWQ&w;f{yTwBR32L?l#jO@S0Mwu8wc1lfgOV2_0=Vy%)oiIJBGlB| zqx|>$XWI0F_0=J;r5EY_SEf`2DU zD*%4hHONGs3a~dzmAPMk+3X9IT{bNfz$5Q%Q)p$&`Heg`V=vCjXOO-A)3)6TreppI z$GW|rb$gtjr{qZqby6-Hc)7b5w`)!#YG93UjeIiRRg3A<JVVR0Wq6Q-3s& zdZ4gSM!4Rx;Jw|IWH%I-Q!un`Mv)8gPM`NHCj!18X(da%4upt{n#M%XgCo1cptHcl z4QSO%CYmxUMbu}gpy5;;0(xKb{lyt}nUlVCIyC%c@&#n(F5YN*+!DXBeU_}vh*6G5 zEMz1+-vfn*qB`4os*$rQGVKDIm`u1pc96~5Z8L;#Axx0hv`=Sd!;;PNKDJ-b44Bd)5N$*8n| zGOD!g?;8>oZ-`7IEJ=Y*M-G^c49gObry8; zJM}=$0$+Jx)L+XfD+K(8q)hNgqc~y8}Sfj zSEXwh>RxYt-z~$&%M44A-RGuCf5K5epS=U?1=ef9^WZHf!F(qikPkB|4V*x-$ccSB zsNshK&YAwCTd@eR-!iFKo7>RFooo|~PpjG7WJ*0e2ms+6c~+BuB%#sqQ+TxjtJ7g) zniN9<$Q1vZ=pR0+WE5}4AxpJUZ=Kj{YNpg-J#VYN47ZWBO^9$d8y!XoQtxG<~BSc{FWEeSY36qEaug)$)so7G%OkkVO#25%R&X7)q zertK5k}^TADW-{F;8P}oD{L?H6ISc1q@Yna&`yGwQI>Ti&g%!mxS}x4Lv|=zGy7!F zNehrX#}q`AuzXK<33>0Si}5Et!Y2Un9ngkJXlFh-P)Ix%>w04R#nI^oKvo#(UY8qG zaYe|2?$Yef)bAIMU^6t`#M?HPNR{vuvFGx`#LO_Sukd+ZK~ZETbwsnluFzLoc}3RR z(@qlA3{r_ojxa#Oh%gdrBeU$vB|d8pAqQn4*88;6jgBF|R*L3BH?2-OXWh8mmD@Vc zt7@gEK({6#QM~egR$1|RR(lVuP1M#F%Hkdm(l0J_uleP1D|IkL5`?CemX-DRXBaE3 zhe(7K-~=AdUgg6lra%-R(s!rKFg6lRTs^Xkm3nl3U_naWI*g zAWE^e7t@){`>DVwev~z_-4^U5B0StM+s?Lw*3G%}NX}NRa*UZ}UGt_~sD!i)Wg(9& z^e~9Meyb-od-rzlrD{D}9%pP66*pnS2JU|5C^*#xt=3x%bVken^;8^ALNn;(F~_~l z?vf-JB_{_(g1?qdbtp;?%3nGr6o@pPLKzd9W3ff~LC|YIxggkdoILg1v~>8r_>Xuj z06~4SSv9eeGi?(FF#y#Y@p-pq0Nf)RS~zQ-JOrQ%>X-I?5LhSi5ar&!x#1Er$qpyr zj3KNja^>3fba7RCcO6PD?~}3IQZao+3sT9Pcq(Cs-| z3?i&AbyWUL?-#~^r_&&6g-}cK(1g1{(5q4uW_z(2EnC7Da5F*6FXoG0Y#Q)ju{&#& zst?R^Gfx13Batdjv!2Z-26cTarlD#}-&^{KQT$am%h!i}ziw=1T?63Hbk%mSeV2ef z^rQdBDZHXY?@G`;r=6@pv-JJ4Lm28&A))82`?$ z`g6V;uGZj&Xw1n!V#2nC4%M3;AuhV!b&#%vG`tNtVtk+{|LmBh%lL?|LtHT${rfCG zlJXZ7Ax;QE5}j0f7z|vG1?Ds*;na1ipJo?IhnD=~jdf)sNe+>mGu3iaxLse$Ey!S^ zwKu=tF85~Pk>~*0YcH6mv50dlbU8arLP{;<3H&mFD#nmNF-@-H{NesTDN z{q?IPtc7iqWJ~m-0h4s4@x#8kw(oV}t9w!%~sbv)|>^)ck*NH!#E9`Z@ zIx?4xlQ}%o+cFjm@+5(fT%x2h>85$@V4C5^BjE?Tv2=79Mx`kGjQbp-k_4w4ZYm$2 zWlQ`EKPK@#FlSM`?KZLcxOwAVk=2Y?SRH+Uqpr4F$?gRUbPK!i;ofHU&lIGE5_=N9 zH78%$nVds7WD*?-;Ie{A1do6lO$bxYBhHnwEIIIA?9(1>Ul`i_3C~W$mNY#!P0K7l z`R`Z(H_#DTtIZUI^xz1U=^Z|`u+sRaN401ZK&LU@Ef00093 z03+Z4000dEo?&!Cp8x;@001Z;0000D0iI)OLZ1Kt0{{RhApigY)+?x!2z>6yy&=3I5E@MZ3=J20pZH7468IEGnb40%k7G=myAEl_lRmi#-hl8 zk^8b4kF($j*Vg+1B)oXvdT+APVM{8yA+`YDBm|gL8ELt`vA$y8vcB$pw5pArs<2CAi>9@f!6zyqGrmF_~V5 zebUp><%+otnWQvSgi8Ywmm%_1<^#J5HbCc%v8l%c&yCifTxQXs=T!G|70`()P_Ll# z5Uuh(apWp{oQyp>3*8=+!RzGD`buFA^wDIYkpgHZelu^Y9Q$F_T2>><6+OrWQk5Zd zhfbI8$vO3%nDr9YCO`~?Op(iP=MU`v9+hO90(@}vq8WrOP9ACZIUYW4W3)sQO0GMX zAhUSmG-zW{gN#H&qR!xZ67@X02upDaa<}JF){rLr+Dg`Vi{7^mD?jm( zMY*O{l|E2SII;XcfuEG%Q)>=s<>`p#%$q&;V```l>2;YpN92ps-^nZh000g_o`r5v zFqZ%T0{{RfU;qFB4FR5}ZbF{`00RI3C?Nm<0+T_St2GEMsWO-c{{XW3DTA#Hj-Dn9 zMr`+uvO(_SH{N1fKrm2yDm65M5EgV3MmSf%bXgWk4&ugf9?mcEUsocc?FFZuyr}gVzds?mxE0{%>7Ov@L04NnNkH-w=7H$oVBR=nM+A^Q2%N_f02_ISydl%=gXF6 zn2~zpN-ri*jbqskP8*@_^$9?aR@EcjQOhe=NO~wjK5{_CKM;_~^v-@3a<_H^jJcv& z#+nMBs;gh%=85w+mvtA6_GkU=CeG>^>FShTciAuD!ou)ma5Obsq)4jOdyV^O`8($= z?g;4`7;{#dtJIbqcT!BNsU~ywGrILlGCyG~f~WRj7*hqC0?!WZ&i;fGLsw}%=~M(8 z(uVgo{u%J+{d0;K3VLtsmIIHDlQ8%QZaIH;pEZ8`)5!PPl#eb^?T$9(TmtB63|gSc z295o=&TE4k1@F|*odD(5YuJT~a=pQt_a->qEw)Mu`rf%=Y(-BA@}-Uyy!eJ-7EBC z;7Af1s*}ETPbqJziDK1M@vh#G`I~k@jg8pVMR0!eiiK>8ls)j4swzGnYdISSJGSm1 zsd7Rb(L&Xq3<-kxSR8is162fhnfeFR{=hOKe&iLikhaly*`4Vx zqKYik>~`u}JQK7$ce~+~vmjF!8B*w>5r8z^smiMU^kie8v0{vIB`YI2=<__wHeoNb 
zdm`px4vOvAqPr9|LH&&2zGn~O&?#S-=}d`^LWa(y(BL*PZu#D2&QGjt0yBHz=}jvL z6d8e~pCP&Z-N}pQH^ z>Rk|$p|!u(FsKBO8oCk^a(lus8Yhv36-YWd?xQM4Rh}NKVxQVSnkkvxkrOop`>T$~ zCQ{U_Qu)P4$bz`44rA^3Qw5N{h!d$j@SDvcp!-JL^+_Z`m&PBxD5Q-XU4EvspfeZ= zb1F*D9jCxW!FA>~TKc&XM)MAEcO$z{T~dYJUt?wfX#s1s*|shz5!B6ldldaLuJ#Cl zAZ))FQFf0nWs_+4Mk)i7x>@4lcz!!X%k0Le?GzbjND% z!XO)h9MwE@sZ2Y4@8k9=TJZIuH#-`SW^;-&wMUgMrwCJRRjte@`U&r3KwFMK;F!<= z?8G*C9zG_Z$+4&yFL@WjIwgJh+B~@3ArICN=6-Ee}3~pRISt zjwq-znCd8aSUYAK9va{8Ro*kvD@gL>J{fuqk*}cuYz-b zAJ=k}-C$wBEk8NEgB@toSB9*q;IvP0*^mIFL@QL`l2 zbZew!?0Zp#{X}3P*Q6>-e4)#1z&`b&FbErqIJHHmaleN%m!`poYaaA#QW^1kK^E(5 zMmZesrDbiMoX&n9iU9{AU^-9^sSqet55zi{$|p`Xh)Z0?OyVF>V=!6AE1u8wm(saM zMv(3ioKZL2rbh>7<4^8)vCkxah{TGq74TOkmih&fQqQKtfo-~jHiS4*$m6*`hN{~6N6ZX0xMqLGnZjYLg!^US8=%QzRuc~2ydLczO10AE7rv{CrGI(fT3gHam z0j-wrw~MF1^d+8_H(6d`GclUeR&LMt38+WCP;LAZ8@Yc&K>z>%0xXcB%uOh1G_Hpa zpl)>qc%Y`WUn_Z?|7s~W@f)}R^U~S=RzGbcK=NIaq|ezEs&5O2gC+Xl9qv`HiD*qD zV!49b+%MZ^mX0Hc_18CmMxV-Sf+R=NCkYt^mNAG$K$R1_4H zui6Ue7F7qL>1MAp3!+-5#nO2tC#cxH>%C&dl-dFRB85V{zoJvlL8^~j)*o|(WU>{asi@5sg{QK+W$E}}yl=?3 za0r5`4fy%iyGK1POrf0vZ-i81vh+;0SPB8x&S6l;=y;_%+BG5nc1LWq@(ybsxDXb` z0Myvr3wT&I;pi+48yy!Ebxwo3nZD9oovoaGurdBb!C6Qc>b{3iZ0&;z6$ypctC%jm50zmr88V8#-QA*q zrzQO1<1He~gH{G>2Xno}UzkZYQa_XQ1I~woBf*clAA-L+(-#bE0ayhZ8&*h4I%#xc z=8kFCJm!J0UH&HQtCMnMJeHkXw9NP!-|eDsSRAz&rE|N?{`nPAf;D4mkWlkT4iYp7 zX{MbHb4y)&TaZu0?bS>or#+9Ct5_?3WWFMxF!y&^!g@?2iVQU?J{*~MlW_GOobxMC zc^-;(nT?eB!oAX9dQ>3^g?ZhZpiGsIvORyMct>r*dlQ7!UCX40q#IwR2r;cvIKV5M zQ*Q)i-W=ak^pPm#U?Ou9XL>3a)O zGK!LU;=p{1u=2{lgR~;t7L}6DoN) zV&g2sLKMJDW^DhvwKQKH>4TCpzcnP;pBc~BSuE)S5}^-+$y~$59!;4x3HVW0`JIv~ z04`ZCEQBp8eGU9iX1x=@;1N;4ue#nPEXG*&hsPn8<6SZ!#GuS*bYz9r0uy`8QvB^S zptfK_ic^AK==N$kE3hh^UmbomVl$s?9fi{JbH0e56OzAT<38V%5MV*Xn^!%_81YYI z&kz(s%ExR#w@!A$(k!dHPM@j=4bl4Y0OM+SU;23KU48otOs$a|cZB)93Yn#t?xXrM zhHyhX^h&)ER@IXa$7U~|qwACZjL+$6BLxj-rNGKxx`?)W41Mr`1cLfKzIKz9a|k~z8LLi_}nfegy)44BLn0E`zo z;CSt@z(=7ifwkt=JKvkIQRaQ!t^4|Fo8;tB3n9wnCSJT=qM+eogd1S&`Cb&YbGjjuel%eafs`JI4xewS#BL~*bW z>3sSd6#Ww;qwYRIxI;HJy7T)pY(i=UDoU0WDRX;*CG&oGTpmivGGBd7z;r8>?D?;9 zz0S+i`)(lqz94#nGU75b0@^j+%r^WB3*$jii2af`X*;C<$~%Zm@1jWf-;k1_a?wiz z?eP0fNb*`>p}q;9W-r-3HD|ieA^s<;YAN$g&^l3c&)3{CUKK1b0my!b3B76p%Y}ZW zahcA7VOoWyOpzWXau_YPc=INj^$)@M)+91yiKY`PvRwHxk{Pdkcfg40?KAZ}vJkN3 z3|P)3NH*u7ZKhf^>laCkKWF2Wt?22>Mt!jHf~tkgAZ2)tm^T*a#k_JfXF1nlzc-Cu zcqv$@A|S~rgQ2MU7sr;w3CY6GR(e=%^&Wi&U*(n4ZhHyWnHtJ)H4%|IPHq6{F(p-D z!P8jxwx^KWmcAsn$7KcF#_`1CtSFX{9uB zeW+OoB9@5Zyj@t@k8eEfOzdjwGpbLY6PgN(E=M%vEKqW1aUEs600Ouwfhq^+oPJP` zAdu7YLjR>+W>fqL06tbH`4~623Wv7Dlu?_Csfm6# zwaTCu?x<{2oyPWY!Wa)q&~Afm|STrj5V9Ck+AYAY}eCqSXEwme?!AdZ3>Bt{{~?Q3NT?=C5cx)&u9wic>$ z<7*u0v&pp=SimoPq11Cliq=1jq#^0&aE2v0Yx}fVpJ?nkI??<17E)balWbLVsipD= zi9nbo&a|774m{nCc(XP$`Z9HXGcSK2AJIxj=HreQQ2b$xCxp|M;PgGZVQNC%-Db6~ zr-0)#nE7SpK6gkk>a5MDK00Tw_FM74)9Yg6($lQrU3nF*(f*PtU?AAV$z2m0%Wvy^ zo#uJPdbUMlgHN#m)DppTtuw(?0^GqX{$^vXG+-5Fd?no2TQI{?Jk{{vpo{y{ardif zu~bRsGr&+ZTPU)#ODVbrQ*(e|9viqXpl07bS&ZPB;2EW_s8%e3?#TnHBK#+v2+$Iq ztKet;B)gNx=y5OK6)A(1&Mtf_*J-uaIB_^2J?3G2zZ^q(5&!-a-}kbVez9WyRKaY3 z(nhXD0Q3k^7t68Fe54A;$#hThDj&K}N{UW8tyJ8`t#~FbP!i zw>}FVBZA6L=>QW-iT)zs8LZ5QBk$SqkMEZDw-mjRuB90aVQf`=t$W?3)Sbh`W0<< zMXYR`e~Co^006NnkdFUvrO-jQ!Ks??6kOBbtCwQakM&U;dov*7GzI@O(K^V!L!npx z7OP}Wnt3J=!mG1l*c)WE7RiX8RY?sY=>#;nzDFANgeukCnE#&VFkWt+sm}*TFtm78 z!S=m`U&%p63lP0ZfaHSZ$Z0bw)WxFU6@^XnE<0k9Ek^MZq4X;n(X6dAZ z{_??|UaGD)*$qtclt=-N_2ctHnrT zyT`g3&ds*4vFuTvqX*94;)Wo)CfBH3+GP7GZkT7LL;}|l(#!*GHrGKp-7M&<+ozuD z(mw#_c~}as@Z|}0AUO9tnW(CJK&_2&OXQHTt;;Ios17UV3+XFem zP0~FGOxK~;4pahQ`VIQXmmU)LSGLoC{9GHhAmSpcTaqp4dE|MAls@ltMFS}|AJb?* 
[GIT binary patch payload omitted: several hundred lines of base85-encoded binary data, not recoverable as text.]

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 7ba4b4c6bda..6b1b032fdcc 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -20,7 +20,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular",
"huggingface_hub", "interegular", +srt = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow", "psutil", "pydantic", "python-multipart", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.4", "outlines>=0.0.44"] diff --git a/python/sglang/lang/chat_template.py b/python/sglang/lang/chat_template.py index bfde4bbdb6a..92f717127ff 100644 --- a/python/sglang/lang/chat_template.py +++ b/python/sglang/lang/chat_template.py @@ -137,7 +137,7 @@ def get_chat_template_by_model_path(model_path): register_chat_template( ChatTemplate( name="chatml-llava", - default_system_prompt="Answer the questions.", + default_system_prompt="You are a helpful assistant.", role_prefix_and_suffix={ "system": ("<|im_start|>system\n", "<|im_end|>\n"), "user": ("<|im_start|>user\n", "<|im_end|>\n"), @@ -145,7 +145,7 @@ def get_chat_template_by_model_path(model_path): }, style=ChatTemplateStyle.PLAIN, stop_str=("<|im_end|>",), - image_token=" \n", + image_token="\n", ) ) @@ -322,12 +322,17 @@ def match_chat_ml(model_path: str): if "tinyllama" in model_path: return get_chat_template("chatml") # Now the suffix for qwen2 chat model is "instruct" - if "qwen" in model_path and ("chat" in model_path or "instruct" in model_path): + if ( + "qwen" in model_path + and ("chat" in model_path or "instruct" in model_path) + and ("llava" not in model_path) + ): return get_chat_template("qwen") if ( "llava-v1.6-34b" in model_path or "llava-v1.6-yi-34b" in model_path or "llava-next-video-34b" in model_path + or "llava-onevision-qwen2" in model_path ): return get_chat_template("chatml-llava") diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/conversation.py index 5ee12169740..d5ca3277030 100644 --- a/python/sglang/srt/conversation.py +++ b/python/sglang/srt/conversation.py @@ -34,6 +34,7 @@ class SeparatorStyle(IntEnum): NO_COLON_TWO = auto() ADD_NEW_LINE_SINGLE = auto() LLAMA2 = auto() + LLAMA3 = auto() CHATGLM = auto() CHATML = auto() CHATINTERN = auto() @@ -137,6 +138,20 @@ def get_prompt(self) -> str: else: ret += role + ":" return ret + elif self.sep_style == SeparatorStyle.LLAMA3: + ret = "<|begin_of_text|>" + if self.system_message: + ret += system_prompt + else: + ret += "" + for i, (role, message) in enumerate(self.messages): + if message: + ret += f"<|start_header_id|>{role}<|end_header_id|>\n\n" + ret += f"{message.strip()}<|eot_id|>" + else: + ret += f"<|start_header_id|>{role}<|end_header_id|>\n\n" + # print(ret) + return ret elif self.sep_style == SeparatorStyle.LLAMA2: seps = [self.sep, self.sep2] if self.system_message: @@ -379,12 +394,23 @@ def generate_chat_conv( conv.append_message(conv.roles[0], message.content) else: real_content = "" + # calculate number of image_url + num_image_url = 0 + for content in message.content: + if content.type == "image_url": + num_image_url += 1 + if num_image_url > 1: + image_token = "" + else: + image_token = "\n" for content in message.content: if content.type == "text": + if num_image_url > 16: + real_content += "\n" # for video real_content += content.text elif content.type == "image_url": # NOTE: Only works for llava - real_content += "\n" + real_content += image_token conv.append_image(content.image_url.url) conv.append_message(conv.roles[0], real_content) elif msg_role == "assistant": @@ -425,6 +451,18 @@ def generate_chat_conv( ) ) +register_conv_template( + Conversation( + name="chatml-llava", + system_template="<|im_start|>system\n{system_message}", + system_message="You are a helpful assistant.", + 
roles=("<|im_start|>user", "<|im_start|>assistant"), + sep_style=SeparatorStyle.CHATML, + sep="<|im_end|>", + stop_str=["<|endoftext|>", "<|im_end|>"], + ) +) + register_conv_template( Conversation( name="vicuna_v1.1", @@ -437,6 +475,17 @@ def generate_chat_conv( ) ) +register_conv_template( + Conversation( + name="llava_llama_3", + system_message="You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.", + system_template="<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>", + roles=("user", "assistant"), + sep_style=SeparatorStyle.LLAMA3, + sep="", + stop_str=["<|end_of_text|>", "<|eot_id|>"], + ) +) # Reference: https://github.com/InternLM/lmdeploy/blob/387bf54b4f124e72aab30ae9755f562e435d3d01/lmdeploy/model.py#L425-L442 register_conv_template( Conversation( diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 328519cb26e..2d604d28791 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -131,11 +131,49 @@ def __init__( self.model_update_lock = asyncio.Lock() self.model_update_result = None - async def get_pixel_values(self, image_data): - aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None) + async def get_pixel_values(self, image_data, aspect_ratio=None): + aspect_ratio = ( + getattr(self.hf_config, "image_aspect_ratio", None) + if aspect_ratio is None + else aspect_ratio + ) grid_pinpoints = ( - self.hf_config.image_grid_pinpoints if aspect_ratio == "anyres" else None + self.hf_config.image_grid_pinpoints + if hasattr(self.hf_config, "image_grid_pinpoints") + and "anyres" in aspect_ratio + else None ) + + if isinstance(image_data, list) and len(image_data) > 0: + pixel_values, image_hash, image_size = [], [], [] + if len(image_data) > 1: + aspect_ratio = "pad" # LLaVA OneVision Handling: more than one image --> interleaved image mode or video mode. 
+                for img_data in image_data:
+                    pixel_v, image_h, image_s = await self._process_single_image(
+                        img_data, aspect_ratio, grid_pinpoints
+                    )
+                    pixel_values.append(pixel_v)
+                    image_hash.append(image_h)
+                    image_size.append(image_s)
+                pixel_values = np.stack(pixel_values, axis=0)
+            else:
+                pixel_values, image_hash, image_size = await self._process_single_image(
+                    image_data[0], aspect_ratio, grid_pinpoints
+                )
+                image_hash = [image_hash]
+                image_size = [image_size]
+        elif isinstance(image_data, str):
+            pixel_values, image_hash, image_size = await self._process_single_image(
+                image_data, aspect_ratio, grid_pinpoints
+            )
+            image_hash = [image_hash]
+            image_size = [image_size]
+        else:
+            pixel_values, image_hash, image_size = None, None, None
+
+        return pixel_values, image_hash, image_size
+
+    async def _process_single_image(self, image_data, aspect_ratio, grid_pinpoints):
         if self.executor is not None:
             loop = asyncio.get_event_loop()
             return await loop.run_in_executor(
@@ -194,8 +232,8 @@ async def _handle_single_request(
             )
 
         if self.is_generation:
-            pixel_values, image_hash, image_size = await self._get_pixel_values(
-                obj.image_data if not_use_index else obj.image_data[index]
+            pixel_values, image_hash, image_size = await self.get_pixel_values(
+                obj.image_data
             )
             return_logprob = (
                 obj.return_logprob if not_use_index else obj.return_logprob[index]
@@ -704,7 +742,7 @@ def get_pixel_values(
                 tuple(int(x * 255) for x in processor.image_processor.image_mean),
             )
             pixel_values = processor.image_processor(image)["pixel_values"][0]
-        elif image_aspect_ratio == "anyres":
+        elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
             pixel_values = process_anyres_image(
                 image, processor.image_processor, image_grid_pinpoints
             )
diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index 41f90830123..fa79f849212 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -322,11 +322,16 @@ def handle_generate_request(
         if self.model_runner.is_generation:
             req.pixel_values = recv_req.pixel_values
             if req.pixel_values is not None:
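+                # recv_req.image_hash is a list of per-image hashes for multi-image
+                # requests; fold it into one integer so pad_value stays deterministic.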
+                image_hash = (
+                    hash(tuple(recv_req.image_hash))
+                    if isinstance(recv_req.image_hash, list)
+                    else recv_req.image_hash
+                )
                 req.pad_value = [
-                    (recv_req.image_hash) % self.model_config.vocab_size,
-                    (recv_req.image_hash >> 16) % self.model_config.vocab_size,
-                    (recv_req.image_hash >> 32) % self.model_config.vocab_size,
-                    (recv_req.image_hash >> 64) % self.model_config.vocab_size,
+                    (image_hash) % self.model_config.vocab_size,
+                    (image_hash >> 16) % self.model_config.vocab_size,
+                    (image_hash >> 32) % self.model_config.vocab_size,
+                    (image_hash >> 64) % self.model_config.vocab_size,
                 ]
                 req.image_size = recv_req.image_size
                 (
diff --git a/python/sglang/srt/mm_utils.py b/python/sglang/srt/mm_utils.py
index e09c8215c6d..7918f3f7111 100644
--- a/python/sglang/srt/mm_utils.py
+++ b/python/sglang/srt/mm_utils.py
@@ -13,10 +13,25 @@
 limitations under the License.
 """
 
-# Source: https://github.com/haotian-liu/LLaVA/blob/main/llava/mm_utils.py
+# Source: https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/llava/mm_utils.py
+"""
+Utilities for multi-modal models.
+
+This Python file mainly contains utilities that were used in the
+image processing logic of LLaVA-NeXT, including operations such as
+anyres and anyres_max.
+
+Currently supports the anyres and anyres_max operations for CLIP and
+SigLIP. For more information, you may refer to the paper or the blog:
+
+LLaVA-NeXT: https://llava-vl.github.io/blog/2024-01-30-llava-next/
+LLaVA-OneVision: https://arxiv.org/pdf/2408.03326
+
+"""
 import ast
 import base64
 import math
+import re
 from io import BytesIO
 
 import numpy as np
@@ -40,10 +55,13 @@ def select_best_resolution(original_size, possible_resolutions):
     min_wasted_resolution = float("inf")
 
     for width, height in possible_resolutions:
+        # Calculate the downscaled size to keep the aspect ratio
         scale = min(width / original_width, height / original_height)
         downscaled_width, downscaled_height = int(original_width * scale), int(
             original_height * scale
         )
+
+        # Calculate effective and wasted resolutions
         effective_resolution = min(
             downscaled_width * downscaled_height, original_width * original_height
         )
@@ -129,6 +147,26 @@ def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size):
     Returns:
         tuple: The shape of the image patch grid in the format (width, height).
     """
+    if isinstance(grid_pinpoints, str) and "x" in grid_pinpoints:
+        assert patch_size in [
+            224,
+            336,
+            384,
+            448,
+            512,
+        ], "patch_size should be in [224, 336, 384, 448, 512]"
+        # Use regex to extract the range from the input string
+        matches = re.findall(r"\((\d+)x(\d+)\)", grid_pinpoints)
+        range_start = tuple(map(int, matches[0]))
+        range_end = tuple(map(int, matches[-1]))
+        # Generate a matrix of tuples from (range_start[0], range_start[1]) to (range_end[0], range_end[1])
+        grid_pinpoints = [
+            (i, j)
+            for i in range(range_start[0], range_end[0] + 1)
+            for j in range(range_start[1], range_end[1] + 1)
+        ]
+        # Multiply all elements by patch_size
+        grid_pinpoints = [[dim * patch_size for dim in pair] for pair in grid_pinpoints]
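+        # e.g. "(1x1),...,(6x6)" with patch_size=384 expands to
+        # [[384, 384], [384, 768], ..., [2304, 2304]]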
""" + if isinstance(grid_pinpoints, str) and "x" in grid_pinpoints: + try: + patch_size = processor.size[0] + except Exception as e: + patch_size = processor.size["shortest_edge"] + assert patch_size in [ + 224, + 336, + 384, + 448, + 512, + ], "patch_size should be in [224, 336, 384, 448, 512]" + # Use regex to extract the range from the input string + matches = re.findall(r"\((\d+)x(\d+)\)", grid_pinpoints) + range_start = tuple(map(int, matches[0])) + range_end = tuple(map(int, matches[-1])) + # Generate a matrix of tuples from (range_start[0], range_start[1]) to (range_end[0], range_end[1]) + grid_pinpoints = [ + (i, j) + for i in range(range_start[0], range_end[0] + 1) + for j in range(range_start[1], range_end[1] + 1) + ] + # Multiply all elements by patch_size + grid_pinpoints = [[dim * patch_size for dim in pair] for pair in grid_pinpoints] + if type(grid_pinpoints) is list: possible_resolutions = grid_pinpoints else: @@ -156,15 +219,24 @@ def process_anyres_image(image, processor, grid_pinpoints): best_resolution = select_best_resolution(image.size, possible_resolutions) image_padded = resize_and_pad_image(image, best_resolution) - patches = divide_to_patches(image_padded, processor.crop_size["height"]) - - image_original_resize = image.resize( - (processor.size["shortest_edge"], processor.size["shortest_edge"]) + # For Siglip processor, only have size but no crop size + crop_size = ( + processor.crop_size["height"] + if "crop_size" in processor.__dict__ + else processor.size["height"] ) + shortest_edge = ( + processor.size["shortest_edge"] + if "shortest_edge" in processor.size + else processor.size["height"] + ) + patches = divide_to_patches(image_padded, crop_size) + + image_original_resize = image.resize((shortest_edge, shortest_edge)) image_patches = [image_original_resize] + patches image_patches = [ - processor.preprocess(image_patch)["pixel_values"][0] + processor.preprocess(image_patch.convert("RGB"))["pixel_values"][0] for image_patch in image_patches ] return np.stack(image_patches, axis=0) @@ -255,7 +327,7 @@ def process_images(images, image_processor, model_cfg): ) image = image_processor.preprocess(image)["pixel_values"][0] new_images.append(image) - elif image_aspect_ratio == "anyres": + elif "anyres" in image_aspect_ratio: for image in images: image = process_anyres_image( image, image_processor, model_cfg.image_grid_pinpoints diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index bac0a05378d..98daeaece4f 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -88,14 +88,19 @@ def init_multimuldal_info(self, batch: ScheduleBatch): reqs = batch.reqs self.pixel_values = [r.pixel_values for r in reqs] self.image_sizes = [r.image_size for r in reqs] - self.image_offsets = [ - ( - (r.image_offset - batch.prefix_lens_cpu[i]) - if r.image_offset is not None - else 0 - ) - for i, r in enumerate(reqs) - ] + self.image_offsets = [] + for r in reqs: + if isinstance(r.image_offset, list): + self.image_offsets.append( + [ + (image_offset - len(r.prefix_indices)) + for image_offset in r.image_offset + ] + ) + elif isinstance(r.image_offset, int): + self.image_offsets.append(r.image_offset - len(r.prefix_indices)) + elif r.image_offset is None: + self.image_offsets.append(0) def compute_positions(self, batch: ScheduleBatch): position_ids_offsets = batch.position_ids_offsets diff --git a/python/sglang/srt/models/llava.py 
-        self.image_offsets = [
-            (
-                (r.image_offset - batch.prefix_lens_cpu[i])
-                if r.image_offset is not None
-                else 0
-            )
-            for i, r in enumerate(reqs)
-        ]
+        self.image_offsets = []
+        for r in reqs:
+            if isinstance(r.image_offset, list):
+                self.image_offsets.append(
+                    [
+                        (image_offset - len(r.prefix_indices))
+                        for image_offset in r.image_offset
+                    ]
+                )
+            elif isinstance(r.image_offset, int):
+                self.image_offsets.append(r.image_offset - len(r.prefix_indices))
+            elif r.image_offset is None:
+                self.image_offsets.append(0)
 
     def compute_positions(self, batch: ScheduleBatch):
         position_ids_offsets = batch.position_ids_offsets
diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py
index a885a6e5953..76a0630fc2e 100644
--- a/python/sglang/srt/models/llava.py
+++ b/python/sglang/srt/models/llava.py
@@ -15,6 +15,8 @@
 
 """Inference-only LLaVa model compatible with HuggingFace weights."""
 
+import math
+import re
 from typing import Iterable, List, Optional, Tuple
 
 import numpy as np
@@ -26,6 +28,8 @@
     LlavaConfig,
     MistralConfig,
     Qwen2Config,
+    SiglipVisionConfig,
+    SiglipVisionModel,
 )
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
 from vllm.config import CacheConfig
@@ -63,34 +67,61 @@ def __init__(
         )
 
     def pad_input_ids(self, input_ids, pad_value, pt_shape=None, image_size=None):
-        new_image_feature_len = self.image_feature_len
-        # now only support spatial_unpad + anyres
-        if self.mm_patch_merge_type.startswith("spatial"):
+
+        # hardcode for spatial_unpad + anyres
+        image_aspect_ratio = "anyres" if len(image_size) == 1 else "pad"
+        offset_list = []
+        for image_s in image_size:
+            if len(image_size) > 16:
+                # 2x2 pooling with stride 2
+                new_image_feature_len = (
+                    math.ceil(self.image_size / self.patch_size / 2) ** 2
+                )
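+                # e.g. SigLIP-384 with patch size 14: ceil(384 / 14 / 2) ** 2 = 196 tokens per frame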
+            else:
+                new_image_feature_len = self.image_feature_len  # multi-image
+
             height = width = self.num_patches_per_side
-            if pt_shape[0] > 1:
-                if self.image_aspect_ratio == "anyres":
-                    num_patch_width, num_patch_height = get_anyres_image_grid_shape(
-                        image_size,
-                        self.image_grid_pinpoints,
-                        self.vision_tower.config.image_size,
+            if "anyres" in image_aspect_ratio:
+                num_patch_width, num_patch_height = get_anyres_image_grid_shape(
+                    image_s,
+                    self.image_grid_pinpoints,
+                    self.vision_tower.config.image_size,
+                )
+                h = num_patch_height * height
+                w = num_patch_width * width
+                new_h, new_w = unpad_image_shape(h, w, image_s)
+
+                if "anyres_max" in self.config.image_aspect_ratio:
+                    matched_anyres_max_num_patches = re.match(
+                        r"anyres_max_(\d+)", self.config.image_aspect_ratio
+                    )
+                    if matched_anyres_max_num_patches:
+                        max_num_patches = int(matched_anyres_max_num_patches.group(1))
+                        # times = math.sqrt(h * w / (max_num_patches * unit**2))
+                        times = math.sqrt(
+                            new_h * new_w / (max_num_patches * self.image_feature_len)
                         )
-                    if "unpad" in self.mm_patch_merge_type:
-                        h = num_patch_height * height
-                        w = num_patch_width * width
-                        new_h, new_w = unpad_image_shape(h, w, image_size)
-                        new_image_feature_len += new_h * (new_w + 1)
-
-        pad_ids = pad_value * (
-            (new_image_feature_len + len(pad_value)) // len(pad_value)
-        )
-        offset = input_ids.index(self.config.image_token_index)
-        # old_len + pad_len - 1, because we need to remove image_token_id
-        new_input_ids = (
-            input_ids[:offset]
-            + pad_ids[:new_image_feature_len]
-            + input_ids[offset + 1 :]
-        )
-        return new_input_ids, offset
+                        if times > 1.1:
+                            new_h = int(new_h // times)
+                            new_w = int(new_w // times)
+                new_image_feature_len += new_h * (new_w + 1)
+
+            pad_ids = pad_value * (
+                (new_image_feature_len + len(pad_value)) // len(pad_value)
+            )
+            # print("calculated new_image_feature_len: ", new_image_feature_len)
+            try:
+                offset = input_ids.index(self.config.image_token_index)
+            except ValueError:
+                offset = 0
+            # old_len + pad_len - 1, because we need to remove image_token_id
+            input_ids = (
+                input_ids[:offset]
+                + pad_ids[:new_image_feature_len]
+                + input_ids[offset + 1 :]
+            )
+            offset_list.append(offset)
+        return input_ids, offset_list
 
     def encode_images(self, pixel_values: torch.Tensor) -> torch.Tensor:
         image_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
@@ -124,7 +155,6 @@ def forward(
 
         # Embed text input
         input_embeds = self.language_model.model.embed_tokens(input_ids)
-
         # Embed vision input
         need_vision = (
             (positions[input_metadata.extend_start_loc] < self.image_feature_len)
@@ -163,27 +193,73 @@ def forward(
 
             if self.mm_patch_merge_type.startswith("spatial"):
                 new_image_features = []
+                height = width = self.num_patches_per_side
                 for image_idx, image_feature in enumerate(image_features):
-                    if image_feature.shape[0] > 1:
+                    if len(image_sizes[image_idx]) == 1:
+                        image_aspect_ratio = (
+                            self.config.image_aspect_ratio
+                        )  # single image
+                    else:
+                        image_aspect_ratio = "pad"  # multi image
+                    # image_aspect_ratio = (
+                    #     "anyres" if len(image_sizes[image_idx]) == 1 else "pad"
+                    # )
+                    if (
+                        image_feature.shape[0] > 1
+                        and "anyres" in image_aspect_ratio
+                    ):
                         base_image_feature = image_feature[0]
                         image_feature = image_feature[1:]
-                        height = width = self.num_patches_per_side
                         assert height * width == base_image_feature.shape[0]
-                        if self.image_aspect_ratio == "anyres":
-                            (
-                                num_patch_width,
-                                num_patch_height,
-                            ) = get_anyres_image_grid_shape(
-                                image_sizes[image_idx],
-                                self.image_grid_pinpoints,
-                                self.vision_tower.config.image_size,
+
+                        if "anyres_max" in image_aspect_ratio:
+                            matched_anyres_max_num_patches = re.match(
+                                r"anyres_max_(\d+)", image_aspect_ratio
                             )
+                            if matched_anyres_max_num_patches:
+                                max_num_patches = int(
+                                    matched_anyres_max_num_patches.group(1)
+                                )
+
+                        if (
+                            image_aspect_ratio == "anyres"
+                            or "anyres_max" in image_aspect_ratio
+                        ):
+                            vision_tower_image_size = self.image_size
+                            try:
+                                num_patch_width, num_patch_height = (
+                                    get_anyres_image_grid_shape(
+                                        image_sizes[image_idx][0],
+                                        self.config.image_grid_pinpoints,
+                                        vision_tower_image_size,
+                                    )
+                                )
+                            except Exception as e:
+                                print(f"Error: {e}")
+                                num_patch_width, num_patch_height = 2, 2
                             image_feature = image_feature.view(
                                 num_patch_height, num_patch_width, height, width, -1
                             )
                         else:
-                            raise NotImplementedError()
+                            image_feature = image_feature.view(
+                                2, 2, height, width, -1
+                            )
+
+                            # (
+                            #     num_patch_width,
+                            #     num_patch_height,
+                            # ) = get_anyres_image_grid_shape(
+                            #     image_sizes[image_idx][0],
+                            #     self.image_grid_pinpoints,
+                            #     self.vision_tower.config.image_size,
+                            # )
+
+                            # image_feature = image_feature.view(
+                            #     num_patch_height, num_patch_width, height, width, -1
+                            # )
+
                         if "unpad" in self.mm_patch_merge_type:
+                            unit = image_feature.shape[2]
                             image_feature = image_feature.permute(
                                 4, 0, 2, 1, 3
                             ).contiguous()
                             image_feature = image_feature.flatten(1, 2).flatten(
                                 2, 3
                             )
                             image_feature = unpad_image(
-                                image_feature, image_sizes[image_idx]
+                                image_feature, image_sizes[image_idx][0]
                             )
+                            if (
+                                "anyres_max" in image_aspect_ratio
+                                and matched_anyres_max_num_patches
+                            ):
+                                c, h, w = image_feature.shape
+                                times = math.sqrt(
+                                    h * w / (max_num_patches * unit**2)
+                                )
+                                if times > 1.1:
+                                    image_feature = image_feature[None]
+                                    image_feature = nn.functional.interpolate(
+                                        image_feature,
+                                        [int(h // times), int(w // times)],
+                                        mode="bilinear",
+                                    )[0]
                             image_feature = torch.cat(
                                 (
                                     image_feature,
                                     self.language_model.model.image_newline[
                                         :, None, None
                                     ].expand(*image_feature.shape[:-1], 1),
                                 ),
                                 dim=-1,
                             )
                             image_feature = image_feature.flatten(1, 2).transpose(
                                 0, 1
                             )
                         image_feature = torch.cat(
                             (base_image_feature, image_feature), dim=0
                         )
+                        image_feature = image_feature.unsqueeze(0)
                     else:
-                        image_feature = image_feature[0]
-                        if "unpad" in self.mm_patch_merge_type:
-                            image_feature = torch.cat(
-                                (
-                                    image_feature,
-                                    self.language_model.model.image_newline[None],
-                                ),
-                                dim=0,
+                        if image_feature.shape[0] > 16:  # video
+                            # 2x2 pooling
+                            num_of_frames = image_feature.shape[0]
+                            image_feature = image_feature.view(
+                                num_of_frames, height, width, -1
+                            )
+                            image_feature = image_feature.permute(
+                                0, 3, 1, 2
+                            ).contiguous()  # N, C, H, W
+                            height, weight = image_feature.shape[2:]
+                            scaled_shape = [
+                                math.ceil(height / 2),
+                                math.ceil(weight / 2),
+                            ]
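+                            # e.g. a 27x27 SigLIP patch grid pools to 14x14, ~4x fewer tokens per frame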
).contiguous() # N, C, H, W + height, weight = image_feature.shape[2:] + scaled_shape = [ + math.ceil(height / 2), + math.ceil(weight / 2), + ] + image_feature = nn.functional.interpolate( + image_feature, size=scaled_shape, mode="bilinear" + ) + image_feature = ( + image_feature.flatten(2) + .transpose(1, 2) + .contiguous() + ) # N, C, H*W + new_image_features.append(image_feature) image_features = new_image_features @@ -233,21 +339,22 @@ def forward( continue start_idx = extend_start_loc_cpu[i] - pad_len, pad_dim = image_features[pt].shape # 576, 4096 + pad_dim = image_features[pt].shape[-1] # 576, 4096 dim = input_embeds.shape[1] assert ( pad_dim == dim ), "invalid pad_dim={}, input_embed_dim={}!".format(pad_dim, dim) # Fill in the placeholder for the image try: - input_embeds[ - start_idx - + image_offsets[i] : start_idx - + image_offsets[i] - + pad_len - ] = image_features[pt] + for j, image_off in enumerate(image_offsets[i]): + # print("actual image_features length: ", image_features[pt][j].shape[0]) + pad_len = image_features[pt][j].shape[0] + input_embeds[ + start_idx + image_off : start_idx + image_off + pad_len + ] = image_features[pt][j] except RuntimeError as e: print(f"RuntimeError in llava image encoding: {e}") + print(image_features[pt].shape) print(input_embeds.shape) print(start_idx, image_offsets[i]) pt += 1 @@ -262,9 +369,16 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): # load clip vision model by cfg['mm_vision_tower']: # huggingface_name or path_of_clip_relative_to_llava_model_dir vision_path = self.config.mm_vision_tower - self.vision_tower = CLIPVisionModel.from_pretrained( - vision_path, torch_dtype=torch.float16 - ).cuda() + if "clip" in vision_path: + self.vision_tower = CLIPVisionModel.from_pretrained( + vision_path, torch_dtype=torch.float16 + ).cuda() + elif "siglip" in vision_path: + self.vision_tower = SiglipVisionModel.from_pretrained( + vision_path, torch_dtype=torch.float16 + ).cuda() + # Siglip needs all feature tokens + self.config.mm_vision_select_feature = "full" self.vision_tower.eval() self.vision_feature_layer = self.config.mm_vision_select_layer @@ -276,8 +390,11 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): self.image_aspect_ratio = getattr(self.config, "image_aspect_ratio", "square") self.image_grid_pinpoints = getattr(self.config, "image_grid_pinpoints", None) - self.image_feature_len = int((self.image_size / self.patch_size) ** 2) - if self.vision_feature_select_strategy == "patch": + self.image_feature_len = int((self.image_size // self.patch_size) ** 2) + if ( + self.vision_feature_select_strategy == "patch" + or self.vision_feature_select_strategy == "full" + ): pass elif self.vision_feature_select_strategy == "cls_patch": self.image_feature_len += 1 diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index c599d8b368a..3e858dfa72d 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -1,17 +1,27 @@ +import base64 +import io import json +import os +import sys +import time import unittest +import numpy as np import openai +import requests +from decord import VideoReader, cpu +from PIL import Image from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_child_process from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server +# python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --tokenizer-path 
lmms-lab/llavanext-qwen-siglip-tokenizer --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384 class TestOpenAIVisionServer(unittest.TestCase): @classmethod def setUpClass(cls): - cls.model = "liuhaotian/llava-v1.6-vicuna-7b" + cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov" cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( @@ -21,9 +31,11 @@ def setUpClass(cls): api_key=cls.api_key, other_args=[ "--chat-template", - "vicuna_v1.1", + "chatml-llava", "--tokenizer-path", - "llava-hf/llava-1.5-7b-hf", + "lmms-lab/llavanext-qwen-siglip-tokenizer", + "--chunked-prefill-size", + "16384", "--log-requests", ], ) @@ -68,6 +80,81 @@ def test_chat_completion(self): assert response.usage.completion_tokens > 0 assert response.usage.total_tokens > 0 + def prepare_video_messages(self, video_path): + max_frames_num = 32 + vr = VideoReader(video_path, ctx=cpu(0)) + total_frame_num = len(vr) + uniform_sampled_frames = np.linspace( + 0, total_frame_num - 1, max_frames_num, dtype=int + ) + frame_idx = uniform_sampled_frames.tolist() + frames = vr.get_batch(frame_idx).asnumpy() + + base64_frames = [] + for frame in frames: + pil_img = Image.fromarray(frame) + buff = io.BytesIO() + pil_img.save(buff, format="JPEG") + base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_frames.append(base64_str) + + messages = [{"role": "user", "content": []}] + frame_format = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,{}"}, + } + + for base64_frame in base64_frames: + frame_format["image_url"]["url"] = "data:image/jpeg;base64,{}".format( + base64_frame + ) + messages[0]["content"].append(frame_format.copy()) + + prompt = {"type": "text", "text": "Please describe the video in detail."} + messages[0]["content"].append(prompt) + + return messages + + def test_video_chat_completion(self): + url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4" + cache_dir = os.path.expanduser("~/.cache") + file_path = os.path.join(cache_dir, "jobs.mp4") + os.makedirs(cache_dir, exist_ok=True) + + if not os.path.exists(file_path): + response = requests.get(url) + response.raise_for_status() + + with open(file_path, "wb") as f: + f.write(response.content) + + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + messages = self.prepare_video_messages(file_path) + + start_time = time.time() + video_request = client.chat.completions.create( + model="default", + messages=messages, + temperature=0, + max_tokens=1024, + stream=True, + ) + print("-" * 30) + video_response = "" + + for chunk in video_request: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + video_response += content + sys.stdout.write(content) + sys.stdout.flush() + print("-" * 30) + + # Add assertions to validate the video response + self.assertIsNotNone(video_response) + self.assertGreater(len(video_response), 0) + def test_regex(self): client = openai.Client(api_key=self.api_key, base_url=self.base_url) From 731fe947e06704053fd26e56d14a5c5d015bbd7c Mon Sep 17 00:00:00 2001 From: taozi555 Date: Sat, 24 Aug 2024 15:11:29 +0800 Subject: [PATCH 04/88] optimization --- .../penaltylib/penalizers/dry_penalty.py | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py b/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py index 
fc454b5a86c..f86ecfd5d8e 100644 --- a/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py +++ b/python/sglang/srt/sampling/penaltylib/penalizers/dry_penalty.py @@ -40,8 +40,8 @@ def _prepare(self): device=self.orchestrator.device ) self.sequence_breakers = [ - [token_id for prompt in req.sampling_params.dry_sequence_breakers - for token_id in req.tokenizer.encode(prompt, add_special_tokens=False)] + [req.tokenizer.encode(f'a{prompt}', add_special_tokens=False)[-1] + for prompt in req.sampling_params.dry_sequence_breakers] for req in self.orchestrator.reqs() ] self.ranges = torch.tensor( @@ -71,6 +71,7 @@ def _cumulate_output_tokens(self, output_ids: _TokenIDs): def _apply(self, logits: torch.Tensor) -> torch.Tensor: batch_size, seq_length = logits.shape[0], logits.shape[1] + max_back_length = 50 # Limit the backward match to 50 to prevent overflow for i in range(batch_size): if self.output_ids is not None: input_ids = self.input_ids[i] = torch.cat( @@ -78,36 +79,33 @@ def _apply(self, logits: torch.Tensor) -> torch.Tensor: ) else: input_ids = self.input_ids[i] - if self.ranges[i] > 0: - input_ids_row = input_ids[-self.ranges[i]:] - else: - input_ids_row = input_ids - last_token = input_ids_row[-1].item() + input_ids = input_ids.tolist() + range_limit = min(self.ranges[i].item(), len(input_ids)) + input_ids = input_ids[-range_limit:] if range_limit > 0 else input_ids + last_token = input_ids[-1] if last_token in self.sequence_breakers[i]: continue - match_indices = (input_ids_row[:-1] == last_token).nonzero() - match_lengths = {} + match_indices = [idx for idx, val in enumerate(input_ids[:-1]) if val == last_token] + match_lengths = defaultdict(int) for idx in match_indices: - idx = idx.item() - next_token = input_ids_row[idx+1].item() - + next_token = input_ids[idx + 1] if next_token in self.sequence_breakers[i]: continue - match_length = 1 - while idx - match_length >= 0: - previous_token = input_ids_row[-(match_length+1)].item() - if input_ids_row[idx - match_length] != previous_token or previous_token in self.sequence_breakers[i]: + while match_length < max_back_length and idx - match_length >= 0: + previous_token = input_ids[-(match_length + 1)] + if input_ids[idx - match_length] != previous_token: + break + if previous_token in self.sequence_breakers[i]: break match_length += 1 - - match_lengths[next_token] = max(match_length, match_lengths.get(next_token, 0)) + match_lengths[next_token] = max(match_length, match_lengths[next_token]) for token, match_length in match_lengths.items(): - if match_length >= self.allowed_lengths[i]: - penalty = self.multipliers[i] * self.bases[i] ** (match_length - self.allowed_lengths[i]) + if match_length >= self.allowed_lengths[i].item(): + penalty = self.multipliers[i].item() * self.bases[i].item() ** (match_length - self.allowed_lengths[i].item()) logits[i, token] -= penalty return logits From c9064e6fd9a5356ee579e9d452bfad725f8e6f2c Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sat, 24 Aug 2024 18:58:16 +1000 Subject: [PATCH 05/88] feat: use gelu_tanh_and_mul (#1193) --- python/sglang/srt/layers/activation.py | 18 ++++++++- python/sglang/srt/models/gemma2.py | 4 +- python/sglang/test/test_activation.py | 55 ++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 python/sglang/test/test_activation.py diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py index a6f05610bd4..d0e06266040 100644 --- a/python/sglang/srt/layers/activation.py +++ 
b/python/sglang/srt/layers/activation.py @@ -15,7 +15,7 @@ import torch import torch.nn.functional as F -from flashinfer.activation import silu_and_mul +from flashinfer.activation import gelu_tanh_and_mul, silu_and_mul from vllm.model_executor.custom_op import CustomOp @@ -37,3 +37,19 @@ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: out = torch.empty(output_shape, dtype=x.dtype, device=x.device) silu_and_mul(x, out) return out + + +class GeluAndMul(CustomOp): + def __init__(self, **kwargs): + super().__init__() + + def forward_native(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + return F.gelu(x[..., :d], approximate="tanh") * x[..., d:] + + def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = x.shape[:-1] + (d,) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + gelu_tanh_and_mul(x, out) + return out diff --git a/python/sglang/srt/models/gemma2.py b/python/sglang/srt/models/gemma2.py index 80b99742e3f..37d926c34f2 100644 --- a/python/sglang/srt/models/gemma2.py +++ b/python/sglang/srt/models/gemma2.py @@ -25,7 +25,6 @@ # FIXME: temporary solution, remove after next vllm release from vllm.model_executor.custom_op import CustomOp -from vllm.model_executor.layers.activation import GeluAndMul # from vllm.model_executor.layers.layernorm import GemmaRMSNorm from vllm.model_executor.layers.linear import ( @@ -39,6 +38,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from sglang.srt.layers.activation import GeluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -135,7 +135,7 @@ def __init__( "function. Please set `hidden_act` and `hidden_activation` to " "`gelu_pytorch_tanh`." 
) - self.act_fn = GeluAndMul(approximate="tanh") + self.act_fn = GeluAndMul() def forward(self, x: torch.Tensor) -> torch.Tensor: gate_up, _ = self.gate_up_proj(x) diff --git a/python/sglang/test/test_activation.py b/python/sglang/test/test_activation.py new file mode 100644 index 00000000000..357a23319bc --- /dev/null +++ b/python/sglang/test/test_activation.py @@ -0,0 +1,55 @@ +import itertools +import unittest + +import torch + +from sglang.srt.layers.activation import GeluAndMul + + +class TestGeluAndMul(unittest.TestCase): + DTYPES = [torch.half, torch.bfloat16] + NUM_TOKENS = [7, 83, 2048] + D = [512, 4096, 5120, 13824] + SEEDS = [0] + + @classmethod + def setUpClass(cls): + if not torch.cuda.is_available(): + raise unittest.SkipTest("CUDA is not available") + torch.set_default_device("cuda") + + def _run_gelu_and_mul_test(self, num_tokens, d, dtype, seed): + torch.manual_seed(seed) + + layer = GeluAndMul().to(dtype=dtype) + x = torch.randn(num_tokens, 2 * d, dtype=dtype) + + with torch.inference_mode(): + ref_out = layer.forward_native(x) + out = layer.forward_cuda(x) + + if dtype == torch.bfloat16: + atol = rtol = 1e-2 + else: + atol = rtol = 1e-3 + + self.assertTrue(torch.allclose(out, ref_out, atol=atol, rtol=rtol)) + + def test_gelu_and_mul(self): + for params in itertools.product( + self.NUM_TOKENS, + self.D, + self.DTYPES, + self.SEEDS, + ): + with self.subTest( + num_tokens=params[0], + d=params[1], + dtype=params[2], + seed=params[3], + ): + self._run_gelu_and_mul_test(*params) + + +if __name__ == "__main__": + unittest.main(verbosity=2) From f6af3a6561b2528531bcb4815012b085280d4ec7 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 24 Aug 2024 08:02:23 -0700 Subject: [PATCH 06/88] Cleanup readme, llava examples, usage examples and nccl init (#1194) --- README.md | 39 ++++---- docs/en/sampling_params.md | 9 +- .../quick_start/anthropic_example_chat.py | 0 .../quick_start/anthropic_example_complete.py | 0 .../quick_start/azure_openai_example_chat.py | 0 .../quick_start/gemini_example_chat.py | 0 .../quick_start/gemini_example_complete.py | 0 .../gemini_example_multimodal_chat.py | 0 .../quick_start/images/cat.jpeg | Bin .../quick_start/images/dog.jpeg | Bin .../quick_start/local_example_chat.py} | 2 +- .../quick_start/local_example_complete.py} | 2 +- .../quick_start/local_example_llava_next.py} | 23 ++++- .../quick_start/openai_example_chat.py | 0 .../quick_start/openai_example_complete.py | 0 .../quick_start/openrouter_example_chat.py | 0 .../quick_start/together_example_chat.py | 0 .../quick_start/together_example_complete.py | 0 .../usage/chinese_regex.py | 0 .../usage/choices_logprob.py | 0 .../usage/cot_decoding.py | 0 .../usage/json_decode.py | 0 .../usage/json_logprobs.py | 0 .../usage/llava_video/srt_example_llava_v.py | 5 +- .../usage/llava_video/srt_example_llava_v.sh | 0 .../usage/openai_chat_speculative.py | 0 .../usage/openai_parallel_sample.py | 0 .../usage/openai_speculative.py | 0 .../usage/parallel_sample.py | 0 .../trace_and_evaluate_rag_using_parea.ipynb | 0 .../usage/readme_examples.py | 0 .../usage/streaming.py | 0 .../usage/triton/Dockerfile | 0 .../usage/triton/README.md | 0 .../models/character_generation/1/model.py | 0 .../models/character_generation/config.pbtxt | 0 examples/quick_start/srt_example_yi_vl.py | 70 -------------- .../async_io.py => runtime/async_io_api.py} | 0 .../http_llama3_llava_test.py | 3 +- .../http_llava_onevision_test.py | 9 +- .../llava_onevision}/http_qwen_llava_test.py | 3 +- .../{usage => runtime}/openai_batch_chat.py 
| 0 .../openai_batch_complete.py | 0 examples/usage/llava/srt_llava_next_test.py | 90 ------------------ .../max-tokens-fixed-rag-trace.png | Bin 134888 -> 0 bytes python/sglang/bench_latency.py | 6 +- python/sglang/lang/chat_template.py | 4 +- python/sglang/launch_server_llavavid.py | 29 ------ python/sglang/srt/layers/decode_attention.py | 2 +- python/sglang/srt/layers/fused_moe/layer.py | 4 +- python/sglang/srt/layers/logits_processor.py | 4 +- python/sglang/srt/managers/schedule_batch.py | 2 +- .../sglang/srt/managers/tokenizer_manager.py | 3 + .../sglang/srt/model_executor/model_runner.py | 86 +++++++++++------ python/sglang/srt/models/gemma2.py | 12 ++- python/sglang/srt/models/grok.py | 17 +++- python/sglang/srt/server_args.py | 19 ++-- python/sglang/srt/utils.py | 6 +- python/sglang/test/runners.py | 22 ++--- scripts/{ => deprecated}/convert_yi_vl.py | 0 scripts/{ => deprecated}/convert_yi_vl.sh | 0 test/srt/models/test_embedding_models.py | 2 +- test/srt/models/test_generation_models.py | 2 +- test/srt/run_suite.py | 2 +- test/srt/test_vision_openai_server.py | 14 +-- 65 files changed, 174 insertions(+), 317 deletions(-) rename examples/{ => frontend_language}/quick_start/anthropic_example_chat.py (100%) rename examples/{ => frontend_language}/quick_start/anthropic_example_complete.py (100%) rename examples/{ => frontend_language}/quick_start/azure_openai_example_chat.py (100%) rename examples/{ => frontend_language}/quick_start/gemini_example_chat.py (100%) rename examples/{ => frontend_language}/quick_start/gemini_example_complete.py (100%) rename examples/{ => frontend_language}/quick_start/gemini_example_multimodal_chat.py (100%) rename examples/{ => frontend_language}/quick_start/images/cat.jpeg (100%) rename examples/{ => frontend_language}/quick_start/images/dog.jpeg (100%) rename examples/{quick_start/srt_example_chat.py => frontend_language/quick_start/local_example_chat.py} (98%) rename examples/{quick_start/srt_example_complete.py => frontend_language/quick_start/local_example_complete.py} (97%) rename examples/{quick_start/srt_example_llava.py => frontend_language/quick_start/local_example_llava_next.py} (69%) rename examples/{ => frontend_language}/quick_start/openai_example_chat.py (100%) rename examples/{ => frontend_language}/quick_start/openai_example_complete.py (100%) rename examples/{ => frontend_language}/quick_start/openrouter_example_chat.py (100%) rename examples/{ => frontend_language}/quick_start/together_example_chat.py (100%) rename examples/{ => frontend_language}/quick_start/together_example_complete.py (100%) rename examples/{ => frontend_language}/usage/chinese_regex.py (100%) rename examples/{ => frontend_language}/usage/choices_logprob.py (100%) rename examples/{ => frontend_language}/usage/cot_decoding.py (100%) rename examples/{ => frontend_language}/usage/json_decode.py (100%) rename examples/{ => frontend_language}/usage/json_logprobs.py (100%) rename examples/{ => frontend_language}/usage/llava_video/srt_example_llava_v.py (99%) rename examples/{ => frontend_language}/usage/llava_video/srt_example_llava_v.sh (100%) rename examples/{ => frontend_language}/usage/openai_chat_speculative.py (100%) rename examples/{ => frontend_language}/usage/openai_parallel_sample.py (100%) rename examples/{ => frontend_language}/usage/openai_speculative.py (100%) rename examples/{ => frontend_language}/usage/parallel_sample.py (100%) rename examples/{ => frontend_language}/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb (100%) rename examples/{ 
=> frontend_language}/usage/readme_examples.py (100%) rename examples/{ => frontend_language}/usage/streaming.py (100%) rename examples/{ => frontend_language}/usage/triton/Dockerfile (100%) rename examples/{ => frontend_language}/usage/triton/README.md (100%) rename examples/{ => frontend_language}/usage/triton/models/character_generation/1/model.py (100%) rename examples/{ => frontend_language}/usage/triton/models/character_generation/config.pbtxt (100%) delete mode 100644 examples/quick_start/srt_example_yi_vl.py rename examples/{usage/async_io.py => runtime/async_io_api.py} (100%) rename examples/{usage/llava => runtime/llava_onevision}/http_llama3_llava_test.py (94%) rename examples/{usage/llava => runtime/llava_onevision}/http_llava_onevision_test.py (96%) rename examples/{usage/llava => runtime/llava_onevision}/http_qwen_llava_test.py (95%) rename examples/{usage => runtime}/openai_batch_chat.py (100%) rename examples/{usage => runtime}/openai_batch_complete.py (100%) delete mode 100644 examples/usage/llava/srt_llava_next_test.py delete mode 100644 examples/usage/rag_using_parea/max-tokens-fixed-rag-trace.png delete mode 100644 python/sglang/launch_server_llavavid.py rename scripts/{ => deprecated}/convert_yi_vl.py (100%) rename scripts/{ => deprecated}/convert_yi_vl.sh (100%) diff --git a/README.md b/README.md index c118d6a1a0f..04dd913baa4 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,13 @@ The core features include: ## News - [2024/07] 🔥 Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)). -- [2024/04] SGLang is used by the official **LLaVA-NeXT (video)** release ([blog](https://llava-vl.github.io/blog/2024-04-30-llava-next-video/)). +- [2024/08] 🔥 LLaVA-OneVision with single-image, multi-image and video are supported ([blog](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/)). - [2024/02] SGLang enables **3x faster JSON decoding** with compressed finite state machine ([blog](https://lmsys.org/blog/2024-02-05-compressed-fsm/)).

More +- [2024/04] SGLang is used by the official **LLaVA-NeXT (video)** release ([blog](https://llava-vl.github.io/blog/2024-04-30-llava-next-video/)). - [2024/01] SGLang provides up to **5x faster inference** with RadixAttention ([blog](https://lmsys.org/blog/2024-01-17-sglang/)). - [2024/01] SGLang powers the serving of the official **LLaVA v1.6** release demo ([usage](https://github.com/haotian-liu/LLaVA?tab=readme-ov-file#demo)). @@ -227,19 +228,14 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - Gemma / Gemma 2 - Qwen / Qwen 2 / Qwen 2 MoE - DeepSeek / DeepSeek 2 -- LLaVA 1.5 / 1.6 - - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000` - - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.6-vicuna-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000` - - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.6-34b --tokenizer-path liuhaotian/llava-v1.6-34b-tokenizer --port 30000` - - `python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000 --host=127.0.0.1 --tp-size=1 --chat-template=llava_llama_3` - - `python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --host="127.0.0.1" --tp-size=8 --chat-template=chatml-llava` -- LLaVA-NeXT-Video - - see [examples/usage/llava_video](examples/usage/llava_video) -- [LLaVA-OneVision](https://arxiv.org/abs/2408.03326) - - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384` - - see [test/srt/test_llava_onevision_openai_server.py](test/srt/test_llava_onevision_openai_server.py) +- [LLaVA-OneVision](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/) + - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384` + - Query the server with the [OpenAI Vision API](https://platform.openai.com/docs/guides/vision). See examples at [test/srt/test_vision_openai_server.py](test/srt/test_vision_openai_server.py) +- LLaVA 1.5 / 1.6 / NeXT + - `python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000 --tp-size=1 --chat-template=llava_llama_3` + - `python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --tp-size=8 --chat-template=chatml-llava` + - Query the server with the [OpenAI Vision API](https://platform.openai.com/docs/guides/vision). See examples at [test/srt/test_vision_openai_server.py](test/srt/test_vision_openai_server.py) - Yi-VL - - see [srt_example_yi_vl.py](examples/quick_start/srt_example_yi_vl.py). - StableLM - Command-R - DBRX @@ -250,6 +246,8 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md). #### Use Models From ModelScope +
+ To use model from [ModelScope](https://www.modelscope.cn), setting environment variable SGLANG_USE_MODELSCOPE. ``` export SGLANG_USE_MODELSCOPE=true @@ -258,21 +256,20 @@ Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instru ``` SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000 ``` +
#### Run Llama 3.1 405B ```bash -## Run 405B (fp8) on a single node +# Run 405B (fp8) on a single node python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8 -## Run 405B (fp16) on two nodes -# replace the `172.16.4.52:20000` with your own first node ip address and port, disable CUDA Graph temporarily - -# on the first node -GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75 +# Run 405B (fp16) on two nodes +## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port +GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph -# on the second -GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75 +## on the first node, replace the `172.16.4.52:20000` with your own first node ip address and port +GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph ``` ### Benchmark Performance diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md index 7d866e69295..54b03bf3254 100644 --- a/docs/en/sampling_params.md +++ b/docs/en/sampling_params.md @@ -1,5 +1,8 @@ # Sampling Parameters in SGLang Runtime This doc describes the sampling parameters of the SGLang Runtime. +It is the low-level endpoint of the runtime. +If you want a high-level endpoint that can automatically handle chat templates, consider using the [OpenAI Compatible API +](https://github.com/sgl-project/sglang?tab=readme-ov-file#openai-compatible-api). The `/generate` endpoint accepts the following arguments in the JSON format. @@ -140,7 +143,7 @@ print("") Launch a server ``` -python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.6-vicuna-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000 +python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-7b-ov --chat-template chatml-llava ``` Download an image @@ -155,7 +158,9 @@ import requests response = requests.post( "http://localhost:30000/generate", json={ - "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
USER: \nDescribe this picture ASSISTANT:", + "text": "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + "<|im_start|>user\n\nDescribe this image in a very short sentence.<|im_end|>\n" + "<|im_start|>assistant\n", "image_data": "example_image.png", "sampling_params": { "temperature": 0, diff --git a/examples/quick_start/anthropic_example_chat.py b/examples/frontend_language/quick_start/anthropic_example_chat.py similarity index 100% rename from examples/quick_start/anthropic_example_chat.py rename to examples/frontend_language/quick_start/anthropic_example_chat.py diff --git a/examples/quick_start/anthropic_example_complete.py b/examples/frontend_language/quick_start/anthropic_example_complete.py similarity index 100% rename from examples/quick_start/anthropic_example_complete.py rename to examples/frontend_language/quick_start/anthropic_example_complete.py diff --git a/examples/quick_start/azure_openai_example_chat.py b/examples/frontend_language/quick_start/azure_openai_example_chat.py similarity index 100% rename from examples/quick_start/azure_openai_example_chat.py rename to examples/frontend_language/quick_start/azure_openai_example_chat.py diff --git a/examples/quick_start/gemini_example_chat.py b/examples/frontend_language/quick_start/gemini_example_chat.py similarity index 100% rename from examples/quick_start/gemini_example_chat.py rename to examples/frontend_language/quick_start/gemini_example_chat.py diff --git a/examples/quick_start/gemini_example_complete.py b/examples/frontend_language/quick_start/gemini_example_complete.py similarity index 100% rename from examples/quick_start/gemini_example_complete.py rename to examples/frontend_language/quick_start/gemini_example_complete.py diff --git a/examples/quick_start/gemini_example_multimodal_chat.py b/examples/frontend_language/quick_start/gemini_example_multimodal_chat.py similarity index 100% rename from examples/quick_start/gemini_example_multimodal_chat.py rename to examples/frontend_language/quick_start/gemini_example_multimodal_chat.py diff --git a/examples/quick_start/images/cat.jpeg b/examples/frontend_language/quick_start/images/cat.jpeg similarity index 100% rename from examples/quick_start/images/cat.jpeg rename to examples/frontend_language/quick_start/images/cat.jpeg diff --git a/examples/quick_start/images/dog.jpeg b/examples/frontend_language/quick_start/images/dog.jpeg similarity index 100% rename from examples/quick_start/images/dog.jpeg rename to examples/frontend_language/quick_start/images/dog.jpeg diff --git a/examples/quick_start/srt_example_chat.py b/examples/frontend_language/quick_start/local_example_chat.py similarity index 98% rename from examples/quick_start/srt_example_chat.py rename to examples/frontend_language/quick_start/local_example_chat.py index b1e1658a2a9..e1e4b62ccac 100644 --- a/examples/quick_start/srt_example_chat.py +++ b/examples/frontend_language/quick_start/local_example_chat.py @@ -1,6 +1,6 @@ """ Usage: -python3 srt_example_chat.py +python3 local_example_chat.py """ import sglang as sgl diff --git a/examples/quick_start/srt_example_complete.py b/examples/frontend_language/quick_start/local_example_complete.py similarity index 97% rename from examples/quick_start/srt_example_complete.py rename to examples/frontend_language/quick_start/local_example_complete.py index 056245979f4..00a451cf642 100644 --- a/examples/quick_start/srt_example_complete.py +++ b/examples/frontend_language/quick_start/local_example_complete.py @@ -1,6 +1,6 @@ """ Usage: -python3 
srt_example_complete.py +python3 local_example_complete.py """ import sglang as sgl diff --git a/examples/quick_start/srt_example_llava.py b/examples/frontend_language/quick_start/local_example_llava_next.py similarity index 69% rename from examples/quick_start/srt_example_llava.py rename to examples/frontend_language/quick_start/local_example_llava_next.py index 5d8f752394f..823dc7b0e83 100644 --- a/examples/quick_start/srt_example_llava.py +++ b/examples/frontend_language/quick_start/local_example_llava_next.py @@ -1,8 +1,14 @@ """ -Usage: python3 srt_example_llava.py +Usage: python3 local_example_llava_next.py """ +from PIL import ImageFile + import sglang as sgl +from sglang.lang.chat_template import get_chat_template +from sglang.srt.utils import load_image + +ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow loading of truncated images @sgl.function @@ -44,10 +50,17 @@ def batch(): if __name__ == "__main__": - runtime = sgl.Runtime( - model_path="liuhaotian/llava-v1.6-vicuna-7b", - tokenizer_path="llava-hf/llava-1.5-7b-hf", - ) + import multiprocessing as mp + + mp.set_start_method("spawn", force=True) + + runtime = sgl.Runtime(model_path="lmms-lab/llama3-llava-next-8b") + runtime.endpoint.chat_template = get_chat_template("llama-3-instruct") + + # Or you can use the 72B model + # runtime = sgl.Runtime(model_path="lmms-lab/llava-next-72b", tp_size=8) + # runtime.endpoint.chat_template = get_chat_template("chatml-llava") + sgl.set_default_backend(runtime) print(f"chat template: {runtime.endpoint.chat_template.name}") diff --git a/examples/quick_start/openai_example_chat.py b/examples/frontend_language/quick_start/openai_example_chat.py similarity index 100% rename from examples/quick_start/openai_example_chat.py rename to examples/frontend_language/quick_start/openai_example_chat.py diff --git a/examples/quick_start/openai_example_complete.py b/examples/frontend_language/quick_start/openai_example_complete.py similarity index 100% rename from examples/quick_start/openai_example_complete.py rename to examples/frontend_language/quick_start/openai_example_complete.py diff --git a/examples/quick_start/openrouter_example_chat.py b/examples/frontend_language/quick_start/openrouter_example_chat.py similarity index 100% rename from examples/quick_start/openrouter_example_chat.py rename to examples/frontend_language/quick_start/openrouter_example_chat.py diff --git a/examples/quick_start/together_example_chat.py b/examples/frontend_language/quick_start/together_example_chat.py similarity index 100% rename from examples/quick_start/together_example_chat.py rename to examples/frontend_language/quick_start/together_example_chat.py diff --git a/examples/quick_start/together_example_complete.py b/examples/frontend_language/quick_start/together_example_complete.py similarity index 100% rename from examples/quick_start/together_example_complete.py rename to examples/frontend_language/quick_start/together_example_complete.py diff --git a/examples/usage/chinese_regex.py b/examples/frontend_language/usage/chinese_regex.py similarity index 100% rename from examples/usage/chinese_regex.py rename to examples/frontend_language/usage/chinese_regex.py diff --git a/examples/usage/choices_logprob.py b/examples/frontend_language/usage/choices_logprob.py similarity index 100% rename from examples/usage/choices_logprob.py rename to examples/frontend_language/usage/choices_logprob.py diff --git a/examples/usage/cot_decoding.py b/examples/frontend_language/usage/cot_decoding.py similarity index 100% rename from 
examples/usage/cot_decoding.py rename to examples/frontend_language/usage/cot_decoding.py diff --git a/examples/usage/json_decode.py b/examples/frontend_language/usage/json_decode.py similarity index 100% rename from examples/usage/json_decode.py rename to examples/frontend_language/usage/json_decode.py diff --git a/examples/usage/json_logprobs.py b/examples/frontend_language/usage/json_logprobs.py similarity index 100% rename from examples/usage/json_logprobs.py rename to examples/frontend_language/usage/json_logprobs.py diff --git a/examples/usage/llava_video/srt_example_llava_v.py b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py similarity index 99% rename from examples/usage/llava_video/srt_example_llava_v.py rename to examples/frontend_language/usage/llava_video/srt_example_llava_v.py index 7421dfcdfb8..085bcea5a23 100644 --- a/examples/usage/llava_video/srt_example_llava_v.py +++ b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py @@ -1,7 +1,8 @@ """ Usage: pip install opencv-python-headless -python3 srt_example_llava.py + +python3 srt_example_llava_v.py """ import argparse @@ -9,6 +10,8 @@ import os import time +import requests + import sglang as sgl diff --git a/examples/usage/llava_video/srt_example_llava_v.sh b/examples/frontend_language/usage/llava_video/srt_example_llava_v.sh similarity index 100% rename from examples/usage/llava_video/srt_example_llava_v.sh rename to examples/frontend_language/usage/llava_video/srt_example_llava_v.sh diff --git a/examples/usage/openai_chat_speculative.py b/examples/frontend_language/usage/openai_chat_speculative.py similarity index 100% rename from examples/usage/openai_chat_speculative.py rename to examples/frontend_language/usage/openai_chat_speculative.py diff --git a/examples/usage/openai_parallel_sample.py b/examples/frontend_language/usage/openai_parallel_sample.py similarity index 100% rename from examples/usage/openai_parallel_sample.py rename to examples/frontend_language/usage/openai_parallel_sample.py diff --git a/examples/usage/openai_speculative.py b/examples/frontend_language/usage/openai_speculative.py similarity index 100% rename from examples/usage/openai_speculative.py rename to examples/frontend_language/usage/openai_speculative.py diff --git a/examples/usage/parallel_sample.py b/examples/frontend_language/usage/parallel_sample.py similarity index 100% rename from examples/usage/parallel_sample.py rename to examples/frontend_language/usage/parallel_sample.py diff --git a/examples/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb b/examples/frontend_language/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb similarity index 100% rename from examples/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb rename to examples/frontend_language/usage/rag_using_parea/trace_and_evaluate_rag_using_parea.ipynb diff --git a/examples/usage/readme_examples.py b/examples/frontend_language/usage/readme_examples.py similarity index 100% rename from examples/usage/readme_examples.py rename to examples/frontend_language/usage/readme_examples.py diff --git a/examples/usage/streaming.py b/examples/frontend_language/usage/streaming.py similarity index 100% rename from examples/usage/streaming.py rename to examples/frontend_language/usage/streaming.py diff --git a/examples/usage/triton/Dockerfile b/examples/frontend_language/usage/triton/Dockerfile similarity index 100% rename from examples/usage/triton/Dockerfile rename to examples/frontend_language/usage/triton/Dockerfile 
diff --git a/examples/usage/triton/README.md b/examples/frontend_language/usage/triton/README.md similarity index 100% rename from examples/usage/triton/README.md rename to examples/frontend_language/usage/triton/README.md diff --git a/examples/usage/triton/models/character_generation/1/model.py b/examples/frontend_language/usage/triton/models/character_generation/1/model.py similarity index 100% rename from examples/usage/triton/models/character_generation/1/model.py rename to examples/frontend_language/usage/triton/models/character_generation/1/model.py diff --git a/examples/usage/triton/models/character_generation/config.pbtxt b/examples/frontend_language/usage/triton/models/character_generation/config.pbtxt similarity index 100% rename from examples/usage/triton/models/character_generation/config.pbtxt rename to examples/frontend_language/usage/triton/models/character_generation/config.pbtxt diff --git a/examples/quick_start/srt_example_yi_vl.py b/examples/quick_start/srt_example_yi_vl.py deleted file mode 100644 index 66c7d57126c..00000000000 --- a/examples/quick_start/srt_example_yi_vl.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Usage: python3 srt_example_yi_vl.py - -Requirements: transformers==4.38 -""" - -import sglang as sgl - - -@sgl.function -def image_qa(s, image_path, question): - s += sgl.user(sgl.image(image_path) + question) - s += sgl.assistant(sgl.gen("answer")) - - -def single(): - state = image_qa.run( - image_path="images/cat.jpeg", - question="What is this?", - max_new_tokens=64, - stop="###", - ) - print(state["answer"], "\n") - - -def stream(): - state = image_qa.run( - image_path="images/cat.jpeg", - question="What is this?", - max_new_tokens=64, - stream=True, - stop="###", - ) - - for out in state.text_iter("answer"): - print(out, end="", flush=True) - print() - - -def batch(): - states = image_qa.run_batch( - [ - {"image_path": "images/cat.jpeg", "question": "What is this?"}, - {"image_path": "images/dog.jpeg", "question": "What is this?"}, - ], - max_new_tokens=64, - stop="###", - ) - for s in states: - print(s["answer"], "\n") - - -if __name__ == "__main__": - runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-6B") - # runtime = sgl.Runtime(model_path="BabyChou/Yi-VL-34B") - sgl.set_default_backend(runtime) - - # Run a single request - print("\n========== single ==========\n") - single() - - # Stream output - print("\n========== stream ==========\n") - stream() - - # Run a batch of requests - print("\n========== batch ==========\n") - batch() - - runtime.shutdown() diff --git a/examples/usage/async_io.py b/examples/runtime/async_io_api.py similarity index 100% rename from examples/usage/async_io.py rename to examples/runtime/async_io_api.py diff --git a/examples/usage/llava/http_llama3_llava_test.py b/examples/runtime/llava_onevision/http_llama3_llava_test.py similarity index 94% rename from examples/usage/llava/http_llama3_llava_test.py rename to examples/runtime/llava_onevision/http_llama3_llava_test.py index 813a26af531..a019e214d6f 100644 --- a/examples/usage/llava/http_llama3_llava_test.py +++ b/examples/runtime/llava_onevision/http_llama3_llava_test.py @@ -4,7 +4,7 @@ # Installing latest sglang. 
# Endpoint Service CLI: -# python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --tokenizer-path lmms-lab/llama3-llava-next-8b-tokenizer --port=30000 --host="127.0.0.1" --tp-size=4 +python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000 python3 http_llama3_llava_test.py @@ -16,7 +16,6 @@ import asyncio import copy import json -import time import aiohttp import requests diff --git a/examples/usage/llava/http_llava_onevision_test.py b/examples/runtime/llava_onevision/http_llava_onevision_test.py similarity index 96% rename from examples/usage/llava/http_llava_onevision_test.py rename to examples/runtime/llava_onevision/http_llava_onevision_test.py index c32d529819d..40dc27ec20a 100644 --- a/examples/usage/llava/http_llava_onevision_test.py +++ b/examples/runtime/llava_onevision/http_llava_onevision_test.py @@ -1,3 +1,11 @@ +""" +Usage: + +python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384 + +python3 http_llava_onevision_test.py +""" + import base64 import io import os @@ -74,7 +82,6 @@ def video_stream_request_test(client, video_path): print("------------------------Video Stream Request Test----------------------") messages = prepare_video_messages(video_path) - start_time = time.time() video_request = client.chat.completions.create( model="default", messages=messages, diff --git a/examples/usage/llava/http_qwen_llava_test.py b/examples/runtime/llava_onevision/http_qwen_llava_test.py similarity index 95% rename from examples/usage/llava/http_qwen_llava_test.py rename to examples/runtime/llava_onevision/http_qwen_llava_test.py index 1c29658c609..dca56e7a33c 100644 --- a/examples/usage/llava/http_qwen_llava_test.py +++ b/examples/runtime/llava_onevision/http_qwen_llava_test.py @@ -4,7 +4,7 @@ # Installing latest sglang. 
# Endpoint Service CLI: -# python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --tokenizer-path lmms-lab/llavanext-qwen-tokenizer --port=30000 --host="127.0.0.1" --tp-size=4 +python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --tp-size=8 python3 http_qwen_llava_test.py @@ -16,7 +16,6 @@ import asyncio import copy import json -import time import aiohttp import requests diff --git a/examples/usage/openai_batch_chat.py b/examples/runtime/openai_batch_chat.py similarity index 100% rename from examples/usage/openai_batch_chat.py rename to examples/runtime/openai_batch_chat.py diff --git a/examples/usage/openai_batch_complete.py b/examples/runtime/openai_batch_complete.py similarity index 100% rename from examples/usage/openai_batch_complete.py rename to examples/runtime/openai_batch_complete.py diff --git a/examples/usage/llava/srt_llava_next_test.py b/examples/usage/llava/srt_llava_next_test.py deleted file mode 100644 index 0f9621648a7..00000000000 --- a/examples/usage/llava/srt_llava_next_test.py +++ /dev/null @@ -1,90 +0,0 @@ -""" -Usage: python3 srt_example_llava.py -""" - -from PIL import ImageFile - -import sglang as sgl -from sglang.lang.chat_template import get_chat_template -from sglang.srt.utils import load_image - -ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow loading of truncated images - - -@sgl.function -def image_qa(s, image, question): - s += sgl.user(sgl.image(image) + question) - s += sgl.assistant(sgl.gen("answer")) - - -def single(): - image_url = "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg" - pil_image, _ = load_image(image_url) - state = image_qa.run(image=pil_image, question="What is this?", max_new_tokens=512) - print(state["answer"], "\n") - - -def stream(): - image_url = "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg" - pil_image, _ = load_image(image_url) - state = image_qa.run( - image=pil_image, - question="Please generate short caption for this image.", - max_new_tokens=512, - temperature=0, - stream=True, - ) - - for out in state.text_iter("answer"): - print(out, end="", flush=True) - print() - - -def batch(): - image_url = "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg" - pil_image, _ = load_image(image_url) - states = image_qa.run_batch( - [ - {"image": pil_image, "question": "What is this?"}, - {"image": pil_image, "question": "What is this?"}, - ], - max_new_tokens=512, - ) - for s in states: - print(s["answer"], "\n") - - -if __name__ == "__main__": - import multiprocessing as mp - - mp.set_start_method("spawn", force=True) - runtime = sgl.Runtime( - model_path="lmms-lab/llama3-llava-next-8b", - tokenizer_path="lmms-lab/llama3-llava-next-8b-tokenizer", - ) - runtime.endpoint.chat_template = get_chat_template("llama-3-instruct") - # runtime = sgl.Runtime( - # model_path="lmms-lab/llava-next-72b", - # tokenizer_path="lmms-lab/llavanext-qwen-tokenizer", - # ) - # runtime.endpoint.chat_template = get_chat_template("chatml-llava") - sgl.set_default_backend(runtime) - print(f"chat template: {runtime.endpoint.chat_template.name}") - - # Or you can use API models - # sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview")) - # sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision")) - - # Run a single request - print("\n========== single ==========\n") - single() - - # Stream output - print("\n========== stream ==========\n") - stream() - - # Run a batch of requests - print("\n========== batch ==========\n") - batch() - - runtime.shutdown() diff --git 
a/examples/usage/rag_using_parea/max-tokens-fixed-rag-trace.png b/examples/usage/rag_using_parea/max-tokens-fixed-rag-trace.png
deleted file mode 100644
index 2ea09fdc60209c707655451a589f2a904b6eb25b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 134888
[134888 bytes of base85-encoded binary patch data for the deleted PNG omitted]
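For readers skimming the series, the penalizer tuned in PATCH 04 above implements the DRY ("Don't Repeat Yourself") repetition penalty. The sketch below restates that per-request computation in plain Python. It is a minimal illustration rather than the repository code: the function name, the list-based interface, and the returned dict are invented for clarity, and the batched tensor bookkeeping of the real penalizer is left out.

```python
from collections import defaultdict
from typing import Dict, List, Set


def dry_penalties(
    token_ids: List[int],       # prompt + generated tokens so far
    breakers: Set[int],         # sequence-breaker token ids
    multiplier: float,
    base: float,
    allowed_length: int,
    max_back_length: int = 50,  # cap on backward matching, as in PATCH 04
) -> Dict[int, float]:
    """Return {candidate_token: penalty} to subtract from the next-token logits."""
    penalties: Dict[int, float] = {}
    last = token_ids[-1]
    if last in breakers:
        return penalties

    match_lengths: Dict[int, int] = defaultdict(int)
    # Every earlier occurrence of the last token opens a candidate repetition.
    for idx, tok in enumerate(token_ids[:-1]):
        if tok != last:
            continue
        nxt = token_ids[idx + 1]  # the token that would extend the repetition
        if nxt in breakers:
            continue
        # Walk backwards to measure how far the two contexts agree.
        length = 1
        while length < max_back_length and idx - length >= 0:
            prev = token_ids[-(length + 1)]
            if token_ids[idx - length] != prev or prev in breakers:
                break
            length += 1
        match_lengths[nxt] = max(length, match_lengths[nxt])

    # The penalty grows exponentially once a match exceeds the allowed length.
    for tok, length in match_lengths.items():
        if length >= allowed_length:
            penalties[tok] = multiplier * base ** (length - allowed_length)
    return penalties
```

With `multiplier=0.0` every computed penalty is zero, so the adjustment is a no-op, which is consistent with the parameter defaults introduced at the start of the series.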
-To use model from [ModelScope](https://www.modelscope.cn), setting environment variable SGLANG_USE_MODELSCOPE. +To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable SGLANG_USE_MODELSCOPE. ``` export SGLANG_USE_MODELSCOPE=true ``` Launch [Qwen2-7B-Instruct](https://www.modelscope.cn/models/qwen/qwen2-7b-instruct) Server ``` SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen2-7B-Instruct --port 30000 -``` +``` +
#### Run Llama 3.1 405B +
```bash # Run 405B (fp8) on a single node @@ -272,6 +274,8 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph ``` +
+ ### Benchmark Performance - Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`. @@ -407,7 +411,7 @@ def tip_suggestion(s): s += "In summary" + sgl.gen("summary") ``` -#### Multi Modality +#### Multi-Modality Use `sgl.image` to pass an image as input. ```python @@ -461,7 +465,7 @@ def character_gen(s, name): s += sgl.gen("json_output", max_tokens=256, regex=character_regex) ``` -See also [json_decode.py](examples/usage/json_decode.py) for an additional example on specifying formats with Pydantic models. +See also [json_decode.py](examples/usage/json_decode.py) for an additional example of specifying formats with Pydantic models. #### Batching Use `run_batch` to run a batch of requests with continuous batching. @@ -523,7 +527,6 @@ def chat_example(s): - The `choices` argument in `sgl.gen` is implemented by computing the [token-length normalized log probabilities](https://blog.eleuther.ai/multiple-choice-normalization/) of all choices and selecting the one with the highest probability. - The `regex` argument in `sgl.gen` is implemented through autoregressive decoding with logit bias masking, according to the constraints set by the regex. It is compatible with `temperature=0` and `temperature != 0`. - ## Benchmark And Performance ![8b_throughput](https://lmsys.org/images/blog/sglang_llama3/8b_throughput.svg) ![70b_fp8_throughput](https://lmsys.org/images/blog/sglang_llama3/70b_fp8_throughput.svg) From e61d13acdf3193606c3bc57fb59f0de33eab7490 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sat, 24 Aug 2024 18:35:55 -0700 Subject: [PATCH 08/88] [CI] Fix the problem of hf runner too slow (#1202) --- python/sglang/test/runners.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index e519c92829b..4fc1f0f259e 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -86,7 +86,6 @@ def start_model_process( self.tokenizer = AutoTokenizer.from_pretrained( model_path, torch_dtype=torch_dtype, - trust_remote_code=True, ) self.is_generation_model = is_generation_model @@ -96,7 +95,6 @@ def start_model_process( model_path, torch_dtype=torch_dtype, low_cpu_mem_usage=True, - trust_remote_code=True, ).cuda() else: from sentence_transformers import SentenceTransformer From 1cb4da5c5f1fbaafa5c48b052b1f05abedd97fe5 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sat, 24 Aug 2024 21:43:03 -0700 Subject: [PATCH 09/88] [Fix] the issue of random order when input is a list (#1199) --- .../sglang/srt/managers/tokenizer_manager.py | 14 +++++------ python/sglang/srt/server.py | 4 ++-- python/sglang/test/runners.py | 2 +- test/srt/models/test_embedding_models.py | 23 +++++++++++-------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 8420f20dd19..8f67005755c 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -437,13 +437,13 @@ async def _handle_batch_request( is_stream = hasattr(obj, "stream") and obj.stream tasks = [asyncio.create_task(gen.__anext__()) for gen in 
generators] - output_list = [] + output_list = [None] * len(tasks) while tasks: done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) for task in done: - gen_index = tasks.index(task) + cur_index = tasks.index(task) try: result = task.result() @@ -451,14 +451,14 @@ async def _handle_batch_request( if is_stream: yield result else: - output_list.append(result) + output_list[result["index"]] = result - tasks[gen_index] = asyncio.create_task( - generators[gen_index].__anext__() + tasks[cur_index] = asyncio.create_task( + generators[cur_index].__anext__() ) except StopAsyncIteration: - del generators[gen_index] - del tasks[gen_index] + del generators[cur_index] + del tasks[cur_index] if not is_stream: yield output_list diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 3ec5cd633f4..241fabf6d1b 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -591,7 +591,7 @@ async def async_generate( def generate( self, - prompt: str, + prompt: Union[str, List[str]], sampling_params: Optional[Dict] = None, return_logprob: Optional[Union[List[bool], bool]] = False, logprob_start_len: Optional[Union[List[int], int]] = None, @@ -612,7 +612,7 @@ def generate( def encode( self, - prompt: str, + prompt: Union[str, List[str]], ): json_data = { "text": prompt, diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 4fc1f0f259e..9f18a91f739 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -28,10 +28,10 @@ DEFAULT_PROMPTS = [ # the output of gemma-2-2b from SRT is unstable on the commented prompt # "The capital of France is", + "Apple is red. Banana is Yellow. " * 800 + "Apple is", "The capital of the United Kindom is", "Today is a sunny day and I like", "AI is a field of computer science focused on", - "Apple is red. Banana is Yellow. 
" * 800 + "Apple is", ] dirpath = os.path.dirname(__file__) diff --git a/test/srt/models/test_embedding_models.py b/test/srt/models/test_embedding_models.py index 44fed2ad0bb..cc830f6257c 100644 --- a/test/srt/models/test_embedding_models.py +++ b/test/srt/models/test_embedding_models.py @@ -20,7 +20,7 @@ from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner from sglang.test.test_utils import get_similarities -MODELS = [("intfloat/e5-mistral-7b-instruct", 1)] +MODELS = [("intfloat/e5-mistral-7b-instruct", 1, 0.2)] TORCH_DTYPES = [torch.float16] @@ -32,6 +32,7 @@ def assert_close_prefill_logits( model_path, tp_size, torch_dtype, + long_context_tolerance, ) -> None: with HFRunner( model_path, torch_dtype=torch_dtype, is_generation_model=False @@ -52,20 +53,22 @@ def assert_close_prefill_logits( hf_logits = torch.Tensor(hf_outputs.embed_logits[i]) srt_logits = torch.Tensor(srt_outputs.embed_logits[i]) - similarities = torch.tensor(get_similarities(hf_logits, srt_logits)) - print("max similarity diff", torch.max(abs(similarities - 1))) + similarity = torch.tensor(get_similarities(hf_logits, srt_logits)) + print("similarity diff", abs(similarity - 1)) - if hf_logits.shape[0] <= 100: - tolerance = 1e-2 - assert torch.all( - abs(similarities - 1) < tolerance - ), "embeddings are not all close" + if len(prompts[i]) <= 1000: + tolerance = 1e-5 + else: + tolerance = long_context_tolerance + assert torch.all( + abs(similarity - 1) < tolerance + ), "embeddings are not all close" def test_prefill_logits(self): - for model, tp_size in MODELS: + for model, tp_size, long_context_tolerance in MODELS: for torch_dtype in TORCH_DTYPES: self.assert_close_prefill_logits( - DEFAULT_PROMPTS, model, tp_size, torch_dtype + DEFAULT_PROMPTS, model, tp_size, torch_dtype, long_context_tolerance ) From bc4c7a35457b0a1cb4e83b9f80a01f2cbee9f0e9 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 25 Aug 2024 10:27:02 -0700 Subject: [PATCH 10/88] Relax the assert in moe throughput test to fix the flaky CI (#1207) --- test/srt/test_moe_serving_throughput.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/srt/test_moe_serving_throughput.py b/test/srt/test_moe_serving_throughput.py index bbcd5122769..3cdf724f31f 100644 --- a/test/srt/test_moe_serving_throughput.py +++ b/test/srt/test_moe_serving_throughput.py @@ -73,7 +73,7 @@ def test_default(self): if os.getenv("SGLANG_IS_IN_CI", "false") == "true": # A100 (PCIE) performance - assert res["output_throughput"] > 930 + assert res["output_throughput"] > 910 def test_default_without_radix_cache(self): res = self.run_test( @@ -84,7 +84,7 @@ def test_default_without_radix_cache(self): if os.getenv("SGLANG_IS_IN_CI", "false") == "true": # A100 (PCIE) performance - assert res["output_throughput"] > 930 + assert res["output_throughput"] > 910 def test_default_without_chunked_prefill(self): res = self.run_test( From 66e7dcaf7008d2ffe892044a21513a6e06424d1a Mon Sep 17 00:00:00 2001 From: Kaichen Zhang - NTU Date: Mon, 26 Aug 2024 01:28:23 +0800 Subject: [PATCH 11/88] [Fix] Fixing the multi-images error for llava-onevision (#1205) --- .../http_llava_onevision_test.py | 46 +++++++++++++++++++ .../sglang/srt/managers/tokenizer_manager.py | 4 +- test/srt/test_vision_openai_server.py | 42 +++++++++++++++++ 3 files changed, 91 insertions(+), 1 deletion(-) diff --git a/examples/runtime/llava_onevision/http_llava_onevision_test.py b/examples/runtime/llava_onevision/http_llava_onevision_test.py index 40dc27ec20a..41d60b12afc 100644 --- 
a/examples/runtime/llava_onevision/http_llava_onevision_test.py +++ b/examples/runtime/llava_onevision/http_llava_onevision_test.py @@ -78,6 +78,51 @@ def image_stream_request_test(client): print("-" * 30) +def multi_image_stream_request_test(client): + print( + "----------------------Multi-Images Stream Request Test----------------------" + ) + stream_request = client.chat.completions.create( + model="default", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" + }, + }, + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png" + }, + }, + { + "type": "text", + "text": "I have shown you two images. Please describe the two images to me.", + }, + ], + }, + ], + temperature=0.7, + max_tokens=1024, + stream=True, + ) + stream_response = "" + + for chunk in stream_request: + if chunk.choices[0].delta.content is not None: + content = chunk.choices[0].delta.content + stream_response += content + sys.stdout.write(content) + sys.stdout.flush() + + print("-" * 30) + + def video_stream_request_test(client, video_path): print("------------------------Video Stream Request Test----------------------") messages = prepare_video_messages(video_path) @@ -209,6 +254,7 @@ def main(): client = create_openai_client("http://127.0.0.1:30000/v1") image_stream_request_test(client) + multi_image_stream_request_test(client) video_stream_request_test(client, video_path) image_speed_test(client) video_speed_test(client, video_path) diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 8f67005755c..5cc060be1a4 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -744,7 +744,9 @@ def get_pixel_values( image, tuple(int(x * 255) for x in processor.image_processor.image_mean), ) - pixel_values = processor.image_processor(image)["pixel_values"][0] + pixel_values = processor.image_processor(image.convert("RGB"))[ + "pixel_values" + ][0] elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio: pixel_values = process_anyres_image( image, processor.image_processor, image_grid_pinpoints diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index 0a477a92aed..0f136fe6e51 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -74,6 +74,48 @@ def test_chat_completion(self): assert response.usage.completion_tokens > 0 assert response.usage.total_tokens > 0 + def test_mult_images_chat_completion(self): + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + response = client.chat.completions.create( + model="default", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" + }, + }, + { + "type": "image_url", + "image_url": { + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png" + }, + }, + { + "type": "text", + "text": "I have shown you two images. 
Please describe the two images to me.", + }, + ], + }, + ], + temperature=0, + ) + + assert response.choices[0].message.role == "assistant" + text = response.choices[0].message.content + assert isinstance(text, str) + assert "man" in text or "cab" in text, text + assert "logo" in text, text + assert response.id + assert response.created + assert response.usage.prompt_tokens > 0 + assert response.usage.completion_tokens > 0 + assert response.usage.total_tokens > 0 + def prepare_video_messages(self, video_path): max_frames_num = 32 vr = VideoReader(video_path, ctx=cpu(0)) From 30b4f771b0c515c18179f3e1ee0b4662b2606a95 Mon Sep 17 00:00:00 2001 From: Chayenne Date: Mon, 26 Aug 2024 01:29:12 +0800 Subject: [PATCH 12/88] Support Alibaba-NLP/gte-Qwen2-7B-instruct embedding Model (#1186) Co-authored-by: Ying Sheng --- .github/workflows/accuracy-test.yml | 2 +- .github/workflows/unit-test.yml | 2 +- README.md | 15 ++++ .../sglang/srt/managers/tokenizer_manager.py | 5 +- python/sglang/srt/managers/tp_worker.py | 1 + .../sglang/srt/model_executor/model_runner.py | 17 ++++- python/sglang/srt/models/llama_embedding.py | 4 + python/sglang/srt/models/qwen2.py | 12 ++- python/sglang/srt/server.py | 3 + python/sglang/srt/server_args.py | 11 +++ python/sglang/srt/utils.py | 9 ++- python/sglang/test/runners.py | 32 ++++---- test/srt/models/test_embedding_models.py | 28 ++++--- test/srt/models/test_generation_models.py | 73 +++++++++++++++++-- test/srt/run_suite.py | 8 +- 15 files changed, 167 insertions(+), 55 deletions(-) diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index 374f0d2856d..16bb584f4aa 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -43,4 +43,4 @@ jobs: run: | cd test/srt python3 test_eval_accuracy_large.py - timeout-minutes: 10 + timeout-minutes: 20 diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 3422cde40d9..607cb865db3 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -41,7 +41,7 @@ jobs: run: | cd test/srt python3 run_suite.py --suite minimal - timeout-minutes: 18 + timeout-minutes: 20 - name: Test Frontend Language run: | diff --git a/README.md b/README.md index 2fc91e7858f..651108f9e2e 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,13 @@ response = client.chat.completions.create( max_tokens=64, ) print(response) + +# Text embedding +response = client.embeddings.create( + model="default", + input="How are you today", +) +print(response) ``` It supports streaming, vision, and most features of the Chat/Completions/Models/Batch endpoints specified by the [OpenAI API Reference](https://platform.openai.com/docs/api-reference/). @@ -223,6 +230,8 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct ### Supported Models +**Generative Models** + - Llama / Llama 2 / Llama 3 / Llama 3.1 - Mistral / Mixtral / Mistral NeMo - Gemma / Gemma 2 @@ -243,6 +252,12 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - ChatGLM - InternLM 2 +**Embedding Models** + +- e5-mistral +- gte-Qwen2 + - `python -m sglang.launch_server --model-path Alibaba-NLP/gte-Qwen2-7B-instruct --is-embedding` + Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md). 
#### Use Models From ModelScope diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 5cc060be1a4..4008a093add 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -94,7 +94,10 @@ def __init__( trust_remote_code=server_args.trust_remote_code, model_overide_args=model_overide_args, ) - self.is_generation = is_generation_model(self.hf_config.architectures) + + self.is_generation = is_generation_model( + self.hf_config.architectures, self.server_args.is_embedding + ) if server_args.context_length is not None: self.context_len = server_args.context_length diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index fa79f849212..19edc23b836 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -94,6 +94,7 @@ def __init__( context_length=server_args.context_length, model_overide_args=model_overide_args, ) + self.model_runner = ModelRunner( model_config=self.model_config, mem_fraction_static=server_args.mem_fraction_static, diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 661660281fe..6b48d1f90ee 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -204,7 +204,7 @@ def load_model(self): else None ) self.is_generation = is_generation_model( - self.model_config.hf_config.architectures + self.model_config.hf_config.architectures, self.server_args.is_embedding ) logger.info( @@ -522,9 +522,18 @@ def forward_extend(self, batch: ScheduleBatch): batch, forward_mode=ForwardMode.EXTEND, ) - return self.model.forward( - batch.input_ids, input_metadata.positions, input_metadata - ) + if self.is_generation: + return self.model.forward( + batch.input_ids, input_metadata.positions, input_metadata + ) + else: + # Only embedding models have get_embedding parameter + return self.model.forward( + batch.input_ids, + input_metadata.positions, + input_metadata, + get_embedding=True, + ) @torch.inference_mode() def forward_extend_multi_modal(self, batch: ScheduleBatch): diff --git a/python/sglang/srt/models/llama_embedding.py b/python/sglang/srt/models/llama_embedding.py index e8e6780472d..dfff53cbcda 100644 --- a/python/sglang/srt/models/llama_embedding.py +++ b/python/sglang/srt/models/llama_embedding.py @@ -29,7 +29,11 @@ def forward( positions: torch.Tensor, input_metadata: InputMetadata, input_embeds: torch.Tensor = None, + get_embedding: bool = True, ) -> EmbeddingPoolerOutput: + assert ( + get_embedding + ), "LlamaEmbeddingModel / MistralModel is only used for embedding" hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) return self.pooler(hidden_states, input_metadata) diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index d1295bd8cc8..fcf083e1b5d 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -38,6 +38,7 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor +from sglang.srt.layers.pooler import EmbeddingPoolerOutput, Pooler, PoolingType from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -275,6 +276,7 @@ def __init__( self.model = Qwen2Model(config, 
quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True) @torch.no_grad() def forward( @@ -283,11 +285,15 @@ def forward( positions: torch.Tensor, input_metadata: InputMetadata, input_embeds: torch.Tensor = None, + get_embedding: bool = False, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( - input_ids, hidden_states, self.lm_head.weight, input_metadata - ) + if not get_embedding: + return self.logits_processor( + input_ids, hidden_states, self.lm_head.weight, input_metadata + ) + else: + return self.pooler(hidden_states, input_metadata) def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 241fabf6d1b..813f2de7827 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -333,11 +333,13 @@ def launch_server( start_process = start_controller_process_single else: start_process = start_controller_process_multi + proc_controller = mp.Process( target=start_process, args=(server_args, port_args, pipe_controller_writer, model_overide_args), ) proc_controller.start() + proc_detoken = mp.Process( target=start_detokenizer_process, args=( @@ -515,6 +517,7 @@ def __init__( self.pid = None pipe_reader, pipe_writer = mp.Pipe(duplex=False) + proc = mp.Process( target=launch_server, args=(self.server_args, model_overide_args, pipe_writer), diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 870169c6d58..58e24dab8b7 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -38,6 +38,7 @@ class ServerArgs: quantization: Optional[str] = None served_model_name: Optional[str] = None chat_template: Optional[str] = None + is_embedding: bool = False # Port host: str = "127.0.0.1" @@ -200,6 +201,11 @@ def add_cli_args(parser: argparse.ArgumentParser): action="store_true", help="Whether or not to allow for custom models defined on the Hub in their own modeling files.", ) + parser.add_argument( + "--is-embedding", + action="store_true", + help="Whether to use a CausalLM as an embedding model.", + ) parser.add_argument( "--context-length", type=int, @@ -458,6 +464,11 @@ def check_server_args(self): assert not ( self.dp_size > 1 and self.node_rank is not None ), "multi-node data parallel is not supported" + if "Alibaba-NLP/gte-Qwen2-1.5B-instruct" == self.model_path: + logger.info( + "Not sure why, but the tokenizer will add an additional token at the end of the prompt when trust_remote_code=True" + ) + self.trust_remote_code = False if "gemma-2" in self.model_path.lower(): logger.info("When using sliding window in gemma-2, turn on flashinfer.") self.disable_flashinfer = False diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index 93c54782a09..102dcb3d87e 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -224,13 +224,18 @@ def is_multimodal_model(model): raise ValueError("unrecognized type") -def is_generation_model(model_architectures): +def is_generation_model(model_architectures, is_embedding: bool = False): + # We have two ways to determine whether a model is a generative model. + # 1. Check the model architecture + # 2. 
check the `is_embedding` server args + if ( "LlamaEmbeddingModel" in model_architectures or "MistralModel" in model_architectures ): return False - return True + else: + return not is_embedding def decode_video_base64(video_base64): diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 9f18a91f739..9a5bd4fd598 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -14,7 +14,7 @@ """ import json -import multiprocessing +import multiprocessing as mp import os from dataclasses import dataclass from typing import List, Union @@ -63,37 +63,35 @@ def __init__( self, model_path, torch_dtype, - is_generation_model, + is_generation, ): - self.in_queue = multiprocessing.Queue() - self.out_queue = multiprocessing.Queue() + self.is_generation = is_generation - self.model_proc = multiprocessing.Process( + self.in_queue = mp.Queue() + self.out_queue = mp.Queue() + + self.model_proc = mp.Process( target=self.start_model_process, args=( self.in_queue, self.out_queue, model_path, torch_dtype, - is_generation_model, ), ) self.model_proc.start() - def start_model_process( - self, in_queue, out_queue, model_path, torch_dtype, is_generation_model - ): + def start_model_process(self, in_queue, out_queue, model_path, torch_dtype): self.tokenizer = AutoTokenizer.from_pretrained( model_path, torch_dtype=torch_dtype, ) - self.is_generation_model = is_generation_model - - if self.is_generation_model: + if self.is_generation: self.model = AutoModelForCausalLM.from_pretrained( model_path, torch_dtype=torch_dtype, + trust_remote_code=False, low_cpu_mem_usage=True, ).cuda() else: @@ -107,7 +105,7 @@ def start_model_process( while True: prompts, max_new_tokens = in_queue.get() if prompts is not None: - if self.is_generation_model: + if self.is_generation: output_strs = [] prefill_logprobs = [] for p in prompts: @@ -171,17 +169,19 @@ def __init__( self, model_path, torch_dtype, - is_generation_model, + is_generation, tp_size=1, port=5157, ): - self.is_generation_model = is_generation_model + self.is_generation = is_generation self.runtime = Runtime( model_path=model_path, tp_size=tp_size, dtype=get_dtype_str(torch_dtype), port=port, mem_fraction_static=0.7, + trust_remote_code=False, + is_embedding=not self.is_generation, ) def forward( @@ -189,7 +189,7 @@ def forward( prompts: Union[List[str], List[torch.Tensor]] = DEFAULT_PROMPTS, max_new_tokens=8, ): - if self.is_generation_model: + if self.is_generation: # the return value contains logprobs from prefill output_strs = [] top_input_logprobs = [] diff --git a/test/srt/models/test_embedding_models.py b/test/srt/models/test_embedding_models.py index cc830f6257c..ecb3e7576ea 100644 --- a/test/srt/models/test_embedding_models.py +++ b/test/srt/models/test_embedding_models.py @@ -20,7 +20,10 @@ from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner from sglang.test.test_utils import get_similarities -MODELS = [("intfloat/e5-mistral-7b-instruct", 1, 0.2)] +MODELS = [ + ("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, 1e-5), + ("intfloat/e5-mistral-7b-instruct", 1, 1e-5), +] TORCH_DTYPES = [torch.float16] @@ -32,10 +35,10 @@ def assert_close_prefill_logits( model_path, tp_size, torch_dtype, - long_context_tolerance, + prefill_tolerance, ) -> None: with HFRunner( - model_path, torch_dtype=torch_dtype, is_generation_model=False + model_path, torch_dtype=torch_dtype, is_generation=False ) as hf_runner: hf_outputs = hf_runner.forward(prompts) @@ -43,11 +46,9 @@ def assert_close_prefill_logits( model_path, 
tp_size=tp_size, torch_dtype=torch_dtype, - is_generation_model=False, + is_generation=False, ) as srt_runner: - srt_outputs = srt_runner.forward( - prompts, - ) + srt_outputs = srt_runner.forward(prompts) for i in range(len(prompts)): hf_logits = torch.Tensor(hf_outputs.embed_logits[i]) @@ -57,18 +58,15 @@ def assert_close_prefill_logits( print("similarity diff", abs(similarity - 1)) if len(prompts[i]) <= 1000: - tolerance = 1e-5 - else: - tolerance = long_context_tolerance - assert torch.all( - abs(similarity - 1) < tolerance - ), "embeddings are not all close" + assert torch.all( + abs(similarity - 1) < prefill_tolerance + ), "embeddings are not all close" def test_prefill_logits(self): - for model, tp_size, long_context_tolerance in MODELS: + for model, tp_size, prefill_tolerance in MODELS: for torch_dtype in TORCH_DTYPES: self.assert_close_prefill_logits( - DEFAULT_PROMPTS, model, tp_size, torch_dtype, long_context_tolerance + DEFAULT_PROMPTS, model, tp_size, torch_dtype, prefill_tolerance ) diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py index ba64907eae5..7e7e401d27c 100644 --- a/test/srt/models/test_generation_models.py +++ b/test/srt/models/test_generation_models.py @@ -20,12 +20,46 @@ from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner MODELS = [ - ("meta-llama/Meta-Llama-3.1-8B-Instruct", 1, 1.1), - ("google/gemma-2-2b", 1, 3), + ("meta-llama/Meta-Llama-3.1-8B-Instruct", 1, 1.1, 3e-2, 1), + ("google/gemma-2-2b", 1, 3, 3e-2, 1), + ("Alibaba-NLP/gte-Qwen2-1.5B-instruct", 1, None, 6e-2, 1), ] TORCH_DTYPES = [torch.float16] +def lcs(X, Y): + m = len(X) + n = len(Y) + L = [[0] * (n + 1) for _ in range(m + 1)] + + for i in range(m + 1): + for j in range(n + 1): + if i == 0 or j == 0: + L[i][j] = 0 + elif X[i - 1] == Y[j - 1]: + L[i][j] = L[i - 1][j - 1] + 1 + else: + L[i][j] = max(L[i - 1][j], L[i][j - 1]) + + return L[m][n] + + +def calculate_rouge_l(output_strs_list1, output_strs_list2): + rouge_l_scores = [] + + for s1, s2 in zip(output_strs_list1, output_strs_list2): + lcs_len = lcs(s1, s2) + precision = lcs_len / len(s1) if len(s1) > 0 else 0 + recall = lcs_len / len(s2) if len(s2) > 0 else 0 + if precision + recall > 0: + fmeasure = (2 * precision * recall) / (precision + recall) + else: + fmeasure = 0.0 + rouge_l_scores.append(fmeasure) + + return rouge_l_scores + + class TestGenerationModels(unittest.TestCase): def assert_close_prefill_logits_and_output_strs( @@ -35,10 +69,14 @@ def assert_close_prefill_logits_and_output_strs( tp_size, torch_dtype, max_new_tokens, + prefill_tolerance, + rouge_threshold, long_context_tolerance, ) -> None: + if model_path == "Alibaba-NLP/gte-Qwen2-1.5B-instruct": + prompts = prompts[:-1] with HFRunner( - model_path, torch_dtype=torch_dtype, is_generation_model=True + model_path, torch_dtype=torch_dtype, is_generation=True ) as hf_runner: hf_outputs = hf_runner.forward(prompts, max_new_tokens=max_new_tokens) @@ -46,7 +84,7 @@ def assert_close_prefill_logits_and_output_strs( model_path, tp_size=tp_size, torch_dtype=torch_dtype, - is_generation_model=True, + is_generation=True, ) as srt_runner: srt_outputs = srt_runner.forward(prompts, max_new_tokens=max_new_tokens) @@ -56,17 +94,34 @@ def assert_close_prefill_logits_and_output_strs( print("max_diff", torch.max(abs(hf_logprobs - srt_logprobs))) if hf_logprobs.shape[0] <= 100: - tolerance = 3e-2 assert torch.all( - abs(hf_logprobs - srt_logprobs) < tolerance + abs(hf_logprobs - srt_logprobs) < prefill_tolerance ), "prefill logprobs 
are not all close" print(hf_outputs.output_strs) print(srt_outputs.output_strs) - assert hf_outputs.output_strs == srt_outputs.output_strs + rouge_l_scores = calculate_rouge_l( + hf_outputs.output_strs, srt_outputs.output_strs + ) + assert all( + score >= rouge_threshold for score in rouge_l_scores + ), f"Not all ROUGE-L scores are greater than {rouge_threshold}" def test_prefill_logits_and_output_strs(self): - for model, tp_size, long_context_tolerance in MODELS: + import multiprocessing as mp + + try: + mp.set_start_method("spawn") + except RuntimeError: + pass + + for ( + model, + tp_size, + long_context_tolerance, + prefill_tolerance, + rouge_threshold, + ) in MODELS: for torch_dtype in TORCH_DTYPES: max_new_tokens = 8 self.assert_close_prefill_logits_and_output_strs( @@ -75,6 +130,8 @@ def test_prefill_logits_and_output_strs(self): tp_size, torch_dtype, max_new_tokens, + prefill_tolerance=prefill_tolerance, + rouge_threshold=rouge_threshold, long_context_tolerance=long_context_tolerance, ) diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 8a887912a0a..5a11c8ee0f3 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -5,6 +5,9 @@ suites = { "minimal": [ + "models/test_embedding_models.py", + "models/test_generation_models.py", + "sampling/penaltylib", "test_chunked_prefill.py", "test_embedding_openai_server.py", "test_eval_accuracy_mini.py", @@ -13,11 +16,8 @@ "test_skip_tokenizer_init.py", "test_torch_compile.py", "test_triton_attn_backend.py", - "test_vision_openai_server.py", "test_update_weights.py", - "models/test_generation_models.py", - "models/test_embedding_models.py", - "sampling/penaltylib", + "test_vision_openai_server.py", ], "sampling/penaltylib": glob.glob( "sampling/penaltylib/**/test_*.py", recursive=True From 902278008a6e5cf0f054c0b6ce4ba0cc64ce7437 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 25 Aug 2024 14:46:34 -0700 Subject: [PATCH 13/88] [Minor] Improve the function organization in TokenizerManager & improve loggers (#1208) --- docs/en/hyperparameter_tuning.md | 2 +- python/sglang/srt/hf_transformers_utils.py | 11 -- .../sglang/srt/managers/controller_multi.py | 7 +- .../sglang/srt/managers/controller_single.py | 14 +- .../srt/managers/detokenizer_manager.py | 20 ++- .../sglang/srt/managers/tokenizer_manager.py | 156 +++++++++--------- python/sglang/srt/managers/tp_worker.py | 14 +- .../sglang/srt/model_executor/model_runner.py | 17 +- python/sglang/srt/openai_api/adapter.py | 4 +- python/sglang/srt/server.py | 8 +- python/sglang/srt/server_args.py | 2 +- python/sglang/srt/utils.py | 16 +- 12 files changed, 137 insertions(+), 134 deletions(-) diff --git a/docs/en/hyperparameter_tuning.md b/docs/en/hyperparameter_tuning.md index 02a0657c3f0..f2bf9d55f3d 100644 --- a/docs/en/hyperparameter_tuning.md +++ b/docs/en/hyperparameter_tuning.md @@ -6,7 +6,7 @@ Achieving a large batch size is the most important thing for attaining high thro When the server is running at full load, look for the following in the log: -```[gpu=0] Decode batch. #running-req: 233, #token: 370959, token usage: 0.82, gen throughput (token/s): 4594.01, #queue-req: 417``` +```Decode batch. #running-req: 233, #token: 370959, token usage: 0.82, gen throughput (token/s): 4594.01, #queue-req: 417``` ### Tune Your Request Submission Speed `#queue-req` indicates the number of requests in the queue. If you frequently see `#queue-req == 0`, it suggests you are bottlenecked by the request submission speed. 
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index 525d2954399..4f6e3d07157 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -142,17 +142,6 @@ def get_tokenizer( raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.") kwargs["use_fast"] = False - if ( - "llama" in tokenizer_name.lower() - and kwargs.get("use_fast", True) - and tokenizer_name != _FAST_LLAMA_TOKENIZER - ): - warnings.warn( - "For some LLaMA V1 models, initializing the fast tokenizer may " - "take a long time. To reduce the initialization time, consider " - f"using '{_FAST_LLAMA_TOKENIZER}' instead of the original " - "tokenizer." - ) try: tokenizer = AutoTokenizer.from_pretrained( tokenizer_name, diff --git a/python/sglang/srt/managers/controller_multi.py b/python/sglang/srt/managers/controller_multi.py index 38229cd4660..d2b10e7fa29 100644 --- a/python/sglang/srt/managers/controller_multi.py +++ b/python/sglang/srt/managers/controller_multi.py @@ -35,7 +35,7 @@ TokenizedGenerateReqInput, ) from sglang.srt.server_args import PortArgs, ServerArgs -from sglang.srt.utils import kill_parent_process +from sglang.srt.utils import configure_logger, kill_parent_process from sglang.utils import get_exception_traceback logger = logging.getLogger(__name__) @@ -193,10 +193,7 @@ def start_controller_process( ): """Start a controller process.""" - logging.basicConfig( - level=getattr(logging, server_args.log_level.upper()), - format="%(message)s", - ) + configure_logger(server_args) try: controller = ControllerMulti(server_args, port_args, model_overide_args) diff --git a/python/sglang/srt/managers/controller_single.py b/python/sglang/srt/managers/controller_single.py index 422db943f6b..4a16a6f6e43 100644 --- a/python/sglang/srt/managers/controller_single.py +++ b/python/sglang/srt/managers/controller_single.py @@ -27,7 +27,7 @@ launch_tp_servers, ) from sglang.srt.server_args import PortArgs, ServerArgs -from sglang.srt.utils import kill_parent_process +from sglang.srt.utils import configure_logger, kill_parent_process from sglang.utils import get_exception_traceback logger = logging.getLogger(__name__) @@ -52,7 +52,7 @@ def __init__( self.dp_worker_id = dp_worker_id self.mp_queue = mp_queue - # Init communication + # Init inter-process communication context = zmq.Context(2) if not self.is_dp_worker: @@ -133,11 +133,11 @@ def start_controller_process( queue: multiprocessing.connection.Connection = None, ): """Start a controller process.""" - - logging.basicConfig( - level=getattr(logging, server_args.log_level.upper()), - format="%(message)s", - ) + if is_data_parallel_worker: + logger_prefix = f" DP{dp_worker_id} TP0" + else: + logger_prefix = " TP0" + configure_logger(server_args, prefix=logger_prefix) if not is_data_parallel_worker: tp_size_local = server_args.tp_size // server_args.nnodes diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index 9a4306372b1..cd5f63125cb 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -56,6 +56,7 @@ def __init__( server_args: ServerArgs, port_args: PortArgs, ): + # Init inter-process communication context = zmq.asyncio.Context(2) self.recv_from_router = context.socket(zmq.PULL) self.recv_from_router.bind(f"tcp://127.0.0.1:{port_args.detokenizer_port}") @@ -75,10 +76,13 @@ def __init__( self.decode_status = {} async def 
handle_loop(self): + """The event loop that handles requests""" + while True: - recv_obj: BatchTokenIDOut = await self.recv_from_router.recv_pyobj() + recv_obj = await self.recv_from_router.recv_pyobj() if isinstance(recv_obj, BatchEmbeddingOut): + # If it is embedding model, no detokenization is needed. self.send_to_tokenizer.send_pyobj( BatchEmbeddingOut( rids=recv_obj.rids, @@ -88,19 +92,18 @@ async def handle_loop(self): ) ) continue - - if isinstance(recv_obj, UpdateWeightReqOutput): + elif isinstance(recv_obj, UpdateWeightReqOutput): + # If it is a weight update request, no detokenization is needed. + self.send_to_tokenizer.send_pyobj(recv_obj) + continue + elif self.tokenizer is None: + # If the tokenizer is skipped, no detokenization is needed self.send_to_tokenizer.send_pyobj(recv_obj) continue assert isinstance(recv_obj, BatchTokenIDOut) bs = len(recv_obj.rids) - if self.tokenizer is None: - # Send BatchTokenIDOut if no tokenizer init'ed. - self.send_to_tokenizer.send_pyobj(recv_obj) - continue - # Initialize decode status read_ids, surr_ids = [], [] for i in range(bs): @@ -134,6 +137,7 @@ async def handle_loop(self): spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0], ) + # Incremental decoding output_strs = [] for i in range(bs): s = self.decode_status[recv_obj.rids[i]] diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 4008a093add..199ea7c3a65 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -21,7 +21,7 @@ import logging import multiprocessing as mp import os -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union import numpy as np import transformers @@ -80,6 +80,7 @@ def __init__( ): self.server_args = server_args + # Init inter-process communication context = zmq.asyncio.Context(2) self.recv_from_detokenizer = context.socket(zmq.PULL) self.recv_from_detokenizer.bind(f"tcp://127.0.0.1:{port_args.tokenizer_port}") @@ -87,6 +88,7 @@ def __init__( self.send_to_router = context.socket(zmq.PUSH) self.send_to_router.connect(f"tcp://127.0.0.1:{port_args.controller_port}") + # Read model args self.model_path = server_args.model_path self.served_model_name = server_args.served_model_name self.hf_config = get_config( @@ -104,6 +106,7 @@ def __init__( else: self.context_len = get_context_length(self.hf_config) + # Create tokenizer if server_args.skip_tokenizer_init: self.tokenizer = self.processor = None else: @@ -127,6 +130,7 @@ def __init__( trust_remote_code=server_args.trust_remote_code, ) + # Store states self.to_create_loop = True self.rid_to_state: Dict[str, ReqState] = {} @@ -134,63 +138,6 @@ def __init__( self.model_update_lock = asyncio.Lock() self.model_update_result = None - async def get_pixel_values(self, image_data, aspect_ratio=None): - aspect_ratio = ( - getattr(self.hf_config, "image_aspect_ratio", None) - if aspect_ratio is None - else aspect_ratio - ) - grid_pinpoints = ( - self.hf_config.image_grid_pinpoints - if hasattr(self.hf_config, "image_grid_pinpoints") - and "anyres" in aspect_ratio - else None - ) - - if isinstance(image_data, list) and len(image_data) > 0: - pixel_values, image_hash, image_size = [], [], [] - if len(image_data) > 1: - aspect_ratio = "pad" # LLaVA OneVision Handling: more than one image --> interleaved image mode or video mode. 
We do not use anyres - for img_data in image_data: - pixel_v, image_h, image_s = await self._process_single_image( - img_data, aspect_ratio, grid_pinpoints - ) - pixel_values.append(pixel_v) - image_hash.append(image_h) - image_size.append(image_s) - pixel_values = np.stack(pixel_values, axis=0) - else: - pixel_values, image_hash, image_size = await self._process_single_image( - image_data[0], aspect_ratio, grid_pinpoints - ) - image_hash = [image_hash] - image_size = [image_size] - elif isinstance(image_data, str): - pixel_values, image_hash, image_size = await self._process_single_image( - image_data, aspect_ratio, grid_pinpoints - ) - image_hash = [image_hash] - image_size = [image_size] - else: - pixel_values, image_hash, image_size = None, None, None - - return pixel_values, image_hash, image_size - - async def _process_single_image(self, image_data, aspect_ratio, grid_pinpoints): - if self.executor is not None: - loop = asyncio.get_event_loop() - return await loop.run_in_executor( - self.executor, - get_pixel_values, - image_data, - aspect_ratio, - grid_pinpoints, - ) - else: - return get_pixel_values( - image_data, aspect_ratio, grid_pinpoints, self.processor - ) - async def generate_request( self, obj: Union[GenerateReqInput, EmbeddingReqInput], request=None ): @@ -198,7 +145,7 @@ async def generate_request( self.create_handle_loop() while self.model_update_lock.locked(): - await asyncio.sleep(0) + await asyncio.sleep(0.001) obj.post_init() is_single = obj.is_single @@ -214,8 +161,8 @@ async def _handle_single_request( self, obj: Union[GenerateReqInput, EmbeddingReqInput], request, - index=None, - is_cache_for_prefill=False, + index: Optional[int] = None, + is_cache_for_prefill: Optional[bool] = False, ): if not is_cache_for_prefill: # The normal case with a single prompt not_use_index = index is None @@ -235,7 +182,7 @@ async def _handle_single_request( ) if self.is_generation: - pixel_values, image_hash, image_size = await self.get_pixel_values( + pixel_values, image_hash, image_size = await self._get_pixel_values( obj.image_data ) return_logprob = ( @@ -345,7 +292,7 @@ async def _handle_batch_request( parallel_sample_num = obj.parallel_sample_num if parallel_sample_num != 1: - # Send prefill requests to cache the common input + # Send prefill requests to cache the common prefix parallel_sample_num += 1 input_id_result = [] if obj.input_ids is None else None for i in range(batch_size): @@ -436,7 +383,6 @@ async def _handle_batch_request( ) # Then process the responses based on streaming option - is_stream = hasattr(obj, "stream") and obj.stream tasks = [asyncio.create_task(gen.__anext__()) for gen in generators] @@ -482,9 +428,9 @@ def _get_sampling_params(self, sampling_params_data: dict): async def _get_pixel_values(self, image_data): if isinstance(image_data, list) and len(image_data) > 0: - return await self.get_pixel_values(image_data[0]) + return await self._get_pixel_values_internal(image_data[0]) elif isinstance(image_data, str): - return await self.get_pixel_values(image_data) + return await self._get_pixel_values_internal(image_data) else: return None, None, None @@ -563,6 +509,13 @@ def flush_cache(self): req = FlushCacheReq() self.send_to_router.send_pyobj(req) + def abort_request(self, rid: str): + if rid not in self.rid_to_state: + return + del self.rid_to_state[rid] + req = AbortReq(rid) + self.send_to_router.send_pyobj(req) + async def update_weights(self, obj: UpdateWeightReqInput, request): if self.to_create_loop: self.create_handle_loop() @@ -587,13 +540,6 @@ 
async def update_weights(self, obj: UpdateWeightReqInput, request): else: return False, "Another update is in progress. Please try again later." - def abort_request(self, rid: str): - if rid not in self.rid_to_state: - return - del self.rid_to_state[rid] - req = AbortReq(rid) - self.send_to_router.send_pyobj(req) - def create_abort_task(self, obj: GenerateReqInput): # Abort the request if the client is disconnected. async def abort_request(): @@ -617,6 +563,8 @@ def create_handle_loop(self): loop.create_task(self.handle_loop()) async def handle_loop(self): + """The event loop that handles requests""" + while True: recv_obj: Union[ BatchStrOut, BatchEmbeddingOut, BatchTokenIDOut, UpdateWeightReqOutput @@ -713,11 +661,69 @@ def detokenize_top_logprobs_tokens(self, top_logprobs, decode_to_text: bool): ) return top_logprobs + async def _get_pixel_values_internal(self, image_data, aspect_ratio=None): + aspect_ratio = ( + getattr(self.hf_config, "image_aspect_ratio", None) + if aspect_ratio is None + else aspect_ratio + ) + grid_pinpoints = ( + self.hf_config.image_grid_pinpoints + if hasattr(self.hf_config, "image_grid_pinpoints") + and "anyres" in aspect_ratio + else None + ) + + if isinstance(image_data, list) and len(image_data) > 0: + pixel_values, image_hash, image_size = [], [], [] + if len(image_data) > 1: + aspect_ratio = "pad" # LLaVA OneVision Handling: more than one image --> interleaved image mode or video mode. We do not use anyres + for img_data in image_data: + pixel_v, image_h, image_s = await self._process_single_image( + img_data, aspect_ratio, grid_pinpoints + ) + pixel_values.append(pixel_v) + image_hash.append(image_h) + image_size.append(image_s) + pixel_values = np.stack(pixel_values, axis=0) + else: + pixel_values, image_hash, image_size = await self._process_single_image( + image_data[0], aspect_ratio, grid_pinpoints + ) + image_hash = [image_hash] + image_size = [image_size] + elif isinstance(image_data, str): + pixel_values, image_hash, image_size = await self._process_single_image( + image_data, aspect_ratio, grid_pinpoints + ) + image_hash = [image_hash] + image_size = [image_size] + else: + pixel_values, image_hash, image_size = None, None, None + + return pixel_values, image_hash, image_size + + async def _process_single_image(self, image_data, aspect_ratio, grid_pinpoints): + if self.executor is not None: + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + self.executor, + _process_single_image_task, + image_data, + aspect_ratio, + grid_pinpoints, + ) + else: + return _process_single_image_task( + image_data, aspect_ratio, grid_pinpoints, self.processor + ) + global global_processor def init_global_processor(server_args: ServerArgs): + """Init the global processor for multi modal models.""" global global_processor transformers.logging.set_verbosity_error() global_processor = get_processor( @@ -727,7 +733,7 @@ def init_global_processor(server_args: ServerArgs): ) -def get_pixel_values( +def _process_single_image_task( image_data, image_aspect_ratio=None, image_grid_pinpoints=None, processor=None ): try: @@ -759,4 +765,4 @@ def get_pixel_values( pixel_values = pixel_values.astype(np.float16) return pixel_values, image_hash, image.size except Exception: - print("Exception in TokenizerManager:\n" + get_exception_traceback()) + logger.error("Exception in TokenizerManager:\n" + get_exception_traceback()) diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index 19edc23b836..9820e0302e1 100644 --- 
a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -56,6 +56,7 @@ from sglang.srt.model_executor.model_runner import ModelRunner from sglang.srt.server_args import ServerArgs from sglang.srt.utils import ( + configure_logger, is_multimodal_model, set_random_seed, suppress_other_loggers, @@ -145,7 +146,6 @@ def __init__( # Print info logger.info( - f"[gpu={self.gpu_id}] " f"max_total_num_tokens={self.max_total_num_tokens}, " f"max_prefill_tokens={self.max_prefill_tokens}, " f"max_running_requests={self.max_running_requests}, " @@ -284,7 +284,7 @@ def print_decode_stats(self): self.num_generated_tokens = 0 self.last_stats_tic = time.time() logger.info( - f"[gpu={self.gpu_id}] Decode batch. " + f"Decode batch. " f"#running-req: {len(self.running_batch.reqs)}, " f"#token: {num_used}, " f"token usage: {num_used / self.max_total_num_tokens:.2f}, " @@ -443,7 +443,7 @@ def get_new_prefill_batch(self) -> Optional[ScheduleBatch]: if num_mixed_running > 0: logger.info( - f"[gpu={self.gpu_id}] Prefill batch" + f"Prefill batch" f"(mixed #running-req: {num_mixed_running}). " f"#new-seq: {len(can_run_list)}, " f"#new-token: {adder.log_input_tokens}, " @@ -453,7 +453,7 @@ def get_new_prefill_batch(self) -> Optional[ScheduleBatch]: ) else: logger.info( - f"[gpu={self.gpu_id}] Prefill batch. " + f"Prefill batch. " f"#new-seq: {len(can_run_list)}, " f"#new-token: {adder.log_input_tokens}, " f"#cached-token: {adder.log_hit_tokens}, " @@ -631,7 +631,7 @@ def forward_decode_batch(self, batch: ScheduleBatch): self.new_token_ratio = new_token_ratio logger.info( - "decode out of memory happened, " + "Decode out of memory happened. " f"#retracted_reqs: {len(retracted_reqs)}, " f"#new_token_ratio: {old_ratio:.4f} -> {self.new_token_ratio:.4f}" ) @@ -848,7 +848,9 @@ def run_tp_server( nccl_port: int, model_overide_args: dict, ): - """Run a tensor parallel server.""" + """Run a tensor parallel model server.""" + configure_logger(server_args, prefix=f" TP{tp_rank}") + try: model_server = ModelTpServer( gpu_id, diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 6b48d1f90ee..fa55abba671 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -109,7 +109,7 @@ def __init__( def init_torch_distributed(self): # Init torch distributed torch.cuda.set_device(self.gpu_id) - logger.info(f"[gpu={self.gpu_id}] Init nccl begin.") + logger.info("Init nccl begin.") if not self.server_args.enable_p2p_check: monkey_patch_vllm_p2p_access_check(self.gpu_id) @@ -152,8 +152,7 @@ def init_torch_distributed(self): def load_model(self): logger.info( - f"[gpu={self.gpu_id}] Load weight begin. " - f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB" + f"Load weight begin. avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB" ) if torch.cuda.get_device_capability()[0] < 8: logger.info( @@ -208,7 +207,7 @@ def load_model(self): ) logger.info( - f"[gpu={self.gpu_id}] Load weight end. " + f"Load weight end. " f"type={type(self.model).__name__}, " f"dtype={self.dtype}, " f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB" @@ -224,7 +223,7 @@ def update_weights(self, model_path: str, load_format: str): from vllm.model_executor.model_loader.utils import set_default_torch_dtype logger.info( - f"[gpu={self.gpu_id}] Update weights begin. " + f"Update weights begin. 
" f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB" ) @@ -298,7 +297,7 @@ def model_load_weights(model, iter): self.load_config = load_config self.model_config.path = model_path - logger.info(f"[gpu={self.gpu_id}] Update weights end.") + logger.info("Update weights end.") return True, "Succeeded to update model weights" def profile_max_num_token(self, total_gpu_memory: int): @@ -387,7 +386,7 @@ def init_memory_pool( layer_num=self.model_config.num_hidden_layers, ) logger.info( - f"[gpu={self.gpu_id}] Memory pool end. " + f"Memory pool end. " f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB" ) @@ -473,9 +472,7 @@ def init_cuda_graphs(self): self.cuda_graph_runner = None return - logger.info( - f"[gpu={self.gpu_id}] Capture cuda graph begin. This can take up to several minutes." - ) + logger.info("Capture cuda graph begin. This can take up to several minutes.") if self.server_args.disable_cuda_graph_padding: batch_size_list = list(range(1, 32)) + [64, 128] diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 582457ae049..f325e84b2f9 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -123,7 +123,7 @@ def create_streaming_error_response( def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg): global chat_template_name - print(f"Use chat template: {chat_template_arg}") + logger.info(f"Use chat template: {chat_template_arg}") if not chat_template_exists(chat_template_arg): if not os.path.exists(chat_template_arg): raise RuntimeError( @@ -355,7 +355,7 @@ async def process_batch(tokenizer_manager, batch_id: str, batch_request: BatchRe } except Exception as e: - print("error in SGLang:", e) + logger.error("error in SGLang:", e) # Update batch status to "failed" retrieve_batch = batch_storage[batch_id] retrieve_batch.status = "failed" diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 813f2de7827..021f231aa7c 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -74,6 +74,7 @@ add_api_key_middleware, allocate_init_ports, assert_pkg_version, + configure_logger, enable_show_time_cost, kill_child_process, maybe_set_triton_cache_manager, @@ -270,15 +271,12 @@ def launch_server( """Launch an HTTP server.""" global tokenizer_manager - logging.basicConfig( - level=getattr(logging, server_args.log_level.upper()), - format="%(message)s", - ) + configure_logger(server_args) server_args.check_server_args() _set_envs_and_config(server_args) - # Allocate ports + # Allocate ports for inter-process communications server_args.port, server_args.additional_ports = allocate_init_ports( server_args.port, server_args.additional_ports, diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 58e24dab8b7..ca27f974829 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -418,7 +418,7 @@ def add_cli_args(parser: argparse.ArgumentParser): parser.add_argument( "--enable-mixed-chunk", action="store_true", - help="Enabling mixing prefill and decode in a chunked batch.", + help="Enabling mixing prefill and decode in a batch when using chunked prefill.", ) parser.add_argument( "--enable-torch-compile", diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index 102dcb3d87e..a6e710009f2 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -692,7 +692,7 @@ def weight_loader_srt( setattr(QKVParallelLinear, "weight_loader", weight_loader_srt) 
-def add_api_key_middleware(app, api_key): +def add_api_key_middleware(app, api_key: str): @app.middleware("http") async def authentication(request, call_next): if request.method == "OPTIONS": @@ -704,7 +704,7 @@ async def authentication(request, call_next): return await call_next(request) -def prepare_model(model_path): +def prepare_model(model_path: str): if "SGLANG_USE_MODELSCOPE" in os.environ: if not os.path.exists(model_path): from modelscope import snapshot_download @@ -713,7 +713,7 @@ def prepare_model(model_path): return model_path -def prepare_tokenizer(tokenizer_path): +def prepare_tokenizer(tokenizer_path: str): if "SGLANG_USE_MODELSCOPE" in os.environ: if not os.path.exists(tokenizer_path): from modelscope import snapshot_download @@ -722,3 +722,13 @@ def prepare_tokenizer(tokenizer_path): tokenizer_path, ignore_patterns=["*.bin", "*.safetensors"] ) return tokenizer_path + + +def configure_logger(server_args, prefix: str = ""): + format = f"[%(asctime)s{prefix}] %(message)s" + logging.basicConfig( + level=getattr(logging, server_args.log_level.upper()), + format=format, + datefmt="%H:%M:%S", + force=True, + ) From ab4990e4bfd79fe60815a3f872c5857df57798bb Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sun, 25 Aug 2024 14:49:23 -0700 Subject: [PATCH 14/88] [Minor] Temporarily skip flaky test (#1209) --- test/srt/run_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 5a11c8ee0f3..e8edbb55009 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -6,7 +6,7 @@ suites = { "minimal": [ "models/test_embedding_models.py", - "models/test_generation_models.py", + # "models/test_generation_models.py", "sampling/penaltylib", "test_chunked_prefill.py", "test_embedding_openai_server.py", From 308d024092d8a671998b978f419dd40262bef9b5 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sun, 25 Aug 2024 16:21:37 -0700 Subject: [PATCH 15/88] [CI] Fix the issue of unit test hanging (#1211) --- python/sglang/test/test_utils.py | 17 +++++++++-------- test/srt/models/test_embedding_models.py | 6 ++++++ test/srt/models/test_generation_models.py | 13 ++++++------- test/srt/run_suite.py | 6 ++++++ 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index 9f6aa68ab12..ac19d937038 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -460,24 +460,25 @@ def _target_func(): return ret_value[0] +def run_one_file(filename, out_queue): + print(f"\n\nRun {filename}\n\n") + ret = unittest.main(module=None, argv=["", "-vb"] + [filename]) + + def run_unittest_files(files: List[str], timeout_per_file: float): tic = time.time() success = True for filename in files: + out_queue = multiprocessing.Queue() + p = multiprocessing.Process(target=run_one_file, args=(filename, out_queue)) - def func(): - print(f"\n\nRun {filename}\n\n") - ret = unittest.main(module=None, argv=["", "-vb"] + [filename]) - - p = multiprocessing.Process(target=func) - - def run_one_file(): + def run_process(): p.start() p.join() try: - run_with_timeout(run_one_file, timeout=timeout_per_file) + run_with_timeout(run_process, timeout=timeout_per_file) if p.exitcode != 0: success = False break diff --git a/test/srt/models/test_embedding_models.py b/test/srt/models/test_embedding_models.py index ecb3e7576ea..8a43255b7ff 100644 --- a/test/srt/models/test_embedding_models.py +++ b/test/srt/models/test_embedding_models.py @@ -13,6 +13,7 @@ limitations under 
the License. """ +import multiprocessing as mp import unittest import torch @@ -71,4 +72,9 @@ def test_prefill_logits(self): if __name__ == "__main__": + try: + mp.set_start_method("spawn") + except RuntimeError: + pass + unittest.main(warnings="ignore") diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py index 7e7e401d27c..4e49c0a5b1c 100644 --- a/test/srt/models/test_generation_models.py +++ b/test/srt/models/test_generation_models.py @@ -13,6 +13,7 @@ limitations under the License. """ +import multiprocessing as mp import unittest import torch @@ -108,13 +109,6 @@ def assert_close_prefill_logits_and_output_strs( ), f"Not all ROUGE-L scores are greater than {rouge_threshold}" def test_prefill_logits_and_output_strs(self): - import multiprocessing as mp - - try: - mp.set_start_method("spawn") - except RuntimeError: - pass - for ( model, tp_size, @@ -137,4 +131,9 @@ def test_prefill_logits_and_output_strs(self): if __name__ == "__main__": + try: + mp.set_start_method("spawn") + except RuntimeError: + pass + unittest.main(warnings="ignore") diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index e8edbb55009..3756d3ddfb1 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -1,5 +1,6 @@ import argparse import glob +import multiprocessing as mp from sglang.test.test_utils import run_unittest_files @@ -54,5 +55,10 @@ else: files = suites[args.suite] + try: + mp.set_start_method("spawn") + except RuntimeError: + pass + exit_code = run_unittest_files(files, args.timeout_per_file) exit(exit_code) From 15f1a49d2dcbd488155de373e7fcf854f29a7de8 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 25 Aug 2024 16:43:07 -0700 Subject: [PATCH 16/88] Update CI workflows (#1210) --- .github/workflows/accuracy-test.yml | 7 +-- .github/workflows/e2e-test.yml | 11 ++-- .github/workflows/moe-test.yml | 50 +++++++++---------- .github/workflows/unit-test.yml | 9 ++-- python/sglang/test/runners.py | 3 +- python/sglang/test/test_utils.py | 1 + .../test_srt_endpoint_with_penalizers.py | 8 ++- test/srt/test_vision_openai_server.py | 2 +- 8 files changed, 43 insertions(+), 48 deletions(-) diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index 16bb584f4aa..b60a9c6d480 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: accuracy-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: accuracy + runs-on: accuracy-test steps: - name: Checkout code @@ -28,9 +28,6 @@ jobs: - name: Install dependencies run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall @@ -40,7 +37,7 @@ jobs: pip install -e . 
- name: Evaluate Accuracy + timeout-minutes: 20 run: | cd test/srt python3 test_eval_accuracy_large.py - timeout-minutes: 20 diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index ad271c37edb..8d33870411b 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: e2e-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: e2e + runs-on: e2e-test steps: - name: Checkout code @@ -28,27 +28,24 @@ jobs: - name: Install dependencies run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - name: Benchmark Serving Throughput + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default - timeout-minutes: 10 - name: Benchmark Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache - timeout-minutes: 10 - name: Benchmark Serving Throughput (w/o ChunkedPrefill) + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill - timeout-minutes: 10 diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index 51f7d022614..dd5665a3f5e 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -18,30 +18,28 @@ concurrency: cancel-in-progress: true jobs: - moe-test: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: accuracy - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + moe-test: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: moe-test - - name: Benchmark MOE Serving Throughput - uses: nick-fields/retry@v3 - with: - timeout_minutes: 15 - max_attempts: 2 - retry_on: error - command: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark MoE Serving Throughput + timeout_minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark MoE Serving Throughput (w/o RadixAttention) + timeout_minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 607cb865db3..e2d7951be4a 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - 
runs-on: unit + runs-on: unit-test steps: - name: Checkout code @@ -28,9 +28,6 @@ jobs: - name: Install dependencies run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall @@ -38,13 +35,13 @@ jobs: pip install sentence_transformers - name: Test Backend Runtime + timeout-minutes: 20 run: | cd test/srt python3 run_suite.py --suite minimal - timeout-minutes: 20 - name: Test Frontend Language + timeout-minutes: 10 run: | cd test/lang python3 run_suite.py --suite minimal - timeout-minutes: 10 diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 9a5bd4fd598..37ed2cf9adc 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -24,6 +24,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer from sglang.srt.server import Runtime +from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER DEFAULT_PROMPTS = [ # the output of gemma-2-2b from SRT is unstable on the commented prompt @@ -171,7 +172,7 @@ def __init__( torch_dtype, is_generation, tp_size=1, - port=5157, + port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER, ): self.is_generation = is_generation self.runtime = Runtime( diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index ac19d937038..3389e619c92 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -25,6 +25,7 @@ DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" if os.getenv("SGLANG_IS_IN_CI", "false") == "true": + DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157 DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157" DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157" DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157" diff --git a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py index e72dc30f956..4e91f723551 100644 --- a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py +++ b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py @@ -5,7 +5,11 @@ import requests from sglang.srt.utils import kill_child_process -from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server +from sglang.test.test_utils import ( + DEFAULT_MODEL_NAME_FOR_TEST, + DEFAULT_URL_FOR_UNIT_TEST, + popen_launch_server, +) class TestBatchPenalizerE2E(unittest.TestCase): @@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = f"http://127.0.0.1:{8157}" + cls.base_url = DEFAULT_URL_FOR_UNIT_TEST cls.process = popen_launch_server( cls.model, cls.base_url, diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index 0f136fe6e51..48157b8db4c 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -67,7 +67,7 @@ def test_chat_completion(self): assert response.choices[0].message.role == "assistant" text = response.choices[0].message.content assert isinstance(text, str) - assert "car" in text or "taxi" in text, text + assert "logo" in text, text assert response.id assert response.created assert response.usage.prompt_tokens > 0 From 61bb223e0fc1ccd0c26ac3137f0d9154bcecc25a Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 25 Aug 2024 17:31:52 -0700 Subject: [PATCH 17/88] Update CI runner docs (#1213) --- 
.github/workflows/moe-test.yml | 4 +- docs/en/setup_github_runner.md | 101 +++++++++------------------------ 2 files changed, 30 insertions(+), 75 deletions(-) diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index dd5665a3f5e..2caa3d1820e 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -33,13 +33,13 @@ jobs: pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - name: Benchmark MoE Serving Throughput - timeout_minutes: 10 + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - name: Benchmark MoE Serving Throughput (w/o RadixAttention) - timeout_minutes: 10 + timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/docs/en/setup_github_runner.md b/docs/en/setup_github_runner.md index 97a7f26266b..282e12b7362 100644 --- a/docs/en/setup_github_runner.md +++ b/docs/en/setup_github_runner.md @@ -1,89 +1,44 @@ -# Set up self hosted runner for GitHub Action +# Set Up Self-hosted Runners for GitHub Action -## Config Runner +## Add a Runner -```bash -# https://github.com/sgl-project/sglang/settings/actions/runners/new?arch=x64&os=linux -# Involves some TOKEN and other private information, click the link to view specific steps. -``` +### Step 1: Start a docker container. -## Start Runner +You can mount a folder for the shared huggingface model weights cache. The command below uses `/tmp/huggingface` as an example. -add `/lib/systemd/system/e2e.service` ``` -[Unit] -StartLimitIntervalSec=0 -[Service] -Environment="CUDA_VISIBLE_DEVICES=7" -Environment="XDG_CACHE_HOME=/data/.cache" -Environment="HF_TOKEN=hf_xx" -Environment="OPENAI_API_KEY=sk-xx" -Environment="HOME=/data/zhyncs/runner-v1" -Environment="SGLANG_IS_IN_CI=true" -Restart=always -RestartSec=1 -ExecStart=/data/zhyncs/runner-v1/actions-runner/run.sh -[Install] -WantedBy=multi-user.target +docker pull nvidia/cuda:12.1.1-devel-ubuntu22.04 +docker run --shm-size 64g -it -v /tmp/huggingface:/hf_home --gpus all nvidia/cuda:12.1.1-devel-ubuntu22.04 /bin/bash ``` -add `/lib/systemd/system/unit.service` -``` -[Unit] -StartLimitIntervalSec=0 -[Service] -Environment="CUDA_VISIBLE_DEVICES=6" -Environment="XDG_CACHE_HOME=/data/.cache" -Environment="HF_TOKEN=hf_xx" -Environment="OPENAI_API_KEY=sk-xx" -Environment="HOME=/data/zhyncs/runner-v2" -Environment="SGLANG_IS_IN_CI=true" -Restart=always -RestartSec=1 -ExecStart=/data/zhyncs/runner-v2/actions-runner/run.sh -[Install] -WantedBy=multi-user.target -``` +### Step 2: Configure the runner by `config.sh` + +Run these commands inside the container. 
-add `/lib/systemd/system/accuracy.service` ``` -[Unit] -StartLimitIntervalSec=0 -[Service] -Environment="CUDA_VISIBLE_DEVICES=5" -Environment="XDG_CACHE_HOME=/data/.cache" -Environment="HF_TOKEN=hf_xx" -Environment="OPENAI_API_KEY=sk-xx" -Environment="HOME=/data/zhyncs/runner-v3" -Environment="SGLANG_IS_IN_CI=true" -Restart=always -RestartSec=1 -ExecStart=/data/zhyncs/runner-v3/actions-runner/run.sh -[Install] -WantedBy=multi-user.target +apt update && apt install -y curl python3-pip git +export RUNNER_ALLOW_RUNASROOT=1 ``` -```bash -cd /data/zhyncs/runner-v1 -python3 -m venv venv +Then follow https://github.com/sgl-project/sglang/settings/actions/runners/new?arch=x64&os=linux to run `config.sh` -cd /data/zhyncs/runner-v2 -python3 -m venv venv +**Notes** +- Do not need to specify the runner group +- Give it a name (e.g., `test-sgl-gpu-0`) and some labels (e.g., `unit-test`). The labels can be editted later in Github Settings. +- Do not need to change the work folder. -cd /data/zhyncs/runner-v3 -python3 -m venv venv +### Step 3: Run the runner by `run.sh` -sudo systemctl daemon-reload - -sudo systemctl start e2e -sudo systemctl enable e2e -sudo systemctl status e2e - -sudo systemctl start unit -sudo systemctl enable unit -sudo systemctl status unit +- Set up environment variables +``` +export HF_HOME=/hf_home +export SGLANG_IS_IN_CI=true +export HF_TOKEN=hf_xxx +export OPENAI_API_KEY=sk-xxx +export CUDA_VISIBLE_DEVICES=0 +``` -sudo systemctl start accuracy -sudo systemctl enable accuracy -sudo systemctl status accuracy +- Run it forever ``` +while true; do ./run.sh; echo "Restarting..."; sleep 2; done +``` \ No newline at end of file From 2c615d120fa5da4ff6b88f59ca7656b8d595ccd0 Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Mon, 26 Aug 2024 08:38:11 +0800 Subject: [PATCH 18/88] [Feature] Support fp8 e5m2 kv cache with flashinfer (#1204) Co-authored-by: Yineng Zhang --- python/sglang/srt/layers/radix_attention.py | 7 +- python/sglang/srt/mem_cache/memory_pool.py | 90 +++++++++++++++++-- .../srt/model_executor/forward_batch_info.py | 4 + .../sglang/srt/model_executor/model_runner.py | 23 ++++- python/sglang/srt/server_args.py | 8 ++ 5 files changed, 116 insertions(+), 16 deletions(-) diff --git a/python/sglang/srt/layers/radix_attention.py b/python/sglang/srt/layers/radix_attention.py index a02673dc374..91735a1b810 100644 --- a/python/sglang/srt/layers/radix_attention.py +++ b/python/sglang/srt/layers/radix_attention.py @@ -203,7 +203,6 @@ def forward(self, q, k, v, input_metadata: InputMetadata): return self.decode_forward(q, k, v, input_metadata) def store_kv_cache(self, cache_k, cache_v, input_metadata: InputMetadata): - k_cache = input_metadata.token_to_kv_pool.get_key_buffer(self.layer_id) - v_cache = input_metadata.token_to_kv_pool.get_value_buffer(self.layer_id) - k_cache[input_metadata.out_cache_loc] = cache_k - v_cache[input_metadata.out_cache_loc] = cache_v + input_metadata.token_to_kv_pool.set_kv_buffer( + self.layer_id, input_metadata.out_cache_loc, cache_k, cache_v + ) diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py index 68cefbbf9f7..fef74321ac6 100644 --- a/python/sglang/srt/mem_cache/memory_pool.py +++ b/python/sglang/srt/mem_cache/memory_pool.py @@ -16,7 +16,8 @@ """Memory pool.""" import logging -from typing import List, Union +from abc import ABC, abstractmethod +from typing import List, Tuple, Union import torch @@ -52,14 +53,21 @@ def clear(self): self.free_slots = list(range(self.size)) -class BaseTokenToKVPool: 
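The kv-cache pool rewrite that follows stores `float8_e5m2` data in `uint8` buffers and reinterprets the bytes with `Tensor.view(dtype)` on every read and write, because indexed assignment (`index_put`) is not implemented for `torch.float8_e5m2`. A self-contained sketch of that trick (shapes and values are illustrative; assumes PyTorch 2.1 or newer for the float8 dtypes):

```python
import torch

store_dtype, kv_dtype = torch.uint8, torch.float8_e5m2

# The backing buffer is allocated in the storage dtype (store_dtype below).
k_buffer = torch.zeros((16, 4), dtype=store_dtype)

# Writes: cast incoming values to the kv dtype, then view them as raw bytes.
# Both dtypes are one byte wide, so view() reinterprets without copying.
loc = torch.tensor([0, 3])
cache_k = torch.randn(2, 4).to(kv_dtype)
k_buffer[loc] = cache_k.view(store_dtype)

# Reads: view the whole buffer back in the kv dtype; upcast only to inspect.
k = k_buffer.view(kv_dtype)
print(k.to(torch.float32)[loc])
```

Because `float8_e5m2` is half the width of `float16`, the same memory budget holds roughly twice as many cached tokens, which is why the `profile_max_num_token` change later in this patch sizes cells with the kv-cache dtype rather than the model dtype.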
+class BaseTokenToKVPool(ABC): """A memory pool that maps a token to its kv cache locations""" def __init__( self, size: int, + dtype: torch.dtype, ): self.size = size + self.dtype = dtype + if dtype == torch.float8_e5m2: + # NOTE: Store as torch.uint8 because Tensor index_put is not implemented for torch.float8_e5m2 + self.store_dtype = torch.uint8 + else: + self.store_dtype = dtype # We also add one slot. This slot is used for writing dummy output from padded tokens. self.mem_state = torch.ones((self.size + 1,), dtype=torch.bool, device="cuda") @@ -112,6 +120,28 @@ def clear(self): # We also add one slot. This slot is used for writing dummy output from padded tokens. self.mem_state[0] = False + @abstractmethod + def get_key_buffer(self, layer_id: int) -> torch.Tensor: + raise NotImplementedError() + + @abstractmethod + def get_value_buffer(self, layer_id: int) -> torch.Tensor: + raise NotImplementedError() + + @abstractmethod + def get_kv_buffer(self, layer_id: int) -> Tuple[torch.Tensor, torch.Tensor]: + raise NotImplementedError() + + @abstractmethod + def set_kv_buffer( + self, + layer_id: int, + loc: torch.Tensor, + cache_k: torch.Tensor, + cache_v: torch.Tensor, + ) -> None: + raise NotImplementedError() + class MHATokenToKVPool(BaseTokenToKVPool): @@ -123,26 +153,52 @@ def __init__( head_dim: int, layer_num: int, ): - super().__init__(size) + super().__init__(size, dtype) # [size, head_num, head_dim] for each layer self.k_buffer = [ - torch.empty((size + 1, head_num, head_dim), dtype=dtype, device="cuda") + torch.empty( + (size + 1, head_num, head_dim), dtype=self.store_dtype, device="cuda" + ) for _ in range(layer_num) ] self.v_buffer = [ - torch.empty((size + 1, head_num, head_dim), dtype=dtype, device="cuda") + torch.empty( + (size + 1, head_num, head_dim), dtype=self.store_dtype, device="cuda" + ) for _ in range(layer_num) ] def get_key_buffer(self, layer_id: int): + if self.store_dtype != self.dtype: + return self.k_buffer[layer_id].view(self.dtype) return self.k_buffer[layer_id] def get_value_buffer(self, layer_id: int): + if self.store_dtype != self.dtype: + return self.v_buffer[layer_id].view(self.dtype) return self.v_buffer[layer_id] def get_kv_buffer(self, layer_id: int): - return self.k_buffer[layer_id], self.v_buffer[layer_id] + return self.get_key_buffer(layer_id), self.get_value_buffer(layer_id) + + def set_kv_buffer( + self, + layer_id: int, + loc: torch.Tensor, + cache_k: torch.Tensor, + cache_v: torch.Tensor, + ): + if cache_k.dtype != self.dtype: + cache_k = cache_k.to(self.dtype) + if cache_v.dtype != self.dtype: + cache_v = cache_v.to(self.dtype) + if self.store_dtype != self.dtype: + self.k_buffer[layer_id][loc] = cache_k.view(self.store_dtype) + self.v_buffer[layer_id][loc] = cache_v.view(self.store_dtype) + else: + self.k_buffer[layer_id][loc] = cache_k + self.v_buffer[layer_id][loc] = cache_v class MLATokenToKVPool(BaseTokenToKVPool): @@ -155,23 +211,41 @@ def __init__( qk_rope_head_dim: int, layer_num: int, ): - super().__init__(size) + super().__init__(size, dtype) self.kv_lora_rank = kv_lora_rank self.kv_buffer = [ torch.empty( (size + 1, 1, kv_lora_rank + qk_rope_head_dim), - dtype=dtype, + dtype=self.store_dtype, device="cuda", ) for _ in range(layer_num) ] def get_key_buffer(self, layer_id: int): + if self.store_dtype != self.dtype: + return self.kv_buffer[layer_id].view(self.dtype) return self.kv_buffer[layer_id] def get_value_buffer(self, layer_id: int): + if self.store_dtype != self.dtype: + return self.kv_buffer[layer_id][..., : 
self.kv_lora_rank].view(self.dtype) return self.kv_buffer[layer_id][..., : self.kv_lora_rank] def get_kv_buffer(self, layer_id: int): return self.get_key_buffer(layer_id), self.get_value_buffer(layer_id) + + def set_kv_buffer( + self, + layer_id: int, + loc: torch.Tensor, + cache_k: torch.Tensor, + cache_v: torch.Tensor, + ): + if cache_k.dtype != self.dtype: + cache_k = cache_k.to(self.dtype) + if self.store_dtype != self.dtype: + self.kv_buffer[layer_id][loc] = cache_k.view(self.store_dtype) + else: + self.kv_buffer[layer_id][loc] = cache_k diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index 98daeaece4f..c107b3bc826 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -315,6 +315,8 @@ def update_flashinfer_indices( num_kv_heads, head_dim, 1, + data_type=model_runner.kv_cache_dtype, + q_data_type=model_runner.dtype, ) else: # extend part @@ -393,6 +395,8 @@ def update_flashinfer_indices( num_kv_heads, head_dim, 1, + data_type=model_runner.kv_cache_dtype, + q_data_type=model_runner.dtype, ) else: # extend part diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index fa55abba671..fecfc2b4309 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -311,7 +311,7 @@ def profile_max_num_token(self, total_gpu_memory: int): cell_size = ( (self.model_config.kv_lora_rank + self.model_config.qk_rope_head_dim) * self.model_config.num_hidden_layers - * torch._utils._element_size(self.dtype) + * torch._utils._element_size(self.kv_cache_dtype) ) else: cell_size = ( @@ -319,7 +319,7 @@ def profile_max_num_token(self, total_gpu_memory: int): * self.model_config.head_dim * self.model_config.num_hidden_layers * 2 - * torch._utils._element_size(self.dtype) + * torch._utils._element_size(self.kv_cache_dtype) ) rest_memory = available_gpu_memory - total_gpu_memory * ( 1 - self.mem_fraction_static @@ -333,6 +333,21 @@ def init_memory_pool( max_num_reqs: int = None, max_total_tokens: int = None, ): + if self.server_args.kv_cache_dtype == "auto": + self.kv_cache_dtype = self.dtype + elif self.server_args.kv_cache_dtype == "fp8_e5m2": + if self.server_args.disable_flashinfer or self.server_args.enable_mla: + logger.warning( + "FP8 KV cache is not supported for Triton kernel now, using auto kv cache dtype" + ) + self.kv_cache_dtype = self.dtype + else: + self.kv_cache_dtype = torch.float8_e5m2 + else: + raise ValueError( + f"Unsupported kv_cache_dtype: {self.server_args.kv_cache_dtype}." 
+ ) + self.max_total_num_tokens = self.profile_max_num_token(total_gpu_memory) if max_total_tokens is not None: if max_total_tokens > self.max_total_num_tokens: @@ -369,7 +384,7 @@ def init_memory_pool( ): self.token_to_kv_pool = MLATokenToKVPool( self.max_total_num_tokens, - dtype=self.dtype, + dtype=self.kv_cache_dtype, kv_lora_rank=self.model_config.kv_lora_rank, qk_rope_head_dim=self.model_config.qk_rope_head_dim, layer_num=self.model_config.num_hidden_layers, @@ -380,7 +395,7 @@ def init_memory_pool( else: self.token_to_kv_pool = MHATokenToKVPool( self.max_total_num_tokens, - dtype=self.dtype, + dtype=self.kv_cache_dtype, head_num=self.model_config.get_num_kv_heads(self.tp_size), head_dim=self.model_config.head_dim, layer_num=self.model_config.num_hidden_layers, diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index ca27f974829..8a56c02e162 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -33,6 +33,7 @@ class ServerArgs: skip_tokenizer_init: bool = False load_format: str = "auto" dtype: str = "auto" + kv_cache_dtype: str = "auto" trust_remote_code: bool = True context_length: Optional[int] = None quantization: Optional[str] = None @@ -196,6 +197,13 @@ def add_cli_args(parser: argparse.ArgumentParser): '* "float" is shorthand for FP32 precision.\n' '* "float32" for FP32 precision.', ) + parser.add_argument( + "--kv-cache-dtype", + type=str, + default=ServerArgs.kv_cache_dtype, + choices=["auto", "fp8_e5m2"], + help='Data type for kv cache storage. "auto" will use model data type. "fp8_e5m2" is supported for CUDA 11.8+.', + ) parser.add_argument( "--trust-remote-code", action="store_true", From d3efcb3930cfb1c79958dda00ce3e044fd85b714 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 25 Aug 2024 17:45:35 -0700 Subject: [PATCH 19/88] Update workflow files (#1214) --- .github/workflows/accuracy-test.yml | 2 +- .github/workflows/e2e-test.yml | 2 +- .github/workflows/moe-test.yml | 2 +- .github/workflows/unit-test.yml | 2 +- docs/en/setup_github_runner.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index b60a9c6d480..6e1818c9c97 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: accuracy-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: accuracy-test + runs-on: 1-gpu-runner steps: - name: Checkout code diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 8d33870411b..2db6801c769 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: e2e-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: e2e-test + runs-on: 1-gpu-runner steps: - name: Checkout code diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index 2caa3d1820e..111f190c7ce 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -20,7 +20,7 @@ concurrency: jobs: moe-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: moe-test + runs-on: 2-gpu-runner steps: - name: Checkout code diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index e2d7951be4a..752c05da752 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -20,7 +20,7 @@ 
concurrency: jobs: unit-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: unit-test + runs-on: 1-gpu-runner steps: - name: Checkout code diff --git a/docs/en/setup_github_runner.md b/docs/en/setup_github_runner.md index 282e12b7362..8e817dcc88c 100644 --- a/docs/en/setup_github_runner.md +++ b/docs/en/setup_github_runner.md @@ -24,7 +24,7 @@ Then follow https://github.com/sgl-project/sglang/settings/actions/runners/new?a **Notes** - Do not need to specify the runner group -- Give it a name (e.g., `test-sgl-gpu-0`) and some labels (e.g., `unit-test`). The labels can be editted later in Github Settings. +- Give it a name (e.g., `test-sgl-gpu-0`) and some labels (e.g., `1-gpu-runner`). The labels can be editted later in Github Settings. - Do not need to change the work folder. ### Step 3: Run the runner by `run.sh` From 158e8f1e2d499e225add6ed0554896c94fd5a891 Mon Sep 17 00:00:00 2001 From: Mingyi Date: Sun, 25 Aug 2024 19:02:08 -0700 Subject: [PATCH 20/88] improve the threshold and ports in tests (#1215) --- python/sglang/test/test_utils.py | 12 +++----- .../test_srt_endpoint_with_penalizers.py | 7 +++-- test/srt/test_chunked_prefill.py | 7 +++-- test/srt/test_embedding_openai_server.py | 13 +++++++-- test/srt/test_eval_accuracy_large.py | 8 ++--- ...est_eval_accuracy_large_chunked_prefill.py | 7 +++-- ...al_accuracy_large_mixed_chunked_prefill.py | 7 +++-- test/srt/test_eval_accuracy_mini.py | 9 ++++-- test/srt/test_large_max_new_tokens.py | 7 +++-- test/srt/test_moe_serving_throughput.py | 29 +++++++------------ test/srt/test_openai_server.py | 10 +++++-- test/srt/test_serving_throughput.py | 22 ++++++++------ test/srt/test_skip_tokenizer_init.py | 10 +++++-- test/srt/test_srt_endpoint.py | 9 ++++-- test/srt/test_torch_compile.py | 10 +++++-- test/srt/test_triton_attn_backend.py | 10 +++++-- test/srt/test_update_weights.py | 9 ++++-- test/srt/test_vision_openai_server.py | 22 +++++++++----- 18 files changed, 122 insertions(+), 86 deletions(-) diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index 3389e619c92..373b7c1a57e 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -23,18 +23,14 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct" DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" +DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600 if os.getenv("SGLANG_IS_IN_CI", "false") == "true": DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157 - DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157" - DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157" - DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157" - DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:9157" + DEFAULT_URL_FOR_TEST = "http://127.0.0.1:6157" else: - DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:1157" - DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:1257" - DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:1357" - DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:1457" + DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 1157 + DEFAULT_URL_FOR_TEST = "http://127.0.0.1:2157" def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None): diff --git a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py index 4e91f723551..2f5b352ae92 100644 --- a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py +++ b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py @@ -7,7 +7,8 @@ from 
sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( cls.model, cls.base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=( "--random-seed", "0", diff --git a/test/srt/test_chunked_prefill.py b/test/srt/test_chunked_prefill.py index 8d81dc0c3e1..2eb704dc919 100644 --- a/test/srt/test_chunked_prefill.py +++ b/test/srt/test_chunked_prefill.py @@ -5,7 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -20,11 +21,11 @@ def run_mmlu(self, disable_radix_cache, enable_mixed_chunk): other_args += ["--enable-mixed-chunk"] model = DEFAULT_MODEL_NAME_FOR_TEST - base_url = DEFAULT_URL_FOR_UNIT_TEST + base_url = DEFAULT_URL_FOR_TEST process = popen_launch_server( model, base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=other_args, ) diff --git a/test/srt/test_embedding_openai_server.py b/test/srt/test_embedding_openai_server.py index fd8fec48e90..45f7850da99 100644 --- a/test/srt/test_embedding_openai_server.py +++ b/test/srt/test_embedding_openai_server.py @@ -4,17 +4,24 @@ from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_child_process -from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) class TestOpenAIServer(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = "intfloat/e5-mistral-7b-instruct" - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( - cls.model, cls.base_url, timeout=300, api_key=cls.api_key + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, ) cls.base_url += "/v1" cls.tokenizer = get_tokenizer(cls.model) diff --git a/test/srt/test_eval_accuracy_large.py b/test/srt/test_eval_accuracy_large.py index 470ed11aa45..3729ad26b6a 100644 --- a/test/srt/test_eval_accuracy_large.py +++ b/test/srt/test_eval_accuracy_large.py @@ -5,8 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_ACCURACY_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( cls.model, cls.base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=["--log-level-http", "warning"], ) diff --git a/test/srt/test_eval_accuracy_large_chunked_prefill.py b/test/srt/test_eval_accuracy_large_chunked_prefill.py index 951f481da32..02df2a7f56a 100644 --- a/test/srt/test_eval_accuracy_large_chunked_prefill.py +++ 
b/test/srt/test_eval_accuracy_large_chunked_prefill.py @@ -5,7 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_ACCURACY_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( cls.model, cls.base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=["--log-level-http", "warning", "--chunked-prefill-size", "256"], ) diff --git a/test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py b/test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py index 210c32b5196..8ba71e5c836 100644 --- a/test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py +++ b/test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py @@ -5,7 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_ACCURACY_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_ACCURACY_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( cls.model, cls.base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=[ "--log-level-http", "warning", diff --git a/test/srt/test_eval_accuracy_mini.py b/test/srt/test_eval_accuracy_mini.py index a4219b1a0a7..25aa0ca116b 100644 --- a/test/srt/test_eval_accuracy_mini.py +++ b/test/srt/test_eval_accuracy_mini.py @@ -5,7 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST - cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) + cls.base_url = DEFAULT_URL_FOR_TEST + cls.process = popen_launch_server( + cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH + ) @classmethod def tearDownClass(cls): diff --git a/test/srt/test_large_max_new_tokens.py b/test/srt/test_large_max_new_tokens.py index f29adabced9..10b82706a61 100644 --- a/test/srt/test_large_max_new_tokens.py +++ b/test/srt/test_large_max_new_tokens.py @@ -10,7 +10,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, other_args=("--max-total-token", "1024"), env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ}, diff --git 
a/test/srt/test_moe_serving_throughput.py b/test/srt/test_moe_serving_throughput.py index 3cdf724f31f..4f6e8db82c4 100644 --- a/test/srt/test_moe_serving_throughput.py +++ b/test/srt/test_moe_serving_throughput.py @@ -7,7 +7,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MOE_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_MOE_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -25,9 +26,12 @@ def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size other_args.append("--enable-p2p-check") model = DEFAULT_MOE_MODEL_NAME_FOR_TEST - base_url = DEFAULT_URL_FOR_MOE_TEST + base_url = DEFAULT_URL_FOR_TEST process = popen_launch_server( - model, base_url, timeout=300, other_args=other_args + model, + base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, ) # Run benchmark @@ -72,8 +76,8 @@ def test_default(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE) performance - assert res["output_throughput"] > 910 + # A100 (PCIE): 950, H100 (SMX): 1800 + assert res["output_throughput"] > 1750 def test_default_without_radix_cache(self): res = self.run_test( @@ -83,19 +87,8 @@ def test_default_without_radix_cache(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE) performance - assert res["output_throughput"] > 910 - - def test_default_without_chunked_prefill(self): - res = self.run_test( - disable_radix_cache=ServerArgs.disable_radix_cache, - disable_flashinfer=ServerArgs.disable_flashinfer, - chunked_prefill_size=-1, - ) - - if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE) performance - print(res["output_throughput"]) + # A100 (PCIE): 950, H100 (SMX): 1900 + assert res["output_throughput"] > 1850 def test_all_cases(self): for disable_radix_cache in [False, True]: diff --git a/test/srt/test_openai_server.py b/test/srt/test_openai_server.py index 828f5ab532c..ce130956de8 100644 --- a/test/srt/test_openai_server.py +++ b/test/srt/test_openai_server.py @@ -8,7 +8,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( - cls.model, cls.base_url, timeout=300, api_key=cls.api_key + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, ) cls.base_url += "/v1" cls.tokenizer = get_tokenizer(DEFAULT_MODEL_NAME_FOR_TEST) diff --git a/test/srt/test_serving_throughput.py b/test/srt/test_serving_throughput.py index 261ac6ec52f..f1089a6a7b3 100644 --- a/test/srt/test_serving_throughput.py +++ b/test/srt/test_serving_throughput.py @@ -7,7 +7,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_E2E_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -23,9 +24,12 @@ def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)]) model = DEFAULT_MODEL_NAME_FOR_TEST - base_url = DEFAULT_URL_FOR_E2E_TEST + base_url = 
DEFAULT_URL_FOR_TEST process = popen_launch_server( - model, base_url, timeout=300, other_args=other_args + model, + base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, ) # Run benchmark @@ -70,8 +74,8 @@ def test_default(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE) performance - assert res["output_throughput"] > 1400 + # A100 (PCIE): 1450, H100 (SMX): 2550 + assert res["output_throughput"] > 2500 def test_default_without_radix_cache(self): res = self.run_test( @@ -81,8 +85,8 @@ def test_default_without_radix_cache(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE) performance - assert res["output_throughput"] > 1450 + # A100 (PCIE): 1500, H100 (SMX): 2850 + assert res["output_throughput"] > 2800 def test_default_without_chunked_prefill(self): res = self.run_test( @@ -92,8 +96,8 @@ def test_default_without_chunked_prefill(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE) performance - assert res["output_throughput"] > 1400 + # A100 (PCIE): 1450, H100 (SMX): 2550 + assert res["output_throughput"] > 2500 def test_all_cases(self): for disable_radix_cache in [False, True]: diff --git a/test/srt/test_skip_tokenizer_init.py b/test/srt/test_skip_tokenizer_init.py index 75010561514..b159bb55787 100644 --- a/test/srt/test_skip_tokenizer_init.py +++ b/test/srt/test_skip_tokenizer_init.py @@ -6,7 +6,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( - cls.model, cls.base_url, timeout=300, other_args=["--skip-tokenizer-init"] + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=["--skip-tokenizer-init"], ) @classmethod diff --git a/test/srt/test_srt_endpoint.py b/test/srt/test_srt_endpoint.py index 60f4cd58a3b..818aae2151e 100644 --- a/test/srt/test_srt_endpoint.py +++ b/test/srt/test_srt_endpoint.py @@ -6,7 +6,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST - cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) + cls.base_url = DEFAULT_URL_FOR_TEST + cls.process = popen_launch_server( + cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH + ) @classmethod def tearDownClass(cls): diff --git a/test/srt/test_torch_compile.py b/test/srt/test_torch_compile.py index 5133d3cd3c2..26daf4fa578 100644 --- a/test/srt/test_torch_compile.py +++ b/test/srt/test_torch_compile.py @@ -5,7 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - 
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( - cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"] + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=["--enable-torch-compile"], ) @classmethod diff --git a/test/srt/test_triton_attn_backend.py b/test/srt/test_triton_attn_backend.py index 7a453d8be7e..a94ca921240 100644 --- a/test/srt/test_triton_attn_backend.py +++ b/test/srt/test_triton_attn_backend.py @@ -5,7 +5,8 @@ from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.process = popen_launch_server( - cls.model, cls.base_url, timeout=300, other_args=["--disable-flashinfer"] + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=["--disable-flashinfer"], ) @classmethod diff --git a/test/srt/test_update_weights.py b/test/srt/test_update_weights.py index 64f84263aa9..7b8404c735f 100644 --- a/test/srt/test_update_weights.py +++ b/test/srt/test_update_weights.py @@ -6,7 +6,8 @@ from sglang.srt.utils import kill_child_process from sglang.test.test_utils import ( DEFAULT_MODEL_NAME_FOR_TEST, - DEFAULT_URL_FOR_UNIT_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, popen_launch_server, ) @@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = DEFAULT_MODEL_NAME_FOR_TEST - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST - cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300) + cls.base_url = DEFAULT_URL_FOR_TEST + cls.process = popen_launch_server( + cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH + ) @classmethod def tearDownClass(cls): diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index 48157b8db4c..a3457177602 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -11,19 +11,23 @@ from PIL import Image from sglang.srt.utils import kill_child_process -from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) class TestOpenAIVisionServer(unittest.TestCase): @classmethod def setUpClass(cls): cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov" - cls.base_url = DEFAULT_URL_FOR_UNIT_TEST + cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" cls.process = popen_launch_server( cls.model, cls.base_url, - timeout=300, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, api_key=cls.api_key, other_args=[ "--chat-template", @@ -67,7 +71,7 @@ def test_chat_completion(self): assert response.choices[0].message.role == "assistant" text = response.choices[0].message.content assert isinstance(text, str) - assert "logo" in text, text + assert "man" in text or "cab" in text, text assert response.id assert response.created assert response.usage.prompt_tokens > 0 @@ -86,18 +90,19 @@ def test_mult_images_chat_completion(self): { "type": "image_url", "image_url": { - "url": 
"https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png" }, }, { "type": "image_url", "image_url": { - "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png" + "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" }, }, { "type": "text", - "text": "I have shown you two images. Please describe the two images to me.", + "text": "I have two very different images. They are not related at all. " + "Please describe the first image in one sentence, and then describe the second image in another sentence.", }, ], }, @@ -108,8 +113,9 @@ def test_mult_images_chat_completion(self): assert response.choices[0].message.role == "assistant" text = response.choices[0].message.content assert isinstance(text, str) + print(text) assert "man" in text or "cab" in text, text - assert "logo" in text, text + # assert "logo" in text, text assert response.id assert response.created assert response.usage.prompt_tokens > 0 From 7514b9f8d3660417c085538076cf5162f32ce2fb Mon Sep 17 00:00:00 2001 From: Mingyi Date: Sun, 25 Aug 2024 19:56:42 -0700 Subject: [PATCH 21/88] [CI] Fix CI (#1217) --- python/sglang/test/test_utils.py | 35 +++++++++---------- test/srt/models/test_embedding_models.py | 2 +- test/srt/models/test_generation_models.py | 2 +- test/srt/run_suite.py | 8 ++--- .../test_srt_endpoint_with_penalizers.py | 2 +- 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index 373b7c1a57e..04688639965 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -2,12 +2,10 @@ import argparse import asyncio -import multiprocessing import os import subprocess import threading import time -import unittest from functools import partial from typing import Callable, List, Optional @@ -19,6 +17,7 @@ from sglang.global_config import global_config from sglang.lang.backend.openai import OpenAI from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint +from sglang.srt.utils import kill_child_process from sglang.utils import get_exception_traceback DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct" @@ -457,35 +456,35 @@ def _target_func(): return ret_value[0] -def run_one_file(filename, out_queue): - print(f"\n\nRun {filename}\n\n") - ret = unittest.main(module=None, argv=["", "-vb"] + [filename]) - - def run_unittest_files(files: List[str], timeout_per_file: float): tic = time.time() success = True for filename in files: - out_queue = multiprocessing.Queue() - p = multiprocessing.Process(target=run_one_file, args=(filename, out_queue)) + global process - def run_process(): - p.start() - p.join() + def run_one_file(filename): + filename = os.path.join(os.getcwd(), filename) + print(f"\n\nRun {filename}\n\n") + process = subprocess.Popen( + ["python3", filename], stdout=None, stderr=None, env=os.environ + ) + process.wait() + return process.returncode try: - run_with_timeout(run_process, timeout=timeout_per_file) - if p.exitcode != 0: - success = False - break + ret_code = run_with_timeout( + run_one_file, args=(filename,), timeout=timeout_per_file + ) + assert ret_code == 0 except TimeoutError: - p.terminate() + kill_child_process(process.pid) time.sleep(5) print( f"\nTimeout after {timeout_per_file} seconds when running {filename}\n" ) - return False + success = False + break if success: print(f"Success. 
Time elapsed: {time.time() - tic:.2f}s") diff --git a/test/srt/models/test_embedding_models.py b/test/srt/models/test_embedding_models.py index 8a43255b7ff..a5a73bf319f 100644 --- a/test/srt/models/test_embedding_models.py +++ b/test/srt/models/test_embedding_models.py @@ -77,4 +77,4 @@ def test_prefill_logits(self): except RuntimeError: pass - unittest.main(warnings="ignore") + unittest.main() diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py index 4e49c0a5b1c..b953ccf5d6a 100644 --- a/test/srt/models/test_generation_models.py +++ b/test/srt/models/test_generation_models.py @@ -136,4 +136,4 @@ def test_prefill_logits_and_output_strs(self): except RuntimeError: pass - unittest.main(warnings="ignore") + unittest.main() diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 3756d3ddfb1..4e11d8da25b 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -7,7 +7,7 @@ suites = { "minimal": [ "models/test_embedding_models.py", - # "models/test_generation_models.py", + "models/test_generation_models.py", "sampling/penaltylib", "test_chunked_prefill.py", "test_embedding_openai_server.py", @@ -33,6 +33,7 @@ tests.remove(target_suite_name) tests.extend(target_tests) + if __name__ == "__main__": arg_parser = argparse.ArgumentParser() arg_parser.add_argument( @@ -55,10 +56,5 @@ else: files = suites[args.suite] - try: - mp.set_start_method("spawn") - except RuntimeError: - pass - exit_code = run_unittest_files(files, args.timeout_per_file) exit(exit_code) diff --git a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py index 2f5b352ae92..e3496102cb1 100644 --- a/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py +++ b/test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py @@ -112,4 +112,4 @@ def test_repetition_penalty(self): if __name__ == "__main__": - unittest.main(warnings="ignore") + unittest.main() From 3579162ab102351b8cac5d17eab29e05fee63abe Mon Sep 17 00:00:00 2001 From: Kaichen Zhang - NTU Date: Mon, 26 Aug 2024 11:58:51 +0800 Subject: [PATCH 22/88] [Fix] Multi-images loading error (#1218) --- python/sglang/srt/managers/tokenizer_manager.py | 2 +- test/srt/test_vision_openai_server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 199ea7c3a65..3f25ad56072 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -428,7 +428,7 @@ def _get_sampling_params(self, sampling_params_data: dict): async def _get_pixel_values(self, image_data): if isinstance(image_data, list) and len(image_data) > 0: - return await self._get_pixel_values_internal(image_data[0]) + return await self._get_pixel_values_internal(image_data) elif isinstance(image_data, str): return await self._get_pixel_values_internal(image_data) else: diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index a3457177602..0003e4776ab 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -114,8 +114,8 @@ def test_mult_images_chat_completion(self): text = response.choices[0].message.content assert isinstance(text, str) print(text) - assert "man" in text or "cab" in text, text - # assert "logo" in text, text + assert "man" in text and "taxi" in text, text + assert "logo" in text, text assert response.id 
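The `run_unittest_files` rewrite in the CI fix above trades `multiprocessing` for `subprocess`: each test file runs under a fresh interpreter, the wall-clock limit is enforced from outside, and on expiry the whole child tree is killed via `kill_child_process` so a wedged server cannot stall the suite. A condensed, self-contained sketch of the same idea, using `Popen.wait(timeout=...)` in place of the repo's `run_with_timeout` helper (that substitution is an assumption of this sketch, not the patch's exact mechanism):

```python
import subprocess
import sys


def run_one_file(filename: str, timeout: float) -> int:
    proc = subprocess.Popen([sys.executable, filename])
    try:
        return proc.wait(timeout=timeout)  # returncode 0 means the file passed
    except subprocess.TimeoutExpired:
        proc.kill()   # the real helper also kills grandchildren of the test process
        proc.wait()
        return -1
```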
assert response.created assert response.usage.prompt_tokens > 0 From 632d506d0b526f641f9ced4f408dad8bd64b5009 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Sun, 25 Aug 2024 21:26:31 -0700 Subject: [PATCH 23/88] minor: improve CI and dependencies (#1212) --- .github/workflows/unit-test.yml | 4 +--- python/pyproject.toml | 2 +- python/sglang/srt/managers/tokenizer_manager.py | 8 +++----- python/sglang/test/test_utils.py | 9 +++++---- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 752c05da752..6ab37160336 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -29,10 +29,8 @@ jobs: - name: Install dependencies run: | pip install --upgrade pip - pip install -e "python[all]" + pip install -e "python[dev]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - pip install accelerate - pip install sentence_transformers - name: Test Backend Runtime timeout-minutes: 20 diff --git a/python/pyproject.toml b/python/pyproject.toml index 6b1b032fdcc..4908ad051fd 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -27,7 +27,7 @@ srt = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hub", "intere openai = ["openai>=1.0", "tiktoken"] anthropic = ["anthropic>=0.20.0"] litellm = ["litellm>=1.0.0"] -test = ["jsonlines", "matplotlib", "pandas"] +test = ["jsonlines", "matplotlib", "pandas", "sentence_transformers", "accelerate"] all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"] dev = ["sglang[all]", "sglang[test]"] diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 3f25ad56072..c74251947bf 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -427,12 +427,10 @@ def _get_sampling_params(self, sampling_params_data: dict): return sampling_params async def _get_pixel_values(self, image_data): - if isinstance(image_data, list) and len(image_data) > 0: - return await self._get_pixel_values_internal(image_data) - elif isinstance(image_data, str): - return await self._get_pixel_values_internal(image_data) - else: + if image_data is None: return None, None, None + else: + return await self._get_pixel_values_internal(image_data) async def _wait_for_response( self, diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index 04688639965..59e2ab2924c 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -465,7 +465,7 @@ def run_unittest_files(files: List[str], timeout_per_file: float): def run_one_file(filename): filename = os.path.join(os.getcwd(), filename) - print(f"\n\nRun {filename}\n\n") + print(f"\n\nRun {filename}\n\n", flush=True) process = subprocess.Popen( ["python3", filename], stdout=None, stderr=None, env=os.environ ) @@ -481,15 +481,16 @@ def run_one_file(filename): kill_child_process(process.pid) time.sleep(5) print( - f"\nTimeout after {timeout_per_file} seconds when running {filename}\n" + f"\nTimeout after {timeout_per_file} seconds when running {filename}\n", + flush=True, ) success = False break if success: - print(f"Success. Time elapsed: {time.time() - tic:.2f}s") + print(f"Success. Time elapsed: {time.time() - tic:.2f}s", flush=True) else: - print(f"Fail. Time elapsed: {time.time() - tic:.2f}s") + print(f"Fail. 
Time elapsed: {time.time() - tic:.2f}s", flush=True) return 0 if success else -1 From 97589a60a2cf2ef75d26ca0de9a78f30e2b63c4e Mon Sep 17 00:00:00 2001 From: Mingyi Date: Sun, 25 Aug 2024 21:54:02 -0700 Subject: [PATCH 24/88] [CI] Parallelize unit tests in CI (#1219) --- .github/workflows/accuracy-test.yml | 30 ++++++++--------- .github/workflows/e2e-test.yml | 52 ++++++++++++++--------------- .github/workflows/moe-test.yml | 34 +++++++++---------- .github/workflows/unit-test.yml | 49 ++++++++++++++++----------- python/sglang/test/test_utils.py | 2 +- test/srt/run_suite.py | 15 ++++++++- 6 files changed, 103 insertions(+), 79 deletions(-) diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index 6e1818c9c97..6fb102a4c5a 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -23,21 +23,21 @@ jobs: runs-on: 1-gpu-runner steps: - - name: Checkout code - uses: actions/checkout@v3 + - name: Checkout code + uses: actions/checkout@v3 - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - git clone https://github.com/merrymercy/human-eval.git - cd human-eval - pip install -e . + git clone https://github.com/merrymercy/human-eval.git + cd human-eval + pip install -e . - - name: Evaluate Accuracy - timeout-minutes: 20 - run: | - cd test/srt - python3 test_eval_accuracy_large.py + - name: Evaluate Accuracy + timeout-minutes: 20 + run: | + cd test/srt + python3 test_eval_accuracy_large.py diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 2db6801c769..7f555110d9d 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -23,29 +23,29 @@ jobs: runs-on: 1-gpu-runner steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Benchmark Serving Throughput - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default - - - name: Benchmark Serving Throughput (w/o RadixAttention) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache - - - name: Benchmark Serving Throughput (w/o ChunkedPrefill) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark Serving Throughput + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache + + - name: Benchmark Serving Throughput (w/o ChunkedPrefill) + 
timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index 111f190c7ce..4440aa215f0 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -23,23 +23,23 @@ jobs: runs-on: 2-gpu-runner steps: - - name: Checkout code - uses: actions/checkout@v3 + - name: Checkout code + uses: actions/checkout@v3 - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - name: Benchmark MoE Serving Throughput - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + - name: Benchmark MoE Serving Throughput + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - - name: Benchmark MoE Serving Throughput (w/o RadixAttention) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + - name: Benchmark MoE Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 6ab37160336..41a565a6382 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -18,28 +18,39 @@ concurrency: cancel-in-progress: true jobs: - unit-test: + unit-test-jobs: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: 1-gpu-runner - + strategy: + matrix: + test_type: ['backend-0', 'backend-1', 'frontend'] steps: - - name: Checkout code - uses: actions/checkout@v3 + - name: Checkout code + uses: actions/checkout@v3 - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - name: Test Backend Runtime - timeout-minutes: 20 - run: | - cd test/srt - python3 run_suite.py --suite minimal + - name: Run test + timeout-minutes: 20 + run: | + if [ "${{ matrix.test_type }}" = "frontend" ]; then + cd test/lang + python3 run_suite.py --suite minimal + elif [ "${{ matrix.test_type }}" = "backend-0" ]; then + cd test/srt + python3 run_suite.py --suite minimal --range-begin 0 --range-end 8 + elif [ "${{ matrix.test_type }}" = "backend-1" ]; then + cd test/srt + python3 run_suite.py --suite minimal --range-begin 8 + fi - - name: Test Frontend Language - timeout-minutes: 10 - run: | - cd test/lang - python3 run_suite.py --suite minimal + unit-test: + needs: unit-test-jobs + runs-on: ubuntu-latest + steps: + - name: Merge step + run: echo "This is an empty merge step" \ No newline at end of file diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index 59e2ab2924c..d6a1792b85e 100644 --- 
a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -465,7 +465,7 @@ def run_unittest_files(files: List[str], timeout_per_file: float): def run_one_file(filename): filename = os.path.join(os.getcwd(), filename) - print(f"\n\nRun {filename}\n\n", flush=True) + print(f"\n\nRun:\npython3 {filename}\n\n", flush=True) process = subprocess.Popen( ["python3", filename], stdout=None, stderr=None, env=os.environ ) diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 4e11d8da25b..2351579f190 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -1,6 +1,5 @@ import argparse import glob -import multiprocessing as mp from sglang.test.test_utils import run_unittest_files @@ -49,6 +48,18 @@ choices=list(suites.keys()) + ["all"], help="The suite to run", ) + arg_parser.add_argument( + "--range-begin", + type=int, + default=0, + help="The begin index of the range of the files to run.", + ) + arg_parser.add_argument( + "--range-end", + type=int, + default=None, + help="The end index of the range of the files to run.", + ) args = arg_parser.parse_args() if args.suite == "all": @@ -56,5 +67,7 @@ else: files = suites[args.suite] + files = files[args.range_begin : args.range_end] + exit_code = run_unittest_files(files, args.timeout_per_file) exit(exit_code) From 75ce37f40139394bd2f3f55250095477d8c9b16d Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Mon, 26 Aug 2024 07:02:50 -0700 Subject: [PATCH 25/88] Move sampler into CUDA graph (#1201) Co-authored-by: Yineng Zhang --- python/sglang/srt/layers/logits_processor.py | 8 +- python/sglang/srt/layers/sampler.py | 83 +++++++++++++++---- python/sglang/srt/managers/schedule_batch.py | 28 +++++-- python/sglang/srt/managers/tp_worker.py | 52 +++++++----- .../srt/model_executor/cuda_graph_runner.py | 33 ++++++-- .../srt/model_executor/forward_batch_info.py | 9 +- .../sglang/srt/model_executor/model_runner.py | 14 +++- python/sglang/srt/models/chatglm.py | 16 +--- python/sglang/srt/models/commandr.py | 6 +- python/sglang/srt/models/dbrx.py | 6 +- python/sglang/srt/models/deepseek.py | 6 +- python/sglang/srt/models/deepseek_v2.py | 6 +- python/sglang/srt/models/gemma.py | 6 +- python/sglang/srt/models/gemma2.py | 6 +- python/sglang/srt/models/gpt_bigcode.py | 6 +- python/sglang/srt/models/grok.py | 6 +- python/sglang/srt/models/internlm2.py | 6 +- python/sglang/srt/models/llama2.py | 10 ++- .../sglang/srt/models/llama_classification.py | 4 +- python/sglang/srt/models/minicpm.py | 6 +- python/sglang/srt/models/mixtral.py | 6 +- python/sglang/srt/models/mixtral_quant.py | 6 +- python/sglang/srt/models/qwen.py | 7 +- python/sglang/srt/models/qwen2.py | 8 +- python/sglang/srt/models/qwen2_moe.py | 19 ++--- python/sglang/srt/models/stablelm.py | 6 +- .../srt/sampling/sampling_batch_info.py | 75 ++++++++++++++++- python/sglang/test/runners.py | 2 +- 28 files changed, 336 insertions(+), 110 deletions(-) diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py index 63f74d8b026..b81f3d2a040 100644 --- a/python/sglang/srt/layers/logits_processor.py +++ b/python/sglang/srt/layers/logits_processor.py @@ -29,7 +29,7 @@ @dataclasses.dataclass -class LogitProcessorOutput: +class LogitsProcessorOutput: # The logits of the next tokens. shape: [#seq, vocab_size] next_token_logits: torch.Tensor # The logprobs of the next tokens. 
shape: [#seq, vocab_size] @@ -185,7 +185,7 @@ def forward( # Return only last_logits if logprob is not requested if not logits_metadata.return_logprob: - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=last_logits, next_token_logprobs=None, normalized_prompt_logprobs=None, @@ -209,7 +209,7 @@ def forward( else: output_top_logprobs = None - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=last_logits, next_token_logprobs=last_logprobs, normalized_prompt_logprobs=None, @@ -278,7 +278,7 @@ def forward( # Remove the last token logprob for the prefill tokens. input_token_logprobs = input_token_logprobs[:-1] - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=last_logits, next_token_logprobs=last_logprobs, normalized_prompt_logprobs=normalized_prompt_logprobs, diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py index 3006e765c88..6cb7d0a7c11 100644 --- a/python/sglang/srt/layers/sampler.py +++ b/python/sglang/srt/layers/sampler.py @@ -1,4 +1,6 @@ +import dataclasses import logging +from typing import Union import torch from flashinfer.sampling import ( @@ -9,6 +11,8 @@ ) from vllm.model_executor.custom_op import CustomOp +from sglang.srt.layers.logits_processor import LogitsProcessorOutput + # TODO: move this dict to another place from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo @@ -16,30 +20,71 @@ logger = logging.getLogger(__name__) +@dataclasses.dataclass +class SampleOutput: + success: torch.Tensor + probs: torch.Tensor + batch_next_token_ids: torch.Tensor + + class Sampler(CustomOp): def __init__(self): super().__init__() - def forward_cuda(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): + def _apply_penalties(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): + # min-token, presence, frequency + if sampling_info.linear_penalties is not None: + logits += sampling_info.linear_penalties + + # repetition + if sampling_info.scaling_penalties is not None: + logits = torch.where( + logits > 0, + logits / sampling_info.scaling_penalties, + logits * sampling_info.scaling_penalties, + ) + + return logits + + def _get_probs( + self, + logits: torch.Tensor, + sampling_info: SamplingBatchInfo, + is_torch_compile: bool = False, + ): # Post process logits logits = logits.contiguous() logits.div_(sampling_info.temperatures) + if is_torch_compile: + # FIXME: Temporary workaround for unknown bugs in torch.compile + logits.add_(0) + if sampling_info.logit_bias is not None: logits.add_(sampling_info.logit_bias) if sampling_info.vocab_mask is not None: logits = logits.masked_fill(~sampling_info.vocab_mask, float("-inf")) - logits = sampling_info.penalizer_orchestrator.apply(logits) + logits = self._apply_penalties(logits, sampling_info) - probs = torch.softmax(logits, dim=-1) + return torch.softmax(logits, dim=-1) + + def forward_cuda( + self, + logits: Union[torch.Tensor, LogitsProcessorOutput], + sampling_info: SamplingBatchInfo, + ): + if isinstance(logits, LogitsProcessorOutput): + logits = logits.next_token_logits + + probs = self._get_probs(logits, sampling_info) if not global_server_args_dict["disable_flashinfer_sampling"]: max_top_k_round, batch_size = 32, probs.shape[0] uniform_samples = torch.rand( (max_top_k_round, batch_size), device=probs.device ) - if sampling_info.min_ps.any(): + if sampling_info.need_min_p_sampling: probs = top_k_renorm_prob(probs, 
sampling_info.top_ks) probs = top_p_renorm_prob(probs, sampling_info.top_ps) batch_next_token_ids, success = min_p_sampling_from_probs( @@ -55,18 +100,23 @@ def forward_cuda(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps ) - if not torch.all(success): - logging.warning("Sampling failed, fallback to top_k=1 strategy") - probs = probs.masked_fill(torch.isnan(probs), 0.0) - argmax_ids = torch.argmax(probs, dim=-1) - batch_next_token_ids = torch.where( - success, batch_next_token_ids, argmax_ids - ) + return SampleOutput(success, probs, batch_next_token_ids) - return batch_next_token_ids + def forward_native( + self, + logits: Union[torch.Tensor, LogitsProcessorOutput], + sampling_info: SamplingBatchInfo, + ): + if isinstance(logits, LogitsProcessorOutput): + logits = logits.next_token_logits + + probs = self._get_probs(logits, sampling_info, is_torch_compile=True) + + batch_next_token_ids, success = top_k_top_p_min_p_sampling_from_probs_torch( + probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps ) - def forward_native(): - raise NotImplementedError("Native forward is not implemented yet.") + return SampleOutput(success, probs, batch_next_token_ids) def top_k_top_p_min_p_sampling_from_probs_torch( @@ -87,7 +137,10 @@ def top_k_top_p_min_p_sampling_from_probs_torch( probs_sort[probs_sort < min_p_thresholds.view(-1, 1)] = 0.0 probs_sort.div_(probs_sort.max(dim=-1, keepdim=True)[0]) try: - sampled_index = torch.multinomial(probs_sort, num_samples=1) + # FIXME: torch.multinomial does not support num_samples = 1 + sampled_index = torch.multinomial(probs_sort, num_samples=2, replacement=True)[ + :, :1 + ] except RuntimeError as e: logger.warning(f"Sampling error: {e}") batch_next_token_ids = torch.zeros( diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index e61f13cb9d7..dfd32dea9c3 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ Copyright 2023-2024 SGLang Team Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +19,7 @@ import logging from dataclasses import dataclass -from typing import List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union import torch @@ -29,6 +31,10 @@ from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo +if TYPE_CHECKING: + from sglang.srt.layers.sampler import SampleOutput + + INIT_INCREMENTAL_DETOKENIZATION_OFFSET = 5 # Put some global args for easy access @@ -671,11 +677,17 @@ def merge(self, other: "ScheduleBatch"): self.top_logprobs_nums.extend(other.top_logprobs_nums) self.return_logprob = any(req.return_logprob for req in self.reqs) - def sample(self, logits: torch.Tensor): - from sglang.srt.layers.sampler import Sampler - - sampler = Sampler() - - batch_next_token_ids = sampler(logits, self.sampling_info) + def check_sample_results(self, sample_output: SampleOutput): + if not torch.all(sample_output.success): + probs = sample_output.probs + batch_next_token_ids = sample_output.batch_next_token_ids + logging.warning("Sampling failed, fallback to top_k=1 strategy") + probs = probs.masked_fill(torch.isnan(probs), 0.0) + argmax_ids = torch.argmax(probs, dim=-1) + batch_next_token_ids = torch.where( + sample_output.success, batch_next_token_ids, argmax_ids
+ ) + sample_output.probs = probs + sample_output.batch_next_token_ids = batch_next_token_ids - return batch_next_token_ids + return sample_output.batch_next_token_ids diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index 9820e0302e1..ddf20970e71 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -31,7 +31,7 @@ from sglang.srt.constrained.fsm_cache import FSMCache from sglang.srt.constrained.jump_forward import JumpForwardCache from sglang.srt.hf_transformers_utils import get_processor, get_tokenizer -from sglang.srt.layers.logits_processor import LogitProcessorOutput +from sglang.srt.layers.logits_processor import LogitsProcessorOutput from sglang.srt.managers.io_struct import ( AbortReq, BatchEmbeddingOut, @@ -486,21 +486,29 @@ def forward_prefill_batch(self, batch: ScheduleBatch): if self.model_runner.is_generation: # Forward and sample the next tokens if batch.extend_num_tokens != 0: - output = self.model_runner.forward(batch, ForwardMode.EXTEND) - next_token_ids = batch.sample(output.next_token_logits) + sample_output, logits_output = self.model_runner.forward( + batch, ForwardMode.EXTEND + ) + next_token_ids = batch.check_sample_results(sample_output) batch.sampling_info.penalizer_orchestrator.cumulate_output_tokens( next_token_ids ) # Move logprobs to cpu - if output.next_token_logprobs is not None: - output.next_token_logprobs = output.next_token_logprobs[ - torch.arange(len(next_token_ids), device=next_token_ids.device), - next_token_ids, - ].tolist() - output.input_token_logprobs = output.input_token_logprobs.tolist() - output.normalized_prompt_logprobs = ( - output.normalized_prompt_logprobs.tolist() + if logits_output.next_token_logprobs is not None: + logits_output.next_token_logprobs = ( + logits_output.next_token_logprobs[ + torch.arange( + len(next_token_ids), device=next_token_ids.device + ), + next_token_ids, + ].tolist() + ) + logits_output.input_token_logprobs = ( + logits_output.input_token_logprobs.tolist() + ) + logits_output.normalized_prompt_logprobs = ( + logits_output.normalized_prompt_logprobs.tolist() ) next_token_ids = next_token_ids.tolist() @@ -539,12 +547,14 @@ def forward_prefill_batch(self, batch: ScheduleBatch): self.req_to_token_pool.free(req.req_pool_idx) if req.return_logprob: - self.add_logprob_return_values(i, req, pt, next_token_ids, output) + self.add_logprob_return_values( + i, req, pt, next_token_ids, logits_output + ) pt += req.extend_input_len else: assert batch.extend_num_tokens != 0 - output = self.model_runner.forward(batch, ForwardMode.EXTEND) - embeddings = output.embeddings.tolist() + logits_output = self.model_runner.forward(batch, ForwardMode.EXTEND) + embeddings = logits_output.embeddings.tolist() # Check finish conditions for i, req in enumerate(batch.reqs): @@ -572,7 +582,7 @@ def add_logprob_return_values( req: Req, pt: int, next_token_ids: List[int], - output: LogitProcessorOutput, + output: LogitsProcessorOutput, ): if req.normalized_prompt_logprob is None: req.normalized_prompt_logprob = output.normalized_prompt_logprobs[i] @@ -654,15 +664,17 @@ def forward_decode_batch(self, batch: ScheduleBatch): batch.prepare_for_decode() # Forward and sample the next tokens - output = self.model_runner.forward(batch, ForwardMode.DECODE) - next_token_ids = batch.sample(output.next_token_logits) + sample_output, logits_output = self.model_runner.forward( + batch, ForwardMode.DECODE + ) + next_token_ids = batch.check_sample_results(sample_output) 
batch.sampling_info.penalizer_orchestrator.cumulate_output_tokens( next_token_ids ) # Move logprobs to cpu - if output.next_token_logprobs is not None: - next_token_logprobs = output.next_token_logprobs[ + if logits_output.next_token_logprobs is not None: + next_token_logprobs = logits_output.next_token_logprobs[ torch.arange(len(next_token_ids), device=next_token_ids.device), next_token_ids, ].tolist() @@ -688,7 +700,7 @@ def forward_decode_batch(self, batch: ScheduleBatch): (next_token_logprobs[i], next_token_id) ) if req.top_logprobs_num > 0: - req.output_top_logprobs.append(output.output_top_logprobs[i]) + req.output_top_logprobs.append(logits_output.output_top_logprobs[i]) self.handle_finished_requests(batch) diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index d045be56d84..96c15849e4e 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -25,16 +25,18 @@ from vllm.model_executor.custom_op import CustomOp from sglang.srt.layers.logits_processor import ( - LogitProcessorOutput, LogitsMetadata, LogitsProcessor, + LogitsProcessorOutput, ) +from sglang.srt.layers.sampler import SampleOutput from sglang.srt.managers.schedule_batch import ScheduleBatch from sglang.srt.model_executor.forward_batch_info import ( ForwardMode, InputMetadata, update_flashinfer_indices, ) +from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo from sglang.srt.utils import monkey_patch_vllm_all_gather @@ -143,6 +145,10 @@ def __init__( self.flashinfer_kv_indices.clone(), ] + # Sampling inputs + vocab_size = model_runner.model_config.vocab_size + self.sampling_info = SamplingBatchInfo.dummy_one(self.max_bs, vocab_size) + self.compile_bs = [1, 2, 4, 8, 16, 24, 32] if use_torch_compile else [] if use_torch_compile: @@ -234,6 +240,7 @@ def capture_one_batch_size(self, bs, forward): def run_once(): input_metadata = InputMetadata( forward_mode=ForwardMode.DECODE, + sampling_info=self.sampling_info[:bs], batch_size=bs, req_pool_indices=req_pool_indices, seq_lens=seq_lens, @@ -298,27 +305,35 @@ def replay(self, batch: ScheduleBatch): self.flashinfer_handlers[bs], ) + # Sampling inputs + self.sampling_info.inplace_assign(raw_bs, batch.sampling_info) + # Replay torch.cuda.synchronize() self.graphs[bs].replay() torch.cuda.synchronize() - output = self.output_buffers[bs] + sample_output, logits_output = self.output_buffers[bs] # Unpad if bs != raw_bs: - output = LogitProcessorOutput( - next_token_logits=output.next_token_logits[:raw_bs], + logits_output = LogitsProcessorOutput( + next_token_logits=logits_output.next_token_logits[:raw_bs], next_token_logprobs=None, normalized_prompt_logprobs=None, input_token_logprobs=None, input_top_logprobs=None, output_top_logprobs=None, ) + sample_output = SampleOutput( + sample_output.success[:raw_bs], + sample_output.probs[:raw_bs], + sample_output.batch_next_token_ids[:raw_bs], + ) # Extract logprobs if batch.return_logprob: - output.next_token_logprobs = torch.nn.functional.log_softmax( - output.next_token_logits, dim=-1 + logits_output.next_token_logprobs = torch.nn.functional.log_softmax( + logits_output.next_token_logits, dim=-1 ) return_top_logprob = any(x > 0 for x in batch.top_logprobs_nums) if return_top_logprob: @@ -326,8 +341,8 @@ def replay(self, batch: ScheduleBatch): forward_mode=ForwardMode.DECODE, top_logprobs_nums=batch.top_logprobs_nums, ) - output.output_top_logprobs = LogitsProcessor.get_top_logprobs( - 
output.next_token_logprobs, logits_metadata + logits_output.output_top_logprobs = LogitsProcessor.get_top_logprobs( + logits_output.next_token_logprobs, logits_metadata )[1] - return output + return sample_output, logits_output diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index c107b3bc826..e8849962b07 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ Copyright 2023-2024 SGLang Team Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +18,7 @@ """ModelRunner runs the forward passes of the models.""" from dataclasses import dataclass from enum import IntEnum, auto -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, List import numpy as np import torch @@ -26,6 +28,7 @@ if TYPE_CHECKING: from sglang.srt.model_executor.model_runner import ModelRunner + from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo class ForwardMode(IntEnum): @@ -42,6 +45,7 @@ class InputMetadata: """Store all inforamtion of a forward pass.""" forward_mode: ForwardMode + sampling_info: SamplingBatchInfo batch_size: int req_pool_indices: torch.Tensor seq_lens: torch.Tensor @@ -179,6 +183,7 @@ def from_schedule_batch( ): ret = cls( forward_mode=forward_mode, + sampling_info=batch.sampling_info, batch_size=batch.batch_size(), req_pool_indices=batch.req_pool_indices, seq_lens=batch.seq_lens, @@ -189,6 +194,8 @@ def from_schedule_batch( top_logprobs_nums=batch.top_logprobs_nums, ) + ret.sampling_info.prepare_penalties() + ret.compute_positions(batch) ret.compute_extend_infos(batch) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index fecfc2b4309..eb6fe319f91 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -21,7 +21,7 @@ import logging import pkgutil from functools import lru_cache -from typing import Optional, Type +from typing import Optional, Tuple, Type import torch import torch.nn as nn @@ -44,6 +44,8 @@ from vllm.model_executor.models import ModelRegistry from sglang.global_config import global_config +from sglang.srt.layers.logits_processor import LogitsProcessorOutput +from sglang.srt.layers.sampler import SampleOutput from sglang.srt.managers.schedule_batch import ScheduleBatch, global_server_args_dict from sglang.srt.mem_cache.memory_pool import ( MHATokenToKVPool, @@ -514,7 +516,11 @@ def init_cuda_graphs(self): @torch.inference_mode() def forward_decode(self, batch: ScheduleBatch): - if self.cuda_graph_runner and self.cuda_graph_runner.can_run(len(batch.reqs)): + if ( + self.cuda_graph_runner + and self.cuda_graph_runner.can_run(len(batch.reqs)) + and not batch.sampling_info.has_bias() + ): return self.cuda_graph_runner.replay(batch) input_metadata = InputMetadata.from_schedule_batch( @@ -563,7 +569,9 @@ def forward_extend_multi_modal(self, batch: ScheduleBatch): input_metadata.image_offsets, ) - def forward(self, batch: ScheduleBatch, forward_mode: ForwardMode): + def forward( + self, batch: ScheduleBatch, forward_mode: ForwardMode + ) -> Tuple[SampleOutput, LogitsProcessorOutput]: if self.is_multimodal_model and forward_mode == ForwardMode.EXTEND: return self.forward_extend_multi_modal(batch) elif forward_mode == ForwardMode.DECODE: diff --git a/python/sglang/srt/models/chatglm.py 
b/python/sglang/srt/models/chatglm.py index 0a22f994bb4..1c189eebbc0 100644 --- a/python/sglang/srt/models/chatglm.py +++ b/python/sglang/srt/models/chatglm.py @@ -31,20 +31,18 @@ ) from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplerOutput from vllm.transformers_utils.configs import ChatGLMConfig from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata LoraConfig = None @@ -383,17 +381,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - - def sample( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) diff --git a/python/sglang/srt/models/commandr.py b/python/sglang/srt/models/commandr.py index f6d6f6e1f94..c360106f97c 100644 --- a/python/sglang/srt/models/commandr.py +++ b/python/sglang/srt/models/commandr.py @@ -64,6 +64,7 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -326,6 +327,7 @@ def __init__( self.config = config self.quant_config = quant_config self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() self.model = CohereModel(config, quant_config) @torch.no_grad() @@ -340,9 +342,11 @@ def forward( positions, input_metadata, ) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/dbrx.py b/python/sglang/srt/models/dbrx.py index 39ac4aefa72..b3a76b56ae2 100644 --- a/python/sglang/srt/models/dbrx.py +++ b/python/sglang/srt/models/dbrx.py @@ -45,6 +45,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -382,6 +383,7 @@ def __init__( padding_size=DEFAULT_VOCAB_PADDING_SIZE, ) 
self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -391,9 +393,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): expert_params_mapping = [ diff --git a/python/sglang/srt/models/deepseek.py b/python/sglang/srt/models/deepseek.py index 59fd1ec7ed8..b939602c1ba 100644 --- a/python/sglang/srt/models/deepseek.py +++ b/python/sglang/srt/models/deepseek.py @@ -46,6 +46,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -385,6 +386,7 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -394,9 +396,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 13dd477392e..15ecf4bb66b 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -45,6 +45,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -632,6 +633,7 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() def forward( self, @@ -640,9 +642,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/gemma.py b/python/sglang/srt/models/gemma.py index 990937f5180..61cc5c66ea5 100644 --- a/python/sglang/srt/models/gemma.py +++ b/python/sglang/srt/models/gemma.py @@ -37,6 +37,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import 
Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -287,6 +288,7 @@ def __init__( self.quant_config = quant_config self.model = GemmaModel(config, quant_config=quant_config) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -297,9 +299,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return (sample_output, logits_output) def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/gemma2.py b/python/sglang/srt/models/gemma2.py index c6dbc7e5569..fabf86b498e 100644 --- a/python/sglang/srt/models/gemma2.py +++ b/python/sglang/srt/models/gemma2.py @@ -41,6 +41,7 @@ from sglang.srt.layers.activation import GeluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -396,6 +397,7 @@ def __init__( self.quant_config = quant_config self.model = Gemma2Model(config, cache_config, quant_config) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -406,9 +408,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def get_attention_sliding_window_size(self): return get_attention_sliding_window_size(self.config) diff --git a/python/sglang/srt/models/gpt_bigcode.py b/python/sglang/srt/models/gpt_bigcode.py index 9a9e2aec3a7..979d06886e3 100644 --- a/python/sglang/srt/models/gpt_bigcode.py +++ b/python/sglang/srt/models/gpt_bigcode.py @@ -35,6 +35,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -261,6 +262,7 @@ def __init__( if lora_config: self.unpadded_vocab_size += lora_config.lora_extra_vocab_size self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -270,9 +272,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py index 4a0a08bf88b..85a89ca3edc 100644 --- a/python/sglang/srt/models/grok.py +++ b/python/sglang/srt/models/grok.py 
@@ -46,6 +46,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -297,6 +298,7 @@ def __init__( self.model = Grok1Model(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() # Monkey patch _prepare_weights to load pre-sharded weights setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights) @@ -313,9 +315,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/internlm2.py b/python/sglang/srt/models/internlm2.py index f2947e991b5..c0e4d19e128 100644 --- a/python/sglang/srt/models/internlm2.py +++ b/python/sglang/srt/models/internlm2.py @@ -40,6 +40,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -262,6 +263,7 @@ def __init__( self.model = InternLM2Model(config, quant_config) self.output = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -272,9 +274,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.output.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py index 9de8d33c5c1..42e96123035 100644 --- a/python/sglang/srt/models/llama2.py +++ b/python/sglang/srt/models/llama2.py @@ -39,8 +39,9 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm -from sglang.srt.layers.logits_processor import LogitProcessorOutput, LogitsProcessor +from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -302,6 +303,7 @@ def __init__( self.model = LlamaModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -310,11 +312,13 @@ def forward( positions: torch.Tensor, input_metadata: InputMetadata, input_embeds: torch.Tensor = 
None, - ) -> LogitProcessorOutput: + ) -> LogitsProcessorOutput: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def get_module_name(self, name): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/llama_classification.py b/python/sglang/srt/models/llama_classification.py index 02224971d6a..fdf6d28e556 100644 --- a/python/sglang/srt/models/llama_classification.py +++ b/python/sglang/srt/models/llama_classification.py @@ -24,7 +24,7 @@ from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from sglang.srt.layers.logits_processor import LogitProcessorOutput +from sglang.srt.layers.logits_processor import LogitsProcessorOutput from sglang.srt.model_executor.forward_batch_info import InputMetadata from sglang.srt.models.llama2 import LlamaModel @@ -65,7 +65,7 @@ def forward( (input_metadata.batch_size, self.config.classification_out_size) ).to(input_ids.device) - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=scores, next_token_logprobs=scores, normalized_prompt_logprobs=scores, diff --git a/python/sglang/srt/models/minicpm.py b/python/sglang/srt/models/minicpm.py index 49ff1926f39..0028ae67a8c 100644 --- a/python/sglang/srt/models/minicpm.py +++ b/python/sglang/srt/models/minicpm.py @@ -39,6 +39,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -297,6 +298,7 @@ def __init__( self.scale_width = self.config.hidden_size / self.config.dim_model_base self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -314,9 +316,11 @@ def forward( lm_head_weight = self.model.embed_tokens.weight else: lm_head_weight = self.lm_head.weight - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, lm_head_weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index d11f6c95198..ca38cb03bae 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -41,6 +41,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -299,6 +300,7 @@ def __init__( self.model = MixtralModel(config, quant_config=quant_config, prefix="model") self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() def forward( self, @@ -308,9 +310,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, 
input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/mixtral_quant.py b/python/sglang/srt/models/mixtral_quant.py index b02e925c5a0..97ac09ee629 100644 --- a/python/sglang/srt/models/mixtral_quant.py +++ b/python/sglang/srt/models/mixtral_quant.py @@ -45,6 +45,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -333,6 +334,7 @@ def __init__( self.model = MixtralModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -343,9 +345,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py index 93dae9585c3..4958a812985 100644 --- a/python/sglang/srt/models/qwen.py +++ b/python/sglang/srt/models/qwen.py @@ -39,6 +39,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -251,6 +252,7 @@ def __init__( vocab_size = ((config.vocab_size + 63) // 64) * 64 self.lm_head = ParallelLMHead(vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -260,10 +262,11 @@ def forward( input_metadata: InputMetadata, ): hidden_states = self.transformer(input_ids, positions, input_metadata) - next_tokens = self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - return next_tokens + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index fcf083e1b5d..76094b907a7 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -38,8 +38,9 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor -from sglang.srt.layers.pooler import EmbeddingPoolerOutput, Pooler, PoolingType +from sglang.srt.layers.pooler import Pooler, PoolingType from sglang.srt.layers.radix_attention import RadixAttention 
+from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata Qwen2Config = None @@ -276,6 +277,7 @@ def __init__( self.model = Qwen2Model(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True) @torch.no_grad() @@ -289,9 +291,11 @@ def forward( ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) if not get_embedding: - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output else: return self.pooler(hidden_states, input_metadata) diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index 9bdbd750660..e08695bc61a 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -35,10 +35,8 @@ ReplicatedLinear, RowParallelLinear, ) -from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, @@ -49,6 +47,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -366,6 +365,7 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -376,20 +376,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - - def compute_logits( - self, - input_ids: torch.Tensor, - hidden_states: torch.Tensor, - input_metadata: InputMetadata, - ) -> torch.Tensor: - logits = self.logits_processor( - input_ids, hidden_states, self.lm_head.weight, input_metadata - ) - return logits + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/stablelm.py b/python/sglang/srt/models/stablelm.py index 9e10f12f2a2..a3102baabd4 100644 --- a/python/sglang/srt/models/stablelm.py +++ b/python/sglang/srt/models/stablelm.py @@ -40,6 +40,7 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -249,6 +250,7 @@ def __init__( self.model = StableLMEpochModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, 
config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -259,9 +261,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py index bc70a9018ed..7843f4bd32d 100644 --- a/python/sglang/srt/sampling/sampling_batch_info.py +++ b/python/sglang/srt/sampling/sampling_batch_info.py @@ -21,10 +21,63 @@ class SamplingBatchInfo: top_ps: torch.Tensor = None top_ks: torch.Tensor = None min_ps: torch.Tensor = None - penalizer_orchestrator: penaltylib.BatchedPenalizerOrchestrator = None + + # Dispatch in CUDA graph + need_min_p_sampling: bool = False + + # Bias Tensors logit_bias: torch.Tensor = None vocab_mask: torch.Tensor = None + # Penalizer + penalizer_orchestrator: penaltylib.BatchedPenalizerOrchestrator = None + linear_penalties: torch.Tensor = None + scaling_penalties: torch.Tensor = None + + def has_bias(self): + return ( + self.logit_bias is not None + or self.vocab_mask is not None + or self.linear_penalties is not None + or self.scaling_penalties is not None + ) + + @classmethod + def dummy_one(cls, max_bs: int, vocab_size: int): + ret = cls(vocab_size=vocab_size) + ret.temperatures = torch.ones((max_bs, 1), dtype=torch.float, device="cuda") + ret.top_ps = torch.ones((max_bs,), dtype=torch.float, device="cuda") + ret.top_ks = torch.ones((max_bs,), dtype=torch.int, device="cuda") + ret.min_ps = torch.zeros((max_bs,), dtype=torch.float, device="cuda") + return ret + + def __getitem__(self, key): + if isinstance(key, slice): + # NOTE: We do not use cuda graph when there is bias tensors + assert not self.has_bias() + return SamplingBatchInfo( + vocab_size=self.vocab_size, + temperatures=self.temperatures[key], + top_ps=self.top_ps[key], + top_ks=self.top_ks[key], + min_ps=self.min_ps[key], + need_min_p_sampling=self.need_min_p_sampling, + ) + else: + raise NotImplementedError + + def inplace_assign(self, bs: int, other: SamplingBatchInfo): + # NOTE: We do not use cuda graph when there is bias tensors + assert not self.has_bias() + + self.vocab_size = other.vocab_size + self.need_min_p_sampling = other.need_min_p_sampling + + self.temperatures[:bs] = other.temperatures + self.top_ps[:bs] = other.top_ps + self.top_ks[:bs] = other.top_ks + self.min_ps[:bs] = other.min_ps + @classmethod def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): device = "cuda" @@ -45,6 +98,7 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): ret.min_ps = torch.tensor( [r.sampling_params.min_p for r in reqs], dtype=torch.float, device=device ) + ret.need_min_p_sampling = any(r.sampling_params.min_p > 0 for r in reqs) # Each penalizers will do nothing if they evaluate themselves as not required by looking at # the sampling_params of the requests (See {_is_required()} of each penalizers). 
So this @@ -72,6 +126,25 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): return ret + def prepare_penalties(self): + self.scaling_penalties = None + self.linear_penalties = None + + for penalizer in self.penalizer_orchestrator.penalizers.values(): + if isinstance(penalizer, penaltylib.BatchedRepetitionPenalizer): + if penalizer.is_prepared(): + self.scaling_penalties = penalizer.cumulated_repetition_penalties + else: + if penalizer.is_prepared(): + if self.linear_penalties is None: + bs = self.penalizer_orchestrator.batch.batch_size() + self.linear_penalties = torch.zeros( + (bs, self.vocab_size), + dtype=torch.float32, + device="cuda", + ) + self.linear_penalties = penalizer.apply(self.linear_penalties) + def update_regex_vocab_mask(self, batch: ScheduleBatch): bs, reqs = batch.batch_size(), batch.reqs device = "cuda" diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 37ed2cf9adc..2d3b0aefa33 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -180,7 +180,7 @@ def __init__( tp_size=tp_size, dtype=get_dtype_str(torch_dtype), port=port, - mem_fraction_static=0.7, + mem_fraction_static=0.69, trust_remote_code=False, is_embedding=not self.is_generation, ) From c5fe11a8e175d48b00b32aafd7412953180314e4 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Tue, 27 Aug 2024 00:28:24 +1000 Subject: [PATCH 26/88] chore: bump v0.2.14 (#1155) --- README.md | 2 +- python/pyproject.toml | 4 ++-- python/sglang/srt/model_executor/model_runner.py | 7 +++---- python/sglang/version.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 651108f9e2e..09e3d568692 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ ### Method 2: From source ``` # Use the last release branch -git clone -b v0.2.13 https://github.com/sgl-project/sglang.git +git clone -b v0.2.14 https://github.com/sgl-project/sglang.git cd sglang pip install --upgrade pip diff --git a/python/pyproject.toml b/python/pyproject.toml index 4908ad051fd..4a46adc3fef 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sglang" -version = "0.2.13" +version = "0.2.14" description = "SGLang is yet another fast serving framework for large language models and vision language models." 
readme = "README.md" requires-python = ">=3.8" @@ -23,7 +23,7 @@ dependencies = [ srt = ["aiohttp", "decord", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow", "psutil", "pydantic", "python-multipart", "torch", "uvicorn", "uvloop", "zmq", - "vllm==0.5.4", "outlines>=0.0.44"] + "vllm==0.5.5", "outlines>=0.0.44"] openai = ["openai>=1.0", "tiktoken"] anthropic = ["anthropic>=0.20.0"] litellm = ["litellm>=1.0.0"] diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index eb6fe319f91..00660611499 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -191,12 +191,11 @@ def load_model(self): self.model = get_model( model_config=self.vllm_model_config, - device_config=self.device_config, load_config=self.load_config, - lora_config=None, - multimodal_config=None, + device_config=self.device_config, parallel_config=None, scheduler_config=None, + lora_config=None, cache_config=None, ) self.sliding_window_size = ( @@ -627,4 +626,4 @@ def load_model_cls_srt(model_arch: str) -> Optional[Type[nn.Module]]: # Monkey patch model loader -setattr(ModelRegistry, "load_model_cls", load_model_cls_srt) +setattr(ModelRegistry, "_try_load_model_cls", load_model_cls_srt) diff --git a/python/sglang/version.py b/python/sglang/version.py index 11ef0928681..f3291e93b7d 100644 --- a/python/sglang/version.py +++ b/python/sglang/version.py @@ -1 +1 @@ -__version__ = "0.2.13" +__version__ = "0.2.14" From 9935f97b3e594e246776466d04134decff1b59ae Mon Sep 17 00:00:00 2001 From: havetc Date: Mon, 26 Aug 2024 18:37:26 +0200 Subject: [PATCH 27/88] [FEAT] JSON constrained support (#1125) Co-authored-by: Yineng Zhang --- docs/en/sampling_params.md | 3 + python/sglang/srt/constrained/fsm_cache.py | 13 ++- python/sglang/srt/constrained/jump_forward.py | 1 + python/sglang/srt/managers/schedule_batch.py | 7 ++ python/sglang/srt/managers/tp_worker.py | 21 +++- python/sglang/srt/openai_api/adapter.py | 2 + python/sglang/srt/openai_api/protocol.py | 2 + python/sglang/srt/sampling/sampling_params.py | 4 + test/srt/run_suite.py | 1 + test/srt/test_json_constrained.py | 96 +++++++++++++++++++ 10 files changed, 147 insertions(+), 3 deletions(-) create mode 100644 test/srt/test_json_constrained.py diff --git a/docs/en/sampling_params.md b/docs/en/sampling_params.md index 54b03bf3254..0e1c13e4bdf 100644 --- a/docs/en/sampling_params.md +++ b/docs/en/sampling_params.md @@ -60,6 +60,9 @@ spaces_between_special_tokens: bool = True, regex: Optional[str] = None, # Do parallel sampling and return `n` outputs. n: int = 1, +# Constrains the output to follow a given JSON schema. +# `regex` and `json_schema` cannot be set at the same time. +json_schema: Optional[str] = None, ## Penalties. See [Performance Implications on Penalties] section below for more informations. 
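A quick, hedged illustration of how the new `json_schema` parameter is meant to be used end to end. Everything here except the `json_schema` field itself is an assumption for demonstration: the host/port, the `/generate` payload shape, the prompt, and the availability of `requests` are not part of this patch; only the schema-to-regex compilation mirrors the `fsm_cache.py` change shown below, and the schema/`regex` exclusivity comes from the docs change above.

```python
# Sketch only: exercise the json_schema sampling parameter added by this patch.
# Assumes a running SGLang server at localhost:30000 (illustrative, not taken
# from the patch) and the `requests` and `outlines` packages installed.
import json

import requests
from outlines.fsm.json_schema import build_regex_from_schema

schema = json.dumps(
    {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "population": {"type": "integer"},
        },
        "required": ["name", "population"],
    }
)

# Server-side, FSMCache.init_value in json_schema_mode compiles the schema to a
# plain regular expression before building a RegexGuide (see fsm_cache.py below).
print(build_regex_from_schema(schema)[:80])

# Client-side, the schema travels as a JSON string inside sampling_params; it
# cannot be combined with `regex`, per the docs change above.
resp = requests.post(
    "http://localhost:30000/generate",
    json={
        "text": "Give the name and population of the capital of France as JSON: ",
        "sampling_params": {
            "max_new_tokens": 64,
            "temperature": 0,
            "json_schema": schema,
        },
    },
)
print(resp.json()["text"])
```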
diff --git a/python/sglang/srt/constrained/fsm_cache.py b/python/sglang/srt/constrained/fsm_cache.py index fa41f90de3c..6bc6ea6d265 100644 --- a/python/sglang/srt/constrained/fsm_cache.py +++ b/python/sglang/srt/constrained/fsm_cache.py @@ -15,6 +15,8 @@ """Cache for the compressed finite state machine.""" +from outlines.fsm.json_schema import build_regex_from_schema + from sglang.srt.constrained import RegexGuide, TransformerTokenizer from sglang.srt.constrained.base_tool_cache import BaseToolCache @@ -26,9 +28,12 @@ def __init__( tokenizer_args_dict, enable=True, skip_tokenizer_init=False, + json_schema_mode=False, ): super().__init__(enable=enable) + self.json_schema_mode = json_schema_mode + if ( skip_tokenizer_init or tokenizer_path.endswith(".json") @@ -72,5 +77,9 @@ def fset(self, value): tokenizer_path, **tokenizer_args_dict ) - def init_value(self, regex): - return RegexGuide(regex, self.outlines_tokenizer) + def init_value(self, value): + if self.json_schema_mode: + regex = build_regex_from_schema(value) + return RegexGuide(regex, self.outlines_tokenizer), regex + else: + return RegexGuide(value, self.outlines_tokenizer) diff --git a/python/sglang/srt/constrained/jump_forward.py b/python/sglang/srt/constrained/jump_forward.py index b00c48d4784..244931e0509 100644 --- a/python/sglang/srt/constrained/jump_forward.py +++ b/python/sglang/srt/constrained/jump_forward.py @@ -23,6 +23,7 @@ import interegular import outlines.caching +from outlines.fsm.json_schema import build_regex_from_schema from sglang.srt.constrained import ( FSMInfo, diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index dfd32dea9c3..cc180ba21bc 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -268,7 +268,14 @@ def jump_forward_and_retokenize(self, jump_forward_str, next_state): all_text = self.origin_input_text + self.decoded_text + jump_forward_str all_ids = self.tokenizer.encode(all_text) + if not all_ids: + warnings.warn("Encoded all_text resulted in empty all_ids") + return False + prompt_tokens = len(self.origin_input_ids_unpadded) + if prompt_tokens > len(all_ids): + warnings.warn("prompt_tokens is larger than encoded all_ids") + return False if all_ids[prompt_tokens - 1] != self.origin_input_ids_unpadded[-1]: # TODO(lsyin): fix token fusion diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index ddf20970e71..127f71900ae 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -197,6 +197,16 @@ def __init__( "trust_remote_code": server_args.trust_remote_code, }, skip_tokenizer_init=server_args.skip_tokenizer_init, + json_schema_mode=False, + ) + self.json_fsm_cache = FSMCache( + server_args.tokenizer_path, + { + "tokenizer_mode": server_args.tokenizer_mode, + "trust_remote_code": server_args.trust_remote_code, + }, + skip_tokenizer_init=server_args.skip_tokenizer_init, + json_schema_mode=True, ) self.jump_forward_cache = JumpForwardCache() @@ -349,8 +359,17 @@ def handle_generate_request( req.top_logprobs_num = recv_req.top_logprobs_num req.stream = recv_req.stream + # Init regex fsm fron json + if req.sampling_params.json_schema is not None: + req.regex_fsm, computed_regex_string = self.json_fsm_cache.query( + req.sampling_params.json_schema + ) + if not self.disable_regex_jump_forward: + req.jump_forward_map = self.jump_forward_cache.query( + computed_regex_string + ) # Init regex fsm - if 
req.sampling_params.regex is not None: + elif req.sampling_params.regex is not None: req.regex_fsm = self.regex_fsm_cache.query(req.sampling_params.regex) if not self.disable_regex_jump_forward: req.jump_forward_map = self.jump_forward_cache.query( diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index f325e84b2f9..148f2689d52 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -434,6 +434,7 @@ def v1_generate_request(all_requests: List[CompletionRequest]): "frequency_penalty": request.frequency_penalty, "repetition_penalty": request.repetition_penalty, "regex": request.regex, + "json_schema": request.json_schema, "n": request.n, "ignore_eos": request.ignore_eos, } @@ -802,6 +803,7 @@ def v1_chat_generate_request( "frequency_penalty": request.frequency_penalty, "repetition_penalty": request.repetition_penalty, "regex": request.regex, + "json_schema": request.json_schema, "n": request.n, } ) diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py index 758e48edefb..ce51e1c029f 100644 --- a/python/sglang/srt/openai_api/protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -161,6 +161,7 @@ class CompletionRequest(BaseModel): # Extra parameters for SRT backend only and will be ignored by OpenAI models. regex: Optional[str] = None + json_schema: Optional[str] = None ignore_eos: Optional[bool] = False min_tokens: Optional[int] = 0 repetition_penalty: Optional[float] = 1.0 @@ -262,6 +263,7 @@ class ChatCompletionRequest(BaseModel): # Extra parameters for SRT backend only and will be ignored by OpenAI models. regex: Optional[str] = None + json_schema: Optional[str] = None min_tokens: Optional[int] = 0 repetition_penalty: Optional[float] = 1.0 stop_token_ids: Optional[List[int]] = Field(default_factory=list) diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py index c30717dd7cb..8111757d85e 100644 --- a/python/sglang/srt/sampling/sampling_params.py +++ b/python/sglang/srt/sampling/sampling_params.py @@ -39,6 +39,7 @@ def __init__( spaces_between_special_tokens: bool = True, regex: Optional[str] = None, n: int = 1, + json_schema: Optional[str] = None, ) -> None: self.temperature = temperature self.top_p = top_p @@ -56,6 +57,7 @@ def __init__( self.spaces_between_special_tokens = spaces_between_special_tokens self.regex = regex self.n = n + self.json_schema = json_schema # Process some special cases if self.temperature < _SAMPLING_EPS: @@ -106,6 +108,8 @@ def verify(self): f"min_new_tokens must be in (0, max_new_tokens({self.max_new_tokens})], got " f"{self.min_new_tokens}." 
) + if self.regex is not None and self.json_schema is not None: + raise ValueError("regex and json_schema cannot be both set.") def normalize(self, tokenizer): # Process stop strings diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 2351579f190..cafcf3f2d59 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -13,6 +13,7 @@ "test_eval_accuracy_mini.py", "test_large_max_new_tokens.py", "test_openai_server.py", + "test_json_constrained.py", "test_skip_tokenizer_init.py", "test_torch_compile.py", "test_triton_attn_backend.py", diff --git a/test/srt/test_json_constrained.py b/test/srt/test_json_constrained.py new file mode 100644 index 00000000000..5393ecc33ca --- /dev/null +++ b/test/srt/test_json_constrained.py @@ -0,0 +1,96 @@ +import json +import unittest + +import openai +import requests + +from sglang.srt.utils import kill_child_process +from sglang.test.test_utils import ( + DEFAULT_MODEL_NAME_FOR_TEST, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +class TestJSONConstrained(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.model = DEFAULT_MODEL_NAME_FOR_TEST + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.json_schema = json.dumps( + { + "type": "object", + "properties": { + "name": {"type": "string", "pattern": "^[\\w]+$"}, + "population": {"type": "integer"}, + }, + "required": ["name", "population"], + } + ) + cls.process = popen_launch_server( + cls.model, cls.base_url, timeout=300, api_key=cls.api_key + ) + + @classmethod + def tearDownClass(cls): + kill_child_process(cls.process.pid) + + def run_decode(self, return_logprob=False, top_logprobs_num=0, n=1): + headers = {"Authorization": f"Bearer {self.api_key}"} + response = requests.post( + self.base_url + "/generate", + json={ + "text": "The capital of France is", + "sampling_params": { + "temperature": 0 if n == 1 else 0.5, + "max_new_tokens": 128, + "n": n, + "stop_token_ids": [119690], + "json_schema": self.json_schema, + }, + "stream": False, + "return_logprob": return_logprob, + "top_logprobs_num": top_logprobs_num, + "logprob_start_len": 0, + }, + headers=headers, + ) + print(json.dumps(response.json())) + print("=" * 100) + try: + js_obj = json.loads(response.json()["text"]) + except (TypeError, json.decoder.JSONDecodeError): + raise + assert isinstance(js_obj["name"], str) + assert isinstance(js_obj["population"], int) + + def test_json_generate(self): + self.run_decode() + + def test_json_openai(self): + client = openai.Client(api_key=self.api_key, base_url=f"{self.base_url}/v1") + + response = client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": "You are a helpful AI assistant"}, + {"role": "user", "content": "Introduce the capital of France."}, + ], + temperature=0, + max_tokens=128, + extra_body={"json_schema": self.json_schema}, + ) + text = response.choices[0].message.content + + try: + js_obj = json.loads(text) + except (TypeError, json.decoder.JSONDecodeError): + print("JSONDecodeError", text) + raise + assert isinstance(js_obj["name"], str) + assert isinstance(js_obj["population"], int) + + +if __name__ == "__main__": + unittest.main() From c61a1b6f97c61ebd80bada10c60c8ab75d2745b9 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Mon, 26 Aug 2024 13:52:58 -0700 Subject: [PATCH 28/88] Torch compile CI throughput test (#1223) --- test/srt/test_torch_compile.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/test/srt/test_torch_compile.py 
b/test/srt/test_torch_compile.py index 26daf4fa578..e8cafa15d25 100644 --- a/test/srt/test_torch_compile.py +++ b/test/srt/test_torch_compile.py @@ -1,6 +1,8 @@ import unittest from types import SimpleNamespace +import requests + from sglang.srt.utils import kill_child_process from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( @@ -20,7 +22,7 @@ def setUpClass(cls): cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=["--enable-torch-compile"], + other_args=["--enable-torch-compile", "--disable-radix-cache"], ) @classmethod @@ -39,6 +41,33 @@ def test_mmlu(self): metrics = run_eval(args) assert metrics["score"] >= 0.6 + def run_decode(self, max_new_tokens): + response = requests.post( + self.base_url + "/generate", + json={ + "text": "The capital of France is", + "sampling_params": { + "temperature": 0, + "max_new_tokens": max_new_tokens, + }, + "ignore_eos": True, + }, + ) + return response.json() + + def test_throughput(self): + import time + + max_tokens = 256 + + tic = time.time() + res = self.run_decode(max_tokens) + tok = time.time() + print(res["text"]) + throughput = max_tokens / (tok - tic) + print(f"Throughput: {throughput} tokens/s") + assert throughput >= 152 + if __name__ == "__main__": unittest.main() From 2f1d92834f41df42e266ed6d7036b4add906d21f Mon Sep 17 00:00:00 2001 From: caiyueliang <393900414@qq.com> Date: Tue, 27 Aug 2024 07:28:26 +0800 Subject: [PATCH 29/88] [FEAT] Support batches cancel (#1222) Co-authored-by: Yineng Zhang --- python/sglang/srt/openai_api/adapter.py | 87 +++++++++++++++++++++++-- python/sglang/srt/server.py | 7 ++ test/srt/test_openai_server.py | 34 +++++++++- 3 files changed, 122 insertions(+), 6 deletions(-) diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 148f2689d52..4feb632b0b8 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -275,10 +275,12 @@ async def process_batch(tokenizer_manager, batch_id: str, batch_request: BatchRe end_point = batch_storage[batch_id].endpoint file_request_list = [] all_requests = [] + request_ids = [] for line in lines: request_data = json.loads(line) file_request_list.append(request_data) body = request_data["body"] + request_ids.append(request_data["custom_id"]) # Although streaming is supported for standalone completions, it is not supported in # batch mode (multiple completions in single request). 
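The hunks that follow add batch cancellation to the OpenAI-compatible adapter. As a minimal sketch of the resulting client-side flow, assuming a local server on port 30000 launched with `--api-key sk-123456` and a hypothetical `batch_input.jsonl` input file (the calls mirror the test changes at the end of this patch):

```python
import openai

# Illustrative only: api_key, port, and the input file name are assumptions.
client = openai.Client(api_key="sk-123456", base_url="http://127.0.0.1:30000/v1")

with open("batch_input.jsonl", "rb") as f:
    uploaded_file = client.files.create(file=f, purpose="batch")

batch_job = client.batches.create(
    input_file_id=uploaded_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)

# Routes to the new POST /v1/batches/{batch_id}/cancel endpoint; the server
# flips the status to "cancelling", then to "cancelled" once requests abort.
batch_job = client.batches.cancel(batch_id=batch_job.id)
assert batch_job.status == "cancelling"
```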
@@ -289,12 +291,16 @@ async def process_batch(tokenizer_manager, batch_id: str, batch_request: BatchRe
             all_requests.append(ChatCompletionRequest(**body))
         elif end_point == "/v1/completions":
             all_requests.append(CompletionRequest(**body))
+
     if end_point == "/v1/chat/completions":
         adapted_request, request = v1_chat_generate_request(
-            all_requests, tokenizer_manager
+            all_requests, tokenizer_manager, request_ids=request_ids
         )
     elif end_point == "/v1/completions":
-        adapted_request, request = v1_generate_request(all_requests)
+        adapted_request, request = v1_generate_request(
+            all_requests, request_ids=request_ids
+        )
+
     try:
         ret = await tokenizer_manager.generate_request(adapted_request).__anext__()
         if not isinstance(ret, list):
@@ -326,6 +332,7 @@ async def process_batch(tokenizer_manager, batch_id: str, batch_request: BatchRe
             }
             all_ret.append(response_json)
             completed_requests += 1
+
         # Write results to a new file
         output_file_id = f"backend_result_file-{uuid.uuid4()}"
         global storage_dir
@@ -372,6 +379,72 @@ async def v1_retrieve_batch(batch_id: str):
     return batch_response


+async def v1_cancel_batch(tokenizer_manager, batch_id: str):
+    # Retrieve the batch job from the in-memory storage
+    batch_response = batch_storage.get(batch_id)
+    if batch_response is None:
+        raise HTTPException(status_code=404, detail="Batch not found")
+
+    # Only cancel when the status is "validating" or "in_progress"
+    if batch_response.status in ["validating", "in_progress"]:
+        # Start cancelling the batch asynchronously
+        asyncio.create_task(
+            cancel_batch(
+                tokenizer_manager=tokenizer_manager,
+                batch_id=batch_id,
+                input_file_id=batch_response.input_file_id,
+            )
+        )
+
+        # Update batch status to "cancelling"
+        batch_response.status = "cancelling"
+
+        return batch_response
+    else:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Current status is {batch_response.status}, no need to cancel",
+        )
+
+
+async def cancel_batch(tokenizer_manager, batch_id: str, input_file_id: str):
+    try:
+        # Update the batch status to "cancelling"
+        batch_storage[batch_id].status = "cancelling"
+
+        # Retrieve the input file content
+        input_file_request = file_id_request.get(input_file_id)
+        if not input_file_request:
+            raise ValueError("Input file not found")
+
+        # Parse the JSONL file and process each request
+        input_file_path = file_id_storage.get(input_file_id)
+        with open(input_file_path, "r", encoding="utf-8") as f:
+            lines = f.readlines()
+
+        file_request_list = []
+        request_ids = []
+        for line in lines:
+            request_data = json.loads(line)
+            file_request_list.append(request_data)
+            request_ids.append(request_data["custom_id"])
+
+        # Cancel requests by request_ids
+        for rid in request_ids:
+            tokenizer_manager.abort_request(rid=rid)
+
+        retrieve_batch = batch_storage[batch_id]
+        retrieve_batch.status = "cancelled"
+
+    except Exception as e:
+        logger.error(f"error in SGLang: {e}")
+        # Update batch status to "failed"
+        retrieve_batch = batch_storage[batch_id]
+        retrieve_batch.status = "failed"
+        retrieve_batch.failed_at = int(time.time())
+        retrieve_batch.errors = {"message": str(e)}
+
+
 async def v1_retrieve_file(file_id: str):
     # Retrieve the batch job from the in-memory storage
     file_response = file_id_response.get(file_id)
@@ -392,7 +465,9 @@ def iter_file():
     return StreamingResponse(iter_file(), media_type="application/octet-stream")


-def v1_generate_request(all_requests: List[CompletionRequest]):
+def v1_generate_request(
+    all_requests: List[CompletionRequest], request_ids: List[str] = None
+):
     prompts = []
     sampling_params_list = []
return_logprobs = [] @@ -464,6 +539,7 @@ def v1_generate_request(all_requests: List[CompletionRequest]): logprob_start_len=logprob_start_lens, return_text_in_logprobs=True, stream=all_requests[0].stream, + rid=request_ids, ) if len(all_requests) == 1: @@ -746,7 +822,9 @@ async def generate_stream_resp(): def v1_chat_generate_request( - all_requests: List[ChatCompletionRequest], tokenizer_manager + all_requests: List[ChatCompletionRequest], + tokenizer_manager, + request_ids: List[str] = None, ): input_ids = [] sampling_params_list = [] @@ -834,6 +912,7 @@ def v1_chat_generate_request( top_logprobs_num=top_logprobs_nums, stream=all_requests[0].stream, return_text_in_logprobs=True, + rid=request_ids, ) if len(all_requests) == 1: return adapted_request, all_requests[0] diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 021f231aa7c..6d1fc9fda7f 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -59,6 +59,7 @@ from sglang.srt.openai_api.adapter import ( load_chat_template_for_openai_api, v1_batches, + v1_cancel_batch, v1_chat_completions, v1_completions, v1_delete_file, @@ -246,6 +247,12 @@ async def openai_v1_batches(raw_request: Request): return await v1_batches(tokenizer_manager, raw_request) +@app.post("/v1/batches/{batch_id}/cancel") +async def cancel_batches(batch_id: str): + # https://platform.openai.com/docs/api-reference/batch/cancel + return await v1_cancel_batch(tokenizer_manager, batch_id) + + @app.get("/v1/batches/{batch_id}") async def retrieve_batch(batch_id: str): return await v1_retrieve_batch(batch_id) diff --git a/test/srt/test_openai_server.py b/test/srt/test_openai_server.py index ce130956de8..cfc65b7e6aa 100644 --- a/test/srt/test_openai_server.py +++ b/test/srt/test_openai_server.py @@ -256,8 +256,7 @@ def run_chat_completion_stream(self, logprobs, parallel_sample_num=1): index, True ), f"index {index} is not found in the response" - def run_batch(self, mode): - client = openai.Client(api_key=self.api_key, base_url=self.base_url) + def _create_batch(self, mode, client): if mode == "completion": input_file_path = "complete_input.jsonl" # write content to input file @@ -333,9 +332,11 @@ def run_batch(self, mode): }, }, ] + with open(input_file_path, "w") as file: for line in content: file.write(json.dumps(line) + "\n") + with open(input_file_path, "rb") as file: uploaded_file = client.files.create(file=file, purpose="batch") if mode == "completion": @@ -348,6 +349,13 @@ def run_batch(self, mode): endpoint=endpoint, completion_window=completion_window, ) + + return batch_job, content + + def run_batch(self, mode): + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + batch_job, content = self._create_batch(mode=mode, client=client) + while batch_job.status not in ["completed", "failed", "cancelled"]: time.sleep(3) print( @@ -371,6 +379,24 @@ def run_batch(self, mode): ] assert len(results) == len(content) + def run_cancel_batch(self, mode): + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + batch_job, _ = self._create_batch(mode=mode, client=client) + + assert batch_job.status not in ["cancelling", "cancelled"] + + batch_job = client.batches.cancel(batch_id=batch_job.id) + assert batch_job.status == "cancelling" + + while batch_job.status not in ["failed", "cancelled"]: + batch_job = client.batches.retrieve(batch_job.id) + print( + f"Batch job status: {batch_job.status}...trying again in 3 seconds..." 
+ ) + time.sleep(3) + + assert batch_job.status == "cancelled" + def test_completion(self): for echo in [False, True]: for logprobs in [None, 5]: @@ -414,6 +440,10 @@ def test_batch(self): for mode in ["completion", "chat"]: self.run_batch(mode) + def test_calcel_batch(self): + for mode in ["completion", "chat"]: + self.run_cancel_batch(mode) + def test_regex(self): client = openai.Client(api_key=self.api_key, base_url=self.base_url) From 5ff25cdf5b1310e83d9e595142b39ae4d7b561e9 Mon Sep 17 00:00:00 2001 From: yichuan~ <73766326+yichuan520030910320@users.noreply.github.com> Date: Mon, 26 Aug 2024 22:04:52 -0700 Subject: [PATCH 30/88] [Minor] add delete test and delete tmp file on ci server (#1227) --- .../usage/openai_parallel_sample.py | 153 ------------------ test/srt/test_openai_server.py | 11 +- 2 files changed, 8 insertions(+), 156 deletions(-) delete mode 100644 examples/frontend_language/usage/openai_parallel_sample.py diff --git a/examples/frontend_language/usage/openai_parallel_sample.py b/examples/frontend_language/usage/openai_parallel_sample.py deleted file mode 100644 index 753e66c744f..00000000000 --- a/examples/frontend_language/usage/openai_parallel_sample.py +++ /dev/null @@ -1,153 +0,0 @@ -import openai - -client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY") - -# Text completion -response = client.completions.create( - model="default", - prompt="I am a robot and I want to study like humans. Now let's tell a story. Once upon a time, there was a little", - n=1, - temperature=0.8, - max_tokens=32, -) -print(response) - - -# Text completion -response = client.completions.create( - model="default", - prompt="I am a robot and I want to study like humans. Now let's tell a story. Once upon a time, there was a little", - n=5, - temperature=0.8, - max_tokens=320, -) -print(response) - - -# Text completion -response = client.completions.create( - model="default", - prompt="I am a robot and I want to study like humans. Now let's tell a story. Once upon a time, there was a little", - n=3, - temperature=0.8, - max_tokens=32, -) -print(response) - - -# Text completion -response = client.completions.create( - model="default", - prompt=["The name of the famous soccer player is"], - n=1, - temperature=0.8, - max_tokens=128, -) -print(response) - - -# Text completion -response = client.completions.create( - model="default", - prompt=["The name of the famous soccer player is ", "The capital of US is"], - n=1, - temperature=0.8, - max_tokens=32, -) -print(response) - - -# Text completion -response = client.completions.create( - model="default", - prompt=["The name of the famous soccer player is ", "The capital of US is"], - n=3, - temperature=0.8, - max_tokens=32, -) -print(response) - - -response = client.completions.create( - model="default", - prompt=[ - "prompt1: I am a robot and I want to learn like humans. Now let's begin a tale. Once upon a time, there was a small", - "prompt2: As a robot, my goal is to understand human learning. Let's start a story. In a faraway land, there lived a tiny", - "prompt3: Being a robot, I aspire to study like people. Let's share a story. Long ago, there was a little", - "prompt4: I am a robot aiming to learn like humans. Let's narrate a story. Once, in a distant kingdom, there was a young", - "prompt5: As a robot, I seek to learn in human ways. Let's tell a story. 
Once upon a time, in a small village, there was a young", - ], - n=1, - temperature=0.8, - max_tokens=320, -) -print(response) - - -# Text completion -response = client.completions.create( - model="default", - prompt=[ - "The capital of France is", - "The capital of Germany is", - "The capital of US is", - ], - n=3, - temperature=0.8, - max_tokens=32, -) -print(response) - -# Chat completion -response = client.chat.completions.create( - model="default", - messages=[ - {"role": "system", "content": "You are a helpful AI assistant"}, - {"role": "user", "content": "List 3 countries and their capitals."}, - ], - temperature=0.8, - max_tokens=1, - logprobs=True, - top_logprobs=3, -) -print(response) - -# Chat completion -response = client.chat.completions.create( - model="default", - messages=[ - {"role": "system", "content": "You are a helpful AI assistant"}, - {"role": "user", "content": "List 3 countries and their capitals."}, - ], - temperature=0.8, - max_tokens=1, - n=1, -) -print(response) - -# Chat completion -response = client.chat.completions.create( - model="default", - messages=[ - {"role": "system", "content": "You are a helpful AI assistant"}, - {"role": "user", "content": "List 3 countries and their capitals."}, - ], - temperature=0.8, - max_tokens=1, - logprobs=True, - top_logprobs=3, -) -print(response) - -# Chat completion -response = client.chat.completions.create( - model="default", - messages=[ - {"role": "system", "content": "You are a helpful AI assistant"}, - {"role": "user", "content": "List 3 countries and their capitals."}, - ], - temperature=0.8, - max_tokens=1, - n=4, -) -print(response) diff --git a/test/srt/test_openai_server.py b/test/srt/test_openai_server.py index cfc65b7e6aa..3fc5785517f 100644 --- a/test/srt/test_openai_server.py +++ b/test/srt/test_openai_server.py @@ -350,11 +350,11 @@ def _create_batch(self, mode, client): completion_window=completion_window, ) - return batch_job, content + return batch_job, content, uploaded_file def run_batch(self, mode): client = openai.Client(api_key=self.api_key, base_url=self.base_url) - batch_job, content = self._create_batch(mode=mode, client=client) + batch_job, content, uploaded_file = self._create_batch(mode=mode, client=client) while batch_job.status not in ["completed", "failed", "cancelled"]: time.sleep(3) @@ -378,10 +378,13 @@ def run_batch(self, mode): if line.strip() != "" ] assert len(results) == len(content) + for delete_fid in [uploaded_file.id, result_file_id]: + del_pesponse = client.files.delete(delete_fid) + assert del_pesponse.deleted def run_cancel_batch(self, mode): client = openai.Client(api_key=self.api_key, base_url=self.base_url) - batch_job, _ = self._create_batch(mode=mode, client=client) + batch_job, _, uploaded_file = self._create_batch(mode=mode, client=client) assert batch_job.status not in ["cancelling", "cancelled"] @@ -396,6 +399,8 @@ def run_cancel_batch(self, mode): time.sleep(3) assert batch_job.status == "cancelled" + del_response = client.files.delete(uploaded_file.id) + assert del_response.deleted def test_completion(self): for echo in [False, True]: From 909f34363bf551711c20dbadbd5cc7fb6517a614 Mon Sep 17 00:00:00 2001 From: havetc Date: Tue, 27 Aug 2024 12:10:46 +0200 Subject: [PATCH 31/88] [FIX] Wrong logger (#1230) --- python/sglang/srt/managers/schedule_batch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index cc180ba21bc..926266a628f 100644 --- 
a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -269,12 +269,12 @@ def jump_forward_and_retokenize(self, jump_forward_str, next_state): all_text = self.origin_input_text + self.decoded_text + jump_forward_str all_ids = self.tokenizer.encode(all_text) if not all_ids: - warnings.warn("Encoded all_text resulted in empty all_ids") + logger.warning("Encoded all_text resulted in empty all_ids") return False prompt_tokens = len(self.origin_input_ids_unpadded) if prompt_tokens > len(all_ids): - warnings.warn("prompt_tokens is larger than encoded all_ids") + logger.warning("prompt_tokens is larger than encoded all_ids") return False if all_ids[prompt_tokens - 1] != self.origin_input_ids_unpadded[-1]: From 3602692c7ca7c3757cc3d2b5dfc829209205731a Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Tue, 27 Aug 2024 21:15:31 +1000 Subject: [PATCH 32/88] feat: replace get_act_fn for gpt_bigcode (#1231) --- python/sglang/srt/layers/activation.py | 83 +++++++++++++++++++++++++ python/sglang/srt/models/gpt_bigcode.py | 2 +- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py index d0e06266040..832d2b70130 100644 --- a/python/sglang/srt/layers/activation.py +++ b/python/sglang/srt/layers/activation.py @@ -13,10 +13,20 @@ """Fused operators for activation layers.""" +from typing import Optional + import torch +import torch.nn as nn import torch.nn.functional as F from flashinfer.activation import gelu_tanh_and_mul, silu_and_mul +from vllm.distributed import ( + divide, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, +) from vllm.model_executor.custom_op import CustomOp +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.utils import set_weight_attrs class SiluAndMul(CustomOp): @@ -53,3 +63,76 @@ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: out = torch.empty(output_shape, dtype=x.dtype, device=x.device) gelu_tanh_and_mul(x, out) return out + + +class ScaledActivation(nn.Module): + """An activation function with post-scale parameters. + + This is used for some quantization methods like AWQ. 
+ """ + + def __init__( + self, + act_module: nn.Module, + intermediate_size: int, + input_is_parallel: bool = True, + params_dtype: Optional[torch.dtype] = None, + ): + super().__init__() + self.act = act_module + self.input_is_parallel = input_is_parallel + if input_is_parallel: + tp_size = get_tensor_model_parallel_world_size() + intermediate_size_per_partition = divide(intermediate_size, tp_size) + else: + intermediate_size_per_partition = intermediate_size + if params_dtype is None: + params_dtype = torch.get_default_dtype() + self.scales = nn.Parameter( + torch.empty(intermediate_size_per_partition, dtype=params_dtype) + ) + set_weight_attrs(self.scales, {"weight_loader": self.weight_loader}) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.act(x) / self.scales + + def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor): + param_data = param.data + if self.input_is_parallel: + tp_rank = get_tensor_model_parallel_rank() + shard_size = param_data.shape[0] + start_idx = tp_rank * shard_size + loaded_weight = loaded_weight.narrow(0, start_idx, shard_size) + assert param_data.shape == loaded_weight.shape + param_data.copy_(loaded_weight) + + +_ACTIVATION_REGISTRY = { + "gelu": nn.GELU(), + "gelu_pytorch_tanh": nn.GELU(approximate="tanh"), +} + + +def get_act_fn( + act_fn_name: str, + quant_config: Optional[QuantizationConfig] = None, + intermediate_size: Optional[int] = None, + input_is_parallel: bool = True, + params_dtype: Optional[torch.dtype] = None, +) -> nn.Module: + """Get an activation function by name.""" + act_fn_name = act_fn_name.lower() + if act_fn_name not in _ACTIVATION_REGISTRY: + raise ValueError(f"Activation function {act_fn_name!r} is not supported.") + + act_fn = _ACTIVATION_REGISTRY[act_fn_name] + if quant_config is not None and act_fn_name in quant_config.get_scaled_act_names(): + if intermediate_size is None: + raise ValueError( + "intermediate_size must be specified for scaled " + "activation functions." + ) + return ScaledActivation( + act_fn, intermediate_size, input_is_parallel, params_dtype + ) + return act_fn diff --git a/python/sglang/srt/models/gpt_bigcode.py b/python/sglang/srt/models/gpt_bigcode.py index 979d06886e3..dc828f0142e 100644 --- a/python/sglang/srt/models/gpt_bigcode.py +++ b/python/sglang/srt/models/gpt_bigcode.py @@ -23,7 +23,6 @@ from transformers import GPTBigCodeConfig from vllm.config import CacheConfig, LoRAConfig from vllm.distributed import get_tensor_model_parallel_world_size -from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.linear import ( ColumnParallelLinear, QKVParallelLinear, @@ -33,6 +32,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from sglang.srt.layers.activation import get_act_fn from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.layers.sampler import Sampler From c8a9e79186503c3bd1955cdbd4c364b04db333fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Artificial=E6=9B=BE=E5=B0=8F=E5=81=A5?= <875100501@qq.com> Date: Wed, 28 Aug 2024 14:51:41 +0800 Subject: [PATCH 33/88] Fix readme (#1236) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 09e3d568692..3f03fd7f1c9 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,7 @@ docker run --gpus all \ ### Method 4: Using docker compose
+<summary>More</summary>

 > This method is recommended if you plan to serve it as a service.
 > A better approach is to use the [k8s-sglang-service.yaml](./docker/k8s-sglang-service.yaml).

@@ -94,6 +95,7 @@ docker run --gpus all \
 ### Method 5: Run on Kubernetes or Clouds with SkyPilot

+<summary>More</summary>

 To deploy on Kubernetes or 12+ clouds, you can use [SkyPilot](https://github.com/skypilot-org/skypilot).

@@ -262,6 +264,7 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
 #### Use Models From ModelScope

+<summary>More</summary>

 To use a model from [ModelScope](https://www.modelscope.cn), set the environment variable SGLANG_USE_MODELSCOPE.
 ```
@@ -276,6 +279,7 @@ SGLANG_USE_MODELSCOPE=true python -m sglang.launch_server --model-path qwen/Qwen
 #### Run Llama 3.1 405B

+<summary>More</summary>

 ```bash
 # Run 405B (fp8) on a single node

From 1ece2cda3dde1df62c924c0288ec514f5c5e2af5 Mon Sep 17 00:00:00 2001
From: Liangsheng Yin
Date: Wed, 28 Aug 2024 00:37:32 -0700
Subject: [PATCH 34/88] Fix bench latency benchmark (#1225)

---
 .github/workflows/e2e-test.yml |  5 +++++
 python/sglang/bench_latency.py | 10 ++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
index 7f555110d9d..11c94775c15 100644
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -38,6 +38,11 @@ jobs:
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default

+      - name: Benchmark Serving Latency
+        timeout-minutes: 10
+        run: |
+          python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
+
       - name: Benchmark Serving Throughput (w/o RadixAttention)
         timeout-minutes: 10
         run: |
diff --git a/python/sglang/bench_latency.py b/python/sglang/bench_latency.py
index dea910f5772..6a918fbd112 100644
--- a/python/sglang/bench_latency.py
+++ b/python/sglang/bench_latency.py
@@ -200,16 +200,14 @@ def extend(reqs, model_runner):
         tree_cache=None,
     )
     batch.prepare_for_extend(model_runner.model_config.vocab_size)
-    output = model_runner.forward(batch, ForwardMode.EXTEND)
-    next_token_ids = batch.sample(output.next_token_logits)
-    return next_token_ids, output.next_token_logits, batch
+    sample_output, logits_output = model_runner.forward(batch, ForwardMode.EXTEND)
+    return sample_output.batch_next_token_ids, logits_output.next_token_logits, batch


 def decode(input_token_ids, batch, model_runner):
     batch.prepare_for_decode(input_token_ids.cpu().numpy())
-    output = model_runner.forward(batch, ForwardMode.DECODE)
-    next_token_ids = batch.sample(output.next_token_logits)
-    return next_token_ids, output.next_token_logits
+    sample_output, logits_output = model_runner.forward(batch, ForwardMode.DECODE)
+    return sample_output.batch_next_token_ids, logits_output.next_token_logits


 @torch.inference_mode()

From 6cc38b2bf31c141e3ae06ca8c1150e35dbeb5578 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 28 Aug 2024 00:54:26 -0700
Subject: [PATCH 35/88] [Minor] Add more type annotations (#1237)

---
 .../srt/model_executor/cuda_graph_runner.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 96c15849e4e..40c87af88cf 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -17,6 +17,7 @@

 import bisect
 from contextlib import contextmanager
+from typing import Callable, List

 import torch
 from flashinfer import BatchDecodeWithPagedKVCacheWrapper
@@ -53,12 +54,12 @@ def _to_torch(model: torch.nn.Module, reverse: bool = False):

 @contextmanager
 def patch_model(
-    model: torch.nn.Module, use_compile: bool, tp_group: "GroupCoordinator"
+    model: torch.nn.Module, enable_compile: bool, tp_group: "GroupCoordinator"
 ):
     backup_ca_comm = None

     try:
-        if use_compile:
+        if enable_compile:
             _to_torch(model)
             monkey_patch_vllm_all_gather()
             backup_ca_comm = tp_group.ca_comm
@@ -67,7 +68,7 @@ def patch_model(
         else:
             yield model.forward
     finally:
-        if use_compile:
+        if enable_compile:
             _to_torch(model, reverse=True)
             monkey_patch_vllm_all_gather(reverse=True)
             tp_group.ca_comm = backup_ca_comm
@@ -88,7 +89,7 @@ def set_torch_compile_config():

 class CudaGraphRunner:
     def
__init__( self, - model_runner, + model_runner: "ModelRunner", max_batch_size_to_capture: int, use_torch_compile: bool, disable_padding: bool, @@ -154,13 +155,13 @@ def __init__( if use_torch_compile: set_torch_compile_config() - def can_run(self, batch_size): + def can_run(self, batch_size: int): if self.disable_padding: return batch_size in self.graphs else: return batch_size <= self.max_bs - def capture(self, batch_size_list): + def capture(self, batch_size_list: List[int]): self.batch_size_list = batch_size_list with graph_capture() as graph_capture_context: self.stream = graph_capture_context.stream @@ -181,7 +182,7 @@ def capture(self, batch_size_list): self.output_buffers[bs] = output_buffers self.flashinfer_handlers[bs] = flashinfer_handler - def capture_one_batch_size(self, bs, forward): + def capture_one_batch_size(self, bs: int, forward: Callable): graph = torch.cuda.CUDAGraph() stream = self.stream From 198974cd1a805a7fab2d81fe9e6b5fbd73d03fb8 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 28 Aug 2024 18:39:12 +1000 Subject: [PATCH 36/88] feat: support sm75 with FlashInfer v0.1.6 (#1233) --- README.md | 2 +- python/sglang/srt/layers/activation.py | 7 ------- python/sglang/srt/layers/layernorm.py | 3 --- python/sglang/srt/model_executor/model_runner.py | 2 ++ python/sglang/srt/server.py | 2 +- 5 files changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 3f03fd7f1c9..efb69c04379 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ sky status --endpoint 30000 sglang ### Common Notes -- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is currently one of the dependencies that must be installed for SGLang. If you are using NVIDIA GPU devices below sm80, such as T4, you can't use SGLang for the time being. We expect to resolve this issue soon, so please stay tuned. If you encounter any FlashInfer-related issues on sm80+ devices (e.g., A100, L40S, H100), consider using Triton's kernel by `--disable-flashinfer --disable-flashinfer-sampling` and raise a issue. +- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is currently one of the dependencies that must be installed for SGLang. It only supports sm75 and above. If you encounter any FlashInfer-related issues on sm75+ devices (e.g., T4, A10, A100, L4, L40S, H100), consider using Triton's kernel by `--disable-flashinfer --disable-flashinfer-sampling` and raise an issue. - If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`. 
## Backend: SGLang Runtime (SRT) diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py index 832d2b70130..5df387cb2b9 100644 --- a/python/sglang/srt/layers/activation.py +++ b/python/sglang/srt/layers/activation.py @@ -30,18 +30,11 @@ class SiluAndMul(CustomOp): - def __init__(self, **kwargs): - super().__init__() - self.is_lower_sm80 = torch.cuda.get_device_capability()[0] < 8 - def forward_native(self, x: torch.Tensor) -> torch.Tensor: d = x.shape[-1] // 2 return F.silu(x[..., :d]) * x[..., d:] def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: - if self.is_lower_sm80: - return self.forward_native(x) - d = x.shape[-1] // 2 output_shape = x.shape[:-1] + (d,) out = torch.empty(output_shape, dtype=x.dtype, device=x.device) diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py index 6cea85404a0..ac4d368d3f6 100644 --- a/python/sglang/srt/layers/layernorm.py +++ b/python/sglang/srt/layers/layernorm.py @@ -32,15 +32,12 @@ def __init__( super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.variance_epsilon = eps - self.is_lower_sm80 = torch.cuda.get_device_capability()[0] < 8 def forward_cuda( self, x: torch.Tensor, residual: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - if self.is_lower_sm80: - return self.forward_native(x, residual) if residual is not None: fused_add_rmsnorm(x, residual, self.weight.data, self.variance_epsilon) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 00660611499..0dd9f8c201f 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -161,6 +161,8 @@ def load_model(self): "Compute capability below sm80. Use float16 due to lack of bfloat16 support." ) self.server_args.dtype = "float16" + if torch.cuda.get_device_capability()[1] < 5: + raise RuntimeError("SGLang only supports sm75 and above.") monkey_patch_vllm_dummy_weight_loader() self.device_config = DeviceConfig() diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 6d1fc9fda7f..f3d1ab0f94d 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -421,7 +421,7 @@ def _set_envs_and_config(server_args: ServerArgs): if not server_args.disable_flashinfer: assert_pkg_version( "flashinfer", - "0.1.5", + "0.1.6", "Please uninstall the old version and " "reinstall the latest version by following the instructions " "at https://docs.flashinfer.ai/installation.html.", From 184ae1c68316c58a7f5b4ad813639b08604369f5 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 28 Aug 2024 02:15:52 -0700 Subject: [PATCH 37/88] Update README.md (#1239) --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index efb69c04379..305df444d09 100644 --- a/README.md +++ b/README.md @@ -297,7 +297,9 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/ ### Benchmark Performance -- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`. 
+- Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. + Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. + A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, please use `sglang.bench_serving` instead. ``` python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32 ``` From f25f4dfde5af9a81be52c1ba6d99cc2ac5cca179 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 28 Aug 2024 21:16:47 +1000 Subject: [PATCH 38/88] hotfix: revert sampler CUDA Graph (#1242) --- .github/workflows/e2e-test.yml | 5 -- README.md | 2 +- python/pyproject.toml | 2 +- python/sglang/bench_latency.py | 10 ++- python/sglang/srt/layers/logits_processor.py | 8 +- python/sglang/srt/layers/sampler.py | 83 ++++--------------- python/sglang/srt/managers/schedule_batch.py | 28 ++----- python/sglang/srt/managers/tp_worker.py | 52 +++++------- .../srt/model_executor/cuda_graph_runner.py | 33 ++------ .../srt/model_executor/forward_batch_info.py | 9 +- .../sglang/srt/model_executor/model_runner.py | 14 +--- python/sglang/srt/models/chatglm.py | 16 +++- python/sglang/srt/models/commandr.py | 6 +- python/sglang/srt/models/dbrx.py | 6 +- python/sglang/srt/models/deepseek.py | 6 +- python/sglang/srt/models/deepseek_v2.py | 6 +- python/sglang/srt/models/gemma.py | 6 +- python/sglang/srt/models/gemma2.py | 6 +- python/sglang/srt/models/gpt_bigcode.py | 6 +- python/sglang/srt/models/grok.py | 6 +- python/sglang/srt/models/internlm2.py | 6 +- python/sglang/srt/models/llama2.py | 10 +-- .../sglang/srt/models/llama_classification.py | 4 +- python/sglang/srt/models/minicpm.py | 6 +- python/sglang/srt/models/mixtral.py | 6 +- python/sglang/srt/models/mixtral_quant.py | 6 +- python/sglang/srt/models/qwen.py | 7 +- python/sglang/srt/models/qwen2.py | 8 +- python/sglang/srt/models/qwen2_moe.py | 19 +++-- python/sglang/srt/models/stablelm.py | 6 +- .../srt/sampling/sampling_batch_info.py | 75 +---------------- python/sglang/test/runners.py | 2 +- python/sglang/version.py | 2 +- 33 files changed, 119 insertions(+), 348 deletions(-) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 11c94775c15..7f555110d9d 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -38,11 +38,6 @@ jobs: cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default - - name: Benchmark Serving Latency - timeout-minutes: 10 - run: | - python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8 - - name: Benchmark Serving Throughput (w/o RadixAttention) timeout-minutes: 10 run: | diff --git a/README.md b/README.md index 305df444d09..223f9624f6e 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ ### Method 2: From source ``` # Use the last release branch -git clone -b v0.2.14 https://github.com/sgl-project/sglang.git +git clone -b v0.2.14.post1 https://github.com/sgl-project/sglang.git cd sglang pip install --upgrade pip diff --git a/python/pyproject.toml b/python/pyproject.toml index 4a46adc3fef..7b2741fd216 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = 
"sglang" -version = "0.2.14" +version = "0.2.14.post1" description = "SGLang is yet another fast serving framework for large language models and vision language models." readme = "README.md" requires-python = ">=3.8" diff --git a/python/sglang/bench_latency.py b/python/sglang/bench_latency.py index 6a918fbd112..dea910f5772 100644 --- a/python/sglang/bench_latency.py +++ b/python/sglang/bench_latency.py @@ -200,14 +200,16 @@ def extend(reqs, model_runner): tree_cache=None, ) batch.prepare_for_extend(model_runner.model_config.vocab_size) - sample_output, logits_output = model_runner.forward(batch, ForwardMode.EXTEND) - return sample_output.batch_next_token_ids, logits_output.next_token_logits, batch + output = model_runner.forward(batch, ForwardMode.EXTEND) + next_token_ids = batch.sample(output.next_token_logits) + return next_token_ids, output.next_token_logits, batch def decode(input_token_ids, batch, model_runner): batch.prepare_for_decode(input_token_ids.cpu().numpy()) - sample_output, logits_output = model_runner.forward(batch, ForwardMode.DECODE) - return sample_output.batch_next_token_ids, logits_output.next_token_logits + output = model_runner.forward(batch, ForwardMode.DECODE) + next_token_ids = batch.sample(output.next_token_logits) + return next_token_ids, output.next_token_logits @torch.inference_mode() diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py index b81f3d2a040..63f74d8b026 100644 --- a/python/sglang/srt/layers/logits_processor.py +++ b/python/sglang/srt/layers/logits_processor.py @@ -29,7 +29,7 @@ @dataclasses.dataclass -class LogitsProcessorOutput: +class LogitProcessorOutput: # The logits of the next tokens. shape: [#seq, vocab_size] next_token_logits: torch.Tensor # The logprobs of the next tokens. shape: [#seq, vocab_size] @@ -185,7 +185,7 @@ def forward( # Return only last_logits if logprob is not requested if not logits_metadata.return_logprob: - return LogitsProcessorOutput( + return LogitProcessorOutput( next_token_logits=last_logits, next_token_logprobs=None, normalized_prompt_logprobs=None, @@ -209,7 +209,7 @@ def forward( else: output_top_logprobs = None - return LogitsProcessorOutput( + return LogitProcessorOutput( next_token_logits=last_logits, next_token_logprobs=last_logprobs, normalized_prompt_logprobs=None, @@ -278,7 +278,7 @@ def forward( # Remove the last token logprob for the prefill tokens. 
input_token_logprobs = input_token_logprobs[:-1] - return LogitsProcessorOutput( + return LogitProcessorOutput( next_token_logits=last_logits, next_token_logprobs=last_logprobs, normalized_prompt_logprobs=normalized_prompt_logprobs, diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py index 6cb7d0a7c11..3006e765c88 100644 --- a/python/sglang/srt/layers/sampler.py +++ b/python/sglang/srt/layers/sampler.py @@ -1,6 +1,4 @@ -import dataclasses import logging -from typing import Union import torch from flashinfer.sampling import ( @@ -11,8 +9,6 @@ ) from vllm.model_executor.custom_op import CustomOp -from sglang.srt.layers.logits_processor import LogitsProcessorOutput - # TODO: move this dict to another place from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo @@ -20,71 +16,30 @@ logger = logging.getLogger(__name__) -@dataclasses.dataclass -class SampleOutput: - success: torch.Tensor - probs: torch.Tensor - batch_next_token_ids: torch.Tensor - - class Sampler(CustomOp): def __init__(self): super().__init__() - def _apply_penalties(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): - # min-token, presence, frequency - if sampling_info.linear_penalties is not None: - logits += sampling_info.linear_penalties - - # repetition - if sampling_info.scaling_penalties is not None: - logits = torch.where( - logits > 0, - logits / sampling_info.scaling_penalties, - logits * sampling_info.scaling_penalties, - ) - - return logits - - def _get_probs( - self, - logits: torch.Tensor, - sampling_info: SamplingBatchInfo, - is_torch_compile: bool = False, - ): + def forward_cuda(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): # Post process logits logits = logits.contiguous() logits.div_(sampling_info.temperatures) - if is_torch_compile: - # FIXME: Temporary workaround for unknown bugs in torch.compile - logits.add_(0) - if sampling_info.logit_bias is not None: logits.add_(sampling_info.logit_bias) if sampling_info.vocab_mask is not None: logits = logits.masked_fill(~sampling_info.vocab_mask, float("-inf")) - logits = self._apply_penalties(logits, sampling_info) + logits = sampling_info.penalizer_orchestrator.apply(logits) - return torch.softmax(logits, dim=-1) - - def forward_cuda( - self, - logits: Union[torch.Tensor, LogitsProcessorOutput], - sampling_info: SamplingBatchInfo, - ): - if isinstance(logits, LogitsProcessorOutput): - logits = logits.next_token_logits - - probs = self._get_probs(logits, sampling_info) + probs = torch.softmax(logits, dim=-1) if not global_server_args_dict["disable_flashinfer_sampling"]: max_top_k_round, batch_size = 32, probs.shape[0] uniform_samples = torch.rand( (max_top_k_round, batch_size), device=probs.device ) - if sampling_info.need_min_p_sampling: + if sampling_info.min_ps.any(): probs = top_k_renorm_prob(probs, sampling_info.top_ks) probs = top_p_renorm_prob(probs, sampling_info.top_ps) batch_next_token_ids, success = min_p_sampling_from_probs( @@ -100,23 +55,18 @@ def forward_cuda( probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps ) - return SampleOutput(success, probs, batch_next_token_ids) - - def forward_native( - self, - logits: Union[torch.Tensor, LogitsProcessorOutput], - sampling_info: SamplingBatchInfo, - ): - if isinstance(logits, LogitsProcessorOutput): - logits = logits.next_token_logits - - probs = self._get_probs(logits, sampling_info, is_torch_compile=True) + if not torch.all(success): + 
logging.warning("Sampling failed, fallback to top_k=1 strategy") + probs = probs.masked_fill(torch.isnan(probs), 0.0) + argmax_ids = torch.argmax(probs, dim=-1) + batch_next_token_ids = torch.where( + success, batch_next_token_ids, argmax_ids + ) - batch_next_token_ids, success = top_k_top_p_min_p_sampling_from_probs_torch( - probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps - ) + return batch_next_token_ids - return SampleOutput(success, probs, batch_next_token_ids) + def forward_native(): + raise NotImplementedError("Native forward is not implemented yet.") def top_k_top_p_min_p_sampling_from_probs_torch( @@ -137,10 +87,7 @@ def top_k_top_p_min_p_sampling_from_probs_torch( probs_sort[probs_sort < min_p_thresholds.view(-1, 1)] = 0.0 probs_sort.div_(probs_sort.max(dim=-1, keepdim=True)[0]) try: - # FIXME: torch.multiomial does not support num_samples = 1 - sampled_index = torch.multinomial(probs_sort, num_samples=2, replacement=True)[ - :, :1 - ] + sampled_index = torch.multinomial(probs_sort, num_samples=1) except RuntimeError as e: logger.warning(f"Sampling error: {e}") batch_next_token_ids = torch.zeros( diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index 926266a628f..f3af821e4ef 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -1,5 +1,3 @@ -from __future__ import annotations - """ Copyright 2023-2024 SGLang Team Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +17,7 @@ import logging from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional, Union +from typing import List, Optional, Union import torch @@ -31,10 +29,6 @@ from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo -if TYPE_CHECKING: - from sglang.srt.layers.sampler import SampleOutput - - INIT_INCREMENTAL_DETOKENIZATION_OFFSET = 5 # Put some global args for easy access @@ -684,17 +678,11 @@ def merge(self, other: "ScheduleBatch"): self.top_logprobs_nums.extend(other.top_logprobs_nums) self.return_logprob = any(req.return_logprob for req in self.reqs) - def check_sample_results(self, sample_output: SampleOutput): - if not torch.all(sample_output.success): - probs = sample_output.probs - batch_next_token_ids = sample_output.batch_next_token_ids - logging.warning("Sampling failed, fallback to top_k=1 strategy") - probs = probs.masked_fill(torch.isnan(probs), 0.0) - argmax_ids = torch.argmax(probs, dim=-1) - batch_next_token_ids = torch.where( - sample_output.success, batch_next_token_ids, argmax_ids - ) - sample_output.probs = probs - sample_output.batch_next_token_ids = batch_next_token_ids + def sample(self, logits: torch.Tensor): + from sglang.srt.layers.sampler import Sampler + + sampler = Sampler() + + batch_next_token_ids = sampler(logits, self.sampling_info) - return sample_output.batch_next_token_ids + return batch_next_token_ids diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index 127f71900ae..65daed43b28 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -31,7 +31,7 @@ from sglang.srt.constrained.fsm_cache import FSMCache from sglang.srt.constrained.jump_forward import JumpForwardCache from sglang.srt.hf_transformers_utils import get_processor, get_tokenizer -from sglang.srt.layers.logits_processor import LogitsProcessorOutput +from 
sglang.srt.layers.logits_processor import LogitProcessorOutput from sglang.srt.managers.io_struct import ( AbortReq, BatchEmbeddingOut, @@ -505,29 +505,21 @@ def forward_prefill_batch(self, batch: ScheduleBatch): if self.model_runner.is_generation: # Forward and sample the next tokens if batch.extend_num_tokens != 0: - sample_output, logits_output = self.model_runner.forward( - batch, ForwardMode.EXTEND - ) - next_token_ids = batch.check_sample_results(sample_output) + output = self.model_runner.forward(batch, ForwardMode.EXTEND) + next_token_ids = batch.sample(output.next_token_logits) batch.sampling_info.penalizer_orchestrator.cumulate_output_tokens( next_token_ids ) # Move logprobs to cpu - if logits_output.next_token_logprobs is not None: - logits_output.next_token_logprobs = ( - logits_output.next_token_logprobs[ - torch.arange( - len(next_token_ids), device=next_token_ids.device - ), - next_token_ids, - ].tolist() - ) - logits_output.input_token_logprobs = ( - logits_output.input_token_logprobs.tolist() - ) - logits_output.normalized_prompt_logprobs = ( - logits_output.normalized_prompt_logprobs.tolist() + if output.next_token_logprobs is not None: + output.next_token_logprobs = output.next_token_logprobs[ + torch.arange(len(next_token_ids), device=next_token_ids.device), + next_token_ids, + ].tolist() + output.input_token_logprobs = output.input_token_logprobs.tolist() + output.normalized_prompt_logprobs = ( + output.normalized_prompt_logprobs.tolist() ) next_token_ids = next_token_ids.tolist() @@ -566,14 +558,12 @@ def forward_prefill_batch(self, batch: ScheduleBatch): self.req_to_token_pool.free(req.req_pool_idx) if req.return_logprob: - self.add_logprob_return_values( - i, req, pt, next_token_ids, logits_output - ) + self.add_logprob_return_values(i, req, pt, next_token_ids, output) pt += req.extend_input_len else: assert batch.extend_num_tokens != 0 - logits_output = self.model_runner.forward(batch, ForwardMode.EXTEND) - embeddings = logits_output.embeddings.tolist() + output = self.model_runner.forward(batch, ForwardMode.EXTEND) + embeddings = output.embeddings.tolist() # Check finish conditions for i, req in enumerate(batch.reqs): @@ -601,7 +591,7 @@ def add_logprob_return_values( req: Req, pt: int, next_token_ids: List[int], - output: LogitsProcessorOutput, + output: LogitProcessorOutput, ): if req.normalized_prompt_logprob is None: req.normalized_prompt_logprob = output.normalized_prompt_logprobs[i] @@ -683,17 +673,15 @@ def forward_decode_batch(self, batch: ScheduleBatch): batch.prepare_for_decode() # Forward and sample the next tokens - sample_output, logits_output = self.model_runner.forward( - batch, ForwardMode.DECODE - ) - next_token_ids = batch.check_sample_results(sample_output) + output = self.model_runner.forward(batch, ForwardMode.DECODE) + next_token_ids = batch.sample(output.next_token_logits) batch.sampling_info.penalizer_orchestrator.cumulate_output_tokens( next_token_ids ) # Move logprobs to cpu - if logits_output.next_token_logprobs is not None: - next_token_logprobs = logits_output.next_token_logprobs[ + if output.next_token_logprobs is not None: + next_token_logprobs = output.next_token_logprobs[ torch.arange(len(next_token_ids), device=next_token_ids.device), next_token_ids, ].tolist() @@ -719,7 +707,7 @@ def forward_decode_batch(self, batch: ScheduleBatch): (next_token_logprobs[i], next_token_id) ) if req.top_logprobs_num > 0: - req.output_top_logprobs.append(logits_output.output_top_logprobs[i]) + 
req.output_top_logprobs.append(output.output_top_logprobs[i]) self.handle_finished_requests(batch) diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 40c87af88cf..796db26623f 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -26,18 +26,16 @@ from vllm.model_executor.custom_op import CustomOp from sglang.srt.layers.logits_processor import ( + LogitProcessorOutput, LogitsMetadata, LogitsProcessor, - LogitsProcessorOutput, ) -from sglang.srt.layers.sampler import SampleOutput from sglang.srt.managers.schedule_batch import ScheduleBatch from sglang.srt.model_executor.forward_batch_info import ( ForwardMode, InputMetadata, update_flashinfer_indices, ) -from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo from sglang.srt.utils import monkey_patch_vllm_all_gather @@ -146,10 +144,6 @@ def __init__( self.flashinfer_kv_indices.clone(), ] - # Sampling inputs - vocab_size = model_runner.model_config.vocab_size - self.sampling_info = SamplingBatchInfo.dummy_one(self.max_bs, vocab_size) - self.compile_bs = [1, 2, 4, 8, 16, 24, 32] if use_torch_compile else [] if use_torch_compile: @@ -241,7 +235,6 @@ def capture_one_batch_size(self, bs: int, forward: Callable): def run_once(): input_metadata = InputMetadata( forward_mode=ForwardMode.DECODE, - sampling_info=self.sampling_info[:bs], batch_size=bs, req_pool_indices=req_pool_indices, seq_lens=seq_lens, @@ -306,35 +299,27 @@ def replay(self, batch: ScheduleBatch): self.flashinfer_handlers[bs], ) - # Sampling inputs - self.sampling_info.inplace_assign(raw_bs, batch.sampling_info) - # Replay torch.cuda.synchronize() self.graphs[bs].replay() torch.cuda.synchronize() - sample_output, logits_output = self.output_buffers[bs] + output = self.output_buffers[bs] # Unpad if bs != raw_bs: - logits_output = LogitsProcessorOutput( - next_token_logits=logits_output.next_token_logits[:raw_bs], + output = LogitProcessorOutput( + next_token_logits=output.next_token_logits[:raw_bs], next_token_logprobs=None, normalized_prompt_logprobs=None, input_token_logprobs=None, input_top_logprobs=None, output_top_logprobs=None, ) - sample_output = SampleOutput( - sample_output.success[:raw_bs], - sample_output.probs[:raw_bs], - sample_output.batch_next_token_ids[:raw_bs], - ) # Extract logprobs if batch.return_logprob: - logits_output.next_token_logprobs = torch.nn.functional.log_softmax( - logits_output.next_token_logits, dim=-1 + output.next_token_logprobs = torch.nn.functional.log_softmax( + output.next_token_logits, dim=-1 ) return_top_logprob = any(x > 0 for x in batch.top_logprobs_nums) if return_top_logprob: @@ -342,8 +327,8 @@ def replay(self, batch: ScheduleBatch): forward_mode=ForwardMode.DECODE, top_logprobs_nums=batch.top_logprobs_nums, ) - logits_output.output_top_logprobs = LogitsProcessor.get_top_logprobs( - logits_output.next_token_logprobs, logits_metadata + output.output_top_logprobs = LogitsProcessor.get_top_logprobs( + output.next_token_logprobs, logits_metadata )[1] - return sample_output, logits_output + return output diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index e8849962b07..c107b3bc826 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -1,5 +1,3 @@ -from __future__ import annotations - """ Copyright 2023-2024 SGLang Team 
Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,7 +16,7 @@
 """ModelRunner runs the forward passes of the models."""
 from dataclasses import dataclass
 from enum import IntEnum, auto
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
 import torch
@@ -28,7 +26,6 @@
 
 if TYPE_CHECKING:
     from sglang.srt.model_executor.model_runner import ModelRunner
-    from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
 
 
 class ForwardMode(IntEnum):
@@ -45,7 +42,6 @@ class InputMetadata:
     """Store all information of a forward pass."""
 
     forward_mode: ForwardMode
-    sampling_info: SamplingBatchInfo
     batch_size: int
     req_pool_indices: torch.Tensor
     seq_lens: torch.Tensor
@@ -183,7 +179,6 @@ def from_schedule_batch(
     ):
         ret = cls(
             forward_mode=forward_mode,
-            sampling_info=batch.sampling_info,
             batch_size=batch.batch_size(),
             req_pool_indices=batch.req_pool_indices,
             seq_lens=batch.seq_lens,
@@ -194,8 +189,6 @@ def from_schedule_batch(
             top_logprobs_nums=batch.top_logprobs_nums,
         )
 
-        ret.sampling_info.prepare_penalties()
-
         ret.compute_positions(batch)
 
         ret.compute_extend_infos(batch)
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 0dd9f8c201f..abee152d6fd 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -21,7 +21,7 @@
 import logging
 import pkgutil
 from functools import lru_cache
-from typing import Optional, Tuple, Type
+from typing import Optional, Type
 
 import torch
 import torch.nn as nn
@@ -44,8 +44,6 @@
 from vllm.model_executor.models import ModelRegistry
 
 from sglang.global_config import global_config
-from sglang.srt.layers.logits_processor import LogitsProcessorOutput
-from sglang.srt.layers.sampler import SampleOutput
 from sglang.srt.managers.schedule_batch import ScheduleBatch, global_server_args_dict
 from sglang.srt.mem_cache.memory_pool import (
     MHATokenToKVPool,
@@ -517,11 +515,7 @@ def init_cuda_graphs(self):
 
     @torch.inference_mode()
     def forward_decode(self, batch: ScheduleBatch):
-        if (
-            self.cuda_graph_runner
-            and self.cuda_graph_runner.can_run(len(batch.reqs))
-            and not batch.sampling_info.has_bias()
-        ):
+        if self.cuda_graph_runner and self.cuda_graph_runner.can_run(len(batch.reqs)):
             return self.cuda_graph_runner.replay(batch)
 
         input_metadata = InputMetadata.from_schedule_batch(
@@ -570,9 +564,7 @@ def forward_extend_multi_modal(self, batch: ScheduleBatch):
             input_metadata.image_offsets,
         )
 
-    def forward(
-        self, batch: ScheduleBatch, forward_mode: ForwardMode
-    ) -> Tuple[SampleOutput, LogitsProcessorOutput]:
+    def forward(self, batch: ScheduleBatch, forward_mode: ForwardMode):
         if self.is_multimodal_model and forward_mode == ForwardMode.EXTEND:
             return self.forward_extend_multi_modal(batch)
         elif forward_mode == ForwardMode.DECODE:
diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py
index 1c189eebbc0..0a22f994bb4 100644
--- a/python/sglang/srt/models/chatglm.py
+++ b/python/sglang/srt/models/chatglm.py
@@ -31,18 +31,20 @@
 )
 from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
+from vllm.model_executor.layers.sampler import Sampler
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     ParallelLMHead,
     VocabParallelEmbedding,
 )
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.sampling_metadata
import SamplingMetadata +from vllm.sequence import SamplerOutput from vllm.transformers_utils.configs import ChatGLMConfig from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata LoraConfig = None @@ -381,11 +383,17 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output + + def sample( + self, + logits: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(logits, sampling_metadata) + return next_tokens def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) diff --git a/python/sglang/srt/models/commandr.py b/python/sglang/srt/models/commandr.py index c360106f97c..f6d6f6e1f94 100644 --- a/python/sglang/srt/models/commandr.py +++ b/python/sglang/srt/models/commandr.py @@ -64,7 +64,6 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -327,7 +326,6 @@ def __init__( self.config = config self.quant_config = quant_config self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() self.model = CohereModel(config, quant_config) @torch.no_grad() @@ -342,11 +340,9 @@ def forward( positions, input_metadata, ) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/dbrx.py b/python/sglang/srt/models/dbrx.py index b3a76b56ae2..39ac4aefa72 100644 --- a/python/sglang/srt/models/dbrx.py +++ b/python/sglang/srt/models/dbrx.py @@ -45,7 +45,6 @@ from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -383,7 +382,6 @@ def __init__( padding_size=DEFAULT_VOCAB_PADDING_SIZE, ) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -393,11 +391,9 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): expert_params_mapping = [ diff --git 
a/python/sglang/srt/models/deepseek.py b/python/sglang/srt/models/deepseek.py index b939602c1ba..59fd1ec7ed8 100644 --- a/python/sglang/srt/models/deepseek.py +++ b/python/sglang/srt/models/deepseek.py @@ -46,7 +46,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -386,7 +385,6 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -396,11 +394,9 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 15ecf4bb66b..13dd477392e 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -45,7 +45,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -633,7 +632,6 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() def forward( self, @@ -642,11 +640,9 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/gemma.py b/python/sglang/srt/models/gemma.py index 61cc5c66ea5..990937f5180 100644 --- a/python/sglang/srt/models/gemma.py +++ b/python/sglang/srt/models/gemma.py @@ -37,7 +37,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -288,7 +287,6 @@ def __init__( self.quant_config = quant_config self.model = GemmaModel(config, quant_config=quant_config) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -299,11 +297,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, 
input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return (sample_output, logits_output) def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/gemma2.py b/python/sglang/srt/models/gemma2.py index fabf86b498e..c6dbc7e5569 100644 --- a/python/sglang/srt/models/gemma2.py +++ b/python/sglang/srt/models/gemma2.py @@ -41,7 +41,6 @@ from sglang.srt.layers.activation import GeluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -397,7 +396,6 @@ def __init__( self.quant_config = quant_config self.model = Gemma2Model(config, cache_config, quant_config) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -408,11 +406,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def get_attention_sliding_window_size(self): return get_attention_sliding_window_size(self.config) diff --git a/python/sglang/srt/models/gpt_bigcode.py b/python/sglang/srt/models/gpt_bigcode.py index dc828f0142e..94b7f6153cf 100644 --- a/python/sglang/srt/models/gpt_bigcode.py +++ b/python/sglang/srt/models/gpt_bigcode.py @@ -35,7 +35,6 @@ from sglang.srt.layers.activation import get_act_fn from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -262,7 +261,6 @@ def __init__( if lora_config: self.unpadded_vocab_size += lora_config.lora_extra_vocab_size self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -272,11 +270,9 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py index 85a89ca3edc..4a0a08bf88b 100644 --- a/python/sglang/srt/models/grok.py +++ b/python/sglang/srt/models/grok.py @@ -46,7 +46,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -298,7 +297,6 @@ def __init__( self.model = Grok1Model(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() # Monkey 
patch _prepare_weights to load pre-sharded weights setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights) @@ -315,11 +313,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/internlm2.py b/python/sglang/srt/models/internlm2.py index c0e4d19e128..f2947e991b5 100644 --- a/python/sglang/srt/models/internlm2.py +++ b/python/sglang/srt/models/internlm2.py @@ -40,7 +40,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -263,7 +262,6 @@ def __init__( self.model = InternLM2Model(config, quant_config) self.output = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -274,11 +272,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.output.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py index 42e96123035..9de8d33c5c1 100644 --- a/python/sglang/srt/models/llama2.py +++ b/python/sglang/srt/models/llama2.py @@ -39,9 +39,8 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm -from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput +from sglang.srt.layers.logits_processor import LogitProcessorOutput, LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -303,7 +302,6 @@ def __init__( self.model = LlamaModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -312,13 +310,11 @@ def forward( positions: torch.Tensor, input_metadata: InputMetadata, input_embeds: torch.Tensor = None, - ) -> LogitsProcessorOutput: + ) -> LogitProcessorOutput: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def get_module_name(self, name): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/llama_classification.py 
b/python/sglang/srt/models/llama_classification.py index fdf6d28e556..02224971d6a 100644 --- a/python/sglang/srt/models/llama_classification.py +++ b/python/sglang/srt/models/llama_classification.py @@ -24,7 +24,7 @@ from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from sglang.srt.layers.logits_processor import LogitsProcessorOutput +from sglang.srt.layers.logits_processor import LogitProcessorOutput from sglang.srt.model_executor.forward_batch_info import InputMetadata from sglang.srt.models.llama2 import LlamaModel @@ -65,7 +65,7 @@ def forward( (input_metadata.batch_size, self.config.classification_out_size) ).to(input_ids.device) - return LogitsProcessorOutput( + return LogitProcessorOutput( next_token_logits=scores, next_token_logprobs=scores, normalized_prompt_logprobs=scores, diff --git a/python/sglang/srt/models/minicpm.py b/python/sglang/srt/models/minicpm.py index 0028ae67a8c..49ff1926f39 100644 --- a/python/sglang/srt/models/minicpm.py +++ b/python/sglang/srt/models/minicpm.py @@ -39,7 +39,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -298,7 +297,6 @@ def __init__( self.scale_width = self.config.hidden_size / self.config.dim_model_base self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -316,11 +314,9 @@ def forward( lm_head_weight = self.model.embed_tokens.weight else: lm_head_weight = self.lm_head.weight - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, lm_head_weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index ca38cb03bae..d11f6c95198 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -41,7 +41,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -300,7 +299,6 @@ def __init__( self.model = MixtralModel(config, quant_config=quant_config, prefix="model") self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() def forward( self, @@ -310,11 +308,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/mixtral_quant.py b/python/sglang/srt/models/mixtral_quant.py index 97ac09ee629..b02e925c5a0 100644 --- 
a/python/sglang/srt/models/mixtral_quant.py +++ b/python/sglang/srt/models/mixtral_quant.py @@ -45,7 +45,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -334,7 +333,6 @@ def __init__( self.model = MixtralModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -345,11 +343,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py index 4958a812985..93dae9585c3 100644 --- a/python/sglang/srt/models/qwen.py +++ b/python/sglang/srt/models/qwen.py @@ -39,7 +39,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -252,7 +251,6 @@ def __init__( vocab_size = ((config.vocab_size + 63) // 64) * 64 self.lm_head = ParallelLMHead(vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -262,11 +260,10 @@ def forward( input_metadata: InputMetadata, ): hidden_states = self.transformer(input_ids, positions, input_metadata) - logits_output = self.logits_processor( + next_tokens = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output + return next_tokens def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index 76094b907a7..fcf083e1b5d 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -38,9 +38,8 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor -from sglang.srt.layers.pooler import Pooler, PoolingType +from sglang.srt.layers.pooler import EmbeddingPoolerOutput, Pooler, PoolingType from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata Qwen2Config = None @@ -277,7 +276,6 @@ def __init__( self.model = Qwen2Model(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True) @torch.no_grad() @@ -291,11 +289,9 @@ def forward( ) -> torch.Tensor: 
hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) if not get_embedding: - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output else: return self.pooler(hidden_states, input_metadata) diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index e08695bc61a..9bdbd750660 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -35,8 +35,10 @@ ReplicatedLinear, RowParallelLinear, ) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, @@ -47,7 +49,6 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -365,7 +366,6 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -376,11 +376,20 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return sample_output, logits_output + + def compute_logits( + self, + input_ids: torch.Tensor, + hidden_states: torch.Tensor, + input_metadata: InputMetadata, + ) -> torch.Tensor: + logits = self.logits_processor( + input_ids, hidden_states, self.lm_head.weight, input_metadata + ) + return logits def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/stablelm.py b/python/sglang/srt/models/stablelm.py index a3102baabd4..9e10f12f2a2 100644 --- a/python/sglang/srt/models/stablelm.py +++ b/python/sglang/srt/models/stablelm.py @@ -40,7 +40,6 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -250,7 +249,6 @@ def __init__( self.model = StableLMEpochModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) - self.sampler = Sampler() @torch.no_grad() def forward( @@ -261,11 +259,9 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - logits_output = self.logits_processor( + return self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - sample_output = self.sampler(logits_output, input_metadata.sampling_info) - return 
sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py index 7843f4bd32d..bc70a9018ed 100644 --- a/python/sglang/srt/sampling/sampling_batch_info.py +++ b/python/sglang/srt/sampling/sampling_batch_info.py @@ -21,63 +21,10 @@ class SamplingBatchInfo: top_ps: torch.Tensor = None top_ks: torch.Tensor = None min_ps: torch.Tensor = None - - # Dispatch in CUDA graph - need_min_p_sampling: bool = False - - # Bias Tensors + penalizer_orchestrator: penaltylib.BatchedPenalizerOrchestrator = None logit_bias: torch.Tensor = None vocab_mask: torch.Tensor = None - # Penalizer - penalizer_orchestrator: penaltylib.BatchedPenalizerOrchestrator = None - linear_penalties: torch.Tensor = None - scaling_penalties: torch.Tensor = None - - def has_bias(self): - return ( - self.logit_bias is not None - or self.vocab_mask is not None - or self.linear_penalties is not None - or self.scaling_penalties is not None - ) - - @classmethod - def dummy_one(cls, max_bs: int, vocab_size: int): - ret = cls(vocab_size=vocab_size) - ret.temperatures = torch.ones((max_bs, 1), dtype=torch.float, device="cuda") - ret.top_ps = torch.ones((max_bs,), dtype=torch.float, device="cuda") - ret.top_ks = torch.ones((max_bs,), dtype=torch.int, device="cuda") - ret.min_ps = torch.zeros((max_bs,), dtype=torch.float, device="cuda") - return ret - - def __getitem__(self, key): - if isinstance(key, slice): - # NOTE: We do not use cuda graph when there is bias tensors - assert not self.has_bias() - return SamplingBatchInfo( - vocab_size=self.vocab_size, - temperatures=self.temperatures[key], - top_ps=self.top_ps[key], - top_ks=self.top_ks[key], - min_ps=self.min_ps[key], - need_min_p_sampling=self.need_min_p_sampling, - ) - else: - raise NotImplementedError - - def inplace_assign(self, bs: int, other: SamplingBatchInfo): - # NOTE: We do not use cuda graph when there is bias tensors - assert not self.has_bias() - - self.vocab_size = other.vocab_size - self.need_min_p_sampling = other.need_min_p_sampling - - self.temperatures[:bs] = other.temperatures - self.top_ps[:bs] = other.top_ps - self.top_ks[:bs] = other.top_ks - self.min_ps[:bs] = other.min_ps - @classmethod def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): device = "cuda" @@ -98,7 +45,6 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): ret.min_ps = torch.tensor( [r.sampling_params.min_p for r in reqs], dtype=torch.float, device=device ) - ret.need_min_p_sampling = any(r.sampling_params.min_p > 0 for r in reqs) # Each penalizers will do nothing if they evaluate themselves as not required by looking at # the sampling_params of the requests (See {_is_required()} of each penalizers). 
So this @@ -126,25 +72,6 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): return ret - def prepare_penalties(self): - self.scaling_penalties = None - self.linear_penalties = None - - for penalizer in self.penalizer_orchestrator.penalizers.values(): - if isinstance(penalizer, penaltylib.BatchedRepetitionPenalizer): - if penalizer.is_prepared(): - self.scaling_penalties = penalizer.cumulated_repetition_penalties - else: - if penalizer.is_prepared(): - if self.linear_penalties is None: - bs = self.penalizer_orchestrator.batch.batch_size() - self.linear_penalties = torch.zeros( - (bs, self.vocab_size), - dtype=torch.float32, - device="cuda", - ) - self.linear_penalties = penalizer.apply(self.linear_penalties) - def update_regex_vocab_mask(self, batch: ScheduleBatch): bs, reqs = batch.batch_size(), batch.reqs device = "cuda" diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 2d3b0aefa33..37ed2cf9adc 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -180,7 +180,7 @@ def __init__( tp_size=tp_size, dtype=get_dtype_str(torch_dtype), port=port, - mem_fraction_static=0.69, + mem_fraction_static=0.7, trust_remote_code=False, is_embedding=not self.is_generation, ) diff --git a/python/sglang/version.py b/python/sglang/version.py index f3291e93b7d..839b265519b 100644 --- a/python/sglang/version.py +++ b/python/sglang/version.py @@ -1 +1 @@ -__version__ = "0.2.14" +__version__ = "0.2.14.post1" From 6c498313942b32e548dd0b499f279db0abc5b085 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 28 Aug 2024 04:20:54 -0700 Subject: [PATCH 39/88] Add sglang.bench_latency to CI (#1243) --- .github/workflows/e2e-test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 7f555110d9d..11c94775c15 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -38,6 +38,11 @@ jobs: cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default + - name: Benchmark Serving Latency + timeout-minutes: 10 + run: | + python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8 + - name: Benchmark Serving Throughput (w/o RadixAttention) timeout-minutes: 10 run: | From 66975360e7575a5f573cdaf5c6892d81afc3ed19 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 28 Aug 2024 22:12:36 +1000 Subject: [PATCH 40/88] fix: increase max_new_tokens when testing generation models (#1244) --- python/sglang/test/runners.py | 2 +- test/srt/models/test_generation_models.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 37ed2cf9adc..e69d699a7d3 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -30,7 +30,7 @@ # the output of gemma-2-2b from SRT is unstable on the commented prompt # "The capital of France is", "Apple is red. Banana is Yellow. 
" * 800 + "Apple is", - "The capital of the United Kindom is", + "The capital of the United Kingdom is", "Today is a sunny day and I like", "AI is a field of computer science focused on", ] diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py index b953ccf5d6a..e38584741e0 100644 --- a/test/srt/models/test_generation_models.py +++ b/test/srt/models/test_generation_models.py @@ -62,7 +62,6 @@ def calculate_rouge_l(output_strs_list1, output_strs_list2): class TestGenerationModels(unittest.TestCase): - def assert_close_prefill_logits_and_output_strs( self, prompts, @@ -99,14 +98,15 @@ def assert_close_prefill_logits_and_output_strs( abs(hf_logprobs - srt_logprobs) < prefill_tolerance ), "prefill logprobs are not all close" - print(hf_outputs.output_strs) - print(srt_outputs.output_strs) + print(f"hf_outputs.output_strs={hf_outputs.output_strs}") + print(f"srt_outputs.output_strs={srt_outputs.output_strs}") rouge_l_scores = calculate_rouge_l( hf_outputs.output_strs, srt_outputs.output_strs ) + print(f"rouge_l_scores={rouge_l_scores}") assert all( score >= rouge_threshold for score in rouge_l_scores - ), f"Not all ROUGE-L scores are greater than {rouge_threshold}" + ), f"Not all ROUGE-L scores are greater than rouge_threshold={rouge_threshold}" def test_prefill_logits_and_output_strs(self): for ( @@ -117,7 +117,7 @@ def test_prefill_logits_and_output_strs(self): rouge_threshold, ) in MODELS: for torch_dtype in TORCH_DTYPES: - max_new_tokens = 8 + max_new_tokens = 32 self.assert_close_prefill_logits_and_output_strs( DEFAULT_PROMPTS, model, From b1a540ec42cdd7b2875ce4b84587c522458bc065 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 28 Aug 2024 22:47:34 +1000 Subject: [PATCH 41/88] feat: update GemmaRMSNorm (#1232) --- python/sglang/srt/layers/layernorm.py | 48 +++++++++++++++++++++++- python/sglang/srt/models/gemma2.py | 52 +------------------------- python/sglang/test/test_layernorm.py | 54 ++++++++++++++++++++++++++- 3 files changed, 101 insertions(+), 53 deletions(-) diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py index ac4d368d3f6..4c24f50ffe4 100644 --- a/python/sglang/srt/layers/layernorm.py +++ b/python/sglang/srt/layers/layernorm.py @@ -19,7 +19,12 @@ import torch import torch.nn as nn -from flashinfer.norm import fused_add_rmsnorm, rmsnorm +from flashinfer.norm import ( + fused_add_rmsnorm, + gemma_fused_add_rmsnorm, + gemma_rmsnorm, + rmsnorm, +) from vllm.model_executor.custom_op import CustomOp @@ -63,3 +68,44 @@ def forward_native( return x else: return x, residual + + +class GemmaRMSNorm(CustomOp): + def __init__( + self, + hidden_size: int, + eps: float = 1e-6, + ) -> None: + super().__init__() + self.weight = nn.Parameter(torch.zeros(hidden_size)) + self.variance_epsilon = eps + + def forward_native( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + orig_dtype = x.dtype + if residual is not None: + x = x + residual + residual = x + + x = x.float() + variance = x.pow(2).mean(dim=-1, keepdim=True) + x = x * torch.rsqrt(variance + self.variance_epsilon) + x = x * (1.0 + self.weight.float()) + x = x.to(orig_dtype) + return x if residual is None else (x, residual) + + def forward_cuda( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + if residual is not None: + gemma_fused_add_rmsnorm( + x, residual, self.weight.data, 
self.variance_epsilon + ) + return x, residual + out = gemma_rmsnorm(x, self.weight.data, self.variance_epsilon) + return out diff --git a/python/sglang/srt/models/gemma2.py b/python/sglang/srt/models/gemma2.py index c6dbc7e5569..3223424d79c 100644 --- a/python/sglang/srt/models/gemma2.py +++ b/python/sglang/srt/models/gemma2.py @@ -22,11 +22,6 @@ from transformers import PretrainedConfig from vllm.config import CacheConfig, LoRAConfig from vllm.distributed import get_tensor_model_parallel_world_size - -# FIXME: temporary solution, remove after next vllm release -from vllm.model_executor.custom_op import CustomOp - -# from vllm.model_executor.layers.layernorm import GemmaRMSNorm from vllm.model_executor.layers.linear import ( MergedColumnParallelLinear, QKVParallelLinear, @@ -39,6 +34,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from sglang.srt.layers.activation import GeluAndMul +from sglang.srt.layers.layernorm import GemmaRMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -50,52 +46,6 @@ def get_attention_sliding_window_size(config): return config.sliding_window - 1 -class GemmaRMSNorm(CustomOp): - """RMS normalization for Gemma. - - Two differences from the above RMSNorm: - 1. x * (1 + w) instead of x * w. - 2. (x * w).to(orig_dtype) instead of x.to(orig_dtype) * w. - """ - - def __init__( - self, - hidden_size: int, - eps: float = 1e-6, - ) -> None: - super().__init__() - self.weight = nn.Parameter(torch.zeros(hidden_size)) - self.variance_epsilon = eps - - def forward_native( - self, - x: torch.Tensor, - residual: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - """PyTorch-native implementation equivalent to forward().""" - orig_dtype = x.dtype - if residual is not None: - x = x + residual - residual = x - - x = x.float() - variance = x.pow(2).mean(dim=-1, keepdim=True) - x = x * torch.rsqrt(variance + self.variance_epsilon) - # Llama does x.to(float16) * w whilst Gemma is (x * w).to(float16) - # See https://github.com/huggingface/transformers/pull/29402 - x = x * (1.0 + self.weight.float()) - x = x.to(orig_dtype) - return x if residual is None else (x, residual) - - def forward_cuda( - self, - x: torch.Tensor, - residual: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - # from vLLM: TODO(woosuk): Implement an optimized kernel for GemmaRMSNorm. 
- return self.forward_native(x, residual) - - # FIXME: temporary solution, remove after next vllm release from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding diff --git a/python/sglang/test/test_layernorm.py b/python/sglang/test/test_layernorm.py index ab61aa80405..770e69733db 100644 --- a/python/sglang/test/test_layernorm.py +++ b/python/sglang/test/test_layernorm.py @@ -3,7 +3,7 @@ import torch -from sglang.srt.layers.layernorm import RMSNorm +from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm class TestRMSNorm(unittest.TestCase): @@ -56,5 +56,57 @@ def test_rms_norm(self): self._run_rms_norm_test(*params) +class TestGemmaRMSNorm(unittest.TestCase): + DTYPES = [torch.half, torch.bfloat16] + NUM_TOKENS = [7, 83, 4096] + HIDDEN_SIZES = [768, 769, 770, 771, 5120, 5124, 5125, 5126, 8192, 8199] + ADD_RESIDUAL = [False, True] + SEEDS = [0] + + @classmethod + def setUpClass(cls): + if not torch.cuda.is_available(): + raise unittest.SkipTest("CUDA is not available") + torch.set_default_device("cuda") + + def _run_gemma_rms_norm_test( + self, num_tokens, hidden_size, add_residual, dtype, seed + ): + torch.manual_seed(seed) + + layer = GemmaRMSNorm(hidden_size).to(dtype=dtype) + layer.weight.data.normal_(mean=1.0, std=0.1) + scale = 1 / (2 * hidden_size) + x = torch.randn(num_tokens, hidden_size, dtype=dtype) * scale + residual = torch.randn_like(x) * scale if add_residual else None + + with torch.inference_mode(): + ref_out = layer.forward_native(x, residual) + out = layer(x, residual) + + if add_residual: + self.assertTrue(torch.allclose(out[0], ref_out[0], atol=1e-3, rtol=1e-3)) + self.assertTrue(torch.allclose(out[1], ref_out[1], atol=1e-3, rtol=1e-3)) + else: + self.assertTrue(torch.allclose(out, ref_out, atol=1e-3, rtol=1e-3)) + + def test_gemma_rms_norm(self): + for params in itertools.product( + self.NUM_TOKENS, + self.HIDDEN_SIZES, + self.ADD_RESIDUAL, + self.DTYPES, + self.SEEDS, + ): + with self.subTest( + num_tokens=params[0], + hidden_size=params[1], + add_residual=params[2], + dtype=params[3], + seed=params[4], + ): + self._run_gemma_rms_norm_test(*params) + + if __name__ == "__main__": unittest.main(verbosity=2) From bf53bf5142bd3393d495608e58c86f6d8c991664 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 28 Aug 2024 06:33:05 -0700 Subject: [PATCH 42/88] [Fix] Fix llava on multi images (#1247) --- README.md | 2 +- .../usage/llava_video/srt_example_llava_v.py | 13 +- python/sglang/launch_server_llavavid.py | 26 +++ python/sglang/srt/hf_transformers_utils.py | 149 ------------------ python/sglang/srt/managers/io_struct.py | 9 +- python/sglang/srt/managers/schedule_batch.py | 10 +- .../sglang/srt/managers/tokenizer_manager.py | 135 +++++++++------- python/sglang/srt/managers/tp_worker.py | 19 ++- .../srt/model_executor/forward_batch_info.py | 30 ++-- .../sglang/srt/model_executor/model_runner.py | 21 ++- python/sglang/srt/models/chatglm.py | 2 +- python/sglang/srt/models/grok.py | 12 +- python/sglang/srt/models/llama2.py | 7 +- .../sglang/srt/models/llama_classification.py | 4 - python/sglang/srt/models/llama_embedding.py | 7 +- python/sglang/srt/models/llava.py | 111 +++++-------- python/sglang/srt/models/llavavid.py | 126 +++++---------- python/sglang/srt/models/qwen2.py | 7 +- python/sglang/srt/models/yivl.py | 9 +- python/sglang/srt/server.py | 8 +- python/sglang/srt/utils.py | 51 +++--- test/srt/test_vision_openai_server.py | 2 - 22 files changed, 272 insertions(+), 488 deletions(-) create mode 100644 python/sglang/launch_server_llavavid.py 
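The common thread of this patch is that every per-image request field becomes a per-image list, so a single request can carry several images: `image_hash`/`image_size` turn into `image_hashes`/`image_sizes`, and padding produces one `image_offsets` entry per image. Below is a minimal sketch of the invariant the renamed fields keep; the class name and the values are illustrative, only the field names come from the diffs that follow.

from dataclasses import dataclass
from typing import List


@dataclass
class MultiImageInputSketch:
    """Illustrative container mirroring the renamed multi-image fields."""

    input_ids: List[int]
    image_hashes: List[int]  # one content hash per input image
    image_sizes: List[List[int]]  # one [height, width] pair per input image
    image_offsets: List[int]  # token position at which each image is padded in


req = MultiImageInputSketch(
    input_ids=list(range(16)),
    image_hashes=[11111, 22222],  # two images in a single request
    image_sizes=[[336, 336], [672, 336]],
    image_offsets=[2, 9],
)
# The lists must stay aligned: index i describes the i-th image in every list.
assert len(req.image_hashes) == len(req.image_sizes) == len(req.image_offsets)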
diff --git a/README.md b/README.md index 223f9624f6e..9d795ce4382 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - Qwen / Qwen 2 / Qwen 2 MoE - DeepSeek / DeepSeek 2 - [LLaVA-OneVision](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/) - - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384` + - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava` - Query the server with the [OpenAI Vision API](https://platform.openai.com/docs/guides/vision). See examples at [test/srt/test_vision_openai_server.py](test/srt/test_vision_openai_server.py) - LLaVA 1.5 / 1.6 / NeXT - `python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000 --tp-size=1 --chat-template=llava_llama_3` diff --git a/examples/frontend_language/usage/llava_video/srt_example_llava_v.py b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py index 085bcea5a23..1f2931a5a4f 100644 --- a/examples/frontend_language/usage/llava_video/srt_example_llava_v.py +++ b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py @@ -184,13 +184,9 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= # Parse the arguments args = parser.parse_args() - cur_port = args.port - cur_chunk = args.chunk_idx - num_chunks = args.num_chunks - num_frames = args.num_frames if "34b" in args.model_path.lower(): @@ -202,7 +198,6 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= exit() model_overide_args = {} - model_overide_args["mm_spatial_pool_stride"] = args.mm_spatial_pool_stride model_overide_args["architectures"] = ["LlavaVidForCausalLM"] model_overide_args["num_frames"] = args.num_frames @@ -235,7 +230,6 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= print(f"chat template: {runtime.endpoint.chat_template.name}") # Run a single request - # try: print("\n========== single ==========\n") root = args.video_dir if os.path.isfile(root): @@ -257,13 +251,10 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= ) # Calculate the average processing time print(f"Average processing time per video: {average_time:.2f} seconds") runtime.shutdown() - # except Exception as e: - # print(e) - runtime.shutdown() - # # # Run a batch of requests + # # Run a batch of requests # print("\n========== batch ==========\n") # if not os.path.exists(args.save_dir): # os.makedirs(args.save_dir) - # batch(args.video_dir,args.save_dir,cur_chunk, num_chunks, num_frames, num_chunks) + # batch(args.video_dir, args.save_dir, cur_chunk, num_chunks, num_frames, num_chunks) # runtime.shutdown() diff --git a/python/sglang/launch_server_llavavid.py b/python/sglang/launch_server_llavavid.py new file mode 100644 index 00000000000..797ad07a47f --- /dev/null +++ b/python/sglang/launch_server_llavavid.py @@ -0,0 +1,26 @@ +"""Launch the inference server for Llava-video model.""" + +import argparse + +from sglang.srt.server import ServerArgs, launch_server + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + ServerArgs.add_cli_args(parser) + args = parser.parse_args() + server_args = ServerArgs.from_cli_args(args) + + model_overide_args = {} + model_overide_args["mm_spatial_pool_stride"] = 2 + 
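+    # Note: the overrides below pin the Llava-video architecture and frame
+    # count; they mirror the settings used in srt_example_llava_v.py above.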
model_overide_args["architectures"] = ["LlavaVidForCausalLM"] + model_overide_args["num_frames"] = 16 + model_overide_args["model_type"] = "llavavid" + if model_overide_args["num_frames"] == 32: + model_overide_args["rope_scaling"] = {"factor": 2.0, "type": "linear"} + model_overide_args["max_sequence_length"] = 4096 * 2 + model_overide_args["tokenizer_model_max_length"] = 4096 * 2 + model_overide_args["model_max_length"] = 4096 * 2 + if "34b" in args.model_path.lower(): + model_overide_args["image_token_index"] = 64002 + + launch_server(server_args, model_overide_args, None) diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index 4f6e3d07157..2be4169140a 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -119,24 +119,7 @@ def get_tokenizer( tokenizer_revision: Optional[str] = None, **kwargs, ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]: - if tokenizer_name.endswith(".json"): - return TiktokenTokenizer(tokenizer_name) - - if tokenizer_name.endswith(".model"): - return SentencePieceTokenizer(tokenizer_name) - """Gets a tokenizer for the given model name via Huggingface.""" - if is_multimodal_model(tokenizer_name): - processor = get_processor( - tokenizer_name, - *args, - trust_remote_code=trust_remote_code, - tokenizer_revision=tokenizer_revision, - **kwargs, - ) - tokenizer = processor.tokenizer - return tokenizer - if tokenizer_mode == "slow": if kwargs.get("use_fast", False): raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.") @@ -199,135 +182,3 @@ def get_processor( **kwargs, ) return processor - - -class TiktokenTokenizer: - def __init__(self, tokenizer_path): - import tiktoken - from jinja2 import Template - - PAT_STR_B = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""" - - # Read JSON - name = "tmp-json" - with open(tokenizer_path, "rb") as fin: - tok_dict = json.load(fin) - - mergeable_ranks = { - bytes(item["bytes"]): item["token"] for item in tok_dict["regular_tokens"] - } - special_tokens = { - bytes(item["bytes"]).decode(): item["token"] - for item in tok_dict["special_tokens"] - } - assert tok_dict["word_split"] == "V1" - - default_allowed_special = None - - kwargs = { - "name": name, - "pat_str": tok_dict.get("pat_str", PAT_STR_B), - "mergeable_ranks": mergeable_ranks, - "special_tokens": special_tokens, - } - if "default_allowed_special" in tok_dict: - default_allowed_special = set( - [ - bytes(bytes_list).decode() - for bytes_list in tok_dict["default_allowed_special"] - ] - ) - if "vocab_size" in tok_dict: - kwargs["explicit_n_vocab"] = tok_dict["vocab_size"] - - PAD = "<|pad|>" - EOS = "<|eos|>" - SEP = "<|separator|>" - - DEFAULT_CONTROL_TOKENS = {"pad": PAD, "sep": EOS, "eos": SEP} - - tokenizer = tiktoken.Encoding(**kwargs) - tokenizer._default_allowed_special = default_allowed_special or set() - tokenizer._control_tokens = DEFAULT_CONTROL_TOKENS - - def encode_patched( - self, - text: str, - *, - allowed_special: Union[ - Literal["all"], AbstractSet[str] - ] = set(), # noqa: B006 - disallowed_special: Union[Literal["all"], Collection[str]] = "all", - ) -> List[int]: - if isinstance(allowed_special, set): - allowed_special |= self._default_allowed_special - return tiktoken.Encoding.encode( - self, - text, - allowed_special=allowed_special, - disallowed_special=(), - ) - - tokenizer.encode = functools.partial(encode_patched, tokenizer) - - # Convert to HF 
interface - self.tokenizer = tokenizer - self.eos_token_id = tokenizer._special_tokens[EOS] - self.vocab_size = tokenizer.n_vocab - self.chat_template = Template( - "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'].strip() + '<|separator|>\n\n' }}{% elif message['role'] == 'system' %}{{ 'System: ' + message['content'].strip() + '<|separator|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|separator|>\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}" - ) - - def encode(self, x, add_special_tokens=False): - return self.tokenizer.encode(x) - - def decode(self, x): - return self.tokenizer.decode(x) - - def batch_decode( - self, batch, skip_special_tokens=True, spaces_between_special_tokens=False - ): - if isinstance(batch[0], int): - batch = [[x] for x in batch] - return self.tokenizer.decode_batch(batch) - - def apply_chat_template(self, messages, tokenize, add_generation_prompt): - ret = self.chat_template.render( - messages=messages, add_generation_prompt=add_generation_prompt - ) - return self.encode(ret) if tokenize else ret - - -class SentencePieceTokenizer: - def __init__(self, tokenizer_path): - import sentencepiece as spm - from jinja2 import Template - - tokenizer = spm.SentencePieceProcessor(model_file=tokenizer_path) - - # Convert to HF interface - self.tokenizer = tokenizer - self.eos_token_id = tokenizer.eos_id() - self.vocab_size = tokenizer.vocab_size() - self.chat_template = Template( - "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'].strip() + '<|separator|>\n\n' }}{% elif message['role'] == 'system' %}{{ 'System: ' + message['content'].strip() + '<|separator|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|separator|>\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}" - ) - - def encode(self, x, add_special_tokens=False): - return self.tokenizer.encode(x) - - def decode(self, x): - return self.tokenizer.decode(x) - - def batch_decode( - self, batch, skip_special_tokens=True, spaces_between_special_tokens=False - ): - if isinstance(batch[0], int): - batch = [[x] for x in batch] - return self.tokenizer.decode(batch) - - def apply_chat_template(self, messages, tokenize, add_generation_prompt): - ret = self.chat_template.render( - messages=messages, add_generation_prompt=add_generation_prompt - ) - return self.encode(ret) if tokenize else ret diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py index 56e3d8f7990..3f80c64cf9d 100644 --- a/python/sglang/srt/managers/io_struct.py +++ b/python/sglang/srt/managers/io_struct.py @@ -55,6 +55,7 @@ def post_init(self): self.text is not None and self.input_ids is not None ): raise ValueError("Either text or input_ids should be provided.") + if ( isinstance(self.sampling_params, dict) and self.sampling_params.get("n", 1) != 1 @@ -161,10 +162,10 @@ class TokenizedGenerateReqInput: input_ids: List[int] # The pixel values for input images pixel_values: List[float] - # The hash of input images - image_hash: int - # The image size - image_size: List[int] + # The hash values of input images + image_hashes: List[int] + # The image sizes + image_sizes: List[List[int]] # The sampling parameters sampling_params: SamplingParams # Whether to return the logprobs diff --git a/python/sglang/srt/managers/schedule_batch.py 
diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index f3af821e4ef..5554170a350 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -121,8 +121,8 @@ def __init__(self, rid, origin_input_text, origin_input_ids):
         # For vision input
         self.pixel_values = None
-        self.image_size = None
-        self.image_offset = None
+        self.image_sizes = None
+        self.image_offsets = None
         self.pad_value = None

         # Prefix info
@@ -600,12 +600,12 @@ def check_for_jump_forward(self, model_runner):
                 if req.pixel_values is not None:
                     (
                         req.origin_input_ids,
-                        req.image_offset,
+                        req.image_offsets,
                     ) = model_runner.model.pad_input_ids(
                         req.origin_input_ids_unpadded,
                         req.pad_value,
-                        req.pixel_values.shape,
-                        req.image_size,
+                        req.pixel_values,
+                        req.image_sizes,
                     )

                 jump_forward_reqs.append(req)
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index c74251947bf..5ad4152ea93 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -23,6 +23,7 @@
 import os
 from typing import Dict, List, Optional, Tuple, Union

+import fastapi
 import numpy as np
 import transformers
 import uvloop
@@ -96,21 +97,18 @@ def __init__(
             trust_remote_code=server_args.trust_remote_code,
             model_overide_args=model_overide_args,
         )
-
         self.is_generation = is_generation_model(
             self.hf_config.architectures, self.server_args.is_embedding
         )
-
-        if server_args.context_length is not None:
-            self.context_len = server_args.context_length
-        else:
-            self.context_len = get_context_length(self.hf_config)
+        self.context_len = server_args.context_length or get_context_length(
+            self.hf_config
+        )

         # Create tokenizer
         if server_args.skip_tokenizer_init:
             self.tokenizer = self.processor = None
         else:
-            if is_multimodal_model(self.model_path):
+            if is_multimodal_model(self.hf_config.architectures):
                 self.processor = get_processor(
                     server_args.tokenizer_path,
                     tokenizer_mode=server_args.tokenizer_mode,
@@ -118,6 +116,9 @@ def __init__(
                 )
                 self.tokenizer = self.processor.tokenizer
                 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+                # We want to parallelize the image pre-processing so we
+                # create an executor for it
                 self.executor = concurrent.futures.ProcessPoolExecutor(
                     initializer=init_global_processor,
                     mp_context=mp.get_context("fork"),
@@ -134,12 +135,14 @@ def __init__(
         self.to_create_loop = True
         self.rid_to_state: Dict[str, ReqState] = {}

-        # for update model weights
+        # For update model weights
         self.model_update_lock = asyncio.Lock()
         self.model_update_result = None

     async def generate_request(
-        self, obj: Union[GenerateReqInput, EmbeddingReqInput], request=None
+        self,
+        obj: Union[GenerateReqInput, EmbeddingReqInput],
+        request: Optional[fastapi.Request] = None,
     ):
         if self.to_create_loop:
             self.create_handle_loop()
@@ -160,7 +163,7 @@ async def generate_request(
     async def _handle_single_request(
         self,
         obj: Union[GenerateReqInput, EmbeddingReqInput],
-        request,
+        request: Optional[fastapi.Request] = None,
         index: Optional[int] = None,
         is_cache_for_prefill: Optional[bool] = False,
     ):
@@ -182,8 +185,8 @@ async def _handle_single_request(
         )

         if self.is_generation:
-            pixel_values, image_hash, image_size = await self._get_pixel_values(
-                obj.image_data
+            pixel_values, image_hashes, image_sizes = await self._get_pixel_values(
+                obj.image_data if not_use_index else obj.image_data[index]
             )
             return_logprob = (
                 obj.return_logprob if not_use_index else obj.return_logprob[index]
             )
@@ -195,7 +198,6 @@ async def _handle_single_request(
         )
         if return_logprob and logprob_start_len == -1:
             logprob_start_len = len(input_ids) - 1
-
         top_logprobs_num = (
             obj.top_logprobs_num
             if not_use_index
@@ -238,13 +240,14 @@ async def _handle_single_request(
                 sampling_params = SamplingParams(**obj.sampling_params[0])
                 sampling_params.max_new_tokens = 0
-                pixel_values, image_hash, image_size = await self._get_pixel_values(
+                pixel_values, image_hashes, image_sizes = await self._get_pixel_values(
                     obj.image_data[0]
                 )
                 return_logprob = obj.return_logprob[0]
                 logprob_start_len = obj.logprob_start_len[0]
                 top_logprobs_num = obj.top_logprobs_num[0]

+        # Send to the controller
         if self.is_generation:
             if return_logprob and logprob_start_len == -1:
                 logprob_start_len = len(input_ids) - 1
@@ -253,8 +256,8 @@ async def _handle_single_request(
                 input_text,
                 input_ids,
                 pixel_values,
-                image_hash,
-                image_size,
+                image_hashes,
+                image_sizes,
                 sampling_params,
                 return_logprob,
                 logprob_start_len,
@@ -268,24 +271,24 @@ async def _handle_single_request(
                 input_ids,
                 sampling_params,
             )
-
         self.send_to_router.send_pyobj(tokenized_obj)

+        # Recv results
         event = asyncio.Event()
         state = ReqState([], False, event)
         self.rid_to_state[rid] = state

         if not is_cache_for_prefill:
-            async for response in self._wait_for_response(
-                event, state, obj, rid, request
-            ):
+            async for response in self._wait_for_response(state, obj, rid, request):
                 yield response
         else:
             assert self.is_generation
-            await self._wait_for_cache_prefill_response(event, state, obj, rid, request)
+            await self._wait_for_cache_prefill_response(state, obj, rid, request)
             yield input_ids

     async def _handle_batch_request(
-        self, obj: Union[GenerateReqInput, EmbeddingReqInput], request
+        self,
+        obj: Union[GenerateReqInput, EmbeddingReqInput],
+        request: Optional[fastapi.Request] = None,
     ):
         batch_size = obj.batch_size
         if self.is_generation:
@@ -340,8 +343,8 @@ async def _handle_batch_request(
                 if self.is_generation:
                     if obj.return_logprob[index] and obj.logprob_start_len[index] == -1:
                         obj.logprob_start_len[index] = len(input_ids) - 1
-                    pixel_values, image_hash, image_size = await self._get_pixel_values(
-                        obj.image_data[index]
+                    pixel_values, image_hashes, image_sizes = (
+                        await self._get_pixel_values(obj.image_data[index])
                     )

                     tokenized_obj = TokenizedGenerateReqInput(
                         input_text,
                         input_ids,
                         pixel_values,
-                        image_hash,
-                        image_size,
+                        image_hashes,
+                        image_sizes,
                         sampling_params,
                         obj.return_logprob[index],
                         obj.logprob_start_len[index],
@@ -372,7 +375,6 @@ async def _handle_batch_request(
                 generators.append(
                     self._wait_for_response(
-                        event,
                         state,
                         obj,
                         rid,
@@ -388,6 +390,7 @@ async def _handle_batch_request(
             tasks = [asyncio.create_task(gen.__anext__()) for gen in generators]
             output_list = [None] * len(tasks)

+            # Recv results
             while tasks:
                 done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
@@ -426,25 +429,18 @@ def _get_sampling_params(self, sampling_params_data: dict):
         sampling_params.verify()
         return sampling_params

-    async def _get_pixel_values(self, image_data):
-        if image_data is None:
-            return None, None, None
-        else:
-            return await self._get_pixel_values_internal(image_data)
-
     async def _wait_for_response(
         self,
-        event: asyncio.Event,
         state: ReqState,
         obj: Union[GenerateReqInput, EmbeddingReqInput],
         rid: str,
-        request,
-        index: int = None,
+        request: Optional[fastapi.Request] = None,
+        index: Optional[int] = None,
         response_index: int = 0,
     ):
         while True:
             try:
-                await asyncio.wait_for(event.wait(), timeout=4)
+                await asyncio.wait_for(state.event.wait(), timeout=4)
             except asyncio.TimeoutError:
                 if request is not None and await request.is_disconnected():
                     for rid in [obj.rid] if obj.is_single else obj.rid:
                         self.abort_request(rid)
                     raise ValueError(f"Abort request {rid}")
                 continue
@@ -478,16 +474,15 @@ async def _wait_for_response(
                     yield out
                     break

-            event.clear()
+            state.event.clear()
             yield out

     async def _wait_for_cache_prefill_response(
         self,
-        event: asyncio.Event,
         state: ReqState,
         obj: GenerateReqInput,
         rid: str,
-        request,
+        request: Optional[fastapi.Request] = None,
     ):
         while True:
             try:
@@ -514,7 +509,9 @@ def abort_request(self, rid: str):
         req = AbortReq(rid)
         self.send_to_router.send_pyobj(req)

-    async def update_weights(self, obj: UpdateWeightReqInput, request):
+    async def update_weights(
+        self, obj: UpdateWeightReqInput, request: Optional[fastapi.Request] = None
+    ):
         if self.to_create_loop:
             self.create_handle_loop()
@@ -659,12 +656,11 @@ def detokenize_top_logprobs_tokens(self, top_logprobs, decode_to_text: bool):
         )
         return top_logprobs

-    async def _get_pixel_values_internal(self, image_data, aspect_ratio=None):
-        aspect_ratio = (
-            getattr(self.hf_config, "image_aspect_ratio", None)
-            if aspect_ratio is None
-            else aspect_ratio
-        )
+    async def _get_pixel_values(self, image_data: List[Union[str, bytes]]):
+        if not image_data:
+            return None, None, None
+
+        aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None)
         grid_pinpoints = (
             self.hf_config.image_grid_pinpoints
             if hasattr(self.hf_config, "image_grid_pinpoints")
             else None
         )

         if isinstance(image_data, list) and len(image_data) > 0:
@@ -673,35 +669,42 @@
-            pixel_values, image_hash, image_size = [], [], []
+            # Multiple images
             if len(image_data) > 1:
                 aspect_ratio = "pad"  # LLaVA OneVision Handling: more than one image --> interleaved image mode or video mode. We do not use anyres
+            pixel_values, image_hashes, image_sizes = [], [], []
             for img_data in image_data:
                 pixel_v, image_h, image_s = await self._process_single_image(
                     img_data, aspect_ratio, grid_pinpoints
                 )
                 pixel_values.append(pixel_v)
-                image_hash.append(image_h)
-                image_size.append(image_s)
-            pixel_values = np.stack(pixel_values, axis=0)
+                image_hashes.append(image_h)
+                image_sizes.append(image_s)
+
+            if isinstance(pixel_values[0], np.ndarray):
+                pixel_values = np.stack(pixel_values, axis=0)
         else:
+            # A single image
             pixel_values, image_hash, image_size = await self._process_single_image(
                 image_data[0], aspect_ratio, grid_pinpoints
             )
-            image_hash = [image_hash]
-            image_size = [image_size]
+            image_hashes = [image_hash]
+            image_sizes = [image_size]
         elif isinstance(image_data, str):
+            # A single image
             pixel_values, image_hash, image_size = await self._process_single_image(
                 image_data, aspect_ratio, grid_pinpoints
             )
-            image_hash = [image_hash]
-            image_size = [image_size]
+            image_hashes = [image_hash]
+            image_sizes = [image_size]
         else:
-            pixel_values, image_hash, image_size = None, None, None
+            raise ValueError(f"Invalid image data: {image_data}")

-        return pixel_values, image_hash, image_size
+        return pixel_values, image_hashes, image_sizes

-    async def _process_single_image(self, image_data, aspect_ratio, grid_pinpoints):
+    async def _process_single_image(
+        self, image_data: Union[bytes, str], aspect_ratio: str, grid_pinpoints: str
+    ):
         if self.executor is not None:
             loop = asyncio.get_event_loop()
             return await loop.run_in_executor(
@@ -732,12 +735,16 @@ def init_global_processor(server_args: ServerArgs):


 def _process_single_image_task(
-    image_data, image_aspect_ratio=None, image_grid_pinpoints=None, processor=None
+    image_data: Union[str, bytes],
+    image_aspect_ratio: Optional[str] = None,
+    image_grid_pinpoints: Optional[str] = None,
+    processor=None,
 ):
     try:
         processor = processor or global_processor
         image, image_size = load_image(image_data)
         if image_size is not None:
+            # It is a video with multiple images
             image_hash = hash(image_data)
             pixel_values = processor.image_processor(image)["pixel_values"]
             for _ in range(len(pixel_values)):
                 pixel_values[_] = pixel_values[_].astype(np.float16)
             pixel_values = np.stack(pixel_values, axis=0)
             return pixel_values, image_hash, image_size
         else:
+            # It is an image
             image_hash = hash(image_data)
             if image_aspect_ratio == "pad":
                 image = expand2square(
                     image,
                     tuple(int(x * 255) for x in processor.image_processor.image_mean),
                 )
                 pixel_values = processor.image_processor(image.convert("RGB"))[
                     "pixel_values"
                 ][0]
-            elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
+            elif image_aspect_ratio == "anyres" or (
+                image_aspect_ratio is not None and "anyres_max" in image_aspect_ratio
+            ):
                 pixel_values = process_anyres_image(
                     image, processor.image_processor, image_grid_pinpoints
                 )
             else:
                 pixel_values = processor.image_processor(image)["pixel_values"][0]
-            pixel_values = pixel_values.astype(np.float16)
+
+            if isinstance(pixel_values, np.ndarray):
+                pixel_values = pixel_values.astype(np.float16)
+
             return pixel_values, image_hash, image.size
     except Exception:
         logger.error("Exception in TokenizerManager:\n" + get_exception_traceback())
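Note: the rewritten helper above folds the old `_get_pixel_values_internal` into `_get_pixel_values` and normalizes its outputs. A rough usage sketch (assumes an already-constructed TokenizerManager `tm`; more than one image forces `aspect_ratio = "pad"`):

    import asyncio

    async def preprocess(tm, image_bytes_list):
        # Returns stacked pixel values plus one hash and one size per image,
        # or (None, None, None) when no image data is given.
        pixel_values, image_hashes, image_sizes = await tm._get_pixel_values(image_bytes_list)
        return pixel_values, image_hashes, image_sizes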
diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index 65daed43b28..cd1b580643c 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -108,7 +108,7 @@ def __init__(
         if server_args.skip_tokenizer_init:
             self.tokenizer = self.processor = None
         else:
-            if is_multimodal_model(server_args.model_path):
+            if is_multimodal_model(self.model_config.hf_config.architectures):
                 self.processor = get_processor(
                     server_args.tokenizer_path,
                     tokenizer_mode=server_args.tokenizer_mode,
@@ -333,26 +333,24 @@ def handle_generate_request(
         if self.model_runner.is_generation:
             req.pixel_values = recv_req.pixel_values
             if req.pixel_values is not None:
-                image_hash = (
-                    hash(tuple(recv_req.image_hash))
-                    if isinstance(recv_req.image_hash, list)
-                    else recv_req.image_hash
-                )
+                # Use image hash as fake token_ids, which is then used
+                # for prefix matching
+                image_hash = hash(tuple(recv_req.image_hashes))
                 req.pad_value = [
                     (image_hash) % self.model_config.vocab_size,
                     (image_hash >> 16) % self.model_config.vocab_size,
                     (image_hash >> 32) % self.model_config.vocab_size,
                     (image_hash >> 64) % self.model_config.vocab_size,
                 ]
-                req.image_size = recv_req.image_size
+                req.image_sizes = recv_req.image_sizes
                 (
                     req.origin_input_ids,
-                    req.image_offset,
+                    req.image_offsets,
                 ) = self.model_runner.model.pad_input_ids(
                     req.origin_input_ids_unpadded,
                     req.pad_value,
-                    req.pixel_values.shape,
-                    req.image_size,
+                    req.pixel_values,
+                    req.image_sizes,
                 )
             req.return_logprob = recv_req.return_logprob
             req.logprob_start_len = recv_req.logprob_start_len
@@ -368,6 +366,7 @@ def handle_generate_request(
                 req.jump_forward_map = self.jump_forward_cache.query(
                     computed_regex_string
                 )
+            # Init regex fsm
             elif req.sampling_params.regex is not None:
                 req.regex_fsm = self.regex_fsm_cache.query(req.sampling_params.regex)
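Note: the tp_worker change above derives the cache pad tokens from a single combined hash. A standalone sketch of the folding (the vocab size is an assumed placeholder; the real value comes from the model config):

    image_hashes = [1234567890123456789, 987654321987654321]
    image_hash = hash(tuple(image_hashes))
    vocab_size = 32000  # assumed
    # Slice different bit ranges of the hash into four in-vocab pseudo token ids,
    # so different image sets produce different radix-cache prefixes.
    pad_value = [
        image_hash % vocab_size,
        (image_hash >> 16) % vocab_size,
        (image_hash >> 32) % vocab_size,
        (image_hash >> 64) % vocab_size,
    ]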
diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py
index c107b3bc826..f24cdf6b723 100644
--- a/python/sglang/srt/model_executor/forward_batch_info.py
+++ b/python/sglang/srt/model_executor/forward_batch_info.py
@@ -16,7 +16,7 @@
 """ModelRunner runs the forward passes of the models."""
 from dataclasses import dataclass
 from enum import IntEnum, auto
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List

 import numpy as np
 import torch
@@ -58,6 +58,7 @@ class InputMetadata:

     # For extend
     extend_seq_lens: torch.Tensor = None
+    extend_prefix_lens: torch.Tensor = None
     extend_start_loc: torch.Tensor = None
     extend_no_prefix: bool = None
@@ -69,8 +70,8 @@ class InputMetadata:

     # For multimodal
     pixel_values: List[torch.Tensor] = None
-    image_sizes: List[List[int]] = None
-    image_offsets: List[int] = None
+    image_sizes: List[List[List[int]]] = None
+    image_offsets: List[List[int]] = None

     # Trition attention backend
     triton_max_seq_len: int = 0
@@ -87,20 +88,8 @@ class InputMetadata:
     def init_multimuldal_info(self, batch: ScheduleBatch):
         reqs = batch.reqs
         self.pixel_values = [r.pixel_values for r in reqs]
-        self.image_sizes = [r.image_size for r in reqs]
-        self.image_offsets = []
-        for r in reqs:
-            if isinstance(r.image_offset, list):
-                self.image_offsets.append(
-                    [
-                        (image_offset - len(r.prefix_indices))
-                        for image_offset in r.image_offset
-                    ]
-                )
-            elif isinstance(r.image_offset, int):
-                self.image_offsets.append(r.image_offset - len(r.prefix_indices))
-            elif r.image_offset is None:
-                self.image_offsets.append(0)
+        self.image_sizes = [r.image_sizes for r in reqs]
+        self.image_offsets = [r.image_offsets for r in reqs]

     def compute_positions(self, batch: ScheduleBatch):
         position_ids_offsets = batch.position_ids_offsets
@@ -153,6 +142,7 @@ def compute_extend_infos(self, batch: ScheduleBatch):
             for i, r in enumerate(batch.reqs)
         ]
         self.extend_seq_lens = torch.tensor(extend_lens_cpu, device="cuda")
+        self.extend_prefix_lens = torch.tensor(batch.prefix_lens_cpu, device="cuda")
         self.extend_start_loc = torch.zeros_like(self.seq_lens)
         self.extend_start_loc[1:] = torch.cumsum(self.extend_seq_lens[:-1], dim=0)
         self.extend_no_prefix = all(l == 0 for l in batch.prefix_lens_cpu)
@@ -238,10 +228,10 @@ def init_flashinfer_handlers(
         prefix_lens_cpu,
         flashinfer_use_ragged,
     ):
-        if self.forward_mode != ForwardMode.DECODE:
-            prefix_lens = torch.tensor(prefix_lens_cpu, device="cuda")
-        else:
+        if self.forward_mode == ForwardMode.DECODE:
             prefix_lens = None
+        else:
+            prefix_lens = self.extend_prefix_lens

         update_flashinfer_indices(
             self.forward_mode,
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index abee152d6fd..8ef47a530f5 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -50,7 +50,7 @@
     MLATokenToKVPool,
     ReqToTokenPool,
 )
-from sglang.srt.model_config import AttentionArch
+from sglang.srt.model_config import AttentionArch, ModelConfig
 from sglang.srt.model_executor.forward_batch_info import ForwardMode, InputMetadata
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import (
@@ -69,7 +69,7 @@ class ModelRunner:
     def __init__(
         self,
-        model_config,
+        model_config: ModelConfig,
         mem_fraction_static: float,
         gpu_id: int,
         tp_rank: int,
@@ -85,7 +85,9 @@ def __init__(
         self.tp_size = tp_size
         self.nccl_port = nccl_port
         self.server_args = server_args
-        self.is_multimodal_model = is_multimodal_model(self.model_config)
+        self.is_multimodal_model = is_multimodal_model(
+            self.model_config.hf_config.architectures
+        )
         global_server_args_dict.update(
             {
                 "disable_flashinfer": server_args.disable_flashinfer,
@@ -95,6 +97,13 @@ def __init__(
             }
         )

+        if self.is_multimodal_model:
+            logger.info(
+                "Automatically turn off --chunked-prefill-size and adjust --mem-fraction-static for multimodal models."
+            )
+            server_args.chunked_prefill_size = None
+            server_args.mem_fraction_static *= 0.95
+
         min_per_gpu_memory = self.init_torch_distributed()
         self.load_model()
         self.init_memory_pool(
@@ -507,9 +516,9 @@ def init_cuda_graphs(self):
             raise Exception(
                 f"Capture cuda graph failed: {e}\n"
                 "Possible solutions:\n"
-                "1. disable torch compile by not using --enable-torch-compile\n"
-                "2. disable cuda graph by --disable-cuda-graph\n"
-                "3. set --mem-fraction-static to a smaller value\n"
+                "1. disable cuda graph by --disable-cuda-graph\n"
+                "2. set --mem-fraction-static to a smaller value\n"
+                "3. disable torch compile by not using --enable-torch-compile\n"
                 "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
             )
diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py
index 0a22f994bb4..b38b62fafd3 100644
--- a/python/sglang/srt/models/chatglm.py
+++ b/python/sglang/srt/models/chatglm.py
@@ -17,7 +17,7 @@
 # Adapted from
 # https://github.com/THUDM/ChatGLM2-6B
 """Inference-only ChatGLM model compatible with THUDM weights."""
-from typing import Iterable, List, Optional, Tuple
+from typing import Iterable, Optional, Tuple

 import torch
 from torch import nn
diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py
index 4a0a08bf88b..daf6f25da13 100644
--- a/python/sglang/srt/models/grok.py
+++ b/python/sglang/srt/models/grok.py
@@ -273,9 +273,9 @@ def forward(
     ) -> torch.Tensor:
         if input_embeds is None:
             hidden_states = self.embed_tokens(input_ids)
+            hidden_states.mul_(self.config.embedding_multiplier_scale)
         else:
             hidden_states = input_embeds
-        hidden_states.mul_(self.config.embedding_multiplier_scale)

         for i in range(len(self.layers)):
             hidden_states = self.layers[i](positions, hidden_states, input_metadata)
@@ -284,7 +284,7 @@ def forward(
         return hidden_states


-class Grok1ModelForCausalLM(nn.Module):
+class Grok1ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
@@ -415,4 +415,10 @@ def _prepare_presharded_weights(
     return hf_folder, hf_weights_files, use_safetensors


-EntryClass = Grok1ModelForCausalLM
+class Grok1ModelForCausalLM(Grok1ForCausalLM):
+    """An alias for backward-compatibility."""
+
+    pass
+
+
+EntryClass = [Grok1ForCausalLM, Grok1ModelForCausalLM]
diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py
index 9de8d33c5c1..fe75916a43b 100644
--- a/python/sglang/srt/models/llama2.py
+++ b/python/sglang/srt/models/llama2.py
@@ -357,6 +357,9 @@ def load_weights_per_param(name, loaded_weight):
             # Models trained using ColossalAI may include these tensors in
             # the checkpoint. Skip them.
             return
+        if name.startswith("model.vision_tower") and name not in params_dict:
+            return
+
         for param_name, weight_name, shard_id in stacked_params_mapping:
             if weight_name not in name:
                 continue
@@ -364,8 +367,6 @@ def load_weights_per_param(name, loaded_weight):
             # Skip loading extra bias for GPTQ models.
             if name.endswith(".bias") and name not in params_dict:
                 continue
-            if name.startswith("model.vision_tower") and name not in params_dict:
-                continue
             param = params_dict[name]
             weight_loader = param.weight_loader
             weight_loader(param, loaded_weight, shard_id)
@@ -374,8 +375,6 @@ def load_weights_per_param(name, loaded_weight):
             # Skip loading extra bias for GPTQ models.
             if name.endswith(".bias") and name not in params_dict:
                 return
-            if name.startswith("model.vision_tower") and name not in params_dict:
-                return
             param = params_dict[name]
             weight_loader = getattr(param, "weight_loader", default_weight_loader)
             weight_loader(param, loaded_weight)
diff --git a/python/sglang/srt/models/llama_classification.py b/python/sglang/srt/models/llama_classification.py
index 02224971d6a..c5effbfc9c6 100644
--- a/python/sglang/srt/models/llama_classification.py
+++ b/python/sglang/srt/models/llama_classification.py
@@ -103,8 +103,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
             # Skip loading extra bias for GPTQ models.
if name.endswith(".bias") and name not in params_dict: continue - if name.startswith("model.vision_tower") and name not in params_dict: - continue param = params_dict[name] weight_loader = param.weight_loader weight_loader(param, loaded_weight, shard_id) @@ -113,8 +111,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue - if name.startswith("model.vision_tower") and name not in params_dict: - continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) diff --git a/python/sglang/srt/models/llama_embedding.py b/python/sglang/srt/models/llama_embedding.py index dfff53cbcda..e4e9174f144 100644 --- a/python/sglang/srt/models/llama_embedding.py +++ b/python/sglang/srt/models/llama_embedding.py @@ -57,6 +57,9 @@ def load_weights_per_param(name, loaded_weight): # Models trained using ColossalAI may include these tensors in # the checkpoint. Skip them. return + if name.startswith("model.vision_tower") and name not in params_dict: + return + for param_name, weight_name, shard_id in stacked_params_mapping: if weight_name not in name: continue @@ -64,8 +67,6 @@ def load_weights_per_param(name, loaded_weight): # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue - if name.startswith("model.vision_tower") and name not in params_dict: - continue param = params_dict[name] weight_loader = param.weight_loader weight_loader(param, loaded_weight, shard_id) @@ -74,8 +75,6 @@ def load_weights_per_param(name, loaded_weight): # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: return - if name.startswith("model.vision_tower") and name not in params_dict: - return param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py index 76a0630fc2e..bc522bec9c7 100644 --- a/python/sglang/srt/models/llava.py +++ b/python/sglang/srt/models/llava.py @@ -28,7 +28,6 @@ LlavaConfig, MistralConfig, Qwen2Config, - SiglipVisionConfig, SiglipVisionModel, ) from transformers.models.llava.modeling_llava import LlavaMultiModalProjector @@ -66,13 +65,18 @@ def __init__( torch.empty(config.text_config.hidden_size, dtype=torch.float16) ) - def pad_input_ids(self, input_ids, pad_value, pt_shape=None, image_size=None): - + def pad_input_ids( + self, + input_ids: List[int], + pad_value: List[int], + pixel_values: List, + image_sizes: List[List[int]], + ): # hardcode for spatial_unpad + anyres - image_aspect_ratio = "anyres" if len(image_size) == 1 else "pad" + image_aspect_ratio = "anyres" if len(image_sizes) == 1 else "pad" offset_list = [] - for image_s in image_size: - if len(image_size) > 16: + for image_s in image_sizes: + if len(image_sizes) > 16: # 2x2 pooling with stride 2 new_image_feature_len = ( math.ceil(self.image_size / self.patch_size / 2) ** 2 @@ -153,17 +157,15 @@ def forward( if input_metadata.forward_mode == ForwardMode.EXTEND: bs = input_metadata.batch_size - # Embed text input + # Embed text inputs input_embeds = self.language_model.model.embed_tokens(input_ids) - # Embed vision input - need_vision = ( - (positions[input_metadata.extend_start_loc] < self.image_feature_len) - .cpu() - .numpy() + + # Whether the requests need vision inputs + max_image_offset = 
+                [max(image_offsets[i]) if image_offsets[i] else -1 for i in range(bs)]
             )
-            # FIXME: We need to substract the length of the system prompt
-            has_pixel = np.array([pixel_values[i] is not None for i in range(bs)])
-            need_vision = need_vision & has_pixel
+            start_positions = positions[input_metadata.extend_start_loc].cpu().numpy()
+            need_vision = start_positions <= max_image_offset

             if need_vision.any():
                 pixel_values = [pixel_values[i] for i in range(bs) if need_vision[i]]
@@ -332,31 +334,35 @@ def forward(
                     new_image_features.append(image_feature)
                 image_features = new_image_features

+                # Fill in the placeholder for the image
                 extend_start_loc_cpu = input_metadata.extend_start_loc.cpu().numpy()
+                prefix_lens_cpu = input_metadata.extend_prefix_lens.cpu().numpy()
                 pt = 0
                 for i in range(bs):
                     if not need_vision[i]:
                         continue

                     start_idx = extend_start_loc_cpu[i]
-                    pad_dim = image_features[pt].shape[-1]  # 576, 4096
-                    dim = input_embeds.shape[1]
-                    assert (
-                        pad_dim == dim
-                    ), "invalid pad_dim={}, input_embed_dim={}!".format(pad_dim, dim)
-                    # Fill in the placeholder for the image
-                    try:
-                        for j, image_off in enumerate(image_offsets[i]):
-                            # print("actual image_features length: ", image_features[pt][j].shape[0])
-                            pad_len = image_features[pt][j].shape[0]
-                            input_embeds[
-                                start_idx + image_off : start_idx + image_off + pad_len
-                            ] = image_features[pt][j]
-                    except RuntimeError as e:
-                        print(f"RuntimeError in llava image encoding: {e}")
-                        print(image_features[pt].shape)
-                        print(input_embeds.shape)
-                        print(start_idx, image_offsets[i])
+                    prefix_len = prefix_lens_cpu[i]
+
+                    # Multiple images
+                    for j, image_offset in enumerate(image_offsets[i]):
+                        if image_offset < prefix_len:
+                            continue
+
+                        tmp_image_feature = image_features[pt][j]
+                        pad_len = tmp_image_feature.shape[0]
+
+                        left_idx = start_idx + (image_offset - prefix_len)
+                        right_idx = start_idx + (image_offset - prefix_len) + pad_len
+                        try:
+                            input_embeds[left_idx:right_idx] = tmp_image_feature
+                        except RuntimeError as e:
+                            print(f"RuntimeError in image encoding: {e}")
+                            print(f"{input_embeds.shape=}, {tmp_image_feature.shape=}")
+                            print(
+                                f"{start_idx=}, {image_offset=}, {prefix_len=}, {pad_len=}"
+                            )
                     pt += 1

             return self.language_model(
@@ -366,8 +372,9 @@ def forward(
         return self.language_model(input_ids, positions, input_metadata)

     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
-        # load clip vision model by cfg['mm_vision_tower']:
-        # huggingface_name or path_of_clip_relative_to_llava_model_dir
+        # Load clip vision model by cfg['mm_vision_tower']:
+        # huggingface_name or path_of_clip_relative_to_llava_model_dir
+        # We put the initialization here instead of __init__ to allow it to be reused by other subclasses.
         vision_path = self.config.mm_vision_tower
         if "clip" in vision_path:
             self.vision_tower = CLIPVisionModel.from_pretrained(
@@ -422,8 +429,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         # load language model
         self.language_model.load_weights(weights)

-        monkey_path_clip_vision_embed_forward()
-
     @property
     def num_patches_per_side(self):
         return self.image_size // self.patch_size
@@ -495,36 +500,4 @@ def __init__(
         )


-first_call = True
-
-
-def clip_vision_embed_forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
-    batch_size = pixel_values.shape[0]
-
-    # Move this conv layer to CPU to avoid a bug in torch >= 2.1 on A10G.
-    global first_call
-    if first_call:
-        self.patch_embedding.cpu().float()
-        first_call = False
-    pixel_values = pixel_values.to(dtype=torch.float32, device="cpu")
-    patch_embeds = self.patch_embedding(pixel_values).cuda().half()
-
-    patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
-
-    class_embeds = self.class_embedding.expand(batch_size, 1, -1)
-    embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
-    embeddings = embeddings + self.position_embedding(self.position_ids)
-    return embeddings
-
-
-def monkey_path_clip_vision_embed_forward():
-    import transformers
-
-    setattr(
-        transformers.models.clip.modeling_clip.CLIPVisionEmbeddings,
-        "forward",
-        clip_vision_embed_forward,
-    )
-
-
 EntryClass = [LlavaLlamaForCausalLM, LlavaQwenForCausalLM, LlavaMistralForCausalLM]
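Note: the placeholder-filling loop above maps each image offset from the full token sequence into the current extend batch by subtracting the cached prefix length. A small numeric sketch with made-up values:

    start_idx = 0       # where this request's tokens start in the flattened batch
    prefix_len = 100    # tokens already served from the radix cache
    image_offset = 130  # placeholder position in the full sequence
    pad_len = 576       # placeholder tokens reserved for this image

    left_idx = start_idx + (image_offset - prefix_len)  # 30
    right_idx = left_idx + pad_len                      # 606
    # input_embeds[left_idx:right_idx] receives the image features; images whose
    # offset falls inside the prefix (image_offset < prefix_len) are skipped.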
diff --git a/python/sglang/srt/models/llavavid.py b/python/sglang/srt/models/llavavid.py
index 8b81251d692..44e400ff6a0 100644
--- a/python/sglang/srt/models/llavavid.py
+++ b/python/sglang/srt/models/llavavid.py
@@ -26,11 +26,6 @@
 from vllm.model_executor.layers.quantization.base_config import QuantizationConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

-from sglang.srt.mm_utils import (
-    get_anyres_image_grid_shape,
-    unpad_image,
-    unpad_image_shape,
-)
 from sglang.srt.model_executor.forward_batch_info import ForwardMode, InputMetadata
 from sglang.srt.models.llama2 import LlamaForCausalLM
@@ -59,23 +54,14 @@ def __init__(
             torch.empty(config.text_config.hidden_size, dtype=torch.float16)
         )

-    def pad_input_ids(self, input_ids, pad_value, pt_shape=None, image_size=None):
+    def pad_input_ids(
+        self,
+        input_ids: List[int],
+        pad_value: List[int],
+        pixel_values: List,
+        image_sizes: List[List[int]],
+    ):
         new_image_feature_len = self.image_feature_len
-        # now only support spatial_unpad + anyres
-        # if self.mm_patch_merge_type.startswith("spatial"):
-        #     height = width = self.num_patches_per_side
-        #     if pt_shape[0] > 1:
-        #         if self.image_aspect_ratio == "anyres":
-        #             num_patch_width, num_patch_height = get_anyres_image_grid_shape(
-        #                 image_size,
-        #                 self.image_grid_pinpoints,
-        #                 self.vision_tower.config.image_size,
-        #             )
-        #         if "unpad" in self.mm_patch_merge_type:
-        #             h = num_patch_height * height
-        #             w = num_patch_width * width
-        #             new_h, new_w = unpad_image_shape(h, w, image_size)
-        #             new_image_feature_len += new_h * (new_w + 1)

         pad_ids = pad_value * (
             (new_image_feature_len + len(pad_value)) // len(pad_value)
@@ -87,7 +73,7 @@ def pad_input_ids(
             + pad_ids[:new_image_feature_len]
             + input_ids[offset + 1 :]
         )
-        return new_input_ids, offset
+        return new_input_ids, [offset]

     def encode_images(self, pixel_values: torch.Tensor) -> torch.Tensor:
         image_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
@@ -133,22 +119,18 @@ def forward(
         if input_metadata.forward_mode == ForwardMode.EXTEND:
             bs = input_metadata.batch_size

-            # Embed text input
+            # Embed text inputs
             input_embeds = self.language_model.model.embed_tokens(input_ids)

-            # Embed vision input
-            need_vision = (
-                (positions[input_metadata.extend_start_loc] < self.image_feature_len)
-                .cpu()
-                .numpy()
+            # Whether the requests need vision inputs
+            max_image_offset = np.array(
+                [max(image_offsets[i]) if image_offsets[i] else -1 for i in range(bs)]
             )
-            # FIXME: We need to substract the length of the system prompt
-            has_pixel = np.array([pixel_values[i] is not None for i in range(bs)])
-            need_vision = need_vision & has_pixel
+            start_positions = positions[input_metadata.extend_start_loc].cpu().numpy()
+            need_vision = start_positions <= max_image_offset

             if need_vision.any():
                 pixel_values = [pixel_values[i] for i in range(bs) if need_vision[i]]
-                image_sizes = [image_sizes[i] for i in range(bs) if need_vision[i]]

                 ########## Encode Image ########
@@ -183,31 +165,36 @@ def forward(
                     new_image_features.append(image_feature.flatten(0, 1))
                 image_features = new_image_features

+                # Fill in the placeholder for the image
                 extend_start_loc_cpu = input_metadata.extend_start_loc.cpu().numpy()
+                prefix_lens_cpu = input_metadata.extend_prefix_lens.cpu().numpy()
                 pt = 0
                 for i in range(bs):
                     if not need_vision[i]:
                         continue

                     start_idx = extend_start_loc_cpu[i]
-                    pad_len, pad_dim = image_features[pt].shape  # 576, 4096
-                    dim = input_embeds.shape[1]
-                    assert (
-                        pad_dim == dim
-                    ), "invalid pad_dim={}, input_embed_dim={}!".format(pad_dim, dim)
-                    # Fill in the placeholder for the image
-                    try:
-                        input_embeds[
-                            start_idx
-                            + image_offsets[i] : start_idx
-                            + image_offsets[i]
-                            + pad_len
-                        ] = image_features[pt]
-                    except RuntimeError as e:
-                        print(f"RuntimeError in llava image encoding: {e}")
-                        print(input_embeds.shape)
-                        print(start_idx, image_offsets[i])
-                    pt += 1
+                    prefix_len = prefix_lens_cpu[i]
+
+                    # Multiple images
+                    for image_offset in image_offsets[i]:
+                        if image_offset < prefix_len:
+                            continue
+
+                        tmp_image_feature = image_features[pt]
+                        pad_len = tmp_image_feature.shape[0]
+
+                        left_idx = start_idx + (image_offset - prefix_len)
+                        right_idx = start_idx + (image_offset - prefix_len) + pad_len
+                        try:
+                            input_embeds[left_idx:right_idx] = tmp_image_feature
+                        except RuntimeError as e:
+                            print(f"RuntimeError in image encoding: {e}")
+                            print(f"{input_embeds.shape=}, {tmp_image_feature.shape=}")
+                            print(
+                                f"{start_idx=}, {image_offset=}, {prefix_len=}, {pad_len=}"
+                            )
+                        pt += 1

             return self.language_model(
                 input_ids, positions, input_metadata, input_embeds=input_embeds
@@ -216,8 +203,9 @@ def forward(
         return self.language_model(input_ids, positions, input_metadata)

     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
-        # load clip vision model by cfg['mm_vision_tower']:
-        # huggingface_name or path_of_clip_relative_to_llava_model_dir
+        # Load clip vision model by cfg['mm_vision_tower']:
+        # huggingface_name or path_of_clip_relative_to_llava_model_dir
+        # We put the initialization here instead of __init__ to allow it to be reused by other subclasses.
         vision_path = self.config.mm_vision_tower
         self.vision_tower = CLIPVisionModel.from_pretrained(
             vision_path, torch_dtype=torch.float16
@@ -271,43 +259,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         # load language model
         self.language_model.load_weights(weights)

-        monkey_path_clip_vision_embed_forward()
-
     @property
     def num_patches_per_side(self):
         return self.image_size // self.patch_size


-first_call = True
-
-
-def clip_vision_embed_forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
-    batch_size = pixel_values.shape[0]
-
-    # Move this conv layer to CPU to avoid a bug in torch >= 2.1 on A10G.
-    global first_call
-    if first_call:
-        self.patch_embedding.cpu().float()
-        first_call = False
-    pixel_values = pixel_values.to(dtype=torch.float32, device="cpu")
-    patch_embeds = self.patch_embedding(pixel_values).cuda().half()
-
-    patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
-
-    class_embeds = self.class_embedding.expand(batch_size, 1, -1)
-    embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
-    embeddings = embeddings + self.position_embedding(self.position_ids)
-    return embeddings
-
-
-def monkey_path_clip_vision_embed_forward():
-    import transformers
-
-    setattr(
-        transformers.models.clip.modeling_clip.CLIPVisionEmbeddings,
-        "forward",
-        clip_vision_embed_forward,
-    )
-
-
 EntryClass = LlavaVidForCausalLM
diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
index fcf083e1b5d..a0c54f69105 100644
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -312,6 +312,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                 # Models trained using ColossalAI may include these tensors in
                 # the checkpoint. Skip them.
                 continue
+            if name.startswith("model.vision_tower") and name not in params_dict:
+                continue
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
                     continue
@@ -319,8 +322,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
                     continue
-                if name.startswith("model.vision_tower") and name not in params_dict:
-                    continue
                 param = params_dict[name]
                 weight_loader = param.weight_loader
                 weight_loader(param, loaded_weight, shard_id)
@@ -329,8 +330,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                 # Skip loading extra bias for GPTQ models.
if name.endswith(".bias") and name not in params_dict: continue - if name.startswith("model.vision_tower") and name not in params_dict: - continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) diff --git a/python/sglang/srt/models/yivl.py b/python/sglang/srt/models/yivl.py index 11d4cda1c00..0f86206d821 100644 --- a/python/sglang/srt/models/yivl.py +++ b/python/sglang/srt/models/yivl.py @@ -24,10 +24,7 @@ from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from sglang.srt.models.llava import ( - LlavaLlamaForCausalLM, - monkey_path_clip_vision_embed_forward, -) +from sglang.srt.models.llava import LlavaLlamaForCausalLM class YiVLForCausalLM(LlavaLlamaForCausalLM): @@ -50,7 +47,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): self.config._name_or_path, torch_dtype=torch.float16, subfolder=self.vision_tower_subfolder, - ).cuda() + ).to("cuda") self.vision_tower.eval() @@ -94,8 +91,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): # load language model self.language_model.load_weights(weights) - monkey_path_clip_vision_embed_forward() - class YiVLMultiModalProjector(nn.Module): def __init__(self, config: LlavaConfig): diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index f3d1ab0f94d..9c36216eded 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -335,12 +335,12 @@ def launch_server( pipe_detoken_reader, pipe_detoken_writer = mp.Pipe(duplex=False) if server_args.dp_size == 1: - start_process = start_controller_process_single + start_controller_process = start_controller_process_single else: - start_process = start_controller_process_multi + start_controller_process = start_controller_process_multi proc_controller = mp.Process( - target=start_process, + target=start_controller_process, args=(server_args, port_args, pipe_controller_writer, model_overide_args), ) proc_controller.start() @@ -421,7 +421,7 @@ def _set_envs_and_config(server_args: ServerArgs): if not server_args.disable_flashinfer: assert_pkg_version( "flashinfer", - "0.1.6", + "0.1.5", "Please uninstall the old version and " "reinstall the latest version by following the instructions " "at https://docs.flashinfer.ai/installation.html.", diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index a6e710009f2..b7bb6573068 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -26,7 +26,7 @@ import time from importlib.metadata import PackageNotFoundError, version from io import BytesIO -from typing import List, Optional +from typing import List, Optional, Union import numpy as np import psutil @@ -193,35 +193,16 @@ def allocate_init_ports( return ret_ports[0], ret_ports[1:num_ports_needed] -def get_int_token_logit_bias(tokenizer, vocab_size): - """Get the logit bias for integer-only tokens.""" - # a bug when model's vocab size > tokenizer.vocab_size - if tokenizer == None: - return [-1e5] * vocab_size - vocab_size = tokenizer.vocab_size - logit_bias = np.zeros(vocab_size, dtype=np.float32) - for t_id in range(vocab_size): - ss = tokenizer.decode([t_id]).strip() - if not (ss.isdigit() or len(ss) == 0 or t_id == tokenizer.eos_token_id): - logit_bias[t_id] = -1e5 - - return logit_bias - - -def is_multimodal_model(model): - from sglang.srt.model_config import ModelConfig - - if isinstance(model, str): - model = 
-        return "llava" in model or "yi-vl" in model or "llava-next" in model
-
-    if isinstance(model, ModelConfig):
-        model_path = model.path.lower()
-        return (
-            "llava" in model_path or "yi-vl" in model_path or "llava-next" in model_path
-        )
-
-    raise ValueError("unrecognized type")
+def is_multimodal_model(model_architectures):
+    if (
+        "LlavaLlamaForCausalLM" in model_architectures
+        or "LlavaQwenForCausalLM" in model_architectures
+        or "LlavaMistralForCausalLM" in model_architectures
+        or "LlavaVidForCausalLM" in model_architectures
+    ):
+        return True
+    else:
+        return False


 def is_generation_model(model_architectures, is_embedding: bool = False):
@@ -317,12 +298,14 @@ def decode_video_base64(video_base64):
     )  # Return an empty array and size tuple if no frames were found


-def load_image(image_file):
+def load_image(image_file: Union[str, bytes]):
     from PIL import Image

     image = image_size = None

-    if image_file.startswith("http://") or image_file.startswith("https://"):
+    if isinstance(image_file, bytes):
+        image = Image.open(BytesIO(image_file))
+    elif image_file.startswith("http://") or image_file.startswith("https://"):
         timeout = int(os.getenv("REQUEST_TIMEOUT", "3"))
         response = requests.get(image_file, timeout=timeout)
         image = Image.open(BytesIO(response.content))
@@ -334,8 +317,10 @@ def load_image(image_file):
     elif image_file.startswith("video:"):
         image_file = image_file.replace("video:", "")
         image, image_size = decode_video_base64(image_file)
-    else:
+    elif isinstance(image_file, str):
         image = Image.open(BytesIO(base64.b64decode(image_file)))
+    else:
+        raise ValueError(f"Invalid image: {image_file}")

     return image, image_size
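Note: with the change above, load_image also accepts raw bytes in addition to URLs, base64 strings, and video payloads. A usage sketch (the URL and file name are hypothetical):

    from sglang.srt.utils import load_image

    image, _ = load_image("https://example.com/cat.png")  # fetched over HTTP
    with open("cat.png", "rb") as f:
        image, _ = load_image(f.read())                   # raw bytes, newly supported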
diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py
index 0003e4776ab..cf29c0e815a 100644
--- a/test/srt/test_vision_openai_server.py
+++ b/test/srt/test_vision_openai_server.py
@@ -32,8 +32,6 @@ def setUpClass(cls):
             other_args=[
                 "--chat-template",
                 "chatml-llava",
-                "--chunked-prefill-size",
-                "16384",
                 # "--log-requests",
             ],
         )

From c411f32e1c9b551011a52566b5afae1320a99fde Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 29 Aug 2024 00:07:02 +1000
Subject: [PATCH 43/88] feat: replace GeluAndMul (#1234)

---
 python/sglang/srt/layers/activation.py    | 14 ++++++++++----
 python/sglang/srt/models/gemma.py         |  4 ++--
 test/srt/models/test_generation_models.py |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py
index 5df387cb2b9..9047197af2f 100644
--- a/python/sglang/srt/layers/activation.py
+++ b/python/sglang/srt/layers/activation.py
@@ -18,7 +18,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from flashinfer.activation import gelu_tanh_and_mul, silu_and_mul
+from flashinfer.activation import gelu_and_mul, gelu_tanh_and_mul, silu_and_mul
 from vllm.distributed import (
     divide,
     get_tensor_model_parallel_rank,
@@ -43,18 +43,24 @@ def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:


 class GeluAndMul(CustomOp):
-    def __init__(self, **kwargs):
+    def __init__(self, approximate="tanh"):
         super().__init__()
+        self.approximate = approximate

     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         d = x.shape[-1] // 2
-        return F.gelu(x[..., :d], approximate="tanh") * x[..., d:]
+        return F.gelu(x[..., :d], approximate=self.approximate) * x[..., d:]

     def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
         d = x.shape[-1] // 2
         output_shape = x.shape[:-1] + (d,)
         out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
-        gelu_tanh_and_mul(x, out)
+        if self.approximate == "tanh":
+            gelu_tanh_and_mul(x, out)
+        elif self.approximate == "none":
+            gelu_and_mul(x, out)
+        else:
+            raise RuntimeError("GeluAndMul only supports tanh or none")
         return out
diff --git a/python/sglang/srt/models/gemma.py b/python/sglang/srt/models/gemma.py
index 990937f5180..ae3b1b1948c 100644
--- a/python/sglang/srt/models/gemma.py
+++ b/python/sglang/srt/models/gemma.py
@@ -23,7 +23,6 @@
 from transformers import PretrainedConfig
 from vllm.config import CacheConfig, LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
-from vllm.model_executor.layers.activation import GeluAndMul
 from vllm.model_executor.layers.linear import (
     MergedColumnParallelLinear,
     QKVParallelLinear,
@@ -34,6 +33,7 @@
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader

+from sglang.srt.layers.activation import GeluAndMul
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.logits_processor import LogitsProcessor
 from sglang.srt.layers.radix_attention import RadixAttention
@@ -60,7 +60,7 @@ def __init__(
             bias=False,
             quant_config=quant_config,
         )
-        self.act_fn = GeluAndMul()
+        self.act_fn = GeluAndMul("none")

     def forward(self, x):
         gate_up, _ = self.gate_up_proj(x)
diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py
index e38584741e0..08288c510c9 100644
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -96,7 +96,7 @@ def assert_close_prefill_logits_and_output_strs(
                 if hf_logprobs.shape[0] <= 100:
                     assert torch.all(
                         abs(hf_logprobs - srt_logprobs) < prefill_tolerance
-                    ), "prefill logprobs are not all close"
+                    ), f"prefill logprobs are not all close with model_path={model_path} prompts={prompts} prefill_tolerance={prefill_tolerance}"

         print(f"hf_outputs.output_strs={hf_outputs.output_strs}")
         print(f"srt_outputs.output_strs={srt_outputs.output_strs}")
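Note: the activation change above lets Gemma request exact GELU (approximate="none") instead of the tanh approximation. A minimal sketch of the native path, without flashinfer:

    import torch
    import torch.nn.functional as F

    def gelu_and_mul(x: torch.Tensor, approximate: str = "none") -> torch.Tensor:
        # The fused gate/up projection is split in half along the last dimension.
        d = x.shape[-1] // 2
        return F.gelu(x[..., :d], approximate=approximate) * x[..., d:]

    x = torch.randn(2, 8)
    out_exact = gelu_and_mul(x, "none")  # what GeluAndMul("none") computes
    out_tanh = gelu_and_mul(x, "tanh")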
From 0a97d7962d31728a3e4d5936b105ab27a83cd1a9 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Wed, 28 Aug 2024 08:38:50 -0700
Subject: [PATCH 44/88] [Fix] Fix OOM in llava base class (#1249)

---
 .../http_llava_onevision_test.py  |  2 +-
 python/sglang/srt/models/llava.py | 51 ++++++++++---------
 python/sglang/srt/server.py       |  2 +-
 3 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/examples/runtime/llava_onevision/http_llava_onevision_test.py b/examples/runtime/llava_onevision/http_llava_onevision_test.py
index 41d60b12afc..0c93d2ce2b2 100644
--- a/examples/runtime/llava_onevision/http_llava_onevision_test.py
+++ b/examples/runtime/llava_onevision/http_llava_onevision_test.py
@@ -1,7 +1,7 @@
 """
 Usage:

-python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384
+python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava

 python3 http_llava_onevision_test.py
 """
diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py
index bc522bec9c7..7dcf5348b05 100644
--- a/python/sglang/srt/models/llava.py
+++ b/python/sglang/srt/models/llava.py
@@ -46,25 +46,7 @@
 from sglang.srt.models.qwen2 import Qwen2ForCausalLM


-class LlavaLlamaForCausalLM(nn.Module):
-    def __init__(
-        self,
-        config: LlavaConfig,
-        quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
-    ) -> None:
-        super().__init__()
-        self.config = config
-        self.vision_tower = None
-        self.config.vision_config.hidden_size = config.mm_hidden_size
-        self.config.text_config.hidden_size = config.hidden_size
-        self.multi_modal_projector = LlavaMultiModalProjector(config)
-        self.language_model = LlamaForCausalLM(config, quant_config=quant_config)
-        if "unpad" in getattr(config, "mm_patch_merge_type", ""):
-            self.language_model.model.image_newline = nn.Parameter(
-                torch.empty(config.text_config.hidden_size, dtype=torch.float16)
-            )
-
+class LlavaBaseForCausalLM(nn.Module):
     def pad_input_ids(
         self,
         input_ids: List[int],
@@ -434,14 +416,36 @@ def num_patches_per_side(self):
         return self.image_size // self.patch_size


-class LlavaQwenForCausalLM(LlavaLlamaForCausalLM):
+class LlavaLlamaForCausalLM(LlavaBaseForCausalLM):
+    def __init__(
+        self,
+        config: LlavaConfig,
+        quant_config: Optional[QuantizationConfig] = None,
+        cache_config: Optional[CacheConfig] = None,
+    ) -> None:
+        super().__init__()
+
+        self.config = config
+        self.vision_tower = None
+        self.config.vision_config.hidden_size = config.mm_hidden_size
+        self.config.text_config.hidden_size = config.hidden_size
+        self.multi_modal_projector = LlavaMultiModalProjector(config)
+        self.language_model = LlamaForCausalLM(config, quant_config=quant_config)
+        if "unpad" in getattr(config, "mm_patch_merge_type", ""):
+            self.language_model.model.image_newline = nn.Parameter(
+                torch.empty(config.text_config.hidden_size, dtype=torch.float16)
+            )
+
+
+class LlavaQwenForCausalLM(LlavaBaseForCausalLM):
     def __init__(
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
         cache_config: Optional[CacheConfig] = None,
     ) -> None:
-        super().__init__(config, quant_config=quant_config, cache_config=cache_config)
+        super().__init__()
+
         self.config = config
         self.vision_tower = None
         if getattr(self.config, "vision_config", None) is None:
@@ -467,14 +471,15 @@ def __init__(
         )


-class LlavaMistralForCausalLM(LlavaLlamaForCausalLM):
+class LlavaMistralForCausalLM(LlavaBaseForCausalLM):
     def __init__(
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
         cache_config: Optional[CacheConfig] = None,
     ) -> None:
-        super().__init__(config, quant_config=quant_config, cache_config=cache_config)
+        super().__init__()
+
         self.config = config
         self.vision_tower = None
         if getattr(self.config, "vision_config", None) is None:
diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 9c36216eded..5ba2a45e70c 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -421,7 +421,7 @@ def _set_envs_and_config(server_args: ServerArgs):
     if not server_args.disable_flashinfer:
         assert_pkg_version(
             "flashinfer",
-            "0.1.5",
+            "0.1.6",
             "Please uninstall the old version and "
             "reinstall the latest version by following the instructions "
             "at https://docs.flashinfer.ai/installation.html.",

From 492143bf32b25848300dcc18bd51fef6c25d02d7 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 29 Aug 2024 04:25:46 +1000
Subject: [PATCH 45/88] fix: resolve qwen2 moe weight loader (#1252)

---
 python/sglang/srt/models/qwen2_moe.py | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py
index 9bdbd750660..d5c79a40f0e 100644
--- a/python/sglang/srt/models/qwen2_moe.py
+++ b/python/sglang/srt/models/qwen2_moe.py
@@ -401,24 +401,12 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
             ("gate_up_proj", "up_proj", 1),
         ]

-        expert_params_mapping = [
-            # These are the weights for the experts
-            # (param_name, weight_name, expert_id, shard_id)
-            (
-                (
-                    "experts.w13_weight"
-                    if weight_name in ["gate_proj", "up_proj"]
-                    else "experts.w2_weight"
-                ),
-                f"experts.{expert_id}.{weight_name}.weight",
-                expert_id,
-                shard_id,
-            )
-            for expert_id in range(self.config.num_experts)
-            for shard_id, weight_name in enumerate(
-                ["gate_proj", "down_proj", "up_proj"]
-            )
-        ]
+        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+            ckpt_gate_proj_name="gate_proj",
+            ckpt_down_proj_name="down_proj",
+            ckpt_up_proj_name="up_proj",
+            num_experts=self.config.num_experts,
+        )

         params_dict = dict(self.named_parameters())
         for name, loaded_weight in weights:
@@ -458,7 +446,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                     weight_loader(
                         param,
                         loaded_weight,
-                        weight_name,
+                        name,
                         shard_id=shard_id,
                         expert_id=expert_id,
                     )

From 13ac95b8946ff0bc62527567931bdf647cc43c5e Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Thu, 29 Aug 2024 04:46:33 +1000
Subject: [PATCH 46/88] chore: bump v0.2.14.post2 (#1250)

---
 README.md                | 2 +-
 python/pyproject.toml    | 2 +-
 python/sglang/version.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 9d795ce4382..8e3e47c100b 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.2.14.post1 https://github.com/sgl-project/sglang.git
+git clone -b v0.2.14.post2 https://github.com/sgl-project/sglang.git
 cd sglang

 pip install --upgrade pip
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 7b2741fd216..87c99bffaea 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "sglang"
-version = "0.2.14.post1"
+version = "0.2.14.post2"
 description = "SGLang is yet another fast serving framework for large language models and vision language models."
 readme = "README.md"
 requires-python = ">=3.8"
diff --git a/python/sglang/version.py b/python/sglang/version.py
index 839b265519b..ad954de503b 100644
--- a/python/sglang/version.py
+++ b/python/sglang/version.py
@@ -1 +1 @@
-__version__ = "0.2.14.post1"
+__version__ = "0.2.14.post2"

From 6c34d6339c040628e895d167cf22f2ab7104f8b3 Mon Sep 17 00:00:00 2001
From: Enrique Shockwave <33002121+qeternity@users.noreply.github.com>
Date: Thu, 29 Aug 2024 02:57:10 +0100
Subject: [PATCH 47/88] make json_schema usable from gen (#1254)

---
 python/sglang/api.py              | 2 ++
 python/sglang/lang/interpreter.py | 1 +
 python/sglang/lang/ir.py          | 5 +++++
 3 files changed, 8 insertions(+)

diff --git a/python/sglang/api.py b/python/sglang/api.py
index 3a2f747bec2..9405606b712 100644
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -78,6 +78,7 @@ def gen(
     choices: Optional[List[str]] = None,
     choices_method: Optional[ChoicesSamplingMethod] = None,
     regex: Optional[str] = None,
+    json_schema: Optional[str] = None,
 ):
     """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
@@ -114,6 +115,7 @@ def gen(
         return_text_in_logprobs,
         dtype,
         regex,
+        json_schema,
     )
diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py
index 306d280c7f0..91f48456aac 100644
--- a/python/sglang/lang/interpreter.py
+++ b/python/sglang/lang/interpreter.py
@@ -673,6 +673,7 @@ def _resolve_sampling_params(self, sampling_params):
             "return_text_in_logprobs",
             "dtype",
             "regex",
+            "json_schema",
         ]:
             value = getattr(sampling_params, item, None)
             if value is not None:
diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py
index 199a7ac7a4e..99a3e8e68b6 100644
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -30,6 +30,7 @@ class SglSamplingParams:
     logprob_start_len: Optional[int] = (None,)
     top_logprobs_num: Optional[int] = (None,)
     return_text_in_logprobs: Optional[bool] = (None,)
+    json_schema: Optional[str] = None

     # for constrained generation, not included in to_xxx_kwargs
     dtype: Optional[str] = None
@@ -51,6 +52,7 @@ def clone(self):
             self.logprob_start_len,
             self.top_logprobs_num,
             self.return_text_in_logprobs,
+            self.json_schema,
         )

     def to_openai_kwargs(self):
@@ -121,6 +123,7 @@ def to_srt_kwargs(self):
             "presence_penalty": self.presence_penalty,
             "ignore_eos": self.ignore_eos,
             "regex": self.regex,
+            "json_schema": self.json_schema,
         }


@@ -425,6 +428,7 @@ def __init__(
         return_text_in_logprobs: Optional[bool] = None,
         dtype: Optional[type] = None,
         regex: Optional[str] = None,
+        json_schema: Optional[str] = None,
     ):
         """Call the model to generate. See the meaning of the arguments in docs/en/sampling_params.md"""
         super().__init__()
@@ -446,6 +450,7 @@ def __init__(
             return_text_in_logprobs=return_text_in_logprobs,
             dtype=dtype,
             regex=regex,
+            json_schema=json_schema,
         )

     def __repr__(self):
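Note: a sketch of how the newly plumbed json_schema argument can be used from the frontend language (the schema string is an arbitrary example):

    import sglang as sgl

    schema = r'{"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}'

    @sgl.function
    def describe(s, who):
        s += "Describe " + who + " as JSON: "
        s += sgl.gen("out", max_tokens=64, json_schema=schema)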
""" +import copy import uuid -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, List, Optional, Union from sglang.srt.managers.schedule_batch import BaseFinishReason @@ -249,6 +250,10 @@ class BatchTokenIDOut: meta_info: List[Dict] finished_reason: List[BaseFinishReason] + def __post_init__(self): + # deepcopy meta_info to avoid modification in place + self.meta_info = copy.deepcopy(self.meta_info) + @dataclass class BatchStrOut: From 381dd57bd69f027a3298d107d8eb851c3c29d8e4 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Wed, 28 Aug 2024 18:58:52 -0700 Subject: [PATCH 49/88] Sampler cudagraph (#1253) --- python/sglang/bench_latency.py | 14 ++-- python/sglang/srt/layers/logits_processor.py | 8 +- python/sglang/srt/layers/sampler.py | 83 +++++++++++++++---- python/sglang/srt/managers/schedule_batch.py | 28 +++++-- python/sglang/srt/managers/tp_worker.py | 52 +++++++----- .../srt/model_executor/cuda_graph_runner.py | 33 ++++++-- .../srt/model_executor/forward_batch_info.py | 7 ++ .../sglang/srt/model_executor/model_runner.py | 14 +++- python/sglang/srt/models/chatglm.py | 16 +--- python/sglang/srt/models/commandr.py | 6 +- python/sglang/srt/models/dbrx.py | 6 +- python/sglang/srt/models/deepseek.py | 6 +- python/sglang/srt/models/deepseek_v2.py | 6 +- python/sglang/srt/models/gemma.py | 6 +- python/sglang/srt/models/gemma2.py | 6 +- python/sglang/srt/models/gpt_bigcode.py | 6 +- python/sglang/srt/models/grok.py | 6 +- python/sglang/srt/models/internlm2.py | 6 +- python/sglang/srt/models/llama2.py | 10 ++- .../sglang/srt/models/llama_classification.py | 4 +- python/sglang/srt/models/minicpm.py | 6 +- python/sglang/srt/models/mixtral.py | 6 +- python/sglang/srt/models/mixtral_quant.py | 6 +- python/sglang/srt/models/qwen.py | 7 +- python/sglang/srt/models/qwen2.py | 8 +- python/sglang/srt/models/qwen2_moe.py | 19 ++--- python/sglang/srt/models/stablelm.py | 6 +- .../srt/sampling/sampling_batch_info.py | 75 ++++++++++++++++- python/sglang/test/runners.py | 2 +- 29 files changed, 342 insertions(+), 116 deletions(-) diff --git a/python/sglang/bench_latency.py b/python/sglang/bench_latency.py index dea910f5772..3a487408573 100644 --- a/python/sglang/bench_latency.py +++ b/python/sglang/bench_latency.py @@ -200,16 +200,16 @@ def extend(reqs, model_runner): tree_cache=None, ) batch.prepare_for_extend(model_runner.model_config.vocab_size) - output = model_runner.forward(batch, ForwardMode.EXTEND) - next_token_ids = batch.sample(output.next_token_logits) - return next_token_ids, output.next_token_logits, batch + sample_output, logits_output = model_runner.forward(batch, ForwardMode.EXTEND) + next_token_ids = sample_output.batch_next_token_ids.tolist() + return next_token_ids, logits_output.next_token_logits, batch def decode(input_token_ids, batch, model_runner): - batch.prepare_for_decode(input_token_ids.cpu().numpy()) - output = model_runner.forward(batch, ForwardMode.DECODE) - next_token_ids = batch.sample(output.next_token_logits) - return next_token_ids, output.next_token_logits + batch.prepare_for_decode(input_token_ids) + sample_output, logits_output = model_runner.forward(batch, ForwardMode.DECODE) + next_token_ids = sample_output.batch_next_token_ids.tolist() + return next_token_ids, logits_output.next_token_logits @torch.inference_mode() diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py index 63f74d8b026..b81f3d2a040 100644 --- a/python/sglang/srt/layers/logits_processor.py +++ 
b/python/sglang/srt/layers/logits_processor.py @@ -29,7 +29,7 @@ @dataclasses.dataclass -class LogitProcessorOutput: +class LogitsProcessorOutput: # The logits of the next tokens. shape: [#seq, vocab_size] next_token_logits: torch.Tensor # The logprobs of the next tokens. shape: [#seq, vocab_size] @@ -185,7 +185,7 @@ def forward( # Return only last_logits if logprob is not requested if not logits_metadata.return_logprob: - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=last_logits, next_token_logprobs=None, normalized_prompt_logprobs=None, @@ -209,7 +209,7 @@ def forward( else: output_top_logprobs = None - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=last_logits, next_token_logprobs=last_logprobs, normalized_prompt_logprobs=None, @@ -278,7 +278,7 @@ def forward( # Remove the last token logprob for the prefill tokens. input_token_logprobs = input_token_logprobs[:-1] - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=last_logits, next_token_logprobs=last_logprobs, normalized_prompt_logprobs=normalized_prompt_logprobs, diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py index 3006e765c88..6cb7d0a7c11 100644 --- a/python/sglang/srt/layers/sampler.py +++ b/python/sglang/srt/layers/sampler.py @@ -1,4 +1,6 @@ +import dataclasses import logging +from typing import Union import torch from flashinfer.sampling import ( @@ -9,6 +11,8 @@ ) from vllm.model_executor.custom_op import CustomOp +from sglang.srt.layers.logits_processor import LogitsProcessorOutput + # TODO: move this dict to another place from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo @@ -16,30 +20,71 @@ logger = logging.getLogger(__name__) +@dataclasses.dataclass +class SampleOutput: + success: torch.Tensor + probs: torch.Tensor + batch_next_token_ids: torch.Tensor + + class Sampler(CustomOp): def __init__(self): super().__init__() - def forward_cuda(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): + def _apply_penalties(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): + # min-token, presence, frequency + if sampling_info.linear_penalties is not None: + logits += sampling_info.linear_penalties + + # repetition + if sampling_info.scaling_penalties is not None: + logits = torch.where( + logits > 0, + logits / sampling_info.scaling_penalties, + logits * sampling_info.scaling_penalties, + ) + + return logits + + def _get_probs( + self, + logits: torch.Tensor, + sampling_info: SamplingBatchInfo, + is_torch_compile: bool = False, + ): # Post process logits logits = logits.contiguous() logits.div_(sampling_info.temperatures) + if is_torch_compile: + # FIXME: Temporary workaround for unknown bugs in torch.compile + logits.add_(0) + if sampling_info.logit_bias is not None: logits.add_(sampling_info.logit_bias) if sampling_info.vocab_mask is not None: logits = logits.masked_fill(~sampling_info.vocab_mask, float("-inf")) - logits = sampling_info.penalizer_orchestrator.apply(logits) + logits = self._apply_penalties(logits, sampling_info) - probs = torch.softmax(logits, dim=-1) + return torch.softmax(logits, dim=-1) + + def forward_cuda( + self, + logits: Union[torch.Tensor, LogitsProcessorOutput], + sampling_info: SamplingBatchInfo, + ): + if isinstance(logits, LogitsProcessorOutput): + logits = logits.next_token_logits + + probs = self._get_probs(logits, sampling_info) if not 
global_server_args_dict["disable_flashinfer_sampling"]:
             max_top_k_round, batch_size = 32, probs.shape[0]
             uniform_samples = torch.rand(
                 (max_top_k_round, batch_size), device=probs.device
             )
-            if sampling_info.min_ps.any():
+            if sampling_info.need_min_p_sampling:
                 probs = top_k_renorm_prob(probs, sampling_info.top_ks)
                 probs = top_p_renorm_prob(probs, sampling_info.top_ps)
                 batch_next_token_ids, success = min_p_sampling_from_probs(
@@ -55,18 +100,23 @@ def forward_cuda(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo):
                 probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps
             )

-        if not torch.all(success):
-            logging.warning("Sampling failed, fallback to top_k=1 strategy")
-            probs = probs.masked_fill(torch.isnan(probs), 0.0)
-            argmax_ids = torch.argmax(probs, dim=-1)
-            batch_next_token_ids = torch.where(
-                success, batch_next_token_ids, argmax_ids
-            )
+        return SampleOutput(success, probs, batch_next_token_ids)

-        return batch_next_token_ids
+    def forward_native(
+        self,
+        logits: Union[torch.Tensor, LogitsProcessorOutput],
+        sampling_info: SamplingBatchInfo,
+    ):
+        if isinstance(logits, LogitsProcessorOutput):
+            logits = logits.next_token_logits
+
+        probs = self._get_probs(logits, sampling_info, is_torch_compile=True)
+
+        batch_next_token_ids, success = top_k_top_p_min_p_sampling_from_probs_torch(
+            probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps
+        )

-    def forward_native():
-        raise NotImplementedError("Native forward is not implemented yet.")
+        return SampleOutput(success, probs, batch_next_token_ids)


 def top_k_top_p_min_p_sampling_from_probs_torch(
@@ -87,7 +137,10 @@ def top_k_top_p_min_p_sampling_from_probs_torch(
     probs_sort[probs_sort < min_p_thresholds.view(-1, 1)] = 0.0
     probs_sort.div_(probs_sort.max(dim=-1, keepdim=True)[0])
     try:
-        sampled_index = torch.multinomial(probs_sort, num_samples=1)
+        # FIXME: torch.multinomial can fail with num_samples=1 here; draw two samples with replacement and keep the first
+        sampled_index = torch.multinomial(probs_sort, num_samples=2, replacement=True)[
+            :, :1
+        ]
     except RuntimeError as e:
         logger.warning(f"Sampling error: {e}")
         batch_next_token_ids = torch.zeros(
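To make the failure path concrete, here is a small runnable sketch of the top_k=1 fallback that now lives in `check_sample_results` (the tensors are illustrative):

```python
import torch

# One sequence sampled fine; the other produced NaN probabilities.
success = torch.tensor([True, False])
probs = torch.tensor([[0.1, 0.9], [float("nan"), float("nan")]])
sampled = torch.tensor([1, 0])

probs = probs.masked_fill(torch.isnan(probs), 0.0)
argmax_ids = torch.argmax(probs, dim=-1)
next_ids = torch.where(success, sampled, argmax_ids)  # keeps row 0, repairs the NaN row
```

diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index 5554170a350..f5b9c9eb27d 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 """
 Copyright 2023-2024 SGLang Team
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,7 @@
 import logging
 from dataclasses import dataclass
-from typing import List, Optional, Union
+from typing import TYPE_CHECKING, List, Optional, Union

 import torch

@@ -29,6 +31,10 @@
 from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool
 from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo

+if TYPE_CHECKING:
+    from sglang.srt.layers.sampler import SampleOutput
+
+
 INIT_INCREMENTAL_DETOKENIZATION_OFFSET = 5

 # Put some global args for easy access
@@ -678,11 +684,17 @@ def merge(self, other: "ScheduleBatch"):
         self.top_logprobs_nums.extend(other.top_logprobs_nums)
         self.return_logprob = any(req.return_logprob for req in self.reqs)

-    def sample(self, logits: torch.Tensor):
-        from sglang.srt.layers.sampler import Sampler
-
-        sampler = Sampler()
-
-        batch_next_token_ids = sampler(logits, self.sampling_info)
+    def check_sample_results(self, sample_output: SampleOutput):
+        if not torch.all(sample_output.success):
+            probs = sample_output.probs
+            batch_next_token_ids =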
sample_output.batch_next_token_ids + logging.warning("Sampling failed, fallback to top_k=1 strategy") + probs = probs.masked_fill(torch.isnan(probs), 0.0) + argmax_ids = torch.argmax(probs, dim=-1) + batch_next_token_ids = torch.where( + sample_output.success, batch_next_token_ids, argmax_ids + ) + sample_output.probs = probs + sample_output.batch_next_token_ids = batch_next_token_ids - return batch_next_token_ids + return sample_output.batch_next_token_ids diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index cd1b580643c..123b1f5d5dc 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -31,7 +31,7 @@ from sglang.srt.constrained.fsm_cache import FSMCache from sglang.srt.constrained.jump_forward import JumpForwardCache from sglang.srt.hf_transformers_utils import get_processor, get_tokenizer -from sglang.srt.layers.logits_processor import LogitProcessorOutput +from sglang.srt.layers.logits_processor import LogitsProcessorOutput from sglang.srt.managers.io_struct import ( AbortReq, BatchEmbeddingOut, @@ -504,21 +504,29 @@ def forward_prefill_batch(self, batch: ScheduleBatch): if self.model_runner.is_generation: # Forward and sample the next tokens if batch.extend_num_tokens != 0: - output = self.model_runner.forward(batch, ForwardMode.EXTEND) - next_token_ids = batch.sample(output.next_token_logits) + sample_output, logits_output = self.model_runner.forward( + batch, ForwardMode.EXTEND + ) + next_token_ids = batch.check_sample_results(sample_output) batch.sampling_info.penalizer_orchestrator.cumulate_output_tokens( next_token_ids ) # Move logprobs to cpu - if output.next_token_logprobs is not None: - output.next_token_logprobs = output.next_token_logprobs[ - torch.arange(len(next_token_ids), device=next_token_ids.device), - next_token_ids, - ].tolist() - output.input_token_logprobs = output.input_token_logprobs.tolist() - output.normalized_prompt_logprobs = ( - output.normalized_prompt_logprobs.tolist() + if logits_output.next_token_logprobs is not None: + logits_output.next_token_logprobs = ( + logits_output.next_token_logprobs[ + torch.arange( + len(next_token_ids), device=next_token_ids.device + ), + next_token_ids, + ].tolist() + ) + logits_output.input_token_logprobs = ( + logits_output.input_token_logprobs.tolist() + ) + logits_output.normalized_prompt_logprobs = ( + logits_output.normalized_prompt_logprobs.tolist() ) next_token_ids = next_token_ids.tolist() @@ -557,12 +565,14 @@ def forward_prefill_batch(self, batch: ScheduleBatch): self.req_to_token_pool.free(req.req_pool_idx) if req.return_logprob: - self.add_logprob_return_values(i, req, pt, next_token_ids, output) + self.add_logprob_return_values( + i, req, pt, next_token_ids, logits_output + ) pt += req.extend_input_len else: assert batch.extend_num_tokens != 0 - output = self.model_runner.forward(batch, ForwardMode.EXTEND) - embeddings = output.embeddings.tolist() + logits_output = self.model_runner.forward(batch, ForwardMode.EXTEND) + embeddings = logits_output.embeddings.tolist() # Check finish conditions for i, req in enumerate(batch.reqs): @@ -590,7 +600,7 @@ def add_logprob_return_values( req: Req, pt: int, next_token_ids: List[int], - output: LogitProcessorOutput, + output: LogitsProcessorOutput, ): if req.normalized_prompt_logprob is None: req.normalized_prompt_logprob = output.normalized_prompt_logprobs[i] @@ -672,15 +682,17 @@ def forward_decode_batch(self, batch: ScheduleBatch): batch.prepare_for_decode() # Forward and 
sample the next tokens - output = self.model_runner.forward(batch, ForwardMode.DECODE) - next_token_ids = batch.sample(output.next_token_logits) + sample_output, logits_output = self.model_runner.forward( + batch, ForwardMode.DECODE + ) + next_token_ids = batch.check_sample_results(sample_output) batch.sampling_info.penalizer_orchestrator.cumulate_output_tokens( next_token_ids ) # Move logprobs to cpu - if output.next_token_logprobs is not None: - next_token_logprobs = output.next_token_logprobs[ + if logits_output.next_token_logprobs is not None: + next_token_logprobs = logits_output.next_token_logprobs[ torch.arange(len(next_token_ids), device=next_token_ids.device), next_token_ids, ].tolist() @@ -706,7 +718,7 @@ def forward_decode_batch(self, batch: ScheduleBatch): (next_token_logprobs[i], next_token_id) ) if req.top_logprobs_num > 0: - req.output_top_logprobs.append(output.output_top_logprobs[i]) + req.output_top_logprobs.append(logits_output.output_top_logprobs[i]) self.handle_finished_requests(batch) diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 796db26623f..40c87af88cf 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -26,16 +26,18 @@ from vllm.model_executor.custom_op import CustomOp from sglang.srt.layers.logits_processor import ( - LogitProcessorOutput, LogitsMetadata, LogitsProcessor, + LogitsProcessorOutput, ) +from sglang.srt.layers.sampler import SampleOutput from sglang.srt.managers.schedule_batch import ScheduleBatch from sglang.srt.model_executor.forward_batch_info import ( ForwardMode, InputMetadata, update_flashinfer_indices, ) +from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo from sglang.srt.utils import monkey_patch_vllm_all_gather @@ -144,6 +146,10 @@ def __init__( self.flashinfer_kv_indices.clone(), ] + # Sampling inputs + vocab_size = model_runner.model_config.vocab_size + self.sampling_info = SamplingBatchInfo.dummy_one(self.max_bs, vocab_size) + self.compile_bs = [1, 2, 4, 8, 16, 24, 32] if use_torch_compile else [] if use_torch_compile: @@ -235,6 +241,7 @@ def capture_one_batch_size(self, bs: int, forward: Callable): def run_once(): input_metadata = InputMetadata( forward_mode=ForwardMode.DECODE, + sampling_info=self.sampling_info[:bs], batch_size=bs, req_pool_indices=req_pool_indices, seq_lens=seq_lens, @@ -299,27 +306,35 @@ def replay(self, batch: ScheduleBatch): self.flashinfer_handlers[bs], ) + # Sampling inputs + self.sampling_info.inplace_assign(raw_bs, batch.sampling_info) + # Replay torch.cuda.synchronize() self.graphs[bs].replay() torch.cuda.synchronize() - output = self.output_buffers[bs] + sample_output, logits_output = self.output_buffers[bs] # Unpad if bs != raw_bs: - output = LogitProcessorOutput( - next_token_logits=output.next_token_logits[:raw_bs], + logits_output = LogitsProcessorOutput( + next_token_logits=logits_output.next_token_logits[:raw_bs], next_token_logprobs=None, normalized_prompt_logprobs=None, input_token_logprobs=None, input_top_logprobs=None, output_top_logprobs=None, ) + sample_output = SampleOutput( + sample_output.success[:raw_bs], + sample_output.probs[:raw_bs], + sample_output.batch_next_token_ids[:raw_bs], + ) # Extract logprobs if batch.return_logprob: - output.next_token_logprobs = torch.nn.functional.log_softmax( - output.next_token_logits, dim=-1 + logits_output.next_token_logprobs = torch.nn.functional.log_softmax( + 
logits_output.next_token_logits, dim=-1
             )
             return_top_logprob = any(x > 0 for x in batch.top_logprobs_nums)
             if return_top_logprob:
@@ -327,8 +342,8 @@ def replay(self, batch: ScheduleBatch):
                     forward_mode=ForwardMode.DECODE,
                     top_logprobs_nums=batch.top_logprobs_nums,
                 )
-                output.output_top_logprobs = LogitsProcessor.get_top_logprobs(
-                    output.next_token_logprobs, logits_metadata
+                logits_output.output_top_logprobs = LogitsProcessor.get_top_logprobs(
+                    logits_output.next_token_logprobs, logits_metadata
                 )[1]

-        return output
+        return sample_output, logits_output
diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py
index f24cdf6b723..3d40c9d7558 100644
--- a/python/sglang/srt/model_executor/forward_batch_info.py
+++ b/python/sglang/srt/model_executor/forward_batch_info.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 """
 Copyright 2023-2024 SGLang Team
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,6 +28,7 @@

 if TYPE_CHECKING:
     from sglang.srt.model_executor.model_runner import ModelRunner
+    from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo


 class ForwardMode(IntEnum):
@@ -42,6 +45,7 @@ class InputMetadata:
     """Store all information of a forward pass."""

     forward_mode: ForwardMode
+    sampling_info: SamplingBatchInfo
     batch_size: int
     req_pool_indices: torch.Tensor
     seq_lens: torch.Tensor
@@ -169,6 +173,7 @@ def from_schedule_batch(
     ):
         ret = cls(
             forward_mode=forward_mode,
+            sampling_info=batch.sampling_info,
             batch_size=batch.batch_size(),
             req_pool_indices=batch.req_pool_indices,
             seq_lens=batch.seq_lens,
@@ -179,6 +184,8 @@
             top_logprobs_nums=batch.top_logprobs_nums,
         )

+        ret.sampling_info.prepare_penalties()
+
         ret.compute_positions(batch)
         ret.compute_extend_infos(batch)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 8ef47a530f5..e6f5e743110 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -21,7 +21,7 @@
 import logging
 import pkgutil
 from functools import lru_cache
-from typing import Optional, Type
+from typing import Optional, Tuple, Type

 import torch
 import torch.nn as nn
@@ -44,6 +44,8 @@
 from vllm.model_executor.models import ModelRegistry

 from sglang.global_config import global_config
+from sglang.srt.layers.logits_processor import LogitsProcessorOutput
+from sglang.srt.layers.sampler import SampleOutput
 from sglang.srt.managers.schedule_batch import ScheduleBatch, global_server_args_dict
 from sglang.srt.mem_cache.memory_pool import (
     MHATokenToKVPool,
@@ -524,7 +526,11 @@ def init_cuda_graphs(self):

     @torch.inference_mode()
     def forward_decode(self, batch: ScheduleBatch):
-        if self.cuda_graph_runner and self.cuda_graph_runner.can_run(len(batch.reqs)):
+        if (
+            self.cuda_graph_runner
+            and self.cuda_graph_runner.can_run(len(batch.reqs))
+            and not batch.sampling_info.has_bias()
+        ):
             return self.cuda_graph_runner.replay(batch)

         input_metadata = InputMetadata.from_schedule_batch(
@@ -573,7 +579,9 @@ def forward_extend_multi_modal(self, batch: ScheduleBatch):
             input_metadata.image_offsets,
         )

-    def forward(self, batch: ScheduleBatch, forward_mode: ForwardMode):
+    def forward(
+        self, batch: ScheduleBatch, forward_mode: ForwardMode
+    ) -> Tuple[SampleOutput, LogitsProcessorOutput]:
         if self.is_multimodal_model and forward_mode == ForwardMode.EXTEND:
             return self.forward_extend_multi_modal(batch)
         elif forward_mode ==
ForwardMode.DECODE: diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py index b38b62fafd3..9eb04dc263d 100644 --- a/python/sglang/srt/models/chatglm.py +++ b/python/sglang/srt/models/chatglm.py @@ -31,20 +31,18 @@ ) from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, ) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplerOutput from vllm.transformers_utils.configs import ChatGLMConfig from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata LoraConfig = None @@ -383,17 +381,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - - def sample( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) diff --git a/python/sglang/srt/models/commandr.py b/python/sglang/srt/models/commandr.py index f6d6f6e1f94..c360106f97c 100644 --- a/python/sglang/srt/models/commandr.py +++ b/python/sglang/srt/models/commandr.py @@ -64,6 +64,7 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -326,6 +327,7 @@ def __init__( self.config = config self.quant_config = quant_config self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() self.model = CohereModel(config, quant_config) @torch.no_grad() @@ -340,9 +342,11 @@ def forward( positions, input_metadata, ) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/dbrx.py b/python/sglang/srt/models/dbrx.py index 39ac4aefa72..b3a76b56ae2 100644 --- a/python/sglang/srt/models/dbrx.py +++ b/python/sglang/srt/models/dbrx.py @@ -45,6 +45,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -382,6 +383,7 
@@ def __init__( padding_size=DEFAULT_VOCAB_PADDING_SIZE, ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -391,9 +393,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): expert_params_mapping = [ diff --git a/python/sglang/srt/models/deepseek.py b/python/sglang/srt/models/deepseek.py index 59fd1ec7ed8..b939602c1ba 100644 --- a/python/sglang/srt/models/deepseek.py +++ b/python/sglang/srt/models/deepseek.py @@ -46,6 +46,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -385,6 +386,7 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -394,9 +396,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 13dd477392e..15ecf4bb66b 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -45,6 +45,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -632,6 +633,7 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() def forward( self, @@ -640,9 +642,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/gemma.py b/python/sglang/srt/models/gemma.py index ae3b1b1948c..5a6e5df37fe 100644 --- a/python/sglang/srt/models/gemma.py +++ b/python/sglang/srt/models/gemma.py @@ -37,6 +37,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention 
import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -287,6 +288,7 @@ def __init__( self.quant_config = quant_config self.model = GemmaModel(config, quant_config=quant_config) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -297,9 +299,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return (sample_output, logits_output) def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/gemma2.py b/python/sglang/srt/models/gemma2.py index 3223424d79c..77ebd8564c6 100644 --- a/python/sglang/srt/models/gemma2.py +++ b/python/sglang/srt/models/gemma2.py @@ -37,6 +37,7 @@ from sglang.srt.layers.layernorm import GemmaRMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -346,6 +347,7 @@ def __init__( self.quant_config = quant_config self.model = Gemma2Model(config, cache_config, quant_config) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -356,9 +358,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def get_attention_sliding_window_size(self): return get_attention_sliding_window_size(self.config) diff --git a/python/sglang/srt/models/gpt_bigcode.py b/python/sglang/srt/models/gpt_bigcode.py index 94b7f6153cf..dc828f0142e 100644 --- a/python/sglang/srt/models/gpt_bigcode.py +++ b/python/sglang/srt/models/gpt_bigcode.py @@ -35,6 +35,7 @@ from sglang.srt.layers.activation import get_act_fn from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -261,6 +262,7 @@ def __init__( if lora_config: self.unpadded_vocab_size += lora_config.lora_extra_vocab_size self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -270,9 +272,11 @@ def forward( input_metadata: InputMetadata, ) -> torch.Tensor: hidden_states = self.transformer(input_ids, positions, input_metadata) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters(remove_duplicate=False)) diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py 
index daf6f25da13..3c2a2c65eae 100644 --- a/python/sglang/srt/models/grok.py +++ b/python/sglang/srt/models/grok.py @@ -46,6 +46,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -297,6 +298,7 @@ def __init__( self.model = Grok1Model(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() # Monkey patch _prepare_weights to load pre-sharded weights setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights) @@ -313,9 +315,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/internlm2.py b/python/sglang/srt/models/internlm2.py index f2947e991b5..c0e4d19e128 100644 --- a/python/sglang/srt/models/internlm2.py +++ b/python/sglang/srt/models/internlm2.py @@ -40,6 +40,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -262,6 +263,7 @@ def __init__( self.model = InternLM2Model(config, quant_config) self.output = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -272,9 +274,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.output.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py index fe75916a43b..22751d9b674 100644 --- a/python/sglang/srt/models/llama2.py +++ b/python/sglang/srt/models/llama2.py @@ -39,8 +39,9 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm -from sglang.srt.layers.logits_processor import LogitProcessorOutput, LogitsProcessor +from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -302,6 +303,7 @@ def __init__( self.model = LlamaModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ 
-310,11 +312,13 @@ def forward( positions: torch.Tensor, input_metadata: InputMetadata, input_embeds: torch.Tensor = None, - ) -> LogitProcessorOutput: + ) -> LogitsProcessorOutput: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def get_module_name(self, name): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/llama_classification.py b/python/sglang/srt/models/llama_classification.py index c5effbfc9c6..03ab5e802cf 100644 --- a/python/sglang/srt/models/llama_classification.py +++ b/python/sglang/srt/models/llama_classification.py @@ -24,7 +24,7 @@ from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from sglang.srt.layers.logits_processor import LogitProcessorOutput +from sglang.srt.layers.logits_processor import LogitsProcessorOutput from sglang.srt.model_executor.forward_batch_info import InputMetadata from sglang.srt.models.llama2 import LlamaModel @@ -65,7 +65,7 @@ def forward( (input_metadata.batch_size, self.config.classification_out_size) ).to(input_ids.device) - return LogitProcessorOutput( + return LogitsProcessorOutput( next_token_logits=scores, next_token_logprobs=scores, normalized_prompt_logprobs=scores, diff --git a/python/sglang/srt/models/minicpm.py b/python/sglang/srt/models/minicpm.py index 49ff1926f39..0028ae67a8c 100644 --- a/python/sglang/srt/models/minicpm.py +++ b/python/sglang/srt/models/minicpm.py @@ -39,6 +39,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -297,6 +298,7 @@ def __init__( self.scale_width = self.config.hidden_size / self.config.dim_model_base self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -314,9 +316,11 @@ def forward( lm_head_weight = self.model.embed_tokens.weight else: lm_head_weight = self.lm_head.weight - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, lm_head_weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index d11f6c95198..ca38cb03bae 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -41,6 +41,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -299,6 +300,7 @@ def __init__( self.model = MixtralModel(config, quant_config=quant_config, prefix="model") self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() def forward( self, @@ -308,9 +310,11 @@ def 
forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/mixtral_quant.py b/python/sglang/srt/models/mixtral_quant.py index b02e925c5a0..97ac09ee629 100644 --- a/python/sglang/srt/models/mixtral_quant.py +++ b/python/sglang/srt/models/mixtral_quant.py @@ -45,6 +45,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -333,6 +334,7 @@ def __init__( self.model = MixtralModel(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -343,9 +345,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py index 93dae9585c3..4958a812985 100644 --- a/python/sglang/srt/models/qwen.py +++ b/python/sglang/srt/models/qwen.py @@ -39,6 +39,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -251,6 +252,7 @@ def __init__( vocab_size = ((config.vocab_size + 63) // 64) * 64 self.lm_head = ParallelLMHead(vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -260,10 +262,11 @@ def forward( input_metadata: InputMetadata, ): hidden_states = self.transformer(input_ids, positions, input_metadata) - next_tokens = self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - return next_tokens + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index a0c54f69105..6bb5c0b9066 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -38,8 +38,9 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor -from sglang.srt.layers.pooler import EmbeddingPoolerOutput, Pooler, PoolingType +from 
sglang.srt.layers.pooler import Pooler, PoolingType from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata Qwen2Config = None @@ -276,6 +277,7 @@ def __init__( self.model = Qwen2Model(config, quant_config=quant_config) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True) @torch.no_grad() @@ -289,9 +291,11 @@ def forward( ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) if not get_embedding: - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output else: return self.pooler(hidden_states, input_metadata) diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index d5c79a40f0e..67b5a6ce663 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -35,10 +35,8 @@ ReplicatedLinear, RowParallelLinear, ) -from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, @@ -49,6 +47,7 @@ from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -366,6 +365,7 @@ def __init__( config.vocab_size, config.hidden_size, quant_config=quant_config ) self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -376,20 +376,11 @@ def forward( input_embeds: torch.Tensor = None, ) -> torch.Tensor: hidden_states = self.model(input_ids, positions, input_metadata, input_embeds) - return self.logits_processor( + logits_output = self.logits_processor( input_ids, hidden_states, self.lm_head.weight, input_metadata ) - - def compute_logits( - self, - input_ids: torch.Tensor, - hidden_states: torch.Tensor, - input_metadata: InputMetadata, - ) -> torch.Tensor: - logits = self.logits_processor( - input_ids, hidden_states, self.lm_head.weight, input_metadata - ) - return logits + sample_output = self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ diff --git a/python/sglang/srt/models/stablelm.py b/python/sglang/srt/models/stablelm.py index 9e10f12f2a2..a3102baabd4 100644 --- a/python/sglang/srt/models/stablelm.py +++ b/python/sglang/srt/models/stablelm.py @@ -40,6 +40,7 @@ from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler from sglang.srt.model_executor.forward_batch_info import InputMetadata @@ -249,6 +250,7 @@ def __init__( self.model = 
StableLMEpochModel(config, quant_config=quant_config)
         self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size)
         self.logits_processor = LogitsProcessor(config)
+        self.sampler = Sampler()

     @torch.no_grad()
     def forward(
@@ -259,9 +261,11 @@ def forward(
         input_embeds: torch.Tensor = None,
     ) -> torch.Tensor:
         hidden_states = self.model(input_ids, positions, input_metadata, input_embeds)
-        return self.logits_processor(
+        logits_output = self.logits_processor(
             input_ids, hidden_states, self.lm_head.weight, input_metadata
         )
+        sample_output = self.sampler(logits_output, input_metadata.sampling_info)
+        return sample_output, logits_output

     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         stacked_params_mapping = [
diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py
index bc70a9018ed..7843f4bd32d 100644
--- a/python/sglang/srt/sampling/sampling_batch_info.py
+++ b/python/sglang/srt/sampling/sampling_batch_info.py
@@ -21,10 +21,63 @@ class SamplingBatchInfo:
     top_ps: torch.Tensor = None
     top_ks: torch.Tensor = None
     min_ps: torch.Tensor = None
-    penalizer_orchestrator: penaltylib.BatchedPenalizerOrchestrator = None
+
+    # Dispatch in CUDA graph
+    need_min_p_sampling: bool = False
+
+    # Bias Tensors
     logit_bias: torch.Tensor = None
     vocab_mask: torch.Tensor = None

+    # Penalizer
+    penalizer_orchestrator: penaltylib.BatchedPenalizerOrchestrator = None
+    linear_penalties: torch.Tensor = None
+    scaling_penalties: torch.Tensor = None
+
+    def has_bias(self):
+        return (
+            self.logit_bias is not None
+            or self.vocab_mask is not None
+            or self.linear_penalties is not None
+            or self.scaling_penalties is not None
+        )
+
+    @classmethod
+    def dummy_one(cls, max_bs: int, vocab_size: int):
+        ret = cls(vocab_size=vocab_size)
+        ret.temperatures = torch.ones((max_bs, 1), dtype=torch.float, device="cuda")
+        ret.top_ps = torch.ones((max_bs,), dtype=torch.float, device="cuda")
+        ret.top_ks = torch.ones((max_bs,), dtype=torch.int, device="cuda")
+        ret.min_ps = torch.zeros((max_bs,), dtype=torch.float, device="cuda")
+        return ret
+
+    def __getitem__(self, key):
+        if isinstance(key, slice):
+            # NOTE: We do not use cuda graph when there are bias tensors
+            assert not self.has_bias()
+            return SamplingBatchInfo(
+                vocab_size=self.vocab_size,
+                temperatures=self.temperatures[key],
+                top_ps=self.top_ps[key],
+                top_ks=self.top_ks[key],
+                min_ps=self.min_ps[key],
+                need_min_p_sampling=self.need_min_p_sampling,
+            )
+        else:
+            raise NotImplementedError
+
+    def inplace_assign(self, bs: int, other: SamplingBatchInfo):
+        # NOTE: We do not use cuda graph when there are bias tensors
+        assert not self.has_bias()
+
+        self.vocab_size = other.vocab_size
+        self.need_min_p_sampling = other.need_min_p_sampling
+
+        self.temperatures[:bs] = other.temperatures
+        self.top_ps[:bs] = other.top_ps
+        self.top_ks[:bs] = other.top_ks
+        self.min_ps[:bs] = other.min_ps
+
     @classmethod
     def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int):
         device = "cuda"
@@ -45,6 +98,7 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int):
         ret.min_ps = torch.tensor(
             [r.sampling_params.min_p for r in reqs], dtype=torch.float, device=device
         )
+        ret.need_min_p_sampling = any(r.sampling_params.min_p > 0 for r in reqs)

         # Each penalizer will do nothing if it evaluates itself as not required by looking at
         # the sampling_params of the requests (See {_is_required()} of each penalizer). So this
@@ -72,6 +126,25 @@ def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int):

         return ret

+    def prepare_penalties(self):
+        self.scaling_penalties = None
+        self.linear_penalties = None
+
+        for penalizer in self.penalizer_orchestrator.penalizers.values():
+            if isinstance(penalizer, penaltylib.BatchedRepetitionPenalizer):
+                if penalizer.is_prepared():
+                    self.scaling_penalties = penalizer.cumulated_repetition_penalties
+            else:
+                if penalizer.is_prepared():
+                    if self.linear_penalties is None:
+                        bs = self.penalizer_orchestrator.batch.batch_size()
+                        self.linear_penalties = torch.zeros(
+                            (bs, self.vocab_size),
+                            dtype=torch.float32,
+                            device="cuda",
+                        )
+                    self.linear_penalties = penalizer.apply(self.linear_penalties)
+
     def update_regex_vocab_mask(self, batch: ScheduleBatch):
         bs, reqs = batch.batch_size(), batch.reqs
         device = "cuda"
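The buffer discipline behind `dummy_one` and `inplace_assign` above deserves a standalone sketch: tensors are allocated once at the CUDA graph's maximum batch size, and each step's values are copied in without reallocation, so the addresses baked into the captured graph stay valid (sizes are illustrative; CPU tensors used for portability):

```python
import torch

# Allocate once at capture time with the maximum batch size...
max_bs = 8
temperatures = torch.ones((max_bs, 1))
top_ps = torch.ones((max_bs,))

# ...then, at replay time, copy the live batch in-place.
bs = 3
temperatures[:bs] = torch.full((bs, 1), 0.7)
top_ps[:bs] = torch.tensor([0.9, 0.95, 1.0])
```

diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py
index e69d699a7d3..ac69ab875b9 100644
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -180,7 +180,7 @@ def __init__(
             tp_size=tp_size,
             dtype=get_dtype_str(torch_dtype),
             port=port,
-            mem_fraction_static=0.7,
+            mem_fraction_static=0.69,
             trust_remote_code=False,
             is_embedding=not self.is_generation,
         )

From a362340b33258eae0f48504be09659e2e9dcd035 Mon Sep 17 00:00:00 2001
From: lxww302 <68112258+lxww302@users.noreply.github.com>
Date: Thu, 29 Aug 2024 23:43:41 -0700
Subject: [PATCH 50/88] fix: multimodal_config in monkey_patch_vllm_dummy_weight_loader (#1260)

---
 python/sglang/srt/utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index b7bb6573068..66a5679d756 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -407,7 +407,6 @@ def monkey_patch_vllm_dummy_weight_loader():
         DummyModelLoader,
         LoRAConfig,
         ModelConfig,
-        MultiModalConfig,
         ParallelConfig,
         SchedulerConfig,
         _initialize_model,
@@ -422,7 +421,6 @@ def load_model(
         model_config: ModelConfig,
         device_config: DeviceConfig,
         lora_config: Optional[LoRAConfig],
-        multimodal_config: Optional[MultiModalConfig],
         parallel_config: ParallelConfig,
         scheduler_config: SchedulerConfig,
         cache_config: CacheConfig,
@@ -433,7 +431,6 @@
                 model_config,
                 self.load_config,
                 lora_config,
-                multimodal_config,
                 cache_config,
             )

From f414352ae6783dc20dc93e09be00ea62f4438931 Mon Sep 17 00:00:00 2001
From: Ke Bao
Date: Fri, 30 Aug 2024 14:45:40 +0800
Subject: [PATCH 51/88] Transpose mla weight offline (#1261)

Co-authored-by: Yineng Zhang
---
 python/sglang/srt/models/deepseek_v2.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 15ecf4bb66b..67d99d51246 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -417,12 +417,8 @@ def __init__(
                 v_head_dim=self.kv_lora_rank,
             )

-        kv_b_proj = self.kv_b_proj
-        w_kc, w_vc = kv_b_proj.weight.unflatten(
-            0, (-1, qk_nope_head_dim + v_head_dim)
-        ).split([qk_nope_head_dim, v_head_dim], dim=1)
-        self.w_kc = w_kc
-        self.w_vc = w_vc
+        self.w_kc = None
+        self.w_vc = None

     def forward(
         self,
@@ -464,7 +460,7 @@ def forward(
             )
             torch.bmm(
                 attn_output.transpose(0, 1),
-                self.w_vc.transpose(1, 2).contiguous(),
+                self.w_vc,
                 out=attn_bmm_output.transpose(0, 1),
             )

@@ -715,5 +711,15 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                     )
                     weight_loader(param, loaded_weight)

+        if global_server_args_dict["enable_mla"]: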
+            for layer_id in range(self.config.num_hidden_layers):
+                self_attn = self.model.layers[layer_id].self_attn
+                w_kc, w_vc = self_attn.kv_b_proj.weight.unflatten(
+                    0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
+                ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
+                self_attn.w_kc = w_kc.contiguous()
+                self_attn.w_vc = w_vc.transpose(1, 2).contiguous()
+                del self_attn.kv_b_proj
+

 EntryClass = DeepseekV2ForCausalLM
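To make the offline transform concrete, a tiny standalone sketch of the unflatten/split/transpose steps follows; the dimensions are made-up stand-ins, not DeepSeek-V2's real sizes:

```python
import torch

num_heads, qk_nope_head_dim, v_head_dim, kv_lora_rank = 4, 8, 8, 16
w = torch.randn(num_heads * (qk_nope_head_dim + v_head_dim), kv_lora_rank)

# Split the fused kv_b_proj weight into per-head W_kc / W_vc blocks.
w_kc, w_vc = w.unflatten(0, (-1, qk_nope_head_dim + v_head_dim)).split(
    [qk_nope_head_dim, v_head_dim], dim=1
)
w_kc = w_kc.contiguous()                  # [heads, qk_nope_head_dim, rank]
w_vc = w_vc.transpose(1, 2).contiguous()  # [heads, rank, v_head_dim], bmm-ready
```

From b7f834101476209767b7c8a52f17aa86cad79f44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EA=B9=80=EC=A2=85=EA=B3=A4?= <149566442+Deepfocused@users.noreply.github.com>
Date: Fri, 30 Aug 2024 17:08:28 +0900
Subject: [PATCH 52/88] EXAONE 3.0 Model Support (#1258)

Co-authored-by: Yineng Zhang
---
 python/sglang/srt/configs/__init__.py      |   5 +
 python/sglang/srt/configs/exaone.py        | 195 ++++++++++
 python/sglang/srt/hf_transformers_utils.py |  12 +-
 python/sglang/srt/models/exaone.py         | 399 +++++++++++++++++++++
 4 files changed, 609 insertions(+), 2 deletions(-)
 create mode 100644 python/sglang/srt/configs/__init__.py
 create mode 100644 python/sglang/srt/configs/exaone.py
 create mode 100644 python/sglang/srt/models/exaone.py

diff --git a/python/sglang/srt/configs/__init__.py b/python/sglang/srt/configs/__init__.py
new file mode 100644
index 00000000000..9e74366709f
--- /dev/null
+++ b/python/sglang/srt/configs/__init__.py
@@ -0,0 +1,5 @@
+from sglang.srt.configs.exaone import ExaoneConfig
+
+__all__ = [
+    "ExaoneConfig",
+]
diff --git a/python/sglang/srt/configs/exaone.py b/python/sglang/srt/configs/exaone.py
new file mode 100644
index 00000000000..7b0a2d290da
--- /dev/null
+++ b/python/sglang/srt/configs/exaone.py
@@ -0,0 +1,195 @@
+# coding=utf-8
+# Copyright 2024 The LG AI Research EXAONE Lab. All rights reserved.
+# Copyright 2024 The LG CNS AI Engineering Team.
+# Copyright 2023-2024 SGLang Team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" EXAONE model configuration """
+from typing import Any, Dict
+
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: Dict[str, Any] = {}
+
+
+# ruff: noqa: E501
+class ExaoneConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a :class:`~transformers.ExaoneModel`. It is used to
+    instantiate a EXAONE model according to the specified arguments, defining the model architecture. Instantiating a
+    configuration with the defaults will yield a similar configuration to that of the Exaone
+
+    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
+    outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
+
+
+    Args:
+        vocab_size (:obj:`int`, `optional`, defaults to 102400):
+            Vocabulary size of the EXAONE model. Defines the number of different tokens that can be represented by the
+            :obj:`inputs_ids` passed when calling :class:`~transformers.ExaoneModel`.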
+        max_position_embeddings (:obj:`int`, `optional`, defaults to 2048):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        hidden_size (:obj:`int`, `optional`, defaults to 2048):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_layers (:obj:`int`, `optional`, defaults to 32):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (:obj:`int`, `optional`, defaults to 32):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        num_key_value_heads (:obj:`int`, `optional`):
+            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+            `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
+            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+            by meanpooling all the original heads within that group. For more details checkout [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
+            `num_attention_heads`.
+        intermediate_size (:obj:`int`, `optional`, defaults to `hidden_size * 4`):
+            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+        activation_function (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"silu"`):
+            The non-linear activation function (function or string) in the decoder.
+        rope_theta (:obj:`float`, `optional`, defaults to 10000.0):
+            The base period of the RoPE embeddings.
+        rope_scaling (:obj:`Dict`, `optional`):
+            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply a new rope type
+            and you expect the model to work on a longer `max_position_embeddings`, we recommend updating this value
+            accordingly.
+            Expected contents:
+                `rope_type` (:obj:`str`):
+                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
+                    'llama3'], with 'default' being the original RoPE implementation.
+                `factor` (:obj:`float`, `optional`):
+                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
+                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
+                    original maximum pre-trained length.
+                `original_max_position_embeddings` (:obj:`int`, `optional`):
+                    Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
+                    pretraining.
+                `attention_factor` (:obj:`float`, `optional`):
+                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
+                    computation. If unspecified, it defaults to value recommended by the implementation, using the
+                    `factor` field to infer the suggested value.
+                `beta_fast` (:obj:`float`, `optional`):
+                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
+                    ramp function. If unspecified, it defaults to 32.
+                `beta_slow` (:obj:`float`, `optional`):
+                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
+                    ramp function. If unspecified, it defaults to 1.
+                `short_factor` (:obj:`List[float]`, `optional`):
The scaling factor to be applied to short contexts (< + `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden + size divided by the number of attention heads divided by 2 + `long_factor` (:obj:`List[float]`, `optional`): + Only used with 'longrope'. The scaling factor to be applied to long contexts (< + `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden + size divided by the number of attention heads divided by 2 + `low_freq_factor` (:obj:`float`, `optional`): + Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE + `high_freq_factor` (:obj:`float`, `optional`): + Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE + embed_dropout (:obj:`float`, `optional`, defaults to 0.0): + The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler. + attention_dropout (:obj:`float`, `optional`, defaults to 0.0): + The dropout ratio for the attention probabilities. + layer_norm_epsilon (:obj:`float`, `optional`, defaults to 1e-5): + The epsilon used by the layer normalization layers. + initializer_range (:obj:`float`, `optional`, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`): + Whether or not the model should return the last key/values attentions (not used by all models). Only + relevant if ``configs.is_decoder=True``. + bos_token_id (:obj:`int`, `optional`, defaults to 0): + Beginning of stream token id. + eos_token_id (:obj:`int`, `optional`, defaults to 2): + End of stream token id. + tie_word_embeddings (:obj:`bool`, `optional`, defaults to :obj:`True`): + Whether to tie weight embeddings + gradient_checkpointing (:obj:`bool`, `optional`, defaults to :obj:`False`): + If True, use gradient checkpointing to save memory at the expense of slower backward pass. 
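+
+    As a quick illustration of the `num_key_value_heads` setting described above (the values
+    here are hypothetical, not EXAONE defaults), a GQA configuration with 32 query heads
+    sharing 8 key/value heads could be created as::
+
+        >>> from sglang.srt.configs import ExaoneConfig
+
+        >>> configuration = ExaoneConfig(num_attention_heads=32, num_key_value_heads=8)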
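+
+    Likewise, a minimal sketch of a `rope_scaling` dictionary for the 'yarn' variant, built
+    only from the fields documented above (the numbers are made-up placeholders, not
+    recommended values)::
+
+        >>> rope_scaling = {
+        ...     "rope_type": "yarn",
+        ...     "factor": 4.0,
+        ...     "beta_fast": 32,
+        ...     "beta_slow": 1,
+        ... }
+        >>> configuration = ExaoneConfig(rope_scaling=rope_scaling)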
+
+    Example::
+
+        >>> from transformers import EXAONEModel, ExaoneConfig
+
+        >>> # Initializing an EXAONE configuration
+        >>> configuration = ExaoneConfig()
+
+        >>> # Initializing a model from the configuration
+        >>> model = EXAONEModel(configuration)
+
+        >>> # Accessing the model configuration
+        >>> configuration = model.config
+    """
+
+    model_type = "exaone"
+    keys_to_ignore_at_inference = ["past_key_values"]
+    attribute_map = {"num_hidden_layers": "num_layers"}
+
+    def __init__(
+        self,
+        vocab_size=102400,
+        max_position_embeddings=2048,
+        hidden_size=2048,
+        num_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=None,
+        intermediate_size=None,
+        activation_function="silu",
+        rope_theta=10000.0,
+        rope_scaling=None,
+        embed_dropout=0.0,
+        attention_dropout=0.0,
+        layer_norm_epsilon=1e-5,
+        initializer_range=0.02,
+        use_cache=True,
+        bos_token_id=0,
+        eos_token_id=2,
+        tie_word_embeddings=True,
+        **kwargs
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_hidden_layers = num_layers
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        if intermediate_size:
+            self.intermediate_size = intermediate_size
+        else:
+            self.intermediate_size = hidden_size * 4
+        self.activation_function = activation_function
+        self.embed_dropout = embed_dropout
+        self.attention_dropout = attention_dropout
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+
+        super().__init__(
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs
+        )
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py
index 2be4169140a..7fce3b24012 100644
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -15,6 +15,7 @@
 
 """Utilities for Huggingface Transformers."""
 
+import contextlib
 import functools
 import json
 import os
@@ -34,14 +35,21 @@
 try:
     from vllm.transformers_utils.configs import ChatGLMConfig, DbrxConfig
 
+    from sglang.srt.configs import ExaoneConfig
+
     _CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
         ChatGLMConfig.model_type: ChatGLMConfig,
         DbrxConfig.model_type: DbrxConfig,
+        ExaoneConfig.model_type: ExaoneConfig,
     }
 except ImportError:
     # We want this file to run without vllm dependency
     _CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {}
 
+for name, cls in _CONFIG_REGISTRY.items():
+    with contextlib.suppress(ValueError):
+        AutoConfig.register(name, cls)
+
 from sglang.srt.utils import is_multimodal_model
 
 
@@ -53,7 +61,7 @@ def download_from_hf(model_path: str):
 
 
 def get_config_json(model_path: str):
-    with open(os.path.join(model_path, "config.json")) as f:
+    with open(os.path.join(model_path, "configs.json")) as f:
         config = json.load(f)
     return config
 
@@ -89,7 +97,7 @@ def get_config(
 
 
 def get_context_length(config):
-    """Get the context length of a model from a huggingface model config."""
+    """Get the context length of a model from a huggingface model configs."""
     rope_scaling = getattr(config, "rope_scaling", None)
     if rope_scaling:
         rope_scaling_factor = config.rope_scaling["factor"]
diff --git a/python/sglang/srt/models/exaone.py
b/python/sglang/srt/models/exaone.py new file mode 100644 index 00000000000..4dcafed7ce7 --- /dev/null +++ b/python/sglang/srt/models/exaone.py @@ -0,0 +1,399 @@ +""" +Copyright 2024 The LGcns AI Engineering Team +Copyright 2023-2024 SGLang Team +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +# Adapted from llama2.py +"""Inference-only Exaone model compatible with HuggingFace weights.""" + +from typing import Any, Dict, Iterable, Optional, Tuple + +import torch +from torch import nn +from vllm.config import CacheConfig +from vllm.distributed import get_tensor_model_parallel_world_size +from vllm.model_executor.layers.linear import ( + MergedColumnParallelLinear, + QKVParallelLinear, + RowParallelLinear, +) +from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.vocab_parallel_embedding import ( + ParallelLMHead, + VocabParallelEmbedding, +) +from vllm.model_executor.model_loader.weight_utils import default_weight_loader + +from sglang.srt.layers.activation import SiluAndMul +from sglang.srt.layers.layernorm import RMSNorm +from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput +from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.sampler import Sampler +from sglang.srt.model_executor.forward_batch_info import InputMetadata + + +class ExaoneGatedMLP(nn.Module): + def __init__( + self, + hidden_size: int, + intermediate_size: int, + hidden_act: str, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.gate_up_proj = MergedColumnParallelLinear( + hidden_size, + [intermediate_size] * 2, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.gate_up_proj", + ) + self.c_proj = RowParallelLinear( + intermediate_size, + hidden_size, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.c_proj", + ) + if hidden_act != "silu": + raise ValueError( + f"Unsupported activation: {hidden_act}. " + "Only silu is supported for now." 
+ ) + self.act_fn = SiluAndMul() + + def forward(self, x): + gate_up, _ = self.gate_up_proj(x) + x = self.act_fn(gate_up) + x, _ = self.c_proj(x) + return x + + +class ExaoneAttention(nn.Module): + def __init__( + self, + config, + hidden_size: int, + num_heads: int, + num_kv_heads: int, + layer_id: int = 0, + rope_theta: float = 500000, + rope_scaling: Optional[Dict[str, Any]] = None, + rope_is_neox_style: bool = True, + max_position_embeddings: int = 4096, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.hidden_size = hidden_size + tp_size = get_tensor_model_parallel_world_size() + self.total_num_heads = num_heads + assert self.total_num_heads % tp_size == 0 + self.num_heads = self.total_num_heads // tp_size + self.total_num_kv_heads = num_kv_heads + if self.total_num_kv_heads >= tp_size: + # Number of KV heads is greater than TP size, so we partition + # the KV heads across multiple tensor parallel GPUs. + assert self.total_num_kv_heads % tp_size == 0 + else: + # Number of KV heads is less than TP size, so we replicate + # the KV heads across multiple tensor parallel GPUs. + assert tp_size % self.total_num_kv_heads == 0 + self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size) + # MistralConfig has an optional head_dim introduced by Mistral-Nemo + self.head_dim = getattr( + config, "head_dim", self.hidden_size // self.total_num_heads + ) + self.rotary_dim = int( + self.head_dim * getattr(config, "partial_rotary_factor", 1) + ) + self.q_size = self.num_heads * self.head_dim + self.kv_size = self.num_kv_heads * self.head_dim + self.scaling = self.head_dim**-0.5 + self.rope_theta = rope_theta + self.max_position_embeddings = max_position_embeddings + + self.qkv_proj = QKVParallelLinear( + hidden_size, + self.head_dim, + self.total_num_heads, + self.total_num_kv_heads, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.qkv_proj", + ) + self.out_proj = RowParallelLinear( + self.total_num_heads * self.head_dim, + hidden_size, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.out_proj", + ) + + self.rotary_emb = get_rope( + self.head_dim, + rotary_dim=self.rotary_dim, + max_position=max_position_embeddings, + base=rope_theta, + rope_scaling=rope_scaling, + is_neox_style=rope_is_neox_style, + ) + self.attn = RadixAttention( + self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads, + layer_id=layer_id, + ) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + input_metadata: InputMetadata, + ) -> torch.Tensor: + qkv, _ = self.qkv_proj(hidden_states) + q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) + q, k = self.rotary_emb(positions, q, k) + attn_output = self.attn(q, k, v, input_metadata) + output, _ = self.out_proj(attn_output) + return output + + +class ExaoneDecoderLayer(nn.Module): + def __init__( + self, + config, + layer_id: int = 0, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.hidden_size = config.hidden_size + rope_theta = getattr(config, "rope_theta", 500000) + rope_scaling = getattr(config, "rope_scaling", None) + if rope_scaling is not None and getattr( + config, "original_max_position_embeddings", None + ): + rope_scaling["original_max_position_embeddings"] = ( + config.original_max_position_embeddings + ) + rope_is_neox_style = getattr(config, "rope_is_neox_style", True) + max_position_embeddings = getattr(config, 
"max_position_embeddings", 4096) + self.self_attn = ExaoneAttention( + config=config, + hidden_size=self.hidden_size, + num_heads=config.num_attention_heads, + num_kv_heads=config.num_key_value_heads, + layer_id=layer_id, + rope_theta=rope_theta, + rope_scaling=rope_scaling, + rope_is_neox_style=rope_is_neox_style, + max_position_embeddings=max_position_embeddings, + quant_config=quant_config, + prefix=f"{prefix}.self_attn", + ) + self.mlp = ExaoneGatedMLP( + hidden_size=self.hidden_size, + intermediate_size=config.intermediate_size, + hidden_act=config.activation_function, + quant_config=quant_config, + prefix=f"{prefix}.mlp", + ) + rms_norm_eps = config.layer_norm_epsilon + self.ln_1 = RMSNorm(config.hidden_size, eps=rms_norm_eps) + self.ln_2 = RMSNorm(config.hidden_size, eps=rms_norm_eps) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + input_metadata: InputMetadata, + residual: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Self Attention + if residual is None: + residual = hidden_states + hidden_states = self.ln_1(hidden_states) + else: + hidden_states, residual = self.ln_1(hidden_states, residual) + hidden_states = self.self_attn( + positions=positions, + hidden_states=hidden_states, + input_metadata=input_metadata, + ) + + # Fully Connected + hidden_states, residual = self.ln_2(hidden_states, residual) + hidden_states = self.mlp(hidden_states) + return hidden_states, residual + + +class ExaoneModel(nn.Module): + def __init__( + self, + config, + quant_config: Optional[QuantizationConfig] = None, + ) -> None: + super().__init__() + self.config = config + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + self.wte = VocabParallelEmbedding( + config.vocab_size, + config.hidden_size, + ) + self.h = nn.ModuleList( + [ + ExaoneDecoderLayer( + config, i, quant_config=quant_config, prefix=f"model.h.{i}" + ) + for i in range(config.num_hidden_layers) + ] + ) + rms_norm_eps = config.layer_norm_epsilon + self.ln_f = RMSNorm(config.hidden_size, eps=rms_norm_eps) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + input_metadata: InputMetadata, + input_embeds: torch.Tensor = None, + ) -> torch.Tensor: + if input_embeds is None: + hidden_states = self.wte(input_ids) + else: + hidden_states = input_embeds + residual = None + for i in range(len(self.h)): + layer = self.h[i] + hidden_states, residual = layer( + positions, + hidden_states, + input_metadata, + residual, + ) + hidden_states, _ = self.ln_f(hidden_states, residual) + return hidden_states + + +class ExaoneForCausalLM(nn.Module): + def __init__( + self, + config, + quant_config: Optional[QuantizationConfig] = None, + cache_config: Optional[CacheConfig] = None, + efficient_weight_load=False, + ) -> None: + super().__init__() + self.config = config + self.quant_config = quant_config + self.transformer = ExaoneModel(config, quant_config=quant_config) + self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) + self.logits_processor = LogitsProcessor(config) + self.sampler = Sampler() + + @torch.no_grad() + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + input_metadata: InputMetadata, + input_embeds: torch.Tensor = None, + ) -> LogitsProcessorOutput: + hidden_states = self.transformer( + input_ids, positions, input_metadata, input_embeds + ) + logits_output = self.logits_processor( + input_ids, hidden_states, self.lm_head.weight, input_metadata + ) + sample_output = 
self.sampler(logits_output, input_metadata.sampling_info) + return sample_output, logits_output + + def get_module_name(self, name): + stacked_params_mapping = [ + # (param_name, shard_name, shard_id, num_shard) + ("qkv_proj", "q_proj", "q", 3), + ("qkv_proj", "k_proj", "k", 3), + ("qkv_proj", "v_proj", "v", 3), + ("gate_up_proj", "c_fc_0", 0, 2), + ("gate_up_proj", "c_fc_1", 1, 2), + ] + for param_name, weight_name, shard_id, num_shard in stacked_params_mapping: + if weight_name in name: + return ( + name.replace(weight_name, param_name)[: -len(".weight")], + num_shard, + ) + return name[: -len(".weight")], 1 + + def get_num_params(self): + params_dict = dict(self.named_parameters()) + return len(params_dict) + + def load_weights( + self, weights: Iterable[Tuple[str, torch.Tensor]], name=None, loaded_weight=None + ): + stacked_params_mapping = [ + # (param_name, shard_name, shard_id) + ("qkv_proj", "q_proj", "q"), + ("qkv_proj", "k_proj", "k"), + ("qkv_proj", "v_proj", "v"), + ("gate_up_proj", "c_fc_0", 0), + ("gate_up_proj", "c_fc_1", 1), + ] + params_dict = dict(self.named_parameters()) + + def load_weights_per_param(name, loaded_weight): + if "rotary_emb.inv_freq" in name or "projector" in name: + return + if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: + # Models trained using ColossalAI may include these tensors in + # the checkpoint. Skip them. + return + if name.startswith("model.vision_tower") and name not in params_dict: + return + + for param_name, weight_name, shard_id in stacked_params_mapping: + if weight_name not in name: + continue + name = name.replace(weight_name, param_name) + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + # Skip loading extra bias for GPTQ models. 
+ if name.endswith(".bias") and name not in params_dict: + return + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", default_weight_loader) + weight_loader(param, loaded_weight) + + if name is None or loaded_weight is None: + for name, loaded_weight in weights: + name = name.replace("attn.attention", "self_attn") + load_weights_per_param(name, loaded_weight) + else: + name = name.replace("attn.attention", "self_attn") + load_weights_per_param(name, loaded_weight) + + +EntryClass = ExaoneForCausalLM From 55f5976b42d736f3dfe2f8f9b91a6536c212744a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A2=85=EA=B3=A4?= <149566442+Deepfocused@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:49:07 +0900 Subject: [PATCH 53/88] Update README.md - Supported Models add Exaone 3.0 (#1267) Co-authored-by: Yineng Zhang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e3e47c100b..bfd01e2080c 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct ### Supported Models **Generative Models** - +- Exaone 3.0 - Llama / Llama 2 / Llama 3 / Llama 3.1 - Mistral / Mixtral / Mistral NeMo - Gemma / Gemma 2 From 79ece2c51f47ee6b792c6282a6f76987892c5f8d Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Fri, 30 Aug 2024 06:05:01 -0700 Subject: [PATCH 54/88] Report median instead of mean in bench_latency.py (#1269) --- README.md | 2 +- python/sglang/bench_latency.py | 12 +++++++----- python/sglang/srt/hf_transformers_utils.py | 8 -------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index bfd01e2080c..2991b40632e 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,6 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct ### Supported Models **Generative Models** -- Exaone 3.0 - Llama / Llama 2 / Llama 3 / Llama 3.1 - Mistral / Mixtral / Mistral NeMo - Gemma / Gemma 2 @@ -253,6 +252,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - Grok - ChatGLM - InternLM 2 +- Exaone 3 **Embedding Models** diff --git a/python/sglang/bench_latency.py b/python/sglang/bench_latency.py index 3a487408573..966a97d20e9 100644 --- a/python/sglang/bench_latency.py +++ b/python/sglang/bench_latency.py @@ -292,6 +292,7 @@ def latency_test_run_once( measurement_results["prefill_throughput"] = throughput # Decode + decode_latencies = [] for i in range(output_len): torch.cuda.synchronize() tic = time.time() @@ -300,17 +301,18 @@ def latency_test_run_once( latency = time.time() - tic tot_latency += latency throughput = batch_size / latency + decode_latencies.append(latency) if i < 5: rank_print( f"Decode. latency: {latency:6.5f} s, throughput: {throughput:9.2f} token/s" ) - avg_decode_latency = (tot_latency - prefill_latency) / output_len - avg_decode_throughput = batch_size / avg_decode_latency + med_decode_latency = np.median(decode_latencies) + med_decode_throughput = batch_size / med_decode_latency rank_print( - f"Decode. avg latency: {avg_decode_latency:6.5f} s, avg throughput: {avg_decode_throughput:9.2f} token/s" + f"Decode. 
median latency: {med_decode_latency:6.5f} s, median throughput: {med_decode_throughput:9.2f} token/s" ) - measurement_results["avg_decode_latency"] = avg_decode_latency - measurement_results["avg_decode_throughput"] = avg_decode_throughput + measurement_results["median_decode_latency"] = med_decode_latency + measurement_results["median_decode_throughput"] = med_decode_throughput throughput = (input_len + output_len) * batch_size / tot_latency rank_print( diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index 7fce3b24012..b22c61020ef 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -50,8 +50,6 @@ with contextlib.suppress(ValueError): AutoConfig.register(name, cls) -from sglang.srt.utils import is_multimodal_model - def download_from_hf(model_path: str): if os.path.exists(model_path): @@ -60,12 +58,6 @@ def download_from_hf(model_path: str): return snapshot_download(model_path, allow_patterns=["*.json", "*.bin", "*.model"]) -def get_config_json(model_path: str): - with open(os.path.join(model_path, "configs.json")) as f: - config = json.load(f) - return config - - def get_config( model: str, trust_remote_code: bool, From 51c554d812f4969f4727e21531224322281efc2f Mon Sep 17 00:00:00 2001 From: Christopher Chou <49086305+BabyChouSr@users.noreply.github.com> Date: Fri, 30 Aug 2024 11:51:44 -0700 Subject: [PATCH 55/88] Allow more flexible assistant and system response (#1256) --- python/sglang/srt/conversation.py | 26 +++++++++++- python/sglang/srt/openai_api/adapter.py | 17 +++++++- python/sglang/srt/openai_api/protocol.py | 10 ++--- test/srt/test_vision_openai_server.py | 50 ++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/conversation.py index d5ca3277030..dbc376d9593 100644 --- a/python/sglang/srt/conversation.py +++ b/python/sglang/srt/conversation.py @@ -386,7 +386,16 @@ def generate_chat_conv( for message in request.messages: msg_role = message.role if msg_role == "system": - conv.system_message = message.content + if isinstance(message.content, str): + conv.system_message = message.content + elif isinstance(message.content, list): + if ( + len(message.content) != 1 + or getattr(message.content[0], "type", None) != "text" + ): + raise ValueError("The system message should be a single text.") + else: + conv.system_message = getattr(message.content[0], "text", "") elif msg_role == "user": # Handle the various types of Chat Request content types here. role = conv.roles[0] @@ -414,7 +423,20 @@ def generate_chat_conv( conv.append_image(content.image_url.url) conv.append_message(conv.roles[0], real_content) elif msg_role == "assistant": - conv.append_message(conv.roles[1], message.content) + parsed_content = "" + if isinstance(message.content, str): + parsed_content = message.content + elif isinstance(message.content, list): + if ( + len(message.content) != 1 + or getattr(message.content[0], "type", None) != "text" + ): + raise ValueError( + "The assistant's response should be a single text." 
+ ) + else: + parsed_content = getattr(message.content[0], "text", "") + conv.append_message(conv.roles[1], parsed_content) else: raise ValueError(f"Unknown role: {msg_role}") diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 4feb632b0b8..cd7526b0d93 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -844,8 +844,23 @@ def v1_chat_generate_request( if not isinstance(request.messages, str): # Apply chat template and its stop strings. if chat_template_name is None: + openai_compatible_messages = [] + for message in request.messages: + if isinstance(message.content, str): + openai_compatible_messages.append( + {"role": message.role, "content": message.content} + ) + else: + content_list = message.dict()["content"] + for content in content_list: + if content["type"] == "text": + openai_compatible_messages.append( + {"role": message.role, "content": content["text"]} + ) prompt_ids = tokenizer_manager.tokenizer.apply_chat_template( - request.messages, tokenize=True, add_generation_prompt=True + openai_compatible_messages, + tokenize=True, + add_generation_prompt=True, ) stop = request.stop image_data = None diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py index ce51e1c029f..8073df7952e 100644 --- a/python/sglang/srt/openai_api/protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -200,11 +200,6 @@ class CompletionStreamResponse(BaseModel): usage: Optional[UsageInfo] = None -class ChatCompletionMessageGenericParam(BaseModel): - role: Literal["system", "assistant"] - content: str - - class ChatCompletionMessageContentTextPart(BaseModel): type: Literal["text"] text: str @@ -225,6 +220,11 @@ class ChatCompletionMessageContentImagePart(BaseModel): ] +class ChatCompletionMessageGenericParam(BaseModel): + role: Literal["system", "assistant"] + content: Union[str, List[ChatCompletionMessageContentTextPart]] + + class ChatCompletionMessageUserParam(BaseModel): role: Literal["user"] content: Union[str, List[ChatCompletionMessageContentPart]] diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py index cf29c0e815a..4f764c09cd8 100644 --- a/test/srt/test_vision_openai_server.py +++ b/test/srt/test_vision_openai_server.py @@ -76,6 +76,56 @@ def test_chat_completion(self): assert response.usage.completion_tokens > 0 assert response.usage.total_tokens > 0 + def test_multi_turn_chat_completion(self): + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + response = client.chat.completions.create( + model="default", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true" + }, + }, + { + "type": "text", + "text": "Describe this image in a very short sentence.", + }, + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "There is a man at the back of a yellow cab ironing his clothes.", + } + ], + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "Repeat your previous answer."} + ], + }, + ], + temperature=0, + ) + + assert response.choices[0].message.role == "assistant" + text = response.choices[0].message.content + assert isinstance(text, str) + assert "man" in text or "cab" in text, text + assert response.id + assert response.created + assert response.usage.prompt_tokens > 0 + assert response.usage.completion_tokens > 0 + 
assert response.usage.total_tokens > 0 + def test_mult_images_chat_completion(self): client = openai.Client(api_key=self.api_key, base_url=self.base_url) From 52cefdbf5797f612d38f43a120f52ae45b9d1380 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sun, 1 Sep 2024 00:44:29 +1000 Subject: [PATCH 56/88] fix: resolve the fp8 bug introduced by vLLM 0.5.5 (#1276) --- python/sglang/srt/models/deepseek_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 67d99d51246..2f06a5534fa 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -695,7 +695,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): weight_loader( param, loaded_weight, - weight_name, + name, shard_id=shard_id, expert_id=expert_id, ) From 6cc9c52521976450b1371c7555102ded79670b2e Mon Sep 17 00:00:00 2001 From: Byron Hsu Date: Sat, 31 Aug 2024 22:54:34 -0700 Subject: [PATCH 57/88] [doc] fix quick start link (#1282) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2991b40632e..76aaa9d3762 100644 --- a/README.md +++ b/README.md @@ -380,7 +380,7 @@ print(state["answer_1"]) #### More Examples Anthropic and VertexAI (Gemini) models are also supported. -You can find more examples at [examples/quick_start](examples/quick_start). +You can find more examples at [examples/quick_start](examples/frontend_language/quick_start). ### Language Feature To begin with, import sglang. From d134c139a19cfec512d59f55656f12f5b421e14b Mon Sep 17 00:00:00 2001 From: xiaobochen <35516720+xiaobochen123@users.noreply.github.com> Date: Sat, 31 Aug 2024 23:40:28 -0700 Subject: [PATCH 58/88] Optimize the update flashinfer indices (#1262) --- .../srt/model_executor/forward_batch_info.py | 84 ++++++++++++++----- test/srt/test_create_kvindices.py | 76 +++++++++++++++++ 2 files changed, 137 insertions(+), 23 deletions(-) create mode 100644 test/srt/test_create_kvindices.py diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index 3d40c9d7558..a443b113d44 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -22,6 +22,8 @@ import numpy as np import torch +import triton +import triton.language as tl from sglang.srt.managers.schedule_batch import ScheduleBatch from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool @@ -262,6 +264,42 @@ def init_flashinfer_handlers( ) +@triton.jit +def create_flashinfer_kv_indices_triton( + req_to_token_ptr, # [max_batch, max_context_len] + req_pool_indices_ptr, + page_kernel_lens_ptr, + kv_indptr, + kv_start_idx, + max_context_len, + kv_indices_ptr, +): + BLOCK_SIZE: tl.constexpr = 512 + pid = tl.program_id(axis=0) + req_pool_index = tl.load(req_pool_indices_ptr + pid) + kv_indices_offset = tl.load(kv_indptr + pid) + + kv_start = 0 + kv_end = 0 + if kv_start_idx: + kv_start = tl.load(kv_start_idx + pid).to(tl.int32) + kv_end = kv_start + kv_end += tl.load(page_kernel_lens_ptr + pid).to(tl.int32) + + req_to_token_ptr += req_pool_index * max_context_len + kv_indices_ptr += kv_indices_offset + + ld_offset = kv_start + tl.arange(0, BLOCK_SIZE) + st_offset = tl.arange(0, BLOCK_SIZE) + num_loop = tl.cdiv(kv_end - kv_start, BLOCK_SIZE) + for _ in range(num_loop): + mask = ld_offset < kv_end + data = tl.load(req_to_token_ptr + ld_offset, 
mask=mask) + tl.store(kv_indices_ptr + st_offset, data, mask=mask) + ld_offset += BLOCK_SIZE + st_offset += BLOCK_SIZE + + def update_flashinfer_indices( forward_mode, model_runner, @@ -285,17 +323,18 @@ def update_flashinfer_indices( kv_indptr = torch.zeros((batch_size + 1,), dtype=torch.int32, device="cuda") kv_indptr[1:] = torch.cumsum(paged_kernel_lens, dim=0) - req_pool_indices_cpu = req_pool_indices.cpu().numpy() - paged_kernel_lens_cpu = paged_kernel_lens.cpu().numpy() - kv_indices = torch.cat( - [ - model_runner.req_to_token_pool.req_to_token[ - req_pool_indices_cpu[i], : paged_kernel_lens_cpu[i] - ] - for i in range(batch_size) - ], - dim=0, - ).contiguous() + + kv_indices = torch.empty(kv_indptr[-1], dtype=torch.int32, device="cuda") + create_flashinfer_kv_indices_triton[(batch_size,)]( + model_runner.req_to_token_pool.req_to_token, + req_pool_indices, + paged_kernel_lens, + kv_indptr, + None, + model_runner.req_to_token_pool.req_to_token.size(1), + kv_indices, + ) + kv_last_page_len = torch.ones((batch_size,), dtype=torch.int32, device="cuda") if forward_mode == ForwardMode.DECODE: @@ -365,18 +404,17 @@ def update_flashinfer_indices( kv_indptr = torch.zeros((batch_size + 1,), dtype=torch.int32, device="cuda") kv_indptr[1:] = torch.cumsum(paged_kernel_lens, dim=0) - req_pool_indices_cpu = req_pool_indices.cpu().numpy() - paged_kernel_lens_cpu = paged_kernel_lens.cpu().numpy() - kv_indices = torch.cat( - [ - model_runner.req_to_token_pool.req_to_token[ - req_pool_indices_cpu[i], - kv_start_idx[i] : kv_start_idx[i] + paged_kernel_lens_cpu[i], - ] - for i in range(batch_size) - ], - dim=0, - ).contiguous() + + kv_indices = torch.empty(kv_indptr[-1], dtype=torch.int32, device="cuda") + create_flashinfer_kv_indices_triton[(batch_size,)]( + model_runner.req_to_token_pool.req_to_token, + req_pool_indices, + paged_kernel_lens, + kv_indptr, + kv_start_idx, + model_runner.req_to_token_pool.req_to_token.size(1), + kv_indices, + ) if forward_mode == ForwardMode.DECODE: # CUDA graph uses different flashinfer_decode_wrapper diff --git a/test/srt/test_create_kvindices.py b/test/srt/test_create_kvindices.py new file mode 100644 index 00000000000..230302f264f --- /dev/null +++ b/test/srt/test_create_kvindices.py @@ -0,0 +1,76 @@ +import itertools +import unittest + +import numpy as np +import torch + +from sglang.srt.model_executor.forward_batch_info import ( + create_flashinfer_kv_indices_triton, +) + + +class TestCreateKvIndices(unittest.TestCase): + @classmethod + def setUpClass(cls): + if not torch.cuda.is_available(): + raise unittest.SkipTest("CUDA is not available") + torch.set_default_device("cuda") + + def _run_test(self, batch, max_batch, max_context_len): + req_to_token = torch.arange( + max_batch * max_context_len, dtype=torch.int32, device="cuda" + ).reshape((max_batch, max_context_len)) + req_pool_indices = torch.tensor( + torch.from_numpy( + np.random.choice(range(max_batch), size=batch, replace=False) + ), + dtype=torch.int32, + device="cuda", + ) + paged_kernel_lens = torch.tensor( + torch.from_numpy( + np.random.choice(range(max_context_len), size=batch, replace=False) + ), + dtype=torch.int32, + device="cuda", + ) + + kv_indptr = torch.zeros((batch + 1,), dtype=torch.int32, device="cuda") + kv_indptr[1:] = torch.cumsum(paged_kernel_lens, dim=0) + + # ref + req_pool_indices_cpu = req_pool_indices.cpu().numpy() + paged_kernel_lens_cpu = paged_kernel_lens.cpu().numpy() + kv_indices_ref = torch.cat( + [ + req_to_token[req_pool_indices_cpu[i], : paged_kernel_lens_cpu[i]] + for i in 
range(batch) + ], + dim=0, + ).contiguous() + + # triton + kv_indices_triton = torch.empty(kv_indptr[-1], dtype=torch.int32, device="cuda") + create_flashinfer_kv_indices_triton[(batch,)]( + req_to_token, + req_pool_indices, + paged_kernel_lens, + kv_indptr, + None, + req_to_token.size(1), + kv_indices_triton, + ) + + # Check + self.assertTrue(torch.equal(kv_indices_ref, kv_indices_triton)) + + def test_create_kvindices(self): + BATCH = [1, 37, 1786] + MAX_BATCH = 4096 + MAX_CONTEXT_LEN = 4096 + for batch in BATCH: + self._run_test(batch, MAX_BATCH, MAX_CONTEXT_LEN) + + +if __name__ == "__main__": + unittest.main() From 1b5d56f7f885cdc4284579ee863f9944f4c12bce Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 1 Sep 2024 00:27:25 -0700 Subject: [PATCH 59/88] [CI] Add more multi-gpu tests (#1280) --- .github/workflows/accuracy-test.yml | 33 ++++++++++- .github/workflows/cache-purge.yml | 27 --------- .github/workflows/e2e-test.yml | 44 +++++++++++++- .github/workflows/moe-test.yml | 45 --------------- .github/workflows/unit-test.yml | 10 ++-- python/sglang/bench_latency.py | 41 +++++++------ test/srt/test_moe_eval_accuracy_large.py | 73 ++++++++++++++++++++++++ test/srt/test_moe_serving_latency.py | 45 +++++++++++++++ test/srt/test_moe_serving_throughput.py | 19 +----- test/srt/test_serving_latency.py | 43 ++++++++++++++ test/srt/test_serving_throughput.py | 19 +----- 11 files changed, 271 insertions(+), 128 deletions(-) delete mode 100644 .github/workflows/cache-purge.yml delete mode 100644 .github/workflows/moe-test.yml create mode 100644 test/srt/test_moe_eval_accuracy_large.py create mode 100644 test/srt/test_moe_serving_latency.py create mode 100644 test/srt/test_serving_latency.py diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index 6fb102a4c5a..b7118e217a0 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -18,7 +18,7 @@ concurrency: cancel-in-progress: true jobs: - accuracy-test: + one-gpu: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: 1-gpu-runner @@ -41,3 +41,34 @@ jobs: run: | cd test/srt python3 test_eval_accuracy_large.py + + two-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 2-gpu-runner + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + git clone https://github.com/merrymercy/human-eval.git + cd human-eval + pip install -e . + + - name: Evaluate Accuracy + timeout-minutes: 20 + run: | + cd test/srt + python3 test_moe_eval_accuracy_large.py + + finish: + needs: [one-gpu, two-gpu] + runs-on: ubuntu-latest + steps: + - name: Finish + run: echo "This is an empty step to ensure that all jobs are completed." 
diff --git a/.github/workflows/cache-purge.yml b/.github/workflows/cache-purge.yml deleted file mode 100644 index c699f49885f..00000000000 --- a/.github/workflows/cache-purge.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Weekly Cache Purge - -on: - schedule: - - cron: '0 0 * * 0' # Every Sunday at 00:00 - workflow_dispatch: - -jobs: - purge-cache: - if: github.repository == 'sgl-project/sglang' - runs-on: self-hosted - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Purge pip cache - run: | - source $HOME/venv/bin/activate - echo "$HOME/venv/bin" >> $GITHUB_PATH - pip cache purge - - - name: Update dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 11c94775c15..c5594ac4aa9 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -18,7 +18,7 @@ concurrency: cancel-in-progress: true jobs: - e2e-test: + one-gpu: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: 1-gpu-runner @@ -41,7 +41,8 @@ jobs: - name: Benchmark Serving Latency timeout-minutes: 10 run: | - python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8 + cd test/srt + python3 -m unittest test_serving_latency.TestServingLatency.test_default - name: Benchmark Serving Throughput (w/o RadixAttention) timeout-minutes: 10 @@ -54,3 +55,42 @@ jobs: run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill + + two-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 2-gpu-runner + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark Serving Throughput (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark Serving Latency (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default + + - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + + finish: + needs: [one-gpu, two-gpu] + runs-on: ubuntu-latest + steps: + - name: Finish + run: echo "This is an empty step to ensure that all jobs are completed." 
diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml deleted file mode 100644 index 4440aa215f0..00000000000 --- a/.github/workflows/moe-test.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: MoE Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: moe-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - moe-test: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 2-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Benchmark MoE Serving Throughput - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - - - name: Benchmark MoE Serving Throughput (w/o RadixAttention) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 41a565a6382..5d774b67ea5 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -18,7 +18,7 @@ concurrency: cancel-in-progress: true jobs: - unit-test-jobs: + run-test: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: 1-gpu-runner strategy: @@ -48,9 +48,9 @@ jobs: python3 run_suite.py --suite minimal --range-begin 8 fi - unit-test: - needs: unit-test-jobs + finish: + needs: [run-test] runs-on: ubuntu-latest steps: - - name: Merge step - run: echo "This is an empty merge step" \ No newline at end of file + - name: Finish + run: echo "This is an empty step to ensure that all jobs are completed." diff --git a/python/sglang/bench_latency.py b/python/sglang/bench_latency.py index 966a97d20e9..9006b7150aa 100644 --- a/python/sglang/bench_latency.py +++ b/python/sglang/bench_latency.py @@ -11,26 +11,34 @@ ## plot the results in series of lines: python -m sglang.bench_latency --result-filename out.jsonl --graph-sql="select run_name, batch_size, prefill_throughput from results" - # Usage (correctness test): python -m sglang.bench_latency --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct ## Reference output (of the correctness test above, can be gpu dependent): -prefill logits (first half) tensor([[-10.0312, -9.5000, 0.8936, ..., -4.9414, -3.2402, -3.3633], - [-10.0312, -9.5000, 0.8936, ..., -4.9414, -3.2402, -3.3633], - [ -9.1875, -10.2500, 2.7109, ..., -4.3359, -4.0664, -4.1328]], - device='cuda:0', dtype=torch.float16) -prefill logits (final) tensor([[-8.3203, -7.1211, 3.3379, ..., -4.9570, -4.1328, -3.4141], - [-8.9062, -9.0156, 4.1445, ..., -4.9922, -4.4961, -4.0742], - [-9.6328, -9.0547, 4.0117, ..., -5.3047, -4.7148, -4.4609]], - device='cuda:0', dtype=torch.float16) - The capital of France is. 
+input_ids=[[1, 450, 7483, 310, 3444, 338], [1, 450, 7483, 310, 278, 3303, 13187, 290, 338], [1, 20628, 338, 263, 6575, 1460, 2462, 322, 306, 763]] + +prefill logits (first half): tensor([[-10.0312, -9.5000, 0.8931, ..., -4.9414, -3.2422, -3.3633], + [-10.0312, -9.5000, 0.8931, ..., -4.9414, -3.2422, -3.3633], + [ -9.1875, -10.2500, 2.7129, ..., -4.3359, -4.0664, -4.1328]], + device='cuda:0') + +prefill logits (final): tensor([[-8.3125, -7.1172, 3.3457, ..., -4.9570, -4.1328, -3.4141], + [-8.9141, -9.0156, 4.1445, ..., -4.9922, -4.4961, -4.0781], + [-9.6328, -9.0547, 4.0195, ..., -5.3047, -4.7148, -4.4570]], + device='cuda:0') + +========== Prompt 0 ========== + The capital of France is Paris. The capital of the United States is Washington, D.C. - The capital of the United Kindom is. + +========== Prompt 1 ========== + The capital of the United Kindom is London. The capital of the United Kingdom is London. The capital of the - Today is a sunny day and I like go for a walk in the park. + +========== Prompt 2 ========== + Today is a sunny day and I like to go for a walk in the park. I'm going to the park """ @@ -225,12 +233,12 @@ def correctness_test( # Prepare inputs input_ids, reqs = prepare_inputs_for_correctness_test(bench_args, tokenizer) - rank_print(f"{input_ids=}") + rank_print(f"\n{input_ids=}\n") if bench_args.cut_len > 0: # Prefill next_token_ids, next_token_logits, batch = extend(reqs, model_runner) - rank_print("prefill logits (first half)", next_token_logits) + rank_print(f"prefill logits (first half): {next_token_logits} \n") # Prepare extend inputs reqs = prepare_extend_inputs_for_correctness_test( @@ -239,7 +247,7 @@ def correctness_test( # Extend next_token_ids, next_token_logits, batch = extend(reqs, model_runner) - rank_print("prefill logits (final)", next_token_logits) + rank_print(f"prefill logits (final): {next_token_logits} \n") # Decode output_ids = [input_ids[i] + [next_token_ids[i]] for i in range(len(input_ids))] @@ -250,7 +258,8 @@ def correctness_test( # Print for i in range(len(reqs)): - rank_print(tokenizer.decode(output_ids[i])) + rank_print(f"========== Prompt {i} ==========") + rank_print(tokenizer.decode(output_ids[i]), "\n") @torch.inference_mode() diff --git a/test/srt/test_moe_eval_accuracy_large.py b/test/srt/test_moe_eval_accuracy_large.py new file mode 100644 index 00000000000..d13f427d822 --- /dev/null +++ b/test/srt/test_moe_eval_accuracy_large.py @@ -0,0 +1,73 @@ +import unittest +from types import SimpleNamespace + +from sglang.srt.utils import kill_child_process +from sglang.test.run_eval import run_eval +from sglang.test.test_utils import ( + DEFAULT_MOE_MODEL_NAME_FOR_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +class TestEvalAccuracyLarge(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.model = DEFAULT_MOE_MODEL_NAME_FOR_TEST + cls.base_url = DEFAULT_URL_FOR_TEST + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + "--log-level-http", + "warning", + "--tp", + "2", + ], + ) + + @classmethod + def tearDownClass(cls): + kill_child_process(cls.process.pid) + + def test_mmlu(self): + args = SimpleNamespace( + base_url=self.base_url, + model=self.model, + eval_name="mmlu", + num_examples=3000, + num_threads=1024, + ) + + metrics = run_eval(args) + assert metrics["score"] >= 0.63, f"{metrics}" + + def test_human_eval(self): + args = SimpleNamespace( + base_url=self.base_url, + model=self.model, + 
eval_name="humaneval", + num_examples=None, + num_threads=1024, + ) + + metrics = run_eval(args) + assert metrics["score"] >= 0.43, f"{metrics}" + + def test_mgsm_en(self): + args = SimpleNamespace( + base_url=self.base_url, + model=self.model, + eval_name="mgsm_en", + num_examples=None, + num_threads=1024, + ) + + metrics = run_eval(args) + assert metrics["score"] >= 0.64, f"{metrics}" + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_moe_serving_latency.py b/test/srt/test_moe_serving_latency.py new file mode 100644 index 00000000000..9d521532316 --- /dev/null +++ b/test/srt/test_moe_serving_latency.py @@ -0,0 +1,45 @@ +import os +import subprocess +import unittest + +from sglang.srt.utils import kill_child_process +from sglang.test.test_utils import DEFAULT_MOE_MODEL_NAME_FOR_TEST + + +class TestServingLatency(unittest.TestCase): + def test_default(self): + command = [ + "python3", + "-m", + "sglang.bench_latency", + "--model", + DEFAULT_MOE_MODEL_NAME_FOR_TEST, + "--batch-size", + "1", + "--input", + "128", + "--output", + "8", + "--tp", + "2", + ] + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + output = stdout.decode() + error = stderr.decode() + print(f"Output: {output}") + print(f"Error: {error}") + + lastline = output.split("\n")[-3] + value = float(lastline.split(" ")[-2]) + + if os.getenv("SGLANG_IS_IN_CI", "false") == "true": + assert value > 125 + + kill_child_process(process.pid) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_moe_serving_throughput.py b/test/srt/test_moe_serving_throughput.py index 4f6e8db82c4..6f040da34b2 100644 --- a/test/srt/test_moe_serving_throughput.py +++ b/test/srt/test_moe_serving_throughput.py @@ -23,7 +23,6 @@ def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size other_args.append("--disable-flashinfer") other_args.extend(["--chunked-prefill-size", str(chunked_prefill_size)]) other_args.extend(["--tensor-parallel-size", "2"]) - other_args.append("--enable-p2p-check") model = DEFAULT_MOE_MODEL_NAME_FOR_TEST base_url = DEFAULT_URL_FOR_TEST @@ -35,7 +34,7 @@ def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size ) # Run benchmark - num_prompts = 200 + num_prompts = 300 args = SimpleNamespace( backend="sglang", base_url=base_url, @@ -76,8 +75,7 @@ def test_default(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE): 950, H100 (SMX): 1800 - assert res["output_throughput"] > 1750 + assert res["output_throughput"] > 1850 def test_default_without_radix_cache(self): res = self.run_test( @@ -87,18 +85,7 @@ def test_default_without_radix_cache(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE): 950, H100 (SMX): 1900 - assert res["output_throughput"] > 1850 - - def test_all_cases(self): - for disable_radix_cache in [False, True]: - for disable_flashinfer in [False, True]: - for chunked_prefill_size in [-1, 2048]: - self.run_test( - disable_radix_cache=False, - disable_flashinfer=False, - chunked_prefill_size=-1, - ) + assert res["output_throughput"] > 1950 if __name__ == "__main__": diff --git a/test/srt/test_serving_latency.py b/test/srt/test_serving_latency.py new file mode 100644 index 00000000000..e762892c8eb --- /dev/null +++ b/test/srt/test_serving_latency.py @@ -0,0 +1,43 @@ +import os +import subprocess +import unittest + +from sglang.srt.utils import kill_child_process +from sglang.test.test_utils import 
DEFAULT_MODEL_NAME_FOR_TEST + + +class TestServingLatency(unittest.TestCase): + def test_default(self): + command = [ + "python3", + "-m", + "sglang.bench_latency", + "--model", + DEFAULT_MODEL_NAME_FOR_TEST, + "--batch-size", + "1", + "--input", + "128", + "--output", + "8", + ] + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout, stderr = process.communicate() + output = stdout.decode() + error = stderr.decode() + print(f"Output: {output}") + print(f"Error: {error}") + + lastline = output.split("\n")[-3] + value = float(lastline.split(" ")[-2]) + + if os.getenv("SGLANG_IS_IN_CI", "false") == "true": + assert value > 130 + + kill_child_process(process.pid) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/test_serving_throughput.py b/test/srt/test_serving_throughput.py index f1089a6a7b3..d4ed12612ac 100644 --- a/test/srt/test_serving_throughput.py +++ b/test/srt/test_serving_throughput.py @@ -33,7 +33,7 @@ def run_test(self, disable_radix_cache, disable_flashinfer, chunked_prefill_size ) # Run benchmark - num_prompts = 400 + num_prompts = 500 args = SimpleNamespace( backend="sglang", base_url=base_url, @@ -74,8 +74,7 @@ def test_default(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE): 1450, H100 (SMX): 2550 - assert res["output_throughput"] > 2500 + assert res["output_throughput"] > 2400 def test_default_without_radix_cache(self): res = self.run_test( @@ -85,7 +84,6 @@ def test_default_without_radix_cache(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE): 1500, H100 (SMX): 2850 assert res["output_throughput"] > 2800 def test_default_without_chunked_prefill(self): @@ -96,18 +94,7 @@ def test_default_without_chunked_prefill(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - # A100 (PCIE): 1450, H100 (SMX): 2550 - assert res["output_throughput"] > 2500 - - def test_all_cases(self): - for disable_radix_cache in [False, True]: - for disable_flashinfer in [False, True]: - for chunked_prefill_size in [-1, 2048]: - self.run_test( - disable_radix_cache=False, - disable_flashinfer=False, - chunked_prefill_size=-1, - ) + assert res["output_throughput"] > 2400 if __name__ == "__main__": From 54772f784adb9c9774c359c23661cfb0a3bbac17 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sun, 1 Sep 2024 17:28:06 +1000 Subject: [PATCH 60/88] feat: fix fp8 for MLA and support bmm fp8 for DeepSeek V2 (#1285) Co-authored-by: ispobock --- python/sglang/srt/models/deepseek_v2.py | 60 +++++++++++++++++-------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 2f06a5534fa..bb80e2da2f5 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -19,6 +19,7 @@ from typing import Any, Dict, Iterable, Optional, Tuple import torch +from flashinfer import bmm_fp8 from torch import nn from transformers import PretrainedConfig from vllm.config import CacheConfig @@ -161,6 +162,15 @@ def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float: return 0.1 * mscale * math.log(scale) + 1.0 +def input_to_float8(x, dtype=torch.float8_e4m3fn): + finfo = torch.finfo(dtype) + min_val, max_val = x.aminmax() + amax = torch.maximum(min_val.abs(), max_val.abs()).clamp(min=1e-12) + scale = finfo.max / amax + x_scl_sat = (x * scale).clamp(min=finfo.min, max=finfo.max) + return x_scl_sat.to(dtype).contiguous(), scale.float().reciprocal() + + class 
DeepseekV2Attention(nn.Module): def __init__( @@ -255,11 +265,6 @@ def __init__( mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim)) self.scaling = self.scaling * mscale * mscale - # self.attn = Attention(self.num_heads, - # self.qk_head_dim, - # self.scaling, - # num_kv_heads=self.num_heads) - # TODO, support head_size 192 self.attn = RadixAttention( self.num_local_heads, @@ -283,7 +288,7 @@ def forward( q = self.q_proj(hidden_states)[0].view( -1, self.num_local_heads, self.qk_head_dim ) - q_nope, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) + _, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) latent_cache = self.kv_a_proj_with_mqa(hidden_states)[0] kv_a, _ = latent_cache.split([self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) latent_cache = latent_cache.unsqueeze(1) @@ -419,6 +424,7 @@ def __init__( self.w_kc = None self.w_vc = None + self.w_scale = None def forward( self, @@ -439,8 +445,17 @@ def forward( -1, self.num_local_heads, self.qk_head_dim ) q_nope, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) - q_nope_out = q_input[..., : self.kv_lora_rank] - torch.bmm(q_nope.transpose(0, 1), self.w_kc, out=q_nope_out.transpose(0, 1)) + + if self.w_kc.dtype == torch.float8_e4m3fn: + q_nope_val, q_nope_scale = input_to_float8( + q_nope.transpose(0, 1), torch.float8_e4m3fn + ) + q_nope_out = bmm_fp8( + q_nope_val, self.w_kc, q_nope_scale, self.w_scale, torch.bfloat16 + ) + else: + q_nope_out = torch.bmm(q_nope.transpose(0, 1), self.w_kc) + q_input[..., : self.kv_lora_rank] = q_nope_out.transpose(0, 1) latent_cache = self.kv_a_proj_with_mqa(hidden_states)[0] v_input = latent_cache[..., : self.kv_lora_rank] @@ -455,16 +470,21 @@ def forward( attn_output = self.attn(q_input, k_input, v_input, input_metadata) attn_output = attn_output.view(-1, self.num_local_heads, self.kv_lora_rank) - attn_bmm_output = attn_output.new_empty( - q_len, self.num_local_heads, self.v_head_dim - ) - torch.bmm( - attn_output.transpose(0, 1), - self.w_vc, - out=attn_bmm_output.transpose(0, 1), - ) - attn_output = attn_bmm_output.flatten(1, 2) + if self.w_vc.dtype == torch.float8_e4m3fn: + attn_output_val, attn_output_scale = input_to_float8( + attn_output.transpose(0, 1), torch.float8_e4m3fn + ) + attn_bmm_output = bmm_fp8( + attn_output_val, + self.w_vc, + attn_output_scale, + self.w_scale, + torch.bfloat16, + ) + else: + attn_bmm_output = torch.bmm(attn_output.transpose(0, 1), self.w_vc) + attn_output = attn_bmm_output.transpose(0, 1).flatten(1, 2) output, _ = self.o_proj(attn_output) return output @@ -717,8 +737,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): w_kc, w_vc = self_attn.kv_b_proj.weight.unflatten( 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim) ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1) - self_attn.w_kc = w_kc.contiguous() - self_attn.w_vc = w_vc.transpose(1, 2).contiguous() + self_attn.w_kc = w_kc.transpose(1, 2).contiguous().transpose(1, 2) + self_attn.w_vc = w_vc.contiguous().transpose(1, 2) + if hasattr(self_attn.kv_b_proj, "weight_scale"): + self_attn.w_scale = self_attn.kv_b_proj.weight_scale del self_attn.kv_b_proj From 761b2cebd65ff7fbf2cd55b63e1230df1bf6f6ca Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 1 Sep 2024 02:36:56 -0700 Subject: [PATCH 61/88] [CI] merge all ci tests into one file (#1289) --- .github/workflows/accuracy-test.yml | 74 --------- .github/workflows/e2e-test.yml | 96 ----------- .github/workflows/lint.yml | 11 +- 
.github/workflows/pr-test.yml | 201 ++++++++++++++++++++++++ .github/workflows/unit-test.yml | 56 ------- python/sglang/README.md | 4 +- test/srt/test_moe_serving_throughput.py | 2 +- 7 files changed, 211 insertions(+), 233 deletions(-) delete mode 100644 .github/workflows/accuracy-test.yml delete mode 100644 .github/workflows/e2e-test.yml create mode 100644 .github/workflows/pr-test.yml delete mode 100644 .github/workflows/unit-test.yml diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml deleted file mode 100644 index b7118e217a0..00000000000 --- a/.github/workflows/accuracy-test.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Accuracy Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: accuracy-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - one-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - git clone https://github.com/merrymercy/human-eval.git - cd human-eval - pip install -e . - - - name: Evaluate Accuracy - timeout-minutes: 20 - run: | - cd test/srt - python3 test_eval_accuracy_large.py - - two-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 2-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - git clone https://github.com/merrymercy/human-eval.git - cd human-eval - pip install -e . - - - name: Evaluate Accuracy - timeout-minutes: 20 - run: | - cd test/srt - python3 test_moe_eval_accuracy_large.py - - finish: - needs: [one-gpu, two-gpu] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." 
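(The two accuracy jobs deleted above reappear nearly verbatim as the `accuracy-test-1-gpu` and `accuracy-test-2-gpu` jobs of the new `pr-test.yml` further down.) The same evaluation can also be reproduced locally; a minimal sketch, assuming the dependencies and the `merrymercy/human-eval` fork are installed exactly as in the deleted steps above:

```
# Local equivalent of the deleted 1-GPU accuracy job; the script name and
# working directory are taken from the workflow steps above.
import subprocess

subprocess.run(
    ["python3", "test_eval_accuracy_large.py"],
    cwd="test/srt",
    check=True,
)
```
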
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml deleted file mode 100644 index c5594ac4aa9..00000000000 --- a/.github/workflows/e2e-test.yml +++ /dev/null @@ -1,96 +0,0 @@ -name: E2E Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: e2e-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - one-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Benchmark Serving Throughput - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default - - - name: Benchmark Serving Latency - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_latency.TestServingLatency.test_default - - - name: Benchmark Serving Throughput (w/o RadixAttention) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache - - - name: Benchmark Serving Throughput (w/o ChunkedPrefill) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill - - two-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 2-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Benchmark Serving Throughput (TP=2) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - - - name: Benchmark Serving Latency (TP=2) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default - - - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache - - finish: - needs: [one-gpu, two-gpu] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." 
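The benchmark steps deleted here likewise move into the `performance-test-1-gpu` and `performance-test-2-gpu` jobs of `pr-test.yml` below. Under the hood, each of these unittest benchmarks follows the same launch/measure/teardown harness; a rough sketch using helpers that appear in the test files elsewhere in this series (the `try` body stands in for whichever benchmark the step runs):

```
# Sketch of the harness pattern behind the benchmark unittests above; the
# helper names come from sglang.test.test_utils and sglang.srt.utils as used
# in test/srt in this patch series.
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    popen_launch_server,
)

process = popen_launch_server(
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
    timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
)
try:
    pass  # run the benchmark or eval against DEFAULT_URL_FOR_TEST here
finally:
    kill_child_process(process.pid)
```
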
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 07614050640..4857f844f27 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,19 +1,22 @@ name: Lint -on: [push, pull_request] +on: [pull_request] jobs: lint: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + + - name: Set up Python 3.9 uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.9 + - name: Install pre-commit hook run: | python -m pip install pre-commit pre-commit install + - name: Linting run: pre-commit run --all-files diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml new file mode 100644 index 00000000000..f8b50ad5dad --- /dev/null +++ b/.github/workflows/pr-test.yml @@ -0,0 +1,201 @@ +name: Pull Request Test + +on: + push: + branches: [ main ] + paths: + - "python/sglang/**" + - "test/**" + pull_request: + branches: [ main ] + paths: + - "python/sglang/**" + - "test/**" + workflow_dispatch: + +concurrency: + group: pr-test-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-test-frontend: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Run test + timeout-minutes: 20 + run: | + cd test/lang + python3 run_suite.py --suite minimal + + unit-test-backend-part-0: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite minimal --range-begin 0 --range-end 8 + + unit-test-backend-part-1: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite minimal --range-begin 8 + + performance-test-1-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark Serving Throughput + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark Serving Latency + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_latency.TestServingLatency.test_default + + - name: Benchmark Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest 
test_serving_throughput.TestServingThroughput.test_default_without_radix_cache + + - name: Benchmark Serving Throughput (w/o ChunkedPrefill) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill + + performance-test-2-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 2-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark Serving Throughput (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark Serving Latency (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default + + - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + + accuracy-test-1-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + git clone https://github.com/merrymercy/human-eval.git + cd human-eval + pip install -e . + + - name: Evaluate Accuracy + timeout-minutes: 20 + run: | + cd test/srt + python3 test_eval_accuracy_large.py + + accuracy-test-2-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 2-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + git clone https://github.com/merrymercy/human-eval.git + cd human-eval + pip install -e . + + - name: Evaluate Accuracy + timeout-minutes: 20 + run: | + cd test/srt + python3 test_moe_eval_accuracy_large.py + + finish: + needs: [ + unit-test-frontend, unit-test-backend-part-0, unit-test-backend-part-1, + performance-test-1-gpu, performance-test-2-gpu, + accuracy-test-1-gpu, accuracy-test-2-gpu + ] + runs-on: ubuntu-latest + steps: + - name: Finish + run: echo "This is an empty step to ensure that all jobs are completed." 
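Each step above selects a single test with `python3 -m unittest <module>.<Class>.<method>`. When debugging one benchmark locally, the same selection can be made programmatically; a small sketch, assuming the working directory is `test/srt` and a GPU is available:

```
# Programmatic equivalent of one CI step above, handy for local debugging.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    "test_serving_throughput.TestServingThroughput.test_default"
)
unittest.TextTestRunner(verbosity=2).run(suite)
```
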
diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml deleted file mode 100644 index 5d774b67ea5..00000000000 --- a/.github/workflows/unit-test.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Unit Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: unit-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - run-test: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner - strategy: - matrix: - test_type: ['backend-0', 'backend-1', 'frontend'] - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Run test - timeout-minutes: 20 - run: | - if [ "${{ matrix.test_type }}" = "frontend" ]; then - cd test/lang - python3 run_suite.py --suite minimal - elif [ "${{ matrix.test_type }}" = "backend-0" ]; then - cd test/srt - python3 run_suite.py --suite minimal --range-begin 0 --range-end 8 - elif [ "${{ matrix.test_type }}" = "backend-1" ]; then - cd test/srt - python3 run_suite.py --suite minimal --range-begin 8 - fi - - finish: - needs: [run-test] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." diff --git a/python/sglang/README.md b/python/sglang/README.md index c92144254a2..481c69affec 100644 --- a/python/sglang/README.md +++ b/python/sglang/README.md @@ -2,8 +2,8 @@ - `lang`: The frontend language. - `srt`: The backend engine for running local models. (SRT = SGLang Runtime). -- `test`: Test utilities. -- `api.py`: Public API. +- `test`: The test utilities. +- `api.py`: The public APIs. - `bench_latency.py`: Benchmark a single static batch. - `bench_serving.py`: Benchmark online serving with dynamic requests. - `global_config.py`: The global configs and constants. 
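The threshold change below uses the same gating convention as the serving tests earlier in this series: the benchmarks always execute, but the hard assertion only fires when `SGLANG_IS_IN_CI` is set by the runners. Distilled into a helper for illustration (the actual test files inline this check rather than sharing a function):

```
import os

# CI-gating pattern used by the benchmark assertions in this series; outside
# CI the benchmark still runs but the threshold is not enforced.
def assert_threshold_in_ci(value: float, threshold: float) -> None:
    if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
        assert value > threshold, f"{value} <= {threshold}"

assert_threshold_in_ci(1850.0, 1800.0)  # no-op unless SGLANG_IS_IN_CI=true
```
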
diff --git a/test/srt/test_moe_serving_throughput.py b/test/srt/test_moe_serving_throughput.py index 6f040da34b2..2acf626c1c4 100644 --- a/test/srt/test_moe_serving_throughput.py +++ b/test/srt/test_moe_serving_throughput.py @@ -75,7 +75,7 @@ def test_default(self): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - assert res["output_throughput"] > 1850 + assert res["output_throughput"] > 1800 def test_default_without_radix_cache(self): res = self.run_test( From 6cb32ef92c99ee7c1192ff90023692adc106049c Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Sun, 1 Sep 2024 17:46:40 +0800 Subject: [PATCH 62/88] Support Triton fp8 e5m2 kv cache (#1286) Co-authored-by: Yineng Zhang --- python/sglang/srt/layers/extend_attention.py | 16 ++++++++++++---- python/sglang/srt/model_executor/model_runner.py | 8 +------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/python/sglang/srt/layers/extend_attention.py b/python/sglang/srt/layers/extend_attention.py index 097adca3ca0..31a002f4398 100644 --- a/python/sglang/srt/layers/extend_attention.py +++ b/python/sglang/srt/layers/extend_attention.py @@ -128,7 +128,7 @@ def _fwd_kernel( k = tl.load(K_Buffer + offs_buf_k, mask=mask_n[None, :], other=0.0) qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32) - qk += tl.dot(q, k) + qk += tl.dot(q.to(k.dtype), k) if BLOCK_DPE > 0: offs_kpe = ( offs_kv_loc[None, :] * stride_buf_kbs @@ -140,7 +140,7 @@ def _fwd_kernel( mask=mask_n[None, :], other=0.0, ) - qk += tl.dot(qpe, kpe) + qk += tl.dot(qpe.to(kpe.dtype), kpe) qk *= sm_scale if logit_cap > 0: @@ -276,9 +276,17 @@ def extend_attention_fwd( BLOCK_DV = Lv if CUDA_CAPABILITY[0] >= 9: - BLOCK_M, BLOCK_N = (128, 64) + if Lq <= 256: + BLOCK_M, BLOCK_N = (128, 64) + else: + BLOCK_M, BLOCK_N = (32, 64) elif CUDA_CAPABILITY[0] >= 8: - BLOCK_M, BLOCK_N = (128, 128) if Lq <= 128 else (64, 64) + if Lq <= 128: + BLOCK_M, BLOCK_N = (128, 128) + elif Lq <= 256: + BLOCK_M, BLOCK_N = (64, 64) + else: + BLOCK_M, BLOCK_N = (32, 64) else: BLOCK_M, BLOCK_N = (64, 64) if Lq <= 128 else (32, 32) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index e6f5e743110..cee269dc781 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -348,13 +348,7 @@ def init_memory_pool( if self.server_args.kv_cache_dtype == "auto": self.kv_cache_dtype = self.dtype elif self.server_args.kv_cache_dtype == "fp8_e5m2": - if self.server_args.disable_flashinfer or self.server_args.enable_mla: - logger.warning( - "FP8 KV cache is not supported for Triton kernel now, using auto kv cache dtype" - ) - self.kv_cache_dtype = self.dtype - else: - self.kv_cache_dtype = torch.float8_e5m2 + self.kv_cache_dtype = torch.float8_e5m2 else: raise ValueError( f"Unsupported kv_cache_dtype: {self.server_args.kv_cache_dtype}." 
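This commit and patch 60 together cover the series' two fp8 paths: the DeepSeek-V2 change quantizes bmm inputs to `float8_e4m3fn` with a dynamic per-tensor scale (`input_to_float8`), while the Triton KV cache now stores `float8_e5m2` directly with no scale, e5m2's extra exponent bit buying the dynamic range that e4m3 gets from scaling. A quick numeric sanity check of the two formats; a sketch only, assuming a PyTorch build with float8 dtypes (torch >= 2.1), not the actual kernel path:

```
import torch

# Round-trip error of the two fp8 formats used above.
x = torch.randn(4, 8)

# e4m3 with a per-tensor scale, mirroring input_to_float8() from patch 60
finfo = torch.finfo(torch.float8_e4m3fn)
amax = x.abs().max().clamp(min=1e-12)
scale = finfo.max / amax
x_e4m3 = (x * scale).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)
err_e4m3 = (x - x_e4m3.to(torch.float32) / scale).abs().max()

# e5m2 stored directly, as the Triton KV-cache path now allows
x_e5m2 = x.to(torch.float8_e5m2)
err_e5m2 = (x - x_e5m2.to(torch.float32)).abs().max()

print(f"e4m3+scale max error: {err_e4m3:.4f}, raw e5m2 max error: {err_e5m2:.4f}")
```
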
From 00b19f198f198bd2f7182596773d80f5217ab757 Mon Sep 17 00:00:00 2001 From: Byron Hsu Date: Sun, 1 Sep 2024 03:12:06 -0700 Subject: [PATCH 63/88] [triton] Remove the zero initialization of qk_acc by directly writing the result (#1288) --- python/sglang/srt/layers/extend_attention.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/sglang/srt/layers/extend_attention.py b/python/sglang/srt/layers/extend_attention.py index 31a002f4398..6c7686971e0 100644 --- a/python/sglang/srt/layers/extend_attention.py +++ b/python/sglang/srt/layers/extend_attention.py @@ -127,8 +127,7 @@ def _fwd_kernel( ) k = tl.load(K_Buffer + offs_buf_k, mask=mask_n[None, :], other=0.0) - qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32) - qk += tl.dot(q.to(k.dtype), k) + qk = tl.dot(q.to(k.dtype), k) if BLOCK_DPE > 0: offs_kpe = ( offs_kv_loc[None, :] * stride_buf_kbs @@ -179,9 +178,7 @@ def _fwd_kernel( ) k = tl.load(K_Extend + offs_k, mask=mask_n[None, :], other=0.0) - qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32) - qk += tl.dot(q, k) - + qk = tl.dot(q, k, out_dtype=tl.float32) if BLOCK_DPE > 0: offs_kpe = ( (cur_seq_extend_start_contiguous + start_n + offs_n[None, :]) From 0836055324af92cae74a13bdb137fb754b6c8aae Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Sun, 1 Sep 2024 03:14:56 -0700 Subject: [PATCH 64/88] [Chore] Rename model_overide_args to model_override_args (#1284) Signed-off-by: Kai-Hsun Chen Co-authored-by: Yineng Zhang --- .../usage/llava_video/srt_example_llava_v.py | 20 ++++++++-------- python/sglang/launch_server_llavavid.py | 24 +++++++++---------- python/sglang/srt/hf_transformers_utils.py | 6 ++--- .../sglang/srt/managers/controller_multi.py | 10 ++++---- .../sglang/srt/managers/controller_single.py | 10 ++++---- .../sglang/srt/managers/tokenizer_manager.py | 4 ++-- python/sglang/srt/managers/tp_worker.py | 12 +++++----- python/sglang/srt/model_config.py | 6 ++--- .../sglang/srt/model_executor/model_runner.py | 4 ++-- python/sglang/srt/server.py | 12 +++++----- 10 files changed, 54 insertions(+), 54 deletions(-) diff --git a/examples/frontend_language/usage/llava_video/srt_example_llava_v.py b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py index 1f2931a5a4f..02bab342ac5 100644 --- a/examples/frontend_language/usage/llava_video/srt_example_llava_v.py +++ b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py @@ -197,19 +197,19 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= print("Invalid model path. 
Please specify a valid model path.") exit() - model_overide_args = {} - model_overide_args["mm_spatial_pool_stride"] = args.mm_spatial_pool_stride - model_overide_args["architectures"] = ["LlavaVidForCausalLM"] - model_overide_args["num_frames"] = args.num_frames - model_overide_args["model_type"] = "llava" + model_override_args = {} + model_override_args["mm_spatial_pool_stride"] = args.mm_spatial_pool_stride + model_override_args["architectures"] = ["LlavaVidForCausalLM"] + model_override_args["num_frames"] = args.num_frames + model_override_args["model_type"] = "llava" if "34b" in args.model_path.lower(): - model_overide_args["image_token_index"] = 64002 + model_override_args["image_token_index"] = 64002 if args.num_frames == 32: - model_overide_args["rope_scaling"] = {"factor": 2.0, "type": "linear"} - model_overide_args["max_sequence_length"] = 4096 * 2 - model_overide_args["tokenizer_model_max_length"] = 4096 * 2 + model_override_args["rope_scaling"] = {"factor": 2.0, "type": "linear"} + model_override_args["max_sequence_length"] = 4096 * 2 + model_override_args["tokenizer_model_max_length"] = 4096 * 2 elif args.num_frames < 32: pass else: @@ -223,7 +223,7 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size= tokenizer_path=tokenizer_path, port=cur_port, additional_ports=[cur_port + 1, cur_port + 2, cur_port + 3, cur_port + 4], - model_overide_args=model_overide_args, + model_override_args=model_override_args, tp_size=1, ) sgl.set_default_backend(runtime) diff --git a/python/sglang/launch_server_llavavid.py b/python/sglang/launch_server_llavavid.py index 797ad07a47f..43eefef4efa 100644 --- a/python/sglang/launch_server_llavavid.py +++ b/python/sglang/launch_server_llavavid.py @@ -10,17 +10,17 @@ args = parser.parse_args() server_args = ServerArgs.from_cli_args(args) - model_overide_args = {} - model_overide_args["mm_spatial_pool_stride"] = 2 - model_overide_args["architectures"] = ["LlavaVidForCausalLM"] - model_overide_args["num_frames"] = 16 - model_overide_args["model_type"] = "llavavid" - if model_overide_args["num_frames"] == 32: - model_overide_args["rope_scaling"] = {"factor": 2.0, "type": "linear"} - model_overide_args["max_sequence_length"] = 4096 * 2 - model_overide_args["tokenizer_model_max_length"] = 4096 * 2 - model_overide_args["model_max_length"] = 4096 * 2 + model_override_args = {} + model_override_args["mm_spatial_pool_stride"] = 2 + model_override_args["architectures"] = ["LlavaVidForCausalLM"] + model_override_args["num_frames"] = 16 + model_override_args["model_type"] = "llavavid" + if model_override_args["num_frames"] == 32: + model_override_args["rope_scaling"] = {"factor": 2.0, "type": "linear"} + model_override_args["max_sequence_length"] = 4096 * 2 + model_override_args["tokenizer_model_max_length"] = 4096 * 2 + model_override_args["model_max_length"] = 4096 * 2 if "34b" in args.model_path.lower(): - model_overide_args["image_token_index"] = 64002 + model_override_args["image_token_index"] = 64002 - launch_server(server_args, model_overide_args, None) + launch_server(server_args, model_override_args, None) diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index b22c61020ef..bfdeebdc975 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -62,7 +62,7 @@ def get_config( model: str, trust_remote_code: bool, revision: Optional[str] = None, - model_overide_args: Optional[dict] = None, + model_override_args: Optional[dict] = None, 
): config = AutoConfig.from_pretrained( model, trust_remote_code=trust_remote_code, revision=revision @@ -70,8 +70,8 @@ def get_config( if config.model_type in _CONFIG_REGISTRY: config_class = _CONFIG_REGISTRY[config.model_type] config = config_class.from_pretrained(model, revision=revision) - if model_overide_args: - config.update(model_overide_args) + if model_override_args: + config.update(model_override_args) return config diff --git a/python/sglang/srt/managers/controller_multi.py b/python/sglang/srt/managers/controller_multi.py index d2b10e7fa29..ba626d4cffc 100644 --- a/python/sglang/srt/managers/controller_multi.py +++ b/python/sglang/srt/managers/controller_multi.py @@ -71,12 +71,12 @@ def __init__( self, server_args: ServerArgs, port_args: PortArgs, - model_overide_args, + model_override_args, ): # Parse args self.server_args = server_args self.port_args = port_args - self.model_overide_args = model_overide_args + self.model_override_args = model_override_args self.load_balance_method = LoadBalanceMethod.from_str( server_args.load_balance_method ) @@ -114,7 +114,7 @@ def start_dp_worker(self, dp_worker_id: int): self.server_args, self.port_args, pipe_controller_writer, - self.model_overide_args, + self.model_override_args, True, gpu_ids, dp_worker_id, @@ -189,14 +189,14 @@ def start_controller_process( server_args: ServerArgs, port_args: PortArgs, pipe_writer, - model_overide_args: dict, + model_override_args: dict, ): """Start a controller process.""" configure_logger(server_args) try: - controller = ControllerMulti(server_args, port_args, model_overide_args) + controller = ControllerMulti(server_args, port_args, model_override_args) except Exception: pipe_writer.send(get_exception_traceback()) raise diff --git a/python/sglang/srt/managers/controller_single.py b/python/sglang/srt/managers/controller_single.py index 4a16a6f6e43..2ae37059c10 100644 --- a/python/sglang/srt/managers/controller_single.py +++ b/python/sglang/srt/managers/controller_single.py @@ -40,7 +40,7 @@ def __init__( self, server_args: ServerArgs, port_args: PortArgs, - model_overide_args: dict, + model_override_args: dict, gpu_ids: List[int], is_data_parallel_worker: bool, dp_worker_id: int, @@ -76,7 +76,7 @@ def __init__( tp_rank_range, server_args, port_args.nccl_ports[dp_worker_id], - model_overide_args, + model_override_args, ) # Launch tp rank 0 @@ -85,7 +85,7 @@ def __init__( 0, server_args, port_args.nccl_ports[dp_worker_id], - model_overide_args, + model_override_args, ) self.tp_cpu_group = self.tp_server.model_runner.tp_group.cpu_group @@ -126,7 +126,7 @@ def start_controller_process( server_args: ServerArgs, port_args: PortArgs, pipe_writer: multiprocessing.connection.Connection, - model_overide_args: dict, + model_override_args: dict, is_data_parallel_worker: bool = False, gpu_ids: List[int] = None, dp_worker_id: int = None, @@ -149,7 +149,7 @@ def start_controller_process( controller = ControllerSingle( server_args, port_args, - model_overide_args, + model_override_args, gpu_ids, is_data_parallel_worker, dp_worker_id, diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 5ad4152ea93..644670a2bac 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -77,7 +77,7 @@ def __init__( self, server_args: ServerArgs, port_args: PortArgs, - model_overide_args: dict = None, + model_override_args: dict = None, ): self.server_args = server_args @@ -95,7 +95,7 @@ def __init__( 
self.hf_config = get_config( self.model_path, trust_remote_code=server_args.trust_remote_code, - model_overide_args=model_overide_args, + model_override_args=model_override_args, ) self.is_generation = is_generation_model( self.hf_config.architectures, self.server_args.is_embedding diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index 123b1f5d5dc..8fc03b85991 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -76,7 +76,7 @@ def __init__( tp_rank: int, server_args: ServerArgs, nccl_port: int, - model_overide_args: dict, + model_override_args: dict, ): suppress_other_loggers() @@ -93,7 +93,7 @@ def __init__( server_args.model_path, server_args.trust_remote_code, context_length=server_args.context_length, - model_overide_args=model_overide_args, + model_override_args=model_override_args, ) self.model_runner = ModelRunner( @@ -876,7 +876,7 @@ def run_tp_server( tp_rank: int, server_args: ServerArgs, nccl_port: int, - model_overide_args: dict, + model_override_args: dict, ): """Run a tensor parallel model server.""" configure_logger(server_args, prefix=f" TP{tp_rank}") @@ -887,7 +887,7 @@ def run_tp_server( tp_rank, server_args, nccl_port, - model_overide_args, + model_override_args, ) tp_cpu_group = model_server.model_runner.tp_group.cpu_group @@ -904,14 +904,14 @@ def launch_tp_servers( tp_rank_range: List[int], server_args: ServerArgs, nccl_port: int, - model_overide_args: dict, + model_override_args: dict, ): """Launch multiple tensor parallel servers.""" procs = [] for i in tp_rank_range: proc = multiprocessing.Process( target=run_tp_server, - args=(gpu_ids[i], i, server_args, nccl_port, model_overide_args), + args=(gpu_ids[i], i, server_args, nccl_port, model_override_args), ) proc.start() procs.append(proc) diff --git a/python/sglang/srt/model_config.py b/python/sglang/srt/model_config.py index ed496515cd3..edf89f6b977 100644 --- a/python/sglang/srt/model_config.py +++ b/python/sglang/srt/model_config.py @@ -33,17 +33,17 @@ def __init__( trust_remote_code: bool = True, revision: Optional[str] = None, context_length: Optional[int] = None, - model_overide_args: Optional[dict] = None, + model_override_args: Optional[dict] = None, ) -> None: self.path = path self.trust_remote_code = trust_remote_code self.revision = revision - self.model_overide_args = model_overide_args + self.model_override_args = model_override_args self.hf_config = get_config( self.path, trust_remote_code, revision, - model_overide_args=model_overide_args, + model_override_args=model_override_args, ) self.hf_text_config = get_hf_text_config(self.hf_config) if context_length is not None: diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index cee269dc781..05a7513656c 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -195,9 +195,9 @@ def load_model(self): monkey_patch_vllm_qvk_linear_loader() self.dtype = self.vllm_model_config.dtype - if self.model_config.model_overide_args is not None: + if self.model_config.model_override_args is not None: self.vllm_model_config.hf_config.update( - self.model_config.model_overide_args + self.model_config.model_override_args ) self.model = get_model( diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 5ba2a45e70c..feaf91dd390 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -272,7 +272,7 @@ async def 
retrieve_file_content(file_id: str): def launch_server( server_args: ServerArgs, - model_overide_args: Optional[dict] = None, + model_override_args: Optional[dict] = None, pipe_finish_writer: Optional[mp.connection.Connection] = None, ): """Launch an HTTP server.""" @@ -317,7 +317,7 @@ def launch_server( tp_rank_range, server_args, ports[3], - model_overide_args, + model_override_args, ) try: @@ -328,7 +328,7 @@ def launch_server( return # Launch processes - tokenizer_manager = TokenizerManager(server_args, port_args, model_overide_args) + tokenizer_manager = TokenizerManager(server_args, port_args, model_override_args) if server_args.chat_template: load_chat_template_for_openai_api(tokenizer_manager, server_args.chat_template) pipe_controller_reader, pipe_controller_writer = mp.Pipe(duplex=False) @@ -341,7 +341,7 @@ def launch_server( proc_controller = mp.Process( target=start_controller_process, - args=(server_args, port_args, pipe_controller_writer, model_overide_args), + args=(server_args, port_args, pipe_controller_writer, model_override_args), ) proc_controller.start() @@ -501,7 +501,7 @@ class Runtime: def __init__( self, log_level: str = "error", - model_overide_args: Optional[dict] = None, + model_override_args: Optional[dict] = None, *args, **kwargs, ): @@ -525,7 +525,7 @@ def __init__( proc = mp.Process( target=launch_server, - args=(self.server_args, model_overide_args, pipe_writer), + args=(self.server_args, model_override_args, pipe_writer), ) proc.start() pipe_writer.close() From 32a4141d5aaca699c9377dd0d5c689ac019f91b9 Mon Sep 17 00:00:00 2001 From: Enrique Shockwave <33002121+qeternity@users.noreply.github.com> Date: Sun, 1 Sep 2024 11:42:29 +0100 Subject: [PATCH 65/88] Allow new lines during JSON generation (#1277) --- python/sglang/srt/constrained/fsm_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/constrained/fsm_cache.py b/python/sglang/srt/constrained/fsm_cache.py index 6bc6ea6d265..57c49130622 100644 --- a/python/sglang/srt/constrained/fsm_cache.py +++ b/python/sglang/srt/constrained/fsm_cache.py @@ -79,7 +79,7 @@ def fset(self, value): def init_value(self, value): if self.json_schema_mode: - regex = build_regex_from_schema(value) + regex = build_regex_from_schema(value, whitespace_pattern=r"[\n\t ]*") return RegexGuide(regex, self.outlines_tokenizer), regex else: return RegexGuide(value, self.outlines_tokenizer) From 9b0805242eeaf81bc41f6920788eaa379b43488b Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Mon, 2 Sep 2024 00:29:06 +1000 Subject: [PATCH 66/88] fix: resolve fp8 for mixtral (#1290) --- python/sglang/srt/models/mixtral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index ca38cb03bae..85f4576c46d 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -362,7 +362,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): weight_loader( param, loaded_weight, - weight_name, + name, shard_id=shard_id, expert_id=expert_id, ) From 6487ef64c659fe1b1c10743a37f6377f70044ecd Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Mon, 2 Sep 2024 03:19:49 +1000 Subject: [PATCH 67/88] ci: add nightly eval (#1291) --- .github/workflows/nightly-eval.yml | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/nightly-eval.yml diff --git a/.github/workflows/nightly-eval.yml b/.github/workflows/nightly-eval.yml new file mode 100644 
index 00000000000..c1a7e9c1795 --- /dev/null +++ b/.github/workflows/nightly-eval.yml @@ -0,0 +1,66 @@ +name: Nightly Evaluation + +on: + schedule: + - cron: '0 0 * * *' + push: + branches: + - main + paths: + - "python/sglang/version.py" + workflow_dispatch: + +concurrency: + group: nightly-eval-${{ github.ref }} + cancel-in-progress: true + +jobs: + meta-llama-31-8b-instruct: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + git clone https://github.com/EleutherAI/lm-evaluation-harness + pushd lm-evaluation-harness + pip install -e . + pip install lm_eval[api] + popd + + - name: Run eval + timeout-minutes: 20 + run: | + python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --disable-radix-cache & + + echo "Waiting for server to start..." + for i in {1..120}; do + if curl -s http://127.0.0.1:30000/health; then + echo "Server is up!" + break + fi + if [ $i -eq 120 ]; then + echo "Server failed to start within 120 seconds" + exit 1 + fi + sleep 1 + done + + lm_eval --model local-completions --tasks gsm8k --model_args model=meta-llama/Meta-Llama-3.1-8B-Instruct,base_url=http://127.0.0.1:30000/v1/completions,num_concurrent=128,max_retries=3,tokenized_requests=False + + echo "Stopping server..." + kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') + + finish: + needs: [ + meta-llama-31-8b-instruct + ] + runs-on: ubuntu-latest + steps: + - name: Finish + run: echo "This is an empty step to ensure that all jobs are completed." From 58fa6076223ab2438e840c1a4bb2e5508fd2c1f2 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 1 Sep 2024 12:20:46 -0700 Subject: [PATCH 68/88] Fix the flaky tests in test_moe_eval_accuracy_large.py (#1293) --- test/srt/test_moe_eval_accuracy_large.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/test_moe_eval_accuracy_large.py b/test/srt/test_moe_eval_accuracy_large.py index d13f427d822..1183cc4e7a6 100644 --- a/test/srt/test_moe_eval_accuracy_large.py +++ b/test/srt/test_moe_eval_accuracy_large.py @@ -54,7 +54,7 @@ def test_human_eval(self): ) metrics = run_eval(args) - assert metrics["score"] >= 0.43, f"{metrics}" + assert metrics["score"] >= 0.42, f"{metrics}" def test_mgsm_en(self): args = SimpleNamespace( From 4a9f8ea43bc23609f1fcce46e6efb2711b452fe5 Mon Sep 17 00:00:00 2001 From: Byron Hsu Date: Sun, 1 Sep 2024 14:46:36 -0700 Subject: [PATCH 69/88] [doc] Fix more broken links (#1294) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 76aaa9d3762..aff7699c18f 100644 --- a/README.md +++ b/README.md @@ -393,7 +393,7 @@ You can implement your prompt flow in a function decorated by `sgl.function`. You can then invoke the function with `run` or `run_batch`. The system will manage the state, chat template, parallelism and batching for you. 
-The complete code for the examples below can be found at [readme_examples.py](examples/usage/readme_examples.py) +The complete code for the examples below can be found at [readme_examples.py](examples/frontend_language/usage/readme_examples.py) #### Control Flow You can use any Python code within the function body, including control flow, nested function calls, and external libraries. @@ -442,7 +442,7 @@ def image_qa(s, image_file, question): s += sgl.assistant(sgl.gen("answer", max_tokens=256) ``` -See also [srt_example_llava.py](examples/quick_start/srt_example_llava.py). +See also [srt_example_llava.py](examples/frontend_language/quick_start/local_example_llava_next.py). #### Constrained Decoding Use `regex` to specify a regular expression as a decoding constraint. @@ -486,7 +486,7 @@ def character_gen(s, name): s += sgl.gen("json_output", max_tokens=256, regex=character_regex) ``` -See also [json_decode.py](examples/usage/json_decode.py) for an additional example of specifying formats with Pydantic models. +See also [json_decode.py](examples/frontend_language/usage/json_decode.py) for an additional example of specifying formats with Pydantic models. #### Batching Use `run_batch` to run a batch of requests with continuous batching. From 47f20da223c62473577231cec49dedb86c56220f Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Sun, 1 Sep 2024 21:50:58 -0700 Subject: [PATCH 70/88] Fix regex mask (#1296) --- python/sglang/srt/layers/sampler.py | 2 +- python/sglang/srt/sampling/sampling_batch_info.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py index 6cb7d0a7c11..f56fee8289d 100644 --- a/python/sglang/srt/layers/sampler.py +++ b/python/sglang/srt/layers/sampler.py @@ -63,7 +63,7 @@ def _get_probs( logits.add_(sampling_info.logit_bias) if sampling_info.vocab_mask is not None: - logits = logits.masked_fill(~sampling_info.vocab_mask, float("-inf")) + logits = logits.masked_fill(sampling_info.vocab_mask, float("-inf")) logits = self._apply_penalties(logits, sampling_info) diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py index 7843f4bd32d..38b6701c705 100644 --- a/python/sglang/srt/sampling/sampling_batch_info.py +++ b/python/sglang/srt/sampling/sampling_batch_info.py @@ -154,15 +154,15 @@ def update_regex_vocab_mask(self, batch: ScheduleBatch): self.vocab_mask = None if has_regex: + self.vocab_mask = torch.zeros( + bs, self.vocab_size, dtype=torch.bool, device=device + ) for i, req in enumerate(reqs): if req.regex_fsm is not None: - if self.vocab_mask is None: - self.vocab_mask = torch.zeros( - bs, self.vocab_size, dtype=torch.bool, device=device - ) + self.vocab_mask[i].fill_(1) self.vocab_mask[i][ req.regex_fsm.get_next_instruction(req.regex_fsm_state).tokens - ] = 1 + ] = 0 def filter(self, unfinished_indices: List[int], new_indices: torch.Tensor): self.penalizer_orchestrator.filter(unfinished_indices, new_indices) From 6def9b018c6b4c87410e870f0a5d0469ba50d637 Mon Sep 17 00:00:00 2001 From: Max Shawabkeh Date: Sun, 1 Sep 2024 21:56:33 -0700 Subject: [PATCH 71/88] Fix hang when doing s += None. 
(#1297) Co-authored-by: max99x --- python/sglang/lang/interpreter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py index 91f48456aac..2f8ea7e78bb 100644 --- a/python/sglang/lang/interpreter.py +++ b/python/sglang/lang/interpreter.py @@ -855,6 +855,8 @@ def get_meta_info(self, name): return self.stream_executor.get_meta_info(name) def __iadd__(self, other): + if other is None: + raise ValueError("Tried to append None to state.") self.stream_executor.submit(other) return self From 9999442756cf34a298933d2e072bd07493346d52 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 1 Sep 2024 22:22:38 -0700 Subject: [PATCH 72/88] Release v0.2.15 (#1295) --- README.md | 2 +- python/pyproject.toml | 2 +- python/sglang/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index aff7699c18f..0f1cf3838b9 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ ### Method 2: From source ``` # Use the last release branch -git clone -b v0.2.14.post2 https://github.com/sgl-project/sglang.git +git clone -b v0.2.15 https://github.com/sgl-project/sglang.git cd sglang pip install --upgrade pip diff --git a/python/pyproject.toml b/python/pyproject.toml index 87c99bffaea..e46f9f4e1c7 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sglang" -version = "0.2.14.post2" +version = "0.2.15" description = "SGLang is yet another fast serving framework for large language models and vision language models." readme = "README.md" requires-python = ">=3.8" diff --git a/python/sglang/version.py b/python/sglang/version.py index ad954de503b..ddc77a88056 100644 --- a/python/sglang/version.py +++ b/python/sglang/version.py @@ -1 +1 @@ -__version__ = "0.2.14.post2" +__version__ = "0.2.15" From 2561ed012ce10e109ac888f7e9e7ffe44ccb4a94 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Tue, 3 Sep 2024 01:18:41 +1000 Subject: [PATCH 73/88] feat: update nightly gsm8k eval (#1304) --- .github/workflows/nightly-eval.yml | 45 +++------------ python/sglang/test/test_utils.py | 4 ++ test/srt/test_nightly_gsm8k_eval.py | 89 +++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 38 deletions(-) create mode 100644 test/srt/test_nightly_gsm8k_eval.py diff --git a/.github/workflows/nightly-eval.yml b/.github/workflows/nightly-eval.yml index c1a7e9c1795..4ac911c9a92 100644 --- a/.github/workflows/nightly-eval.yml +++ b/.github/workflows/nightly-eval.yml @@ -15,9 +15,9 @@ concurrency: cancel-in-progress: true jobs: - meta-llama-31-8b-instruct: + nightly-eval-2-gpu: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner + runs-on: 2-gpu-runner steps: - name: Checkout code uses: actions/checkout@v3 @@ -25,42 +25,11 @@ jobs: - name: Install dependencies run: | pip install --upgrade pip - pip install -e "python[dev]" + pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - git clone https://github.com/EleutherAI/lm-evaluation-harness - pushd lm-evaluation-harness - pip install -e . 
- pip install lm_eval[api] - popd - - name: Run eval - timeout-minutes: 20 + - name: Nightly gsm8k Accuracy + timeout-minutes: 60 run: | - python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --disable-radix-cache & - - echo "Waiting for server to start..." - for i in {1..120}; do - if curl -s http://127.0.0.1:30000/health; then - echo "Server is up!" - break - fi - if [ $i -eq 120 ]; then - echo "Server failed to start within 120 seconds" - exit 1 - fi - sleep 1 - done - - lm_eval --model local-completions --tasks gsm8k --model_args model=meta-llama/Meta-Llama-3.1-8B-Instruct,base_url=http://127.0.0.1:30000/v1/completions,num_concurrent=128,max_retries=3,tokenized_requests=False - - echo "Stopping server..." - kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') - - finish: - needs: [ - meta-llama-31-8b-instruct - ] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." + cd test/srt + python3 test_nightly_gsm8k_eval.py diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index d6a1792b85e..1b9b63e882f 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -23,6 +23,10 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct" DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 600 +DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Meta-Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-it" +DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2 = "meta-llama/Meta-Llama-3.1-70B-Instruct,mistralai/Mixtral-8x7B-Instruct-v0.1,Qwen/Qwen2-57B-A14B-Instruct" +DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8,neuralmagic/Mistral-7B-Instruct-v0.3-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8,neuralmagic/gemma-2-2b-it-FP8" +DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8" if os.getenv("SGLANG_IS_IN_CI", "false") == "true": DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157 diff --git a/test/srt/test_nightly_gsm8k_eval.py b/test/srt/test_nightly_gsm8k_eval.py new file mode 100644 index 00000000000..35e7d6eb7db --- /dev/null +++ b/test/srt/test_nightly_gsm8k_eval.py @@ -0,0 +1,89 @@ +import unittest +from types import SimpleNamespace + +from sglang.srt.utils import kill_child_process +from sglang.test.run_eval import run_eval +from sglang.test.test_utils import ( + DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1, + DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2, + DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1, + DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +def parse_models(model_string): + return [model.strip() for model in model_string.split(",") if model.strip()] + + +class TestEvalAccuracyLarge(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.model_groups = [ + (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1), False, False), + (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True), + (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False), + (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True), + ] + cls.base_url = 
DEFAULT_URL_FOR_TEST + + def setUp(self): + self.process = None + + def tearDown(self): + if self.process: + kill_child_process(self.process.pid) + + def launch_server(self, model, is_fp8, is_tp2): + other_args = ["--log-level-http", "warning", "--trust-remote-code"] + if is_fp8: + if "Llama-3" in model or "gemma-2" in model: + # compressed-tensors + other_args.extend(["--kv-cache-dtype", "fp8_e5m2"]) + elif "Qwen2-72B-Instruct-FP8" in model: + # bug + other_args.extend(["--quantization", "fp8"]) + else: + other_args.extend( + ["--quantization", "fp8", "--kv-cache-dtype", "fp8_e5m2"] + ) + if is_tp2: + other_args.extend(["--tp", "2"]) + if "DeepSeek" in model: + other_args.append("--enable-mla") + + self.process = popen_launch_server( + model, + self.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + def test_mgsm_en_all_models(self): + for model_group, is_fp8, is_tp2 in self.model_groups: + for model in model_group: + with self.subTest(model=model): + self.launch_server(model, is_fp8, is_tp2) + + args = SimpleNamespace( + base_url=self.base_url, + model=model, + eval_name="mgsm_en", + num_examples=None, + num_threads=1024, + ) + + metrics = run_eval(args) + print( + f"{'=' * 42}\n{model} - metrics={metrics} score={metrics['score']}\n{'=' * 42}\n" + ) + # loosely threshold + assert metrics["score"] > 0.5, f"score={metrics['score']} <= 0.5" + + self.tearDown() + + +if __name__ == "__main__": + unittest.main() From a5a134f39f9b032496fa895050e56485d8fe9957 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Mon, 2 Sep 2024 16:18:48 -0700 Subject: [PATCH 74/88] Fix bugs in sampler with CUDA graph / torch.compile (#1306) --- python/sglang/srt/layers/sampler.py | 44 ++++++++++++++----- .../srt/model_executor/cuda_graph_runner.py | 2 + .../sglang/srt/model_executor/model_runner.py | 2 +- .../srt/sampling/sampling_batch_info.py | 26 +++++------ 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/python/sglang/srt/layers/sampler.py b/python/sglang/srt/layers/sampler.py index f56fee8289d..6cb7d5b5508 100644 --- a/python/sglang/srt/layers/sampler.py +++ b/python/sglang/srt/layers/sampler.py @@ -1,6 +1,6 @@ import dataclasses import logging -from typing import Union +from typing import Tuple, Union import torch from flashinfer.sampling import ( @@ -9,6 +9,7 @@ top_k_top_p_sampling_from_probs, top_p_renorm_prob, ) +from torch.library import custom_op as torch_custom_op from vllm.model_executor.custom_op import CustomOp from sglang.srt.layers.logits_processor import LogitsProcessorOutput @@ -30,6 +31,9 @@ class SampleOutput: class Sampler(CustomOp): def __init__(self): super().__init__() + # FIXME: torch.multinomial has too many bugs + self.forward_native = self.forward_cuda + self.is_torch_compile = False def _apply_penalties(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): # min-token, presence, frequency @@ -46,16 +50,11 @@ def _apply_penalties(self, logits: torch.Tensor, sampling_info: SamplingBatchInf return logits - def _get_probs( - self, - logits: torch.Tensor, - sampling_info: SamplingBatchInfo, - is_torch_compile: bool = False, - ): + def _get_probs(self, logits: torch.Tensor, sampling_info: SamplingBatchInfo): # Post process logits logits = logits.contiguous() logits.div_(sampling_info.temperatures) - if is_torch_compile: + if self.is_torch_compile: # FIXME: Temporary workaround for unknown bugs in torch.compile logits.add_(0) @@ -91,7 +90,7 @@ def forward_cuda( probs, uniform_samples, sampling_info.min_ps ) else: - 
batch_next_token_ids, success = top_k_top_p_sampling_from_probs( + batch_next_token_ids, success = flashinfer_top_k_top_p( probs, uniform_samples, sampling_info.top_ks, sampling_info.top_ps ) else: @@ -110,7 +109,7 @@ def forward_native( if isinstance(logits, LogitsProcessorOutput): logits = logits.next_token_logits - probs = self._get_probs(logits, sampling_info, is_torch_compile=True) + probs = self._get_probs(logits, sampling_info) batch_next_token_ids, success = top_k_top_p_min_p_sampling_from_probs_torch( probs, sampling_info.top_ks, sampling_info.top_ps, sampling_info.min_ps @@ -119,6 +118,31 @@ def forward_native( return SampleOutput(success, probs, batch_next_token_ids) +@torch_custom_op("my_lib::flashinfer_top_k_top_p", mutates_args={}) +def flashinfer_top_k_top_p( + probs: torch.Tensor, + uniform_samples: torch.Tensor, + top_ks: torch.Tensor, + top_ps: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + # NOTE: we do not use min_p neither in CUDA nor in torch.compile + return top_k_top_p_sampling_from_probs(probs, uniform_samples, top_ks, top_ps) + + +@flashinfer_top_k_top_p.register_fake +def _( + probs: torch.Tensor, + uniform_samples: torch.Tensor, + top_ks: torch.Tensor, + top_ps: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + bs = probs.shape[0] + return ( + torch.ones(bs, dtype=torch.bool, device=probs.device), + torch.zeros(bs, dtype=torch.int32, device=probs.device), + ) + + def top_k_top_p_min_p_sampling_from_probs_torch( probs: torch.Tensor, top_ks: torch.Tensor, diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 40c87af88cf..4459213b02f 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -46,8 +46,10 @@ def _to_torch(model: torch.nn.Module, reverse: bool = False): if isinstance(sub, CustomOp): if reverse: sub._forward_method = sub.forward_cuda + setattr(sub, "is_torch_compile", False) else: sub._forward_method = sub.forward_native + setattr(sub, "is_torch_compile", True) if isinstance(sub, torch.nn.Module): _to_torch(sub, reverse) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 05a7513656c..26afe6600b6 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -523,7 +523,7 @@ def forward_decode(self, batch: ScheduleBatch): if ( self.cuda_graph_runner and self.cuda_graph_runner.can_run(len(batch.reqs)) - and not batch.sampling_info.has_bias() + and batch.sampling_info.can_run_in_cuda_graph() ): return self.cuda_graph_runner.replay(batch) diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py index 38b6701c705..20b1968d24d 100644 --- a/python/sglang/srt/sampling/sampling_batch_info.py +++ b/python/sglang/srt/sampling/sampling_batch_info.py @@ -34,12 +34,14 @@ class SamplingBatchInfo: linear_penalties: torch.Tensor = None scaling_penalties: torch.Tensor = None - def has_bias(self): + def can_run_in_cuda_graph(self): + # Vocab bias and min_ps are not supported in CUDA graph return ( - self.logit_bias is not None - or self.vocab_mask is not None - or self.linear_penalties is not None - or self.scaling_penalties is not None + self.logit_bias is None + and self.vocab_mask is None + and self.linear_penalties is None + and self.scaling_penalties is None + and not self.need_min_p_sampling ) @classmethod @@ -48,35 +50,29 
@@ def dummy_one(cls, max_bs: int, vocab_size: int): ret.temperatures = torch.ones((max_bs, 1), dtype=torch.float, device="cuda") ret.top_ps = torch.ones((max_bs,), dtype=torch.float, device="cuda") ret.top_ks = torch.ones((max_bs,), dtype=torch.int, device="cuda") - ret.min_ps = torch.zeros((max_bs,), dtype=torch.float, device="cuda") return ret def __getitem__(self, key): if isinstance(key, slice): - # NOTE: We do not use cuda graph when there is bias tensors - assert not self.has_bias() + # NOTE:This method is only used in CUDA graph + assert self.can_run_in_cuda_graph() return SamplingBatchInfo( vocab_size=self.vocab_size, temperatures=self.temperatures[key], top_ps=self.top_ps[key], top_ks=self.top_ks[key], - min_ps=self.min_ps[key], - need_min_p_sampling=self.need_min_p_sampling, ) else: raise NotImplementedError def inplace_assign(self, bs: int, other: SamplingBatchInfo): - # NOTE: We do not use cuda graph when there is bias tensors - assert not self.has_bias() + # NOTE:This method is only used in CUDA graph + assert self.can_run_in_cuda_graph() self.vocab_size = other.vocab_size - self.need_min_p_sampling = other.need_min_p_sampling - self.temperatures[:bs] = other.temperatures self.top_ps[:bs] = other.top_ps self.top_ks[:bs] = other.top_ks - self.min_ps[:bs] = other.min_ps @classmethod def from_schedule_batch(cls, batch: ScheduleBatch, vocab_size: int): From f64eae3a291ade9654f1b030878df098bdefa9ee Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Mon, 2 Sep 2024 21:44:45 -0700 Subject: [PATCH 75/88] [Fix] Reduce memory usage for loading llava model & Remove EntryClassRemapping (#1308) --- .github/workflows/pr-test.yml | 2 +- README.md | 2 +- docs/en/custom_chat_template.md | 5 +- .../quick_start/local_example_llava_next.py | 5 -- .../sglang/lang/backend/runtime_endpoint.py | 12 ++- .../sglang/srt/managers/tokenizer_manager.py | 14 ++-- .../sglang/srt/model_executor/model_runner.py | 10 --- python/sglang/srt/models/chatglm.py | 8 +- python/sglang/srt/models/exaone.py | 16 +--- .../sglang/srt/models/{llama2.py => llama.py} | 38 ++-------- .../sglang/srt/models/llama_classification.py | 73 +++++++++---------- python/sglang/srt/models/llama_embedding.py | 13 ++-- python/sglang/srt/models/llava.py | 19 ++--- python/sglang/srt/models/llavavid.py | 11 ++- python/sglang/srt/models/mistral.py | 5 +- .../deprecated/test_httpserver_classify.py | 2 +- scripts/playground/reference_hf.py | 26 ++++--- 17 files changed, 104 insertions(+), 157 deletions(-) rename python/sglang/srt/models/{llama2.py => llama.py} (91%) diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index f8b50ad5dad..5784a097584 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -1,4 +1,4 @@ -name: Pull Request Test +name: PR Test on: push: diff --git a/README.md b/README.md index 0f1cf3838b9..edde172a20c 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,7 @@ It supports streaming, vision, and most features of the Chat/Completions/Models/ ``` python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --tp 2 ``` -- Add `--dp 2` to enable multi-GPU data parallelism. It can also be used together with tensor parallelism. Data parallelism is better for throughput if there is enough memory. +- Add `--dp 2` to enable multi-GPU data parallelism. Data parallelism is better for throughput if there is enough memory. It can also be used together with tensor parallelism. The following command uses 4 GPUs in total. 
``` python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --dp 2 --tp 2 ``` diff --git a/docs/en/custom_chat_template.md b/docs/en/custom_chat_template.md index 815c7e6760b..3760bbc6a18 100644 --- a/docs/en/custom_chat_template.md +++ b/docs/en/custom_chat_template.md @@ -1,6 +1,9 @@ # Custom Chat Template in SGLang Runtime -By default, the server uses the chat template specified in the model tokenizer from Hugging Face. It should just work for most official models such as Llama-2/Llama-3. +**NOTE**: There are two chat template systems in SGLang project. This document is about setting a custom chat template for the OpenAI-compatible API server (defined at [conversation.py](../../python/sglang/srt/conversation.py)). It is NOT related to the chat template used in the SGLang language frontend (defined at [chat_template.py](../../python/sglang/lang/chat_template.py)). + +By default, the server uses the chat template specified in the model tokenizer from Hugging Face. +It should just work for most official models such as Llama-2/Llama-3. If needed, you can also override the chat template when launching the server: diff --git a/examples/frontend_language/quick_start/local_example_llava_next.py b/examples/frontend_language/quick_start/local_example_llava_next.py index 823dc7b0e83..fc5a1d04c65 100644 --- a/examples/frontend_language/quick_start/local_example_llava_next.py +++ b/examples/frontend_language/quick_start/local_example_llava_next.py @@ -2,13 +2,8 @@ Usage: python3 local_example_llava_next.py """ -from PIL import ImageFile - import sglang as sgl from sglang.lang.chat_template import get_chat_template -from sglang.srt.utils import load_image - -ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow loading of truncated images @sgl.function diff --git a/python/sglang/lang/backend/runtime_endpoint.py b/python/sglang/lang/backend/runtime_endpoint.py index 5012f646ea1..344b51d2dc2 100644 --- a/python/sglang/lang/backend/runtime_endpoint.py +++ b/python/sglang/lang/backend/runtime_endpoint.py @@ -4,7 +4,7 @@ from sglang.global_config import global_config from sglang.lang.backend.base_backend import BaseBackend -from sglang.lang.chat_template import get_chat_template_by_model_path +from sglang.lang.chat_template import get_chat_template, get_chat_template_by_model_path from sglang.lang.choices import ChoicesDecision, ChoicesSamplingMethod from sglang.lang.interpreter import StreamExecutor from sglang.lang.ir import ( @@ -23,6 +23,7 @@ def __init__( base_url: str, api_key: Optional[str] = None, verify: Optional[str] = None, + chat_template_name: Optional[str] = None, ): super().__init__() self.support_concate_and_append = True @@ -39,9 +40,12 @@ def __init__( self._assert_success(res) self.model_info = res.json() - self.chat_template = get_chat_template_by_model_path( - self.model_info["model_path"] - ) + if chat_template_name: + self.chat_template = get_chat_template(chat_template_name) + else: + self.chat_template = get_chat_template_by_model_path( + self.model_info["model_path"] + ) def get_model_name(self): return self.model_info["model_path"] diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 644670a2bac..6af82064152 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -86,8 +86,8 @@ def __init__( self.recv_from_detokenizer = context.socket(zmq.PULL) self.recv_from_detokenizer.bind(f"tcp://127.0.0.1:{port_args.tokenizer_port}") - 
self.send_to_router = context.socket(zmq.PUSH) - self.send_to_router.connect(f"tcp://127.0.0.1:{port_args.controller_port}") + self.send_to_controller = context.socket(zmq.PUSH) + self.send_to_controller.connect(f"tcp://127.0.0.1:{port_args.controller_port}") # Read model args self.model_path = server_args.model_path @@ -271,7 +271,7 @@ async def _handle_single_request( input_ids, sampling_params, ) - self.send_to_router.send_pyobj(tokenized_obj) + self.send_to_controller.send_pyobj(tokenized_obj) # Recv results event = asyncio.Event() @@ -367,7 +367,7 @@ async def _handle_batch_request( input_ids, sampling_params, ) - self.send_to_router.send_pyobj(tokenized_obj) + self.send_to_controller.send_pyobj(tokenized_obj) event = asyncio.Event() state = ReqState([], False, event) @@ -500,14 +500,14 @@ async def _wait_for_cache_prefill_response( def flush_cache(self): req = FlushCacheReq() - self.send_to_router.send_pyobj(req) + self.send_to_controller.send_pyobj(req) def abort_request(self, rid: str): if rid not in self.rid_to_state: return del self.rid_to_state[rid] req = AbortReq(rid) - self.send_to_router.send_pyobj(req) + self.send_to_controller.send_pyobj(req) async def update_weights( self, obj: UpdateWeightReqInput, request: Optional[fastapi.Request] = None @@ -524,7 +524,7 @@ async def update_weights( # wait for the previous generation requests to finish while len(self.rid_to_state) > 0: await asyncio.sleep(0) - self.send_to_router.send_pyobj(obj) + self.send_to_controller.send_pyobj(obj) self.model_update_result = asyncio.Future() result = await self.model_update_result if result.success: diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 26afe6600b6..09b3c7127f6 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -606,16 +606,6 @@ def import_model_classes(): assert entry.__name__ not in model_arch_name_to_cls model_arch_name_to_cls[entry.__name__] = entry - # compat: some models such as chatglm has incorrect class set in config.json - # usage: [ tuple("From_Entry_Class_Name": EntryClass), ] - if hasattr(module, "EntryClassRemapping") and isinstance( - module.EntryClassRemapping, list - ): - for remap in module.EntryClassRemapping: - if isinstance(remap, tuple) and len(remap) == 2: - assert remap[0] not in model_arch_name_to_cls - model_arch_name_to_cls[remap[0]] = remap[1] - return model_arch_name_to_cls diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py index 9eb04dc263d..94b405f8e8f 100644 --- a/python/sglang/srt/models/chatglm.py +++ b/python/sglang/srt/models/chatglm.py @@ -402,6 +402,8 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): weight_loader(param, loaded_weight) -EntryClass = ChatGLMForCausalLM -# compat: glm model.config class == ChatGLMModel -EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)] +class ChatGLMModel(ChatGLMForCausalLM): + pass + + +EntryClass = [ChatGLMForCausalLM, ChatGLMModel] diff --git a/python/sglang/srt/models/exaone.py b/python/sglang/srt/models/exaone.py index 4dcafed7ce7..9cddcb34fdf 100644 --- a/python/sglang/srt/models/exaone.py +++ b/python/sglang/srt/models/exaone.py @@ -297,7 +297,6 @@ def __init__( config, quant_config: Optional[QuantizationConfig] = None, cache_config: Optional[CacheConfig] = None, - efficient_weight_load=False, ) -> None: super().__init__() self.config = config @@ -345,9 +344,7 @@ def get_num_params(self): params_dict = 
dict(self.named_parameters()) return len(params_dict) - def load_weights( - self, weights: Iterable[Tuple[str, torch.Tensor]], name=None, loaded_weight=None - ): + def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ # (param_name, shard_name, shard_id) ("qkv_proj", "q_proj", "q"), @@ -358,7 +355,7 @@ def load_weights( ] params_dict = dict(self.named_parameters()) - def load_weights_per_param(name, loaded_weight): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name or "projector" in name: return if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: @@ -368,6 +365,7 @@ def load_weights_per_param(name, loaded_weight): if name.startswith("model.vision_tower") and name not in params_dict: return + name = name.replace("attn.attention", "self_attn") for param_name, weight_name, shard_id in stacked_params_mapping: if weight_name not in name: continue @@ -387,13 +385,5 @@ def load_weights_per_param(name, loaded_weight): weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) - if name is None or loaded_weight is None: - for name, loaded_weight in weights: - name = name.replace("attn.attention", "self_attn") - load_weights_per_param(name, loaded_weight) - else: - name = name.replace("attn.attention", "self_attn") - load_weights_per_param(name, loaded_weight) - EntryClass = ExaoneForCausalLM diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama.py similarity index 91% rename from python/sglang/srt/models/llama2.py rename to python/sglang/srt/models/llama.py index 22751d9b674..43c7cd54a5c 100644 --- a/python/sglang/srt/models/llama2.py +++ b/python/sglang/srt/models/llama.py @@ -295,7 +295,6 @@ def __init__( config: LlamaConfig, quant_config: Optional[QuantizationConfig] = None, cache_config: Optional[CacheConfig] = None, - efficient_weight_load=False, ) -> None: super().__init__() self.config = config @@ -305,6 +304,8 @@ def __init__( self.logits_processor = LogitsProcessor(config) self.sampler = Sampler() + self.param_dict = dict(self.named_parameters()) + @torch.no_grad() def forward( self, @@ -320,30 +321,7 @@ def forward( sample_output = self.sampler(logits_output, input_metadata.sampling_info) return sample_output, logits_output - def get_module_name(self, name): - stacked_params_mapping = [ - # (param_name, shard_name, shard_id, num_shard) - ("qkv_proj", "q_proj", "q", 3), - ("qkv_proj", "k_proj", "k", 3), - ("qkv_proj", "v_proj", "v", 3), - ("gate_up_proj", "gate_proj", 0, 2), - ("gate_up_proj", "up_proj", 1, 2), - ] - for param_name, weight_name, shard_id, num_shard in stacked_params_mapping: - if weight_name in name: - return ( - name.replace(weight_name, param_name)[: -len(".weight")], - num_shard, - ) - return name[: -len(".weight")], 1 - - def get_num_params(self): - params_dict = dict(self.named_parameters()) - return len(params_dict) - - def load_weights( - self, weights: Iterable[Tuple[str, torch.Tensor]], name=None, loaded_weight=None - ): + def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ # (param_name, shard_name, shard_id) ("qkv_proj", "q_proj", "q"), @@ -352,9 +330,9 @@ def load_weights( ("gate_up_proj", "gate_proj", 0), ("gate_up_proj", "up_proj", 1), ] - params_dict = dict(self.named_parameters()) + params_dict = self.param_dict - def load_weights_per_param(name, loaded_weight): + for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name or "projector" in name: 
return if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: @@ -383,11 +361,5 @@ def load_weights_per_param(name, loaded_weight): weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) - if name is None or loaded_weight is None: - for name, loaded_weight in weights: - load_weights_per_param(name, loaded_weight) - else: - load_weights_per_param(name, loaded_weight) - EntryClass = LlamaForCausalLM diff --git a/python/sglang/srt/models/llama_classification.py b/python/sglang/srt/models/llama_classification.py index 03ab5e802cf..db424ff1804 100644 --- a/python/sglang/srt/models/llama_classification.py +++ b/python/sglang/srt/models/llama_classification.py @@ -16,17 +16,16 @@ from typing import Iterable, Optional, Tuple import torch -import tqdm from torch import nn from transformers import LlamaConfig from vllm.config import CacheConfig -from vllm.distributed import get_tensor_model_parallel_rank from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader from sglang.srt.layers.logits_processor import LogitsProcessorOutput +from sglang.srt.layers.sampler import SampleOutput from sglang.srt.model_executor.forward_batch_info import InputMetadata -from sglang.srt.models.llama2 import LlamaModel +from sglang.srt.models.llama import LlamaForCausalLM, LlamaModel class LlamaForClassification(nn.Module): @@ -42,10 +41,12 @@ def __init__( self.model = LlamaModel(config, quant_config=quant_config) self.classification_head = nn.Linear( - config.hidden_size, config.classification_out_size + config.hidden_size, config.classification_out_size, bias=False ) self.eos_token_id = config.eos_token_id + self.param_dict = dict(self.named_parameters()) + @torch.no_grad() def forward( self, @@ -65,7 +66,7 @@ def forward( (input_metadata.batch_size, self.config.classification_out_size) ).to(input_ids.device) - return LogitsProcessorOutput( + logits_output = LogitsProcessorOutput( next_token_logits=scores, next_token_logprobs=scores, normalized_prompt_logprobs=scores, @@ -74,46 +75,38 @@ def forward( output_top_logprobs=None, ) + # A dummy to make this work + sample_output = SampleOutput( + success=torch.full( + size=(scores.shape[0],), + fill_value=True, + dtype=torch.bool, + ), + probs=torch.full( + size=(scores.shape[0], 1), + fill_value=1.0, + dtype=torch.float16, + ), + batch_next_token_ids=torch.full( + size=(scores.shape[0],), + fill_value=0, + dtype=torch.long, + ), + ) + return sample_output, logits_output + def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): - stacked_params_mapping = [ - # (param_name, shard_name, shard_id) - ("qkv_proj", "q_proj", "q"), - ("qkv_proj", "k_proj", "k"), - ("qkv_proj", "v_proj", "v"), - ("gate_up_proj", "gate_proj", 0), - ("gate_up_proj", "up_proj", 1), - ] - params_dict = dict(self.named_parameters()) - if get_tensor_model_parallel_rank() == 0: - weights = tqdm.tqdm(weights, total=int(len(params_dict) * 1.5)) - for name, loaded_weight in weights: - if "rotary_emb.inv_freq" in name or "projector" in name: - continue - if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: - # Models trained using ColossalAI may include these tensors in - # the checkpoint. Skip them. 
- continue - if "lm_head" in name: - continue + params_dict = self.param_dict - for param_name, weight_name, shard_id in stacked_params_mapping: - if weight_name not in name: - continue - name = name.replace(weight_name, param_name) - # Skip loading extra bias for GPTQ models. - if name.endswith(".bias") and name not in params_dict: - continue - param = params_dict[name] - weight_loader = param.weight_loader - weight_loader(param, loaded_weight, shard_id) - break - else: - # Skip loading extra bias for GPTQ models. - if name.endswith(".bias") and name not in params_dict: - continue + for name, loaded_weight in weights: + if "classification_head" in name: param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) + elif "lm_head" in name: + continue + else: + LlamaForCausalLM.load_weights(self, [(name, loaded_weight)]) EntryClass = LlamaForClassification diff --git a/python/sglang/srt/models/llama_embedding.py b/python/sglang/srt/models/llama_embedding.py index e4e9174f144..fe407b29f24 100644 --- a/python/sglang/srt/models/llama_embedding.py +++ b/python/sglang/srt/models/llama_embedding.py @@ -1,4 +1,4 @@ -from typing import Iterable, Optional, Tuple +from typing import Iterable, Tuple import torch from torch import nn @@ -7,7 +7,7 @@ from sglang.srt.layers.pooler import EmbeddingPoolerOutput, Pooler, PoolingType from sglang.srt.model_executor.model_runner import InputMetadata -from sglang.srt.models.llama2 import LlamaForCausalLM, LlamaModel +from sglang.srt.models.llama import LlamaModel class LlamaEmbeddingModel(nn.Module): @@ -16,7 +16,6 @@ def __init__( config: LlamaConfig, quant_config=None, cache_config=None, - efficient_weight_load=False, ) -> None: super().__init__() self.model = LlamaModel(config, quant_config=quant_config) @@ -86,6 +85,8 @@ def load_weights_per_param(name, loaded_weight): load_weights_per_param(name, loaded_weight) -EntryClass = LlamaEmbeddingModel -# compat: e5-mistral model.config class == MistralModel -EntryClassRemapping = [("MistralModel", LlamaEmbeddingModel)] +class MistralModel(LlamaEmbeddingModel): + pass + + +EntryClass = [LlamaEmbeddingModel, MistralModel] diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py index 7dcf5348b05..2e3c9ceba1a 100644 --- a/python/sglang/srt/models/llava.py +++ b/python/sglang/srt/models/llava.py @@ -41,7 +41,7 @@ unpad_image_shape, ) from sglang.srt.model_executor.forward_batch_info import ForwardMode, InputMetadata -from sglang.srt.models.llama2 import LlamaForCausalLM +from sglang.srt.models.llama import LlamaForCausalLM from sglang.srt.models.mistral import MistralForCausalLM from sglang.srt.models.qwen2 import Qwen2ForCausalLM @@ -395,21 +395,19 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): "model.mm_projector.0": "multi_modal_projector.linear_1", "model.mm_projector.2": "multi_modal_projector.linear_2", "model.vision_tower.vision_tower": "vision_tower", # Update the vision tower weights if we find them in the checkpoint (it may be finetuned). + "model.image_newline": "language_model.model.image_newline", } params_dict = dict(self.named_parameters()) - weights = list(weights) for name, loaded_weight in weights: - # FIXME: why projector weights read two times? 
- if "projector" in name or "vision_tower" in name: + if "projector" in name or "vision_tower" in name or "image_newline" in name: for weight_name, param_name in projector_weights.items(): if weight_name in name: name = name.replace(weight_name, param_name) param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) - - # load language model - self.language_model.load_weights(weights) + else: + self.language_model.load_weights([(name, loaded_weight)]) @property def num_patches_per_side(self): @@ -429,6 +427,7 @@ def __init__( self.vision_tower = None self.config.vision_config.hidden_size = config.mm_hidden_size self.config.text_config.hidden_size = config.hidden_size + self.multi_modal_projector = LlavaMultiModalProjector(config) self.language_model = LlamaForCausalLM(config, quant_config=quant_config) if "unpad" in getattr(config, "mm_patch_merge_type", ""): @@ -448,9 +447,9 @@ def __init__( self.config = config self.vision_tower = None + if getattr(self.config, "vision_config", None) is None: self.config.vision_config = CLIPVisionConfig(self.config.mm_vision_tower) - if getattr(self.config, "text_config", None) is None: self.config.text_config = Qwen2Config(self.config._name_or_path) @@ -459,7 +458,6 @@ def __init__( if getattr(self.config, "projector_hidden_act", None) is None: self.config.projector_hidden_act = "gelu" - if getattr(self.config, "image_token_index", None) is None: self.config.image_token_index = 151646 @@ -482,9 +480,9 @@ def __init__( self.config = config self.vision_tower = None + if getattr(self.config, "vision_config", None) is None: self.config.vision_config = CLIPVisionConfig(self.config.mm_vision_tower) - if getattr(self.config, "text_config", None) is None: self.config.text_config = MistralConfig(self.config._name_or_path) @@ -493,7 +491,6 @@ def __init__( if getattr(self.config, "projector_hidden_act", None) is None: self.config.projector_hidden_act = "gelu" - if getattr(self.config, "image_token_index", None) is None: self.config.image_token_index = 32000 diff --git a/python/sglang/srt/models/llavavid.py b/python/sglang/srt/models/llavavid.py index 44e400ff6a0..f268ecbbcd7 100644 --- a/python/sglang/srt/models/llavavid.py +++ b/python/sglang/srt/models/llavavid.py @@ -27,7 +27,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from sglang.srt.model_executor.forward_batch_info import ForwardMode, InputMetadata -from sglang.srt.models.llama2 import LlamaForCausalLM +from sglang.srt.models.llama import LlamaForCausalLM class LlavaVidForCausalLM(nn.Module): @@ -239,12 +239,12 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): "model.vision_resampler.mm_projector.0": "multi_modal_projector.linear_1", "model.vision_resampler.mm_projector.2": "multi_modal_projector.linear_2", "model.vision_tower.vision_tower": "vision_tower", # Update the vision tower weights if we find them in the checkpoint (it may be finetuned). + "model.image_newline": "language_model.model.image_newline", } params_dict = dict(self.named_parameters()) - weights = list(weights) for name, loaded_weight in weights: # FIXME: why projector weights read two times? 
- if "projector" in name or "vision_tower" in name: + if "projector" in name or "vision_tower" in name or "image_newline" in name: for weight_name, param_name in projector_weights.items(): if weight_name in name: name = name.replace(weight_name, param_name) @@ -255,9 +255,8 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): continue weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) - - # load language model - self.language_model.load_weights(weights) + else: + self.language_model.load_weights([(name, loaded_weight)]) @property def num_patches_per_side(self): diff --git a/python/sglang/srt/models/mistral.py b/python/sglang/srt/models/mistral.py index 614c1c1d747..1430ece436e 100644 --- a/python/sglang/srt/models/mistral.py +++ b/python/sglang/srt/models/mistral.py @@ -15,12 +15,11 @@ """Inference-only Mistral model.""" -from sglang.srt.models.llama2 import LlamaForCausalLM +from sglang.srt.models.llama import LlamaForCausalLM class MistralForCausalLM(LlamaForCausalLM): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + pass EntryClass = MistralForCausalLM diff --git a/scripts/deprecated/test_httpserver_classify.py b/scripts/deprecated/test_httpserver_classify.py index cafbd19fdf6..dbcafb88d7d 100644 --- a/scripts/deprecated/test_httpserver_classify.py +++ b/scripts/deprecated/test_httpserver_classify.py @@ -1,6 +1,6 @@ """ Usage: -python3 -m sglang.launch_server --model-path /model/llama-classification +python3 -m sglang.launch_server --disable-cuda-graph --model-path /model/llama-classification python3 test_httpserver_classify.py """ diff --git a/scripts/playground/reference_hf.py b/scripts/playground/reference_hf.py index d2d31161017..95aeddb9a14 100644 --- a/scripts/playground/reference_hf.py +++ b/scripts/playground/reference_hf.py @@ -3,23 +3,24 @@ python3 reference_hf.py --model TinyLlama/TinyLlama-1.1B-Chat-v0.4 Reference output: +========== Prompt 0 ========== +prefill logits (final) tensor([-8.3125, -7.1172, 3.3398, ..., -4.9531, -4.1328, -3.4141], + device='cuda:0') The capital of France is Paris. The capital of the United States is Washington, D.C. -The capital of Canada is Ottawa. -The capital of Japan is Tokyo -prefill logits tensor([-8.3125, -7.1172, 3.3398, ..., -4.9570, -4.1328, -3.4141], + +========== Prompt 1 ========== +prefill logits (final) tensor([-8.9062, -9.0156, 4.1484, ..., -4.9922, -4.4961, -4.0742], device='cuda:0') The capital of the United Kindom is London. The capital of the United Kingdom is London. -The capital of the United Kingdom is London. -The capital of the United Kingdom is London. -prefill logits tensor([-8.9062, -9.0156, 4.1406, ..., -4.9922, -4.4961, -4.0742], +The capital of + +========== Prompt 2 ========== +prefill logits (final) tensor([-9.6328, -9.0547, 4.0234, ..., -5.3047, -4.7148, -4.4609], device='cuda:0') Today is a sunny day and I like to go for a walk in the park. -I'm going to the park to play in the grass and water. 
-Today is a very -prefill logits tensor([-9.6328, -9.0547, 4.0195, ..., -5.3047, -4.7148, -4.4609], - device='cuda:0') +I'm going to the """ import argparse @@ -47,7 +48,7 @@ def normal_text(args): ] max_new_tokens = 16 - for p in prompts: + for i, p in enumerate(prompts): if isinstance(p, str): input_ids = t.encode(p, return_tensors="pt").cuda() else: @@ -60,7 +61,8 @@ def normal_text(args): prefill_logits = m.forward(input_ids).logits[0][-1] - print("prefill logits", prefill_logits) + print(f"\n========== Prompt {i} ==========") + print("prefill logits (final)", prefill_logits) print(output_str) From 474317f2b606a79ac6811798c612d13b83f719fd Mon Sep 17 00:00:00 2001 From: Jani Monoses Date: Tue, 3 Sep 2024 07:49:40 +0300 Subject: [PATCH 76/88] Support Phi3 mini and medium (#1299) --- python/sglang/srt/hf_transformers_utils.py | 2 +- python/sglang/srt/models/llama.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py index bfdeebdc975..ae3070c5a78 100644 --- a/python/sglang/srt/hf_transformers_utils.py +++ b/python/sglang/srt/hf_transformers_utils.py @@ -92,7 +92,7 @@ def get_context_length(config): """Get the context length of a model from a huggingface model configs.""" rope_scaling = getattr(config, "rope_scaling", None) if rope_scaling: - rope_scaling_factor = config.rope_scaling["factor"] + rope_scaling_factor = config.rope_scaling.get("factor", 1) if "original_max_position_embeddings" in rope_scaling: rope_scaling_factor = 1 if config.rope_scaling.get("rope_type", None) == "llama3": diff --git a/python/sglang/srt/models/llama.py b/python/sglang/srt/models/llama.py index 43c7cd54a5c..b875e0c9809 100644 --- a/python/sglang/srt/models/llama.py +++ b/python/sglang/srt/models/llama.py @@ -324,11 +324,11 @@ def forward( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ # (param_name, shard_name, shard_id) - ("qkv_proj", "q_proj", "q"), - ("qkv_proj", "k_proj", "k"), - ("qkv_proj", "v_proj", "v"), - ("gate_up_proj", "gate_proj", 0), - ("gate_up_proj", "up_proj", 1), + (".qkv_proj", ".q_proj", "q"), + (".qkv_proj", ".k_proj", "k"), + (".qkv_proj", ".v_proj", "v"), + (".gate_up_proj", ".gate_proj", 0), + (".gate_up_proj", ".up_proj", 1), ] params_dict = self.param_dict @@ -362,4 +362,8 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): weight_loader(param, loaded_weight) -EntryClass = LlamaForCausalLM +class Phi3ForCausalLM(LlamaForCausalLM): + pass + + +EntryClass = [LlamaForCausalLM, Phi3ForCausalLM] From c500f96bb16c686ee8ba5d5f1fc716a0bd8e5fff Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 3 Sep 2024 01:43:08 -0700 Subject: [PATCH 77/88] Update README.md for llava-onevision instructions (#1313) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index edde172a20c..6d5ddb3ee83 100644 --- a/README.md +++ b/README.md @@ -239,6 +239,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct - Qwen / Qwen 2 / Qwen 2 MoE - DeepSeek / DeepSeek 2 - [LLaVA-OneVision](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/) + - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-7b-ov --port=30000 --chat-template=chatml-llava` - `python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --port=30000 --tp-size=8 --chat-template=chatml-llava` - Query the server with the [OpenAI Vision 
API](https://platform.openai.com/docs/guides/vision). See examples at [test/srt/test_vision_openai_server.py](test/srt/test_vision_openai_server.py) - LLaVA 1.5 / 1.6 / NeXT From 12cb115d381cc19605c2fd3aa696ddf550f480de Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 3 Sep 2024 05:32:14 -0700 Subject: [PATCH 78/88] Fix llama2 weight loader (#1317) --- python/sglang/srt/models/exaone.py | 29 ++++------------------------- python/sglang/srt/models/llama.py | 8 ++++---- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/python/sglang/srt/models/exaone.py b/python/sglang/srt/models/exaone.py index 9cddcb34fdf..bb077f2c87d 100644 --- a/python/sglang/srt/models/exaone.py +++ b/python/sglang/srt/models/exaone.py @@ -323,27 +323,6 @@ def forward( sample_output = self.sampler(logits_output, input_metadata.sampling_info) return sample_output, logits_output - def get_module_name(self, name): - stacked_params_mapping = [ - # (param_name, shard_name, shard_id, num_shard) - ("qkv_proj", "q_proj", "q", 3), - ("qkv_proj", "k_proj", "k", 3), - ("qkv_proj", "v_proj", "v", 3), - ("gate_up_proj", "c_fc_0", 0, 2), - ("gate_up_proj", "c_fc_1", 1, 2), - ] - for param_name, weight_name, shard_id, num_shard in stacked_params_mapping: - if weight_name in name: - return ( - name.replace(weight_name, param_name)[: -len(".weight")], - num_shard, - ) - return name[: -len(".weight")], 1 - - def get_num_params(self): - params_dict = dict(self.named_parameters()) - return len(params_dict) - def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): stacked_params_mapping = [ # (param_name, shard_name, shard_id) @@ -357,13 +336,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name or "projector" in name: - return + continue if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: # Models trained using ColossalAI may include these tensors in # the checkpoint. Skip them. - return + continue if name.startswith("model.vision_tower") and name not in params_dict: - return + continue name = name.replace("attn.attention", "self_attn") for param_name, weight_name, shard_id in stacked_params_mapping: @@ -380,7 +359,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): else: # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: - return + continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) diff --git a/python/sglang/srt/models/llama.py b/python/sglang/srt/models/llama.py index b875e0c9809..926d87db8b7 100644 --- a/python/sglang/srt/models/llama.py +++ b/python/sglang/srt/models/llama.py @@ -334,13 +334,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): for name, loaded_weight in weights: if "rotary_emb.inv_freq" in name or "projector" in name: - return + continue if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: # Models trained using ColossalAI may include these tensors in # the checkpoint. Skip them. - return + continue if name.startswith("model.vision_tower") and name not in params_dict: - return + continue for param_name, weight_name, shard_id in stacked_params_mapping: if weight_name not in name: @@ -356,7 +356,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): else: # Skip loading extra bias for GPTQ models. 
if name.endswith(".bias") and name not in params_dict: - return + continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) weight_loader(param, loaded_weight) From 1e495e08470b6dc56645081f644831e0c620dfa5 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 3 Sep 2024 06:31:45 -0700 Subject: [PATCH 79/88] [Fix] Fix select by ensuring each request has at least one token (#1318) --- python/sglang/srt/managers/schedule_batch.py | 9 ++- python/sglang/test/test_programs.py | 68 ++++++++++++++++++++ python/sglang/utils.py | 39 +++++++++++ test/lang/test_srt_backend.py | 7 ++ 4 files changed, 120 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index f5b9c9eb27d..c80cf2e2723 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -178,19 +178,22 @@ def init_next_round_input(self, tree_cache: Optional[BasePrefixCache] = None): def adjust_max_prefix_ids(self): self.fill_ids = self.origin_input_ids + self.output_ids input_len = len(self.fill_ids) - max_prefix_len = input_len + + # FIXME: To work around some bugs in logprob computation, we need to ensure each + # request has at least one token. Later, we can relax this requirement and use `input_len`. + max_prefix_len = input_len - 1 if self.sampling_params.max_new_tokens > 0: # Need at least one token to compute logits max_prefix_len = min(max_prefix_len, input_len - 1) if self.return_logprob: - max_prefix_len = min(max_prefix_len, self.logprob_start_len) - if self.normalized_prompt_logprob is None: # Need at least two tokens to compute normalized logprob max_prefix_len = min(max_prefix_len, input_len - 2) + max_prefix_len = min(max_prefix_len, self.logprob_start_len) + max_prefix_len = max(max_prefix_len, 0) return self.fill_ids[:max_prefix_len] # Based on https://github.com/vllm-project/vllm/blob/7a64d24aad69e4d2548aa0bf528d9fe63428ab01/vllm/transformers_utils/detokenizer.py#L194-L313 diff --git a/python/sglang/test/test_programs.py b/python/sglang/test/test_programs.py index ce402558550..bdecdff2f94 100644 --- a/python/sglang/test/test_programs.py +++ b/python/sglang/test/test_programs.py @@ -2,8 +2,12 @@ import json import re +import time + +import numpy as np import sglang as sgl +from sglang.utils import fetch_and_cache_jsonl def test_few_shot_qa(): @@ -447,3 +451,67 @@ def gen_character_spec(s): ) gen_character_spec().sync() + + +def test_hellaswag_select(): + """Benchmark the accuracy of sgl.select on the HellaSwag dataset.""" + + url = "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_val.jsonl" + lines = fetch_and_cache_jsonl(url) + + # Construct prompts + def get_one_example(lines, i, include_answer): + ret = lines[i]["activity_label"] + ": " + lines[i]["ctx"] + " " + if include_answer: + ret += lines[i]["endings"][lines[i]["label"]] + return ret + + def get_few_shot_examples(lines, k): + ret = "" + for i in range(k): + ret += get_one_example(lines, i, True) + "\n\n" + return ret + + num_questions = 200 + num_shots = 20 + few_shot_examples = get_few_shot_examples(lines, num_shots) + + questions = [] + choices = [] + labels = [] + for i in range(len(lines[:num_questions])): + questions.append(get_one_example(lines, i, False)) + choices.append(lines[i]["endings"]) + labels.append(lines[i]["label"]) + arguments = [{"question": q, "choices": c} for q, c in zip(questions, choices)] + + ##################################### + 
######### SGL Program Begin ######### + ##################################### + + import sglang as sgl + + @sgl.function + def few_shot_hellaswag(s, question, choices): + s += few_shot_examples + question + s += sgl.select("answer", choices=choices) + + ##################################### + ########## SGL Program End ########## + ##################################### + + # Run requests + tic = time.time() + rets = few_shot_hellaswag.run_batch( + arguments, + temperature=0, + num_threads=64, + progress_bar=True, + ) + preds = [choices[i].index(rets[i]["answer"]) for i in range(len(rets))] + latency = time.time() - tic + + # Compute accuracy + accuracy = np.mean(np.array(preds) == np.array(labels)) + + return accuracy, latency diff --git a/python/sglang/utils.py b/python/sglang/utils.py index c880d259d53..b212f6caa31 100644 --- a/python/sglang/utils.py +++ b/python/sglang/utils.py @@ -4,6 +4,7 @@ import importlib import json import logging +import os import signal import sys import traceback @@ -15,6 +16,7 @@ import numpy as np import requests +from tqdm import tqdm logger = logging.getLogger(__name__) @@ -260,3 +262,40 @@ def __getattr__(self, name: str): def __call__(self, *args, **kwargs): module = self._load() return module(*args, **kwargs) + + +def fetch_and_cache_jsonl(url, cache_file="cached_data.jsonl"): + """Read and cache a jsonl file from a url.""" + + # Check if the cache file already exists + if os.path.exists(cache_file): + print("Loading data from cache...") + with open(cache_file, "r") as f: + data = [json.loads(line) for line in f] + else: + print("Downloading data from URL...") + # Stream the response to show the progress bar + response = requests.get(url, stream=True) + response.raise_for_status() # Check for request errors + + # Total size of the file in bytes + total_size = int(response.headers.get("content-length", 0)) + chunk_size = 1024 # Download in chunks of 1KB + + # Use tqdm to display the progress bar + with open(cache_file, "wb") as f, tqdm( + desc=cache_file, + total=total_size, + unit="B", + unit_scale=True, + unit_divisor=1024, + ) as bar: + for chunk in response.iter_content(chunk_size=chunk_size): + f.write(chunk) + bar.update(len(chunk)) + + # Convert the data to a list of dictionaries + with open(cache_file, "r") as f: + data = [json.loads(line) for line in f] + + return data diff --git a/test/lang/test_srt_backend.py b/test/lang/test_srt_backend.py index fcd86ae3d31..62c59592821 100644 --- a/test/lang/test_srt_backend.py +++ b/test/lang/test_srt_backend.py @@ -7,6 +7,7 @@ test_dtype_gen, test_expert_answer, test_few_shot_qa, + test_hellaswag_select, test_mt_bench, test_parallel_decoding, test_regex, @@ -62,6 +63,12 @@ def test_regex(self): def test_dtype_gen(self): test_dtype_gen() + def test_hellaswag_select(self): + # Run twice to capture more bugs + for _ in range(2): + accuracy, latency = test_hellaswag_select() + assert accuracy > 0.71 + if __name__ == "__main__": unittest.main() From dc67d9769382cf83b3e2644a4366d6473445a6c6 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 4 Sep 2024 04:29:53 +1000 Subject: [PATCH 80/88] misc: speedup load safetensors (#1319) Co-authored-by: ispobock --- python/sglang/srt/model_executor/model_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 09b3c7127f6..3d3e0cde9d1 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -162,6 
+162,7 @@ def init_torch_distributed(self): return min_per_gpu_memory def load_model(self): + torch.set_num_threads(1) logger.info( f"Load weight begin. avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB" ) From a63c8275c6c5aa642f06793d0bfc60c9653e75a7 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 4 Sep 2024 06:32:18 +1000 Subject: [PATCH 81/88] chore: bump v0.3.0 (#1320) --- README.md | 2 +- python/pyproject.toml | 2 +- python/sglang/version.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6d5ddb3ee83..d56a243df49 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ ### Method 2: From source ``` # Use the last release branch -git clone -b v0.2.15 https://github.com/sgl-project/sglang.git +git clone -b v0.3.0 https://github.com/sgl-project/sglang.git cd sglang pip install --upgrade pip diff --git a/python/pyproject.toml b/python/pyproject.toml index e46f9f4e1c7..daf09ea25de 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sglang" -version = "0.2.15" +version = "0.3.0" description = "SGLang is yet another fast serving framework for large language models and vision language models." readme = "README.md" requires-python = ">=3.8" diff --git a/python/sglang/version.py b/python/sglang/version.py index ddc77a88056..493f7415d73 100644 --- a/python/sglang/version.py +++ b/python/sglang/version.py @@ -1 +1 @@ -__version__ = "0.2.15" +__version__ = "0.3.0" From 843e63d809f59e1446d6b0a61306c9a001b404d6 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 4 Sep 2024 04:15:11 -0700 Subject: [PATCH 82/88] Fix the flaky test test_moe_eval_accuracy_large.py (#1326) --- test/srt/test_moe_eval_accuracy_large.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/test_moe_eval_accuracy_large.py b/test/srt/test_moe_eval_accuracy_large.py index 1183cc4e7a6..d4b1354b793 100644 --- a/test/srt/test_moe_eval_accuracy_large.py +++ b/test/srt/test_moe_eval_accuracy_large.py @@ -66,7 +66,7 @@ def test_mgsm_en(self): ) metrics = run_eval(args) - assert metrics["score"] >= 0.64, f"{metrics}" + assert metrics["score"] >= 0.63, f"{metrics}" if __name__ == "__main__": From 5ab9418f5b4c9ad1a90d72a803331d9a0b26d13e Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Wed, 4 Sep 2024 21:21:21 +1000 Subject: [PATCH 83/88] [Doc] update news (#1327) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d56a243df49..eb3099cf7ae 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ The core features include: - **Flexible Frontend Language**: Enables easy programming of LLM applications with chained generation calls, advanced prompting, control flow, multiple modalities, parallelism, and external interactions. ## News +- [2024/09] 🔥 SGLang v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)). - [2024/07] 🔥 Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)). -- [2024/08] 🔥 LLaVA-OneVision with single-image, multi-image and video are supported ([blog](https://llava-vl.github.io/blog/2024-08-05-llava-onevision/)). 
- [2024/02] SGLang enables **3x faster JSON decoding** with compressed finite state machine ([blog](https://lmsys.org/blog/2024-02-05-compressed-fsm/)).
From eda7c09048b39bd03b0e34aa16ffef9398072663 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 4 Sep 2024 05:37:32 -0700 Subject: [PATCH 84/88] Remove useless fields in global_config.py (#1328) --- python/sglang/global_config.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/sglang/global_config.py b/python/sglang/global_config.py index d5f16e2ae54..7bd5aa0901f 100644 --- a/python/sglang/global_config.py +++ b/python/sglang/global_config.py @@ -11,10 +11,6 @@ def __init__(self): # Default backend of the language self.default_backend = None - # Runtime constants: Request dependency time due to network delay - self.request_dependency_delay = 0.02 - self.wait_for_new_request_delay = 0.0006 - # Runtime constants: New generation token ratio estimation self.init_new_token_ratio = 0.7 self.base_min_new_token_ratio = 0.1 From 3494b32c3a77e32d1a492b8c2a408b3662c08229 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Thu, 5 Sep 2024 23:39:44 +1000 Subject: [PATCH 85/88] docs: update README (#1336) --- benchmark/benchmark_vllm_060/README.md | 83 ++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 benchmark/benchmark_vllm_060/README.md diff --git a/benchmark/benchmark_vllm_060/README.md b/benchmark/benchmark_vllm_060/README.md new file mode 100644 index 00000000000..acb55f8971d --- /dev/null +++ b/benchmark/benchmark_vllm_060/README.md @@ -0,0 +1,83 @@ +## How to reproduce the benchmark results for SGLang v0.3.0 compared to vLLM v0.6.0 + +## Installation + +```bash +# install sglang v0.3.0 +pip install --upgrade pip +pip install "sglang[all]"==0.3.0 +pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ + +# install vllm v0.6.0 +pip install vllm==0.6.0 +``` + +## Online benchmarks + +```bash +# Llama 3.1 8B Instruct on 1 x A100 +python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --enable-torch-compile --disable-radix-cache +python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --disable-log-requests --num-scheduler-steps 10 --max_model_len 4096 + +# Llama 3.1 70B Instruct on 4 x H100 +python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-70B-Instruct --disable-radix-cache --tp 4 +python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --disable-log-requests --num-scheduler-steps 10 --tensor 4 --max_model_len 4096 + +# bench serving +python3 -m sglang.bench_serving --backend sglang --dataset-name sharegpt --num-prompts 1200 --request-rate 4 +python3 -m sglang.bench_serving --backend sglang --dataset-name sharegpt --num-prompts 2400 --request-rate 8 +python3 -m sglang.bench_serving --backend vllm --dataset-name sharegpt --num-prompts 1200 --request-rate 4 +python3 -m sglang.bench_serving --backend vllm --dataset-name sharegpt --num-prompts 2400 --request-rate 8 +``` + +## Offline benchmarks + +```bash +# Llama 3.1 8B Instruct on 1 x A100 +python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --enable-torch-compile --disable-radix-cache +python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --disable-log-requests --num-scheduler-steps 10 --max_model_len 4096 + +# Llama 3.1 70B Instruct on 4 x H100 +python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-70B-Instruct --disable-radix-cache --tp 4 --mem-frac 0.88 +python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --disable-log-requests --num-scheduler-steps 10 
--tensor 4 --max_model_len 4096

# bench serving
python3 -m sglang.bench_serving --backend sglang --dataset-name sharegpt --num-prompts 5000
python3 -m sglang.bench_serving --backend vllm --dataset-name sharegpt --num-prompts 5000
```

## Online benchmark results

### Llama 3.1 8B Instruct 1 x A100 80G

| RPS | Num prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
|------|-------------|--------|--------------------|-------------|-------------|------------|
| 4 | 1200 | SGLang | 1564.17 | 31.98 | 13.17 | 11.93 |
| 4 | 1200 | vLLM | 1691.97 | 100.48 | 14.14 | 129.32 |
| 8 | 2400 | SGLang | 2175.02 | 35.68 | 17.85 | 14.41 |
| 8 | 2400 | vLLM | 2137.16 | 120.39 | 17.09 | 158.63 |

### Llama 3.1 70B Instruct 4 x H100 80G

| RPS | Num Prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
|------|-------------|--------|--------------------|-------------|-------------|------------|
| 4 | 1200 | SGLang | 3005.24 | 53.94 | 25.03 | 21.67 |
| 4 | 1200 | vLLM | 2915.60 | 179.15 | 23.58 | 231.23 |
| 8 | 2400 | SGLang | 4064.98 | 58.11 | 33.07 | 24.45 |
| 8 | 2400 | vLLM | 3752.38 | 207.12 | 29.15 | 275.32 |

## Offline benchmark results

### Llama 3.1 8B Instruct 1 x A100 80G

| RPS | Num Prompts | Engine | Request throughput | Output token throughput |
|------|-------------|--------|--------------------|-------------------------|
| inf | 5000 | SGLang | 22.03 | 4281.51 |
| inf | 5000 | vLLM | 21.27 | 4132.37 |

### Llama 3.1 70B Instruct 4 x H100 80G

| RPS | Num Prompts | Engine | Request throughput | Output token throughput |
|------|-------------|--------|--------------------|-------------------------|
| inf | 5000 | SGLang | 19.84 | 3856.01 |
| inf | 5000 | vLLM | 19.04 | 3700.64 |

From 79794af52d90abfb00e73871109f0cdc4e0b7f34 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Fri, 6 Sep 2024 00:00:06 +1000
Subject: [PATCH 86/88] docs: highlight ttft itl and throughput (#1337)

---
 benchmark/benchmark_vllm_060/README.md | 28 +++++++++++++++-----------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/benchmark/benchmark_vllm_060/README.md b/benchmark/benchmark_vllm_060/README.md
index acb55f8971d..157bd9df7ac 100644
--- a/benchmark/benchmark_vllm_060/README.md
+++ b/benchmark/benchmark_vllm_060/README.md
@@ -12,6 +12,10 @@ pip install vllm==0.6.0
 ```

+## Notes
+
+We referred to the reproduction method in https://github.com/vllm-project/vllm/issues/8176, and added the `--num-scheduler-steps 10` parameter when starting the vLLM server. The `gpu_memory_utilization` of vLLM is by default 0.9 at both TP 1 and TP 4, while SGLang's `mem_frac` is 0.88 at TP 1 and 0.85 at TP 4, so we manually set it to 0.88 at TP 4.
+
 ## Online benchmarks

 ```bash
@@ -52,19 +56,19 @@ python3 -m sglang.bench_serving --backend vllm --dataset-name sharegpt --num-pro

 | RPS | Num prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
 |------|-------------|--------|--------------------|-------------|-------------|------------|
-| 4 | 1200 | SGLang | 1564.17 | 31.98 | 13.17 | 11.93 |
-| 4 | 1200 | vLLM | 1691.97 | 100.48 | 14.14 | 129.32 |
-| 8 | 2400 | SGLang | 2175.02 | 35.68 | 17.85 | 14.41 |
-| 8 | 2400 | vLLM | 2137.16 | 120.39 | 17.09 | 158.63 |
+| 4 | 1200 | SGLang | 1564.17 | **31.98** | 13.17 | **11.93** |
+| 4 | 1200 | vLLM | 1691.97 | **100.48** | 14.14 | **129.32** |
+| 8 | 2400 | SGLang | 2175.02 | **35.68** | 17.85 | **14.41** |
+| 8 | 2400 | vLLM | 2137.16 | **120.39** | 17.09 | **158.63** |

 ### Llama 3.1 70B Instruct 4 x H100 80G

 | RPS | Num Prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
 |------|-------------|--------|--------------------|-------------|-------------|------------|
-| 4 | 1200 | SGLang | 3005.24 | 53.94 | 25.03 | 21.67 |
-| 4 | 1200 | vLLM | 2915.60 | 179.15 | 23.58 | 231.23 |
-| 8 | 2400 | SGLang | 4064.98 | 58.11 | 33.07 | 24.45 |
-| 8 | 2400 | vLLM | 3752.38 | 207.12 | 29.15 | 275.32 |
+| 4 | 1200 | SGLang | 3005.24 | **53.94** | 25.03 | **21.67** |
+| 4 | 1200 | vLLM | 2915.60 | **179.15** | 23.58 | **231.23** |
+| 8 | 2400 | SGLang | 4064.98 | **58.11** | 33.07 | **24.45** |
+| 8 | 2400 | vLLM | 3752.38 | **207.12** | 29.15 | **275.32** |

 ## Offline benchmark results

 ### Llama 3.1 8B Instruct 1 x A100 80G

 | RPS | Num Prompts | Engine | Request throughput | Output token throughput |
 |------|-------------|--------|--------------------|-------------------------|
-| inf | 5000 | SGLang | 22.03 | 4281.51 |
-| inf | 5000 | vLLM | 21.27 | 4132.37 |
+| inf | 5000 | SGLang | 22.03 | **4281.51** |
+| inf | 5000 | vLLM | 21.27 | **4132.37** |

 ### Llama 3.1 70B Instruct 4 x H100 80G

 | RPS | Num Prompts | Engine | Request throughput | Output token throughput |
 |------|-------------|--------|--------------------|-------------------------|
-| inf | 5000 | SGLang | 19.84 | 3856.01 |
-| inf | 5000 | vLLM | 19.04 | 3700.64 |
+| inf | 5000 | SGLang | 19.84 | **3856.01** |
+| inf | 5000 | vLLM | 19.04 | **3700.64** |

From 62f15eea5a0b4266cdae965d0337fd33f6673736 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Fri, 6 Sep 2024 04:25:14 +1000
Subject: [PATCH 87/88] docs: add conclusion (#1340)

---
 benchmark/benchmark_vllm_060/README.md | 74 +++++++++++++-------------
 1 file changed, 38 insertions(+), 36 deletions(-)

diff --git a/benchmark/benchmark_vllm_060/README.md b/benchmark/benchmark_vllm_060/README.md
index 157bd9df7ac..5a1247c5f4b 100644
--- a/benchmark/benchmark_vllm_060/README.md
+++ b/benchmark/benchmark_vllm_060/README.md
@@ -1,5 +1,43 @@
 ## How to reproduce the benchmark results for SGLang v0.3.0 compared to vLLM v0.6.0

+In short, with multi-step enabled, in the online scenarios we benchmarked, the Median TTFT of vLLM is **3 times** that of SGLang, and the Median ITL is **10 times** that of SGLang (lower is better for both). vLLM's multi-step optimization did not improve throughput while keeping the Median TTFT and ITL low. Also, in the maximum-throughput benchmark, if vLLM uses its default configuration instead of setting its GPU memory utilization to 0.95, its maximum throughput is **lower** than that of SGLang.
+
+## Online benchmark results
+
+### Llama 3.1 8B Instruct 1 x A100 80G
+
+| RPS | Num prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
+|------|-------------|--------|--------------------|-------------|-------------|------------|
+| 4 | 1200 | SGLang | 1564.17 | **31.98** | 13.17 | **11.93** |
+| 4 | 1200 | vLLM | 1691.97 | **100.48** | 14.14 | **129.32** |
+| 8 | 2400 | SGLang | 2175.02 | **35.68** | 17.85 | **14.41** |
+| 8 | 2400 | vLLM | 2137.16 | **120.39** | 17.09 | **158.63** |
+
+### Llama 3.1 70B Instruct 4 x H100 80G
+
+| RPS | Num Prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
+|------|-------------|--------|--------------------|-------------|-------------|------------|
+| 4 | 1200 | SGLang | 3005.24 | **53.94** | 25.03 | **21.67** |
+| 4 | 1200 | vLLM | 2915.60 | **179.15** | 23.58 | **231.23** |
+| 8 | 2400 | SGLang | 4064.98 | **58.11** | 33.07 | **24.45** |
+| 8 | 2400 | vLLM | 3752.38 | **207.12** | 29.15 | **275.32** |
+
+## Offline benchmark results
+
+### Llama 3.1 8B Instruct 1 x A100 80G
+
+| RPS | Num Prompts | Engine | Request throughput | Output token throughput |
+|------|-------------|--------|--------------------|-------------------------|
+| inf | 5000 | SGLang | 22.03 | **4281.51** |
+| inf | 5000 | vLLM | 21.27 | **4132.37** |
+
+### Llama 3.1 70B Instruct 4 x H100 80G
+
+| RPS | Num Prompts | Engine | Request throughput | Output token throughput |
+|------|-------------|--------|--------------------|-------------------------|
+| inf | 5000 | SGLang | 19.84 | **3856.01** |
+| inf | 5000 | vLLM | 19.04 | **3700.64** |
+
 ## Installation

 ```bash
@@ -49,39 +87,3 @@ python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-7
 python3 -m sglang.bench_serving --backend sglang --dataset-name sharegpt --num-prompts 5000
 python3 -m sglang.bench_serving --backend vllm --dataset-name sharegpt --num-prompts 5000
 ```
-
-## Online benchmark results
-
-### Llama 3.1 8B Instruct 1 x A100 80G
-
-| RPS | Num prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
-|------|-------------|--------|--------------------|-------------|-------------|------------|
-| 4 | 1200 | SGLang | 1564.17 | **31.98** | 13.17 | **11.93** |
-| 4 | 1200 | vLLM | 1691.97 | **100.48** | 14.14 | **129.32** |
-| 8 | 2400 | SGLang | 2175.02 | **35.68** | 17.85 | **14.41** |
-| 8 | 2400 | vLLM | 2137.16 | **120.39** | 17.09 | **158.63** |
-
-### Llama 3.1 70B Instruct 4 x H100 80G
-
-| RPS | Num Prompts | Engine | Median E2E Latency | Median TTFT | Median TPOT | Median ITL |
-|------|-------------|--------|--------------------|-------------|-------------|------------|
-| 4 | 1200 | SGLang | 3005.24 | **53.94** | 25.03 | **21.67** |
-| 4 | 1200 | vLLM | 2915.60 | **179.15** | 23.58 | **231.23** |
-| 8 | 2400 | SGLang | 4064.98 | **58.11** | 33.07 | **24.45** |
-| 8 | 2400 | vLLM | 3752.38 | **207.12** | 29.15 | **275.32** |
-
-## Offline benchmark results
-
-### Llama 3.1 8B Instruct 1 x A100 80G
-
-| RPS | Num Prompts | Engine | Request throughput | Output token throughput |
-|------|-------------|--------|--------------------|-------------------------|
-| inf | 5000 | SGLang | 22.03 | **4281.51** |
-| inf | 5000 | vLLM | 21.27 | **4132.37** |
-
-### Llama 3.1 70B Instruct 4 x H100 80G
-
-| RPS | Num Prompts | Engine | Request throughput | Output token throughput |
-|------|-------------|--------|--------------------|-------------------------|
-| inf | 5000 | SGLang | 19.84 | **3856.01** |
-| inf | 
5000 | vLLM | 19.04 | **3700.64** | From ab4a83b25909aa98330b838a224e4fe5c943e483 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Thu, 5 Sep 2024 14:30:26 -0700 Subject: [PATCH 88/88] Optimize schedule (#1339) --- .../sglang/srt/managers/policy_scheduler.py | 110 +++++++++++++++++- python/sglang/srt/managers/tp_worker.py | 21 +++- 2 files changed, 123 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/managers/policy_scheduler.py b/python/sglang/srt/managers/policy_scheduler.py index 04169e80861..3a70bfe5482 100644 --- a/python/sglang/srt/managers/policy_scheduler.py +++ b/python/sglang/srt/managers/policy_scheduler.py @@ -108,18 +108,24 @@ class PrefillAdder: def __init__( self, tree_cache: BasePrefixCache, + running_batch: ScheduleBatch, + new_token_ratio: float, rem_total_tokens: int, rem_input_tokens: int, rem_chunk_tokens: Optional[int], mixed_with_decode_tokens: int = 0, ): self.tree_cache = tree_cache + self.running_batch = running_batch + self.new_token_ratio = new_token_ratio self.rem_total_tokens = rem_total_tokens - mixed_with_decode_tokens + self.total_tokens = rem_total_tokens self.rem_input_tokens = rem_input_tokens - mixed_with_decode_tokens self.rem_chunk_tokens = rem_chunk_tokens if self.rem_chunk_tokens is not None: self.rem_chunk_tokens -= mixed_with_decode_tokens + self.req_states = None self.can_run_list = [] self.new_inflight_req = None self.log_hit_tokens = 0 @@ -136,16 +142,14 @@ def no_remaining_tokens(self): ) ) - def remove_running_tokens( - self, running_batch: ScheduleBatch, new_token_ratio: float - ): + def remove_running_tokens(self, running_batch: ScheduleBatch): self.rem_total_tokens -= sum( [ min( (r.sampling_params.max_new_tokens - len(r.output_ids)), CLIP_MAX_NEW_TOKENS, ) - * new_token_ratio + * self.new_token_ratio for r in running_batch.reqs ] ) @@ -161,7 +165,29 @@ def _prefill_one_req( self.log_hit_tokens += prefix_len self.log_input_tokens += extend_input_len + def add_inflight_req_ignore_eos(self, req: Req): + truncated = req.extend_input_len > self.rem_chunk_tokens + req.extend_input_len = min(req.extend_input_len, self.rem_chunk_tokens) + req.fill_ids = req.fill_ids[: len(req.prefix_indices) + req.extend_input_len] + self.can_run_list.append(req) + + self._prefill_one_req( + 0, + req.extend_input_len, + ( + min(req.sampling_params.max_new_tokens, CLIP_MAX_NEW_TOKENS) + if not truncated + else 0 + ), + ) + + # Return if chunked prefill not finished + return req if truncated else None + def add_inflight_req(self, req: Req): + if req.sampling_params.ignore_eos: + return self.add_inflight_req_ignore_eos(req) + truncated = req.extend_input_len > self.rem_chunk_tokens req.extend_input_len = min(req.extend_input_len, self.rem_chunk_tokens) req.fill_ids = req.fill_ids[: len(req.prefix_indices) + req.extend_input_len] @@ -190,7 +216,81 @@ def _lock_node(self, last_node: TreeNode): delta = self.tree_cache.dec_lock_ref(last_node) self.rem_total_tokens += delta + def add_one_req_ignore_eos(self, req: Req): + def get_req_state(r): + new_token_ratio = ( + 1.0 if r.sampling_params.ignore_eos else self.new_token_ratio + ) + tokens_left = r.sampling_params.max_new_tokens * new_token_ratio - len( + r.output_ids + ) + tokens_occupied = len(r.origin_input_ids) + len(r.output_ids) + + if tokens_left > 0: + return (tokens_left, tokens_occupied) + + return None + + if self.req_states is None: + self.req_states = [] + if self.running_batch is not None: + for r in self.running_batch.reqs: + state = get_req_state(r) + if state is not None: + 
self.req_states.append(state)
+            for r in self.can_run_list:
+                state = get_req_state(r)
+                if state is not None:
+                    self.req_states.append(state)
+            state = get_req_state(req)
+            if state is not None:
+                self.req_states.append(state)
+
+            self.req_states.sort(key=lambda x: x[0])
+        else:
+            state = get_req_state(req)
+            if state is not None:
+                for i, (tokens_left, tokens_occupied) in enumerate(self.req_states):
+                    if tokens_left >= state[0]:
+                        self.req_states.insert(i, state)
+                        break
+                else:
+                    self.req_states.append(state)
+
+        tokens_freed = 0
+        for i, (tokens_left, tokens_occupied) in enumerate(self.req_states):
+            decode_steps = (
+                self.req_states[i + 1][0]
+                if i + 1 < len(self.req_states)
+                else tokens_left
+            )
+            bs = len(self.req_states) - i
+            if self.total_tokens + tokens_freed - decode_steps * bs <= 0:
+                return False
+            tokens_freed += tokens_occupied
+
+        if req.extend_input_len <= self.rem_chunk_tokens:
+            self.can_run_list.append(req)
+            self._prefill_one_req(
+                0,
+                req.extend_input_len,
+                min(req.sampling_params.max_new_tokens, CLIP_MAX_NEW_TOKENS),
+            )
+        else:
+            # Chunked prefill
+            trunc_len = self.rem_chunk_tokens
+            req.extend_input_len = trunc_len
+            req.fill_ids = req.fill_ids[:trunc_len]
+            self.can_run_list.append(req)
+            self.new_inflight_req = req
+            self._prefill_one_req(0, trunc_len, 0)
+
+        return True
+
     def add_one_req(self, req: Req):
+        if req.sampling_params.ignore_eos and self.tree_cache.disable:
+            return self.add_one_req_ignore_eos(req)
+
         total_tokens = req.extend_input_len + min(
             req.sampling_params.max_new_tokens, CLIP_MAX_NEW_TOKENS
         )
@@ -233,4 +333,4 @@ def add_one_req(self, req: Req):
             self.tree_cache.inc_lock_ref(req.last_node)
             self._prefill_one_req(prefix_len, trunc_len, 0)
 
-        return True
+        return True and not self.no_remaining_tokens()
diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py
index 8fc03b85991..d914a71c27a 100644
--- a/python/sglang/srt/managers/tp_worker.py
+++ b/python/sglang/srt/managers/tp_worker.py
@@ -221,6 +221,7 @@ def __init__(
         )
         self.new_token_ratio = self.min_new_token_ratio
         self.new_token_ratio_decay = global_config.new_token_ratio_decay
+        self.do_not_get_new_batch = False
 
     def exposed_step(self, recv_reqs: List):
         try:
@@ -253,7 +254,13 @@ def exposed_step(self, recv_reqs: List):
 
     @torch.inference_mode()
     def forward_step(self):
-        new_batch = self.get_new_prefill_batch()
+        if self.current_inflight_req is not None:
+            self.do_not_get_new_batch = False
+
+        new_batch = (
+            self.get_new_prefill_batch() if not self.do_not_get_new_batch else None
+        )
+        self.do_not_get_new_batch = False
 
         if new_batch is not None:
             # Run a new prefill batch
@@ -409,6 +416,8 @@ def get_new_prefill_batch(self) -> Optional[ScheduleBatch]:
 
         adder = PrefillAdder(
             self.tree_cache,
+            self.running_batch,
+            self.new_token_ratio,
             self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size(),
             self.max_prefill_tokens,
             self.chunked_prefill_size,
@@ -416,7 +425,7 @@ def get_new_prefill_batch(self) -> Optional[ScheduleBatch]:
         )
 
         if self.running_batch is not None:
-            adder.remove_running_tokens(self.running_batch, self.new_token_ratio)
+            adder.remove_running_tokens(self.running_batch)
 
         has_inflight = self.current_inflight_req is not None
         if self.current_inflight_req is not None:
@@ -428,11 +437,12 @@ def get_new_prefill_batch(self) -> Optional[ScheduleBatch]:
             )
 
         for req in self.waiting_queue:
+            if adder.no_remaining_tokens():
+                break
             req.init_next_round_input(None if prefix_computed else self.tree_cache)
             res = adder.add_one_req(req)
             if (
                 not res
-                or adder.no_remaining_tokens()
                 or running_bs + len(adder.can_run_list)
                 >= self.max_running_requests
             ):
                 break
@@ -700,6 +710,7 @@ def forward_decode_batch(self, batch: ScheduleBatch):
             next_token_ids = next_token_ids.tolist()
 
         # Check finish condition
+        has_finished = False
         for i, (req, next_token_id) in enumerate(zip(batch.reqs, next_token_ids)):
             req.completion_tokens_wo_jump_forward += 1
             req.output_ids.append(next_token_id)
@@ -712,6 +723,7 @@ def forward_decode_batch(self, batch: ScheduleBatch):
 
             if req.finished():
                 self.tree_cache.cache_finished_req(req)
+                has_finished = True
 
             if req.return_logprob:
                 req.output_token_logprobs.append(
@@ -720,6 +732,9 @@ def forward_decode_batch(self, batch: ScheduleBatch):
             if req.top_logprobs_num > 0:
                 req.output_top_logprobs.append(logits_output.output_top_logprobs[i])
 
+        if not has_finished:
+            self.do_not_get_new_batch = True
+
         self.handle_finished_requests(batch)
 
     def handle_finished_requests(self, batch: ScheduleBatch):
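Review note on the add_one_req_ignore_eos hunk above: self.req_states holds one (tokens_left, tokens_occupied) pair per request, kept sorted ascending by tokens_left, so requests finish in list order. The loop then walks the finish points one by one, checking that the free token budget survives each decode window and crediting back the tokens a finished request releases. A standalone sketch of that check, with our own function name and wrapper (not part of the patch):

    from typing import List, Tuple

    def budget_survives(req_states: List[Tuple[int, int]], total_tokens: int) -> bool:
        # req_states: one (tokens_left, tokens_occupied) pair per request,
        # sorted ascending by tokens_left, so requests finish in list order.
        # total_tokens: currently free token budget in the KV cache.
        tokens_freed = 0
        for i, (tokens_left, tokens_occupied) in enumerate(req_states):
            # Horizon checked at step i: the remaining decode length of the next
            # request to finish (or of this request, if it is the last one).
            decode_steps = req_states[i + 1][0] if i + 1 < len(req_states) else tokens_left
            bs = len(req_states) - i  # requests still decoding in this window
            if total_tokens + tokens_freed - decode_steps * bs <= 0:
                return False  # budget runs out before the next finish point
            tokens_freed += tokens_occupied  # a finished request frees its tokens
        return True

For example, budget_survives([(4, 10), (9, 12)], total_tokens=40) charges 9 * 2 = 18 tokens against 40 in the first window, then 9 * 1 against 40 + 10 once the first request's 10 occupied tokens are freed, so it returns True.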
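The tp_worker.py hunks add a one-shot decode-priority flag. Reassembled as a minimal runnable sketch (attribute and method names follow the patch; the class wrapper, the stand-in body of get_new_prefill_batch, and the rationale comments are ours, not the author's):

    class WorkerSketch:
        """Minimal sketch of the do_not_get_new_batch flow; all other state elided."""

        def __init__(self):
            self.do_not_get_new_batch = False
            self.current_inflight_req = None

        def forward_step(self):
            # An in-flight chunked-prefill request must keep making progress,
            # so it always re-enables prefill scheduling.
            if self.current_inflight_req is not None:
                self.do_not_get_new_batch = False

            # Skip at most one prefill-scheduling attempt, then clear the
            # flag whether or not it was set.
            new_batch = (
                self.get_new_prefill_batch() if not self.do_not_get_new_batch else None
            )
            self.do_not_get_new_batch = False
            return new_batch

        def get_new_prefill_batch(self):
            return None  # stand-in; the real method builds a ScheduleBatch

        def forward_decode_batch(self, has_finished: bool):
            # In the patch, has_finished is computed while appending next tokens:
            # if no request finished this decode step, no KV-cache space was
            # released, so the next prefill-scheduling attempt is skipped.
            if not has_finished:
                self.do_not_get_new_batch = True

Note also that add_one_req now returns True and not self.no_remaining_tokens(), so the waiting-queue loop in get_new_prefill_batch stops as soon as the token budget is exhausted, even when the request that exhausted it was itself admitted.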

zPR=?6BMw&TIh^3P>9JjyBicaW_ z{M}x9;o}wK<;LtUmlo9j{d4!Bi`kIsNkHdIfZYcF`r&rWL!A%IlVyg?ZTJze=gJN# z{&34o1=APm3`!LJ@snF33P58G1$v;IY^{(@jw*~+R#w){<$~`0@cD09uaE``_C;e8 zLw-ka{#L;IuL{d{iXh^R)`am`OL+wIfT2P(yGSGxt?OpB?ssx~d&HP#L75Auf_UoV zw?+FhXBywqQjw2@9%=g?(6d*%?u=g673OZcPn}ncioE^JC-+M+kjuU0`{L7ogNA<- zfPYCyP}uE2&y{zT4B=->DF9v{ayYeb_-}GMozW;|6w5lAeASmJ%fSD`#{Cbh*Nqh06y+0 zAv_~|`}S?!owV z)@&d-z-{dr{8{J!H^ZwJWcMzcgpiO3Or}hJcDQ&aLi1=^W-<=D;@McldbL#eTg;_aKFU_NfaDcAcPCoq=S z5+S+YeJV_WwnGQ9@4Ek3nEWrX3B+|Ew*l${_i4nQQ9wW)FMPXLCU2ja9T2Pmsis8t zbZ-;x11$xr?2*h=Vs0gLk+HzxUK6y;DKTh#elt8w`4okOD^8_Ba`pSdAMx<011_B9 z@hSneAKyUz694i|qdd*-HoGdSUeH z=kwheOgRmQhP>Qm6DEeMq`$)l8*{GkP;&pRvVRL7xPIhz9jJcz^O*hh@5KUoL@q?O z@(PI=+;rm|6y$Rt{qvxWTyW2x=>FigxmtAW5y3%ymz)F7$0sH^d8sndM&(m#9Mjvk zcywdGe&=k}-!7n_$SUhqol%#rJ#iceGs}B!SO`_5{e1!F5lH)RwZ{v!b%%q}a@U0bP zT;kv0{v@n}I*0DATN2Fr*(>*vdPHVNtX2WmWyJ#HAOEl=^oasH_wz@3 z(CNI(L-K$BwByMw1na$%>ts^2^k}xU%2D!FEc#K%7GVDLKoCgmB8eZaaC!eOEXIHs zW_zyn2*ls;4DM|7BJAs%JO!tKz`B;_R0UsZsaJ1i#uDFTkA0UR_MTLdbIqqR21a9= zDWF(mGWHIKt(PPI`USo0zb#nQ4T2u*kfZIs!~-`P>T}lEa)HQ~%W<2I&8k!;&5q z94xkyLpLRjvzM!Ttibk|J!-!{%Ptewv+VJHoGe~vjHx>7x`4?u>9O1Z%es`_LQbHW z&!B>M`E^_7N}sN*I@aukX1!pM*MVQASRs1{h-JLzW;T_$%mtnf1_jR_l**>+yZ#eZ z>;C}$!Womg0(}lYIZu9YEqKF~T6r6|UB3Lt|u`1~09ik5h)x5VV{;NU3|Fzfc zcv1_&+V8&b@@;ukS?1)W=q23SdfGHBVT#i?JS$e)++VIA_GFy$K<*q8W)MQL{8`26 z=l9~%$NBh7R|t86CN8@W{LgIu;TJAk9LkhFqpw77lV^9?rMVj{-_-!^nrF$8bz4!g z;qk{?*c+szpW54{dU|`klCAyyQ*!e2feOlKh(Rf*+j&| z1W{fC9Xa~vWQCu|9&0QziR|~MJQV(~=4o}1hPqw1vi!Y90pr^iuYXvEiFY!s)82c* zU=!M%m_-e$F1S(la_i@sQTk;cwE^H=j2%I0#16rt@o^J@yyW>RANQv2{kP(ClrIszI2pETQFA?R8%;$Hs>#}lvKQy zQQO?{=r&W1NU`0-JCq489KW9_d@_jv8Zi^b<~g^Xr1>8&c3bVbpn?*!h+zT=oBP^2 zH{*40;N#y>P_}`+r&{+-YuajJVeUPC`lQN7t;_+D%IFsv%eubKgx|*`o_*QLFsbsf zvB}A1Y}44!J!_bv@>hN7A7nZeWk%^tku9$MIr7--Q&}ZN-%J_%#%>ncgwi6j116uM z@$mPfUOzgEn&zudlJ{`pOy97}@Plo9n0SQLzr&ak$2QqzERvbObpkzyxqMu)c|G<& z_qVGkN7(GwJ-lm}83VQIg%KNjIaD@jvJ1pW!-7aojkeno==-*CVU3o!Qp`Ro@!{fAGTpNqEwA@YWFIrA z(okO5~J2q|wvc`WOn5adg>*+qN|_t>9s&caM&n%vM8sifgPpRVQq8kd8x; zmtBjQwig9O0*oSTPTfyA16&&%1Z>N%UvF%;o&ES?CJ?-_NXvg=I?r}u7Gsb$X0j#c zIZ@?J(MWMq6b?p;*g+M%w+wchnrBL9xRTLDz=)_%XgHX3is-MM_*PUq5PYEH*QA7{ zM;bSJXquU2G&KNC(ARVI=7z1)AsTf~hq$PLLUF&ue*eAB_qlzl!?Z5?xMWgSuKxWc z0Ph( zi=vw+5)P8ieWgQEN`Yve>6$!FjYW5BF^yAw7bO|3$i4&dJ(n_3CWNIt1bdP|EmQ%G zzN+E(O!Har!+!OqhHF`wSKl);7)g16FaxyuXcWJZY1{O8!h=SZQx~2Y@H;OXy6?O) z9UqCZlZV!Xy(U1PC$=4*9qeDn%wiwp%i7Gw-lHTs5L5_rk7uf9>Q;E`F#X_-BXk#6 z<~46@Tr>z&7X_Zp6fve|&2rKq4>ZaETf<#34m>plnz<%AGv-c?Jmc}#HjWxgRFkc! 
zTNYg;TO7x?5bD(zI}ykl>5c6dH9DRmf;>-0Ur@e5RXc@rtS8Xt(VP@(+s z(*{f~3Re4Qj%b}DRa^2Krx$9wx;xB}i^fPL9ZJ8E>|A5MCG{5fWGx#iF#17HZ4?Bb z)S3A8{UW;NyyCn^OF2=p_zi>z^jU_+Y~be21muLw%?mosru7Eft zuL~e@9hv~GAYX2LFRk`rp)i`2`4wpC0RN}q}H}OC5M0p-vqYhYP+FseJoiZw{1K%^giF1P?@X8-w zI`6Uu>=)K^#xME`w7-~LbVJtI{!9k(`6U?C*rY+mCMrE6^Ttc`JNh|J<-68DzrH@; zpUH2eb$D)8<}jI<__|1^7+lF^g|WpXH%$x6Y`Ii3v2h4IvJXpQ@tFN_uf<{cW^K4J z9J5*ZJ$5~s?qG|eYzTU6K{wImdClr|4rxoPdU_J!CHH)o2Z5~%e=GgACC!|lQdyU-$d@RKr~mA9(M#CMVEym?iA zAmxedN*ZXo))iZkc?v###Cc)WiWS8=dJ1$T`SoQ~UtcZmNw*y@xJ4a+6^4n~2FIU^ zsd2aMmt^%P=-DOtq?x|L+5%D1`{z9FuC_y>Zq1tXqF<|JZ_*@>Qfx z(VLAK>$Kr7J|N@tG~Y9H7>~Om(rt~2(l)H2l~Y4QW4l!X@GIetrVavI_T^{td*Gqr z(a!3Fkz=^Wscru0$LioeZ~YBzxuE7-0CSppzXZ2&qtU_o1pi)eK*3y#8uzmP{JRJI z|J@4!PY>DP4krl2-dd>HX!4>Rtgk`OEfm1dP>dI`RKIl{>v&5 zsyXW|uIwk3^V!d8O{p!kNHaI(nBa$Q_zK1?s~uC-2Yb#eh2PvEg8esfei~&!ngq(O zYvrl-!CBIkkJB4d-SX1>uvYUs)%g1p(}E_vK?6?_mce<2Y=zYBCpM6MtRa`sYUy;5 z6=ALO)Fhs?A)j;JJls*Q`V(xu^Fpjt6n$KrCeX1XCojLzj;o*SeUc@BlH2GEM!*=RMgfTv@QmZ&kyO6!Q)Pm?9a`G94%7P3(`=?I@&ZKTWvqCC1%gK0H;3+G zg|W=!rDhCCBBq9^PkGy6iZt=Tgrn_jK_+u@hVw;03%~Om4#ZmSiOpD1Tvyvg-syq( z*u)~96x^vcybJ`DzCa0qr3)#4LJkV5kxi`%H$BZ=o?yDg>^5L#}x-m4DZRU2S+EYbtag2C#3-QBBgSszO`Hpo7AcN zHi;CEU*y<%v@7N?b$!hFT~qfS z_r0x;>E7FBZ$mOPh$4{l^yYs{&aHDseoTT4 z?Yn_*8lJr=($sVCB5ub0tm=ogpK+g)f7-)$b5D6?*@3SL*gLTvAZz<4uZw&J2kJT` zflhzJ=W(%)PD2i%D7!D)LQLJv#h(jRP+q7qVwEV3s?R!-PrdJLSVv5jwoX#lI!(HkLNRCjvK7as&5*63f~sOEk<-d;Vi4_n6J>4Y3%A*Tb8ii zh{X%SCthc3_mA!XtAl5p5K&m5uVvkmbkk|=tl5B^Q+>H z<}lyj2V36Nv!_K9_B9SCig2g4=etYM+|AQOqnkgIoxvwn$JwMZ_G{7R5Xa@BBSKbz zW1H8FEc}k&Rf$PSwN?gc_xFa{Y$lH+Phl9>L!(ljsZB&}HI6$(V`ZRn$^SsN#E4VW zs3KLqUZVrQ_57rC+N_AvgtE!0t-@mEC1f#oYLodN!6Fu?Qz@Z@fv>RV{Y_$0Jh^yL1}!rOFDK^! z5x3zygy1WU+2hSQ5l;s+Nx5-?pb`|-NX*56$b^UvnAU%rP!god!>H)y!>1QL9V_<) zt-0U74~2Ryr-k|)ZHvzovoZ0bPY$i0RZ&d}jQfvt#(RMiO^~XQbf|-qeEMafHhcJ> zQM;<#JO7vRyH<#``ULQ97~ ziVPXxZ5$_BvRNUCcrR=d^7ip*+E#;C{cHvMEs+ZQVmRBp-%H>6qPJha1$<1`mx>zG zn<;seW4WH30Bc-y)N@pxOk3?ZQqT7dBm~zvQ#jyc38O_ZCp~QXB14v+mi0Cd(lIt| zT>-aBhs~f{5o{(G4u)PeQw!~nX`d4eAw9z6u{xu1N$V>(pL{rew5@CyGB;iWguor0 z+-h9hzn}HG$W`a8ttITV1QG=hF87O1;gum@=|<~^oMb+Qm~Vg zx_}O6r!+Ou^1Rfg>VlV`4Mzpx+td9fl1x8cLYis8hcfZ(%YAHfyU9KK{U$G$NNnd@ zuQ`1)MF)Uf>b+~fqPe^B@3aV7=czp%8pfC1dI@@V=>`-<&>THY`!EtbIOGi7 zUS!xZ%6uCK!5mV02@O3#O8$oO&cOz%1cM1n>ZD8)tzEhyW9Vj%&43AbnYWatz(C`w zNFjESC>Ix0vQ@&gn8G}Qh%(mqGoVEPmOEgExf@8MLAcA1 zsR_u@>{``c+lf~^vdzDf{Yc{bm%W2QQTo%6x3wK(UTdTJeutYAa}bQbW6C>Ve~+4c zL46UI-Ad*Wmv8z*`w4H- z>PJsUs<8WxJT*lpfnr{!b0;G1^O&F?u%f%*YRiGyR9ReE_jJF9a7P}1e0cwmY4Y)@ z00~}?`y;TyXok&5M8XiYz`ga#nat~i>ERXrhaa1`&^UeZ<>L^jpA5IyMUBLXO$tyo zl;=Z8QE2`epZb-qso+IbDye%%p^-#Hr%1@zK)QX zbwLk|pM>k4og-l6JiSWutHhwT$f@Z7Kh@t&5I&8Zg_^a|FduEXqF3rS^h}|B=M8m9 zxq>DalnKHOZk=`@%grX3AoS_SMix5@>=)U_t&e^lDATgZyB1tJf!#~N83KNL4J)P+ zMM&RcbmYc+s6>e>Epj906RbXP6+?eEb@l=9iD)jeLg|{jYiS@12wK0NwUzRow5rek z6ukr9V-F5AZILS_qzBc$X(^egXgG9~6nkVwpvI|jVBwki>)ZOxV$Fg62CDo=pag3> z#VX#>F(nUfmaHmbfLtJyl}~JT7WyU`G){8epO1<6IOAb32^-UZ8WmL%xQCtmvjz?B z!K?mX^>k3Y>)?h4wJn!6d7Yv}YigKNaF%{Aue1s{Ndj>q!r@l=R9$ju#X6W9`xJj{ zkjgIPcAhp@ki||lFGEh)=%*#6yMthU0q05WiGc)hpk}-T)fAp~)ZiPjuBfWW?%HK# z>-AVnOgTDQM+JHa^GQ<@@OBG0qO!qqMo|R_a&oTt_wav6KA-W)3MCpur_F@^O=+{J zF1+y9_4vM>#=!^#edM{-*RmM}-T_w##S0beVFeV0f$fOU>ec@1{Z@5}i?>nt-E7$u zRaD1=j1338dX{_o=LBgiKPZ(JRRrR`#ec;9NJs@60L8vO-JBAF9rQnj=33#hvxzHT z*ZiUqHEAhG=U5rkE<=8gNI>;zUaHwx6tt7%9sAfBRbAz|cH|vviYAq(87ViWugoJn zY5Gx!+*h4o>G*?~s3_K^`^7Q3?Ub)bWax!-6EbN3DdIOvsC^n(00|aIOuR$C5$|>( z6IpdlVESG<#bSK6L)KQQKZF%~M1yaNm4&I2EGEq<9N_Cq>Pw1I!^C~BB8x{<)VD%t_n?FC!yJAc|mm;R> 
z51?YFffCdgsvq~JFT?Itgt5p#8(rPJv$MXfEo&%#$>m~p%eG@b)JSr${c#Cm;{%D^ z>THn)hILL8VRwt&jJ!6dUDpE?od;3{=eOE`zAK(nsc?2K2@{~wd#VT3w1$G4>Q#eP zP&)u6T~;W4tLes6=@e*4tQ@#&Y}q@sw;Qhr5;&_q)D4H8&XoT4ihr<> zMa>WHR&a9)D?Bj2r5F~;Neb^1e+UdSm&*()he(h6vxF%swc~Rz7HN+JpAhaD@p6hA zKN)%A1O<4R%TD^@xNJn0!|dadk_yqS7!&%BP{Rac5ycDtGh0ve5xv0sHvtW-Rhf-y7*CHzP~RhS{Sn z!+g`3(ZJ#b-l|B-JhaX}r?Q3WYW~seW91C0e}n5?_=|nxOJdU#`7RI8nRmPYWn?dx@luemcoBNtH6dbR`{Q=6H|M{%SBQb zsxd3?8}Dy7ISF#xt|W}{yf#}C}4qs!ANo~nlk1U6drp&KuP zCYWZ)(?f=9#CZVIUg**E6g~$qf2jbRpCSw})6nU^!}Gty902MkpbKmGw((GrTK%x*6$`u^sPg>&U^5r+dz8(h?1dDl@|WCvJw6%qg)Tr(CQC1 zE!4og_faY}5I4jg^zXr(mmdMqJ0-`HgXirY6YHj^L({U>XF!G^jL$X(H(!_gerIR6Qai)0AE7*|A!Suc!P2k_Kgyu?`HOMZLER zLh0)D4pI0Vjv4NgCG;L>2o6Ag z4obpI35bZawY6gpw^7OS=J27q=;&yT1y>SazkX5(Engk!9Z*1DE<_lMlvK@kaV_6B ze}m&VtpyoiMEQIza=?Nyx8KjQ+IsII`Fm4^algEV6$MP|7f~Ukb%G7pr?rfEU$^92 z%5q3}zCL6*&q*biXM5nO^l|UwMAutN61{Shmhl=}a>3@lO1lKFza80(-J8>k7I24| zq6YVs^}Qh$9g`crAAvE&#$2Jw%EcY9x>Q%Y>@HcnGL8&2p0zX+JTGnN8cQ~-O)<0* z&5Ta4Nm~DilUm{2O{e8(|ugS^^fpmBXMQU`-2)6A^liBSr+br zNxL3hZk~=G_`W!N+o&T&uZOjO6#R#b1>L?`V%(%kE9#-1>g!qKHGsICW?Z$ts8h_b zqpsYenk4{?MExqRTi%>@Ajt^cRaeliorFG}HhZIC7n(E<1)Ob2b#;KeV$W*IVDyB9^LVvDGG zpA6c!8T@(w@25N?2WMldJmFj{_;;Mx#UJ;^J34wj1KG%M*uOI`oQ`b{6_8gtnKuxV@E8vlmz9&V^hdrM{@>j{mpeFz z?l&+#GHK=&AG0`Be^2`xhm;gwIf38r)1au|4i2q*cW*u98!_+a0kWn+|9IijWuJ-ew5xseBo3<|<#jHZ#0&Z(J z-_|c}JUb^Oal{1(#uQ~Hp3!iu@e?|Jcn$0%7w)EY-+w84)dH#PU)4qxW5m(+I7x@} zEYLlt@PXrgo(=O`nljfU9Zk}6byLspXif}m&9^Wr847~g7{|25+&+r;l)@5x^S0(S z=j&87{&v8DB~?_&utJj9p<1ig`E5!*ub7tHRid%5m-qN2S*} zqe`6f{PSwYwM|nuIaq3KNFoeg;{^FZ%z^#O$E{ugr6<3TGxC-lOlKAUE<&YyqZ{w++s(Nd^k zy4}Qu+yR^c39&N3XT+D^(yVSx_cdB5m(ns{xEHAk_%&a*w)2(H`!?;*_lA-RV{PaB z!)^QUwd~&<@Ra|_ND3Ogy*hiYojPZo5r0u5#nz)>aIvA7D!R)%laa@HH7JPMYv6W422$L8*m@F?M> zxnM-7P3W}7`|O%B*%-cD%Tx8pwKz0Ad-Z%C1d-{Y6_bl*v06R9uw4Bl2-0p#NGsCxSJX`bVsQV68h2sUSTntJBMjGOwK(&raR9UQvy;M&C| zT9wx=Vx!F^hJ`g>T4W8xK2Iug3K_0lvEr@^*bi!%@m_0hZ3!!KlF;Jq*pkx@=QXW6 z8q#1HtMTOVO)rw2@L0LK0wV-N6PBnQ0QKB?OK5k*wR4S3{5M3fhT;gE+EQR5V~qQu z-Hn}xkNQfi6TOqEfCd=@FR$hH*y@J~CS?(TZAS)8u@6E)(#GoJd@nZ;KO^sGRsRJu z{{iqq+c-+g7X?7`w~gjo3B_`yh;_+TFt_FCj>;POvBQeUY4R1qJsp zTOp_f7gnxr?5j^c)Mm@NH;ZqZ2dDAJ!eCkBv9b3(zbpG48q~$^!-T0{0vfWn<;v9& zw~HHVmllOO7B7Z3+gNHGk0;Wj8|;m*7q{-Ys})Z>)Nc7UFX@|25rJDCjGL4T!=weW zwzT`wO;u!UdK2X7_-ia+ZI}z2mu9T#i2UOz(JTL>kF)L{L>WeYx(PN{jS^r48 zDSrCzi7_d$CGG8I@U(L#R-tL-ILp@awBj7*d{&-mf1%jY>BhQgVMQ&?nAzqWEO>Qx zlZln}+kMel6NU1m7TU>MQWowL&t@M8TCaXy zwGp%|8=x34dDxFWyKX^JZ_LyJKJ z$!PPp?M(rk+DqY4hytS8zKLS!+yn`o-`sxOe)ep*_`RlU4k05BMZ zNmS@@1eoOTb3k3Y4d5F&lUt&;dtC?XGsSJfEYhHS6*tJ5;IY4KqN%cgsqZBqDm|i#oHE(S9i-QxJ-siLV!pA*7r0+4GZSqe<6>Mg&a* z@)j1zQYWo5CUrj45nEtq_JUL(1_1_TLt&1KaPR z=wl}jY$RxQCly)8`rv^HL>Y;MJ4_C48HPhtfIe&hg1#mHB_lKGU-RW4jO4+kVBZH_ zy$EEm%FCy#KHRlu4HvFC7DFEW+1M>$gRf)a<=p4cB%yj98l4r%4P#J zKlo)1m1PW3fF9c zWxT{E!D_-SkpWNIfL`GjnJe~72-8~UAywign-9f%9aEoBf{Yjv2{qt^a4fe=ID5|f z0lY-~zh-ZN=^}0OT28`>G?c_u(Fxg(&Q_u|1HUx%5;M#b1CG6_yqxFr%EX6U+WOC& z0v;5;3@NIFAQ~Ktmj_{GUyolM(7UxBqUU~wzze4;Dn-1+J0VK$gZ*o}i5ciM$e?bi z$ib}GLz=1bCsW(m5QQ;r#b!*P6XLFX6Q_0pAy zO3zOQ-kVvS?qK0?nZ3T-|9J-=L=H3p-g~Fk|8pRew!83+UTN7^X&(?z@a;Z+Xt|Jo@lieZQ)D{^Z(WgHP^3LQ@ zEh|lvqgJ@)6yF+;8FshI%jjwWw86D5gZ-OL`T#ze0CI+feD$ zA|e6l!}7z1r{rvH{+CW+O+$V~$*a znuJ-Z!PFhB>JG0#;$GPpcg_)@ZL}Gg)iok6TlFeoM z`3#X3hEP_i}-(M;j%|7L~8=Wmh`bpj) zcMQ(enyuUVMEkJ|Uq$e|ZgQ)ddp=pJ^xUe8L}jeDd+pR`@jm6fy6ynb(F^2RRZPnr zc!2BaHhQI;iaC5TpvmKC(9v$LvBmIkdxojWGKufU>pW5~3G%-iQujrJRF=bsy{Oz# zjNg&sDH}z_s`8KL_ZLxm>7MByfkL!ix`?pBTGm@j3H!%W?4oY%n~a`V-YEdjexWquV3a6l*4 
zO(PpjD;VoIVW&}~nOjs$VOgc_PDxWxGrzo#FTJ&6%&audRW~j$HQjLT4c~@Tb_{zw znP16-dz`}HCyQPom^qp`m|nfs;Qe!274%uB>5lHrMzHyc3lLcE zg$JG_CjFIDB{f6#!Z8GX?s}o5$bW0_&K`Fx1RkW3eAn(;7FQYA!X8}tSq%yjD+#duvthmw$H-`KF?xqTz2;;;b*^mp4gAg~%u0@3SnkYMzKBx)lg|qHPMQQ!4-5+F1XC}=0==#6->*4yaP4KN7Vkw-J)AN<#Q3i< zhy48VV!1Qq*nw=E0#i!YpE8n?PKszX7)%DvayNc)hifxGYeO;%o%Ce)zujHl*1jyi5unM?l7QoB^~ zcb^8TUE%nml|u>`rBZn0FD$A=C6QuAG%xdMNkR~T`3koV5uj%^_P-JDPuFys&t`bt|DuoBVS zPt4)qeWy0nGBv5pL?ENMm*JE>=$UE<7hN_DnwP2`d}CgJUO7HBl&=k- zQ7z+UX)Xr>GdUa17s)i+UK?GWuyd4$9^|4`944?ven7_~b1j5OQ2JMw{mBqv?aQ_A zXE4!aj3VOb%*m{4Wa#&5WIj{A@E5Nae^iRtMoT>QV zH`*$5#Q){MpW)#=D`FKFmbiF**UdSeSuHKcyn{=~sfCb$SjRvyj8}Qm`tW6F12F+0 zI2+7#6H(PcPoHj!(Xc))n(pt`{uE~iQVZ8|Kzd1xd?5M+OI9NwAqUOUpY_-$^Q>|Mxk!njdVVAy= z`Bh$-zF!@SlfUJFJx|I_6B{Hgiq1`dr|>`jvBarP2Zo9UbfD;*jS;_(fs?D{_E=*)JgEfR62eXQa-%6 zv3JYM=fe}&v?JK4k=Kif!$PSiuaqq`GTcz^5)Ye!NAX^~tAF|r-=7h4#$HH8`cITz)wC1{ql?18I!Yzs=%*J>#c-JIN=*ZICqKt1ES;b(c-F*eE3fgTd*t(XBrVG;nK}5NPU? zF36_`b#vC;$Nt>rzPu}x(!ULSEEXlyyO8~I>Cr!I!j&Kk7WL*#OGmQ( zRAkyDsaik@8);i9)y4gC5F*X!hvTa=ao^=4%tVcD$HNT@oyB0S>xF#^TSm2Zi#nc5 zQGMD>kD3d{KHldxkqn=?H}PAx!q|Y|L87|$pV1Il8KFuXc&c|TF9^iHklE*X+QN7+ zLTb8R$br^zF;g!@O{H7;W z#CJ#b463&T;iwu_>GOHKsX|>#Pe`>#ok_}WSyfe)N5_KFU8~^apsPVnHYgwa_|+|E zmetC>ZSC`Q&;6fSRKB*oeq|AqWsnyjz5c1uh~k5FmB*rJ)5T&EL%GCxG4 z$M%Ft;VNAu^<)7@q|Z@2KL7fUZ*(o-g-&1RWoutY#4F=*`vR_Q&yU4+uQR3n_NR6S z?DvoJ%L`1q$M!e*NmmbS>FqWJ={I_@jN@4*T6%C*ZBE%Yx+AUAHZ8w?3c=AyL-XTD z9F+B*CcoFYHx0OKvqC4i$p0G{{qGZXa-&)7;2Qav^5ieAp~;DBa*Y{3HpV19s8UX} zhpIPT>!;gKhIIrf(JX9^W#%*YC#|oS&NZ5RqJ*wYa-%^;eFEi^{^Wsi`cA?tH~GtS z)Y`TX-#;P~#`m=yr+r@ep+2f?oA_VX|10{bg=6_HzOeiA!2ce^zm4wBqp0YlMP7kW z?2=uC@3Jl@sz#ACvQcMXU4Uyf_C5x_IkSwMtMYE5o+?2jC1UTO4b}RdYX$d+9K?_B z^PQaRxHe9`{(Yz3Oz?}mxmo{o4`!eh2VitAgj`ze~uXyd_OadaBzXr1qFn^e?zhj{~EZNcB01Up}6#x+5 z+4l|q3-$kfj_v7pE3OXh`H_sDGdgQVyko_~!(IC_hR@NCtElH>yb_!&@M(-BpXL^I zSs#XE$i0W-`Uf8A`AI#b94FuIhX{S!^_3qv$&r(`oF6WyN}b?)+h%7XkmpX(go<&m>hZX;Ck z53mw?@c6NIsbMY85aU9k_t82B_Y_mMJYBU{xE;DiOARO^|EL9|wnWbG+X8r?DBoUg zl0B)P>&{m88#~qM{<0BHB8B;SE$9`!hV)@#J~1K0O&ON?8Z>{~z>k^?N&t}Gq&0$1 zugL4X*RH3c_U;ZsZO?A90GFXgJbX`(_cLZ9F>Tv}FQwF0=Lgc|K4EbY>}O13zai zc?4-e_RFqY7fmx+Cy4>6FFtPu><0WETMuN<7yZt{n}4-W9J|nenHPofMwJbwin6!k zN^5Cq^0`lv`uD%Nt^lw)aybF}(l*>({2w>X0f&k7O4rwJI}FzN1fOAy zcT`86XWV2Q97ES|}rEH)_ytF-KY0aYa>NphZQIJfS^*i9AvwC@G)A?or}vC3_alk!RDph}Jg@O|2& zG9o%L^?85snc6)#27S2gh2BEKCoby)CArhG-=_z#CjZpeW-Dm9MVVG!p076A!{Y7! z88v0<_nF^w@t67OVRBCqkf5NU;(q@su4)u89dg_utH0lM{g+fVkt&)?49pPcpCLj}-W5#K1m!bCh9TT^#hPLqg6 zr%c7~Th`pA^Zg}pnZhh`kEWPAUsKP7&d%`9GJyBFtWiQY_WDIO4pb$E&zL&m0;_CAcdj{MMutZgg>SLpWexkd= zeY)$QahZH}(^9v9Tu_IBw?2)@@o8m=tM}mZOFOG?DNQHlTKsCgQ26k2rxt`rwI27| zw{IJ_IJ%6oOat^YIm(*nh&{zDx*ntahG^F(bE{jEPg;t}DJb$d%IbA7r1#a)CixrZ zM_omRaIVv{aBk0@)o|@?>pY(c>N_D z7uVj`-fOS5*LkjBu|D{k2ge*u@V=@}Y{C@lwb-tru*qKLeFHgPRkr(JnZ5o=ZUB4# z42Y)b8awmt@1Z9Da^8YE*`k`+r8DYpU*}^>p-g2#NhYtt-!M6c)1Wthl@vW&L9DSO`4$2;d7-)od61sx7CE&WfDAIO{NZSh0p% z1Hy@G5^3RzrCQ>hw3z}cDu<2zm3av)a2wjv$KuTKu?%0!Os976DJDLv~!qf zr?x-Xv_BdGsjonM*#}VaX+BrDmS!RwPzd(DbqGnw`xJlpmxo|UPVaKcETHbIe)x@^ zEmy$`qr7pbLp~Vg&Nlz}EsOuIImMoR9uWR*1KUj(UOty9tO!<8;q+y*Om$o4PNod_ zfbw&uY)!OQNCg)Z4g5AsOz*2hPmvWMECgdk40th0Swz?By zsFbXn++t3^`l?HZSNBBkOqZp$rG7%3&Ae>SSbW}g4++l|+rvcjuy@GZjzW=rT8^5- z1YYxe>EPbMMJ>i1_{*?n{|Ud*wffb;a5v=KXrW%o4Ibkyc)+q}CLdkoab}+iIA@W-ikXb{k4sOPx03 zZNYX4NtBrj;#Rf8kTSLjmklv{!U^O6dk_qdn(aqU`P!u=YX~UgCS3}Eu)xA_`FHHA z!%t~Zjb4@H6tM!&A4>v(VM-LN{@JyAdr#9Z>}0In=i+za9Mb zbOC|to9kNcQ(XN=8Csa`ekuOUxtAoTh*viw1Q6k)^8RZ#sv-Jq^W!ys&$JK`OB?I! 
zcLg0LIxI9D^lfczGoWBJfNfo%{Vc@Ln%hx~aAi1<%Ge5TU@Z2I(~gNXmA7*0V^a2{3Izy-G#`xsosQ%& zj|(TEnKEO2j2usu6ct_DX6Jz*)?+ml6;6R$wkvrBR!2pT3&D&mBJ;IpWx$6-GNpuP zgDN=j0ed*)bE_Yp&MMog;WFQii24-V-OpX3(LwuM>u;*RFo|#t99obij=up3G#WuKUSx0GCi%{oxzw68(#sTS!M|-`OtBlGsZg@JQe_1}Z!>=;8y=R)lLu<9*vyyLV8TD;#GC59#!cCpddyaIlu>l+jp#i1NwAtLmjS^yf*Q)x4# zazaISOB7HsC&(Lz$$zhw%Ek~FJWJkEZyy_GsO>qNt(a;Sw@#<6{phJa*pQ z0N_#1VcRj2L#Sg_2_8KF@MHk0(2Y#(#jgPK&B8_kEjzadY(~z4y??_L35edXY8rjg z^tn$Y8;(;g1SWc2BTzT-4Ni6GDt^sd?@_FaZj!2@7|w9e@at3k9WP>h?i&hwb?e6w zAp?v}z@v{LKc+2poA4+``~2n5fAn>LJ?G<`HV0wfQvY0E{1&9l++S?UDu*aEEILrN zx+*ojcQNX=_tCpL!oNwx?;18<{xJ{%QKU5l>g?6bXc;1xhIaA$bY1;q`yt`QMjbGq3dpy_NO1G()+OT>P+_RCvWMlWh00*jfZqoM`VcT1SRSubaxhIoyZ^wn(R7?Y49>9>K%yls=>J57e+d1f^JBN|6Sy43 z>Ya<+Q|qL`BUZ4ofo+NKEmdw5!YKVVn?|}Js*_I`)%2Ip z`23G_R1FZiqEv=CX=>QnnbmNUZ;=)`oVeP_2QxUTc%*W(r>lnB{SH1L9dFA8`7aq@ zB+qQq0gK0`%kU$Ej7&q{+*dqX`GL<2{Iy0wL|Z|V?Cl<`v|@L=D0NIe*zX@b3#AU*`{4M#(_&7Or3@L3oRW(GPj1kFd+!DX}Uj${E z?d;LfEXjp$#7CzvSz6MfFU}8nBQITFaLHZ$ z`wi&N2PUYE5D4by=j9)a>9PlC`6ANcdLIGYGGQ(2Gy(Y)ae_q?@?T0XfQ?>X=Wm;> z@9w8~a0#r0og_`Rt1z5wO5C26G|UWK%p{t1{qvZAdS3_U6&jikP_S`{k#2S4Ib`r} zgaOK}m!(r}_;|XE%W?MDrmBlBuM?mTMt^?u$|Afds~^CMFY6;ZC_P z8ZyeAF5&5z6j*5R0*|TxOb_f05f)_nr$E7|GP3r74?waP;w=`m^xhaVT-n#0RjkKo zdU$ejQV-;73b<`r9azG}{MJv5ZHMafvE%;!{`GT^`_`Lz8r2p9_Dvh(7ncX~W3hW* zZ<-f6wj{3-LV(0WRP__G@hu>ko5fFXF2Lw+Kte{qE<2*sc+|tJjd*8g#2B)?@YO}Y zYPZIFb3*~hd7BsQy+C-~@Nu_gZj~+a1x?okeKXI;E)B{3LZIB$T0~^yy04q~rpHGh zY^Y^!e&xDWzH-n)LiHH{9vP?j=Ku50WrGo>3W4>9-oM-FXbaE*@@-9`28rci0efNY z*r_$|-Tq+Q(ulFc@Tajye!`Y1&mL=%00og@FF1CWN#ReRFiZc zZi>33EL~(7DCsn$JA^6JV%)|l(77&bzL2uKGFE4oNSyAH?O3-rSQ3s{vvaJ~%RF|B zDWSHUKij-12b3`PYEK~I;GFkagivQ1ML6auvkls_4zWEedR=>$t^FG#}DwfeCWS!U#R_#f;r-GYu39WZq`cSuZV=<-0qzK;&sr&k9 z{S4X<*F@=z1Dm=pu^n%16!_I_0HrEdvO`b5L4$*Xff9fQ4R42m{)MA!hX|sl=3e{4 z=}zrSMpb*Pf}%x5q9>n1=^JK--Uryt3-920E+Eg&#%5`}{Fy+6(?OTuvukuW z-UmpYI!A~=nzuhR3kHw{ob2Ch+BpcvjGdb?8?D7j4#F~3yR~Y#9Dty*aE(_!KUXtZ zX%{{iR;pX232Rzqic9(Ripd-w?hYh%DcE&HRSK*g4HTQVl&wYqiEcd}pVU!3URyU8 zk1pP*rSn)DS0Z3hv^kS2^vJgBvs`$5N=otNHB=oK+vNdzVCHCpf>w(IS4BplE;VX1yO!0n5qFQ#y<9JkhzcMJ@%Go6Fki-V7yh z8zX?KaN@g>=XSm+P?)su#2c6tGw6&OFOQVw7PBzA0;0w9=wO0Bas^(HISQaXje4Ei zq5#K#3{)up`HAk#lyY{iV9_|~`dbfx@|nY0Q$N9R3EyKpw>4UcAlI=E@4U8UJ!oR@CGtuJ^Pv7&qPmLEnEQu_=j zz1+WDu3rE?=ID8?;4eNVR%*?tdV6HdK)|P0OdNOg zEi0T`%LI9YC>FV5202+GIa#^rJJ5@R`5ajcY|rJ2 zUp$q*Vl$}4elvem0jnn+&2e6tsh_tmiPhmdUyViM#C zwH}dubDfuC2BraBaATh^s384AleS&^#`{EuE< z^DoTWa*&ac`GW52+eaaS5)%_kmURt*eD*CC&u!Yq zCd%P%4+9gRAFNP2C+xMEcRX3-a-Z^GfWa$UOD1){C}`N7w*ECJ-)89urF{ zu3?~imF4o(fX+3d!B>IA!r>FfV_p!0r;rZmt;n-N$pMZ1&0x-=!_^^Bz~sBZd>tRF z)R*V1dSzelmgtr@cBi(CaakSr4I)TT?-S5wUYpk&QjA#I#C}*M?aV+&sT*No5vqO^ z&*cB~x&K}VIC)2O&BrtmZw5m@hrDuPri<4)cwwhv>;-Ka4K&J$Riqa#sc$z%Mn(b| zgq_Lj{5*%O`b9G#Aul_NKHjC?Qyt?GFfNoeuVn~0cDwzJ-JkLKx$`F1(SaY}Cq5{a zQ!lN0j};p1$(?RVD$m;XSZJAP>Wy9cdy8gGZ3=G_gi2G9$8rXxW>yJ_Kq^K{CTHw> zUprJYYDSG(?AZJ0+P>}f=tjHhGOW7AMZz&6gk#jC1dtZ4|K%H}cS$^x ziMlv%^1e=l*KTiIR77+wEc4a5M}W%QduxP*hb5TMC|qGJ-_Oq8e-0&+xm50(a&ih< z%L>`!uPEMNPp|x5F~v!K-9-4iCJVw^pRRS)>jDLnL|_y3z)bpTWcNRi0k_xNuf`Twss zGHGOa4YhA>s`Rl!C`zv?o&``2oli24FZ@B+{;GYa>!eCy^sx!tDQ{X@S{Bk{toLna z67$WPtYWR>n>AWS-oFw6k2Co(KF4|F0T_qqX8ApvpYPrvL@HN)Cx4+~l8HekBsFAR ze^h;NoX8M>fueQ+?CyP&RVVGwx8ymBsIjRjUu7YJ4;Q*r;n!u9{NPE|HD3KM!kMjE zQk4BIksls4OX-)IaM530W<@@%6z*H8`x@>aRbi>p^?_2yvZt8Yy|ML!Avo3PSxX!T znLZQ-1hm9r96icmW@cua?JH{*Q)$T+{*%sla{^Bntq~us*W(wRuk4cm2b?1!>gZn5 zu{;_zSL#g@Uajj+=2QCSa}hZ+UQJ)K7IV;%bWdsuJEWoyG2+_tPkJ)1r^ND>-V%mr zzRnR>Gb3ofp5{_`u(Lc~uN=?uU}bsOOZmMj9Vk) z-FC4PMZu+ZhP>Y@zW;1cg!De7@t4`n6$^3k89g?LW8IyB6 
z5n$%;tA_jjy}s;PjY+fLiPy?0)L`C*3OVI|BFzI&O$jQ#R^p`#vn?=p1VhP`il%0iOW|XR zD2_XSs^igT z&2h@tIf3ih&5ke%R(q+&6=TOSt9{frw7t~$w?gT4t4uHLJH4_U`yGl4X%7_X3By8n zhG54oiN84a&tr-BEIZjCLSBYY$5Smc@IlI#_V!NT?l-sQke+{ZY&rM&m37gh4FXD0 zhZmP_J@{g_v$rubSS~~DRl6)x0kjQWP(9e8W?H##A47wz>}n_F%mei(Sbop5;q(2% zM(x_jYHmi!H%lSxancBsSaO)0*bT_tLb<=q447gxzjXMhIK}BEuNlV~Vn2&Yc|o6@ z{KSkJFzx8dDE5q3^Am9jT~T)LRiyzg05{02BtR|ztuqsO%@c~r^rDq06i`{Itk0}d zch`(m#+L5dOjPCX3`|dt3wCwWm`63Yw`vjlJrqSMj9WZOAkj^Xk1rW{ z0Er*-S^s{Kl=%VEBcm#NE7-v0Hi=WkH`kgH-ibL*-5Ic-P@waAw42iE&Ac^bqv?9` z;#F_Cu%aZOw}<*F=hc4>WA7Q9eUoahX>qt!%*THC0n*JClc? zTsrl6q_2B3#1*K-&CSjDgN9mE^{q2;rzAr4xA@@2cjU0uVEqq|Y=W~7WUzW?sxod= zJ9ANp?zNrtI!H)N7JMJp7Kmu=0PUsMFL zl5_-os)nf7)+z@XsuO_n;wVR$!c(lY5`O`w!P9s9#0olI$M4E|XAmW$opqH9dGhaU z`9P~gMP|oO>?$9BBDbQxJQx}1cu=Q@x%P`2$cb9eC6JN{cMA7XF%gl)M?~WHv=-N; zn9`xurdqj53fej6Zb?`~eJwhfkZ(M5U|@j7)B7`B=&M($b1r0Ipf@6&(QPd#KMCGc zJ9+cB?z#nw#)-8E=Z){x`=Z2ku1h&H{rjZ^(pF(EtJGEj>afP@$q%P~yoWe?XZL~N zm+!acc`GR65o*L1WhQm@4Bnq!$ZZ@j?J#1JQYK60YT826L-?)yc&)n0E3NL_DWmR; z-E~>ut}?7-R++LQ%+;Igz$(b`8&@y-XY}3oRfi_G*6{fFZVt1-SZT z+S|>P1b*`sW|6;UW@3M{3hwJ$;A7y`;M3=s*tL$6 zc)!6xv0rY5mhgG+Jol#eh8%{Q3t4HI$f(tVs_!UdJ1pIah(WLVEwNEuI{BJ()}os& zSYl7HEah0cdKMlF!vrr`FDxHMN}dR)P++eqNa^z1+;^OsFD_C5CTO~q2~Mav{ub&5 zjl{H2QIS7Q_}(P(dQs_6E<6z_uILsp1Y9T*;5zS-PL&pL`vmGT&Mt!wF(aj-VhN@R&^vViTdR0N$U zOUuz_NuxF46AiJAT4YyMHL}3D^3oz2W+6eB=rHNi^=eTCv_Ij^_iX+&Ey$2|OPXS0 znAv0@DQoFda7$gf{NB>@hSU6A%3b7@eK?Vix&3hegu&a&OpIyKVWGs%Hih%|StTD6 zY@Jow!p`INZscHt2N@+_ZOdYFB312LKTr|dU>PPO896HSjf`~fxCSTczCRuoTfm&c zo%}Jg1Wm0PXq2f2+&ch3!Jt<`UTHy{b`I09saQ>=@fw6~=gz2#yo=1Y>(Ooi+P(0r zd;{^7U0l0TpJ+d9uH_t%tgT(pt3#e7SlKvI>^1Q%R&1oz{TYQd{lUgLKHK3(?2kSM zF2;28gZ#0pB|iPjF3VeH8g`Y>ziBTBUmi>h^f;(D#oYL{vQSTNK#Gf+-~IIIcJMhx zC=D{tG?YhZ>^;E{sK-3EiRJEiF3fmvYZBwUSi?BjC}&JvyXk6H*{xkx(oGjFsHRz! 
z4L=zg-0#5%v5l93?I=Y-FLldwwWd~v&=@~tj+_oVX!nTbXrKTTh6>LKlAlzlGDn}j z$;4JqlxyX-!+X;umCjXW+U!4|EW^y_;p&lGZ3nZl)!@e-Z;)+UWE8G$*P76{!^_v` z9qB2E9PG|i#j==o#kf}Odw;f|-2JN9koTdtp~sHfqH`EKyVa-gAcg5bJmBQ1mHv`% zuog~&0C~<9lX=e zf{-TM)M~AXbI$1kOPE$pjpnDLn~4X=a??BesZ6T2$0X86^IP9bveBakj(w zkvgsT^Da=K$_VjU;;yCjl|*BN(qeN&eGN!=*ql8hwx*_r^t?zy3L-ye;=wv zjM8cdBxm?d(LY{c?AffqyeV)|=Fp_XQTd^<+-@1(pY=)Uj>1gIp^u_{V$(bhBTOx_ zk$ni$a_MCYqT*Q8loOcL0&RU)7bZgC-+nJ|tHETyV$%%bkq4N^9O-GVtupS>75i4{ zdLZR0rHtRgHX4=)GO6E^b*2o?Blv?&3-gB8k zOeSSTMW zF;)MSm)fN%fc*7@BMZAddWS@gn>ml*M;6GIT`wK{+h`@W) zJ2Ow5f3C0p(+gPM9W#5DW`BR@y?%dq z8gogPgm*LMqS|c#i8!C%p!LS?Mzj@G;~k9=Fp~>NwcHQGX{yelTg0N%emQo_s;_er zcP;LKS1@*K;m+(Hy@F|k3NwI+O)Cd7yOL6Qrdv{#FBp(`vtl=p&PXR5op+b zjoeYPpYT<?Hj~~n+N0{HG7gHQL0`(Id zs|)Dk@ygSujlG_t^x}m~zVePy#S?^c9XYif%HE{O;rTps0`#`e%7pN*K7b=g%nKXczY0Vg7?)c|4BPCzsq)Wlcnv=ht96XD zpZwVAC9yi|WZ2{yH0%$J-rGM^vY7QU6tQ}bd=@7zo32O$95e;O9|Xm+mI~Dm&5)-; zfDYsW@ZMw|Y5-NJTWhnj0QCQ?rxciZq_27jS^532Hv&F^vhonLDE|8rh`g zZvFVKPpF9W13*+PfH>3?dp?ohPtG6C@V|d2rA$OlEaUdJaBE@W@^!m}{&(jk*Px`V zc50!n6|-n1BDmm3>=rw2R^b+#v&eT9XJB%)s0Iff}rJfyI{ zzyIGFq(~PaCpi4>s$K+MR!d7rb*abvV^mt$15Owjt-J86{;0>O`St8zB019wT0b?c zQ7J^<3r_OhC*S)~t~SV+Co4Id>Jq6WMf76zP8xIo`Xe>@A8sRnsB1R>nA-5k%OrYW zXP;j@>zZuh@ktY=TlEf_|MLZk8pj)iUDhv0cu_QFi+bMIe_^gm1VV><#7O~l6Po?A zE&HR13esf)YFl0xzw{oTu9qhibkE$Mi|Rb$bUW#1xz9i_b2`hkPk~y}{ttp=LL}&r zQxenoXyTa_GhRLaL?h=p17V9g`)8hd1JAUsUAx8q-6*86H`{c@z;Qvh7GJFNAO3WL zIA|4F0WW!Ho&X@S(I$1y%)-I*YSdkYkdUxU$M3>k)wYPZ2-4z;%xH#;6bk7EvQ?z$@h_14;)2zfp-hvqbU+IejVAO^5GVMI; ztMH_IGBL+0@jSPIY;s0|WIiMm?_^_{0q!=>&ZYNmt=573)b=lvdlvO8~gXeEpL zM8IYyV|gJzV8nDGQ|}#rbgsXno1X!ItK+FND8!^FXWO?Xl}328fkx=m%cwYYRg;^y zWkOZ`)hn-YZnbI^xKETe7DbR4B7=7ILkA^YZ+4BSl~_)U@qUdq9fM#4EKeFwg%h8ZZf(c9i@{=wQwH|hRq`;r;n6FR+@6~9Y)bD=7A1JaV6n=N4}8 zB!N+Gm|^L0DELWV6MDfkna}1!WjY-mL-)n?$Re99OZ_xxh{rKbvCJiw%fpblNsoHu z%#%#gXk=w`IV^%qkY;yZAtH05+1xWbDBWX6fi7VnHxz}_Lcar$ng^&lABC#usmfi1 zhtp2+*ImEPw0)!9l8jO27dN{z!Q8Gq-rO4D4IbjL@^g zFy?9JU+A?FNwd|3u$V_uvx`C@i9oYK)d~>&j(B~GCDv~#rY;Y2jeu zHZwMFW}JP0S6zwq;D_wwkp=0mO;xDOk6+SqQT>s7{?~)bz)nCKXnWTVGjX=U6MhQM zjnq%%4Gpkuu=!0f`0?6!b;9ABoBTnE>Dp`GiZ_>7>Bl}i1P_;3d3epu$@AHb;P{@- z-eT7&&Li!%`E7ozTr$tI{El`vk$&}Xp;k`>ZtNLi4g2EkmP(N}^K7K#X%#_Tbekl9 z*B1%Ki#K=@)Ka0!B?)SB!No==$to&Eo0^u$TCZ@E*9To0Dyxh#A(!~(OL+5wkVDQx z@3||Uw1J9r7L+I@-SgIRR3;+mgjLN1f@(`qP}CNXw-aCfEF^Uf1X)Y;8a1t3OFNg` zI??QRj%jyU4?u5>MKx$GRz}vGMZ_i|YW$B`kXtJfJ`+2o>Ny0EGv_XMbSI;`as}z4 z%ULuFKGpq;4cSArt!@>$*iU^ZkK;6uf3Lc)&vWD^tq(C|H7GNQ?%0an-2)ovxvP56 zkAqf5z|b)7NS=zkF*&n_@2cO`;k=lN z-rtfJ-QaP%oPXjyjr_PtL}HO9T&^mHamue$>O}yCH&rqJ*vy$GCT+nx_bog zCzCyMVl>ed={>bUSL@ve^d?Qp@z92;**t+y#B~nfE9NK$6B?YFwSRW9qdM;vR|_*r z{8*Yxi~c92w`9-?gDFTH+t-p5r@0buIxoQ4BgKwBTD++=pUn<#fd`~iRcRg{Hd&p? zvc&Roqk7N^@w@>oBvG%r5?O9}1A5W0-o9=ko;8wGs1*HiV3*zaeH=;0v3=!)|Nfjo z(WCOlQr>5T21X;LXtsC`{ix1(PQ{iJ`p(fAR-`xS1tFWGm`hYg<;SJ;1?n%Ag5^)k zN1gRqoU=Yly-SbOUA=Ic@y)4Oz#S6NN=<>+Rj6jTu<} zvrwVU=Ln#pqD@`tP^AAcG5mluRhIumKnMBHY*to{Q;jm?3 zA1vD5$I`m@^JbW3=jC!%OIQRw zGY6mY@@NSswc5G?Z8et=?&H%rTV$2ABc~poY<5eB3pAlK2B)80rT8%PxTLLhfR!0NWYrDD=IBe+HdU)(-P7z3W=*|B=#dU>ikCx80)pp@2|feFUq{7Ui9L&1 zm;Ra>@%;IQOSXm-SB|?ZWHADt8gFo?jN1v=99581m>FLfEuXboieYBryjQco@!)qVjkW1 z{vf?#OD4&bK4tCr0w}<>t_SN*6LzTxor3P9NXNmu5%$Q2O%r3n2gozb15X-AriD8J zAatC;5o(WI`jTd)`I+Tpo)h$KjAE@>qSFS9>EdK7P__Lb}vDGXpTVn%~CbnO`m^h8VOimUa zaS8%gRf$Z$d8Hzo1Qg;}ddEX1_A4bVEjY25`x&1sr=GWzz~Sbps^;>SFbZv?#q9)k zQ)MAXBuC=2r)iW$EwmE$gb}Q(JD?Ox&%UG%#|;GXd1(=uqH+khsqn^Jgv5aP0v!;tYxfM$syW*tLKtejRAe? 
zRn%fW%jWkj(zS}N`tH?K7MYrXa5W1E$vTT(jQ1$TE^fQeU;>DnIDJV-i;c+0+-bpD zo}S1!rFQC7`c|@j9l~kBlzV+J@M@j$dHPpz5mJ))-3St3yh7{uiCCmEH2|qMwpuf7 zd9tH9k<&G63n~Pl9PWQCClpjm7|*F!<>PyqMnJaxZTe%4>m0hRTsMuG9V%rb-O%v< zD9u>PdVK&0w+>E_QPnq|tar<+f`d@@y9p*O4!L3@mGyA3Qcz+V;*%?+CzQ@xQ#p=} zCDZrd6|H%tQg!!oGt6UbSAY|HML5IR|5nJVtz zO$08{_tTV*py6!rFz@=<&|+^xU5g;bQ*dkUYt2IRhiz?SlVrN8@~aw>=F}d$t93b0 zBVRv1CJoGl^GMogP8VePBni0LCIYzv+EYtYdQ%vGFhm8_>T9v*L$3qF&iY%yV+?`r z)0|yuSL)qXE<+nYmzVuxl;ST1oC}Y&1&}d-d&zO>0Aq}UzhxsIru!t;uOyDuN%>C@-~HXk=o6RN0Af6a=KMbmGoqUNy}GhDmv(!&J;@z(qXsV zNBr&ebD3^d&nWbj zFw|OZ8K_+uHE^{-$0dmS>?$Kdi#z%w&xK(Ui-ORPV!uQW#te(bus>tL2M6zrNL3ND z1QS+vLRk3qgTp2p+{;ynSNtpL(;Nn#%B|`8pNQVYIG6!jN`gM4apGIInQ_krx{a9x zuG1LDi5tjiuh3lx^4L~TMX+wJ5vef@_z)_jKH3iLqDyeMDz&3wdJUfeJWBLecj0$v zM3EtjGUgdfEh;7-6FZRG9Cb(TLf4D47b&WoPb+m29h_IkGWm|ev-#}Wb>Zo(1yy#V zO0MQI+%1?%N1XNWbO9*DcEItt6ilPiVcY+{&JeTx2l6ZBt&X|PXcMt3inQ<~O z$En6rL)+~@k}>!A(_McU5m-a9D`CxkKHaR+aBI{8lKd=^&QYn)qoM0|tn?Vb0Y>8D zhlP9iR1VAd3@h*);FC6Q_mp(>szb2m+Bg*I^WGLZ?c;Y4PMqo5GW=8y8~=&k&lV?x zU`WKUA_2enYah|=>bF#a>OlCBl1S3As$iQSvs`eXxX+Ft0Z0zZZ? zqC~pCVQW6RX3BRGjLK8LeCk{^si1$R0eg%yHLS7@lTCi?s;r`oeGO`9My#2_<3&SN zlf`=L?V_5tQXNq${$j34$S(I+JyFL`c0D#%D>dN#8X?F_MjG>R!wnhh;_I`0iJXo- z4zvK9SfpQC+!QJVsg7dTX^HKVr2hsNFoindBA6|g76sBldAZ{`z)@lqq|oOlnZc|T0)JicIfeUH&D7)rr@sBYy|GpK z9v(M?2pZu#DbY*Ci+(`E)w&&5M4uDbU?E1QppWC=^Vxy7h*ZD`DY$x>HN7WJp#Um^ z%5-aYM|d#SVYRif-k));Lz3oYjV>u`gLwYX&z)jQLD?U@cvv{<+Px(|1<#vTml9yV z{r?hV&Bx%)nqxEaZJ6D>A>A8K*nRuV?0}kJXC=d+=iG}aSk+=Buw>6sA+cp=2T={P zMz{B?Pv=FIqy+Z+g6V+8mHGu&XY(j9u<$T6Rq(E+@{~b0IJXH@G_gMP?Z*Iu)(d4c zl1>l(`MZv_V|b&crt)Vm)?qdrn`4P&O;dG#P=D(+zY%De{b*M7=i|x%#(Q3{##4o2W|n z$pz*I#+NsCH)C=`$>2G{Lvx$?D!sdS9wKtMn6X*H|NF`Uqa+y3wW81El(B%B zWW(u~7k;r542Fcbcpld4#k#TV&qCq*l~!{Yp9{*zi7D`Xm#fZ4k+~b~n>$;v^}GVn zGKaX1ZKd3=D40Tq&H5WL!|MNO<;$`gUQ)5QC8{Ox1n|Z3hAQs z5WCsBN)WGB06;L=HG2Ewk3<{Hh&y3YK?@b8JA&~%~_+l>j~JngPk-|$8Mo&!hOO>zBK@$>hY zvXJRlL2%Ru=}#N&aH?B(u6`?+K`ceSUcWme#Aen+tZ{}96vscGmcVbNN?qA!Jwbl! z7MIpSpHlZ+ms5!_o}Dte0E`2F_^Y#@Cp+yrMgli4d|g?B*(z#jQ}g>wPUo&n82s@2 z8n02-bFHwjIfTg1%xN3<;d{44#RWQQlRk+2`!)6d z>N-!3EAE@MAI$2dY$Z{OM@Pp@e4CA2>HI@=Xse7yoTBwEP472-t`tT$s^&YT=tZ8V z6-s0n6r=I|56&5*mQIODCxV$}_|wdf%r%e=E(@R&U$t)|0EVAd{}F>@Fex}qgFgsT z2RGJ9JMP>Yh+d)&YWP7~{Q3Gd82CF|xT@|NhH8C0>Jz%mS6`2|o&Y%ItS=_@OgDMu zVWklsG}ltp?(}HaN30HbvhL$s1Tx(LtpA<6x_I*XCt6dg^X$`}m>NKEOfN^>y;b}! zr#I6DDdaGRlx+`3g**fHiy`|rpVutczTN-XJO1 1 ) self.tp_group = get_tp_group() + # Currently, there is a bug with mulit-node tensor parallelsim + padded cuda graph, + # so we disable padding in cuda graph. + if not all(in_the_same_node_as(self.tp_group.cpu_group, source_rank=0)): + self.server_args.disable_cuda_graph_padding = True + logger.info( + "Setting disable_cuda_graph_padding to True because of multi-node tensor parallelism." + ) + + # Check memory for tensor parallelism if self.tp_size > 1: - total_local_gpu_memory = get_available_gpu_memory(self.gpu_id) - if total_local_gpu_memory < total_gpu_memory * 0.9: + local_gpu_memory = get_available_gpu_memory(self.gpu_id) + if min_per_gpu_memory < local_gpu_memory * 0.9: raise ValueError( "The memory capacity is unbalanced. Some GPUs may be occupied by other processes." 
                 )
 
-        # Load the model and create memory pool
-        self.load_model()
-        self.init_memory_pool(
-            total_gpu_memory,
-            server_args.max_num_reqs,
-            server_args.max_total_tokens,
-        )
-        self.init_cublas()
-        self.init_flashinfer()
-
-        if self.is_generation:
-            # FIXME Currently, cuda graph only capture decode steps, which only exists in causal models
-            # Capture cuda graphs
-            self.init_cuda_graphs()
+        return min_per_gpu_memory
 
     def load_model(self):
         logger.info(
@@ -150,7 +157,7 @@ def load_model(self):
         )
         if torch.cuda.get_device_capability()[0] < 8:
             logger.info(
-                "Compute capability below sm80 use float16 due to lack of bfloat16 support."
+                "Compute capability below sm80. Use float16 due to lack of bfloat16 support."
             )
             self.server_args.dtype = "float16"
 
@@ -168,8 +175,9 @@ def load_model(self):
             skip_tokenizer_init=True,
         )
 
+        # A temporary hack to fix the num_heads for meta-llama/Meta-Llama-3.1-405B-FP8 checkpoints
+        # Drop this after Sept, 2024.
         if is_llama3_405b_fp8_head_16(self.model_config) and self.tp_size <= 8:
-            # A temporary hack to fix the num_heads for meta-llama/Meta-Llama-3.1-405B-FP8 checkpoints
             self.model_config.hf_config.num_key_value_heads = 8
             self.vllm_model_config.hf_config.num_key_value_heads = 8
             monkey_patch_vllm_qvk_linear_loader()
@@ -191,8 +199,8 @@ def load_model(self):
             cache_config=None,
         )
         self.sliding_window_size = (
-            self.model.get_window_size()
-            if hasattr(self.model, "get_window_size")
+            self.model.get_attention_sliding_window_size()
+            if hasattr(self.model, "get_attention_sliding_window_size")
             else None
         )
         self.is_generation = is_generation_model(
@@ -206,7 +214,8 @@ def load_model(self):
             f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
         )
 
-    def update_weights(self, model_path, load_format):
+    def update_weights(self, model_path: str, load_format: str):
+        """Update weights in-place."""
         from vllm.model_executor.model_loader.loader import (
             DefaultModelLoader,
             device_loading_context,
@@ -222,6 +231,7 @@ def update_weights(self, model_path, load_format):
         target_device = torch.device(self.device_config.device)
 
         try:
+            # TODO: Use a better method to check this
             vllm_model_config = VllmModelConfig(
                 model=model_path,
                 quantization=self.server_args.quantization,
@@ -291,7 +301,7 @@ def model_load_weights(model, iter):
         logger.info(f"[gpu={self.gpu_id}] Update weights end.")
         return True, "Succeeded to update model weights"
 
-    def profile_max_num_token(self, total_gpu_memory):
+    def profile_max_num_token(self, total_gpu_memory: int):
         available_gpu_memory = get_available_gpu_memory(
             self.gpu_id, distributed=self.tp_size > 1
         )
@@ -319,7 +329,10 @@ def profile_max_num_token(self, total_gpu_memory):
         return max_num_token
 
     def init_memory_pool(
-        self, total_gpu_memory, max_num_reqs=None, max_total_tokens=None
+        self,
+        total_gpu_memory: int,
+        max_num_reqs: int = None,
+        max_total_tokens: int = None,
     ):
         self.max_total_num_tokens = self.profile_max_num_token(total_gpu_memory)
         if max_total_tokens is not None:
@@ -388,6 +401,7 @@ def init_cublas(self):
         return c
 
     def init_flashinfer(self):
+        """Init flashinfer attention kernel wrappers."""
         if self.server_args.disable_flashinfer:
             assert (
                 self.sliding_window_size is None
@@ -448,6 +462,11 @@ def init_flashinfer(self):
         )
 
     def init_cuda_graphs(self):
+        """Capture cuda graphs."""
+        if not self.is_generation:
+            # TODO: Currently, cuda graph only captures decode steps, which only exists for generation models
+            return
+
         from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
 
         if self.server_args.disable_cuda_graph or self.server_args.disable_flashinfer:
@@ -457,7 +476,12 @@ def init_cuda_graphs(self):
         logger.info(
             f"[gpu={self.gpu_id}] Capture cuda graph begin. This can take up to several minutes."
         )
-        batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 17)]
+
+        if self.server_args.disable_cuda_graph_padding:
+            batch_size_list = list(range(1, 32)) + [64, 128]
+        else:
+            batch_size_list = [1, 2, 4] + [i * 8 for i in range(1, 21)]
+
         self.cuda_graph_runner = CudaGraphRunner(
             self,
             max_batch_size_to_capture=max(batch_size_list),
diff --git a/python/sglang/srt/models/gemma2.py b/python/sglang/srt/models/gemma2.py
index 37d926c34f2..c6dbc7e5569 100644
--- a/python/sglang/srt/models/gemma2.py
+++ b/python/sglang/srt/models/gemma2.py
@@ -46,7 +46,7 @@
 
 # Aligned with HF's implementation, using sliding window inclusive with the last token
 # SGLang assumes exclusive
-def get_window_size(config):
+def get_attention_sliding_window_size(config):
     return config.sliding_window - 1
 
 
@@ -213,7 +213,11 @@ def __init__(
             self.scaling,
             num_kv_heads=self.num_kv_heads,
             layer_id=layer_idx,
-            sliding_window_size=get_window_size(config) if use_sliding_window else None,
+            sliding_window_size=(
+                get_attention_sliding_window_size(config)
+                if use_sliding_window
+                else None
+            ),
             logit_cap=self.config.attn_logit_softcapping,
         )
 
@@ -406,8 +410,8 @@ def forward(
             input_ids, hidden_states, self.model.embed_tokens.weight, input_metadata
         )
 
-    def get_window_size(self):
-        return get_window_size(self.config)
+    def get_attention_sliding_window_size(self):
+        return get_attention_sliding_window_size(self.config)
 
     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         stacked_params_mapping = [
diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py
index 75b086fd6a1..4a0a08bf88b 100644
--- a/python/sglang/srt/models/grok.py
+++ b/python/sglang/srt/models/grok.py
@@ -295,12 +295,14 @@ def __init__(
         self.config = config
         self.quant_config = quant_config
         self.model = Grok1Model(config, quant_config=quant_config)
-        # self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size)
-        self.lm_head = ReplicatedLinear(config.hidden_size, config.vocab_size)
-        self.logits_processor = LogitsProcessor(config, skip_all_gather=True)
+        self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size)
+        self.logits_processor = LogitsProcessor(config)
 
         # Monkey patch _prepare_weights to load pre-sharded weights
         setattr(DefaultModelLoader, "_prepare_weights", _prepare_presharded_weights)
+
+        self.use_presharded_weights = True
+
         warnings.filterwarnings("ignore", category=FutureWarning)
 
     def forward(
@@ -356,6 +358,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                     continue
                 name = name.replace(weight_name, param_name)
+                if self.use_presharded_weights:
+                    extra_kwargs = {
+                        "use_presharded_weights": self.use_presharded_weights
+                    }
+                else:
+                    extra_kwargs = {}
+
                 param = params_dict[name]
                 weight_loader = param.weight_loader
                 weight_loader(
@@ -364,7 +373,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                     weight_name,
                     shard_id=shard_id,
                     expert_id=expert_id,
-                    pre_sharded=get_tensor_model_parallel_world_size() > 1,
+                    **extra_kwargs,
                 )
                 break
             else:
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 33451d645e7..870169c6d58 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -81,13 +81,12 @@ class ServerArgs:
     disable_cuda_graph: bool = False
     disable_cuda_graph_padding: bool = False
     disable_disk_cache: bool = False
+    disable_custom_all_reduce: bool = False
     enable_mixed_chunk: bool = False
     enable_torch_compile: bool = False
     enable_p2p_check: bool = False
     enable_mla: bool = False
-    attention_reduce_in_fp32: bool = False
-    efficient_weight_load: bool = False
-    disable_custom_all_reduce: bool = False
+    triton_attention_reduce_in_fp32: bool = False
 
     # Distributed args
     nccl_init_addr: Optional[str] = None
@@ -404,6 +403,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
             action="store_true",
             help="Disable disk cache to avoid possible crashes related to file system or high concurrency.",
         )
+        parser.add_argument(
+            "--disable-custom-all-reduce",
+            action="store_true",
+            default=False,
+            help="Disable the custom all-reduce kernel and fall back to NCCL.",
+        )
         parser.add_argument(
             "--enable-mixed-chunk",
             action="store_true",
@@ -425,7 +430,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
             help="Enable Multi-head Latent Attention (MLA) for DeepSeek-V2.",
         )
         parser.add_argument(
-            "--attention-reduce-in-fp32",
+            "--triton-attention-reduce-in-fp32",
             action="store_true",
             help="Cast the intermediate attention results to fp32 to avoid possible crashes related to fp16. "
             "This only affects Triton attention kernels.",
         )
@@ -435,12 +440,6 @@ def add_cli_args(parser: argparse.ArgumentParser):
             action="store_true",
             help="Turn on memory efficient weight loading with quantization (quantize per layer during loading).",
         )
-        parser.add_argument(
-            "--disable-custom-all-reduce",
-            action="store_true",
-            default=False,
-            help="Disable the custom all-reduce kernel and fall back to NCCL.",
-        )
 
     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index a15ea16307b..93c54782a09 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -347,7 +347,7 @@ def suppress_other_loggers():
         logging.WARN
     )
     logging.getLogger("vllm.selector").setLevel(logging.WARN)
-    logging.getLogger("vllm.utils").setLevel(logging.WARN)
+    logging.getLogger("vllm.utils").setLevel(logging.ERROR)
 
 
 def assert_pkg_version(pkg: str, min_version: str, message: str):
@@ -451,10 +451,6 @@ def load_model(
                 quant_method = getattr(module, "quant_method", None)
                 if quant_method is not None:
                     quant_method.process_weights_after_loading(module)
-                # FIXME: Remove this after Mixtral is updated
-                # to use quant_method.
-                if hasattr(module, "process_weights_after_loading"):
-                    module.process_weights_after_loading()
 
     # NOTE(woosuk): For accurate performance evaluation, we assign
     # random values to the weights.
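Taken together, the init_cuda_graphs change above captures one graph per batch size in batch_size_list; at decode time a batch is padded up to the nearest captured size, unless padding is disabled, in which case every small size must be captured exactly, hence the dense list. A minimal sketch of that rounding logic, assuming the two capture lists from the patch (the helper name is illustrative, not part of the patch):

import bisect

padded_list = [1, 2, 4] + [i * 8 for i in range(1, 21)]  # pad up to the nearest size
unpadded_list = list(range(1, 32)) + [64, 128]           # sizes must match exactly

def pad_to_captured_size(batch_size: int, capture_list: list) -> int | None:
    # Round a runtime batch size up to the nearest captured graph size;
    # None means no captured graph fits, so fall back to eager execution.
    idx = bisect.bisect_left(capture_list, batch_size)
    return capture_list[idx] if idx < len(capture_list) else None

assert pad_to_captured_size(37, padded_list) == 40
# With padding disabled, a captured graph is reused only on an exact hit:
assert 24 in unpadded_list and 37 not in unpadded_list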
diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py
index 9386d7f7afd..e519c92829b 100644
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -24,7 +24,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from sglang.srt.server import Runtime
-from sglang.srt.utils import is_generation_model
 
 DEFAULT_PROMPTS = [
     # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -63,8 +62,8 @@ class HFRunner:
     def __init__(
         self,
         model_path,
-        torch_dtype=torch.float16,
-        is_generation_model=None,
+        torch_dtype,
+        is_generation_model,
     ):
         self.in_queue = multiprocessing.Queue()
         self.out_queue = multiprocessing.Queue()
@@ -90,11 +89,8 @@ def start_model_process(
             trust_remote_code=True,
         )
 
-        self.is_generation_model = (
-            is_generation_model(model_path)
-            if is_generation_model is None
-            else is_generation_model
-        )
+        self.is_generation_model = is_generation_model
+
         if self.is_generation_model:
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_path,
@@ -176,16 +172,12 @@ class SRTRunner:
     def __init__(
         self,
         model_path,
+        torch_dtype,
+        is_generation_model,
         tp_size=1,
-        torch_dtype=torch.float16,
-        is_generation_model=None,
         port=5157,
     ):
-        self.is_generation_model = (
-            is_generation_model(model_path)
-            if is_generation_model is None
-            else is_generation_model
-        )
+        self.is_generation_model = is_generation_model
         self.runtime = Runtime(
             model_path=model_path,
             tp_size=tp_size,
diff --git a/scripts/convert_yi_vl.py b/scripts/deprecated/convert_yi_vl.py
similarity index 100%
rename from scripts/convert_yi_vl.py
rename to scripts/deprecated/convert_yi_vl.py
diff --git a/scripts/convert_yi_vl.sh b/scripts/deprecated/convert_yi_vl.sh
similarity index 100%
rename from scripts/convert_yi_vl.sh
rename to scripts/deprecated/convert_yi_vl.sh
diff --git a/test/srt/models/test_embedding_models.py b/test/srt/models/test_embedding_models.py
index 67e47d90d3b..44fed2ad0bb 100644
--- a/test/srt/models/test_embedding_models.py
+++ b/test/srt/models/test_embedding_models.py
@@ -59,7 +59,7 @@ def assert_close_prefill_logits(
                 tolerance = 1e-2
                 assert torch.all(
                     abs(similarities - 1) < tolerance
-                ), f"embeddings not all close"
+                ), "embeddings are not all close"
 
     def test_prefill_logits(self):
         for model, tp_size in MODELS:
diff --git a/test/srt/models/test_generation_models.py b/test/srt/models/test_generation_models.py
index bb56ebdad79..ba64907eae5 100644
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -59,7 +59,7 @@ def assert_close_prefill_logits_and_output_strs(
                 tolerance = 3e-2
                 assert torch.all(
                     abs(hf_logprobs - srt_logprobs) < tolerance
-                ), f"prefill logprobs are not all close"
+                ), "prefill logprobs are not all close"
 
         print(hf_outputs.output_strs)
         print(srt_outputs.output_strs)
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 4d3f7de30a0..8a887912a0a 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -14,7 +14,7 @@
         "test_torch_compile.py",
         "test_triton_attn_backend.py",
         "test_vision_openai_server.py",
-        "test_large_max_new_tokens.py",
+        "test_update_weights.py",
         "models/test_generation_models.py",
         "models/test_embedding_models.py",
         "sampling/penaltylib",
diff --git a/test/srt/test_vision_openai_server.py b/test/srt/test_vision_openai_server.py
index 3e858dfa72d..0a477a92aed 100644
--- a/test/srt/test_vision_openai_server.py
+++ b/test/srt/test_vision_openai_server.py
@@ -2,8 +2,6 @@
 import io
 import json
 import os
-import sys
-import time
 import unittest
 
 import numpy as np
@@ -12,12 +10,10 @@
 from decord import VideoReader, cpu
 from PIL import Image
 
-from sglang.srt.hf_transformers_utils import get_tokenizer
 from sglang.srt.utils import kill_child_process
 from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server
 
-# python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --tokenizer-path lmms-lab/llavanext-qwen-siglip-tokenizer --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384
 class TestOpenAIVisionServer(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -32,11 +28,9 @@ def setUpClass(cls):
             other_args=[
                 "--chat-template",
                 "chatml-llava",
-                "--tokenizer-path",
-                "lmms-lab/llavanext-qwen-siglip-tokenizer",
                 "--chunked-prefill-size",
                 "16384",
-                "--log-requests",
+                # "--log-requests",
             ],
         )
         cls.base_url += "/v1"
@@ -132,7 +126,6 @@ def test_video_chat_completion(self):
 
         messages = self.prepare_video_messages(file_path)
 
-        start_time = time.time()
         video_request = client.chat.completions.create(
             model="default",
             messages=messages,
@@ -140,15 +133,14 @@ def test_video_chat_completion(self):
             max_tokens=1024,
             stream=True,
         )
+        print("-" * 30)
         video_response = ""
-
         for chunk in video_request:
             if chunk.choices[0].delta.content is not None:
                 content = chunk.choices[0].delta.content
                 video_response += content
-                sys.stdout.write(content)
-                sys.stdout.flush()
+                print(content, end="", flush=True)
         print("-" * 30)
 
         # Add assertions to validate the video response

From b20daf982a82bbeda120d2c30532c74970bd053d Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Sat, 24 Aug 2024 14:50:05 -0700
Subject: [PATCH 07/88] Update README.md (#1198)

---
 README.md | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 04dd913baa4..2fc91e7858f 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@
 SGLang is a fast serving framework for large language models and vision language models.
 It makes your interaction with models faster and more controllable by co-designing the backend runtime and frontend language.
 The core features include:
-- **Fast Backend Runtime**: Efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, flashinfer kernels, and quantization (AWQ/FP8/GPTQ/Marlin).
+- **Fast Backend Runtime**: Efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, and quantization (AWQ/FP8/GPTQ/Marlin).
 - **Flexible Frontend Language**: Enables easy programming of LLM applications with chained generation calls, advanced prompting, control flow, multiple modalities, parallelism, and external interactions.
 
 ## News
@@ -248,17 +248,19 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
 
 #### Use Models From ModelScope
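For reference, the streaming loop rewritten in test_vision_openai_server.py above follows the standard OpenAI-client consumption pattern. A minimal standalone sketch, with the endpoint URL, API key, and prompt as placeholder assumptions:

import openai

# Placeholder endpoint; the test launches its own server and appends /v1.
client = openai.OpenAI(api_key="EMPTY", base_url="http://127.0.0.1:30000/v1")

response = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Describe this video."}],
    temperature=0,
    max_tokens=1024,
    stream=True,
)

video_response = ""
for chunk in response:
    # Each chunk carries an incremental delta; content is None for role or
    # finish chunks, so skip those.
    content = chunk.choices[0].delta.content
    if content is not None:
        video_response += content
        print(content, end="", flush=True)
print()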