Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cropping_mode to DelayedAggregation #105

Merged
merged 5 commits into from
Oct 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 35 additions & 13 deletions src/diart/blocks/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,33 @@


class AggregationStrategy:
"""Abstract class representing a strategy to aggregate overlapping buffers"""
"""Abstract class representing a strategy to aggregate overlapping buffers

Parameters
----------
cropping_mode: ("strict", "loose", "center"), optional
Defines the mode to crop buffer chunks as in pyannote.core.
See https://pyannote.github.io/pyannote-core/reference.html#pyannote.core.SlidingWindowFeature.crop
Defaults to "loose".
"""

def __init__(self, cropping_mode: Literal["strict", "loose", "center"] = "loose"):
assert cropping_mode in ["strict", "loose", "center"], f"Invalid cropping mode `{cropping_mode}`"
self.cropping_mode = cropping_mode

@staticmethod
def build(name: Literal["mean", "hamming", "first"]) -> 'AggregationStrategy':
def build(
name: Literal["mean", "hamming", "first"],
cropping_mode: Literal["strict", "loose", "center"] = "loose"
) -> 'AggregationStrategy':
"""Build an AggregationStrategy instance based on its name"""
assert name in ("mean", "hamming", "first")
if name == "mean":
return AverageStrategy()
return AverageStrategy(cropping_mode)
elif name == "hamming":
return HammingWeightedAverageStrategy()
return HammingWeightedAverageStrategy(cropping_mode)
else:
return FirstOnlyStrategy()
return FirstOnlyStrategy(cropping_mode)

def __call__(self, buffers: List[SlidingWindowFeature], focus: Segment) -> SlidingWindowFeature:
"""Aggregate chunks over a specific region.
Expand Down Expand Up @@ -55,11 +70,11 @@ def aggregate(self, buffers: List[SlidingWindowFeature], focus: Segment) -> np.n
hamming, intersection = [], []
for buffer in buffers:
# Crop buffer to focus region
b = buffer.crop(focus, fixed=focus.duration)
b = buffer.crop(focus, mode=self.cropping_mode, fixed=focus.duration)
# Crop Hamming window to focus region
h = np.expand_dims(np.hamming(num_frames), axis=-1)
h = SlidingWindowFeature(h, buffer.sliding_window)
h = h.crop(focus, fixed=focus.duration)
h = h.crop(focus, mode=self.cropping_mode, fixed=focus.duration)
hamming.append(h.data)
intersection.append(b.data)
hamming, intersection = np.stack(hamming), np.stack(intersection)
Expand All @@ -73,7 +88,7 @@ class AverageStrategy(AggregationStrategy):
def aggregate(self, buffers: List[SlidingWindowFeature], focus: Segment) -> np.ndarray:
# Stack all overlapping regions
intersection = np.stack([
buffer.crop(focus, fixed=focus.duration)
buffer.crop(focus, mode=self.cropping_mode, fixed=focus.duration)
for buffer in buffers
])
return np.mean(intersection, axis=0)
Expand All @@ -83,7 +98,7 @@ class FirstOnlyStrategy(AggregationStrategy):
"""Instead of aggregating, keep the first focus region in the buffer list"""

def aggregate(self, buffers: List[SlidingWindowFeature], focus: Segment) -> np.ndarray:
return buffers[0].crop(focus, fixed=focus.duration)
return buffers[0].crop(focus, mode=self.cropping_mode, fixed=focus.duration)


class DelayedAggregation:
Expand All @@ -97,11 +112,15 @@ class DelayedAggregation:
latency: float, optional
Desired latency, in seconds. Defaults to step.
The higher the latency, the more overlapping windows to aggregate.
strategy: ("mean", "hamming", "any"), optional
strategy: ("mean", "hamming", "first"), optional
Specifies how to aggregate overlapping windows. Defaults to "hamming".
"mean": simple average
"hamming": average weighted by the Hamming window values (aligned to the buffer)
"any": no aggregation, pick the first overlapping window
"first": no aggregation, pick the first overlapping window
cropping_mode: ("strict", "loose", "center"), optional
Defines the mode to crop buffer chunks as in pyannote.core.
See https://pyannote.github.io/pyannote-core/reference.html#pyannote.core.SlidingWindowFeature.crop
Defaults to "loose".

Example
--------
Expand Down Expand Up @@ -130,18 +149,21 @@ def __init__(
step: float,
latency: Optional[float] = None,
strategy: Literal["mean", "hamming", "first"] = "hamming",
cropping_mode: Literal["strict", "loose", "center"] = "loose"
juanmc2005 marked this conversation as resolved.
Show resolved Hide resolved
):
self.step = step
self.latency = latency
self.strategy = strategy
assert cropping_mode in ["strict", "loose", "center"], f"Invalid cropping mode `{cropping_mode}`"
self.cropping_mode = cropping_mode
bhigy marked this conversation as resolved.
Show resolved Hide resolved

if self.latency is None:
self.latency = self.step

assert self.step <= self.latency, "Invalid latency requested"

self.num_overlapping_windows = int(round(self.latency / self.step))
self.aggregate = AggregationStrategy.build(self.strategy)
self.aggregate = AggregationStrategy.build(self.strategy, self.cropping_mode)

def _prepend(
self,
Expand All @@ -159,7 +181,7 @@ def _prepend(
num_frames = output_window.data.shape[0]
first_region = Segment(0, output_region.end)
first_output = buffers[0].crop(
first_region, fixed=first_region.duration
first_region, mode=self.cropping_mode, fixed=first_region.duration
)
first_output[-num_frames:] = output_window.data
resolution = output_region.end / first_output.shape[0]
Expand Down
2 changes: 2 additions & 0 deletions src/diart/blocks/diarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ def __init__(self, config: Optional[PipelineConfig] = None):
self.config.step,
self.config.latency,
strategy="hamming",
cropping_mode="loose",
)
self.audio_aggregation = DelayedAggregation(
self.config.step,
self.config.latency,
strategy="first",
cropping_mode="center",
)
self.binarize = Binarize(self.config.tau_active)

Expand Down