feature: Interpolate between closest ranks #2995

Merged (4 commits) on Oct 22, 2024
97 changes: 72 additions & 25 deletions src/components/explore/ProbeExplorer.svelte
@@ -42,7 +42,7 @@

   export let normalizedData;

-  export let smoothnessLevel;
+  export let interpolate;
   export let pointMetricType;
   export let overTimePointMetricType = pointMetricType;

@@ -56,44 +56,92 @@
     ? normData.filter((d) => !isEmpty(d.non_norm_histogram))
     : normData;

-  function smoothenData(data, accessor, level) {
-    // Interpolates percentiles values by applying a Moving Average.
-    if (!level) {
-      return data;
+  function getInterpPercBtnRanksForHistogram(histogram, percentiles) {
+    // Compute cumulative frequencies
+    let cumFreq = [];
+    let totalFreq = 0;
+
+    for (let i = 0; i < histogram.length; i += 1) {
+      totalFreq += histogram[i].value;
+      cumFreq.push({ bin: histogram[i].bin, cumFreq: totalFreq });
     }
+    // Normalize so we can find percentiles appropriately
+    if (totalFreq !== 1) {
+      cumFreq = cumFreq.map((item) => ({
+        bin: item.bin,
+        cumFreq: item.cumFreq / totalFreq,
+      }));
+    }
-    const windowSize = data.length / 100;
-    const dataField = data[0][accessor];
-    const keys = Object.keys(dataField);

-    return data.map((item, idx) => {
-      const windowData = data.slice(Math.max(0, idx - windowSize + 1), idx + 1);
+    // Find the interval where each percentile falls and interpolate
+    const percentileValues = {};
+    for (let i = 0; i < percentiles.length; i += 1) {
+      const percentile = percentiles[i];
+      let targetFreq = percentile / 100;
+      if (targetFreq <= cumFreq[0].cumFreq) {
+        percentileValues[percentile] = cumFreq[0].bin;
+      }
+      if (targetFreq >= totalFreq) {
+        percentileValues[percentile] = cumFreq[cumFreq.length - 1].bin;
+      }
+      for (let j = 0; j < cumFreq.length - 1; j += 1) {
+        if (
+          cumFreq[j].cumFreq <= targetFreq &&
+          cumFreq[j + 1].cumFreq >= targetFreq
+        ) {
+          let x0 = cumFreq[j].cumFreq;
+          let x1 = cumFreq[j + 1].cumFreq;
+          let y0 = cumFreq[j].bin;
+          let y1 = cumFreq[j + 1].bin;
+          // Linear interpolation formula
+          let percentileValue =
+            y0 + ((targetFreq - x0) * (y1 - y0)) / (x1 - x0);
+          percentileValues[percentile] = percentileValue;
+        }
+      }
+    }
+    return percentileValues;
+  }

-      const smoothedValues = keys.reduce((acc, key) => {
-        const sum = windowData.reduce(
-          (total, wItem) => total + wItem[accessor][key],
-          0
-        );
-        acc[key] = sum / windowData.length;
-        return acc;
-      }, {});
-      const { [accessor]: _, ...rest } = item;
+  function getInterpolatedPercentilesBtnRanks(
+    data,
+    percentileAccessor,
+    normalizationType
+  ) {
+    // Generates percentiles using the Interpolation Between Closest Ranks method.
+    const histogramAccessor =
+      normalizationType === 'normalized' ? 'histogram' : 'non_norm_histogram';
+    const percentiles = Object.keys(data[0][percentileAccessor]);
+
+    return data.map((item) => {
+      const histogram = item[histogramAccessor];
+      const interpVals = histogram
+        ? getInterpPercBtnRanksForHistogram(histogram, percentiles)
+        : percentiles;
+      const { [percentileAccessor]: _, ...rest } = item;
       return {
         ...rest,
-        [accessor]: smoothedValues,
+        [percentileAccessor]: interpVals,
       };
     });
   }

-  function filterAndSmoothenData(data, normalizationType) {
+  function filterAndInterpolateData(data, normalizationType) {
     const filtered = filterData(data, normalizationType);
-    return smoothenData(filtered, overTimePointMetricType, smoothnessLevel);
+    return interpolate
+      ? getInterpolatedPercentilesBtnRanks(
+          filtered,
+          overTimePointMetricType,
+          normalizationType
+        )
+      : filtered;
   }

-  let data = filterAndSmoothenData(
+  let data = filterAndInterpolateData(
     normalizedData,
     $store.productDimensions.normalizationType
   );
-  $: data = filterAndSmoothenData(
+  $: data = filterAndInterpolateData(
     normalizedData,
     $store.productDimensions.normalizationType
   );
@@ -445,7 +493,6 @@
         }
       }}
       {distViewButtonId}
-      {smoothnessLevel}
     >
       <slot name="additional-plot-elements" />
       <div slot="smoother"><slot name="smoother" /></div>
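A note for readers of the diff above: the new getInterpPercBtnRanksForHistogram builds a normalized cumulative distribution over the histogram buckets, finds the two adjacent ranks that bracket each requested percentile, and interpolates linearly between their bucket values. The sketch below restates that idea as a tiny standalone function; the interpolatePercentile name, the toy { bin, value } histogram, and the numbers are illustrative only and are not part of this PR.

// Minimal sketch of interpolation between closest ranks, mirroring the
// { bin, value } shape used in the diff above. Toy data, not GLAM data.
function interpolatePercentile(histogram, percentile) {
  // Build normalized cumulative frequencies, one entry per bucket.
  const total = histogram.reduce((sum, d) => sum + d.value, 0);
  let running = 0;
  const cum = histogram.map((d) => {
    running += d.value;
    return { bin: d.bin, cumFreq: running / total };
  });

  const target = percentile / 100;
  if (target <= cum[0].cumFreq) return cum[0].bin;
  if (target >= cum[cum.length - 1].cumFreq) return cum[cum.length - 1].bin;

  // Find the pair of adjacent ranks that brackets the target and interpolate.
  for (let j = 0; j < cum.length - 1; j += 1) {
    const lo = cum[j];
    const hi = cum[j + 1];
    if (lo.cumFreq <= target && target <= hi.cumFreq) {
      return (
        lo.bin +
        ((target - lo.cumFreq) * (hi.bin - lo.bin)) / (hi.cumFreq - lo.cumFreq)
      );
    }
  }
  return cum[cum.length - 1].bin;
}

// Toy histogram: 10 samples at bucket 1, 30 at bucket 5, 60 at bucket 20.
const toy = [
  { bin: 1, value: 10 },
  { bin: 5, value: 30 },
  { bin: 20, value: 60 },
];
// Cumulative frequencies are 0.1, 0.4, 1.0, so the median (0.5) falls between
// ranks 0.4 and 1.0: 5 + ((0.5 - 0.4) * (20 - 5)) / (1.0 - 0.4) = 7.5.
console.log(interpolatePercentile(toy, 50)); // 7.5

Unlike the moving average this PR removes, which smoothed the percentile series over time, the interpolation recomputes each percentile from its histogram, so the result can land between bucket values (7.5 here) rather than snapping to an existing bucket.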
17 changes: 8 additions & 9 deletions src/components/explore/QuantileExplorerView.svelte
@@ -47,7 +47,7 @@

   let aggregationInfo;

-  let smoothnessLevel = false; // Only on/off now but intention is to be a gradient.
+  let interpolate = false;

   setContext('probeType', probeType);

@@ -118,9 +118,8 @@
     margin-bottom: 0px;
   }

-  .interpolator label {
-    color: var(--cool-gray-700);
-    margin-right: 5px;
+  .interpolator h3 {
+    padding-right: 5px;
   }
 </style>

@@ -170,7 +169,7 @@
 {#each probeKeys as key, i (key)}
   {#each aggregationTypes as aggType, i (aggType + timeHorizon + key)}
     {#if key === currentKey && aggType === currentAggregation}
-      {#key smoothnessLevel}
+      {#key interpolate}
         <div class="small-multiple">
           <ProbeExplorer
             aggregationsOverTimeTitle={overTimeTitle(
@@ -194,20 +193,20 @@
             comparisonKeyFormatter={(perc) => `${perc}%`}
             yScaleType={probeType === 'log' ? 'scalePoint' : 'linear'}
             {yDomain}
-            {smoothnessLevel}
+            {interpolate}
           >
             <div slot="smoother" class="interpolator">
               <input
                 id="toggleSmooth"
                 type="checkbox"
-                bind:checked={smoothnessLevel}
+                bind:checked={interpolate}
               />
               <h3 for="toggleSmooth" class="data-graphic__element-title">
-                Interpolate
+                Interpolated
               </h3>
               <span
                 use:tooltipAction={{
-                  text: 'Applies a moving average to smooth out short-term fluctuations on percentile values.',
+                  text: 'Generates percentiles using the Between Closest Ranks Linear Interpolation. This can show an innacurate representation of the data if the underlying distribution is not continuous and/or the data between bins is not uniformly distributed.',

Review comment on the tooltip text:

s/innacurate/inaccurate/

Also, this isn't quite right. This does not depend on the distribution of data between bins, only on the distribution of data within bins. It will actually handle any distribution between bins, which is fortunate since it's usually going to be some weird multimodal exponential-ish thing.

@edugfilho (Collaborator, Author) replied on Oct 22, 2024:

oops, my bad. I'll open another PR for this. I thought you were done.

Reviewer:

Sorry, I thought I was too.

                   location: 'top',
                 }}
                 class="data-graphic__element-title__icon"
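To make the review discussion above concrete: the linear step only assumes that the probability mass accumulated between two adjacent bucket points is spread uniformly between them; it makes no assumption about the shape of the distribution across buckets. A toy calculation follows (made-up numbers, not GLAM data):

// Illustration of what the linear interpolation step assumes.
const cum = [
  { bin: 10, cumFreq: 0.5 },
  { bin: 20, cumFreq: 1.0 },
];
const target = 0.75; // 75th percentile
const [lo, hi] = cum;
const p75 =
  lo.bin + ((target - lo.cumFreq) * (hi.bin - lo.bin)) / (hi.cumFreq - lo.cumFreq);
console.log(p75); // 10 + ((0.75 - 0.5) * (20 - 10)) / (1.0 - 0.5) = 15
// 15 is exact only if the mass accumulated between the two bucket points is
// spread uniformly over [10, 20]. If it all sits near 10, the true 75th
// percentile is close to 10 and the estimate overshoots. How the remaining
// mass is distributed across other buckets never enters this step, which is
// the reviewer's point: only the within-bin assumption matters.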