Optimize date_histograms across daylight savings time #55559
Changes from 43 commits
@@ -0,0 +1,120 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common;

import org.elasticsearch.common.time.DateFormatter;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.time.ZoneId;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

@Fork(2)
@Warmup(iterations = 10)
@Measurement(iterations = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)

Reviewer: Nanoseconds don't seem handy for the output and I wonder whether it makes sense to switch to microseconds here? Especially when […]

Author: Thanks so much for reviewing so quickly! Some of this stuff I just copied from another benchmark to get started and never thought to play with. The count is interesting - it is nice to be able to see a […]

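If the output unit were switched as discussed above, only the annotation header would change. A minimal sketch, assuming nothing else about the class changes (the revision shown in this diff keeps nanoseconds):

```java
// Illustrative only: microsecond output as suggested in the review above.
@Fork(2)
@Warmup(iterations = 10)
@Measurement(iterations = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
```
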
@State(Scope.Benchmark)
public class RoundingBenchmark {
    private static final DateFormatter FORMATTER = DateFormatter.forPattern("date_optional_time");

    @Param({
        "2000-01-01 to 2020-01-01", // A super long range
        "2000-10-01 to 2000-11-01", // A whole month which is pretty believable
        "2000-10-29 to 2000-10-30", // A date right around daylight savings time.
        "2000-06-01 to 2000-06-02"  // A date fully in one time zone. Should be much faster than above.
    })
    public String range;

    @Param({ "java time", "es" })
    public String rounder;

    @Param({ "UTC", "America/New_York" })
    public String zone;

    @Param({ "MONTH_OF_YEAR", "HOUR_OF_DAY" })
    public String timeUnit;

    @Param({ "1", "10000", "1000000", "100000000" })
    public int count;

    private long min;
    private long max;
    private long[] dates;
    private Supplier<Rounding.Prepared> rounderBuilder;

    @Setup
    public void buildDates() {
        String[] r = range.split(" to ");
        min = FORMATTER.parseMillis(r[0]);
        max = FORMATTER.parseMillis(r[1]);
        dates = new long[count];
        long date = min;
        long diff = (max - min) / dates.length;
        for (int i = 0; i < dates.length; i++) {
            if (date >= max) {
                throw new IllegalStateException("made a bad date [" + date + "]");
            }
            dates[i] = date;
            date += diff;
        }
        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.valueOf(timeUnit)).timeZone(ZoneId.of(zone)).build();
        switch (rounder) {
            case "java time":
                rounderBuilder = rounding::prepareJavaTime;
                break;
            case "es":
                rounderBuilder = () -> rounding.prepare(min, max);
                break;
            default:
throw new IllegalArgumentException("Expectd rounder to be [java time] or [es]"); | ||
        }
    }

    @Benchmark
    public long round() {
        long sum = 0;
        Rounding.Prepared rounder = rounderBuilder.get();
        for (int i = 0; i < dates.length; i++) {
            sum += rounder.round(dates[i]);

Reviewer: It's safer here to sink it into a blackhole instead to reduce the potential for compiler tricks.

Author: I started with that and found that it added pretty significant overhead, smearing out some of the differences. When I removed it the differences became more "sharp". I think this sort of thing is a safe way to work around the compiler tricks.

Reviewer: The thing is that with the current code it might be able to do loop unrolling (but then: I did not have a look at the assembler code yet).

Author: My first instinct is that loop unrolling wouldn't bother me too much. I'm not really trying to measure the iteration, just the round calls. If loop unrolling let it optimize the round in fun ways, that might make it less valid. This is called in a fairly tight loop in production too. Not this tight. And not one the JVM would be able to unroll, though.

Reviewer: I ran the microbenchmarks again, once with your implementation and once with a cheaper version of Blackhole (calling a method that is not inlined), and there are cases where the score is vastly different, e.g.:

sum (your approach): [results table not captured]

don't inline: [results table not captured]

This means that in some cases your microbenchmark implementation shows numbers that are, e.g. in the last case in the table above, ~1.4 times faster than they are if we don't inline, which is more likely in production code. We do see a significant speedup compared to the original implementation in both microbenchmark approaches anyway, so I think the actual change makes sense. It's just that the change is probably slightly less pronounced in practice than the microbenchmarks indicate.

Author: Blackhole it is then! I'll make the change. I had thought that the synchronization overhead that Blackhole adds was sort of unrealistic. And it might be, but it is more realistic than sum.

Reviewer: s/synchronization/volatile/

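A minimal sketch of the Blackhole variant being discussed, assuming JMH's `org.openjdk.jmh.infra.Blackhole` is injected as a benchmark parameter (the parameter name `bh` is illustrative; this is not the code in the revision shown here):

```java
// Sketch only. Requires: import org.openjdk.jmh.infra.Blackhole;
// Sinking each rounded value into the Blackhole keeps the JIT from proving
// the results unused and eliding the round() calls, at the cost of a small
// per-call overhead.
@Benchmark
public void round(Blackhole bh) {
    Rounding.Prepared rounder = rounderBuilder.get();
    for (int i = 0; i < dates.length; i++) {
        bh.consume(rounder.round(dates[i]));
    }
}
```
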
        }
        return sum;
    }

    @Benchmark
    public long nextRoundingValue() {
        long sum = 0;
        Rounding.Prepared rounder = rounderBuilder.get();
        for (int i = 0; i < dates.length; i++) {
            sum += rounder.nextRoundingValue(dates[i]);

Reviewer: It's safer here to sink it into a blackhole instead to reduce the potential for compiler tricks.

Author: I talked with @danielmitterdorfer about this. The blackhole adds quite a bit of overhead for such fast methods, something like 10%, which makes it harder to see changes that make the thing faster. On the other hand, the JVM can totally unroll this loop. And in production it (mostly) can't, because these will be coming from Lucene.

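The "cheaper version of Blackhole (calling a method that is not inlined)" mentioned in the earlier thread could be set up roughly like this with JMH's `CompilerControl` annotation. This is an assumption about how that measurement was arranged, not the reviewer's actual code, and the `sink` helper name is made up:

```java
// Sketch only. Requires: import org.openjdk.jmh.annotations.CompilerControl;

// The JIT is told not to inline this method, so it cannot prove the argument
// is unused and must keep the calls that compute it.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private static void sink(long value) {}

@Benchmark
public void nextRoundingValueDontInline() {
    Rounding.Prepared rounder = rounderBuilder.get();
    for (int i = 0; i < dates.length; i++) {
        sink(rounder.nextRoundingValue(dates[i]));
    }
}
```
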
        }
        return sum;
    }
}
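To make it easier to see what the two `rounder` parameter values compare, here is a small standalone sketch built only from calls that appear in the benchmark (`Rounding.builder`, `timeZone`, `build`, `prepareJavaTime`, `prepare`, `round`). It is placed in `org.elasticsearch.common` like the benchmark because the visibility of `prepareJavaTime` outside that package is not established by this diff; the class name, timestamps, and printed output are illustrative:

```java
package org.elasticsearch.common;

import org.elasticsearch.common.time.DateFormatter;

import java.time.ZoneId;

public class RoundingComparisonSketch {
    public static void main(String[] args) {
        DateFormatter formatter = DateFormatter.forPattern("date_optional_time");
        // A day that spans a daylight savings transition in America/New_York.
        long min = formatter.parseMillis("2000-10-29");
        long max = formatter.parseMillis("2000-10-30");

        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.HOUR_OF_DAY)
            .timeZone(ZoneId.of("America/New_York"))
            .build();

        long timestamp = min + (max - min) / 2;

        // "java time" parameter value: rounding backed by java.time.
        Rounding.Prepared javaTime = rounding.prepareJavaTime();

        // "es" parameter value: rounding prepared once for the [min, max] range,
        // the path this PR optimizes.
        Rounding.Prepared es = rounding.prepare(min, max);

        // Both prepared roundings should agree on the rounded value.
        System.out.println(javaTime.round(timestamp));
        System.out.println(es.round(timestamp));
    }
}
```
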
Author: @danielmitterdorfer, these are for you!

Reviewer: These changes make sense to me!