Skip to content

Commit f1f7459

Browse files
authored
Generalise exponential bucket histogram, add percentile calculation (#127597)
1 parent a4cb8d2 commit f1f7459

File tree

9 files changed

+333
-78
lines changed

9 files changed

+333
-78
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.common.metrics;
11+
12+
import java.util.Arrays;
13+
import java.util.concurrent.atomic.LongAdder;
14+
15+
/**
16+
* A histogram with a fixed number of buckets of exponentially increasing width.
17+
* <p>
18+
* The bucket boundaries are defined by increasing powers of two, e.g.
19+
* <code>
20+
* (-&infin;, 1), [1, 2), [2, 4), [4, 8), ..., [2^({@link #bucketCount}-2), &infin;)
21+
* </code>
22+
* There are {@link #bucketCount} buckets.
23+
*/
24+
public class ExponentialBucketHistogram {
25+
26+
private final int bucketCount;
27+
private final long lastBucketLowerBound;
28+
29+
public static int[] getBucketUpperBounds(int bucketCount) {
30+
int[] bounds = new int[bucketCount - 1];
31+
for (int i = 0; i < bounds.length; i++) {
32+
bounds[i] = 1 << i;
33+
}
34+
return bounds;
35+
}
36+
37+
private int getBucket(long observedValue) {
38+
if (observedValue <= 0) {
39+
return 0;
40+
} else if (lastBucketLowerBound <= observedValue) {
41+
return bucketCount - 1;
42+
} else {
43+
return Long.SIZE - Long.numberOfLeadingZeros(observedValue);
44+
}
45+
}
46+
47+
private final LongAdder[] buckets;
48+
49+
public ExponentialBucketHistogram(int bucketCount) {
50+
if (bucketCount < 2 || bucketCount > Integer.SIZE) {
51+
throw new IllegalArgumentException("Bucket count must be in [2, " + Integer.SIZE + "], got " + bucketCount);
52+
}
53+
this.bucketCount = bucketCount;
54+
this.lastBucketLowerBound = getBucketUpperBounds(bucketCount)[bucketCount - 2];
55+
buckets = new LongAdder[bucketCount];
56+
for (int i = 0; i < bucketCount; i++) {
57+
buckets[i] = new LongAdder();
58+
}
59+
}
60+
61+
public int[] calculateBucketUpperBounds() {
62+
return getBucketUpperBounds(bucketCount);
63+
}
64+
65+
public void addObservation(long observedValue) {
66+
buckets[getBucket(observedValue)].increment();
67+
}
68+
69+
/**
70+
* @return An array of frequencies of handling times in buckets with upper bounds as returned by {@link #calculateBucketUpperBounds()},
71+
* plus an extra bucket for handling times longer than the longest upper bound.
72+
*/
73+
public long[] getSnapshot() {
74+
final long[] histogram = new long[bucketCount];
75+
for (int i = 0; i < bucketCount; i++) {
76+
histogram[i] = buckets[i].longValue();
77+
}
78+
return histogram;
79+
}
80+
81+
/**
82+
* Calculate the Nth percentile value
83+
*
84+
* @param percentile The percentile as a fraction (in [0, 1.0])
85+
* @return A value greater than the specified fraction of values in the histogram
86+
* @throws IllegalArgumentException if the requested percentile is invalid
87+
*/
88+
public long getPercentile(float percentile) {
89+
return getPercentile(percentile, getSnapshot(), calculateBucketUpperBounds());
90+
}
91+
92+
/**
93+
* Calculate the Nth percentile value
94+
*
95+
* @param percentile The percentile as a fraction (in [0, 1.0])
96+
* @param snapshot An array of frequencies of handling times in buckets with upper bounds as per {@link #calculateBucketUpperBounds()}
97+
* @param bucketUpperBounds The upper bounds of the buckets in the histogram, as per {@link #calculateBucketUpperBounds()}
98+
* @return A value greater than the specified fraction of values in the histogram
99+
* @throws IllegalArgumentException if the requested percentile is invalid
100+
*/
101+
public long getPercentile(float percentile, long[] snapshot, int[] bucketUpperBounds) {
102+
assert snapshot.length == bucketCount && bucketUpperBounds.length == bucketCount - 1;
103+
if (percentile < 0 || percentile > 1) {
104+
throw new IllegalArgumentException("Requested percentile must be in [0, 1.0], percentile=" + percentile);
105+
}
106+
final long totalCount = Arrays.stream(snapshot).sum();
107+
long percentileIndex = (long) Math.ceil(totalCount * percentile);
108+
// Find which bucket has the Nth percentile value and return the upper bound value.
109+
for (int i = 0; i < bucketCount; i++) {
110+
percentileIndex -= snapshot[i];
111+
if (percentileIndex <= 0) {
112+
if (i == snapshot.length - 1) {
113+
return Long.MAX_VALUE;
114+
} else {
115+
return bucketUpperBounds[i];
116+
}
117+
}
118+
}
119+
assert false : "We shouldn't ever get here";
120+
return Long.MAX_VALUE;
121+
}
122+
123+
/**
124+
* Clear all values in the histogram (non-atomic)
125+
*/
126+
public void clear() {
127+
for (int i = 0; i < bucketCount; i++) {
128+
buckets[i].reset();
129+
}
130+
}
131+
}

server/src/main/java/org/elasticsearch/common/network/HandlingTimeTracker.java

+6-44
Original file line numberDiff line numberDiff line change
@@ -9,58 +9,20 @@
99

1010
package org.elasticsearch.common.network;
1111

12-
import java.util.concurrent.atomic.LongAdder;
12+
import org.elasticsearch.common.metrics.ExponentialBucketHistogram;
1313

1414
/**
1515
* Tracks how long message handling takes on a transport thread as a histogram with fixed buckets.
1616
*/
17-
public class HandlingTimeTracker {
17+
public class HandlingTimeTracker extends ExponentialBucketHistogram {
1818

19-
public static int[] getBucketUpperBounds() {
20-
int[] bounds = new int[17];
21-
for (int i = 0; i < bounds.length; i++) {
22-
bounds[i] = 1 << i;
23-
}
24-
return bounds;
25-
}
19+
public static final int BUCKET_COUNT = 18;
2620

27-
private static int getBucket(long handlingTimeMillis) {
28-
if (handlingTimeMillis <= 0) {
29-
return 0;
30-
} else if (LAST_BUCKET_LOWER_BOUND <= handlingTimeMillis) {
31-
return BUCKET_COUNT - 1;
32-
} else {
33-
return Long.SIZE - Long.numberOfLeadingZeros(handlingTimeMillis);
34-
}
21+
public static int[] getBucketUpperBounds() {
22+
return ExponentialBucketHistogram.getBucketUpperBounds(BUCKET_COUNT);
3523
}
3624

37-
public static final int BUCKET_COUNT = getBucketUpperBounds().length + 1;
38-
39-
private static final long LAST_BUCKET_LOWER_BOUND = getBucketUpperBounds()[BUCKET_COUNT - 2];
40-
41-
private final LongAdder[] buckets;
42-
4325
public HandlingTimeTracker() {
44-
buckets = new LongAdder[BUCKET_COUNT];
45-
for (int i = 0; i < BUCKET_COUNT; i++) {
46-
buckets[i] = new LongAdder();
47-
}
26+
super(BUCKET_COUNT);
4827
}
49-
50-
public void addHandlingTime(long handlingTimeMillis) {
51-
buckets[getBucket(handlingTimeMillis)].increment();
52-
}
53-
54-
/**
55-
* @return An array of frequencies of handling times in buckets with upper bounds as returned by {@link #getBucketUpperBounds()}, plus
56-
* an extra bucket for handling times longer than the longest upper bound.
57-
*/
58-
public long[] getHistogram() {
59-
final long[] histogram = new long[BUCKET_COUNT];
60-
for (int i = 0; i < BUCKET_COUNT; i++) {
61-
histogram[i] = buckets[i].longValue();
62-
}
63-
return histogram;
64-
}
65-
6628
}

server/src/main/java/org/elasticsearch/http/AbstractHttpServerTransport.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ public void incomingRequest(final HttpRequest httpRequest, final HttpChannel htt
460460
handleIncomingRequest(httpRequest, trackingChannel, httpRequest.getInboundException());
461461
} finally {
462462
final long took = threadPool.rawRelativeTimeInMillis() - startTime;
463-
networkService.getHandlingTimeTracker().addHandlingTime(took);
463+
networkService.getHandlingTimeTracker().addObservation(took);
464464
final long logThreshold = slowLogThresholdMs;
465465
if (logThreshold > 0 && took > logThreshold) {
466466
logger.warn(

server/src/main/java/org/elasticsearch/http/HttpRouteStatsTracker.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public void addResponseStats(long contentLength) {
9191
}
9292

9393
public void addResponseTime(long timeMillis) {
94-
responseTimeTracker.addHandlingTime(timeMillis);
94+
responseTimeTracker.addObservation(timeMillis);
9595
}
9696

9797
public HttpRouteStats getStats() {
@@ -102,7 +102,7 @@ public HttpRouteStats getStats() {
102102
responseStats.count().longValue(),
103103
responseStats.totalSize().longValue(),
104104
responseStats.getHistogram(),
105-
responseTimeTracker.getHistogram()
105+
responseTimeTracker.getSnapshot()
106106
);
107107
}
108108
}

server/src/main/java/org/elasticsearch/transport/InboundHandler.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ private void messageReceived(TcpChannel channel, InboundMessage message, long st
138138
}
139139
} finally {
140140
final long took = threadPool.rawRelativeTimeInMillis() - startTime;
141-
handlingTimeTracker.addHandlingTime(took);
141+
handlingTimeTracker.addObservation(took);
142142
final long logThreshold = slowLogThresholdMs;
143143
if (logThreshold > 0 && took > logThreshold) {
144144
logSlowMessage(message, took, logThreshold, responseHandler);

server/src/main/java/org/elasticsearch/transport/OutboundHandler.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ private void maybeLogSlowMessage(boolean success) {
414414
final long logThreshold = slowLogThresholdMs;
415415
if (logThreshold > 0) {
416416
final long took = threadPool.rawRelativeTimeInMillis() - startTime;
417-
handlingTimeTracker.addHandlingTime(took);
417+
handlingTimeTracker.addObservation(took);
418418
if (took > logThreshold) {
419419
logger.warn(
420420
"sending transport message [{}] of size [{}] on [{}] took [{}ms] which is above the warn "

server/src/main/java/org/elasticsearch/transport/TcpTransport.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -1001,8 +1001,8 @@ public final TransportStats getStats() {
10011001
bytesRead,
10021002
messagesSent,
10031003
bytesWritten,
1004-
networkService.getHandlingTimeTracker().getHistogram(),
1005-
outboundHandlingTimeTracker.getHistogram(),
1004+
networkService.getHandlingTimeTracker().getSnapshot(),
1005+
outboundHandlingTimeTracker.getSnapshot(),
10061006
requestHandlers.getStats()
10071007
);
10081008
}

0 commit comments

Comments
 (0)