Skip to content

Commit c8f0c8b

Browse files
[ML] Inference API add configurable connection pool TTL (elastic#127585)
* Adding ttl * Using 60 seconds as default
1 parent f4ec27e commit c8f0c8b

File tree

3 files changed

+37
-8
lines changed

3 files changed

+37
-8
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -278,13 +278,17 @@ public Collection<?> createComponents(PluginServices services) {
278278
var inferenceServices = new ArrayList<>(inferenceServiceExtensions);
279279
inferenceServices.add(this::getInferenceServiceFactories);
280280

281+
var inferenceServiceSettings = new ElasticInferenceServiceSettings(settings);
282+
inferenceServiceSettings.init(services.clusterService());
283+
281284
// Create a separate instance of HTTPClientManager with its own SSL configuration (`xpack.inference.elastic.http.ssl.*`).
282285
var elasticInferenceServiceHttpClientManager = HttpClientManager.create(
283286
settings,
284287
services.threadPool(),
285288
services.clusterService(),
286289
throttlerManager,
287-
getSslService()
290+
getSslService(),
291+
inferenceServiceSettings.getConnectionTtl()
288292
);
289293

290294
var elasticInferenceServiceRequestSenderFactory = new HttpRequestSender.Factory(
@@ -294,9 +298,6 @@ public Collection<?> createComponents(PluginServices services) {
294298
);
295299
elasicInferenceServiceFactory.set(elasticInferenceServiceRequestSenderFactory);
296300

297-
var inferenceServiceSettings = new ElasticInferenceServiceSettings(settings);
298-
inferenceServiceSettings.init(services.clusterService());
299-
300301
var authorizationHandler = new ElasticInferenceServiceAuthorizationRequestHandler(
301302
inferenceServiceSettings.getElasticInferenceServiceUrl(),
302303
services.threadPool()

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpClientManager.java

+14-4
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import java.io.Closeable;
3333
import java.io.IOException;
3434
import java.util.List;
35+
import java.util.concurrent.TimeUnit;
3536

3637
import static org.elasticsearch.core.Strings.format;
3738
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSettings.ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_PREFIX;
@@ -112,14 +113,15 @@ public static HttpClientManager create(
112113
ThreadPool threadPool,
113114
ClusterService clusterService,
114115
ThrottlerManager throttlerManager,
115-
SSLService sslService
116+
SSLService sslService,
117+
TimeValue connectionTtl
116118
) {
117119
// Set the sslStrategy to ensure an encrypted connection, as Elastic Inference Service requires it.
118120
SSLIOSessionStrategy sslioSessionStrategy = sslService.sslIOSessionStrategy(
119121
sslService.getSSLConfiguration(ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_PREFIX)
120122
);
121123

122-
PoolingNHttpClientConnectionManager connectionManager = createConnectionManager(sslioSessionStrategy);
124+
PoolingNHttpClientConnectionManager connectionManager = createConnectionManager(sslioSessionStrategy, connectionTtl);
123125
return new HttpClientManager(settings, connectionManager, threadPool, clusterService, throttlerManager);
124126
}
125127

@@ -146,7 +148,7 @@ public static HttpClientManager create(
146148
this.addSettingsUpdateConsumers(clusterService);
147149
}
148150

149-
private static PoolingNHttpClientConnectionManager createConnectionManager(SSLIOSessionStrategy sslStrategy) {
151+
private static PoolingNHttpClientConnectionManager createConnectionManager(SSLIOSessionStrategy sslStrategy, TimeValue connectionTtl) {
150152
ConnectingIOReactor ioReactor;
151153
try {
152154
var configBuilder = IOReactorConfig.custom().setSoKeepAlive(true);
@@ -162,7 +164,15 @@ private static PoolingNHttpClientConnectionManager createConnectionManager(SSLIO
162164
.register("https", sslStrategy)
163165
.build();
164166

165-
return new PoolingNHttpClientConnectionManager(ioReactor, registry);
167+
return new PoolingNHttpClientConnectionManager(
168+
ioReactor,
169+
null,
170+
registry,
171+
null,
172+
null,
173+
Math.toIntExact(connectionTtl.getMillis()),
174+
TimeUnit.MILLISECONDS
175+
);
166176
}
167177

168178
private static PoolingNHttpClientConnectionManager createConnectionManager() {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSettings.java

+18
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,33 @@ public class ElasticInferenceServiceSettings {
7070
Setting.Property.NodeScope
7171
);
7272

73+
/**
74+
* Total time to live (TTL) defines maximum life span of persistent connections regardless of their
75+
* expiration setting. No persistent connection will be re-used past its TTL value.
76+
* Using a TTL of -1 will disable the expiration of persistent connections (the idle connection evictor will still apply).
77+
*/
78+
public static final Setting<TimeValue> CONNECTION_TTL_SETTING = Setting.timeSetting(
79+
"xpack.inference.elastic.http.connection_ttl",
80+
TimeValue.timeValueSeconds(60),
81+
Setting.Property.NodeScope
82+
);
83+
7384
@Deprecated
7485
private final String eisGatewayUrl;
7586

7687
private final String elasticInferenceServiceUrl;
7788
private final boolean periodicAuthorizationEnabled;
7889
private volatile TimeValue authRequestInterval;
7990
private volatile TimeValue maxAuthorizationRequestJitter;
91+
private final TimeValue connectionTtl;
8092

8193
public ElasticInferenceServiceSettings(Settings settings) {
8294
eisGatewayUrl = EIS_GATEWAY_URL.get(settings);
8395
elasticInferenceServiceUrl = ELASTIC_INFERENCE_SERVICE_URL.get(settings);
8496
periodicAuthorizationEnabled = PERIODIC_AUTHORIZATION_ENABLED.get(settings);
8597
authRequestInterval = AUTHORIZATION_REQUEST_INTERVAL.get(settings);
8698
maxAuthorizationRequestJitter = MAX_AUTHORIZATION_REQUEST_JITTER.get(settings);
99+
connectionTtl = CONNECTION_TTL_SETTING.get(settings);
87100
}
88101

89102
/**
@@ -115,6 +128,10 @@ public TimeValue getMaxAuthorizationRequestJitter() {
115128
return maxAuthorizationRequestJitter;
116129
}
117130

131+
public TimeValue getConnectionTtl() {
132+
return connectionTtl;
133+
}
134+
118135
public static List<Setting<?>> getSettingsDefinitions() {
119136
ArrayList<Setting<?>> settings = new ArrayList<>();
120137
settings.add(EIS_GATEWAY_URL);
@@ -124,6 +141,7 @@ public static List<Setting<?>> getSettingsDefinitions() {
124141
settings.add(PERIODIC_AUTHORIZATION_ENABLED);
125142
settings.add(AUTHORIZATION_REQUEST_INTERVAL);
126143
settings.add(MAX_AUTHORIZATION_REQUEST_JITTER);
144+
settings.add(CONNECTION_TTL_SETTING);
127145
return settings;
128146
}
129147

0 commit comments

Comments
 (0)