Skip to content

Commit 2a3bb25

Browse files
authored
feat(shell-api): Account for orphan documents in getShardDistribution() helper MONGOSH-1838 (#2203)
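getShardDistribution() should correctly account for orphan documents (reported as numOrphanDocs on servers >= 6.0) when calculating size statistics. It does so by estimating the size of the orphan documents on each shard as numOrphanDocs * avgObjSize and subtracting that estimate from the shard's reported size before computing the per-shard and total figures.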
1 parent 664380b commit 2a3bb25

File tree

2 files changed

+104
-9
lines changed

2 files changed

+104
-9
lines changed

packages/shell-api/src/collection.spec.ts

+64
Original file line numberDiff line numberDiff line change
@@ -2276,6 +2276,70 @@ describe('Collection', function () {
22762276
ShellApiErrors.NotConnectedToShardedCluster
22772277
);
22782278
});
2279+
2280+
// Exercises getShardDistribution() when the shard stats returned by the mocked
// aggregate cursor include a numOrphanDocs field.
describe('with orphan documents', function () {
2281+
const mockedNumChunks = 2;
2282+
const mockedCollectionConfigInfo = {};
2283+
// With these values the test below expects an adjusted size of
// size - numOrphanDocs * avgObjSize = 1000 - 10 * 7 = 930.
const mockedShardStats = {
2284+
shard: 'test-shard',
2285+
storageStats: {
2286+
size: 1000,
2287+
numOrphanDocs: 10,
2288+
avgObjSize: 7,
2289+
count: 15,
2290+
},
2291+
};
2292+
const mockedShardInfo = {
2293+
host: 'dummy-host',
2294+
};
2295+
2296+
beforeEach(function () {
2297+
const serviceProviderCursor = stubInterface<ServiceProviderCursor>();
2298+
2299+
// Make find and limit have no effect so the value of findOne is determined by tryNext.
2300+
serviceProviderCursor.limit.returns(serviceProviderCursor);
2301+
serviceProvider.find.returns(serviceProviderCursor);
2302+
2303+
// Mock according to the order of findOne calls getShardDistribution uses.
2304+
serviceProviderCursor.tryNext
2305+
.onCall(0)
2306+
.resolves(mockedCollectionConfigInfo);
2307+
serviceProviderCursor.tryNext.onCall(1).resolves(mockedShardInfo);
2308+
serviceProvider.countDocuments.returns(
2309+
Promise.resolve(mockedNumChunks)
2310+
);
2311+
2312+
// The aggregate cursor yields the single mocked shard's stats on the first
// tryNext call and null on the second (presumably ending the iteration).
const aggregateTryNext = sinon.stub();
2313+
aggregateTryNext.onCall(0).resolves(mockedShardStats);
2314+
aggregateTryNext.onCall(1).resolves(null);
2315+
2316+
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
2317+
serviceProvider.aggregate.returns({
2318+
tryNext: aggregateTryNext,
2319+
} as any);
2320+
});
2321+
2322+
it('should account for numOrphanDocs when calculating size', async function () {
2323+
const shardDistribution = await collection.getShardDistribution();
2324+
2325+
const { storageStats } = mockedShardStats;
2326+
expect(shardDistribution.type).equals('StatsResult');
2327+
// Expected size once the estimated orphan bytes are subtracted.
const adjustedSize =
2328+
storageStats.size -
2329+
storageStats.numOrphanDocs * storageStats.avgObjSize;
2330+
expect(shardDistribution.value.Totals.data).equals(
2331+
`${adjustedSize}B`
2332+
);
2333+
// Pick the per-shard entry: the first key of the result other than 'Totals'.
const shardField = Object.keys(shardDistribution.value).find(
2334+
(field) => field !== 'Totals'
2335+
) as `Shard ${string} at ${string}`;
2336+
2337+
expect(shardField).not.undefined;
2338+
// The per-chunk estimate must also be based on the adjusted size.
expect(
2339+
shardDistribution.value[shardField]['estimated data per chunk']
2340+
).equals(`${adjustedSize / mockedNumChunks}B`);
2341+
});
2342+
});
22792343
});
22802344

22812345
describe('analyzeShardKey', function () {

packages/shell-api/src/collection.ts

+40-9
Original file line numberDiff line numberDiff line change
@@ -2135,12 +2135,14 @@ export default class Collection extends ShellApiWithMongoClass {
21352135
@returnsPromise
21362136
@topologies([Topologies.Sharded])
21372137
@apiVersions([])
2138-
async getShardDistribution(): Promise<CommandResult> {
2138+
async getShardDistribution(): Promise<
2139+
CommandResult<GetShardDistributionResult>
2140+
> {
21392141
this._emitCollectionApiCall('getShardDistribution', {});
21402142

21412143
await getConfigDB(this._database); // Warns if not connected to mongos
21422144

2143-
const result = {} as Document;
2145+
const result = {} as GetShardDistributionResult;
21442146
const config = this._mongo.getDB('config');
21452147

21462148
const collStats = await (
@@ -2179,17 +2181,24 @@ export default class Collection extends ShellApiWithMongoClass {
21792181
.findOne({ _id: extractedShardStats.shard }),
21802182
config.getCollection('chunks').countDocuments(countChunksQuery),
21812183
]);
2184+
2185+
// Since 6.0, there can be orphan documents indicated by numOrphanDocs.
2186+
// These orphan documents need to be accounted for in the size calculation.
2187+
const orphanDocumentsSize =
2188+
(extractedShardStats.storageStats.numOrphanDocs ?? 0) *
2189+
(extractedShardStats.storageStats.avgObjSize ?? 0);
2190+
const ownedSize =
2191+
extractedShardStats.storageStats.size - orphanDocumentsSize;
2192+
21822193
const shardStats = {
21832194
shardId: shard,
21842195
host: host !== null ? host.host : null,
2185-
size: extractedShardStats.storageStats.size,
2196+
size: ownedSize,
21862197
count: extractedShardStats.storageStats.count,
21872198
numChunks: numChunks,
21882199
avgObjSize: extractedShardStats.storageStats.avgObjSize,
21892200
};
21902201

2191-
const key = `Shard ${shardStats.shardId} at ${shardStats.host}`;
2192-
21932202
// In sharded timeseries collections we do not have a count
21942203
// so we intentionally pass NaN as a result to the client.
21952204
const shardStatsCount: number = shardStats.count ?? NaN;
@@ -2203,15 +2212,15 @@ export default class Collection extends ShellApiWithMongoClass {
22032212
? 0
22042213
: Math.floor(shardStatsCount / shardStats.numChunks);
22052214

2206-
result[key] = {
2215+
result[`Shard ${shardStats.shardId} at ${shardStats.host}`] = {
22072216
data: dataFormat(coerceToJSNumber(shardStats.size)),
22082217
docs: shardStatsCount,
22092218
chunks: shardStats.numChunks,
22102219
'estimated data per chunk': dataFormat(estimatedChunkDataPerChunk),
22112220
'estimated docs per chunk': estimatedDocsPerChunk,
22122221
};
22132222

2214-
totals.size += coerceToJSNumber(shardStats.size);
2223+
totals.size += coerceToJSNumber(ownedSize);
22152224
totals.count += coerceToJSNumber(shardStatsCount);
22162225
totals.numChunks += coerceToJSNumber(shardStats.numChunks);
22172226

@@ -2224,7 +2233,7 @@ export default class Collection extends ShellApiWithMongoClass {
22242233
data: dataFormat(totals.size),
22252234
docs: totals.count,
22262235
chunks: totals.numChunks,
2227-
} as Document;
2236+
} as GetShardDistributionResult['Totals'];
22282237

22292238
for (const shardStats of conciseShardsStats) {
22302239
const estDataPercent =
@@ -2243,7 +2252,8 @@ export default class Collection extends ShellApiWithMongoClass {
22432252
];
22442253
}
22452254
result.Totals = totalValue;
2246-
return new CommandResult('StatsResult', result);
2255+
2256+
return new CommandResult<GetShardDistributionResult>('StatsResult', result);
22472257
}
22482258

22492259
@serverVersions(['3.1.0', ServerVersions.latest])
@@ -2467,3 +2477,24 @@ export default class Collection extends ShellApiWithMongoClass {
24672477
);
24682478
}
24692479
}
2480+
2481+
/**
 * Shape of the value carried by the `CommandResult` that
 * `getShardDistribution()` returns.
 *
 * - `Totals` holds the aggregated `data`, `docs` and `chunks` figures and,
 *   under `Shard ...` keys, a three-element tuple of formatted strings
 *   (% data, % docs in cluster, avg obj size on shard).
 * - Every `Shard <id> at <host>` key holds the figures for one shard,
 *   including the estimated data and docs per chunk.
 *
 * The `data` and `'estimated data per chunk'` fields are strings because the
 * method stores already formatted sizes there rather than raw numbers.
 */
export type GetShardDistributionResult = {
2482+
// Cluster-wide summary plus one percentage tuple per shard.
Totals: {
2483+
data: string;
2484+
docs: number;
2485+
chunks: number;
2486+
} & {
2487+
[individualShardDistribution: `Shard ${string}`]: [
2488+
`${number} % data`,
2489+
`${number} % docs in cluster`,
2490+
`${string} avg obj size on shard`
2491+
];
2492+
};
2493+
// Per-shard statistics, keyed by `Shard <shardId> at <host>`.
[individualShardResult: `Shard ${string} at ${string}`]: {
2494+
data: string;
2495+
docs: number;
2496+
chunks: number;
2497+
'estimated data per chunk': string;
2498+
'estimated docs per chunk': number;
2499+
};
2500+
};

0 commit comments

Comments
 (0)