Skip to content

Commit bf37beb

Browse files
[llvm][cas] Implement a CAS stress tester
This is a basic tester for the CAS ObjectStore that inserts random data into the CAS and validates it under several randomly generated configurations. It checks: * multi-threaded insertion * multi-process insertion * randomly killing the subprocesses that are inserting data. It makes sure none of these leave the CAS in an invalid state. Suggested usage: ``` LLVM_CAS_LOG=2 llvm-cas-test --cas-path=... --print-config ```
1 parent 33921fb commit bf37beb

File tree

2 files changed

+294
-0
lines changed

2 files changed

+294
-0
lines changed
+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# LLVM component libraries the tool links against.
set(LLVM_LINK_COMPONENTS CAS Support)

# Build the llvm-cas-test tool from its single source file.
add_llvm_tool(llvm-cas-test llvm-cas-test.cpp)
+287
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
#include "llvm/ADT/BitmaskEnum.h"
2+
#include "llvm/CAS/ActionCache.h"
3+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
4+
#include "llvm/CAS/ObjectStore.h"
5+
#include "llvm/Support/Path.h"
6+
#include "llvm/Support/Program.h"
7+
#include "llvm/Support/RandomNumberGenerator.h"
8+
#include "llvm/Support/ThreadPool.h"
9+
10+
using namespace llvm;
11+
using namespace llvm::cas;
12+
13+
// State filled in from the command line by parseOptions().

/// Value of `--cas-path=`; the path handed to the on-disk CAS.
static std::string CASPath;
/// Set by `--gen-data`; main() then only fills the CAS and exits.
static bool GenData = false;
/// Set by `--print-config`; each iteration dumps its configuration.
static bool PrintConfig = false;
/// Set by `--force-kill`; forces the FORK and CHECK_TERMINATION settings.
static bool ForceKill = false;
/// Values of `--num-shards=`, `--tree-depth=`, `--num-children=`,
/// `--data-length=` and `--precent-file=`, consumed by Config::init().
static unsigned OptNumShards = 0;
static unsigned OptTreeDepth = 0;
static unsigned OptNumChildren = 0;
static unsigned OptDataLength = 0;
static unsigned OptPrecentFile = 0;
/// Argv[0], kept so the tool can locate its own executable.
static const char *Argv0 = nullptr;

/// CAS size limit in MB (`--cas-size-limit=`); defaults to 100MB.
static uint64_t SizeLimit = 100;
/// Total test duration in seconds (`--timeout=`); defaults to 180s.
static uint64_t Timeout = 180;
29+
30+
enum CASFuzzingSettings : uint8_t {
31+
DEFAULT = 0,
32+
FORK = 1, // CAS Data filling happens in subprocesses.
33+
CHECK_TERMINATION = 1 << 1, // Try kill the subprocess when it fills the data.
34+
PRUNE_CAS = 1 << 2, // Prune the CAS after the test.
35+
36+
LAST = UINT8_MAX, // Enum is randomly generated, use MAX to cover all inputs.
37+
LLVM_MARK_AS_BITMASK_ENUM(LAST)
38+
};
39+
40+
struct Config {
41+
CASFuzzingSettings Settings = DEFAULT;
42+
uint8_t NumShards;
43+
uint8_t NumChildren;
44+
uint8_t TreeDepth;
45+
uint16_t DataLength;
46+
uint16_t PrecentFile;
47+
48+
static constexpr unsigned MaxShards = 20;
49+
static constexpr unsigned MaxChildren = 32;
50+
static constexpr unsigned MaxDepth = 8;
51+
static constexpr unsigned MaxDataLength = 1024 * 4;
52+
53+
void constraint() {
54+
// reduce the size of parameter if they are too big.
55+
NumShards = NumShards % MaxShards;
56+
NumChildren = NumChildren % MaxChildren;
57+
TreeDepth = TreeDepth % MaxDepth;
58+
DataLength = DataLength % MaxDataLength;
59+
PrecentFile = PrecentFile % 100;
60+
61+
if (ForceKill) {
62+
Settings |= FORK;
63+
Settings |= CHECK_TERMINATION;
64+
}
65+
}
66+
67+
bool extendToFile(uint8_t Seed) const {
68+
return ((float)Seed / (float)UINT8_MAX) > ((float)PrecentFile / 100.0f);
69+
}
70+
71+
void init() {
72+
NumShards = OptNumShards ? OptNumShards : MaxShards;
73+
NumChildren = OptNumChildren ? OptNumChildren : MaxChildren;
74+
TreeDepth = OptTreeDepth ? OptTreeDepth : MaxDepth;
75+
DataLength = OptDataLength ? OptDataLength : MaxDataLength;
76+
PrecentFile = OptPrecentFile;
77+
}
78+
79+
void appendCommandLineOpts(std::vector<std::string> &Cmd) {
80+
Cmd.push_back("--num-shards=" + utostr(NumShards));
81+
Cmd.push_back("--num-children=" + utostr(NumChildren));
82+
Cmd.push_back("--tree-depth=" + utostr(TreeDepth));
83+
Cmd.push_back("--data-length=" + utostr(DataLength));
84+
Cmd.push_back("--precent-file=" + utostr(PrecentFile));
85+
}
86+
87+
void dump() {
88+
llvm::errs() << "## Configuration:"
89+
<< " Fork: " << (bool)(Settings & FORK)
90+
<< " Kill: " << (bool)(Settings & CHECK_TERMINATION)
91+
<< " Prune: " << (bool)(Settings & PRUNE_CAS)
92+
<< " NumShards: " << (unsigned)NumShards
93+
<< " TreeDepth: " << (unsigned)TreeDepth
94+
<< " NumChildren: " << (unsigned)NumChildren
95+
<< " DataLength: " << (unsigned)DataLength
96+
<< " PrecentFile: " << (unsigned)PrecentFile << "\n";
97+
}
98+
};
99+
100+
static void parseOptions(int Argc, char **Argv) {
101+
Argv0 = Argv[0];
102+
103+
for (int I = 0; I < Argc; ++I) {
104+
StringRef Arg = Argv[I];
105+
// option must start with `--`.
106+
if (!Arg.consume_front("--"))
107+
continue;
108+
109+
// flags.
110+
if (Arg == "gen-data") {
111+
GenData = true;
112+
continue;
113+
}
114+
if (Arg == "print-config") {
115+
PrintConfig = true;
116+
continue;
117+
}
118+
if (Arg == "force-kill") {
119+
ForceKill = true;
120+
continue;
121+
}
122+
123+
// options that take an argument.
124+
auto Opt = Arg.split('=');
125+
if (Opt.first == "cas-path")
126+
CASPath = Opt.second.str();
127+
else if (Opt.first == "num-shards")
128+
to_integer(Opt.second, OptNumShards);
129+
else if (Opt.first == "num-children")
130+
to_integer(Opt.second, OptNumChildren);
131+
else if (Opt.first == "tree-depth")
132+
to_integer(Opt.second, OptTreeDepth);
133+
else if (Opt.first == "data-length")
134+
to_integer(Opt.second, OptDataLength);
135+
else if (Opt.first == "precent-file")
136+
to_integer(Opt.second, OptPrecentFile);
137+
else if (Opt.first == "cas-size-limit")
138+
to_integer(Opt.second, SizeLimit);
139+
else if (Opt.first == "timeout")
140+
to_integer(Opt.second, Timeout);
141+
}
142+
}
143+
144+
// fill the CAS with random data of specified tree depth and children numbers.
145+
static void fillData(ObjectStore &CAS, const Config &Conf) {
146+
ExitOnError ExitOnErr("llvm-cas-fuzzer fill data: ");
147+
DefaultThreadPool ThreadPool(hardware_concurrency());
148+
std::atomic<uint64_t> NumCreated = 0;
149+
for (size_t I = 0; I != Conf.NumShards; ++I) {
150+
ThreadPool.async([&] {
151+
std::vector<ObjectRef> Refs;
152+
for (unsigned Depth = 0; Depth < Conf.TreeDepth; ++Depth) {
153+
unsigned NumNodes = (Conf.TreeDepth - Depth + 1) * Conf.NumChildren + 1;
154+
std::vector<ObjectRef> Created;
155+
Created.reserve(NumNodes);
156+
ArrayRef<ObjectRef> PreviouslyCreated(Refs);
157+
for (unsigned I = 0; I < NumNodes; ++I) {
158+
std::vector<char> Data(Conf.DataLength);
159+
getRandomBytes(Data.data(), Data.size());
160+
// Use the first byte that generated to decide if we should make it
161+
// 64KB bigger and force that into a file based storage.
162+
if (Conf.extendToFile(Data[0]))
163+
Data.resize(64LL * 1024LL + Conf.DataLength);
164+
165+
if (Depth == 0) {
166+
auto Ref = ExitOnErr(CAS.store({}, Data));
167+
Created.push_back(Ref);
168+
} else {
169+
auto Parent = PreviouslyCreated.slice(I, Conf.NumChildren);
170+
auto Ref = ExitOnErr(CAS.store(Parent, Data));
171+
Created.push_back(Ref);
172+
}
173+
++NumCreated;
174+
}
175+
Refs.swap(Created);
176+
}
177+
});
178+
}
179+
ThreadPool.wait();
180+
}
181+
182+
static int genData() {
183+
ExitOnError ExitOnErr("llvm-cas-test --gen-data: ");
184+
185+
Config Conf;
186+
Conf.init();
187+
188+
auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath));
189+
fillData(*DB.first, Conf);
190+
191+
return 0;
192+
}
193+
194+
static int runOneTest() {
195+
ExitOnError ExitOnErr("llvm-cas-test: ");
196+
197+
Config Conf;
198+
getRandomBytes(&Conf, sizeof(Conf));
199+
Conf.constraint();
200+
201+
if (PrintConfig)
202+
Conf.dump();
203+
204+
// Start with fresh log.
205+
static constexpr StringLiteral LogFile = "v1.log";
206+
SmallString<256> LogPath(CASPath);
207+
llvm::sys::path::append(LogPath, LogFile);
208+
llvm::sys::fs::remove(LogPath);
209+
210+
auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath));
211+
auto &CAS = DB.first;
212+
213+
// Size limit in MB.
214+
ExitOnErr(CAS->setSizeLimit(SizeLimit * 1024 * 1024));
215+
if (Conf.Settings & FORK) {
216+
// fill data using sub processes.
217+
std::string MainExe = sys::fs::getMainExecutable(Argv0, &Argv0);
218+
std::vector<std::string> Args = {MainExe, "--gen-data",
219+
"--cas-path=" + CASPath};
220+
Conf.appendCommandLineOpts(Args);
221+
222+
std::vector<StringRef> Cmd;
223+
for_each(Args, [&Cmd](const std::string &Arg) { Cmd.push_back(Arg); });
224+
225+
std::vector<sys::ProcessInfo> Subprocesses;
226+
for (int I = 0; I < Conf.NumShards; ++I) {
227+
auto SP = sys::ExecuteNoWait(MainExe, Cmd, std::nullopt);
228+
if (SP.Pid != 0)
229+
Subprocesses.push_back(SP);
230+
}
231+
232+
if (Conf.Settings & CHECK_TERMINATION) {
233+
for_each(Subprocesses, [](auto &P) {
234+
// Wait 1 second and killed the process.
235+
auto WP = sys::Wait(P, 1);
236+
if (WP.ReturnCode)
237+
llvm::errs() << "subprocess killed successfully\n";
238+
});
239+
} else {
240+
for_each(Subprocesses, [](auto &P) { sys::Wait(P, std::nullopt); });
241+
}
242+
243+
} else {
244+
// in-process fill data.
245+
fillData(*CAS, Conf);
246+
}
247+
248+
// validate and prune in the end.
249+
ExitOnErr(CAS->validate(true));
250+
251+
if (Conf.Settings & PRUNE_CAS)
252+
ExitOnErr(CAS->pruneStorageData());
253+
254+
return 0;
255+
}
256+
257+
static int runTest() {
258+
auto Start = std::chrono::steady_clock::now();
259+
std::chrono::seconds Duration(Timeout);
260+
261+
while (std::chrono::steady_clock::now() - Start < Duration) {
262+
if (int Res = runOneTest())
263+
return Res;
264+
}
265+
266+
std::chrono::duration_cast<std::chrono::seconds>(
267+
std::chrono::steady_clock::now() - Start)
268+
.count();
269+
270+
return 0;
271+
}
272+
273+
int main(int argc, char **argv) {
274+
parseOptions(argc, argv);
275+
276+
if (CASPath.empty()) {
277+
llvm::errs() << "llvm-cas-test requires --cas-path= option\n";
278+
return 1;
279+
}
280+
281+
if (GenData) {
282+
genData();
283+
return 0;
284+
}
285+
286+
return runTest();
287+
}

0 commit comments

Comments
 (0)