Skip to content

Commit 0efdf81

Browse files
[llvm][cas] Implement basic fuzzer
This is a basic fuzzer for the CAS ObjectStore. It inserts random data into the CAS and validates it under several configurations that are randomly generated by the fuzzer. It checks:
* multi-threaded insertion
* multi-process insertion
* randomly killing the subprocesses that are inserting data

and makes sure that none of these leaves the CAS in an invalid state.

Suggested usage:
```
LLVM_CAS_LOG=2 llvm-cas-fuzzer --cas-path=... -rss_limit_mb=4096 --print-config
```
1 parent 97a6a7d commit 0efdf81

File tree

3 files changed

+278
-0
lines changed

3 files changed

+278
-0
lines changed
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# LLVM components referenced by the llvm-cas-fuzzer target below.
set(LLVM_LINK_COMPONENTS
2+
CAS
3+
FuzzerCLI
4+
FuzzMutate
5+
Support
6+
)
7+
# Declares the llvm-cas-fuzzer target from llvm-cas-fuzzer.cpp.
# DummyCASFuzzer.cpp is passed as DUMMY_MAIN; it implements main() so the tool
# can be built and tested without linking libFuzzer.
add_llvm_fuzzer(llvm-cas-fuzzer
8+
llvm-cas-fuzzer.cpp
9+
DUMMY_MAIN DummyCASFuzzer.cpp
10+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===--- DummyCASFuzzer.cpp - Entry point to sanity check the fuzzer ------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Implementation of main so we can build and test without linking libFuzzer.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/FuzzMutate/FuzzerCLI.h"
14+
15+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
16+
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv);
17+
18+
int main(int argc, char *argv[]) {
19+
return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput,
20+
LLVMFuzzerInitialize);
21+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#include "llvm/ADT/BitmaskEnum.h"
2+
#include "llvm/CAS/ActionCache.h"
3+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
4+
#include "llvm/CAS/ObjectStore.h"
5+
#include "llvm/Support/Program.h"
6+
#include "llvm/Support/RandomNumberGenerator.h"
7+
#include "llvm/Support/ThreadPool.h"
8+
9+
using namespace llvm;
10+
using namespace llvm::cas;
11+
12+
// Options from the command line.
13+
// Location of the on-disk CAS, set by --cas-path=<path>.
static std::string CASPath;
14+
// Set by --gen-data: only fill the CAS with data and exit instead of fuzzing.
static bool GenData = false;
15+
// Set by --print-config: dump the decoded configuration for every input.
static bool PrintConfig = false;
16+
// Set by --force-kill: always run with FORK and CHECK_TERMINATION enabled.
static bool ForceKill = false;
17+
// --num-shards=<n>; 0 means "not given" and selects the default in
// Config::init().
static unsigned OptNumShards = 0;
18+
// --tree-depth=<n>; 0 means "not given" and selects the default in
// Config::init().
static unsigned OptTreeDepth = 0;
19+
// --num-children=<n>; 0 means "not given" and selects the default in
// Config::init().
static unsigned OptNumChildren = 0;
20+
// --data-length=<n>; 0 means "not given" and selects the default in
// Config::init().
static unsigned OptDataLength = 0;
21+
// argv[0] as received by parseOptions(); used to locate the main executable
// when spawning subprocesses.
static const char *Argv0 = nullptr;
22+
23+
enum CASFuzzingSettings : uint8_t {
24+
DEFAULT = 0,
25+
FORK = 1, // CAS Data filling happens in subprocesses.
26+
CHECK_TERMINATION = 1 << 1, // Try kill the subprocess when it fills the data.
27+
PRUNE_CAS = 1 << 2, // Prune the CAS after the test.
28+
29+
LAST = UINT8_MAX, // Enum is randomly generated, use MAX to cover all inputs.
30+
LLVM_MARK_AS_BITMASK_ENUM(LAST)
31+
};
32+
33+
struct Config {
34+
CASFuzzingSettings Settings = DEFAULT;
35+
uint8_t NumShards;
36+
uint8_t NumChildren;
37+
uint8_t TreeDepth;
38+
uint16_t DataLength;
39+
40+
void constraint() {
41+
// reduce the size of parameter if they are too big.
42+
if (NumShards > 12) {
43+
// If forking, max out the threads.
44+
if (Settings & FORK)
45+
NumShards = 12;
46+
else
47+
NumShards = NumShards % 12;
48+
}
49+
50+
if (NumChildren > 10)
51+
NumChildren = NumChildren % 10;
52+
53+
if (TreeDepth > 10)
54+
TreeDepth = TreeDepth % 10;
55+
56+
if (DataLength > 1024)
57+
DataLength = DataLength % 1024;
58+
59+
if (ForceKill) {
60+
Settings |= FORK;
61+
Settings |= CHECK_TERMINATION;
62+
}
63+
}
64+
65+
void init() {
66+
NumShards = OptNumShards ? OptNumShards : 8;
67+
NumChildren = OptNumChildren ? OptNumChildren : 4;
68+
TreeDepth = OptTreeDepth ? OptTreeDepth : 12;
69+
DataLength = OptDataLength ? OptDataLength : 128;
70+
}
71+
72+
void appendCommandLineOpts(std::vector<std::string> &Cmd) {
73+
Cmd.push_back("--num-shards=" + utostr(NumShards));
74+
Cmd.push_back("--num-children=" + utostr(NumChildren));
75+
Cmd.push_back("--tree-depth=" + utostr(TreeDepth));
76+
Cmd.push_back("--data-length=" + utostr(DataLength));
77+
}
78+
79+
void dump() {
80+
llvm::errs() << "## Configuration:"
81+
<< " Fork: " << (bool)(Settings & FORK)
82+
<< " Kill: " << (bool)(Settings & CHECK_TERMINATION)
83+
<< " Prune: " << (bool)(Settings & PRUNE_CAS)
84+
<< " NumShards: " << (unsigned)NumShards
85+
<< " TreeDepth: " << (unsigned)TreeDepth
86+
<< " NumChildren: " << (unsigned)NumChildren
87+
<< " DataLength: " << (unsigned)DataLength << "\n";
88+
}
89+
};
90+
91+
static void parseOptions(int Argc, char **Argv) {
92+
Argv0 = Argv[0];
93+
94+
for (int I = 0; I < Argc; ++I) {
95+
StringRef Arg = Argv[I];
96+
// option must start with `--`.
97+
if (!Arg.consume_front("--"))
98+
continue;
99+
100+
// flags.
101+
if (Arg == "gen-data") {
102+
GenData = true;
103+
continue;
104+
}
105+
if (Arg == "print-config") {
106+
PrintConfig = true;
107+
continue;
108+
}
109+
if (Arg == "force-kill") {
110+
ForceKill = true;
111+
continue;
112+
}
113+
114+
// options that take an argument.
115+
auto Opt = Arg.split('=');
116+
if (Opt.first == "cas-path")
117+
CASPath = Opt.second.str();
118+
else if (Opt.first == "num-shards")
119+
to_integer(Opt.second, OptNumShards);
120+
else if (Opt.first == "num-children")
121+
to_integer(Opt.second, OptNumChildren);
122+
else if (Opt.first == "tree-depth")
123+
to_integer(Opt.second, OptTreeDepth);
124+
else if (Opt.first == "data-length")
125+
to_integer(Opt.second, OptDataLength);
126+
}
127+
}
128+
129+
// fill the CAS with random data of specified tree depth and children numbers.
130+
static void fillData(ObjectStore &CAS, const Config &Conf) {
131+
ExitOnError ExitOnErr("llvm-cas-fuzzer fill data: ");
132+
DefaultThreadPool ThreadPool(hardware_concurrency());
133+
for (size_t I = 0; I != Conf.NumShards; ++I) {
134+
ThreadPool.async([&] {
135+
std::vector<ObjectRef> Refs;
136+
for (unsigned Depth = 1; Depth <= Conf.TreeDepth; ++Depth) {
137+
unsigned NumNodes = (Conf.TreeDepth - Depth) * Conf.NumChildren + 1;
138+
std::vector<ObjectRef> Created;
139+
Created.reserve(NumNodes);
140+
ArrayRef<ObjectRef> PreviouslyCreated(Refs);
141+
for (unsigned I = 0; I < NumNodes; ++I) {
142+
std::vector<char> Data(Conf.DataLength);
143+
getRandomBytes(Data.data(), Data.size());
144+
if (Depth == 1) {
145+
auto Ref = ExitOnErr(CAS.store({}, Data));
146+
Created.push_back(Ref);
147+
} else {
148+
auto Parent = PreviouslyCreated.slice(I, Conf.NumChildren);
149+
auto Ref = ExitOnErr(CAS.store(Parent, Data));
150+
Created.push_back(Ref);
151+
}
152+
}
153+
Refs.swap(Created);
154+
}
155+
});
156+
}
157+
ThreadPool.wait();
158+
}
159+
160+
static int genData() {
161+
ExitOnError ExitOnErr("llvm-cas-fuzzer --gen-data: ");
162+
163+
if (CASPath.empty()) {
164+
llvm::errs() << "--gen-data requires --cas-path= option\n";
165+
return 1;
166+
}
167+
168+
Config Conf;
169+
Conf.init();
170+
171+
auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath));
172+
fillData(*DB.first, Conf);
173+
174+
return 0;
175+
}
176+
177+
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
178+
parseOptions(*argc, *argv);
179+
if (GenData) {
180+
genData();
181+
exit(0);
182+
}
183+
184+
return 0;
185+
}
186+
187+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
188+
ExitOnError ExitOnErr("llvm-cas-fuzzer: ");
189+
190+
if (Size < sizeof(Config))
191+
return 0;
192+
193+
Config Conf;
194+
std::memcpy(&Conf, Data, sizeof(Conf));
195+
Conf.constraint();
196+
197+
if (Conf.NumShards == 0)
198+
return 0;
199+
200+
if (PrintConfig)
201+
Conf.dump();
202+
203+
auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath));
204+
auto &CAS = DB.first;
205+
206+
ExitOnErr(CAS->setSizeLimit(1024));
207+
if (Conf.Settings & FORK) {
208+
// fill data using sub processes.
209+
std::string MainExe = sys::fs::getMainExecutable(Argv0, &Argv0);
210+
std::vector<std::string> Args = {MainExe, "--gen-data",
211+
"--cas-path=" + CASPath};
212+
Conf.appendCommandLineOpts(Args);
213+
std::vector<StringRef> Cmd;
214+
for_each(Args, [&Cmd](const std::string &Arg) { Cmd.push_back(Arg); });
215+
216+
std::vector<sys::ProcessInfo> Subprocesses;
217+
218+
for (int I = 0; I < Conf.NumShards; ++I) {
219+
auto SP = sys::ExecuteNoWait(MainExe, Cmd, std::nullopt);
220+
if (SP.Pid != 0)
221+
Subprocesses.push_back(SP);
222+
}
223+
224+
if (Conf.Settings & CHECK_TERMINATION) {
225+
for_each(Subprocesses, [](auto &P) {
226+
// Wait 1 second and killed the process.
227+
auto WP = sys::Wait(P, 1);
228+
if (WP.ReturnCode)
229+
llvm::errs() << "subprocess killed\n";
230+
});
231+
} else {
232+
for_each(Subprocesses, [](auto &P) { sys::Wait(P, std::nullopt); });
233+
}
234+
235+
} else {
236+
// in-process fill data.
237+
fillData(*CAS, Conf);
238+
}
239+
240+
// validate and prune in the end.
241+
ExitOnErr(CAS->validate(true));
242+
243+
if (Conf.Settings & PRUNE_CAS)
244+
ExitOnErr(CAS->pruneStorageData());
245+
246+
return 0;
247+
}

0 commit comments

Comments
 (0)