Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sandbox/linux/seccomp-bpf/codegen.h" | 5 #include "sandbox/linux/seccomp-bpf/codegen.h" |
| 6 | 6 |
| 7 #include <linux/filter.h> | 7 #include <linux/filter.h> |
| 8 | 8 |
| 9 #include <set> | 9 #include <cstring> |
| 10 #include <string> | 10 #include <map> |
| 11 #include <vector> | 11 #include <vector> |
| 12 | 12 |
| 13 #include "sandbox/linux/seccomp-bpf/basicblock.h" | 13 #include "base/md5.h" |
| 14 #include "sandbox/linux/seccomp-bpf/errorcode.h" | |
| 15 #include "sandbox/linux/seccomp-bpf/instruction.h" | |
| 16 #include "testing/gtest/include/gtest/gtest.h" | 14 #include "testing/gtest/include/gtest/gtest.h" |
| 17 | 15 |
| 18 namespace sandbox { | 16 namespace sandbox { |
| 19 | |
| 20 // We want to access some of the private methods in the code generator. We | |
| 21 // do so by defining a "friend" that makes these methods public for us. | |
| 22 class CodeGenUnittestHelper : public CodeGen { | |
| 23 public: | |
| 24 using CodeGen::CutGraphIntoBasicBlocks; | |
| 25 using CodeGen::FindBranchTargets; | |
| 26 using CodeGen::MergeTails; | |
| 27 }; | |
| 28 | |
| 29 namespace { | 17 namespace { |
| 30 | 18 |
| 31 enum { | 19 // Hash provides an abstraction for building "hash trees" from BPF |
| 32 NO_FLAGS = 0x0000, | 20 // control flow graphs, and efficiently identifying equivalent graphs. |
| 33 HAS_MERGEABLE_TAILS = 0x0001, | 21 // |
| 34 }; | 22 // For simplicity, we use MD5, because base happens to provide a |
| 23 // convenient API for its use. However, any collision-resistant hash | |
| 24 // should suffice. | |
| 25 class Hash { | |
| 26 public: | |
| 27 static const Hash kZero; | |
| 35 | 28 |
| 36 using ProgramTestFunc = void (*)(CodeGenUnittestHelper* gen, | 29 Hash() : digest_() {} |
| 37 CodeGen::Addr head, | |
| 38 int flags); | |
| 39 | 30 |
| 40 class ProgramTest : public ::testing::TestWithParam<ProgramTestFunc> { | 31 Hash(uint16_t code, |
| 41 protected: | 32 uint32_t k, |
| 42 ProgramTest() : gen_() {} | 33 const Hash& jt = kZero, |
| 43 | 34 const Hash& jf = kZero) |
| 44 // RunTest runs the test function argument. It should be called at | 35 : digest_() { |
| 45 // the end of each program test case. | 36 base::MD5Context ctx; |
| 46 void RunTest(CodeGen::Addr head, int flags) { | 37 base::MD5Init(&ctx); |
| 47 GetParam()(&gen_, head, flags); | 38 HashValue(&ctx, code); |
| 39 HashValue(&ctx, k); | |
| 40 HashValue(&ctx, jt); | |
| 41 HashValue(&ctx, jf); | |
| 42 base::MD5Final(&digest_, &ctx); | |
| 48 } | 43 } |
| 49 | 44 |
| 50 CodeGen::Addr MakeInstruction(uint16_t code, | 45 Hash(const Hash& hash) : digest_(hash.digest_) {} |
|
jln (very slow on Chromium)
2014/11/14 04:54:29
= default instead?
mdempsky
2014/11/17 23:00:05
Done. (Wrote this code while "= default" was stil
| |
| 51 uint32_t k, | 46 Hash& operator=(const Hash& rhs) { |
|
jln (very slow on Chromium)
2014/11/14 04:54:29
= default?
Otherwise, why not use copy-and-swap i
mdempsky
2014/11/17 23:00:05
Done.
| |
| 52 CodeGen::Addr jt = nullptr, | 47 digest_ = rhs.digest_; |
| 53 CodeGen::Addr jf = nullptr) { | 48 return *this; |
| 54 CodeGen::Addr ret = gen_.MakeInstruction(code, k, jt, jf); | 49 } |
| 55 EXPECT_NE(nullptr, ret); | 50 |
| 56 EXPECT_EQ(code, ret->code); | 51 friend bool operator==(const Hash& lhs, const Hash& rhs) { |
| 57 EXPECT_EQ(k, ret->k); | 52 return std::memcmp(&lhs.digest_, &rhs.digest_, sizeof(lhs.digest_)) == 0; |
|
jln (very slow on Chromium)
2014/11/14 04:54:29
:( Too bad they didn't provide operator==() in bas
mdempsky
2014/11/17 23:00:05
Done.
| |
| 58 if (BPF_CLASS(code) == BPF_JMP) { | 53 } |
| 59 EXPECT_EQ(nullptr, ret->next); | 54 friend bool operator!=(const Hash& lhs, const Hash& rhs) { |
| 60 EXPECT_EQ(jt, ret->jt_ptr); | 55 return !(lhs == rhs); |
| 61 EXPECT_EQ(jf, ret->jf_ptr); | |
| 62 } else { | |
| 63 EXPECT_EQ(jt, ret->next); | |
| 64 EXPECT_EQ(nullptr, ret->jt_ptr); | |
| 65 EXPECT_EQ(nullptr, ret->jf_ptr); | |
| 66 } | |
| 67 return ret; | |
| 68 } | 56 } |
| 69 | 57 |
| 70 private: | 58 private: |
| 71 CodeGenUnittestHelper gen_; | 59 template <typename T> |
| 60 void HashValue(base::MD5Context* ctx, const T& value) { | |
| 61 base::MD5Update(ctx, | |
| 62 base::StringPiece(reinterpret_cast<const char*>(&value), | |
| 63 sizeof(value))); | |
| 64 } | |
| 65 | |
| 66 base::MD5Digest digest_; | |
| 72 }; | 67 }; |
| 73 | 68 |
| 74 TEST_P(ProgramTest, OneInstruction) { | 69 const Hash Hash::kZero; |
| 70 | |
| 71 // Sanity check that equality and inequality work on Hash as required. | |
| 72 TEST(CodeGen, HashSanity) { | |
| 73 std::vector<Hash> hashes; | |
| 74 | |
| 75 // Push a bunch of logically distinct hashes. | |
| 76 hashes.push_back(Hash::kZero); | |
| 77 for (int i = 0; i < 4; ++i) { | |
| 78 hashes.push_back(Hash(i & 1, i & 2)); | |
| 79 } | |
| 80 for (int i = 0; i < 16; ++i) { | |
| 81 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); | |
| 82 } | |
| 83 for (int i = 0; i < 64; ++i) { | |
| 84 hashes.push_back( | |
| 85 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); | |
| 86 } | |
| 87 | |
| 88 for (const Hash& a : hashes) { | |
| 89 for (const Hash& b : hashes) { | |
| 90 // Hashes should equal themselves, but not equal all others. | |
| 91 if (&a == &b) { | |
| 92 EXPECT_EQ(a, b); | |
| 93 } else { | |
| 94 EXPECT_NE(a, b); | |
| 95 } | |
| 96 } | |
| 97 } | |
| 98 } | |
| 99 | |
| 100 // ProgramTest provides a fixture for writing compiling sample | |
| 101 // programs with CodeGen and verifying the linearized output matches | |
| 102 // the input DAG. | |
| 103 class ProgramTest : public ::testing::Test { | |
| 104 protected: | |
| 105 ProgramTest() : gen_(), addr_hashes_() {} | |
| 106 | |
| 107 // MakeInstruction calls CodeGen::MakeInstruction() and associated | |
| 108 // the returned address with a hash of the instruction. | |
| 109 CodeGen::Addr MakeInstruction(uint16_t code, | |
| 110 uint32_t k, | |
| 111 CodeGen::Addr jt = CodeGen::kNullAddr, | |
| 112 CodeGen::Addr jf = CodeGen::kNullAddr) { | |
| 113 CodeGen::Addr res = gen_.MakeInstruction(code, k, jt, jf); | |
| 114 EXPECT_NE(CodeGen::kNullAddr, res); | |
| 115 | |
| 116 Hash digest; | |
| 117 if (code == BPF_JMP + BPF_JA) { | |
| 118 // TODO(mdempsky): Disallow use of JA. | |
| 119 digest = Lookup(jt); | |
| 120 } else { | |
| 121 digest = Hash(code, k, Lookup(jt), Lookup(jf)); | |
| 122 } | |
| 123 auto it = addr_hashes_.insert(std::make_pair(res, digest)); | |
| 124 EXPECT_EQ(digest, it.first->second); | |
| 125 | |
| 126 return res; | |
| 127 } | |
| 128 | |
| 129 // RunTest compiles the program and verifies that the output matches | |
| 130 // what is expected. It should be called at the end of each program | |
| 131 // test case. | |
| 132 void RunTest(CodeGen::Addr head) { | |
| 133 // Compile the program | |
| 134 CodeGen::Program program; | |
| 135 gen_.Compile(head, &program); | |
| 136 | |
| 137 // Walk the program backwards, and compute the hash for each instruction. | |
| 138 std::vector<Hash> prog_hashes(program.size()); | |
| 139 for (size_t i = program.size(); i > 0; --i) { | |
| 140 const sock_filter& insn = program.at(i - 1); | |
| 141 Hash& hash = prog_hashes.at(i - 1); | |
| 142 | |
| 143 if (BPF_CLASS(insn.code) == BPF_JMP) { | |
| 144 if (BPF_OP(insn.code) == BPF_JA) { | |
| 145 // The compiler adds JA instructions as needed, so skip them. | |
| 146 hash = prog_hashes.at(i + insn.k); | |
| 147 } else { | |
| 148 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), | |
| 149 prog_hashes.at(i + insn.jf)); | |
| 150 } | |
| 151 } else if (BPF_CLASS(insn.code) == BPF_RET) { | |
| 152 hash = Hash(insn.code, insn.k); | |
| 153 } else { | |
| 154 hash = Hash(insn.code, insn.k, prog_hashes.at(i)); | |
| 155 } | |
| 156 } | |
| 157 | |
| 158 EXPECT_EQ(Lookup(head), prog_hashes.at(0)); | |
| 159 } | |
| 160 | |
| 161 private: | |
| 162 const Hash& Lookup(CodeGen::Addr next) { | |
| 163 if (next == CodeGen::kNullAddr) { | |
| 164 return Hash::kZero; | |
| 165 } | |
| 166 auto it = addr_hashes_.find(next); | |
| 167 EXPECT_NE(addr_hashes_.end(), it); | |
| 168 return it->second; | |
|
jln (very slow on Chromium)
2014/11/14 04:54:29
It feels wrong to cause a crash here if EXPECT_NE
mdempsky
2014/11/17 23:00:05
Done.
| |
| 169 } | |
| 170 | |
| 171 CodeGen gen_; | |
| 172 std::map<CodeGen::Addr, Hash> addr_hashes_; | |
| 173 | |
| 174 DISALLOW_COPY_AND_ASSIGN(ProgramTest); | |
| 175 }; | |
| 176 | |
| 177 TEST_F(ProgramTest, OneInstruction) { | |
| 75 // Create the most basic valid BPF program: | 178 // Create the most basic valid BPF program: |
| 76 // RET 0 | 179 // RET 0 |
| 77 CodeGen::Addr head = MakeInstruction(BPF_RET + BPF_K, 0); | 180 CodeGen::Addr head = MakeInstruction(BPF_RET + BPF_K, 0); |
| 78 RunTest(head, NO_FLAGS); | 181 RunTest(head); |
| 79 } | 182 } |
| 80 | 183 |
| 81 TEST_P(ProgramTest, SimpleBranch) { | 184 TEST_F(ProgramTest, SimpleBranch) { |
| 82 // Create a program with a single branch: | 185 // Create a program with a single branch: |
| 83 // JUMP if eq 42 then $0 else $1 | 186 // JUMP if eq 42 then $0 else $1 |
| 84 // 0: RET 1 | 187 // 0: RET 1 |
| 85 // 1: RET 0 | 188 // 1: RET 0 |
| 86 CodeGen::Addr head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, | 189 CodeGen::Addr head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, |
| 87 MakeInstruction(BPF_RET + BPF_K, 1), | 190 MakeInstruction(BPF_RET + BPF_K, 1), |
| 88 MakeInstruction(BPF_RET + BPF_K, 0)); | 191 MakeInstruction(BPF_RET + BPF_K, 0)); |
| 89 RunTest(head, NO_FLAGS); | 192 RunTest(head); |
| 90 } | 193 } |
| 91 | 194 |
| 92 TEST_P(ProgramTest, AtypicalBranch) { | 195 TEST_F(ProgramTest, AtypicalBranch) { |
| 93 // Create a program with a single branch: | 196 // Create a program with a single branch: |
| 94 // JUMP if eq 42 then $0 else $0 | 197 // JUMP if eq 42 then $0 else $0 |
| 95 // 0: RET 0 | 198 // 0: RET 0 |
| 96 | 199 |
| 97 CodeGen::Addr ret = MakeInstruction(BPF_RET + BPF_K, 0); | 200 CodeGen::Addr ret = MakeInstruction(BPF_RET + BPF_K, 0); |
| 98 CodeGen::Addr head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); | 201 CodeGen::Addr head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); |
| 99 | 202 |
| 100 // N.B.: As the instructions in both sides of the branch are already | 203 // N.B.: As the instructions in both sides of the branch are already |
| 101 // the same object, we do not actually have any "mergeable" branches. | 204 // the same object, we do not actually have any "mergeable" branches. |
| 102 // This needs to be reflected in our choice of "flags". | 205 // This needs to be reflected in our choice of "flags". |
| 103 RunTest(head, NO_FLAGS); | 206 RunTest(head); |
| 104 } | 207 } |
| 105 | 208 |
| 106 TEST_P(ProgramTest, Complex) { | 209 TEST_F(ProgramTest, Complex) { |
| 107 // Creates a basic BPF program that we'll use to test some of the code: | 210 // Creates a basic BPF program that we'll use to test some of the code: |
| 108 // JUMP if eq 42 the $0 else $1 (insn6) | 211 // JUMP if eq 42 the $0 else $1 (insn6) |
| 109 // 0: LD 23 (insn5) | 212 // 0: LD 23 (insn5) |
| 110 // 1: JUMP if eq 42 then $2 else $4 (insn4) | 213 // 1: JUMP if eq 42 then $2 else $4 (insn4) |
| 111 // 2: JUMP to $3 (insn2) | 214 // 2: JUMP to $3 (insn2) |
| 112 // 3: LD 42 (insn1) | 215 // 3: LD 42 (insn1) |
| 113 // RET 42 (insn0) | 216 // RET 42 (insn0) |
| 114 // 4: LD 42 (insn3) | 217 // 4: LD 42 (insn3) |
| 115 // RET 42 (insn3+) | 218 // RET 42 (insn3+) |
| 116 CodeGen::Addr insn0 = MakeInstruction(BPF_RET + BPF_K, 42); | 219 CodeGen::Addr insn0 = MakeInstruction(BPF_RET + BPF_K, 42); |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 128 | 231 |
| 129 // Force a basic block that ends in neither a jump instruction nor a return | 232 // Force a basic block that ends in neither a jump instruction nor a return |
| 130 // instruction. It only contains "insn5". This exercises one of the less | 233 // instruction. It only contains "insn5". This exercises one of the less |
| 131 // common code paths in the topo-sort algorithm. | 234 // common code paths in the topo-sort algorithm. |
| 132 // This also gives us a diamond-shaped pattern in our graph, which stresses | 235 // This also gives us a diamond-shaped pattern in our graph, which stresses |
| 133 // another aspect of the topo-sort algorithm (namely, the ability to | 236 // another aspect of the topo-sort algorithm (namely, the ability to |
| 134 // correctly count the incoming branches for subtrees that are not disjunct). | 237 // correctly count the incoming branches for subtrees that are not disjunct). |
| 135 CodeGen::Addr insn6 = | 238 CodeGen::Addr insn6 = |
| 136 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); | 239 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); |
| 137 | 240 |
| 138 RunTest(insn6, HAS_MERGEABLE_TAILS); | 241 RunTest(insn6); |
| 139 } | 242 } |
| 140 | 243 |
| 141 TEST_P(ProgramTest, ConfusingTails) { | 244 TEST_F(ProgramTest, ConfusingTails) { |
| 142 // This simple program demonstrates https://crbug.com/351103/ | 245 // This simple program demonstrates https://crbug.com/351103/ |
| 143 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could | 246 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could |
| 144 // be tempted to merge them since they are the same. However, they are | 247 // be tempted to merge them since they are the same. However, they are |
| 145 // not mergeable because they fall-through to non semantically equivalent | 248 // not mergeable because they fall-through to non semantically equivalent |
| 146 // blocks. | 249 // blocks. |
| 147 // Without the fix for this bug, this program should trigger the check in | 250 // Without the fix for this bug, this program should trigger the check in |
| 148 // CompileAndCompare: the serialized graphs from the program and its compiled | 251 // CompileAndCompare: the serialized graphs from the program and its compiled |
| 149 // version will differ. | 252 // version will differ. |
| 150 // | 253 // |
| 151 // 0) LOAD 1 // ??? | 254 // 0) LOAD 1 // ??? |
| 152 // 1) if A == 0x1; then JMP 2 else JMP 3 | 255 // 1) if A == 0x1; then JMP 2 else JMP 3 |
| 153 // 2) LOAD 0 // System call number | 256 // 2) LOAD 0 // System call number |
| 154 // 3) if A == 0x2; then JMP 4 else JMP 5 | 257 // 3) if A == 0x2; then JMP 4 else JMP 5 |
| 155 // 4) LOAD 0 // System call number | 258 // 4) LOAD 0 // System call number |
| 156 // 5) if A == 0x1; then JMP 6 else JMP 7 | 259 // 5) if A == 0x1; then JMP 6 else JMP 7 |
| 157 // 6) RET 0 | 260 // 6) RET 0 |
| 158 // 7) RET 1 | 261 // 7) RET 1 |
| 159 | 262 |
| 160 CodeGen::Addr i7 = MakeInstruction(BPF_RET + BPF_K, 1); | 263 CodeGen::Addr i7 = MakeInstruction(BPF_RET + BPF_K, 1); |
| 161 CodeGen::Addr i6 = MakeInstruction(BPF_RET + BPF_K, 0); | 264 CodeGen::Addr i6 = MakeInstruction(BPF_RET + BPF_K, 0); |
| 162 CodeGen::Addr i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | 265 CodeGen::Addr i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); |
| 163 CodeGen::Addr i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | 266 CodeGen::Addr i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); |
| 164 CodeGen::Addr i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 267 CodeGen::Addr i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
| 165 CodeGen::Addr i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | 268 CodeGen::Addr i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); |
| 166 CodeGen::Addr i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 269 CodeGen::Addr i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
| 167 CodeGen::Addr i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 270 CodeGen::Addr i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
| 168 | 271 |
| 169 RunTest(i0, NO_FLAGS); | 272 RunTest(i0); |
| 170 } | 273 } |
| 171 | 274 |
| 172 TEST_P(ProgramTest, ConfusingTailsBasic) { | 275 TEST_F(ProgramTest, ConfusingTailsBasic) { |
| 173 // Without the fix for https://crbug.com/351103/, (see | 276 // Without the fix for https://crbug.com/351103/, (see |
| 174 // SampleProgramConfusingTails()), this would generate a cyclic graph and | 277 // SampleProgramConfusingTails()), this would generate a cyclic graph and |
| 175 // crash as the two "LOAD 0" instructions would get merged. | 278 // crash as the two "LOAD 0" instructions would get merged. |
| 176 // | 279 // |
| 177 // 0) LOAD 1 // ??? | 280 // 0) LOAD 1 // ??? |
| 178 // 1) if A == 0x1; then JMP 2 else JMP 3 | 281 // 1) if A == 0x1; then JMP 2 else JMP 3 |
| 179 // 2) LOAD 0 // System call number | 282 // 2) LOAD 0 // System call number |
| 180 // 3) if A == 0x2; then JMP 4 else JMP 5 | 283 // 3) if A == 0x2; then JMP 4 else JMP 5 |
| 181 // 4) LOAD 0 // System call number | 284 // 4) LOAD 0 // System call number |
| 182 // 5) RET 1 | 285 // 5) RET 1 |
| 183 | 286 |
| 184 CodeGen::Addr i5 = MakeInstruction(BPF_RET + BPF_K, 1); | 287 CodeGen::Addr i5 = MakeInstruction(BPF_RET + BPF_K, 1); |
| 185 CodeGen::Addr i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); | 288 CodeGen::Addr i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); |
| 186 CodeGen::Addr i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 289 CodeGen::Addr i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
| 187 CodeGen::Addr i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); | 290 CodeGen::Addr i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); |
| 188 CodeGen::Addr i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 291 CodeGen::Addr i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
| 189 CodeGen::Addr i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 292 CodeGen::Addr i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
| 190 | 293 |
| 191 RunTest(i0, NO_FLAGS); | 294 RunTest(i0); |
| 192 } | 295 } |
| 193 | 296 |
| 194 TEST_P(ProgramTest, ConfusingTailsMergeable) { | 297 TEST_F(ProgramTest, ConfusingTailsMergeable) { |
| 195 // This is similar to SampleProgramConfusingTails(), except that | 298 // This is similar to SampleProgramConfusingTails(), except that |
| 196 // instructions 2 and 4 are now RET instructions. | 299 // instructions 2 and 4 are now RET instructions. |
| 197 // In PointerCompare(), this exercises the path where two blocks are of the | 300 // In PointerCompare(), this exercises the path where two blocks are of the |
| 198 // same length and identical and the last instruction is a JMP or RET, so the | 301 // same length and identical and the last instruction is a JMP or RET, so the |
| 199 // following blocks don't need to be looked at and the blocks are mergeable. | 302 // following blocks don't need to be looked at and the blocks are mergeable. |
| 200 // | 303 // |
| 201 // 0) LOAD 1 // ??? | 304 // 0) LOAD 1 // ??? |
| 202 // 1) if A == 0x1; then JMP 2 else JMP 3 | 305 // 1) if A == 0x1; then JMP 2 else JMP 3 |
| 203 // 2) RET 42 | 306 // 2) RET 42 |
| 204 // 3) if A == 0x2; then JMP 4 else JMP 5 | 307 // 3) if A == 0x2; then JMP 4 else JMP 5 |
| 205 // 4) RET 42 | 308 // 4) RET 42 |
| 206 // 5) if A == 0x1; then JMP 6 else JMP 7 | 309 // 5) if A == 0x1; then JMP 6 else JMP 7 |
| 207 // 6) RET 0 | 310 // 6) RET 0 |
| 208 // 7) RET 1 | 311 // 7) RET 1 |
| 209 | 312 |
| 210 CodeGen::Addr i7 = MakeInstruction(BPF_RET + BPF_K, 1); | 313 CodeGen::Addr i7 = MakeInstruction(BPF_RET + BPF_K, 1); |
| 211 CodeGen::Addr i6 = MakeInstruction(BPF_RET + BPF_K, 0); | 314 CodeGen::Addr i6 = MakeInstruction(BPF_RET + BPF_K, 0); |
| 212 CodeGen::Addr i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); | 315 CodeGen::Addr i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); |
| 213 CodeGen::Addr i4 = MakeInstruction(BPF_RET + BPF_K, 42); | 316 CodeGen::Addr i4 = MakeInstruction(BPF_RET + BPF_K, 42); |
| 214 CodeGen::Addr i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); | 317 CodeGen::Addr i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); |
| 215 CodeGen::Addr i2 = MakeInstruction(BPF_RET + BPF_K, 42); | 318 CodeGen::Addr i2 = MakeInstruction(BPF_RET + BPF_K, 42); |
| 216 CodeGen::Addr i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); | 319 CodeGen::Addr i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); |
| 217 CodeGen::Addr i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); | 320 CodeGen::Addr i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); |
| 218 | 321 |
| 219 RunTest(i0, HAS_MERGEABLE_TAILS); | 322 RunTest(i0); |
| 220 } | 323 } |
| 221 | 324 |
| 222 void MakeInstruction(CodeGenUnittestHelper* codegen, | |
| 223 CodeGen::Addr program, | |
| 224 int) { | |
| 225 // Nothing to do here | |
| 226 } | |
| 227 | |
| 228 void FindBranchTargets(CodeGenUnittestHelper* codegen, CodeGen::Addr prg, int) { | |
| 229 BranchTargets branch_targets; | |
| 230 codegen->FindBranchTargets(*prg, &branch_targets); | |
| 231 | |
| 232 // Verifying the general properties that should be true for every | |
| 233 // well-formed BPF program. | |
| 234 // Perform a depth-first traversal of the BPF program an verify that all | |
| 235 // targets of BPF_JMP instructions are represented in the "branch_targets". | |
| 236 // At the same time, compute a set of both the branch targets and all the | |
| 237 // instructions in the program. | |
| 238 std::vector<CodeGen::Addr> stack; | |
| 239 std::set<CodeGen::Addr> all_instructions; | |
| 240 std::set<CodeGen::Addr> target_instructions; | |
| 241 BranchTargets::const_iterator end = branch_targets.end(); | |
| 242 for (CodeGen::Addr insn = prg;;) { | |
| 243 all_instructions.insert(insn); | |
| 244 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 245 target_instructions.insert(insn->jt_ptr); | |
| 246 ASSERT_TRUE(insn->jt_ptr != NULL); | |
| 247 ASSERT_TRUE(branch_targets.find(insn->jt_ptr) != end); | |
| 248 if (BPF_OP(insn->code) != BPF_JA) { | |
| 249 target_instructions.insert(insn->jf_ptr); | |
| 250 ASSERT_TRUE(insn->jf_ptr != NULL); | |
| 251 ASSERT_TRUE(branch_targets.find(insn->jf_ptr) != end); | |
| 252 stack.push_back(insn->jf_ptr); | |
| 253 } | |
| 254 insn = insn->jt_ptr; | |
| 255 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
| 256 ASSERT_TRUE(insn->next == NULL); | |
| 257 if (stack.empty()) { | |
| 258 break; | |
| 259 } | |
| 260 insn = stack.back(); | |
| 261 stack.pop_back(); | |
| 262 } else { | |
| 263 ASSERT_TRUE(insn->next != NULL); | |
| 264 insn = insn->next; | |
| 265 } | |
| 266 } | |
| 267 ASSERT_TRUE(target_instructions.size() == branch_targets.size()); | |
| 268 | |
| 269 // We can now subtract the set of the branch targets from the set of all | |
| 270 // instructions. This gives us a set with the instructions that nobody | |
| 271 // ever jumps to. Verify that they are no included in the | |
| 272 // "branch_targets" that FindBranchTargets() computed for us. | |
| 273 Instructions non_target_instructions(all_instructions.size() - | |
| 274 target_instructions.size()); | |
| 275 set_difference(all_instructions.begin(), | |
| 276 all_instructions.end(), | |
| 277 target_instructions.begin(), | |
| 278 target_instructions.end(), | |
| 279 non_target_instructions.begin()); | |
| 280 for (Instructions::const_iterator iter = non_target_instructions.begin(); | |
| 281 iter != non_target_instructions.end(); | |
| 282 ++iter) { | |
| 283 ASSERT_TRUE(branch_targets.find(*iter) == end); | |
| 284 } | |
| 285 } | |
| 286 | |
| 287 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper* codegen, | |
| 288 CodeGen::Addr prg, | |
| 289 int) { | |
| 290 BranchTargets branch_targets; | |
| 291 codegen->FindBranchTargets(*prg, &branch_targets); | |
| 292 TargetsToBlocks all_blocks; | |
| 293 BasicBlock* first_block = | |
| 294 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); | |
| 295 ASSERT_TRUE(first_block != NULL); | |
| 296 ASSERT_TRUE(first_block->instructions.size() > 0); | |
| 297 CodeGen::Addr first_insn = first_block->instructions[0]; | |
| 298 | |
| 299 // Basic blocks are supposed to start with a branch target and end with | |
| 300 // either a jump or a return instruction. It can also end, if the next | |
| 301 // instruction forms the beginning of a new basic block. There should be | |
| 302 // no other jumps or return instructions in the middle of a basic block. | |
| 303 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin(); | |
| 304 bb_iter != all_blocks.end(); | |
| 305 ++bb_iter) { | |
| 306 BasicBlock* bb = bb_iter->second; | |
| 307 ASSERT_TRUE(bb != NULL); | |
| 308 ASSERT_TRUE(bb->instructions.size() > 0); | |
| 309 CodeGen::Addr insn = bb->instructions[0]; | |
| 310 ASSERT_TRUE(insn == first_insn || | |
| 311 branch_targets.find(insn) != branch_targets.end()); | |
| 312 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;) { | |
| 313 insn = *insn_iter; | |
| 314 if (++insn_iter != bb->instructions.end()) { | |
| 315 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_JMP); | |
| 316 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_RET); | |
| 317 } else { | |
| 318 ASSERT_TRUE(BPF_CLASS(insn->code) == BPF_JMP || | |
| 319 BPF_CLASS(insn->code) == BPF_RET || | |
| 320 branch_targets.find(insn->next) != branch_targets.end()); | |
| 321 break; | |
| 322 } | |
| 323 ASSERT_TRUE(branch_targets.find(*insn_iter) == branch_targets.end()); | |
| 324 } | |
| 325 } | |
| 326 } | |
| 327 | |
| 328 void MergeTails(CodeGenUnittestHelper* codegen, CodeGen::Addr prg, int flags) { | |
| 329 BranchTargets branch_targets; | |
| 330 codegen->FindBranchTargets(*prg, &branch_targets); | |
| 331 TargetsToBlocks all_blocks; | |
| 332 BasicBlock* first_block = | |
| 333 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks); | |
| 334 | |
| 335 // The shape of our graph and thus the function of our program should | |
| 336 // still be unchanged after we run MergeTails(). We verify this by | |
| 337 // serializing the graph and verifying that it is still the same. | |
| 338 // We also verify that at least some of the edges changed because of | |
| 339 // tail merging. | |
| 340 std::string graph[2]; | |
| 341 std::string edges[2]; | |
| 342 | |
| 343 // The loop executes twice. After the first run, we call MergeTails() on | |
| 344 // our graph. | |
| 345 for (int i = 0;;) { | |
| 346 // Traverse the entire program in depth-first order. | |
| 347 std::vector<BasicBlock*> stack; | |
| 348 for (BasicBlock* bb = first_block;;) { | |
| 349 // Serialize the instructions in this basic block. In general, we only | |
| 350 // need to serialize "code" and "k"; except for a BPF_JA instruction | |
| 351 // where "k" isn't set. | |
| 352 // The stream of instructions should be unchanged after MergeTails(). | |
| 353 for (Instructions::const_iterator iter = bb->instructions.begin(); | |
| 354 iter != bb->instructions.end(); | |
| 355 ++iter) { | |
| 356 graph[i].append(reinterpret_cast<char*>(&(*iter)->code), | |
| 357 sizeof((*iter)->code)); | |
| 358 if (BPF_CLASS((*iter)->code) != BPF_JMP || | |
| 359 BPF_OP((*iter)->code) != BPF_JA) { | |
| 360 graph[i].append(reinterpret_cast<char*>(&(*iter)->k), | |
| 361 sizeof((*iter)->k)); | |
| 362 } | |
| 363 } | |
| 364 | |
| 365 // Also serialize the addresses the basic blocks as we encounter them. | |
| 366 // This will change as basic blocks are coalesed by MergeTails(). | |
| 367 edges[i].append(reinterpret_cast<char*>(&bb), sizeof(bb)); | |
| 368 | |
| 369 // Depth-first traversal of the graph. We only ever need to look at the | |
| 370 // very last instruction in the basic block, as that is the only one that | |
| 371 // can change code flow. | |
| 372 CodeGen::Addr insn = bb->instructions.back(); | |
| 373 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 374 // For jump instructions, we need to remember the "false" branch while | |
| 375 // traversing the "true" branch. This is not necessary for BPF_JA which | |
| 376 // only has a single branch. | |
| 377 if (BPF_OP(insn->code) != BPF_JA) { | |
| 378 stack.push_back(all_blocks[insn->jf_ptr]); | |
| 379 } | |
| 380 bb = all_blocks[insn->jt_ptr]; | |
| 381 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
| 382 // After a BPF_RET, see if we need to back track. | |
| 383 if (stack.empty()) { | |
| 384 break; | |
| 385 } | |
| 386 bb = stack.back(); | |
| 387 stack.pop_back(); | |
| 388 } else { | |
| 389 // For "normal" instructions, just follow to the next basic block. | |
| 390 bb = all_blocks[insn->next]; | |
| 391 } | |
| 392 } | |
| 393 | |
| 394 // Our loop runs exactly two times. | |
| 395 if (++i > 1) { | |
| 396 break; | |
| 397 } | |
| 398 codegen->MergeTails(&all_blocks); | |
| 399 } | |
| 400 ASSERT_TRUE(graph[0] == graph[1]); | |
| 401 if (flags & HAS_MERGEABLE_TAILS) { | |
| 402 ASSERT_TRUE(edges[0] != edges[1]); | |
| 403 } else { | |
| 404 ASSERT_TRUE(edges[0] == edges[1]); | |
| 405 } | |
| 406 } | |
| 407 | |
| 408 void CompileAndCompare(CodeGenUnittestHelper* codegen, CodeGen::Addr prg, int) { | |
| 409 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it | |
| 410 // detects a problem. Typically, if anything goes wrong, this looks to the | |
| 411 // TopoSort algorithm as if there had been cycles in the input data. | |
| 412 // This provides a pretty good unittest. | |
| 413 // We hand-crafted the program returned by SampleProgram() to exercise | |
| 414 // several of the more interesting code-paths. See comments in | |
| 415 // SampleProgram() for details. | |
| 416 // In addition to relying on the internal consistency checks in the compiler, | |
| 417 // we also serialize the graph and the resulting BPF program and compare | |
| 418 // them. With the exception of BPF_JA instructions that might have been | |
| 419 // inserted, both instruction streams should be equivalent. | |
| 420 // As Compile() modifies the instructions, we have to serialize the graph | |
| 421 // before calling Compile(). | |
| 422 std::string source; | |
| 423 Instructions source_stack; | |
| 424 for (const Instruction* insn = prg, *next; insn; insn = next) { | |
| 425 if (BPF_CLASS(insn->code) == BPF_JMP) { | |
| 426 if (BPF_OP(insn->code) == BPF_JA) { | |
| 427 // Do not serialize BPF_JA instructions (see above). | |
| 428 next = insn->jt_ptr; | |
| 429 continue; | |
| 430 } else { | |
| 431 source_stack.push_back(insn->jf_ptr); | |
| 432 next = insn->jt_ptr; | |
| 433 } | |
| 434 } else if (BPF_CLASS(insn->code) == BPF_RET) { | |
| 435 if (source_stack.empty()) { | |
| 436 next = NULL; | |
| 437 } else { | |
| 438 next = source_stack.back(); | |
| 439 source_stack.pop_back(); | |
| 440 } | |
| 441 } else { | |
| 442 next = insn->next; | |
| 443 } | |
| 444 // Only serialize "code" and "k". That's all the information we need to | |
| 445 // compare. The rest of the information is encoded in the order of | |
| 446 // instructions. | |
| 447 source.append(reinterpret_cast<const char*>(&insn->code), | |
| 448 sizeof(insn->code)); | |
| 449 source.append(reinterpret_cast<const char*>(&insn->k), sizeof(insn->k)); | |
| 450 } | |
| 451 | |
| 452 // Compile the program | |
| 453 CodeGen::Program bpf; | |
| 454 codegen->Compile(prg, &bpf); | |
| 455 | |
| 456 // Serialize the resulting BPF instructions. | |
| 457 std::string assembly; | |
| 458 std::vector<int> assembly_stack; | |
| 459 for (int idx = 0; idx >= 0;) { | |
| 460 ASSERT_TRUE(idx < (int)bpf.size()); | |
| 461 struct sock_filter& insn = bpf[idx]; | |
| 462 if (BPF_CLASS(insn.code) == BPF_JMP) { | |
| 463 if (BPF_OP(insn.code) == BPF_JA) { | |
| 464 // Do not serialize BPF_JA instructions (see above). | |
| 465 idx += insn.k + 1; | |
| 466 continue; | |
| 467 } else { | |
| 468 assembly_stack.push_back(idx + insn.jf + 1); | |
| 469 idx += insn.jt + 1; | |
| 470 } | |
| 471 } else if (BPF_CLASS(insn.code) == BPF_RET) { | |
| 472 if (assembly_stack.empty()) { | |
| 473 idx = -1; | |
| 474 } else { | |
| 475 idx = assembly_stack.back(); | |
| 476 assembly_stack.pop_back(); | |
| 477 } | |
| 478 } else { | |
| 479 ++idx; | |
| 480 } | |
| 481 // Serialize the same information that we serialized before compilation. | |
| 482 assembly.append(reinterpret_cast<char*>(&insn.code), sizeof(insn.code)); | |
| 483 assembly.append(reinterpret_cast<char*>(&insn.k), sizeof(insn.k)); | |
| 484 } | |
| 485 ASSERT_TRUE(source == assembly); | |
| 486 } | |
| 487 | |
| 488 const ProgramTestFunc kProgramTestFuncs[] = { | |
| 489 MakeInstruction, | |
| 490 FindBranchTargets, | |
| 491 CutGraphIntoBasicBlocks, | |
| 492 MergeTails, | |
| 493 CompileAndCompare, | |
| 494 }; | |
| 495 | |
| 496 INSTANTIATE_TEST_CASE_P(CodeGen, | |
| 497 ProgramTest, | |
| 498 ::testing::ValuesIn(kProgramTestFuncs)); | |
| 499 | |
| 500 } // namespace | 325 } // namespace |
| 501 | |
| 502 } // namespace sandbox | 326 } // namespace sandbox |
| OLD | NEW |