Skip to content

Commit f46a8f3

Browse files
committed
Cleanup Wallace Tree multiplier, add data
Uses the optimised full-adder encoding when the #define is set (which is the case by default). Comments now detail the performance variability.
1 parent 5021afa commit f46a8f3

File tree

1 file changed

+136
-5
lines changed

1 file changed

+136
-5
lines changed

src/solvers/flattening/bv_utils.cpp

+136-5
Original file line numberDiff line numberDiff line change
@@ -664,14 +664,15 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
664664
INVARIANT(a.size() == b.size(), "groups should be of equal size");
665665
INVARIANT(a.size() == c.size(), "groups should be of equal size");
666666

667-
bvt s(a.size()), t(a.size());
667+
bvt s, t(a.size(), const_literal(false));
668+
s.reserve(a.size());
668669

669670
for(std::size_t bit=0; bit<a.size(); bit++)
670671
{
671-
// \todo reformulate using full_adder
672-
s[bit]=prop.lxor(a[bit], prop.lxor(b[bit], c[bit]));
673-
t[bit]=(bit==0)?const_literal(false):
674-
carry(a[bit-1], b[bit-1], c[bit-1]);
672+
literalt carry_out;
673+
s.push_back(full_adder(a[bit], b[bit], c[bit], carry_out));
674+
if(bit + 1 < a.size())
675+
t[bit + 1] = carry_out;
675676
}
676677

677678
new_pps.push_back(std::move(s));
@@ -691,6 +692,136 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
691692
// been observed to go up by 5%-10%, and on some models even by 20%.
692693
// #define WALLACE_TREE
693694

695+
// The following examples demonstrate the performance differences (with a
696+
// time-out of 7200 seconds):
697+
//
698+
// #ifndef BITWIDTH
699+
// #define BITWIDTH 8
700+
// #endif
701+
//
702+
// int main()
703+
// {
704+
// __CPROVER_bitvector[BITWIDTH] a, b;
705+
// __CPROVER_assert(a * 3 == a + a + a, "multiplication by 3");
706+
// __CPROVER_assert(3 * a == a + a + a, "multiplication by 3");
707+
// __CPROVER_bitvector[BITWIDTH] ab = a * b;
708+
// __CPROVER_bitvector[BITWIDTH * 2] ab_check =
709+
// (__CPROVER_bitvector[BITWIDTH * 2])a *
710+
// (__CPROVER_bitvector[BITWIDTH * 2])b;
711+
// __CPROVER_assert(
712+
// ab == (__CPROVER_bitvector[BITWIDTH])ab_check, "should pass");
713+
// }
714+
//
715+
// |----|-------------------|-------------------|
716+
// | | CaDiCaL | MiniSat2 |
717+
// |----|-------------------|-------------------|
718+
// | n | No tree | Wallace | No tree | Wallace |
719+
// |----|---------|---------|---------|---------|
720+
// | 1 | 0.00 | 0.00 | 0.00 | 0.00 |
721+
// | 2 | 0.00 | 0.00 | 0.00 | 0.00 |
722+
// | 3 | 0.01 | 0.01 | 0.00 | 0.00 |
723+
// | 4 | 0.01 | 0.01 | 0.01 | 0.01 |
724+
// | 5 | 0.04 | 0.04 | 0.01 | 0.01 |
725+
// | 6 | 0.11 | 0.13 | 0.04 | 0.05 |
726+
// | 7 | 0.28 | 0.46 | 0.15 | 0.27 |
727+
// | 8 | 0.50 | 1.55 | 0.90 | 1.06 |
728+
// | 9 | 2.22 | 3.63 | 3.40 | 5.85 |
729+
// | 10 | 2.79 | 4.81 | 4.32 | 28.41 |
730+
// | 11 | 8.48 | 4.45 | 38.24 | 98.55 |
731+
// | 12 | 14.52 | 4.86 | 115.00 | 140.11 |
732+
// | 13 | 33.62 | 5.56 | 210.24 | 805.59 |
733+
// | 14 | 37.23 | 6.11 | 776.75 | 689.96 |
734+
// | 15 | 108.65 | 7.86 | 1048.92 | 1421.74 |
735+
// | 16 | 102.61 | 14.08 | 1628.01 | timeout |
736+
// | 17 | 117.89 | 18.53 | 4148.73 | timeout |
737+
// | 18 | 209.40 | 7.97 | 2760.51 | timeout |
738+
// | 19 | 168.21 | 18.04 | 2514.21 | timeout |
739+
// | 20 | 566.76 | 12.68 | 2609.09 | timeout |
740+
// | 21 | 786.31 | 23.80 | 2232.77 | timeout |
741+
// | 22 | 817.74 | 17.64 | 3866.70 | timeout |
742+
// | 23 | 1102.76 | 24.19 | timeout | timeout |
743+
// | 24 | 1319.59 | 27.37 | timeout | timeout |
744+
// | 25 | 1786.11 | 27.10 | timeout | timeout |
745+
// | 26 | 1952.18 | 31.08 | timeout | timeout |
746+
// | 27 | 6908.48 | 27.92 | timeout | timeout |
747+
// | 28 | 6919.34 | 36.63 | timeout | timeout |
748+
// | 29 | timeout | 27.95 | timeout | timeout |
749+
// | 30 | timeout | 36.94 | timeout | timeout |
750+
// | 31 | timeout | 38.41 | timeout | timeout |
751+
// | 32 | timeout | 33.06 | timeout | timeout |
752+
// |----|---------|---------|---------|---------|
753+
//
754+
// In summary, Wallace tree reduction is substantially more efficient with
755+
// CaDiCaL on the above code for all bit width >= 11, but somewhat slower with
756+
// MiniSat.
757+
//
758+
// #ifndef BITWIDTH
759+
// #define BITWIDTH 8
760+
// #endif
761+
//
762+
// int main()
763+
// {
764+
// __CPROVER_bitvector[BITWIDTH] a, b, c, ab, bc, abc;
765+
// ab = a * b;
766+
// __CPROVER_bitvector[BITWIDTH * 2] ab_check =
767+
// (__CPROVER_bitvector[BITWIDTH * 2])a *
768+
// (__CPROVER_bitvector[BITWIDTH * 2])b;
769+
// __CPROVER_assume(ab == ab_check);
770+
// bc = b * c;
771+
// __CPROVER_bitvector[BITWIDTH * 2] bc_check =
772+
// (__CPROVER_bitvector[BITWIDTH * 2])b *
773+
// (__CPROVER_bitvector[BITWIDTH * 2])c;
774+
// __CPROVER_assume(bc == bc_check);
775+
// abc = ab * c;
776+
// __CPROVER_bitvector[BITWIDTH * 2] abc_check =
777+
// (__CPROVER_bitvector[BITWIDTH * 2])ab *
778+
// (__CPROVER_bitvector[BITWIDTH * 2])c;
779+
// __CPROVER_assume(abc == abc_check);
780+
// __CPROVER_assert(abc == a * bc, "associativity");
781+
// }
782+
//
783+
// |----|-------------------|-------------------|
784+
// | | CaDiCaL | MiniSat2 |
785+
// |----|-------------------|-------------------|
786+
// | n | No tree | Wallace | No tree | Wallace |
787+
// |----|---------|---------|---------|---------|
788+
// | 1 | 0.00 | 0.00 | 0.01 | 0.01 |
789+
// | 2 | 0.01 | 0.01 | 0.01 | 0.01 |
790+
// | 3 | 0.02 | 0.03 | 0.01 | 0.01 |
791+
// | 4 | 0.05 | 0.07 | 0.02 | 0.02 |
792+
// | 5 | 0.17 | 0.18 | 0.04 | 0.04 |
793+
// | 6 | 0.50 | 0.63 | 0.13 | 0.14 |
794+
// | 7 | 1.01 | 1.15 | 0.43 | 0.47 |
795+
// | 8 | 1.56 | 1.76 | 3.35 | 2.39 |
796+
// | 9 | 2.07 | 2.48 | 11.20 | 12.64 |
797+
// | 10 | 3.58 | 3.88 | 20.23 | 23.49 |
798+
// | 11 | 5.84 | 7.46 | 49.32 | 39.86 |
799+
// | 12 | 11.71 | 16.85 | 69.32 | 156.57 |
800+
// | 13 | 33.22 | 41.95 | 250.91 | 294.73 |
801+
// | 14 | 76.27 | 109.59 | 226.98 | 259.84 |
802+
// | 15 | 220.01 | 330.10 | 783.73 | 1160.47 |
803+
// | 16 | 591.91 | 981.16 | 2712.20 | 4286.60 |
804+
// | 17 | 1634.97 | 2574.81 | timeout | timeout |
805+
// | 18 | 4680.16 | timeout | timeout | timeout |
806+
// | 19 | timeout | timeout | timeout | timeout |
807+
// | 20 | timeout | timeout | timeout | timeout |
808+
// | 21 | timeout | timeout | timeout | timeout |
809+
// | 22 | timeout | timeout | timeout | timeout |
810+
// | 23 | timeout | timeout | timeout | timeout |
811+
// | 24 | timeout | timeout | timeout | timeout |
812+
// | 25 | timeout | timeout | timeout | timeout |
813+
// | 26 | timeout | timeout | timeout | timeout |
814+
// | 27 | timeout | timeout | timeout | timeout |
815+
// | 28 | timeout | timeout | timeout | timeout |
816+
// | 29 | timeout | timeout | timeout | timeout |
817+
// | 30 | timeout | timeout | timeout | timeout |
818+
// | 31 | timeout | timeout | timeout | timeout |
819+
// | 32 | timeout | timeout | timeout | timeout |
820+
// |----|---------|---------|---------|---------|
821+
//
822+
// In summary, Wallace tree reduction is slower for both solvers and all
823+
// bit widths (except BITWIDTH==8 with MiniSat2).
824+
694825
bvt bv_utilst::unsigned_multiplier(const bvt &_op0, const bvt &_op1)
695826
{
696827
bvt op0=_op0, op1=_op1;

0 commit comments

Comments
 (0)