@@ -664,14 +664,15 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
664
664
INVARIANT (a.size () == b.size (), " groups should be of equal size" );
665
665
INVARIANT (a.size () == c.size (), " groups should be of equal size" );
666
666
667
- bvt s (a.size ()), t (a.size ());
667
+ bvt s, t (a.size (), const_literal (false ));
668
+ s.reserve (a.size ());
668
669
669
670
for (std::size_t bit=0 ; bit<a.size (); bit++)
670
671
{
671
- // \todo reformulate using full_adder
672
- s[bit]=prop. lxor ( a[bit], prop. lxor ( b[bit], c[bit]));
673
- t[bit]= (bit== 0 )? const_literal ( false ):
674
- carry (a [bit- 1 ], b[bit- 1 ], c[bit- 1 ]) ;
672
+ literalt carry_out;
673
+ s. push_back ( full_adder ( a[bit], b[bit], c[bit], carry_out ));
674
+ if (bit + 1 < a. size ())
675
+ t [bit + 1 ] = carry_out ;
675
676
}
676
677
677
678
new_pps.push_back (std::move (s));
@@ -691,6 +692,136 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
691
692
// been observed to go up by 5%-10%, and on some models even by 20%.
692
693
// #define WALLACE_TREE
693
694
695
+ // The following examples demonstrate the performance differences (with a
696
+ // time-out of 7200 seconds):
697
+ //
698
+ // #ifndef BITWIDTH
699
+ // #define BITWIDTH 8
700
+ // #endif
701
+ //
702
+ // int main()
703
+ // {
704
+ // __CPROVER_bitvector[BITWIDTH] a, b;
705
+ // __CPROVER_assert(a * 3 == a + a + a, "multiplication by 3");
706
+ // __CPROVER_assert(3 * a == a + a + a, "multiplication by 3");
707
+ // __CPROVER_bitvector[BITWIDTH] ab = a * b;
708
+ // __CPROVER_bitvector[BITWIDTH * 2] ab_check =
709
+ // (__CPROVER_bitvector[BITWIDTH * 2])a *
710
+ // (__CPROVER_bitvector[BITWIDTH * 2])b;
711
+ // __CPROVER_assert(
712
+ // ab == (__CPROVER_bitvector[BITWIDTH])ab_check, "should pass");
713
+ // }
714
+ //
715
+ // |----|-------------------|-------------------|
716
+ // | | CaDiCaL | MiniSat2 |
717
+ // |----|-------------------|-------------------|
718
+ // | n | No tree | Wallace | No tree | Wallace |
719
+ // |----|---------|---------|---------|---------|
720
+ // | 1 | 0.00 | 0.00 | 0.00 | 0.00 |
721
+ // | 2 | 0.00 | 0.00 | 0.00 | 0.00 |
722
+ // | 3 | 0.01 | 0.01 | 0.00 | 0.00 |
723
+ // | 4 | 0.01 | 0.01 | 0.01 | 0.01 |
724
+ // | 5 | 0.04 | 0.04 | 0.01 | 0.01 |
725
+ // | 6 | 0.11 | 0.13 | 0.04 | 0.05 |
726
+ // | 7 | 0.28 | 0.46 | 0.15 | 0.27 |
727
+ // | 8 | 0.50 | 1.55 | 0.90 | 1.06 |
728
+ // | 9 | 2.22 | 3.63 | 3.40 | 5.85 |
729
+ // | 10 | 2.79 | 4.81 | 4.32 | 28.41 |
730
+ // | 11 | 8.48 | 4.45 | 38.24 | 98.55 |
731
+ // | 12 | 14.52 | 4.86 | 115.00 | 140.11 |
732
+ // | 13 | 33.62 | 5.56 | 210.24 | 805.59 |
733
+ // | 14 | 37.23 | 6.11 | 776.75 | 689.96 |
734
+ // | 15 | 108.65 | 7.86 | 1048.92 | 1421.74 |
735
+ // | 16 | 102.61 | 14.08 | 1628.01 | timeout |
736
+ // | 17 | 117.89 | 18.53 | 4148.73 | timeout |
737
+ // | 18 | 209.40 | 7.97 | 2760.51 | timeout |
738
+ // | 19 | 168.21 | 18.04 | 2514.21 | timeout |
739
+ // | 20 | 566.76 | 12.68 | 2609.09 | timeout |
740
+ // | 21 | 786.31 | 23.80 | 2232.77 | timeout |
741
+ // | 22 | 817.74 | 17.64 | 3866.70 | timeout |
742
+ // | 23 | 1102.76 | 24.19 | timeout | timeout |
743
+ // | 24 | 1319.59 | 27.37 | timeout | timeout |
744
+ // | 25 | 1786.11 | 27.10 | timeout | timeout |
745
+ // | 26 | 1952.18 | 31.08 | timeout | timeout |
746
+ // | 27 | 6908.48 | 27.92 | timeout | timeout |
747
+ // | 28 | 6919.34 | 36.63 | timeout | timeout |
748
+ // | 29 | timeout | 27.95 | timeout | timeout |
749
+ // | 30 | timeout | 36.94 | timeout | timeout |
750
+ // | 31 | timeout | 38.41 | timeout | timeout |
751
+ // | 32 | timeout | 33.06 | timeout | timeout |
752
+ // |----|---------|---------|---------|---------|
753
+ //
754
+ // In summary, Wallace tree reduction is substantially more efficient with
755
+ // CaDiCaL on the above code for all bit width >= 11, but somewhat slower with
756
+ // MiniSat.
757
+ //
758
+ // #ifndef BITWIDTH
759
+ // #define BITWIDTH 8
760
+ // #endif
761
+ //
762
+ // int main()
763
+ // {
764
+ // __CPROVER_bitvector[BITWIDTH] a, b, c, ab, bc, abc;
765
+ // ab = a * b;
766
+ // __CPROVER_bitvector[BITWIDTH * 2] ab_check =
767
+ // (__CPROVER_bitvector[BITWIDTH * 2])a *
768
+ // (__CPROVER_bitvector[BITWIDTH * 2])b;
769
+ // __CPROVER_assume(ab == ab_check);
770
+ // bc = b * c;
771
+ // __CPROVER_bitvector[BITWIDTH * 2] bc_check =
772
+ // (__CPROVER_bitvector[BITWIDTH * 2])b *
773
+ // (__CPROVER_bitvector[BITWIDTH * 2])c;
774
+ // __CPROVER_assume(bc == bc_check);
775
+ // abc = ab * c;
776
+ // __CPROVER_bitvector[BITWIDTH * 2] abc_check =
777
+ // (__CPROVER_bitvector[BITWIDTH * 2])ab *
778
+ // (__CPROVER_bitvector[BITWIDTH * 2])c;
779
+ // __CPROVER_assume(abc == abc_check);
780
+ // __CPROVER_assert(abc == a * bc, "associativity");
781
+ // }
782
+ //
783
+ // |----|-------------------|-------------------|
784
+ // | | CaDiCaL | MiniSat2 |
785
+ // |----|-------------------|-------------------|
786
+ // | n | No tree | Wallace | No tree | Wallace |
787
+ // |----|---------|---------|---------|---------|
788
+ // | 1 | 0.00 | 0.00 | 0.01 | 0.01 |
789
+ // | 2 | 0.01 | 0.01 | 0.01 | 0.01 |
790
+ // | 3 | 0.02 | 0.03 | 0.01 | 0.01 |
791
+ // | 4 | 0.05 | 0.07 | 0.02 | 0.02 |
792
+ // | 5 | 0.17 | 0.18 | 0.04 | 0.04 |
793
+ // | 6 | 0.50 | 0.63 | 0.13 | 0.14 |
794
+ // | 7 | 1.01 | 1.15 | 0.43 | 0.47 |
795
+ // | 8 | 1.56 | 1.76 | 3.35 | 2.39 |
796
+ // | 9 | 2.07 | 2.48 | 11.20 | 12.64 |
797
+ // | 10 | 3.58 | 3.88 | 20.23 | 23.49 |
798
+ // | 11 | 5.84 | 7.46 | 49.32 | 39.86 |
799
+ // | 12 | 11.71 | 16.85 | 69.32 | 156.57 |
800
+ // | 13 | 33.22 | 41.95 | 250.91 | 294.73 |
801
+ // | 14 | 76.27 | 109.59 | 226.98 | 259.84 |
802
+ // | 15 | 220.01 | 330.10 | 783.73 | 1160.47 |
803
+ // | 16 | 591.91 | 981.16 | 2712.20 | 4286.60 |
804
+ // | 17 | 1634.97 | 2574.81 | timeout | timeout |
805
+ // | 18 | 4680.16 | timeout | timeout | timeout |
806
+ // | 19 | timeout | timeout | timeout | timeout |
807
+ // | 20 | timeout | timeout | timeout | timeout |
808
+ // | 21 | timeout | timeout | timeout | timeout |
809
+ // | 22 | timeout | timeout | timeout | timeout |
810
+ // | 23 | timeout | timeout | timeout | timeout |
811
+ // | 24 | timeout | timeout | timeout | timeout |
812
+ // | 25 | timeout | timeout | timeout | timeout |
813
+ // | 26 | timeout | timeout | timeout | timeout |
814
+ // | 27 | timeout | timeout | timeout | timeout |
815
+ // | 28 | timeout | timeout | timeout | timeout |
816
+ // | 29 | timeout | timeout | timeout | timeout |
817
+ // | 30 | timeout | timeout | timeout | timeout |
818
+ // | 31 | timeout | timeout | timeout | timeout |
819
+ // | 32 | timeout | timeout | timeout | timeout |
820
+ // |----|---------|---------|---------|---------|
821
+ //
822
+ // In summary, Wallace tree reduction is slower for both solvers and all
823
+ // bit widths (except BITWIDTH==8 with MiniSat2).
824
+
694
825
bvt bv_utilst::unsigned_multiplier (const bvt &_op0, const bvt &_op1)
695
826
{
696
827
bvt op0=_op0, op1=_op1;
0 commit comments