@@ -688,9 +688,100 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
688
688
}
689
689
}
690
690
691
+ bvt bv_utilst::dadda_tree (const std::vector<bvt> &pps)
692
+ {
693
+ PRECONDITION (!pps.empty ());
694
+
695
+ using columnt = std::list<literalt>;
696
+ std::vector<columnt> columns (pps.front ().size ());
697
+ for (const auto &pp : pps)
698
+ {
699
+ PRECONDITION (pp.size () == pps.front ().size ());
700
+ for (std::size_t i = 0 ; i < pp.size (); ++i)
701
+ {
702
+ if (!pp[i].is_false ())
703
+ columns[i].push_back (pp[i]);
704
+ }
705
+ }
706
+
707
+ std::list<std::size_t > dadda_sequence;
708
+ for (std::size_t d = 2 ; d < pps.front ().size (); d = (d * 3 ) / 2 )
709
+ dadda_sequence.push_front (d);
710
+
711
+ for (auto d : dadda_sequence)
712
+ {
713
+ for (auto col_it = columns.begin (); col_it != columns.end ();) // no ++col_it
714
+ {
715
+ if (col_it->size () <= d)
716
+ ++col_it;
717
+ else if (col_it->size () == d + 1 )
718
+ {
719
+ // Dadda prescribes a half adder here, but OPTIMAL_FULL_ADDER makes
720
+ // full_adder actually build a half adder when carry-in is false.
721
+ literalt carry_out;
722
+ col_it->push_back (full_adder (
723
+ col_it->front (),
724
+ *std::next (col_it->begin ()),
725
+ const_literal (false ),
726
+ carry_out));
727
+ col_it->pop_front ();
728
+ col_it->pop_front ();
729
+ if (std::next (col_it) != columns.end ())
730
+ std::next (col_it)->push_back (carry_out);
731
+ }
732
+ else
733
+ {
734
+ // We could also experiment with n:2 compressors (for n > 3, n=3 is the
735
+ // full adder as used below) that use circuits with lower gate count
736
+ // than just combining multiple full adders.
737
+ literalt carry_out;
738
+ col_it->push_back (full_adder (
739
+ col_it->front (),
740
+ *std::next (col_it->begin ()),
741
+ *std::next (std::next (col_it->begin ())),
742
+ carry_out));
743
+ col_it->pop_front ();
744
+ col_it->pop_front ();
745
+ col_it->pop_front ();
746
+ if (std::next (col_it) != columns.end ())
747
+ std::next (col_it)->push_back (carry_out);
748
+ }
749
+ }
750
+ }
751
+
752
+ bvt a, b;
753
+ a.reserve (pps.front ().size ());
754
+ b.reserve (pps.front ().size ());
755
+
756
+ for (const auto &col : columns)
757
+ {
758
+ PRECONDITION (col.size () <= 2 );
759
+ switch (col.size ())
760
+ {
761
+ case 0 :
762
+ a.push_back (const_literal (false ));
763
+ b.push_back (const_literal (false ));
764
+ break ;
765
+ case 1 :
766
+ a.push_back (col.front ());
767
+ b.push_back (const_literal (false ));
768
+ break ;
769
+ case 2 :
770
+ a.push_back (col.front ());
771
+ b.push_back (col.back ());
772
+ }
773
+ }
774
+
775
+ return add (a, b);
776
+ }
777
+
691
778
// Wallace tree multiplier. This is disabled, as runtimes have
692
779
// been observed to go up by 5%-10%, and on some models even by 20%.
693
780
// #define WALLACE_TREE
781
+ // Dadda' reduction scheme. This yields a smaller formula size than Wallace
782
+ // trees (and also the default addition scheme), but remains disabled as it
783
+ // isn't consistently more performant either.
784
+ // #define DADDA_TREE
694
785
695
786
// The following examples demonstrate the performance differences (with a
696
787
// time-out of 7200 seconds):
@@ -712,48 +803,48 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
712
803
// ab == (__CPROVER_bitvector[BITWIDTH])ab_check, "should pass");
713
804
// }
714
805
//
715
- // |----|-------------------| -------------------|
716
- // | | CaDiCaL | MiniSat2 |
717
- // |----|-------------------| -------------------|
718
- // | n | No tree | Wallace | No tree | Wallace |
719
- // |----|---------|---------|---------|---------|
720
- // | 1 | 0.00 | 0.00 | 0.00 | 0.00 |
721
- // | 2 | 0.00 | 0.00 | 0.00 | 0.00 |
722
- // | 3 | 0.01 | 0.01 | 0.00 | 0.00 |
723
- // | 4 | 0.01 | 0.01 | 0.01 | 0.01 |
724
- // | 5 | 0.04 | 0.04 | 0.01 | 0.01 |
725
- // | 6 | 0.11 | 0.13 | 0.04 | 0.05 |
726
- // | 7 | 0.28 | 0.46 | 0.15 | 0.27 |
727
- // | 8 | 0.50 | 1.55 | 0.90 | 1.06 |
728
- // | 9 | 2.22 | 3.63 | 3.40 | 5.85 |
729
- // | 10 | 2.79 | 4.81 | 4.32 | 28.41 |
730
- // | 11 | 8.48 | 4.45 | 38.24 | 98.55 |
731
- // | 12 | 14.52 | 4.86 | 115.00 | 140.11 |
732
- // | 13 | 33.62 | 5.56 | 210.24 | 805.59 |
733
- // | 14 | 37.23 | 6.11 | 776.75 | 689.96 |
734
- // | 15 | 108.65 | 7.86 | 1048.92 | 1421.74 |
735
- // | 16 | 102.61 | 14.08 | 1628.01 | timeout |
736
- // | 17 | 117.89 | 18.53 | 4148.73 | timeout |
737
- // | 18 | 209.40 | 7.97 | 2760.51 | timeout |
738
- // | 19 | 168.21 | 18.04 | 2514.21 | timeout |
739
- // | 20 | 566.76 | 12.68 | 2609.09 | timeout |
740
- // | 21 | 786.31 | 23.80 | 2232.77 | timeout |
741
- // | 22 | 817.74 | 17.64 | 3866.70 | timeout |
742
- // | 23 | 1102.76 | 24.19 | timeout | timeout |
743
- // | 24 | 1319.59 | 27.37 | timeout | timeout |
744
- // | 25 | 1786.11 | 27.10 | timeout | timeout |
745
- // | 26 | 1952.18 | 31.08 | timeout | timeout |
746
- // | 27 | 6908.48 | 27.92 | timeout | timeout |
747
- // | 28 | 6919.34 | 36.63 | timeout | timeout |
748
- // | 29 | timeout | 27.95 | timeout | timeout |
749
- // | 30 | timeout | 36.94 | timeout | timeout |
750
- // | 31 | timeout | 38.41 | timeout | timeout |
751
- // | 32 | timeout | 33.06 | timeout | timeout |
752
- // |----|---------|---------|---------|---------|
806
+ // |----|-----------------------------|---------- -------------------|
807
+ // | | CaDiCaL | MiniSat2 |
808
+ // |----|-----------------------------|---------- -------------------|
809
+ // | n | No tree | Wallace | Dadda | No tree | Wallace | Dadda |
810
+ // |----|---------|---------|---------|---------|---------|---------|
811
+ // | 1 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
812
+ // | 2 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
813
+ // | 3 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 |
814
+ // | 4 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 |
815
+ // | 5 | 0.04 | 0.04 | 0.04 | 0.01 | 0. 01 | 0.01 |
816
+ // | 6 | 0.11 | 0.13 | 0.12 | 0. 04 | 0.05 | 0.06 |
817
+ // | 7 | 0.28 | 0.46 | 0.44 | 0. 15 | 0.27 | 0.31 |
818
+ // | 8 | 0.50 | 1.55 | 1.09 | 0.90 | 1.06 | 1.36 |
819
+ // | 9 | 2.22 | 3.63 | 2.67 | 3.40 | 5.85 | 3.44 |
820
+ // | 10 | 2.79 | 4.81 | 4.69 | 4. 32 | 28.41 | 28.01 |
821
+ // | 11 | 8.48 | 4.45 | 11.99 | 38.24 | 98.55 | 86.46 |
822
+ // | 12 | 14.52 | 4.86 | 5.80 | 115.00 | 140.11 | 461.70 |
823
+ // | 13 | 33.62 | 5.56 | 6.14 | 210.24 | 805.59 | 609.01 |
824
+ // | 14 | 37.23 | 6.11 | 8.01 | 776.75 | 689.96 | 6153.82 |
825
+ // | 15 | 108.65 | 7.86 | 14.72 | 1048.92 | 1421.74 | 6881.78 |
826
+ // | 16 | 102.61 | 14.08 | 18.54 | 1628.01 | timeout | 1943.85 |
827
+ // | 17 | 117.89 | 18.53 | 9.19 | 4148.73 | timeout | timeout |
828
+ // | 18 | 209.40 | 7.97 | 7.74 | 2760.51 | timeout | timeout |
829
+ // | 19 | 168.21 | 18.04 | 15.00 | 2514.21 | timeout | timeout |
830
+ // | 20 | 566.76 | 12.68 | 22.47 | 2609.09 | timeout | timeout |
831
+ // | 21 | 786.31 | 23.80 | 23.80 | 2232.77 | timeout | timeout |
832
+ // | 22 | 817.74 | 17.64 | 22.53 | 3866.70 | timeout | timeout |
833
+ // | 23 | 1102.76 | 24.19 | 26.37 | timeout | timeout | timeout |
834
+ // | 24 | 1319.59 | 27.37 | 29.95 | timeout | timeout | timeout |
835
+ // | 25 | 1786.11 | 27.10 | 29.94 | timeout | timeout | timeout |
836
+ // | 26 | 1952.18 | 31.08 | 33.95 | timeout | timeout | timeout |
837
+ // | 27 | 6908.48 | 27.92 | 34.94 | timeout | timeout | timeout |
838
+ // | 28 | 6919.34 | 36.63 | 33.78 | timeout | timeout | timeout |
839
+ // | 29 | timeout | 27.95 | 40.69 | timeout | timeout | timeout |
840
+ // | 30 | timeout | 36.94 | 31.59 | timeout | timeout | timeout |
841
+ // | 31 | timeout | 38.41 | 40.04 | timeout | timeout | timeout |
842
+ // | 32 | timeout | 33.06 | 91.38 | timeout | timeout | timeout |
843
+ // |----|---------|---------|---------|---------|---------|---------|
753
844
//
754
- // In summary, Wallace tree reduction is substantially more efficient with
755
- // CaDiCaL on the above code for all bit width >= 11, but somewhat slower with
756
- // MiniSat.
845
+ // In summary, both Wallace tree and Dadda reduction are substantially more
846
+ // efficient with CaDiCaL on the above code for all bit width > 11, but somewhat
847
+ // slower with MiniSat.
757
848
//
758
849
// #ifndef BITWIDTH
759
850
// #define BITWIDTH 8
@@ -780,47 +871,49 @@ bvt bv_utilst::wallace_tree(const std::vector<bvt> &pps)
780
871
// __CPROVER_assert(abc == a * bc, "associativity");
781
872
// }
782
873
//
783
- // |----|-------------------| -------------------|
784
- // | | CaDiCaL | MiniSat2 |
785
- // |----|-------------------| -------------------|
786
- // | n | No tree | Wallace | No tree | Wallace |
787
- // |----|---------|---------|---------|---------|
788
- // | 1 | 0.00 | 0.00 | 0.01 | 0.01 |
789
- // | 2 | 0.01 | 0.01 | 0.01 | 0.01 |
790
- // | 3 | 0.02 | 0.03 | 0.01 | 0.01 |
791
- // | 4 | 0.05 | 0.07 | 0.02 | 0.02 |
792
- // | 5 | 0.17 | 0.18 | 0.04 | 0.04 |
793
- // | 6 | 0.50 | 0.63 | 0.13 | 0.14 |
794
- // | 7 | 1.01 | 1.15 | 0.43 | 0.47 |
795
- // | 8 | 1.56 | 1.76 | 3.35 | 2.39 |
796
- // | 9 | 2.07 | 2.48 | 11.20 | 12.64 |
797
- // | 10 | 3.58 | 3.88 | 20.23 | 23.49 |
798
- // | 11 | 5.84 | 7.46 | 49.32 | 39.86 |
799
- // | 12 | 11.71 | 16.85 | 69.32 | 156.57 |
800
- // | 13 | 33.22 | 41.95 | 250.91 | 294.73 |
801
- // | 14 | 76.27 | 109.59 | 226.98 | 259.84 |
802
- // | 15 | 220.01 | 330.10 | 783.73 | 1160.47 |
803
- // | 16 | 591.91 | 981.16 | 2712.20 | 4286.60 |
804
- // | 17 | 1634.97 | 2574.81 | timeout | timeout |
805
- // | 18 | 4680.16 | timeout | timeout | timeout |
806
- // | 19 | timeout | timeout | timeout | timeout |
807
- // | 20 | timeout | timeout | timeout | timeout |
808
- // | 21 | timeout | timeout | timeout | timeout |
809
- // | 22 | timeout | timeout | timeout | timeout |
810
- // | 23 | timeout | timeout | timeout | timeout |
811
- // | 24 | timeout | timeout | timeout | timeout |
812
- // | 25 | timeout | timeout | timeout | timeout |
813
- // | 26 | timeout | timeout | timeout | timeout |
814
- // | 27 | timeout | timeout | timeout | timeout |
815
- // | 28 | timeout | timeout | timeout | timeout |
816
- // | 29 | timeout | timeout | timeout | timeout |
817
- // | 30 | timeout | timeout | timeout | timeout |
818
- // | 31 | timeout | timeout | timeout | timeout |
819
- // | 32 | timeout | timeout | timeout | timeout |
820
- // |----|---------|---------|---------|---------|
874
+ // |----|-----------------------------|---------- -------------------|
875
+ // | | CaDiCaL | MiniSat2 |
876
+ // |----|-----------------------------|---------- -------------------|
877
+ // | n | No tree | Wallace | Dadda | No tree | Wallace | Dadda |
878
+ // |----|---------|---------|---------|---------|---------|---------|
879
+ // | 1 | 0.00 | 0.00 | 0.00 | 0.01 | 0. 01 | 0.01 |
880
+ // | 2 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 | 0.01 |
881
+ // | 3 | 0.02 | 0.03 | 0.03 | 0.01 | 0. 01 | 0.01 |
882
+ // | 4 | 0.05 | 0.07 | 0.06 | 0.02 | 0. 02 | 0.02 |
883
+ // | 5 | 0.17 | 0.18 | 0.14 | 0.04 | 0. 04 | 0.04 |
884
+ // | 6 | 0.50 | 0.63 | 0.63 | 0. 13 | 0.14 | 0.13 |
885
+ // | 7 | 1.01 | 1.15 | 0.90 | 0.43 | 0.47 | 0.47 |
886
+ // | 8 | 1.56 | 1.76 | 1.75 | 3.35 | 2.39 | 1.75 |
887
+ // | 9 | 2.07 | 2.48 | 1.75 | 11.20 | 12.64 | 7.94 |
888
+ // | 10 | 3.58 | 3.88 | 3.54 | 20.23 | 23.49 | 15.66 |
889
+ // | 11 | 5.84 | 7.46 | 5.31 | 49.32 | 39.86 | 44.15 |
890
+ // | 12 | 11.71 | 16.85 | 13.40 | 69.32 | 156.57 | 46.50 |
891
+ // | 13 | 33.22 | 41.95 | 34.43 | 250.91 | 294.73 | 79.47 |
892
+ // | 14 | 76.27 | 109.59 | 84.49 | 226.98 | 259.84 | 258.08 |
893
+ // | 15 | 220.01 | 330.10 | 267.11 | 783.73 | 1160.47 | 1262.91 |
894
+ // | 16 | 591.91 | 981.16 | 808.33 | 2712.20 | 4286.60 | timeout |
895
+ // | 17 | 1634.97 | 2574.81 | 2006.27 | timeout | timeout | timeout |
896
+ // | 18 | 4680.16 | timeout | 6704.35 | timeout | timeout | timeout |
897
+ // | 19 | timeout | timeout | timeout | timeout | timeout | timeout |
898
+ // | 20 | timeout | timeout | timeout | timeout | timeout | timeout |
899
+ // | 21 | timeout | timeout | timeout | timeout | timeout | timeout |
900
+ // | 22 | timeout | timeout | timeout | timeout | timeout | timeout |
901
+ // | 23 | timeout | timeout | timeout | timeout | timeout | timeout |
902
+ // | 24 | timeout | timeout | timeout | timeout | timeout | timeout |
903
+ // | 25 | timeout | timeout | timeout | timeout | timeout | timeout |
904
+ // | 26 | timeout | timeout | timeout | timeout | timeout | timeout |
905
+ // | 27 | timeout | timeout | timeout | timeout | timeout | timeout |
906
+ // | 28 | timeout | timeout | timeout | timeout | timeout | timeout |
907
+ // | 29 | timeout | timeout | timeout | timeout | timeout | timeout |
908
+ // | 30 | timeout | timeout | timeout | timeout | timeout | timeout |
909
+ // | 31 | timeout | timeout | timeout | timeout | timeout | timeout |
910
+ // | 32 | timeout | timeout | timeout | timeout | timeout | timeout |
911
+ // |----|---------|---------|---------|---------|---------|---------|
821
912
//
822
- // In summary, Wallace tree reduction is slower for both solvers and all
823
- // bit widths (except BITWIDTH==8 with MiniSat2).
913
+ // In summary, Wallace tree reduction is slower for both solvers and all bit
914
+ // widths (except BITWIDTH==8 with MiniSat2). Dadda multipliers get closer to
915
+ // our multiplier that's not using a tree reduction scheme, but aren't uniformly
916
+ // better either.
824
917
825
918
bvt bv_utilst::unsigned_multiplier (const bvt &_op0, const bvt &_op1)
826
919
{
@@ -854,6 +947,8 @@ bvt bv_utilst::unsigned_multiplier(const bvt &_op0, const bvt &_op1)
854
947
{
855
948
#ifdef WALLACE_TREE
856
949
return wallace_tree (pps);
950
+ #elif defined(DADDA_TREE)
951
+ return dadda_tree (pps);
857
952
#else
858
953
bvt product = pps.front ();
859
954
0 commit comments