11#include <gtest/gtest.h>
28 static inline std::vector<ScalarField>
scalars{};
32 size_t total_points = input_scalars.size();
34 std::vector<Element> expected_accs(num_threads);
35 size_t range_per_thread = (total_points + num_threads - 1) / num_threads;
38 expected_thread_acc.self_set_infinity();
39 size_t start = thread_idx * range_per_thread;
40 size_t end = ((thread_idx + 1) * range_per_thread > total_points) ? total_points
41 : (thread_idx + 1) * range_per_thread;
42 bool skip = start >= total_points;
44 for (
size_t i = start; i < end; ++i) {
45 expected_thread_acc += input_points[i] * input_scalars[i];
48 expected_accs[thread_idx] = expected_thread_acc;
52 expected_acc.self_set_infinity();
53 for (
auto& acc : expected_accs) {
64 for (
size_t i = start; i < end; ++i) {
78 constexpr uint32_t fr_size = 254;
79 constexpr uint32_t slice_bits = 7;
80 constexpr uint32_t num_slices = (fr_size + 6) / 7;
81 constexpr uint32_t last_slice_bits = fr_size - ((num_slices - 1) * slice_bits);
83 for (
size_t x = 0; x < 100; ++x) {
85 input_u256.
data[3] = input_u256.
data[3] & 0x3FFFFFFFFFFFFFFF;
86 while (input_u256 > ScalarField::modulus) {
87 input_u256 -= ScalarField::modulus;
89 std::vector<uint32_t> slices(num_slices);
92 for (uint32_t i = 0; i < num_slices; ++i) {
93 uint32_t mask = ((1U << slice_bits) - 1U);
94 uint32_t shift = slice_bits;
96 mask = ((1U << last_slice_bits) - 1U);
97 shift = last_slice_bits;
99 slices[num_slices - 1 - i] =
static_cast<uint32_t
>((acc & mask).
data[0]);
104 input.self_from_montgomery_form_reduced();
106 ASSERT_EQ(input.data[0], input_u256.
data[0]);
107 ASSERT_EQ(input.data[1], input_u256.
data[1]);
108 ASSERT_EQ(input.data[2], input_u256.
data[2]);
109 ASSERT_EQ(input.data[3], input_u256.
data[3]);
111 for (uint32_t i = 0; i < num_slices; ++i) {
113 EXPECT_EQ(result, slices[i]);
120 const size_t total_points = 30071;
121 const size_t num_buckets = 128;
123 std::vector<uint64_t> input_point_schedule;
124 for (
size_t i = 0; i < total_points; ++i) {
126 uint64_t schedule =
static_cast<uint64_t
>(bucket) + (
static_cast<uint64_t
>(i) << 32);
127 input_point_schedule.push_back(schedule);
132 input_point_schedule,
generators, affine_data, bucket_data);
134 std::vector<Element> expected_buckets(num_buckets);
135 for (
auto& e : expected_buckets) {
136 e.self_set_infinity();
138 for (
size_t i = 0; i < total_points; ++i) {
139 uint64_t bucket = input_point_schedule[i] & 0xFFFFFFFF;
140 EXPECT_LT(
static_cast<size_t>(bucket), num_buckets);
141 expected_buckets[
static_cast<size_t>(bucket)] +=
generators[i];
143 for (
size_t i = 0; i < num_buckets; ++i) {
144 if (!expected_buckets[i].is_point_at_infinity()) {
146 EXPECT_EQ(expected, bucket_data.
buckets[i]);
155 const size_t total_points = 30071;
156 const size_t num_buckets = 128;
158 std::vector<uint64_t> input_point_schedule;
159 for (
size_t i = 0; i < total_points; ++i) {
161 uint64_t schedule =
static_cast<uint64_t
>(bucket) + (
static_cast<uint64_t
>(i) << 32);
162 input_point_schedule.push_back(schedule);
167 input_point_schedule,
generators, affine_data, bucket_data);
172 expected_acc.self_set_infinity();
174 std::vector<Element> expected_accs(num_threads);
175 size_t range_per_thread = (total_points + num_threads - 1) / num_threads;
178 expected_thread_acc.self_set_infinity();
179 size_t start = thread_idx * range_per_thread;
180 size_t end = (thread_idx == num_threads - 1) ? total_points : (thread_idx + 1) * range_per_thread;
181 bool skip = start >= total_points;
183 for (
size_t i = start; i < end; ++i) {
184 ScalarField scalar = input_point_schedule[i] & 0xFFFFFFFF;
185 expected_thread_acc +=
generators[i] * scalar;
188 expected_accs[thread_idx] = expected_thread_acc;
191 for (
size_t i = 0; i < num_threads; ++i) {
192 expected_acc += expected_accs[i];
200 const size_t total_points = 30071;
202 std::vector<uint64_t> input_point_schedule;
203 for (
size_t i = 0; i < total_points; ++i) {
205 uint64_t schedule =
static_cast<uint64_t
>(bucket) + (
static_cast<uint64_t
>(i) << 32);
206 input_point_schedule.push_back(schedule);
210 &input_point_schedule[0], input_point_schedule.size(), 7);
214 for (
size_t i = 0; i < total_points; ++i) {
215 expected +=
static_cast<size_t>((input_point_schedule[i] & 0xFFFFFFFF) == 0);
217 EXPECT_EQ(result, expected);
220 for (
size_t i = 1; i < total_points; ++i) {
221 uint32_t prev_bucket =
static_cast<uint32_t
>(input_point_schedule[i - 1]);
222 uint32_t curr_bucket =
static_cast<uint32_t
>(input_point_schedule[i]);
223 EXPECT_LE(prev_bucket, curr_bucket) <<
"Array not sorted at index " << i;
235 constexpr uint32_t bucket_index_bits = 17;
236 constexpr size_t num_entries = 1000;
238 std::vector<uint64_t> schedule(num_entries);
241 const size_t num_true_zeros = 10;
242 for (
size_t i = 0; i < num_true_zeros; ++i) {
243 schedule[i] =
static_cast<uint64_t
>(i) << 32;
249 const size_t num_false_zeros = 20;
250 for (
size_t i = 0; i < num_false_zeros; ++i) {
251 size_t idx = num_true_zeros + i;
252 schedule[idx] = (
static_cast<uint64_t
>(idx) << 32) | 65536ULL;
256 for (
size_t i = num_true_zeros + num_false_zeros; i < num_entries; ++i) {
259 if ((bucket & 0xFFFF) == 0) {
262 schedule[i] = (
static_cast<uint64_t
>(i) << 32) |
static_cast<uint64_t
>(bucket);
266 schedule.data(), num_entries, bucket_index_bits);
270 for (
size_t i = 0; i < num_entries; ++i) {
276 EXPECT_EQ(result, expected) <<
"Zero-bucket count is wrong for bucket_index_bits=" << bucket_index_bits
277 <<
". Got " << result <<
", expected " << expected
278 <<
" (likely overwritten by count from a non-zero bucket)";
281 for (
size_t i = 1; i < num_entries; ++i) {
282 uint32_t prev =
static_cast<uint32_t
>(schedule[i - 1]);
283 uint32_t curr =
static_cast<uint32_t
>(schedule[i]);
284 EXPECT_LE(prev, curr) <<
"Array not sorted at index " << i;
294 EXPECT_EQ(result, expected);
302 std::vector<AffineElement> expected(num_msms);
308 size_t vector_offset = 0;
309 for (
size_t k = 0; k < num_msms; ++k) {
312 ASSERT_LT(vector_offset + num_pts,
num_points);
313 std::span<const AffineElement> batch_points(&
generators[vector_offset], num_pts);
315 batch_scalars_copies[k].resize(num_pts);
316 for (
size_t i = 0; i < num_pts; ++i) {
317 batch_scalars_copies[k][i] =
scalars[vector_offset + i];
320 vector_offset += num_pts;
321 batch_points_span.push_back(batch_points);
322 batch_scalars_spans.push_back(batch_scalars_copies[k]);
324 expected[k] =
naive_msm(batch_scalars_spans[k], batch_points_span[k]);
327 std::vector<AffineElement> result =
330 EXPECT_EQ(result, expected);
335 const size_t num_msms = 10;
336 std::vector<AffineElement> expected(num_msms);
342 for (
size_t k = 0; k < num_msms; ++k) {
343 const size_t num_pts = 33;
344 auto& test_scalars = batch_scalars[k];
346 test_scalars.resize(num_pts);
348 size_t fixture_offset = k * num_pts;
351 for (
size_t i = 0; i < 13; ++i) {
354 for (
size_t i = 13; i < 23; ++i) {
355 test_scalars[i] =
scalars[fixture_offset + i + 13];
357 for (
size_t i = 23; i < num_pts; ++i) {
360 batch_points_span.push_back(batch_points);
361 batch_scalars_spans.push_back(batch_scalars[k]);
363 expected[k] =
naive_msm(batch_scalars[k], batch_points);
366 std::vector<AffineElement> result =
369 EXPECT_EQ(result, expected);
374 const size_t start_index = 1234;
375 const size_t num_pts =
num_points - start_index;
383 EXPECT_EQ(result, expected);
388 const size_t start_index = 1234;
389 const size_t num_pts =
num_points - start_index;
390 std::vector<ScalarField> test_scalars(num_pts, ScalarField::zero());
395 EXPECT_EQ(result, Group::affine_point_at_infinity);
400 std::vector<ScalarField> test_scalars;
401 std::vector<AffineElement> input_points;
405 EXPECT_EQ(result, Group::affine_point_at_infinity);
410 const size_t num_pts = 100;
411 std::vector<ScalarField> test_scalars(num_pts);
412 std::vector<ScalarField> scalars_copy(num_pts);
414 for (
size_t i = 0; i < num_pts; ++i) {
416 scalars_copy[i] = test_scalars[i];
419 std::span<const AffineElement> points(&
generators[0], num_pts);
424 for (
size_t i = 0; i < num_pts; ++i) {
425 EXPECT_EQ(test_scalars[i], scalars_copy[i]) <<
"Scalar at index " << i <<
" was modified";
431 const size_t num_msms = 3;
432 const size_t num_pts = 100;
439 for (
size_t k = 0; k < num_msms; ++k) {
440 batch_scalars[k].resize(num_pts);
441 scalars_copies[k].resize(num_pts);
443 for (
size_t i = 0; i < num_pts; ++i) {
444 batch_scalars[k][i] =
scalars[k * num_pts + i];
445 scalars_copies[k][i] = batch_scalars[k][i];
448 batch_points.push_back(std::span<const AffineElement>(&
generators[k * num_pts], num_pts));
449 batch_scalar_spans.push_back(batch_scalars[k]);
454 for (
size_t k = 0; k < num_msms; ++k) {
455 for (
size_t i = 0; i < num_pts; ++i) {
456 EXPECT_EQ(batch_scalars[k][i], scalars_copies[k][i])
457 <<
"Scalar at MSM " << k <<
", index " << i <<
" was modified";
464 const size_t num_pts = 5;
465 std::vector<ScalarField> test_scalars(num_pts, ScalarField::one());
466 std::span<const AffineElement> points(&
generators[0], num_pts);
472 expected.self_set_infinity();
473 for (
size_t i = 0; i < num_pts; ++i) {
474 expected += points[i];
482 const size_t num_pts = 5;
483 std::vector<ScalarField> test_scalars(num_pts, -ScalarField::one());
484 std::span<const AffineElement> points(&
generators[0], num_pts);
490 expected.self_set_infinity();
491 for (
size_t i = 0; i < num_pts; ++i) {
492 expected -= points[i];
500 std::vector<ScalarField> test_scalars = {
scalars[0] };
501 std::span<const AffineElement> points(&
generators[0], 1);
507 EXPECT_EQ(result, expected);
512 std::vector<size_t> test_sizes = { 1, 2, 15, 16, 17, 50, 127, 128, 129, 256, 512 };
514 for (
size_t num_pts : test_sizes) {
517 std::vector<ScalarField> test_scalars(num_pts);
518 for (
size_t i = 0; i < num_pts; ++i) {
522 std::span<const AffineElement> points(&
generators[0], num_pts);
528 EXPECT_EQ(result, expected) <<
"Failed for size " << num_pts;
535 const size_t num_pts = 32;
538 std::vector<AffineElement> points(num_pts, base_point);
539 std::vector<ScalarField> test_scalars(num_pts);
542 for (
size_t i = 0; i < num_pts; ++i) {
544 scalar_sum += test_scalars[i];
553 EXPECT_EQ(result, expected);
558 const size_t num_pts = 100;
559 std::vector<ScalarField> test_scalars(num_pts);
561 expected.self_set_infinity();
563 for (
size_t i = 0; i < num_pts; ++i) {
565 test_scalars[i] = ScalarField::zero();
572 std::span<const AffineElement> points(&
generators[0], num_pts);
581 const size_t num_pts = 200;
582 std::vector<ScalarField> test_scalars(num_pts);
583 for (
size_t i = 0; i < num_pts; ++i) {
587 std::span<const AffineElement> points(&
generators[0], num_pts);
590 auto result = scalar_multiplication::pippenger<Curve>(scalar_span, points);
598 const size_t num_pts = 200;
599 std::vector<ScalarField> test_scalars(num_pts);
600 for (
size_t i = 0; i < num_pts; ++i) {
604 std::span<const AffineElement> points(&
generators[0], num_pts);
607 auto result = scalar_multiplication::pippenger_unsafe<Curve>(scalar_span, points);
614using CurveTypes = ::testing::Types<bb::curve::BN254, bb::curve::Grumpkin>;
621 this->test_get_scalar_slice();
625 this->test_consume_point_batch();
629 this->test_consume_point_batch_and_accumulate();
633 this->test_radix_sort_count_zero_entries();
637 this->test_radix_sort_count_zero_entries_wide_buckets();
641 this->test_pippenger_low_memory();
645 this->test_batch_multi_scalar_mul();
649 this->test_batch_multi_scalar_mul_sparse();
657 this->test_msm_all_zeroes();
661 this->test_msm_empty_polynomial();
665 this->test_scalars_unchanged_after_msm();
669 this->test_scalars_unchanged_after_batch_multi_scalar_mul();
673 this->test_scalar_one();
677 this->test_scalar_minus_one();
681 this->test_single_point();
685 this->test_size_thresholds();
689 this->test_duplicate_points();
693 this->test_mixed_zero_scalars();
697 this->test_pippenger_free_function();
701 this->test_pippenger_unsafe_free_function();
711using WorkUnit = PartitionMSM::MSMWorkUnit;
717 for (
const auto& u : units) {
718 for (
size_t k = 0; k < u.size; ++k) {
719 total += weights[u.batch_msm_index][u.start_index + k];
727TEST(PartitionByWeight, NoMsmsReturnsEmptyThreads)
729 auto units = PartitionMSM::partition_by_weight({}, 8);
730 ASSERT_EQ(units.size(), 8U);
731 for (
const auto& t : units) {
732 EXPECT_TRUE(t.empty());
736TEST(PartitionByWeight, AllEmptyMsmsReturnsEmptyThreads)
739 auto units = PartitionMSM::partition_by_weight(weights, 4);
740 ASSERT_EQ(units.size(), 4U);
741 for (
const auto& t : units) {
742 EXPECT_TRUE(t.empty());
746TEST(PartitionByWeight, SingleThreadGetsEverything)
749 auto units = PartitionMSM::partition_by_weight(weights, 1);
750 ASSERT_EQ(units.size(), 1U);
751 ASSERT_EQ(units[0].size(), 1U);
752 EXPECT_EQ(units[0][0].batch_msm_index, 0U);
753 EXPECT_EQ(units[0][0].start_index, 0U);
754 EXPECT_EQ(units[0][0].size, 5U);
757TEST(PartitionByWeight, EvenSplitAcrossThreads)
761 auto units = PartitionMSM::partition_by_weight(weights, 4);
762 ASSERT_EQ(units.size(), 4U);
763 for (
size_t t = 0; t < 4; ++t) {
764 ASSERT_EQ(units[t].size(), 1U) <<
"thread " << t;
765 EXPECT_EQ(units[t][0].size, 2U) <<
"thread " << t;
766 EXPECT_EQ(thread_weight(units[t], weights), 10U) <<
"thread " << t;
770TEST(PartitionByWeight, HeavyFirstWeightClosesFirstThreadEarly)
774 auto units = PartitionMSM::partition_by_weight(weights, 4);
775 ASSERT_EQ(units.size(), 4U);
777 ASSERT_FALSE(units[0].empty());
778 EXPECT_EQ(units[0][0].start_index, 0U);
779 EXPECT_EQ(units[0][0].size, 1U);
781 size_t total_assigned = 0;
782 for (
const auto& t : units) {
783 for (
const auto& u : t) {
784 total_assigned += u.size;
787 EXPECT_EQ(total_assigned, 5U);
790TEST(PartitionByWeight, BoundaryStraddlesMsm)
795 auto units = PartitionMSM::partition_by_weight(weights, 4);
796 ASSERT_EQ(units.size(), 4U);
797 size_t total_assigned = 0;
798 for (
const auto& t : units) {
799 for (
const auto& u : t) {
800 total_assigned += u.size;
803 EXPECT_EQ(total_assigned, 8U);
805 for (
size_t t = 0; t < 4; ++t) {
806 EXPECT_EQ(thread_weight(units[t], weights), 10U) <<
"thread " << t;
810TEST(PartitionByWeight, LastThreadAbsorbsRemainder)
818 auto units = PartitionMSM::partition_by_weight(weights, 3);
819 ASSERT_EQ(units.size(), 3U);
820 size_t total_assigned = 0;
821 for (
const auto& t : units) {
822 for (
const auto& u : t) {
823 total_assigned += u.size;
826 EXPECT_EQ(total_assigned, 3U);
827 ASSERT_EQ(units[2].size(), 1U);
828 EXPECT_EQ(units[2][0].start_index, 2U);
829 EXPECT_EQ(units[2][0].size, 1U);
830 EXPECT_EQ(thread_weight(units[2], weights), 1U);
833TEST(PartitionByWeight, MoreThreadsThanScalars)
838 auto units = PartitionMSM::partition_by_weight(weights, 8);
839 ASSERT_EQ(units.size(), 8U);
840 for (
size_t t = 0; t < 3; ++t) {
841 ASSERT_EQ(units[t].size(), 1U) <<
"thread " << t;
842 EXPECT_EQ(units[t][0].size, 1U);
844 for (
size_t t = 3; t < 8; ++t) {
845 EXPECT_TRUE(units[t].empty()) <<
"thread " << t;
850TEST(ScalarMultiplication, SmallInputsExplicit)
852 uint256_t x0(0x68df84429941826a, 0xeb08934ed806781c, 0xc14b6a2e4f796a73, 0x08dc1a9a11a3c8db);
853 uint256_t y0(0x8ae5c31aa997f141, 0xe85f20c504f2c11b, 0x81a94193f3b1ce2b, 0x26f2c37372adb5b7);
854 uint256_t x1(0x80f5a592d919d32f, 0x1362652b984e51ca, 0xa0b26666f770c2a1, 0x142c6e1964e5c3c5);
855 uint256_t y1(0xb6c322ebb5ae4bc5, 0xf9fef6c7909c00f8, 0xb37ca1cc9af3b421, 0x1e331c7fa73d6a59);
856 uint256_t s0(0xe48bf12a24272e08, 0xf8dd0182577f3567, 0xec8fd222b8a6becb, 0x102d76b945612c9b);
857 uint256_t s1(0x098ae8d69f1e4e9e, 0xb5c8313c0f6040ed, 0xf78041e30cc46c44, 0x1d1e6e0c21892e13);
#define BB_BENCH_NAME(name)
BB_INLINE bool get(size_t index) const noexcept
void test_pippenger_low_memory()
typename Curve::ScalarField ScalarField
static std::vector< AffineElement > generators
void test_radix_sort_count_zero_entries_wide_buckets()
void test_mixed_zero_scalars()
void test_batch_multi_scalar_mul_sparse()
void test_duplicate_points()
void test_consume_point_batch()
void test_scalars_unchanged_after_batch_multi_scalar_mul()
void test_msm_all_zeroes()
void test_pippenger_unsafe_free_function()
void test_batch_multi_scalar_mul()
static constexpr size_t num_points
static void SetUpTestSuite()
void test_scalar_minus_one()
typename Curve::Element Element
void test_consume_point_batch_and_accumulate()
void test_msm_empty_polynomial()
void test_pippenger_free_function()
typename Curve::Group Group
void test_get_scalar_slice()
void test_scalars_unchanged_after_msm()
static std::vector< ScalarField > scalars
typename Curve::AffineElement AffineElement
static AffineElement naive_msm(std::span< ScalarField > input_scalars, std::span< const AffineElement > input_points)
void test_radix_sort_count_zero_entries()
void test_size_thresholds()
typename Group::element Element
typename grumpkin::g1 Group
typename Group::affine_element AffineElement
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
group_elements::affine_element< Fq, Fr, Params > affine_element
virtual uint8_t get_random_uint8()=0
virtual uint16_t get_random_uint16()=0
virtual uint32_t get_random_uint32()=0
virtual uint256_t get_random_uint256()=0
static Element accumulate_buckets(BucketType &bucket_accumulators) noexcept
Reduce buckets to single point using running (suffix) sum from high to low: R = sum(k * B_k)
static uint32_t get_scalar_slice(const ScalarField &scalar, size_t round, size_t slice_size) noexcept
Extract c-bit slice from scalar for bucket index computation.
static AffineElement msm(std::span< const AffineElement > points, PolynomialSpan< const ScalarField > scalars, bool handle_edge_cases=false) noexcept
Main entry point for single MSM computation.
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< std::span< const AffineElement > > points, std::span< std::span< ScalarField > > scalars, bool handle_edge_cases=true) noexcept
Compute multiple MSMs in parallel with work balancing.
static void batch_accumulate_points_into_buckets(std::span< const uint64_t > point_schedule, std::span< const AffineElement > points, AffineAdditionData &affine_data, BucketAccumulators &bucket_data) noexcept
Process sorted point schedule into bucket accumulators using batched affine additions.
const std::vector< MemoryValue > data
size_t sort_point_schedule_and_count_zero_buckets(uint64_t *point_schedule, const size_t num_entries, const uint32_t bucket_index_bits) noexcept
Sort point schedule by bucket index and count zero-bucket entries.
constexpr uint64_t BUCKET_INDEX_MASK
Entry point for Barretenberg command-line interface.
TYPED_TEST_SUITE(CommitmentKeyTest, Curves)
::testing::Types< curve::BN254, curve::Grumpkin > CurveTypes
TYPED_TEST(CommitmentKeyTest, CommitToZeroPoly)
TEST(BoomerangMegaCircuitBuilder, BasicCircuit)
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Scratch space for batched affine point additions (one per thread)
Affine bucket accumulators for the fast affine-trick Pippenger variant.
std::vector< AffineElement > buckets