Barretenberg: src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.test.cpp Source File

#include "scalar_multiplication.hpp"

#include "barretenberg/api/file_io.hpp"

#include "barretenberg/common/thread.hpp"

#include "barretenberg/ecc/curves/bn254/bn254.hpp"

#include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp"

#include "barretenberg/ecc/curves/types.hpp"

#include "barretenberg/numeric/random/engine.hpp"

#include "barretenberg/polynomials/polynomial.hpp"

#include "barretenberg/srs/factories/mem_bn254_crs_factory.hpp"

#include <filesystem>

#include <gtest/gtest.h>


using namespace bb;


namespace {

// File-local reference to the randomness source returned by numeric::get_randomness().
// The tests below draw their random scalars, 256-bit integers and bucket indices from it.
auto& engine = numeric::get_randomness();

} // namespace


template <class Curve> class ScalarMultiplicationTest : public ::testing::Test {

  public:

    // Curve-dependent types, pulled out of the Curve template parameter for brevity.
    using Group = typename Curve::Group;

    using Element = typename Curve::Element;

    using AffineElement = typename Curve::AffineElement;

    using ScalarField = typename Curve::ScalarField;


    // Number of points/scalars held by the shared fixture data below.
    static constexpr size_t num_points = 201123;

    // Fixture-wide points; resized to num_points and populated in SetUpTestSuite().
    static inline std::vector<AffineElement> generators{};

    // Fixture-wide scalars; resized to num_points and populated in SetUpTestSuite().
    static inline std::vector<ScalarField> scalars{};


    // Reference MSM used as ground truth by the tests: sums input_points[i] * input_scalars[i]
    // using one plain scalar multiplication per term.
    //
    // The index range is cut into get_num_cpus() contiguous chunks. Each chunk is summed inside
    // parallel_for into its own slot; the per-chunk sums are then added together serially.
    // The number of terms is taken from input_scalars.size(); the result is returned in affine form.
    static AffineElement naive_msm(std::span<ScalarField> input_scalars, std::span<const AffineElement> input_points)
    {
        const size_t term_count = input_scalars.size();
        const size_t worker_count = get_num_cpus();
        const size_t chunk_size = (term_count + worker_count - 1) / worker_count;
        std::vector<Element> partial_sums(worker_count);

        parallel_for(worker_count, [&](size_t worker) {
            Element chunk_sum;
            chunk_sum.self_set_infinity();

            const size_t first = worker * chunk_size;
            const size_t unclamped_last = (worker + 1) * chunk_size;
            // Clamp the chunk's upper bound to the number of terms.
            const size_t last = (unclamped_last > term_count) ? term_count : unclamped_last;

            // A chunk that starts past the end of the input contributes the point at infinity.
            if (first < term_count) {
                for (size_t idx = first; idx < last; ++idx) {
                    chunk_sum += input_points[idx] * input_scalars[idx];
                }
            }
            partial_sums[worker] = chunk_sum;
        });

        Element total = Element();
        total.self_set_infinity();
        for (auto& partial : partial_sums) {
            total += partial;
        }
        return AffineElement(total);
    }


    static void SetUpTestSuite()

    {

        generators.resize(num_points);

        scalars.resize(num_points);

        parallel_for_range(num_points, [&](size_t start, size_t end) {

            for (size_t i = start; i < end; ++i) {

                generators[i] = Group::one * Curve::ScalarField::random_element(&engine);

                scalars[i] = Curve::ScalarField::random_element(&engine);

            }

        });

        for (size_t i = 0; i < num_points - 1; ++i) {

            ASSERT_EQ(generators[i].x == generators[i + 1].x, false);

        }

    };


    // ======================= Test Methods =======================


    // Checks MSM::get_scalar_slice against slices computed independently with uint256_t
    // shifts and masks, for 100 random 254-bit scalars and a slice width of 7 bits.
    void test_get_scalar_slice()
    {
        constexpr uint32_t fr_size = 254;
        constexpr uint32_t slice_bits = 7;
        // Ceiling division, derived from slice_bits so the slice count cannot drift from the slice width.
        constexpr uint32_t num_slices = (fr_size + slice_bits - 1) / slice_bits;
        constexpr uint32_t last_slice_bits = fr_size - ((num_slices - 1) * slice_bits);

        for (size_t x = 0; x < 100; ++x) {
            uint256_t input_u256 = engine.get_random_uint256();
            input_u256.data[3] = input_u256.data[3] & 0x3FFFFFFFFFFFFFFF; // 254 bits
            // Bring the value strictly below the modulus. `>=` rather than `>`: a draw equal to the
            // modulus must be reduced too, otherwise the limb-equality assertions below would fail.
            while (input_u256 >= ScalarField::modulus) {
                input_u256 -= ScalarField::modulus;
            }
            std::vector<uint32_t> slices(num_slices);

            // Reference slicing: peel bits off the low end of the integer and store them back to front,
            // so slices[num_slices - 1] holds the lowest bits. That lowest slice is the short one
            // (last_slice_bits wide); every other slice is slice_bits wide.
            uint256_t acc = input_u256;
            for (uint32_t i = 0; i < num_slices; ++i) {
                uint32_t mask = ((1U << slice_bits) - 1U);
                uint32_t shift = slice_bits;
                if (i == 0) {
                    mask = ((1U << last_slice_bits) - 1U);
                    shift = last_slice_bits;
                }
                slices[num_slices - 1 - i] = static_cast<uint32_t>((acc & mask).data[0]);
                acc = acc >> shift;
            }

            ScalarField input(input_u256);
            input.self_from_montgomery_form_reduced();

            // After leaving Montgomery form, the field element's limbs must equal the integer's limbs.
            ASSERT_EQ(input.data[0], input_u256.data[0]);
            ASSERT_EQ(input.data[1], input_u256.data[1]);
            ASSERT_EQ(input.data[2], input_u256.data[2]);
            ASSERT_EQ(input.data[3], input_u256.data[3]);

            for (uint32_t i = 0; i < num_slices; ++i) {
                uint32_t result = scalar_multiplication::MSM<Curve>::get_scalar_slice(input, i, slice_bits);
                EXPECT_EQ(result, slices[i]);
            }
        }
    }


    void test_consume_point_batch()

    {

        const size_t total_points = 30071;

        const size_t num_buckets = 128;


        std::vector<uint64_t> input_point_schedule;

        for (size_t i = 0; i < total_points; ++i) {

            uint64_t bucket = static_cast<uint64_t>(engine.get_random_uint8()) & 0x7f;

            uint64_t schedule = static_cast<uint64_t>(bucket) + (static_cast<uint64_t>(i) << 32);

            input_point_schedule.push_back(schedule);

        }

        typename scalar_multiplication::MSM<Curve>::AffineAdditionData affine_data;

        typename scalar_multiplication::MSM<Curve>::BucketAccumulators bucket_data(num_buckets);

        scalar_multiplication::MSM<Curve>::batch_accumulate_points_into_buckets(

            input_point_schedule, generators, affine_data, bucket_data);


        std::vector<Element> expected_buckets(num_buckets);

        for (auto& e : expected_buckets) {

            e.self_set_infinity();

        }

        for (size_t i = 0; i < total_points; ++i) {

            uint64_t bucket = input_point_schedule[i] & 0xFFFFFFFF;

            EXPECT_LT(static_cast<size_t>(bucket), num_buckets);

            expected_buckets[static_cast<size_t>(bucket)] += generators[i];

        }

        for (size_t i = 0; i < num_buckets; ++i) {

            if (!expected_buckets[i].is_point_at_infinity()) {

                AffineElement expected(expected_buckets[i]);

                EXPECT_EQ(expected, bucket_data.buckets[i]);

            } else {

                EXPECT_FALSE(bucket_data.bucket_exists.get(i));

            }

        }

    }


    void test_consume_point_batch_and_accumulate()

    {

        const size_t total_points = 30071;

        const size_t num_buckets = 128;


        std::vector<uint64_t> input_point_schedule;

        for (size_t i = 0; i < total_points; ++i) {

            uint64_t bucket = static_cast<uint64_t>(engine.get_random_uint8()) & 0x7f;

            uint64_t schedule = static_cast<uint64_t>(bucket) + (static_cast<uint64_t>(i) << 32);

            input_point_schedule.push_back(schedule);

        }

        typename scalar_multiplication::MSM<Curve>::AffineAdditionData affine_data;

        typename scalar_multiplication::MSM<Curve>::BucketAccumulators bucket_data(num_buckets);

        scalar_multiplication::MSM<Curve>::batch_accumulate_points_into_buckets(

            input_point_schedule, generators, affine_data, bucket_data);


        Element result = scalar_multiplication::MSM<Curve>::accumulate_buckets(bucket_data);


        Element expected_acc;

        expected_acc.self_set_infinity();

        size_t num_threads = get_num_cpus();

        std::vector<Element> expected_accs(num_threads);

        size_t range_per_thread = (total_points + num_threads - 1) / num_threads;

        parallel_for(num_threads, [&](size_t thread_idx) {

            Element expected_thread_acc;

            expected_thread_acc.self_set_infinity();

            size_t start = thread_idx * range_per_thread;

            size_t end = (thread_idx == num_threads - 1) ? total_points : (thread_idx + 1) * range_per_thread;

            bool skip = start >= total_points;

            if (!skip) {

                for (size_t i = start; i < end; ++i) {

                    ScalarField scalar = input_point_schedule[i] & 0xFFFFFFFF;

                    expected_thread_acc += generators[i] * scalar;

                }

            }

            expected_accs[thread_idx] = expected_thread_acc;

        });


        for (size_t i = 0; i < num_threads; ++i) {

            expected_acc += expected_accs[i];

        }

        AffineElement expected(expected_acc);

        EXPECT_EQ(AffineElement(result), expected);

    }


    // Sorts a random point schedule with 7-bit bucket indices and checks two things:
    // the returned count equals the number of entries whose bucket index is zero, and the
    // entries end up ordered by bucket index (lower 32 bits).
    void test_radix_sort_count_zero_entries()
    {
        const size_t entry_count = 30071;

        std::vector<uint64_t> schedule;
        for (size_t pt = 0; pt < entry_count; ++pt) {
            const uint64_t bucket_idx = static_cast<uint64_t>(engine.get_random_uint8()) & 0x7f;
            schedule.push_back((static_cast<uint64_t>(pt) << 32) + bucket_idx);
        }

        const size_t reported_zeros =
            scalar_multiplication::sort_point_schedule_and_count_zero_buckets(schedule.data(), schedule.size(), 7);

        // Recount the zero-bucket entries directly.
        size_t counted_zeros = 0;
        for (const uint64_t entry : schedule) {
            if ((entry & 0xFFFFFFFF) == 0) {
                ++counted_zeros;
            }
        }
        EXPECT_EQ(reported_zeros, counted_zeros);

        // Bucket indices must be non-decreasing across the sorted schedule.
        for (size_t pos = 1; pos < entry_count; ++pos) {
            const auto left = static_cast<uint32_t>(schedule[pos - 1]);
            const auto right = static_cast<uint32_t>(schedule[pos]);
            EXPECT_LE(left, right) << "Array not sorted at index " << pos;
        }
    }


    // Regression test: radix sort zero-counting bug for bucket_index_bits > 16 (3+ recursion levels).

    // The recursive call passes `keys` instead of `top_level_keys`, causing num_zero_entries to be

    // overwritten by non-zero-bucket counts when the MSD radix sort recurses 3+ levels deep.


    // Zero-bucket counting with 17-bit bucket indices. The schedule holds 10 genuine zero-bucket
    // entries, 20 entries with bucket index 65536 (all of the low 16 bits zero, but not a zero bucket),
    // and random non-zero buckets elsewhere. Only the genuine zeros may be counted.
    void test_radix_sort_count_zero_entries_wide_buckets()
    {
        constexpr uint32_t bucket_index_bits = 17;
        constexpr size_t num_entries = 1000;
        const size_t num_true_zeros = 10;
        const size_t num_false_zeros = 20;
        const size_t first_random = num_true_zeros + num_false_zeros;

        std::vector<uint64_t> schedule(num_entries);

        // Leading entries: bucket 0 for the first num_true_zeros, bucket 65536 (= 1 << 16) for the
        // following num_false_zeros. The point index always sits in the upper 32 bits.
        for (size_t pos = 0; pos < first_random; ++pos) {
            const uint64_t point_bits = static_cast<uint64_t>(pos) << 32;
            schedule[pos] = (pos < num_true_zeros) ? point_bits : (point_bits | 65536ULL);
        }

        // Remaining entries: random non-zero bucket indices, nudged off multiples of 65536 so that
        // none of them has all of its lower 16 bits zero.
        for (size_t pos = first_random; pos < num_entries; ++pos) {
            uint32_t bucket = (engine.get_random_uint32() % ((1U << bucket_index_bits) - 1)) + 1;
            if ((bucket & 0xFFFF) == 0) {
                bucket |= 1;
            }
            schedule[pos] = (static_cast<uint64_t>(pos) << 32) | static_cast<uint64_t>(bucket);
        }

        const size_t reported = scalar_multiplication::sort_point_schedule_and_count_zero_buckets(
            schedule.data(), num_entries, bucket_index_bits);

        // Recount zero-bucket entries from the sorted schedule.
        size_t counted = 0;
        for (const uint64_t entry : schedule) {
            if ((entry & scalar_multiplication::BUCKET_INDEX_MASK) == 0) {
                ++counted;
            }
        }

        EXPECT_EQ(reported, counted) << "Zero-bucket count is wrong for bucket_index_bits=" << bucket_index_bits
                                     << ". Got " << reported << ", expected " << counted
                                     << " (likely overwritten by count from a non-zero bucket)";

        // The schedule must also come back ordered by bucket index.
        for (size_t pos = 1; pos < num_entries; ++pos) {
            const auto left = static_cast<uint32_t>(schedule[pos - 1]);
            const auto right = static_cast<uint32_t>(schedule[pos]);
            EXPECT_LE(left, right) << "Array not sorted at index " << pos;
        }
    }


    void test_pippenger_low_memory()

    {

        std::span<ScalarField> test_scalars(&scalars[0], num_points);

        AffineElement result =

            scalar_multiplication::MSM<Curve>::msm(generators, PolynomialSpan<ScalarField>(0, test_scalars));

        AffineElement expected = naive_msm(test_scalars, generators);

        EXPECT_EQ(result, expected);

    }


    void test_batch_multi_scalar_mul()

    {

        BB_BENCH_NAME("BatchMultiScalarMul");


        const size_t num_msms = static_cast<size_t>(engine.get_random_uint8());

        std::vector<AffineElement> expected(num_msms);


        std::vector<std::vector<ScalarField>> batch_scalars_copies(num_msms);

        std::vector<std::span<const AffineElement>> batch_points_span;

        std::vector<std::span<ScalarField>> batch_scalars_spans;


        size_t vector_offset = 0;

        for (size_t k = 0; k < num_msms; ++k) {

            const size_t num_pts = static_cast<size_t>(engine.get_random_uint16()) % 400;


            ASSERT_LT(vector_offset + num_pts, num_points);

            std::span<const AffineElement> batch_points(&generators[vector_offset], num_pts);


            batch_scalars_copies[k].resize(num_pts);

            for (size_t i = 0; i < num_pts; ++i) {

                batch_scalars_copies[k][i] = scalars[vector_offset + i];

            }


            vector_offset += num_pts;

            batch_points_span.push_back(batch_points);

            batch_scalars_spans.push_back(batch_scalars_copies[k]);


            expected[k] = naive_msm(batch_scalars_spans[k], batch_points_span[k]);

        }


        std::vector<AffineElement> result =

            scalar_multiplication::MSM<Curve>::batch_multi_scalar_mul(batch_points_span, batch_scalars_spans);


        EXPECT_EQ(result, expected);

    }


    void test_batch_multi_scalar_mul_sparse()

    {

        const size_t num_msms = 10;

        std::vector<AffineElement> expected(num_msms);


        std::vector<std::vector<ScalarField>> batch_scalars(num_msms);

        std::vector<std::span<const AffineElement>> batch_points_span;

        std::vector<std::span<ScalarField>> batch_scalars_spans;


        for (size_t k = 0; k < num_msms; ++k) {

            const size_t num_pts = 33;

            auto& test_scalars = batch_scalars[k];


            test_scalars.resize(num_pts);


            size_t fixture_offset = k * num_pts;


            std::span<AffineElement> batch_points(&generators[fixture_offset], num_pts);

            for (size_t i = 0; i < 13; ++i) {

                test_scalars[i] = 0;

            }

            for (size_t i = 13; i < 23; ++i) {

                test_scalars[i] = scalars[fixture_offset + i + 13];

            }

            for (size_t i = 23; i < num_pts; ++i) {

                test_scalars[i] = 0;

            }

            batch_points_span.push_back(batch_points);

            batch_scalars_spans.push_back(batch_scalars[k]);


            expected[k] = naive_msm(batch_scalars[k], batch_points);

        }


        std::vector<AffineElement> result =

            scalar_multiplication::MSM<Curve>::batch_multi_scalar_mul(batch_points_span, batch_scalars_spans);


        EXPECT_EQ(result, expected);

    }


    void test_msm()

    {

        const size_t start_index = 1234;

        const size_t num_pts = num_points - start_index;


        PolynomialSpan<ScalarField> scalar_span =

            PolynomialSpan<ScalarField>(start_index, std::span<ScalarField>(&scalars[0], num_pts));

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(generators, scalar_span);


        std::span<AffineElement> points(&generators[start_index], num_pts);

        AffineElement expected = naive_msm(scalar_span.span, points);

        EXPECT_EQ(result, expected);

    }


    void test_msm_all_zeroes()

    {

        const size_t start_index = 1234;

        const size_t num_pts = num_points - start_index;

        std::vector<ScalarField> test_scalars(num_pts, ScalarField::zero());


        PolynomialSpan<ScalarField> scalar_span = PolynomialSpan<ScalarField>(start_index, test_scalars);

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(generators, scalar_span);


        EXPECT_EQ(result, Group::affine_point_at_infinity);

    }


    void test_msm_empty_polynomial()

    {

        std::vector<ScalarField> test_scalars;

        std::vector<AffineElement> input_points;

        PolynomialSpan<ScalarField> scalar_span = PolynomialSpan<ScalarField>(0, test_scalars);

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(input_points, scalar_span);


        EXPECT_EQ(result, Group::affine_point_at_infinity);

    }


    // Calls MSM::msm on 100 scalars and checks that the caller's scalar buffer holds the same
    // values afterwards as it did before the call.
    void test_scalars_unchanged_after_msm()
    {
        const size_t count = 100;

        std::vector<ScalarField> working(count);
        for (size_t i = 0; i < count; ++i) {
            working[i] = scalars[i];
        }
        // Snapshot taken before the call, used for the comparison afterwards.
        std::vector<ScalarField> snapshot = working;

        std::span<const AffineElement> points(&generators[0], count);
        PolynomialSpan<ScalarField> scalar_span(0, working);

        scalar_multiplication::MSM<Curve>::msm(points, scalar_span);

        for (size_t i = 0; i < count; ++i) {
            EXPECT_EQ(working[i], snapshot[i]) << "Scalar at index " << i << " was modified";
        }
    }


    void test_scalars_unchanged_after_batch_multi_scalar_mul()

    {

        const size_t num_msms = 3;

        const size_t num_pts = 100;


        std::vector<std::vector<ScalarField>> batch_scalars(num_msms);

        std::vector<std::vector<ScalarField>> scalars_copies(num_msms);

        std::vector<std::span<const AffineElement>> batch_points;

        std::vector<std::span<ScalarField>> batch_scalar_spans;


        for (size_t k = 0; k < num_msms; ++k) {

            batch_scalars[k].resize(num_pts);

            scalars_copies[k].resize(num_pts);


            for (size_t i = 0; i < num_pts; ++i) {

                batch_scalars[k][i] = scalars[k * num_pts + i];

                scalars_copies[k][i] = batch_scalars[k][i];

            }


            batch_points.push_back(std::span<const AffineElement>(&generators[k * num_pts], num_pts));

            batch_scalar_spans.push_back(batch_scalars[k]);

        }


        scalar_multiplication::MSM<Curve>::batch_multi_scalar_mul(batch_points, batch_scalar_spans);


        for (size_t k = 0; k < num_msms; ++k) {

            for (size_t i = 0; i < num_pts; ++i) {

                EXPECT_EQ(batch_scalars[k][i], scalars_copies[k][i])

                    << "Scalar at MSM " << k << ", index " << i << " was modified";

            }

        }

    }


    void test_scalar_one()

    {

        const size_t num_pts = 5;

        std::vector<ScalarField> test_scalars(num_pts, ScalarField::one());

        std::span<const AffineElement> points(&generators[0], num_pts);


        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(points, scalar_span);


        Element expected;

        expected.self_set_infinity();

        for (size_t i = 0; i < num_pts; ++i) {

            expected += points[i];

        }


        EXPECT_EQ(result, AffineElement(expected));

    }


    void test_scalar_minus_one()

    {

        const size_t num_pts = 5;

        std::vector<ScalarField> test_scalars(num_pts, -ScalarField::one());

        std::span<const AffineElement> points(&generators[0], num_pts);


        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(points, scalar_span);


        Element expected;

        expected.self_set_infinity();

        for (size_t i = 0; i < num_pts; ++i) {

            expected -= points[i];

        }


        EXPECT_EQ(result, AffineElement(expected));

    }


    void test_single_point()

    {

        std::vector<ScalarField> test_scalars = { scalars[0] };

        std::span<const AffineElement> points(&generators[0], 1);


        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(points, scalar_span);


        AffineElement expected(points[0] * test_scalars[0]);

        EXPECT_EQ(result, expected);

    }


    void test_size_thresholds()

    {

        std::vector<size_t> test_sizes = { 1, 2, 15, 16, 17, 50, 127, 128, 129, 256, 512 };


        for (size_t num_pts : test_sizes) {

            ASSERT_LE(num_pts, num_points);


            std::vector<ScalarField> test_scalars(num_pts);

            for (size_t i = 0; i < num_pts; ++i) {

                test_scalars[i] = scalars[i];

            }


            std::span<const AffineElement> points(&generators[0], num_pts);

            PolynomialSpan<ScalarField> scalar_span(0, test_scalars);


            AffineElement result = scalar_multiplication::MSM<Curve>::msm(points, scalar_span);

            AffineElement expected = naive_msm(test_scalars, points);


            EXPECT_EQ(result, expected) << "Failed for size " << num_pts;

        }

    }


    void test_duplicate_points()

    {

        // Use enough points to trigger Pippenger (> PIPPENGER_THRESHOLD = 16)

        const size_t num_pts = 32;

        AffineElement base_point = generators[0];


        std::vector<AffineElement> points(num_pts, base_point);

        std::vector<ScalarField> test_scalars(num_pts);

        ScalarField scalar_sum = ScalarField::zero();


        for (size_t i = 0; i < num_pts; ++i) {

            test_scalars[i] = scalars[i];

            scalar_sum += test_scalars[i];

        }


        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);

        // Duplicate points are an edge case (P + P requires doubling, not addition).

        // Must use handle_edge_cases=true for correctness with Pippenger.

        AffineElement result = scalar_multiplication::MSM<Curve>::msm(points, scalar_span, /*handle_edge_cases=*/true);


        AffineElement expected(base_point * scalar_sum);

        EXPECT_EQ(result, expected);

    }


    void test_mixed_zero_scalars()

    {

        const size_t num_pts = 100;

        std::vector<ScalarField> test_scalars(num_pts);

        Element expected;

        expected.self_set_infinity();


        for (size_t i = 0; i < num_pts; ++i) {

            if (i % 2 == 0) {

                test_scalars[i] = ScalarField::zero();

            } else {

                test_scalars[i] = scalars[i];

                expected += generators[i] * test_scalars[i];

            }

        }


        std::span<const AffineElement> points(&generators[0], num_pts);

        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);


        AffineElement result = scalar_multiplication::MSM<Curve>::msm(points, scalar_span);

        EXPECT_EQ(result, AffineElement(expected));

    }


    void test_pippenger_free_function()

    {

        const size_t num_pts = 200;

        std::vector<ScalarField> test_scalars(num_pts);

        for (size_t i = 0; i < num_pts; ++i) {

            test_scalars[i] = scalars[i];

        }


        std::span<const AffineElement> points(&generators[0], num_pts);

        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);


        auto result = scalar_multiplication::pippenger<Curve>(scalar_span, points);


        AffineElement expected = naive_msm(test_scalars, points);

        EXPECT_EQ(AffineElement(result), expected);

    }


    void test_pippenger_unsafe_free_function()

    {

        const size_t num_pts = 200;

        std::vector<ScalarField> test_scalars(num_pts);

        for (size_t i = 0; i < num_pts; ++i) {

            test_scalars[i] = scalars[i];

        }


        std::span<const AffineElement> points(&generators[0], num_pts);

        PolynomialSpan<ScalarField> scalar_span(0, test_scalars);


        auto result = scalar_multiplication::pippenger_unsafe<Curve>(scalar_span, points);


        AffineElement expected = naive_msm(test_scalars, points);

        EXPECT_EQ(AffineElement(result), expected);

    }


};


// Curves the typed fixture is instantiated for: every TYPED_TEST below runs once per listed curve.
using CurveTypes = ::testing::Types<bb::curve::BN254, bb::curve::Grumpkin>;

TYPED_TEST_SUITE(ScalarMultiplicationTest, CurveTypes);


// ======================= Test Wrappers =======================


// Each wrapper below only forwards to the fixture method of the matching name.

// get_scalar_slice vs. slices computed with uint256_t shifts and masks.
TYPED_TEST(ScalarMultiplicationTest, GetScalarSlice)

{

    this->test_get_scalar_slice();

}


// Bucket accumulation vs. per-bucket sums computed directly.
TYPED_TEST(ScalarMultiplicationTest, ConsumePointBatch)

{

    this->test_consume_point_batch();

}


// Bucket accumulation followed by accumulate_buckets vs. a naive reference sum.
TYPED_TEST(ScalarMultiplicationTest, ConsumePointBatchAndAccumulate)

{

    this->test_consume_point_batch_and_accumulate();

}


// Schedule sort: zero-bucket count and ordering, 7-bit bucket indices.
TYPED_TEST(ScalarMultiplicationTest, RadixSortCountZeroEntries)

{

    this->test_radix_sort_count_zero_entries();

}


// Schedule sort: zero-bucket count and ordering, 17-bit bucket indices.
TYPED_TEST(ScalarMultiplicationTest, RadixSortCountZeroEntriesWideBuckets)

{

    this->test_radix_sort_count_zero_entries_wide_buckets();

}


// msm over the full fixture data vs. naive_msm.
TYPED_TEST(ScalarMultiplicationTest, PippengerLowMemory)

{

    this->test_pippenger_low_memory();

}


// batch_multi_scalar_mul on randomly sized MSMs vs. naive_msm.
TYPED_TEST(ScalarMultiplicationTest, BatchMultiScalarMul)

{

    this->test_batch_multi_scalar_mul();

}


// batch_multi_scalar_mul on MSMs whose scalars are mostly zero.
TYPED_TEST(ScalarMultiplicationTest, BatchMultiScalarMulSparse)

{

    this->test_batch_multi_scalar_mul_sparse();

}


// msm with a PolynomialSpan that has a non-zero start index.
TYPED_TEST(ScalarMultiplicationTest, MSM)

{

    this->test_msm();

}


// msm with all-zero scalars returns the point at infinity.
TYPED_TEST(ScalarMultiplicationTest, MSMAllZeroes)

{

    this->test_msm_all_zeroes();

}


// msm with empty inputs returns the point at infinity.
TYPED_TEST(ScalarMultiplicationTest, MSMEmptyPolynomial)

{

    this->test_msm_empty_polynomial();

}


// msm leaves the caller's scalars unchanged.
TYPED_TEST(ScalarMultiplicationTest, ScalarsUnchangedAfterMSM)

{

    this->test_scalars_unchanged_after_msm();

}


// batch_multi_scalar_mul leaves the caller's scalars unchanged.
TYPED_TEST(ScalarMultiplicationTest, ScalarsUnchangedAfterBatchMultiScalarMul)

{

    this->test_scalars_unchanged_after_batch_multi_scalar_mul();

}


// All scalars equal to one.
TYPED_TEST(ScalarMultiplicationTest, ScalarOne)

{

    this->test_scalar_one();

}


// All scalars equal to minus one.
TYPED_TEST(ScalarMultiplicationTest, ScalarMinusOne)

{

    this->test_scalar_minus_one();

}


// MSM with a single term.
TYPED_TEST(ScalarMultiplicationTest, SinglePoint)

{

    this->test_single_point();

}


// msm across a list of input sizes from 1 to 512.
TYPED_TEST(ScalarMultiplicationTest, SizeThresholds)

{

    this->test_size_thresholds();

}


// msm over repeated copies of one point, with edge-case handling enabled.
TYPED_TEST(ScalarMultiplicationTest, DuplicatePoints)

{

    this->test_duplicate_points();

}


// msm where every other scalar is zero.
TYPED_TEST(ScalarMultiplicationTest, MixedZeroScalars)

{

    this->test_mixed_zero_scalars();

}


// Free function pippenger vs. naive_msm.
TYPED_TEST(ScalarMultiplicationTest, PippengerFreeFunction)

{

    this->test_pippenger_free_function();

}


// Free function pippenger_unsafe vs. naive_msm.
TYPED_TEST(ScalarMultiplicationTest, PippengerUnsafeFreeFunction)

{

    this->test_pippenger_unsafe_free_function();

}


// Curve-independent unit tests for the work-unit partitioner.

// partition_by_weight is the load-bearing balancing logic in get_work_units; pinning its

// behavior with synthetic weights makes regressions in the partition algorithm visible

// without needing a full MSM run.

namespace {

// The partitioner tests are pinned to the BN254 instantiation of MSM.
using PartitionMSM = scalar_multiplication::MSM<curve::BN254>;
using WorkUnit = PartitionMSM::MSMWorkUnit;

// Sums the weights covered by a thread's work units: for each unit, the entries
// weights[unit.batch_msm_index][unit.start_index .. unit.start_index + unit.size).
size_t thread_weight(const std::vector<WorkUnit>& units, const std::vector<std::vector<uint16_t>>& weights)
{
    size_t accumulated = 0;
    for (const auto& unit : units) {
        const size_t first = unit.start_index;
        const size_t past_last = first + unit.size;
        for (size_t pos = first; pos < past_last; ++pos) {
            accumulated += weights[unit.batch_msm_index][pos];
        }
    }
    return accumulated;
}

} // namespace


// With no MSMs at all, the partitioner still returns one (empty) work list per thread.
TEST(PartitionByWeight, NoMsmsReturnsEmptyThreads)
{
    auto per_thread = PartitionMSM::partition_by_weight({}, 8);
    ASSERT_EQ(per_thread.size(), 8U);
    for (size_t thread = 0; thread < per_thread.size(); ++thread) {
        EXPECT_TRUE(per_thread[thread].empty());
    }
}


// Three MSMs that each carry no weights: every thread's work list must stay empty.
TEST(PartitionByWeight, AllEmptyMsmsReturnsEmptyThreads)
{
    std::vector<std::vector<uint16_t>> empty_msms{ {}, {}, {} };
    auto per_thread = PartitionMSM::partition_by_weight(empty_msms, 4);
    ASSERT_EQ(per_thread.size(), 4U);
    for (size_t thread = 0; thread < per_thread.size(); ++thread) {
        EXPECT_TRUE(per_thread[thread].empty());
    }
}


// One thread, one MSM of five weights: expect a single work unit covering indices 0..4 of MSM 0.
TEST(PartitionByWeight, SingleThreadGetsEverything)
{
    std::vector<std::vector<uint16_t>> weights{ { 5, 5, 5, 5, 5 } };
    auto per_thread = PartitionMSM::partition_by_weight(weights, 1);
    ASSERT_EQ(per_thread.size(), 1U);
    ASSERT_EQ(per_thread[0].size(), 1U);

    const auto& only_unit = per_thread[0][0];
    EXPECT_EQ(only_unit.batch_msm_index, 0U);
    EXPECT_EQ(only_unit.start_index, 0U);
    EXPECT_EQ(only_unit.size, 5U);
}


// Eight weights of 5 (total 40) over 4 threads: the per-thread target is 10,
// so each thread should receive one unit of two weights.
TEST(PartitionByWeight, EvenSplitAcrossThreads)
{
    std::vector<std::vector<uint16_t>> weights{ { 5, 5, 5, 5, 5, 5, 5, 5 } };
    auto per_thread = PartitionMSM::partition_by_weight(weights, 4);
    ASSERT_EQ(per_thread.size(), 4U);
    for (size_t thread = 0; thread < 4; ++thread) {
        const auto& assigned = per_thread[thread];
        ASSERT_EQ(assigned.size(), 1U) << "thread " << thread;
        EXPECT_EQ(assigned[0].size, 2U) << "thread " << thread;
        EXPECT_EQ(thread_weight(assigned, weights), 10U) << "thread " << thread;
    }
}


// The first weight alone exceeds the per-thread target, so thread 0 must stop right after it.
// All five scalars must still be assigned to some thread.
TEST(PartitionByWeight, HeavyFirstWeightClosesFirstThreadEarly)
{
    std::vector<std::vector<uint16_t>> weights{ { 100, 5, 5, 5, 5 } };
    auto per_thread = PartitionMSM::partition_by_weight(weights, 4);
    ASSERT_EQ(per_thread.size(), 4U);

    // Thread 0 holds exactly the heavy weight.
    ASSERT_FALSE(per_thread[0].empty());
    const auto& first_unit = per_thread[0][0];
    EXPECT_EQ(first_unit.start_index, 0U);
    EXPECT_EQ(first_unit.size, 1U);

    // Nothing may be dropped: unit sizes over all threads add up to the input length.
    size_t scalars_assigned = 0;
    for (const auto& assigned : per_thread) {
        for (const auto& unit : assigned) {
            scalars_assigned += unit.size;
        }
    }
    EXPECT_EQ(scalars_assigned, 5U);
}


// Two MSMs of four weights of 5 each (total 40) over 4 threads, target 10 per thread.
// All eight scalars must be assigned and each thread must carry a weight of exactly 10.
TEST(PartitionByWeight, BoundaryStraddlesMsm)
{
    std::vector<std::vector<uint16_t>> weights{ { 5, 5, 5, 5 }, { 5, 5, 5, 5 } };
    auto per_thread = PartitionMSM::partition_by_weight(weights, 4);
    ASSERT_EQ(per_thread.size(), 4U);

    size_t scalars_assigned = 0;
    for (const auto& assigned : per_thread) {
        for (const auto& unit : assigned) {
            scalars_assigned += unit.size;
        }
    }
    EXPECT_EQ(scalars_assigned, 8U);

    for (size_t thread = 0; thread < 4; ++thread) {
        EXPECT_EQ(thread_weight(per_thread[thread], weights), 10U) << "thread " << thread;
    }
}


// Weights {7, 7, 1} over 3 threads: total 15, target ceil(15 / 3) = 5.
// Threads 0 and 1 each close after a weight of 7, leaving the trailing weight of 1.
// That weight is below the target, so the last thread has to pick it up regardless;
// this test pins that the trailing scalar is not lost.
TEST(PartitionByWeight, LastThreadAbsorbsRemainder)
{
    std::vector<std::vector<uint16_t>> weights{ { 7, 7, 1 } };
    auto per_thread = PartitionMSM::partition_by_weight(weights, 3);
    ASSERT_EQ(per_thread.size(), 3U);

    size_t scalars_assigned = 0;
    for (const auto& assigned : per_thread) {
        for (const auto& unit : assigned) {
            scalars_assigned += unit.size;
        }
    }
    EXPECT_EQ(scalars_assigned, 3U);

    // The last thread owns exactly the trailing scalar.
    const auto& last_thread = per_thread[2];
    ASSERT_EQ(last_thread.size(), 1U);
    EXPECT_EQ(last_thread[0].start_index, 2U);
    EXPECT_EQ(last_thread[0].size, 1U);
    EXPECT_EQ(thread_weight(last_thread, weights), 1U);
}


// Three weights of 5 over 8 threads: total 15, target ceil(15 / 8) = 2.
// Every single weight already exceeds the target, so threads 0..2 get one scalar each
// and threads 3..7 get nothing.
TEST(PartitionByWeight, MoreThreadsThanScalars)
{
    std::vector<std::vector<uint16_t>> weights{ { 5, 5, 5 } };
    auto per_thread = PartitionMSM::partition_by_weight(weights, 8);
    ASSERT_EQ(per_thread.size(), 8U);

    for (size_t thread = 0; thread < 8; ++thread) {
        if (thread < 3) {
            ASSERT_EQ(per_thread[thread].size(), 1U) << "thread " << thread;
            EXPECT_EQ(per_thread[thread][0].size, 1U);
        } else {
            EXPECT_TRUE(per_thread[thread].empty()) << "thread " << thread;
        }
    }
}


// Non-templated test for explicit small inputs


TEST(ScalarMultiplication, SmallInputsExplicit)
{
    using Affine = grumpkin::g1::affine_element;
    using Projective = grumpkin::g1::element;
    using Scalar = grumpkin::fr;

    // Fixed inputs: coordinates for two Grumpkin affine points and two scalars,
    // each given as four 64-bit words.
    uint256_t x0(0x68df84429941826a, 0xeb08934ed806781c, 0xc14b6a2e4f796a73, 0x08dc1a9a11a3c8db);
    uint256_t y0(0x8ae5c31aa997f141, 0xe85f20c504f2c11b, 0x81a94193f3b1ce2b, 0x26f2c37372adb5b7);
    uint256_t x1(0x80f5a592d919d32f, 0x1362652b984e51ca, 0xa0b26666f770c2a1, 0x142c6e1964e5c3c5);
    uint256_t y1(0xb6c322ebb5ae4bc5, 0xf9fef6c7909c00f8, 0xb37ca1cc9af3b421, 0x1e331c7fa73d6a59);
    uint256_t s0(0xe48bf12a24272e08, 0xf8dd0182577f3567, 0xec8fd222b8a6becb, 0x102d76b945612c9b);
    uint256_t s1(0x098ae8d69f1e4e9e, 0xb5c8313c0f6040ed, 0xf78041e30cc46c44, 0x1d1e6e0c21892e13);

    std::vector<Scalar> scalars{ s0, s1 };
    std::vector<Affine> points{ Affine(x0, y0), Affine(x1, y1) };

    // Run the two-term MSM through the production entry point, scalars starting at index 0.
    PolynomialSpan<Scalar> scalar_span(0, scalars);
    auto result = scalar_multiplication::MSM<curve::Grumpkin>::msm(points, scalar_span);

    // Reference value: the same sum computed term by term with plain group arithmetic.
    Projective first_term = points[0] * scalars[0];
    Projective second_term = points[1] * scalars[1];
    Projective expected = first_term + second_term;

    EXPECT_EQ(result, Affine(expected));
}


BB_BENCH_NAME
#define BB_BENCH_NAME(name)
Definition bb_bench.hpp:264

BitVector::get
BB_INLINE bool get(size_t index) const noexcept
Definition bitvector.hpp:44

ScalarMultiplicationTest
Definition scalar_multiplication.test.cpp:19

ScalarMultiplicationTest::test_pippenger_low_memory
void test_pippenger_low_memory()
Definition scalar_multiplication.test.cpp:288

ScalarMultiplicationTest::ScalarField
typename Curve::ScalarField ScalarField
Definition scalar_multiplication.test.cpp:24

ScalarMultiplicationTest::test_msm
void test_msm()
Definition scalar_multiplication.test.cpp:372

ScalarMultiplicationTest::generators
static std::vector< AffineElement > generators
Definition scalar_multiplication.test.cpp:27

ScalarMultiplicationTest::test_radix_sort_count_zero_entries_wide_buckets
void test_radix_sort_count_zero_entries_wide_buckets()
Definition scalar_multiplication.test.cpp:230

ScalarMultiplicationTest::test_mixed_zero_scalars
void test_mixed_zero_scalars()
Definition scalar_multiplication.test.cpp:556

ScalarMultiplicationTest::test_batch_multi_scalar_mul_sparse
void test_batch_multi_scalar_mul_sparse()
Definition scalar_multiplication.test.cpp:333

ScalarMultiplicationTest::test_duplicate_points
void test_duplicate_points()
Definition scalar_multiplication.test.cpp:532

ScalarMultiplicationTest::test_consume_point_batch
void test_consume_point_batch()
Definition scalar_multiplication.test.cpp:118

ScalarMultiplicationTest::test_single_point
void test_single_point()
Definition scalar_multiplication.test.cpp:498

ScalarMultiplicationTest::test_scalars_unchanged_after_batch_multi_scalar_mul
void test_scalars_unchanged_after_batch_multi_scalar_mul()
Definition scalar_multiplication.test.cpp:429

ScalarMultiplicationTest::test_msm_all_zeroes
void test_msm_all_zeroes()
Definition scalar_multiplication.test.cpp:386

ScalarMultiplicationTest::test_pippenger_unsafe_free_function
void test_pippenger_unsafe_free_function()
Definition scalar_multiplication.test.cpp:596

ScalarMultiplicationTest::test_batch_multi_scalar_mul
void test_batch_multi_scalar_mul()
Definition scalar_multiplication.test.cpp:297

ScalarMultiplicationTest::num_points
static constexpr size_t num_points
Definition scalar_multiplication.test.cpp:26

ScalarMultiplicationTest::SetUpTestSuite
static void SetUpTestSuite()
Definition scalar_multiplication.test.cpp:59

ScalarMultiplicationTest::test_scalar_minus_one
void test_scalar_minus_one()
Definition scalar_multiplication.test.cpp:480

ScalarMultiplicationTest::Element
typename Curve::Element Element
Definition scalar_multiplication.test.cpp:22

ScalarMultiplicationTest::test_consume_point_batch_and_accumulate
void test_consume_point_batch_and_accumulate()
Definition scalar_multiplication.test.cpp:153

ScalarMultiplicationTest::test_msm_empty_polynomial
void test_msm_empty_polynomial()
Definition scalar_multiplication.test.cpp:398

ScalarMultiplicationTest::test_pippenger_free_function
void test_pippenger_free_function()
Definition scalar_multiplication.test.cpp:579

ScalarMultiplicationTest::Group
typename Curve::Group Group
Definition scalar_multiplication.test.cpp:21

ScalarMultiplicationTest::test_get_scalar_slice
void test_get_scalar_slice()
Definition scalar_multiplication.test.cpp:76

ScalarMultiplicationTest::test_scalars_unchanged_after_msm
void test_scalars_unchanged_after_msm()
Definition scalar_multiplication.test.cpp:408

ScalarMultiplicationTest::scalars
static std::vector< ScalarField > scalars
Definition scalar_multiplication.test.cpp:28

ScalarMultiplicationTest::test_scalar_one
void test_scalar_one()
Definition scalar_multiplication.test.cpp:462

ScalarMultiplicationTest::AffineElement
typename Curve::AffineElement AffineElement
Definition scalar_multiplication.test.cpp:23

ScalarMultiplicationTest::naive_msm
static AffineElement naive_msm(std::span< ScalarField > input_scalars, std::span< const AffineElement > input_points)
Definition scalar_multiplication.test.cpp:30

ScalarMultiplicationTest::test_radix_sort_count_zero_entries
void test_radix_sort_count_zero_entries()
Definition scalar_multiplication.test.cpp:198

ScalarMultiplicationTest::test_size_thresholds
void test_size_thresholds()
Definition scalar_multiplication.test.cpp:510

bb::curve::Grumpkin::Element
typename Group::element Element
Definition grumpkin.hpp:63

bb::curve::Grumpkin::Group
typename grumpkin::g1 Group
Definition grumpkin.hpp:62

bb::curve::Grumpkin::AffineElement
typename Group::affine_element AffineElement
Definition grumpkin.hpp:64

bb::curve::Grumpkin::ScalarField
bb::fq ScalarField
Definition grumpkin.hpp:60

bb::group_elements::affine_element
Definition affine_element.hpp:27

bb::group_elements::element
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
Definition element.hpp:35

bb::group::affine_element
group_elements::affine_element< Fq, Fr, Params > affine_element
Definition group.hpp:44

bb::numeric::RNG::get_random_uint8
virtual uint8_t get_random_uint8()=0

bb::numeric::RNG::get_random_uint16
virtual uint16_t get_random_uint16()=0

bb::numeric::RNG::get_random_uint32
virtual uint32_t get_random_uint32()=0

bb::numeric::RNG::get_random_uint256
virtual uint256_t get_random_uint256()=0

bb::numeric::uint256_t
Definition uint256.hpp:32

bb::numeric::uint256_t::data
uint64_t data[4]
Definition uint256.hpp:219

bb::scalar_multiplication::MSM
Definition scalar_multiplication.hpp:19

bb::scalar_multiplication::MSM::accumulate_buckets
static Element accumulate_buckets(BucketType &bucket_accumulators) noexcept
Reduce buckets to single point using running (suffix) sum from high to low: R = sum(k * B_k)
Definition scalar_multiplication.hpp:258

bb::scalar_multiplication::MSM::get_scalar_slice
static uint32_t get_scalar_slice(const ScalarField &scalar, size_t round, size_t slice_size) noexcept
Extract c-bit slice from scalar for bucket index computation.
Definition scalar_multiplication.cpp:227

bb::scalar_multiplication::MSM::msm
static AffineElement msm(std::span< const AffineElement > points, PolynomialSpan< const ScalarField > scalars, bool handle_edge_cases=false) noexcept
Main entry point for single MSM computation.
Definition scalar_multiplication.cpp:576

bb::scalar_multiplication::MSM::batch_multi_scalar_mul
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< std::span< const AffineElement > > points, std::span< std::span< ScalarField > > scalars, bool handle_edge_cases=true) noexcept
Compute multiple MSMs in parallel with work balancing.
Definition scalar_multiplication.cpp:497

bb::scalar_multiplication::MSM::batch_accumulate_points_into_buckets
static void batch_accumulate_points_into_buckets(std::span< const uint64_t > point_schedule, std::span< const AffineElement > points, AffineAdditionData &affine_data, BucketAccumulators &bucket_data) noexcept
Process sorted point schedule into bucket accumulators using batched affine additions.
Definition scalar_multiplication.cpp:407

data
const std::vector< MemoryValue > data
Definition data_copy.test.cpp:70

bn254.hpp

grumpkin.hpp

types.hpp

engine
numeric::RNG & engine
Definition eccvm_transcript.test.cpp:282

engine.hpp

file_io.hpp

mem_bn254_crs_factory.hpp

bb::numeric::get_randomness
RNG & get_randomness()
Definition engine.cpp:257

bb::scalar_multiplication::sort_point_schedule_and_count_zero_buckets
size_t sort_point_schedule_and_count_zero_buckets(uint64_t *point_schedule, const size_t num_entries, const uint32_t bucket_index_bits) noexcept
Sort point schedule by bucket index and count zero-bucket entries.
Definition process_buckets.cpp:83

bb::scalar_multiplication::BUCKET_INDEX_MASK
constexpr uint64_t BUCKET_INDEX_MASK
Definition process_buckets.hpp:18

bb
Entry point for Barretenberg command-line interface.
Definition api.hpp:5

bb::TYPED_TEST_SUITE
TYPED_TEST_SUITE(CommitmentKeyTest, Curves)

bb::get_num_cpus
size_t get_num_cpus()
Definition thread.cpp:33

bb::CurveTypes
::testing::Types< curve::BN254, curve::Grumpkin > CurveTypes
Definition shplonk.test.cpp:17

bb::TYPED_TEST
TYPED_TEST(CommitmentKeyTest, CommitToZeroPoly)
Definition commitment_key.test.cpp:217

bb::TEST
TEST(BoomerangMegaCircuitBuilder, BasicCircuit)
Definition graph_description_megacircuitbuilder.test.cpp:22

bb::parallel_for
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
Definition thread.cpp:111

bb::parallel_for_range
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
Definition thread.cpp:141

std::get
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13

polynomial.hpp

scalar_multiplication.hpp

bb::PolynomialSpan
Definition polynomial.hpp:27

bb::PolynomialSpan::span
std::span< Fr > span
Definition polynomial.hpp:29

bb::scalar_multiplication::MSM::AffineAdditionData
Scratch space for batched affine point additions (one per thread)
Definition scalar_multiplication.hpp:171

bb::scalar_multiplication::MSM::BucketAccumulators
Affine bucket accumulators for the fast affine-trick Pippenger variant.
Definition scalar_multiplication.hpp:142

bb::scalar_multiplication::MSM::BucketAccumulators::buckets
std::vector< AffineElement > buckets
Definition scalar_multiplication.hpp:143

bb::scalar_multiplication::MSM::BucketAccumulators::bucket_exists
BitVector bucket_exists
Definition scalar_multiplication.hpp:144

thread.hpp