如何获得随机数列表,具有固定的总和和大小 [英] How to get random number list, with fixed sum and size

查看:141
本文介绍了如何获得随机数列表,具有固定的总和和大小的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

如何通过给出大小来获得随机数列表,并且预期总和,完全支持

How to get random number list by give size, and expectd sum, fully support

我有一个代码 sum-int.ts sum-float.ts internal / sum-num.ts

我想做什么


  1. rN = radom number(float或int)min~max

  2. size = [r1,r2,r3,... rN] .length

  3. sum = r1 + r2 + r3 + ... rN

  4. 所有rN应> = min,< = max

  5. 支持(唯一/无唯一)值

  1. rN = radom number ( float or int ) between min ~ max
  2. size = [r1, r2, r3, ...rN].length
  3. sum = r1 + r2 + r3 + ...rN
  4. all rN should >= min, and <= max
  5. support (unique / no-unique) values


但现在有问题

but now there has problem


  • 无法做到当max< = 0 with float(因此我禁用输入)时工作

  • 无法使其工作时max< = 1 with int(所以我禁用输入)

  • 代码有逻辑错误,所以我来问这里如何通过js

thx @SeverinPappadeux for int版本,以及浮动的想法

thx @SeverinPappadeux for int version, and idea for float

  • coreFnRandSumInt int version
  • coreFnRandSumFloat float version
{ size: 2, sum: 5, min: 0, max: 5, n: 5, maxv: 5 }
true 0 5 [ 2, 3 ] 0 [ 2, 3 ]
{ bool: true,
  ret_a: [ 2, 3 ],
  a_sum: 5,
  ret_b: [ 2, 3 ],
  b_sum: 5 }
[ 2, 3 ] 5
----------
{ size: 6, sum: 13, min: -8, max: 15, n: 61, maxv: 23 }
false 0 61 [ 9, 8, 7, 3, 6, 28 ] -8 [ 9, 8, 7, 3, 6, 28 ]
false 1 61 [ 11, 9, 7, 4, 5, 25 ] -8 [ 11, 9, 7, 4, 5, 25 ]
true 2 13 [ 1, -1, 0, -2, 2, 13 ] -8 [ 9, 7, 8, 6, 10, 21 ]
{ bool: true,
  ret_a: [ 9, 7, 8, 6, 10, 21 ],
  a_sum: 61,
  ret_b: [ 1, -1, 0, -2, 2, 13 ],
  b_sum: 13 }
[ 1, -1, 0, -2, 2, 13 ] 13
----------
{ size: 6, sum: -13, min: -8, max: 15, n: 35, maxv: 23 }
true 0 -13 [ 0, -6, -1, -4, -7, 5 ] -8 [ 8, 2, 7, 4, 1, 13 ]
{ bool: true,
  ret_a: [ 8, 2, 7, 4, 1, 13 ],
  a_sum: 35,
  ret_b: [ 0, -6, -1, -4, -7, 5 ],
  b_sum: -13 }
[ 0, -6, -1, -4, -7, 5 ] -13
{ size: 6, sum: 0, min: -8, max: 15, n: 48, maxv: 23 }
true 0 0 [ -1, 0, -3, -2, -4, 10 ] -8 [ 7, 8, 5, 6, 4, 18 ]
{ bool: true,
  ret_a: [ 7, 8, 5, 6, 4, 18 ],
  a_sum: 48,
  ret_b: [ -1, 0, -3, -2, -4, 10 ],
  b_sum: 0 }
[ -1, 0, -3, -2, -4, 10 ] 0







/**
 * not support unique, but will try make unique if can
 * thx @SeverinPappadeux for int version
 *
 * @see https://stackoverflow.com/questions/53279807/how-to-get-random-number-list-with-fixed-sum-and-size
 */
export function coreFnRandSumInt(argv: ISumNumParameterWuthCache)
{
    let {
        random,
        size,
        sum,
        min,
        max,
    } = argv;

    let sum_1_to_size = sum_1_to_n(size);

    sum = isUnset(sum) ? sum_1_to_size : sum;

    expect(sum).integer();

    min = isUnset(min) ? (sum > 0 ? 0 : sum) : min;
    max = isUnset(max) ? Math.abs(sum) : max;

    expect(min).integer();
    expect(max).integer();

    let n_sum = Math.abs(sum - size * min);
    let maxv = max - min;

    /*
    console.log({
        sum_1_to_size,
        size,
        sum,
        min,
        max,
        n_sum,
        maxv,
    });
    */

    if (sum > 0)
    {
        expect(sum).gt(min)
    }

    /**
     * pre-check
     */
    //expect(maxv, `(max - min) should > sum_1_to_size`).gte(sum_1_to_size);

    /**
     * probabilities
     */
    let prob = get_prob(size, maxv);

    expect(prob).is.array.lengthOf(size);

    /**
     * make rmultinom use with random.next
     */
    let rmultinomFn = libRmath.Multinomial(fakeLibRmathRng(random.next)).rmultinom;

    /**
     * low value for speed up, but more chance fail
     */
    let n_len = argv.limit || 5 || n_sum;
    /**
     * rebase number
     */
    let n_diff: number = min;

    const rmultinomCreateFn = (n_len: number) => {
        return (rmultinomFn(n_len, n_sum, prob) as number[][])
            .reduce((a, value) =>
            {
                let i = value.length;
                let b_sum = 0;
                let bool = false;
                let unique_len = 0;

                while(i--)
                {
                    let v = value[i];
                    let n = v + n_diff;

                    if (value.indexOf(v) === i)
                    {
                        unique_len++;
                    }

                    if (n >= min && n <= max)
                    {
                        bool = true;
                        value[i] = n;

                        b_sum += n
                    }
                    else
                    {
                        bool = false;
                        break;
                    }
                }

                if (bool && b_sum === sum)
                {
                    let item = {
                        value,
                        unique_len,
                        b_sum,
                        bool,
                    };

                    a.push(item)
                }

                return a
            }, [] as {
                value: number[],
                unique_len: number,
                b_sum: number,
                bool: boolean,
            }[])
            .sort((a, b) => b.unique_len - a.unique_len)
            ;
    };

    /**
     * pre-make fail-back value
     */
    const cache_max = 10;
    let cache: number[][] = [];

    {
        let len = 200;

        let arr = array_unique(rmultinomCreateFn(len));

        if (arr.length)
        {
            let i = Math.min(cache_max, arr.length);

            while(i--)
            {
                cache.push(arr[i].value)
            }

            cache = array_unique(cache.map(v => v.sort()))
        }

        arr = undefined;

//      console.log(cache);
    }

    /**
     * try reset memory
     */
    argv = undefined;

    return () =>
    {
        let arr = rmultinomCreateFn(n_len);

        let ret_b: number[];
        let bool_toplevel: boolean;

        let c_len = cache.length;

        if (arr.length)
        {
            ret_b = arr[0].value;
            bool_toplevel = arr[0].bool;

            if (bool_toplevel && c_len < cache_max)
            {
                cache.push(ret_b);
            }
        }
        else if (c_len)
        {
            let i = UtilDistributions.randIndex(random, c_len);

            ret_b = cache[i];
            bool_toplevel = true;
        }

        if (!bool_toplevel || !ret_b)
        {
            throw new Error(`can't generator value by current input argv, or try set limit for high number`)
        }

        return ret_b;
    }
}






推荐答案

好的,这是。让我们从整数问题开始吧。最简单的方法是使用统计分布,这是一组自然产生的数字,总和为固定值。并且有这样的分发 - 多项分发。它具有等于 n 的固定总和,它提供从0到 n 的采样值。因为要求采样间隔是任意的,首先我们将间隔移到最小为0,然后将其移回。注意,采样值可能高于期望的最大值,因此我们必须使用拒绝技术,其中任何高于最大值的样本将被拒绝并且下一次尝试将被采样。

Ok, here it is. Let's start with integer problem. Simplest way to proceed is to use statistical distribution which is naturally produced set of numbers summed to a fixed value. And there is such distribution - Multinomial distribution. It has fixed sum equal to n, it provides sampled values from 0 to n. Because requirement is for sampling interval to be arbitrary, first we will shift interval to have minimum at 0, sample and then shift it back. Note, that sampled values might be higher than desired max, thus we have to use rejection technique, where any sample above max will be reject and next attempt will be sampled.

我们使用Python / Numpy的多项式采样。除了拒绝,您还可以添加唯一性测试。 Code,python 3.7

We use multinomial sampling from Python/Numpy. Along with rejection you could add test for uniqueness as well. Code, python 3.7

import numpy as np

def sample_sum_interval(n: int, p, maxv: int):
    while True:
        q = np.random.multinomial(n, p, size=1)[0]
        v = np.where(q > maxv)
        if len(v[0]) == 0: # if len(v) > 0, some values are outside the range, reject
            # test on unique if len(np.unique(q)) == len(q)
            return q
    return None

k = 6
min = -8
max = 13
sum = 13

n    = sum - k*min # redefined sum
maxv = max - min   # redefined max, min would be 0
p = np.full((k), np.float64(1.0)/np.float64(k), dtype=np.float64) # probabilities

q = sample_sum_interval(n, p, maxv) + min # back to original interval
print(q)
print(np.sum(q))
print(np.mean(q))

q = sample_sum_interval(n, p, maxv) + min
print(q)
print(np.sum(q))
print(np.mean(q))

q = sample_sum_interval(n, p, maxv) + min
print(q)
print(np.sum(q))
print(np.mean(q))

输出

[ 5  0 -2  3  3  4]
13
2.1666666666666665
[ 3  3 -1  2  1  5]   
13                    
2.1666666666666665    
[-4  0  0  3 10  4]   
13                    
2.1666666666666665    

为了将其翻译为Javascript,您需要无论是多项式抽样,还是二项式,二项式都很容易得到多项式。

In order to translate it to Javascript, you would need either multinomial sampling, or binomial one, from binomial it is easy to get to multinomial.

更新

ok ,当我没有将 min 添加到结果时输出,sum将是61(13 + 6 * 8),
范围[0 ... 21]

ok, here is output when I don't add min to the result, sum would be 61 (13+6*8), range [0...21]

[11  7  6  8  9 20]
61
10.166666666666666
[ 5 10 14 13 14  5]
61
10.166666666666666
[ 9 12  7 15  7 11]
61
10.166666666666666

显然,有一个带有多项式抽样的Javascript库,以 R 为模型,
谢谢@bluelovers

Apparently, there is a Javascript library with multinomial sampling, which is modeled after R, thank you @bluelovers

应该调用它在这样的循环中:

It should be called in the loop like this:

v = rmultinom(1, n, p);

然后 v 应检查为在[0 ... maxv]范围内,如果在外,则接受或拒绝。

and then v should be checked to be within range [0...maxv] and accepted or rejected if outside.

UPDATE II

UPDATE II

让我很快(对不起,真的没有时间,明天就会明白)描述了我如何为浮子做这件事。 [0 ... 1]范围内生成的一组数字有类似的分布,名为 Dirichlet发行,它总是总和为1的固定值。在Python / Numpy中,它是 https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.dirichlet.html

Let me quickly (sorry, really have no time, will get to it tomorrow) describethe idea how I would do it for floats. There is similar distribution with generated bunch of numbers in the [0...1] range called Dirichlet distribution, and it always sums to fixed value of 1. In Python/Numpy it is https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.dirichlet.html.

假设我从Dirichlet采样 n 数字d i ,然后将它们映射到[min ... max ] interval:

Suppose I sampled n numbers di from Dirichlet and then map them to [min...max] interval:

x i = min + d i *(max-min)

xi = min + di*(max-min)

然后我将它们全部加起来,使用所有d i 总和为1的属性:

Then I sum them all, using property that all di sums to 1:

Sum = n * min +(max - min)=(n-1)* min + max

Sum = n*min + (max - min) = (n-1)*min + max

如果 Sum 是固定,然后它意味着我们必须重新定义最大值 - 让我们称之为max s

If Sum is fixed, then it mean we have to redefine maximum value - lets call it sampling maxs.

所以取样专业cedure将如下 - 来自Dirichlet的样本 n [0 ... 1]数字,将它们映射到[min ... max s ]间隔,并检查是否有任何这些数字低于所需 max (原始,未重新定义)。如果是,则接受,否则拒绝,如整数情况。

So sampling procedure would be as following - sample n [0...1] numbers from Dirichlet, map them to [min...maxs] interval, and check if any of those numbers are below desired max (original, not redefined). If it is, you accept, otherwise reject, like in integer case.

以下代码

import numpy as np

def my_dirichlet(n: int):
    """
    This is equivalent to numpy.random.dirichlet when all alphas are equal to 1
    """
    q = np.random.exponential(scale = np.float64(1.0), size=n)
    norm = 1.0/np.sum(q)
    return norm * q

def sample_sum_interval(n: int, summa: np.float64, minv: np.float64, maxv: np.float64):
    maxs  = summa - np.float64(n-1)*minv # redefine maximum value of the interval is sum is fixed
    alpha = np.full(n, np.float64(1.0), dtype=np.float64)

    while True:
        q = my_dirichlet(n) # q = np.random.dirichlet(alpha)
        q = minv + q*(maxs - minv) # we map it to [minv...maxs]
        v = np.where(q > maxv)     # but we need it in the [minv...maxv], so accept or reject test
        if len(v[0]) == 0: # if len(v) > 0, some values are outside the range, reject, next sample
            return q
    return None

n = 5
min = np.float64(-2.0)
max = np.float64(3.0)
sum = np.float64(1.0)

q = sample_sum_interval(n, sum, min, max)
print(q)
print(np.sum(q))

q = sample_sum_interval(n, sum, min, max)
print(q)
print(np.sum(q))

q = sample_sum_interval(n, sum, min, max)
print(q)
print(np.sum(q))

我已经进行了标准的NumPy Dirichlet采样以及自定义Dirichlet采样。显然, libRmath.js 具有指数分布采样,但没有Dirichlet,但可以用用户定义的代码和指数代替。请记住,NumPy对带有单个运算符的向量进行操作,循环是隐式的。

I've put standard NumPy Dirichlet sampling as well as custom Dirichlet sampling. Apparently, libRmath.js has exponential distribution sampling, but no Dirichlet, but it could be replaced with user-define code and exponentials. Remember, NumPy operates on vectors with single operator, loops are implicit.

输出:

[-0.57390094 -1.80924001  0.47630282  0.80008638  2.10675174]
1.0000000000000013
[-1.12192892  1.18503129  0.97525135  0.69175429 -0.73010801]
0.9999999999999987
[-0.34803521  0.36499743 -1.165332    0.9433809   1.20498888]
0.9999999999999991

这篇关于如何获得随机数列表,具有固定的总和和大小的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆