python 中的随机数
代码参考 /python/cpython/Lib/random.py
/moudles/_randommodule.c
python version: 3.7
代码参考 /python/cpython/Lib/random.py
/moudles/_randommodule.c
python version: 3.7
random.randint 与 random.randrange
python 中 randint 与 randrange 这两个函数是很常用的函数
所以来看一下,python 中是如何实现的
randint 本质上是 randrange(a, a+1),所以只要来看 randrange 就可以了
所以来看一下,python 中是如何实现的
randint 本质上是 randrange(a, a+1),所以只要来看 randrange 就可以了
randrange(self, start, stop=None, step=1, _int=int)
_int 用来尝试 将 start stop step 强制转换成 int
randrange 中用来处理数据格式与进行异常的抛出
然后返回
randrange 中用来处理数据格式与进行异常的抛出
然后返回
istart + istep*self._randbelow(n)
_randbelow
_randbelow(self, n, int=int, maxsize=1<<BPF, type=type,Method=_MethodType, BuiltinMethod=_BuiltinMethodType)
当 radom 函数没有被重写的时候,或者实现了新的的 getrandbits 函数时候,会使用新的函数来生成随机数
k = n.bit_length()
r = getrandbits(k)
while r >= n:
r = getrandbits(k)
return r
BPF=53; python 中 float 包含的字节数为 53 所以 maxsize 是将 1 向左移动 53 位即 2 的 53 次幂
9007199254740992
random 函数被重写,但是没有新的 getrandbits 函数的时候,只能使用 random 来实现
rem = maxsize % n
limit = (maxsize - rem) / maxsize
r = random()
while r >= limit:
r = random()
return int(r*maxsize) % n
getrandbits
当 k <= 32 的时候
return PyLong_FromUnsignedLong(genrand_int32(self) >> (32 - k));
当 k > 32 的时候
words = (k - 1) / 32 + 1;
wordarray = (uint32_t *)PyMem_Malloc(words * 4);
if (wordarray == NULL) {
PyErr_NoMemory();
return NULL;
}
#if PY_LITTLE_ENDIAN
for (i = 0; i < words; i++, k -= 32)
#else
for (i = words - 1; i >= 0; i--, k -= 32)
#endif
{
r = genrand_int32(self);
if (k < 32)
r >>= (32 - k); /* Drop least significant bits */
wordarray[i] = r;
}
result = _PyLong_FromByteArray((unsigned char *)wordarray, words * 4,
PY_LITTLE_ENDIAN, 0 /* unsigned */);
PyMem_Free(wordarray);
return result;
genrand_int32
genrand_int32(RandomObject *self)
{
uint32_t y;
static const uint32_t mag01[2] = {0x0U, MATRIX_A};
/* mag01[x] = x * MATRIX_A for x=0,1 */
uint32_t *mt;
mt = self->state;
if (self->index >= N) { /* generate N words at one time */
int kk;
for (kk=0;kk<N-M;kk++) {
y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1U];
}
for (;kk<N-1;kk++) {
y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1U];
}
y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1U];
self->index = 0;
}
y = mt[self->index++];
y ^= (y >> 11);
y ^= (y << 7) & 0x9d2c5680U;
y ^= (y << 15) & 0xefc60000U;
y ^= (y >> 18);
return y;
}
`
上面的代码就是梅森旋转算法的具体实现,有时间单独写一篇关于梅森旋转算法的 blog 好了
random.choice
def choice(self, seq):
"""Choose a random element from a non-empty sequence."""
try:
i = self._randbelow(len(seq))
except ValueError:
raise IndexError('Cannot choose from an empty sequence') from None
return seq[i]
这样看来 choice 也是使用 _randbelow 来生成一个数字,再返回
random.choices
choices 是 python3.6 中新加入的函数
def choices(self, population, weights=None, *, cum_weights=None, k=1):
"""Return a k sized list of population elements chosen with replacement.
If the relative weights or cumulative weights are not specified,
the selections are made with equal probability.
"""
random = self.random
if cum_weights is None:
if weights is None:
_int = int
total = len(population)
return [population[_int(random() * total)] for i in range(k)]
cum_weights = list(_itertools.accumulate(weights))
elif weights is not None:
raise TypeError('Cannot specify both weights and cumulative weights')
if len(cum_weights) != len(population):
raise ValueError('The number of weights does not match the population')
bisect = _bisect.bisect
total = cum_weights[-1]
return [population[bisect(cum_weights, random() * total)] for i in range(k)]
random
static PyObject *
random_random(RandomObject *self)
{
uint32_t a=genrand_int32(self)>>5, b=genrand_int32(self)>>6;
return PyFloat_FromDouble((a*67108864.0+b)*(1.0/9007199254740992.0));
}
bisect
bisect 模块是一个专门使用二分查找进行列表查找的算法,前提是列表有序;
这里用来使累积权重转换成一个随机的 index
所以,choices 在使用的时候可能会在返回的列表中有同一个元素
如果不想在返回的列表中有相同的元素可以使用:sample
这里用来使累积权重转换成一个随机的 index
所以,choices 在使用的时候可能会在返回的列表中有同一个元素
如果不想在返回的列表中有相同的元素可以使用:sample
sample
def sample(self, population, k):
if isinstance(population, _Set):
population = tuple(population)
if not isinstance(population, _Sequence):
raise TypeError("Population must be a sequence or set. For dicts, use list(d).")
randbelow = self._randbelow
n = len(population)
if not 0 <= k <= n:
raise ValueError("Sample larger than population or is negative")
result = [None] * k
setsize = 21 # size of a small set minus size of an empty list
if k > 5:
setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
if n <= setsize:
# An n-length list is smaller than a k-length set
pool = list(population)
for i in range(k): # invariant: non-selected at [0,n-i)
j = randbelow(n-i)
result[i] = pool[j]
pool[j] = pool[n-i-1] # move non-selected item into vacancy
else:
selected = set()
selected_add = selected.add
for i in range(k):
j = randbelow(n)
while j in selected:
j = randbelow(n)
selected_add(j)
result[i] = population[j]
return result
代码高亮会不会更好一些 ゚∀゚)
回复删除