use alloc::vec::{self, Vec};
use core::slice;
use core::{hash::Hash, ops::AddAssign};
#[cfg(feature = "std")]
use super::WeightError;
use crate::distr::uniform::SampleUniform;
use crate::distr::{Distribution, Uniform};
use crate::Rng;
#[cfg(not(feature = "std"))]
use alloc::collections::BTreeSet;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "std")]
use std::collections::HashSet;
#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
compile_error!("unsupported pointer width");
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum IndexVec {
#[doc(hidden)]
U32(Vec<u32>),
#[cfg(target_pointer_width = "64")]
#[doc(hidden)]
U64(Vec<u64>),
}
impl IndexVec {
#[inline]
pub fn len(&self) -> usize {
match self {
IndexVec::U32(v) => v.len(),
#[cfg(target_pointer_width = "64")]
IndexVec::U64(v) => v.len(),
}
}
#[inline]
pub fn is_empty(&self) -> bool {
match self {
IndexVec::U32(v) => v.is_empty(),
#[cfg(target_pointer_width = "64")]
IndexVec::U64(v) => v.is_empty(),
}
}
#[inline]
pub fn index(&self, index: usize) -> usize {
match self {
IndexVec::U32(v) => v[index] as usize,
#[cfg(target_pointer_width = "64")]
IndexVec::U64(v) => v[index] as usize,
}
}
#[inline]
pub fn into_vec(self) -> Vec<usize> {
match self {
IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(),
#[cfg(target_pointer_width = "64")]
IndexVec::U64(v) => v.into_iter().map(|i| i as usize).collect(),
}
}
#[inline]
pub fn iter(&self) -> IndexVecIter<'_> {
match self {
IndexVec::U32(v) => IndexVecIter::U32(v.iter()),
#[cfg(target_pointer_width = "64")]
IndexVec::U64(v) => IndexVecIter::U64(v.iter()),
}
}
}
impl IntoIterator for IndexVec {
type IntoIter = IndexVecIntoIter;
type Item = usize;
#[inline]
fn into_iter(self) -> IndexVecIntoIter {
match self {
IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()),
#[cfg(target_pointer_width = "64")]
IndexVec::U64(v) => IndexVecIntoIter::U64(v.into_iter()),
}
}
}
impl PartialEq for IndexVec {
fn eq(&self, other: &IndexVec) -> bool {
use self::IndexVec::*;
match (self, other) {
(U32(v1), U32(v2)) => v1 == v2,
#[cfg(target_pointer_width = "64")]
(U64(v1), U64(v2)) => v1 == v2,
#[cfg(target_pointer_width = "64")]
(U32(v1), U64(v2)) => {
(v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as u64 == *y))
}
#[cfg(target_pointer_width = "64")]
(U64(v1), U32(v2)) => {
(v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as u64))
}
}
}
}
impl From<Vec<u32>> for IndexVec {
#[inline]
fn from(v: Vec<u32>) -> Self {
IndexVec::U32(v)
}
}
#[cfg(target_pointer_width = "64")]
impl From<Vec<u64>> for IndexVec {
#[inline]
fn from(v: Vec<u64>) -> Self {
IndexVec::U64(v)
}
}
#[derive(Debug)]
pub enum IndexVecIter<'a> {
#[doc(hidden)]
U32(slice::Iter<'a, u32>),
#[cfg(target_pointer_width = "64")]
#[doc(hidden)]
U64(slice::Iter<'a, u64>),
}
impl<'a> Iterator for IndexVecIter<'a> {
type Item = usize;
#[inline]
fn next(&mut self) -> Option<usize> {
use self::IndexVecIter::*;
match self {
U32(iter) => iter.next().map(|i| *i as usize),
#[cfg(target_pointer_width = "64")]
U64(iter) => iter.next().map(|i| *i as usize),
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
IndexVecIter::U32(v) => v.size_hint(),
#[cfg(target_pointer_width = "64")]
IndexVecIter::U64(v) => v.size_hint(),
}
}
}
impl<'a> ExactSizeIterator for IndexVecIter<'a> {}
#[derive(Clone, Debug)]
pub enum IndexVecIntoIter {
#[doc(hidden)]
U32(vec::IntoIter<u32>),
#[cfg(target_pointer_width = "64")]
#[doc(hidden)]
U64(vec::IntoIter<u64>),
}
impl Iterator for IndexVecIntoIter {
type Item = usize;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
use self::IndexVecIntoIter::*;
match self {
U32(v) => v.next().map(|i| i as usize),
#[cfg(target_pointer_width = "64")]
U64(v) => v.next().map(|i| i as usize),
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
use self::IndexVecIntoIter::*;
match self {
U32(v) => v.size_hint(),
#[cfg(target_pointer_width = "64")]
U64(v) => v.size_hint(),
}
}
}
impl ExactSizeIterator for IndexVecIntoIter {}
#[track_caller]
pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
where
R: Rng + ?Sized,
{
if amount > length {
panic!("`amount` of samples must be less than or equal to `length`");
}
if length > (u32::MAX as usize) {
#[cfg(target_pointer_width = "32")]
unreachable!();
#[cfg(target_pointer_width = "64")]
return sample_rejection(rng, length as u64, amount as u64);
}
let amount = amount as u32;
let length = length as u32;
if amount < 163 {
const C: [[f32; 2]; 2] = [[1.6, 8.0 / 45.0], [10.0, 70.0 / 9.0]];
let j = usize::from(length >= 500_000);
let amount_fp = amount as f32;
let m4 = C[0][j] * amount_fp;
if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp {
sample_inplace(rng, length, amount)
} else {
sample_floyd(rng, length, amount)
}
} else {
const C: [f32; 2] = [270.0, 330.0 / 9.0];
let j = usize::from(length >= 500_000);
if (length as f32) < C[j] * (amount as f32) {
sample_inplace(rng, length, amount)
} else {
sample_rejection(rng, length, amount)
}
}
}
#[cfg(feature = "std")]
pub fn sample_weighted<R, F, X>(
rng: &mut R,
length: usize,
weight: F,
amount: usize,
) -> Result<IndexVec, WeightError>
where
R: Rng + ?Sized,
F: Fn(usize) -> X,
X: Into<f64>,
{
if length > (u32::MAX as usize) {
#[cfg(target_pointer_width = "32")]
unreachable!();
#[cfg(target_pointer_width = "64")]
{
let amount = amount as u64;
let length = length as u64;
sample_efraimidis_spirakis(rng, length, weight, amount)
}
} else {
assert!(amount <= u32::MAX as usize);
let amount = amount as u32;
let length = length as u32;
sample_efraimidis_spirakis(rng, length, weight, amount)
}
}
#[cfg(feature = "std")]
fn sample_efraimidis_spirakis<R, F, X, N>(
rng: &mut R,
length: N,
weight: F,
amount: N,
) -> Result<IndexVec, WeightError>
where
R: Rng + ?Sized,
F: Fn(usize) -> X,
X: Into<f64>,
N: UInt,
IndexVec: From<Vec<N>>,
{
use std::cmp::Ordering;
if amount == N::zero() {
return Ok(IndexVec::U32(Vec::new()));
}
struct Element<N> {
index: N,
key: f64,
}
impl<N> PartialOrd for Element<N> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<N> Ord for Element<N> {
fn cmp(&self, other: &Self) -> Ordering {
self.key.partial_cmp(&other.key).unwrap()
}
}
impl<N> PartialEq for Element<N> {
fn eq(&self, other: &Self) -> bool {
self.key == other.key
}
}
impl<N> Eq for Element<N> {}
let mut candidates = Vec::with_capacity(length.as_usize());
let mut index = N::zero();
while index < length {
let weight = weight(index.as_usize()).into();
if weight > 0.0 {
let key = rng.random::<f64>().powf(1.0 / weight);
candidates.push(Element { index, key });
} else if !(weight >= 0.0) {
return Err(WeightError::InvalidWeight);
}
index += N::one();
}
let avail = candidates.len();
if avail < amount.as_usize() {
return Err(WeightError::InsufficientNonZero);
}
let (_, mid, greater) = candidates.select_nth_unstable(avail - amount.as_usize());
let mut result: Vec<N> = Vec::with_capacity(amount.as_usize());
result.push(mid.index);
for element in greater {
result.push(element.index);
}
Ok(IndexVec::from(result))
}
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
{
debug_assert!(amount <= length);
let mut indices = Vec::with_capacity(amount as usize);
for j in length - amount..length {
let t = rng.gen_range(..=j);
if let Some(pos) = indices.iter().position(|&x| x == t) {
indices[pos] = j;
}
indices.push(t);
}
IndexVec::from(indices)
}
fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
{
debug_assert!(amount <= length);
let mut indices: Vec<u32> = Vec::with_capacity(length as usize);
indices.extend(0..length);
for i in 0..amount {
let j: u32 = rng.gen_range(i..length);
indices.swap(i as usize, j as usize);
}
indices.truncate(amount as usize);
debug_assert_eq!(indices.len(), amount as usize);
IndexVec::from(indices)
}
trait UInt: Copy + PartialOrd + Ord + PartialEq + Eq + SampleUniform + Hash + AddAssign {
fn zero() -> Self;
#[cfg_attr(feature = "alloc", allow(dead_code))]
fn one() -> Self;
fn as_usize(self) -> usize;
}
impl UInt for u32 {
#[inline]
fn zero() -> Self {
0
}
#[inline]
fn one() -> Self {
1
}
#[inline]
fn as_usize(self) -> usize {
self as usize
}
}
#[cfg(target_pointer_width = "64")]
impl UInt for u64 {
#[inline]
fn zero() -> Self {
0
}
#[inline]
fn one() -> Self {
1
}
#[inline]
fn as_usize(self) -> usize {
self as usize
}
}
fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec
where
R: Rng + ?Sized,
IndexVec: From<Vec<X>>,
{
debug_assert!(amount < length);
#[cfg(feature = "std")]
let mut cache = HashSet::with_capacity(amount.as_usize());
#[cfg(not(feature = "std"))]
let mut cache = BTreeSet::new();
let distr = Uniform::new(X::zero(), length).unwrap();
let mut indices = Vec::with_capacity(amount.as_usize());
for _ in 0..amount.as_usize() {
let mut pos = distr.sample(rng);
while !cache.insert(pos) {
pos = distr.sample(rng);
}
indices.push(pos);
}
debug_assert_eq!(indices.len(), amount.as_usize());
IndexVec::from(indices)
}
#[cfg(test)]
mod test {
use super::*;
use alloc::vec;
#[test]
#[cfg(feature = "serde")]
fn test_serialization_index_vec() {
let some_index_vec = IndexVec::from(vec![254_u32, 234, 2, 1]);
let de_some_index_vec: IndexVec =
bincode::deserialize(&bincode::serialize(&some_index_vec).unwrap()).unwrap();
assert_eq!(some_index_vec, de_some_index_vec);
}
#[test]
fn test_sample_boundaries() {
let mut r = crate::test::rng(404);
assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0);
assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0);
assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]);
assert_eq!(sample_rejection(&mut r, 1u32, 0).len(), 0);
assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0);
assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0);
assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]);
let sum: usize = sample_rejection(&mut r, 1 << 25, 10u32).into_iter().sum();
assert!(1 << 25 < sum && sum < (1 << 25) * 25);
let sum: usize = sample_floyd(&mut r, 1 << 25, 10).into_iter().sum();
assert!(1 << 25 < sum && sum < (1 << 25) * 25);
}
#[test]
#[cfg_attr(miri, ignore)] fn test_sample_alg() {
let seed_rng = crate::test::rng;
let (length, amount): (usize, usize) = (100, 50);
let v1 = sample(&mut seed_rng(420), length, amount);
let v2 = sample_inplace(&mut seed_rng(420), length as u32, amount as u32);
assert!(v1.iter().all(|e| e < length));
assert_eq!(v1, v2);
let v3 = sample_floyd(&mut seed_rng(420), length as u32, amount as u32);
assert!(v1 != v3);
let (length, amount): (usize, usize) = (1 << 20, 50);
let v1 = sample(&mut seed_rng(421), length, amount);
let v2 = sample_floyd(&mut seed_rng(421), length as u32, amount as u32);
assert!(v1.iter().all(|e| e < length));
assert_eq!(v1, v2);
let (length, amount): (usize, usize) = (1 << 20, 600);
let v1 = sample(&mut seed_rng(422), length, amount);
let v2 = sample_rejection(&mut seed_rng(422), length as u32, amount as u32);
assert!(v1.iter().all(|e| e < length));
assert_eq!(v1, v2);
}
#[cfg(feature = "std")]
#[test]
fn test_sample_weighted() {
let seed_rng = crate::test::rng;
for &(amount, len) in &[(0, 10), (5, 10), (9, 10)] {
let v = sample_weighted(&mut seed_rng(423), len, |i| i as f64, amount).unwrap();
match v {
IndexVec::U32(mut indices) => {
assert_eq!(indices.len(), amount);
indices.sort_unstable();
indices.dedup();
assert_eq!(indices.len(), amount);
for &i in &indices {
assert!((i as usize) < len);
}
}
#[cfg(target_pointer_width = "64")]
_ => panic!("expected `IndexVec::U32`"),
}
}
let r = sample_weighted(&mut seed_rng(423), 10, |i| i as f64, 10);
assert_eq!(r.unwrap_err(), WeightError::InsufficientNonZero);
}
#[test]
fn value_stability_sample() {
let do_test = |length, amount, values: &[u32]| {
let mut buf = [0u32; 8];
let mut rng = crate::test::rng(410);
let res = sample(&mut rng, length, amount);
let len = res.len().min(buf.len());
for (x, y) in res.into_iter().zip(buf.iter_mut()) {
*y = x as u32;
}
assert_eq!(
&buf[0..len],
values,
"failed sampling {}, {}",
length,
amount
);
};
do_test(10, 6, &[0, 9, 5, 4, 6, 8]); do_test(25, 10, &[24, 20, 19, 9, 22, 16, 0, 14]); do_test(300, 8, &[30, 283, 243, 150, 218, 240, 1, 189]); do_test(300, 80, &[31, 289, 248, 154, 221, 243, 7, 192]); do_test(300, 180, &[31, 289, 248, 154, 221, 243, 7, 192]); do_test(
1_000_000,
8,
&[103717, 963485, 826422, 509101, 736394, 807035, 5327, 632573],
); do_test(
1_000_000,
180,
&[103718, 963490, 826426, 509103, 736396, 807036, 5327, 632573],
); }
}