rand_chacha/
guts.rs

// Copyright 2019 The CryptoCorrosion Contributors
// Copyright 2020 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! The ChaCha random number generator.
11
12use ppv_lite86::{dispatch, dispatch_light128};
13
14pub use ppv_lite86::Machine;
15use ppv_lite86::{
16    vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4, Vec4Ext, Vector,
17};
18
/// Size of one ChaCha block, counted in 32-bit words (the output buffer is a
/// `[u32; BUFSZ]` holding four such blocks).
pub(crate) const BLOCK: usize = 16;
/// `BLOCK` as a `u64`, for 64-bit position arithmetic.
pub(crate) const BLOCK64: u64 = BLOCK as u64;
/// Base-2 logarithm of the number of blocks produced per refill.
const LOG2_BUFBLOCKS: u64 = 2;
/// Number of blocks produced per refill (`1 << 2`, i.e. 4).
const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS;
/// Output buffer length in 32-bit words, as a `u64` (16 * 4 = 64).
pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS;
/// Output buffer length in 32-bit words, as a `usize`.
pub(crate) const BUFSZ: usize = BUFSZ64 as usize;

/// Stream-parameter index of the 64-bit nonce. The accessors map index `p` to
/// words `2p` and `2p + 1` of row `d`, so this selects words 2 and 3.
const STREAM_PARAM_NONCE: u32 = 1;
/// Stream-parameter index of the 64-bit block position (words 0 and 1 of row `d`).
const STREAM_PARAM_BLOCK: u32 = 0;
28
/// Persistent ChaCha state, kept as three 128-bit rows.
///
/// The constant first row is not stored here; the refill routines rebuild it
/// from literals on every call.
#[derive(Clone, PartialEq, Eq)]
pub struct ChaCha {
    /// First 16 bytes of the key (written by `init_chacha`, read back by `get_seed`).
    pub(crate) b: vec128_storage,
    /// Last 16 bytes of the key.
    pub(crate) c: vec128_storage,
    /// Block position (words 0 and 1) followed by the nonce (words 2 and 3).
    pub(crate) d: vec128_storage,
}
35
/// Working state for the round function: four rows of vector type `V`.
///
/// `V` is a single 128-bit row when one block is processed, or a wide vector
/// carrying the same row of four blocks when `refill_wide_impl` runs.
#[derive(Clone)]
pub struct State<V> {
    /// Constants row.
    pub(crate) a: V,
    /// First key row.
    pub(crate) b: V,
    /// Second key row.
    pub(crate) c: V,
    /// Block position / nonce row.
    pub(crate) d: V,
}
43
44#[inline(always)]
45pub(crate) fn round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V> {
46    x.a += x.b;
47    x.d = (x.d ^ x.a).rotate_each_word_right16();
48    x.c += x.d;
49    x.b = (x.b ^ x.c).rotate_each_word_right20();
50    x.a += x.b;
51    x.d = (x.d ^ x.a).rotate_each_word_right24();
52    x.c += x.d;
53    x.b = (x.b ^ x.c).rotate_each_word_right25();
54    x
55}
56
57#[inline(always)]
58pub(crate) fn diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> {
59    x.b = x.b.shuffle_lane_words3012();
60    x.c = x.c.shuffle_lane_words2301();
61    x.d = x.d.shuffle_lane_words1230();
62    x
63}
64#[inline(always)]
65pub(crate) fn undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> {
66    x.b = x.b.shuffle_lane_words1230();
67    x.c = x.c.shuffle_lane_words2301();
68    x.d = x.d.shuffle_lane_words3012();
69    x
70}
71
impl ChaCha {
    /// Builds a state from a 32-byte key and a nonce.
    ///
    /// The last 8 bytes of `nonce` are always used; when `nonce` is exactly
    /// 12 bytes long its first 4 bytes are used as well. The low word of the
    /// block position starts at zero.
    ///
    /// # Panics
    ///
    /// Panics if `nonce` is shorter than 8 bytes.
    #[inline(always)]
    pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self {
        init_chacha(key, nonce)
    }

    /// Produce 4 blocks of output, advancing the state
    ///
    /// `drounds` is the number of double rounds to run; `out` receives the
    /// four blocks back to back, and the block position moves forward by 4.
    #[inline(always)]
    pub fn refill4(&mut self, drounds: u32, out: &mut [u32; BUFSZ]) {
        refill_wide(self, drounds, out)
    }

    /// Overwrites the 64-bit block position (words 0 and 1 of row `d`).
    #[inline(always)]
    pub fn set_block_pos(&mut self, value: u64) {
        set_stream_param(self, STREAM_PARAM_BLOCK, value)
    }

    /// Returns the current 64-bit block position.
    #[inline(always)]
    pub fn get_block_pos(&self) -> u64 {
        get_stream_param(self, STREAM_PARAM_BLOCK)
    }

    /// Overwrites the 64-bit nonce (words 2 and 3 of row `d`).
    #[inline(always)]
    pub fn set_nonce(&mut self, value: u64) {
        set_stream_param(self, STREAM_PARAM_NONCE, value)
    }

    /// Returns the current 64-bit nonce.
    #[inline(always)]
    pub fn get_nonce(&self) -> u64 {
        get_stream_param(self, STREAM_PARAM_NONCE)
    }

    /// Returns the 32 key bytes held in rows `b` and `c`, in little-endian
    /// byte order.
    #[inline(always)]
    pub fn get_seed(&self) -> [u8; 32] {
        get_seed(self)
    }
}
109
// Big-endian build: advance the block position using scalar arithmetic only.
// Nothing here depends on how the vector is laid out in memory, so this form
// is platform-independent.
//
// Words 0 (low) and 1 (high) of `d0` are read as one 64-bit position, `i` is
// added with wrap-around, and the two halves are written back. The other words
// are left as they were.
#[inline(always)]
#[cfg(target_endian = "big")]
fn add_pos<Mach: Machine>(_m: Mach, d0: Mach::u32x4, i: u64) -> Mach::u32x4 {
    let low = u64::from(d0.extract(0));
    let high = u64::from(d0.extract(1));
    let advanced = ((high << 32) | low).wrapping_add(i);
    d0.insert((advanced >> 32) as u32, 1)
        .insert(advanced as u32, 0)
}
// Big-endian build: expand the stored `d` row into the `d` rows of four
// consecutive blocks (position + 0, + 1, + 2, + 3), using scalar arithmetic
// on the 64-bit position held in words 0 (low) and 1 (high).
#[inline(always)]
#[cfg(target_endian = "big")]
fn d0123<Mach: Machine>(m: Mach, d: vec128_storage) -> Mach::u32x4x4 {
    let d0: Mach::u32x4 = m.unpack(d);
    let base = ((d0.extract(1) as u64) << 32) | d0.extract(0) as u64;
    // Row for the block `offset` positions after the stored one.
    let row_at = |offset: u64| {
        let pos = base.wrapping_add(offset);
        d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0)
    };
    Mach::u32x4x4::from_lanes([d0, row_at(1), row_at(2), row_at(3)])
}
131
132// Pos is packed into the state vectors as a little-endian u64,
133// so on LE platforms we can use native vector ops to increment it.
134#[inline(always)]
135#[cfg(target_endian = "little")]
136fn add_pos<Mach: Machine>(m: Mach, d: Mach::u32x4, i: u64) -> Mach::u32x4 {
137    let d0: Mach::u64x2 = m.unpack(d.into());
138    let incr = m.vec([i, 0]);
139    m.unpack((d0 + incr).into())
140}
141#[inline(always)]
142#[cfg(target_endian = "little")]
143fn d0123<Mach: Machine>(m: Mach, d: vec128_storage) -> Mach::u32x4x4 {
144    let d0: Mach::u64x2 = m.unpack(d);
145    let incr =
146        Mach::u64x2x4::from_lanes([m.vec([0, 0]), m.vec([1, 0]), m.vec([2, 0]), m.vec([3, 0])]);
147    m.unpack((Mach::u64x2x4::from_lanes([d0, d0, d0, d0]) + incr).into())
148}
149
150#[allow(clippy::many_single_char_names)]
151#[inline(always)]
152fn refill_wide_impl<Mach: Machine>(
153    m: Mach,
154    state: &mut ChaCha,
155    drounds: u32,
156    out: &mut [u32; BUFSZ],
157) {
158    let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]);
159    let b = m.unpack(state.b);
160    let c = m.unpack(state.c);
161    let mut x = State {
162        a: Mach::u32x4x4::from_lanes([k, k, k, k]),
163        b: Mach::u32x4x4::from_lanes([b, b, b, b]),
164        c: Mach::u32x4x4::from_lanes([c, c, c, c]),
165        d: d0123(m, state.d),
166    };
167    for _ in 0..drounds {
168        x = round(x);
169        x = undiagonalize(round(diagonalize(x)));
170    }
171    let kk = Mach::u32x4x4::from_lanes([k, k, k, k]);
172    let sb = m.unpack(state.b);
173    let sb = Mach::u32x4x4::from_lanes([sb, sb, sb, sb]);
174    let sc = m.unpack(state.c);
175    let sc = Mach::u32x4x4::from_lanes([sc, sc, sc, sc]);
176    let sd = d0123(m, state.d);
177    let results = Mach::u32x4x4::transpose4(x.a + kk, x.b + sb, x.c + sc, x.d + sd);
178    out[0..16].copy_from_slice(&results.0.to_scalars());
179    out[16..32].copy_from_slice(&results.1.to_scalars());
180    out[32..48].copy_from_slice(&results.2.to_scalars());
181    out[48..64].copy_from_slice(&results.3.to_scalars());
182    state.d = add_pos(m, sd.to_lanes()[0], 4).into();
183}
184
// Entry point for the four-block refill. `dispatch!` supplies the machine
// value `m` (of type `Mach`) and the body forwards to `refill_wide_impl`.
dispatch!(m, Mach, {
    fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u32; BUFSZ]) {
        // Fills `out` with four blocks and advances the position in `state`.
        refill_wide_impl(m, state, drounds, out);
    }
});
190
191// Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ
192// and XChaCha's setup step.
193dispatch!(m, Mach, {
194    fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State<vec128_storage> {
195        let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]);
196        let mut x = State {
197            a: k,
198            b: m.unpack(state.b),
199            c: m.unpack(state.c),
200            d: m.unpack(state.d),
201        };
202        for _ in 0..drounds {
203            x = round(x);
204            x = undiagonalize(round(diagonalize(x)));
205        }
206        State {
207            a: x.a.into(),
208            b: x.b.into(),
209            c: x.c.into(),
210            d: x.d.into(),
211        }
212    }
213});
214
215dispatch_light128!(m, Mach, {
216    fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) {
217        let d: Mach::u32x4 = m.unpack(state.d);
218        state.d = d
219            .insert((value >> 32) as u32, (param << 1) | 1)
220            .insert(value as u32, param << 1)
221            .into();
222    }
223});
224
225dispatch_light128!(m, Mach, {
226    fn get_stream_param(state: &ChaCha, param: u32) -> u64 {
227        let d: Mach::u32x4 = m.unpack(state.d);
228        ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64
229    }
230});
231
232dispatch_light128!(m, Mach, {
233    fn get_seed(state: &ChaCha) -> [u8; 32] {
234        let b: Mach::u32x4 = m.unpack(state.b);
235        let c: Mach::u32x4 = m.unpack(state.c);
236        let mut key = [0u8; 32];
237        b.write_le(&mut key[..16]);
238        c.write_le(&mut key[16..]);
239        key
240    }
241});
242
/// Decodes a little-endian `u32` from a slice of exactly four bytes.
///
/// # Panics
///
/// Panics if `xs.len() != 4`.
fn read_u32le(xs: &[u8]) -> u32 {
    assert_eq!(xs.len(), 4);
    u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]])
}
247
248dispatch_light128!(m, Mach, {
249    fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha {
250        let ctr_nonce = [
251            0,
252            if nonce.len() == 12 {
253                read_u32le(&nonce[0..4])
254            } else {
255                0
256            },
257            read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]),
258            read_u32le(&nonce[nonce.len() - 4..]),
259        ];
260        let key0: Mach::u32x4 = m.read_le(&key[..16]);
261        let key1: Mach::u32x4 = m.read_le(&key[16..]);
262        ChaCha {
263            b: key0.into(),
264            c: key1.into(),
265            d: ctr_nonce.into(),
266        }
267    }
268});
269
270dispatch_light128!(m, Mach, {
271    fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha {
272        let key0: Mach::u32x4 = m.read_le(&key[..16]);
273        let key1: Mach::u32x4 = m.read_le(&key[16..]);
274        let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]);
275        let mut state = ChaCha {
276            b: key0.into(),
277            c: key1.into(),
278            d: nonce0.into(),
279        };
280        let x = refill_narrow_rounds(&mut state, rounds);
281        let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])];
282        state.b = x.a;
283        state.c = x.d;
284        state.d = ctr_nonce1.into();
285        state
286    }
287});