ruint/algorithms/div/
mod.rs

1//! ⚠️ Collection of division algorithms.
2//!
3//! **Warning.** Most functions in this module are currently not considered part
4//! of the stable API and may be changed or removed in future minor releases.
5//!
6//! All division algorithms also compute the remainder. There is no benefit
7//! to splitting the division and remainder into separate functions, since
8//! the remainder is always computed as part of the division algorithm.
9//!
10//! These functions are adapted from algorithms in [MG10] and [K97].
11//!
12//! [K97]: https://cs.stanford.edu/~knuth/taocp.html
13//! [MG10]: https://gmplib.org/~tege/division-paper.pdf
14
15#![allow(clippy::similar_names)] // TODO
16
17mod knuth;
18mod reciprocal;
19mod small;
20
21pub use self::{
22    knuth::{div_nxm, div_nxm_normalized},
23    reciprocal::{reciprocal, reciprocal_2, reciprocal_2_mg10, reciprocal_mg10, reciprocal_ref},
24    small::{
25        div_1x1, div_2x1, div_2x1_mg10, div_2x1_ref, div_3x2, div_3x2_mg10, div_3x2_ref, div_nx1,
26        div_nx1_normalized, div_nx2, div_nx2_normalized,
27    },
28};
29use crate::{algorithms::DoubleWord, utils::cold_path};
30
31/// ⚠️ Division with remainder.
32#[doc = crate::algorithms::unstable_warning!()]
33/// The quotient is stored in the `numerator` and the remainder is stored
34/// in the `divisor`.
35///
36/// # Algorithm
37///
38/// It trims zeros from the numerator and divisor then solves the trivial cases
39/// directly, or dispatches to the [`div_nx1`], [`div_nx2`] or [`div_nxm`]
40/// functions.
41///
42/// # Panics
43///
44/// Panics if `divisor` is zero.
45#[inline]
46#[cfg_attr(debug_assertions, track_caller)]
47pub fn div(numerator: &mut [u64], divisor: &mut [u64]) {
48    div_inlined(numerator, divisor);
49}
50
51/// Separate definition of [`div`] to force inlining.
52///
53/// We want to inline this function where statically we know the size of the
54/// parameters to allow for more optimizations.
55#[inline(always)]
56#[cfg_attr(debug_assertions, track_caller)]
57pub(crate) fn div_inlined(numerator: &mut [u64], divisor: &mut [u64]) {
58    // Trim most significant zeros from divisor.
59    let divisor = super::trim_end_zeros_mut(divisor);
60    if divisor.is_empty() {
61        // Force a division by 0 panic, which is smaller in code size than an `assert!`.
62        #[allow(unconditional_panic, clippy::all)]
63        let _ = 0 / 0;
64    }
65    debug_assert_ne!(*divisor.last().unwrap(), 0);
66
67    // Trim most significant zeros from numerator.
68    let numerator = super::trim_end_zeros_mut(numerator);
69    if numerator.is_empty() {
70        // Empty numerator: (q, r) = (0, 0)
71        cold_path();
72        divisor.fill(0);
73        return;
74    }
75    debug_assert_ne!(*numerator.last().unwrap(), 0);
76
77    if super::cmp(numerator, divisor).is_lt() {
78        // Numerator is smaller than the divisor: (q, r) = (0, numerator)
79        cold_path();
80        // `a < b` implies `a.len() <= b.len()`, after trimming most significant zeros.
81        assume!(numerator.len() <= divisor.len());
82        let (remainder, padding) = divisor.split_at_mut(numerator.len());
83        remainder.copy_from_slice(numerator);
84        padding.fill(0);
85        numerator.fill(0);
86        return;
87    }
88    debug_assert!(numerator.len() >= divisor.len());
89
90    // Compute quotient and remainder, branching out to different algorithms.
91    match divisor {
92        [divisor] => {
93            let d = *divisor;
94            if let [numerator] = numerator {
95                assume!(d != 0); // Elides division by 0 check.
96                (*numerator, *divisor) = div_1x1(*numerator, d);
97            } else {
98                *divisor = div_nx1(numerator, d);
99            }
100        }
101        [d0, d1] => {
102            let d = u128::join(*d1, *d0);
103            (*d0, *d1) = div_nx2(numerator, d).split();
104        }
105        _ => div_nxm(numerator, divisor),
106    }
107}
108
109#[cfg(test)]
110mod tests {
111    use super::*;
112    use crate::aliases::U512;
113
114    // Test vectors from <https://github.com/chfast/intx/blob/8b5f4748a7386a9530769893dae26b3273e0ffe2/test/unittests/test_div.cpp#L58>
115    // [[numerator, divisor, quotient, remainder]; _]
116    const INTX_TESTS: [[U512; 4]; 45] = uint!([
117        [2_U512, 1_U512, 2_U512, 0_U512],
118        [
119            0x10000000000000000_U512,
120            2_U512,
121            0x8000000000000000_U512,
122            0_U512,
123        ],
124        [
125            0x7000000000000000_U512,
126            0x8000000000000000_U512,
127            0_U512,
128            0x7000000000000000_U512,
129        ],
130        [
131            0x8000000000000000_U512,
132            0x8000000000000000_U512,
133            1_U512,
134            0_U512,
135        ],
136        [
137            0x8000000000000001_U512,
138            0x8000000000000000_U512,
139            1_U512,
140            1_U512,
141        ],
142        [
143            0x80000000000000010000000000000000_U512,
144            0x80000000000000000000000000000000_U512,
145            1_U512,
146            0x10000000000000000_U512,
147        ],
148        [
149            0x80000000000000000000000000000000_U512,
150            0x80000000000000000000000000000001_U512,
151            0_U512,
152            0x80000000000000000000000000000000_U512,
153        ],
154        [
155            0x478392145435897052_U512,
156            0x111_U512,
157            0x430f89ebadad0baa_U512,
158            8_U512,
159        ],
160        [
161            0x400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000_U512,
162            0x800000000000000000000000000000000000000000000000_U512,
163            0x800000000000000000000000000000000000000000000000_U512,
164            0_U512,
165        ],
166        [
167            0x80000000000000000000000000000000000000000000000000000000000000000000000000000000_U512,
168            0x800000000000000000000000000000000000000000000000_U512,
169            0x100000000000000000000000000000000_U512,
170            0_U512,
171        ],
172        [
173            0x1e00000000000000000000090000000000000000000000000000000000000000000000000000000000000000000000000000000009000000000000000000_U512,
174            0xa_U512,
175            0x30000000000000000000000e6666666666666666666666666666666666666666666666666666666666666666666666666666666674ccccccccccccccccc_U512,
176            8_U512,
177        ],
178        [
179            0x767676767676767676000000767676767676_U512,
180            0x2900760076761e00020076760000000076767676000000_U512,
181            0_U512,
182            0x767676767676767676000000767676767676_U512,
183        ],
184        [
185            0x12121212121212121212121212121212_U512,
186            0x232323232323232323_U512,
187            0x83a83a83a83a83_U512,
188            0x171729292929292929_U512,
189        ],
190        [
191            0xabc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0abc0_U512,
192            0x1c01c01c01c01c01c01c01c01c_U512,
193            0x621ed21ed21ed21ed21ed21ed224f40bf40bf40bf40bf40bf40bf46e12de12de12de12de12de12de1900000000000000000_U512,
194            0xabc0abc0abc0abc0_U512,
195        ],
196        [
197            0xfffff716b61616160b0b0b2b0b0b0becf4bef50a0df4f48b090b2b0bc60a0a00_U512,
198            0xfffff716b61616160b0b0b2b0b230b000008010d0a2b00_U512,
199            0xffffffffffffffffff_U512,
200            0xfffff7169e17030ac1ff082ed51796090b330cd3143500_U512,
201        ],
202        [
203            0x50beb1c60141a0000dc2b0b0b0b0b0b410a0a0df4f40b090b2b0bc60a0a00_U512,
204            0x2000110000000d0a300e750a000000090a0a_U512,
205            0x285f437064cd09ff8bc5b7857d_U512,
206            0x1fda1c384d86199e14bb4edfc6693042f11e_U512,
207        ],
208        [
209            0x4b00000b41000b0b0b2b0b0b0b0b0b410a0aeff4f40b090b2b0bc60a0a1000_U512,
210            0x4b00000b41000b0b0b2b0b0b0b0b0b410a0aeff4f40b0a0a_U512,
211            0xffffffffffffff_U512,
212            0x4b00000b41000b0b0b2b0b0b0b0b0b400b35fbbafe151a0a_U512,
213        ],
214        [
215            0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee_U512,
216            7_U512,
217            0x22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222_U512,
218            0_U512,
219        ],
220        [
221            0xf6376770abd3a36b20394c5664afef1194c801c3f05e42566f085ed24d002bb0_U512,
222            0xb368d219438b7f3f_U512,
223            0x15f53bce87e9fb63c7c3ab03f6c0ba30d3ecf982fa97cdf0a_U512,
224            0x4bfd94dbec31523a_U512,
225        ],
226        [
227            0x0_U512,
228            0x10900000000000000000000000000000000000000000000000000_U512,
229            0x0_U512,
230            0x0_U512,
231        ],
232        [
233            0x77676767676760000000000000001002e000000000000040000000e000000000000007f0000000000000000000000000000000000000000f7000000000000_U512,
234            0xfffc000000000000767676240000000000002b05760476000000000000000000767676767600000000000000000000000000000000_U512,
235            0x7769450c7b994e65025_U512,
236            0x241cb1aa4f67c22ae65c9920bf3bb7ad8280311a887aee8be4054a3e242a5ea9ab35d800f2000000000000000000f7000000000000_U512,
237        ],
238        [
239            0xdffffffffffffffffffffffffffffffffff00000000000000000000000000000000000000000001000000000000000000000000008100000000001001_U512,
240            0xdffffffffffffffffffffffffffffffffffffffffffff3fffffffffffffffffffffffffffff_U512,
241            0xffffffffffffffffffffffffffffffffffedb6db6db6e9_U512,
242            0x200000000000000000000000000010000f2492492492ec000000000000080ffedb6db6dc6ea_U512,
243        ],
244        [
245            0xff000000000000000000000000000000000000000400000092767600000000000000000000000081000000000000000000000001020000000000eeffffffffff_U512,
246            0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffff000000000000000000000005000000000000000000ffffffffff100000000000000000_U512,
247            0x0_U512,
248            0xff000000000000000000000000000000000000000400000092767600000000000000000000000081000000000000000000000001020000000000eeffffffffff_U512,
249        ],
250        [
251            0xfffffffffffffffffffffffffffffffffffffffbffffff6d8989ffffffffffffffffffffffff7efffffffffffffffffffffffefdffffffffff110000000001_U512,
252            0xfffffffffffffffffffffffaffffffffffffffffff0000000000f00000000000000000_U512,
253            0x1000000000000000000000004fffffffffffffffc00ffff8689890fff_U512,
254            0xffffffec09fffda0afa81efafc00ffff868d481fff71de0d8100efffff110000000001_U512,
255        ],
256        [
257            0x767676767676000000000076000000000000005600000000000000000000_U512,
258            0x767676767676000000000076000000760000_U512,
259            0xffffffffffffffffffffffff_U512,
260            0x767676007676005600000076000000760000_U512,
261        ],
262        [
263            0x8200000000000000000000000000000000000000000000000000000000000000_U512,
264            0x8200000000000000fe000004000000ffff000000fffff700_U512,
265            0xfffffffffffffffe_U512,
266            0x5fffffbffffff01fd00000700000afffe000001ffffee00_U512,
267        ],
268        [
269            0xdac7fff9ffd9e1322626262626262600_U512,
270            0xd021262626262626_U512,
271            0x10d1a094108c5da55_U512,
272            0x6f386ccc73c11f62_U512,
273        ],
274        [
275            0x8000000000000001800000000000000080000000000000008000000000000000_U512,
276            0x800000000000000080000000000000008000000000000000_U512,
277            0x10000000000000001_U512,
278            0x7fffffffffffffff80000000000000000000000000000000_U512,
279        ],
280        [
281            0x00e8e8e8e2000100000009ea02000000000000ff3ffffff800000010002200000000000000000000000000000000000000000000000000000000000000000000_U512,
282            0x00e8e8e8e2000100000009ea02000000000000ff3ffffff800000010002280ff0000000000000000000000000000000000000000000000000000000000000000_U512,
283            0_U512,
284            0x00e8e8e8e2000100000009ea02000000000000ff3ffffff800000010002200000000000000000000000000000000000000000000000000000000000000000000_U512,
285        ],
286        [
287            0x000000c9700000000000000000023f00c00014ff0000000000000000223008050000000000000000000000000000000000000000000000000000000000000000_U512,
288            0x00000000c9700000000000000000023f00c00014ff002c0000000000002231080000000000000000000000000000000000000000000000000000000000000000_U512,
289            0xff_U512,
290            0x00000000c9700000000000000000023f00c00014fed42c00000000000021310d0000000000000000000000000000000000000000000000000000000000000000_U512,
291        ],
292        [
293            0x40000000fd000000db00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001_U512,
294            0x40000000fd000000db0000000000000000000040000000fd000000db000001_U512,
295            0xfffffffffffffffffffffffffffffffffffffeffffffffffffffff_U512,
296            0x3ffffffffd000000db000040000000fd0000011b000001fd000000db000002_U512,
297        ],
298        [
299            0x40000000fd000000db00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001_U512,
300            0x40000000fd000000db0000000000000000000040000000fd000000db0000d3_U512,
301            0xfffffffffffffffffffffffffffffffffffffeffffffffffffffff_U512,
302            0x3fffff2dfd000000db000040000000fd0000011b0000d3fd000000db0000d4_U512,
303        ],
304        [
305            0x001f000000000000000000000000000000200000000100000000000000000000_U512,
306            0x0000000000000000000100000000ffffffffffffffff0000000000002e000000_U512,
307            0x1effffffe10000001f_U512,
308            0xfffa6e20000591fffffa6e000000_U512,
309        ],
310        [
311            0x7effffff80000000000000000000000000020000440000000000000000000001_U512,
312            0x7effffff800000007effffff800000008000ff0000010000_U512,
313            0xfffffffffffffffe_U512,
314            0x7effffff800000007e0100ff43ff00010001fe0000020001_U512,
315        ],
316        [
317            0x5fd8fffffffffffffffffffffffffffffc090000ce700004d0c9ffffff000001_U512,
318            0x2ffffffffffffffffffffffffffffffffff000000030000_U512,
319            0x1ff2ffffffffffffff_U512,
320            0x2fffffffffffffffc28f300ce102704d0c8ffffff030001_U512,
321        ],
322        [
323            0x62d8fffffffffffffffffffffffffffffc18000000000000000000ca00000001_U512,
324            0x2ffffffffffffffffffffffffffffffffff200000000000_U512,
325            0x20f2ffffffffffffff_U512,
326            0x2fffffffffffffffc34d49fffffffffffff20ca00000001_U512,
327        ],
328        [
329            0x7effffff8000000000000000000000000000000000000000d900000000000001_U512,
330            0x7effffff8000000000000000000000000000000000008001_U512,
331            0xffffffffffffffff_U512,
332            0x7effffff7fffffffffffffffffff7fffd900000000008002_U512,
333        ],
334        [
335            0x0000000000000006400aff20ff00200004e7fd1eff08ffca0afd1eff08ffca0a_U512,
336            0x00000000000000210000000000000022_U512,
337            0x307c7456554d945ce57749fd52bfdb7f_U512,
338            0x1491254b5a0b84a32c_U512,
339        ],
340        [
341            0x7effffff8000000000000000000000000000000000150000d900000000000001_U512,
342            0x7effffff8000000000000000000000000000000000f9e101_U512,
343            0xffffffffffffffff_U512,
344            0x7effffff7fffffffffffffffff1b1effd900000000f9e102_U512,
345        ],
346        [
347            0xffffffff0100000000000000000000000000ffff0000ffffffff0100000000_U512,
348            0xffffffff010000000000000000000000ffff0000ffffff_U512,
349            0xffffffffffffffff_U512,
350            0xffffffff00ffffff0001fffe00010100fffe0100ffffff_U512,
351        ],
352        [
353            0xabfffff0000ffffffffff36363636363636363636d00500000000ffffffffffffe90000ff00000000000000000000ffff0000000000_U512,
354            0xabfffff0000ffffffffff36363636363636363636d00500000000ffffffffffffe9ff001f_U512,
355            0xffffffffffffffffffffffffffffffffff_U512,
356            0xabfffff0000ffffffffff36363636363537371636d00500000001000000fffeffe9ff001f_U512,
357        ],
358        [
359            0xff00ffffffffffffffcaffffffff0100_U512,
360            0x0100000000000000ff800000000000ff_U512,
361            0xff_U512,
362            0xffffffffff017f4afffffffe02ff_U512,
363        ],
364        [
365            0x9000ffffffffffffffcaffffffff0100_U512,
366            0x800000000000007fc000000000007f80_U512,
367            1_U512,
368            0x1000ffffffffff803fcafffffffe8180_U512,
369        ],
370        [
371            // Very special case for reciprocal_3by2().
372            170141183460488574554024512018559533058_U512,
373            170141183460488574554024512018559533057_U512,
374            1_U512,
375            1_U512,
376        ],
377        [
378            0x6e2d23924d38f0ab643864e9b2a328a54914f48533114fae3475168bfd74a61ae91e676b4a4f33a5b3b6cc189536ccb4afc46d02b061d6daaf0298c993376ab4_U512,
379            170141183460488574554024512018559533057_U512,
380            0xdc5a47249a56560d078334729ffb61da211f5d2ec622c22f88bc3b4ebae1abdac6b03621554ef71070bc1e0dc5c301bc_U512,
381            0x6dc100ea02272bdcf68a4a5b95f468f8_U512,
382        ]
383    ]);
384
385    macro_rules! test_cases {
386        ($n:ty, $d:ty) => {
387            for [numerator, divisor, quotient, remainder] in INTX_TESTS {
388                if numerator.bit_len() > <$n>::BITS || divisor.bit_len() > <$d>::BITS {
389                    continue;
390                }
391                let mut numerator = <$n>::from(numerator).into_limbs();
392                let mut divisor = <$d>::from(divisor).into_limbs();
393                let quotient = <$n>::from(quotient).into_limbs();
394                let remainder = <$d>::from(remainder).into_limbs();
395                div(&mut numerator, &mut divisor);
396                assert_eq!(numerator, quotient);
397                assert_eq!(divisor, remainder);
398            }
399        };
400    }
401
402    #[test]
403    fn test_div_8x8() {
404        use crate::aliases::U512;
405        test_cases!(U512, U512);
406    }
407
408    #[test]
409    fn test_div_6x6() {
410        use crate::aliases::U384;
411        test_cases!(U384, U384);
412    }
413
414    #[test]
415    fn test_div_4x4() {
416        use crate::aliases::U256;
417        test_cases!(U256, U256);
418    }
419
420    #[test]
421    fn test_div_5x4() {
422        use crate::aliases::{U256, U320};
423        test_cases!(U320, U256);
424    }
425
426    #[test]
427    fn test_div_8x4() {
428        use crate::aliases::{U256, U512};
429        test_cases!(U512, U256);
430    }
431
432    #[test]
433    #[allow(clippy::unreadable_literal)]
434    fn test_div_8by4_one() {
435        let mut numerator = [
436            0x9c2bcebfa9cca2c6_u64,
437            0x274e154bb5e24f7a_u64,
438            0xe1442d5d3842be2b_u64,
439            0xf18f5adfd420853f_u64,
440            0x04ed6127eba3b594_u64,
441            0xc5c179973cdb1663_u64,
442            0x7d7f67780bb268ff_u64,
443            0x0000000000000003_u64,
444            0x0000000000000000_u64,
445        ];
446        let mut divisor = [
447            0x0181880b078ab6a1_u64,
448            0x62d67f6b7b0bda6b_u64,
449            0x92b1840f9c792ded_u64,
450            0x0000000000000019_u64,
451        ];
452        let expected_quotient = [
453            0x9128464e61d6b5b3_u64,
454            0xd9eea4fc30c5ac6c_u64,
455            0x944a2d832d5a6a08_u64,
456            0x22f06722e8d883b1_u64,
457            0x0000000000000000_u64,
458        ];
459        let expected_remainder = [
460            0x1dfa5a7ea5191b33_u64,
461            0xb5aeb3f9ad5e294e_u64,
462            0xfc710038c13e4eed_u64,
463            0x000000000000000b_u64,
464        ];
465        div(&mut numerator, &mut divisor);
466        assert_eq!(numerator[..5], expected_quotient);
467        assert_eq!(divisor, expected_remainder);
468    }
469}