ruint/algorithms/div/
knuth.rs

1//! Knuth division
2
3use super::{reciprocal::reciprocal_2, small::div_3x2, DoubleWord};
4use crate::{
5    algorithms::{add::adc_n, mul::submul_nx1},
6    utils::{likely, unlikely},
7};
8
9/// ⚠️ In-place Knuth normalized long division with reciprocals.
10///
11/// # Conditions of Use
12///
13/// * The highest (most-significant) bit of the divisor MUST be set.
14/// * The `divisor` and `numerator` MUST each be at least two limbs.
15/// * `numerator` MUST contain at at least as many elements as `divisor`.
16///
17/// # Panics
18///
19/// May panic if any condition of use is violated.
20#[inline]
21#[allow(clippy::many_single_char_names)]
22pub fn div_nxm_normalized(numerator: &mut [u64], divisor: &[u64]) {
23    debug_assert!(divisor.len() >= 2);
24    debug_assert!(numerator.len() >= divisor.len());
25    debug_assert!(*divisor.last().unwrap() >= (1 << 63));
26
27    let n = divisor.len();
28    let m = numerator.len() - n - 1;
29
30    // Compute the divisor double limb and reciprocal
31    let d = u128::join(divisor[n - 1], divisor[n - 2]);
32    let v = reciprocal_2(d);
33
34    // Compute the quotient one limb at a time.
35    for j in (0..=m).rev() {
36        // Fetch the first three limbs of the numerator.
37        let n21 = u128::join(numerator[j + n], numerator[j + n - 1]);
38        let n0 = numerator[j + n - 2];
39        debug_assert!(n21 <= d);
40
41        // Overflow case
42        if unlikely(n21 == d) {
43            let q = u64::MAX;
44            let _carry = submul_nx1(&mut numerator[j..j + n], divisor, q);
45            numerator[j + n] = q;
46            continue;
47        }
48
49        // Calculate 3x2 approximate quotient word.
50        // By using 3x2 limbs we get a quotient that is very likely correct
51        // and at most one too large. In the process we also get the first
52        // two remainder limbs.
53        let (mut q, r) = div_3x2(n21, n0, d, v);
54
55        // Subtract the quotient times the divisor from the remainder.
56        // We already have the highest two limbs, so we can reduce the
57        // computation. We still need to carry propagate into these limbs.
58        let borrow = submul_nx1(&mut numerator[j..j + n - 2], &divisor[..n - 2], q);
59        let (r, borrow) = r.overflowing_sub(u128::from(borrow));
60        numerator[j + n - 2] = r.low();
61        numerator[j + n - 1] = r.high();
62
63        // If we have a carry then the quotient was one too large.
64        // We correct by decrementing the quotient and adding one divisor back.
65        if unlikely(borrow) {
66            q = q.wrapping_sub(1);
67            let carry = adc_n(&mut numerator[j..j + n], &divisor[..n], 0);
68            // Expect carry because we flip sign back to positive.
69            debug_assert_eq!(carry, 1);
70        }
71
72        // Store quotient in the unused bits of numerator
73        numerator[j + n] = q;
74    }
75}
76
77/// ⚠️ In-place Knuth long division with implicit normalization and reciprocals.
78///
79/// # Conditions of use:
80///
81/// * `divisor` MUST NOT be empty.
82/// * The highest (most-significant) limb of `divisor` MUST be non-zero.
83/// * `divisor` and `numerator` MUST contain at least three limbs.
84/// * `numerator` MUST contain at at least as many elements as `divisor`.
85///
86/// # Panics
87///
88/// May panic if any condition of use is violated.
89#[inline]
90#[allow(clippy::many_single_char_names)]
91pub fn div_nxm(numerator: &mut [u64], divisor: &mut [u64]) {
92    debug_assert!(divisor.len() >= 3);
93    debug_assert!(numerator.len() >= divisor.len());
94    debug_assert!(*divisor.last().unwrap() >= 1);
95
96    let n = divisor.len();
97    let m = numerator.len() - n;
98
99    // Compute normalized divisor double-word and reciprocal.
100    // TODO: Delegate to div_nxm_normalized if normalized.
101    let (d, shift) = {
102        let d = u128::join(divisor[n - 1], divisor[n - 2]);
103        let shift = d.high().leading_zeros();
104        (
105            if shift == 0 {
106                d
107            } else {
108                (d << shift) | u128::from(divisor[n - 3] >> (64 - shift))
109            },
110            shift,
111        )
112    };
113    debug_assert!(d >= 1 << 127);
114    let v = reciprocal_2(d);
115
116    // Compute the quotient one limb at a time.
117    let mut q_high = 0;
118    for j in (0..=m).rev() {
119        // Fetch the first three limbs of the shifted numerator starting at `j + n`.
120        let (n21, n0) = {
121            let n2 = numerator.get(j + n).copied().unwrap_or_default();
122            let n21 = u128::join(n2, numerator[j + n - 1]);
123            let n0 = numerator[j + n - 2];
124            if shift == 0 {
125                (n21, n0)
126            } else {
127                (
128                    (n21 << shift) | u128::from(n0 >> (64 - shift)),
129                    (n0 << shift) | (numerator[j + n - 3] >> (64 - shift)),
130                )
131            }
132        };
133        debug_assert!(n21 <= d);
134
135        // Compute the quotient
136        let q = if likely(n21 < d) {
137            // Calculate 3x2 approximate quotient word.
138            // By using 3x2 limbs we get a quotient that is very likely correct
139            // and at most one too large. In the process we also get the first
140            // two remainder limbs.
141            let (mut q, r) = div_3x2(n21, n0, d, v);
142
143            if q != 0 {
144                // Subtract the quotient times the divisor from the remainder.
145                // We already have the highest 128 bit, so we can reduce the
146                // computation. We still need to carry propagate into these limbs.
147                let borrow = if shift == 0 {
148                    let borrow = submul_nx1(&mut numerator[j..j + n - 2], &divisor[..n - 2], q);
149                    let (r, borrow) = r.overflowing_sub(u128::from(borrow));
150                    numerator[j + n - 2] = r.low();
151                    numerator[j + n - 1] = r.high();
152                    borrow
153                } else {
154                    // OPT: Can we re-use `r` here somehow? The problem is we can not just
155                    // shift the `r` or `borrow` because we need to accurately reproduce
156                    // the remainder and carry in the middle of a limb.
157                    let borrow = submul_nx1(&mut numerator[j..j + n], divisor, q);
158                    let n2 = numerator.get(j + n).copied().unwrap_or_default();
159                    borrow != n2
160                };
161
162                // If we have a carry then the quotient was one too large.
163                // We correct by decrementing the quotient and adding one divisor back.
164                if unlikely(borrow) {
165                    q = q.wrapping_sub(1);
166                    let carry = adc_n(&mut numerator[j..j + n], &divisor[..n], 0);
167                    // Expect carry because we flip sign back to positive.
168                    debug_assert_eq!(carry, 1);
169                }
170            }
171            q
172        } else {
173            // Overflow case
174            let q = u64::MAX;
175            let _carry = submul_nx1(&mut numerator[j..j + n], divisor, q);
176            q
177        };
178
179        // Store the quotient in the processed bits of numerator plus `q_high`.
180        if j + n < numerator.len() {
181            numerator[j + n] = q;
182        } else {
183            q_high = q;
184        }
185    }
186
187    // Copy remainder to `divisor` and `quotient` to numerator.
188    divisor.copy_from_slice(&numerator[..n]);
189    numerator.copy_within(n.., 0);
190    numerator[m] = q_high;
191    numerator[m + 1..].fill(0);
192}
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197    use crate::algorithms::{addmul, cmp, sbb_n};
198    use core::cmp::Ordering;
199    use proptest::{
200        collection, num, proptest,
201        strategy::{Just, Strategy},
202    };
203
204    #[allow(unused_imports)]
205    use alloc::vec::Vec;
206
207    // Basic test without exceptional paths
208    #[test]
209    fn test_divrem_8by4() {
210        let mut numerator = [
211            0x3000000000000000,
212            0xd4e15e75fd4e6516,
213            0x593a70aa5daf127b,
214            0xff0a216ae9c215f1,
215            0xa78c7ad6fea10429,
216            0x18276b093f5d1dac,
217            0xfe2e0bccb9e6d8b3,
218            0x1bebfb3bc05d9347,
219        ];
220        let divisor = [
221            0x800000000000000,
222            0x580c0c40583c55b5,
223            0x6b16b3fb5bd85ed3,
224            0xcc958c207ce3c96f,
225        ];
226        let expected_quotient = [
227            0x9128464e61d6b5b3_u64,
228            0xd9eea4fc30c5ac6c_u64,
229            0x944a2d832d5a6a08_u64,
230            0x22f06722e8d883b1_u64,
231        ];
232        let expected_remainder = [
233            0x9800000000000000,
234            0x70efd2d3f528c8d9,
235            0x6dad759fcd6af14a,
236            0x5fe38801c609f277,
237        ];
238        div_nxm_normalized(&mut numerator, &divisor);
239        let (remainder, quotient) = numerator.split_at(divisor.len());
240        assert_eq!(remainder, expected_remainder);
241        assert_eq!(quotient, expected_quotient);
242    }
243
244    // Test case that forces the `unlikely(borrow)` branch.
245    #[test]
246    fn test_div_rollback() {
247        let mut numerator = [
248            0x1656178c14142000,
249            0x821415dfe9e81612,
250            0x1616561616161616,
251            0x96000016820016,
252        ];
253        let divisor = [0x1415dfe9e8161414, 0x1656161616161682, 0x9600001682001616];
254        let expected_quotient = [0xffffffffffffff];
255        let expected_remainder = [0x166bf775fc2a3414, 0x1656161616161680, 0x9600001682001616];
256        div_nxm_normalized(&mut numerator, &divisor);
257        let (remainder, quotient) = numerator.split_at(divisor.len());
258        assert_eq!(remainder, expected_remainder);
259        assert_eq!(quotient, expected_quotient);
260    }
261
262    // Test case that forces the `unlikely(borrow)` branch.
263    #[test]
264    fn test_div_rollback_2() {
265        let mut numerator = [
266            0x100100000,
267            0x81000,
268            0x1000000000000000,
269            0x0,
270            0x0,
271            0xfffff00000000000,
272            0xffffffffffffffff,
273            0xdfffffffffffff,
274        ];
275        let divisor = [
276            0xfffffffffff00000,
277            0xffffffffffffffff,
278            0xfffffffffffff3ff,
279            0xffffffffffffffff,
280            0xdfffffffffffffff,
281        ];
282        let expected_quotient = [0xffffedb6db6db6e9, 0xffffffffffffffff, 0xffffffffffffff];
283        let expected_remainder = [
284            0xdb6db6dc6ea00000,
285            0x80ffe,
286            0xf2492492492ec00,
287            0x1000,
288            0x2000000000000000,
289        ];
290        div_nxm_normalized(&mut numerator, &divisor);
291        let (remainder, quotient) = numerator.split_at(divisor.len());
292        assert_eq!(quotient, expected_quotient);
293        assert_eq!(remainder, expected_remainder);
294    }
295
296    #[test]
297    fn test_div_overflow() {
298        let mut numerator = [0xb200000000000002, 0x1, 0x0, 0xfdffffff00000000];
299        let divisor = [0x10002, 0x0, 0xfdffffff00000000];
300        let expected_quotient = [0xffffffffffffffff];
301        let expected_remainder = [0xb200000000010004, 0xfffffffffffeffff, 0xfdfffffeffffffff];
302        div_nxm_normalized(&mut numerator, &divisor);
303        let (remainder, quotient) = numerator.split_at(divisor.len());
304        assert_eq!(quotient, expected_quotient);
305        assert_eq!(remainder, expected_remainder);
306    }
307
308    // Proptest without forced exceptional paths
309    #[test]
310    fn test_div_nxm_normalized() {
311        let quotient = collection::vec(num::u64::ANY, 1..10);
312        let divisor = collection::vec(num::u64::ANY, 2..10).prop_map(|mut vec| {
313            *vec.last_mut().unwrap() |= 1 << 63;
314            vec
315        });
316        let dr = divisor.prop_flat_map(|divisor| {
317            let d = divisor.clone();
318            let remainder =
319                collection::vec(num::u64::ANY, divisor.len()).prop_map(move |mut vec| {
320                    if cmp(&vec, &d) != Ordering::Less {
321                        let carry = sbb_n(&mut vec, &d, 0);
322                        assert_eq!(carry, 0);
323                    }
324                    vec
325                });
326            (Just(divisor), remainder)
327        });
328        proptest!(|(quotient in quotient, (divisor, remainder) in dr)| {
329            let mut numerator: Vec<u64> = vec![0; divisor.len() + quotient.len()];
330            numerator[..remainder.len()].copy_from_slice(&remainder);
331            addmul(&mut numerator, quotient.as_slice(), divisor.as_slice());
332
333            div_nxm_normalized(numerator.as_mut_slice(), divisor.as_slice());
334            let (r, q) = numerator.split_at(divisor.len());
335            assert_eq!(r, remainder);
336            assert_eq!(q, quotient);
337        });
338    }
339
340    // Basic test without exceptional paths (with shift == 0)
341    #[test]
342    fn test_div_nxm_8by4_noshift() {
343        let mut numerator = [
344            0x3000000000000000,
345            0xd4e15e75fd4e6516,
346            0x593a70aa5daf127b,
347            0xff0a216ae9c215f1,
348            0xa78c7ad6fea10429,
349            0x18276b093f5d1dac,
350            0xfe2e0bccb9e6d8b3,
351            0x1bebfb3bc05d9347,
352        ];
353        let mut divisor = [
354            0x800000000000000,
355            0x580c0c40583c55b5,
356            0x6b16b3fb5bd85ed3,
357            0xcc958c207ce3c96f,
358        ];
359        let quotient = [
360            0x9128464e61d6b5b3,
361            0xd9eea4fc30c5ac6c,
362            0x944a2d832d5a6a08,
363            0x22f06722e8d883b1,
364        ];
365        let remainder = [
366            0x9800000000000000,
367            0x70efd2d3f528c8d9,
368            0x6dad759fcd6af14a,
369            0x5fe38801c609f277,
370        ];
371        div_nxm(&mut numerator, &mut divisor);
372        assert_eq!(&numerator[..quotient.len()], quotient);
373        assert_eq!(divisor, remainder);
374    }
375
376    // Basic test without exceptional paths (with shift > 0)
377    #[test]
378    fn test_div_nxm_8by4_shift() {
379        let mut numerator = [
380            0xc59c28364a491d22,
381            0x1ab240e2a2a91050,
382            0xe497baaf4e4b16cb,
383            0xd21643d231c590d6,
384            0xda918cd26803c7f1,
385            0xb445074f770b5261,
386            0x37aff2aa32059516,
387            0x3cf254c1,
388        ];
389        let mut divisor = [
390            0xc91e935375a97723,
391            0x86a9ded3044ec12b,
392            0xc7d2c4d3b53bff51,
393            0x6ef0530d,
394        ];
395        let quotient = [
396            0x456b1581ef1a759a,
397            0x88702c90bbe2ef3c,
398            0xff8492ee85dec642,
399            0x8ca39da4ca785f36,
400        ];
401        let remainder = [
402            0x82c3522848567314,
403            0xeaba6edb18db568e,
404            0x18f16cfde22dcefe,
405            0x11296685,
406        ];
407        div_nxm(&mut numerator, &mut divisor);
408        assert_eq!(&numerator[..quotient.len()], quotient);
409        assert_eq!(divisor, remainder);
410    }
411
412    // Basic test without exceptional paths (with q_high > 0)
413    #[test]
414    fn test_div_nxm_8by4_q_high() {
415        let mut numerator = [
416            0x39ea76324ed952cc,
417            0x89b7a0d30e2df1be,
418            0x7011596e8b3f301f,
419            0x11930a89eca68640,
420            0x36a34eca4f73d0e4,
421            0x86d53c52b1108c90,
422            0x6093338b7b667e03,
423        ];
424        let mut divisor = [
425            0x439702d44a8f62a4,
426            0x8dfa6ea7fc41f689,
427            0xc79723ff4dd060e0,
428            0x7d13e204,
429        ];
430        let quotient = [
431            0x181cecbb64efa48b,
432            0x1e97056793a15125,
433            0xe8145d63cd312d02,
434            0xc5a9aced,
435        ];
436        let remainder = [
437            0x682e10f8d0b1b3c0,
438            0xbf46c8b0e5ac8a62,
439            0x5abe292d53acf085,
440            0x699fc911,
441        ];
442        div_nxm(&mut numerator, &mut divisor);
443        assert_eq!(&numerator[..quotient.len()], quotient);
444        assert_eq!(divisor, remainder);
445    }
446
447    // Basic test with numerator and divisor the same size.
448    #[test]
449    fn test_div_nxm_4by4() {
450        let mut numerator = [
451            0x55a8f128f187ecee,
452            0xe059a1f3fe52e559,
453            0x570ab3b2aac5c5d9,
454            0xf7ea0c73b80ddca1,
455        ];
456        let mut divisor = [
457            0x6c8cd670adcae7da,
458            0x458d6428c7fd36d3,
459            0x4a73ad64cc703a1d,
460            0x33bf790f92ed51fe,
461        ];
462        let quotient = [0x4];
463        let remainder = [
464            0xa37597663a5c4d86,
465            0xca241150de5e0a0b,
466            0x2d3bfe1f7904dd64,
467            0x28ec28356c5894a8,
468        ];
469        div_nxm(&mut numerator, &mut divisor);
470        assert_eq!(&numerator[..quotient.len()], quotient);
471        assert_eq!(divisor, remainder);
472    }
473
474    #[test]
475    fn test_div_nxm_4by3() {
476        let mut numerator = [
477            0x8000000000000000,
478            0x8000000000000000,
479            0x8000000000000000,
480            0x8000000000000001,
481        ];
482        let mut divisor = [0x8000000000000000, 0x8000000000000000, 0x8000000000000000];
483        let quotient = [0x1, 0x1];
484        let remainder = [0x0, 0x8000000000000000, 0x7fffffffffffffff];
485        div_nxm(&mut numerator, &mut divisor);
486        assert_eq!(&numerator[..quotient.len()], quotient);
487        assert_eq!(divisor, remainder);
488    }
489
490    // Proptest without forced exceptional paths
491    #[test]
492    fn test_div_nxm() {
493        let quotient = collection::vec(num::u64::ANY, 1..10);
494        let divisor = collection::vec(num::u64::ANY, 3..10);
495        let dr = divisor.prop_flat_map(|divisor| {
496            let d = divisor.clone();
497            let remainder =
498                collection::vec(num::u64::ANY, divisor.len()).prop_map(move |mut vec| {
499                    *vec.last_mut().unwrap() %= d.last().unwrap();
500                    vec
501                });
502            (Just(divisor), remainder)
503        });
504        proptest!(|(quotient in quotient, (mut divisor, remainder) in dr)| {
505            let mut numerator: Vec<u64> = vec![0; divisor.len() + quotient.len()];
506            numerator[..remainder.len()].copy_from_slice(&remainder);
507            addmul(&mut numerator, quotient.as_slice(), divisor.as_slice());
508
509            div_nxm(numerator.as_mut_slice(), divisor.as_mut_slice());
510            assert_eq!(&numerator[..quotient.len()], quotient);
511            assert_eq!(divisor, remainder);
512            assert!(numerator[quotient.len()..].iter().all(|&x| x == 0));
513        });
514    }
515}