Coverage Report

Created: 2025-06-23 13:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/cargo-vendor-dir/libm-0.2.15/src/math/generic/scalbn.rs
Line
Count
Source
1
use crate::support::{CastFrom, CastInto, Float, IntTy, MinInt};
2
3
/// Scale the exponent.
4
///
5
/// From N3220:
6
///
7
/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type
8
/// > of the function is a standard floating type, or `b = 10` if the return type of the function
9
/// > is a decimal floating type. A range error occurs for some finite x, depending on n.
10
/// >
11
/// > [...]
12
/// >
13
/// > * `scalbn(±0, n)` returns `±0`.
14
/// > * `scalbn(x, 0)` returns `x`.
15
/// > * `scalbn(±∞, n)` returns `±∞`.
16
/// >
17
/// > If the calculation does not overflow or underflow, the returned value is exact and
18
/// > independent of the current rounding direction mode.
19
#[inline]
20
0
pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
21
0
where
22
0
    u32: CastInto<F::Int>,
23
0
    F::Int: CastFrom<i32>,
24
0
    F::Int: CastFrom<u32>,
25
0
{
26
0
    let zero = IntTy::<F>::ZERO;
27
0
28
0
    // Bits including the implicit bit
29
0
    let sig_total_bits = F::SIG_BITS + 1;
30
0
31
0
    // Maximum and minimum values when biased
32
0
    let exp_max = F::EXP_MAX;
33
0
    let exp_min = F::EXP_MIN;
34
0
35
0
    // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
36
0
    let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
37
0
38
0
    // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
39
0
    let f_exp_min = F::from_parts(false, 1, zero);
40
0
41
0
    // 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64)
42
0
    let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
43
0
44
0
    /*
45
0
     * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
46
0
     * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
47
0
     * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
48
0
     * the final scale operation by prescaling by the max/min power representable by `F`.
49
0
     */
50
0
51
0
    if n > exp_max {
52
        // Worse case positive `n`: `x`  is the minimum subnormal value, the result is `F::MAX`.
53
        // This can be reached by three scaling multiplications (two here and one final).
54
0
        debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3);
55
56
0
        x *= f_exp_max;
57
0
        n -= exp_max;
58
0
        if n > exp_max {
59
0
            x *= f_exp_max;
60
0
            n -= exp_max;
61
0
            if n > exp_max {
62
0
                n = exp_max;
63
0
            }
64
0
        }
65
0
    } else if n < exp_min {
66
        // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
67
        // go subnormal. This avoids double rounding.
68
0
        if F::BITS > 16 {
69
            // `mul` s.t. `!(x * mul).is_subnormal() ∀ x`
70
0
            let mul = f_exp_min * f_pow_subnorm;
71
0
            let add = -exp_min - sig_total_bits as i32;
72
0
73
0
            // Worse case negative `n`: `x`  is the maximum positive value, the result is `F::MIN`.
74
0
            // This must be reachable by three scaling multiplications (two here and one final).
75
0
            debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);
76
77
0
            x *= mul;
78
0
            n += add;
79
0
80
0
            if n < exp_min {
81
0
                x *= mul;
82
0
                n += add;
83
0
84
0
                if n < exp_min {
85
0
                    n = exp_min;
86
0
                }
87
0
            }
88
        } else {
89
            // `f16` is unique compared to other float types in that the difference between the
90
            // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
91
            // small, only three. The above method depend on decrementing `n` by `add` two times;
92
            // for other float types this works out because `add` is a substantial fraction of
93
            // the exponent range. For `f16`, however, 3 is relatively small compared to the
94
            // exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
95
            //
96
            // Work aroudn this by using a different algorithm that calculates the prescale
97
            // dynamically based on the maximum possible value. This adds more operations per round
98
            // since it needs to construct the scale, but works better in the general case.
99
0
            let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
100
0
            let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
101
0
102
0
            x *= mul;
103
0
            n += add;
104
0
105
0
            if n < exp_min {
106
0
                let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
107
0
                let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
108
0
109
0
                x *= mul;
110
0
                n += add;
111
0
112
0
                if n < exp_min {
113
0
                    n = exp_min;
114
0
                }
115
0
            }
116
        }
117
0
    }
118
119
0
    let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
120
0
    x * scale
121
0
}