|  | /* SPDX-License-Identifier: GPL-2.0 */ | 
|  | #ifndef _LINUX_RECIPROCAL_DIV_H | 
|  | #define _LINUX_RECIPROCAL_DIV_H | 
|  |  | 
|  | #include <linux/types.h> | 
|  |  | 
/*
 * This algorithm is based on the paper "Division by Invariant
 * Integers Using Multiplication" by Torbjörn Granlund and Peter
 * L. Montgomery.
 *
 * The assembler implementation from Agner Fog, which this code is
 * based on, can be found here:
 * http://www.agner.org/optimize/asmlib.zip
 *
 * This optimization for A/B is helpful if the divisor B is mostly
 * runtime invariant. The reciprocal of B is calculated in the
 * slow-path with reciprocal_value(). The fast-path can then just use
 * a much faster multiplication operation with a variable dividend A
 * to calculate the division A/B.
 */
|  |  | 
|  | struct reciprocal_value { | 
|  | u32 m; | 
|  | u8 sh1, sh2; | 
|  | }; | 
|  |  | 
|  | /* "reciprocal_value" and "reciprocal_divide" together implement the basic | 
|  | * version of the algorithm described in Figure 4.1 of the paper. | 
|  | */ | 
|  | struct reciprocal_value reciprocal_value(u32 d); | 
|  |  | 
|  | static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) | 
|  | { | 
|  | u32 t = (u32)(((u64)a * R.m) >> 32); | 
|  | return (t + ((a - t) >> R.sh1)) >> R.sh2; | 
|  | } | 
|  |  | 
|  | struct reciprocal_value_adv { | 
|  | u32 m; | 
|  | u8 sh, exp; | 
|  | bool is_wide_m; | 
|  | }; | 
|  |  | 
|  | /* "reciprocal_value_adv" implements the advanced version of the algorithm | 
|  | * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose | 
|  | * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The | 
|  | * exception case could be easily handled before calling "reciprocal_value_adv". | 
|  | * | 
|  | * The advanced version requires more complex calculation to get the reciprocal | 
|  | * multiplier and other control variables, but then could reduce the required | 
|  | * emulation operations. | 
|  | * | 
|  | * It makes no sense to use this advanced version for host divide emulation, | 
|  | * those extra complexities for calculating multiplier etc could completely | 
|  | * waive our saving on emulation operations. | 
|  | * | 
|  | * However, it makes sense to use it for JIT divide code generation for which | 
|  | * we are willing to trade performance of JITed code with that of host. As shown | 
|  | * by the following pseudo code, the required emulation operations could go down | 
|  | * from 6 (the basic version) to 3 or 4. | 
|  | * | 
|  | * To use the result of "reciprocal_value_adv", suppose we want to calculate | 
|  | * n/d, the pseudo C code will be: | 
|  | * | 
|  | *   struct reciprocal_value_adv rvalue; | 
|  | *   u8 pre_shift, exp; | 
|  | * | 
|  | *   // handle exception case. | 
|  | *   if (d >= (1U << 31)) { | 
|  | *     result = n >= d; | 
|  | *     return; | 
|  | *   } | 
|  | * | 
|  | *   rvalue = reciprocal_value_adv(d, 32) | 
|  | *   exp = rvalue.exp; | 
|  | *   if (rvalue.is_wide_m && !(d & 1)) { | 
|  | *     // floor(log2(d & (2^32 -d))) | 
|  | *     pre_shift = fls(d & -d) - 1; | 
|  | *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); | 
|  | *   } else { | 
|  | *     pre_shift = 0; | 
|  | *   } | 
|  | * | 
|  | *   // code generation starts. | 
|  | *   if (imm == 1U << exp) { | 
|  | *     result = n >> exp; | 
|  | *   } else if (rvalue.is_wide_m) { | 
|  | *     // pre_shift must be zero when reached here. | 
|  | *     t = (n * rvalue.m) >> 32; | 
|  | *     result = n - t; | 
|  | *     result >>= 1; | 
|  | *     result += t; | 
|  | *     result >>= rvalue.sh - 1; | 
|  | *   } else { | 
|  | *     if (pre_shift) | 
|  | *       result = n >> pre_shift; | 
|  | *     result = ((u64)result * rvalue.m) >> 32; | 
|  | *     result >>= rvalue.sh; | 
|  | *   } | 
|  | */ | 
|  | struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); | 
|  |  | 
|  | #endif /* _LINUX_RECIPROCAL_DIV_H */ |