Quantcast
Channel: Intel® C++ Compiler
Viewing all articles
Browse latest Browse all 1616

Erroneous code optimization found on 64-bit ICL 13.1.x

$
0
0

This compiler issue can be reproduced with the following code snippet:

/* main.c */
#include <stdio.h>

/* Define LEN to 1 could give the correct result, 2 or larger will give the wrong result without macro "ICL_WORKAROUND" defined in LreciprtL.c */
#define LEN 2
/* Fixed-point x^(-0.5); the implementation lives in LreciprtL.c.
 * Both the argument and the result are scaled integers: the caller in this
 * file converts them to/from real values by a factor of 2147483648.0 (2^31). */
int
LreciprtL(int x);

/* Fill LEN inputs with the same fixed-point value, print them, then print
 * LreciprtL() of each one. Always returns 0. */
static int
bench_reciprt(void)
{
    int Lsrc[LEN];

    int i;

    /* 0.760045 scaled by 2^31 and rounded to the nearest integer. */
    for (i = 0; i < LEN; i++) Lsrc[i] = (int) (0.760045 * 2147483648.0 + 0.5);


    /* Inputs, converted back to real values by dividing by 2^31. */
    for (i = 0; i < LEN; i++) printf("in[%d]: %lf\n", i, (double) Lsrc[i] / 2147483648.0);
    printf("-------------------\n");
    /* Outputs of LreciprtL(), converted with the same 2^31 scale factor. */
    for (i = 0; i < LEN; i++) printf("out[%d]: %lf\n", i, (double) LreciprtL(Lsrc[i]) / 2147483648.0);

    return 0;
}


/* Program entry point: runs the reciprocal-square-root benchmark and uses
 * its return value as the process exit status. Declared with an explicit
 * (void) parameter list so the definition is a proper prototype. */
int main(void)
{
    return bench_reciprt();
}

 

/* LreciprtL.c */
#include "int_math.h"
/* Uncomment the following line to enable the workaround so that the function gives the right answer, e.g. 0.760045^(-0.5) / 2 = 0.573522 (the division by 2 scales the result down below 1.0) */
//#define ICL_WORKAROUND
/* 2^30, i.e. 0.5 when a 32-bit value is read as a fraction of 2^31. */
static const int L05 = 1073741824;
/* Calculate x^(-0.5) for 0.25 < x < 1, result in 2Q30 (down scaled by 2).
 *
 * x is a fixed-point input; the result is the fixed-point estimate after an
 * initial polynomial guess and three identical refinement steps.
 *
 * When ICL_WORKAROUND is defined the three refinement steps are written as a
 * loop; otherwise they are written out three times. Both forms perform the
 * same sequence of operations. */
int
LreciprtL(int x)
{
    /* Constant subtracted from in each refinement step (value 2^30). */
    const int PLUSONE2Q30 = L05;

    /* Polynomial coefficients: a0 and iy0 are scaled by 2^31, a1 by 2^15;
     * each is rounded by adding 0.5 before the truncating cast. */
    const int  a0 = (const int) (-3.4982 / 4 * 2147483648.0 + 0.5);
    const short  a1 = (const short) ( 1.8077 / 4 * 32768.0 + 0.5);
    const int iy0 = (const int) ( 2.7260 / 4 * 2147483648.0 + 0.5);
#ifdef ICL_WORKAROUND
    int i;
#endif
    /* Initial estimate, evaluated in nested form: a = a0 + x*a1, then
     * iy = iy0 + x*(upper 16 bits of a). */
    int a  = LmacLLS(a0, x, a1);
    int iy = LmacLLS(iy0, x, S_L(a));

    /* Double the estimate (shift left by one bit). */
    iy = LshlLU(iy, 1);


    /* Refinement: each step computes a = PLUSONE2Q30 - 2*(x*iy*iy) and then
     * iy += a*iy, all in fixed point.
     * NOTE(review): this looks like a Newton-Raphson step for the reciprocal
     * square root - confirm against the algorithm's reference. */
#ifdef ICL_WORKAROUND
    for (i = 0; i < 3; i++)
    {
        a =  LmpyLL(x, iy) ;
        a =  LsubLL(PLUSONE2Q30, LshlLU(LmpyLL(a, iy), 1)) ;
        iy = LmacLLL(iy, a, iy) ;
    }
#else
    /* Step 1 */
    a =  LmpyLL(x, iy) ;
    a =  LsubLL(PLUSONE2Q30, LshlLU(LmpyLL(a, iy), 1)) ;
    iy = LmacLLL(iy, a, iy) ;



    /* Step 2 */
    a =  LmpyLL(x, iy) ;
    a =  LsubLL(PLUSONE2Q30, LshlLU(LmpyLL(a, iy), 1)) ;
    iy = LmacLLL(iy, a, iy) ;



    /* Step 3 */
    a =  LmpyLL(x, iy) ;
    a =  LsubLL(PLUSONE2Q30, LshlLU(LmpyLL(a, iy), 1)) ;
    iy = LmacLLL(iy, a, iy) ;
#endif

    return iy ;
}

 

 

/* int_math.h */
/* Define basic math operations.
 *
 * Small fixed-point helpers operating on int (assumed to be 32 bits wide),
 * short (16 bits) and long long (at least 64 bits) values.
 *
 * Additions, subtractions and left shifts are carried out in unsigned
 * arithmetic and converted back to int, so a result that does not fit wraps
 * modulo 2^32 instead of being undefined behaviour (signed overflow). The
 * unsigned-to-int conversion of an out-of-range value is
 * implementation-defined; this header relies on the usual two's-complement
 * wrap. Results for inputs that do not overflow are the same as with plain
 * signed arithmetic.
 *
 * Right shifts of negative values are implementation-defined in C; this
 * header relies on them being arithmetic (sign-propagating) shifts.
 */
#ifndef INT_MATH_H
#define INT_MATH_H

/* "static, always inline" spelled for the compiler in use. MSVC-compatible
 * compilers (including ICL on Windows) keep the original __forceinline. */
#if defined(_MSC_VER)
#  define INT_MATH_INLINE static __forceinline
#elif defined(__GNUC__)
#  define INT_MATH_INLINE static inline __attribute__((always_inline))
#else
#  define INT_MATH_INLINE static inline
#endif

/* Shift the 32-bit value a left by s bits (0 <= s <= 31), wrapping modulo
 * 2^32. Equivalent to a * 2^s whenever that product fits in an int. */
#define _asl32(a, s) ((int)((unsigned)(a) << (unsigned)(s)))

/* Return a + a (a doubled), wrapping on overflow. */
INT_MATH_INLINE int L_A (int a)
{
    return (int)((unsigned)a + (unsigned)a);
}

/* Return the upper 16 bits of a as a short. */
INT_MATH_INLINE short S_L (int a)
{
    return (short) (a >> 16);
}

/* Return a shifted left by s bits (0 <= s <= 31), wrapping on overflow. */
INT_MATH_INLINE int LshlLU (int a, unsigned s)
{
    return _asl32(a, s);
}

/* Return a - b, wrapping on overflow. */
INT_MATH_INLINE int LsubLL (int a, int b)
{
    return (int)((unsigned)a - (unsigned)b);
}

/* Return the upper 32 bits of the 64-bit product a * c. */
INT_MATH_INLINE int AmpyLL (int a, int c)
{
    return (int)(((long long)a * c) >> 32);
}

/* Return twice the upper 32 bits of a * c, i.e. approximately
 * (a * c) / 2^31 with the least significant bit cleared. */
INT_MATH_INLINE int LmpyLL (int a, int c)
{
    return L_A(AmpyLL(a, c));
}

/* Return the 64-bit product a * c shifted right by 16 bits. */
INT_MATH_INLINE int AmpyLS (int a, short c)
{
    return (int)(((long long)a * c) >> 16);
}

/* Multiply-accumulate with a 16-bit factor:
 * a + 2 * ((x * y) >> 16), wrapping on overflow. */
INT_MATH_INLINE int LmacLLS (int a, int x, short y)
{
    return (int)((unsigned)a + (unsigned)L_A(AmpyLS(x, y)));
}

/* Multiply-accumulate with a 32-bit factor:
 * a + LmpyLL(x, y), wrapping on overflow. */
INT_MATH_INLINE int LmacLLL (int a, int x, int y)
{
    return (int)((unsigned)a + (unsigned)LmpyLL(x, y));
}

#endif /* INT_MATH_H */

 

The problem occurs with ICL 13.1.x under MSVS 2010 or 2012 on a 64-bit Windows 7 machine. The compiler is set to build Intel 64 targets, and multi-file optimization is enabled (/Qipo).

Steps to reproduce the issue

unzip the attached project

open ConsoleApplication1.sln with VS2012, build release flavor.

run x64\Release>ConsoleApplication1.exe

the result would be:

in[0]: 0.760045

in[1]: 0.760045

-------------------

out[0]: -0.319917

out[1]: -0.319917

This is definitely wrong: x^(-0.5) must be positive.

 

Ways to mitigate the issue:

1. define ICL_WORKAROUND in LreciprtL.c

2. set LEN to 1 in main.c

3. Turn off global optimization using IDE settings (set interprocedural optimization to Single file /Qip)

4. use #pragma optimize("", off) and #pragma optimize("", on) to turn off optimization around function LreciprtL() in LreciprtL.c

Any one of the four mitigations above gives the correct answer:

in[0]: 0.760045

in[1]: 0.760045

-------------------

out[0]: 0.573522

out[1]: 0.573522

 

Adjunto | Tamaño
Descargar ConsoleApplication1_0.zip | 666.46 KB

Viewing all articles
Browse latest Browse all 1616

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>