#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <time.h>

#include "papi.h"
#include "timer.h"

// compil (sur aqutaine): gcc -I/usr/local/include -O0 PAPI-powInt.c /usr/local/lib/libpapi.a
// compil : gcc -O0 -I. PAPI-powDouble.c libpapi.a -o papi_exp -Wall


#define NB_EVENT 3  // max:3

/* Get actual CPU time in seconds */
float gettime() {
  return((float)PAPI_get_virt_usec()/1000000);
}


//#define mult(a,b) ((a*b) % 7000577)
#define mult(a,b) (a*b)

int NBR = 50000000;
int MAX = 67108864;
//int MIN = 16777216;
int MIN = 1;
//int MAX = 4194304;
int  *ALEA;


int randomDe(int max){
    int de;
    do {
        de = random();
        
    } while(de >= INT_MAX - (INT_MAX % max));
    
    return de % max;
}

void randomize(int *T,int max, int min, int n) {
    int i;
    for(i=0;i<n;i++){
        T[i] = randomDe(max-min)+min;
    }
}

/* exponentiation rapide classique, à tester avec les timings et les compteurs de la lib PAPI */
double binPow(double x,int n){
    double r;
    r = 1;
    while (n > 0) {
        if (n & 1) {
            r = mult(r,x);
        }
        n >>= 1;
        x = mult(x,x);
    }
    return r;
}

/* exponentiation rapide classique, la valeur retourner est le nombre d'iterations */
long int binPowCpt(long int x,int n){
    long int r;
    r = 1;
    int cpt =0 ;
    while (n > 0) {
        cpt++;
        if (n & 1) {
            r = mult(r,x);
        }
        n >>= 1;
        x = mult(x,x);
    }
    return cpt;
}

/* exponentiation rapide classique, retourne le nombre de multiplications */
long int binPowMult(long int x,int n){
    long int r;
    r = 1;
    int cpt =0 ;
    while (n > 0) {
        if (n & 1) {
            cpt++;
            r = mult(r,x);
        }
        n >>= 1;
        cpt++;
        x = mult(x,x);
    }
    return cpt;
}

/* exponentiation rapide qui guide le predicteur, à tester avec les timings et les compteurs de la lib PAPI */ 
double superSkewPow(double x,int n){
    double r,t;
    r = 1;
    while (n > 0) {
        t = mult(x,x);
        if (n & 3) {
            if (n & 1) {
                r = mult(r,x);
            }
            if (n & 2) {
                r = mult(r,t);
            }
        }
        
        n >>= 2;
        x = mult(t,t);
    }
    return r;
}

/* exponentiation rapide qui guide le predicteur, retourne le nombre d'iterations */
long int superSkewPowCpt(long int x,int n){
    long int r,t;
    r = 1;
    int cpt =0 ;
    while (n > 0) {
        cpt++;
        t = mult(x,x);
        if (n & 3) {
            if (n & 1) {
                r = mult(r,x);
            }
            if (n & 2) {
                r = mult(r,t);
            }
        }
        
        n >>= 2;
        x = mult(t,t);
    }
    return cpt;
}

/* exponentiation rapide qui guide le predicteur, retourne le nombre de multiplications */
long int superSkewPowMult(long int x,int n){
    long int r,t;
    r = 1;
    int cpt =0 ;
    while (n > 0) {
        cpt++;
        t = mult(x,x);
        if (n & 3) {
            if (n & 1) {
                cpt++;
                r = mult(r,x);
            }
            if (n & 2) {
                cpt++;
                r = mult(r,t);
            }
        }
        
        n >>= 2;
        cpt++;
        x = mult(t,t);
    }
    return cpt;
}


/* exponentiation rapide classique avec deroulement de la boucle principale, à tester avec les timings et les compteurs de la lib PAPI */
double skewPow(double x,int n){
    double r,t;
    r = 1;
    while (n > 0) {
        t = mult(x,x);
        if (n & 1) {
            r = mult(r,x);
        }
        if (n & 2) {
            r = mult(r,t);
        }
        n >>= 2;
        x = mult(t,t);
    }
    return r;
}

/* exponentiation rapide classique avec deroulement de la boucle principale, retourne le nombre d'iterations */
long int skewPowCpt(long int x,int n){
    long int r,t;
    r = 1;
    int cpt =0 ;
    while (n > 0) {
        cpt++;
        t = mult(x,x);
        if (n & 1) {
            r = mult(r,x);
        }
        if (n & 2) {
            r = mult(r,t);
        }
        n >>= 2;
        x = mult(t,t);
    }
    return cpt;
}

/* exponentiation rapide classique avec deroulement de la boucle principale, retourne le nombre de multiplications */
long int skewPowMult(long int x,int n){
    long int r,t;
    r = 1;
    int cpt =0 ;
    while (n > 0) {
        cpt++;
        t = mult(x,x);
        if (n & 1) {
            cpt++;
            r = mult(r,x);
        }
        if (n & 2) {
            cpt++;
            r = mult(r,t);
        }
        n >>= 2;

        cpt++;
        x = mult(t,t);
    }
    return cpt;
}

void test(int nbr){
     init_timer;

     printf( "PAPI Version             : %d.%d.%d.%d\n",
        PAPI_VERSION_MAJOR( PAPI_VERSION ),
        PAPI_VERSION_MINOR( PAPI_VERSION ),
        PAPI_VERSION_REVISION( PAPI_VERSION ),
        PAPI_VERSION_INCREMENT( PAPI_VERSION ) );


    /////////////////////////////////////////////////////////////////////////////
    double timing_;
    int events[NB_EVENT] = { PAPI_BR_CN, PAPI_BR_MSP, PAPI_L1_DCM }, ret;
    long_long values[NB_EVENT];
    /////////////////////////////////////////////////////////////////////////////
    if (PAPI_num_counters() < 2) {
     fprintf(stderr, "No hardware counters here, or PAPI not supported.\n");
     exit(1);
    }
    /////////////////////////////////////////////////////////////////////////////

    int i;
    long int nbloops,nbmult;
    double x1;
    
    randomize(ALEA,MAX,MIN,nbr);
    printf("fini randomize\n");
    
    /////////////////////////////////////////////////////////////////////////////
    first_step_timer;
    x1 = 0;
    for(i=0;i<nbr;i++)
        x1 += binPow(2,ALEA[i]);
    second_step_timer;
    timing_ = tim1;

    printf("------ binPow -------- %lf\n", x1);
    printf("time = %lf sec.\n", timing_);
    /////////////////////////////////////////////////////////////////////////////
    
    if ((ret = PAPI_start_counters(events, NB_EVENT)) != PAPI_OK) {
     fprintf(stderr, "PAPI failed to start counters: %s\n", PAPI_strerror(ret));
     exit(1);
    }
    /////////////////////////////////////////////////////////////////////////////

    nbloops = 0;
    for(i=0;i<nbr;i++)
        nbloops += binPowCpt(2,ALEA[i]);

    /////////////////////////////////////////////////////////////////////////////
    if ((ret = PAPI_stop_counters(values, NB_EVENT)) != PAPI_OK) {
    fprintf(stderr, "PAPI failed to read counters: %s\n", PAPI_strerror(ret));
    exit(1);
    }
    
    printf("Conditional branch instructions (without loops):  %lld\n", values[0]-nbr-nbloops);
    printf("Conditional branch instructions mispredicted: %lld\n", values[1]);
    printf("Ratio: %f\n",(float)((float)(values[1])/(values[0]-nbr-nbloops)));
    printf("L1 data cache misses is %lld\n", values[2]);
    printf("nbloops %ld\n", nbloops);

    /////////////////////////////////////////////////////////////////////////////

    nbmult = 0;
    for(i=0;i<nbr;i++)
        nbmult+= binPowMult(2,ALEA[i]);

    
    printf("nbmult %ld\n", nbmult);
    /////////////////////////////////////////////////////////////////////////////







    /////////////////////////////////////////////////////////////////////////////
    first_step_timer;
    x1 = 0;
    for(i=0;i<nbr;i++)
        x1 += skewPow(2,ALEA[i]);
    second_step_timer;
    timing_ = tim1;

    printf("------ skewPow -------- %lf\n", x1);
    printf("time = %lf sec.\n", timing_);
    /////////////////////////////////////////////////////////////////////////////
        
    if ((ret = PAPI_start_counters(events, NB_EVENT)) != PAPI_OK) {
     fprintf(stderr, "PAPI failed to start counters: %s\n", PAPI_strerror(ret));
     exit(1);
    }
    /////////////////////////////////////////////////////////////////////////////


    nbloops = 0;
    for(i=0;i<nbr;i++)
        nbloops += skewPowCpt(2,ALEA[i]);

    /////////////////////////////////////////////////////////////////////////////
    if ((ret = PAPI_stop_counters(values, NB_EVENT)) != PAPI_OK) {
    fprintf(stderr, "PAPI failed to read counters: %s\n", PAPI_strerror(ret));
    exit(1);
    }
    
    printf("Conditional branch instructions (without loops):  %lld\n", values[0]-nbr-nbloops);
    printf("Conditional branch instructions mispredicted: %lld\n", values[1]);
    printf("Ratio: %f\n",(float)((float)(values[1])/(values[0]-nbr-nbloops)));
    printf("L1 data cache misses is %lld\n", values[2]);
    printf("nbloops %ld\n", nbloops);

    /////////////////////////////////////////////////////////////////////////////

    nbmult = 0;
    for(i=0;i<nbr;i++)
        nbmult+= skewPowMult(2,ALEA[i]);

    
    printf("nbmult %ld\n", nbmult);
    /////////////////////////////////////////////////////////////////////////////


    /////////////////////////////////////////////////////////////////////////////
    first_step_timer;
    x1 = 0;
    for(i=0;i<nbr;i++)
        x1 += superSkewPow(2,ALEA[i]);
    second_step_timer;
    timing_ = tim1;

    printf("------ superSkewPow -------- %lf\n", x1);
    printf("time = %lf sec.\n", timing_);
    /////////////////////////////////////////////////////////////////////////////
    
    
    if ((ret = PAPI_start_counters(events, NB_EVENT)) != PAPI_OK) {
     fprintf(stderr, "PAPI failed to start counters: %s\n", PAPI_strerror(ret));
     exit(1);
    }
    /////////////////////////////////////////////////////////////////////////////


    nbloops = 0;
    for(i=0;i<nbr;i++)
        nbloops += superSkewPowCpt(2,ALEA[i]);

    /////////////////////////////////////////////////////////////////////////////
    if ((ret = PAPI_stop_counters(values, NB_EVENT)) != PAPI_OK) {
    fprintf(stderr, "PAPI failed to read counters: %s\n", PAPI_strerror(ret));
    exit(1);
    }
    
    printf("Conditional branch instructions (without loops):  %lld\n", values[0]-nbr-nbloops);
    printf("Conditional branch instructions mispredicted: %lld\n", values[1]);
    printf("Ratio: %f\n",(float)((float)(values[1])/(values[0]-nbr-nbloops)));
    printf("L1 data cache misses is %lld\n", values[2]);
    printf("nbloops %ld\n", nbloops);

    /////////////////////////////////////////////////////////////////////////////

    nbmult = 0;
    for(i=0;i<nbr;i++)
        nbmult+= superSkewPowMult(2,ALEA[i]);

    
    printf("nbmult %ld\n", nbmult);
    /////////////////////////////////////////////////////////////////////////////

}

void init() {
    ALEA = (int *) calloc (NBR,sizeof(int));
}

int main() {
    srandom(time(NULL));
    init();
    test(NBR);
    free(ALEA);
    return 0;
}
