/*
 * compute pi by integrating f(x) = 4/(1 + x*x)     
 * example program for colloq
 * pthread version
 */ 

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>

#define DEF_N         100              /* default arry dimension */
#define DEF_NTHREADS    4              /* default number of threads */

typedef struct {
  int       nthreads;       /* number of threads */
  int       myid;           /* id of thread (0 .. nthreads-1) */
  int       n;              /* total number of points */
  double    h;              /* step size */
  double   *localsum;       /* array with results */
} thread_arg_t;

void *   par_loop(void * id);


#define PI25DT 3.141592653589793238462643

void 
main(int argc, char *argv[])
{
  double          h, sum, pi;
  int             n, i;
  double *        localsum;             /* partial results */
  int             nthreads;             /* number of threads */
  pthread_t *     thread_id;            /* ids of all threads */
  thread_arg_t *  thread_arg;           /* arguments for the threads */

  /* get command line arguments */
  if (argc != 3) {
    printf("Using default values!\n");
    n        = DEF_N;
    nthreads = DEF_NTHREADS;
  }
  else {
    n        = atoi(argv[1]);
    nthreads = atoi(argv[2]);
  }

  /* initialize data */
  thread_id  = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
  thread_arg = (thread_arg_t *) malloc(nthreads * sizeof(thread_arg_t));
  localsum = (double *) malloc(nthreads * sizeof(double));

  /* compute arguments for all threads */
  h = 1.0/n;
  for (i=0; i<nthreads; i++) {
    thread_arg[i].nthreads = nthreads;
    thread_arg[i].myid     = i;
    thread_arg[i].n        = n;
    thread_arg[i].h        = h;
    thread_arg[i].localsum = localsum;
  }
  
  /* create threads, which all call par_loop */
  for (i=1; i<nthreads; i++) {
    pthread_create(&thread_id[i], NULL, par_loop, &thread_arg[i]);
  }   
  
  /* work myself, as thread 0 */
  par_loop(&thread_arg[0]);
  
  /*  wait for other threads */
  for (i=1; i<nthreads; i++) {
    pthread_join(thread_id[i], NULL);
  }   
  
  /* add partial sums */
  sum = 0.0;
  for (i=0; i<nthreads; i++) {
    sum += localsum[i];
  }
  pi = h*sum;

  /* print the result */
  printf("pi computes to: %20.16lf\n", pi);
  printf("and is:         %20.16lf\n", PI25DT);
}


/* routine called by the threads
 * gets arguments myid, nthreads, n, h, localsum
 * returns result in localsum[myid]
 */
void *par_loop(void *args) 
{
  thread_arg_t *  myarg_p = (thread_arg_t *) args;
  int             myid, nthreads, n;
  double          h;
  double          *localsum;
  int             i;
  double          x;
  double          f(double x);

  /* unpack arguments */
  nthreads   = myarg_p->nthreads;
  myid       = myarg_p->myid;
  n          = myarg_p->n;
  h          = myarg_p->h;
  localsum   = myarg_p->localsum;

  /* threads's share of work */
  localsum[myid] = 0.0;
  for (i=myid; i<n; i+= nthreads) {
    x = h * (i - 0.5);
    localsum[myid] += f(x);
  }

  return(NULL);
}

double f(double x) {
  return 4.0 / (1.0 + x*x);
}