/* -------------------------------------------------------------------------
 *
 * (c) Hans-Wolfgang Loidl, 2000-
 *
 * Parallel ticky profiling, monitoring basic RTS operations in GUM.
 * Similar in structure to TICKY_TICKY profiling, but doesn't need a 
 * separate way of building GHC.
 *-------------------------------------------------------------------------- */

#if defined(PARALLEL_RTS)  && defined(PAR_TICKY)

#include "Rts.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
//#include "StoragePriv.h"
//#include "MBlock.h"
//#include "Schedule.h"
#include "sm/GC.h"
#include "Stats.h"
#include "Sparks.h"
#include "ParTicky.h"                       // ToDo: move into Rts.h
#include "ParallelRts.h"

#if defined(PARALLEL_RTS) && defined(HAVE_GETRUSAGE)
#include <sys/resource.h>
#endif

/* external data */
extern double ElapsedTimeStart;

extern ullong GC_tot_alloc;
extern ullong GC_tot_copied;

extern lnat MaxResidency;     /* in words; for stats only */
extern lnat ResidencySamples; /* for stats only */

/* ngIplu' {Stats.c}vo' */
#define BIG_STRING_LEN              512

/* ngIplu' {Ticky.c}vo' */
#define INTAVG(a,b) ((b == 0) ? 0.0 : ((double) (a) / (double) (b)))
#define PC(a)	    (100.0 * a)

#define AVG(thing) \
	StgDouble avg##thing  = INTAVG(tot##thing,ctr##thing)


#if 0
void
set_foo_time(double *x) {
  *x = usertime();
}

double
get_foo_time(double x) {
  fprintf(stderr, "get_foo_time: %7.2f (%7.5f,%7.5f) \n", 
	  usertime()-x,usertime(),x);
  return (usertime()-x);
}
#endif

static double start_time_GA = 0.0;
static double start_mark = 0.0;
static double start_pack = 0.0;
static double start_unpack = 0.0;
static double start_StgRun = 0.0;

// this was in Stat.c in GUM4
/* static */ double ElapsedTimeStart = 0.0;
static double TicksPerSecond   = 0.0;

#if 0
// ngoq ngo' (from GUM4)
void
start_time(void)
{
#ifdef HAVE_SYSCONF
    long ticks;
    /* Determine TicksPerSecond ... */

    ticks = sysconf(_SC_CLK_TCK);
    if ( ticks == -1 ) {
	fprintf(stderr, "stat_init: bad call to 'sysconf'!\n");
    	stg_exit(EXIT_FAILURE);
    }
    TicksPerSecond = (double) ticks;

/* no "sysconf"; had better guess */
#elif defined(HZ)
    TicksPerSecond = (StgDouble) (HZ);

#elif defined(CLOCKS_PER_SEC)
    TicksPerSecond = (StgDouble) (CLOCKS_PER_SEC);
#else /* had better guess wildly */
    /* We will #ifdef around the fprintf for machines
       we *know* are unsupported. (WDP 94/05)
    */
    fprintf(stderr, "NOTE: Guessing `TicksPerSecond = 60'!\n");
    TicksPerSecond = 60.0;
#endif

    ElapsedTimeStart = msTime();
}

double
usertime(void)
{
# if ! (defined(HAVE_GETRUSAGE) || defined(HAVE_TIMES))
    /* We will #ifdef around the fprintf for machines
       we *know* are unsupported. (WDP 94/05)
    */
    fprintf(stderr, "NOTE: `usertime' does nothing!\n");
    return 0.0;

# else /* not stumped */

#  if defined(HAVE_TIMES) 
    struct tms t;

    times(&t);
    //fprintf(stderr, "usertime: %7.2f\n", (double)(t.tms_utime)/TicksPerSecond);
    return(((double)(t.tms_utime))/TicksPerSecond);

#  else /* HAVE_GETRUSAGE */
    struct rusage t;

    getrusage(RUSAGE_SELF, &t);
    return(t.ru_utime.tv_sec + 1e-6*t.ru_utime.tv_usec);

#  endif /* HAVE_GETRUSAGE */
# endif /* not stumped */
}

double
elapsedtime(void)
{
# if ! (defined(HAVE_TIMES) || defined(HAVE_FTIME))
    /* We will #ifdef around the fprintf for machines
       we *know* are unsupported. (WDP 94/05)
    */
    fprintf(stderr, "NOTE: `elapsedtime' does nothing!\n");
    return 0.0;

# else /* not stumped */

/* "ftime" may be nicer, but "times" is more standard;
   but, on a Sun, if you do not get the SysV one, you are *hosed*...
 */

#  if defined(HAVE_TIMES) && ! sunos4_TARGET_OS
    struct tms t;
    clock_t r = times(&t);

    return (((double)r)/TicksPerSecond);

#  else /* HAVE_FTIME */
    struct timeb t;

    ftime(&t);
    return (fabs(t.time + 1e-3*t.millitm));

#  endif /* HAVE_FTIME */
# endif /* not stumped */
}
#endif

void
par_ticky_Par_start (void) {
  /* init of globalStats struct */
  // see memset(3)
  memset(&globalParStats, 0, sizeof(GlobalParStats));
  ElapsedTimeStart = msTime();
  IF_PAR_DEBUG(verbose,
	       debugBelch(".... initialised globalParStats to all-0\n"));
}

void
par_ticky_createSparkThread (void) {
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    //set_foo_time(&start_time_GA);
      globalParStats.tot_threads_created++;
  }
}

void
par_ticky_GC_start (void) {
# if !defined(HAVE_GETRUSAGE) || irix_HOST_OS || defined(_WIN32)
    fprintf(stderr, "|| sorry don't have RUSAGE\n");
    return ;
# else
    FILE *sf = RtsFlags.GcFlags.statsFile;
    struct rusage t;
    double utime, stime;

    if (RtsFlags.GcFlags.giveStats>1 && sf != NULL) {
      getrusage(RUSAGE_SELF, &t);
      
      utime = t.ru_utime.tv_sec + 1e-6*t.ru_utime.tv_usec;
      stime = t.ru_stime.tv_sec + 1e-6*t.ru_stime.tv_usec;
      
      fprintf(stderr, "|| user time: %5.2f; system time: %5.2f\n",
	      utime, stime);
      fprintf(stderr, "|| max RSS: %ld; int SM size: %ld; int USM data size: %ld; int USS size: %ld\n",
	      t.ru_maxrss, t.ru_ixrss, t.ru_idrss, t.ru_isrss);
    }
#endif
}

#if 0	    
FYI:
            struct rusage
            {
                 struct timeval ru_utime; /* user time used */
                 struct timeval ru_stime; /* system time used */
                 long ru_maxrss;          /* maximum resident set size */
                 long ru_ixrss;      /* integral shared memory size */
                 long ru_idrss;      /* integral unshared data size */
                 long ru_isrss;      /* integral unshared stack size */
                 long ru_minflt;          /* page reclaims */
                 long ru_majflt;          /* page faults */
                 long ru_nswap;      /* swaps */
                 long ru_inblock;         /* block input operations */
                 long ru_oublock;         /* block output operations */
                 long ru_msgsnd;          /* messages sent */
                 long ru_msgrcv;          /* messages received */
                 long ru_nsignals;        /* signals received */
                 long ru_nvcsw;      /* voluntary context switches */
                 long ru_nivcsw;          /* involuntary context switches */
            };
#endif


void
par_ticky_rebuildGAtables_start(void) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    //set_foo_time(&start_time_GA);
    start_time_GA = msTime();
  }
}

void
par_ticky_rebuildGAtables_end(nat n, nat size_GA) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    static double foo = 0.0; 
    foo = msTime() - start_time_GA; // get_foo_time(start_time_GA);
    globalParStats.cnt_rebuild_GA++;
    globalParStats.tot_rebuild_GA += n;
    if ( n > globalParStats.res_rebuild_GA ) 
      globalParStats.res_rebuild_GA = n;
    // fprintf(stderr, "rebuildGAtables: footime=%7.2f (%11.5f, %11.5f)\n", 
    //    foo, msTime(), start_time_GA);
    globalParStats.time_rebuild_GA += foo;
    globalParStats.tot_size_GA += size_GA;
    if ( size_GA > globalParStats.res_size_GA ) 
      globalParStats.res_size_GA = size_GA;
  }
  // fprintf(stderr, ">> n: %d; size: %d;; tot: %d;  res: %d\n",
  //	  n, size_GA, globalParStats.tot_size_GA, globalParStats.res_size_GA);
}

void
par_ticky_markInPtrs_start(void) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    start_time_GA = msTime();
  }
}

void
par_ticky_markInPtrs_end(nat n) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    globalParStats.cnt_mark_GA++;
    globalParStats.tot_mark_GA += n;
    if ( n > globalParStats.res_mark_GA ) 
      globalParStats.res_mark_GA = n;
    globalParStats.time_mark_GA += msTime() - start_time_GA;
  }
}

void
par_ticky_markSparkQueue_start(void) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    start_mark=msTime();
  }
}

void
par_ticky_markSparkQueue_end(nat n) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    globalParStats.time_sparks += msTime() - start_mark;

    globalParStats.tot_sparks_marked += n;
    if ( n > globalParStats.res_sparks_marked ) 
      globalParStats.res_sparks_marked = n;
  }
}

void
par_ticky_PackNearbyGraph_start (void) {
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    start_pack=msTime();
  }
}

void
par_ticky_PackNearbyGraph_end(nat n, nat thunks) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    globalParStats.time_pack += msTime() - start_pack;

    globalParStats.tot_packets++;
    globalParStats.tot_packet_size += n;
    if ( n > globalParStats.res_packet_size ) 
      globalParStats.res_packet_size = n;
    globalParStats.tot_thunks += thunks;
    if ( thunks > globalParStats.res_thunks ) 
      globalParStats.res_thunks = thunks;
  }
}

void
par_ticky_UnpackGraph_start (void) {
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    start_unpack=msTime();
  }
}

void
par_ticky_UnpackGraph_end(nat n, nat thunks) {
  // collect parallel global statistics (currently done together with GC stats)
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    globalParStats.time_unpack += msTime() - start_unpack;

    globalParStats.rec_packets++;
    globalParStats.rec_packet_size += n;
    /*
    if ( n > globalParStats.res_packet_size ) 
      globalParStats.res_packet_size = n;
    */
    globalParStats.rec_thunks += thunks;
    /*
    if ( thunks > globalParStats.res_thunks ) 
      globalParStats.res_thunks = thunks;
    */
  }
}

//MSA:#if  1
void
par_ticky_before_StgRun (void) {
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    start_StgRun =(double) msTime();

  }
}

void
par_ticky_after_StgRun(void) {
  // collect parallel global statistics
  if (RtsFlags.ParFlags.ParStats.Global &&
      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
    globalParStats.time_active +=(double) msTime() - start_StgRun;

  }
}
//MSA:#endif


void
par_ticky_TP (Capability *cap) {
  // StgSparkPool *pool;
    nat tp_size, sp_size; // stats only

    // Global stats gathering
    /* the spark pool for the current PE */
    // pool = &(cap->r.rSparks);

    // Global statistics: residency of thread and spark pool
    if (RtsFlags.ParFlags.ParStats.Global &&
	RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
      tp_size = run_queue_len(cap) + 1; // add the TSO just poped
      // No: there may be many blocked threads being awoken at the same time
      // ASSERT(tp_size <= RtsFlags.ParFlags.maxThreads);
      globalParStats.tot_tp += tp_size;
      globalParStats.emp_tp += (tp_size==0) ? 1 : 0;
      globalParStats.cnt_tp++;
      if ( tp_size > globalParStats.res_tp)
	globalParStats.res_tp = tp_size;
      // fprintf(stderr, "run_queue_len() = %d (max %d)\n", run_queue_len(), globalParStats.res_tp);
      sp_size = spark_queue_len(cap);
      //ASSERT(sp_size <= RtsFlags.ParFlags.maxLocalSparks);
      globalParStats.tot_sp += sp_size;
      globalParStats.emp_sp += (sp_size==0) ? 1 : 0;
      globalParStats.cnt_sp++;
      if ( sp_size > globalParStats.res_sp)
	globalParStats.res_sp = sp_size;

      IF_PAR_DEBUG(paranoia,
                  debugBelch("     spark_queue_len(pool) = %d (max %d)\n", 
                             /* cap->r.rSparks, */
                             spark_queue_len(cap), globalParStats.res_sp));
    }
}

void
globalParStat_exit(void)
{
    FILE *sf = RtsFlags.GcFlags.statsFile;
    double time, etime;

    parStatsPrintf("\n  The total time spent executing Haskell code is %11.2fs\n",
		   (((double)globalParStats.time_active)/TICKS_PER_SECOND));

    parStatsPrintf("  globalParStat_exit: RtsFlags.ParFlags.ParStats.Global = %d; RtsFlags.GcFlags.giveStats = %d; RtsFlags.GcFlags.statsFile = %s\n", 
		   RtsFlags.ParFlags.ParStats.Global, RtsFlags.GcFlags.giveStats, (RtsFlags.GcFlags.statsFile)?"NON-NULL":"NULL");

    /*
    if (RtsFlags.GcFlags.statsFile==NULL) {
      sf = stderr;
    }
    */

    /* print only if GC stats is enabled, too; i.e. -sstderr */
    if (!(RtsFlags.ParFlags.ParStats.Global &&
	  RtsFlags.GcFlags.giveStats > NO_GC_STATS)) {
      parStatsPrintf("\n Aborting globalParStat_exit\n");
      return;
    }

    time = msTime();
    etime = msTime() - ElapsedTimeStart;
    // parStatsPrintf("foo=%7.2f\n", time);

    if (par_log_file != NULL){
        char temp[BIG_STRING_LEN];
	ullong tot_mess = globalParStats.tot_fish_mess+globalParStats.tot_fetch_mess+
	    globalParStats.tot_resume_mess+globalParStats.tot_schedule_mess;
	ullong tot_rec_mess = globalParStats.rec_fish_mess+globalParStats.rec_fetch_mess+
	  globalParStats.rec_resume_mess+globalParStats.rec_schedule_mess;

	// GC_tot_alloc += alloc;
	parStatsPrintf("\n");

	parStatsPrintf( "%11d threads created\n", 
		globalParStats.tot_threads_created);
	/*
	  Would need to add a ++ to the par macro to use this

	parStatsPrintf( "%11d sparks created\n", 
		globalParStats.tot_sparks_created);
	parStatsPrintf( "%11d sparks ignored\n", 
		globalParStats.tot_sparks_ignored);
	*/
	ullong_format_string(globalParStats.res_tp, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s thread pool residency", temp);
	parStatsPrintf( " (avg: %3.2f; %d times (%2.2f%%) of %d empty)\n", 
		(double)globalParStats.tot_tp/(double)globalParStats.cnt_tp,
		globalParStats.emp_tp, 
		globalParStats.emp_tp*100.0/(double)globalParStats.cnt_tp,
		globalParStats.cnt_tp);
	ullong_format_string(globalParStats.res_sp, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s spark pool residency", temp);

	parStatsPrintf( " (avg: %3.2f; %d times (%2.2f%%) of %d empty)\n", 
		(double)globalParStats.tot_sp/(double)globalParStats.cnt_sp,
		globalParStats.emp_sp, 
		globalParStats.emp_sp*100.0/(double)globalParStats.cnt_sp,
		globalParStats.cnt_sp);
		
	ullong_format_string(globalParStats.tot_db, temp, rtsTrue/*commas*/); /*MSA : add deathBirth */
	parStatsPrintf( "%11s StillBirths \n", temp);
	
	ullong_format_string(tot_mess, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s messages sent (%d fish, %d fetch, %d resume, %d schedule", 
		temp,
		globalParStats.tot_fish_mess, globalParStats.tot_fetch_mess, 
		globalParStats.tot_resume_mess, globalParStats.tot_schedule_mess);
#if defined(DIST)
	parStatsPrintf( "%d revals", globalParStats.tot_reval_mess);
#endif
	parStatsPrintf(")\n");
	ullong_format_string(tot_rec_mess, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s messages received (%d fish, %d fetch, %d resume, %d schedule", 
		temp,
		globalParStats.rec_fish_mess, globalParStats.rec_fetch_mess, 
		globalParStats.rec_resume_mess, globalParStats.rec_schedule_mess);
#if defined(DIST)
	parStatsPrintf( "%d revals", globalParStats.rec_reval_mess);
#endif
	parStatsPrintf(")\n  Note: boring ACK messages are currently not counted\n\n");
#if 0
        // Global Heap statistics; TODO: re-enable!!

	ullong_format_string(globalParStats.tot_size_GA*sizeof(W_), temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s bytes of global heap in total ", temp);
	parStatsPrintf( "(%5.2f%% of total allocated heap)\n", 
		globalParStats.tot_size_GA*sizeof(W_)*100.0/(double)GC_tot_alloc*sizeof(W_));
	ullong_format_string(globalParStats.res_size_GA*sizeof(W_), temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s bytes global heap residency ", temp);
	parStatsPrintf( "(%5.2f%% of max heap residency)\n", 
		globalParStats.res_size_GA*sizeof(W_)*100.0/(double)MaxResidency*sizeof(W_));
#endif
	//ullong_format_string(globalParStats.res_mark_GA, temp, rtsTrue/*commas*/);
	//parStatsPrintf( "%11s GAs residency in GALA table ", temp);
	// ullong_format_string(globalParStats.tot_mark_GA, temp, rtsTrue/*commas*/);
	//parStatsPrintf( "(avg %5.2f; %d samples)\n", 
	//	(double)globalParStats.tot_mark_GA/(double)globalParStats.cnt_mark_GA,
	//	globalParStats.cnt_mark_GA);

	ullong_format_string(globalParStats.local_alloc_GA, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s GAs locally allocated (calls to makeGlobal)\n", temp);

	//MSA:#if 0
	ullong_format_string(globalParStats.tot_rebuild_GA, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s live GAs in total (after rebuilding tables)\n", temp);
	ullong_format_string(globalParStats.res_rebuild_GA, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s GAs residency (after rebuilding tables) ", temp);
	parStatsPrintf( "(avg %5.2f; %d samples)\n", 
		(double)globalParStats.tot_rebuild_GA/(double)globalParStats.cnt_rebuild_GA,
		globalParStats.cnt_rebuild_GA);
	ullong_format_string(globalParStats.res_free_GA, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s residency of freeing GAs", temp);
	parStatsPrintf( " (avg %5.2f; %d samples)\n", 
		(double)globalParStats.tot_free_GA/(double)globalParStats.cnt_free_GA,
		globalParStats.cnt_free_GA);

	parStatsPrintf( "%11.2fs spent marking GAs (%7.2f%% of %7.2fs)\n", 
		globalParStats.time_mark_GA,
		globalParStats.time_mark_GA*100./time, time);
	parStatsPrintf( "%11.2fs spent rebuilding GALA tables (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs)\n", 
		globalParStats.time_rebuild_GA,
		globalParStats.time_rebuild_GA*100./time, time,
		globalParStats.time_rebuild_GA*100./etime, etime);

	ullong_format_string(globalParStats.tot_sparks_marked, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s sparks marked\t", temp);
	ullong_format_string(globalParStats.res_sparks_marked, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%6s spark mark residency\n", temp);
	parStatsPrintf( "%11.2fs spent marking sparks (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n", 
		globalParStats.time_sparks,
		globalParStats.time_sparks*100./time, time,
		globalParStats.time_sparks*100./etime, etime);
	//MSA:#endif
	parStatsPrintf("\n");

	//MSA:#if 0
	ullong_format_string(globalParStats.tot_packets, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s packets sent\n", temp);
	ullong_format_string(globalParStats.tot_packet_size, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s bytes of graph sent in total (max %d; avg %.2f)\n",
		temp, globalParStats.res_packet_size,
		(double)globalParStats.tot_packet_size/(double)globalParStats.tot_packets);
	ullong_format_string(globalParStats.tot_thunks, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s thunks sent in total (max %d; avg %.2f)\n",
		temp, globalParStats.res_thunks,
		(double)globalParStats.tot_thunks/(double)globalParStats.tot_packets);
	parStatsPrintf( "%11.2fs spent packing graph structures (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n", 
		globalParStats.time_pack,
		globalParStats.time_pack*100./time, time,
		globalParStats.time_pack*100./etime, etime);

	ullong_format_string(globalParStats.rec_packets, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s packets received\n", temp);
	ullong_format_string(globalParStats.rec_packet_size, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s bytes of graph received in total (max %d; avg %.2f)\n",
		temp, globalParStats.rec_res_packet_size,
		(double)globalParStats.rec_packet_size/(double)globalParStats.rec_packets);
	ullong_format_string(globalParStats.rec_thunks, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s thunks received in total (max %d; avg %.2f)\n",
		temp, globalParStats.rec_res_thunks,
		(double)globalParStats.rec_thunks/(double)globalParStats.rec_packets);
	parStatsPrintf( "%11.2fs spent unpacking graph structures (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n", 
		globalParStats.time_unpack,
		globalParStats.time_unpack*100./time, time,
		globalParStats.time_unpack*100./etime, etime);

	parStatsPrintf("\n");

	ullong_format_string(globalParStats.tot_arrs, temp, rtsTrue/*commas*/);
	parStatsPrintf( "%11s bytearrays sent; ", temp);
	ullong_format_string(globalParStats.tot_arr_size, temp, rtsTrue/*commas*/);
	parStatsPrintf( " %s bytes in total (avg %.2f)\n",
		temp, 
		(double)globalParStats.tot_arr_size/(double)globalParStats.tot_arrs);
	
	parStatsPrintf("\n");
	//MSA:#endif

#if 0
	parStatsPrintf( "%11d yields, %d blocks, %d stack overflows, %d heap overflows\n",
		globalParStats.tot_yields, globalParStats.tot_blocks, globalParStats.tot_stackover,
 		globalParStats.tot_heapover); 
	parStatsPrintf( "%11d waits (hit porocessMessages)\n",
		globalParStats.tot_waits); 

	parStatsPrintf("\n");

	//parStatsPrintf("Printing this pathetic statistics took %7.2fs (start @ %7.2f)\n",
	//	msTime()-time, time);

	parStatsPrintf( "%11.2fs active (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n", 
		globalParStats.time_active,
		globalParStats.time_active*100./time, time,
		globalParStats.time_active*100./etime, etime);

	parStatsPrintf( "%11.2fs busy-wait (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n", 
		globalParStats.time_wait,
		globalParStats.time_wait*100./time, time,
		globalParStats.time_wait*100./etime, etime);
#endif

	fflush(par_log_file);
	// Open filehandle needed by other stats printing fcts
	// fclose(par_log_file);
    }
}

/* -----------------------------------------------------------------------------
   see Stats.c for orig, seq code
*/

#if defined(PARALLEL_RTS)

void
parStatsPrintf( char *s, ... )
{
    FILE *sf = par_log_file;
    va_list ap;
    
    va_start(ap,s);
    if (sf == NULL) {
	vdebugBelch(s,ap);
    } else {
	vfprintf(sf, s, ap);
    }
    va_end(ap);
}
#endif


#endif

