/***************************************************************************
 * 
 * Copyright (c) 1999 Geodesic Systems, Inc.
 *
 * Multi-threaded program to test performance when one thread
 * frees objects allocated by a different thread.
 * This program is similar to demomtF (source speed_test_mt.c), but
 * when number of threads is specified on the command line, twice that
 * many threads are created, in pairs.  In each pair, one thread does
 * the same malloc tasks as demomtF, and the other does the same free
 * tasks as demomtF.  Condition variables are used to synchronize the
 * threads in a pair, so that the free thread waits while the malloc
 * thread is working on a pass, and vice-versa.
 *
 * NOTE: Initial implementation only works for ALPHA, AIX, and HP.
 * Need to adapt for NT and Solaris.
 ***************************************************************************
 */

#if M_NT
#include <windows.h>
#elif M_SOLARIS || M_HP700 || M_IBM_R6000 || M_DECALPHA
#include <pthread.h>
#if M_IBM_R6000
#include <stdlib.h>
#else
#include <malloc.h>
#endif
#endif
#include <stdio.h>
#include "mts.h"

/*
 *  MEM_ARRAY_SIZE defines how many mallocs occur before calls to free are
 *  made.  MEM_SAVE_SIZE defines what number of mallocs are not freed in
 *  the current free cycle.  Increasing the ARRAY_SIZE increases the amount
 *  of memory that is malloced and freed.  Decreasing the SAVE_SIZE divisor
 *  also increases the amount of memory needed to run this test.
 *  This test assumes that 1/15th of the mallocs are not freed on each cycle.
 */
#define	MEM_ARRAY_SIZE	100000  
#define	MEM_SAVE_SIZE	(MEM_ARRAY_SIZE / 5)

/* First_addr and last_addr are used to compute total heap size for
 * platforms where sbrk(0) is available.
 */
unsigned long first_addr;
unsigned long last_addr;

#define MAX_THREAD_HEAPS 32

#if M_DECALPHA || M_HP700 || M_IBM_R6000

#define MAX_THREAD_PAIRS 32
char *mem_arrays[MAX_THREAD_PAIRS][MEM_ARRAY_SIZE];
int global_num_thread_pairs;
struct cond {
  int             predicate;
  pthread_mutex_t lock;
  pthread_cond_t  condvar;
  char        **cond_mp;
  char        **cond_mpe;
  char        **cond_save_start;
  char        **cond_save_end;
} conds[MAX_THREAD_PAIRS];
#define mp         (*p_mp)
#define mpe        (*p_mpe)
#define save_start (*p_save_start)
#define save_end   (*p_save_end)

#else
#define MAX_THREAD_PAIRS 50
#if M_NT
/* low_marks and high_marks are used to compute total heap size for
 * platforms where sbrk(0) is not available.
 */
unsigned long low_marks[MAX_THREAD_PAIRS];
unsigned long high_marks[MAX_THREAD_PAIRS];
#endif
#endif

#if M_DECALPHA
#  define THREAD_HANDLE_DEF pthread_t THREAD_HANDLE;
#else
#  define THREAD_HANDLE_DEF
#  define THREAD_HANDLE
#endif

#if M_DECALPHA
#  define  mts_get_thread_id(t) pthread_getsequence_np(pthread_self())
#elif M_SOLARIS
#  define mts_get_thread_id(t) thr_self()
#elif M_HP700 || M_IBM_R6000
#  define mts_get_thread_id(t) (unsigned long)pthread_self()
#elif M_NT
#  define mts_get_thread_id(t) GetCurrentThreadId()
#else
#  define mts_get_thread_id(t) 0
#endif

#if M_NT
#include <windows.h>
showtime() {
    static iter = 0;
    static long firsttime;
    SYSTEMTIME systime;
    GetSystemTime(&systime);
    if (iter == 0) {
      firsttime = 
             (systime.wHour            * 60 * 60 * 1000 +
              systime.wMinute               * 60 * 1000 +
              systime.wSecond                    * 1000 +
              systime.wMilliseconds);
      iter++;
    }
    else {
      int ms;
      int secs;
      int mins = 0;
      ms = (systime.wHour            * 60 * 60 * 1000 +
            systime.wMinute               * 60 * 1000 +
            systime.wSecond                    * 1000 +
            systime.wMilliseconds) - firsttime;
      secs = ms / 1000;
      ms = ms % 1000;
      mins = secs / 60;
      secs = secs % 60;
      printf("\nElapsed time: %dm%d.%3.3ds\n", mins, secs, ms); 
    }
}
#endif

#if M_NT
DWORD WINAPI thread_func(LPVOID param) 
#elif M_SOLARIS || M_HP700 || M_DECALPHA || M_IBM_R6000
void *thread_func(void *param)
#endif
{ 
  int  thread_num = *((int *)param);
  int  thread_pair_num = thread_num / 2;
  int  do_frees = thread_num % 2;
#if M_DECALPHA || M_HP700 || M_IBM_R6000
  char          **memory = mem_arrays[thread_pair_num]; 
#else
  char           *memory[MEM_ARRAY_SIZE];
#endif
  int		i; 
  int		j;
  int		k;
  int		k_max;
  int		repeat = 100;
  char       ***p_mp = &(conds[thread_pair_num].cond_mp);
  char       ***p_mpe = &(conds[thread_pair_num].cond_mpe);
  char       ***p_save_start = &(conds[thread_pair_num].cond_save_start);
  char       ***p_save_end = &(conds[thread_pair_num].cond_save_end);
  long		low_mark;
  long		high_mark;
  long		print_high_mark;
  unsigned long threadid;
  int which_heap;
  THREAD_HANDLE_DEF

#if M_DECALPHA || M_HP700 || M_IBM_R6000
  if (!param)
    return (void *)0;
#endif

  threadid = mts_get_thread_id(THREAD_HANDLE);

  if (!do_frees) {
    mp = memory;
#if M_DECALPHA || M_HP700 || M_IBM_R6000
    mpe = memory + MEM_ARRAY_SIZE;
#else
    mpe = memory + sizeof memory / sizeof memory[0];
#endif
    save_start = mpe;
    save_end = mpe;

#if MTS_MT
    which_heap = mts_which_heap();
    printf("Thread %d using heap %d\n", 
            threadid, which_heap);
#endif

   /*
    *  low_mark and high_mark hold the lowest and highest pointers to memory
    *  that has been malloced.  They are printed out as the difference 
    *  increases between them in 1 meg intervals.
    */
    low_mark = (long) malloc (1);
#if M_NT
   /* On NT, keep track of high mark in control version, since we can't use
    * sbrk(0) at start and end to determine total memory used.
    */
    high_mark = low_mark;
    print_high_mark = low_mark;
#endif
    free ((char *) low_mark);

    while (repeat--)  
    { 
      /* Report progess when each 20% of the test is completed */
      if (repeat % 20 == 0)
 	printf("Thread %d completed %d%%\n", threadid, 100 - repeat);

      for (i = 1; i < 1000000; i = i * 3 / 2 + 1)
      { 
	for (j = i; j; j /= 2)
        { k_max = 1;           /* k_max controls the size allocation bias.
	 	 		  Large things are allocated once.  Sizes less
				  than 100 are allocated 250 times.
                                */

	  if (j < 10000)
	    k_max = 10;

	  if (j < 1000)
	    k_max *= 5;

	  if (j < 100)
	    k_max *= 5;

	  for (k = 0; k < k_max; k ++)
  	  { 
            if ( ! (*mp ++ = (char *) malloc (j)))
  	    { write (2, "? malloc failed\n", sizeof "? malloc failed\n");
  	      _exit (1);
  	    }
#if M_NT
	    /* set the high_mark */
	    if (((long) *(mp - 1)) + j > high_mark)
	      high_mark = (long) *(mp - 1) + j;
#endif

	    /* while allocating skip over that portion of the buffer that still
	       holds pointers from the previous cycle
             */
	    if (mp == save_start)
	      mp = save_end;
	    if (mp >= mpe) /* if we've reached the end of the malloc buffer */
            {
	      /* Wake up other thread in this pair, to do the frees for
               * this round.
               */
              pthread_mutex_lock(&(conds[thread_pair_num].lock));
              conds[thread_pair_num].predicate = 1;
              pthread_cond_broadcast(&(conds[thread_pair_num].condvar));
              while (conds[thread_pair_num].predicate == 1)
                pthread_cond_wait(&(conds[thread_pair_num].condvar),
	                          &(conds[thread_pair_num].lock));
              pthread_mutex_unlock(&(conds[thread_pair_num].lock));
	    }
	  } /* for k */
        } /* for j */
      } /* for i */
    } /* while (repeat--) */
  } /* if (!do_frees) */
  else /* This thread does frees */
  {
    for(;;) 
    { 
      /* Wait until partner thread is done with a round of mallocs */
      pthread_mutex_lock(&(conds[thread_pair_num].lock));
      while (conds[thread_pair_num].predicate == 0)
	pthread_cond_wait(&(conds[thread_pair_num].condvar),
	                  &(conds[thread_pair_num].lock));
      pthread_mutex_unlock(&(conds[thread_pair_num].lock));
      /* If partner thread is finished, no more frees to be done,
       * then terminate this thread.
       */
      if (conds[thread_pair_num].predicate < 0) {
	pthread_exit(NULL); 
      }

      mp = memory;
            
      /* mark the next portion of the buffer */
      save_start = save_end;  
      if (save_start >= mpe)	save_start = mp;
      save_end = save_start + MEM_SAVE_SIZE;
      if (save_end > mpe)	save_end = mpe;
            
      /* free the bottom and top parts of the buffer.
       * The bottom part is freed in the order of allocation.
       * The top part is free in reverse order of allocation.
       */
      while (mp < save_start)
	free (*mp ++);
      mp = mpe;

      while (mp > save_end)
	free (*--mp);

      mp = memory;

      /* Wake up partner thread to do next round of mallocs */
      pthread_mutex_lock(&(conds[thread_pair_num].lock));
      conds[thread_pair_num].predicate = 0;
      pthread_cond_broadcast(&(conds[thread_pair_num].condvar));
      pthread_mutex_unlock(&(conds[thread_pair_num].lock));
    }
  }
  /* Let the partner thread know we're all done */
  pthread_mutex_lock(&(conds[thread_pair_num].lock));
  conds[thread_pair_num].predicate = -1;
  pthread_cond_broadcast(&(conds[thread_pair_num].condvar));
  pthread_mutex_unlock(&(conds[thread_pair_num].lock));

  /* free the residual allocations */
  mpe = mp;
  mp = memory;

  while (mp < mpe)
    free (*mp ++);

  printf("Thread %d finished\n", 
         mts_get_thread_id(THREAD_HANDLE));
#if M_NT
  {
    int i = (unsigned long *)param - low_marks;
    low_marks[i] = (unsigned long)low_mark;
    high_marks[i] = (unsigned long)high_mark;
  }
#endif
#if M_HP700 || M_DECALPHA || M_IBM_R6000
  global_num_thread_pairs--;
  if (global_num_thread_pairs <= 0) {
    printf("All threads finished, thread %d exiting.\n", 
            mts_get_thread_id(THREAD_HANDLE));
#if MTS_MT
    printf("Total size of all heaps: %ld\n", mts_heap_total_size());
#else
    last_addr = (unsigned long)sbrk(0);
    printf("Total heap size: %ld\n", last_addr - first_addr);
#endif
    exit(0);
  }
#endif
  return 0;
}

int main(argc, argv)
int argc;
char *argv[];
{
  int num_thread_pairs = 1;
  int num_thread_heaps = 1;
  int df_size = 10;
#if M_NT
  DWORD threadid;
  HANDLE hthread;
  HANDLE handles[MAX_THREAD_PAIRS];
#elif M_SOLARIS || M_HP700 || M_DECALPHA || M_IBM_R6000
  pthread_t threadid;
  int hthread;
  pthread_t handles[MAX_THREAD_PAIRS];
  pthread_attr_t attr;
#endif
  long print_threadid;
  int i;
  THREAD_HANDLE_DEF
  int thread_nums[MAX_THREAD_PAIRS*2];

#if !M_NT
  first_addr = (unsigned long)sbrk(0);
#endif

 /*
  *  prints different messages depending on whether this test is compiled
  *  with the MTS library or libc.a
  */
#if defined(COMPILE_ID)
  write (1, "Allocating memory using ", 
           sizeof "Allocating memory using " - 1);
  write (1, COMPILE_ID, sizeof COMPILE_ID - 1);
  write (1, "\n\n", 2);
#endif

  if (argc > 1)
    num_thread_pairs = atoi((const char *)argv[1]);
  if (num_thread_pairs > MAX_THREAD_PAIRS) {
    printf("Maximum number of thread pairs is %d.\n", MAX_THREAD_PAIRS);
    num_thread_pairs = MAX_THREAD_PAIRS;
  }
#if M_HP700 || M_DECALPHA || M_IBM_R6000
  global_num_thread_pairs = num_thread_pairs;
#endif

  if (argc > 2)
    num_thread_heaps = atoi((const char *)argv[2]);
  else
    if (num_thread_pairs == 1)
      num_thread_heaps = 1;
    else
      num_thread_heaps = num_thread_pairs * 2;
  if (num_thread_heaps > MAX_THREAD_HEAPS) {
    printf("Maximum number of heaps is %d.\n", MAX_THREAD_HEAPS);
    num_thread_heaps = MAX_THREAD_PAIRS;
  }

#if MTS_MT
  if (argc > 3)
    df_size = atoi((const char *)argv[3]);
  mts_set_df_size(df_size);

  printf("Initializing %d thread heaps\n", num_thread_heaps);
  mts_init_thread_heaps(num_thread_heaps); 

  if (num_thread_pairs >= 10)
    mts_set_sys_alloc_size(256 * 4096, 256 * 8 * 16 * 4096, 1); 

#endif  
  printf("Starting %d thread pairs\n", num_thread_pairs);

#if M_NT
   showtime();
#endif

   /* Initialize mutexes and condition variables for synchronizing
    * each pair of threads.
    */
   for (i = 0; i < num_thread_pairs; i++) {
     conds[i].predicate = 0;
     pthread_mutex_init(&(conds[i].lock), NULL);
     pthread_cond_init(&(conds[i].condvar), NULL);
   }

   for (i = 0; i < (num_thread_pairs*2); i++) { 
    thread_nums[i] = i;
#if M_NT
    hthread = CreateThread(NULL,         
                           0,
                           thread_func, 
			   (void *)(low_marks + i),          
                           0,           
                           &threadid    
                          );
    if (hthread == NULL) 
#elif M_DECALPHA
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
    hthread = pthread_create(&threadid, &attr, thread_func, 
                             (void *)&thread_nums[i]);
    
    if (hthread != 0)
#elif M_SOLARIS || M_HP700 || M_IBM_R6000
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
#if M_HP700 || M_IBM_R6000
    hthread = pthread_create(&threadid, &attr, thread_func,
                             (void *)&(thread_nums[i]));
#else
    hthread = pthread_create(&threadid, &attr, thread_func, 0);
#endif
    if (hthread != 0)
#endif
      printf("Cannot start thread, error code: %d\n", hthread);
    else {
#if M_DECALPHA
      print_threadid = pthread_getsequence_np(pthread_self());
#endif      
    }
#if M_NT
    handles[i] = hthread;
#endif
  }
#if M_NT
  WaitForMultipleObjects(num_thread_pairs, handles, TRUE, INFINITE); 
#elif M_SOLARIS
  while (thr_join(NULL, NULL, NULL) == 0);
#elif M_HP700 || M_DECALPHA || M_IBM_R6000
  sleep(100000000);
#endif
  printf("All threads finished.\n");			    

  /* Report the total heap size, i.e. the amount of memory that has been
   * reserved for this process by calls to sbrk (on Unix) or VirtualAlloc
   * (on NT).  For the MTS version of the test, we call the
   * routine mts_heap_total_size(), which returns the sum of the amounts
   * of memory that have been acquired by all of the MTS heaps through
   * calls to sbrk or VirtualAlloc (MTS keeps track of this in internal
   * counters, as memory is allocated).  For the standard malloc version
   * of the test, on NT we take the difference between the lowest and highest
   * pointers returned to this program by malloc, and on Unix we call sbrk(0)
   * at the beginning and end of this program (giving the address that
   * would be returned by the next non-zero call to sbrk) and take the
   * difference between these addresses.  The reason we don't use this
   * same approach to report the heap size for the MTS version is that
   * MTS allocates substantial memory for its initial heap before main()
   * is called, so that the approaches used for the standard malloc version
   * of the test would under-report the amount of memory used by the MTS
   * version of the test.  However, on NT where a non-trivial amount of
   * code is required to detect the lowest and highest addresses returned 
   * by malloc, the MTS version of the test performs the same logic as
   * the standard version, to avoid giving the MTS version an unfair
   * performance advantage.
   */
#if MTS_MT
   printf("Total heap size: %ld\n", mts_heap_total_size());
#elif M_NT
  {
    int lowest_mark = low_marks[0];
    int highest_mark = high_marks[0];
    for (i = 1; i < num_thread_pairs; i++) {
      if (low_marks[i] < lowest_mark)
	lowest_mark = low_marks[i];
      if (high_marks[i] > highest_mark)
	highest_mark = high_marks[i];
    }
    printf("Total heap size: %ld\n", highest_mark - lowest_mark);
  }
#else
   last_addr = (unsigned long)sbrk(0);
   printf("Total heap size: %ld\n", last_addr - first_addr);
#endif

#if M_NT
  showtime();
#endif

  return 0;
}
 





