	.TITLE	LEAST_SQUARES
	.IDENT	/V1-001/
;++
;1 LEAST_SQUARES	
; Routine to analyse two data arrays producing a gradient (or slope).
; The 2 arrays are notionly 'graphed' with array1 being the values (X) and 
; array2 being time (Y). The result is the slope of the calculated line and
; is the rate of change of the values. 
;2 Usage_note 
; This routine provides back-end support to the CNTRPRC subsystem and uses
; symbols from CNTRPRCDEF.MAR. While the interface is generalised, modification
; will be necessary should this routine be used outside that context.
;2 Description
; If graphed, we are calculating the line that represents the rate of change
; of the values. a result of 0 is a flat line (no change), a negative value
; indicated that, over time, the values are falling and a positive result
; means values are rising. The magnitude of the result indicates the rate.
; The mechanism is Least Squares method.
;2 Calculation
; The Math is provided by Paul Cummings:
; Consider you have n points all of the form (x,y).  We want the best fit line
; that runs through them.  
; Let the equation be y = mx + c   (this is the general equation of a straight
; line)
; y is the verticle axis (measured value)
; x is the horizontal axis  (time)
; m is the gradient of the line 
; c is the y-intercept (the value of y when x is 0)
;
; m = (sum(xy) - n XY)  /  (sum(xx) - nXX)
; c = (Ysum(xx) - Xsum(xy))  /  (sum(xx) - nXX)
; 
; Here 
; n is number of data points
; X is mean of all x (sum of all x divided by n)
; Y is mean of all y 
; sum(xy) is the sum of x*y values (each point has its x value multiplied by its
; y value and then these n answers are added together)
;
;Thereafter, you can get any value of y for a given x by substituting it back
;into the formula.
;
;This is all based on minimising the sum of the vertical distances from the line
;to each data point.
;
;You can do it to minimise the perpendicular distance to the line, but this is
;only slightly more accurate and the maths gets a triffle tricky.
;
;Normally in our circumstance, you would use x as the time axis and y as the
;"value" axis then you aim to predict the value at a given time.

;If you want to see the maths behind this, I recommend
;http://mathworld.wolfram.com/LeastSquaresFitting.html as a simple explanation
;of it. 
;2 Algorythm
; Set time origin to 1 jan 2002 00:00:00
; Calculate an array of time from origin (oldest to newest) along the time
; line. Convert all integers to floats. Calculate the mean on this array.(X)
; Square each time item and sum this. (SUMXX)
; Multiply corresponding time,value points and sum them (SUM xy)
; Multiply number of points by mean of x and mean of y (nXY)
; Calc the number of data point on x and square the mean (nXX)
; subt nXY from SUM xy (VALUE1)
; subt nXX from SUMXX  (VALUE2)
; div VALUE1 by VALUE2 (m)
; m is the gradient (angle) of the line that best describes the trend through
; the data points. 
; Doing this for all metrics and comparing the resulting gradients should
; highlight those metrics that are not following the overall application trend.
; 
;2 Inputs:
;  .address of array1 - values may be F-Float, or .long int 
;  .address of array2 ; time - VMS .quad absolute time
;  .long	count of entries in the arrays
;  .long	datatype   (symbols in CNTRPRCDEF.MAR)
;  .address of .float where result is written.
;
;2 Outputs.
; For each process/metric item a gradient is produced and written in P5
;
;2 Modifications:
; 001	Jul-2002	PB	Creation
; 002	Mar-2004	PB	modified inputs to allow general use.
;--
	.library	/CNTPRC.MLB/
        $IODEF                          ;Define I/O functions and modifiers
	$SECDEF				; Global Section 
	$SSDEF
	$SMGDEF
	$TRMDEF
	CNTRPRCDEF			; Counter processing
	

	.PSECT	LEAST_SQUARES_D,WRT,NOEXE,PIC,SHR,QUAD

CALCARRAY:	.QUAD
TIMEORIGIN:	.QUAD
QTIME:		.QUAD
ARRCNT:		.FLOAT
MEANX:		.FLOAT
MEANY:		.FLOAT
SUMXX:		.FLOAT
SUMXY:		.FLOAT
NXY:		.FLOAT
NXX:		.FLOAT
VALUE1:		.FLOAT
VALUE2:		.FLOAT
GRAD:		.FLOAT
TEMPF:		.FLOAT
TIME_ORIGIN:	.ASCII	/15-MAR-2004 00:00:00.00/
TIMEORIGIN_D:	.LONG	.-TIME_ORIGIN
		.ADDRESS  TIME_ORIGIN
	.ALIGN	LONG

; Misc
       .PSECT LEAST_SQUARES_C,EXE,NOWRT,LONG
	.CALL_ENTRY	MAX_ARGS=12, HOME_ARGS=TRUE, -
			INPUT=<R2,R3,R4,R5,R6,R7,R8,R9,R10,R11>, -
			PRESERVE=<R2,R3,R4,R5,R6,R7,R8,R9,R10,R11>, -
			LABEL=LEAST_SQUARES

; Calc mem needs. If we have it then continue, if not adjust.
; It is assumed that records are fixed. That is to say that if memory is
; present, it will suffice for all calls.
; Calc:
;	(max samples *8) (Converted times + converted values)
;	--------------------------------
;		512
; Then round up
	TSTL	CALCARRAY 		; Have mem?
	BNEQ	10$                     ; Skip if so
	MULL3	#8,#CTP_C_MAXSMPL,R1
	DIVL	#512,R1
	INCL	R1
; Get mem
	CLRL	-(SP)	       		; P0 Space
	CLRL	-(SP)	       		; Access mode
	PUSHAL	CALCARRAY	       		; Returned address
	PUSHL	R1			     ; No. of pages
	CALLS	#4,G^SYS$EXPREG
	BLBS	R0,10$         		; Br no error
	RET				; Die

; The first (MAXSAMPLE*4) bytes are used for the TIME array. The remainder
; is used to the SAMPLE array. Set origins:
10$:
; Set time origin
; Convert time to binary
	$BINTIM_S-	
		TIMBUF=TIMEORIGIN_D,-
		TIMADR=TIMEORIGIN 
	BLBS	R0,20$
	RET
20$:
	MOVL	8(AP),R9		; Times
        MOVL	12(AP),R1		; Sample count
	CVTLF	R1,ARRCNT		; Make float

100$:
	MOVL	CALCARRAY,R7           	; Store times here
	MULL3	#4,#CTP_C_MAXSMPL,R1
	ADDL3	R1,R7,R8		; Store CONVERTED samples here
; Convert the absolute times to seconds elapsed since origin, convert to 
; float and store in array
	MOVL	12(AP),R11		; Loop control
	MOVL	R7,R10			; Save origin
110$:
	PUSHL	R10
	PUSHAL	TIMEORIGIN 		; Origin
	PUSHL	R9			; Next time
	CALLS	#3,G^TIMEDIFF
	BLBS	R0,120$
	RET
120$:
	CVTLF	(R10),(R10)+		; Convert and store
	ADDL	#8,R9			; Next input
	SOBGTR	R11,110$                ; Loop
; Calculate the mean on this array
	
	MOVL	12(AP),R11 		; Loop control
	MOVL	R7,R10			; Save origin
        CLRF	MEANX
200$:
	ADDF	(R10)+,MEANX
	SOBGTR	R11,200$                ; Loop

	DIVF	ARRCNT,MEANX		; Mean of X array
; Square each item and sum
	MOVL	12(AP),R11		; Loop control
	MOVL	R7,R10			; Save origin
	CLRF	SUMXX
210$:
	MULF3	(R10),(R10),GRAD	; Use GRAD temorarily
	ADDF	GRAD,SUMXX		; Sum the squared result
	ADDL	#4,R10			; Next input
	SOBGTR	R11,210$                ; Loop
; Multiply corresponding time,values and sum them
	MOVL	4(AP),R5  		; Sample table
; If datatype is .long, convert values to float
        BBS     #CTP_V_INT,16(AP),211$
        BBS     #CTP_V_QAD,16(AP),211$
        BBS     #CTP_V_FLT,16(AP),215$
	MOVL	#SS$_BADPARAM,R0
	RET
211$:
	PUSHL	12(AP)			; Sam count
	PUSHL	R5	                ; Sam array (.longs)
	CALLS	#2,G^CVT2FLT            ; Outp area fixed above
	BLBS	R0,212$                 ; Br no err
	RET
212$:
	MOVL	R8,R5


215$:
	MOVL	12(AP),R11		; Loop control
	MOVL	R7,R10			; Save origin
	CLRF	SUMXY
	CLRL	R4			; Index
220$:
	MULF3	(R10)[R4],(R5)[R4],GRAD	; Use GRAD temorarily
	ADDF	GRAD,SUMXY		; Sum the multiplied result
;	ADDL	#4,R10			; Next input
	AOBLSS	R11,R4,220$             ; Loop
; Square MEANX and multiply by number of data points
	MULF3	MEANX,MEANX,NXX
	MULF	ARRCNT,NXX


	MOVL	12(AP),R11	; Loop control
	MOVL	4(AP),R5	; Sample table
; If NOT floating input, reset array to point to converted
        BBS     #CTP_V_FLT,16(AP),222$
	MOVL	R8,R5
222$:	
        CLRF	MEANY
230$:
	ADDF	(R5),MEANY
	ADDL	#4,R5			; Next input
	SOBGTR	R11,230$                ; Loop
	DIVF	ARRCNT,MEANY		; Mean of Y array
; Multiply number of points by MEANX * MEANY
	MULF3	MEANX,MEANY,TEMPF
	MULF3	ARRCNT,TEMPF,NXY	
; Subtract NXY From SUMXY
; Subtract NXX From SUMXX
	SUBF3	NXY,SUMXY,VALUE1
	SUBF3	NXX,SUMXX,VALUE2
; GRADient = VALUE1/VALUE2
	DIVF3	VALUE2,VALUE1,@20(AP)
	MOVL	#SS$_NORMAL,R0
	RET
	
	.CALL_ENTRY	MAX_ARGS=12, HOME_ARGS=TRUE, -
			INPUT=<R2,R3,R4,R5,R6,R7,R8,R9,R10,R11>, -
			PRESERVE=<R2,R3,R4,R5,R6,R7,R8,R9,R10,R11>, -
			LABEL=CVT2FLT 
;++
;2 CVT2FLT 
;  Routine to convert array of .longs to array of .f-floats. 
;3 Input
;  .address of 1st element of .long array
;  .long number of elements
;3 Outputs
;  The longs are converted 1 by 1 to corresponding floats. The floats are
;  stored in the top half of CALCARRAY as per notes under LEAST_SQUARES item.
;3 Returns
;	SS$_NORMAL
;--

	MULL3	#4,#CTP_C_MAXSMPL,R1
	MOVL	CALCARRAY,R7
	ADDL	R1,R7
	MOVL	4(AP),R6
	MOVL	8(AP),R8
	CLRL	R9
10$:
	CVTLF	(R6)[R9],(R7)[R9]
	AOBLSS	R8,R9,10$
	MOVL	#SS$_NORMAL,R0
	RET


	.END	
