norout(3f) - [M_datapac:ANALYSIS] Performs a normal outlier analysis
on the data in the input vector X.
SUBROUTINE NOROUT(X,N)
real(kind=wp),intent(in) :: X(:)
integer,intent(in) :: N
NOROUT(3f) performs a normal outlier analysis on the data in the
input vector X.
This analysis consists of--
1. various normal outlier statistics;
2. various partial sample means
3. various partial sample standard deviations;
4. the first 40 and last 40 ordered observations;
5. a line plot; and
6. a normal probability plot.
When the first 40 and last 40 ordered observations are printed out,
also included for each of the 40+40 = 80 listed data values is the
corresponding residual about the (full) sample mean, the standardized
residual, the normal n(0,1) value for the standardized residual,
and the position number in the original data vector X.
This last piece of information allows the data analyst to easily
locate back in the original data vector. A suspected outlier or
otherwise interesting observation.
X The vector of (unsorted or sorted) observations.
N The integer number of observations in the vector X.
The maximum allowable value of N for this subroutine is 7500.
4 pages of automatic printout as described in the description above.
Sample program:
program demo_norout
use M_datapac, only : norout
implicit none
! call norout(x,y)
end program demo_norout
Results:
The original DATAPAC library was written by James Filliben of the
Statistical Engineering Division, National Institute of Standards
and Technology.
John Urban, 2022.05.31
CC0-1.0
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
real(kind=wp), | intent(in) | :: | X(:) | |||
integer, | intent(in) | :: | N |
Type | Attributes | Name | Initial | |||
---|---|---|---|---|---|---|
real | :: | WS(15000) |
subroutine norout(X,N) real(kind=wp),intent(in) :: X(:) integer,intent(in) :: N real(kind=wp) :: ai, an, anm1, anm2, anm3, anm4, anm5, cdf, hold, res, s, s1, s13, s14, s2, s23, s24, s3, s4, ssq real(kind=wp) :: ssq1, ssq13, ssq14, ssq2, ssq23, ssq24, ssq3, ssq4, st1, st2, st3, st4, st5, st6, st7, st8, st9, stres, sum, sum4 real(kind=wp) :: WS, xb, xb1, xb13, xb14, xb2, xb23, xb24, xb3, xb4, xline, XPOs, Y integer :: i, icount, iflag, irev, iupper, j, mx, nm1, nm2, nm3, nm4, nm5 !--------------------------------------------------------------------- character(len=4) :: blank , hyphen , alphai , alphax character(len=4) :: iline1 character(len=4) :: iline2 ! DIMENSION Y(7500) , XPOs(7500) DIMENSION iline1(130) , iline2(130) DIMENSION xline(13) COMMON /BLOCK2_real64/ WS(15000) EQUIVALENCE (Y(1),WS(1)) EQUIVALENCE (XPOs(1),WS(7501)) ! DATA blank , hyphen , alphai , alphax/' ' , '-' , 'I' , 'X'/ ! iupper = 7500 ! ! CHECK THE INPUT ARGUMENTS FOR ERRORS ! IF ( N<1 .OR. N>iupper ) THEN WRITE (G_IO,99001) iupper 99001 FORMAT (' ***** FATAL ERROR--The second input argument to NOROUT(3f) is outside the allowable (1,',& & I0,') intervaL *****') WRITE (G_IO,99002) N 99002 FORMAT (' ','***** The value of the argument is ',I0,' *****') RETURN ELSE IF ( N==1 ) THEN WRITE (G_IO,99003) 99003 FORMAT (' ***** NON-FATAL DIAGNOSTIC--The second input argument to NOROUT(3f) has the value 1 *****') RETURN ELSE hold = X(1) DO i = 2 , N IF ( X(i)/=hold ) GOTO 50 ENDDO WRITE (G_IO,99004) hold 99004 FORMAT (' ***** NON-FATAL DIAGNOSTIC--The first input argument (a vector) to NOROUT(3f) has all elements = ',& & E15.8,' *****') RETURN ENDIF ! !-----START POINT----------------------------------------------------- ! 50 nm1 = N - 1 nm2 = N - 2 nm3 = N - 3 nm4 = N - 4 nm5 = N - 5 an = N anm1 = nm1 anm2 = nm2 anm3 = nm3 anm4 = nm4 anm5 = nm5 ! ! SORT THE DATA AND ALSO CARRY ALONG THE OBSERVATION NUMBER--THAT IS, ! THE POSITION IN THE ORIGINAL DATA SET OF THE I-TH ORDER STATISTIC ! CALL SORTP(X,N,Y,XPOs) ! ! COMPUTE PARTIAL SAMPLE MEANS ! sum = 0.0_wp DO i = 3 , nm2 sum = sum + Y(i) ENDDO xb23 = sum/anm4 xb13 = (sum+Y(2))/anm3 xb24 = (sum+Y(nm1))/anm3 xb3 = (sum+Y(1)+Y(2))/anm2 xb2 = (sum+Y(nm1)+Y(N))/anm2 xb14 = (sum+Y(2)+Y(nm1))/anm2 xb4 = (sum+Y(1)+Y(2)+Y(nm1))/anm1 xb1 = (sum+Y(2)+Y(nm1)+Y(N))/anm1 xb = (sum+Y(1)+Y(2)+Y(nm1)+Y(N))/an ! ! COMPUTE PARTIAL SUMS OF SQUARED DEVIATIONS ! ABOUT THE PARTIAL SAMPLE MEANS ! ssq = 0.0_wp ssq1 = 0.0_wp ssq4 = 0.0_wp ssq14 = 0.0_wp ssq2 = 0.0_wp ssq3 = 0.0_wp ssq24 = 0.0_wp ssq13 = 0.0_wp ssq23 = 0.0_wp DO i = 1 , N ssq = ssq + (Y(i)-xb)**2 ENDDO DO i = 2 , N ssq1 = ssq1 + (Y(i)-xb1)**2 ENDDO DO i = 1 , nm1 ssq4 = ssq4 + (Y(i)-xb4)**2 ENDDO DO i = 2 , nm1 ssq14 = ssq14 + (Y(i)-xb14)**2 ENDDO DO i = 3 , N ssq2 = ssq2 + (Y(i)-xb2)**2 ENDDO DO i = 1 , nm2 ssq3 = ssq3 + (Y(i)-xb3)**2 ENDDO DO i = 3 , nm1 ssq24 = ssq24 + (Y(i)-xb24)**2 ENDDO DO i = 2 , nm2 ssq13 = ssq13 + (Y(i)-xb13)**2 ENDDO DO i = 3 , nm2 ssq23 = ssq23 + (Y(i)-xb23)**2 ENDDO ! ! COMPUTE PARTIAL SAMPLE STANDARD DEVIATIONS ! s = SQRT(ssq/anm1) s1 = SQRT(ssq1/anm2) s4 = SQRT(ssq4/anm2) s14 = SQRT(ssq14/anm3) s2 = SQRT(ssq2/anm3) s3 = SQRT(ssq3/anm3) s24 = SQRT(ssq24/anm4) s13 = SQRT(ssq13/anm4) s23 = SQRT(ssq23/anm5) ! ! COMPUTE OUTLIER STATISTICS ! OMIT NO OBSERVATIONS, TEST FOR X(1) st1 = (xb-Y(1))/s ! OMIT NO OBSERVATIONS, TEST FOR X(N) st2 = (Y(N)-xb)/s ! OMIT NO OBSERVATIONS, TEST FOR X(1) AND X(N) SIMULTANEOUSLY st3 = (Y(N)-Y(1))/s ! OMIT X(1), TEST FOR X(2) st4 = ssq2/ssq ! OMIT X(N), TEST FOR X(N-1) st5 = ssq3/ssq ! OMIT X(1) AND X(N), TEST FOR X(2) st6 = (xb14-Y(2))/s14 ! OMIT X(1) AND X(N), TEST FOR X(N-1) st7 = (Y(nm1)-xb14)/s14 ! OMIT X(1) AND X(N), TEST FOR X(2) AND X(N-1) st8 = (Y(nm1)-Y(2))/s14 sum4 = 0.0_wp DO i = 2 , nm2 sum4 = sum4 + (Y(i)-xb14)**4 ENDDO st9 = (an-2.0_wp)*sum4/(ssq14**2) st9 = st9 + 3.0_wp ! ! COMPUTE THE LINE PLOT WHICH SHOWS THE DISTRIBUTION OF THE OBSERVED ! VALUES IN TERMS OF MULTIPLES OF SAMPLE STANDARD DEVIATIONS AWAY FROM ! THE SAMPLE MEAN ! DO i = 1 , 130 iline1(i) = blank iline2(i) = blank ENDDO icount = 0 DO i = 1 , N mx = 10.0_wp*(((X(i)-xb)/s)+6.0_wp) + 0.5_wp mx = mx + 7 IF ( mx<7 .OR. mx>127 ) icount = icount + 1 IF ( mx>=7 .AND. mx<=127 ) iline1(mx) = alphax ENDDO DO i = 7 , 127 iline2(i) = hyphen ENDDO DO i = 7 , 127 , 10 iline2(i) = alphai ENDDO xline(7) = xb DO i = 1 , 6 irev = 13 - i + 1 ai = i xline(i) = xb - (7.0_wp-ai)*s xline(irev) = xb + (7.0_wp-ai)*s ENDDO ! ! WRITE EVERYTHING OUT ! ! WRITE OUT THE OUTLIER STATISTICS ! WRITE (G_IO,99041) WRITE (G_IO,99005) 99005 FORMAT (' ',48X,'NORMAL OUTLIER ANALYSIS') WRITE (G_IO,99042) WRITE (G_IO,99006) N 99006 FORMAT (' ',46X,'(THE SAMPLE SIZE N = ',I0,')') WRITE (G_IO,99042) WRITE (G_IO,99007) 99007 FORMAT (' ',39X,'REFERENCE--GRUBBS, TECHNOMETRICS, 1969, pages 1-21') DO i = 1 , 6 WRITE (G_IO,99042) ENDDO WRITE (G_IO,99008) 99008 FORMAT (' ',49X,'OUTLIER STATISTICS') WRITE (G_IO,99042) WRITE (G_IO,99042) WRITE (G_IO,99009) 99009 FORMAT (& & ' OMIT TEST FORM VALUE PSEUDO-SAMPLE SIZE TABLE') WRITE (G_IO,99010) 99010 FORMAT (& & ' AS AN OUTLIER AS AN OUTLIER OF STATISTIC OF STATISTIC FOR TABLE LOOK-UP REFERENCE') WRITE (G_IO,99042) WRITE (G_IO,99011) st1 , N 99011 FORMAT (& & ' NONE X(1) (XBAR - X(1))/S ',F8.4,' N = ',& & I5,' GRUBBS, TECH., 1969, P. 4') WRITE (G_IO,99012) st2 , N 99012 FORMAT (' ',' NONE X(N) (X(N) - XBAR)/S & &',F8.4,' N = ',I5,' GRUBBS, TECH., 1969, P. 4') WRITE (G_IO,99013) st3 , N 99013 FORMAT (' ',& &' NONE X(1) AND X(N) RANGE/S ',F8.4,' N = ',I5,' GRUBBS, TECH., 1969, P. 8') WRITE (G_IO,99014) st4 , N 99014 FORMAT (' ',& &' X(1) X(2) SSQD(1,2)/SSQD ',F8.4,' N = ',I5,' GRUBBS, TECH., 1969, P. 11') WRITE (G_IO,99015) st5 , N 99015 FORMAT (' ',& &' X(N) X(N-1) SSQD(N-1,N)/SSQD ',F8.4,' N = ',I5,' GRUBBS, TECH., 1969, P. 11') WRITE (G_IO,99016) st6 , nm2 99016 FORMAT (' ',& &'X(1) AND X(N) X(2) (XBAR(1,N) - X(2))/S(1,N) ',F8.4,' N-2 = ',I5,' GRUBBS, TECH., 1969, P. 4') WRITE (G_IO,99017) st7 , nm2 99017 FORMAT (' ',& &'X(1) AND X(N) X(N-1) (X(N-1) - XBAR(1,N))/S(1,N) ',F8.4,' N-2 = ',I5,' GRUBBS, TECH., 1969, P. 4') WRITE (G_IO,99018) st8 , nm2 99018 FORMAT (' ',& &'X(1) AND X(N) X(2) AND X(N-1) RANGE(1,N)/S(1,N) ',F8.4,' N-2 = ',I5,' GRUBBS, TECH., 1969, P. 8') WRITE (G_IO,99019) st9 , nm2 99019 FORMAT (' ',& &'X(1) AND X(N) X(2) AND X(N-1) SAMPLE KURTOSIS(1,N) ',F8.4,' N-2 = ',I5,' GRUBBS, TECH., 1969, P. 14') DO i = 1 , 10 WRITE (G_IO,99042) ENDDO ! ! WRITE OUT THE PARTIAL SAMPLE MEANS ! AND THE PARTIAL SAMPLE STANDARD DEVIATIONS. ! WRITE (G_IO,99020) 99020 FORMAT (' ',30X, & & 'Partial sample means and partial sample standard deviations'& & ) WRITE (G_IO,99042) WRITE (G_IO,99042) WRITE (G_IO,99021) 99021 FORMAT (' ', & &' OMIT PARTIAL SAMPLE PARTIAL SAMPLE& &') WRITE (G_IO,99022) 99022 FORMAT (' ', & &' AS AN OUTLIER MEAN STANDARD DEVIATI& &ON') WRITE (G_IO,99042) WRITE (G_IO,99023) xb , s 99023 FORMAT (' ',' NONE ',E15.8,5X,E15.8) WRITE (G_IO,99024) xb1 , s1 99024 FORMAT (' ',' X(1) ',E15.8,5X,E15.8) WRITE (G_IO,99025) xb4 , s4 99025 FORMAT (' ',' X(N) ',E15.8,5X,E15.8) WRITE (G_IO,99026) xb14 , s14 99026 FORMAT (' ',' X(1) AND X(N) ',E15.8,5X,E15.8) WRITE (G_IO,99027) xb2 , s2 99027 FORMAT (' ',' X(1) AND X(2) ',E15.8,5X,E15.8) WRITE (G_IO,99028) xb3 , s3 99028 FORMAT (' ',' X(N-1) AND X(N) ',E15.8,5X,E15.8) WRITE (G_IO,99029) xb24 , s24 99029 FORMAT (' ',' X(1), X(2), AND X(N) ',E15.8,5X,E15.8) WRITE (G_IO,99030) xb13 , s13 99030 FORMAT (' ',' X(1), X(N-1), AND X(N) ',E15.8,5X,E15.8) WRITE (G_IO,99031) xb23 , s23 99031 FORMAT (' ','X(1), X(2), X(N-1), AND X(N) ',E15.8,5X,E15.8) ! ! WRITE OUT THE FIRST 40 AND LAST 40 ORDERED OBSERVATIONS, ! INCLUDING THEIR RESIDUALS ABOUT THE (FULL) SAMPLE MEAN, ! THE STANDARDIZED RESIDUALS, ! THE NORMAL N(0,1) CUMULATIVE DISTRIBUTION FUNCTION VALUE ! OF THE STANDARDIZED RESIDUAL, AND ! THE POSITION NUMBER IN THE ORIGINAL DATA VECTOR X. ! WRITE (G_IO,99041) WRITE (G_IO,99032) 99032 FORMAT (' ',& &'Order Statistics, Residuals about the sample mean, Standardized r& &Esiduals, and Normal(0,1) cumulative distribution function values'& &) WRITE (G_IO,99042) WRITE (G_IO,99042) WRITE (G_IO,99033) 99033 FORMAT (' ',& &' INDEX ORDERED RESIDUALS STANDARDIZED NORMAL(0,1) OBSERVATION') WRITE (G_IO,99034) 99034 FORMAT (' ',& &' OBSERVATIONS ABOUT THE RESIDUALS CDF VALUES OF THE NUMBER') WRITE (G_IO,99035) 99035 FORMAT (' ',& &' SAMPLE MEAN STANDARDIZED') WRITE (G_IO,99036) 99036 FORMAT (' ',& &' RESIDUALS') WRITE (G_IO,99042) IF ( N<=80 ) THEN DO i = 1 , N res = Y(i) - xb stres = res/s CALL NORCDF(stres,cdf) WRITE (G_IO,99043) i , Y(i) , res , stres , cdf , XPOs(i) iflag = i - (i/10)*10 IF ( iflag==0 ) WRITE (G_IO,99042) ENDDO ELSE DO i = 1 , 80 IF ( i<=40 ) j = i IF ( i>=41 ) j = i + N - 80 res = Y(j) - xb stres = res/s CALL NORCDF(stres,cdf) WRITE (G_IO,99043) j , Y(j) , res , stres , cdf , XPOs(j) iflag = i - (i/10)*10 IF ( iflag==0 ) WRITE (G_IO,99042) ENDDO ENDIF DO i = 1 , 10 WRITE (G_IO,99042) ENDDO ! ! WRITE OUT THE LINE PLOT SHOWING THE DEVIATIONS ! OF THE OBSERVATIONS ABOUT THE (FULL) SAMPLE MEAN ! IN TERMS OF MULTIPLES OF THE (FULL) SAMPLE STANDARD ! DEVIATION. ! WRITE (G_IO,99037) 99037 FORMAT (' ', & &'LINE PLOT SHOWING THE DISTRIBUTION OF THE OBSERVATIONS ABOUT THE & &SAMPLE MEAN IN TERMS OF MULTIPLES OF THE SAMPLE STANDARD DEVIATION& &') WRITE (G_IO,99042) WRITE (G_IO,99042) WRITE (G_IO,99044) (iline1(i),i=1,130) WRITE (G_IO,99044) (iline2(i),i=1,130) WRITE (G_IO,99038) 99038 FORMAT (' ',& &' -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6') WRITE (G_IO,99039) (xline(i),i=1,13) 99039 FORMAT (' ',13F10.4) WRITE (G_IO,99042) WRITE (G_IO,99040) icount 99040 FORMAT (' ',10X,I5,& &' OBSERVATIONS WERE IN EXCESS OF 6 SAMPLE STANDARD DEVIATIONS FROM THE SAMPLE MEAN AND SO WERE NOT PLOTTED') ! ! WRITE OUT A NORMAL PROBABILITY PLOT ! CALL NORPLT(Y,N) ENDIF ! 99041 FORMAT ('1') 99042 FORMAT (' ') 99043 FORMAT (' ',I5,4X,E15.8,1X,E15.8,7X,F7.2,11X,F8.5,11X,F7.0) 99044 FORMAT (' ',130A1) ! END SUBROUTINE NOROUT