split Subroutine

public subroutine split(input_line, array, delimiters, order, nulls)

NAME

split(3f) - [M_strings:TOKENS] parse string into an array using
specified delimiters
(LICENSE:PD)

SYNOPSIS

subroutine split(input_line,array,delimiters,order,nulls)

 character(len=*),intent(in)              :: input_line
 character(len=:),allocatable,intent(out) :: array(:)
 character(len=*),optional,intent(in)     :: delimiters
 character(len=*),optional,intent(in)     :: order
 character(len=*),optional,intent(in)     :: nulls

DESCRIPTION

 SPLIT(3f) parses a string using specified delimiter characters and
 store tokens into an allocatable array

OPTIONS

INPUT_LINE  Input string to tokenize

ARRAY       Output array of tokens

DELIMITERS  List of delimiter characters.
            The default delimiters are the "whitespace" characters
            (space, tab,new line, vertical tab, formfeed, carriage
            return, and null). You may specify an alternate set of
            delimiter characters.

            Multi-character delimiters are not supported (Each
            character in the DELIMITERS list is considered to be
            a delimiter).

            Quoting of delimiter characters is not supported.

ORDER SEQUENTIAL|REVERSE|RIGHT  Order of output array.
            By default ARRAY contains the tokens having parsed
            the INPUT_LINE from left to right. If ORDER='RIGHT'
            or ORDER='REVERSE' the parsing goes from right to left.
            (This can be accomplished with array syntax in modern
            Fortran, but was more useful pre-fortran90).

NULLS=IGNORE|RETURN|IGNOREEND  Treatment of null fields.
            By default adjacent delimiters in the input string
            do not create an empty string in the output array. if
            NULLS='return' adjacent delimiters create an empty element
            in the output ARRAY. If NULLS='ignoreend' then only
            trailing delimiters at the right of the string are ignored.

EXAMPLES

Sample program:

program demo_split
use M_strings, only: split
implicit none
integer :: i
character(len=*),parameter     :: line=&
'  aBcdef   ghijklmnop qrstuvwxyz  1:|:2     333|333 a B cc    '
character(len=:),allocatable :: array(:) ! output array of tokens
   write(*,*)'INPUT LINE:['//line//']'
   write(*,'(70("="))')
   write(*,*)'typical call:'
   call split(line,array)
   write(*,'(i0," ==> ",a)')(i,trim(array(i)),i=1,size(array))
   write(*,*)'SIZE:',size(array)
   write(*,'(70("-"))')
   write(*,*)'custom list of delimiters (colon and vertical line):'
   call split(line,array,delimiters=':|',&
   & order='sequential',nulls='ignore')
   write(*,'(i0," ==> ",a)')(i,trim(array(i)),i=1,size(array))
   write(*,*)'SIZE:',size(array)
   write(*,'(70("-"))')
   write(*,*) 'custom list of delimiters, &
   &reverse array order and count null fields:'
   call split(line,array,delimiters=':|',&
   &order='reverse',nulls='return')
   write(*,'(i0," ==> ",a)')(i,trim(array(i)),i=1,size(array))
   write(*,*)'SIZE:',size(array)
   write(*,'(70("-"))')
   write(*,*)'INPUT LINE:['//line//']'
   write(*,*) 'default delimiters and reverse array order &
   &and return null fields:'
   call split(line,array,delimiters='',order='reverse',nulls='return')
   write(*,'(i0," ==> ",a)')(i,trim(array(i)),i=1,size(array))
   write(*,*)'SIZE:',size(array)
end program demo_split

Output

INPUT LINE:[ aBcdef ghijklmnop qrstuvwxyz 1:|:2 333| 333 a B cc ] ================================================================= typical call: 1 ==> aBcdef 2 ==> ghijklmnop 3 ==> qrstuvwxyz 4 ==> 1:|:2 5 ==> 333|333 6 ==> a 7 ==> B 8 ==> cc SIZE: 8


custom list of delimiters (colon and vertical line): 1 ==> aBcdef ghijklmnop qrstuvwxyz 1 2 ==> 2 333 3 ==> 333 a B cc SIZE: 3


custom list of delimiters, reverse array order and return null fields: 1 ==> 333 a B cc 2 ==> 2 333 3 ==> 4 ==> 5 ==> aBcdef ghijklmnop qrstuvwxyz 1 SIZE: 5


INPUT LINE:[ aBcdef ghijklmnop qrstuvwxyz 1:|:2 333| 333 a B cc ] default delimiters and reverse array order and count null fields: 1 ==> 2 ==> 3 ==> 4 ==> cc 5 ==> B 6 ==> a 7 ==> 333|333 8 ==> 9 ==> 10 ==> 11 ==> 12 ==> 1:|:2 13 ==> 14 ==> qrstuvwxyz 15 ==> ghijklmnop 16 ==> 17 ==> 18 ==> aBcdef 19 ==> 20 ==> SIZE: 20

AUTHOR

John S. Urban

LICENSE

Public Domain

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: input_line
character(len=:), intent(out), allocatable :: array(:)
character(len=*), intent(in), optional :: delimiters
character(len=*), intent(in), optional :: order
character(len=*), intent(in), optional :: nulls

Contents

Source Code


Source Code

subroutine split(input_line,array,delimiters,order,nulls)
!-----------------------------------------------------------------------------------------------------------------------------------

! ident_8="@(#) M_strings split(3f) parse string on delimiter characters and store tokens into an allocatable array"

!  John S. Urban
!-----------------------------------------------------------------------------------------------------------------------------------
intrinsic index, min, present, len
!-----------------------------------------------------------------------------------------------------------------------------------
!  given a line of structure " par1 par2 par3 ... parn " store each par(n) into a separate variable in array.
!    o by default adjacent delimiters in the input string do not create an empty string in the output array
!    o no quoting of delimiters is supported
character(len=*),intent(in)              :: input_line  ! input string to tokenize
character(len=*),optional,intent(in)     :: delimiters  ! list of delimiter characters
character(len=*),optional,intent(in)     :: order       ! order of output array sequential|[reverse|right]
character(len=*),optional,intent(in)     :: nulls       ! return strings composed of delimiters or not ignore|return|ignoreend
character(len=:),allocatable,intent(out) :: array(:)    ! output array of tokens
!-----------------------------------------------------------------------------------------------------------------------------------
integer                       :: n                      ! max number of strings INPUT_LINE could split into if all delimiter
integer,allocatable           :: ibegin(:)              ! positions in input string where tokens start
integer,allocatable           :: iterm(:)               ! positions in input string where tokens end
character(len=:),allocatable  :: dlim                   ! string containing delimiter characters
character(len=:),allocatable  :: ordr                   ! string containing order keyword
character(len=:),allocatable  :: nlls                   ! string containing nulls keyword
integer                       :: ii,iiii                ! loop parameters used to control print order
integer                       :: icount                 ! number of tokens found
integer                       :: lgth                   ! length of input string with trailing spaces trimmed
integer                       :: i10,i20,i30            ! loop counters
integer                       :: icol                   ! pointer into input string as it is being parsed
integer                       :: idlim                  ! number of delimiter characters
integer                       :: ifound                 ! where next delimiter character is found in remaining input string data
integer                       :: inotnull               ! count strings not composed of delimiters
integer                       :: ireturn                ! number of tokens returned
integer                       :: imax                   ! length of longest token
!-----------------------------------------------------------------------------------------------------------------------------------
   ! decide on value for optional DELIMITERS parameter
   if (present(delimiters)) then                                     ! optional delimiter list was present
      if(delimiters /= '')then                                       ! if DELIMITERS was specified and not null use it
         dlim=delimiters
      else                                                           ! DELIMITERS was specified on call as empty string
         dlim=' '//char(9)//char(10)//char(11)//char(12)//char(13)//char(0) ! use default delimiter when not specified
      endif
   else                                                              ! no delimiter value was specified
      dlim=' '//char(9)//char(10)//char(11)//char(12)//char(13)//char(0)    ! use default delimiter when not specified
   endif
   idlim=len(dlim)                                                   ! dlim a lot of blanks on some machines if dlim is a big string
!-----------------------------------------------------------------------------------------------------------------------------------
   if(present(order))then; ordr=lower(adjustl(order)); else; ordr='sequential'; endif ! decide on value for optional ORDER parameter
   if(present(nulls))then; nlls=lower(adjustl(nulls)); else; nlls='ignore'    ; endif ! optional parameter
!-----------------------------------------------------------------------------------------------------------------------------------
   n=len(input_line)+1                        ! max number of strings INPUT_LINE could split into if all delimiter
   if(allocated(ibegin))deallocate(ibegin)    !x! intel compiler says allocated already ?
   if(allocated(iterm))deallocate(iterm)      !x! intel compiler says allocated already ?
   allocate(ibegin(n))                        ! allocate enough space to hold starting location of tokens if string all tokens
   allocate(iterm(n))                         ! allocate enough space to hold ending location of tokens if string all tokens
   ibegin(:)=1
   iterm(:)=1
!-----------------------------------------------------------------------------------------------------------------------------------
   lgth=len(input_line)                                           ! lgth is the column position of the last non-blank character
   icount=0                                                       ! how many tokens found
   inotnull=0                                                     ! how many tokens found not composed of delimiters
   imax=0                                                         ! length of longest token found
!-----------------------------------------------------------------------------------------------------------------------------------
   if(lgth > 0)then                                              ! there is at least one non-delimiter in INPUT_LINE if get here
      icol=1                                                      ! initialize pointer into input line
      INFINITE: do i30=1,lgth,1                                   ! store into each array element
         ibegin(i30)=icol                                         ! assume start new token on the character
         if(index(dlim(1:idlim),input_line(icol:icol)) == 0)then  ! if current character is not a delimiter
            iterm(i30)=lgth                                       ! initially assume no more tokens
            do i10=1,idlim                                        ! search for next delimiter
               ifound=index(input_line(ibegin(i30):lgth),dlim(i10:i10))
               IF(ifound > 0)then
                  iterm(i30)=min(iterm(i30),ifound+ibegin(i30)-2)
               endif
            enddo
            icol=iterm(i30)+2                                     ! next place to look as found end of this token
            inotnull=inotnull+1                                   ! increment count of number of tokens not composed of delimiters
         else                                                     ! character is a delimiter for a null string
            iterm(i30)=icol-1                                     ! record assumed end of string. Will be less than beginning
            icol=icol+1                                           ! advance pointer into input string
         endif
         imax=max(imax,iterm(i30)-ibegin(i30)+1)
         icount=i30                                               ! increment count of number of tokens found
         if(icol > lgth)then                                     ! no text left
            exit INFINITE
         endif
      enddo INFINITE
   endif
!-----------------------------------------------------------------------------------------------------------------------------------
   select case (trim(adjustl(nlls)))
   case ('ignore','','ignoreend')
      ireturn=inotnull
   case default
      ireturn=icount
   end select
   allocate(character(len=imax) :: array(ireturn))                ! allocate the array to return
   !allocate(array(ireturn))                                       ! allocate the array to return
!-----------------------------------------------------------------------------------------------------------------------------------
   select case (trim(adjustl(ordr)))                              ! decide which order to store tokens
   case ('reverse','right') ; ii=ireturn ; iiii=-1                ! last to first
   case default             ; ii=1       ; iiii=1                 ! first to last
   end select
!-----------------------------------------------------------------------------------------------------------------------------------
   do i20=1,icount                                                ! fill the array with the tokens that were found
      if(iterm(i20) < ibegin(i20))then
         select case (trim(adjustl(nlls)))
         case ('ignore','','ignoreend')
         case default
            array(ii)=' '
            ii=ii+iiii
         end select
      else
         array(ii)=input_line(ibegin(i20):iterm(i20))
         ii=ii+iiii
      endif
   enddo
!-----------------------------------------------------------------------------------------------------------------------------------
   end subroutine split