split2020 Interface

public interface split2020

Contents


Module Procedures

private pure subroutine split_tokens(string, set, tokens, separator)

NAME

split2020(3f) - [M_strings:TOKENS] parse a string into tokens using
proposed f2023 method
(LICENSE:PD)

SYNOPSIS

TOKEN form

subroutine split2020 (string, set, tokens, separator)
character(len=*),intent(in) :: string
character(len=*),intent(in) :: set
character(len=:),allocatable,intent(out) :: tokens(:)
character(len=1),allocatable,intent(out),optional :: separator(:)

BOUNDS ARRAY form

subroutine split2020 (string, set, first, last)
character(len=*),intent(in) :: string
character(len=*),intent(in) :: set
integer,allocatable,intent(out) :: first(:)
integer,allocatable,intent(out) :: last(:)

STEP THROUGH BY POSITION form

subroutine split2020 (string, set, pos [, back])
character(len=*),intent(in) :: string
character(len=*),intent(in) :: set
integer,intent(inout)       :: pos
logical,intent(in),optional :: back

DESCRIPTION

Parse a string into tokens. STRING, SET, TOKENS and SEPARATOR must
all be of the same CHARACTER kind type parameter.

OPTIONS

STRING      string to break into tokens

SET         Each character in SET is a token delimiter. A
            sequence of zero or more characters in STRING delimited by
            any token delimiter, or the beginning or end of STRING,
            comprise a token. Thus, two consecutive token delimiters
            in STRING, or a token delimiter in the first or last
            character of STRING, indicate a token with zero length.

            ??? how about if null defaults to all whitespace characters

TOKENS      It is allocated with the lower bound equal to
            one and the upper bound equal to the number of tokens in
            STRING, and with character length equal to the length of
            the longest token. The tokens in STRING are assigned by
            intrinsic assignment, in the order found, to the elements
            of TOKENS, in array element order.

            ???If input is null it still must be of size 1?

SEPARATOR   Element SEPARATOR(i) is assigned the value of
            the ith token delimiter in STRING.
            It is allocated with the lower bound equal to
            one and the upper bound equal to one less than the number
            of tokens in STRING, and with character length equal to
            one.

            ???one less than? '' ' '

FIRST     It is allocated with the lower bound equal to one and the
          upper bound equal to the number of tokens in STRING. Each
          element is assigned, in array element order, the starting
          position of each token in STRING, in the order found. If a
          token has zero length, the starting position is equal to one
          if the token is at the beginning of STRING, and one greater
          than the position of the preceding delimiter otherwise.

LAST      It is allocated with the lower bound equal to one and the
          upper bound equal to the number of tokens in STRING. Each
          element is assigned, in array element order, the ending
          position of each token in STRING, in the order found. If
          a token has zero length, the ending position is one less
          than the starting position.

POS       If BACK is present with the value .TRUE., the value
          of POS shall be in the range 0 < POS <= LEN(STRING)+1;
          otherwise it shall be in the range 0 <= POS <= LEN(STRING).

          If BACK is absent or is present with the value .FALSE., POS
          is assigned the position of the leftmost token delimiter in
          STRING whose position is greater than POS, or if there is
          no such character, it is assigned a value one greater than
          the length of STRING. This identifies a token with starting
          position one greater than the value of POS on invocation,
          and ending position one less than the value of POS on return.

          If BACK is present with the value true, POS is assigned the
          position of the rightmost token delimiter in STRING whose
          position is less than POS, or if there is no such character,
          it is assigned the value zero. This identifies a token with
          ending position one less than the value of POS on invocation,
          and starting position one greater than the value of POS
          on return.

          When SPLIT is invoked with a value for POS of
          1 <= POS <= LEN(STRING) and STRING(POS:POS) is not a
          token delimiter present in SET, the token identified by
          SPLIT does not comprise a complete token as described in the
          description of the SET argument, but rather a partial token.

BACK      shall be a logical scalar. It is an INTENT (IN) argument. If
          POS does not appear and BACK is present with the value true,
          STRING is scanned backwards for tokens starting from the
          end. If POS does not appear and BACK is absent or present
          with the value false, STRING is scanned forwards for tokens
          starting from the beginning.

EXAMPLES

Sample of uses

program demo_sort2020
! Demonstrates the three forms of the generic split2020 interface:
! the TOKEN form, the BOUNDS ARRAY form and the STEP THROUGH BY
! POSITION form.
use M_strings, only : split2020
implicit none
! list-style format: each value is written in brackets, comma separated
character(len=*),parameter :: gen='(*("[",g0,"]":,","))'

 ! Execution of TOKEN form
 block
   character (len=:), allocatable :: string
   character (len=:), allocatable :: tokens(:)
   character (len=*),parameter :: set = " ,"
   string = 'first,second,third'
   call split2020(string, set, tokens )
   write(*,gen)tokens

 ! assigns the value ['first ','second','third ' ]
 ! to TOKENS (every element is padded to the longest token length).
 endblock

 ! Execution of BOUNDS form

 block
   character (len=:), allocatable :: string
   character (len=*),parameter :: set = " ,"
   integer, allocatable        :: first(:), last(:)
   string =    'first,second,,forth'
   call split2020 (string, set, first, last)
   write(*,gen)first
   write(*,gen)last

 ! will assign the value [ 1, 7, 14, 15 ] to FIRST,
 ! and the value [ 5, 12, 13, 19 ] to LAST.
 ! The third token is zero-length, so LAST(3) = FIRST(3) - 1.
 endblock

 ! Execution of STEP form
 block
   character (len=:), allocatable :: string
   character (len=*),parameter :: set = " ,"
   integer :: p, istart, iend
   string = " one,   last  example  "
   ! POS must be defined on entry; zero starts the forward scan at the
   ! beginning of STRING (valid range is 0 <= POS <= LEN(STRING)).
   p = 0
   do while (p < len(string))
     ! the token starts one past the value of POS on invocation ...
     istart = p + 1
     call split2020 (string, set, p)
     ! ... and ends one before the value of POS on return
     iend=p-1
     ! skip zero-length tokens (iend == istart-1) produced by adjacent
     ! delimiters, but keep single-character tokens (iend == istart)
     if(iend >= istart)then
        print '(t3,a,1x,i0,1x,i0)', string (istart:iend),istart,iend
     endif
   enddo
 endblock
end program demo_sort2020

Results:

[first ],[second],[third ]
[1],[7],[14],[15]
[5],[12],[13],[19]
  one 2 4
  last 9 12
  example 15 21

  > ??? option to skip adjacent delimiters (not return null tokens)
  >     common with whitespace
  > ??? quoted strings, especially CSV, with both " and ' as quotes;
  >     Fortran's rule (an adjacent doubled quote inserts one quote)
  >     versus other rules
  > ??? escape character like \\ .
  > ??? multi-character delimiters like \\n, \\t,
  > ??? regular expression separator

AUTHOR

Milan Curcic, "milancurcic@hey.com"

LICENSE

MIT

VERSION

version 0.1.0, copyright 2020, Milan Curcic

Arguments

Type IntentOptional Attributes Name
character, intent(in) :: string
character, intent(in) :: set
character, intent(out), allocatable :: tokens(:)
character, intent(out), optional, allocatable :: separator(:)

private pure subroutine split_first_last(string, set, first, last)

Arguments

Type IntentOptional Attributes Name
character, intent(in) :: string
character, intent(in) :: set
integer, intent(out), allocatable :: first(:)
integer, intent(out), allocatable :: last(:)

private pure subroutine split_pos(string, set, pos, back)

Arguments

Type IntentOptional Attributes Name
character, intent(in) :: string
character, intent(in) :: set
integer, intent(inout) :: pos
logical, intent(in), optional :: back