glob Function

public function glob(tame, wild)

NAME

glob(3f) - [M_strings:COMPARE] compare given string for match to
a pattern which may contain globbing wildcard characters
(LICENSE:PD)

SYNOPSIS

logical function glob(string, pattern )

 character(len=*),intent(in) :: string
 character(len=*),intent(in) :: pattern

DESCRIPTION

glob(3f) compares given (entire) STRING for a match to PATTERN which may
contain basic wildcard "globbing" characters.

In this version to get a match the entire string must be described
by PATTERN. Trailing whitespace is significant, so trim the input
string to have trailing whitespace ignored.

Patterns like "b*ba" fail on a string like "babababa" because the
algorithm finds an early match. To skip over the early matches insert
an extra character at the end of the string and pattern that does
not occur in the pattern. Typically a NULL is used (char(0)).

OPTIONS

string   the input string to test to see if it contains the pattern.
pattern  the following simple globbing options are available

         o "?" matching any one character
         o "*" matching zero or more characters.
           Do NOT use adjacent asterisks.
         o spaces are significant and must be matched or pretrimmed
         o There is no escape character, so matching strings with
           literal question mark and asterisk is problematic.

EXAMPLES

Example program

program demo_glob
implicit none
! This main() routine passes a bunch of test strings
! into the above code.  In performance comparison mode,
! it does that over and over. Otherwise, it does it just
! once. Either way, it outputs a passed/failed result.
!
integer :: nReps
logical :: allpassed
integer :: i
allpassed = .true.

nReps = 10000
! Can choose as many repetitions as you're expecting
! in the real world.
nReps = 1

do i=1,nReps
   ! Cases with repeating character sequences.
   allpassed= test("a*abab",      "a*b",   .true.)  .and. allpassed
   allpassed= test("ab",          "*?",    .true.)  .and. allpassed
   allpassed= test("abc",         "*?",    .true.)  .and. allpassed
   allpassed= test("abcccd",      "*ccd",  .true.)  .and. allpassed
   allpassed= test("bLah",        "bLaH",  .false.) .and. allpassed
   allpassed= test("mississippi", "*sip*", .true.)  .and. allpassed
   allpassed= &
    & test("xxxx*zzzzzzzzy*f", "xxx*zzy*f", .true.) .and. allpassed
   allpassed= &
    & test("xxxx*zzzzzzzzy*f", "xxxx*zzy*fffff", .false.) .and. allpassed
   allpassed= &
    & test("mississipissippi", "*issip*ss*", .true.) .and. allpassed
   allpassed= &
    & test("xxxxzzzzzzzzyf", "xxxx*zzy*fffff", .false.) .and. allpassed
   allpassed= &
    & test("xxxxzzzzzzzzyf", "xxxx*zzy*f", .true.) .and. allpassed
   allpassed= test("xyxyxyzyxyz", "xy*z*xyz", .true.)  .and. allpassed
   allpassed= test("xyxyxyxyz",   "xy*xyz",   .true.)  .and. allpassed
   allpassed= test("mississippi", "mi*sip*",  .true.)  .and. allpassed
   allpassed= test("ababac",      "*abac*",   .true.)  .and. allpassed
   allpassed= test("aaazz",       "a*zz*",    .true.)  .and. allpassed
   allpassed= test("a12b12",      "*12*23",   .false.) .and. allpassed
   allpassed= test("a12b12",      "a12b",     .false.) .and. allpassed
   allpassed= test("a12b12",      "*12*12*",  .true.)  .and. allpassed

   ! Additional cases where the '*' char appears in the tame string.
   allpassed= test("*",     "*",      .true.)  .and. allpassed
   allpassed= test("a*r",   "a*",     .true.)  .and. allpassed
   allpassed= test("a*ar",  "a*aar",  .false.) .and. allpassed

   ! More double wildcard scenarios.
   allpassed= test("XYXYXYZYXYz", "XY*Z*XYz",  .true.)  .and. allpassed
   allpassed= test("missisSIPpi", "*SIP*",     .true.)  .and. allpassed
   allpassed= test("mississipPI", "*issip*PI", .true.)  .and. allpassed
   allpassed= test("xyxyxyxyz",   "xy*xyz",    .true.)  .and. allpassed
   allpassed= test("miSsissippi", "mi*sip*",   .true.)  .and. allpassed
   allpassed= test("miSsissippi", "mi*Sip*",   .false.) .and. allpassed
   allpassed= test("abAbac",      "*Abac*",    .true.)  .and. allpassed
   allpassed= test("aAazz",       "a*zz*",     .true.)  .and. allpassed
   allpassed= test("A12b12",      "*12*23",    .false.) .and. allpassed
   allpassed= test("a12B12",      "*12*12*",   .true.)  .and. allpassed
   allpassed= test("oWn",         "*oWn*",     .true.)  .and. allpassed

   ! Completely tame (no wildcards) cases.
   allpassed= test("bLah", "bLah", .true.) .and. allpassed

   ! Simple mixed wildcard tests suggested by IBMer Marlin Deckert.
   allpassed= test("a", "*?", .true.) .and. allpassed

   ! More mixed wildcard tests including coverage for false positives.
   allpassed= test("a",      "??",         .false.) .and. allpassed
   allpassed= test("ab",     "?*?",        .true.)  .and. allpassed
   allpassed= test("ab",     "*?*?*",      .true.)  .and. allpassed
   allpassed= test("abc",    "?**?*?",     .true.)  .and. allpassed
   allpassed= test("abc",    "?**?*&?",    .false.) .and. allpassed
   allpassed= test("abcd",   "?b*??",      .true.)  .and. allpassed
   allpassed= test("abcd",   "?a*??",      .false.) .and. allpassed
   allpassed= test("abcd",   "?**?c?",     .true.)  .and. allpassed
   allpassed= test("abcd",   "?**?d?",     .false.) .and. allpassed
   allpassed= test("abcde",  "?*b*?*d*?",  .true.)  .and. allpassed

   ! Single-character-match cases.
   allpassed= test("bLah",   "bL?h",  .true.)  .and. allpassed
   allpassed= test("bLaaa",  "bLa?",  .false.) .and. allpassed
   allpassed= test("bLah",   "bLa?",  .true.)  .and. allpassed
   allpassed= test("bLaH",   "?Lah",  .false.) .and. allpassed
   allpassed= test("bLaH",   "?LaH",  .true.)  .and. allpassed

   allpassed= test('abcdefghijk' ,  '?b*',     .true.)  .and. allpassed
   allpassed= test('abcdefghijk' ,  '*c*',     .true.)  .and. allpassed
   allpassed= test('abcdefghijk' ,  '*c',      .false.) .and.  allpassed
   allpassed= test('abcdefghijk' ,  '*c*k',    .true.)  .and. allpassed
   allpassed= test('LS'          ,  '?OW',     .false.) .and.  allpassed
   allpassed= test('teztit'      ,  'tez*t*t', .true.)  .and. allpassed
     ! Two pattern match problems that might pose difficulties
   allpassed= test('e '           , '*e* ',      .true.) .and. allpassed
   allpassed= test('abcde       ' , '*e      *', .true.) .and. allpassed
   allpassed= test('bababa'       , 'b*ba',      .true.) .and. allpassed
   allpassed= test('baaaaax'      , 'b*ax',      .true.) .and. allpassed
   allpassed= test('baaaaa'       , 'b*ax',      .false.) .and. allpassed
   allpassed= test('baaaaax'      , 'b*a',       .false.) .and. allpassed
   allpassed= test(''             , 'b*',        .false.) .and. allpassed
   allpassed= test(''             , '*',         .true.) .and.  allpassed
   allpassed= test('b'            , '',          .false.) .and. allpassed
   allpassed= test('3'            , '??',        .false.) .and. allpassed
   ! known flaws
   allpassed= test(''             , '',          .true.) .and. allpassed
   allpassed= test('baaaaa'       , 'b*a',       .true.) .and. allpassed
   ! add unused character to work around
   allpassed= test(''//char(0),      ''//char(0),   .true.).and.allpassed
   allpassed= test('baaaaa'//char(0),'b*a'//char(0),.true.).and.allpassed

   ! Many-wildcard scenarios.
   allpassed= test(&
   &"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa&
   &aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",&
   &"a*a*a*a*a*a*aa*aaa*a*a*b",&
   &.true.) .and. allpassed
   allpassed= test(&
   &"abababababababababababababababababababaacacacacacacac&
   &adaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
   &"*a*b*ba*ca*a*aa*aaa*fa*ga*b*",&
   &.true.) .and. allpassed
   allpassed= test(&
   &"abababababababababababababababababababaacacacacacaca&
   &cadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
   &"*a*b*ba*ca*a*x*aaa*fa*ga*b*",&
   &.false.) .and. allpassed
   allpassed= test(&
   &"abababababababababababababababababababaacacacacacacacad&
   &aeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
   &"*a*b*ba*ca*aaaa*fa*ga*gggg*b*",&
   &.false.) .and. allpassed
   allpassed= test(&
   &"abababababababababababababababababababaacacacacacacacad&
   &aeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
   &"*a*b*ba*ca*aaaa*fa*ga*ggg*b*",&
   &.true.) .and. allpassed
   allpassed= test("aaabbaabbaab","*aabbaa*a*",.true.).and.allpassed
   allpassed= &
   test("a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*",&
   &"a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", .true.) .and. allpassed
   allpassed= test("aaaaaaaaaaaaaaaaa",&
   &"*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", .true.) .and. allpassed
   allpassed= test("aaaaaaaaaaaaaaaa",&
   &"*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", .false.) .and. allpassed
   allpassed= test(&
   &"abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij&
   &*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn",&
   & "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc&
   &*abc*abc*abc*",&
   &.false.) .and. allpassed
   allpassed= test(&
   &"abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij&
   &*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn",&
   &"abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*",&
   &.true.) .and. allpassed
   allpassed= test("abc*abcd*abcd*abc*abcd",&
   &"abc*abc*abc*abc*abc", .false.) .and. allpassed
   allpassed= test( "abc*abcd*abcd*abc*abcd*abcd&
   &*abc*abcd*abc*abc*abcd", &
   &"abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd",&
   &.true.) .and. allpassed
   allpassed= test("abc",&
   &"********a********b********c********", .true.) .and. allpassed
   allpassed=&
   &test("********a********b********c********", "abc",.false.)&
   & .and.allpassed
   allpassed= &
   &test("abc", "********a********b********b********",.false.)&
   & .and.allpassed
   allpassed= test("*abc*", "***a*b*c***", .true.) .and. allpassed

   ! A case-insensitive algorithm test.
   ! allpassed=test("mississippi", "*issip*PI", .true.) .and. allpassed
 enddo

 if (allpassed)then
    write(*,'(*(g0,1x))')"Passed",nReps
 else
    write(*,'(a)')"Failed"
 endif
contains
! This is a test program for wildcard matching routines.
! It can be used either to test a single routine for correctness,
! or to compare the timings of two (or more) different wildcard
! matching routines.
!
function test(tame, wild, bExpectedResult) result(bPassed)
use M_strings, only : glob
   character(len=*) :: tame
   character(len=*) :: wild
   logical          :: bExpectedResult
   logical          :: bResult
   logical          :: bPassed
   bResult = .true.    ! We'll do "&=" cumulative checking.
   bPassed = .false.   ! Assume the worst.
   write(*,*)repeat('=',79)
   bResult = glob(tame, wild) ! Call a wildcard matching routine.

   ! To assist correctness checking, output the two strings in any
   ! failing scenarios.
   if (bExpectedResult .eqv. bResult) then
      bPassed = .true.
      if(nReps == 1) write(*,*)"Passed match on ",tame," vs. ", wild
   else
      if(nReps == 1) write(*,*)"Failed match on ",tame," vs. ", wild
   endif

end function test
end program demo_glob

Expected output

AUTHOR

John S. Urban

REFERENCE

The article “Matching Wildcards: An Empirical Way to Tame an Algorithm” in Dr Dobb’s Journal, By Kirk J. Krauss, October 07, 2014

LICENSE

Public Domain

Arguments

Type IntentOptional Attributes Name
character(len=*) :: tame
character(len=*) :: wild

Return Value logical


Contents

Source Code


Source Code

function glob(tame,wild)

! ident_6="@(#) M_strings glob(3f) function compares text strings one of which can have wildcards ('*' or '?')."

logical                    :: glob
character(len=*)           :: tame       ! A string without wildcards
character(len=*)           :: wild       ! A (potentially) corresponding string with wildcards
character(len=len(tame)+1) :: tametext
character(len=len(wild)+1) :: wildtext
character(len=1),parameter :: NULL=char(0)
integer                    :: wlen
integer                    :: ti, wi
integer                    :: i
character(len=:),allocatable :: tbookmark, wbookmark
! These two values are set when we observe a wildcard character. They
! represent the locations, in the two strings, from which we start once we have observed it.
   tametext=tame//NULL
   wildtext=wild//NULL
   tbookmark = NULL
   wbookmark = NULL
   wlen=len(wild)
   wi=1
   ti=1
   do                                            ! Walk the text strings one character at a time.
      if(wildtext(wi:wi) == '*')then             ! How do you match a unique text string?
         do i=wi,wlen                            ! Easy: unique up on it!
            if(wildtext(wi:wi) == '*')then
               wi=wi+1
            else
               exit
            endif
         enddo
         if(wildtext(wi:wi) == NULL) then        ! "x" matches "*"
            glob=.true.
            return
         endif
         if(wildtext(wi:wi)  /=  '?') then
            ! Fast-forward to next possible match.
            do while (tametext(ti:ti)  /=  wildtext(wi:wi))
               ti=ti+1
               if (tametext(ti:ti) == NULL)then
                  glob=.false.
                  return                         ! "x" doesn't match "*y*"
               endif
            enddo
         endif
         wbookmark = wildtext(wi:)
         tbookmark = tametext(ti:)
      elseif(tametext(ti:ti)  /=  wildtext(wi:wi) .and. wildtext(wi:wi)  /=  '?') then
         ! Got a non-match. If we've set our bookmarks, back up to one or both of them and retry.
         if(wbookmark /= NULL) then
            if(wildtext(wi:) /=  wbookmark) then
               wildtext = wbookmark
               wlen=len_trim(wbookmark)
               wi=1
               ! Don't go this far back again.
               if (tametext(ti:ti)  /=  wildtext(wi:wi)) then
                  tbookmark=tbookmark(2:)
                  tametext = tbookmark
                  ti=1
                  cycle                          ! "xy" matches "*y"
               else
                  wi=wi+1
               endif
            endif
            if (tametext(ti:ti) /= NULL) then
               ti=ti+1
               cycle                             ! "mississippi" matches "*sip*"
            endif
         endif
         glob=.false.
         return                                  ! "xy" doesn't match "x"
      endif
      ti=ti+1
      wi=wi+1
      if (ti > len(tametext)) then
         glob=.false.
         return
      elseif (tametext(ti:ti) == NULL) then          ! How do you match a tame text string?
         if(wildtext(wi:wi) /= NULL)then
            do while (wildtext(wi:wi) == '*')    ! The tame way: unique up on it!
               wi=wi+1                           ! "x" matches "x*"
               if(wildtext(wi:wi) == NULL)exit
            enddo
         endif
         if (wildtext(wi:wi) == NULL)then
            glob=.true.
            return                               ! "x" matches "x"
         endif
         glob=.false.
         return                                  ! "x" doesn't match "xy"
      endif
   enddo
end function glob