glob(3f) - [M_strings:COMPARE] compare given string for match to
a pattern which may contain globbing wildcard characters
(LICENSE:PD)
logical function glob(string, pattern )
character(len=*),intent(in) :: string
character(len=*),intent(in) :: pattern
glob(3f) compares given (entire) STRING for a match to PATTERN which may
contain basic wildcard "globbing" characters.
In this version to get a match the entire string must be described
by PATTERN. Trailing whitespace is significant, so trim the input
string to have trailing whitespace ignored.
Patterns like "b*ba" fail on a string like "babababa" because the
algorithm finds an early match. To skip over the early matches insert
an extra character at the end of the string and pattern that does
not occur in the pattern. Typically a NULL is used (char(0)).
string the input string to test to see if it contains the pattern.
pattern the following simple globbing options are available
o "?" matching any one character
o "*" matching zero or more characters.
Do NOT use adjacent asterisks.
o spaces are significant and must be matched or pretrimmed
o There is no escape character, so matching strings with
literal question mark and asterisk is problematic.
Example program
program demo_glob
implicit none
! This main() routine passes a bunch of test strings
! into the above code. In performance comparison mode,
! it does that over and over. Otherwise, it does it just
! once. Either way, it outputs a passed/failed result.
!
integer :: nReps
logical :: allpassed
integer :: i
allpassed = .true.
nReps = 10000
! Can choose as many repetitions as you're expecting
! in the real world.
nReps = 1
do i=1,nReps
! Cases with repeating character sequences.
allpassed= test("a*abab", "a*b", .true.) .and. allpassed
allpassed= test("ab", "*?", .true.) .and. allpassed
allpassed= test("abc", "*?", .true.) .and. allpassed
allpassed= test("abcccd", "*ccd", .true.) .and. allpassed
allpassed= test("bLah", "bLaH", .false.) .and. allpassed
allpassed= test("mississippi", "*sip*", .true.) .and. allpassed
allpassed= &
& test("xxxx*zzzzzzzzy*f", "xxx*zzy*f", .true.) .and. allpassed
allpassed= &
& test("xxxx*zzzzzzzzy*f", "xxxx*zzy*fffff", .false.) .and. allpassed
allpassed= &
& test("mississipissippi", "*issip*ss*", .true.) .and. allpassed
allpassed= &
& test("xxxxzzzzzzzzyf", "xxxx*zzy*fffff", .false.) .and. allpassed
allpassed= &
& test("xxxxzzzzzzzzyf", "xxxx*zzy*f", .true.) .and. allpassed
allpassed= test("xyxyxyzyxyz", "xy*z*xyz", .true.) .and. allpassed
allpassed= test("xyxyxyxyz", "xy*xyz", .true.) .and. allpassed
allpassed= test("mississippi", "mi*sip*", .true.) .and. allpassed
allpassed= test("ababac", "*abac*", .true.) .and. allpassed
allpassed= test("aaazz", "a*zz*", .true.) .and. allpassed
allpassed= test("a12b12", "*12*23", .false.) .and. allpassed
allpassed= test("a12b12", "a12b", .false.) .and. allpassed
allpassed= test("a12b12", "*12*12*", .true.) .and. allpassed
! Additional cases where the '*' char appears in the tame string.
allpassed= test("*", "*", .true.) .and. allpassed
allpassed= test("a*r", "a*", .true.) .and. allpassed
allpassed= test("a*ar", "a*aar", .false.) .and. allpassed
! More double wildcard scenarios.
allpassed= test("XYXYXYZYXYz", "XY*Z*XYz", .true.) .and. allpassed
allpassed= test("missisSIPpi", "*SIP*", .true.) .and. allpassed
allpassed= test("mississipPI", "*issip*PI", .true.) .and. allpassed
allpassed= test("xyxyxyxyz", "xy*xyz", .true.) .and. allpassed
allpassed= test("miSsissippi", "mi*sip*", .true.) .and. allpassed
allpassed= test("miSsissippi", "mi*Sip*", .false.) .and. allpassed
allpassed= test("abAbac", "*Abac*", .true.) .and. allpassed
allpassed= test("aAazz", "a*zz*", .true.) .and. allpassed
allpassed= test("A12b12", "*12*23", .false.) .and. allpassed
allpassed= test("a12B12", "*12*12*", .true.) .and. allpassed
allpassed= test("oWn", "*oWn*", .true.) .and. allpassed
! Completely tame (no wildcards) cases.
allpassed= test("bLah", "bLah", .true.) .and. allpassed
! Simple mixed wildcard tests suggested by IBMer Marlin Deckert.
allpassed= test("a", "*?", .true.) .and. allpassed
! More mixed wildcard tests including coverage for false positives.
allpassed= test("a", "??", .false.) .and. allpassed
allpassed= test("ab", "?*?", .true.) .and. allpassed
allpassed= test("ab", "*?*?*", .true.) .and. allpassed
allpassed= test("abc", "?**?*?", .true.) .and. allpassed
allpassed= test("abc", "?**?*&?", .false.) .and. allpassed
allpassed= test("abcd", "?b*??", .true.) .and. allpassed
allpassed= test("abcd", "?a*??", .false.) .and. allpassed
allpassed= test("abcd", "?**?c?", .true.) .and. allpassed
allpassed= test("abcd", "?**?d?", .false.) .and. allpassed
allpassed= test("abcde", "?*b*?*d*?", .true.) .and. allpassed
! Single-character-match cases.
allpassed= test("bLah", "bL?h", .true.) .and. allpassed
allpassed= test("bLaaa", "bLa?", .false.) .and. allpassed
allpassed= test("bLah", "bLa?", .true.) .and. allpassed
allpassed= test("bLaH", "?Lah", .false.) .and. allpassed
allpassed= test("bLaH", "?LaH", .true.) .and. allpassed
allpassed= test('abcdefghijk' , '?b*', .true.) .and. allpassed
allpassed= test('abcdefghijk' , '*c*', .true.) .and. allpassed
allpassed= test('abcdefghijk' , '*c', .false.) .and. allpassed
allpassed= test('abcdefghijk' , '*c*k', .true.) .and. allpassed
allpassed= test('LS' , '?OW', .false.) .and. allpassed
allpassed= test('teztit' , 'tez*t*t', .true.) .and. allpassed
! Two pattern match problems that might pose difficulties
allpassed= test('e ' , '*e* ', .true.) .and. allpassed
allpassed= test('abcde ' , '*e *', .true.) .and. allpassed
allpassed= test('bababa' , 'b*ba', .true.) .and. allpassed
allpassed= test('baaaaax' , 'b*ax', .true.) .and. allpassed
allpassed= test('baaaaa' , 'b*ax', .false.) .and. allpassed
allpassed= test('baaaaax' , 'b*a', .false.) .and. allpassed
allpassed= test('' , 'b*', .false.) .and. allpassed
allpassed= test('' , '*', .true.) .and. allpassed
allpassed= test('b' , '', .false.) .and. allpassed
allpassed= test('3' , '??', .false.) .and. allpassed
! known flaws
allpassed= test('' , '', .true.) .and. allpassed
allpassed= test('baaaaa' , 'b*a', .true.) .and. allpassed
! add unused character to work around
allpassed= test(''//char(0), ''//char(0), .true.).and.allpassed
allpassed= test('baaaaa'//char(0),'b*a'//char(0),.true.).and.allpassed
! Many-wildcard scenarios.
allpassed= test(&
&"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa&
&aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab",&
&"a*a*a*a*a*a*aa*aaa*a*a*b",&
&.true.) .and. allpassed
allpassed= test(&
&"abababababababababababababababababababaacacacacacacac&
&adaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
&"*a*b*ba*ca*a*aa*aaa*fa*ga*b*",&
&.true.) .and. allpassed
allpassed= test(&
&"abababababababababababababababababababaacacacacacaca&
&cadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
&"*a*b*ba*ca*a*x*aaa*fa*ga*b*",&
&.false.) .and. allpassed
allpassed= test(&
&"abababababababababababababababababababaacacacacacacacad&
&aeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
&"*a*b*ba*ca*aaaa*fa*ga*gggg*b*",&
&.false.) .and. allpassed
allpassed= test(&
&"abababababababababababababababababababaacacacacacacacad&
&aeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab",&
&"*a*b*ba*ca*aaaa*fa*ga*ggg*b*",&
&.true.) .and. allpassed
allpassed= test("aaabbaabbaab","*aabbaa*a*",.true.).and.allpassed
allpassed= &
test("a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*",&
&"a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", .true.) .and. allpassed
allpassed= test("aaaaaaaaaaaaaaaaa",&
&"*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", .true.) .and. allpassed
allpassed= test("aaaaaaaaaaaaaaaa",&
&"*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", .false.) .and. allpassed
allpassed= test(&
&"abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij&
&*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn",&
& "abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc&
&*abc*abc*abc*",&
&.false.) .and. allpassed
allpassed= test(&
&"abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij&
&*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn",&
&"abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*",&
&.true.) .and. allpassed
allpassed= test("abc*abcd*abcd*abc*abcd",&
&"abc*abc*abc*abc*abc", .false.) .and. allpassed
allpassed= test( "abc*abcd*abcd*abc*abcd*abcd&
&*abc*abcd*abc*abc*abcd", &
&"abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd",&
&.true.) .and. allpassed
allpassed= test("abc",&
&"********a********b********c********", .true.) .and. allpassed
allpassed=&
&test("********a********b********c********", "abc",.false.)&
& .and.allpassed
allpassed= &
&test("abc", "********a********b********b********",.false.)&
& .and.allpassed
allpassed= test("*abc*", "***a*b*c***", .true.) .and. allpassed
! A case-insensitive algorithm test.
! allpassed=test("mississippi", "*issip*PI", .true.) .and. allpassed
enddo
if (allpassed)then
write(*,'(*(g0,1x))')"Passed",nReps
else
write(*,'(a)')"Failed"
endif
contains
! This is a test program for wildcard matching routines.
! It can be used either to test a single routine for correctness,
! or to compare the timings of two (or more) different wildcard
! matching routines.
!
function test(tame, wild, bExpectedResult) result(bPassed)
use M_strings, only : glob
character(len=*) :: tame
character(len=*) :: wild
logical :: bExpectedResult
logical :: bResult
logical :: bPassed
bResult = .true. ! We'll do "&=" cumulative checking.
bPassed = .false. ! Assume the worst.
write(*,*)repeat('=',79)
bResult = glob(tame, wild) ! Call a wildcard matching routine.
! To assist correctness checking, output the two strings in any
! failing scenarios.
if (bExpectedResult .eqv. bResult) then
bPassed = .true.
if(nReps == 1) write(*,*)"Passed match on ",tame," vs. ", wild
else
if(nReps == 1) write(*,*)"Failed match on ",tame," vs. ", wild
endif
end function test
end program demo_glob
Expected output
John S. Urban
The article “Matching Wildcards: An Empirical Way to Tame an Algorithm” in Dr Dobb’s Journal, By Kirk J. Krauss, October 07, 2014
Public Domain
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
character(len=*) | :: | tame | ||||
character(len=*) | :: | wild |
function glob(tame,wild)
! ident_6="@(#) M_strings glob(3f) function compares text strings one of which can have wildcards ('*' or '?')."
logical :: glob
character(len=*) :: tame ! A string without wildcards
character(len=*) :: wild ! A (potentially) corresponding string with wildcards
character(len=len(tame)+1) :: tametext
character(len=len(wild)+1) :: wildtext
character(len=1),parameter :: NULL=char(0)
integer :: wlen
integer :: ti, wi
integer :: i
character(len=:),allocatable :: tbookmark, wbookmark
! These two values are set when we observe a wildcard character. They
! represent the locations, in the two strings, from which we start once we have observed it.
tametext=tame//NULL
wildtext=wild//NULL
tbookmark = NULL
wbookmark = NULL
wlen=len(wild)
wi=1
ti=1
do ! Walk the text strings one character at a time.
if(wildtext(wi:wi) == '*')then ! How do you match a unique text string?
do i=wi,wlen ! Easy: unique up on it!
if(wildtext(wi:wi) == '*')then
wi=wi+1
else
exit
endif
enddo
if(wildtext(wi:wi) == NULL) then ! "x" matches "*"
glob=.true.
return
endif
if(wildtext(wi:wi) /= '?') then
! Fast-forward to next possible match.
do while (tametext(ti:ti) /= wildtext(wi:wi))
ti=ti+1
if (tametext(ti:ti) == NULL)then
glob=.false.
return ! "x" doesn't match "*y*"
endif
enddo
endif
wbookmark = wildtext(wi:)
tbookmark = tametext(ti:)
elseif(tametext(ti:ti) /= wildtext(wi:wi) .and. wildtext(wi:wi) /= '?') then
! Got a non-match. If we've set our bookmarks, back up to one or both of them and retry.
if(wbookmark /= NULL) then
if(wildtext(wi:) /= wbookmark) then
wildtext = wbookmark
wlen=len_trim(wbookmark)
wi=1
! Don't go this far back again.
if (tametext(ti:ti) /= wildtext(wi:wi)) then
tbookmark=tbookmark(2:)
tametext = tbookmark
ti=1
cycle ! "xy" matches "*y"
else
wi=wi+1
endif
endif
if (tametext(ti:ti) /= NULL) then
ti=ti+1
cycle ! "mississippi" matches "*sip*"
endif
endif
glob=.false.
return ! "xy" doesn't match "x"
endif
ti=ti+1
wi=wi+1
if (ti > len(tametext)) then
glob=.false.
return
elseif (tametext(ti:ti) == NULL) then ! How do you match a tame text string?
if(wildtext(wi:wi) /= NULL)then
do while (wildtext(wi:wi) == '*') ! The tame way: unique up on it!
wi=wi+1 ! "x" matches "x*"
if(wildtext(wi:wi) == NULL)exit
enddo
endif
if (wildtext(wi:wi) == NULL)then
glob=.true.
return ! "x" matches "x"
endif
glob=.false.
return ! "x" doesn't match "xy"
endif
enddo
end function glob