MagickCore  6.7.5
token.c
Go to the documentation of this file.
00001 /*
00002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00003 %                                                                             %
00004 %                                                                             %
00005 %                                                                             %
00006 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
00007 %                      T    O   O  K  K   E      NN  N                        %
00008 %                      T    O   O  KKK    EEE    N N N                        %
00009 %                      T    O   O  K  K   E      N  NN                        %
00010 %                      T     OOO   K   K  EEEEE  N   N                        %
00011 %                                                                             %
00012 %                                                                             %
00013 %                         MagickCore Token Methods                            %
00014 %                                                                             %
00015 %                             Software Design                                 %
00016 %                               John Cristy                                   %
00017 %                              January 1993                                   %
00018 %                                                                             %
00019 %                                                                             %
00020 %  Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization      %
00021 %  dedicated to making software imaging solutions freely available.           %
00022 %                                                                             %
00023 %  You may not use this file except in compliance with the License.  You may  %
00024 %  obtain a copy of the License at                                            %
00025 %                                                                             %
00026 %    http://www.imagemagick.org/script/license.php                            %
00027 %                                                                             %
00028 %  Unless required by applicable law or agreed to in writing, software        %
00029 %  distributed under the License is distributed on an "AS IS" BASIS,          %
00030 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
00031 %  See the License for the specific language governing permissions and        %
00032 %  limitations under the License.                                             %
00033 %                                                                             %
00034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00035 %
00036 %
00037 %
00038 */
00039 
00040 /*
00041   Include declarations.
00042 */
00043 #include "MagickCore/studio.h"
00044 #include "MagickCore/exception.h"
00045 #include "MagickCore/exception-private.h"
00046 #include "MagickCore/image.h"
00047 #include "MagickCore/memory_.h"
00048 #include "MagickCore/string_.h"
00049 #include "MagickCore/string-private.h"
00050 #include "MagickCore/token.h"
00051 #include "MagickCore/token-private.h"
00052 #include "MagickCore/utility.h"
00053 #include "MagickCore/utility-private.h"
00054 
00055 /*
00056   Typedef declarations.
00057 */
00058 struct _TokenInfo
00059 {
00060   int
00061     state;
00062 
00063   MagickStatusType
00064     flag;
00065 
00066   ssize_t
00067     offset;
00068 
00069   char
00070     quote;
00071 
00072   size_t
00073     signature;
00074 };
00075 
00076 /*
00077 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00078 %                                                                             %
00079 %                                                                             %
00080 %                                                                             %
00081 %   A c q u i r e T o k e n I n f o                                           %
00082 %                                                                             %
00083 %                                                                             %
00084 %                                                                             %
00085 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00086 %
00087 %  AcquireTokenInfo() allocates the TokenInfo structure.
00088 %
00089 %  The format of the AcquireTokenInfo method is:
00090 %
00091 %      TokenInfo *AcquireTokenInfo()
00092 %
00093 */
00094 MagickExport TokenInfo *AcquireTokenInfo(void)
00095 {
00096   TokenInfo
00097     *token_info;
00098 
00099   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
00100   if (token_info == (TokenInfo *) NULL)
00101     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
00102   token_info->signature=MagickSignature;
00103   return(token_info);
00104 }
00105 
00106 /*
00107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00108 %                                                                             %
00109 %                                                                             %
00110 %                                                                             %
00111 %   D e s t r o y T o k e n I n f o                                           %
00112 %                                                                             %
00113 %                                                                             %
00114 %                                                                             %
00115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00116 %
00117 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
00118 %  structure.
00119 %
00120 %  The format of the DestroyTokenInfo method is:
00121 %
00122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
00123 %
00124 %  A description of each parameter follows:
00125 %
00126 %    o token_info: Specifies a pointer to a TokenInfo structure.
00127 %
00128 */
00129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
00130 {
00131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
00132   assert(token_info != (TokenInfo *) NULL);
00133   assert(token_info->signature == MagickSignature);
00134   token_info->signature=(~MagickSignature);
00135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
00136   return(token_info);
00137 }
00138 
00139 /*
00140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00141 %                                                                             %
00142 %                                                                             %
00143 %                                                                             %
00144 +   G e t M a g i c k T o k e n                                               %
00145 %                                                                             %
00146 %                                                                             %
00147 %                                                                             %
00148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00149 %
00150 %  GetMagickToken() gets a token from the token stream.  A token is defined as
00151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
00152 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
00153 %  parentheses (e.g. rgb(0,0,0)).  GetMagickToken() also recognizes these
00154 %  separator characters: ':', '=', ',', and ';'.
00155 %
00156 %  The format of the GetMagickToken method is:
00157 %
00158 %      void GetMagickToken(const char *start,const char **end,char *token)
00159 %
00160 %  A description of each parameter follows:
00161 %
00162 %    o start: the start of the token sequence.
00163 %
00164 %    o end: point to the end of the token sequence.
00165 %
00166 %    o token: copy the token to this buffer.
00167 %
00168 */
00169 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
00170 {
00171   double
00172     value;
00173 
00174   register const char
00175     *p;
00176 
00177   register ssize_t
00178     i;
00179 
00180   assert(start != (const char *) NULL);
00181   assert(token != (char *) NULL);
00182   i=0;
00183   for (p=start; *p != '\0'; )
00184   {
00185     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
00186       p++;
00187     if (*p == '\0')
00188       break;
00189     switch (*p)
00190     {
00191       case '"':
00192       case '\'':
00193       case '`':
00194       case '{':
00195       {
00196         register char
00197           escape;
00198 
00199         switch (*p)
00200         {
00201           case '"': escape='"'; break;
00202           case '\'': escape='\''; break;
00203           case '`': escape='\''; break;
00204           case '{': escape='}'; break;
00205           default: escape=(*p); break;
00206         }
00207         for (p++; *p != '\0'; p++)
00208         {
00209           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
00210             p++;
00211           else
00212             if (*p == escape)
00213               {
00214                 p++;
00215                 break;
00216               }
00217           token[i++]=(*p);
00218         }
00219         break;
00220       }
00221       case '/':
00222       {
00223         token[i++]=(*p++);
00224         if ((*p == '>') || (*p == '/'))
00225           token[i++]=(*p++);
00226         break;
00227       }
00228       default:
00229       {
00230         char
00231           *q;
00232 
00233         value=StringToDouble(p,&q);
00234         (void) value;
00235         if ((p != q) && (*p != ','))
00236           {
00237             for ( ; (p < q) && (*p != ','); p++)
00238               token[i++]=(*p);
00239             if (*p == '%')
00240               token[i++]=(*p++);
00241             break;
00242           }
00243         if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
00244             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
00245           {
00246             token[i++]=(*p++);
00247             break;
00248           }
00249         for ( ; *p != '\0'; p++)
00250         {
00251           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
00252               (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
00253             break;
00254           if ((i > 0) && (*p == '<'))
00255             break;
00256           token[i++]=(*p);
00257           if (*p == '>')
00258             break;
00259           if (*p == '(')
00260             for (p++; *p != '\0'; p++)
00261             {
00262               token[i++]=(*p);
00263               if ((*p == ')') && (*(p-1) != '\\'))
00264                 break;
00265             }
00266         }
00267         break;
00268       }
00269     }
00270     break;
00271   }
00272   token[i]='\0';
00273   if (LocaleNCompare(token,"url(",4) == 0)
00274     {
00275       ssize_t
00276         offset;
00277 
00278       offset=4;
00279       if (token[offset] == '#')
00280         offset++;
00281       i=(ssize_t) strlen(token);
00282       (void) CopyMagickString(token,token+offset,MaxTextExtent);
00283       token[i-offset-1]='\0';
00284     }
00285   while (isspace((int) ((unsigned char) *p)) != 0)
00286     p++;
00287   if (end != (const char **) NULL)
00288     *end=(const char *) p;
00289 }
00290 
00291 /*
00292 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00293 %                                                                             %
00294 %                                                                             %
00295 %                                                                             %
00296 %   G l o b E x p r e s s i o n                                               %
00297 %                                                                             %
00298 %                                                                             %
00299 %                                                                             %
00300 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00301 %
00302 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
00303 %
00304 %  The format of the GlobExpression function is:
00305 %
00306 %      MagickBooleanType GlobExpression(const char *expression,
00307 %        const char *pattern,const MagickBooleanType case_insensitive)
00308 %
00309 %  A description of each parameter follows:
00310 %
00311 %    o expression: Specifies a pointer to a text string containing a file name.
00312 %
00313 %    o pattern: Specifies a pointer to a text string containing a pattern.
00314 %
00315 %    o case_insensitive: set to MagickTrue to ignore the case when matching
00316 %      an expression.
00317 %
00318 */
00319 MagickExport MagickBooleanType GlobExpression(const char *expression,
00320   const char *pattern,const MagickBooleanType case_insensitive)
00321 {
00322   MagickBooleanType
00323     done,
00324     match;
00325 
00326   register const char
00327     *p;
00328 
00329   /*
00330     Return on empty pattern or '*'.
00331   */
00332   if (pattern == (char *) NULL)
00333     return(MagickTrue);
00334   if (GetUTFCode(pattern) == 0)
00335     return(MagickTrue);
00336   if (LocaleCompare(pattern,"*") == 0)
00337     return(MagickTrue);
00338   p=pattern+strlen(pattern)-1;
00339   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
00340     {
00341       ExceptionInfo
00342         *exception;
00343 
00344       ImageInfo
00345         *image_info;
00346 
00347       /*
00348         Determine if pattern is a scene, i.e. img0001.pcd[2].
00349       */
00350       image_info=AcquireImageInfo();
00351       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
00352       exception=AcquireExceptionInfo();
00353       (void) SetImageInfo(image_info,0,exception);
00354       exception=DestroyExceptionInfo(exception);
00355       if (LocaleCompare(image_info->filename,pattern) != 0)
00356         {
00357           image_info=DestroyImageInfo(image_info);
00358           return(MagickFalse);
00359         }
00360       image_info=DestroyImageInfo(image_info);
00361     }
00362   /*
00363     Evaluate glob expression.
00364   */
00365   done=MagickFalse;
00366   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
00367   {
00368     if (GetUTFCode(expression) == 0)
00369       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
00370         break;
00371     switch (GetUTFCode(pattern))
00372     {
00373       case '*':
00374       {
00375         MagickBooleanType
00376           status;
00377 
00378         status=MagickFalse;
00379         pattern+=GetUTFOctets(pattern);
00380         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
00381         {
00382           status=GlobExpression(expression,pattern,case_insensitive);
00383           expression+=GetUTFOctets(expression);
00384         }
00385         if (status != MagickFalse)
00386           {
00387             while (GetUTFCode(expression) != 0)
00388               expression+=GetUTFOctets(expression);
00389             while (GetUTFCode(pattern) != 0)
00390               pattern+=GetUTFOctets(pattern);
00391           }
00392         break;
00393       }
00394       case '[':
00395       {
00396         int
00397           c;
00398 
00399         pattern+=GetUTFOctets(pattern);
00400         for ( ; ; )
00401         {
00402           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
00403             {
00404               done=MagickTrue;
00405               break;
00406             }
00407           if (GetUTFCode(pattern) == '\\')
00408             {
00409               pattern+=GetUTFOctets(pattern);
00410               if (GetUTFCode(pattern) == 0)
00411                 {
00412                   done=MagickTrue;
00413                   break;
00414                 }
00415              }
00416           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
00417             {
00418               c=GetUTFCode(pattern);
00419               pattern+=GetUTFOctets(pattern);
00420               pattern+=GetUTFOctets(pattern);
00421               if (GetUTFCode(pattern) == ']')
00422                 {
00423                   done=MagickTrue;
00424                   break;
00425                 }
00426               if (GetUTFCode(pattern) == '\\')
00427                 {
00428                   pattern+=GetUTFOctets(pattern);
00429                   if (GetUTFCode(pattern) == 0)
00430                     {
00431                       done=MagickTrue;
00432                       break;
00433                     }
00434                 }
00435               if ((GetUTFCode(expression) < c) ||
00436                   (GetUTFCode(expression) > GetUTFCode(pattern)))
00437                 {
00438                   pattern+=GetUTFOctets(pattern);
00439                   continue;
00440                 }
00441             }
00442           else
00443             if (GetUTFCode(pattern) != GetUTFCode(expression))
00444               {
00445                 pattern+=GetUTFOctets(pattern);
00446                 continue;
00447               }
00448           pattern+=GetUTFOctets(pattern);
00449           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
00450           {
00451             if ((GetUTFCode(pattern) == '\\') &&
00452                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
00453               pattern+=GetUTFOctets(pattern);
00454             pattern+=GetUTFOctets(pattern);
00455           }
00456           if (GetUTFCode(pattern) != 0)
00457             {
00458               pattern+=GetUTFOctets(pattern);
00459               expression+=GetUTFOctets(expression);
00460             }
00461           break;
00462         }
00463         break;
00464       }
00465       case '?':
00466       {
00467         pattern+=GetUTFOctets(pattern);
00468         expression+=GetUTFOctets(expression);
00469         break;
00470       }
00471       case '{':
00472       {
00473         register const char
00474           *p;
00475 
00476         pattern+=GetUTFOctets(pattern);
00477         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
00478         {
00479           p=expression;
00480           match=MagickTrue;
00481           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
00482                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
00483                  (match != MagickFalse))
00484           {
00485             if (GetUTFCode(pattern) == '\\')
00486               pattern+=GetUTFOctets(pattern);
00487             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
00488               MagickFalse;
00489             p+=GetUTFOctets(p);
00490             pattern+=GetUTFOctets(pattern);
00491           }
00492           if (GetUTFCode(pattern) == 0)
00493             {
00494               match=MagickFalse;
00495               done=MagickTrue;
00496               break;
00497             }
00498           else
00499             if (match != MagickFalse)
00500               {
00501                 expression=p;
00502                 while ((GetUTFCode(pattern) != '}') &&
00503                        (GetUTFCode(pattern) != 0))
00504                 {
00505                   pattern+=GetUTFOctets(pattern);
00506                   if (GetUTFCode(pattern) == '\\')
00507                     {
00508                       pattern+=GetUTFOctets(pattern);
00509                       if (GetUTFCode(pattern) == '}')
00510                         pattern+=GetUTFOctets(pattern);
00511                     }
00512                 }
00513               }
00514             else
00515               {
00516                 while ((GetUTFCode(pattern) != '}') &&
00517                        (GetUTFCode(pattern) != ',') &&
00518                        (GetUTFCode(pattern) != 0))
00519                 {
00520                   pattern+=GetUTFOctets(pattern);
00521                   if (GetUTFCode(pattern) == '\\')
00522                     {
00523                       pattern+=GetUTFOctets(pattern);
00524                       if ((GetUTFCode(pattern) == '}') ||
00525                           (GetUTFCode(pattern) == ','))
00526                         pattern+=GetUTFOctets(pattern);
00527                     }
00528                 }
00529               }
00530             if (GetUTFCode(pattern) != 0)
00531               pattern+=GetUTFOctets(pattern);
00532           }
00533         break;
00534       }
00535       case '\\':
00536       {
00537         pattern+=GetUTFOctets(pattern);
00538         if (GetUTFCode(pattern) == 0)
00539           break;
00540       }
00541       default:
00542       {
00543         if (case_insensitive != MagickFalse)
00544           {
00545             if (tolower((int) GetUTFCode(expression)) !=
00546                 tolower((int) GetUTFCode(pattern)))
00547               {
00548                 done=MagickTrue;
00549                 break;
00550               }
00551           }
00552         else
00553           if (GetUTFCode(expression) != GetUTFCode(pattern))
00554             {
00555               done=MagickTrue;
00556               break;
00557             }
00558         expression+=GetUTFOctets(expression);
00559         pattern+=GetUTFOctets(pattern);
00560       }
00561     }
00562   }
00563   while (GetUTFCode(pattern) == '*')
00564     pattern+=GetUTFOctets(pattern);
00565   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
00566     MagickTrue : MagickFalse;
00567   return(match);
00568 }
00569 
00570 /*
00571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00572 %                                                                             %
00573 %                                                                             %
00574 %                                                                             %
00575 +     I s G l o b                                                             %
00576 %                                                                             %
00577 %                                                                             %
00578 %                                                                             %
00579 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00580 %
00581 %  IsGlob() returns MagickTrue if the path specification contains a globbing
00582 %  pattern.
00583 %
00584 %  The format of the IsGlob method is:
00585 %
00586 %      MagickBooleanType IsGlob(const char *path)
00587 %
00588 %  A description of each parameter follows:
00589 %
00590 %    o path: the path.
00591 %
00592 */
00593 MagickPrivate MagickBooleanType IsGlob(const char *path)
00594 {
00595   MagickBooleanType
00596     status;
00597 
00598   if (IsPathAccessible(path) != MagickFalse)
00599     return(MagickFalse);
00600   status=(strchr(path,'*') != (char *) NULL) ||
00601     (strchr(path,'?') != (char *) NULL) ||
00602     (strchr(path,'{') != (char *) NULL) ||
00603     (strchr(path,'}') != (char *) NULL) ||
00604     (strchr(path,'[') != (char *) NULL) ||
00605     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
00606   return(status);
00607 }
00608 
00609 /*
00610 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00611 %                                                                             %
00612 %                                                                             %
00613 %                                                                             %
00614 %   T o k e n i z e r                                                         %
00615 %                                                                             %
00616 %                                                                             %
00617 %                                                                             %
00618 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00619 %
00620 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
00621 %  one at a time from a string of characters.  The characters used for white
00622 %  space, for break characters, and for quotes can be specified.  Also,
00623 %  characters in the string can be preceded by a specifiable escape character
00624 %  which removes any special meaning the character may have.
00625 %
00626 %  Here is some terminology:
00627 %
00628 %    o token: A single unit of information in the form of a group of
00629 %      characters.
00630 %
00631 %    o white space: Space that gets ignored (except within quotes or when
00632 %      escaped), like blanks and tabs. In addition, white space terminates a
00633 %      non-quoted token.
00634 %
00635 %    o break set: One or more characters that separates non-quoted tokens.
00636 %      Commas are a common break character. The usage of break characters to
00637 %      signal the end of a token is the same as that of white space, except
00638 %      multiple break characters with nothing or only white space between
00639 %      generate a null token for each two break characters together.
00640 %
00641 %      For example, if blank is set to be the white space and comma is set to
00642 %      be the break character, the line
00643 %
00644 %        A, B, C ,  , DEF
00645 %
00646 %        ... consists of 5 tokens:
00647 %
00648 %        1)  "A"
00649 %        2)  "B"
00650 %        3)  "C"
00651 %        4)  "" (the null string)
00652 %        5)  "DEF"
00653 %
00654 %    o Quote character: A character that, when surrounding a group of other
00655 %      characters, causes the group of characters to be treated as a single
00656 %      token, no matter how many white spaces or break characters exist in
00657 %      the group. Also, a token always terminates after the closing quote.
00658 %      For example, if ' is the quote character, blank is white space, and
00659 %      comma is the break character, the following string
00660 %
00661 %        A, ' B, CD'EF GHI
00662 %
00663 %        ... consists of 4 tokens:
00664 %
00665 %        1)  "A"
00666 %        2)  " B, CD" (note the blanks & comma)
00667 %        3)  "EF"
00668 %        4)  "GHI"
00669 %
00670 %      The quote characters themselves do not appear in the resultant
00671 %      tokens.  The double quotes are delimiters I use here for
00672 %      documentation purposes only.
00673 %
00674 %    o Escape character: A character which itself is ignored but which
00675 %      causes the next character to be used as is.  ^ and \ are often used
00676 %      as escape characters. An escape in the last position of the string
00677 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
00678 %      and non-escape) character. For example, assume white space, break
00679 %      character, and quote are the same as in the above examples, and
00680 %      further, assume that ^ is the escape character. Then, in the string
00681 %
00682 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
00683 %
00684 %        ... there are 7 tokens:
00685 %
00686 %        1)  "ABC"
00687 %        2)  " DEF ' GH"
00688 %        3)  "I"
00689 %        4)  " "     (a lone blank)
00690 %        5)  "J"
00691 %        6)  "K L"
00692 %        7)  "^"     (passed as is at end of line)
00693 %
00694 %  The format of the Tokenizer method is:
00695 %
00696 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
00697 %        const size_t max_token_length,const char *line,const char *white,
00698 %        const char *break_set,const char *quote,const char escape,
00699 %        char *breaker,int *next,char *quoted)
00700 %
00701 %  A description of each parameter follows:
00702 %
00703 %    o flag: right now, only the low order 2 bits are used.
00704 %
00705 %        1 => convert non-quoted tokens to upper case
00706 %        2 => convert non-quoted tokens to lower case
00707 %        0 => do not convert non-quoted tokens
00708 %
00709 %    o token: a character string containing the returned next token
00710 %
00711 %    o max_token_length: the maximum size of "token".  Characters beyond
00712 %      "max_token_length" are truncated.
00713 %
00714 %    o string: the string to be parsed.
00715 %
00716 %    o white: a string of the valid white spaces.  example:
00717 %
00718 %        char whitesp[]={" \t"};
00719 %
00720 %      blank and tab will be valid white space.
00721 %
00722 %    o break: a string of the valid break characters. example:
00723 %
00724 %        char breakch[]={";,"};
00725 %
00726 %      semicolon and comma will be valid break characters.
00727 %
00728 %    o quote: a string of the valid quote characters. An example would be
00729 %
00730 %        char quote[]={"'\""};
00731 %
00732 %      (this causes single and double quotes to be valid) Note that a
00733 %      token starting with one of these characters needs the same quote
00734 %      character to terminate it.
00735 %
00736 %      for example:
00737 %
00738 %        "ABC '
00739 %
00740 %      is unterminated, but
00741 %
00742 %        "DEF" and 'GHI'
00743 %
00744 %      are properly terminated.  Note that different quote characters
00745 %      can appear on the same line; only for a given token do the quote
00746 %      characters have to be the same.
00747 %
00748 %    o escape: the escape character (NOT a string ... only one
00749 %      allowed). Use zero if none is desired.
00750 %
00751 %    o breaker: the break character used to terminate the current
00752 %      token.  If the token was quoted, this will be the quote used.  If
00753 %      the token is the last one on the line, this will be zero.
00754 %
00755 %    o next: this variable points to the first character of the
00756 %      next token.  it gets reset by "tokenizer" as it steps through the
00757 %      string.  Set it to 0 upon initialization, and leave it alone
00758 %      after that.  You can change it if you want to jump around in the
00759 %      string or re-parse from the beginning, but be careful.
00760 %
00761 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
00762 %      if not.  You may need this information (for example:  in C, a
00763 %      string with quotes around it is a character string, while one
00764 %      without is an identifier).
00765 %
00766 %    o result: 0 if we haven't reached EOS (end of string), and 1
00767 %      if we have.
00768 %
00769 */
00770 
00771 #define IN_WHITE 0  /* state Tokenizer() starts each call in */
00772 #define IN_TOKEN 1  /* presumably: inside an unquoted token -- TODO confirm */
00773 #define IN_QUOTE 2  /* inside a quoted token; StoreToken() skips case conversion */
00774 #define IN_OZONE 3  /* NOTE(review): meaning not visible here; a break character ends the token as in IN_WHITE/IN_TOKEN */
00775 
/*
  Return the zero-based position of the first character in `string` equal to
  `c`, or -1 when there is none.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
  {
    if ((int) string[position] == c)
      return(position);
  }
  return(-1);
}
00786 
00787 static void StoreToken(TokenInfo *token_info,char *string,
00788   size_t max_token_length,int c)
00789 {
00790   register ssize_t
00791     i;
00792 
00793   if ((token_info->offset < 0) ||
00794       ((size_t) token_info->offset >= (max_token_length-1)))
00795     return;
00796   i=token_info->offset++;
00797   string[i]=(char) c;
00798   if (token_info->state == IN_QUOTE)
00799     return;
00800   switch (token_info->flag & 0x03)
00801   {
00802     case 1:
00803     {
00804       string[i]=(char) toupper(c);
00805       break;
00806     }
00807     case 2:
00808     {
00809       string[i]=(char) tolower(c);
00810       break;
00811     }
00812     default:
00813       break;
00814   }
00815 }
00816 
00817 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
00818   char *token,const size_t max_token_length,const char *line,const char *white,
00819   const char *break_set,const char *quote,const char escape,char *breaker,
00820   int *next,char *quoted)
00821 {
00822   int
00823     c;
00824 
00825   register ssize_t
00826     i;
00827 
00828   *breaker='\0';
00829   *quoted='\0';
00830   if (line[*next] == '\0')
00831     return(1);
00832   token_info->state=IN_WHITE;
00833   token_info->quote=(char) MagickFalse;
00834   token_info->flag=flag;
00835   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
00836   {
00837     c=(int) line[*next];
00838     i=sindex(c,break_set);
00839     if (i >= 0)
00840       {
00841         switch (token_info->state)
00842         {
00843           case IN_WHITE:
00844           case IN_TOKEN:
00845           case IN_OZONE:
00846           {
00847             (*next)++;
00848             *breaker=break_set[i];
00849             token[token_info->offset]='\0';
00850             return(0);
00851           }
00852           case IN_QUOTE:
00853           {
00854             StoreToken(token_info,token,max_token_length,c);
00855             break;
00856           }
00857         }
00858         continue;
00859       }
00860     i=sindex(c,quote);
00861     if (i >= 0)
00862       {
00863         switch (token_info->state)
00864         {
00865           case IN_WHITE:
00866           {
00867             token_info->state=IN_QUOTE;
00868             token_info->quote=quote[i];
00869             *quoted=(char) MagickTrue;
00870             break;
00871           }
00872           case IN_QUOTE:
00873           {
00874             if (quote[i] != token_info->quote)
00875               StoreToken(token_info,token,max_token_length,c);
00876             else
00877               {
00878                 token_info->state=IN_OZONE;
00879                 token_info->quote='\0';
00880               }
00881             break;
00882           }
00883           case IN_TOKEN:
00884           case IN_OZONE:
00885           {
00886             *breaker=(char) c;
00887             token[token_info->offset]='\0';
00888             return(0);
00889           }
00890         }
00891         continue;
00892       }
00893     i=sindex(c,white);
00894     if (i >= 0)
00895       {
00896         switch (token_info->state)
00897         {
00898           case IN_WHITE:
00899           case IN_OZONE:
00900             break;
00901           case IN_TOKEN:
00902           {
00903             token_info->state=IN_OZONE;
00904             break;
00905           }
00906           case IN_QUOTE:
00907           {
00908             StoreToken(token_info,token,max_token_length,c);
00909             break;
00910           }
00911         }
00912         continue;
00913       }
00914     if (c == (int) escape)
00915       {
00916         if (line[(*next)+1] == '\0')
00917           {
00918             *breaker='\0';
00919             StoreToken(token_info,token,max_token_length,c);
00920             (*next)++;
00921             token[token_info->offset]='\0';
00922             return(0);
00923           }
00924         switch (token_info->state)
00925         {
00926           case IN_WHITE:
00927           {
00928             (*next)--;
00929             token_info->state=IN_TOKEN;
00930             break;
00931           }
00932           case IN_TOKEN:
00933           case IN_QUOTE:
00934           {
00935             (*next)++;
00936             c=(int) line[*next];
00937             StoreToken(token_info,token,max_token_length,c);
00938             break;
00939           }
00940           case IN_OZONE:
00941           {
00942             token[token_info->offset]='\0';
00943             return(0);
00944           }
00945         }
00946         continue;
00947       }
00948     switch (token_info->state)
00949     {
00950       case IN_WHITE:
00951         token_info->state=IN_TOKEN;
00952       case IN_TOKEN:
00953       case IN_QUOTE:
00954       {
00955         StoreToken(token_info,token,max_token_length,c);
00956         break;
00957       }
00958       case IN_OZONE:
00959       {
00960         token[token_info->offset]='\0';
00961         return(0);
00962       }
00963     }
00964   }
00965   token[token_info->offset]='\0';
00966   return(0);
00967 }