|
MagickCore
6.7.5
|
00001 /* 00002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00003 % % 00004 % % 00005 % % 00006 % TTTTT OOO K K EEEEE N N % 00007 % T O O K K E NN N % 00008 % T O O KKK EEE N N N % 00009 % T O O K K E N NN % 00010 % T OOO K K EEEEE N N % 00011 % % 00012 % % 00013 % MagickCore Token Methods % 00014 % % 00015 % Software Design % 00016 % John Cristy % 00017 % January 1993 % 00018 % % 00019 % % 00020 % Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization % 00021 % dedicated to making software imaging solutions freely available. % 00022 % % 00023 % You may not use this file except in compliance with the License. You may % 00024 % obtain a copy of the License at % 00025 % % 00026 % http://www.imagemagick.org/script/license.php % 00027 % % 00028 % Unless required by applicable law or agreed to in writing, software % 00029 % distributed under the License is distributed on an "AS IS" BASIS, % 00030 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. % 00031 % See the License for the specific language governing permissions and % 00032 % limitations under the License. % 00033 % % 00034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00035 % 00036 % 00037 % 00038 */ 00039 00040 /* 00041 Include declarations. 00042 */ 00043 #include "MagickCore/studio.h" 00044 #include "MagickCore/exception.h" 00045 #include "MagickCore/exception-private.h" 00046 #include "MagickCore/image.h" 00047 #include "MagickCore/memory_.h" 00048 #include "MagickCore/string_.h" 00049 #include "MagickCore/string-private.h" 00050 #include "MagickCore/token.h" 00051 #include "MagickCore/token-private.h" 00052 #include "MagickCore/utility.h" 00053 #include "MagickCore/utility-private.h" 00054 00055 /* 00056 Typedef declaractions. 
00057 */ 00058 struct _TokenInfo 00059 { 00060 int 00061 state; 00062 00063 MagickStatusType 00064 flag; 00065 00066 ssize_t 00067 offset; 00068 00069 char 00070 quote; 00071 00072 size_t 00073 signature; 00074 }; 00075 00076 /* 00077 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00078 % % 00079 % % 00080 % % 00081 % A c q u i r e T o k e n I n f o % 00082 % % 00083 % % 00084 % % 00085 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00086 % 00087 % AcquireTokenInfo() allocates the TokenInfo structure. 00088 % 00089 % The format of the AcquireTokenInfo method is: 00090 % 00091 % TokenInfo *AcquireTokenInfo() 00092 % 00093 */ 00094 MagickExport TokenInfo *AcquireTokenInfo(void) 00095 { 00096 TokenInfo 00097 *token_info; 00098 00099 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info)); 00100 if (token_info == (TokenInfo *) NULL) 00101 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed"); 00102 token_info->signature=MagickSignature; 00103 return(token_info); 00104 } 00105 00106 /* 00107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00108 % % 00109 % % 00110 % % 00111 % D e s t r o y T o k e n I n f o % 00112 % % 00113 % % 00114 % % 00115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00116 % 00117 % DestroyTokenInfo() deallocates memory associated with an TokenInfo 00118 % structure. 00119 % 00120 % The format of the DestroyTokenInfo method is: 00121 % 00122 % TokenInfo *DestroyTokenInfo(TokenInfo *token_info) 00123 % 00124 % A description of each parameter follows: 00125 % 00126 % o token_info: Specifies a pointer to an TokenInfo structure. 
00127 % 00128 */ 00129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info) 00130 { 00131 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"..."); 00132 assert(token_info != (TokenInfo *) NULL); 00133 assert(token_info->signature == MagickSignature); 00134 token_info->signature=(~MagickSignature); 00135 token_info=(TokenInfo *) RelinquishMagickMemory(token_info); 00136 return(token_info); 00137 } 00138 00139 /* 00140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00141 % % 00142 % % 00143 % % 00144 + G e t M a g i c k T o k e n % 00145 % % 00146 % % 00147 % % 00148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00149 % 00150 % GetMagickToken() gets a token from the token stream. A token is defined as 00151 % a sequence of characters delimited by whitespace (e.g. clip-path), a 00152 % sequence delimited with quotes (.e.g "Quote me"), or a sequence enclosed in 00153 % parenthesis (e.g. rgb(0,0,0)). GetMagickToken() also recognizes these 00154 % separator characters: ':', '=', ',', and ';'. 00155 % 00156 % The format of the GetMagickToken method is: 00157 % 00158 % void GetMagickToken(const char *start,const char **end,char *token) 00159 % 00160 % A description of each parameter follows: 00161 % 00162 % o start: the start of the token sequence. 00163 % 00164 % o end: point to the end of the token sequence. 00165 % 00166 % o token: copy the token to this buffer. 
00167 % 00168 */ 00169 MagickExport void GetMagickToken(const char *start,const char **end,char *token) 00170 { 00171 double 00172 value; 00173 00174 register const char 00175 *p; 00176 00177 register ssize_t 00178 i; 00179 00180 assert(start != (const char *) NULL); 00181 assert(token != (char *) NULL); 00182 i=0; 00183 for (p=start; *p != '\0'; ) 00184 { 00185 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0')) 00186 p++; 00187 if (*p == '\0') 00188 break; 00189 switch (*p) 00190 { 00191 case '"': 00192 case '\'': 00193 case '`': 00194 case '{': 00195 { 00196 register char 00197 escape; 00198 00199 switch (*p) 00200 { 00201 case '"': escape='"'; break; 00202 case '\'': escape='\''; break; 00203 case '`': escape='\''; break; 00204 case '{': escape='}'; break; 00205 default: escape=(*p); break; 00206 } 00207 for (p++; *p != '\0'; p++) 00208 { 00209 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\'))) 00210 p++; 00211 else 00212 if (*p == escape) 00213 { 00214 p++; 00215 break; 00216 } 00217 token[i++]=(*p); 00218 } 00219 break; 00220 } 00221 case '/': 00222 { 00223 token[i++]=(*p++); 00224 if ((*p == '>') || (*p == '/')) 00225 token[i++]=(*p++); 00226 break; 00227 } 00228 default: 00229 { 00230 char 00231 *q; 00232 00233 value=StringToDouble(p,&q); 00234 (void) value; 00235 if ((p != q) && (*p != ',')) 00236 { 00237 for ( ; (p < q) && (*p != ','); p++) 00238 token[i++]=(*p); 00239 if (*p == '%') 00240 token[i++]=(*p++); 00241 break; 00242 } 00243 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) && 00244 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<')) 00245 { 00246 token[i++]=(*p++); 00247 break; 00248 } 00249 for ( ; *p != '\0'; p++) 00250 { 00251 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') || 00252 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\')) 00253 break; 00254 if ((i > 0) && (*p == '<')) 00255 break; 00256 token[i++]=(*p); 00257 if (*p == '>') 00258 break; 00259 if (*p == '(') 
00260 for (p++; *p != '\0'; p++) 00261 { 00262 token[i++]=(*p); 00263 if ((*p == ')') && (*(p-1) != '\\')) 00264 break; 00265 } 00266 } 00267 break; 00268 } 00269 } 00270 break; 00271 } 00272 token[i]='\0'; 00273 if (LocaleNCompare(token,"url(",4) == 0) 00274 { 00275 ssize_t 00276 offset; 00277 00278 offset=4; 00279 if (token[offset] == '#') 00280 offset++; 00281 i=(ssize_t) strlen(token); 00282 (void) CopyMagickString(token,token+offset,MaxTextExtent); 00283 token[i-offset-1]='\0'; 00284 } 00285 while (isspace((int) ((unsigned char) *p)) != 0) 00286 p++; 00287 if (end != (const char **) NULL) 00288 *end=(const char *) p; 00289 } 00290 00291 /* 00292 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00293 % % 00294 % % 00295 % % 00296 % G l o b E x p r e s s i o n % 00297 % % 00298 % % 00299 % % 00300 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00301 % 00302 % GlobExpression() returns MagickTrue if the expression matches the pattern. 00303 % 00304 % The format of the GlobExpression function is: 00305 % 00306 % MagickBooleanType GlobExpression(const char *expression, 00307 % const char *pattern,const MagickBooleanType case_insensitive) 00308 % 00309 % A description of each parameter follows: 00310 % 00311 % o expression: Specifies a pointer to a text string containing a file name. 00312 % 00313 % o pattern: Specifies a pointer to a text string containing a pattern. 00314 % 00315 % o case_insensitive: set to MagickTrue to ignore the case when matching 00316 % an expression. 00317 % 00318 */ 00319 MagickExport MagickBooleanType GlobExpression(const char *expression, 00320 const char *pattern,const MagickBooleanType case_insensitive) 00321 { 00322 MagickBooleanType 00323 done, 00324 match; 00325 00326 register const char 00327 *p; 00328 00329 /* 00330 Return on empty pattern or '*'. 
00331 */ 00332 if (pattern == (char *) NULL) 00333 return(MagickTrue); 00334 if (GetUTFCode(pattern) == 0) 00335 return(MagickTrue); 00336 if (LocaleCompare(pattern,"*") == 0) 00337 return(MagickTrue); 00338 p=pattern+strlen(pattern)-1; 00339 if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL)) 00340 { 00341 ExceptionInfo 00342 *exception; 00343 00344 ImageInfo 00345 *image_info; 00346 00347 /* 00348 Determine if pattern is a scene, i.e. img0001.pcd[2]. 00349 */ 00350 image_info=AcquireImageInfo(); 00351 (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent); 00352 exception=AcquireExceptionInfo(); 00353 (void) SetImageInfo(image_info,0,exception); 00354 exception=DestroyExceptionInfo(exception); 00355 if (LocaleCompare(image_info->filename,pattern) != 0) 00356 { 00357 image_info=DestroyImageInfo(image_info); 00358 return(MagickFalse); 00359 } 00360 image_info=DestroyImageInfo(image_info); 00361 } 00362 /* 00363 Evaluate glob expression. 00364 */ 00365 done=MagickFalse; 00366 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse)) 00367 { 00368 if (GetUTFCode(expression) == 0) 00369 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*')) 00370 break; 00371 switch (GetUTFCode(pattern)) 00372 { 00373 case '*': 00374 { 00375 MagickBooleanType 00376 status; 00377 00378 status=MagickFalse; 00379 pattern+=GetUTFOctets(pattern); 00380 while ((GetUTFCode(expression) != 0) && (status == MagickFalse)) 00381 { 00382 status=GlobExpression(expression,pattern,case_insensitive); 00383 expression+=GetUTFOctets(expression); 00384 } 00385 if (status != MagickFalse) 00386 { 00387 while (GetUTFCode(expression) != 0) 00388 expression+=GetUTFOctets(expression); 00389 while (GetUTFCode(pattern) != 0) 00390 pattern+=GetUTFOctets(pattern); 00391 } 00392 break; 00393 } 00394 case '[': 00395 { 00396 int 00397 c; 00398 00399 pattern+=GetUTFOctets(pattern); 00400 for ( ; ; ) 00401 { 00402 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == 
']')) 00403 { 00404 done=MagickTrue; 00405 break; 00406 } 00407 if (GetUTFCode(pattern) == '\\') 00408 { 00409 pattern+=GetUTFOctets(pattern); 00410 if (GetUTFCode(pattern) == 0) 00411 { 00412 done=MagickTrue; 00413 break; 00414 } 00415 } 00416 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-') 00417 { 00418 c=GetUTFCode(pattern); 00419 pattern+=GetUTFOctets(pattern); 00420 pattern+=GetUTFOctets(pattern); 00421 if (GetUTFCode(pattern) == ']') 00422 { 00423 done=MagickTrue; 00424 break; 00425 } 00426 if (GetUTFCode(pattern) == '\\') 00427 { 00428 pattern+=GetUTFOctets(pattern); 00429 if (GetUTFCode(pattern) == 0) 00430 { 00431 done=MagickTrue; 00432 break; 00433 } 00434 } 00435 if ((GetUTFCode(expression) < c) || 00436 (GetUTFCode(expression) > GetUTFCode(pattern))) 00437 { 00438 pattern+=GetUTFOctets(pattern); 00439 continue; 00440 } 00441 } 00442 else 00443 if (GetUTFCode(pattern) != GetUTFCode(expression)) 00444 { 00445 pattern+=GetUTFOctets(pattern); 00446 continue; 00447 } 00448 pattern+=GetUTFOctets(pattern); 00449 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0)) 00450 { 00451 if ((GetUTFCode(pattern) == '\\') && 00452 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0)) 00453 pattern+=GetUTFOctets(pattern); 00454 pattern+=GetUTFOctets(pattern); 00455 } 00456 if (GetUTFCode(pattern) != 0) 00457 { 00458 pattern+=GetUTFOctets(pattern); 00459 expression+=GetUTFOctets(expression); 00460 } 00461 break; 00462 } 00463 break; 00464 } 00465 case '?': 00466 { 00467 pattern+=GetUTFOctets(pattern); 00468 expression+=GetUTFOctets(expression); 00469 break; 00470 } 00471 case '{': 00472 { 00473 register const char 00474 *p; 00475 00476 pattern+=GetUTFOctets(pattern); 00477 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0)) 00478 { 00479 p=expression; 00480 match=MagickTrue; 00481 while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) && 00482 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') && 00483 (match != MagickFalse)) 
00484 { 00485 if (GetUTFCode(pattern) == '\\') 00486 pattern+=GetUTFOctets(pattern); 00487 match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue : 00488 MagickFalse; 00489 p+=GetUTFOctets(p); 00490 pattern+=GetUTFOctets(pattern); 00491 } 00492 if (GetUTFCode(pattern) == 0) 00493 { 00494 match=MagickFalse; 00495 done=MagickTrue; 00496 break; 00497 } 00498 else 00499 if (match != MagickFalse) 00500 { 00501 expression=p; 00502 while ((GetUTFCode(pattern) != '}') && 00503 (GetUTFCode(pattern) != 0)) 00504 { 00505 pattern+=GetUTFOctets(pattern); 00506 if (GetUTFCode(pattern) == '\\') 00507 { 00508 pattern+=GetUTFOctets(pattern); 00509 if (GetUTFCode(pattern) == '}') 00510 pattern+=GetUTFOctets(pattern); 00511 } 00512 } 00513 } 00514 else 00515 { 00516 while ((GetUTFCode(pattern) != '}') && 00517 (GetUTFCode(pattern) != ',') && 00518 (GetUTFCode(pattern) != 0)) 00519 { 00520 pattern+=GetUTFOctets(pattern); 00521 if (GetUTFCode(pattern) == '\\') 00522 { 00523 pattern+=GetUTFOctets(pattern); 00524 if ((GetUTFCode(pattern) == '}') || 00525 (GetUTFCode(pattern) == ',')) 00526 pattern+=GetUTFOctets(pattern); 00527 } 00528 } 00529 } 00530 if (GetUTFCode(pattern) != 0) 00531 pattern+=GetUTFOctets(pattern); 00532 } 00533 break; 00534 } 00535 case '\\': 00536 { 00537 pattern+=GetUTFOctets(pattern); 00538 if (GetUTFCode(pattern) == 0) 00539 break; 00540 } 00541 default: 00542 { 00543 if (case_insensitive != MagickFalse) 00544 { 00545 if (tolower((int) GetUTFCode(expression)) != 00546 tolower((int) GetUTFCode(pattern))) 00547 { 00548 done=MagickTrue; 00549 break; 00550 } 00551 } 00552 else 00553 if (GetUTFCode(expression) != GetUTFCode(pattern)) 00554 { 00555 done=MagickTrue; 00556 break; 00557 } 00558 expression+=GetUTFOctets(expression); 00559 pattern+=GetUTFOctets(pattern); 00560 } 00561 } 00562 } 00563 while (GetUTFCode(pattern) == '*') 00564 pattern+=GetUTFOctets(pattern); 00565 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ? 
00566 MagickTrue : MagickFalse; 00567 return(match); 00568 } 00569 00570 /* 00571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00572 % % 00573 % % 00574 % % 00575 + I s G l o b % 00576 % % 00577 % % 00578 % % 00579 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00580 % 00581 % IsGlob() returns MagickTrue if the path specification contains a globbing 00582 % pattern. 00583 % 00584 % The format of the IsGlob method is: 00585 % 00586 % MagickBooleanType IsGlob(const char *geometry) 00587 % 00588 % A description of each parameter follows: 00589 % 00590 % o path: the path. 00591 % 00592 */ 00593 MagickPrivate MagickBooleanType IsGlob(const char *path) 00594 { 00595 MagickBooleanType 00596 status; 00597 00598 if (IsPathAccessible(path) != MagickFalse) 00599 return(MagickFalse); 00600 status=(strchr(path,'*') != (char *) NULL) || 00601 (strchr(path,'?') != (char *) NULL) || 00602 (strchr(path,'{') != (char *) NULL) || 00603 (strchr(path,'}') != (char *) NULL) || 00604 (strchr(path,'[') != (char *) NULL) || 00605 (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse; 00606 return(status); 00607 } 00608 00609 /* 00610 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00611 % % 00612 % % 00613 % % 00614 % T o k e n i z e r % 00615 % % 00616 % % 00617 % % 00618 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 00619 % 00620 % Tokenizer() is a generalized, finite state token parser. It extracts tokens 00621 % one at a time from a string of characters. The characters used for white 00622 % space, for break characters, and for quotes can be specified. Also, 00623 % characters in the string can be preceded by a specifiable escape character 00624 % which removes any special meaning the character may have. 00625 % 00626 % Here is some terminology: 00627 % 00628 % o token: A single unit of information in the form of a group of 00629 % characters. 
%
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs.  In addition, white space terminates a
%      non-quoted token.
%
%    o break set: One or more characters that separate non-quoted tokens.
%      Commas are a common break character.  The usage of break characters to
%      signal the end of a token is the same as that of white space, except
%      multiple break characters with nothing or only white space between
%      generate a null token for each two break characters together.
%
%      For example, if blank is set to be the white space and comma is set to
%      be the break character, the line
%
%        A, B, C , , DEF
%
%      ... consists of 5 tokens:
%
%        1) "A"
%        2) "B"
%        3) "C"
%        4) "" (the null string)
%        5) "DEF"
%
%    o Quote character: A character that, when surrounding a group of other
%      characters, causes the group of characters to be treated as a single
%      token, no matter how many white spaces or break characters exist in
%      the group.  Also, a token always terminates after the closing quote.
%      For example, if ' is the quote character, blank is white space, and
%      comma is the break character, the following string
%
%        A, ' B, CD'EF GHI
%
%      ... consists of 4 tokens:
%
%        1) "A"
%        2) " B, CD" (note the blanks & comma)
%        3) "EF"
%        4) "GHI"
%
%      The quote characters themselves do not appear in the resultant
%      tokens.  The double quotes are delimiters I use here for
%      documentation purposes only.
%
%    o Escape character: A character which itself is ignored but which
%      causes the next character to be used as is.  ^ and \ are often used
%      as escape characters.
%      An escape in the last position of the string
%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
%      and non-escape) character.  For example, assume white space, break
%      character, and quote are the same as in the above examples, and
%      further, assume that ^ is the escape character.  Then, in the string
%
%        ABC, ' DEF ^' GH' I ^ J K^ L ^
%
%      ... there are 7 tokens:
%
%        1) "ABC"
%        2) " DEF ' GH"
%        3) "I"
%        4) " " (a lone blank)
%        5) "J"
%        6) "K L"
%        7) "^" (passed as is at end of line)
%
%  The format of the Tokenizer method is:
%
%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
%        const size_t max_token_length,const char *line,const char *white,
%        const char *break_set,const char *quote,const char escape,
%        char *breaker,int *next,char *quoted)
%
%  A description of each parameter follows:
%
%    o token_info: the parser state, as allocated by AcquireTokenInfo().
%
%    o flag: right now, only the low order 2 bits are used.
%
%        1 => convert non-quoted tokens to upper case
%        2 => convert non-quoted tokens to lower case
%        0 => do not convert non-quoted tokens
%
%    o token: a character string containing the returned next token.
%
%    o max_token_length: the maximum size of "token".  Characters beyond
%      "max_token_length" are truncated.
%
%    o line: the string to be parsed.
%
%    o white: a string of the valid white spaces.  Example:
%
%        char whitesp[]={" \t"};
%
%      blank and tab will be valid white space.
%
%    o break_set: a string of the valid break characters.  Example:
%
%        char breakch[]={";,"};
%
%      semicolon and comma will be valid break characters.
%
%    o quote: a string of the valid quote characters.
%      An example would be
%
%        char quotech[]={"'\""};
%
%      (this causes single and double quotes to be valid).  Note that a
%      token starting with one of these characters needs the same quote
%      character to terminate it.
%
%      For example:
%
%        "ABC '
%
%      is unterminated, but
%
%        "DEF" and 'GHI'
%
%      are properly terminated.  Note that different quote characters
%      can appear on the same line; only for a given token do the quote
%      characters have to be the same.
%
%    o escape: the escape character (NOT a string ... only one
%      allowed).  Use zero if none is desired.
%
%    o breaker: the break character used to terminate the current
%      token.  If the token was quoted, this will be the quote used.  If
%      the token is the last one on the line, this will be zero.
%
%    o next: this variable holds the index of the first character of the
%      next token.  It gets updated by Tokenizer() as it steps through the
%      string.  Set it to 0 upon initialization, and leave it alone
%      after that.  You can change it if you want to jump around in the
%      string or re-parse from the beginning, but be careful.
%
%    o quoted: set to MagickTrue if the token was quoted and MagickFalse
%      if not.  You may need this information (for example: in C, a
%      string with quotes around it is a character string, while one
%      without is an identifier).
%
%    o result: 0 if we haven't reached EOS (end of string), and 1
%      if we have.
00768 % 00769 */ 00770 00771 #define IN_WHITE 0 00772 #define IN_TOKEN 1 00773 #define IN_QUOTE 2 00774 #define IN_OZONE 3 00775 00776 static ssize_t sindex(int c,const char *string) 00777 { 00778 register const char 00779 *p; 00780 00781 for (p=string; *p != '\0'; p++) 00782 if (c == (int) (*p)) 00783 return((ssize_t) (p-string)); 00784 return(-1); 00785 } 00786 00787 static void StoreToken(TokenInfo *token_info,char *string, 00788 size_t max_token_length,int c) 00789 { 00790 register ssize_t 00791 i; 00792 00793 if ((token_info->offset < 0) || 00794 ((size_t) token_info->offset >= (max_token_length-1))) 00795 return; 00796 i=token_info->offset++; 00797 string[i]=(char) c; 00798 if (token_info->state == IN_QUOTE) 00799 return; 00800 switch (token_info->flag & 0x03) 00801 { 00802 case 1: 00803 { 00804 string[i]=(char) toupper(c); 00805 break; 00806 } 00807 case 2: 00808 { 00809 string[i]=(char) tolower(c); 00810 break; 00811 } 00812 default: 00813 break; 00814 } 00815 } 00816 00817 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag, 00818 char *token,const size_t max_token_length,const char *line,const char *white, 00819 const char *break_set,const char *quote,const char escape,char *breaker, 00820 int *next,char *quoted) 00821 { 00822 int 00823 c; 00824 00825 register ssize_t 00826 i; 00827 00828 *breaker='\0'; 00829 *quoted='\0'; 00830 if (line[*next] == '\0') 00831 return(1); 00832 token_info->state=IN_WHITE; 00833 token_info->quote=(char) MagickFalse; 00834 token_info->flag=flag; 00835 for (token_info->offset=0; (int) line[*next] != 0; (*next)++) 00836 { 00837 c=(int) line[*next]; 00838 i=sindex(c,break_set); 00839 if (i >= 0) 00840 { 00841 switch (token_info->state) 00842 { 00843 case IN_WHITE: 00844 case IN_TOKEN: 00845 case IN_OZONE: 00846 { 00847 (*next)++; 00848 *breaker=break_set[i]; 00849 token[token_info->offset]='\0'; 00850 return(0); 00851 } 00852 case IN_QUOTE: 00853 { 00854 StoreToken(token_info,token,max_token_length,c); 00855 
break; 00856 } 00857 } 00858 continue; 00859 } 00860 i=sindex(c,quote); 00861 if (i >= 0) 00862 { 00863 switch (token_info->state) 00864 { 00865 case IN_WHITE: 00866 { 00867 token_info->state=IN_QUOTE; 00868 token_info->quote=quote[i]; 00869 *quoted=(char) MagickTrue; 00870 break; 00871 } 00872 case IN_QUOTE: 00873 { 00874 if (quote[i] != token_info->quote) 00875 StoreToken(token_info,token,max_token_length,c); 00876 else 00877 { 00878 token_info->state=IN_OZONE; 00879 token_info->quote='\0'; 00880 } 00881 break; 00882 } 00883 case IN_TOKEN: 00884 case IN_OZONE: 00885 { 00886 *breaker=(char) c; 00887 token[token_info->offset]='\0'; 00888 return(0); 00889 } 00890 } 00891 continue; 00892 } 00893 i=sindex(c,white); 00894 if (i >= 0) 00895 { 00896 switch (token_info->state) 00897 { 00898 case IN_WHITE: 00899 case IN_OZONE: 00900 break; 00901 case IN_TOKEN: 00902 { 00903 token_info->state=IN_OZONE; 00904 break; 00905 } 00906 case IN_QUOTE: 00907 { 00908 StoreToken(token_info,token,max_token_length,c); 00909 break; 00910 } 00911 } 00912 continue; 00913 } 00914 if (c == (int) escape) 00915 { 00916 if (line[(*next)+1] == '\0') 00917 { 00918 *breaker='\0'; 00919 StoreToken(token_info,token,max_token_length,c); 00920 (*next)++; 00921 token[token_info->offset]='\0'; 00922 return(0); 00923 } 00924 switch (token_info->state) 00925 { 00926 case IN_WHITE: 00927 { 00928 (*next)--; 00929 token_info->state=IN_TOKEN; 00930 break; 00931 } 00932 case IN_TOKEN: 00933 case IN_QUOTE: 00934 { 00935 (*next)++; 00936 c=(int) line[*next]; 00937 StoreToken(token_info,token,max_token_length,c); 00938 break; 00939 } 00940 case IN_OZONE: 00941 { 00942 token[token_info->offset]='\0'; 00943 return(0); 00944 } 00945 } 00946 continue; 00947 } 00948 switch (token_info->state) 00949 { 00950 case IN_WHITE: 00951 token_info->state=IN_TOKEN; 00952 case IN_TOKEN: 00953 case IN_QUOTE: 00954 { 00955 StoreToken(token_info,token,max_token_length,c); 00956 break; 00957 } 00958 case IN_OZONE: 00959 { 00960 
token[token_info->offset]='\0'; 00961 return(0); 00962 } 00963 } 00964 } 00965 token[token_info->offset]='\0'; 00966 return(0); 00967 }