/* Graphviz 2.29.20120524.0446 */
00001 /* $Id$ $Revision$ */ 00002 /* vim:set shiftwidth=4 ts=8: */ 00003 00004 /************************************************************************* 00005 * Copyright (c) 2011 AT&T Intellectual Property 00006 * All rights reserved. This program and the accompanying materials 00007 * are made available under the terms of the Eclipse Public License v1.0 00008 * which accompanies this distribution, and is available at 00009 * http://www.eclipse.org/legal/epl-v10.html 00010 * 00011 * Contributors: See CVS logs. Details at http://www.graphviz.org/ 00012 *************************************************************************/ 00013 00014 00015 #include <stdarg.h> 00016 #include <stdlib.h> 00017 #include "libgraph.h" 00018 #include "parser.h" 00019 #include "triefa.cP" 00020 #include "agxbuf.h" 00021 00022 #ifdef DMALLOC 00023 #include "dmalloc.h" 00024 #endif 00025 00026 #define InfileName (InputFile?InputFile:"<unknown>") 00027 00028 static FILE *Lexer_fp; 00029 static char *LexPtr, *TokenBuf; 00030 static int LineBufSize; 00031 static unsigned char In_comment; 00032 static unsigned char Comment_start; 00033 static unsigned char Start_html_string; 00034 int Line_number; 00035 static char *InputFile; 00036 static int agmaxerr; 00037 00038 static void 00039 storeFileName (char* fname, int len) 00040 { 00041 static int cnt; 00042 static char* buf; 00043 00044 if (len > cnt) { 00045 if (cnt) buf = (char*)realloc (buf, len+1); 00046 else buf = (char*)malloc (len+1); 00047 cnt = len; 00048 } 00049 strcpy (buf, fname); 00050 InputFile = buf; 00051 } 00052 00053 /* Reset line number. 00054 * Argument n is indexed from 1, so we decrement it. 
00055 */ 00056 void agreadline(int n) 00057 { 00058 Line_number = n - 1; 00059 } 00060 00061 int aglinenumber () 00062 { 00063 return Line_number; 00064 } 00065 00066 /* (Re)set file: 00067 */ 00068 void agsetfile(char *f) 00069 { 00070 InputFile = f; 00071 Line_number = 0; 00072 } 00073 00074 void aglexinit(FILE * fp, gets_f mygets) 00075 { 00076 if (Lexer_fp != fp) 00077 LexPtr = NULL; 00078 Lexer_fp = fp; 00079 if (mygets) 00080 AG.fgets = mygets; 00081 if (AG.fgets == NULL) 00082 AG.fgets = fgets; 00083 if (AG.linebuf == NULL) { 00084 LineBufSize = BUFSIZ; 00085 AG.linebuf = N_NEW(LineBufSize, char); 00086 TokenBuf = N_NEW(LineBufSize, char); 00087 } 00088 AG.fgets (AG.linebuf, 0, fp); /* reset mygets */ 00089 AG.syntax_errors = 0; 00090 } 00091 00092 #define ISSPACE(c) ((c != 0) && ((isspace(c) || iscntrl(c)))) 00093 00094 /* skip leading white space and comments in a string p 00095 * whitespace includes control characters 00096 */ 00097 static char *skip_wscomments(char *pp) 00098 { 00099 unsigned char *p = (unsigned char *) pp; 00100 do { 00101 while (ISSPACE(*p)) 00102 p++; 00103 while (In_comment && p[0]) { 00104 while (p[0] && (p[0] != '*')) 00105 p++; 00106 if (p[0]) { 00107 if (p[1] == '/') { 00108 In_comment = FALSE; 00109 p += 2; 00110 break; 00111 } else 00112 p++; 00113 } 00114 } 00115 if (p[0] == '/') { 00116 if (p[1] == '/') 00117 while (*p) 00118 p++; /* skip to end of line */ 00119 else { 00120 if (p[1] == '*') { 00121 In_comment = TRUE; 00122 Comment_start = Line_number; 00123 p += 2; 00124 continue; 00125 } else 00126 break; /* return a slash */ 00127 } 00128 } else { 00129 if (!ISSPACE(*p)) 00130 break; 00131 } 00132 } while (p[0]); 00133 return (char *) p; 00134 } 00135 00136 /* scan an unquoted token and return the position after its terminator */ 00137 static char *scan_token(unsigned char *p, unsigned char *token) 00138 { 00139 unsigned char *q; 00140 00141 q = token; 00142 if (p == '\0') 00143 return NULL; 00144 while (ISALNUM(*p)) { 
00145 *q++ = *p++; 00146 } 00147 *q = '\0'; 00148 return p; 00149 } 00150 00151 static char *scan_num(char *p, char *token) 00152 { 00153 unsigned char *q, *z; 00154 int saw_rp = FALSE; 00155 int saw_digit = FALSE; 00156 00157 z = (unsigned char *) p; 00158 q = (unsigned char *) token; 00159 if (*z == '-') 00160 *q++ = *z++; 00161 if (*z == '.') { 00162 saw_rp = TRUE; 00163 *q++ = *z++; 00164 } 00165 while (isdigit(*z)) { 00166 saw_digit = TRUE; 00167 *q++ = *z++; 00168 } 00169 if ((*z == '.') && (saw_rp == FALSE)) { 00170 saw_rp = TRUE; 00171 *q++ = *z++; 00172 while (isdigit(*z)) { 00173 saw_digit = TRUE; 00174 *q++ = *z++; 00175 } 00176 } 00177 *q = '\0'; 00178 if (saw_digit && *z && ((isalpha(*z)) || (*z == '_'))) { 00179 unsigned char *endp = z + 1; 00180 unsigned char c; 00181 while ((c = *endp) && ((isalpha(c)) || (c == '_'))) 00182 endp++; 00183 *endp = '\0'; 00184 agerr(AGWARN, 00185 "%s:%d: ambiguous \"%s\" splits into two names: \"%s\" and \"%s\"\n", 00186 InfileName, Line_number, p, token, z); 00187 *endp = c; 00188 } 00189 00190 if (saw_digit == FALSE) 00191 z = NULL; 00192 return (char *) z; 00193 } 00194 00195 /* scan a quoted string and return the position after its terminator */ 00196 static char *quoted_string(char *p, char *token) 00197 { 00198 char quote, *q; 00199 00200 quote = *p++; 00201 q = token; 00202 while ((*p) && (*p != quote)) { 00203 if (*p == '\\') { 00204 if (*(p + 1) == quote) 00205 p++; 00206 else { 00207 if (*(p + 1) == '\\') 00208 *q++ = *p++; 00209 } 00210 } 00211 *q++ = *p++; 00212 } 00213 if (*p == '\0') 00214 agerr(AGWARN, "%s:%d: string ran past end of line\n", 00215 InfileName, Line_number); 00216 else 00217 p++; 00218 *q = 0; 00219 return p; 00220 } 00221 00222 int myaglex(void) 00223 { /* for debugging */ 00224 int rv = aglex(); 00225 fprintf(stderr, "returning %d\n", rv); 00226 if (rv == T_symbol) 00227 fprintf(stderr, "string val is %s\n", aglval.str); 00228 return rv; 00229 } 00230 00231 /* 00232 * Return a logical 
line in AG.linebuf. 00233 * In particular, the buffer will contain a '\n' as the last non-null char. 00234 * Ignore lines beginning with '#'; update cpp line number if applicable. 00235 * Fold long lines, i.e., ignore escaped newlines. 00236 * Assume the AG.fgets function reads upto newline or buffer length 00237 * like fgets. 00238 * Need to be careful that AG.fgets might not return full physical line 00239 * because buffer is too small to hold it. 00240 */ 00241 static char *lex_gets(void) 00242 { 00243 char *clp; 00244 int len, curlen; 00245 00246 len = curlen = 0; 00247 00248 do { 00249 /* make sure there is room for at least another SMALLBUF worth */ 00250 if (curlen + SMALLBUF >= LineBufSize) { 00251 LineBufSize += BUFSIZ; 00252 AG.linebuf = (char*)realloc(AG.linebuf, LineBufSize); 00253 TokenBuf = (char*)realloc(TokenBuf, LineBufSize); 00254 } 00255 00256 /* off by one so we can back up in LineBuf */ 00257 clp = AG.fgets (AG.linebuf + curlen + 1, 00258 LineBufSize - curlen - 1, Lexer_fp); 00259 if (clp == NULL) 00260 break; 00261 00262 00263 len = strlen(clp); /* since clp != NULL, len > 0 */ 00264 if (clp[len - 1] == '\n') { /* have physical line */ 00265 if ((clp[0] == '#') && (curlen == 0)) { 00266 /* comment line or cpp line sync */ 00267 int r, cnt; 00268 char buf[2]; 00269 char* s = clp + 1; 00270 00271 if (strncmp(s, "line", 4) == 0) s += 4; 00272 r = sscanf(s, "%d %1[\"]%n", &Line_number, buf, &cnt); 00273 if (r <= 0) Line_number++; 00274 else { /* got line number */ 00275 Line_number--; 00276 if (r > 1) { /* saw quote */ 00277 char* p = s + cnt; 00278 char* e = p; 00279 while (*e && (*e != '"')) e++; 00280 if (e != p) { 00281 *e = '\0'; 00282 storeFileName (p, e-p); 00283 } 00284 } 00285 } 00286 clp[0] = 0; 00287 len = 1; /* this will make the while test below succeed */ 00288 continue; 00289 } 00290 Line_number++; 00291 /* Note it is possible len == 1 and last character in 00292 * previous read was '\\' 00293 * It is also possible to have curlen=0, 
and read in 00294 * "\\\n". 00295 */ 00296 if (clp[len - 2] == '\\') { /* escaped newline */ 00297 len = len - 2; 00298 clp[len] = '\0'; 00299 } 00300 } 00301 curlen += len; 00302 /* the following test relies on having AG.linebuf[0] == '\0' */ 00303 } while (clp[len - 1] != '\n'); 00304 00305 if (curlen > 0) 00306 return AG.linebuf + 1; 00307 else 00308 return NULL; 00309 } 00310 00311 /* html_pair: 00312 * Iteratively scan nested "<...>" 00313 * p points to first character after initial '<' 00314 * Store characters up to but not including matching '>' 00315 * Return pointer to matching '>' 00316 * We do not check for any escape sequences; pure HTML is 00317 * expected, so special characters need to be HTML escapes. 00318 * We read them in and allow the HTML parser to convert them. 00319 */ 00320 static char *html_pair(char *p, agxbuf * tokp) 00321 { 00322 unsigned char c; 00323 int rc, depth = 1; 00324 00325 while (1) { 00326 while ((c = *p)) { 00327 if (c == '>') { 00328 depth--; 00329 if (depth == 0) 00330 return p; /* p points to closing > */ 00331 } else if (c == '<') 00332 depth++; 00333 rc = agxbputc(tokp, c); 00334 p++; 00335 } 00336 if ((p = lex_gets()) == NULL) { 00337 agerr(AGWARN, 00338 "non-terminated HTML string starting line %d, file %s\n", 00339 Start_html_string, InfileName); 00340 return 0; 00341 } 00342 } 00343 } 00344 00345 /* html_string: 00346 * scan an html string and return the position after its terminator 00347 * The string is stored in token. 00348 * p points to the opening <. 00349 */ 00350 00351 static char *html_string(char *p, agxbuf * token) 00352 { 00353 Start_html_string = Line_number; 00354 p = html_pair(p + 1, token); 00355 if (p) 00356 p++; /* skip closing '>' */ 00357 return p; 00358 } 00359 00360 int agtoken(char *p) 00361 { 00362 char ch; 00363 TFA_Init(); 00364 while ((ch = *p)) { 00365 /* any non-ascii characters converted to ascii DEL (127) */ 00366 TFA_Advance(ch & ~127 ? 
127 : ch); 00367 p++; 00368 } 00369 return TFA_Definition(); 00370 } 00371 00372 int aglex(void) 00373 { 00374 int token; 00375 char *tbuf, *p; 00376 static unsigned char BOM[] = { 0xEF, 0xBB, 0xBF }; /* UTF-8 byte order marker */ 00377 00378 /* if the parser has accepted a graph, reset and return EOF */ 00379 if (AG.accepting_state) { 00380 AG.accepting_state = FALSE; 00381 return EOF; 00382 } 00383 00384 /* get a nonempty lex buffer */ 00385 do { 00386 if ((LexPtr == NULL) || (LexPtr[0] == '\0')) 00387 if ((LexPtr = lex_gets()) == NULL) { 00388 if (In_comment) 00389 agerr(AGWARN, "nonterminated comment in line %d\n", 00390 Comment_start); 00391 return EOF; 00392 } 00393 /* skip UTF-8 Byte Order Marker if at beginning of file */ 00394 if ((Line_number == 1) && !strncmp(LexPtr, (char *) BOM, 3)) 00395 LexPtr += 3; 00396 LexPtr = (char *) skip_wscomments(LexPtr); 00397 } while (LexPtr[0] == '\0'); 00398 00399 tbuf = TokenBuf; 00400 00401 /* scan quoted strings */ 00402 if (LexPtr[0] == '\"') { 00403 LexPtr = quoted_string(LexPtr, tbuf); 00404 aglval.str = agstrdup(tbuf); 00405 return T_qsymbol; 00406 } 00407 00408 /* scan HTML strings */ 00409 if (LexPtr[0] == '<') { 00410 agxbuf xb; 00411 unsigned char htmlbuf[BUFSIZ]; 00412 agxbinit(&xb, BUFSIZ, htmlbuf); 00413 LexPtr = html_string(LexPtr, &xb); 00414 aglval.str = agstrdup_html(agxbuse(&xb)); 00415 agxbfree(&xb); 00416 return T_symbol; 00417 } 00418 00419 /* scan edge operator */ 00420 if (AG.edge_op 00421 && (strncmp(LexPtr, AG.edge_op, strlen(AG.edge_op)) == 0)) { 00422 LexPtr += strlen(AG.edge_op); 00423 return T_edgeop; 00424 } 00425 00426 /* scan numbers */ 00427 if ((p = scan_num(LexPtr, tbuf))) { 00428 LexPtr = p; 00429 aglval.str = agstrdup(tbuf); 00430 return T_symbol; 00431 } else { 00432 unsigned char uc = *(unsigned char *) LexPtr; 00433 if (ispunct(uc) && (uc != '_')) 00434 return *LexPtr++; 00435 else 00436 LexPtr = scan_token(LexPtr, tbuf); 00437 } 00438 00439 /* scan other tokens */ 00440 token = 
agtoken(tbuf); 00441 if (token == -1) { 00442 aglval.str = agstrdup(tbuf); 00443 token = T_symbol; 00444 } 00445 return token; 00446 } 00447 00448 static void error_context(void) 00449 { 00450 char *p; 00451 char c; 00452 char *buf = AG.linebuf + 1; /* characters are always put at AG.linebuf[1] */ 00453 /* or later; AG.linebuf[0] = '\0' */ 00454 00455 if (LexPtr == NULL) 00456 return; 00457 agerr(AGPREV, "context: "); 00458 for (p = LexPtr - 1; (p > buf) && (!isspace(*(unsigned char *) p)); 00459 p--); 00460 if (buf < p) { 00461 c = *p; 00462 *p = '\0'; 00463 agerr(AGPREV, buf); 00464 *p = c; 00465 } 00466 agerr(AGPREV, " >>> "); 00467 c = *LexPtr; 00468 *LexPtr = '\0'; 00469 agerr(AGPREV, p); 00470 *LexPtr = c; 00471 agerr(AGPREV, " <<< "); 00472 agerr(AGPREV, LexPtr); 00473 } 00474 00475 void agerror(char *msg) 00476 { 00477 if (AG.syntax_errors++) 00478 return; 00479 agerr(AGERR, "%s:%d: %s near line %d\n", 00480 InfileName, Line_number, msg, Line_number); 00481 error_context(); 00482 } 00483 00484 agerrlevel_t agerrno; /* Last error */ 00485 static agerrlevel_t agerrlevel = AGWARN; /* Report errors >= agerrlevel */ 00486 static long aglast; /* Last message */ 00487 static FILE *agerrout; /* Message file */ 00488 static agusererrf usererrf; /* User-set error function */ 00489 00490 agusererrf 00491 agseterrf (agusererrf newf) 00492 { 00493 agusererrf oldf = usererrf; 00494 usererrf = newf; 00495 return oldf; 00496 } 00497 00498 void agseterr(agerrlevel_t lvl) 00499 { 00500 agerrlevel = lvl; 00501 } 00502 00503 int agerrors(void) 00504 { 00505 return MAX(agmaxerr, AG.syntax_errors); 00506 } 00507 00508 int agreseterrors(void) 00509 { 00510 int rc = MAX(agmaxerr, AG.syntax_errors); 00511 agmaxerr = 0; 00512 return rc; 00513 } 00514 00515 char *aglasterr() 00516 { 00517 long endpos; 00518 long len; 00519 char *buf; 00520 00521 if (!agerrout) 00522 return 0; 00523 fflush(agerrout); 00524 endpos = ftell(agerrout); 00525 len = endpos - aglast; 00526 buf = 
(char*)malloc(len + 1); 00527 fseek(agerrout, aglast, SEEK_SET); 00528 fread(buf, sizeof(char), len, agerrout); 00529 buf[len] = '\0'; 00530 fseek(agerrout, endpos, SEEK_SET); 00531 00532 return buf; 00533 } 00534 00535 static void 00536 userout (agerrlevel_t level, const char *fmt, va_list args) 00537 { 00538 static char* buf; 00539 static int bufsz = 1024; 00540 char* np; 00541 int n; 00542 00543 if (!buf) { 00544 buf = (char*)malloc(bufsz); 00545 if (!buf) { 00546 fputs("userout: could not allocate memory\n", stderr ); 00547 return; 00548 } 00549 } 00550 00551 if (level != AGPREV) { 00552 usererrf ((level == AGERR) ? "Error" : "Warning"); 00553 usererrf (": "); 00554 } 00555 00556 while (1) { 00557 n = vsnprintf(buf, bufsz, fmt, args); 00558 if ((n > -1) && (n < bufsz)) { 00559 usererrf (buf); 00560 break; 00561 } 00562 bufsz = MAX(bufsz*2,n+1); 00563 if ((np = (char*)realloc(buf, bufsz)) == NULL) { 00564 fputs("userout: could not allocate memory\n", stderr ); 00565 return; 00566 } 00567 } 00568 va_end(args); 00569 } 00570 00571 /* agerr_va: 00572 * Main error reporting function 00573 */ 00574 static int agerr_va(agerrlevel_t level, const char *fmt, va_list args) 00575 { 00576 agerrlevel_t lvl; 00577 00578 /* Use previous error level if continuation message; 00579 * Convert AGMAX to AGERROR; 00580 * else use input level 00581 */ 00582 lvl = (level == AGPREV ? agerrno : (level == AGMAX) ? AGERR : level); 00583 00584 /* store this error level and maximum error level used */ 00585 agerrno = lvl; 00586 agmaxerr = MAX(agmaxerr, agerrno); 00587 00588 /* We report all messages whose level is bigger than the user set agerrlevel 00589 * Setting agerrlevel to AGMAX turns off immediate error reporting. 00590 */ 00591 if (lvl >= agerrlevel) { 00592 if (usererrf) 00593 userout (level, fmt, args); 00594 else { 00595 if (level != AGPREV) 00596 fprintf(stderr, "%s: ", (level == AGERR) ? 
"Error" : "Warning"); 00597 vfprintf(stderr, fmt, args); 00598 va_end(args); 00599 } 00600 return 0; 00601 } 00602 00603 /* If error is not immediately reported, store in log file */ 00604 if (!agerrout) { 00605 agerrout = tmpfile(); 00606 if (!agerrout) 00607 return 1; 00608 } 00609 00610 if (level != AGPREV) 00611 aglast = ftell(agerrout); 00612 vfprintf(agerrout, fmt, args); 00613 va_end(args); 00614 return 0; 00615 } 00616 00617 /* agerr: 00618 * Varargs function for reporting errors with level argument 00619 */ 00620 int agerr(agerrlevel_t level, char *fmt, ...) 00621 { 00622 va_list args; 00623 00624 va_start(args, fmt); 00625 return agerr_va(level, fmt, args); 00626 } 00627 00628 /* agerrorf: 00629 * Varargs function for reporting errors 00630 */ 00631 void agerrorf(const char *fmt, ...) 00632 { 00633 va_list args; 00634 00635 va_start(args, fmt); 00636 agerr_va(AGERR, fmt, args); 00637 } 00638 00639 /* agwarningf: 00640 * Varargs function for reporting warnings 00641 */ 00642 void agwarningf(char *fmt, ...) 00643 { 00644 va_list args; 00645 00646 va_start(args, fmt); 00647 agerr_va(AGWARN, fmt, args); 00648 }
1.7.5