Graphviz  2.29.20120524.0446
lib/common/labels.c
Go to the documentation of this file.
00001 /* $Id$ $Revision$ */
00002 /* vim:set shiftwidth=4 ts=8: */
00003 
00004 /*************************************************************************
00005  * Copyright (c) 2011 AT&T Intellectual Property 
00006  * All rights reserved. This program and the accompanying materials
00007  * are made available under the terms of the Eclipse Public License v1.0
00008  * which accompanies this distribution, and is available at
00009  * http://www.eclipse.org/legal/epl-v10.html
00010  *
00011  * Contributors: See CVS logs. Details at http://www.graphviz.org/
00012  *************************************************************************/
00013 
00014 
00015 #include "render.h"
00016 #include "htmltable.h"
00017 #include <limits.h>
00018 
00019 static char *strdup_and_subst_obj0 (char *str, void *obj, int escBackslash);
00020 
00021 static void storeline(graph_t *g, textlabel_t *lp, char *line, char terminator)
00022 {
00023     pointf size;
00024     textpara_t *para;
00025     int oldsz = lp->u.txt.nparas + 1;
00026 
00027     lp->u.txt.para = ZALLOC(oldsz + 1, lp->u.txt.para, textpara_t, oldsz);
00028     para = &(lp->u.txt.para[lp->u.txt.nparas]);
00029     para->str = line;
00030     para->just = terminator;
00031     if (line && line[0])
00032         size = textsize(g, para, lp->fontname, lp->fontsize);
00033     else {
00034         size.x = 0.0;
00035         para->height = size.y = (int)(lp->fontsize * LINESPACING);
00036     }
00037 
00038     lp->u.txt.nparas++;
00039     /* width = max line width */
00040     lp->dimen.x = MAX(lp->dimen.x, size.x);
00041     /* accumulate height */
00042     lp->dimen.y += size.y;
00043 }
00044 
00045 /* compiles <str> into a label <lp> */
00046 void make_simple_label(graph_t * g, textlabel_t * lp)
00047 {
00048     char c, *p, *line, *lineptr, *str = lp->text;
00049     unsigned char byte = 0x00;
00050 
00051     lp->dimen.x = lp->dimen.y = 0.0;
00052     if (*str == '\0')
00053         return;
00054 
00055     line = lineptr = NULL;
00056     p = str;
00057     line = lineptr = N_GNEW(strlen(p) + 1, char);
00058     *line = 0;
00059     while ((c = *p++)) {
00060         byte = (unsigned int) c;
00061         /* wingraphviz allows a combination of ascii and big-5. The latter
00062          * is a two-byte encoding, with the first byte in 0xA1-0xFE, and
00063          * the second in 0x40-0x7e or 0xa1-0xfe. We assume that the input
00064          * is well-formed, but check that we don't go past the ending '\0'.
00065          */
00066         if ((lp->charset == CHAR_BIG5) && 0xA1 <= byte && byte <= 0xFE) {
00067             *lineptr++ = c;
00068             c = *p++;
00069             *lineptr++ = c;
00070             if (!c) /* NB. Protect against unexpected string end here */
00071                 break;
00072         } else {
00073             if (c == '\\') {
00074                 switch (*p) {
00075                 case 'n':
00076                 case 'l':
00077                 case 'r':
00078                     *lineptr++ = '\0';
00079                     storeline(g, lp, line, *p);
00080                     line = lineptr;
00081                     break;
00082                 default:
00083                     *lineptr++ = *p;
00084                 }
00085                 if (*p)
00086                     p++;
00087                 /* tcldot can enter real linend characters */
00088             } else if (c == '\n') {
00089                 *lineptr++ = '\0';
00090                 storeline(g, lp, line, 'n');
00091                 line = lineptr;
00092             } else {
00093                 *lineptr++ = c;
00094             }
00095         }
00096     }
00097 
00098     if (line != lineptr) {
00099         *lineptr++ = '\0';
00100         storeline(g, lp, line, 'n');
00101     }
00102 
00103     lp->space = lp->dimen;
00104 }
00105 
00106 /* make_label:
00107  * Assume str is freshly allocated for this instance, so it
00108  * can be freed in free_label.
00109  */
00110 textlabel_t *make_label(void *obj, char *str, int kind, double fontsize, char *fontname, char *fontcolor)
00111 {
00112     textlabel_t *rv = NEW(textlabel_t);
00113     graph_t *g = NULL, *sg = NULL;
00114     node_t *n = NULL;
00115     edge_t *e = NULL;
00116         char *s;
00117 
00118     switch (agobjkind(obj)) {
00119 #ifndef WITH_CGRAPH
00120     case AGGRAPH:
00121 #else
00122     case AGRAPH:
00123 #endif
00124         sg = (graph_t*)obj;
00125         g = sg->root;
00126         break;
00127     case AGNODE:
00128         n = (node_t*)obj;
00129         g = agroot(agraphof(n));
00130         break;
00131     case AGEDGE:
00132         e = (edge_t*)obj;
00133         g = agroot(agraphof(aghead(e)));
00134         break;
00135     }
00136     rv->fontname = fontname;
00137     rv->fontcolor = fontcolor;
00138     rv->fontsize = fontsize;
00139     rv->charset = GD_charset(g);
00140     if (kind & LT_RECD) {
00141         rv->text = strdup(str);
00142         if (kind & LT_HTML) {
00143             rv->html = TRUE;
00144         }
00145     }
00146     else if (kind == LT_HTML) {
00147         rv->text = strdup(str);
00148         rv->html = TRUE;
00149         if (make_html_label(obj, rv)) {
00150             switch (agobjkind(obj)) {
00151 #ifndef WITH_CGRAPH
00152             case AGGRAPH:
00153 #else
00154             case AGRAPH:
00155 #endif
00156                 agerr(AGPREV, "in label of graph %s\n",agnameof(sg));
00157                 break;
00158             case AGNODE:
00159                 agerr(AGPREV, "in label of node %s\n", agnameof(n));
00160                 break;
00161             case AGEDGE:
00162                 agerr(AGPREV, "in label of edge %s %s %s\n",
00163                         agnameof(agtail(e)), agisdirected(g)?"->":"--", agnameof(aghead(e)));
00164                 break;
00165             }
00166         }
00167     }
00168     else {
00169         assert(kind == LT_NONE);
00170         /* This call just processes the graph object based escape sequences. The formatting escape
00171          * sequences (\n, \l, \r) are processed in make_simple_label. That call also replaces \\ with \.
00172          */
00173         rv->text = strdup_and_subst_obj0(str, obj, 0);
00174         switch (rv->charset) {
00175         case CHAR_LATIN1:
00176             s = latin1ToUTF8(rv->text);
00177             break;
00178         default: /* UTF8 */
00179             s = htmlEntityUTF8(rv->text, g);
00180             break;
00181         }
00182         free(rv->text);
00183         rv->text = s;
00184         make_simple_label(g, rv);
00185     }
00186     return rv;
00187 }
00188 
00189 /* free_textpara:
00190  * Free resources related to textpara_t.
00191  * tl is an array of cnt textpara_t's.
00192  * It is also assumed that the text stored in the str field
00193  * is all stored in one large buffer shared by all of the textpara_t,
00194  * so only the first one needs to free its tlp->str.
00195  */
00196 void free_textpara(textpara_t * tl, int cnt)
00197 {
00198     int i;
00199     textpara_t* tlp = tl;
00200 
00201     if (!tl) return;
00202     for (i = 0; i < cnt; i++) { 
00203         if ((i == 0) && tlp->str)
00204             free(tlp->str);
00205         if (tlp->layout && tlp->free_layout)
00206             tlp->free_layout (tlp->layout);
00207         tlp++;
00208     }
00209     free(tl);
00210 }
00211 
00212 void free_label(textlabel_t * p)
00213 {
00214     if (p) {
00215         free(p->text);
00216         if (p->html) {
00217             free_html_label(p->u.html, 1);
00218         } else {
00219             free_textpara(p->u.txt.para, p->u.txt.nparas);
00220         }
00221         free(p);
00222     }
00223 }
00224 
00225 void emit_label(GVJ_t * job, emit_state_t emit_state, textlabel_t * lp)
00226 {
00227     obj_state_t *obj = job->obj;
00228     int i;
00229     pointf p;
00230     emit_state_t old_emit_state;
00231 
00232     old_emit_state = obj->emit_state;
00233     obj->emit_state = emit_state;
00234 
00235     if (lp->html) {
00236         emit_html_label(job, lp->u.html, lp);
00237         obj->emit_state = old_emit_state;
00238         return;
00239     }
00240 
00241     /* make sure that there is something to do */
00242     if (lp->u.txt.nparas < 1)
00243         return;
00244 
00245     gvrender_begin_label(job, LABEL_PLAIN);
00246     gvrender_set_pencolor(job, lp->fontcolor);
00247 
00248     /* position for first para */
00249     switch (lp->valign) {
00250         case 't':
00251             p.y = lp->pos.y + lp->space.y / 2.0 - lp->fontsize;
00252             break;
00253         case 'b':
00254             p.y = lp->pos.y - lp->space.y / 2.0 + lp->dimen.y - lp->fontsize;
00255             break;
00256         case 'c':
00257         default:        
00258             p.y = lp->pos.y + lp->dimen.y / 2.0 - lp->fontsize;
00259             break;
00260     }
00261     for (i = 0; i < lp->u.txt.nparas; i++) {
00262         switch (lp->u.txt.para[i].just) {
00263         case 'l':
00264             p.x = lp->pos.x - lp->space.x / 2.0;
00265             break;
00266         case 'r':
00267             p.x = lp->pos.x + lp->space.x / 2.0;
00268             break;
00269         default:
00270         case 'n':
00271             p.x = lp->pos.x;
00272             break;
00273         }
00274         gvrender_textpara(job, p, &(lp->u.txt.para[i]));
00275 
00276         /* UL position for next para */
00277         p.y -= lp->u.txt.para[i].height;
00278     }
00279 
00280     gvrender_end_label(job);
00281     obj->emit_state = old_emit_state;
00282 }
00283 
00284 /* strdup_and_subst_obj0:
00285  * Replace various escape sequences with the name of the associated
00286  * graph object. A double backslash \\ can be used to avoid a replacement.
00287  * If escBackslash is true, convert \\ to \; else leave alone. All other dyads 
00288  * of the form \. are passed through unchanged.
00289  */
00290 static char *strdup_and_subst_obj0 (char *str, void *obj, int escBackslash)
00291 {
00292     char c, *s, *p, *t, *newstr;
00293     char *tp_str = "", *hp_str = "";
00294     char *g_str = "\\G", *n_str = "\\N", *e_str = "\\E",
00295         *h_str = "\\H", *t_str = "\\T", *l_str = "\\L";
00296     int g_len = 2, n_len = 2, e_len = 2,
00297         h_len = 2, t_len = 2, l_len = 2,
00298         tp_len = 0, hp_len = 0;
00299     int newlen = 0;
00300     int isEdge = 0;
00301     textlabel_t *tl;
00302     port pt;
00303 
00304     /* prepare substitution strings */
00305     switch (agobjkind(obj)) {
00306 #ifndef WITH_CGRAPH
00307         case AGGRAPH:
00308 #else
00309         case AGRAPH:
00310 #endif
00311             g_str = agnameof((graph_t *)obj);
00312             g_len = strlen(g_str);
00313             tl = GD_label((graph_t *)obj);
00314             if (tl) {
00315                 l_str = tl->text;
00316                 if (str) l_len = strlen(l_str);
00317             }
00318             break;
00319         case AGNODE:
00320             g_str = agnameof(agraphof((node_t *)obj));
00321             g_len = strlen(g_str);
00322             n_str = agnameof((node_t *)obj);
00323             n_len = strlen(n_str);
00324             tl = ND_label((node_t *)obj);
00325             if (tl) {
00326                 l_str = tl->text;
00327                 if (str) l_len = strlen(l_str);
00328             }
00329             break;
00330         case AGEDGE:
00331             isEdge = 1;
00332             g_str = agnameof(agroot(agraphof(agtail(((edge_t *)obj)))));
00333             g_len = strlen(g_str);
00334             t_str = agnameof(agtail(((edge_t *)obj)));
00335             t_len = strlen(t_str);
00336             pt = ED_tail_port((edge_t *)obj);
00337             if ((tp_str = pt.name))
00338                 tp_len = strlen(tp_str);
00339             h_str = agnameof(aghead(((edge_t *)obj)));
00340             h_len = strlen(h_str);
00341             pt = ED_head_port((edge_t *)obj);
00342             if ((hp_str = pt.name))
00343                 hp_len = strlen(hp_str);
00344             h_len = strlen(h_str);
00345             tl = ED_label((edge_t *)obj);
00346             if (tl) {
00347                 l_str = tl->text;
00348                 if (str) l_len = strlen(l_str);
00349             }
00350             if (agisdirected(agroot(agraphof(agtail(((edge_t*)obj))))))
00351                 e_str = "->";
00352             else
00353                 e_str = "--";
00354             e_len = t_len + (tp_len?tp_len+1:0) + 2 + h_len + (hp_len?hp_len+1:0);
00355             break;
00356     }
00357 
00358     /* two passes over str.
00359      *
00360      * first pass prepares substitution strings and computes 
00361      * total length for newstring required from malloc.
00362      */
00363     for (s = str; (c = *s++);) {
00364         if (c == '\\') {
00365             switch (c = *s++) {
00366             case 'G':
00367                 newlen += g_len;
00368                 break;
00369             case 'N':
00370                 newlen += n_len;
00371                 break;
00372             case 'E':
00373                 newlen += e_len;
00374                 break;
00375             case 'H':
00376                 newlen += h_len;
00377                 break;
00378             case 'T':
00379                 newlen += t_len;
00380                 break; 
00381             case 'L':
00382                 newlen += l_len;
00383                 break; 
00384             case '\\':
00385                 if (escBackslash) {
00386                     newlen += 1;
00387                     break; 
00388                 }
00389                 /* Fall through */
00390             default:  /* leave other escape sequences unmodified, e.g. \n \l \r */
00391                 newlen += 2;
00392             }
00393         } else {
00394             newlen++;
00395         }
00396     }
00397     /* allocate new string */
00398     newstr = gmalloc(newlen + 1);
00399 
00400     /* second pass over str assembles new string */
00401     for (s = str, p = newstr; (c = *s++);) {
00402         if (c == '\\') {
00403             switch (c = *s++) {
00404             case 'G':
00405                 for (t = g_str; (*p = *t++); p++);
00406                 break;
00407             case 'N':
00408                 for (t = n_str; (*p = *t++); p++);
00409                 break;
00410             case 'E':
00411                 if (isEdge) {
00412                     for (t = t_str; (*p = *t++); p++);
00413                     if (tp_len) {
00414                         *p++ = ':';
00415                         for (t = tp_str; (*p = *t++); p++);
00416                     }
00417                     for (t = e_str; (*p = *t++); p++);
00418                     for (t = h_str; (*p = *t++); p++);
00419                     if (hp_len) {
00420                         *p++ = ':';
00421                         for (t = hp_str; (*p = *t++); p++);
00422                     }
00423                 }
00424                 break;
00425             case 'T':
00426                 for (t = t_str; (*p = *t++); p++);
00427                 break;
00428             case 'H':
00429                 for (t = h_str; (*p = *t++); p++);
00430                 break;
00431             case 'L':
00432                 for (t = l_str; (*p = *t++); p++);
00433                 break;
00434             case '\\':
00435                 if (escBackslash) {
00436                     *p++ = '\\';
00437                     break; 
00438                 }
00439                 /* Fall through */
00440             default:  /* leave other escape sequences unmodified, e.g. \n \l \r */
00441                 *p++ = '\\';
00442                 *p++ = c;
00443                 break;
00444             }
00445         } else {
00446             *p++ = c;
00447         }
00448     }
00449     *p++ = '\0';
00450     return newstr;
00451 }
00452 
/* strdup_and_subst_obj:
 * Public entry point for graph-object escape substitution: same as
 * strdup_and_subst_obj0 with backslash collapsing enabled, so \\ in str
 * becomes a single \ in the result.
 */
char *strdup_and_subst_obj(char *str, void *obj)
{
    const int collapse_backslash = 1;

    return strdup_and_subst_obj0(str, obj, collapse_backslash);
}
00460 
/* xml_isentity:
 * s must point at a '&'. Return 1 if the text starting there has the
 * form of an entity or character reference, 0 otherwise:
 *      &[A-Za-z]+;         (e.g. &Ccedil; )
 *      &#[0-9]+;           (e.g. &#38; )
 *      &#x[0-9a-fA-F]+;    (e.g. &#x6C34; )
 * At least one name character or digit is required, so "&;", "&#;" and
 * "&#x;" are NOT entities and their '&' gets escaped by the callers.
 */
static int xml_isentity(char *s)
{
    char *body;                 /* first character of the name/digits */

    s++;                        /* already known to be '&' */
    if (*s == '#') {
        s++;
        if (*s == 'x' || *s == 'X') {
            s++;
            body = s;
            while ((*s >= '0' && *s <= '9')
                   || (*s >= 'a' && *s <= 'f')
                   || (*s >= 'A' && *s <= 'F'))
                s++;
        } else {
            body = s;
            while (*s >= '0' && *s <= '9')
                s++;
        }
    } else {
        body = s;
        while ((*s >= 'a' && *s <= 'z')
               || (*s >= 'A' && *s <= 'Z'))
            s++;
    }
    /* non-empty body, immediately followed by the closing ';' */
    if (s > body && *s == ';')
        return 1;
    return 0;
}
00489 
00490 char *xml_string(char *s)
00491 {
00492     static char *buf = NULL;
00493     static int bufsize = 0;
00494     char *p, *sub, *prev = NULL;
00495     int len, pos = 0;
00496 
00497     if (!buf) {
00498         bufsize = 64;
00499         buf = gmalloc(bufsize);
00500     }
00501 
00502     p = buf;
00503     while (s && *s) {
00504         if (pos > (bufsize - 8)) {
00505             bufsize *= 2;
00506             buf = grealloc(buf, bufsize);
00507             p = buf + pos;
00508         }
00509         /* escape '&' only if not part of a legal entity sequence */
00510         if (*s == '&' && !(xml_isentity(s))) {
00511             sub = "&amp;";
00512             len = 5;
00513         }
00514         /* '<' '>' are safe to substitute even if string is already UTF-8 coded
00515          * since UTF-8 strings won't contain '<' or '>' */
00516         else if (*s == '<') {
00517             sub = "&lt;";
00518             len = 4;
00519         }
00520         else if (*s == '>') {
00521             sub = "&gt;";
00522             len = 4;
00523         }
00524         else if (*s == '-') {   /* can't be used in xml comment strings */
00525             sub = "&#45;";
00526             len = 5;
00527         }
00528         else if (*s == ' ' && prev && *prev == ' ') {
00529             /* substitute 2nd and subsequent spaces with required_spaces */
00530             sub = "&#160;";  /* inkscape doesn't recognise &nbsp; */
00531             len = 6;
00532         }
00533         else if (*s == '"') {
00534             sub = "&quot;";
00535             len = 6;
00536         }
00537         else if (*s == '\'') {
00538             sub = "&#39;";
00539             len = 5;
00540         }
00541         else {
00542             sub = s;
00543             len = 1;
00544         }
00545         while (len--) {
00546             *p++ = *sub++;
00547             pos++;
00548         }
00549         prev = s;
00550         s++;
00551     }
00552     *p = '\0';
00553     return buf;
00554 }
00555 
00556 /* a variant of xml_string for urls in hrefs */
00557 char *xml_url_string(char *s)
00558 {
00559     static char *buf = NULL;
00560     static int bufsize = 0;
00561     char *p, *sub, *prev = NULL;
00562     int len, pos = 0;
00563 
00564     if (!buf) {
00565         bufsize = 64;
00566         buf = gmalloc(bufsize);
00567     }
00568 
00569     p = buf;
00570     while (s && *s) {
00571         if (pos > (bufsize - 8)) {
00572             bufsize *= 2;
00573             buf = grealloc(buf, bufsize);
00574             p = buf + pos;
00575         }
00576         /* escape '&' only if not part of a legal entity sequence */
00577         if (*s == '&' && !(xml_isentity(s))) {
00578             sub = "&amp;";
00579             len = 5;
00580         }
00581         /* '<' '>' are safe to substitute even if string is already UTF-8 coded
00582          * since UTF-8 strings won't contain '<' or '>' */
00583         else if (*s == '<') {
00584             sub = "&lt;";
00585             len = 4;
00586         }
00587         else if (*s == '>') {
00588             sub = "&gt;";
00589             len = 4;
00590         }
00591 #if 0
00592         else if (*s == '-') {   /* can't be used in xml comment strings */
00593             sub = "&#45;";
00594             len = 5;
00595         }
00596         else if (*s == ' ' && prev && *prev == ' ') {
00597             /* substitute 2nd and subsequent spaces with required_spaces */
00598             sub = "&#160;";  /* inkscape doesn't recognise &nbsp; */
00599             len = 6;
00600         }
00601 #endif
00602         else if (*s == '"') {
00603             sub = "&quot;";
00604             len = 6;
00605         }
00606         else if (*s == '\'') {
00607             sub = "&#39;";
00608             len = 5;
00609         }
00610         else {
00611             sub = s;
00612             len = 1;
00613         }
00614         while (len--) {
00615             *p++ = *sub++;
00616             pos++;
00617         }
00618         prev = s;
00619         s++;
00620     }
00621     *p = '\0';
00622     return buf;
00623 }