VPR-7.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
ezxml.c
Go to the documentation of this file.
1 /* ezxml.c
2  *
3  * Copyright 2004-2006 Aaron Voisine <aaron@voisine.org>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #ifndef EZXML_NOMMAP
26 #define EZXML_NOMMAP
27 #endif /* EXXML_NOMMAP */
28 
29 /* Ted Campbell, Aug 14 2007 */
30 #if defined(WIN32) || defined(_WIN32)
31 #include <io.h>
32 #endif /* WIN32 */
33 
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <stdarg.h>
37 #include <string.h>
38 #include <ctype.h>
39 
40 /* Ted Campbell, Aug 14 2007 */
41 #if !defined(WIN32) && !defined(_WIN32)
42 #include <unistd.h>
43 #endif
44 
45 #include <sys/types.h>
46 #ifndef EZXML_NOMMAP
47 #include <sys/mman.h>
48 #endif /* EZXML_NOMMAP */
49 #include <sys/stat.h>
50 #include "ezxml.h"
51 
52 /* Ted Campbell, Aug 14, 2007 */
53 #include "util.h"
54 
55 /* Ted Campbell, Aug 14, 2007 */
56 #if defined(WIN32) || defined(_WIN32)
57 #define snprintf _snprintf
58 #define open _open
59 #define read _read
60 #define write _write
61 #define close _close
62 #endif /* WIN32 */
63 
64 #define EZXML_WS "\t\r\n " /* characters treated as whitespace by the parser */
/* Shared empty list: ezxml_pi() returns it when nothing matches, and */
/* ezxml_free_attr() recognises it and never frees it. */
65 char *EZXML_NIL[] = { NULL }; /* empty, null terminated array of strings */
66 
/* Forward declarations of the file-local helpers defined further down. */
67 static ezxml_t ezxml_vget(ezxml_t xml, va_list ap);
68 static char *ezxml_decode(char *s, char **ent, char t);
69 static void ezxml_open_tag(ezxml_root_t root, int line, char *name, char **attr);
70 static void ezxml_char_content(ezxml_root_t root, char *s,
71  size_t len, char t);
72 static ezxml_t ezxml_close_tag(ezxml_root_t root, char *name, char *s);
73 static int ezxml_ent_ok(char *name, char *s, char **ent);
74 static void ezxml_proc_inst(ezxml_root_t root, char *s, size_t len);
75 static short ezxml_internal_dtd(ezxml_root_t root, char *s,
76  size_t len);
77 static char *ezxml_str2utf8(char **s, size_t * len);
78 static void ezxml_free_attr(char **attr);
79 static char *ezxml_ampencode(const char *s, size_t len, char **dst,
80  size_t * dlen, size_t * max, short a);
81 static char *ezxml_toxml_r(ezxml_t xml, char **s, size_t * len, size_t * max,
82  size_t start, char ***attr);
83 
84 /* returns the first child tag with the given name or NULL if not found */
85 ezxml_t ezxml_child(ezxml_t xml, const char *name) {
86  xml = (xml) ? xml->child : NULL;
87  while (xml && strcmp(name, xml->name))
88  xml = xml->sibling;
89  return xml;
90 }
91 
92 /* returns the Nth tag with the same name in the same subsection or NULL if not */
93 /* found */
94 ezxml_t ezxml_idx(ezxml_t xml, int idx) {
95  for (; xml && idx; idx--)
96  xml = xml->next;
97  return xml;
98 }
99 
100 /* returns the value of the requested tag attribute or NULL if not found */
101 const char *
102 ezxml_attr(ezxml_t xml, const char *attr) {
103  int i = 0, j = 1;
104  ezxml_root_t root = (ezxml_root_t) xml;
105 
106  if (!xml || !xml->attr)
107  return NULL;
108  while (xml->attr[i] && strcmp(attr, xml->attr[i]))
109  i += 2;
110  if (xml->attr[i])
111  return xml->attr[i + 1]; /* found attribute */
112 
113  while (root->xml.parent)
114  root = (ezxml_root_t) root->xml.parent; /* root tag */
115  for (i = 0; root->attr[i] && strcmp(xml->name, root->attr[i][0]); i++)
116  ;
117  if (!root->attr[i])
118  return NULL; /* no matching default attributes */
119  while (root->attr[i][j] && strcmp(attr, root->attr[i][j]))
120  j += 3;
121  return (root->attr[i][j]) ? root->attr[i][j + 1] : NULL; /* found default */
122 }
123 
124 /* same as ezxml_get but takes an already initialized va_list */
125 ezxml_t ezxml_vget(ezxml_t xml, va_list ap) {
126  char *name = va_arg(ap, char *);
127  int idx = -1;
128 
129  if (name && *name) {
130  idx = va_arg(ap, int);
131 
132  xml = ezxml_child(xml, name);
133  }
134  return (idx < 0) ? xml : ezxml_vget(ezxml_idx(xml, idx), ap);
135 }
136 
137 /* Traverses the xml tree to retrieve a specific subtag. Takes a variable */
138 /* length list of alternating tag names and indexes. The argument list must */
139 /* be terminated by either an index of -1 or an empty string tag name. Example: */
140 /* title = ezxml_get(library, "shelf", 0, "book", 2, "title", -1); */
141 /* This retrieves the title of the 3rd book on the 1st shelf of library. */
142 /* Returns NULL if not found. */
/* NOTE(review): the line carrying this function's signature is absent from */
/* this listing; the body below uses a parameter named xml as the last fixed */
/* argument before the variable list. */
144  va_list ap;
145  ezxml_t r;
146 
/* start the variable argument list and let ezxml_vget() do the walking */
147  va_start(ap, xml);
148  r = ezxml_vget(xml, ap);
149  va_end(ap);
150  return r;
151 }
152 
153 /* returns a null terminated array of processing instructions for the given */
154 /* target */
155 char **
156 ezxml_pi(ezxml_t xml, const char *target) {
157  ezxml_root_t root = (ezxml_root_t) xml;
158  int i = 0;
159 
160  if (!root)
161  return EZXML_NIL;
162  while (root->xml.parent)
163  root = (ezxml_root_t) root->xml.parent; /* root tag */
164  while (root->pi[i] && strcmp(target, root->pi[i][0]))
165  i++; /* find target */
166  return ((root->pi[i]) ? root->pi[i] + 1 : EZXML_NIL);
167 }
168 
169 /* set an error string and return root */
170 static ezxml_t ezxml_err(ezxml_root_t root, char *s, const char *err, ...) {
171  va_list ap;
172  int line = 1;
173  char *t, fmt[EZXML_ERRL];
174 
175  for (t = root->s; t < s; t++)
176  if (*t == '\n')
177  line++;
178  snprintf(fmt, EZXML_ERRL, "[error near line %d]: %s", line, err);
179 
180  va_start(ap, err);
181  vsnprintf(root->err, EZXML_ERRL, fmt, ap);
182  va_end(ap);
183 
184  return &root->xml;
185 }
186 
/* Decodes entity and character references in s and normalizes line endings. */
/* */
/* s   - writable, NUL terminated text; it is edited in place */
/* ent - NULL terminated array of alternating entity names (each ending in */
/*       ';') and replacement values */
/* t   - decoding mode: '&' general entities, '%' parameter entities, 'c' */
/*       cdata sections (no reference decoding), ' ' attribute normalization, */
/*       '*' non-cdata attribute normalization (also collapses and trims */
/*       spaces) */
/* */
/* Returns s, or, if an entity expansion needed more room than s offered, a */
/* malloced string that the caller must free. Replacement text is rescanned, */
/* so references inside entity values are decoded as well. If memory for an */
/* expansion cannot be obtained, that reference is left undecoded. */
static char *
ezxml_decode(char *s, char **ent, char t) {
    char *e, *grown, *r = s, *m = s;
    long b, c, d, l;

    /* normalize line endings: "\r\n" and a lone "\r" both become "\n" */
    for (; *s; s++) {
        while (*s == '\r') {
            *(s++) = '\n';
            if (*s == '\n')
                memmove(s, (s + 1), strlen(s));
        }
        if (!*s)
            break; /* text ended with '\r': do not step past the NUL */
    }

    for (s = r;;) {
        /* ctype functions require an unsigned char value */
        while (*s && *s != '&' && (*s != '%' || t != '%')
                && !isspace((unsigned char) *s))
            s++;
        if (!*s)
            break;
        else if (t != 'c' && !strncmp(s, "&#", 2)) { /* character reference */
            if (s[2] == 'x')
                c = strtol(s + 3, &e, 16); /* base 16 */
            else
                c = strtol(s + 2, &e, 10); /* base 10 */
            /* zero, negative or unencodable values are not character refs */
            if (c <= 0 || c > 0x7FFFFFFFL || *e != ';') {
                s++;
                continue;
            }
            if (c < 0x80)
                *(s++) = (char) c; /* US-ASCII subset */
            else { /* multi-byte UTF-8 sequence */
                for (b = 0, d = c; d; d /= 2)
                    b++; /* number of bits in c */
                b = (b - 2) / 5; /* number of bytes in payload */
                *(s++) = (char) ((0xFF << (7 - b)) | (c >> (6 * b))); /* head */
                while (b)
                    *(s++) = (char) (0x80 | ((c >> (6 * --b)) & 0x3F)); /* payload */
            }

            /* close the gap left by the (longer) textual reference */
            memmove(s, strchr(s, ';') + 1, strlen(strchr(s, ';')));
        } else if ((*s == '&' && (t == '&' || t == ' ' || t == '*'))
                || (*s == '%' && t == '%')) { /* entity reference */
            for (b = 0; ent[b] && strncmp(s + 1, ent[b], strlen(ent[b]));
                    b += 2)
                ; /* find entity in entity list */

            if (ent[b++]) { /* found a match */
                c = (long) strlen(ent[b]);
                e = strchr(s, ';');
                if (c - 1 > e - s) { /* replacement is longer than reference */
                    d = s - r;
                    l = d + c + (long) strlen(e); /* new length */
                    if (r == m) { /* still in the caller's buffer: copy out */
                        grown = (char*) malloc(l);
                        if (grown)
                            strcpy(grown, r);
                    } else
                        grown = (char*) realloc(r, l);
                    if (!grown) { /* out of memory: leave reference as is */
                        s++;
                        continue;
                    }
                    r = grown;
                    s = r + d; /* fix up pointers */
                    e = strchr(s, ';');
                }

                memmove(s + c, e + 1, strlen(e)); /* shift rest of string */
                memcpy(s, ent[b], c); /* copy in replacement text */
                /* s is not advanced: the replacement is scanned again */
            } else
                s++; /* not a known entity */
        } else if ((t == ' ' || t == '*') && isspace((unsigned char) *s))
            *(s++) = ' ';
        else
            s++; /* no decoding needed */
    }

    if (t == '*') { /* normalize spaces for non-cdata attributes */
        for (s = r; *s; s++) {
            l = (long) strspn(s, " ");
            if (l)
                memmove(s, s + l, strlen(s + l) + 1); /* drop a run of spaces */
            while (*s && *s != ' ')
                s++; /* skip over the word */
            if (!*s)
                break; /* reached the end: do not step past the NUL */
        }
        /* s now rests on the terminator; a single space may precede it */
        if (s > r && s[-1] == ' ')
            s[-1] = '\0'; /* trim any trailing space */
    }
    return r;
}
272 
273 /* called when parser finds start of new tag */
274 /* Jason Luu June 22, 2010, Added line number support */
275 static void ezxml_open_tag(ezxml_root_t root, int line, char *name, char **attr) {
276  ezxml_t xml = root->cur;
277 
278  if (xml->name)
279  xml = ezxml_add_child(xml, name, strlen(xml->txt));
280  else
281  xml->name = name; /* first open tag */
282  xml->line = line;
283  xml->attr = attr;
284 
285  root->cur = xml; /* update tag insertion point */
286 }
287 
288 /* called when parser finds character content between open and closing tag */
289 /* Jason Luu June 22, 2010, Added line number support */
/* s points at len bytes of text inside the document buffer; t is the decode */
/* mode handed to ezxml_decode(). The decoded text becomes the current tag's */
/* txt, or is appended to it when the tag already has text. */
290 static void ezxml_char_content(ezxml_root_t root, char *s,
291  size_t len, char t) {
292  ezxml_t xml = root->cur;
293  char *m = s; /* remembers the caller's buffer to detect a malloced result */
294  size_t l;
295 
296  if (!xml || !xml->name || !len)
297  return; /* sanity check */
298 
299  s[len] = '\0'; /* null terminate text (calling functions anticipate this) */
/* from here on len is the decoded length including the terminator */
300  len = strlen(s = ezxml_decode(s, root->ent, t)) + 1;
301 
302  if (!*(xml->txt))
303  xml->txt = s; /* initial character content */
304  else { /* allocate our own memory and make a copy */
/* grow the existing heap copy when EZXML_TXTM is set, otherwise make the */
/* first heap copy of the current text; l is the old text length either way */
305  xml->txt = (xml->flags & EZXML_TXTM) /* allocate some space */
306  ? (char*)realloc(xml->txt, (l = strlen(xml->txt)) + len) : strcpy((char*)malloc((l =
307  strlen(xml->txt)) + len), xml->txt);
308  strcpy(xml->txt + l, s); /* add new char content */
309  if (s != m)
310  free(s); /* free s if it was malloced by ezxml_decode() */
311  }
312 
/* NOTE(review): the statement controlled by this `if` is missing from this */
/* listing. Presumably it marks the text as heap-owned (EZXML_TXTM) -- */
/* confirm against the real source file. */
313  if (xml->txt != m)
315 }
316 
317 /* called when parser finds closing tag */
318 static ezxml_t ezxml_close_tag(ezxml_root_t root, char *name, char *s) {
319  if (!root->cur || !root->cur->name || strcmp(name, root->cur->name))
320  return ezxml_err(root, s, "unexpected closing tag </%s>", name);
321 
322  root->cur = root->cur->parent;
323  return NULL;
324 }
325 
/* Checks the entity value s for circular references to the entity name. */
/* */
/* name - entity name being defined, including its trailing ';' */
/* s    - replacement text to inspect */
/* ent  - NULL terminated array of alternating entity names and values that */
/*        are already defined; references to them are followed recursively */
/* */
/* Returns non-zero if no circular reference is found, zero otherwise. */
static int ezxml_ent_ok(char *name, char *s, char **ent) {
    const size_t name_len = strlen(name); /* invariant: computed once */
    int i;

    for (;; s++) {
        s += strcspn(s, "&"); /* find next entity reference */
        if (!*s)
            return 1; /* end of text, nothing circular */
        if (!strncmp(s + 1, name, name_len))
            return 0; /* circular ref. */
        for (i = 0; ent[i] && strncmp(ent[i], s + 1, strlen(ent[i])); i += 2)
            ; /* look the referenced entity up */
        if (ent[i] && !ezxml_ent_ok(name, ent[i + 1], ent))
            return 0; /* its value leads back to name */
    }
}
344 
345 /* called when the parser finds a processing instruction */
346 static void ezxml_proc_inst(ezxml_root_t root, char *s, size_t len) {
347  int i = 0, j = 1;
348  char *target = s;
349 
350  s[len] = '\0'; /* null terminate instruction */
351  if (*(s += strcspn(s, EZXML_WS))) {
352  *s = '\0'; /* null terminate target */
353  s += strspn(s + 1, EZXML_WS) + 1; /* skip whitespace after target */
354  }
355 
356  if (!strcmp(target, "xml")) { /* <?xml ... ?> */
357  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
358  s = strstr(s, "standalone");
359  if (s && !strncmp(s + strspn(s + 10, EZXML_WS "='\"") + 10, "yes", 3))
360  root->standalone = 1;
361  return;
362  }
363 
364  if (!root->pi[0])
365  *(root->pi = (char***)malloc(sizeof(char **))) = NULL; /*first pi */
366 
367  while (root->pi[i] && strcmp(target, root->pi[i][0]))
368  i++; /* find target */
369  if (!root->pi[i]) { /* new target */
370  root->pi = (char***)realloc(root->pi, sizeof(char **) * (i + 2));
371  root->pi[i] = (char**)malloc(sizeof(char *) * 3);
372  root->pi[i][0] = target;
373  root->pi[i][1] = (char *) (root->pi[i + 1] = NULL); /* terminate pi list */
374  /* Ted Campbell, Aug 14, 2007. Changed to use 'my_strdup' */
375  root->pi[i][2] = my_strdup(""); /* empty document position list */
376  }
377 
378  while (root->pi[i][j])
379  j++; /* find end of instruction list for this target */
380  root->pi[i] = (char**)realloc(root->pi[i], sizeof(char *) * (j + 3));
381  root->pi[i][j + 2] = (char*)realloc(root->pi[i][j + 1], j + 1);
382  strcpy(root->pi[i][j + 2] + j - 1, (root->xml.name) ? ">" : "<");
383  root->pi[i][j + 1] = NULL; /* null terminate pi list for this target */
384  root->pi[i][j] = s; /* set instruction */
385 }
386 
387 /* called when the parser finds an internal doctype subset */
388 /* Jason Luu June 22, 2010, Added line number support */
389 static short ezxml_internal_dtd(ezxml_root_t root, char *s,
390  size_t len) {
391  char q, *c, *t, *n = NULL, *v, **ent, **pe;
392  char temp[] = {'\0','\0','\0'};
393  int i, j;
394 
395  pe = (char**)memcpy(malloc(sizeof(EZXML_NIL)), EZXML_NIL, sizeof(EZXML_NIL));
396 
397  for (s[len] = '\0'; s;) {
398  while (*s && *s != '<' && *s != '%')
399  s++; /* find next declaration */
400 
401  if (!*s)
402  break;
403  else if (!strncmp(s, "<!ENTITY", 8)) { /* parse entity definitions */
404  c = s += strspn(s + 8, EZXML_WS) + 8; /* skip white space separator */
405  n = s + strspn(s, EZXML_WS "%"); /* find name */
406  *(s = n + strcspn(n, EZXML_WS)) = ';'; /* append ; to name */
407 
408  v = s + strspn(s + 1, EZXML_WS) + 1; /* find value */
409  if ((q = *(v++)) != '"' && q != '\'') { /* skip externals */
410  s = strchr(s, '>');
411  continue;
412  }
413 
414  for (i = 0, ent = (*c == '%') ? pe : root->ent; ent[i]; i++)
415  ;
416  ent = (char**)realloc(ent, (i + 3) * sizeof(char *)); /* space for next ent */
417  if (*c == '%')
418  pe = ent;
419  else
420  root->ent = ent;
421 
422  *(++s) = '\0'; /* null terminate name */
423  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
424  s = strchr(v, q);
425  if (s)
426  *(s++) = '\0'; /* null terminate value */
427  ent[i + 1] = ezxml_decode(v, pe, '%'); /* set value */
428  ent[i + 2] = NULL; /* null terminate entity list */
429  if (!ezxml_ent_ok(n, ent[i + 1], ent)) { /* circular reference */
430  if (ent[i + 1] != v)
431  free(ent[i + 1]);
432  ezxml_err(root, v, "circular entity declaration &%s", n);
433  break;
434  } else
435  ent[i] = n; /* set entity name */
436  } else if (!strncmp(s, "<!ATTLIST", 9)) { /* parse default attributes */
437  t = s + strspn(s + 9, EZXML_WS) + 9; /* skip whitespace separator */
438  if (!*t) {
439  ezxml_err(root, t, "unclosed <!ATTLIST");
440  break;
441  }
442  if (*(s = t + strcspn(t, EZXML_WS ">")) == '>')
443  continue;
444  else
445  *s = '\0'; /* null terminate tag name */
446  for (i = 0; root->attr[i] && strcmp(n, root->attr[i][0]); i++)
447  ;
448 
449  ++s;
450  while (*(n = s + strspn(s, EZXML_WS)) && *n != '>') {
451  if (*(s = n + strcspn(n, EZXML_WS)))
452  *s = '\0'; /* attr name */
453  else {
454  ezxml_err(root, t, "malformed <!ATTLIST");
455  break;
456  }
457 
458  s += strspn(s + 1, EZXML_WS) + 1; /* find next token */
459  if ((strncmp(s, "CDATA", 5))) {
460  temp[0] = '*';
461  } else {
462  temp[0]=' ';
463  }
464  c = temp; /* is it cdata? */
465  if (!strncmp(s, "NOTATION", 8))
466  s += strspn(s + 8, EZXML_WS) + 8;
467  s = (*s == '(') ? strchr(s, ')') : s + strcspn(s, EZXML_WS);
468  if (!s) {
469  ezxml_err(root, t, "malformed <!ATTLIST");
470  break;
471  }
472 
473  s += strspn(s, EZXML_WS ")"); /* skip white space separator */
474  if (!strncmp(s, "#FIXED", 6))
475  s += strspn(s + 6, EZXML_WS) + 6;
476  if (*s == '#') { /* no default value */
477  s += strcspn(s, EZXML_WS ">") - 1;
478  if (*c == ' ')
479  continue; /* cdata is default, nothing to do */
480  v = NULL;
481  } else {
482  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
483  s = strchr(v = s + 1, *s);
484  if ((*s == '"' || *s == '\'') && /* default value */
485  s)
486  *s = '\0';
487  else {
488  ezxml_err(root, t, "malformed <!ATTLIST");
489  break;
490  }
491  }
492 
493  if (!root->attr[i]) { /* new tag name */
494  root->attr =
495  (!i) ? (char***)malloc(2 * sizeof(char **)) : (char***)realloc(
496  root->attr,
497  (i + 2) * sizeof(char **));
498  root->attr[i] = (char**)malloc(2 * sizeof(char *));
499  root->attr[i][0] = t; /* set tag name */
500  root->attr[i][1] = (char *) (root->attr[i + 1] = NULL);
501  }
502 
503  for (j = 1; root->attr[i][j]; j += 3)
504  ; /* find end of list */
505  root->attr[i] = (char**)realloc(root->attr[i],
506  (j + 4) * sizeof(char *));
507 
508  root->attr[i][j + 3] = NULL; /* null terminate list */
509  root->attr[i][j + 2] = c; /* is it cdata? */
510  root->attr[i][j + 1] =
511  (v) ? ezxml_decode(v, root->ent, *c) : NULL;
512  root->attr[i][j] = n; /* attribute name */
513 
514  ++s;
515  }
516  } else if (!strncmp(s, "<!--", 4)) {
517  s = strstr(s + 4, "-->"); /* comments */
518  }
519  else if (!strncmp(s, "<?", 2)) { /* processing instructions */
520  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
521  s = strstr(c = s + 2, "?>");
522  if (s)
523  ezxml_proc_inst(root, c, s++ - c);
524  } else if (*s == '<')
525  s = strchr(s, '>'); /* skip other declarations */
526  else if (*(s++) == '%' && !root->standalone)
527  break;
528  }
529 
530  free(pe);
531  return !*root->err;
532 }
533 
534 /* Converts a UTF-16 string to UTF-8. Returns a new string that must be freed */
535 /* or NULL if no conversion was needed. */
536 static char *
537 ezxml_str2utf8(char **s, size_t * len) {
538  char *u;
539  size_t l = 0, sl, max = *len;
540  long c, d;
541  int b, be = (**s == '\xFE') ? 1 : (**s == '\xFF') ? 0 : -1;
542 
543  if (be == -1)
544  return NULL; /* not UTF-16 */
545 
546  u = (char*)malloc(max);
547  for (sl = 2; sl < *len - 1; sl += 2) {
548  c = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF) /*UTF-16BE */
549  :
550  (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF); /*UTF-16LE */
551  if (c >= 0xD800 && c <= 0xDFFF && (sl += 2) < *len - 1) { /* high-half */
552  d = (be) ?
553  (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF) :
554  (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF);
555  c = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
556  }
557 
558  while (l + 6 > max)
559  u = (char*)realloc(u, max += EZXML_BUFSIZE);
560  if (c < 0x80)
561  u[l++] = (char)c; /* US-ASCII subset */
562  else { /* multi-byte UTF-8 sequence */
563  for (b = 0, d = c; d; d /= 2)
564  b++; /* bits in c */
565  b = (b - 2) / 5; /* bytes in payload */
566  u[l++] = (char)((0xFF << (7 - b)) | (c >> (6 * b))); /* head */
567  while (b)
568  u[l++] = 0x80 | ((c >> (6 * --b)) & 0x3F); /* payload */
569  }
570  }
571  return *s = (char*)realloc(u, *len = l);
572 }
573 
574 /* frees a tag attribute list */
575 static void ezxml_free_attr(char **attr) {
576  int i = 0;
577  char *m;
578 
579  if (!attr || attr == EZXML_NIL)
580  return; /* nothing to free */
581  while (attr[i])
582  i += 2; /* find end of attribute list */
583  m = attr[i + 1]; /* list of which names and values are malloced */
584  for (i = 0; m[i]; i++) {
585  if (m[i] & EZXML_NAMEM)
586  free(attr[i * 2]);
587  if (m[i] & EZXML_TXTM)
588  free(attr[(i * 2) + 1]);
589  }
590  free(m);
591  free(attr);
592 }
593 
594 /* parse the given xml string and return an ezxml structure */
595 /* Jason Luu June 22, 2010, Added line number support */
/* s is a writable buffer of len bytes that is parsed in place: tag names, */
/* attribute names/values and text are NUL terminated inside the buffer and */
/* the resulting tree points into it. On a syntax error the message is */
/* recorded through ezxml_err() and the root tag is still returned. `line` */
/* counts newlines as they are consumed and is stored on each opened tag. */
596 ezxml_t ezxml_parse_str(char *s, size_t len) {
597  ezxml_root_t root = (ezxml_root_t) ezxml_new(0);
598  char q, e, *d, *temp, **attr, **a = NULL; /* initialize a to avoid compile warning */
599  int l, i, j;
600  int line = 1;
601 
602  root->m = s;
603  if (!len)
604  return ezxml_err(root, NULL, "root tag missing");
605  root->u = ezxml_str2utf8(&s, &len); /* convert utf-16 to utf-8 */
606  root->e = (root->s = s) + len; /* record start and end of work area */
607 
/* the last byte doubles as terminator; its original value is kept in e and */
/* consulted wherever a '>' could have been the final character */
608  e = s[len - 1]; /* save end char */
609  s[len - 1] = '\0'; /* turn end char into null terminator */
610 
611  while (*s && *s != '<')
612  s++; /* find first tag */
613  if (!*s)
614  return ezxml_err(root, s, "root tag missing");
615 
/* each iteration starts with s on a '<' and handles one markup construct */
616  for (;;) {
617  attr = (char **) EZXML_NIL;
618  d = ++s;
619 
/* NOTE(review): `*s < '\0'` accepts bytes with the high bit set as name */
/* start characters where char is signed -- confirm this is the intent */
620  if (isalpha(*s) || *s == '_' || *s == ':' || *s < '\0') { /* new tag */
621  if (!root->cur)
622  return ezxml_err(root, d, "markup outside of root element");
623 
624  s += strcspn(s, EZXML_WS "/>");
625  while (isspace(*s)) {
626  if (*s == '\n')
627  line++;
628  *(s++) = '\0'; /* null terminate tag name */
629  }
630 
631  if (*s && *s != '/' && *s != '>') { /* find tag in default attr list */
632  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
/* NOTE(review): as written, root->attr[0] is tested twice before the loop */
/* moves on; every entry is still visited until a match or the terminator */
633  a = root->attr[0];
634  for (i = 0; a && strcmp(a[0], d); i++) {
635  a = root->attr[i];
636  }
637  }
638 
/* attr holds name/value pairs; after the NULL terminator at attr[l + 2] */
/* comes attr[l + 3], a string with one flag character per attribute */
639  for (l = 0; *s && *s != '/' && *s != '>'; l += 2) { /* new attrib */
640  attr = (l) ?
641  (char**)realloc(attr, (l + 4) * sizeof(char *)) :
642  (char**)malloc(4 * sizeof(char *)); /* allocate space */
643  attr[l + 3] =
644  (l) ? (char*)realloc(attr[l + 1], (l / 2) + 2) : (char*)malloc(2); /* mem for list of malloced vals */
645  strcpy(attr[l + 3] + (l / 2), " "); /* value is not malloced */
646  attr[l + 2] = NULL; /* null terminate list */
647  attr[l + 1] = ""; /* temporary attribute value */
648  attr[l] = s; /* set attribute name */
649 
650  s += strcspn(s, EZXML_WS "=/>");
651  if (*s == '=' || isspace(*s)) {
652  if (*s == '\n')
653  line++;
654  *(s++) = '\0'; /* null terminate tag attribute name */
655  q = *(s += strspn(s, EZXML_WS "="));
656  if (q == '"' || q == '\'') { /* attribute value */
657  attr[l + 1] = ++s;
658  while (*s && *s != q)
659  s++;
660  if (*s)
661  *(s++) = '\0'; /* null terminate attribute val */
662  else {
663  ezxml_free_attr(attr);
664  return ezxml_err(root, d, "missing %c", q);
665  }
666 
/* look for a default-attribute entry of the same name; its type marker */
/* (a[j + 2]) selects the normalization mode, ' ' being the fallback */
667  for (j = 1; a && a[j] && strcmp(a[j], attr[l]); j += 3)
668  ;
669  attr[l + 1] = ezxml_decode(attr[l + 1],
670  root->ent, (a && a[j]) ? *a[j + 2] : ' ');
/* a result outside the text of the current tag was malloced by decode */
671  if (attr[l + 1] < d || attr[l + 1] > s)
672  attr[l + 3][l / 2] = EZXML_TXTM; /* value malloced */
673  }
674  }
675  while (isspace(*s)) {
676  if (*s == '\n')
677  line++;
678  s++;
679  }
680  }
681 
682  if (*s == '/') { /* self closing tag */
683  *(s++) = '\0';
684  if ((*s && *s != '>') || (!*s && e != '>')) {
685  if (l)
686  ezxml_free_attr(attr);
687  return ezxml_err(root, d, "missing >");
688  }
689  ezxml_open_tag(root, line, d, attr);
690  ezxml_close_tag(root, d, s);
691  } else if ((q = *s) == '>' || (!*s && e == '>')) { /* open tag */
692  *s = '\0'; /* temporarily null terminate tag name */
693  ezxml_open_tag(root, line, d, attr);
694  *s = q;
695  } else {
696  if (l)
697  ezxml_free_attr(attr);
698  return ezxml_err(root, d, "missing >");
699  }
700  } else if (*s == '/') { /* close tag */
701  s += strcspn(d = s + 1, EZXML_WS ">") + 1;
702  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
703  q = *s;
704  if (!q && e != '>')
705  return ezxml_err(root, d, "missing >");
706  *s = '\0'; /* temporarily null terminate tag name */
707  if (ezxml_close_tag(root, d, s))
708  return &root->xml;
709  if (isspace(*s = q)) {
710  if (*s == '\n')
711  line++;
712  s += strspn(s, EZXML_WS);
713  }
714  } else if (!strncmp(s, "!--", 3)) { /* xml comment */
715  temp = s;
716  s = strstr(s + 3, "--");
717  if (!s || (*(s += 2) != '>' && *s) || (!*s && e != '>'))
718  return ezxml_err(root, d, "unclosed <!--");
/* count the newlines inside the comment so line stays accurate */
719  while (temp != s && *temp != '\0') {
720  if (*temp == '\n') {
721  line++;
722  }
723  temp++;
724  }
725  } else if (!strncmp(s, "![CDATA[", 8)) { /* cdata */
726  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
727  s = strstr(s, "]]>");
728  if (s)
729  ezxml_char_content(root, d + 8, (s += 2) - d - 10, 'c');
730  else
731  return ezxml_err(root, d, "unclosed <![CDATA[");
732  } else if (!strncmp(s, "!DOCTYPE", 8)) { /* dtd */
/* scan to the closing '>'; l becomes 1 once a '[' (internal subset) has */
/* been seen, after which only "]" followed by optional whitespace and '>' */
/* ends the scan */
733  for (l = 0;
734  *s
735  && ((!l && *s != '>')
736  || (l
737  && (*s != ']'
738  || *(s
739  + strspn(s + 1,
740  EZXML_WS)
741  + 1) != '>')));
742  l = (*s == '[') ? 1 : l)
743  s += strcspn(s + 1, "[]>") + 1;
744  if (!*s && e != '>')
745  return ezxml_err(root, d, "unclosed <!DOCTYPE");
746  d = (l) ? strchr(d, '[') + 1 : d;
747  if (l && !ezxml_internal_dtd(root, d, s++ - d))
748  return &root->xml;
749  } else if (*s == '?') { /* <?...?> processing instructions */
750  do {
751  s = strchr(s, '?');
752  } while (s && *(++s) && *s != '>');
753  if (!s || (!*s && e != '>'))
754  return ezxml_err(root, d, "unclosed <?");
755  else
756  ezxml_proc_inst(root, d + 1, s - d - 2);
757  } else
758  return ezxml_err(root, d, "unexpected <");
759 
/* s now rests on the '>' that ended the construct (or on the terminator) */
760  if (!s || !*s)
761  break;
762  *s = '\0';
763  d = ++s;
764  if (*s && *s != '<') { /* tag character content */
765  while (*s && *s != '<') {
766  if (*s == '\n') {
767  line++;
768  }
769  s++;
770  }
771  if (*s)
772  ezxml_char_content(root, d, s - d, '&');
773  else
774  break;
775  } else if (!*s)
776  break;
777  }
778 
/* root->cur is NULL once every opened tag has been closed */
779  if (!root->cur)
780  return &root->xml;
781  else if (!root->cur->name)
782  return ezxml_err(root, d, "root tag missing");
783  else
784  return ezxml_err(root, d, "unclosed tag <%s>", root->cur->name);
785 }
786 
787 /* Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire */
788 /* stream into memory and then parses it. For xml files, use ezxml_parse_file() */
789 /* or ezxml_parse_fd() */
/* NOTE(review): the line carrying this function's signature is absent from */
/* this listing; the body below reads from a stream named fp. */
791  ezxml_root_t root;
792  size_t l, len = 0;
793  char *s;
794 
795  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
796  s = (char*)malloc(EZXML_BUFSIZE);
797  if (!s)
798  return NULL;
/* read EZXML_BUFSIZE bytes at a time; a full chunk means more may follow, so */
/* the buffer is grown by one more chunk before reading again */
/* NOTE(review): if realloc fails, s is overwritten with NULL and the data */
/* read so far is leaked */
799  do {
800  len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp));
801  if (l == EZXML_BUFSIZE)
802  s = (char*)realloc(s, len + EZXML_BUFSIZE);
803  } while (s && l == EZXML_BUFSIZE);
804 
805  if (!s)
806  return NULL;
807  root = (ezxml_root_t) ezxml_parse_str(s, len);
808  /* Ted Campbell, Aug 14, 2007. Added explicit cast. */
809  root->len = (size_t) (-1); /* so we know to free s in ezxml_free() */
810  return &root->xml;
811 }
812 
813 /* A wrapper for ezxml_parse_str() that accepts a file descriptor. First */
814 /* attempts to mem map the file. Failing that, reads the file into memory. */
815 /* Returns NULL on failure. */
/* NOTE(review): the line carrying this function's signature is absent from */
/* this listing; the body below uses a descriptor named fd. */
/* NOTE(review): this file defines EZXML_NOMMAP unconditionally near its top, */
/* so the mmap branches below are compiled out and the file is always read */
/* into a malloced buffer. */
817  ezxml_root_t root;
818  struct stat st;
819  size_t l;
820  void *m;
821 
822  if (fd < 0)
823  return NULL;
/* NOTE(review): the result of fstat() is not checked */
824  fstat(fd, &st);
825 
826 #ifndef EZXML_NOMMAP
/* round the file size up to a whole number of pages for the mapping */
827  l = (st.st_size + sysconf(_SC_PAGESIZE) - 1) & ~(sysconf(_SC_PAGESIZE) - 1);
828  if ((m = mmap(NULL, l, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0))
829  != MAP_FAILED) {
830  madvise(m, l, MADV_SEQUENTIAL); /* optimize for sequential access */
831  root = (ezxml_root_t) ezxml_parse_str((char*)m, st.st_size);
832  madvise(m, root->len = l, MADV_NORMAL); /* put it back to normal */
833  } else { /* mmap failed, read file into memory */
834 #endif /* EZXML_NOMMAP */
/* NOTE(review): neither the malloc() result nor a failing read() is checked */
/* before the buffer is handed to the parser */
835  l = read(fd, m = malloc(st.st_size), st.st_size);
836  root = (ezxml_root_t) ezxml_parse_str((char*)m, l);
837  /* Ted Campbell, Aug 14, 2007. Added explicit cast. */
838  root->len = (size_t) (-1); /* so we know to free s in ezxml_free() */
839 #ifndef EZXML_NOMMAP
840  }
841 #endif /* EZXML_NOMMAP */
842  return &root->xml;
843 }
844 
845 /* a wrapper for ezxml_parse_fd that accepts a file name */
846 ezxml_t ezxml_parse_file(const char *file) {
847  int fd = open(file, O_RDONLY, 0);
848  ezxml_t xml = ezxml_parse_fd(fd);
849 
850  if (fd >= 0)
851  close(fd);
852  return xml;
853 }
854 
855 /* Encodes ampersand sequences appending the results to *dst, reallocating *dst */
856 /* if length excedes max. a is non-zero for attribute encoding. Returns *dst */
857 static char *
858 ezxml_ampencode(const char *s, size_t len, char **dst, size_t * dlen,
859  size_t * max, short a) {
860  const char *e;
861 
862  for (e = s + len; s != e; s++) {
863  while (*dlen + 10 > *max)
864  *dst = (char*)realloc(*dst, *max += EZXML_BUFSIZE);
865 
866  switch (*s) {
867  case '\0':
868  return *dst;
869  case '&':
870  *dlen += sprintf(*dst + *dlen, "&amp;");
871  break;
872  case '<':
873  *dlen += sprintf(*dst + *dlen, "&lt;");
874  break;
875  case '>':
876  *dlen += sprintf(*dst + *dlen, "&gt;");
877  break;
878  case '"':
879  *dlen += sprintf(*dst + *dlen, (a) ? "&quot;" : "\"");
880  break;
881  case '\n':
882  *dlen += sprintf(*dst + *dlen, (a) ? "&#xA;" : "\n");
883  break;
884  case '\t':
885  *dlen += sprintf(*dst + *dlen, (a) ? "&#x9;" : "\t");
886  break;
887  case '\r':
888  *dlen += sprintf(*dst + *dlen, "&#xD;");
889  break;
890  default:
891  (*dst)[(*dlen)++] = *s;
892  }
893  }
894  return *dst;
895 }
896 
897 /* Recursively converts each tag to xml appending it to *s. Reallocates *s if */
898 /* its length excedes max. start is the location of the previous tag in the */
899 /* parent tag's character content. Returns *s. */
900 static char *
901 ezxml_toxml_r(ezxml_t xml, char **s, size_t * len, size_t * max, size_t start,
902  char ***attr) {
903  int i, j;
904  const char *txt = (xml->parent) ? xml->parent->txt : "";
905  size_t off = 0;
906 
907  /* parent character content up to this tag */
908  *s = ezxml_ampencode(txt + start, xml->off - start, s, len, max, 0);
909 
910  while (*len + strlen(xml->name) + 4 > *max) /* reallocate s */
911  *s = (char*)realloc(*s, *max += EZXML_BUFSIZE);
912 
913  *len += sprintf(*s + *len, "<%s", xml->name); /* open tag */
914  for (i = 0; xml->attr[i]; i += 2) { /* tag attributes */
915  if (ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1])
916  continue;
917  while (*len + strlen(xml->attr[i]) + 7 > *max) /* reallocate s */
918  *s = (char*)realloc(*s, *max += EZXML_BUFSIZE);
919 
920  *len += sprintf(*s + *len, " %s=\"", xml->attr[i]);
921  /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
922  ezxml_ampencode(xml->attr[i + 1], (size_t) (-1), s, len, max, 1);
923  *len += sprintf(*s + *len, "\"");
924  }
925 
926  for (i = 0; attr[i] && strcmp(attr[i][0], xml->name); i++)
927  ;
928  for (j = 1; attr[i] && attr[i][j]; j += 3) { /* default attributes */
929  if (!attr[i][j + 1] || ezxml_attr(xml, attr[i][j]) != attr[i][j + 1])
930  continue; /* skip duplicates and non-values */
931  while (*len + strlen(attr[i][j]) + 7 > *max) /* reallocate s */
932  *s = (char*)realloc(*s, *max += EZXML_BUFSIZE);
933 
934  *len += sprintf(*s + *len, " %s=\"", attr[i][j]);
935  /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
936  ezxml_ampencode(attr[i][j + 1], (size_t) (-1), s, len, max, 1);
937  *len += sprintf(*s + *len, "\"");
938  }
939  *len += sprintf(*s + *len, ">");
940 
941  /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
942  *s = (xml->child) ?
943  ezxml_toxml_r(xml->child, s, len, max, 0, attr) /*child */
944  :
945  ezxml_ampencode(xml->txt, (size_t) (-1), s, len, max, 0); /*data */
946 
947  while (*len + strlen(xml->name) + 4 > *max) /* reallocate s */
948  *s = (char*)realloc(*s, *max += EZXML_BUFSIZE);
949 
950  *len += sprintf(*s + *len, "</%s>", xml->name); /* close tag */
951 
952  while (txt[off] && off < xml->off)
953  off++; /* make sure off is within bounds */
954  /* Ted Campbell, Aug 14, 2007. Added explicit cast to size_t. */
955  return (xml->ordered) ?
956  ezxml_toxml_r(xml->ordered, s, len, max, off, attr) :
957  ezxml_ampencode(txt + off, (size_t) (-1), s, len, max, 0);
958 }
959 
960 /* Converts an ezxml structure back to xml. Returns a string of xml data that */
961 /* must be freed. */
962 char *
964  ezxml_t p = (xml) ? xml->parent : NULL, o = (xml) ? xml->ordered : NULL;
965  ezxml_root_t root = (ezxml_root_t) xml;
966  size_t len = 0, max = EZXML_BUFSIZE;
967  char *s = strcpy((char*)malloc(max), ""), *t, *n;
968  int i, j, k;
969 
970  if (!xml || !xml->name)
971  return (char*)realloc(s, len + 1);
972  while (root->xml.parent)
973  root = (ezxml_root_t) root->xml.parent; /* root tag */
974 
975  for (i = 0; !p && root->pi[i]; i++) { /* pre-root processing instructions */
976  for (k = 2; root->pi[i][k - 1]; k++)
977  ;
978  for (j = 1; root->pi[i][j]; j++) {
979  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
980  n = root->pi[i][j];
981  if (root->pi[i][k][j - 1] == '>')
982  continue; /* not pre-root */
983  while (len + strlen(t = root->pi[i][0]) + strlen(n) + 7 > max)
984  s = (char*)realloc(s, max += EZXML_BUFSIZE);
985  len += sprintf(s + len, "<?%s%s%s?>\n", t, *n ? " " : "", n);
986  }
987  }
988 
989  xml->parent = xml->ordered = NULL;
990  s = ezxml_toxml_r(xml, &s, &len, &max, 0, root->attr);
991  xml->parent = p;
992  xml->ordered = o;
993 
994  for (i = 0; !p && root->pi[i]; i++) { /* post-root processing instructions */
995  for (k = 2; root->pi[i][k - 1]; k++)
996  ;
997  for (j = 1; root->pi[i][j]; j++) {
998  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
999  n = root->pi[i][j];
1000  if (root->pi[i][k][j - 1] == '<')
1001  continue; /* not post-root */
1002  while (len + strlen(t = root->pi[i][0]) + strlen(n) + 7 > max)
1003  s = (char*)realloc(s, max += EZXML_BUFSIZE);
1004  len += sprintf(s + len, "\n<?%s%s%s?>", t, *n ? " " : "", n);
1005  }
1006  }
1007  return (char*)realloc(s, len + 1);
1008 }
1009 
1010 /* free the memory allocated for the ezxml structure */
1011 void ezxml_free(ezxml_t xml) {
1012  ezxml_root_t root = (ezxml_root_t) xml;
1013  int i, j;
1014  char **a, *s;
1015 
1016  if (!xml)
1017  return;
1018  ezxml_free(xml->child);
1019  ezxml_free(xml->ordered);
1020 
1021  if (!xml->parent) { /* free root tag allocations */
1022  for (i = 10; root->ent[i]; i += 2) /* 0 - 9 are default entites (<>&"') */
1023  if ((s = root->ent[i + 1]) < root->s || s > root->e)
1024  free(s);
1025  free(root->ent); /* free list of general entities */
1026 
1027  for (i = 0; root->attr[i]; i++) {
1028  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
1029  a = root->attr[i];
1030  for (j = 1; a[j++]; j += 2) /* free malloced attribute values */
1031  if (a[j] && (a[j] < root->s || a[j] > root->e))
1032  free(a[j]);
1033  free(a);
1034  }
1035  if (root->attr[0])
1036  free(root->attr); /* free default attribute list */
1037 
1038  for (i = 0; root->pi[i]; i++) {
1039  for (j = 1; root->pi[i][j]; j++)
1040  ;
1041  free(root->pi[i][j + 1]);
1042  free(root->pi[i]);
1043  }
1044  if (root->pi[0])
1045  free(root->pi); /* free processing instructions */
1046 
1047  if (root->len == (size_t)-1)
1048  free(root->m); /* malloced xml data */
1049 #ifndef EZXML_NOMMAP
1050  else if (root->len)
1051  munmap(root->m, root->len); /* mem mapped xml data */
1052 #endif /* EZXML_NOMMAP */
1053  if (root->u)
1054  free(root->u); /* utf8 conversion */
1055  }
1056 
1057  ezxml_free_attr(xml->attr); /* tag attributes */
1058  if ((xml->flags & EZXML_TXTM))
1059  free(xml->txt); /* character content */
1060  if ((xml->flags & EZXML_NAMEM))
1061  free(xml->name); /* tag name */
1062  free(xml);
1063 }
1064 
1065 /* return parser error message or empty string if none */
1066 const char *
1068  while (xml && xml->parent)
1069  xml = xml->parent; /* find root tag */
1070  return (xml) ? ((ezxml_root_t) xml)->err : "";
1071 }
1072 
1073 /* returns a new empty ezxml structure with the given root tag name */
1074 ezxml_t ezxml_new(char *name) {
1075  static char *ent[] = { "lt;", "&#60;", "gt;", "&#62;", "quot;", "&#34;",
1076  "apos;", "&#39;", "amp;", "&#38;", NULL };
1077  ezxml_root_t root = (ezxml_root_t) memset(malloc(sizeof(struct ezxml_root)),
1078  '\0', sizeof(struct ezxml_root));
1079 
1080  root->xml.name = name;
1081  root->cur = &root->xml;
1082  strcpy(root->err, root->xml.txt = "");
1083  root->ent = (char**)memcpy(malloc(sizeof(ent)), ent, sizeof(ent));
1084  root->attr = root->pi = (char ***) (root->xml.attr = EZXML_NIL);
1085  return &root->xml;
1086 }
1087 
1088 /* inserts an existing tag into an ezxml structure */
1090  ezxml_t cur, prev, head;
1091 
1092  xml->next = xml->sibling = xml->ordered = NULL;
1093  xml->off = off;
1094  xml->parent = dest;
1095 
1096  /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */
1097  head = dest->child;
1098  if (head) { /* already have sub tags */
1099  if (head->off <= off) { /* not first subtag */
1100  for (cur = head; cur->ordered && cur->ordered->off <= off;
1101  cur = cur->ordered)
1102  ;
1103  xml->ordered = cur->ordered;
1104  cur->ordered = xml;
1105  } else { /* first subtag */
1106  xml->ordered = head;
1107  dest->child = xml;
1108  }
1109 
1110  for (cur = head, prev = NULL; cur && strcmp(cur->name, xml->name);
1111  prev = cur, cur = cur->sibling)
1112  ; /* find tag type */
1113  if (cur && cur->off <= off) { /* not first of type */
1114  while (cur->next && cur->next->off <= off)
1115  cur = cur->next;
1116  xml->next = cur->next;
1117  cur->next = xml;
1118  } else { /* first tag of this type */
1119  if (prev && cur)
1120  prev->sibling = cur->sibling; /* remove old first */
1121  xml->next = cur; /* old first tag is now next */
1122  for (cur = head, prev = NULL; cur && cur->off <= off;
1123  prev = cur, cur = cur->sibling)
1124  ; /* new sibling insert point */
1125  xml->sibling = cur;
1126  if (prev)
1127  prev->sibling = xml;
1128  }
1129  } else
1130  dest->child = xml; /* only sub tag */
1131 
1132  return xml;
1133 }
1134 
1135 /* Adds a child tag. off is the offset of the child tag relative to the start */
1136 /* of the parent tag's character content. Returns the child tag. */
1137 ezxml_t ezxml_add_child(ezxml_t xml, char *name, size_t off) {
1138  ezxml_t child;
1139 
1140  if (!xml)
1141  return NULL;
1142  child = (ezxml_t) memset(malloc(sizeof(struct ezxml)), '\0',
1143  sizeof(struct ezxml));
1144  child->name = name;
1145  child->attr = EZXML_NIL;
1146  child->txt = "";
1147 
1148  return ezxml_insert(child, xml, off);
1149 }
1150 
1151 /* sets the character content for the given tag and returns the tag */
1153  if (!xml)
1154  return NULL;
1155  if (xml->flags & EZXML_TXTM)
1156  free(xml->txt); /* existing txt was malloced */
1157  xml->flags &= ~EZXML_TXTM;
1158  strcpy(xml->txt, txt);
1159  xml->txt = txt;
1160  return xml;
1161 }
1162 
1163 /* Sets the given tag attribute or adds a new attribute if not found. A value */
1164 /* of NULL will remove the specified attribute. Returns the tag given. */
1165 ezxml_t ezxml_set_attr(ezxml_t xml, char *name, char *value) {
1166  int l = 0, c;
1167 
1168  if (!xml)
1169  return NULL;
1170  while (xml->attr[l] && strcmp(xml->attr[l], name))
1171  l += 2;
1172  if (!xml->attr[l]) { /* not found, add as new attribute */
1173  if (!value)
1174  return xml; /* nothing to do */
1175  if (xml->attr == EZXML_NIL) { /* first attribute */
1176  xml->attr = (char**)malloc(4 * sizeof(char *));
1177  /* Ted Campbell, Aug 14, 2007. Changed to use 'my_strdup' */
1178  xml->attr[1] = my_strdup(""); /* empty list of malloced names/vals */
1179  } else
1180  xml->attr = (char**)realloc(xml->attr, (l + 4) * sizeof(char *));
1181 
1182  xml->attr[l] = name; /*set attribute name */
1183  xml->attr[l + 2] = NULL; /* null terminate attribute list */
1184  xml->attr[l + 3] = (char*)realloc(xml->attr[l + 1],
1185  (c = strlen(xml->attr[l + 1])) + 2);
1186  strcpy(xml->attr[l + 3] + c, " "); /* set name/value as not malloced */
1187  if (xml->flags & EZXML_DUP)
1188  xml->attr[l + 3][c] = (char) (unsigned char) EZXML_NAMEM;
1189  } else if (xml->flags & EZXML_DUP)
1190  free(name); /* name was strduped */
1191  for (c = l; xml->attr[c]; c += 2)
1192  ; /* find end of attribute list */
1193  if (xml->attr[c + 1][l / 2] & EZXML_TXTM)
1194  free(xml->attr[l + 1]); /*old val */
1195  if (xml->flags & EZXML_DUP)
1196  xml->attr[c + 1][l / 2] |= EZXML_TXTM;
1197  else
1198  xml->attr[c + 1][l / 2] &= ~EZXML_TXTM;
1199 
1200  if (value)
1201  xml->attr[l + 1] = value; /* set attribute value */
1202  else { /* remove attribute */
1203  if (xml->attr[c + 1][l / 2] & EZXML_NAMEM)
1204  free(xml->attr[l]);
1205  /* Ted Campbell, Aug 14, 2007. It seems that the size should be
1206  * (c + 2) - (l + 2) = (c - l) */
1207  memmove(xml->attr + l, xml->attr + l + 2, (c - l) * sizeof(char *));
1208  /* Ted Campbell, Aug 14, 2007. We need to adjust c to point to new
1209  * location it was moved to since its old location is undefined */
1210  c -= 2; /* We have one less elements */
1211  xml->attr = (char**)realloc(xml->attr, (c + 2) * sizeof(char *));
1212  memmove(xml->attr[c + 1] + (l / 2), xml->attr[c + 1] + (l / 2) + 1,
1213  (c / 2) - (l / 2)); /* fix list of which name/vals are malloced */
1214  }
1215  xml->flags &= ~EZXML_DUP; /* clear strdup() flag */
1216  return xml;
1217 }
1218 
1219 /* sets a flag for the given tag and returns the tag */
1220 ezxml_t ezxml_set_flag(ezxml_t xml, short flag) {
1221  if (xml)
1222  xml->flags |= flag;
1223  return xml;
1224 }
1225 
1226 /* removes a tag along with its subtags without freeing its memory */
1228  ezxml_t cur;
1229 
1230  if (!xml)
1231  return NULL; /* nothing to do */
1232  if (xml->next)
1233  xml->next->sibling = xml->sibling; /* patch sibling list */
1234 
1235  if (xml->parent) { /* not root tag */
1236  cur = xml->parent->child; /* find head of subtag list */
1237  if (cur == xml)
1238  xml->parent->child = xml->ordered; /* first subtag */
1239  else { /* not first subtag */
1240  while (cur->ordered != xml)
1241  cur = cur->ordered;
1242  cur->ordered = cur->ordered->ordered; /* patch ordered list */
1243 
1244  cur = xml->parent->child; /* go back to head of subtag list */
1245  if (strcmp(cur->name, xml->name)) { /* not in first sibling list */
1246  while (strcmp(cur->sibling->name, xml->name))
1247  cur = cur->sibling;
1248  if (cur->sibling == xml) { /* first of a sibling list */
1249  cur->sibling =
1250  (xml->next) ? xml->next : cur->sibling->sibling;
1251  } else
1252  cur = cur->sibling; /* not first of a sibling list */
1253  }
1254 
1255  while (cur->next && cur->next != xml)
1256  cur = cur->next;
1257  if (cur->next)
1258  cur->next = cur->next->next; /* patch next list */
1259  }
1260  }
1261  xml->ordered = xml->sibling = xml->next = NULL;
1262  return xml;
1263 }
1264 
#ifdef EZXML_TEST /* test harness */
/* Parses the xml file named on the command line, prints it back out as xml */
/* and reports any parser error. Exit status is 1 on a usage or parse error */
/* and 0 otherwise. */
int
main(int argc,
        char **argv)
{
    ezxml_t xml;
    const char *err;
    char *s;
    int failed;

    if (argc != 2) {
        vpr_printf(TIO_MESSAGE_ERROR, "usage: %s xmlfile\n", argv[0]);
        return 1;
    }

    xml = ezxml_parse_file(argv[1]);
    s = ezxml_toxml(xml);
    if (s) {
        /* every vpr_printf call takes the message type as first argument */
        vpr_printf(TIO_MESSAGE_INFO, "%s\n", s);
        free(s);
    }

    /* Decide the exit status from the error text itself rather than from
     * the logger's return value, and read it before the tree is freed since
     * the message lives inside the root structure. */
    err = ezxml_error(xml);
    failed = (*err != '\0');
    if (failed)
        vpr_printf(TIO_MESSAGE_ERROR, "%s", err);
    ezxml_free(xml);
    return (failed) ? 1 : 0;
}
#endif /* EZXML_TEST */
1285 
const char * ezxml_error(ezxml_t xml)
Definition: ezxml.c:1067
static ezxml_t ezxml_vget(ezxml_t xml, va_list ap)
Definition: ezxml.c:125
ezxml_t ezxml_cut(ezxml_t xml)
Definition: ezxml.c:1227
char err[EZXML_ERRL]
Definition: ezxml.h:73
Definition: ezxml.h:44
ezxml_t ezxml_idx(ezxml_t xml, int idx)
Definition: ezxml.c:94
ezxml_t ezxml_set_flag(ezxml_t xml, short flag)
Definition: ezxml.c:1220
size_t len
Definition: ezxml.h:65
int main(int argc, char **argv)
Definition: vpr/SRC/main.c:33
char * u
Definition: ezxml.h:66
struct ezxml_root * ezxml_root_t
Definition: ezxml.h:60
#define EZXML_TXTM
Definition: ezxml.h:39
char * txt
Definition: ezxml.h:47
static void ezxml_char_content(ezxml_root_t root, char *s, size_t len, char t)
Definition: ezxml.c:290
static void ezxml_open_tag(ezxml_root_t root, int line, char *name, char **attr)
Definition: ezxml.c:275
static ezxml_t ezxml_close_tag(ezxml_root_t root, char *name, char *s)
Definition: ezxml.c:318
#define EZXML_BUFSIZE
Definition: ezxml.h:37
static char * ezxml_ampencode(const char *s, size_t len, char **dst, size_t *dlen, size_t *max, short a)
Definition: ezxml.c:858
#define EZXML_ERRL
Definition: ezxml.h:41
char * EZXML_NIL[]
Definition: ezxml.c:65
ezxml_t ezxml_new(char *name)
Definition: ezxml.c:1074
static void ezxml_free_attr(char **attr)
Definition: ezxml.c:575
ezxml_t ezxml_insert(ezxml_t xml, ezxml_t dest, size_t off)
Definition: ezxml.c:1089
#define EZXML_NAMEM
Definition: ezxml.h:38
void ezxml_free(ezxml_t xml)
Definition: ezxml.c:1011
ezxml_t ezxml_parse_fp(FILE *fp)
Definition: ezxml.c:790
char *** attr
Definition: ezxml.h:70
ezxml_t next
Definition: ezxml.h:49
char ** ezxml_pi(ezxml_t xml, const char *target)
Definition: ezxml.c:156
static char * ezxml_decode(char *s, char **ent, char t)
Definition: ezxml.c:195
ezxml_t ezxml_set_attr(ezxml_t xml, char *name, char *value)
Definition: ezxml.c:1165
ezxml_t ezxml_child(ezxml_t xml, const char *name)
Definition: ezxml.c:85
ezxml_t sibling
Definition: ezxml.h:50
ezxml_t cur
Definition: ezxml.h:63
#define max(a, b)
Definition: graphics.c:171
const char * ezxml_attr(ezxml_t xml, const char *attr)
Definition: ezxml.c:102
char * e
Definition: ezxml.h:68
static char * ezxml_str2utf8(char **s, size_t *len)
Definition: ezxml.c:537
int line
Definition: ezxml.h:56
char ** ent
Definition: ezxml.h:69
ezxml_t ordered
Definition: ezxml.h:51
#define EZXML_WS
Definition: ezxml.c:64
ezxml_t ezxml_set_txt(ezxml_t xml, char *txt)
Definition: ezxml.c:1152
char * m
Definition: ezxml.h:64
ezxml_t parent
Definition: ezxml.h:53
struct ezxml xml
Definition: ezxml.h:62
char * name
Definition: ezxml.h:45
char * s
Definition: ezxml.h:67
static short ezxml_internal_dtd(ezxml_root_t root, char *s, size_t len)
Definition: ezxml.c:389
static void ezxml_proc_inst(ezxml_root_t root, char *s, size_t len)
Definition: ezxml.c:346
#define EZXML_DUP
Definition: ezxml.h:40
ezxml_t ezxml_add_child(ezxml_t xml, char *name, size_t off)
Definition: ezxml.c:1137
static ezxml_t ezxml_err(ezxml_root_t root, char *s, const char *err,...)
Definition: ezxml.c:170
ezxml_t ezxml_parse_file(const char *file)
Definition: ezxml.c:846
char ** attr
Definition: ezxml.h:46
size_t off
Definition: ezxml.h:48
short standalone
Definition: ezxml.h:72
ezxml_t ezxml_parse_fd(int fd)
Definition: ezxml.c:816
static char * ezxml_toxml_r(ezxml_t xml, char **s, size_t *len, size_t *max, size_t start, char ***attr)
Definition: ezxml.c:901
char * ezxml_toxml(ezxml_t xml)
Definition: ezxml.c:963
ezxml_t ezxml_get(ezxml_t xml,...)
Definition: ezxml.c:143
struct ezxml * ezxml_t
Definition: ezxml.h:43
short flags
Definition: ezxml.h:54
char *** pi
Definition: ezxml.h:71
static int ezxml_ent_ok(char *name, char *s, char **ent)
Definition: ezxml.c:328
ezxml_t ezxml_parse_str(char *s, size_t len)
Definition: ezxml.c:596
char * my_strdup(const char *str)
Definition: util.c:101
messagelogger vpr_printf
Definition: util.c:17
ezxml_t child
Definition: ezxml.h:52