Spinning Topp Logo BlackTopp Studios
inc
pugixml.cpp
Go to the documentation of this file.
1 /**
2  * pugixml parser - version 1.2
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 /// @file
15 /// @brief Bundled copy of the pugixml parser. It lets this test harness parse XML without depending on the Mezzanine; the tests themselves should probably not use it directly.
16 
17 /// @cond false
18 
19 #ifndef SOURCE_PUGIXML_CPP
20 #define SOURCE_PUGIXML_CPP
21 
22 #include "pugixml.h"
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <assert.h>
28 #include <wchar.h>
29 
30 #ifndef PUGIXML_NO_XPATH
31 # include <math.h>
32 # include <float.h>
33 # ifdef PUGIXML_NO_EXCEPTIONS
34 # include <setjmp.h>
35 # endif
36 #endif
37 
38 #ifndef PUGIXML_NO_STL
39 # include <istream>
40 # include <ostream>
41 # include <string>
42 #endif
43 
44 // For placement new
45 #include <new>
46 
47 #ifdef _MSC_VER
48 # pragma warning(push)
49 # pragma warning(disable: 4127) // conditional expression is constant
50 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
51 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
52 # pragma warning(disable: 4702) // unreachable code
53 # pragma warning(disable: 4996) // this function or variable may be unsafe
54 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
55 #endif
56 
57 #ifdef __INTEL_COMPILER
58 # pragma warning(disable: 177) // function was declared but never referenced
59 # pragma warning(disable: 279) // controlling expression is constant
60 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
61 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
62 #endif
63 
64 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
65 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
66 #endif
67 
68 #ifdef __BORLANDC__
69 # pragma option push
70 # pragma warn -8008 // condition is always false
71 # pragma warn -8066 // unreachable code
72 #endif
73 
74 #ifdef __SNC__
75 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
76 # pragma diag_suppress=178 // function was declared but never referenced
77 # pragma diag_suppress=237 // controlling expression is constant
78 #endif
79 
80 // Inlining controls
81 #if defined(_MSC_VER) && _MSC_VER >= 1300
82 # define PUGI__NO_INLINE __declspec(noinline)
83 #elif defined(__GNUC__)
84 # define PUGI__NO_INLINE __attribute__((noinline))
85 #else
86 # define PUGI__NO_INLINE
87 #endif
88 
89 // Simple static assertion
90 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
91 
92 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
93 #ifdef __DMC__
94 # define PUGI__DMC_VOLATILE volatile
95 #else
96 # define PUGI__DMC_VOLATILE
97 #endif
98 
99 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
100 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
101 using std::memcpy;
102 using std::memmove;
103 #endif
104 
105 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
106 #if defined(_MSC_VER) && !defined(__S3E__)
107 # define PUGI__MSVC_CRT_VERSION _MSC_VER
108 #endif
109 
110 #ifdef PUGIXML_HEADER_ONLY
111 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
112 # define PUGI__NS_END } }
113 # define PUGI__FN inline
114 # define PUGI__FN_NO_INLINE inline
115 #else
116 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
117 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
118 # define PUGI__NS_END } }
119 # else
120 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
121 # define PUGI__NS_END } } }
122 # endif
123 # define PUGI__FN
124 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
125 #endif
126 
127 // uintptr_t
128 #if !defined(_MSC_VER) || _MSC_VER >= 1600
129 # include <stdint.h>
130 #else
131 # ifndef _UINTPTR_T_DEFINED
132 // No native uintptr_t in MSVC6 and in some WinCE versions
133 typedef size_t uintptr_t;
134 #define _UINTPTR_T_DEFINED
135 # endif
136 PUGI__NS_BEGIN
137  typedef unsigned __int8 uint8_t;
138  typedef unsigned __int16 uint16_t;
139  typedef unsigned __int32 uint32_t;
140 PUGI__NS_END
141 #endif
142 
143 // Memory allocation
144 PUGI__NS_BEGIN
145  PUGI__FN void* default_allocate(size_t size)
146  {
147  return malloc(size);
148  }
149 
150  PUGI__FN void default_deallocate(void* ptr)
151  {
152  free(ptr);
153  }
154 
155  template <typename T>
156  struct xml_memory_management_function_storage
157  {
158  static allocation_function allocate;
159  static deallocation_function deallocate;
160  };
161 
162  template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
163  template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
164 
165  typedef xml_memory_management_function_storage<int> xml_memory;
166 PUGI__NS_END
167 
168 // String utilities
169 PUGI__NS_BEGIN
170  // Get string length
171  PUGI__FN size_t strlength(const char_t* s)
172  {
173  assert(s);
174 
175  #ifdef PUGIXML_WCHAR_MODE
176  return wcslen(s);
177  #else
178  return strlen(s);
179  #endif
180  }
181 
182  // Compare two strings
183  PUGI__FN bool strequal(const char_t* src, const char_t* dst)
184  {
185  assert(src && dst);
186 
187  #ifdef PUGIXML_WCHAR_MODE
188  return wcscmp(src, dst) == 0;
189  #else
190  return strcmp(src, dst) == 0;
191  #endif
192  }
193 
194  // Compare lhs with [rhs_begin, rhs_end)
195  PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
196  {
197  for (size_t i = 0; i < count; ++i)
198  if (lhs[i] != rhs[i])
199  return false;
200 
201  return lhs[count] == 0;
202  }
203 
204 #ifdef PUGIXML_WCHAR_MODE
205  // Convert string to wide string, assuming all symbols are ASCII
206  PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
207  {
208  for (const char* i = source; *i; ++i) *dest++ = *i;
209  *dest = 0;
210  }
211 #endif
212 PUGI__NS_END
213 
214 #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
215 // auto_ptr-like buffer holder for exception recovery
216 PUGI__NS_BEGIN
217  struct buffer_holder
218  {
219  void* data;
220  void (*deleter)(void*);
221 
222  buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
223  {
224  }
225 
226  ~buffer_holder()
227  {
228  if (data) deleter(data);
229  }
230 
231  void* release()
232  {
233  void* result = data;
234  data = 0;
235  return result;
236  }
237  };
238 PUGI__NS_END
239 #endif
240 
241 PUGI__NS_BEGIN
242  static const size_t xml_memory_page_size =
243  #ifdef PUGIXML_MEMORY_PAGE_SIZE
244  PUGIXML_MEMORY_PAGE_SIZE
245  #else
246  32768
247  #endif
248  ;
249 
250  static const uintptr_t xml_memory_page_alignment = 32;
251  static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
252  static const uintptr_t xml_memory_page_name_allocated_mask = 16;
253  static const uintptr_t xml_memory_page_value_allocated_mask = 8;
254  static const uintptr_t xml_memory_page_type_mask = 7;
255 
256  struct xml_allocator;
257 
258  struct xml_memory_page
259  {
260  static xml_memory_page* construct(void* memory)
261  {
262  if (!memory) return 0; //$ redundant, left for performance
263 
264  xml_memory_page* result = static_cast<xml_memory_page*>(memory);
265 
266  result->allocator = 0;
267  result->memory = 0;
268  result->prev = 0;
269  result->next = 0;
270  result->busy_size = 0;
271  result->freed_size = 0;
272 
273  return result;
274  }
275 
276  xml_allocator* allocator;
277 
278  void* memory;
279 
280  xml_memory_page* prev;
281  xml_memory_page* next;
282 
283  size_t busy_size;
284  size_t freed_size;
285 
286  char data[1];
287  };
288 
289  struct xml_memory_string_header
290  {
291  uint16_t page_offset; // offset from page->data
292  uint16_t full_size; // 0 if string occupies whole page
293  };
294 
295  struct xml_allocator
296  {
297  xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
298  {
299  }
300 
301  xml_memory_page* allocate_page(size_t data_size)
302  {
303  size_t size = offsetof(xml_memory_page, data) + data_size;
304 
305  // allocate block with some alignment, leaving memory for worst-case padding
306  void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
307  if (!memory) return 0;
308 
309  // align upwards to page boundary
310  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
311 
312  // prepare page structure
313  xml_memory_page* page = xml_memory_page::construct(page_memory);
314 
315  page->memory = memory;
316  page->allocator = _root->allocator;
317 
318  return page;
319  }
320 
321  static void deallocate_page(xml_memory_page* page)
322  {
323  xml_memory::deallocate(page->memory);
324  }
325 
326  void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
327 
328  void* allocate_memory(size_t size, xml_memory_page*& out_page)
329  {
330  if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
331 
332  void* buf = _root->data + _busy_size;
333 
334  _busy_size += size;
335 
336  out_page = _root;
337 
338  return buf;
339  }
340 
341  void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
342  {
343  if (page == _root) page->busy_size = _busy_size;
344 
345  assert(ptr >= page->data && ptr < page->data + page->busy_size);
346  (void)!ptr;
347 
348  page->freed_size += size;
349  assert(page->freed_size <= page->busy_size);
350 
351  if (page->freed_size == page->busy_size)
352  {
353  if (page->next == 0)
354  {
355  assert(_root == page);
356 
357  // top page freed, just reset sizes
358  page->busy_size = page->freed_size = 0;
359  _busy_size = 0;
360  }
361  else
362  {
363  assert(_root != page);
364  assert(page->prev);
365 
366  // remove from the list
367  page->prev->next = page->next;
368  page->next->prev = page->prev;
369 
370  // deallocate
371  deallocate_page(page);
372  }
373  }
374  }
375 
376  char_t* allocate_string(size_t length)
377  {
378  // allocate memory for string and header block
379  size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
380 
381  // round size up to pointer alignment boundary
382  size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
383 
384  xml_memory_page* page;
385  xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
386 
387  if (!header) return 0;
388 
389  // setup header
390  ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
391 
392  assert(page_offset >= 0 && page_offset < (1 << 16));
393  header->page_offset = static_cast<uint16_t>(page_offset);
394 
395  // full_size == 0 for large strings that occupy the whole page
396  assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
397  header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
398 
399  // round-trip through void* to avoid 'cast increases required alignment of target type' warning
400  // header is guaranteed a pointer-sized alignment, which should be enough for char_t
401  return static_cast<char_t*>(static_cast<void*>(header + 1));
402  }
403 
404  void deallocate_string(char_t* string)
405  {
406  // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
407  // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
408 
409  // get header
410  xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
411 
412  // deallocate
413  size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
414  xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
415 
416  // if full_size == 0 then this string occupies the whole page
417  size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
418 
419  deallocate_memory(header, full_size, page);
420  }
421 
422  xml_memory_page* _root;
423  size_t _busy_size;
424  };
425 
426  PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
427  {
428  const size_t large_allocation_threshold = xml_memory_page_size / 4;
429 
430  xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
431  out_page = page;
432 
433  if (!page) return 0;
434 
435  if (size <= large_allocation_threshold)
436  {
437  _root->busy_size = _busy_size;
438 
439  // insert page at the end of linked list
440  page->prev = _root;
441  _root->next = page;
442  _root = page;
443 
444  _busy_size = size;
445  }
446  else
447  {
448  // insert page before the end of linked list, so that it is deleted as soon as possible
449  // the last page is not deleted even if it's empty (see deallocate_memory)
450  assert(_root->prev);
451 
452  page->prev = _root->prev;
453  page->next = _root;
454 
455  _root->prev->next = page;
456  _root->prev = page;
457  }
458 
459  // allocate inside page
460  page->busy_size = size;
461 
462  return page->data;
463  }
464 PUGI__NS_END
465 
466 namespace pugi
467 {
468  /// A 'name=value' XML attribute structure.
469  struct xml_attribute_struct
470  {
471  /// Default ctor
472  xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
473  {
474  }
475 
476  uintptr_t header;
477 
478  char_t* name; ///< Pointer to attribute name.
479  char_t* value; ///< Pointer to attribute value.
480 
481  xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
482  xml_attribute_struct* next_attribute; ///< Next attribute
483  };
484 
485  /// An XML document tree node.
486  struct xml_node_struct
487  {
488  /// Default ctor
489  /// \param type - node type
490  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
491  {
492  }
493 
494  uintptr_t header;
495 
496  xml_node_struct* parent; ///< Pointer to parent
497 
498  char_t* name; ///< Pointer to element name.
499  char_t* value; ///< Pointer to any associated string data.
500 
501  xml_node_struct* first_child; ///< First child
502 
503  xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
504  xml_node_struct* next_sibling; ///< Right brother
505 
506  xml_attribute_struct* first_attribute; ///< First attribute
507  };
508 }
509 
510 PUGI__NS_BEGIN
511  struct xml_document_struct: public xml_node_struct, public xml_allocator
512  {
513  xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0)
514  {
515  }
516 
517  const char_t* buffer;
518  };
519 
520  inline xml_allocator& get_allocator(const xml_node_struct* node)
521  {
522  assert(node);
523 
524  return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
525  }
526 PUGI__NS_END
527 
528 // Low-level DOM operations
529 PUGI__NS_BEGIN
530  inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
531  {
532  xml_memory_page* page;
533  void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
534 
535  return new (memory) xml_attribute_struct(page);
536  }
537 
538  inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
539  {
540  xml_memory_page* page;
541  void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
542 
543  return new (memory) xml_node_struct(page, type);
544  }
545 
546  inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
547  {
548  uintptr_t header = a->header;
549 
550  if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
551  if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
552 
553  alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
554  }
555 
556  inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
557  {
558  uintptr_t header = n->header;
559 
560  if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
561  if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
562 
563  for (xml_attribute_struct* attr = n->first_attribute; attr; )
564  {
565  xml_attribute_struct* next = attr->next_attribute;
566 
567  destroy_attribute(attr, alloc);
568 
569  attr = next;
570  }
571 
572  for (xml_node_struct* child = n->first_child; child; )
573  {
574  xml_node_struct* next = child->next_sibling;
575 
576  destroy_node(child, alloc);
577 
578  child = next;
579  }
580 
581  alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
582  }
583 
584  PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
585  {
586  xml_node_struct* child = allocate_node(alloc, type);
587  if (!child) return 0;
588 
589  child->parent = node;
590 
591  xml_node_struct* first_child = node->first_child;
592 
593  if (first_child)
594  {
595  xml_node_struct* last_child = first_child->prev_sibling_c;
596 
597  last_child->next_sibling = child;
598  child->prev_sibling_c = last_child;
599  first_child->prev_sibling_c = child;
600  }
601  else
602  {
603  node->first_child = child;
604  child->prev_sibling_c = child;
605  }
606 
607  return child;
608  }
609 
610  PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
611  {
612  xml_attribute_struct* a = allocate_attribute(alloc);
613  if (!a) return 0;
614 
615  xml_attribute_struct* first_attribute = node->first_attribute;
616 
617  if (first_attribute)
618  {
619  xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
620 
621  last_attribute->next_attribute = a;
622  a->prev_attribute_c = last_attribute;
623  first_attribute->prev_attribute_c = a;
624  }
625  else
626  {
627  node->first_attribute = a;
628  a->prev_attribute_c = a;
629  }
630 
631  return a;
632  }
633 PUGI__NS_END
634 
635 // Helper classes for code generation
636 PUGI__NS_BEGIN
637  struct opt_false
638  {
639  enum { value = 0 };
640  };
641 
642  struct opt_true
643  {
644  enum { value = 1 };
645  };
646 PUGI__NS_END
647 
648 // Unicode utilities
649 PUGI__NS_BEGIN
650  inline uint16_t endian_swap(uint16_t value)
651  {
652  return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
653  }
654 
655  inline uint32_t endian_swap(uint32_t value)
656  {
657  return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
658  }
659 
660  struct utf8_counter
661  {
662  typedef size_t value_type;
663 
664  static value_type low(value_type result, uint32_t ch)
665  {
666  // U+0000..U+007F
667  if (ch < 0x80) return result + 1;
668  // U+0080..U+07FF
669  else if (ch < 0x800) return result + 2;
670  // U+0800..U+FFFF
671  else return result + 3;
672  }
673 
674  static value_type high(value_type result, uint32_t)
675  {
676  // U+10000..U+10FFFF
677  return result + 4;
678  }
679  };
680 
681  struct utf8_writer
682  {
683  typedef uint8_t* value_type;
684 
685  static value_type low(value_type result, uint32_t ch)
686  {
687  // U+0000..U+007F
688  if (ch < 0x80)
689  {
690  *result = static_cast<uint8_t>(ch);
691  return result + 1;
692  }
693  // U+0080..U+07FF
694  else if (ch < 0x800)
695  {
696  result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
697  result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
698  return result + 2;
699  }
700  // U+0800..U+FFFF
701  else
702  {
703  result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
704  result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
705  result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
706  return result + 3;
707  }
708  }
709 
710  static value_type high(value_type result, uint32_t ch)
711  {
712  // U+10000..U+10FFFF
713  result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
714  result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
715  result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
716  result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
717  return result + 4;
718  }
719 
720  static value_type any(value_type result, uint32_t ch)
721  {
722  return (ch < 0x10000) ? low(result, ch) : high(result, ch);
723  }
724  };
725 
726  struct utf16_counter
727  {
728  typedef size_t value_type;
729 
730  static value_type low(value_type result, uint32_t)
731  {
732  return result + 1;
733  }
734 
735  static value_type high(value_type result, uint32_t)
736  {
737  return result + 2;
738  }
739  };
740 
741  struct utf16_writer
742  {
743  typedef uint16_t* value_type;
744 
745  static value_type low(value_type result, uint32_t ch)
746  {
747  *result = static_cast<uint16_t>(ch);
748 
749  return result + 1;
750  }
751 
752  static value_type high(value_type result, uint32_t ch)
753  {
754  uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
755  uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
756 
757  result[0] = static_cast<uint16_t>(0xD800 + msh);
758  result[1] = static_cast<uint16_t>(0xDC00 + lsh);
759 
760  return result + 2;
761  }
762 
763  static value_type any(value_type result, uint32_t ch)
764  {
765  return (ch < 0x10000) ? low(result, ch) : high(result, ch);
766  }
767  };
768 
769  struct utf32_counter
770  {
771  typedef size_t value_type;
772 
773  static value_type low(value_type result, uint32_t)
774  {
775  return result + 1;
776  }
777 
778  static value_type high(value_type result, uint32_t)
779  {
780  return result + 1;
781  }
782  };
783 
784  struct utf32_writer
785  {
786  typedef uint32_t* value_type;
787 
788  static value_type low(value_type result, uint32_t ch)
789  {
790  *result = ch;
791 
792  return result + 1;
793  }
794 
795  static value_type high(value_type result, uint32_t ch)
796  {
797  *result = ch;
798 
799  return result + 1;
800  }
801 
802  static value_type any(value_type result, uint32_t ch)
803  {
804  *result = ch;
805 
806  return result + 1;
807  }
808  };
809 
810  struct latin1_writer
811  {
812  typedef uint8_t* value_type;
813 
814  static value_type low(value_type result, uint32_t ch)
815  {
816  *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
817 
818  return result + 1;
819  }
820 
821  static value_type high(value_type result, uint32_t ch)
822  {
823  (void)ch;
824 
825  *result = '?';
826 
827  return result + 1;
828  }
829  };
830 
831  template <size_t size> struct wchar_selector;
832 
833  template <> struct wchar_selector<2>
834  {
835  typedef uint16_t type;
836  typedef utf16_counter counter;
837  typedef utf16_writer writer;
838  };
839 
840  template <> struct wchar_selector<4>
841  {
842  typedef uint32_t type;
843  typedef utf32_counter counter;
844  typedef utf32_writer writer;
845  };
846 
847  typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
848  typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
849 
850  template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
851  {
852  static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
853  {
854  const uint8_t utf8_byte_mask = 0x3f;
855 
856  while (size)
857  {
858  uint8_t lead = *data;
859 
860  // 0xxxxxxx -> U+0000..U+007F
861  if (lead < 0x80)
862  {
863  result = Traits::low(result, lead);
864  data += 1;
865  size -= 1;
866 
867  // process aligned single-byte (ascii) blocks
868  if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
869  {
870  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
871  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
872  {
873  result = Traits::low(result, data[0]);
874  result = Traits::low(result, data[1]);
875  result = Traits::low(result, data[2]);
876  result = Traits::low(result, data[3]);
877  data += 4;
878  size -= 4;
879  }
880  }
881  }
882  // 110xxxxx -> U+0080..U+07FF
883  else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
884  {
885  result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
886  data += 2;
887  size -= 2;
888  }
889  // 1110xxxx -> U+0800-U+FFFF
890  else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
891  {
892  result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
893  data += 3;
894  size -= 3;
895  }
896  // 11110xxx -> U+10000..U+10FFFF
897  else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
898  {
899  result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
900  data += 4;
901  size -= 4;
902  }
903  // 10xxxxxx or 11111xxx -> invalid
904  else
905  {
906  data += 1;
907  size -= 1;
908  }
909  }
910 
911  return result;
912  }
913 
914  static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
915  {
916  const uint16_t* end = data + size;
917 
918  while (data < end)
919  {
920  uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
921 
922  // U+0000..U+D7FF
923  if (lead < 0xD800)
924  {
925  result = Traits::low(result, lead);
926  data += 1;
927  }
928  // U+E000..U+FFFF
929  else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
930  {
931  result = Traits::low(result, lead);
932  data += 1;
933  }
934  // surrogate pair lead
935  else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
936  {
937  uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
938 
939  if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
940  {
941  result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
942  data += 2;
943  }
944  else
945  {
946  data += 1;
947  }
948  }
949  else
950  {
951  data += 1;
952  }
953  }
954 
955  return result;
956  }
957 
958  static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
959  {
960  const uint32_t* end = data + size;
961 
962  while (data < end)
963  {
964  uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
965 
966  // U+0000..U+FFFF
967  if (lead < 0x10000)
968  {
969  result = Traits::low(result, lead);
970  data += 1;
971  }
972  // U+10000..U+10FFFF
973  else
974  {
975  result = Traits::high(result, lead);
976  data += 1;
977  }
978  }
979 
980  return result;
981  }
982 
983  static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
984  {
985  for (size_t i = 0; i < size; ++i)
986  {
987  result = Traits::low(result, data[i]);
988  }
989 
990  return result;
991  }
992 
993  static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
994  {
995  return decode_utf16_block(data, size, result);
996  }
997 
998  static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
999  {
1000  return decode_utf32_block(data, size, result);
1001  }
1002 
1003  static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
1004  {
1005  return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
1006  }
1007  };
1008 
1009  template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
1010  {
1011  for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
1012  }
1013 
1014 #ifdef PUGIXML_WCHAR_MODE
1015  PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1016  {
1017  for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1018  }
1019 #endif
1020 PUGI__NS_END
1021 
1022 PUGI__NS_BEGIN
1023  enum chartype_t
1024  {
1025  ct_parse_pcdata = 1, // \0, &, \r, <
1026  ct_parse_attr = 2, // \0, &, \r, ', "
1027  ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1028  ct_space = 8, // \r, \n, space, tab
1029  ct_parse_cdata = 16, // \0, ], >, \r
1030  ct_parse_comment = 32, // \0, -, >, \r
1031  ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1032  ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
1033  };
1034 
1035  static const unsigned char chartype_table[256] =
1036  {
1037  55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1038  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1039  8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1040  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1041  0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1042  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1043  0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1044  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1045 
1046  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1047  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1048  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1049  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1050  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1051  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1052  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1053  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
1054  };
1055 
1056  enum chartypex_t
1057  {
1058  ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1059  ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1060  ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1061  ctx_digit = 8, // 0-9
1062  ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1063  };
1064 
// Extended character classification table indexed by character code (0-255).
// Each entry is a bitwise OR of chartypex_t flags; e.g. 24 = ctx_digit | ctx_symbol for '0'-'9',
// 20 = ctx_start_symbol | ctx_symbol for letters and '_', 3 = ctx_special_pcdata | ctx_special_attr.
// Read through PUGI__IS_CHARTYPEX; every entry from index 128 upwards holds the same value (20).
1065  static const unsigned char chartypex_table[256] =
1066  {
1067  3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1068  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1069  0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1070  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1071 
1072  0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1073  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1074  0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1075  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1076 
1077  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1078  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1079  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1080  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1081  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1082  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1083  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1084  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
1085  };
1086 
// PUGI__IS_CHARTYPE_IMPL(c, ct, table): non-zero when the table entry for character c has any of
// the bits in ct set.
// With PUGIXML_WCHAR_MODE defined, any character value of 128 or more is looked up as table[128],
// so the 256-entry tables are never indexed out of range by a wide character.
// Otherwise c is cast to unsigned char and used directly as the index.
1087 #ifdef PUGIXML_WCHAR_MODE
1088  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1089 #else
1090  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1091 #endif
1092 
// Convenience wrappers binding the lookup to chartype_table and chartypex_table respectively.
// Note that the argument c is expanded more than once in wchar mode.
1093  #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1094  #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1095 
1096  PUGI__FN bool is_little_endian()
1097  {
1098  unsigned int ui = 1;
1099 
1100  return *reinterpret_cast<unsigned char*>(&ui) == 1;
1101  }
1102 
1103  PUGI__FN xml_encoding get_wchar_encoding()
1104  {
1105  PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1106 
1107  if (sizeof(wchar_t) == 2)
1108  return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1109  else
1110  return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1111  }
1112 
1113  PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1114  {
1115  // look for BOM in first few bytes
1116  if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1117  if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1118  if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1119  if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1120  if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1121 
1122  // look for <, <? or <?xm in various encodings
1123  if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1124  if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1125  if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1126  if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1127  if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
1128 
1129  // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1130  if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1131  if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1132 
1133  // no known BOM detected, assume utf8
1134  return encoding_utf8;
1135  }
1136 
1137  PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1138  {
1139  // replace wchar encoding with utf implementation
1140  if (encoding == encoding_wchar) return get_wchar_encoding();
1141 
1142  // replace utf16 encoding with utf16 with specific endianness
1143  if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1144 
1145  // replace utf32 encoding with utf32 with specific endianness
1146  if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1147 
1148  // only do autodetection if no explicit encoding is requested
1149  if (encoding != encoding_auto) return encoding;
1150 
1151  // skip encoding autodetection if input buffer is too small
1152  if (size < 4) return encoding_utf8;
1153 
1154  // try to guess encoding (based on XML specification, Appendix F.1)
1155  const uint8_t* data = static_cast<const uint8_t*>(contents);
1156 
1157  PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1158 
1159  return guess_buffer_encoding(d0, d1, d2, d3);
1160  }
1161 
1162  PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1163  {
1164  if (is_mutable)
1165  {
1166  out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1167  }
1168  else
1169  {
1170  void* buffer = xml_memory::allocate(size > 0 ? size : 1);
1171  if (!buffer) return false;
1172 
1173  memcpy(buffer, contents, size);
1174 
1175  out_buffer = static_cast<char_t*>(buffer);
1176  }
1177 
1178  out_length = size / sizeof(char_t);
1179 
1180  return true;
1181  }
1182 
1183 #ifdef PUGIXML_WCHAR_MODE
1184  PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
1185  {
1186  return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
1187  (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
1188  }
1189 
1190  PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1191  {
1192  const char_t* data = static_cast<const char_t*>(contents);
1193 
1194  if (is_mutable)
1195  {
1196  out_buffer = const_cast<char_t*>(data);
1197  }
1198  else
1199  {
1200  out_buffer = static_cast<char_t*>(xml_memory::allocate(size > 0 ? size : 1));
1201  if (!out_buffer) return false;
1202  }
1203 
1204  out_length = size / sizeof(char_t);
1205 
1206  convert_wchar_endian_swap(out_buffer, data, out_length);
1207 
1208  return true;
1209  }
1210 
1211  PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1212  {
1213  const uint8_t* data = static_cast<const uint8_t*>(contents);
1214 
1215  // first pass: get length in wchar_t units
1216  out_length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1217 
1218  // allocate buffer of suitable length
1219  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1220  if (!out_buffer) return false;
1221 
1222  // second pass: convert utf8 input to wchar_t
1223  wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1224  wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, out_begin);
1225 
1226  assert(out_end == out_begin + out_length);
1227  (void)!out_end;
1228 
1229  return true;
1230  }
1231 
1232  template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1233  {
1234  const uint16_t* data = static_cast<const uint16_t*>(contents);
1235  size_t length = size / sizeof(uint16_t);
1236 
1237  // first pass: get length in wchar_t units
1238  out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, length, 0);
1239 
1240  // allocate buffer of suitable length
1241  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1242  if (!out_buffer) return false;
1243 
1244  // second pass: convert utf16 input to wchar_t
1245  wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1246  wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1247 
1248  assert(out_end == out_begin + out_length);
1249  (void)!out_end;
1250 
1251  return true;
1252  }
1253 
1254  template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1255  {
1256  const uint32_t* data = static_cast<const uint32_t*>(contents);
1257  size_t length = size / sizeof(uint32_t);
1258 
1259  // first pass: get length in wchar_t units
1260  out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, length, 0);
1261 
1262  // allocate buffer of suitable length
1263  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1264  if (!out_buffer) return false;
1265 
1266  // second pass: convert utf32 input to wchar_t
1267  wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1268  wchar_writer::value_type out_end = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1269 
1270  assert(out_end == out_begin + out_length);
1271  (void)!out_end;
1272 
1273  return true;
1274  }
1275 
1276  PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
1277  {
1278  const uint8_t* data = static_cast<const uint8_t*>(contents);
1279 
1280  // get length in wchar_t units
1281  out_length = size;
1282 
1283  // allocate buffer of suitable length
1284  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1285  if (!out_buffer) return false;
1286 
1287  // convert latin1 input to wchar_t
1288  wchar_writer::value_type out_begin = reinterpret_cast<wchar_writer::value_type>(out_buffer);
1289  wchar_writer::value_type out_end = utf_decoder<wchar_writer>::decode_latin1_block(data, size, out_begin);
1290 
1291  assert(out_end == out_begin + out_length);
1292  (void)!out_end;
1293 
1294  return true;
1295  }
1296 
1297  PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1298  {
1299  // get native encoding
1300  xml_encoding wchar_encoding = get_wchar_encoding();
1301 
1302  // fast path: no conversion required
1303  if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1304 
1305  // only endian-swapping is required
1306  if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
1307 
1308  // source encoding is utf8
1309  if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
1310 
1311  // source encoding is utf16
1312  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1313  {
1314  xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1315 
1316  return (native_encoding == encoding) ?
1317  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1318  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1319  }
1320 
1321  // source encoding is utf32
1322  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1323  {
1324  xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1325 
1326  return (native_encoding == encoding) ?
1327  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1328  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1329  }
1330 
1331  // source encoding is latin1
1332  if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
1333 
1334  assert(!"Invalid encoding");
1335  return false;
1336  }
1337 #else
1338  template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1339  {
1340  const uint16_t* data = static_cast<const uint16_t*>(contents);
1341  size_t length = size / sizeof(uint16_t);
1342 
1343  // first pass: get length in utf8 units
1344  out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
1345 
1346  // allocate buffer of suitable length
1347  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1348  if (!out_buffer) return false;
1349 
1350  // second pass: convert utf16 input to utf8
1351  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1352  uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, length, out_begin);
1353 
1354  assert(out_end == out_begin + out_length);
1355  (void)!out_end;
1356 
1357  return true;
1358  }
1359 
1360  template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1361  {
1362  const uint32_t* data = static_cast<const uint32_t*>(contents);
1363  size_t length = size / sizeof(uint32_t);
1364 
1365  // first pass: get length in utf8 units
1366  out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
1367 
1368  // allocate buffer of suitable length
1369  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1370  if (!out_buffer) return false;
1371 
1372  // second pass: convert utf32 input to utf8
1373  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1374  uint8_t* out_end = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, length, out_begin);
1375 
1376  assert(out_end == out_begin + out_length);
1377  (void)!out_end;
1378 
1379  return true;
1380  }
1381 
1382  PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
1383  {
1384  for (size_t i = 0; i < size; ++i)
1385  if (data[i] > 127)
1386  return i;
1387 
1388  return size;
1389  }
1390 
1391  PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1392  {
1393  const uint8_t* data = static_cast<const uint8_t*>(contents);
1394 
1395  // get size of prefix that does not need utf8 conversion
1396  size_t prefix_length = get_latin1_7bit_prefix_length(data, size);
1397  assert(prefix_length <= size);
1398 
1399  const uint8_t* postfix = data + prefix_length;
1400  size_t postfix_length = size - prefix_length;
1401 
1402  // if no conversion is needed, just return the original buffer
1403  if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1404 
1405  // first pass: get length in utf8 units
1406  out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
1407 
1408  // allocate buffer of suitable length
1409  out_buffer = static_cast<char_t*>(xml_memory::allocate((out_length > 0 ? out_length : 1) * sizeof(char_t)));
1410  if (!out_buffer) return false;
1411 
1412  // second pass: convert latin1 input to utf8
1413  memcpy(out_buffer, data, prefix_length);
1414 
1415  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1416  uint8_t* out_end = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
1417 
1418  assert(out_end == out_begin + out_length);
1419  (void)!out_end;
1420 
1421  return true;
1422  }
1423 
1424  PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
1425  {
1426  // fast path: no conversion required
1427  if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1428 
1429  // source encoding is utf16
1430  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
1431  {
1432  xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1433 
1434  return (native_encoding == encoding) ?
1435  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1436  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1437  }
1438 
1439  // source encoding is utf32
1440  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
1441  {
1442  xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1443 
1444  return (native_encoding == encoding) ?
1445  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1446  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1447  }
1448 
1449  // source encoding is latin1
1450  if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
1451 
1452  assert(!"Invalid encoding");
1453  return false;
1454  }
1455 #endif
1456 
1457  PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
1458  {
1459  // get length in utf8 characters
1460  return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
1461  }
1462 
1463  PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1464  {
1465  // convert to utf8
1466  uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1467  uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
1468 
1469  assert(begin + size == end);
1470  (void)!end;
1471 
1472  // zero-terminate
1473  buffer[size] = 0;
1474  }
1475 
1476 #ifndef PUGIXML_NO_STL
1477  PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
1478  {
1479  // first pass: get length in utf8 characters
1480  size_t size = as_utf8_begin(str, length);
1481 
1482  // allocate resulting string
1483  std::string result;
1484  result.resize(size);
1485 
1486  // second pass: convert to utf8
1487  if (size > 0) as_utf8_end(&result[0], size, str, length);
1488 
1489  return result;
1490  }
1491 
1492  PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
1493  {
1494  const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1495 
1496  // first pass: get length in wchar_t units
1497  size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1498 
1499  // allocate resulting string
1500  std::basic_string<wchar_t> result;
1501  result.resize(length);
1502 
1503  // second pass: convert to wchar_t
1504  if (length > 0)
1505  {
1506  wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
1507  wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
1508 
1509  assert(begin + length == end);
1510  (void)!end;
1511  }
1512 
1513  return result;
1514  }
1515 #endif
1516 
1517  inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
1518  {
1519  assert(target);
1520  size_t target_length = strlength(target);
1521 
1522  // always reuse document buffer memory if possible
1523  if (!allocated) return target_length >= length;
1524 
1525  // reuse heap memory if waste is not too great
1526  const size_t reuse_threshold = 32;
1527 
1528  return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
1529  }
1530 
1531  PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
1532  {
1533  size_t source_length = strlength(source);
1534 
1535  if (source_length == 0)
1536  {
1537  // empty string and null pointer are equivalent, so just deallocate old memory
1538  xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1539 
1540  if (header & header_mask) alloc->deallocate_string(dest);
1541 
1542  // mark the string as not allocated
1543  dest = 0;
1544  header &= ~header_mask;
1545 
1546  return true;
1547  }
1548  else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1549  {
1550  // we can reuse old buffer, so just copy the new data (including zero terminator)
1551  memcpy(dest, source, (source_length + 1) * sizeof(char_t));
1552 
1553  return true;
1554  }
1555  else
1556  {
1557  xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
1558 
1559  // allocate new buffer
1560  char_t* buf = alloc->allocate_string(source_length + 1);
1561  if (!buf) return false;
1562 
1563  // copy the string (including zero terminator)
1564  memcpy(buf, source, (source_length + 1) * sizeof(char_t));
1565 
1566  // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1567  if (header & header_mask) alloc->deallocate_string(dest);
1568 
1569  // the string is now allocated, so set the flag
1570  dest = buf;
1571  header |= header_mask;
1572 
1573  return true;
1574  }
1575  }
1576 
1577  struct gap
1578  {
1579  char_t* end;
1580  size_t size;
1581 
1582  gap(): end(0), size(0)
1583  {
1584  }
1585 
1586  // Push new gap, move s count bytes further (skipping the gap).
1587  // Collapse previous gap.
1588  void push(char_t*& s, size_t count)
1589  {
1590  if (end) // there was a gap already; collapse it
1591  {
1592  // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1593  assert(s >= end);
1594  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1595  }
1596 
1597  s += count; // end of current gap
1598 
1599  // "merge" two gaps
1600  end = s;
1601  size += count;
1602  }
1603 
1604  // Collapse all gaps, return past-the-end pointer
1605  char_t* flush(char_t* s)
1606  {
1607  if (end)
1608  {
1609  // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1610  assert(s >= end);
1611  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1612 
1613  return s - size;
1614  }
1615  else return s;
1616  }
1617  };
1618 
// Decodes, in place, the character or entity reference that starts at s (callers in this file
// invoke it when *s == '&'). Recognized forms are decimal (&#NNN;) and hexadecimal (&#xHHH;)
// character references and the entities &amp; &apos; &gt; &lt; &quot;.
// On success the decoded character(s) are written at s, the unused remainder of the reference
// is registered with g as a gap, and the pointer just past the terminating ';' is returned.
// If the text is not a complete recognized reference, nothing is written and the pointer at
// which matching stopped is returned.
1619  PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
1620  {
// stre scans ahead through the reference while s stays on the '&'
1621  char_t* stre = s + 1;
1622 
1623  switch (*stre)
1624  {
1625  case '#': // &#...
1626  {
// code point accumulated from the digits; unsigned arithmetic, no overflow check
1627  unsigned int ucsc = 0;
1628 
1629  if (stre[1] == 'x') // &#x... (hex code)
1630  {
1631  stre += 2;
1632 
1633  char_t ch = *stre;
1634 
// a reference with no digits at all is not decoded
1635  if (ch == ';') return stre;
1636 
1637  for (;;)
1638  {
1639  if (static_cast<unsigned int>(ch - '0') <= 9)
1640  ucsc = 16 * ucsc + (ch - '0');
// (ch | ' ') sets bit 0x20, so both 'a'-'f' and 'A'-'F' match here
1641  else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1642  ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1643  else if (ch == ';')
1644  break;
1645  else // cancel
1646  return stre;
1647 
1648  ch = *++stre;
1649  }
1650 
// step over the terminating ';'
1651  ++stre;
1652  }
1653  else // &#... (dec code)
1654  {
1655  char_t ch = *++stre;
1656 
// a reference with no digits at all is not decoded
1657  if (ch == ';') return stre;
1658 
1659  for (;;)
1660  {
1661  if (static_cast<unsigned int>(ch - '0') <= 9)
1662  ucsc = 10 * ucsc + (ch - '0');
1663  else if (ch == ';')
1664  break;
1665  else // cancel
1666  return stre;
1667 
1668  ch = *++stre;
1669  }
1670 
// step over the terminating ';'
1671  ++stre;
1672  }
1673 
// write the code point at s in the buffer's encoding; s is advanced past what was written
1674  #ifdef PUGIXML_WCHAR_MODE
1675  s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
1676  #else
1677  s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
1678  #endif
1679 
// everything between the written character(s) and the end of the reference becomes a gap
1680  g.push(s, stre - s);
1681  return stre;
1682  }
1683 
1684  case 'a': // &a
1685  {
1686  ++stre;
1687 
1688  if (*stre == 'm') // &am
1689  {
1690  if (*++stre == 'p' && *++stre == ';') // &amp;
1691  {
1692  *s++ = '&';
1693  ++stre;
1694 
1695  g.push(s, stre - s);
1696  return stre;
1697  }
1698  }
1699  else if (*stre == 'p') // &ap
1700  {
1701  if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1702  {
1703  *s++ = '\'';
1704  ++stre;
1705 
1706  g.push(s, stre - s);
1707  return stre;
1708  }
1709  }
1710  break;
1711  }
1712 
1713  case 'g': // &g
1714  {
1715  if (*++stre == 't' && *++stre == ';') // &gt;
1716  {
1717  *s++ = '>';
1718  ++stre;
1719 
1720  g.push(s, stre - s);
1721  return stre;
1722  }
1723  break;
1724  }
1725 
1726  case 'l': // &l
1727  {
1728  if (*++stre == 't' && *++stre == ';') // &lt;
1729  {
1730  *s++ = '<';
1731  ++stre;
1732 
1733  g.push(s, stre - s);
1734  return stre;
1735  }
1736  break;
1737  }
1738 
1739  case 'q': // &q
1740  {
1741  if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1742  {
1743  *s++ = '"';
1744  ++stre;
1745 
1746  g.push(s, stre - s);
1747  return stre;
1748  }
1749  break;
1750  }
1751 
1752  default:
1753  break;
1754  }
1755 
// not a recognized reference: the text is left as is and scanning resumes where matching stopped
1756  return stre;
1757  }
1758 
1759  // Utility macro for last character handling
// ENDSWITH(c, e) is true when c equals e, or when c is the zero terminator and `endch` equals e.
// `endch` is not a macro parameter: a variable with that name must be in scope where the macro
// is expanded (strconv_comment and strconv_cdata take it as a parameter).
1760  #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
1761 
1762  PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
1763  {
1764  gap g;
1765 
1766  while (true)
1767  {
1768  while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
1769 
1770  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1771  {
1772  *s++ = '\n'; // replace first one with 0x0a
1773 
1774  if (*s == '\n') g.push(s, 1);
1775  }
1776  else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
1777  {
1778  *g.flush(s) = 0;
1779 
1780  return s + (s[2] == '>' ? 3 : 2);
1781  }
1782  else if (*s == 0)
1783  {
1784  return 0;
1785  }
1786  else ++s;
1787  }
1788  }
1789 
1790  PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
1791  {
1792  gap g;
1793 
1794  while (true)
1795  {
1796  while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
1797 
1798  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1799  {
1800  *s++ = '\n'; // replace first one with 0x0a
1801 
1802  if (*s == '\n') g.push(s, 1);
1803  }
1804  else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
1805  {
1806  *g.flush(s) = 0;
1807 
1808  return s + 1;
1809  }
1810  else if (*s == 0)
1811  {
1812  return 0;
1813  }
1814  else ++s;
1815  }
1816  }
1817 
1818  typedef char_t* (*strconv_pcdata_t)(char_t*);
1819 
1820  template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1821  {
1822  static char_t* parse(char_t* s)
1823  {
1824  gap g;
1825 
1826  while (true)
1827  {
1828  while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
1829 
1830  if (*s == '<') // PCDATA ends here
1831  {
1832  *g.flush(s) = 0;
1833 
1834  return s + 1;
1835  }
1836  else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1837  {
1838  *s++ = '\n'; // replace first one with 0x0a
1839 
1840  if (*s == '\n') g.push(s, 1);
1841  }
1842  else if (opt_escape::value && *s == '&')
1843  {
1844  s = strconv_escape(s, g);
1845  }
1846  else if (*s == 0)
1847  {
1848  return s;
1849  }
1850  else ++s;
1851  }
1852  }
1853  };
1854 
1855  PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
1856  {
1857  PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20);
1858 
1859  switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
1860  {
1861  case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
1862  case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
1863  case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
1864  case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
1865  default: return 0; // should not get here
1866  }
1867  }
1868 
1869  typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
1870 
1871  template <typename opt_escape> struct strconv_attribute_impl
1872  {
1873  static char_t* parse_wnorm(char_t* s, char_t end_quote)
1874  {
1875  gap g;
1876 
1877  // trim leading whitespaces
1878  if (PUGI__IS_CHARTYPE(*s, ct_space))
1879  {
1880  char_t* str = s;
1881 
1882  do ++str;
1883  while (PUGI__IS_CHARTYPE(*str, ct_space));
1884 
1885  g.push(s, str - s);
1886  }
1887 
1888  while (true)
1889  {
1890  while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
1891 
1892  if (*s == end_quote)
1893  {
1894  char_t* str = g.flush(s);
1895 
1896  do *str-- = 0;
1897  while (PUGI__IS_CHARTYPE(*str, ct_space));
1898 
1899  return s + 1;
1900  }
1901  else if (PUGI__IS_CHARTYPE(*s, ct_space))
1902  {
1903  *s++ = ' ';
1904 
1905  if (PUGI__IS_CHARTYPE(*s, ct_space))
1906  {
1907  char_t* str = s + 1;
1908  while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
1909 
1910  g.push(s, str - s);
1911  }
1912  }
1913  else if (opt_escape::value && *s == '&')
1914  {
1915  s = strconv_escape(s, g);
1916  }
1917  else if (!*s)
1918  {
1919  return 0;
1920  }
1921  else ++s;
1922  }
1923  }
1924 
1925  static char_t* parse_wconv(char_t* s, char_t end_quote)
1926  {
1927  gap g;
1928 
1929  while (true)
1930  {
1931  while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
1932 
1933  if (*s == end_quote)
1934  {
1935  *g.flush(s) = 0;
1936 
1937  return s + 1;
1938  }
1939  else if (PUGI__IS_CHARTYPE(*s, ct_space))
1940  {
1941  if (*s == '\r')
1942  {
1943  *s++ = ' ';
1944 
1945  if (*s == '\n') g.push(s, 1);
1946  }
1947  else *s++ = ' ';
1948  }
1949  else if (opt_escape::value && *s == '&')
1950  {
1951  s = strconv_escape(s, g);
1952  }
1953  else if (!*s)
1954  {
1955  return 0;
1956  }
1957  else ++s;
1958  }
1959  }
1960 
1961  static char_t* parse_eol(char_t* s, char_t end_quote)
1962  {
1963  gap g;
1964 
1965  while (true)
1966  {
1967  while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
1968 
1969  if (*s == end_quote)
1970  {
1971  *g.flush(s) = 0;
1972 
1973  return s + 1;
1974  }
1975  else if (*s == '\r')
1976  {
1977  *s++ = '\n';
1978 
1979  if (*s == '\n') g.push(s, 1);
1980  }
1981  else if (opt_escape::value && *s == '&')
1982  {
1983  s = strconv_escape(s, g);
1984  }
1985  else if (!*s)
1986  {
1987  return 0;
1988  }
1989  else ++s;
1990  }
1991  }
1992 
1993  static char_t* parse_simple(char_t* s, char_t end_quote)
1994  {
1995  gap g;
1996 
1997  while (true)
1998  {
1999  while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
2000 
2001  if (*s == end_quote)
2002  {
2003  *g.flush(s) = 0;
2004 
2005  return s + 1;
2006  }
2007  else if (opt_escape::value && *s == '&')
2008  {
2009  s = strconv_escape(s, g);
2010  }
2011  else if (!*s)
2012  {
2013  return 0;
2014  }
2015  else ++s;
2016  }
2017  }
2018  };
2019 
2020  PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2021  {
2022  PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2023 
2024  switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2025  {
2026  case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2027  case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2028  case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2029  case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2030  case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2031  case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2032  case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2033  case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2034  case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2035  case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2036  case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2037  case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2038  case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2039  case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2040  case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2041  case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2042  default: return 0; // should not get here
2043  }
2044  }
2045 
2046  inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2047  {
2048  xml_parse_result result;
2049  result.status = status;
2050  result.offset = offset;
2051 
2052  return result;
2053  }
2054 
2055  struct xml_parser
2056  {
2057  xml_allocator alloc;
2058  char_t* error_offset;
2059  xml_parse_status error_status;
2060 
2061  // Parser utilities.
2062  #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
2063  #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
2064  #define PUGI__PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
2065  #define PUGI__POPNODE() { cursor = cursor->parent; }
2066  #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
2067  #define PUGI__SCANWHILE(X) { while ((X)) ++s; }
2068  #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
2069  #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
2070  #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2071 
2072  xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2073  {
2074  }
2075 
2076  // DOCTYPE consists of nested sections of the following possible types:
2077  // <!-- ... -->, <? ... ?>, "...", '...'
2078  // <![...]]>
2079  // <!...>
2080  // First group can not contain nested groups
2081  // Second group can contain nested groups of the same type
2082  // Third group can contain all other groups
2083  char_t* parse_doctype_primitive(char_t* s)
2084  {
2085  if (*s == '"' || *s == '\'')
2086  {
2087  // quoted string
2088  char_t ch = *s++;
2089  PUGI__SCANFOR(*s == ch);
2090  if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2091 
2092  s++;
2093  }
2094  else if (s[0] == '<' && s[1] == '?')
2095  {
2096  // <? ... ?>
2097  s += 2;
2098  PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2099  if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2100 
2101  s += 2;
2102  }
2103  else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2104  {
2105  s += 4;
2106  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2107  if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2108 
2109  s += 4;
2110  }
2111  else PUGI__THROW_ERROR(status_bad_doctype, s);
2112 
2113  return s;
2114  }
2115 
2116  char_t* parse_doctype_ignore(char_t* s)
2117  {
2118  assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2119  s++;
2120 
2121  while (*s)
2122  {
2123  if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2124  {
2125  // nested ignore section
2126  s = parse_doctype_ignore(s);
2127  if (!s) return s;
2128  }
2129  else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2130  {
2131  // ignore section end
2132  s += 3;
2133 
2134  return s;
2135  }
2136  else s++;
2137  }
2138 
2139  PUGI__THROW_ERROR(status_bad_doctype, s);
2140  }
2141 
2142  char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
2143  {
2144  assert(s[0] == '<' && s[1] == '!');
2145  s++;
2146 
2147  while (*s)
2148  {
2149  if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2150  {
2151  if (s[2] == '[')
2152  {
2153  // ignore
2154  s = parse_doctype_ignore(s);
2155  if (!s) return s;
2156  }
2157  else
2158  {
2159  // some control group
2160  s = parse_doctype_group(s, endch, false);
2161  if (!s) return s;
2162  }
2163  }
2164  else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2165  {
2166  // unknown tag (forbidden), or some primitive group
2167  s = parse_doctype_primitive(s);
2168  if (!s) return s;
2169  }
2170  else if (*s == '>')
2171  {
2172  s++;
2173 
2174  return s;
2175  }
2176  else s++;
2177  }
2178 
2179  if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
2180 
2181  return s;
2182  }
2183 
// Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
// On entry s points at the '!' character.
// cursor is the node that receives the new child; it is passed by value, so the changes made to
// it by PUGI__PUSHNODE / PUGI__POPNODE are not seen by the caller.
// optmsk holds the parse flags tested via PUGI__OPTSET; endch is the original last character of
// the buffer (the static parse() replaces that character with a null terminator).
// Returns the position after the construct, or null after recording the error through
// PUGI__THROW_ERROR.
// NOTE(review): ENDSWITH is defined outside this view; presumably it also accepts the null
// terminator when endch equals the expected character - confirm.
2184  char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
2185  {
2186  // parse node contents, starting with exclamation mark
2187  ++s;
2188 
2189  if (*s == '-') // '<!-...'
2190  {
2191  ++s;
2192 
2193  if (*s == '-') // '<!--...'
2194  {
2195  ++s;
2196 
2197  if (PUGI__OPTSET(parse_comments))
2198  {
2199  PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
2200  cursor->value = s; // Save the offset.
2201  }
2202 
2203  if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
2204  {
2205  s = strconv_comment(s, endch);
2206 
2207  if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
2208  }
2209  else
2210  {
2211  // Scan for terminating '-->'.
2212  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
2213  PUGI__CHECK_ERROR(status_bad_comment, s);
2214 
2215  if (PUGI__OPTSET(parse_comments))
2216  *s = 0; // Zero-terminate this segment at the first terminating '-'.
2217 
// If s[2] is not '>' the scan stopped at the null terminator; step only two characters so
// that s ends up on the terminator instead of past it.
2218  s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2219  }
2220  }
2221  else PUGI__THROW_ERROR(status_bad_comment, s);
2222  }
2223  else if (*s == '[')
2224  {
2225  // '<![CDATA[...'
2226  if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2227  {
2228  ++s;
2229 
2230  if (PUGI__OPTSET(parse_cdata))
2231  {
2232  PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
2233  cursor->value = s; // Save the offset.
2234 
2235  if (PUGI__OPTSET(parse_eol))
2236  {
2237  s = strconv_cdata(s, endch);
2238 
2239  if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
2240  }
2241  else
2242  {
2243  // Scan for terminating ']]>'.
2244  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2245  PUGI__CHECK_ERROR(status_bad_cdata, s);
2246 
2247  *s++ = 0; // Zero-terminate this segment.
2248  }
2249  }
2250  else // Flagged for discard, but we still have to scan for the terminator.
2251  {
2252  // Scan for terminating ']]>'.
2253  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2254  PUGI__CHECK_ERROR(status_bad_cdata, s);
2255 
2256  ++s;
2257  }
2258 
// When s[1] is not '>' only one character is skipped, leaving s on the null terminator.
2259  s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2260  }
2261  else PUGI__THROW_ERROR(status_bad_cdata, s);
2262  }
2263  else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
2264  {
// Rewind from 'D' to the opening '<', which is where parse_doctype_group expects to start.
2265  s -= 2;
2266 
// A DOCTYPE is rejected when cursor has a parent.
2267  if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
2268 
// "<!DOCTYPE" is nine characters long, so mark points at what follows the keyword.
2269  char_t* mark = s + 9;
2270 
2271  s = parse_doctype_group(s, endch, true);
2272  if (!s) return s;
2273 
2274  if (PUGI__OPTSET(parse_doctype))
2275  {
2276  while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
2277 
2278  PUGI__PUSHNODE(node_doctype);
2279 
2280  cursor->value = mark;
2281 
2282  assert((s[0] == 0 && endch == '>') || s[-1] == '>');
// Terminate the stored value: overwrite the closing '>' just before s, unless s already
// sits on the null terminator.
2283  s[*s == 0 ? 0 : -1] = 0;
2284 
2285  PUGI__POPNODE();
2286  }
2287  }
2288  else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
2289  else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
2290  else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2291 
2292  return s;
2293  }
2294 
// Parses a construct that begins with "<?": a processing instruction or an XML declaration.
// On entry s points at the '?' character.
// ref_cursor is the current node; it is written back only on the normal return path at the
// end of the function (error paths return through PUGI__THROW_ERROR without updating it).
// When a declaration with content is being stored, the function returns with the cursor left
// on the new node_declaration and s at the start of that content, so the caller can parse it
// as attributes.
// Returns the position to continue parsing from, or null after recording the error.
// NOTE(review): ENDSWITH is defined outside this view; presumably it also accepts the null
// terminator when endch equals the expected character - confirm.
2295  char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
2296  {
2297  // load into registers
2298  xml_node_struct* cursor = ref_cursor;
2299  char_t ch = 0;
2300 
2301  // parse node contents, starting with question mark
2302  ++s;
2303 
2304  // read PI target
2305  char_t* target = s;
2306 
2307  if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
2308 
2309  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
2310  PUGI__CHECK_ERROR(status_bad_pi, s);
2311 
2312  // determine node type; stricmp / strcasecmp is not portable
// OR-ing with ' ' (0x20) folds ASCII upper-case letters to lower case; the target must be
// exactly three characters long (target + 3 == s).
2313  bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2314 
2315  if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
2316  {
2317  if (declaration)
2318  {
2319  // disallow non top-level declarations
2320  if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
2321 
2322  PUGI__PUSHNODE(node_declaration);
2323  }
2324  else
2325  {
2326  PUGI__PUSHNODE(node_pi);
2327  }
2328 
2329  cursor->name = target;
2330 
// Terminates the target name in place; ch receives the character that followed it.
2331  PUGI__ENDSEG();
2332 
2333  // parse value/attributes
2334  if (ch == '?')
2335  {
2336  // empty node
2337  if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
2338  s += (*s == '>');
2339 
2340  PUGI__POPNODE();
2341  }
2342  else if (PUGI__IS_CHARTYPE(ch, ct_space))
2343  {
2344  PUGI__SKIPWS();
2345 
2346  // scan for tag end
2347  char_t* value = s;
2348 
2349  PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2350  PUGI__CHECK_ERROR(status_bad_pi, s);
2351 
2352  if (declaration)
2353  {
2354  // replace ending ? with / so that 'element' terminates properly
2355  *s = '/';
2356 
2357  // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2358  s = value;
2359  }
2360  else
2361  {
2362  // store value and step over >
2363  cursor->value = value;
2364  PUGI__POPNODE();
2365 
2366  PUGI__ENDSEG();
2367 
2368  s += (*s == '>');
2369  }
2370  }
2371  else PUGI__THROW_ERROR(status_bad_pi, s);
2372  }
2373  else
2374  {
// The node is not being stored, but its end still has to be found.
2375  // scan for tag end
2376  PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2377  PUGI__CHECK_ERROR(status_bad_pi, s);
2378 
2379  s += (s[1] == '>' ? 2 : 1);
2380  }
2381 
2382  // store from registers
2383  ref_cursor = cursor;
2384 
2385  return s;
2386  }
2387 
// Main parsing loop: walks the null-terminated buffer starting at s and builds the tree under
// xmldoc, writing terminators into the buffer so that node names and values can point into it.
// optmsk holds the parse flags; endch is the original last character of the buffer, which the
// static parse() below replaces with a null terminator before calling this function.
// Returns the final position, or null after an error has been recorded through
// PUGI__THROW_ERROR (error_offset and error_status are set).
// NOTE(review): ENDSWITH is defined outside this view; presumably it also accepts the null
// terminator when endch equals the expected character - confirm.
2388  char_t* parse(char_t* s, xml_node_struct* xmldoc, unsigned int optmsk, char_t endch)
2389  {
// The conversion routines depend only on the flags, so they are selected once up front.
2390  strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
2391  strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
2392 
2393  char_t ch = 0;
2394  xml_node_struct* cursor = xmldoc;
2395  char_t* mark = s;
2396 
2397  while (*s != 0)
2398  {
2399  if (*s == '<')
2400  {
2401  ++s;
2402 
// Reached by falling through, and by the goto at the end of the PCDATA branch; in both
// cases s points just after a '<'.
2403  LOC_TAG:
2404  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2405  {
2406  PUGI__PUSHNODE(node_element); // Append a new node to the tree.
2407 
2408  cursor->name = s;
2409 
2410  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2411  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2412 
2413  if (ch == '>')
2414  {
2415  // end of tag
2416  }
2417  else if (PUGI__IS_CHARTYPE(ch, ct_space))
2418  {
// Also the jump target for XML declarations: the '<?' branch below jumps here when
// parse_question leaves the cursor on a node_declaration.
2419  LOC_ATTRIBUTES:
2420  while (true)
2421  {
2422  PUGI__SKIPWS(); // Eat any whitespace.
2423 
2424  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2425  {
2426  xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
2427  if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
2428 
2429  a->name = s; // Save the offset.
2430 
2431  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2432  PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2433 
2434  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2435  PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2436 
2437  if (PUGI__IS_CHARTYPE(ch, ct_space))
2438  {
2439  PUGI__SKIPWS(); // Eat any whitespace.
2440  PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
2441 
2442  ch = *s;
2443  ++s;
2444  }
2445 
2446  if (ch == '=') // '<... #=...'
2447  {
2448  PUGI__SKIPWS(); // Eat any whitespace.
2449 
2450  if (*s == '"' || *s == '\'') // '<... #="...'
2451  {
2452  ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2453  ++s; // Step over the quote.
2454  a->value = s; // Save the offset.
2455 
2456  s = strconv_attribute(s, ch);
2457 
2458  if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
2459 
2460  // After this line the loop continues from the start;
2461  // Whitespaces, / and > are ok, symbols and EOF are wrong,
2462  // everything else will be detected
2463  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
2464  }
2465  else PUGI__THROW_ERROR(status_bad_attribute, s);
2466  }
2467  else PUGI__THROW_ERROR(status_bad_attribute, s);
2468  }
2469  else if (*s == '/')
2470  {
2471  ++s;
2472 
2473  if (*s == '>')
2474  {
2475  PUGI__POPNODE();
2476  s++;
2477  break;
2478  }
2479  else if (*s == 0 && endch == '>')
2480  {
2481  PUGI__POPNODE();
2482  break;
2483  }
2484  else PUGI__THROW_ERROR(status_bad_start_element, s);
2485  }
2486  else if (*s == '>')
2487  {
2488  ++s;
2489 
2490  break;
2491  }
2492  else if (*s == 0 && endch == '>')
2493  {
2494  break;
2495  }
2496  else PUGI__THROW_ERROR(status_bad_start_element, s);
2497  }
2498 
2499  // !!!
2500  }
2501  else if (ch == '/') // '<#.../'
2502  {
2503  if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
2504 
2505  PUGI__POPNODE(); // Pop.
2506 
2507  s += (*s == '>');
2508  }
2509  else if (ch == 0)
2510  {
2511  // we stepped over null terminator, backtrack & handle closing tag
2512  --s;
2513 
2514  if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
2515  }
2516  else PUGI__THROW_ERROR(status_bad_start_element, s);
2517  }
2518  else if (*s == '/')
2519  {
2520  ++s;
2521 
// Closing tag: its name is compared character by character with the name of the
// current node.
2522  char_t* name = cursor->name;
2523  if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2524 
2525  while (PUGI__IS_CHARTYPE(*s, ct_symbol))
2526  {
2527  if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2528  }
2529 
// Leftover characters in name mean the closing tag is shorter than the open element's
// name. If the only leftover character is endch and the buffer ended, the missing
// character is the one that was replaced by the terminator.
2530  if (*name)
2531  {
2532  if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
2533  else PUGI__THROW_ERROR(status_end_element_mismatch, s);
2534  }
2535 
2536  PUGI__POPNODE(); // Pop.
2537 
2538  PUGI__SKIPWS();
2539 
2540  if (*s == 0)
2541  {
2542  if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2543  }
2544  else
2545  {
2546  if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
2547  ++s;
2548  }
2549  }
2550  else if (*s == '?') // '<?...'
2551  {
2552  s = parse_question(s, cursor, optmsk, endch);
2553  if (!s) return s;
2554 
2555  assert(cursor);
// parse_question leaves the cursor on a declaration node when the declaration's
// content still has to be parsed as attributes.
2556  if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
2557  }
2558  else if (*s == '!') // '<!...'
2559  {
2560  s = parse_exclamation(s, cursor, optmsk, endch);
2561  if (!s) return s;
2562  }
2563  else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
2564  else PUGI__THROW_ERROR(status_unrecognized_tag, s);
2565  }
2566  else
2567  {
2568  mark = s; // Save this offset while searching for a terminator.
2569 
2570  PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2571 
2572  if (*s == '<')
2573  {
2574  // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
2575  assert(mark != s);
2576 
// Whitespace-only text is dropped unless one of the parse_ws_pcdata flags asks for it;
// with parse_ws_pcdata_single it is kept only directly before a closing tag of a node
// that has no children yet.
2577  if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single))
2578  {
2579  continue;
2580  }
2581  else if (PUGI__OPTSET(parse_ws_pcdata_single))
2582  {
2583  if (s[1] != '/' || cursor->first_child) continue;
2584  }
2585  }
2586 
// Rewind to the start of the text so that leading whitespace is part of the value.
2587  s = mark;
2588 
2589  if (cursor->parent)
2590  {
2591  PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
2592  cursor->value = s; // Save the offset.
2593 
2594  s = strconv_pcdata(s);
2595 
2596  PUGI__POPNODE(); // Pop since this is a standalone.
2597 
2598  if (!*s) break;
2599  }
2600  else
2601  {
// The cursor has no parent here, so the text is skipped without creating a node.
2602  PUGI__SCANFOR(*s == '<'); // '...<'
2603  if (!*s) break;
2604 
2605  ++s;
2606  }
2607 
2608  // We're after '<'
2609  goto LOC_TAG;
2610  }
2611  }
2612 
2613  // check that last tag is closed
2614  if (cursor != xmldoc) PUGI__THROW_ERROR(status_end_element_mismatch, s);
2615 
2616  return s;
2617  }
2618 
2619  static xml_parse_result parse(char_t* buffer, size_t length, xml_node_struct* root, unsigned int optmsk)
2620  {
2621  xml_document_struct* xmldoc = static_cast<xml_document_struct*>(root);
2622 
2623  // store buffer for offset_debug
2624  xmldoc->buffer = buffer;
2625 
2626  // early-out for empty documents
2627  if (length == 0) return make_parse_result(status_ok);
2628 
2629  // create parser on stack
2630  xml_parser parser(*xmldoc);
2631 
2632  // save last character and make buffer zero-terminated (speeds up parsing)
2633  char_t endch = buffer[length - 1];
2634  buffer[length - 1] = 0;
2635 
2636  // perform actual parsing
2637  parser.parse(buffer, xmldoc, optmsk, endch);
2638 
2639  xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
2640  assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
2641 
2642  // update allocator state
2643  *static_cast<xml_allocator*>(xmldoc) = parser.alloc;
2644 
2645  // since we removed last character, we have to handle the only possible false positive
2646  if (result && endch == '<')
2647  {
2648  // there's no possible well-formed document with < at the end
2649  return make_parse_result(status_unrecognized_tag, length);
2650  }
2651 
2652  return result;
2653  }
2654  };
2655 
2656  // Output facilities
2657  PUGI__FN xml_encoding get_write_native_encoding()
2658  {
2659  #ifdef PUGIXML_WCHAR_MODE
2660  return get_wchar_encoding();
2661  #else
2662  return encoding_utf8;
2663  #endif
2664  }
2665 
2666  PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
2667  {
2668  // replace wchar encoding with utf implementation
2669  if (encoding == encoding_wchar) return get_wchar_encoding();
2670 
2671  // replace utf16 encoding with utf16 with specific endianness
2672  if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2673 
2674  // replace utf32 encoding with utf32 with specific endianness
2675  if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2676 
2677  // only do autodetection if no explicit encoding is requested
2678  if (encoding != encoding_auto) return encoding;
2679 
2680  // assume utf8 encoding
2681  return encoding_utf8;
2682  }
2683 
2684 #ifdef PUGIXML_WCHAR_MODE
2685  PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
2686  {
2687  assert(length > 0);
2688 
2689  // discard last character if it's the lead of a surrogate pair
2690  return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
2691  }
2692 
2693  PUGI__FN size_t convert_buffer(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
2694  {
2695  // only endian-swapping is required
2696  if (need_endian_swap_utf(encoding, get_wchar_encoding()))
2697  {
2698  convert_wchar_endian_swap(r_char, data, length);
2699 
2700  return length * sizeof(char_t);
2701  }
2702 
2703  // convert to utf8
2704  if (encoding == encoding_utf8)
2705  {
2706  uint8_t* dest = r_u8;
2707  uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
2708 
2709  return static_cast<size_t>(end - dest);
2710  }
2711 
2712  // convert to utf16
2713  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2714  {
2715  uint16_t* dest = r_u16;
2716 
2717  // convert to native utf16
2718  uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
2719 
2720  // swap if necessary
2721  xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2722 
2723  if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2724 
2725  return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2726  }
2727 
2728  // convert to utf32
2729  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2730  {
2731  uint32_t* dest = r_u32;
2732 
2733  // convert to native utf32
2734  uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
2735 
2736  // swap if necessary
2737  xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2738 
2739  if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2740 
2741  return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2742  }
2743 
2744  // convert to latin1
2745  if (encoding == encoding_latin1)
2746  {
2747  uint8_t* dest = r_u8;
2748  uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
2749 
2750  return static_cast<size_t>(end - dest);
2751  }
2752 
2753  assert(!"Invalid encoding");
2754  return 0;
2755  }
2756 #else
2757  PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
2758  {
2759  assert(length > 4);
2760 
2761  for (size_t i = 1; i <= 4; ++i)
2762  {
2763  uint8_t ch = static_cast<uint8_t>(data[length - i]);
2764 
2765  // either a standalone character or a leading one
2766  if ((ch & 0xc0) != 0x80) return length - i;
2767  }
2768 
2769  // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2770  return length;
2771  }
2772 
2773  PUGI__FN size_t convert_buffer(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
2774  {
2775  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2776  {
2777  uint16_t* dest = r_u16;
2778 
2779  // convert to native utf16
2780  uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2781 
2782  // swap if necessary
2783  xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2784 
2785  if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2786 
2787  return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2788  }
2789 
2790  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2791  {
2792  uint32_t* dest = r_u32;
2793 
2794  // convert to native utf32
2795  uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2796 
2797  // swap if necessary
2798  xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2799 
2800  if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2801 
2802  return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2803  }
2804 
2805  if (encoding == encoding_latin1)
2806  {
2807  uint8_t* dest = r_u8;
2808  uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2809 
2810  return static_cast<size_t>(end - dest);
2811  }
2812 
2813  assert(!"Invalid encoding");
2814  return 0;
2815  }
2816 #endif
2817 
2818  class xml_buffered_writer
2819  {
2820  xml_buffered_writer(const xml_buffered_writer&);
2821  xml_buffered_writer& operator=(const xml_buffered_writer&);
2822 
2823  public:
2824  xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
2825  {
2826  PUGI__STATIC_ASSERT(bufcapacity >= 8);
2827  }
2828 
2829  ~xml_buffered_writer()
2830  {
2831  flush();
2832  }
2833 
2834  void flush()
2835  {
2836  flush(buffer, bufsize);
2837  bufsize = 0;
2838  }
2839 
2840  void flush(const char_t* data, size_t size)
2841  {
2842  if (size == 0) return;
2843 
2844  // fast path, just write data
2845  if (encoding == get_write_native_encoding())
2846  writer.write(data, size * sizeof(char_t));
2847  else
2848  {
2849  // convert chunk
2850  size_t result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
2851  assert(result <= sizeof(scratch));
2852 
2853  // write data
2854  writer.write(scratch.data_u8, result);
2855  }
2856  }
2857 
2858  void write(const char_t* data, size_t length)
2859  {
2860  if (bufsize + length > bufcapacity)
2861  {
2862  // flush the remaining buffer contents
2863  flush();
2864 
2865  // handle large chunks
2866  if (length > bufcapacity)
2867  {
2868  if (encoding == get_write_native_encoding())
2869  {
2870  // fast path, can just write data chunk
2871  writer.write(data, length * sizeof(char_t));
2872  return;
2873  }
2874 
2875  // need to convert in suitable chunks
2876  while (length > bufcapacity)
2877  {
2878  // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
2879  // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
2880  size_t chunk_size = get_valid_length(data, bufcapacity);
2881 
2882  // convert chunk and write
2883  flush(data, chunk_size);
2884 
2885  // iterate
2886  data += chunk_size;
2887  length -= chunk_size;
2888  }
2889 
2890  // small tail is copied below
2891  bufsize = 0;
2892  }
2893  }
2894 
2895  memcpy(buffer + bufsize, data, length * sizeof(char_t));
2896  bufsize += length;
2897  }
2898 
2899  void write(const char_t* data)
2900  {
2901  write(data, strlength(data));
2902  }
2903 
2904  void write(char_t d0)
2905  {
2906  if (bufsize + 1 > bufcapacity) flush();
2907 
2908  buffer[bufsize + 0] = d0;
2909  bufsize += 1;
2910  }
2911 
2912  void write(char_t d0, char_t d1)
2913  {
2914  if (bufsize + 2 > bufcapacity) flush();
2915 
2916  buffer[bufsize + 0] = d0;
2917  buffer[bufsize + 1] = d1;
2918  bufsize += 2;
2919  }
2920 
2921  void write(char_t d0, char_t d1, char_t d2)
2922  {
2923  if (bufsize + 3 > bufcapacity) flush();
2924 
2925  buffer[bufsize + 0] = d0;
2926  buffer[bufsize + 1] = d1;
2927  buffer[bufsize + 2] = d2;
2928  bufsize += 3;
2929  }
2930 
2931  void write(char_t d0, char_t d1, char_t d2, char_t d3)
2932  {
2933  if (bufsize + 4 > bufcapacity) flush();
2934 
2935  buffer[bufsize + 0] = d0;
2936  buffer[bufsize + 1] = d1;
2937  buffer[bufsize + 2] = d2;
2938  buffer[bufsize + 3] = d3;
2939  bufsize += 4;
2940  }
2941 
2942  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
2943  {
2944  if (bufsize + 5 > bufcapacity) flush();
2945 
2946  buffer[bufsize + 0] = d0;
2947  buffer[bufsize + 1] = d1;
2948  buffer[bufsize + 2] = d2;
2949  buffer[bufsize + 3] = d3;
2950  buffer[bufsize + 4] = d4;
2951  bufsize += 5;
2952  }
2953 
2954  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
2955  {
2956  if (bufsize + 6 > bufcapacity) flush();
2957 
2958  buffer[bufsize + 0] = d0;
2959  buffer[bufsize + 1] = d1;
2960  buffer[bufsize + 2] = d2;
2961  buffer[bufsize + 3] = d3;
2962  buffer[bufsize + 4] = d4;
2963  buffer[bufsize + 5] = d5;
2964  bufsize += 6;
2965  }
2966 
2967  // utf8 maximum expansion: x4 (-> utf32)
2968  // utf16 maximum expansion: x2 (-> utf32)
2969  // utf32 maximum expansion: x1
2970  enum
2971  {
2972  bufcapacitybytes =
2973  #ifdef PUGIXML_MEMORY_OUTPUT_STACK
2974  PUGIXML_MEMORY_OUTPUT_STACK
2975  #else
2976  10240
2977  #endif
2978  ,
2979  bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
2980  };
2981 
2982  char_t buffer[bufcapacity];
2983 
2984  union
2985  {
2986  uint8_t data_u8[4 * bufcapacity];
2987  uint16_t data_u16[2 * bufcapacity];
2988  uint32_t data_u32[bufcapacity];
2989  char_t data_char[bufcapacity];
2990  } scratch;
2991 
2992  xml_writer& writer;
2993  size_t bufsize;
2994  xml_encoding encoding;
2995  };
2996 
2997  PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
2998  {
2999  while (*s)
3000  {
3001  const char_t* prev = s;
3002 
3003  // While *s is a usual symbol
3004  while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
3005 
3006  writer.write(prev, static_cast<size_t>(s - prev));
3007 
3008  switch (*s)
3009  {
3010  case 0: break;
3011  case '&':
3012  writer.write('&', 'a', 'm', 'p', ';');
3013  ++s;
3014  break;
3015  case '<':
3016  writer.write('&', 'l', 't', ';');
3017  ++s;
3018  break;
3019  case '>':
3020  writer.write('&', 'g', 't', ';');
3021  ++s;
3022  break;
3023  case '"':
3024  writer.write('&', 'q', 'u', 'o', 't', ';');
3025  ++s;
3026  break;
3027  default: // s is not a usual symbol
3028  {
3029  unsigned int ch = static_cast<unsigned int>(*s++);
3030  assert(ch < 32);
3031 
3032  writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3033  }
3034  }
3035  }
3036  }
3037 
3038  PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3039  {
3040  if (flags & format_no_escapes)
3041  writer.write(s);
3042  else
3043  text_output_escaped(writer, s, type);
3044  }
3045 
3046  PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3047  {
3048  do
3049  {
3050  writer.write('<', '!', '[', 'C', 'D');
3051  writer.write('A', 'T', 'A', '[');
3052 
3053  const char_t* prev = s;
3054 
3055  // look for ]]> sequence - we can't output it as is since it terminates CDATA
3056  while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3057 
3058  // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3059  if (*s) s += 2;
3060 
3061  writer.write(prev, static_cast<size_t>(s - prev));
3062 
3063  writer.write(']', ']', '>');
3064  }
3065  while (*s);
3066  }
3067 
3068  PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
3069  {
3070  const char_t* default_name = PUGIXML_TEXT(":anonymous");
3071 
3072  for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
3073  {
3074  writer.write(' ');
3075  writer.write(a.name()[0] ? a.name() : default_name);
3076  writer.write('=', '"');
3077 
3078  text_output(writer, a.value(), ctx_special_attr, flags);
3079 
3080  writer.write('"');
3081  }
3082  }
3083 
3084  PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
3085  {
3086  const char_t* default_name = PUGIXML_TEXT(":anonymous");
3087 
3088  if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
3089  for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
3090 
3091  switch (node.type())
3092  {
3093  case node_document:
3094  {
3095  for (xml_node n = node.first_child(); n; n = n.next_sibling())
3096  node_output(writer, n, indent, flags, depth);
3097  break;
3098  }
3099 
3100  case node_element:
3101  {
3102  const char_t* name = node.name()[0] ? node.name() : default_name;
3103 
3104  writer.write('<');
3105  writer.write(name);
3106 
3107  node_output_attributes(writer, node, flags);
3108 
3109  if (flags & format_raw)
3110  {
3111  if (!node.first_child())
3112  writer.write(' ', '/', '>');
3113  else
3114  {
3115  writer.write('>');
3116 
3117  for (xml_node n = node.first_child(); n; n = n.next_sibling())
3118  node_output(writer, n, indent, flags, depth + 1);
3119 
3120  writer.write('<', '/');
3121  writer.write(name);
3122  writer.write('>');
3123  }
3124  }
3125  else if (!node.first_child())
3126  writer.write(' ', '/', '>', '\n');
3127  else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
3128  {
3129  writer.write('>');
3130 
3131  if (node.first_child().type() == node_pcdata)
3132  text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
3133  else
3134  text_output_cdata(writer, node.first_child().value());
3135 
3136  writer.write('<', '/');
3137  writer.write(name);
3138  writer.write('>', '\n');
3139  }
3140  else
3141  {
3142  writer.write('>', '\n');
3143 
3144  for (xml_node n = node.first_child(); n; n = n.next_sibling())
3145  node_output(writer, n, indent, flags, depth + 1);
3146 
3147  if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
3148  for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
3149 
3150  writer.write('<', '/');
3151  writer.write(name);
3152  writer.write('>', '\n');
3153  }
3154 
3155  break;
3156  }
3157 
3158  case node_pcdata:
3159  text_output(writer, node.value(), ctx_special_pcdata, flags);
3160  if ((flags & format_raw) == 0) writer.write('\n');
3161  break;
3162 
3163  case node_cdata:
3164  text_output_cdata(writer, node.value());
3165  if ((flags & format_raw) == 0) writer.write('\n');
3166  break;
3167 
3168  case node_comment:
3169  writer.write('<', '!', '-', '-');
3170  writer.write(node.value());
3171  writer.write('-', '-', '>');
3172  if ((flags & format_raw) == 0) writer.write('\n');
3173  break;
3174 
3175  case node_pi:
3176  case node_declaration:
3177  writer.write('<', '?');
3178  writer.write(node.name()[0] ? node.name() : default_name);
3179 
3180  if (node.type() == node_declaration)
3181  {
3182  node_output_attributes(writer, node, flags);
3183  }
3184  else if (node.value()[0])
3185  {
3186  writer.write(' ');
3187  writer.write(node.value());
3188  }
3189 
3190  writer.write('?', '>');
3191  if ((flags & format_raw) == 0) writer.write('\n');
3192  break;
3193 
3194  case node_doctype:
3195  writer.write('<', '!', 'D', 'O', 'C');
3196  writer.write('T', 'Y', 'P', 'E');
3197 
3198  if (node.value()[0])
3199  {
3200  writer.write(' ');
3201  writer.write(node.value());
3202  }
3203 
3204  writer.write('>');
3205  if ((flags & format_raw) == 0) writer.write('\n');
3206  break;
3207 
3208  default:
3209  assert(!"Invalid node type");
3210  }
3211  }
3212 
3213  inline bool has_declaration(const xml_node& node)
3214  {
3215  for (xml_node child = node.first_child(); child; child = child.next_sibling())
3216  {
3217  xml_node_type type = child.type();
3218 
3219  if (type == node_declaration) return true;
3220  if (type == node_element) return false;
3221  }
3222 
3223  return false;
3224  }
3225 
3226  inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
3227  {
3228  if (parent != node_document && parent != node_element) return false;
3229  if (child == node_document || child == node_null) return false;
3230  if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
3231 
3232  return true;
3233  }
3234 
3235  PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
3236  {
3237  assert(dest.type() == source.type());
3238 
3239  switch (source.type())
3240  {
3241  case node_element:
3242  {
3243  dest.set_name(source.name());
3244 
3245  for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3246  dest.append_attribute(a.name()).set_value(a.value());
3247 
3248  for (xml_node c = source.first_child(); c; c = c.next_sibling())
3249  {
3250  if (c == skip) continue;
3251 
3252  xml_node cc = dest.append_child(c.type());
3253  assert(cc);
3254 
3255  recursive_copy_skip(cc, c, skip);
3256  }
3257 
3258  break;
3259  }
3260 
3261  case node_pcdata:
3262  case node_cdata:
3263  case node_comment:
3264  case node_doctype:
3265  dest.set_value(source.value());
3266  break;
3267 
3268  case node_pi:
3269  dest.set_name(source.name());
3270  dest.set_value(source.value());
3271  break;
3272 
3273  case node_declaration:
3274  {
3275  dest.set_name(source.name());
3276 
3277  for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
3278  dest.append_attribute(a.name()).set_value(a.value());
3279 
3280  break;
3281  }
3282 
3283  default:
3284  assert(!"Invalid node type");
3285  }
3286  }
3287 
3288  inline bool is_text_node(xml_node_struct* node)
3289  {
3290  xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
3291 
3292  return type == node_pcdata || type == node_cdata;
3293  }
3294 
3295  // get value with conversion functions
3296  PUGI__FN int get_value_int(const char_t* value, int def)
3297  {
3298  if (!value) return def;
3299 
3300  #ifdef PUGIXML_WCHAR_MODE
3301  return static_cast<int>(wcstol(value, 0, 10));
3302  #else
3303  return static_cast<int>(strtol(value, 0, 10));
3304  #endif
3305  }
3306 
3307  PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
3308  {
3309  if (!value) return def;
3310 
3311  #ifdef PUGIXML_WCHAR_MODE
3312  return static_cast<unsigned int>(wcstoul(value, 0, 10));
3313  #else
3314  return static_cast<unsigned int>(strtoul(value, 0, 10));
3315  #endif
3316  }
3317 
3318  PUGI__FN double get_value_double(const char_t* value, double def)
3319  {
3320  if (!value) return def;
3321 
3322  #ifdef PUGIXML_WCHAR_MODE
3323  return wcstod(value, 0);
3324  #else
3325  return strtod(value, 0);
3326  #endif
3327  }
3328 
3329  PUGI__FN float get_value_float(const char_t* value, float def)
3330  {
3331  if (!value) return def;
3332 
3333  #ifdef PUGIXML_WCHAR_MODE
3334  return static_cast<float>(wcstod(value, 0));
3335  #else
3336  return static_cast<float>(strtod(value, 0));
3337  #endif
3338  }
3339 
3340  PUGI__FN bool get_value_bool(const char_t* value, bool def)
3341  {
3342  if (!value) return def;
3343 
3344  // only look at first char
3345  char_t first = *value;
3346 
3347  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3348  return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3349  }
3350 
3351  // set value with conversion functions
3352  PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
3353  {
3354  #ifdef PUGIXML_WCHAR_MODE
3355  char_t wbuf[128];
3356  impl::widen_ascii(wbuf, buf);
3357 
3358  return strcpy_insitu(dest, header, header_mask, wbuf);
3359  #else
3360  return strcpy_insitu(dest, header, header_mask, buf);
3361  #endif
3362  }
3363 
3364  PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
3365  {
3366  char buf[128];
3367  sprintf(buf, "%d", value);
3368 
3369  return set_value_buffer(dest, header, header_mask, buf);
3370  }
3371 
3372  PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
3373  {
3374  char buf[128];
3375  sprintf(buf, "%u", value);
3376 
3377  return set_value_buffer(dest, header, header_mask, buf);
3378  }
3379 
3380  PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
3381  {
3382  char buf[128];
3383  sprintf(buf, "%g", value);
3384 
3385  return set_value_buffer(dest, header, header_mask, buf);
3386  }
3387 
3388  PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
3389  {
3390  return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
3391  }
3392 
3393  // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3394  PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
3395  {
3396  #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
3397  // there are 64-bit versions of fseek/ftell, let's use them
3398  typedef __int64 length_type;
3399 
3400  _fseeki64(file, 0, SEEK_END);
3401  length_type length = _ftelli64(file);
3402  _fseeki64(file, 0, SEEK_SET);
3403  #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
3404  // there are 64-bit versions of fseek/ftell, let's use them
3405  typedef off64_t length_type;
3406 
3407  fseeko64(file, 0, SEEK_END);
3408  length_type length = ftello64(file);
3409  fseeko64(file, 0, SEEK_SET);
3410  #else
3411  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3412  typedef long length_type;
3413 
3414  fseek(file, 0, SEEK_END);
3415  length_type length = ftell(file);
3416  fseek(file, 0, SEEK_SET);
3417  #endif
3418 
3419  // check for I/O errors
3420  if (length < 0) return status_io_error;
3421 
3422  // check for overflow
3423  size_t result = static_cast<size_t>(length);
3424 
3425  if (static_cast<length_type>(result) != length) return status_out_of_memory;
3426 
3427  // finalize
3428  out_result = result;
3429 
3430  return status_ok;
3431  }
3432 
3433  PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
3434  {
3435  if (!file) return make_parse_result(status_file_not_found);
3436 
3437  // get file size (can result in I/O errors)
3438  size_t size = 0;
3439  xml_parse_status size_status = get_file_size(file, size);
3440 
3441  if (size_status != status_ok)
3442  {
3443  fclose(file);
3444  return make_parse_result(size_status);
3445  }
3446 
3447  // allocate buffer for the whole file
3448  char* contents = static_cast<char*>(xml_memory::allocate(size > 0 ? size : 1));
3449 
3450  if (!contents)
3451  {
3452  fclose(file);
3453  return make_parse_result(status_out_of_memory);
3454  }
3455 
3456  // read file in memory
3457  size_t read_size = fread(contents, 1, size, file);
3458  fclose(file);
3459 
3460  if (read_size != size)
3461  {
3462  xml_memory::deallocate(contents);
3463  return make_parse_result(status_io_error);
3464  }
3465 
3466  return doc.load_buffer_inplace_own(contents, size, options, encoding);
3467  }
3468 
3469 #ifndef PUGIXML_NO_STL
3470  template <typename T> struct xml_stream_chunk
3471  {
3472  static xml_stream_chunk* create()
3473  {
3474  void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
3475 
3476  return new (memory) xml_stream_chunk();
3477  }
3478 
3479  static void destroy(void* ptr)
3480  {
3481  xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
3482 
3483  // free chunk chain
3484  while (chunk)
3485  {
3486  xml_stream_chunk* next = chunk->next;
3487  xml_memory::deallocate(chunk);
3488  chunk = next;
3489  }
3490  }
3491 
3492  xml_stream_chunk(): next(0), size(0)
3493  {
3494  }
3495 
3496  xml_stream_chunk* next;
3497  size_t size;
3498 
3499  T data[xml_memory_page_size / sizeof(T)];
3500  };
3501 
3502  template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3503  {
3504  buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
3505 
3506  // read file to a chunk list
3507  size_t total = 0;
3508  xml_stream_chunk<T>* last = 0;
3509 
3510  while (!stream.eof())
3511  {
3512  // allocate new chunk
3513  xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
3514  if (!chunk) return status_out_of_memory;
3515 
3516  // append chunk to list
3517  if (last) last = last->next = chunk;
3518  else chunks.data = last = chunk;
3519 
3520  // read data to chunk
3521  stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
3522  chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
3523 
3524  // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
3525  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
3526 
3527  // guard against huge files (chunk size is small enough to make this overflow check work)
3528  if (total + chunk->size < total) return status_out_of_memory;
3529  total += chunk->size;
3530  }
3531 
3532  // copy chunk list to a contiguous buffer
3533  char* buffer = static_cast<char*>(xml_memory::allocate(total));
3534  if (!buffer) return status_out_of_memory;
3535 
3536  char* write = buffer;
3537 
3538  for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
3539  {
3540  assert(write + chunk->size <= buffer + total);
3541  memcpy(write, chunk->data, chunk->size);
3542  write += chunk->size;
3543  }
3544 
3545  assert(write == buffer + total);
3546 
3547  // return buffer
3548  *out_buffer = buffer;
3549  *out_size = total;
3550 
3551  return status_ok;
3552  }
3553 
3554  template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3555  {
3556  // get length of remaining data in stream
3557  typename std::basic_istream<T>::pos_type pos = stream.tellg();
3558  stream.seekg(0, std::ios::end);
3559  std::streamoff length = stream.tellg() - pos;
3560  stream.seekg(pos);
3561 
3562  if (stream.fail() || pos < 0) return status_io_error;
3563 
3564  // guard against huge files
3565  size_t read_length = static_cast<size_t>(length);
3566 
3567  if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
3568 
3569  // read stream data into memory (guard against stream exceptions with buffer holder)
3570  buffer_holder buffer(xml_memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), xml_memory::deallocate);
3571  if (!buffer.data) return status_out_of_memory;
3572 
3573  stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
3574 
3575  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
3576  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
3577 
3578  // return buffer
3579  size_t actual_length = static_cast<size_t>(stream.gcount());
3580  assert(actual_length <= read_length);
3581 
3582  *out_buffer = buffer.release();
3583  *out_size = actual_length * sizeof(T);
3584 
3585  return status_ok;
3586  }
3587 
3588  template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
3589  {
3590  void* buffer = 0;
3591  size_t size = 0;
3592 
3593  // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
3594  xml_parse_status status = (stream.tellg() < 0) ? load_stream_data_noseek(stream, &buffer, &size) : load_stream_data_seek(stream, &buffer, &size);
3595  if (status != status_ok) return make_parse_result(status);
3596 
3597  return doc.load_buffer_inplace_own(buffer, size, options, encoding);
3598  }
3599 #endif
3600 
3601 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
3602  PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3603  {
3604  return _wfopen(path, mode);
3605  }
3606 #else
3607  PUGI__FN char* convert_path_heap(const wchar_t* str)
3608  {
3609  assert(str);
3610 
3611  // first pass: get length in utf8 characters
3612  size_t length = wcslen(str);
3613  size_t size = as_utf8_begin(str, length);
3614 
3615  // allocate resulting string
3616  char* result = static_cast<char*>(xml_memory::allocate(size + 1));
3617  if (!result) return 0;
3618 
3619  // second pass: convert to utf8
3620  as_utf8_end(result, size, str, length);
3621 
3622  return result;
3623  }
3624 
3625  PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
3626  {
3627  // there is no standard function to open wide paths, so our best bet is to try utf8 path
3628  char* path_utf8 = convert_path_heap(path);
3629  if (!path_utf8) return 0;
3630 
3631  // convert mode to ASCII (we mirror _wfopen interface)
3632  char mode_ascii[4] = {0};
3633  for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
3634 
3635  // try to open the utf8 path
3636  FILE* result = fopen(path_utf8, mode_ascii);
3637 
3638  // free dummy buffer
3639  xml_memory::deallocate(path_utf8);
3640 
3641  return result;
3642  }
3643 #endif
3644 
3645  PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
3646  {
3647  if (!file) return false;
3648 
3649  xml_writer_file writer(file);
3650  doc.save(writer, indent, flags, encoding);
3651 
3652  int result = ferror(file);
3653 
3654  fclose(file);
3655 
3656  return result == 0;
3657  }
3658 PUGI__NS_END
3659 
3660 namespace pugi
3661 {
3662  PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
3663  {
3664  }
3665 
3666  PUGI__FN void xml_writer_file::write(const void* data, size_t size)
3667  {
3668  size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
3669  (void)!result; // unfortunately we can't do proper error handling here
3670  }
3671 
3672 #ifndef PUGIXML_NO_STL
3673  PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
3674  {
3675  }
3676 
3677  PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
3678  {
3679  }
3680 
3681  PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
3682  {
3683  if (narrow_stream)
3684  {
3685  assert(!wide_stream);
3686  narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
3687  }
3688  else
3689  {
3690  assert(wide_stream);
3691  assert(size % sizeof(wchar_t) == 0);
3692 
3693  wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
3694  }
3695  }
3696 #endif
3697 
3698  PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
3699  {
3700  }
3701 
3702  PUGI__FN xml_tree_walker::~xml_tree_walker()
3703  {
3704  }
3705 
3706  PUGI__FN int xml_tree_walker::depth() const
3707  {
3708  return _depth;
3709  }
3710 
3711  PUGI__FN bool xml_tree_walker::begin(xml_node&)
3712  {
3713  return true;
3714  }
3715 
3716  PUGI__FN bool xml_tree_walker::end(xml_node&)
3717  {
3718  return true;
3719  }
3720 
3721  PUGI__FN xml_attribute::xml_attribute(): _attr(0)
3722  {
3723  }
3724 
3725  PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
3726  {
3727  }
3728 
3729  PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
3730  {
3731  }
3732 
3733  PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
3734  {
3735  return _attr ? unspecified_bool_xml_attribute : 0;
3736  }
3737 
3738  PUGI__FN bool xml_attribute::operator!() const
3739  {
3740  return !_attr;
3741  }
3742 
3743  PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
3744  {
3745  return (_attr == r._attr);
3746  }
3747 
3748  PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
3749  {
3750  return (_attr != r._attr);
3751  }
3752 
3753  PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
3754  {
3755  return (_attr < r._attr);
3756  }
3757 
3758  PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
3759  {
3760  return (_attr > r._attr);
3761  }
3762 
3763  PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
3764  {
3765  return (_attr <= r._attr);
3766  }
3767 
3768  PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
3769  {
3770  return (_attr >= r._attr);
3771  }
3772 
3773  PUGI__FN xml_attribute xml_attribute::next_attribute() const
3774  {
3775  return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
3776  }
3777 
3778  PUGI__FN xml_attribute xml_attribute::previous_attribute() const
3779  {
3780  return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
3781  }
3782 
3783  PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
3784  {
3785  return (_attr && _attr->value) ? _attr->value : def;
3786  }
3787 
3788  PUGI__FN int xml_attribute::as_int(int def) const
3789  {
3790  return impl::get_value_int(_attr ? _attr->value : 0, def);
3791  }
3792 
3793  PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
3794  {
3795  return impl::get_value_uint(_attr ? _attr->value : 0, def);
3796  }
3797 
3798  PUGI__FN double xml_attribute::as_double(double def) const
3799  {
3800  return impl::get_value_double(_attr ? _attr->value : 0, def);
3801  }
3802 
3803  PUGI__FN float xml_attribute::as_float(float def) const
3804  {
3805  return impl::get_value_float(_attr ? _attr->value : 0, def);
3806  }
3807 
3808  PUGI__FN bool xml_attribute::as_bool(bool def) const
3809  {
3810  return impl::get_value_bool(_attr ? _attr->value : 0, def);
3811  }
3812 
3813  PUGI__FN bool xml_attribute::empty() const
3814  {
3815  return !_attr;
3816  }
3817 
3818  PUGI__FN const char_t* xml_attribute::name() const
3819  {
3820  return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
3821  }
3822 
3823  PUGI__FN const char_t* xml_attribute::value() const
3824  {
3825  return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
3826  }
3827 
3828  PUGI__FN size_t xml_attribute::hash_value() const
3829  {
3830  return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
3831  }
3832 
3833  PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
3834  {
3835  return _attr;
3836  }
3837 
3838  PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
3839  {
3840  set_value(rhs);
3841  return *this;
3842  }
3843 
3844  PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
3845  {
3846  set_value(rhs);
3847  return *this;
3848  }
3849 
3850  PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
3851  {
3852  set_value(rhs);
3853  return *this;
3854  }
3855 
3856  PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
3857  {
3858  set_value(rhs);
3859  return *this;
3860  }
3861 
3862  PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
3863  {
3864  set_value(rhs);
3865  return *this;
3866  }
3867 
3868  PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
3869  {
3870  if (!_attr) return false;
3871 
3872  return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
3873  }
3874 
3875  PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
3876  {
3877  if (!_attr) return false;
3878 
3879  return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
3880  }
3881 
3882  PUGI__FN bool xml_attribute::set_value(int rhs)
3883  {
3884  if (!_attr) return false;
3885 
3886  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
3887  }
3888 
3889  PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
3890  {
3891  if (!_attr) return false;
3892 
3893  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
3894  }
3895 
3896  PUGI__FN bool xml_attribute::set_value(double rhs)
3897  {
3898  if (!_attr) return false;
3899 
3900  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
3901  }
3902 
3903  PUGI__FN bool xml_attribute::set_value(bool rhs)
3904  {
3905  if (!_attr) return false;
3906 
3907  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
3908  }
3909 
3910 #ifdef __BORLANDC__
3911  PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
3912  {
3913  return (bool)lhs && rhs;
3914  }
3915 
3916  PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
3917  {
3918  return (bool)lhs || rhs;
3919  }
3920 #endif
3921 
3922  PUGI__FN xml_node::xml_node(): _root(0)
3923  {
3924  }
3925 
3926  PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
3927  {
3928  }
3929 
3930  PUGI__FN static void unspecified_bool_xml_node(xml_node***)
3931  {
3932  }
3933 
3934  PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
3935  {
3936  return _root ? unspecified_bool_xml_node : 0;
3937  }
3938 
3939  PUGI__FN bool xml_node::operator!() const
3940  {
3941  return !_root;
3942  }
3943 
3944  PUGI__FN xml_node::iterator xml_node::begin() const
3945  {
3946  return iterator(_root ? _root->first_child : 0, _root);
3947  }
3948 
3949  PUGI__FN xml_node::iterator xml_node::end() const
3950  {
3951  return iterator(0, _root);
3952  }
3953 
3954  PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
3955  {
3956  return attribute_iterator(_root ? _root->first_attribute : 0, _root);
3957  }
3958 
3959  PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
3960  {
3961  return attribute_iterator(0, _root);
3962  }
3963 
3964  PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
3965  {
3966  return xml_object_range<xml_node_iterator>(begin(), end());
3967  }
3968 
3969  PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
3970  {
3971  return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_), name_), xml_named_node_iterator());
3972  }
3973 
3974  PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
3975  {
3976  return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
3977  }
3978 
3979  PUGI__FN bool xml_node::operator==(const xml_node& r) const
3980  {
3981  return (_root == r._root);
3982  }
3983 
3984  PUGI__FN bool xml_node::operator!=(const xml_node& r) const
3985  {
3986  return (_root != r._root);
3987  }
3988 
3989  PUGI__FN bool xml_node::operator<(const xml_node& r) const
3990  {
3991  return (_root < r._root);
3992  }
3993 
3994  PUGI__FN bool xml_node::operator>(const xml_node& r) const
3995  {
3996  return (_root > r._root);
3997  }
3998 
3999  PUGI__FN bool xml_node::operator<=(const xml_node& r) const
4000  {
4001  return (_root <= r._root);
4002  }
4003 
4004  PUGI__FN bool xml_node::operator>=(const xml_node& r) const
4005  {
4006  return (_root >= r._root);
4007  }
4008 
4009  PUGI__FN bool xml_node::empty() const
4010  {
4011  return !_root;
4012  }
4013 
4014  PUGI__FN const char_t* xml_node::name() const
4015  {
4016  return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
4017  }
4018 
4019  PUGI__FN xml_node_type xml_node::type() const
4020  {
4021  return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
4022  }
4023 
4024  PUGI__FN const char_t* xml_node::value() const
4025  {
4026  return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
4027  }
4028 
4029  PUGI__FN xml_node xml_node::child(const char_t* name_) const
4030  {
4031  if (!_root) return xml_node();
4032 
4033  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4034  if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
4035 
4036  return xml_node();
4037  }
4038 
4039  PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
4040  {
4041  if (!_root) return xml_attribute();
4042 
4043  for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
4044  if (i->name && impl::strequal(name_, i->name))
4045  return xml_attribute(i);
4046 
4047  return xml_attribute();
4048  }
4049 
4050  PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
4051  {
4052  if (!_root) return xml_node();
4053 
4054  for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
4055  if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
4056 
4057  return xml_node();
4058  }
4059 
4060  PUGI__FN xml_node xml_node::next_sibling() const
4061  {
4062  if (!_root) return xml_node();
4063 
4064  if (_root->next_sibling) return xml_node(_root->next_sibling);
4065  else return xml_node();
4066  }
4067 
4068  PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
4069  {
4070  if (!_root) return xml_node();
4071 
4072  for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
4073  if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
4074 
4075  return xml_node();
4076  }
4077 
4078  PUGI__FN xml_node xml_node::previous_sibling() const
4079  {
4080  if (!_root) return xml_node();
4081 
4082  if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
4083  else return xml_node();
4084  }
4085 
4086  PUGI__FN xml_node xml_node::parent() const
4087  {
4088  return _root ? xml_node(_root->parent) : xml_node();
4089  }
4090 
4091  PUGI__FN xml_node xml_node::root() const
4092  {
4093  if (!_root) return xml_node();
4094 
4095  impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
4096 
4097  return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
4098  }
4099 
4100  PUGI__FN xml_text xml_node::text() const
4101  {
4102  return xml_text(_root);
4103  }
4104 
4105  PUGI__FN const char_t* xml_node::child_value() const
4106  {
4107  if (!_root) return PUGIXML_TEXT("");
4108 
4109  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4110  if (i->value && impl::is_text_node(i))
4111  return i->value;
4112 
4113  return PUGIXML_TEXT("");
4114  }
4115 
4116  PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
4117  {
4118  return child(name_).child_value();
4119  }
4120 
4121  PUGI__FN xml_attribute xml_node::first_attribute() const
4122  {
4123  return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
4124  }
4125 
4126  PUGI__FN xml_attribute xml_node::last_attribute() const
4127  {
4128  return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
4129  }
4130 
4131  PUGI__FN xml_node xml_node::first_child() const
4132  {
4133  return _root ? xml_node(_root->first_child) : xml_node();
4134  }
4135 
4136  PUGI__FN xml_node xml_node::last_child() const
4137  {
4138  return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
4139  }
4140 
4141  PUGI__FN bool xml_node::set_name(const char_t* rhs)
4142  {
4143  switch (type())
4144  {
4145  case node_pi:
4146  case node_declaration:
4147  case node_element:
4148  return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
4149 
4150  default:
4151  return false;
4152  }
4153  }
4154 
4155  PUGI__FN bool xml_node::set_value(const char_t* rhs)
4156  {
4157  switch (type())
4158  {
4159  case node_pi:
4160  case node_cdata:
4161  case node_pcdata:
4162  case node_comment:
4163  case node_doctype:
4164  return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
4165 
4166  default:
4167  return false;
4168  }
4169  }
4170 
4171  PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
4172  {
4173  if (type() != node_element && type() != node_declaration) return xml_attribute();
4174 
4175  xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
4176  a.set_name(name_);
4177 
4178  return a;
4179  }
4180 
4181  PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
4182  {
4183  if (type() != node_element && type() != node_declaration) return xml_attribute();
4184 
4185  xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
4186  if (!a) return xml_attribute();
4187 
4188  a.set_name(name_);
4189 
4190  xml_attribute_struct* head = _root->first_attribute;
4191 
4192  if (head)
4193  {
4194  a._attr->prev_attribute_c = head->prev_attribute_c;
4195  head->prev_attribute_c = a._attr;
4196  }
4197  else
4198  a._attr->prev_attribute_c = a._attr;
4199 
4200  a._attr->next_attribute = head;
4201  _root->first_attribute = a._attr;
4202 
4203  return a;
4204  }
4205 
4206  PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
4207  {
4208  if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
4209 
4210  // check that attribute belongs to *this
4211  xml_attribute_struct* cur = attr._attr;
4212 
4213  while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
4214 
4215  if (cur != _root->first_attribute) return xml_attribute();
4216 
4217  xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
4218  if (!a) return xml_attribute();
4219 
4220  a.set_name(name_);
4221 
4222  if (attr._attr->prev_attribute_c->next_attribute)
4223  attr._attr->prev_attribute_c->next_attribute = a._attr;
4224  else
4225  _root->first_attribute = a._attr;
4226 
4227  a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
4228  a._attr->next_attribute = attr._attr;
4229  attr._attr->prev_attribute_c = a._attr;
4230 
4231  return a;
4232  }
4233 
4234  PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
4235  {
4236  if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
4237 
4238  // check that attribute belongs to *this
4239  xml_attribute_struct* cur = attr._attr;
4240 
4241  while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
4242 
4243  if (cur != _root->first_attribute) return xml_attribute();
4244 
4245  xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
4246  if (!a) return xml_attribute();
4247 
4248  a.set_name(name_);
4249 
4250  if (attr._attr->next_attribute)
4251  attr._attr->next_attribute->prev_attribute_c = a._attr;
4252  else
4253  _root->first_attribute->prev_attribute_c = a._attr;
4254 
4255  a._attr->next_attribute = attr._attr->next_attribute;
4256  a._attr->prev_attribute_c = attr._attr;
4257  attr._attr->next_attribute = a._attr;
4258 
4259  return a;
4260  }
4261 
4262  PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
4263  {
4264  if (!proto) return xml_attribute();
4265 
4266  xml_attribute result = append_attribute(proto.name());
4267  result.set_value(proto.value());
4268 
4269  return result;
4270  }
4271 
4272  PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
4273  {
4274  if (!proto) return xml_attribute();
4275 
4276  xml_attribute result = prepend_attribute(proto.name());
4277  result.set_value(proto.value());
4278 
4279  return result;
4280  }
4281 
4282  PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
4283  {
4284  if (!proto) return xml_attribute();
4285 
4286  xml_attribute result = insert_attribute_after(proto.name(), attr);
4287  result.set_value(proto.value());
4288 
4289  return result;
4290  }
4291 
4292  PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
4293  {
4294  if (!proto) return xml_attribute();
4295 
4296  xml_attribute result = insert_attribute_before(proto.name(), attr);
4297  result.set_value(proto.value());
4298 
4299  return result;
4300  }
4301 
4302  PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
4303  {
4304  if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4305 
4306  xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));
4307 
4308  if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4309 
4310  return n;
4311  }
4312 
4313  PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
4314  {
4315  if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4316 
4317  xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4318  if (!n) return xml_node();
4319 
4320  n._root->parent = _root;
4321 
4322  xml_node_struct* head = _root->first_child;
4323 
4324  if (head)
4325  {
4326  n._root->prev_sibling_c = head->prev_sibling_c;
4327  head->prev_sibling_c = n._root;
4328  }
4329  else
4330  n._root->prev_sibling_c = n._root;
4331 
4332  n._root->next_sibling = head;
4333  _root->first_child = n._root;
4334 
4335  if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4336 
4337  return n;
4338  }
4339 
4340  PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
4341  {
4342  if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4343  if (!node._root || node._root->parent != _root) return xml_node();
4344 
4345  xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4346  if (!n) return xml_node();
4347 
4348  n._root->parent = _root;
4349 
4350  if (node._root->prev_sibling_c->next_sibling)
4351  node._root->prev_sibling_c->next_sibling = n._root;
4352  else
4353  _root->first_child = n._root;
4354 
4355  n._root->prev_sibling_c = node._root->prev_sibling_c;
4356  n._root->next_sibling = node._root;
4357  node._root->prev_sibling_c = n._root;
4358 
4359  if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4360 
4361  return n;
4362  }
4363 
4364  PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
4365  {
4366  if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
4367  if (!node._root || node._root->parent != _root) return xml_node();
4368 
4369  xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
4370  if (!n) return xml_node();
4371 
4372  n._root->parent = _root;
4373 
4374  if (node._root->next_sibling)
4375  node._root->next_sibling->prev_sibling_c = n._root;
4376  else
4377  _root->first_child->prev_sibling_c = n._root;
4378 
4379  n._root->next_sibling = node._root->next_sibling;
4380  n._root->prev_sibling_c = node._root;
4381  node._root->next_sibling = n._root;
4382 
4383  if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
4384 
4385  return n;
4386  }
4387 
4388  PUGI__FN xml_node xml_node::append_child(const char_t* name_)
4389  {
4390  xml_node result = append_child(node_element);
4391 
4392  result.set_name(name_);
4393 
4394  return result;
4395  }
4396 
4397  PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
4398  {
4399  xml_node result = prepend_child(node_element);
4400 
4401  result.set_name(name_);
4402 
4403  return result;
4404  }
4405 
4406  PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
4407  {
4408  xml_node result = insert_child_after(node_element, node);
4409 
4410  result.set_name(name_);
4411 
4412  return result;
4413  }
4414 
4415  PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
4416  {
4417  xml_node result = insert_child_before(node_element, node);
4418 
4419  result.set_name(name_);
4420 
4421  return result;
4422  }
4423 
4424  PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
4425  {
4426  xml_node result = append_child(proto.type());
4427 
4428  if (result) impl::recursive_copy_skip(result, proto, result);
4429 
4430  return result;
4431  }
4432 
4433  PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
4434  {
4435  xml_node result = prepend_child(proto.type());
4436 
4437  if (result) impl::recursive_copy_skip(result, proto, result);
4438 
4439  return result;
4440  }
4441 
4442  PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
4443  {
4444  xml_node result = insert_child_after(proto.type(), node);
4445 
4446  if (result) impl::recursive_copy_skip(result, proto, result);
4447 
4448  return result;
4449  }
4450 
4451  PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
4452  {
4453  xml_node result = insert_child_before(proto.type(), node);
4454 
4455  if (result) impl::recursive_copy_skip(result, proto, result);
4456 
4457  return result;
4458  }
4459 
4460  PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
4461  {
4462  return remove_attribute(attribute(name_));
4463  }
4464 
4465  PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
4466  {
4467  if (!_root || !a._attr) return false;
4468 
4469  // check that attribute belongs to *this
4470  xml_attribute_struct* attr = a._attr;
4471 
4472  while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
4473 
4474  if (attr != _root->first_attribute) return false;
4475 
4476  if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4477  else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
4478 
4479  if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
4480  else _root->first_attribute = a._attr->next_attribute;
4481 
4482  impl::destroy_attribute(a._attr, impl::get_allocator(_root));
4483 
4484  return true;
4485  }
4486 
4487  PUGI__FN bool xml_node::remove_child(const char_t* name_)
4488  {
4489  return remove_child(child(name_));
4490  }
4491 
4492  PUGI__FN bool xml_node::remove_child(const xml_node& n)
4493  {
4494  if (!_root || !n._root || n._root->parent != _root) return false;
4495 
4496  if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
4497  else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
4498 
4499  if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
4500  else _root->first_child = n._root->next_sibling;
4501 
4502  impl::destroy_node(n._root, impl::get_allocator(_root));
4503 
4504  return true;
4505  }
4506 
4507  PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
4508  {
4509  if (!_root) return xml_node();
4510 
4511  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4512  if (i->name && impl::strequal(name_, i->name))
4513  {
4514  for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4515  if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
4516  return xml_node(i);
4517  }
4518 
4519  return xml_node();
4520  }
4521 
4522  PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
4523  {
4524  if (!_root) return xml_node();
4525 
4526  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
4527  for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
4528  if (impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value))
4529  return xml_node(i);
4530 
4531  return xml_node();
4532  }
4533 
4534 #ifndef PUGIXML_NO_STL
4535  PUGI__FN string_t xml_node::path(char_t delimiter) const
4536  {
4537  xml_node cursor = *this; // Make a copy.
4538 
4539  string_t result = cursor.name();
4540 
4541  while (cursor.parent())
4542  {
4543  cursor = cursor.parent();
4544 
4545  string_t temp = cursor.name();
4546  temp += delimiter;
4547  temp += result;
4548  result.swap(temp);
4549  }
4550 
4551  return result;
4552  }
4553 #endif
4554 
4555  PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
4556  {
4557  xml_node found = *this; // Current search context.
4558 
4559  if (!_root || !path_ || !path_[0]) return found;
4560 
4561  if (path_[0] == delimiter)
4562  {
4563  // Absolute path; e.g. '/foo/bar'
4564  found = found.root();
4565  ++path_;
4566  }
4567 
4568  const char_t* path_segment = path_;
4569 
4570  while (*path_segment == delimiter) ++path_segment;
4571 
4572  const char_t* path_segment_end = path_segment;
4573 
4574  while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
4575 
4576  if (path_segment == path_segment_end) return found;
4577 
4578  const char_t* next_segment = path_segment_end;
4579 
4580  while (*next_segment == delimiter) ++next_segment;
4581 
4582  if (*path_segment == '.' && path_segment + 1 == path_segment_end)
4583  return found.first_element_by_path(next_segment, delimiter);
4584  else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
4585  return found.parent().first_element_by_path(next_segment, delimiter);
4586  else
4587  {
4588  for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
4589  {
4590  if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
4591  {
4592  xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
4593 
4594  if (subsearch) return subsearch;
4595  }
4596  }
4597 
4598  return xml_node();
4599  }
4600  }
4601 
4602  PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
4603  {
4604  walker._depth = -1;
4605 
4606  xml_node arg_begin = *this;
4607  if (!walker.begin(arg_begin)) return false;
4608 
4609  xml_node cur = first_child();
4610 
4611  if (cur)
4612  {
4613  ++walker._depth;
4614 
4615  do
4616  {
4617  xml_node arg_for_each = cur;
4618  if (!walker.for_each(arg_for_each))
4619  return false;
4620 
4621  if (cur.first_child())
4622  {
4623  ++walker._depth;
4624  cur = cur.first_child();
4625  }
4626  else if (cur.next_sibling())
4627  cur = cur.next_sibling();
4628  else
4629  {
4630  // Borland C++ workaround
4631  while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
4632  {
4633  --walker._depth;
4634  cur = cur.parent();
4635  }
4636 
4637  if (cur != *this)
4638  cur = cur.next_sibling();
4639  }
4640  }
4641  while (cur && cur != *this);
4642  }
4643 
4644  assert(walker._depth == -1);
4645 
4646  xml_node arg_end = *this;
4647  return walker.end(arg_end);
4648  }
4649 
4650  PUGI__FN size_t xml_node::hash_value() const
4651  {
4652  return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
4653  }
4654 
4655  PUGI__FN xml_node_struct* xml_node::internal_object() const
4656  {
4657  return _root;
4658  }
4659 
4660  PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
4661  {
4662  if (!_root) return;
4663 
4664  impl::xml_buffered_writer buffered_writer(writer, encoding);
4665 
4666  impl::node_output(buffered_writer, *this, indent, flags, depth);
4667  }
4668 
4669 #ifndef PUGIXML_NO_STL
4670  PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
4671  {
4672  xml_writer_stream writer(stream);
4673 
4674  print(writer, indent, flags, encoding, depth);
4675  }
4676 
4677  PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
4678  {
4679  xml_writer_stream writer(stream);
4680 
4681  print(writer, indent, flags, encoding_wchar, depth);
4682  }
4683 #endif
4684 
4685  PUGI__FN ptrdiff_t xml_node::offset_debug() const
4686  {
4687  xml_node_struct* r = root()._root;
4688 
4689  if (!r) return -1;
4690 
4691  const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
4692 
4693  if (!buffer) return -1;
4694 
4695  switch (type())
4696  {
4697  case node_document:
4698  return 0;
4699 
4700  case node_element:
4701  case node_declaration:
4702  case node_pi:
4703  return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
4704 
4705  case node_pcdata:
4706  case node_cdata:
4707  case node_comment:
4708  case node_doctype:
4709  return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
4710 
4711  default:
4712  return -1;
4713  }
4714  }
4715 
4716 #ifdef __BORLANDC__
4717  PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
4718  {
4719  return (bool)lhs && rhs;
4720  }
4721 
4722  PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
4723  {
4724  return (bool)lhs || rhs;
4725  }
4726 #endif
4727 
4728  PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
4729  {
4730  }
4731 
4732  PUGI__FN xml_node_struct* xml_text::_data() const
4733  {
4734  if (!_root || impl::is_text_node(_root)) return _root;
4735 
4736  for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
4737  if (impl::is_text_node(node))
4738  return node;
4739 
4740  return 0;
4741  }
4742 
4743  PUGI__FN xml_node_struct* xml_text::_data_new()
4744  {
4745  xml_node_struct* d = _data();
4746  if (d) return d;
4747 
4748  return xml_node(_root).append_child(node_pcdata).internal_object();
4749  }
4750 
4751  PUGI__FN xml_text::xml_text(): _root(0)
4752  {
4753  }
4754 
4755  PUGI__FN static void unspecified_bool_xml_text(xml_text***)
4756  {
4757  }
4758 
4759  PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
4760  {
4761  return _data() ? unspecified_bool_xml_text : 0;
4762  }
4763 
4764  PUGI__FN bool xml_text::operator!() const
4765  {
4766  return !_data();
4767  }
4768 
4769  PUGI__FN bool xml_text::empty() const
4770  {
4771  return _data() == 0;
4772  }
4773 
4774  PUGI__FN const char_t* xml_text::get() const
4775  {
4776  xml_node_struct* d = _data();
4777 
4778  return (d && d->value) ? d->value : PUGIXML_TEXT("");
4779  }
4780 
4781  PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
4782  {
4783  xml_node_struct* d = _data();
4784 
4785  return (d && d->value) ? d->value : def;
4786  }
4787 
4788  PUGI__FN int xml_text::as_int(int def) const
4789  {
4790  xml_node_struct* d = _data();
4791 
4792  return impl::get_value_int(d ? d->value : 0, def);
4793  }
4794 
4795  PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
4796  {
4797  xml_node_struct* d = _data();
4798 
4799  return impl::get_value_uint(d ? d->value : 0, def);
4800  }
4801 
4802  PUGI__FN double xml_text::as_double(double def) const
4803  {
4804  xml_node_struct* d = _data();
4805 
4806  return impl::get_value_double(d ? d->value : 0, def);
4807  }
4808 
4809  PUGI__FN float xml_text::as_float(float def) const
4810  {
4811  xml_node_struct* d = _data();
4812 
4813  return impl::get_value_float(d ? d->value : 0, def);
4814  }
4815 
4816  PUGI__FN bool xml_text::as_bool(bool def) const
4817  {
4818  xml_node_struct* d = _data();
4819 
4820  return impl::get_value_bool(d ? d->value : 0, def);
4821  }
4822 
4823  PUGI__FN bool xml_text::set(const char_t* rhs)
4824  {
4825  xml_node_struct* dn = _data_new();
4826 
4827  return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
4828  }
4829 
4830  PUGI__FN bool xml_text::set(int rhs)
4831  {
4832  xml_node_struct* dn = _data_new();
4833 
4834  return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
4835  }
4836 
4837  PUGI__FN bool xml_text::set(unsigned int rhs)
4838  {
4839  xml_node_struct* dn = _data_new();
4840 
4841  return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
4842  }
4843 
4844  PUGI__FN bool xml_text::set(double rhs)
4845  {
4846  xml_node_struct* dn = _data_new();
4847 
4848  return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
4849  }
4850 
4851  PUGI__FN bool xml_text::set(bool rhs)
4852  {
4853  xml_node_struct* dn = _data_new();
4854 
4855  return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
4856  }
4857 
4858  PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
4859  {
4860  set(rhs);
4861  return *this;
4862  }
4863 
4864  PUGI__FN xml_text& xml_text::operator=(int rhs)
4865  {
4866  set(rhs);
4867  return *this;
4868  }
4869 
4870  PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
4871  {
4872  set(rhs);
4873  return *this;
4874  }
4875 
4876  PUGI__FN xml_text& xml_text::operator=(double rhs)
4877  {
4878  set(rhs);
4879  return *this;
4880  }
4881 
4882  PUGI__FN xml_text& xml_text::operator=(bool rhs)
4883  {
4884  set(rhs);
4885  return *this;
4886  }
4887 
4888  PUGI__FN xml_node xml_text::data() const
4889  {
4890  return xml_node(_data());
4891  }
4892 
4893 #ifdef __BORLANDC__
4894  PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
4895  {
4896  return (bool)lhs && rhs;
4897  }
4898 
4899  PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
4900  {
4901  return (bool)lhs || rhs;
4902  }
4903 #endif
4904 
4905  PUGI__FN xml_node_iterator::xml_node_iterator()
4906  {
4907  }
4908 
4909  PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
4910  {
4911  }
4912 
4913  PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
4914  {
4915  }
4916 
4917  PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
4918  {
4919  return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
4920  }
4921 
4922  PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
4923  {
4924  return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
4925  }
4926 
4927  PUGI__FN xml_node& xml_node_iterator::operator*() const
4928  {
4929  assert(_wrap._root);
4930  return _wrap;
4931  }
4932 
4933  PUGI__FN xml_node* xml_node_iterator::operator->() const
4934  {
4935  assert(_wrap._root);
4936  return const_cast<xml_node*>(&_wrap); // BCC32 workaround
4937  }
4938 
4939  PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
4940  {
4941  assert(_wrap._root);
4942  _wrap._root = _wrap._root->next_sibling;
4943  return *this;
4944  }
4945 
4946  PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
4947  {
4948  xml_node_iterator temp = *this;
4949  ++*this;
4950  return temp;
4951  }
4952 
4953  PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
4954  {
4955  _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
4956  return *this;
4957  }
4958 
4959  PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
4960  {
4961  xml_node_iterator temp = *this;
4962  --*this;
4963  return temp;
4964  }
4965 
4966  PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
4967  {
4968  }
4969 
4970  PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
4971  {
4972  }
4973 
4974  PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
4975  {
4976  }
4977 
4978  PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
4979  {
4980  return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
4981  }
4982 
4983  PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
4984  {
4985  return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
4986  }
4987 
4988  PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
4989  {
4990  assert(_wrap._attr);
4991  return _wrap;
4992  }
4993 
4994  PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
4995  {
4996  assert(_wrap._attr);
4997  return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
4998  }
4999 
5000  PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
5001  {
5002  assert(_wrap._attr);
5003  _wrap._attr = _wrap._attr->next_attribute;
5004  return *this;
5005  }
5006 
5007  PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
5008  {
5009  xml_attribute_iterator temp = *this;
5010  ++*this;
5011  return temp;
5012  }
5013 
5014  PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
5015  {
5016  _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
5017  return *this;
5018  }
5019 
5020  PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
5021  {
5022  xml_attribute_iterator temp = *this;
5023  --*this;
5024  return temp;
5025  }
5026 
5027  PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
5028  {
5029  }
5030 
5031  PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _node(node), _name(name)
5032  {
5033  }
5034 
5035  PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
5036  {
5037  return _node == rhs._node;
5038  }
5039 
5040  PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
5041  {
5042  return _node != rhs._node;
5043  }
5044 
5045  PUGI__FN xml_node& xml_named_node_iterator::operator*() const
5046  {
5047  assert(_node._root);
5048  return _node;
5049  }
5050 
5051  PUGI__FN xml_node* xml_named_node_iterator::operator->() const
5052  {
5053  assert(_node._root);
5054  return const_cast<xml_node*>(&_node); // BCC32 workaround
5055  }
5056 
5057  PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
5058  {
5059  assert(_node._root);
5060  _node = _node.next_sibling(_name);
5061  return *this;
5062  }
5063 
5064  PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
5065  {
5066  xml_named_node_iterator temp = *this;
5067  ++*this;
5068  return temp;
5069  }
5070 
5071  PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
5072  {
5073  }
5074 
5075  PUGI__FN xml_parse_result::operator bool() const
5076  {
5077  return status == status_ok;
5078  }
5079 
5080  PUGI__FN const char* xml_parse_result::description() const
5081  {
5082  switch (status)
5083  {
5084  case status_ok: return "No error";
5085 
5086  case status_file_not_found: return "File was not found";
5087  case status_io_error: return "Error reading from file/stream";
5088  case status_out_of_memory: return "Could not allocate memory";
5089  case status_internal_error: return "Internal error occurred";
5090 
5091  case status_unrecognized_tag: return "Could not determine tag type";
5092 
5093  case status_bad_pi: return "Error parsing document declaration/processing instruction";
5094  case status_bad_comment: return "Error parsing comment";
5095  case status_bad_cdata: return "Error parsing CDATA section";
5096  case status_bad_doctype: return "Error parsing document type declaration";
5097  case status_bad_pcdata: return "Error parsing PCDATA section";
5098  case status_bad_start_element: return "Error parsing start element tag";
5099  case status_bad_attribute: return "Error parsing element attribute";
5100  case status_bad_end_element: return "Error parsing end element tag";
5101  case status_end_element_mismatch: return "Start-end tags mismatch";
5102 
5103  default: return "Unknown error";
5104  }
5105  }
5106 
5107  PUGI__FN xml_document::xml_document(): _buffer(0)
5108  {
5109  create();
5110  }
5111 
5112  PUGI__FN xml_document::~xml_document()
5113  {
5114  destroy();
5115  }
5116 
5117  PUGI__FN void xml_document::reset()
5118  {
5119  destroy();
5120  create();
5121  }
5122 
5123  PUGI__FN void xml_document::reset(const xml_document& proto)
5124  {
5125  reset();
5126 
5127  for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
5128  append_copy(cur);
5129  }
5130 
5131  PUGI__FN void xml_document::create()
5132  {
5133  // initialize sentinel page
5134  PUGI__STATIC_ASSERT(offsetof(impl::xml_memory_page, data) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
5135 
5136  // align upwards to page boundary
5137  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
5138 
5139  // prepare page structure
5140  impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
5141 
5142  page->busy_size = impl::xml_memory_page_size;
5143 
5144  // allocate new root
5145  _root = new (page->data) impl::xml_document_struct(page);
5146  _root->prev_sibling_c = _root;
5147 
5148  // setup sentinel page
5149  page->allocator = static_cast<impl::xml_document_struct*>(_root);
5150  }
5151 
5152  PUGI__FN void xml_document::destroy()
5153  {
5154  // destroy static storage
5155  if (_buffer)
5156  {
5157  impl::xml_memory::deallocate(_buffer);
5158  _buffer = 0;
5159  }
5160 
5161  // destroy dynamic storage, leave sentinel page (it's in static memory)
5162  if (_root)
5163  {
5164  impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
5165  assert(root_page && !root_page->prev && !root_page->memory);
5166 
5167  // destroy all pages
5168  for (impl::xml_memory_page* page = root_page->next; page; )
5169  {
5170  impl::xml_memory_page* next = page->next;
5171 
5172  impl::xml_allocator::deallocate_page(page);
5173 
5174  page = next;
5175  }
5176 
5177  // cleanup root page
5178  root_page->allocator = 0;
5179  root_page->next = 0;
5180  root_page->busy_size = root_page->freed_size = 0;
5181 
5182  _root = 0;
5183  }
5184  }
5185 
5186 #ifndef PUGIXML_NO_STL
5187  PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
5188  {
5189  reset();
5190 
5191  return impl::load_stream_impl(*this, stream, options, encoding);
5192  }
5193 
5194  PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
5195  {
5196  reset();
5197 
5198  return impl::load_stream_impl(*this, stream, options, encoding_wchar);
5199  }
5200 #endif
5201 
5202  PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
5203  {
5204  // Force native encoding (skip autodetection)
5205  #ifdef PUGIXML_WCHAR_MODE
5206  xml_encoding encoding = encoding_wchar;
5207  #else
5208  xml_encoding encoding = encoding_utf8;
5209  #endif
5210 
5211  return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
5212  }
5213 
5214  PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
5215  {
5216  reset();
5217 
5218  FILE* file = fopen(path_, "rb");
5219 
5220  return impl::load_file_impl(*this, file, options, encoding);
5221  }
5222 
5223  PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
5224  {
5225  reset();
5226 
5227  FILE* file = impl::open_file_wide(path_, L"rb");
5228 
5229  return impl::load_file_impl(*this, file, options, encoding);
5230  }
5231 
5232  PUGI__FN xml_parse_result xml_document::load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own)
5233  {
5234  reset();
5235 
5236  // check input buffer
5237  assert(contents || size == 0);
5238 
5239  // get actual encoding
5240  xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
5241 
5242  // get private buffer
5243  char_t* buffer = 0;
5244  size_t length = 0;
5245 
5246  if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
5247 
5248  // delete original buffer if we performed a conversion
5249  if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
5250 
5251  // parse
5252  xml_parse_result res = impl::xml_parser::parse(buffer, length, _root, options);
5253 
5254  // remember encoding
5255  res.encoding = buffer_encoding;
5256 
5257  // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
5258  if (own || buffer != contents) _buffer = buffer;
5259 
5260  return res;
5261  }
5262 
5263  PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
5264  {
5265  return load_buffer_impl(const_cast<void*>(contents), size, options, encoding, false, false);
5266  }
5267 
5268  PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
5269  {
5270  return load_buffer_impl(contents, size, options, encoding, true, false);
5271  }
5272 
5273  PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
5274  {
5275  return load_buffer_impl(contents, size, options, encoding, true, true);
5276  }
5277 
5278  PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5279  {
5280  impl::xml_buffered_writer buffered_writer(writer, encoding);
5281 
5282  if ((flags & format_write_bom) && encoding != encoding_latin1)
5283  {
5284  // BOM always represents the codepoint U+FEFF, so just write it in native encoding
5285  #ifdef PUGIXML_WCHAR_MODE
5286  unsigned int bom = 0xfeff;
5287  buffered_writer.write(static_cast<wchar_t>(bom));
5288  #else
5289  buffered_writer.write('\xef', '\xbb', '\xbf');
5290  #endif
5291  }
5292 
5293  if (!(flags & format_no_declaration) && !impl::has_declaration(*this))
5294  {
5295  buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
5296  if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
5297  buffered_writer.write('?', '>');
5298  if (!(flags & format_raw)) buffered_writer.write('\n');
5299  }
5300 
5301  impl::node_output(buffered_writer, *this, indent, flags, 0);
5302  }
5303 
5304 #ifndef PUGIXML_NO_STL
5305  PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5306  {
5307  xml_writer_stream writer(stream);
5308 
5309  save(writer, indent, flags, encoding);
5310  }
5311 
5312  PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
5313  {
5314  xml_writer_stream writer(stream);
5315 
5316  save(writer, indent, flags, encoding_wchar);
5317  }
5318 #endif
5319 
5320  PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5321  {
5322  FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
5323  return impl::save_file_impl(*this, file, indent, flags, encoding);
5324  }
5325 
5326  PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
5327  {
5328  FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
5329  return impl::save_file_impl(*this, file, indent, flags, encoding);
5330  }
5331 
5332  PUGI__FN xml_node xml_document::document_element() const
5333  {
5334  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5335  if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
5336  return xml_node(i);
5337 
5338  return xml_node();
5339  }
5340 
5341 #ifndef PUGIXML_NO_STL
5342  PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
5343  {
5344  assert(str);
5345 
5346  return impl::as_utf8_impl(str, wcslen(str));
5347  }
5348 
5349  PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
5350  {
5351  return impl::as_utf8_impl(str.c_str(), str.size());
5352  }
5353 
5354  PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
5355  {
5356  assert(str);
5357 
5358  return impl::as_wide_impl(str, strlen(str));
5359  }
5360 
5361  PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
5362  {
5363  return impl::as_wide_impl(str.c_str(), str.size());
5364  }
5365 #endif
5366 
5367  PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
5368  {
5369  impl::xml_memory::allocate = allocate;
5370  impl::xml_memory::deallocate = deallocate;
5371  }
5372 
5373  PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
5374  {
5375  return impl::xml_memory::allocate;
5376  }
5377 
5378  PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
5379  {
5380  return impl::xml_memory::deallocate;
5381  }
5382 }
5383 
5384 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
5385 namespace std
5386 {
5387  // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
5388  PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
5389  {
5390  return std::bidirectional_iterator_tag();
5391  }
5392 
5393  PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
5394  {
5395  return std::bidirectional_iterator_tag();
5396  }
5397 
5398  PUGI__FN std::forward_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
5399  {
5400  return std::forward_iterator_tag();
5401  }
5402 }
5403 #endif
5404 
5405 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
5406 namespace std
5407 {
5408  // Workarounds for (non-standard) iterator category detection
5409  PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
5410  {
5411  return std::bidirectional_iterator_tag();
5412  }
5413 
5414  PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
5415  {
5416  return std::bidirectional_iterator_tag();
5417  }
5418 
5419  PUGI__FN std::forward_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
5420  {
5421  return std::forward_iterator_tag();
5422  }
5423 }
5424 #endif
5425 
5426 #ifndef PUGIXML_NO_XPATH
5427 
5428 // STL replacements
5429 PUGI__NS_BEGIN
// Function object that compares two values of the same type with operator==.
struct equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool same = (lhs == rhs);
        return same;
    }
};
5437 
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool differ = (lhs != rhs);
        return differ;
    }
};
5445 
// Function object that compares two values of the same type with operator<.
struct less
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool before = (lhs < rhs);
        return before;
    }
};
5453 
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool not_after = (lhs <= rhs);
        return not_after;
    }
};
5461 
// Exchanges the values of lhs and rhs through a temporary copy.
template <typename T> void swap(T& lhs, T& rhs)
{
    T saved(lhs);

    lhs = rhs;
    rhs = saved;
}
5468 
// Returns an iterator to the first element of [begin, end) for which no other element
// compares smaller under pred (pred(a, b) is true when a orders before b).
// For an empty range end is returned, matching std::min_element.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
    // an empty range has no minimum; begin + 1 would step past end
    if (begin == end) return end;

    I result = begin;

    for (I it = begin + 1; it != end; ++it)
        if (pred(*it, *result))
            result = it;

    return result;
}
5479 
// Reverses [begin, end) in place by swapping elements from both ends towards the middle.
template <typename I> void reverse(I begin, I end)
{
    while (begin + 1 < end)
    {
        --end;
        swap(*begin, *end);
        ++begin;
    }
}
5484 
// Collapses runs of consecutive equal elements in [begin, end) to a single element each
// and returns the past-the-end iterator of the compacted range. Elements are compared with operator!=.
template <typename I> I unique(I begin, I end)
{
    // fast skip head: nothing has to move while neighbours differ
    for (; begin + 1 < end; begin++)
        if (!(*begin != *(begin + 1)))
            break;

    if (begin == end) return begin;

    // last written element
    I write = begin;
    begin++;

    // merge unique elements
    for (; begin != end; begin++)
    {
        if (*begin != *write)
        {
            ++write;
            *write = *begin;
        }
    }

    // past-the-end (write points to live element)
    return write + 1;
}
5507 
// Copies [begin, end) so that the copy ends at target, walking from the last element to the first.
template <typename I> void copy_backwards(I begin, I end, I target)
{
    while (begin != end)
    {
        --end;
        --target;
        *target = *end;
    }
}
5512 
// Insertion sort of the non-empty range [begin, end) under pred.
// The unnamed T* parameter only conveys the element type used for the temporary copy.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
    assert(begin != end);

    I it = begin + 1;

    while (it != end)
    {
        T val = *it;

        if (!pred(val, *begin))
        {
            // val does not order before *begin, so the backwards scan stops at begin at the latest
            I hole = it;

            // move hole backwards
            for (; pred(val, *(hole - 1)); hole--)
                *hole = *(hole - 1);

            // fill hole with element
            *hole = val;
        }
        else
        {
            // new smallest element: shift everything before it one slot right and move it to the front
            copy_backwards(begin, it, it + 1);
            *begin = val;
        }

        ++it;
    }
}
5543 
// std variant for elements with ==
// Three-way partition of [begin, end) around the value at middle: on return the elements equal
// to the pivot occupy [*out_eqbeg, *out_eqend).
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
    I eq_first = middle;
    I eq_last = middle + 1;

    // expand equal range
    while (eq_first != begin && *(eq_first - 1) == *eq_first) --eq_first;
    while (eq_last != end && *eq_last == *eq_first) ++eq_last;

    // process outer elements
    I lt_last = eq_first;
    I gt_first = eq_last;

    for (;;)
    {
        // find the element from the right side that belongs to the left one
        while (gt_first != end)
        {
            if (!pred(*eq_first, *gt_first))
            {
                if (*gt_first == *eq_first)
                {
                    swap(*gt_first, *eq_last);
                    ++eq_last;
                }
                else break;
            }

            ++gt_first;
        }

        // find the element from the left side that belongs to the right one
        while (lt_last != begin)
        {
            if (!pred(*(lt_last - 1), *eq_first))
            {
                if (*eq_first == *(lt_last - 1))
                {
                    --eq_first;
                    swap(*(lt_last - 1), *eq_first);
                }
                else break;
            }

            --lt_last;
        }

        const bool right_done = (gt_first == end);
        const bool left_done = (lt_last == begin);

        // scanned all elements
        if (right_done && left_done)
        {
            *out_eqbeg = eq_first;
            *out_eqend = eq_last;
            return;
        }

        // make room for elements by moving equal area
        if (right_done)
        {
            --lt_last;
            --eq_first;

            if (lt_last != eq_first) swap(*lt_last, *eq_first);

            --eq_last;
            swap(*eq_first, *eq_last);
        }
        else if (left_done)
        {
            if (eq_last != gt_first) swap(*eq_first, *eq_last);

            ++eq_last;

            swap(*gt_first, *eq_first);
            ++gt_first;
            ++eq_first;
        }
        else
        {
            // misplaced elements on both sides: exchange them directly
            --lt_last;
            swap(*gt_first, *lt_last);
            ++gt_first;
        }
    }
}
5597 
// Orders the three referenced elements under pred with three compare-and-swap steps:
// (first, middle), then (middle, last), then (first, middle) again.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }

    if (pred(*last, *middle))
    {
        swap(*last, *middle);
    }

    // the second step may have moved a smaller element into the middle
    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }
}
5604 
// Pivot selection: applies median3 once for spans of at most 40 elements, and a
// median-of-nine scheme (three median3 passes plus one over their middles) for larger spans.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
    if (last - first > 40)
    {
        // median of nine
        size_t step = (last - first + 1) / 8;

        median3(first, first + step, first + 2 * step, pred);
        median3(middle - step, middle, middle + step, pred);
        median3(last - 2 * step, last - step, last, pred);
        median3(first + step, middle, last - step, pred);

        return;
    }

    // median of three for small chunks
    median3(first, middle, last, pred);
}
5623 
// Sorts [begin, end) under pred: ranges longer than 32 elements are split with median/partition,
// recursing into one part and looping on the other; the remaining short range is insertion sorted.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
    // sort large chunks
    for (; end - begin > 32; )
    {
        // find median element
        I middle = begin + (end - begin) / 2;
        median(begin, middle, end - 1, pred);

        // partition in three chunks (< = >)
        I eqbeg, eqend;
        partition(begin, middle, end, pred, &eqbeg, &eqend);

        // recurse into the smaller part, keep looping on the larger one
        const bool left_is_larger = (eqbeg - begin > end - eqend);

        if (!left_is_larger)
        {
            sort(begin, eqbeg, pred);
            begin = eqend;
        }
        else
        {
            sort(eqend, end, pred);
            end = eqbeg;
        }
    }

    // insertion sort small chunk
    if (begin != end)
        insertion_sort(begin, end, pred, &*begin);
}
5653 PUGI__NS_END
5654 
5655 // Allocator used for AST and evaluation stacks
5656 PUGI__NS_BEGIN
// One block of the XPath allocator: a link to the next block followed by the payload bytes.
// The payload size is PUGIXML_MEMORY_XPATH_PAGE_SIZE if that macro is defined, 4096 otherwise.
struct xpath_memory_block
{
    xpath_memory_block* next;

#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
    char data[PUGIXML_MEMORY_XPATH_PAGE_SIZE];
#else
    char data[4096];
#endif
};
5669 
5670  class xpath_allocator
5671  {
5672  xpath_memory_block* _root;
5673  size_t _root_size;
5674 
5675  public:
5676  #ifdef PUGIXML_NO_EXCEPTIONS
5677  jmp_buf* error_handler;
5678  #endif
5679 
5680  xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
5681  {
5682  #ifdef PUGIXML_NO_EXCEPTIONS
5683  error_handler = 0;
5684  #endif
5685  }
5686 
5687  void* allocate_nothrow(size_t size)
5688  {
5689  const size_t block_capacity = sizeof(_root->data);
5690 
5691  // align size so that we're able to store pointers in subsequent blocks
5692  size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5693 
5694  if (_root_size + size <= block_capacity)
5695  {
5696  void* buf = _root->data + _root_size;
5697  _root_size += size;
5698  return buf;
5699  }
5700  else
5701  {
5702  size_t block_data_size = (size > block_capacity) ? size : block_capacity;
5703  size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
5704 
5705  xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
5706  if (!block) return 0;
5707 
5708  block->next = _root;
5709 
5710  _root = block;
5711  _root_size = size;
5712 
5713  return block->data;
5714  }
5715  }
5716 
5717  void* allocate(size_t size)
5718  {
5719  void* result = allocate_nothrow(size);
5720 
5721  if (!result)
5722  {
5723  #ifdef PUGIXML_NO_EXCEPTIONS
5724  assert(error_handler);
5725  longjmp(*error_handler, 1);
5726  #else
5727  throw std::bad_alloc();
5728  #endif
5729  }
5730 
5731  return result;
5732  }
5733 
5734  void* reallocate(void* ptr, size_t old_size, size_t new_size)
5735  {
5736  // align size so that we're able to store pointers in subsequent blocks
5737  old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5738  new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5739 
5740  // we can only reallocate the last object
5741  assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
5742 
5743  // adjust root size so that we have not allocated the object at all
5744  bool only_object = (_root_size == old_size);
5745 
5746  if (ptr) _root_size -= old_size;
5747 
5748  // allocate a new version (this will obviously reuse the memory if possible)
5749  void* result = allocate(new_size);
5750  assert(result);
5751 
5752  // we have a new block
5753  if (result != ptr && ptr)
5754  {
5755  // copy old data
5756  assert(new_size > old_size);
5757  memcpy(result, ptr, old_size);
5758 
5759  // free the previous page if it had no other objects
5760  if (only_object)
5761  {
5762  assert(_root->data == result);
5763  assert(_root->next);
5764 
5765  xpath_memory_block* next = _root->next->next;
5766 
5767  if (next)
5768  {
5769  // deallocate the whole page, unless it was the first one
5770  xml_memory::deallocate(_root->next);
5771  _root->next = next;
5772  }
5773  }
5774  }
5775 
5776  return result;
5777  }
5778 
5779  void revert(const xpath_allocator& state)
5780  {
5781  // free all new pages
5782  xpath_memory_block* cur = _root;
5783 
5784  while (cur != state._root)
5785  {
5786  xpath_memory_block* next = cur->next;
5787 
5788  xml_memory::deallocate(cur);
5789 
5790  cur = next;
5791  }
5792 
5793  // restore state
5794  _root = state._root;
5795  _root_size = state._root_size;
5796  }
5797 
5798  void release()
5799  {
5800  xpath_memory_block* cur = _root;
5801  assert(cur);
5802 
5803  while (cur->next)
5804  {
5805  xpath_memory_block* next = cur->next;
5806 
5807  xml_memory::deallocate(cur);
5808 
5809  cur = next;
5810  }
5811  }
5812  };
5813 
5814  struct xpath_allocator_capture
5815  {
5816  xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
5817  {
5818  }
5819 
5820  ~xpath_allocator_capture()
5821  {
5822  _target->revert(_state);
5823  }
5824 
5825  xpath_allocator* _target;
5826  xpath_allocator _state;
5827  };
5828 
5829  struct xpath_stack
5830  {
5831  xpath_allocator* result;
5832  xpath_allocator* temp;
5833  };
5834 
5835  struct xpath_stack_data
5836  {
5837  xpath_memory_block blocks[2];
5838  xpath_allocator result;
5839  xpath_allocator temp;
5840  xpath_stack stack;
5841 
5842  #ifdef PUGIXML_NO_EXCEPTIONS
5843  jmp_buf error_handler;
5844  #endif
5845 
5846  xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
5847  {
5848  blocks[0].next = blocks[1].next = 0;
5849 
5850  stack.result = &result;
5851  stack.temp = &temp;
5852 
5853  #ifdef PUGIXML_NO_EXCEPTIONS
5854  result.error_handler = temp.error_handler = &error_handler;
5855  #endif
5856  }
5857 
5858  ~xpath_stack_data()
5859  {
5860  result.release();
5861  temp.release();
5862  }
5863  };
5864 PUGI__NS_END
5865 
5866 // String class
5867 PUGI__NS_BEGIN
5868  class xpath_string
5869  {
5870  const char_t* _buffer;
5871  bool _uses_heap;
5872 
5873  static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
5874  {
5875  char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
5876  assert(result);
5877 
5878  memcpy(result, string, length * sizeof(char_t));
5879  result[length] = 0;
5880 
5881  return result;
5882  }
5883 
5884  static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
5885  {
5886  return duplicate_string(string, strlength(string), alloc);
5887  }
5888 
5889  public:
5890  xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
5891  {
5892  }
5893 
5894  explicit xpath_string(const char_t* str, xpath_allocator* alloc)
5895  {
5896  bool empty_ = (*str == 0);
5897 
5898  _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
5899  _uses_heap = !empty_;
5900  }
5901 
5902  explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
5903  {
5904  }
5905 
5906  xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
5907  {
5908  assert(begin <= end);
5909 
5910  bool empty_ = (begin == end);
5911 
5912  _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
5913  _uses_heap = !empty_;
5914  }
5915 
5916  void append(const xpath_string& o, xpath_allocator* alloc)
5917  {
5918  // skip empty sources
5919  if (!*o._buffer) return;
5920 
5921  // fast append for constant empty target and constant source
5922  if (!*_buffer && !_uses_heap && !o._uses_heap)
5923  {
5924  _buffer = o._buffer;
5925  }
5926  else
5927  {
5928  // need to make heap copy
5929  size_t target_length = strlength(_buffer);
5930  size_t source_length = strlength(o._buffer);
5931  size_t result_length = target_length + source_length;
5932 
5933  // allocate new buffer
5934  char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
5935  assert(result);
5936 
5937  // append first string to the new buffer in case there was no reallocation
5938  if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
5939 
5940  // append second string to the new buffer
5941  memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
5942  result[result_length] = 0;
5943 
5944  // finalize
5945  _buffer = result;
5946  _uses_heap = true;
5947  }
5948  }
5949 
5950  const char_t* c_str() const
5951  {
5952  return _buffer;
5953  }
5954 
5955  size_t length() const
5956  {
5957  return strlength(_buffer);
5958  }
5959 
5960  char_t* data(xpath_allocator* alloc)
5961  {
5962  // make private heap copy
5963  if (!_uses_heap)
5964  {
5965  _buffer = duplicate_string(_buffer, alloc);
5966  _uses_heap = true;
5967  }
5968 
5969  return const_cast<char_t*>(_buffer);
5970  }
5971 
5972  bool empty() const
5973  {
5974  return *_buffer == 0;
5975  }
5976 
5977  bool operator==(const xpath_string& o) const
5978  {
5979  return strequal(_buffer, o._buffer);
5980  }
5981 
5982  bool operator!=(const xpath_string& o) const
5983  {
5984  return !strequal(_buffer, o._buffer);
5985  }
5986 
5987  bool uses_heap() const
5988  {
5989  return _uses_heap;
5990  }
5991  };
5992 
5993  PUGI__FN xpath_string xpath_string_const(const char_t* str)
5994  {
5995  return xpath_string(str, false);
5996  }
5997 PUGI__NS_END
5998 
5999 PUGI__NS_BEGIN
6000  PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
6001  {
6002  while (*pattern && *string == *pattern)
6003  {
6004  string++;
6005  pattern++;
6006  }
6007 
6008  return *pattern == 0;
6009  }
6010 
6011  PUGI__FN const char_t* find_char(const char_t* s, char_t c)
6012  {
6013  #ifdef PUGIXML_WCHAR_MODE
6014  return wcschr(s, c);
6015  #else
6016  return strchr(s, c);
6017  #endif
6018  }
6019 
6020  PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
6021  {
6022  #ifdef PUGIXML_WCHAR_MODE
6023  // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
6024  return (*p == 0) ? s : wcsstr(s, p);
6025  #else
6026  return strstr(s, p);
6027  #endif
6028  }
6029 
6030  // Converts symbol to lower case, if it is an ASCII one
6031  PUGI__FN char_t tolower_ascii(char_t ch)
6032  {
6033  return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
6034  }
6035 
6036  PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
6037  {
6038  if (na.attribute())
6039  return xpath_string_const(na.attribute().value());
6040  else
6041  {
6042  const xml_node& n = na.node();
6043 
6044  switch (n.type())
6045  {
6046  case node_pcdata:
6047  case node_cdata:
6048  case node_comment:
6049  case node_pi:
6050  return xpath_string_const(n.value());
6051 
6052  case node_document:
6053  case node_element:
6054  {
6055  xpath_string result;
6056 
6057  xml_node cur = n.first_child();
6058 
6059  while (cur && cur != n)
6060  {
6061  if (cur.type() == node_pcdata || cur.type() == node_cdata)
6062  result.append(xpath_string_const(cur.value()), alloc);
6063 
6064  if (cur.first_child())
6065  cur = cur.first_child();
6066  else if (cur.next_sibling())
6067  cur = cur.next_sibling();
6068  else
6069  {
6070  while (!cur.next_sibling() && cur != n)
6071  cur = cur.parent();
6072 
6073  if (cur != n) cur = cur.next_sibling();
6074  }
6075  }
6076 
6077  return result;
6078  }
6079 
6080  default:
6081  return xpath_string();
6082  }
6083  }
6084  }
6085 
6086  PUGI__FN unsigned int node_height(xml_node n)
6087  {
6088  unsigned int result = 0;
6089 
6090  while (n)
6091  {
6092  ++result;
6093  n = n.parent();
6094  }
6095 
6096  return result;
6097  }
6098 
6099  PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
6100  {
6101  // normalize heights
6102  for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
6103  for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
6104 
6105  // one node is the ancestor of the other
6106  if (ln == rn) return lh < rh;
6107 
6108  // find common ancestor
6109  while (ln.parent() != rn.parent())
6110  {
6111  ln = ln.parent();
6112  rn = rn.parent();
6113  }
6114 
6115  // there is no common ancestor (the shared parent is null), nodes are from different documents
6116  if (!ln.parent()) return ln < rn;
6117 
6118  // determine sibling order
6119  for (; ln; ln = ln.next_sibling())
6120  if (ln == rn)
6121  return true;
6122 
6123  return false;
6124  }
6125 
6126  PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
6127  {
6128  while (node && node != parent) node = node.parent();
6129 
6130  return parent && node == parent;
6131  }
6132 
6133  PUGI__FN const void* document_order(const xpath_node& xnode)
6134  {
6135  xml_node_struct* node = xnode.node().internal_object();
6136 
6137  if (node)
6138  {
6139  if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
6140  if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
6141  return 0;
6142  }
6143 
6144  xml_attribute_struct* attr = xnode.attribute().internal_object();
6145 
6146  if (attr)
6147  {
6148  if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
6149  if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
6150  return 0;
6151  }
6152 
6153  return 0;
6154  }
6155 
6156  struct document_order_comparator
6157  {
6158  bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
6159  {
6160  // optimized document order based check
6161  const void* lo = document_order(lhs);
6162  const void* ro = document_order(rhs);
6163 
6164  if (lo && ro) return lo < ro;
6165 
6166  // slow comparison
6167  xml_node ln = lhs.node(), rn = rhs.node();
6168 
6169  // compare attributes
6170  if (lhs.attribute() && rhs.attribute())
6171  {
6172  // shared parent
6173  if (lhs.parent() == rhs.parent())
6174  {
6175  // determine sibling order
6176  for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
6177  if (a == rhs.attribute())
6178  return true;
6179 
6180  return false;
6181  }
6182 
6183  // compare attribute parents
6184  ln = lhs.parent();
6185  rn = rhs.parent();
6186  }
6187  else if (lhs.attribute())
6188  {
6189  // attributes go after the parent element
6190  if (lhs.parent() == rhs.node()) return false;
6191 
6192  ln = lhs.parent();
6193  }
6194  else if (rhs.attribute())
6195  {
6196  // attributes go after the parent element
6197  if (rhs.parent() == lhs.node()) return true;
6198 
6199  rn = rhs.parent();
6200  }
6201 
6202  if (ln == rn) return false;
6203 
6204  unsigned int lh = node_height(ln);
6205  unsigned int rh = node_height(rn);
6206 
6207  return node_is_before(ln, lh, rn, rh);
6208  }
6209  };
6210 
6211  struct duplicate_comparator
6212  {
6213  bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
6214  {
6215  if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
6216  else return rhs.attribute() ? false : lhs.node() < rhs.node();
6217  }
6218  };
6219 
6220  PUGI__FN double gen_nan()
6221  {
6222  #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
6223  union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
6224  u[0].i = 0x7fc00000;
6225  return u[0].f;
6226  #else
6227  // fallback
6228  const volatile double zero = 0.0;
6229  return zero / zero;
6230  #endif
6231  }
6232 
6233  PUGI__FN bool is_nan(double value)
6234  {
6235  #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
6236  return !!_isnan(value);
6237  #elif defined(fpclassify) && defined(FP_NAN)
6238  return fpclassify(value) == FP_NAN;
6239  #else
6240  // fallback
6241  const volatile double v = value;
6242  return v != v;
6243  #endif
6244  }
6245 
6246  PUGI__FN const char_t* convert_number_to_string_special(double value)
6247  {
6248  #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
6249  if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
6250  if (_isnan(value)) return PUGIXML_TEXT("NaN");
6251  return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
6252  #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
6253  switch (fpclassify(value))
6254  {
6255  case FP_NAN:
6256  return PUGIXML_TEXT("NaN");
6257 
6258  case FP_INFINITE:
6259  return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
6260 
6261  case FP_ZERO:
6262  return PUGIXML_TEXT("0");
6263 
6264  default:
6265  return 0;
6266  }
6267  #else
6268  // fallback
6269  const volatile double v = value;
6270 
6271  if (v == 0) return PUGIXML_TEXT("0");
6272  if (v != v) return PUGIXML_TEXT("NaN");
6273  if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
6274  return 0;
6275  #endif
6276  }
6277 
6278  PUGI__FN bool convert_number_to_boolean(double value)
6279  {
6280  return (value != 0 && !is_nan(value));
6281  }
6282 
6283  PUGI__FN void truncate_zeros(char* begin, char* end)
6284  {
6285  while (begin != end && end[-1] == '0') end--;
6286 
6287  *end = 0;
6288  }
6289 
6290  // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
6291 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
6292  PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
6293  {
6294  // get base values
6295  int sign, exponent;
6296  _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
6297 
6298  // truncate redundant zeros
6299  truncate_zeros(buffer, buffer + strlen(buffer));
6300 
6301  // fill results
6302  *out_mantissa = buffer;
6303  *out_exponent = exponent;
6304  }
6305 #else
6306  PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
6307  {
6308  // get a scientific notation value with IEEE DBL_DIG decimals
6309  sprintf(buffer, "%.*e", DBL_DIG, value);
6310  assert(strlen(buffer) < buffer_size);
6311  (void)!buffer_size;
6312 
6313  // get the exponent (possibly negative)
6314  char* exponent_string = strchr(buffer, 'e');
6315  assert(exponent_string);
6316 
6317  int exponent = atoi(exponent_string + 1);
6318 
6319  // extract mantissa string: skip sign
6320  char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
6321  assert(mantissa[0] != '0' && mantissa[1] == '.');
6322 
6323  // divide mantissa by 10 to eliminate integer part
6324  mantissa[1] = mantissa[0];
6325  mantissa++;
6326  exponent++;
6327 
6328  // remove extra mantissa digits and zero-terminate mantissa
6329  truncate_zeros(mantissa, exponent_string);
6330 
6331  // fill results
6332  *out_mantissa = mantissa;
6333  *out_exponent = exponent;
6334  }
6335 #endif
6336 
6337  PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
6338  {
6339  // try special number conversion
6340  const char_t* special = convert_number_to_string_special(value);
6341  if (special) return xpath_string_const(special);
6342 
6343  // get mantissa + exponent form
6344  char mantissa_buffer[64];
6345 
6346  char* mantissa;
6347  int exponent;
6348  convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
6349 
6350  // make the number!
6351  char_t result[512];
6352  char_t* s = result;
6353 
6354  // sign
6355  if (value < 0) *s++ = '-';
6356 
6357  // integer part
6358  if (exponent <= 0)
6359  {
6360  *s++ = '0';
6361  }
6362  else
6363  {
6364  while (exponent > 0)
6365  {
6366  assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
6367  *s++ = *mantissa ? *mantissa++ : '0';
6368  exponent--;
6369  }
6370  }
6371 
6372  // fractional part
6373  if (*mantissa)
6374  {
6375  // decimal point
6376  *s++ = '.';
6377 
6378  // extra zeroes from negative exponent
6379  while (exponent < 0)
6380  {
6381  *s++ = '0';
6382  exponent++;
6383  }
6384 
6385  // extra mantissa digits
6386  while (*mantissa)
6387  {
6388  assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
6389  *s++ = *mantissa++;
6390  }
6391  }
6392 
6393  // zero-terminate
6394  assert(s < result + sizeof(result) / sizeof(result[0]));
6395  *s = 0;
6396 
6397  return xpath_string(result, alloc);
6398  }
6399 
6400  PUGI__FN bool check_string_to_number_format(const char_t* string)
6401  {
6402  // parse leading whitespace
6403  while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6404 
6405  // parse sign
6406  if (*string == '-') ++string;
6407 
6408  if (!*string) return false;
6409 
6410  // if there is no integer part, there should be a decimal part with at least one digit
6411  if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
6412 
6413  // parse integer part
6414  while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6415 
6416  // parse decimal part
6417  if (*string == '.')
6418  {
6419  ++string;
6420 
6421  while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6422  }
6423 
6424  // parse trailing whitespace
6425  while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6426 
6427  return *string == 0;
6428  }
6429 
6430  PUGI__FN double convert_string_to_number(const char_t* string)
6431  {
6432  // check string format
6433  if (!check_string_to_number_format(string)) return gen_nan();
6434 
6435  // parse string
6436  #ifdef PUGIXML_WCHAR_MODE
6437  return wcstod(string, 0);
6438  #else
6439  return atof(string);
6440  #endif
6441  }
6442 
6443  PUGI__FN bool convert_string_to_number(const char_t* begin, const char_t* end, double* out_result)
6444  {
6445  char_t buffer[32];
6446 
6447  size_t length = static_cast<size_t>(end - begin);
6448  char_t* scratch = buffer;
6449 
6450  if (length >= sizeof(buffer) / sizeof(buffer[0]))
6451  {
6452  // need to make dummy on-heap copy
6453  scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
6454  if (!scratch) return false;
6455  }
6456 
6457  // copy string to zero-terminated buffer and perform conversion
6458  memcpy(scratch, begin, length * sizeof(char_t));
6459  scratch[length] = 0;
6460 
6461  *out_result = convert_string_to_number(scratch);
6462 
6463  // free dummy buffer
6464  if (scratch != buffer) xml_memory::deallocate(scratch);
6465 
6466  return true;
6467  }
6468 
6469  PUGI__FN double round_nearest(double value)
6470  {
6471  return floor(value + 0.5);
6472  }
6473 
6474  PUGI__FN double round_nearest_nzero(double value)
6475  {
6476  // same as round_nearest, but returns -0 for [-0.5, -0]
6477  // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
6478  return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
6479  }
6480 
6481  PUGI__FN const char_t* qualified_name(const xpath_node& node)
6482  {
6483  return node.attribute() ? node.attribute().name() : node.node().name();
6484  }
6485 
6486  PUGI__FN const char_t* local_name(const xpath_node& node)
6487  {
6488  const char_t* name = qualified_name(node);
6489  const char_t* p = find_char(name, ':');
6490 
6491  return p ? p + 1 : name;
6492  }
6493 
6494  struct namespace_uri_predicate
6495  {
6496  const char_t* prefix;
6497  size_t prefix_length;
6498 
6499  namespace_uri_predicate(const char_t* name)
6500  {
6501  const char_t* pos = find_char(name, ':');
6502 
6503  prefix = pos ? name : 0;
6504  prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
6505  }
6506 
6507  bool operator()(const xml_attribute& a) const
6508  {
6509  const char_t* name = a.name();
6510 
6511  if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
6512 
6513  return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
6514  }
6515  };
6516 
6517  PUGI__FN const char_t* namespace_uri(const xml_node& node)
6518  {
6519  namespace_uri_predicate pred = node.name();
6520 
6521  xml_node p = node;
6522 
6523  while (p)
6524  {
6525  xml_attribute a = p.find_attribute(pred);
6526 
6527  if (a) return a.value();
6528 
6529  p = p.parent();
6530  }
6531 
6532  return PUGIXML_TEXT("");
6533  }
6534 
6535  PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
6536  {
6537  namespace_uri_predicate pred = attr.name();
6538 
6539  // Default namespace does not apply to attributes
6540  if (!pred.prefix) return PUGIXML_TEXT("");
6541 
6542  xml_node p = parent;
6543 
6544  while (p)
6545  {
6546  xml_attribute a = p.find_attribute(pred);
6547 
6548  if (a) return a.value();
6549 
6550  p = p.parent();
6551  }
6552 
6553  return PUGIXML_TEXT("");
6554  }
6555 
6556  PUGI__FN const char_t* namespace_uri(const xpath_node& node)
6557  {
6558  return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
6559  }
6560 
6561  PUGI__FN void normalize_space(char_t* buffer)
6562  {
6563  char_t* write = buffer;
6564 
6565  for (char_t* it = buffer; *it; )
6566  {
6567  char_t ch = *it++;
6568 
6569  if (PUGI__IS_CHARTYPE(ch, ct_space))
6570  {
6571  // replace whitespace sequence with single space
6572  while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
6573 
6574  // avoid leading spaces
6575  if (write != buffer) *write++ = ' ';
6576  }
6577  else *write++ = ch;
6578  }
6579 
6580  // remove trailing space
6581  if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
6582 
6583  // zero-terminate
6584  *write = 0;
6585  }
6586 
6587  PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
6588  {
6589  size_t to_length = strlength(to);
6590 
6591  char_t* write = buffer;
6592 
6593  while (*buffer)
6594  {
6595  PUGI__DMC_VOLATILE char_t ch = *buffer++;
6596 
6597  const char_t* pos = find_char(from, ch);
6598 
6599  if (!pos)
6600  *write++ = ch; // do not process
6601  else if (static_cast<size_t>(pos - from) < to_length)
6602  *write++ = to[pos - from]; // replace
6603  }
6604 
6605  // zero-terminate
6606  *write = 0;
6607  }
6608 
6609  struct xpath_variable_boolean: xpath_variable
6610  {
6611  xpath_variable_boolean(): value(false)
6612  {
6613  }
6614 
6615  bool value;
6616  char_t name[1];
6617  };
6618 
6619  struct xpath_variable_number: xpath_variable
6620  {
6621  xpath_variable_number(): value(0)
6622  {
6623  }
6624 
6625  double value;
6626  char_t name[1];
6627  };
6628 
6629  struct xpath_variable_string: xpath_variable
6630  {
6631  xpath_variable_string(): value(0)
6632  {
6633  }
6634 
6635  ~xpath_variable_string()
6636  {
6637  if (value) xml_memory::deallocate(value);
6638  }
6639 
6640  char_t* value;
6641  char_t name[1];
6642  };
6643 
6644  struct xpath_variable_node_set: xpath_variable
6645  {
6646  xpath_node_set value;
6647  char_t name[1];
6648  };
6649 
6650  static const xpath_node_set dummy_node_set;
6651 
6652  PUGI__FN unsigned int hash_string(const char_t* str)
6653  {
6654  // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
6655  unsigned int result = 0;
6656 
6657  while (*str)
6658  {
6659  result += static_cast<unsigned int>(*str++);
6660  result += result << 10;
6661  result ^= result >> 6;
6662  }
6663 
6664  result += result << 3;
6665  result ^= result >> 11;
6666  result += result << 15;
6667 
6668  return result;
6669  }
6670 
6671  template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
6672  {
6673  size_t length = strlength(name);
6674  if (length == 0) return 0; // empty variable names are invalid
6675 
6676  // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
6677  void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
6678  if (!memory) return 0;
6679 
6680  T* result = new (memory) T();
6681 
6682  memcpy(result->name, name, (length + 1) * sizeof(char_t));
6683 
6684  return result;
6685  }
6686 
6687  PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
6688  {
6689  switch (type)
6690  {
6691  case xpath_type_node_set:
6692  return new_xpath_variable<xpath_variable_node_set>(name);
6693 
6694  case xpath_type_number:
6695  return new_xpath_variable<xpath_variable_number>(name);
6696 
6697  case xpath_type_string:
6698  return new_xpath_variable<xpath_variable_string>(name);
6699 
6700  case xpath_type_boolean:
6701  return new_xpath_variable<xpath_variable_boolean>(name);
6702 
6703  default:
6704  return 0;
6705  }
6706  }
6707 
6708  template <typename T> PUGI__FN void delete_xpath_variable(T* var)
6709  {
6710  var->~T();
6711  xml_memory::deallocate(var);
6712  }
6713 
6714  PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
6715  {
6716  switch (type)
6717  {
6718  case xpath_type_node_set:
6719  delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
6720  break;
6721 
6722  case xpath_type_number:
6723  delete_xpath_variable(static_cast<xpath_variable_number*>(var));
6724  break;
6725 
6726  case xpath_type_string:
6727  delete_xpath_variable(static_cast<xpath_variable_string*>(var));
6728  break;
6729 
6730  case xpath_type_boolean:
6731  delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
6732  break;
6733 
6734  default:
6735  assert(!"Invalid variable type");
6736  }
6737  }
6738 
6739  PUGI__FN xpath_variable* get_variable(xpath_variable_set* set, const char_t* begin, const char_t* end)
6740  {
6741  char_t buffer[32];
6742 
6743  size_t length = static_cast<size_t>(end - begin);
6744  char_t* scratch = buffer;
6745 
6746  if (length >= sizeof(buffer) / sizeof(buffer[0]))
6747  {
6748  // need to make dummy on-heap copy
6749  scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
6750  if (!scratch) return 0;
6751  }
6752 
6753  // copy string to zero-terminated buffer and perform lookup
6754  memcpy(scratch, begin, length * sizeof(char_t));
6755  scratch[length] = 0;
6756 
6757  xpath_variable* result = set->get(scratch);
6758 
6759  // free dummy buffer
6760  if (scratch != buffer) xml_memory::deallocate(scratch);
6761 
6762  return result;
6763  }
6764 PUGI__NS_END
6765 
6766 // Internal node set class
6767 PUGI__NS_BEGIN
6768  PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
6769  {
6770  xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
6771 
6772  if (type == xpath_node_set::type_unsorted)
6773  {
6774  sort(begin, end, document_order_comparator());
6775 
6776  type = xpath_node_set::type_sorted;
6777  }
6778 
6779  if (type != order) reverse(begin, end);
6780 
6781  return order;
6782  }
6783 
6784  PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
6785  {
6786  if (begin == end) return xpath_node();
6787 
6788  switch (type)
6789  {
6790  case xpath_node_set::type_sorted:
6791  return *begin;
6792 
6793  case xpath_node_set::type_sorted_reverse:
6794  return *(end - 1);
6795 
6796  case xpath_node_set::type_unsorted:
6797  return *min_element(begin, end, document_order_comparator());
6798 
6799  default:
6800  assert(!"Invalid node set type");
6801  return xpath_node();
6802  }
6803  }
6804 
6805  class xpath_node_set_raw
6806  {
6807  xpath_node_set::type_t _type;
6808 
6809  xpath_node* _begin;
6810  xpath_node* _end;
6811  xpath_node* _eos;
6812 
6813  public:
6814  xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
6815  {
6816  }
6817 
6818  xpath_node* begin() const
6819  {
6820  return _begin;
6821  }
6822 
6823  xpath_node* end() const
6824  {
6825  return _end;
6826  }
6827 
6828  bool empty() const
6829  {
6830  return _begin == _end;
6831  }
6832 
6833  size_t size() const
6834  {
6835  return static_cast<size_t>(_end - _begin);
6836  }
6837 
6838  xpath_node first() const
6839  {
6840  return xpath_first(_begin, _end, _type);
6841  }
6842 
6843  void push_back(const xpath_node& node, xpath_allocator* alloc)
6844  {
6845  if (_end == _eos)
6846  {
6847  size_t capacity = static_cast<size_t>(_eos - _begin);
6848 
6849  // get new capacity (1.5x rule)
6850  size_t new_capacity = capacity + capacity / 2 + 1;
6851 
6852  // reallocate the old array or allocate a new one
6853  xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
6854  assert(data);
6855 
6856  // finalize
6857  _begin = data;
6858  _end = data + capacity;
6859  _eos = data + new_capacity;
6860  }
6861 
6862  *_end++ = node;
6863  }
6864 
6865  void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
6866  {
6867  size_t size_ = static_cast<size_t>(_end - _begin);
6868  size_t capacity = static_cast<size_t>(_eos - _begin);
6869  size_t count = static_cast<size_t>(end_ - begin_);
6870 
6871  if (size_ + count > capacity)
6872  {
6873  // reallocate the old array or allocate a new one
6874  xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
6875  assert(data);
6876 
6877  // finalize
6878  _begin = data;
6879  _end = data + size_;
6880  _eos = data + size_ + count;
6881  }
6882 
6883  memcpy(_end, begin_, count * sizeof(xpath_node));
6884  _end += count;
6885  }
6886 
6887  void sort_do()
6888  {
6889  _type = xpath_sort(_begin, _end, _type, false);
6890  }
6891 
6892  void truncate(xpath_node* pos)
6893  {
6894  assert(_begin <= pos && pos <= _end);
6895 
6896  _end = pos;
6897  }
6898 
6899  void remove_duplicates()
6900  {
6901  if (_type == xpath_node_set::type_unsorted)
6902  sort(_begin, _end, duplicate_comparator());
6903 
6904  _end = unique(_begin, _end);
6905  }
6906 
6907  xpath_node_set::type_t type() const
6908  {
6909  return _type;
6910  }
6911 
6912  void set_type(xpath_node_set::type_t value)
6913  {
6914  _type = value;
6915  }
6916  };
6917 PUGI__NS_END
6918 
6919 PUGI__NS_BEGIN
6920  struct xpath_context
6921  {
6922  xpath_node n;
6923  size_t position, size;
6924 
6925  xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
6926  {
6927  }
6928  };
6929 
// Token kinds produced by xpath_lexer
enum lexeme_t
{
    lex_none = 0,           // input that does not form a valid token

    // comparison operators
    lex_equal,              // =
    lex_not_equal,          // !=
    lex_less,               // <
    lex_greater,            // >
    lex_less_or_equal,      // <=
    lex_greater_or_equal,   // >=

    // arithmetic and set operators
    lex_plus,               // +
    lex_minus,              // -
    lex_multiply,           // *
    lex_union,              // |

    lex_var_ref,            // $name or $prefix:name
    lex_open_brace,         // (
    lex_close_brace,        // )
    lex_quoted_string,      // "..." or '...'
    lex_number,             // digits with an optional fraction, or '.' followed by digits
    lex_slash,              // /
    lex_double_slash,       // //
    lex_open_square_brace,  // [
    lex_close_square_brace, // ]
    lex_string,             // name, prefix:name or prefix:*
    lex_comma,              // ,
    lex_axis_attribute,     // @
    lex_dot,                // .
    lex_double_dot,         // ..
    lex_double_colon,       // ::
    lex_eof                 // end of the query string
};
6960 
6961  struct xpath_lexer_string
6962  {
6963  const char_t* begin;
6964  const char_t* end;
6965 
6966  xpath_lexer_string(): begin(0), end(0)
6967  {
6968  }
6969 
6970  bool operator==(const char_t* other) const
6971  {
6972  size_t length = static_cast<size_t>(end - begin);
6973 
6974  return strequalrange(other, begin, length);
6975  }
6976  };
6977 
6978  class xpath_lexer
6979  {
6980  const char_t* _cur;
6981  const char_t* _cur_lexeme_pos;
6982  xpath_lexer_string _cur_lexeme_contents;
6983 
6984  lexeme_t _cur_lexeme;
6985 
6986  public:
6987  explicit xpath_lexer(const char_t* query): _cur(query)
6988  {
6989  next();
6990  }
6991 
6992  const char_t* state() const
6993  {
6994  return _cur;
6995  }
6996 
6997  void next()
6998  {
6999  const char_t* cur = _cur;
7000 
7001  while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
7002 
7003  // save lexeme position for error reporting
7004  _cur_lexeme_pos = cur;
7005 
7006  switch (*cur)
7007  {
7008  case 0:
7009  _cur_lexeme = lex_eof;
7010  break;
7011 
7012  case '>':
7013  if (*(cur+1) == '=')
7014  {
7015  cur += 2;
7016  _cur_lexeme = lex_greater_or_equal;
7017  }
7018  else
7019  {
7020  cur += 1;
7021  _cur_lexeme = lex_greater;
7022  }
7023  break;
7024 
7025  case '<':
7026  if (*(cur+1) == '=')
7027  {
7028  cur += 2;
7029  _cur_lexeme = lex_less_or_equal;
7030  }
7031  else
7032  {
7033  cur += 1;
7034  _cur_lexeme = lex_less;
7035  }
7036  break;
7037 
7038  case '!':
7039  if (*(cur+1) == '=')
7040  {
7041  cur += 2;
7042  _cur_lexeme = lex_not_equal;
7043  }
7044  else
7045  {
7046  _cur_lexeme = lex_none;
7047  }
7048  break;
7049 
7050  case '=':
7051  cur += 1;
7052  _cur_lexeme = lex_equal;
7053 
7054  break;
7055 
7056  case '+':
7057  cur += 1;
7058  _cur_lexeme = lex_plus;
7059 
7060  break;
7061 
7062  case '-':
7063  cur += 1;
7064  _cur_lexeme = lex_minus;
7065 
7066  break;
7067 
7068  case '*':
7069  cur += 1;
7070  _cur_lexeme = lex_multiply;
7071 
7072  break;
7073 
7074  case '|':
7075  cur += 1;
7076  _cur_lexeme = lex_union;
7077 
7078  break;
7079 
7080  case '$':
7081  cur += 1;
7082 
7083  if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
7084  {
7085  _cur_lexeme_contents.begin = cur;
7086 
7087  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7088 
7089  if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
7090  {
7091  cur++; // :
7092 
7093  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7094  }
7095 
7096  _cur_lexeme_contents.end = cur;
7097 
7098  _cur_lexeme = lex_var_ref;
7099  }
7100  else
7101  {
7102  _cur_lexeme = lex_none;
7103  }
7104 
7105  break;
7106 
7107  case '(':
7108  cur += 1;
7109  _cur_lexeme = lex_open_brace;
7110 
7111  break;
7112 
7113  case ')':
7114  cur += 1;
7115  _cur_lexeme = lex_close_brace;
7116 
7117  break;
7118 
7119  case '[':
7120  cur += 1;
7121  _cur_lexeme = lex_open_square_brace;
7122 
7123  break;
7124 
7125  case ']':
7126  cur += 1;
7127  _cur_lexeme = lex_close_square_brace;
7128 
7129  break;
7130 
7131  case ',':
7132  cur += 1;
7133  _cur_lexeme = lex_comma;
7134 
7135  break;
7136 
7137  case '/':
7138  if (*(cur+1) == '/')
7139  {
7140  cur += 2;
7141  _cur_lexeme = lex_double_slash;
7142  }
7143  else
7144  {
7145  cur += 1;
7146  _cur_lexeme = lex_slash;
7147  }
7148  break;
7149 
7150  case '.':
7151  if (*(cur+1) == '.')
7152  {
7153  cur += 2;
7154  _cur_lexeme = lex_double_dot;
7155  }
7156  else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
7157  {
7158  _cur_lexeme_contents.begin = cur; // .
7159 
7160  ++cur;
7161 
7162  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
7163 
7164  _cur_lexeme_contents.end = cur;
7165 
7166  _cur_lexeme = lex_number;
7167  }
7168  else
7169  {
7170  cur += 1;
7171  _cur_lexeme = lex_dot;
7172  }
7173  break;
7174 
7175  case '@':
7176  cur += 1;
7177  _cur_lexeme = lex_axis_attribute;
7178 
7179  break;
7180 
7181  case '"':
7182  case '\'':
7183  {
7184  char_t terminator = *cur;
7185 
7186  ++cur;
7187 
7188  _cur_lexeme_contents.begin = cur;
7189  while (*cur && *cur != terminator) cur++;
7190  _cur_lexeme_contents.end = cur;
7191 
7192  if (!*cur)
7193  _cur_lexeme = lex_none;
7194  else
7195  {
7196  cur += 1;
7197  _cur_lexeme = lex_quoted_string;
7198  }
7199 
7200  break;
7201  }
7202 
7203  case ':':
7204  if (*(cur+1) == ':')
7205  {
7206  cur += 2;
7207  _cur_lexeme = lex_double_colon;
7208  }
7209  else
7210  {
7211  _cur_lexeme = lex_none;
7212  }
7213  break;
7214 
7215  default:
7216  if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
7217  {
7218  _cur_lexeme_contents.begin = cur;
7219 
7220  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
7221 
7222  if (*cur == '.')
7223  {
7224  cur++;
7225 
7226  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
7227  }
7228 
7229  _cur_lexeme_contents.end = cur;
7230 
7231  _cur_lexeme = lex_number;
7232  }
7233  else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
7234  {
7235  _cur_lexeme_contents.begin = cur;
7236 
7237  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7238 
7239  if (cur[0] == ':')
7240  {
7241  if (cur[1] == '*') // namespace test ncname:*
7242  {
7243  cur += 2; // :*
7244  }
7245  else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
7246  {
7247  cur++; // :
7248 
7249  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
7250  }
7251  }
7252 
7253  _cur_lexeme_contents.end = cur;
7254 
7255  _cur_lexeme = lex_string;
7256  }
7257  else
7258  {
7259  _cur_lexeme = lex_none;
7260  }
7261  }
7262 
7263  _cur = cur;
7264  }
7265 
7266  lexeme_t current() const
7267  {
7268  return _cur_lexeme;
7269  }
7270 
7271  const char_t* current_pos() const
7272  {
7273  return _cur_lexeme_pos;
7274  }
7275 
7276  const xpath_lexer_string& contents() const
7277  {
7278  assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
7279 
7280  return _cur_lexeme_contents;
7281  }
7282  };
7283 
// Kinds of XPath syntax tree nodes
enum ast_type_t
{
    // logical and comparison operators
    ast_op_or,                  // left or right
    ast_op_and,                 // left and right
    ast_op_equal,               // left = right
    ast_op_not_equal,           // left != right
    ast_op_less,                // left < right
    ast_op_greater,             // left > right
    ast_op_less_or_equal,       // left <= right
    ast_op_greater_or_equal,    // left >= right

    // arithmetic operators
    ast_op_add,                 // left + right
    ast_op_subtract,            // left - right
    ast_op_multiply,            // left * right
    ast_op_divide,              // left div right
    ast_op_mod,                 // left mod right
    ast_op_negate,              // - left

    // node set operators
    ast_op_union,               // left | right
    ast_predicate,              // apply predicate to set; next points to next predicate
    ast_filter,                 // select * from left where right
    ast_filter_posinv,          // select * from left where right; proximity position invariant

    // constants and variables
    ast_string_constant,        // string constant
    ast_number_constant,        // number constant
    ast_variable,               // variable

    // functions
    ast_func_last,              // last()
    ast_func_position,          // position()
    ast_func_count,             // count(left)
    ast_func_id,                // id(left)
    ast_func_local_name_0,      // local-name()
    ast_func_local_name_1,      // local-name(left)
    ast_func_namespace_uri_0,   // namespace-uri()
    ast_func_namespace_uri_1,   // namespace-uri(left)
    ast_func_name_0,            // name()
    ast_func_name_1,            // name(left)
    ast_func_string_0,          // string()
    ast_func_string_1,          // string(left)
    ast_func_concat,            // concat(left, right, siblings)
    ast_func_starts_with,       // starts-with(left, right)
    ast_func_contains,          // contains(left, right)
    ast_func_substring_before,  // substring-before(left, right)
    ast_func_substring_after,   // substring-after(left, right)
    ast_func_substring_2,       // substring(left, right)
    ast_func_substring_3,       // substring(left, right, third)
    ast_func_string_length_0,   // string-length()
    ast_func_string_length_1,   // string-length(left)
    ast_func_normalize_space_0, // normalize-space()
    ast_func_normalize_space_1, // normalize-space(left)
    ast_func_translate,         // translate(left, right, third)
    ast_func_boolean,           // boolean(left)
    ast_func_not,               // not(left)
    ast_func_true,              // true()
    ast_func_false,             // false()
    ast_func_lang,              // lang(left)
    ast_func_number_0,          // number()
    ast_func_number_1,          // number(left)
    ast_func_sum,               // sum(left)
    ast_func_floor,             // floor(left)
    ast_func_ceiling,           // ceiling(left)
    ast_func_round,             // round(left)

    // location steps
    ast_step,                   // process set left with step
    ast_step_root               // select root node
};
7345 
// Axes of a location step
enum axis_t
{
    // upwards
    axis_ancestor,
    axis_ancestor_or_self,

    axis_attribute,

    // downwards
    axis_child,
    axis_descendant,
    axis_descendant_or_self,

    // forwards
    axis_following,
    axis_following_sibling,

    axis_namespace,
    axis_parent,

    // backwards
    axis_preceding,
    axis_preceding_sibling,

    axis_self
};
7362 
// Node tests of a location step (see step_push for how each one is applied)
enum nodetest_t
{
    nodetest_none,
    nodetest_name,            // name equals the stored node test string
    nodetest_type_node,       // accepts every node
    nodetest_type_comment,    // comment nodes
    nodetest_type_pi,         // processing instruction nodes
    nodetest_type_text,       // pcdata and cdata nodes
    nodetest_pi,              // processing instruction whose name equals the stored string
    nodetest_all,             // every element (or every attribute on the attribute axis)
    nodetest_all_in_namespace // name starts with the stored node test string
};
7375 
7376  template <axis_t N> struct axis_to_type
7377  {
7378  static const axis_t axis;
7379  };
7380 
7381  template <axis_t N> const axis_t axis_to_type<N>::axis = N;
7382 
7383  class xpath_ast_node
7384  {
7385  private:
7386  // node type
7387  char _type;
7388  char _rettype;
7389 
7390  // for ast_step / ast_predicate
7391  char _axis;
7392  char _test;
7393 
7394  // tree node structure
7395  xpath_ast_node* _left;
7396  xpath_ast_node* _right;
7397  xpath_ast_node* _next;
7398 
7399  union
7400  {
7401  // value for ast_string_constant
7402  const char_t* string;
7403  // value for ast_number_constant
7404  double number;
7405  // variable for ast_variable
7406  xpath_variable* variable;
7407  // node test for ast_step (node name/namespace/node type/pi target)
7408  const char_t* nodetest;
7409  } _data;
7410 
7411  xpath_ast_node(const xpath_ast_node&);
7412  xpath_ast_node& operator=(const xpath_ast_node&);
7413 
7414  template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
7415  {
7416  xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
7417 
7418  if (lt != xpath_type_node_set && rt != xpath_type_node_set)
7419  {
7420  if (lt == xpath_type_boolean || rt == xpath_type_boolean)
7421  return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7422  else if (lt == xpath_type_number || rt == xpath_type_number)
7423  return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
7424  else if (lt == xpath_type_string || rt == xpath_type_string)
7425  {
7426  xpath_allocator_capture cr(stack.result);
7427 
7428  xpath_string ls = lhs->eval_string(c, stack);
7429  xpath_string rs = rhs->eval_string(c, stack);
7430 
7431  return comp(ls, rs);
7432  }
7433  }
7434  else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
7435  {
7436  xpath_allocator_capture cr(stack.result);
7437 
7438  xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
7439  xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
7440 
7441  for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
7442  for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
7443  {
7444  xpath_allocator_capture cri(stack.result);
7445 
7446  if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
7447  return true;
7448  }
7449 
7450  return false;
7451  }
7452  else
7453  {
7454  if (lt == xpath_type_node_set)
7455  {
7456  swap(lhs, rhs);
7457  swap(lt, rt);
7458  }
7459 
7460  if (lt == xpath_type_boolean)
7461  return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7462  else if (lt == xpath_type_number)
7463  {
7464  xpath_allocator_capture cr(stack.result);
7465 
7466  double l = lhs->eval_number(c, stack);
7467  xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
7468 
7469  for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
7470  {
7471  xpath_allocator_capture cri(stack.result);
7472 
7473  if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
7474  return true;
7475  }
7476 
7477  return false;
7478  }
7479  else if (lt == xpath_type_string)
7480  {
7481  xpath_allocator_capture cr(stack.result);
7482 
7483  xpath_string l = lhs->eval_string(c, stack);
7484  xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
7485 
7486  for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
7487  {
7488  xpath_allocator_capture cri(stack.result);
7489 
7490  if (comp(l, string_value(*ri, stack.result)))
7491  return true;
7492  }
7493 
7494  return false;
7495  }
7496  }
7497 
7498  assert(!"Wrong types");
7499  return false;
7500  }
7501 
7502  template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
7503  {
7504  xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
7505 
7506  if (lt != xpath_type_node_set && rt != xpath_type_node_set)
7507  return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
7508  else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
7509  {
7510  xpath_allocator_capture cr(stack.result);
7511 
7512  xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
7513  xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
7514 
7515  for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
7516  {
7517  xpath_allocator_capture cri(stack.result);
7518 
7519  double l = convert_string_to_number(string_value(*li, stack.result).c_str());
7520 
7521  for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
7522  {
7523  xpath_allocator_capture crii(stack.result);
7524 
7525  if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
7526  return true;
7527  }
7528  }
7529 
7530  return false;
7531  }
7532  else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
7533  {
7534  xpath_allocator_capture cr(stack.result);
7535 
7536  double l = lhs->eval_number(c, stack);
7537  xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
7538 
7539  for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
7540  {
7541  xpath_allocator_capture cri(stack.result);
7542 
7543  if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
7544  return true;
7545  }
7546 
7547  return false;
7548  }
7549  else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
7550  {
7551  xpath_allocator_capture cr(stack.result);
7552 
7553  xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
7554  double r = rhs->eval_number(c, stack);
7555 
7556  for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
7557  {
7558  xpath_allocator_capture cri(stack.result);
7559 
7560  if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
7561  return true;
7562  }
7563 
7564  return false;
7565  }
7566  else
7567  {
7568  assert(!"Wrong types");
7569  return false;
7570  }
7571  }
7572 
7573  void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
7574  {
7575  assert(ns.size() >= first);
7576 
7577  size_t i = 1;
7578  size_t size = ns.size() - first;
7579 
7580  xpath_node* last = ns.begin() + first;
7581 
7582  // remove_if... or well, sort of
7583  for (xpath_node* it = last; it != ns.end(); ++it, ++i)
7584  {
7585  xpath_context c(*it, i, size);
7586 
7587  if (expr->rettype() == xpath_type_number)
7588  {
7589  if (expr->eval_number(c, stack) == i)
7590  *last++ = *it;
7591  }
7592  else if (expr->eval_boolean(c, stack))
7593  *last++ = *it;
7594  }
7595 
7596  ns.truncate(last);
7597  }
7598 
7599  void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
7600  {
7601  if (ns.size() == first) return;
7602 
7603  for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
7604  {
7605  apply_predicate(ns, first, pred->_left, stack);
7606  }
7607  }
7608 
7609  void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
7610  {
7611  if (!a) return;
7612 
7613  const char_t* name = a.name();
7614 
7615  // There are no attribute nodes corresponding to attributes that declare namespaces
7616  // That is, "xmlns:..." or "xmlns"
7617  if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
7618 
7619  switch (_test)
7620  {
7621  case nodetest_name:
7622  if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
7623  break;
7624 
7625  case nodetest_type_node:
7626  case nodetest_all:
7627  ns.push_back(xpath_node(a, parent), alloc);
7628  break;
7629 
7630  case nodetest_all_in_namespace:
7631  if (starts_with(name, _data.nodetest))
7632  ns.push_back(xpath_node(a, parent), alloc);
7633  break;
7634 
7635  default:
7636  ;
7637  }
7638  }
7639 
7640  void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
7641  {
7642  if (!n) return;
7643 
7644  switch (_test)
7645  {
7646  case nodetest_name:
7647  if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
7648  break;
7649 
7650  case nodetest_type_node:
7651  ns.push_back(n, alloc);
7652  break;
7653 
7654  case nodetest_type_comment:
7655  if (n.type() == node_comment)
7656  ns.push_back(n, alloc);
7657  break;
7658 
7659  case nodetest_type_text:
7660  if (n.type() == node_pcdata || n.type() == node_cdata)
7661  ns.push_back(n, alloc);
7662  break;
7663 
7664  case nodetest_type_pi:
7665  if (n.type() == node_pi)
7666  ns.push_back(n, alloc);
7667  break;
7668 
7669  case nodetest_pi:
7670  if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
7671  ns.push_back(n, alloc);
7672  break;
7673 
7674  case nodetest_all:
7675  if (n.type() == node_element)
7676  ns.push_back(n, alloc);
7677  break;
7678 
7679  case nodetest_all_in_namespace:
7680  if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
7681  ns.push_back(n, alloc);
7682  break;
7683 
7684  default:
7685  assert(!"Unknown axis");
7686  }
7687  }
7688 
7689  template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
7690  {
7691  const axis_t axis = T::axis;
7692 
7693  switch (axis)
7694  {
7695  case axis_attribute:
7696  {
7697  for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
7698  step_push(ns, a, n, alloc);
7699 
7700  break;
7701  }
7702 
7703  case axis_child:
7704  {
7705  for (xml_node c = n.first_child(); c; c = c.next_sibling())
7706  step_push(ns, c, alloc);
7707 
7708  break;
7709  }
7710 
7711  case axis_descendant:
7712  case axis_descendant_or_self:
7713  {
7714  if (axis == axis_descendant_or_self)
7715  step_push(ns, n, alloc);
7716 
7717  xml_node cur = n.first_child();
7718 
7719  while (cur && cur != n)
7720  {
7721  step_push(ns, cur, alloc);
7722 
7723  if (cur.first_child())
7724  cur = cur.first_child();
7725  else if (cur.next_sibling())
7726  cur = cur.next_sibling();
7727  else
7728  {
7729  while (!cur.next_sibling() && cur != n)
7730  cur = cur.parent();
7731 
7732  if (cur != n) cur = cur.next_sibling();
7733  }
7734  }
7735 
7736  break;
7737  }
7738 
7739  case axis_following_sibling:
7740  {
7741  for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
7742  step_push(ns, c, alloc);
7743 
7744  break;
7745  }
7746 
7747  case axis_preceding_sibling:
7748  {
7749  for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
7750  step_push(ns, c, alloc);
7751 
7752  break;
7753  }
7754 
7755  case axis_following:
7756  {
7757  xml_node cur = n;
7758 
7759  // exit from this node so that we don't include descendants
7760  while (cur && !cur.next_sibling()) cur = cur.parent();
7761  cur = cur.next_sibling();
7762 
7763  for (;;)
7764  {
7765  step_push(ns, cur, alloc);
7766 
7767  if (cur.first_child())
7768  cur = cur.first_child();
7769  else if (cur.next_sibling())
7770  cur = cur.next_sibling();
7771  else
7772  {
7773  while (cur && !cur.next_sibling()) cur = cur.parent();
7774  cur = cur.next_sibling();
7775 
7776  if (!cur) break;
7777  }
7778  }
7779 
7780  break;
7781  }
7782 
7783  case axis_preceding:
7784  {
7785  xml_node cur = n;
7786 
7787  while (cur && !cur.previous_sibling()) cur = cur.parent();
7788  cur = cur.previous_sibling();
7789 
7790  for (;;)
7791  {
7792  if (cur.last_child())
7793  cur = cur.last_child();
7794  else
7795  {
7796  // leaf node, can't be ancestor
7797  step_push(ns, cur, alloc);
7798 
7799  if (cur.previous_sibling())
7800  cur = cur.previous_sibling();
7801  else
7802  {
7803  do
7804  {
7805  cur = cur.parent();
7806  if (!cur) break;
7807 
7808  if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
7809  }
7810  while (!cur.previous_sibling());
7811 
7812  cur = cur.previous_sibling();
7813 
7814  if (!cur) break;
7815  }
7816  }
7817  }
7818 
7819  break;
7820  }
7821 
7822  case axis_ancestor:
7823  case axis_ancestor_or_self:
7824  {
7825  if (axis == axis_ancestor_or_self)
7826  step_push(ns, n, alloc);
7827 
7828  xml_node cur = n.parent();
7829 
7830  while (cur)
7831  {
7832  step_push(ns, cur, alloc);
7833 
7834  cur = cur.parent();
7835  }
7836 
7837  break;
7838  }
7839 
7840  case axis_self:
7841  {
7842  step_push(ns, n, alloc);
7843 
7844  break;
7845  }
7846 
7847  case axis_parent:
7848  {
7849  if (n.parent()) step_push(ns, n.parent(), alloc);
7850 
7851  break;
7852  }
7853 
7854  default:
7855  assert(!"Unimplemented axis");
7856  }
7857  }
7858 
7859  template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
7860  {
7861  const axis_t axis = T::axis;
7862 
7863  switch (axis)
7864  {
7865  case axis_ancestor:
7866  case axis_ancestor_or_self:
7867  {
7868  if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
7869  step_push(ns, a, p, alloc);
7870 
7871  xml_node cur = p;
7872 
7873  while (cur)
7874  {
7875  step_push(ns, cur, alloc);
7876 
7877  cur = cur.parent();
7878  }
7879 
7880  break;
7881  }
7882 
7883  case axis_descendant_or_self:
7884  case axis_self:
7885  {
7886  if (_test == nodetest_type_node) // reject attributes based on principal node type test
7887  step_push(ns, a, p, alloc);
7888 
7889  break;
7890  }
7891 
7892  case axis_following:
7893  {
7894  xml_node cur = p;
7895 
7896  for (;;)
7897  {
7898  if (cur.first_child())
7899  cur = cur.first_child();
7900  else if (cur.next_sibling())
7901  cur = cur.next_sibling();
7902  else
7903  {
7904  while (cur && !cur.next_sibling()) cur = cur.parent();
7905  cur = cur.next_sibling();
7906 
7907  if (!cur) break;
7908  }
7909 
7910  step_push(ns, cur, alloc);
7911  }
7912 
7913  break;
7914  }
7915 
7916  case axis_parent:
7917  {
7918  step_push(ns, p, alloc);
7919 
7920  break;
7921  }
7922 
7923  case axis_preceding:
7924  {
7925  // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
7926  step_fill(ns, p, alloc, v);
7927  break;
7928  }
7929 
7930  default:
7931  assert(!"Unimplemented axis");
7932  }
7933  }
7934 
7935  template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
7936  {
7937  const axis_t axis = T::axis;
7938  bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
7939 
7940  xpath_node_set_raw ns;
7941  ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
7942 
7943  if (_left)
7944  {
7945  xpath_node_set_raw s = _left->eval_node_set(c, stack);
7946 
7947  // self axis preserves the original order
7948  if (axis == axis_self) ns.set_type(s.type());
7949 
7950  for (const xpath_node* it = s.begin(); it != s.end(); ++it)
7951  {
7952  size_t size = ns.size();
7953 
7954  // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
7955  if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
7956 
7957  if (it->node())
7958  step_fill(ns, it->node(), stack.result, v);
7959  else if (attributes)
7960  step_fill(ns, it->attribute(), it->parent(), stack.result, v);
7961 
7962  apply_predicates(ns, size, stack);
7963  }
7964  }
7965  else
7966  {
7967  if (c.n.node())
7968  step_fill(ns, c.n.node(), stack.result, v);
7969  else if (attributes)
7970  step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
7971 
7972  apply_predicates(ns, 0, stack);
7973  }
7974 
7975  // child, attribute and self axes always generate unique set of nodes
7976  // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
7977  if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
7978  ns.remove_duplicates();
7979 
7980  return ns;
7981  }
7982 
7983  public:
7984  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
7985  _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7986  {
7987  assert(type == ast_string_constant);
7988  _data.string = value;
7989  }
7990 
7991  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
7992  _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7993  {
7994  assert(type == ast_number_constant);
7995  _data.number = value;
7996  }
7997 
7998  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
7999  _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
8000  {
8001  assert(type == ast_variable);
8002  _data.variable = value;
8003  }
8004 
8005  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
8006  _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
8007  {
8008  }
8009 
8010  xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
8011  _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
8012  {
8013  _data.nodetest = contents;
8014  }
8015 
8016  void set_next(xpath_ast_node* value)
8017  {
8018  _next = value;
8019  }
8020 
8021  void set_right(xpath_ast_node* value)
8022  {
8023  _right = value;
8024  }
8025 
8026  bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
8027  {
8028  switch (_type)
8029  {
8030  case ast_op_or:
8031  return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
8032 
8033  case ast_op_and:
8034  return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
8035 
8036  case ast_op_equal:
8037  return compare_eq(_left, _right, c, stack, equal_to());
8038 
8039  case ast_op_not_equal:
8040  return compare_eq(_left, _right, c, stack, not_equal_to());
8041 
8042  case ast_op_less:
8043  return compare_rel(_left, _right, c, stack, less());
8044 
8045  case ast_op_greater:
8046  return compare_rel(_right, _left, c, stack, less());
8047 
8048  case ast_op_less_or_equal:
8049  return compare_rel(_left, _right, c, stack, less_equal());
8050 
8051  case ast_op_greater_or_equal:
8052  return compare_rel(_right, _left, c, stack, less_equal());
8053 
8054  case ast_func_starts_with:
8055  {
8056  xpath_allocator_capture cr(stack.result);
8057 
8058  xpath_string lr = _left->eval_string(c, stack);
8059  xpath_string rr = _right->eval_string(c, stack);
8060 
8061  return starts_with(lr.c_str(), rr.c_str());
8062  }
8063 
8064  case ast_func_contains:
8065  {
8066  xpath_allocator_capture cr(stack.result);
8067 
8068  xpath_string lr = _left->eval_string(c, stack);
8069  xpath_string rr = _right->eval_string(c, stack);
8070 
8071  return find_substring(lr.c_str(), rr.c_str()) != 0;
8072  }
8073 
8074  case ast_func_boolean:
8075  return _left->eval_boolean(c, stack);
8076 
8077  case ast_func_not:
8078  return !_left->eval_boolean(c, stack);
8079 
8080  case ast_func_true:
8081  return true;
8082 
8083  case ast_func_false:
8084  return false;
8085 
8086  case ast_func_lang:
8087  {
8088  if (c.n.attribute()) return false;
8089 
8090  xpath_allocator_capture cr(stack.result);
8091 
8092  xpath_string lang = _left->eval_string(c, stack);
8093 
8094  for (xml_node n = c.n.node(); n; n = n.parent())
8095  {
8096  xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
8097 
8098  if (a)
8099  {
8100  const char_t* value = a.value();
8101 
8102  // strnicmp / strncasecmp is not portable
8103  for (const char_t* lit = lang.c_str(); *lit; ++lit)
8104  {
8105  if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
8106  ++value;
8107  }
8108 
8109  return *value == 0 || *value == '-';
8110  }
8111  }
8112 
8113  return false;
8114  }
8115 
8116  case ast_variable:
8117  {
8118  assert(_rettype == _data.variable->type());
8119 
8120  if (_rettype == xpath_type_boolean)
8121  return _data.variable->get_boolean();
8122 
8123  // fallthrough to type conversion
8124  }
8125 
8126  default:
8127  {
8128  switch (_rettype)
8129  {
8130  case xpath_type_number:
8131  return convert_number_to_boolean(eval_number(c, stack));
8132 
8133  case xpath_type_string:
8134  {
8135  xpath_allocator_capture cr(stack.result);
8136 
8137  return !eval_string(c, stack).empty();
8138  }
8139 
8140  case xpath_type_node_set:
8141  {
8142  xpath_allocator_capture cr(stack.result);
8143 
8144  return !eval_node_set(c, stack).empty();
8145  }
8146 
8147  default:
8148  assert(!"Wrong expression for return type boolean");
8149  return false;
8150  }
8151  }
8152  }
8153  }
8154 
8155  double eval_number(const xpath_context& c, const xpath_stack& stack)
8156  {
8157  switch (_type)
8158  {
8159  case ast_op_add:
8160  return _left->eval_number(c, stack) + _right->eval_number(c, stack);
8161 
8162  case ast_op_subtract:
8163  return _left->eval_number(c, stack) - _right->eval_number(c, stack);
8164 
8165  case ast_op_multiply:
8166  return _left->eval_number(c, stack) * _right->eval_number(c, stack);
8167 
8168  case ast_op_divide:
8169  return _left->eval_number(c, stack) / _right->eval_number(c, stack);
8170 
8171  case ast_op_mod:
8172  return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
8173 
8174  case ast_op_negate:
8175  return -_left->eval_number(c, stack);
8176 
8177  case ast_number_constant:
8178  return _data.number;
8179 
8180  case ast_func_last:
8181  return static_cast<double>(c.size);
8182 
8183  case ast_func_position:
8184  return static_cast<double>(c.position);
8185 
8186  case ast_func_count:
8187  {
8188  xpath_allocator_capture cr(stack.result);
8189 
8190  return static_cast<double>(_left->eval_node_set(c, stack).size());
8191  }
8192 
8193  case ast_func_string_length_0:
8194  {
8195  xpath_allocator_capture cr(stack.result);
8196 
8197  return static_cast<double>(string_value(c.n, stack.result).length());
8198  }
8199 
8200  case ast_func_string_length_1:
8201  {
8202  xpath_allocator_capture cr(stack.result);
8203 
8204  return static_cast<double>(_left->eval_string(c, stack).length());
8205  }
8206 
8207  case ast_func_number_0:
8208  {
8209  xpath_allocator_capture cr(stack.result);
8210 
8211  return convert_string_to_number(string_value(c.n, stack.result).c_str());
8212  }
8213 
8214  case ast_func_number_1:
8215  return _left->eval_number(c, stack);
8216 
8217  case ast_func_sum:
8218  {
8219  xpath_allocator_capture cr(stack.result);
8220 
8221  double r = 0;
8222 
8223  xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8224 
8225  for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
8226  {
8227  xpath_allocator_capture cri(stack.result);
8228 
8229  r += convert_string_to_number(string_value(*it, stack.result).c_str());
8230  }
8231 
8232  return r;
8233  }
8234 
8235  case ast_func_floor:
8236  {
8237  double r = _left->eval_number(c, stack);
8238 
8239  return r == r ? floor(r) : r;
8240  }
8241 
8242  case ast_func_ceiling:
8243  {
8244  double r = _left->eval_number(c, stack);
8245 
8246  return r == r ? ceil(r) : r;
8247  }
8248 
8249  case ast_func_round:
8250  return round_nearest_nzero(_left->eval_number(c, stack));
8251 
8252  case ast_variable:
8253  {
8254  assert(_rettype == _data.variable->type());
8255 
8256  if (_rettype == xpath_type_number)
8257  return _data.variable->get_number();
8258 
8259  // fallthrough to type conversion
8260  }
8261 
8262  default:
8263  {
8264  switch (_rettype)
8265  {
8266  case xpath_type_boolean:
8267  return eval_boolean(c, stack) ? 1 : 0;
8268 
8269  case xpath_type_string:
8270  {
8271  xpath_allocator_capture cr(stack.result);
8272 
8273  return convert_string_to_number(eval_string(c, stack).c_str());
8274  }
8275 
8276  case xpath_type_node_set:
8277  {
8278  xpath_allocator_capture cr(stack.result);
8279 
8280  return convert_string_to_number(eval_string(c, stack).c_str());
8281  }
8282 
8283  default:
8284  assert(!"Wrong expression for return type number");
8285  return 0;
8286  }
8287 
8288  }
8289  }
8290  }
8291 
8292  xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
8293  {
8294  assert(_type == ast_func_concat);
8295 
8296  xpath_allocator_capture ct(stack.temp);
8297 
8298  // count the string number
8299  size_t count = 1;
8300  for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
8301 
8302  // gather all strings
8303  xpath_string static_buffer[4];
8304  xpath_string* buffer = static_buffer;
8305 
8306  // allocate on-heap for large concats
8307  if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
8308  {
8309  buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
8310  assert(buffer);
8311  }
8312 
8313  // evaluate all strings to temporary stack
8314  xpath_stack swapped_stack = {stack.temp, stack.result};
8315 
8316  buffer[0] = _left->eval_string(c, swapped_stack);
8317 
8318  size_t pos = 1;
8319  for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
8320  assert(pos == count);
8321 
8322  // get total length
8323  size_t length = 0;
8324  for (size_t i = 0; i < count; ++i) length += buffer[i].length();
8325 
8326  // create final string
8327  char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
8328  assert(result);
8329 
8330  char_t* ri = result;
8331 
8332  for (size_t j = 0; j < count; ++j)
8333  for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
8334  *ri++ = *bi;
8335 
8336  *ri = 0;
8337 
8338  return xpath_string(result, true);
8339  }
8340 
8341  xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
8342  {
8343  switch (_type)
8344  {
8345  case ast_string_constant:
8346  return xpath_string_const(_data.string);
8347 
8348  case ast_func_local_name_0:
8349  {
8350  xpath_node na = c.n;
8351 
8352  return xpath_string_const(local_name(na));
8353  }
8354 
8355  case ast_func_local_name_1:
8356  {
8357  xpath_allocator_capture cr(stack.result);
8358 
8359  xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8360  xpath_node na = ns.first();
8361 
8362  return xpath_string_const(local_name(na));
8363  }
8364 
8365  case ast_func_name_0:
8366  {
8367  xpath_node na = c.n;
8368 
8369  return xpath_string_const(qualified_name(na));
8370  }
8371 
8372  case ast_func_name_1:
8373  {
8374  xpath_allocator_capture cr(stack.result);
8375 
8376  xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8377  xpath_node na = ns.first();
8378 
8379  return xpath_string_const(qualified_name(na));
8380  }
8381 
8382  case ast_func_namespace_uri_0:
8383  {
8384  xpath_node na = c.n;
8385 
8386  return xpath_string_const(namespace_uri(na));
8387  }
8388 
8389  case ast_func_namespace_uri_1:
8390  {
8391  xpath_allocator_capture cr(stack.result);
8392 
8393  xpath_node_set_raw ns = _left->eval_node_set(c, stack);
8394  xpath_node na = ns.first();
8395 
8396  return xpath_string_const(namespace_uri(na));
8397  }
8398 
8399  case ast_func_string_0:
8400  return string_value(c.n, stack.result);
8401 
8402  case ast_func_string_1:
8403  return _left->eval_string(c, stack);
8404 
8405  case ast_func_concat:
8406  return eval_string_concat(c, stack);
8407 
8408  case ast_func_substring_before:
8409  {
8410  xpath_allocator_capture cr(stack.temp);
8411 
8412  xpath_stack swapped_stack = {stack.temp, stack.result};
8413 
8414  xpath_string s = _left->eval_string(c, swapped_stack);
8415  xpath_string p = _right->eval_string(c, swapped_stack);
8416 
8417  const char_t* pos = find_substring(s.c_str(), p.c_str());
8418 
8419  return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
8420  }
8421 
8422  case ast_func_substring_after:
8423  {
8424  xpath_allocator_capture cr(stack.temp);
8425 
8426  xpath_stack swapped_stack = {stack.temp, stack.result};
8427 
8428  xpath_string s = _left->eval_string(c, swapped_stack);
8429  xpath_string p = _right->eval_string(c, swapped_stack);
8430 
8431  const char_t* pos = find_substring(s.c_str(), p.c_str());
8432  if (!pos) return xpath_string();
8433 
8434  const char_t* result = pos + p.length();
8435 
8436  return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
8437  }
8438 
8439  case ast_func_substring_2:
8440  {
8441  xpath_allocator_capture cr(stack.temp);
8442 
8443  xpath_stack swapped_stack = {stack.temp, stack.result};
8444 
8445  xpath_string s = _left->eval_string(c, swapped_stack);
8446  size_t s_length = s.length();
8447 
8448  double first = round_nearest(_right->eval_number(c, stack));
8449 
8450  if (is_nan(first)) return xpath_string(); // NaN
8451  else if (first >= s_length + 1) return xpath_string();
8452 
8453  size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8454  assert(1 <= pos && pos <= s_length + 1);
8455 
8456  const char_t* rbegin = s.c_str() + (pos - 1);
8457 
8458  return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
8459  }
8460 
8461  case ast_func_substring_3:
8462  {
8463  xpath_allocator_capture cr(stack.temp);
8464 
8465  xpath_stack swapped_stack = {stack.temp, stack.result};
8466 
8467  xpath_string s = _left->eval_string(c, swapped_stack);
8468  size_t s_length = s.length();
8469 
8470  double first = round_nearest(_right->eval_number(c, stack));
8471  double last = first + round_nearest(_right->_next->eval_number(c, stack));
8472 
8473  if (is_nan(first) || is_nan(last)) return xpath_string();
8474  else if (first >= s_length + 1) return xpath_string();
8475  else if (first >= last) return xpath_string();
8476  else if (last < 1) return xpath_string();
8477 
8478  size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8479  size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
8480 
8481  assert(1 <= pos && pos <= end && end <= s_length + 1);
8482  const char_t* rbegin = s.c_str() + (pos - 1);
8483  const char_t* rend = s.c_str() + (end - 1);
8484 
8485  return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
8486  }
8487 
8488  case ast_func_normalize_space_0:
8489  {
8490  xpath_string s = string_value(c.n, stack.result);
8491 
8492  normalize_space(s.data(stack.result));
8493 
8494  return s;
8495  }
8496 
8497  case ast_func_normalize_space_1:
8498  {
8499  xpath_string s = _left->eval_string(c, stack);
8500 
8501  normalize_space(s.data(stack.result));
8502 
8503  return s;
8504  }
8505 
8506  case ast_func_translate:
8507  {
8508  xpath_allocator_capture cr(stack.temp);
8509 
8510  xpath_stack swapped_stack = {stack.temp, stack.result};
8511 
8512  xpath_string s = _left->eval_string(c, stack);
8513  xpath_string from = _right->eval_string(c, swapped_stack);
8514  xpath_string to = _right->_next->eval_string(c, swapped_stack);
8515 
8516  translate(s.data(stack.result), from.c_str(), to.c_str());
8517 
8518  return s;
8519  }
8520 
8521  case ast_variable:
8522  {
8523  assert(_rettype == _data.variable->type());
8524 
8525  if (_rettype == xpath_type_string)
8526  return xpath_string_const(_data.variable->get_string());
8527 
8528  // fallthrough to type conversion
8529  }
8530 
8531  default:
8532  {
8533  switch (_rettype)
8534  {
8535  case xpath_type_boolean:
8536  return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
8537 
8538  case xpath_type_number:
8539  return convert_number_to_string(eval_number(c, stack), stack.result);
8540 
8541  case xpath_type_node_set:
8542  {
8543  xpath_allocator_capture cr(stack.temp);
8544 
8545  xpath_stack swapped_stack = {stack.temp, stack.result};
8546 
8547  xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
8548  return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
8549  }
8550 
8551  default:
8552  assert(!"Wrong expression for return type string");
8553  return xpath_string();
8554  }
8555  }
8556  }
8557  }
8558 
8559  xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
8560  {
8561  switch (_type)
8562  {
8563  case ast_op_union:
8564  {
8565  xpath_allocator_capture cr(stack.temp);
8566 
8567  xpath_stack swapped_stack = {stack.temp, stack.result};
8568 
8569  xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
8570  xpath_node_set_raw rs = _right->eval_node_set(c, stack);
8571 
8572  // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
8573  rs.set_type(xpath_node_set::type_unsorted);
8574 
8575  rs.append(ls.begin(), ls.end(), stack.result);
8576  rs.remove_duplicates();
8577 
8578  return rs;
8579  }
8580 
8581  case ast_filter:
8582  case ast_filter_posinv:
8583  {
8584  xpath_node_set_raw set = _left->eval_node_set(c, stack);
8585 
8586  // either expression is a number or it contains position() call; sort by document order
8587  if (_type == ast_filter) set.sort_do();
8588 
8589  apply_predicate(set, 0, _right, stack);
8590 
8591  return set;
8592  }
8593 
8594  case ast_func_id:
8595  return xpath_node_set_raw();
8596 
8597  case ast_step:
8598  {
8599  switch (_axis)
8600  {
8601  case axis_ancestor:
8602  return step_do(c, stack, axis_to_type<axis_ancestor>());
8603 
8604  case axis_ancestor_or_self:
8605  return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
8606 
8607  case axis_attribute:
8608  return step_do(c, stack, axis_to_type<axis_attribute>());
8609 
8610  case axis_child:
8611  return step_do(c, stack, axis_to_type<axis_child>());
8612 
8613  case axis_descendant:
8614  return step_do(c, stack, axis_to_type<axis_descendant>());
8615 
8616  case axis_descendant_or_self:
8617  return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
8618 
8619  case axis_following:
8620  return step_do(c, stack, axis_to_type<axis_following>());
8621 
8622  case axis_following_sibling:
8623  return step_do(c, stack, axis_to_type<axis_following_sibling>());
8624 
8625  case axis_namespace:
8626  // namespaced axis is not supported
8627  return xpath_node_set_raw();
8628 
8629  case axis_parent:
8630  return step_do(c, stack, axis_to_type<axis_parent>());
8631 
8632  case axis_preceding:
8633  return step_do(c, stack, axis_to_type<axis_preceding>());
8634 
8635  case axis_preceding_sibling:
8636  return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
8637 
8638  case axis_self:
8639  return step_do(c, stack, axis_to_type<axis_self>());
8640 
8641  default:
8642  assert(!"Unknown axis");
8643  return xpath_node_set_raw();
8644  }
8645  }
8646 
8647  case ast_step_root:
8648  {
8649  assert(!_right); // root step can't have any predicates
8650 
8651  xpath_node_set_raw ns;
8652 
8653  ns.set_type(xpath_node_set::type_sorted);
8654 
8655  if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
8656  else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
8657 
8658  return ns;
8659  }
8660 
8661  case ast_variable:
8662  {
8663  assert(_rettype == _data.variable->type());
8664 
8665  if (_rettype == xpath_type_node_set)
8666  {
8667  const xpath_node_set& s = _data.variable->get_node_set();
8668 
8669  xpath_node_set_raw ns;
8670 
8671  ns.set_type(s.type());
8672  ns.append(s.begin(), s.end(), stack.result);
8673 
8674  return ns;
8675  }
8676 
8677  // fallthrough to type conversion
8678  }
8679 
8680  default:
8681  assert(!"Wrong expression for return type node set");
8682  return xpath_node_set_raw();
8683  }
8684  }
8685 
8686  bool is_posinv()
8687  {
8688  switch (_type)
8689  {
8690  case ast_func_position:
8691  return false;
8692 
8693  case ast_string_constant:
8694  case ast_number_constant:
8695  case ast_variable:
8696  return true;
8697 
8698  case ast_step:
8699  case ast_step_root:
8700  return true;
8701 
8702  case ast_predicate:
8703  case ast_filter:
8704  case ast_filter_posinv:
8705  return true;
8706 
8707  default:
8708  if (_left && !_left->is_posinv()) return false;
8709 
8710  for (xpath_ast_node* n = _right; n; n = n->_next)
8711  if (!n->is_posinv()) return false;
8712 
8713  return true;
8714  }
8715  }
8716 
8717  xpath_value_type rettype() const
8718  {
8719  return static_cast<xpath_value_type>(_rettype);
8720  }
8721  };
8722 
8723  struct xpath_parser
8724  {
8725  xpath_allocator* _alloc;
8726  xpath_lexer _lexer;
8727 
8728  const char_t* _query;
8729  xpath_variable_set* _variables;
8730 
8731  xpath_parse_result* _result;
8732 
8733  #ifdef PUGIXML_NO_EXCEPTIONS
8734  jmp_buf _error_handler;
8735  #endif
8736 
8737  void throw_error(const char* message)
8738  {
8739  _result->error = message;
8740  _result->offset = _lexer.current_pos() - _query;
8741 
8742  #ifdef PUGIXML_NO_EXCEPTIONS
8743  longjmp(_error_handler, 1);
8744  #else
8745  throw xpath_exception(*_result);
8746  #endif
8747  }
8748 
// Reports an allocation failure: std::bad_alloc when exceptions are available, otherwise the
// regular throw_error path with an "Out of memory" message.
void throw_error_oom()
{
    #ifndef PUGIXML_NO_EXCEPTIONS
    throw std::bad_alloc();
    #else
    throw_error("Out of memory");
    #endif
}
8757 
8758  void* alloc_node()
8759  {
8760  void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
8761 
8762  if (!result) throw_error_oom();
8763 
8764  return result;
8765  }
8766 
8767  const char_t* alloc_string(const xpath_lexer_string& value)
8768  {
8769  if (value.begin)
8770  {
8771  size_t length = static_cast<size_t>(value.end - value.begin);
8772 
8773  char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
8774  if (!c) throw_error_oom();
8775 
8776  memcpy(c, value.begin, length * sizeof(char_t));
8777  c[length] = 0;
8778 
8779  return c;
8780  }
8781  else return 0;
8782  }
8783 
8784  xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
8785  {
8786  assert(argc <= 1);
8787 
8788  if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8789 
8790  return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
8791  }
8792 
8793  xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
8794  {
8795  switch (name.begin[0])
8796  {
8797  case 'b':
8798  if (name == PUGIXML_TEXT("boolean") && argc == 1)
8799  return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
8800 
8801  break;
8802 
8803  case 'c':
8804  if (name == PUGIXML_TEXT("count") && argc == 1)
8805  {
8806  if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8807  return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
8808  }
8809  else if (name == PUGIXML_TEXT("contains") && argc == 2)
8810  return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_string, args[0], args[1]);
8811  else if (name == PUGIXML_TEXT("concat") && argc >= 2)
8812  return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
8813  else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
8814  return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
8815 
8816  break;
8817 
8818  case 'f':
8819  if (name == PUGIXML_TEXT("false") && argc == 0)
8820  return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
8821  else if (name == PUGIXML_TEXT("floor") && argc == 1)
8822  return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
8823 
8824  break;
8825 
8826  case 'i':
8827  if (name == PUGIXML_TEXT("id") && argc == 1)
8828  return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
8829 
8830  break;
8831 
8832  case 'l':
8833  if (name == PUGIXML_TEXT("last") && argc == 0)
8834  return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
8835  else if (name == PUGIXML_TEXT("lang") && argc == 1)
8836  return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
8837  else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
8838  return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
8839 
8840  break;
8841 
8842  case 'n':
8843  if (name == PUGIXML_TEXT("name") && argc <= 1)
8844  return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
8845  else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
8846  return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
8847  else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
8848  return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
8849  else if (name == PUGIXML_TEXT("not") && argc == 1)
8850  return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
8851  else if (name == PUGIXML_TEXT("number") && argc <= 1)
8852  return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
8853 
8854  break;
8855 
8856  case 'p':
8857  if (name == PUGIXML_TEXT("position") && argc == 0)
8858  return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
8859 
8860  break;
8861 
8862  case 'r':
8863  if (name == PUGIXML_TEXT("round") && argc == 1)
8864  return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
8865 
8866  break;
8867 
8868  case 's':
8869  if (name == PUGIXML_TEXT("string") && argc <= 1)
8870  return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
8871  else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
8872  return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_string, args[0]);
8873  else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
8874  return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
8875  else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
8876  return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
8877  else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
8878  return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
8879  else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
8880  return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
8881  else if (name == PUGIXML_TEXT("sum") && argc == 1)
8882  {
8883  if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
8884  return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
8885  }
8886 
8887  break;
8888 
8889  case 't':
8890  if (name == PUGIXML_TEXT("translate") && argc == 3)
8891  return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
8892  else if (name == PUGIXML_TEXT("true") && argc == 0)
8893  return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
8894 
8895  break;
8896 
8897  default:
8898  break;
8899  }
8900 
8901  throw_error("Unrecognized function or wrong parameter count");
8902 
8903  return 0;
8904  }
8905 
8906  axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
8907  {
8908  specified = true;
8909 
8910  switch (name.begin[0])
8911  {
8912  case 'a':
8913  if (name == PUGIXML_TEXT("ancestor"))
8914  return axis_ancestor;
8915  else if (name == PUGIXML_TEXT("ancestor-or-self"))
8916  return axis_ancestor_or_self;
8917  else if (name == PUGIXML_TEXT("attribute"))
8918  return axis_attribute;
8919 
8920  break;
8921 
8922  case 'c':
8923  if (name == PUGIXML_TEXT("child"))
8924  return axis_child;
8925 
8926  break;
8927 
8928  case 'd':
8929  if (name == PUGIXML_TEXT("descendant"))
8930  return axis_descendant;
8931  else if (name == PUGIXML_TEXT("descendant-or-self"))
8932  return axis_descendant_or_self;
8933 
8934  break;
8935 
8936  case 'f':
8937  if (name == PUGIXML_TEXT("following"))
8938  return axis_following;
8939  else if (name == PUGIXML_TEXT("following-sibling"))
8940  return axis_following_sibling;
8941 
8942  break;
8943 
8944  case 'n':
8945  if (name == PUGIXML_TEXT("namespace"))
8946  return axis_namespace;
8947 
8948  break;
8949 
8950  case 'p':
8951  if (name == PUGIXML_TEXT("parent"))
8952  return axis_parent;
8953  else if (name == PUGIXML_TEXT("preceding"))
8954  return axis_preceding;
8955  else if (name == PUGIXML_TEXT("preceding-sibling"))
8956  return axis_preceding_sibling;
8957 
8958  break;
8959 
8960  case 's':
8961  if (name == PUGIXML_TEXT("self"))
8962  return axis_self;
8963 
8964  break;
8965 
8966  default:
8967  break;
8968  }
8969 
8970  specified = false;
8971  return axis_child;
8972  }
8973 
8974  nodetest_t parse_node_test_type(const xpath_lexer_string& name)
8975  {
8976  switch (name.begin[0])
8977  {
8978  case 'c':
8979  if (name == PUGIXML_TEXT("comment"))
8980  return nodetest_type_comment;
8981 
8982  break;
8983 
8984  case 'n':
8985  if (name == PUGIXML_TEXT("node"))
8986  return nodetest_type_node;
8987 
8988  break;
8989 
8990  case 'p':
8991  if (name == PUGIXML_TEXT("processing-instruction"))
8992  return nodetest_type_pi;
8993 
8994  break;
8995 
8996  case 't':
8997  if (name == PUGIXML_TEXT("text"))
8998  return nodetest_type_text;
8999 
9000  break;
9001 
9002  default:
9003  break;
9004  }
9005 
9006  return nodetest_none;
9007  }
9008 
9009  // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
9010  xpath_ast_node* parse_primary_expression()
9011  {
9012  switch (_lexer.current())
9013  {
9014  case lex_var_ref:
9015  {
9016  xpath_lexer_string name = _lexer.contents();
9017 
9018  if (!_variables)
9019  throw_error("Unknown variable: variable set is not provided");
9020 
9021  xpath_variable* var = get_variable(_variables, name.begin, name.end);
9022 
9023  if (!var)
9024  throw_error("Unknown variable: variable set does not contain the given name");
9025 
9026  _lexer.next();
9027 
9028  return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
9029  }
9030 
9031  case lex_open_brace:
9032  {
9033  _lexer.next();
9034 
9035  xpath_ast_node* n = parse_expression();
9036 
9037  if (_lexer.current() != lex_close_brace)
9038  throw_error("Unmatched braces");
9039 
9040  _lexer.next();
9041 
9042  return n;
9043  }
9044 
9045  case lex_quoted_string:
9046  {
9047  const char_t* value = alloc_string(_lexer.contents());
9048 
9049  xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
9050  _lexer.next();
9051 
9052  return n;
9053  }
9054 
9055  case lex_number:
9056  {
9057  double value = 0;
9058 
9059  if (!convert_string_to_number(_lexer.contents().begin, _lexer.contents().end, &value))
9060  throw_error_oom();
9061 
9062  xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
9063  _lexer.next();
9064 
9065  return n;
9066  }
9067 
9068  case lex_string:
9069  {
9070  xpath_ast_node* args[2] = {0};
9071  size_t argc = 0;
9072 
9073  xpath_lexer_string function = _lexer.contents();
9074  _lexer.next();
9075 
9076  xpath_ast_node* last_arg = 0;
9077 
9078  if (_lexer.current() != lex_open_brace)
9079  throw_error("Unrecognized function call");
9080  _lexer.next();
9081 
9082  if (_lexer.current() != lex_close_brace)
9083  args[argc++] = parse_expression();
9084 
9085  while (_lexer.current() != lex_close_brace)
9086  {
9087  if (_lexer.current() != lex_comma)
9088  throw_error("No comma between function arguments");
9089  _lexer.next();
9090 
9091  xpath_ast_node* n = parse_expression();
9092 
9093  if (argc < 2) args[argc] = n;
9094  else last_arg->set_next(n);
9095 
9096  argc++;
9097  last_arg = n;
9098  }
9099 
9100  _lexer.next();
9101 
9102  return parse_function(function, argc, args);
9103  }
9104 
9105  default:
9106  throw_error("Unrecognizable primary expression");
9107 
9108  return 0;
9109  }
9110  }
9111 
9112  // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
9113  // Predicate ::= '[' PredicateExpr ']'
9114  // PredicateExpr ::= Expr
9115  xpath_ast_node* parse_filter_expression()
9116  {
9117  xpath_ast_node* n = parse_primary_expression();
9118 
9119  while (_lexer.current() == lex_open_square_brace)
9120  {
9121  _lexer.next();
9122 
9123  xpath_ast_node* expr = parse_expression();
9124 
9125  if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
9126 
9127  bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
9128 
9129  n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
9130 
9131  if (_lexer.current() != lex_close_square_brace)
9132  throw_error("Unmatched square brace");
9133 
9134  _lexer.next();
9135  }
9136 
9137  return n;
9138  }
9139 
9140  // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
9141  // AxisSpecifier ::= AxisName '::' | '@'?
9142  // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
9143  // NameTest ::= '*' | NCName ':' '*' | QName
9144  // AbbreviatedStep ::= '.' | '..'
9145  xpath_ast_node* parse_step(xpath_ast_node* set)
9146  {
9147  if (set && set->rettype() != xpath_type_node_set)
9148  throw_error("Step has to be applied to node set");
9149 
9150  bool axis_specified = false;
9151  axis_t axis = axis_child; // implied child axis
9152 
9153  if (_lexer.current() == lex_axis_attribute)
9154  {
9155  axis = axis_attribute;
9156  axis_specified = true;
9157 
9158  _lexer.next();
9159  }
9160  else if (_lexer.current() == lex_dot)
9161  {
9162  _lexer.next();
9163 
9164  return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
9165  }
9166  else if (_lexer.current() == lex_double_dot)
9167  {
9168  _lexer.next();
9169 
9170  return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
9171  }
9172 
9173  nodetest_t nt_type = nodetest_none;
9174  xpath_lexer_string nt_name;
9175 
9176  if (_lexer.current() == lex_string)
9177  {
9178  // node name test
9179  nt_name = _lexer.contents();
9180  _lexer.next();
9181 
9182  // was it an axis name?
9183  if (_lexer.current() == lex_double_colon)
9184  {
9185  // parse axis name
9186  if (axis_specified) throw_error("Two axis specifiers in one step");
9187 
9188  axis = parse_axis_name(nt_name, axis_specified);
9189 
9190  if (!axis_specified) throw_error("Unknown axis");
9191 
9192  // read actual node test
9193  _lexer.next();
9194 
9195  if (_lexer.current() == lex_multiply)
9196  {
9197  nt_type = nodetest_all;
9198  nt_name = xpath_lexer_string();
9199  _lexer.next();
9200  }
9201  else if (_lexer.current() == lex_string)
9202  {
9203  nt_name = _lexer.contents();
9204  _lexer.next();
9205  }
9206  else throw_error("Unrecognized node test");
9207  }
9208 
9209  if (nt_type == nodetest_none)
9210  {
9211  // node type test or processing-instruction
9212  if (_lexer.current() == lex_open_brace)
9213  {
9214  _lexer.next();
9215 
9216  if (_lexer.current() == lex_close_brace)
9217  {
9218  _lexer.next();
9219 
9220  nt_type = parse_node_test_type(nt_name);
9221 
9222  if (nt_type == nodetest_none) throw_error("Unrecognized node type");
9223 
9224  nt_name = xpath_lexer_string();
9225  }
9226  else if (nt_name == PUGIXML_TEXT("processing-instruction"))
9227  {
9228  if (_lexer.current() != lex_quoted_string)
9229  throw_error("Only literals are allowed as arguments to processing-instruction()");
9230 
9231  nt_type = nodetest_pi;
9232  nt_name = _lexer.contents();
9233  _lexer.next();
9234 
9235  if (_lexer.current() != lex_close_brace)
9236  throw_error("Unmatched brace near processing-instruction()");
9237  _lexer.next();
9238  }
9239  else
9240  throw_error("Unmatched brace near node type test");
9241 
9242  }
9243  // QName or NCName:*
9244  else
9245  {
9246  if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
9247  {
9248  nt_name.end--; // erase *
9249 
9250  nt_type = nodetest_all_in_namespace;
9251  }
9252  else nt_type = nodetest_name;
9253  }
9254  }
9255  }
9256  else if (_lexer.current() == lex_multiply)
9257  {
9258  nt_type = nodetest_all;
9259  _lexer.next();
9260  }
9261  else throw_error("Unrecognized node test");
9262 
9263  xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
9264 
9265  xpath_ast_node* last = 0;
9266 
9267  while (_lexer.current() == lex_open_square_brace)
9268  {
9269  _lexer.next();
9270 
9271  xpath_ast_node* expr = parse_expression();
9272 
9273  xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
9274 
9275  if (_lexer.current() != lex_close_square_brace)
9276  throw_error("Unmatched square brace");
9277  _lexer.next();
9278 
9279  if (last) last->set_next(pred);
9280  else n->set_right(pred);
9281 
9282  last = pred;
9283  }
9284 
9285  return n;
9286  }
9287 
9288  // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
9289  xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
9290  {
9291  xpath_ast_node* n = parse_step(set);
9292 
9293  while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
9294  {
9295  lexeme_t l = _lexer.current();
9296  _lexer.next();
9297 
9298  if (l == lex_double_slash)
9299  n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
9300 
9301  n = parse_step(n);
9302  }
9303 
9304  return n;
9305  }
9306 
9307  // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
9308  // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
9309  xpath_ast_node* parse_location_path()
9310  {
9311  if (_lexer.current() == lex_slash)
9312  {
9313  _lexer.next();
9314 
9315  xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
9316 
9317  // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
9318  lexeme_t l = _lexer.current();
9319 
9320  if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
9321  return parse_relative_location_path(n);
9322  else
9323  return n;
9324  }
9325  else if (_lexer.current() == lex_double_slash)
9326  {
9327  _lexer.next();
9328 
9329  xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
9330  n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
9331 
9332  return parse_relative_location_path(n);
9333  }
9334 
9335  // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
9336  return parse_relative_location_path(0);
9337  }
9338 
9339  // PathExpr ::= LocationPath
9340  // | FilterExpr
9341  // | FilterExpr '/' RelativeLocationPath
9342  // | FilterExpr '//' RelativeLocationPath
9343  xpath_ast_node* parse_path_expression()
9344  {
9345  // Clarification.
9346  // PathExpr begins with either LocationPath or FilterExpr.
9347  // FilterExpr begins with PrimaryExpr
9348  // PrimaryExpr begins with '$' in case of it being a variable reference,
9349  // '(' in case of it being an expression, string literal, number constant or
9350  // function call.
9351 
9352  if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
9353  _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
9354  _lexer.current() == lex_string)
9355  {
9356  if (_lexer.current() == lex_string)
9357  {
9358  // This is either a function call, or not - if not, we shall proceed with location path
9359  const char_t* state = _lexer.state();
9360 
9361  while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
9362 
9363  if (*state != '(') return parse_location_path();
9364 
9365  // This looks like a function call; however this still can be a node-test. Check it.
9366  if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
9367  }
9368 
9369  xpath_ast_node* n = parse_filter_expression();
9370 
9371  if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
9372  {
9373  lexeme_t l = _lexer.current();
9374  _lexer.next();
9375 
9376  if (l == lex_double_slash)
9377  {
9378  if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
9379 
9380  n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
9381  }
9382 
9383  // select from location path
9384  return parse_relative_location_path(n);
9385  }
9386 
9387  return n;
9388  }
9389  else return parse_location_path();
9390  }
9391 
9392  // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
9393  xpath_ast_node* parse_union_expression()
9394  {
9395  xpath_ast_node* n = parse_path_expression();
9396 
9397  while (_lexer.current() == lex_union)
9398  {
9399  _lexer.next();
9400 
9401  xpath_ast_node* expr = parse_union_expression();
9402 
9403  if (n->rettype() != xpath_type_node_set || expr->rettype() != xpath_type_node_set)
9404  throw_error("Union operator has to be applied to node sets");
9405 
9406  n = new (alloc_node()) xpath_ast_node(ast_op_union, xpath_type_node_set, n, expr);
9407  }
9408 
9409  return n;
9410  }
9411 
9412  // UnaryExpr ::= UnionExpr | '-' UnaryExpr
9413  xpath_ast_node* parse_unary_expression()
9414  {
9415  if (_lexer.current() == lex_minus)
9416  {
9417  _lexer.next();
9418 
9419  xpath_ast_node* expr = parse_unary_expression();
9420 
9421  return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
9422  }
9423  else return parse_union_expression();
9424  }
9425 
9426  // MultiplicativeExpr ::= UnaryExpr
9427  // | MultiplicativeExpr '*' UnaryExpr
9428  // | MultiplicativeExpr 'div' UnaryExpr
9429  // | MultiplicativeExpr 'mod' UnaryExpr
9430  xpath_ast_node* parse_multiplicative_expression()
9431  {
9432  xpath_ast_node* n = parse_unary_expression();
9433 
9434  while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
9435  (_lexer.contents() == PUGIXML_TEXT("mod") || _lexer.contents() == PUGIXML_TEXT("div"))))
9436  {
9437  ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
9438  _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
9439  _lexer.next();
9440 
9441  xpath_ast_node* expr = parse_unary_expression();
9442 
9443  n = new (alloc_node()) xpath_ast_node(op, xpath_type_number, n, expr);
9444  }
9445 
9446  return n;
9447  }
9448 
9449  // AdditiveExpr ::= MultiplicativeExpr
9450  // | AdditiveExpr '+' MultiplicativeExpr
9451  // | AdditiveExpr '-' MultiplicativeExpr
9452  xpath_ast_node* parse_additive_expression()
9453  {
9454  xpath_ast_node* n = parse_multiplicative_expression();
9455 
9456  while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
9457  {
9458  lexeme_t l = _lexer.current();
9459 
9460  _lexer.next();
9461 
9462  xpath_ast_node* expr = parse_multiplicative_expression();
9463 
9464  n = new (alloc_node()) xpath_ast_node(l == lex_plus ? ast_op_add : ast_op_subtract, xpath_type_number, n, expr);
9465  }
9466 
9467  return n;
9468  }
9469 
9470  // RelationalExpr ::= AdditiveExpr
9471  // | RelationalExpr '<' AdditiveExpr
9472  // | RelationalExpr '>' AdditiveExpr
9473  // | RelationalExpr '<=' AdditiveExpr
9474  // | RelationalExpr '>=' AdditiveExpr
9475  xpath_ast_node* parse_relational_expression()
9476  {
9477  xpath_ast_node* n = parse_additive_expression();
9478 
9479  while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
9480  _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
9481  {
9482  lexeme_t l = _lexer.current();
9483  _lexer.next();
9484 
9485  xpath_ast_node* expr = parse_additive_expression();
9486 
9487  n = new (alloc_node()) xpath_ast_node(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
9488  l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, xpath_type_boolean, n, expr);
9489  }
9490 
9491  return n;
9492  }
9493 
9494  // EqualityExpr ::= RelationalExpr
9495  // | EqualityExpr '=' RelationalExpr
9496  // | EqualityExpr '!=' RelationalExpr
9497  xpath_ast_node* parse_equality_expression()
9498  {
9499  xpath_ast_node* n = parse_relational_expression();
9500 
9501  while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
9502  {
9503  lexeme_t l = _lexer.current();
9504 
9505  _lexer.next();
9506 
9507  xpath_ast_node* expr = parse_relational_expression();
9508 
9509  n = new (alloc_node()) xpath_ast_node(l == lex_equal ? ast_op_equal : ast_op_not_equal, xpath_type_boolean, n, expr);
9510  }
9511 
9512  return n;
9513  }
9514 
9515  // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
9516  xpath_ast_node* parse_and_expression()
9517  {
9518  xpath_ast_node* n = parse_equality_expression();
9519 
9520  while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("and"))
9521  {
9522  _lexer.next();
9523 
9524  xpath_ast_node* expr = parse_equality_expression();
9525 
9526  n = new (alloc_node()) xpath_ast_node(ast_op_and, xpath_type_boolean, n, expr);
9527  }
9528 
9529  return n;
9530  }
9531 
9532  // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
9533  xpath_ast_node* parse_or_expression()
9534  {
9535  xpath_ast_node* n = parse_and_expression();
9536 
9537  while (_lexer.current() == lex_string && _lexer.contents() == PUGIXML_TEXT("or"))
9538  {
9539  _lexer.next();
9540 
9541  xpath_ast_node* expr = parse_and_expression();
9542 
9543  n = new (alloc_node()) xpath_ast_node(ast_op_or, xpath_type_boolean, n, expr);
9544  }
9545 
9546  return n;
9547  }
9548 
9549  // Expr ::= OrExpr
9550  xpath_ast_node* parse_expression()
9551  {
9552  return parse_or_expression();
9553  }
9554 
9555  xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
9556  {
9557  }
9558 
9559  xpath_ast_node* parse()
9560  {
9561  xpath_ast_node* result = parse_expression();
9562 
9563  if (_lexer.current() != lex_eof)
9564  {
9565  // there are still unparsed tokens left, error
9566  throw_error("Incorrect query");
9567  }
9568 
9569  return result;
9570  }
9571 
9572  static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
9573  {
9574  xpath_parser parser(query, variables, alloc, result);
9575 
9576  #ifdef PUGIXML_NO_EXCEPTIONS
9577  int error = setjmp(parser._error_handler);
9578 
9579  return (error == 0) ? parser.parse() : 0;
9580  #else
9581  return parser.parse();
9582  #endif
9583  }
9584  };
9585 
9586  struct xpath_query_impl
9587  {
9588  static xpath_query_impl* create()
9589  {
9590  void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
9591 
9592  return new (memory) xpath_query_impl();
9593  }
9594 
9595  static void destroy(void* ptr)
9596  {
9597  if (!ptr) return;
9598 
9599  // free all allocated pages
9600  static_cast<xpath_query_impl*>(ptr)->alloc.release();
9601 
9602  // free allocator memory (with the first page)
9603  xml_memory::deallocate(ptr);
9604  }
9605 
9606  xpath_query_impl(): root(0), alloc(&block)
9607  {
9608  block.next = 0;
9609  }
9610 
9611  xpath_ast_node* root;
9612  xpath_allocator alloc;
9613  xpath_memory_block block;
9614  };
9615 
9616  PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
9617  {
9618  if (!impl) return xpath_string();
9619 
9620  #ifdef PUGIXML_NO_EXCEPTIONS
9621  if (setjmp(sd.error_handler)) return xpath_string();
9622  #endif
9623 
9624  xpath_context c(n, 1, 1);
9625 
9626  return impl->root->eval_string(c, sd.stack);
9627  }
9628 PUGI__NS_END
9629 
9630 namespace pugi
9631 {
9632 #ifndef PUGIXML_NO_EXCEPTIONS
9633  PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
9634  {
9635  assert(_result.error);
9636  }
9637 
9638  PUGI__FN const char* xpath_exception::what() const throw()
9639  {
9640  return _result.error;
9641  }
9642 
9643  PUGI__FN const xpath_parse_result& xpath_exception::result() const
9644  {
9645  return _result;
9646  }
9647 #endif
9648 
9649  PUGI__FN xpath_node::xpath_node()
9650  {
9651  }
9652 
9653  PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
9654  {
9655  }
9656 
9657  PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
9658  {
9659  }
9660 
9661  PUGI__FN xml_node xpath_node::node() const
9662  {
9663  return _attribute ? xml_node() : _node;
9664  }
9665 
9666  PUGI__FN xml_attribute xpath_node::attribute() const
9667  {
9668  return _attribute;
9669  }
9670 
9671  PUGI__FN xml_node xpath_node::parent() const
9672  {
9673  return _attribute ? _node : _node.parent();
9674  }
9675 
9676  PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
9677  {
9678  }
9679 
9680  PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
9681  {
9682  return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
9683  }
9684 
9685  PUGI__FN bool xpath_node::operator!() const
9686  {
9687  return !(_node || _attribute);
9688  }
9689 
9690  PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
9691  {
9692  return _node == n._node && _attribute == n._attribute;
9693  }
9694 
9695  PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
9696  {
9697  return _node != n._node || _attribute != n._attribute;
9698  }
9699 
9700 #ifdef __BORLANDC__
9701  PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
9702  {
9703  return (bool)lhs && rhs;
9704  }
9705 
9706  PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
9707  {
9708  return (bool)lhs || rhs;
9709  }
9710 #endif
9711 
9712  PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
9713  {
9714  assert(begin_ <= end_);
9715 
9716  size_t size_ = static_cast<size_t>(end_ - begin_);
9717 
9718  if (size_ <= 1)
9719  {
9720  // deallocate old buffer
9721  if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
9722 
9723  // use internal buffer
9724  if (begin_ != end_) _storage = *begin_;
9725 
9726  _begin = &_storage;
9727  _end = &_storage + size_;
9728  }
9729  else
9730  {
9731  // make heap copy
9732  xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
9733 
9734  if (!storage)
9735  {
9736  #ifdef PUGIXML_NO_EXCEPTIONS
9737  return;
9738  #else
9739  throw std::bad_alloc();
9740  #endif
9741  }
9742 
9743  memcpy(storage, begin_, size_ * sizeof(xpath_node));
9744 
9745  // deallocate old buffer
9746  if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
9747 
9748  // finalize
9749  _begin = storage;
9750  _end = storage + size_;
9751  }
9752  }
9753 
9754  PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
9755  {
9756  }
9757 
9758  PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
9759  {
9760  _assign(begin_, end_);
9761  }
9762 
9763  PUGI__FN xpath_node_set::~xpath_node_set()
9764  {
9765  if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
9766  }
9767 
9768  PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
9769  {
9770  _assign(ns._begin, ns._end);
9771  }
9772 
9773  PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
9774  {
9775  if (this == &ns) return *this;
9776 
9777  _type = ns._type;
9778  _assign(ns._begin, ns._end);
9779 
9780  return *this;
9781  }
9782 
9783  PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
9784  {
9785  return _type;
9786  }
9787 
9788  PUGI__FN size_t xpath_node_set::size() const
9789  {
9790  return _end - _begin;
9791  }
9792 
9793  PUGI__FN bool xpath_node_set::empty() const
9794  {
9795  return _begin == _end;
9796  }
9797 
9798  PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
9799  {
9800  assert(index < size());
9801  return _begin[index];
9802  }
9803 
9804  PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
9805  {
9806  return _begin;
9807  }
9808 
9809  PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
9810  {
9811  return _end;
9812  }
9813 
9814  PUGI__FN void xpath_node_set::sort(bool reverse)
9815  {
9816  _type = impl::xpath_sort(_begin, _end, _type, reverse);
9817  }
9818 
9819  PUGI__FN xpath_node xpath_node_set::first() const
9820  {
9821  return impl::xpath_first(_begin, _end, _type);
9822  }
9823 
9824  PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
9825  {
9826  }
9827 
9828  PUGI__FN xpath_parse_result::operator bool() const
9829  {
9830  return error == 0;
9831  }
9832 
9833  PUGI__FN const char* xpath_parse_result::description() const
9834  {
9835  return error ? error : "No error";
9836  }
9837 
9838  PUGI__FN xpath_variable::xpath_variable()
9839  {
9840  }
9841 
9842  PUGI__FN const char_t* xpath_variable::name() const
9843  {
9844  switch (_type)
9845  {
9846  case xpath_type_node_set:
9847  return static_cast<const impl::xpath_variable_node_set*>(this)->name;
9848 
9849  case xpath_type_number:
9850  return static_cast<const impl::xpath_variable_number*>(this)->name;
9851 
9852  case xpath_type_string:
9853  return static_cast<const impl::xpath_variable_string*>(this)->name;
9854 
9855  case xpath_type_boolean:
9856  return static_cast<const impl::xpath_variable_boolean*>(this)->name;
9857 
9858  default:
9859  assert(!"Invalid variable type");
9860  return 0;
9861  }
9862  }
9863 
9864  PUGI__FN xpath_value_type xpath_variable::type() const
9865  {
9866  return _type;
9867  }
9868 
9869  PUGI__FN bool xpath_variable::get_boolean() const
9870  {
9871  return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
9872  }
9873 
9874  PUGI__FN double xpath_variable::get_number() const
9875  {
9876  return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
9877  }
9878 
9879  PUGI__FN const char_t* xpath_variable::get_string() const
9880  {
9881  const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
9882  return value ? value : PUGIXML_TEXT("");
9883  }
9884 
9885  PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
9886  {
9887  return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
9888  }
9889 
9890  PUGI__FN bool xpath_variable::set(bool value)
9891  {
9892  if (_type != xpath_type_boolean) return false;
9893 
9894  static_cast<impl::xpath_variable_boolean*>(this)->value = value;
9895  return true;
9896  }
9897 
9898  PUGI__FN bool xpath_variable::set(double value)
9899  {
9900  if (_type != xpath_type_number) return false;
9901 
9902  static_cast<impl::xpath_variable_number*>(this)->value = value;
9903  return true;
9904  }
9905 
9906  PUGI__FN bool xpath_variable::set(const char_t* value)
9907  {
9908  if (_type != xpath_type_string) return false;
9909 
9910  impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
9911 
9912  // duplicate string
9913  size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
9914 
9915  char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
9916  if (!copy) return false;
9917 
9918  memcpy(copy, value, size);
9919 
9920  // replace old string
9921  if (var->value) impl::xml_memory::deallocate(var->value);
9922  var->value = copy;
9923 
9924  return true;
9925  }
9926 
9927  PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
9928  {
9929  if (_type != xpath_type_node_set) return false;
9930 
9931  static_cast<impl::xpath_variable_node_set*>(this)->value = value;
9932  return true;
9933  }
9934 
9935  PUGI__FN xpath_variable_set::xpath_variable_set()
9936  {
9937  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
9938  }
9939 
9940  PUGI__FN xpath_variable_set::~xpath_variable_set()
9941  {
9942  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
9943  {
9944  xpath_variable* var = _data[i];
9945 
9946  while (var)
9947  {
9948  xpath_variable* next = var->_next;
9949 
9950  impl::delete_xpath_variable(var->_type, var);
9951 
9952  var = next;
9953  }
9954  }
9955  }
9956 
9957  PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
9958  {
9959  const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9960  size_t hash = impl::hash_string(name) % hash_size;
9961 
9962  // look for existing variable
9963  for (xpath_variable* var = _data[hash]; var; var = var->_next)
9964  if (impl::strequal(var->name(), name))
9965  return var;
9966 
9967  return 0;
9968  }
9969 
9970  PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
9971  {
9972  const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9973  size_t hash = impl::hash_string(name) % hash_size;
9974 
9975  // look for existing variable
9976  for (xpath_variable* var = _data[hash]; var; var = var->_next)
9977  if (impl::strequal(var->name(), name))
9978  return var->type() == type ? var : 0;
9979 
9980  // add new variable
9981  xpath_variable* result = impl::new_xpath_variable(type, name);
9982 
9983  if (result)
9984  {
9985  result->_type = type;
9986  result->_next = _data[hash];
9987 
9988  _data[hash] = result;
9989  }
9990 
9991  return result;
9992  }
9993 
9994  PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
9995  {
9996  xpath_variable* var = add(name, xpath_type_boolean);
9997  return var ? var->set(value) : false;
9998  }
9999 
10000  PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
10001  {
10002  xpath_variable* var = add(name, xpath_type_number);
10003  return var ? var->set(value) : false;
10004  }
10005 
10006  PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
10007  {
10008  xpath_variable* var = add(name, xpath_type_string);
10009  return var ? var->set(value) : false;
10010  }
10011 
10012  PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
10013  {
10014  xpath_variable* var = add(name, xpath_type_node_set);
10015  return var ? var->set(value) : false;
10016  }
10017 
10018  PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
10019  {
10020  return find(name);
10021  }
10022 
10023  PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
10024  {
10025  return find(name);
10026  }
10027 
10028  PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
10029  {
10030  impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
10031 
10032  if (!qimpl)
10033  {
10034  #ifdef PUGIXML_NO_EXCEPTIONS
10035  _result.error = "Out of memory";
10036  #else
10037  throw std::bad_alloc();
10038  #endif
10039  }
10040  else
10041  {
10042  impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
10043 
10044  qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
10045 
10046  if (qimpl->root)
10047  {
10048  _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
10049  _result.error = 0;
10050  }
10051  }
10052  }
10053 
10054  PUGI__FN xpath_query::~xpath_query()
10055  {
10056  impl::xpath_query_impl::destroy(_impl);
10057  }
10058 
10059  PUGI__FN xpath_value_type xpath_query::return_type() const
10060  {
10061  if (!_impl) return xpath_type_none;
10062 
10063  return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
10064  }
10065 
10066  PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
10067  {
10068  if (!_impl) return false;
10069 
10070  impl::xpath_context c(n, 1, 1);
10071  impl::xpath_stack_data sd;
10072 
10073  #ifdef PUGIXML_NO_EXCEPTIONS
10074  if (setjmp(sd.error_handler)) return false;
10075  #endif
10076 
10077  return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
10078  }
10079 
10080  PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
10081  {
10082  if (!_impl) return impl::gen_nan();
10083 
10084  impl::xpath_context c(n, 1, 1);
10085  impl::xpath_stack_data sd;
10086 
10087  #ifdef PUGIXML_NO_EXCEPTIONS
10088  if (setjmp(sd.error_handler)) return impl::gen_nan();
10089  #endif
10090 
10091  return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
10092  }
10093 
10094 #ifndef PUGIXML_NO_STL
10095  PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
10096  {
10097  impl::xpath_stack_data sd;
10098 
10099  return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();
10100  }
10101 #endif
10102 
10103  PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
10104  {
10105  impl::xpath_stack_data sd;
10106 
10107  impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
10108 
10109  size_t full_size = r.length() + 1;
10110 
10111  if (capacity > 0)
10112  {
10113  size_t size = (full_size < capacity) ? full_size : capacity;
10114  assert(size > 0);
10115 
10116  memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
10117  buffer[size - 1] = 0;
10118  }
10119 
10120  return full_size;
10121  }
10122 
10123  PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
10124  {
10125  if (!_impl) return xpath_node_set();
10126 
10127  impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
10128 
10129  if (root->rettype() != xpath_type_node_set)
10130  {
10131  #ifdef PUGIXML_NO_EXCEPTIONS
10132  return xpath_node_set();
10133  #else
10134  xpath_parse_result res;
10135  res.error = "Expression does not evaluate to node set";
10136 
10137  throw xpath_exception(res);
10138  #endif
10139  }
10140 
10141  impl::xpath_context c(n, 1, 1);
10142  impl::xpath_stack_data sd;
10143 
10144  #ifdef PUGIXML_NO_EXCEPTIONS
10145  if (setjmp(sd.error_handler)) return xpath_node_set();
10146  #endif
10147 
10148  impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
10149 
10150  return xpath_node_set(r.begin(), r.end(), r.type());
10151  }
10152 
10153  PUGI__FN const xpath_parse_result& xpath_query::result() const
10154  {
10155  return _result;
10156  }
10157 
10158  PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
10159  {
10160  }
10161 
10162  PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
10163  {
10164  return _impl ? unspecified_bool_xpath_query : 0;
10165  }
10166 
10167  PUGI__FN bool xpath_query::operator!() const
10168  {
10169  return !_impl;
10170  }
10171 
10172  PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
10173  {
10174  xpath_query q(query, variables);
10175  return select_single_node(q);
10176  }
10177 
10178  PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
10179  {
10180  xpath_node_set s = query.evaluate_node_set(*this);
10181  return s.empty() ? xpath_node() : s.first();
10182  }
10183 
10184  PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
10185  {
10186  xpath_query q(query, variables);
10187  return select_nodes(q);
10188  }
10189 
10190  PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
10191  {
10192  return query.evaluate_node_set(*this);
10193  }
10194 }
10195 
10196 #endif
10197 
10198 #ifdef __BORLANDC__
10199 # pragma option pop
10200 #endif
10201 
10202 // Intel C++ does not properly keep warning state for function templates,
10203 // so popping warning state at the end of translation unit leads to warnings in the middle.
10204 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
10205 # pragma warning(pop)
10206 #endif
10207 
10208 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
10209 #undef PUGI__NO_INLINE
10210 #undef PUGI__STATIC_ASSERT
10211 #undef PUGI__DMC_VOLATILE
10212 #undef PUGI__MSVC_CRT_VERSION
10213 #undef PUGI__NS_BEGIN
10214 #undef PUGI__NS_END
10215 #undef PUGI__FN
10216 #undef PUGI__FN_NO_INLINE
10217 #undef PUGI__IS_CHARTYPE_IMPL
10218 #undef PUGI__IS_CHARTYPE
10219 #undef PUGI__IS_CHARTYPEX
10220 #undef PUGI__SKIPWS
10221 #undef PUGI__OPTSET
10222 #undef PUGI__PUSHNODE
10223 #undef PUGI__POPNODE
10224 #undef PUGI__SCANFOR
10225 #undef PUGI__SCANWHILE
10226 #undef PUGI__ENDSEG
10227 #undef PUGI__THROW_ERROR
10228 #undef PUGI__CHECK_ERROR
10229 
10230 #endif
10231 /// @endcond
10232 /**
10233  * Copyright (c) 2006-2012 Arseny Kapoulkine
10234  *
10235  * Permission is hereby granted, free of charge, to any person
10236  * obtaining a copy of this software and associated documentation
10237  * files (the "Software"), to deal in the Software without
10238  * restriction, including without limitation the rights to use,
10239  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10240  * copies of the Software, and to permit persons to whom the
10241  * Software is furnished to do so, subject to the following
10242  * conditions:
10243  *
10244  * The above copyright notice and this permission notice shall be
10245  * included in all copies or substantial portions of the Software.
10246  *
10247  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
10248  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
10249  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
10250  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
10251  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
10252  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
10253  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
10254  * OTHER DEALINGS IN THE SOFTWARE.
10255  */
STL namespace.
To allow this test harness to be used without the mezzanine it uses pugixml for xml parsing and this should probably not be used by the tests