Spinning Topp Logo BlackTopp Studios
inc
xml.cpp
1 // © Copyright 2010 - 2016 BlackTopp Studios Inc.
2 /* This file is part of The Mezzanine Engine.
3 
4  The Mezzanine Engine is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  The Mezzanine Engine is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with The Mezzanine Engine. If not, see <http://www.gnu.org/licenses/>.
16 */
17 /* The original authors have included a copy of the license specified above in the
18  'Docs' folder. See 'gpl.txt'
19 */
20 /* We welcome the use of the Mezzanine engine to anyone, including companies who wish to
21  Build professional software and charge for their product.
22 
23  However there are some practical restrictions, so if your project involves
24  any of the following you should contact us and we will try to work something
25  out:
26  - DRM or Copy Protection of any kind(except Copyrights)
27  - Software Patents You Do Not Wish to Freely License
28  - Any Kind of Linking to Non-GPL licensed Works
29  - Are Currently In Violation of Another Copyright Holder's GPL License
30  - If You want to change our code and not add a few hundred MB of stuff to
31  your distribution
32 
33  These and other limitations could cause serious legal problems if you ignore
34  them, so it is best to simply contact us or the Free Software Foundation, if
35  you have any questions.
36 
37  Joseph Toppi - toppij@gmail.com
38  John Blackwood - makoenergy02@gmail.com
39 */
40 
41 /// @cond DontDocumentInternal
42 
43 /*
44  * pugixml parser - version 1.2
45  * --------------------------------------------------------
46  * Copyright © 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
47  * Report bugs and download new versions at http://pugixml.org/
48  *
49  * This library is distributed under the MIT License. See notice at the end
50  * of this file.
51  *
52  * This work is based on the pugxml parser, which is:
53  * Copyright © 2003, by Kristen Wegner (kristen@tima.net)
54  */
55 
56 #ifndef SOURCE_XML_CPP
57 #define SOURCE_XML_CPP
58 
59 #ifndef SWIG
60  #include "XML/xml.h"
61 #endif
62 #include "exception.h"
63 
64 
65 #include <stdlib.h>
66 #include <stdio.h>
67 #include <string.h>
68 #include <assert.h>
69 #include <wchar.h>
70 
71 
72 #include <math.h>
73 #include <float.h>
74 
75 
76 #include <istream>
77 #include <ostream>
78 #include <string>
79 
80 
81 // For placement new
82 #include <new>
83 
84 #ifdef _MSC_VER
85 # pragma warning(push)
86 # pragma warning(disable: 4127) // conditional expression is constant
87 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
88 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
89 # pragma warning(disable: 4702) // unreachable code
90 # pragma warning(disable: 4996) // this function or variable may be unsafe
91 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
92 #endif
93 
94 #ifdef __INTEL_COMPILER
95 # pragma warning(disable: 177) // function was declared but never referenced
96 # pragma warning(disable: 279) // controlling expression is constant
97 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
98 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
99 #endif
100 
101 #ifdef __SNC__
102 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
103 # pragma diag_suppress=178 // function was declared but never referenced
104 # pragma diag_suppress=237 // controlling expression is constant
105 #endif
106 
107 // Inlining controls
108 #if defined(_MSC_VER) && _MSC_VER >= 1300
109 # define PUGI__NO_INLINE __declspec(noinline)
110 #elif defined(__GNUC__)
111 # define PUGI__NO_INLINE __attribute__((noinline))
112 #else
113 # define PUGI__NO_INLINE
114 #endif
115 
116 // Simple static assertion
117 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
118 
// Digital Mars C++ bug workaround for passing a char loaded from memory via the stack
120 #ifdef __DMC__
121 # define PUGI__DMC_VOLATILE volatile
122 #else
123 # define PUGI__DMC_VOLATILE
124 #endif
125 
126 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
127 #if defined(_MSC_VER) && !defined(__S3E__)
128 # define PUGI__MSVC_CRT_VERSION _MSC_VER
129 #endif
130 
131 #ifdef XML_HEADER_ONLY
132 # define PUGI__NS_BEGIN namespace XML { namespace internal {
133 # define PUGI__NS_END } }
134 # define PUGI__FN inline
135 # define PUGI__FN_NO_INLINE inline
136 #else
137 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
138 # define PUGI__NS_BEGIN namespace XML { namespace internal {
139 # define PUGI__NS_END } }
140 # else
141 # define PUGI__NS_BEGIN namespace XML { namespace internal { namespace {
142 # define PUGI__NS_END } } }
143 # endif
144 # define PUGI__FN
145 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
146 #endif
147 
148 // uintptr_t
149 #if !defined(_MSC_VER) || _MSC_VER >= 1600
150 # include <stdint.h>
151 #else
152 # ifndef _UINTPTR_T_DEFINED
153 // No native uintptr_t in MSVC6 and in some WinCE versions
154 typedef size_t uintptr_t;
155 #define _UINTPTR_T_DEFINED
156 # endif
157 PUGI__NS_BEGIN
158  typedef unsigned __int8 uint8_t;
159  typedef unsigned __int16 uint16_t;
160  typedef unsigned __int32 uint32_t;
161 PUGI__NS_END
162 #endif
163 
164 namespace Mezzanine {
165 // Memory allocation
166 
167 PUGI__NS_BEGIN
168  PUGI__FN void* default_allocate(size_t size)
169  {
170  return malloc(size);
171  }
172 
173  PUGI__FN void default_deallocate(void* ptr)
174  {
175  free(ptr);
176  }
177 
178  template <typename T>
179  struct MemoryManagement_function_storage
180  {
181  static AllocationFunction allocate;
182  static DeAllocationFunction deallocate;
183  };
184 
185  template <typename T> AllocationFunction MemoryManagement_function_storage<T>::allocate = default_allocate;
186  template <typename T> DeAllocationFunction MemoryManagement_function_storage<T>::deallocate = default_deallocate;
187 
188  typedef MemoryManagement_function_storage<int> Memory;
189 PUGI__NS_END
190 
191 // String utilities
192 PUGI__NS_BEGIN
193  // Get string length
194  PUGI__FN size_t strlength(const Char8* s)
195  {
196  assert(s);
197 
198  return strlen(s);
199  }
200 
201  // Compare two strings
202  PUGI__FN bool strequal(const Char8* src, const Char8* dst)
203  {
204  assert(src && dst);
205 
206  return strcmp(src, dst) == 0;
207 
208  }
209 
210  // Compare lhs with [rhs_begin, rhs_end)
211  PUGI__FN bool strequalrange(const Char8* lhs, const Char8* rhs, size_t count)
212  {
213  for (size_t i = 0; i < count; ++i)
214  if (lhs[i] != rhs[i])
215  return false;
216 
217  return lhs[count] == 0;
218  }
219 
220 PUGI__NS_END
221 
222 // auto_ptr-like buffer holder for exception recovery
223 PUGI__NS_BEGIN
// Scoped owner of a raw buffer: unless release() has been called, the destructor passes
// the buffer to the deleter supplied at construction. Used so that a buffer is freed
// when an exception unwinds through the owning scope.
struct buffer_holder
{
    void* data;               // buffer currently owned, or null when nothing is owned
    void (*deleter)(void*);   // function called on `data` by the destructor

    // Takes ownership of `data_`; `deleter_` is called on it when the holder is destroyed.
    buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
    {
    }

    // Frees the owned buffer, if any. A null `data` is skipped without calling the deleter.
    ~buffer_holder()
    {
        if (data) deleter(data);
    }

    // Gives up ownership: returns the buffer and leaves the holder empty, so the
    // destructor will not free it.
    void* release()
    {
        void* Result = data;
        data = 0;
        return Result;
    }

private:
    // A copy would leave two holders owning the same buffer and the deleter would run
    // twice. Both copy operations are therefore declared private and never defined.
    buffer_holder(const buffer_holder&);
    buffer_holder& operator=(const buffer_holder&);
};
245 PUGI__NS_END
246 
247 
248 PUGI__NS_BEGIN
// Capacity in bytes of the data area of a regular memory page. Define
// XML_MEMORY_PAGE_SIZE at build time to override the default.
#ifdef XML_MEMORY_PAGE_SIZE
static const size_t MemoryPage_size = XML_MEMORY_PAGE_SIZE;
#else
static const size_t MemoryPage_size = 32768;
#endif

// Page headers are placed on addresses that are multiples of this value, which keeps
// the low five bits of a page address zero so they can carry flags in a node header.
static const uintptr_t MemoryPage_alignment = 32;

// Clears the flag bits of a header, leaving only the page address.
static const uintptr_t MemoryPage_pointer_mask = ~(MemoryPage_alignment - 1);

// Header flag bits: Name / Value point at strings that must be freed with deallocate_string.
static const uintptr_t MemoryPage_Name_allocated_mask = 0x10;
static const uintptr_t MemoryPage_Value_allocated_mask = 0x08;

// Header bits that hold the node type (stored as type - 1).
static const uintptr_t MemoryPage_type_mask = 0x07;
262 
struct Allocator;

// Header placed at the start of every page of allocator memory. The usable storage
// starts at `data` and continues beyond the end of the struct.
struct MemoryPage
{
    // Turns `memory` into a page header with every bookkeeping field zeroed and returns it.
    // A null `memory` yields a null result.
    static MemoryPage* construct(void* memory)
    {
        if (!memory) return 0; //$ redundant, left for performance

        MemoryPage* page = static_cast<MemoryPage*>(memory);

        page->allocator = 0;
        page->memory = 0;
        page->prev = page->next = 0;
        page->busy_size = page->freed_size = 0;

        return page;
    }

    Allocator* allocator;   // allocator this page belongs to

    void* memory;           // block as returned by the allocation hook; passed back when the page is freed

    MemoryPage* prev;       // neighbouring pages in the allocator's page list
    MemoryPage* next;

    size_t busy_size;       // bytes of `data` handed out so far
    size_t freed_size;      // bytes of `data` that have been returned

    char data[1];           // first byte of the page's storage area
};

// Bookkeeping stored directly in front of every string handed out by allocate_string.
struct MemoryString_header
{
    uint16_t page_Offset; // offset of this header from page->data
    uint16_t full_size;   // size of header plus string; 0 if the string occupies the whole page
};
301 
302  struct Allocator {
303  Allocator(MemoryPage* GetRoot): _GetRoot(GetRoot), _busy_size(GetRoot->busy_size)
304  {
305  }
306 
307  MemoryPage* allocate_page(size_t data_size)
308  {
309  size_t size = offsetof(MemoryPage, data) + data_size;
310 
311  // allocate block with some alignment, leaving memory for worst-case padding
312  void* memory = Memory::allocate(size + MemoryPage_alignment);
313  if (!memory) return 0;
314 
315  // align upwards to page boundary
316  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (MemoryPage_alignment - 1)) & ~(MemoryPage_alignment - 1));
317 
318  // prepare page structure
319  MemoryPage* page = MemoryPage::construct(page_memory);
320 
321  page->memory = memory;
322  page->allocator = _GetRoot->allocator;
323 
324  return page;
325  }
326 
327  static void deallocate_page(MemoryPage* page)
328  {
329  Memory::deallocate(page->memory);
330  }
331 
332  void* allocate_memory_oob(size_t size, MemoryPage*& out_page);
333 
334  void* allocate_memory(size_t size, MemoryPage*& out_page)
335  {
336  if (_busy_size + size > MemoryPage_size) return allocate_memory_oob(size, out_page);
337 
338  void* buf = _GetRoot->data + _busy_size;
339 
340  _busy_size += size;
341 
342  out_page = _GetRoot;
343 
344  return buf;
345  }
346 
347  void deallocate_memory(void* ptr, size_t size, MemoryPage* page)
348  {
349  if (page == _GetRoot) page->busy_size = _busy_size;
350 
351  assert(ptr >= page->data && ptr < page->data + page->busy_size);
352  (void)!ptr;
353 
354  page->freed_size += size;
355  assert(page->freed_size <= page->busy_size);
356 
357  if (page->freed_size == page->busy_size)
358  {
359  if (page->next == 0)
360  {
361  assert(_GetRoot == page);
362 
363  // top page freed, just Reset sizes
364  page->busy_size = page->freed_size = 0;
365  _busy_size = 0;
366  }
367  else
368  {
369  assert(_GetRoot != page);
370  assert(page->prev);
371 
372  // remove from the list
373  page->prev->next = page->next;
374  page->next->prev = page->prev;
375 
376  // deallocate
377  deallocate_page(page);
378  }
379  }
380  }
381 
382  Char8* allocate_string(size_t length)
383  {
384  // allocate memory for string and header block
385  size_t size = sizeof(MemoryString_header) + length * sizeof(Char8);
386 
387  // round size up to pointer alignment boundary
388  size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
389 
390  MemoryPage* page;
391  MemoryString_header* header = static_cast<MemoryString_header*>(allocate_memory(full_size, page));
392 
393  if (!header) return 0;
394 
395  // setup header
396  ptrdiff_t page_Offset = reinterpret_cast<char*>(header) - page->data;
397 
398  assert(page_Offset >= 0 && page_Offset < (1 << 16));
399  header->page_Offset = static_cast<uint16_t>(page_Offset);
400 
401  // full_size == 0 for large strings that occupy the whole page
402  assert(full_size < (1 << 16) || (page->busy_size == full_size && page_Offset == 0));
403  header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
404 
405  // round-trip through void* to avoid 'cast increases required alignment of target Type' warning
406  // header is guaranteed a pointer-sized alignment, which should be enough for char_t
407  return static_cast<Char8*>(static_cast<void*>(header + 1));
408  }
409 
410  void deallocate_string(Char8* string)
411  {
412  // this function casts pointers through void* to avoid 'cast increases required alignment of target Type' warnings
413  // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
414 
415  // get header
416  MemoryString_header* header = static_cast<MemoryString_header*>(static_cast<void*>(string)) - 1;
417 
418  // deallocate
419  size_t page_Offset = offsetof(MemoryPage, data) + header->page_Offset;
420  MemoryPage* page = reinterpret_cast<MemoryPage*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_Offset));
421 
422  // if full_size == 0 then this string occupies the whole page
423  size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
424 
425  deallocate_memory(header, full_size, page);
426  }
427 
428  MemoryPage* _GetRoot;
429  size_t _busy_size;
430  };
431 
432  PUGI__FN_NO_INLINE void* Allocator::allocate_memory_oob(size_t size, MemoryPage*& out_page)
433  {
434  const size_t large_allocation_threshold = MemoryPage_size / 4;
435 
436  MemoryPage* page = allocate_page(size <= large_allocation_threshold ? MemoryPage_size : size);
437  out_page = page;
438 
439  if (!page) return 0;
440 
441  if (size <= large_allocation_threshold)
442  {
443  _GetRoot->busy_size = _busy_size;
444 
445  // insert page at the end of linked list
446  page->prev = _GetRoot;
447  _GetRoot->next = page;
448  _GetRoot = page;
449 
450  _busy_size = size;
451  }
452  else
453  {
454  // insert page before the end of linked list, so that it is deleted as soon as possible
455  // the last page is not deleted even if it's empty (see deallocate_memory)
456  assert(_GetRoot->prev);
457 
458  page->prev = _GetRoot->prev;
459  page->next = _GetRoot;
460 
461  _GetRoot->prev->next = page;
462  _GetRoot->prev = page;
463  }
464 
465  // allocate inside page
466  page->busy_size = size;
467 
468  return page->data;
469  }
470 PUGI__NS_END
471 
472 namespace XML
473 {
474  //// A 'Name=Value' XML GetAttribute structure.
475  struct AttributeStruct
476  {
477  //// Default ctor
478  AttributeStruct(internal::MemoryPage* page): header(reinterpret_cast<uintptr_t>(page)), Name(0), Value(0), prev_attribute_c(0), GetNextAttribute(0)
479  {
480  }
481 
482  uintptr_t header;
483 
484  Char8* Name; ////< Pointer to GetAttribute Name.
485  Char8* Value; ////< Pointer to GetAttribute Value.
486 
487  AttributeStruct* prev_attribute_c; ////< Previous GetAttribute (cyclic list)
488  AttributeStruct* GetNextAttribute; ////< Next attribute
489  };
490 
491  //// An XML document tree node.
492  struct NodeStruct
493  {
494  //// Default ctor
495  //// \param Type - node type
496  NodeStruct(internal::MemoryPage* page, NodeType Type): header(reinterpret_cast<uintptr_t>(page) | (Type - 1)), GetParent(0), Name(0), Value(0), GetFirstChild(0), prev_sibling_c(0), GetNextSibling(0), GetFirstAttribute(0)
497  {
498  }
499 
500  uintptr_t header;
501 
502  NodeStruct* GetParent; ////< Pointer to GetParent
503 
504  Char8* Name; ////< Pointer to element Name.
505  Char8* Value; ////< Pointer to any associated string data.
506 
507  NodeStruct* GetFirstChild; ////< First GetChild
508 
509  NodeStruct* prev_sibling_c; ////< Left brother (cyclic list)
510  NodeStruct* GetNextSibling; ////< Right brother
511 
512  AttributeStruct* GetFirstAttribute; ////< First attribute
513  };
514 }
515 
516 PUGI__NS_BEGIN
517  struct DocumentStruct: public NodeStruct, public Allocator
518  {
519  DocumentStruct(MemoryPage* page): NodeStruct(page, NodeDocument), Allocator(page), buffer(0)
520  {
521  }
522 
523  const Char8* buffer;
524  };
525 
526  inline Allocator& GetAllocator(const NodeStruct* node)
527  {
528  assert(node);
529 
530  return *reinterpret_cast<MemoryPage*>(node->header & MemoryPage_pointer_mask)->allocator;
531  }
532 PUGI__NS_END
533 
534 // Low-level DOM operations
535 PUGI__NS_BEGIN
536  inline AttributeStruct* allocate_attribute(Allocator& alloc)
537  {
538  MemoryPage* page;
539  void* memory = alloc.allocate_memory(sizeof(AttributeStruct), page);
540 
541  return new (memory) AttributeStruct(page);
542  }
543 
544  inline NodeStruct* allocate_node(Allocator& alloc, NodeType Type)
545  {
546  MemoryPage* page;
547  void* memory = alloc.allocate_memory(sizeof(NodeStruct), page);
548 
549  return new (memory) NodeStruct(page, Type);
550  }
551 
552  inline void destroy_attribute(AttributeStruct* a, Allocator& alloc)
553  {
554  uintptr_t header = a->header;
555 
556  if (header & internal::MemoryPage_Name_allocated_mask) alloc.deallocate_string(a->Name);
557  if (header & internal::MemoryPage_Value_allocated_mask) alloc.deallocate_string(a->Value);
558 
559  alloc.deallocate_memory(a, sizeof(AttributeStruct), reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask));
560  }
561 
562  inline void destroy_node(NodeStruct* n, Allocator& alloc)
563  {
564  uintptr_t header = n->header;
565 
566  if (header & internal::MemoryPage_Name_allocated_mask) alloc.deallocate_string(n->Name);
567  if (header & internal::MemoryPage_Value_allocated_mask) alloc.deallocate_string(n->Value);
568 
569  for (AttributeStruct* attr = n->GetFirstAttribute; attr; )
570  {
571  AttributeStruct* next = attr->GetNextAttribute;
572 
573  destroy_attribute(attr, alloc);
574 
575  attr = next;
576  }
577 
578  for (NodeStruct* GetChild = n->GetFirstChild; GetChild; )
579  {
580  NodeStruct* next = GetChild->GetNextSibling;
581 
582  destroy_node(GetChild, alloc);
583 
584  GetChild = next;
585  }
586 
587  alloc.deallocate_memory(n, sizeof(NodeStruct), reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask));
588  }
589 
590  PUGI__FN_NO_INLINE NodeStruct* AppendNode(NodeStruct* node, Allocator& alloc, NodeType Type = NodeElement)
591  {
592  NodeStruct* GetChild = allocate_node(alloc, Type);
593  if (!GetChild) return 0;
594 
595  GetChild->GetParent = node;
596 
597  NodeStruct* GetFirstChild = node->GetFirstChild;
598 
599  if (GetFirstChild)
600  {
601  NodeStruct* GetLastChild = GetFirstChild->prev_sibling_c;
602 
603  GetLastChild->GetNextSibling = GetChild;
604  GetChild->prev_sibling_c = GetLastChild;
605  GetFirstChild->prev_sibling_c = GetChild;
606  }
607  else
608  {
609  node->GetFirstChild = GetChild;
610  GetChild->prev_sibling_c = GetChild;
611  }
612 
613  return GetChild;
614  }
615 
616  PUGI__FN_NO_INLINE AttributeStruct* AppendAttribute_ll(NodeStruct* node, Allocator& alloc)
617  {
618  AttributeStruct* a = allocate_attribute(alloc);
619  if (!a) return 0;
620 
621  AttributeStruct* GetFirstAttribute = node->GetFirstAttribute;
622 
623  if (GetFirstAttribute)
624  {
625  AttributeStruct* GetLastAttribute = GetFirstAttribute->prev_attribute_c;
626 
627  GetLastAttribute->GetNextAttribute = a;
628  a->prev_attribute_c = GetLastAttribute;
629  GetFirstAttribute->prev_attribute_c = a;
630  }
631  else
632  {
633  node->GetFirstAttribute = a;
634  a->prev_attribute_c = a;
635  }
636 
637  return a;
638  }
639 PUGI__NS_END
640 
641 // Helper classes for code generation
642 PUGI__NS_BEGIN
// Compile-time "off" switch for template parameters; Value is the constant 0.
struct opt_false
{
    enum
    {
        Value = 0
    };
};

// Compile-time "on" switch for template parameters; Value is the constant 1.
struct opt_true
{
    enum
    {
        Value = 1
    };
};
652 PUGI__NS_END
653 
654 // Unicode utilities
655 PUGI__NS_BEGIN
// Returns `Value` with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t Value)
{
    const unsigned int low_byte = Value & 0xffu;
    const unsigned int high_byte = Value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
660 
// Returns `Value` with the order of its four bytes reversed.
inline uint32_t endian_swap(uint32_t Value)
{
    const uint32_t byte0_to_3 = Value << 24;
    const uint32_t byte1_to_2 = (Value << 8) & 0xff0000;
    const uint32_t byte2_to_1 = (Value >> 8) & 0xff00;
    const uint32_t byte3_to_0 = Value >> 24;

    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
665 
// Decoder sink that counts how many UTF-8 bytes the decoded code points would occupy.
struct utf8_counter
{
    typedef size_t value_type;

    // Adds the UTF-8 length of a code point below U+10000 to the running total.
    static value_type low(value_type Result, uint32_t ch)
    {
        // U+0000..U+007F: 1 byte, U+0080..U+07FF: 2 bytes, U+0800..U+FFFF: 3 bytes
        const value_type width = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

        return Result + width;
    }

    // Adds the UTF-8 length of a code point in U+10000..U+10FFFF (always 4 bytes).
    static value_type high(value_type Result, uint32_t)
    {
        return Result + 4;
    }
};
686 
// Decoder sink that encodes code points as UTF-8 into a caller-provided buffer.
// Every function writes at `Result` and returns the position just past what it wrote.
struct utf8_WriterInstance
{
    typedef uint8_t* value_type;

    // Encodes a code point below U+10000 as one, two or three bytes.
    static value_type low(value_type Result, uint32_t ch)
    {
        // U+0000..U+007F: the byte is the code point
        if (ch < 0x80)
        {
            Result[0] = static_cast<uint8_t>(ch);
            return Result + 1;
        }

        // U+0080..U+07FF: 110xxxxx 10xxxxxx
        if (ch < 0x800)
        {
            Result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
            Result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
            return Result + 2;
        }

        // U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
        Result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
        Result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        Result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return Result + 3;
    }

    // Encodes a code point in U+10000..U+10FFFF as four bytes: 11110xxx followed by
    // three 10xxxxxx bytes.
    static value_type high(value_type Result, uint32_t ch)
    {
        Result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
        Result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        Result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        Result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return Result + 4;
    }

    // Encodes any code point by dispatching to low() or high().
    static value_type any(value_type Result, uint32_t ch)
    {
        if (ch < 0x10000) return low(Result, ch);

        return high(Result, ch);
    }
};
731 
// Decoder sink that counts how many UTF-16 code units the decoded code points would occupy.
struct utf16_counter
{
    typedef size_t value_type;

    // A code point below U+10000 takes a single code unit.
    static value_type low(value_type Result, uint32_t)
    {
        const value_type units = 1;

        return Result + units;
    }

    // A code point from U+10000 upwards takes a surrogate pair, i.e. two code units.
    static value_type high(value_type Result, uint32_t)
    {
        const value_type units = 2;

        return Result + units;
    }
};
746 
// Decoder sink that encodes code points as UTF-16 into a caller-provided buffer.
// Every function writes at `Result` and returns the position just past what it wrote.
struct utf16_WriterInstance
{
    typedef uint16_t* value_type;

    // Stores a code point below U+10000 as a single code unit.
    static value_type low(value_type Result, uint32_t ch)
    {
        Result[0] = static_cast<uint16_t>(ch);

        return Result + 1;
    }

    // Stores a code point from U+10000 upwards as a surrogate pair.
    static value_type high(value_type Result, uint32_t ch)
    {
        // the pair carries (ch - 0x10000): upper 10 bits in the lead, lower 10 in the trail
        const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

        Result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
        Result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

        return Result + 2;
    }

    // Stores any code point by dispatching to low() or high().
    static value_type any(value_type Result, uint32_t ch)
    {
        if (ch < 0x10000) return low(Result, ch);

        return high(Result, ch);
    }
};
774 
// Decoder sink that counts decoded code points; in UTF-32 each one takes exactly one unit,
// so low() and high() behave the same.
struct utf32_counter
{
    typedef size_t value_type;

    // Counts one unit for a code point below U+10000.
    static value_type low(value_type Result, uint32_t)
    {
        return ++Result;
    }

    // Counts one unit for a code point from U+10000 upwards.
    static value_type high(value_type Result, uint32_t)
    {
        return ++Result;
    }
};
789 
// Decoder sink that stores code points as UTF-32 units in a caller-provided buffer.
// All three functions store `ch` unchanged at `Result` and return the next position.
struct utf32_WriterInstance
{
    typedef uint32_t* value_type;

    // Stores a code point below U+10000.
    static value_type low(value_type Result, uint32_t ch)
    {
        Result[0] = ch;

        return Result + 1;
    }

    // Stores a code point from U+10000 upwards.
    static value_type high(value_type Result, uint32_t ch)
    {
        Result[0] = ch;

        return Result + 1;
    }

    // Stores any code point.
    static value_type any(value_type Result, uint32_t ch)
    {
        Result[0] = ch;

        return Result + 1;
    }
};
815 
// Decoder sink that stores code points as Latin-1 bytes in a caller-provided buffer.
// Code points that do not fit in one byte are replaced with '?'.
struct latin1_WriterInstance
{
    typedef uint8_t* value_type;

    // Stores a code point below U+10000: the value itself up to 255, otherwise '?'.
    static value_type low(value_type Result, uint32_t ch)
    {
        if (ch > 255)
            Result[0] = static_cast<uint8_t>('?');
        else
            Result[0] = static_cast<uint8_t>(ch);

        return Result + 1;
    }

    // Stores '?' for a code point from U+10000 upwards; the value itself is ignored.
    static value_type high(value_type Result, uint32_t ch)
    {
        (void)ch;

        Result[0] = '?';

        return Result + 1;
    }
};
836 
837  template <size_t size> struct wchar_selector;
838 
839  template <> struct wchar_selector<2>
840  {
841  typedef uint16_t Type;
842  typedef utf16_counter counter;
843  typedef utf16_WriterInstance WriterInstance;
844  };
845 
846  template <> struct wchar_selector<4>
847  {
848  typedef uint32_t Type;
849  typedef utf32_counter counter;
850  typedef utf32_WriterInstance WriterInstance;
851  };
852 
853  typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
854  typedef wchar_selector<sizeof(wchar_t)>::WriterInstance wchar_WriterInstance;
855 
856  template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
857  {
858  static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type Result)
859  {
860  const uint8_t utf8_byte_mask = 0x3f;
861 
862  while (size)
863  {
864  uint8_t lead = *data;
865 
866  // 0xxxxxxx -> U+0000..U+007F
867  if (lead < 0x80)
868  {
869  Result = Traits::low(Result, lead);
870  data += 1;
871  size -= 1;
872 
873  // process aligned single-byte (ascii) blocks
874  if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
875  {
876  // round-trip through void* to silence 'cast increases required alignment of target Type' warnings
877  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
878  {
879  Result = Traits::low(Result, data[0]);
880  Result = Traits::low(Result, data[1]);
881  Result = Traits::low(Result, data[2]);
882  Result = Traits::low(Result, data[3]);
883  data += 4;
884  size -= 4;
885  }
886  }
887  }
888  // 110xxxxx -> U+0080..U+07FF
889  else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
890  {
891  Result = Traits::low(Result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
892  data += 2;
893  size -= 2;
894  }
895  // 1110xxxx -> U+0800-U+FFFF
896  else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
897  {
898  Result = Traits::low(Result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
899  data += 3;
900  size -= 3;
901  }
902  // 11110xxx -> U+10000..U+10FFFF
903  else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
904  {
905  Result = Traits::high(Result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
906  data += 4;
907  size -= 4;
908  }
909  // 10xxxxxx or 11111xxx -> invalid
910  else
911  {
912  data += 1;
913  size -= 1;
914  }
915  }
916 
917  return Result;
918  }
919 
920  static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type Result)
921  {
922  const uint16_t* end = data + size;
923 
924  while (data < end)
925  {
926  uint16_t lead = opt_swap::Value ? endian_swap(*data) : *data;
927 
928  // U+0000..U+D7FF
929  if (lead < 0xD800)
930  {
931  Result = Traits::low(Result, lead);
932  data += 1;
933  }
934  // U+E000..U+FFFF
935  else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
936  {
937  Result = Traits::low(Result, lead);
938  data += 1;
939  }
940  // surrogate pair lead
941  else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
942  {
943  uint16_t next = opt_swap::Value ? endian_swap(data[1]) : data[1];
944 
945  if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
946  {
947  Result = Traits::high(Result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
948  data += 2;
949  }
950  else
951  {
952  data += 1;
953  }
954  }
955  else
956  {
957  data += 1;
958  }
959  }
960 
961  return Result;
962  }
963 
964  static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type Result)
965  {
966  const uint32_t* end = data + size;
967 
968  while (data < end)
969  {
970  uint32_t lead = opt_swap::Value ? endian_swap(*data) : *data;
971 
972  // U+0000..U+FFFF
973  if (lead < 0x10000)
974  {
975  Result = Traits::low(Result, lead);
976  data += 1;
977  }
978  // U+10000..U+10FFFF
979  else
980  {
981  Result = Traits::high(Result, lead);
982  data += 1;
983  }
984  }
985 
986  return Result;
987  }
988 
989  static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type Result)
990  {
991  for (size_t i = 0; i < size; ++i)
992  {
993  Result = Traits::low(Result, data[i]);
994  }
995 
996  return Result;
997  }
998 
999  static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type Result)
1000  {
1001  return decode_utf16_block(data, size, Result);
1002  }
1003 
1004  static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type Result)
1005  {
1006  return decode_utf32_block(data, size, Result);
1007  }
1008 
1009  static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type Result)
1010  {
1011  return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::Type*>(data), size, Result);
1012  }
1013  };
1014 
1015  template <typename T> PUGI__FN void convert_utf_endian_swap(T* Result, const T* data, size_t length)
1016  {
1017  for (size_t i = 0; i < length; ++i) Result[i] = endian_swap(data[i]);
1018  }
1019 
1020 PUGI__NS_END
1021 
1022 PUGI__NS_BEGIN
// Character classes used by the lookup table below. Each class is one bit, so a table
// entry can place a character in several classes at once.
enum charCollectionType
{
    ct_ParsePcdata   = 1 << 0, // \0, &, \r, <
    ct_ParseAttr     = 1 << 1, // \0, &, \r, ', "
    ct_ParseAttrWs   = 1 << 2, // \0, &, \r, ', ", \n, tab
    ct_space         = 1 << 3, // \r, \n, space, tab
    ct_ParseCdata    = 1 << 4, // \0, ], >, \r
    ct_ParseComment  = 1 << 5, // \0, -, >, \r
    ct_symbol        = 1 << 6, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol  = 1 << 7  // Any symbol > 127, a-z, A-Z, _, :
};

// Class bits for every possible byte value, indexed by the byte.
static const unsigned char charCollectionTypeable[256] =
{
    // control characters: only \0, tab, \n and \r carry any class
    55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
    // space and punctuation
    8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
    // digits, then : ; < = > ?
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
    // @ and upper-case letters
    0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
    // ` and lower-case letters
    0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127

    // every byte above 127 counts as both a symbol and a start symbol
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
1055 
// Second set of character classes, used by the lookup table below. Each class is one
// bit, so a table entry can place a character in several classes at once.
enum charTypex_t
{
    ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr   = 1 << 1, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
    ctx_start_symbol   = 1 << 2, // Any symbol > 127, a-z, A-Z, _
    ctx_digit          = 1 << 3, // 0-9
    ctx_symbol         = 1 << 4  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};

// Class bits for every possible byte value, indexed by the byte.
static const unsigned char charTypex_table[256] =
{
    // control characters: all special except tab; \n and \r are special in attributes only
    3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
    // space and punctuation
    0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
    // digits, then : ; < = > ?
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63

    // @ and upper-case letters
    0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
    // ` and lower-case letters
    0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127

    // every byte above 127 counts as both a symbol and a start symbol
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
1086 
// Looks up character c in a 256-entry flag table and masks the entry with ct.
// c is converted to unsigned char first, so negative char values index the upper
// half of the table. The result is non-zero when any requested flag is set.
#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((table)[static_cast<unsigned char>(c)] & (ct))

// Flag test against the primary classification table.
#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, charCollectionTypeable)
// Flag test against the extended classification table.
#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, charTypex_table)
1091 
1092  PUGI__FN bool is_little_endian()
1093  {
1094  unsigned int ui = 1;
1095 
1096  return *reinterpret_cast<unsigned char*>(&ui) == 1;
1097  }
1098 
1099  PUGI__FN Encoding GetWchar_DocumentEncoding()
1100  {
1101  PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1102 
1103  if (sizeof(wchar_t) == 2)
1104  return is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
1105  else
1106  return is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
1107  }
1108 
1109  PUGI__FN Encoding guess_buffer_DocumentEncoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1110  {
1111  // look for BOM in first few bytes
1112  if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return EncodingUTF32BE;
1113  if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return EncodingUTF32LE;
1114  if (d0 == 0xfe && d1 == 0xff) return EncodingUTF16BE;
1115  if (d0 == 0xff && d1 == 0xfe) return EncodingUTF16LE;
1116  if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return EncodingUTF8;
1117 
1118  // look for <, <? or <?xm in various DocumentEncodings
1119  if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return EncodingUTF32BE;
1120  if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return EncodingUTF32LE;
1121  if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return EncodingUTF16BE;
1122  if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return EncodingUTF16LE;
1123  if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return EncodingUTF8;
1124 
1125  // look for utf16 < followed by node Name (this may fail, but is better than utf8 since it's zero terminated so early)
1126  if (d0 == 0 && d1 == 0x3c) return EncodingUTF16BE;
1127  if (d0 == 0x3c && d1 == 0) return EncodingUTF16LE;
1128 
1129  // no known BOM detected, assume utf8
1130  return EncodingUTF8;
1131  }
1132 
1133  PUGI__FN Encoding GetBuffer_DocumentEncoding(Encoding DocumentEncoding, const void* contents, size_t size)
1134  {
1135  // replace wchar DocumentEncoding with utf implementation
1136  if (DocumentEncoding == Encodingwchar_t) return GetWchar_DocumentEncoding();
1137 
1138  // replace utf16 DocumentEncoding with utf16 with specific endianness
1139  if (DocumentEncoding == EncodingUTF16) return is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
1140 
1141  // replace utf32 DocumentEncoding with utf32 with specific endianness
1142  if (DocumentEncoding == EncodingUTF32) return is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
1143 
1144  // only do autodetection if no explicit DocumentEncoding is requested
1145  if (DocumentEncoding != EncodingAuto) return DocumentEncoding;
1146 
1147  // skip DocumentEncoding autodetection if input buffer is too small
1148  if (size < 4) return EncodingUTF8;
1149 
1150  // try to guess DocumentEncoding (based on XML specification, Appendix F.1)
1151  const uint8_t* data = static_cast<const uint8_t*>(contents);
1152 
1153  PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1154 
1155  return guess_buffer_DocumentEncoding(d0, d1, d2, d3);
1156  }
1157 
1158  PUGI__FN bool GetMutable_buffer(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1159  {
1160  if (is_mutable)
1161  {
1162  out_buffer = static_cast<Char8*>(const_cast<void*>(contents));
1163  }
1164  else
1165  {
1166  void* buffer = Memory::allocate(size > 0 ? size : 1);
1167  if (!buffer) return false;
1168 
1169  memcpy(buffer, contents, size);
1170 
1171  out_buffer = static_cast<Char8*>(buffer);
1172  }
1173 
1174  out_length = size / sizeof(Char8);
1175 
1176  return true;
1177  }
1178 
1179  template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1180  {
1181  const uint16_t* data = static_cast<const uint16_t*>(contents);
1182  size_t length = size / sizeof(uint16_t);
1183 
1184  // first pass: get length in utf8 units
1185  out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
1186 
1187  // allocate buffer of suitable length
1188  out_buffer = static_cast<Char8*>(Memory::allocate((out_length > 0 ? out_length : 1) * sizeof(Char8)));
1189  if (!out_buffer) return false;
1190 
1191  // second pass: convert utf16 input to utf8
1192  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1193  uint8_t* out_end = utf_decoder<utf8_WriterInstance, opt_swap>::decode_utf16_block(data, length, out_begin);
1194 
1195  assert(out_end == out_begin + out_length);
1196  (void)!out_end;
1197 
1198  return true;
1199  }
1200 
1201  template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1202  {
1203  const uint32_t* data = static_cast<const uint32_t*>(contents);
1204  size_t length = size / sizeof(uint32_t);
1205 
1206  // first pass: get length in utf8 units
1207  out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
1208 
1209  // allocate buffer of suitable length
1210  out_buffer = static_cast<Char8*>(Memory::allocate((out_length > 0 ? out_length : 1) * sizeof(Char8)));
1211  if (!out_buffer) return false;
1212 
1213  // second pass: convert utf32 input to utf8
1214  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1215  uint8_t* out_end = utf_decoder<utf8_WriterInstance, opt_swap>::decode_utf32_block(data, length, out_begin);
1216 
1217  assert(out_end == out_begin + out_length);
1218  (void)!out_end;
1219 
1220  return true;
1221  }
1222 
1223  PUGI__FN size_t GetLatin1_7bit_prefix_length(const uint8_t* data, size_t size)
1224  {
1225  for (size_t i = 0; i < size; ++i)
1226  if (data[i] > 127)
1227  return i;
1228 
1229  return size;
1230  }
1231 
1232  PUGI__FN bool convert_buffer_latin1(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1233  {
1234  const uint8_t* data = static_cast<const uint8_t*>(contents);
1235 
1236  // get size of prefix that does not need utf8 conversion
1237  size_t prefix_length = GetLatin1_7bit_prefix_length(data, size);
1238  assert(prefix_length <= size);
1239 
1240  const uint8_t* postfix = data + prefix_length;
1241  size_t postfix_length = size - prefix_length;
1242 
1243  // if no conversion is needed, just return the original buffer
1244  if (postfix_length == 0) return GetMutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1245 
1246  // first pass: get length in utf8 units
1247  out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
1248 
1249  // allocate buffer of suitable length
1250  out_buffer = static_cast<Char8*>(Memory::allocate((out_length > 0 ? out_length : 1) * sizeof(Char8)));
1251  if (!out_buffer) return false;
1252 
1253  // second pass: convert latin1 input to utf8
1254  memcpy(out_buffer, data, prefix_length);
1255 
1256  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1257  uint8_t* out_end = utf_decoder<utf8_WriterInstance>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
1258 
1259  assert(out_end == out_begin + out_length);
1260  (void)!out_end;
1261 
1262  return true;
1263  }
1264 
1265  PUGI__FN bool convert_buffer(Char8*& out_buffer, size_t& out_length, Encoding DocumentEncoding, const void* contents, size_t size, bool is_mutable)
1266  {
1267  // fast Path: no conversion required
1268  if (DocumentEncoding == EncodingUTF8) return GetMutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1269 
1270  // source DocumentEncoding is utf16
1271  if (DocumentEncoding == EncodingUTF16BE || DocumentEncoding == EncodingUTF16LE)
1272  {
1273  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
1274 
1275  return (native_DocumentEncoding == DocumentEncoding) ?
1276  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1277  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1278  }
1279 
1280  // source DocumentEncoding is utf32
1281  if (DocumentEncoding == EncodingUTF32BE || DocumentEncoding == EncodingUTF32LE)
1282  {
1283  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
1284 
1285  return (native_DocumentEncoding == DocumentEncoding) ?
1286  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1287  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1288  }
1289 
1290  // source DocumentEncoding is latin1
1291  if (DocumentEncoding == EncodingLatin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
1292 
1293  assert(!"Invalid DocumentEncoding");
1294  return false;
1295  }
1296 
1297 
1298  PUGI__FN size_t AsUtf8_begin(const wchar_t* str, size_t length)
1299  {
1300  // get length in utf8 characters
1301  return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
1302  }
1303 
1304  PUGI__FN void AsUtf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1305  {
1306  // convert to utf8
1307  uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1308  uint8_t* end = utf_decoder<utf8_WriterInstance>::decode_wchar_block(str, length, begin);
1309 
1310  assert(begin + size == end);
1311  (void)!end;
1312 
1313  // zero-terminate
1314  buffer[size] = 0;
1315  }
1316 
1317 
1318  PUGI__FN std::string AsUtf8_impl(const wchar_t* str, size_t length)
1319  {
1320  // first pass: get length in utf8 characters
1321  size_t size = AsUtf8_begin(str, length);
1322 
1323  // allocate Resulting string
1324  std::string Result;
1325  Result.resize(size);
1326 
1327  // second pass: convert to utf8
1328  if (size > 0) AsUtf8_end(&Result[0], size, str, length);
1329 
1330  return Result;
1331  }
1332 
1333  PUGI__FN std::basic_string<wchar_t> AsWide_impl(const char* str, size_t size)
1334  {
1335  const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1336 
1337  // first pass: get length in wchar_t units
1338  size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1339 
1340  // allocate Resulting string
1341  std::basic_string<wchar_t> Result;
1342  Result.resize(length);
1343 
1344  // second pass: convert to wchar_t
1345  if (length > 0)
1346  {
1347  wchar_WriterInstance::value_type begin = reinterpret_cast<wchar_WriterInstance::value_type>(&Result[0]);
1348  wchar_WriterInstance::value_type end = utf_decoder<wchar_WriterInstance>::decode_utf8_block(data, size, begin);
1349 
1350  assert(begin + length == end);
1351  (void)!end;
1352  }
1353 
1354  return Result;
1355  }
1356 
1357 
1358  inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, Char8* target)
1359  {
1360  assert(target);
1361  size_t tarGetLength = strlength(target);
1362 
1363  // always reuse document buffer memory if possible
1364  if (!allocated) return tarGetLength >= length;
1365 
1366  // reuse heap memory if waste is not too great
1367  const size_t reuse_threshold = 32;
1368 
1369  return tarGetLength >= length && (tarGetLength < reuse_threshold || tarGetLength - length < tarGetLength / 2);
1370  }
1371 
1372  PUGI__FN bool strcpy_insitu(Char8*& dest, uintptr_t& header, uintptr_t header_mask, const Char8* source)
1373  {
1374  size_t source_length = strlength(source);
1375 
1376  if (source_length == 0)
1377  {
1378  // empty string and null pointer are equivalent, so just deallocate old memory
1379  Allocator* alloc = reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask)->allocator;
1380 
1381  if (header & header_mask) alloc->deallocate_string(dest);
1382 
1383  // mark the string as not allocated
1384  dest = 0;
1385  header &= ~header_mask;
1386 
1387  return true;
1388  }
1389  else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1390  {
1391  // we can reuse old buffer, so just copy the new data (including zero terminator)
1392  memcpy(dest, source, (source_length + 1) * sizeof(Char8));
1393 
1394  return true;
1395  }
1396  else
1397  {
1398  Allocator* alloc = reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask)->allocator;
1399 
1400  // allocate new buffer
1401  Char8* buf = alloc->allocate_string(source_length + 1);
1402  if (!buf) return false;
1403 
1404  // copy the string (including zero terminator)
1405  memcpy(buf, source, (source_length + 1) * sizeof(Char8));
1406 
1407  // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1408  if (header & header_mask) alloc->deallocate_string(dest);
1409 
1410  // the string is now allocated, so set the flag
1411  dest = buf;
1412  header |= header_mask;
1413 
1414  return true;
1415  }
1416  }
1417 
1418  struct gap
1419  {
1420  Char8* end;
1421  size_t size;
1422 
1423  gap(): end(0), size(0)
1424  {
1425  }
1426 
1427  // Push new gap, move s count bytes further (skipping the gap).
1428  // Collapse previous gap.
1429  void push(Char8*& s, size_t count)
1430  {
1431  if (end) // there was a gap already; collapse it
1432  {
1433  // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1434  assert(s >= end);
1435  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1436  }
1437 
1438  s += count; // end of current gap
1439 
1440  // "merge" two gaps
1441  end = s;
1442  size += count;
1443  }
1444 
1445  // Collapse all gaps, return past-the-end pointer
1446  Char8* flush(Char8* s)
1447  {
1448  if (end)
1449  {
1450  // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1451  assert(s >= end);
1452  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1453 
1454  return s - size;
1455  }
1456  else return s;
1457  }
1458  };
1459 
1460  PUGI__FN Char8* strconv_escape(Char8* s, gap& g)
1461  {
1462  Char8* stre = s + 1;
1463 
1464  switch (*stre)
1465  {
1466  case '#': // &#...
1467  {
1468  unsigned int ucsc = 0;
1469 
1470  if (stre[1] == 'x') // &#x... (hex code)
1471  {
1472  stre += 2;
1473 
1474  Char8 ch = *stre;
1475 
1476  if (ch == ';') return stre;
1477 
1478  for (;;)
1479  {
1480  if (static_cast<unsigned int>(ch - '0') <= 9)
1481  ucsc = 16 * ucsc + (ch - '0');
1482  else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1483  ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1484  else if (ch == ';')
1485  break;
1486  else // cancel
1487  return stre;
1488 
1489  ch = *++stre;
1490  }
1491 
1492  ++stre;
1493  }
1494  else // &#... (dec code)
1495  {
1496  Char8 ch = *++stre;
1497 
1498  if (ch == ';') return stre;
1499 
1500  for (;;)
1501  {
1502  if (static_cast<unsigned int>(ch - '0') <= 9)
1503  ucsc = 10 * ucsc + (ch - '0');
1504  else if (ch == ';')
1505  break;
1506  else // cancel
1507  return stre;
1508 
1509  ch = *++stre;
1510  }
1511 
1512  ++stre;
1513  }
1514 
1515 
1516  s = reinterpret_cast<Char8*>(utf8_WriterInstance::any(reinterpret_cast<uint8_t*>(s), ucsc));
1517 
1518 
1519  g.push(s, stre - s);
1520  return stre;
1521  }
1522 
1523  case 'a': // &a
1524  {
1525  ++stre;
1526 
1527  if (*stre == 'm') // &am
1528  {
1529  if (*++stre == 'p' && *++stre == ';') // &amp;
1530  {
1531  *s++ = '&';
1532  ++stre;
1533 
1534  g.push(s, stre - s);
1535  return stre;
1536  }
1537  }
1538  else if (*stre == 'p') // &ap
1539  {
1540  if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1541  {
1542  *s++ = '\'';
1543  ++stre;
1544 
1545  g.push(s, stre - s);
1546  return stre;
1547  }
1548  }
1549  break;
1550  }
1551 
1552  case 'g': // &g
1553  {
1554  if (*++stre == 't' && *++stre == ';') // &gt;
1555  {
1556  *s++ = '>';
1557  ++stre;
1558 
1559  g.push(s, stre - s);
1560  return stre;
1561  }
1562  break;
1563  }
1564 
1565  case 'l': // &l
1566  {
1567  if (*++stre == 't' && *++stre == ';') // &lt;
1568  {
1569  *s++ = '<';
1570  ++stre;
1571 
1572  g.push(s, stre - s);
1573  return stre;
1574  }
1575  break;
1576  }
1577 
1578  case 'q': // &q
1579  {
1580  if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1581  {
1582  *s++ = '"';
1583  ++stre;
1584 
1585  g.push(s, stre - s);
1586  return stre;
1587  }
1588  break;
1589  }
1590 
1591  default:
1592  break;
1593  }
1594 
1595  return stre;
1596  }
1597 
// Utility macro for last character handling: true when c equals e, or when c is
// the terminating zero and the variable endch in the enclosing scope equals e.
// Note that c and e are evaluated more than once.
#define ENDSWITH(c, e) (((c) == (e)) || (((c) == 0) && (endch == (e))))
1600 
1601  PUGI__FN Char8* strconv_comment(Char8* s, Char8 endch)
1602  {
1603  gap g;
1604 
1605  while (true)
1606  {
1607  while (!PUGI__IS_CHARTYPE(*s, ct_ParseComment)) ++s;
1608 
1609  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1610  {
1611  *s++ = '\n'; // replace first one with 0x0a
1612 
1613  if (*s == '\n') g.push(s, 1);
1614  }
1615  else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
1616  {
1617  *g.flush(s) = 0;
1618 
1619  return s + (s[2] == '>' ? 3 : 2);
1620  }
1621  else if (*s == 0)
1622  {
1623  return 0;
1624  }
1625  else ++s;
1626  }
1627  }
1628 
1629  PUGI__FN Char8* strconv_cdata(Char8* s, Char8 endch)
1630  {
1631  gap g;
1632 
1633  while (true)
1634  {
1635  while (!PUGI__IS_CHARTYPE(*s, ct_ParseCdata)) ++s;
1636 
1637  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1638  {
1639  *s++ = '\n'; // replace first one with 0x0a
1640 
1641  if (*s == '\n') g.push(s, 1);
1642  }
1643  else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
1644  {
1645  *g.flush(s) = 0;
1646 
1647  return s + 1;
1648  }
1649  else if (*s == 0)
1650  {
1651  return 0;
1652  }
1653  else ++s;
1654  }
1655  }
1656 
1657  typedef Char8* (*strconv_pcdata_t)(Char8*);
1658 
1659  template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1660  {
1661  static Char8* parse(Char8* s)
1662  {
1663  gap g;
1664 
1665  while (true)
1666  {
1667  while (!PUGI__IS_CHARTYPE(*s, ct_ParsePcdata)) ++s;
1668 
1669  if (*s == '<') // PCDATA ends here
1670  {
1671  *g.flush(s) = 0;
1672 
1673  return s + 1;
1674  }
1675  else if (opt_eol::Value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1676  {
1677  *s++ = '\n'; // replace first one with 0x0a
1678 
1679  if (*s == '\n') g.push(s, 1);
1680  }
1681  else if (opt_escape::Value && *s == '&')
1682  {
1683  s = strconv_escape(s, g);
1684  }
1685  else if (*s == 0)
1686  {
1687  return s;
1688  }
1689  else ++s;
1690  }
1691  }
1692  };
1693 
1694  PUGI__FN strconv_pcdata_t GetStrconv_pcdata(unsigned int optmask)
1695  {
1696  PUGI__STATIC_ASSERT(ParseEscapes == 0x10 && ParseEol == 0x20);
1697 
1698  switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
1699  {
1700  case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
1701  case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
1702  case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
1703  case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
1704  default: return 0; // should not get here
1705  }
1706  }
1707 
1708  typedef Char8* (*strconv_attribute_t)(Char8*, Char8);
1709 
1710  template <typename opt_escape> struct strconv_attribute_impl
1711  {
1712  static Char8* ParseWnorm(Char8* s, Char8 end_quote)
1713  {
1714  gap g;
1715 
1716  // trim leading whitespaces
1717  if (PUGI__IS_CHARTYPE(*s, ct_space))
1718  {
1719  Char8* str = s;
1720 
1721  do ++str;
1722  while (PUGI__IS_CHARTYPE(*str, ct_space));
1723 
1724  g.push(s, str - s);
1725  }
1726 
1727  while (true)
1728  {
1729  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttrWs | ct_space)) ++s;
1730 
1731  if (*s == end_quote)
1732  {
1733  Char8* str = g.flush(s);
1734 
1735  do *str-- = 0;
1736  while (PUGI__IS_CHARTYPE(*str, ct_space));
1737 
1738  return s + 1;
1739  }
1740  else if (PUGI__IS_CHARTYPE(*s, ct_space))
1741  {
1742  *s++ = ' ';
1743 
1744  if (PUGI__IS_CHARTYPE(*s, ct_space))
1745  {
1746  Char8* str = s + 1;
1747  while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
1748 
1749  g.push(s, str - s);
1750  }
1751  }
1752  else if (opt_escape::Value && *s == '&')
1753  {
1754  s = strconv_escape(s, g);
1755  }
1756  else if (!*s)
1757  {
1758  return 0;
1759  }
1760  else ++s;
1761  }
1762  }
1763 
1764  static Char8* ParseWconv(Char8* s, Char8 end_quote)
1765  {
1766  gap g;
1767 
1768  while (true)
1769  {
1770  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttrWs)) ++s;
1771 
1772  if (*s == end_quote)
1773  {
1774  *g.flush(s) = 0;
1775 
1776  return s + 1;
1777  }
1778  else if (PUGI__IS_CHARTYPE(*s, ct_space))
1779  {
1780  if (*s == '\r')
1781  {
1782  *s++ = ' ';
1783 
1784  if (*s == '\n') g.push(s, 1);
1785  }
1786  else *s++ = ' ';
1787  }
1788  else if (opt_escape::Value && *s == '&')
1789  {
1790  s = strconv_escape(s, g);
1791  }
1792  else if (!*s)
1793  {
1794  return 0;
1795  }
1796  else ++s;
1797  }
1798  }
1799 
1800  static Char8* ParseEol(Char8* s, Char8 end_quote)
1801  {
1802  gap g;
1803 
1804  while (true)
1805  {
1806  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttr)) ++s;
1807 
1808  if (*s == end_quote)
1809  {
1810  *g.flush(s) = 0;
1811 
1812  return s + 1;
1813  }
1814  else if (*s == '\r')
1815  {
1816  *s++ = '\n';
1817 
1818  if (*s == '\n') g.push(s, 1);
1819  }
1820  else if (opt_escape::Value && *s == '&')
1821  {
1822  s = strconv_escape(s, g);
1823  }
1824  else if (!*s)
1825  {
1826  return 0;
1827  }
1828  else ++s;
1829  }
1830  }
1831 
1832  static Char8* ParseSimple(Char8* s, Char8 end_quote)
1833  {
1834  gap g;
1835 
1836  while (true)
1837  {
1838  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttr)) ++s;
1839 
1840  if (*s == end_quote)
1841  {
1842  *g.flush(s) = 0;
1843 
1844  return s + 1;
1845  }
1846  else if (opt_escape::Value && *s == '&')
1847  {
1848  s = strconv_escape(s, g);
1849  }
1850  else if (!*s)
1851  {
1852  return 0;
1853  }
1854  else ++s;
1855  }
1856  }
1857  };
1858 
1859  PUGI__FN strconv_attribute_t GetStrconv_attribute(unsigned int optmask)
1860  {
1861  PUGI__STATIC_ASSERT(ParseEscapes == 0x10 && ParseEol == 0x20 && ParseWconvAttribute == 0x40 && ParseWnormAttribute == 0x80);
1862 
1863  switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
1864  {
1865  case 0: return strconv_attribute_impl<opt_false>::ParseSimple;
1866  case 1: return strconv_attribute_impl<opt_true>::ParseSimple;
1869  case 4: return strconv_attribute_impl<opt_false>::ParseWconv;
1870  case 5: return strconv_attribute_impl<opt_true>::ParseWconv;
1871  case 6: return strconv_attribute_impl<opt_false>::ParseWconv;
1872  case 7: return strconv_attribute_impl<opt_true>::ParseWconv;
1873  case 8: return strconv_attribute_impl<opt_false>::ParseWnorm;
1874  case 9: return strconv_attribute_impl<opt_true>::ParseWnorm;
1875  case 10: return strconv_attribute_impl<opt_false>::ParseWnorm;
1876  case 11: return strconv_attribute_impl<opt_true>::ParseWnorm;
1877  case 12: return strconv_attribute_impl<opt_false>::ParseWnorm;
1878  case 13: return strconv_attribute_impl<opt_true>::ParseWnorm;
1879  case 14: return strconv_attribute_impl<opt_false>::ParseWnorm;
1880  case 15: return strconv_attribute_impl<opt_true>::ParseWnorm;
1881  default: return 0; // should not get here
1882  }
1883  }
1884 
1885  inline ParseResult make_ParseResult(ParseStatus Status, ptrdiff_t Offset = 0)
1886  {
1887  ParseResult Result;
1888  Result.Status = Status;
1889  Result.Offset = Offset;
1890 
1891  return Result;
1892  }
1893 
1894  struct Parser
1895  {
1896  Allocator alloc;
1897  Char8* error_Offset;
1898  ParseStatus error_Status;
1899 
// Parser utilities.
// These macros operate on names from the function that uses them: s (current
// position), optmsk (option bits), cursor (current node), ch (saved character),
// plus the parser members alloc, error_Offset and error_Status.

// Advance s past whitespace.
#define PUGI__SKIPWS() { for (; PUGI__IS_CHARTYPE(*s, ct_space); ++s) {} }
// Non-zero when option bit(s) OPT are set in optmsk.
#define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
// Append a child of TYPE to cursor and descend into it; fails with StatusOutOfMemory.
#define PUGI__PUSHNODE(TYPE) { cursor = AppendNode(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(StatusOutOfMemory, s); }
// Move cursor back to its parent.
#define PUGI__POPNODE() { cursor = cursor->GetParent; }
// Advance s until X holds or the terminating zero is reached.
#define PUGI__SCANFOR(X) { for (; *s != 0 && !(X); ++s) {} }
// Advance s while X holds.
#define PUGI__SCANWHILE(X) { for (; (X); ++s) {} }
// Save the character at s in ch, overwrite it with zero and step past it.
#define PUGI__ENDSEG() { ch = *s; *s++ = 0; }
// Record the error position and status, then return a null pointer from the enclosing function.
#define PUGI__THROW_ERROR(err, m) return error_Offset = m, error_Status = err, static_cast<Char8*>(0)
// Fail with err when s has reached the terminating zero.
#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
1910 
1911  Parser(const Allocator& alloc_): alloc(alloc_), error_Offset(0), error_Status(StatusOk)
1912  {
1913  }
1914 
1915  // DOCTYPE consists of nested sections of the following possible Types:
1916  // <!-- ... -->, <? ... ?>, "...", '...'
1917  // <![...]]>
1918  // <!...>
1919  // First group can not contain nested groups
1920  // Second group can contain nested groups of the same type
1921  // Third group can contain all other groups
1922  Char8* ParseDocTypePrimitive(Char8* s)
1923  {
1924  if (*s == '"' || *s == '\'')
1925  {
1926  // quoted string
1927  Char8 ch = *s++;
1928  PUGI__SCANFOR(*s == ch);
1929  if (!*s) PUGI__THROW_ERROR(StatusBadDocType, s);
1930 
1931  s++;
1932  }
1933  else if (s[0] == '<' && s[1] == '?')
1934  {
1935  // <? ... ?>
1936  s += 2;
1937  PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
1938  if (!*s) PUGI__THROW_ERROR(StatusBadDocType, s);
1939 
1940  s += 2;
1941  }
1942  else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
1943  {
1944  s += 4;
1945  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
1946  if (!*s) PUGI__THROW_ERROR(StatusBadDocType, s);
1947 
1948  s += 4;
1949  }
1950  else PUGI__THROW_ERROR(StatusBadDocType, s);
1951 
1952  return s;
1953  }
1954 
1955  Char8* ParseDocTypeIgnore(Char8* s)
1956  {
1957  assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
1958  s++;
1959 
1960  while (*s)
1961  {
1962  if (s[0] == '<' && s[1] == '!' && s[2] == '[')
1963  {
1964  // nested ignore section
1965  s = ParseDocTypeIgnore(s);
1966  if (!s) return s;
1967  }
1968  else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
1969  {
1970  // ignore section end
1971  s += 3;
1972 
1973  return s;
1974  }
1975  else s++;
1976  }
1977 
1978  PUGI__THROW_ERROR(StatusBadDocType, s);
1979  }
1980 
1981  Char8* ParseDocTypeGroup(Char8* s, Char8 endch, bool toplevel)
1982  {
1983  assert(s[0] == '<' && s[1] == '!');
1984  s++;
1985 
1986  while (*s)
1987  {
1988  if (s[0] == '<' && s[1] == '!' && s[2] != '-')
1989  {
1990  if (s[2] == '[')
1991  {
1992  // ignore
1993  s = ParseDocTypeIgnore(s);
1994  if (!s) return s;
1995  }
1996  else
1997  {
1998  // some control group
1999  s = ParseDocTypeGroup(s, endch, false);
2000  if (!s) return s;
2001  }
2002  }
2003  else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2004  {
2005  // unknown tag (forbidden), or some primitive group
2006  s = ParseDocTypePrimitive(s);
2007  if (!s) return s;
2008  }
2009  else if (*s == '>')
2010  {
2011  s++;
2012 
2013  return s;
2014  }
2015  else s++;
2016  }
2017 
2018  if (!toplevel || endch != '>') PUGI__THROW_ERROR(StatusBadDocType, s);
2019 
2020  return s;
2021  }
2022 
2023  Char8* ParseExclamation(Char8* s, NodeStruct* cursor, unsigned int optmsk, Char8 endch)
2024  {
2025  // parse node contents, starting with exclamation mark
2026  ++s;
2027 
2028  if (*s == '-') // '<!-...'
2029  {
2030  ++s;
2031 
2032  if (*s == '-') // '<!--...'
2033  {
2034  ++s;
2035 
2036  if (PUGI__OPTSET(ParseComments))
2037  {
2038  PUGI__PUSHNODE(NodeComment); // Append a new node on the tree.
2039  cursor->Value = s; // Save the Offset.
2040  }
2041 
2042  if (PUGI__OPTSET(ParseEol) && PUGI__OPTSET(ParseComments))
2043  {
2044  s = strconv_comment(s, endch);
2045 
2046  if (!s) PUGI__THROW_ERROR(StatusBadComment, cursor->Value);
2047  }
2048  else
2049  {
2050  // Scan for terminating '-->'.
2051  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
2052  PUGI__CHECK_ERROR(StatusBadComment, s);
2053 
2054  if (PUGI__OPTSET(ParseComments))
2055  *s = 0; // Zero-terminate this segment at the first terminating '-'.
2056 
2057  s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2058  }
2059  }
2060  else PUGI__THROW_ERROR(StatusBadComment, s);
2061  }
2062  else if (*s == '[')
2063  {
2064  // '<![CDATA[...'
2065  if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2066  {
2067  ++s;
2068 
2069  if (PUGI__OPTSET(ParseCdata))
2070  {
2071  PUGI__PUSHNODE(NodeCdata); // Append a new node on the tree.
2072  cursor->Value = s; // Save the Offset.
2073 
2074  if (PUGI__OPTSET(ParseEol))
2075  {
2076  s = strconv_cdata(s, endch);
2077 
2078  if (!s) PUGI__THROW_ERROR(StatusBadCdata, cursor->Value);
2079  }
2080  else
2081  {
2082  // Scan for terminating ']]>'.
2083  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2084  PUGI__CHECK_ERROR(StatusBadCdata, s);
2085 
2086  *s++ = 0; // Zero-terminate this segment.
2087  }
2088  }
2089  else // Flagged for discard, but we still have to scan for the terminator.
2090  {
2091  // Scan for terminating ']]>'.
2092  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2093  PUGI__CHECK_ERROR(StatusBadCdata, s);
2094 
2095  ++s;
2096  }
2097 
2098  s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2099  }
2100  else PUGI__THROW_ERROR(StatusBadCdata, s);
2101  }
2102  else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
2103  {
2104  s -= 2;
2105 
2106  if (cursor->GetParent) PUGI__THROW_ERROR(StatusBadDocType, s);
2107 
2108  Char8* mark = s + 9;
2109 
2110  s = ParseDocTypeGroup(s, endch, true);
2111  if (!s) return s;
2112 
2113  if (PUGI__OPTSET(ParseDocType))
2114  {
2115  while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
2116 
2117  PUGI__PUSHNODE(NodeDocType);
2118 
2119  cursor->Value = mark;
2120 
2121  assert((s[0] == 0 && endch == '>') || s[-1] == '>');
2122  s[*s == 0 ? 0 : -1] = 0;
2123 
2124  PUGI__POPNODE();
2125  }
2126  }
2127  else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(StatusBadComment, s);
2128  else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(StatusBadCdata, s);
2129  else PUGI__THROW_ERROR(StatusUnrecognizedTag, s);
2130 
2131  return s;
2132  }
2133 
// Parses the construct that follows "<?": either a processing instruction or the
// XML declaration. parse() calls this when it sees '?' right after '<'.
//
// s          - current position in the mutable, zero-terminated buffer; points at '?' on entry.
// ref_cursor - node currently being built; copied to a local on entry and written back
//              just before the final return.
// optmsk     - parse option mask, tested through PUGI__OPTSET (macro defined outside this view).
// endch      - passed through from parse(); the buffer-level parse() stores the buffer's last
//              character in it before overwriting that character with 0.
//
// Returns the position to resume parsing from. For an accepted declaration that has
// content, that is the start of the content, and the cursor stays on the new
// NodeDeclaration so that parse() continues in its attribute loop.
// NOTE(review): PUGI__THROW_ERROR / PUGI__CHECK_ERROR / PUGI__ENDSEG / ENDSWITH are defined
// elsewhere; the error macros presumably record the error and return early - confirm.
2134  Char8* ParseQuestion(Char8* s, NodeStruct*& ref_cursor, unsigned int optmsk, Char8 endch)
2135  {
2136  // Load into registers
2137  NodeStruct* cursor = ref_cursor;
2138  Char8 ch = 0;
2139 
2140  // parse node contents, starting with question mark
2141  ++s;
2142 
2143  // read PI target
2144  Char8* target = s;
2145 
2146  if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2147 
2148  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
2149  PUGI__CHECK_ERROR(StatusBadProcessingInstruction, s);
2150 
2151  // determine node Type; stricmp / strcasecmp is not portable
// OR-ing with ' ' sets bit 0x20, so 'X'/'M'/'L' compare equal to their lowercase forms;
// the target must also be exactly three characters long (target + 3 == s).
2152  bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2153 
// A node is only created when the matching option (ParseDeclaration or ParsePi) is set;
// otherwise the construct is skipped in the else branch further down.
2154  if (declaration ? PUGI__OPTSET(ParseDeclaration) : PUGI__OPTSET(ParsePi))
2155  {
2156  if (declaration)
2157  {
2158  // disallow non top-level declarations
2159  if (cursor->GetParent) PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2160 
2161  PUGI__PUSHNODE(NodeDeclaration);
2162  }
2163  else
2164  {
2165  PUGI__PUSHNODE(NodePi);
2166  }
2167 
2168  cursor->Name = target;
2169 
2170  PUGI__ENDSEG();
2171 
2172  // parse Value/attributes
2173  if (ch == '?')
2174  {
2175  // empty node
2176  if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
// Advance only over a real '>'; otherwise s is left where it is.
2177  s += (*s == '>');
2178 
2179  PUGI__POPNODE();
2180  }
2181  else if (PUGI__IS_CHARTYPE(ch, ct_space))
2182  {
2183  PUGI__SKIPWS();
2184 
2185  // scan for tag end
2186  Char8* Value = s;
2187 
2188  PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2189  PUGI__CHECK_ERROR(StatusBadProcessingInstruction, s);
2190 
2191  if (declaration)
2192  {
2193  // replace ending ? with / so that 'element' terminates properly
2194  *s = '/';
2195 
2196  // we exit from this function with cursor at NodeDeclaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2197  s = Value;
2198  }
2199  else
2200  {
2201  // store Value and step over >
2202  cursor->Value = Value;
2203  PUGI__POPNODE();
2204 
2205  PUGI__ENDSEG();
2206 
2207  s += (*s == '>');
2208  }
2209  }
2210  else PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2211  }
2212  else
2213  {
2214  // scan for tag end
2215  PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2216  PUGI__CHECK_ERROR(StatusBadProcessingInstruction, s);
2217 
// Step over "?>" when the '>' is present, otherwise over the '?' alone.
2218  s += (s[1] == '>' ? 2 : 1);
2219  }
2220 
2221  // store from registers
2222  ref_cursor = cursor;
2223 
2224  return s;
2225  }
2226 
// Core parsing loop: scans the zero-terminated buffer starting at s and builds nodes and
// attributes under xmldoc. Names and values are stored as pointers into the buffer, which
// is terminated in place as segments are recognised.
//
// s      - start of the mutable buffer.
// xmldoc - root node; the cursor starts here and must be back here when the input ends,
//          otherwise StatusEndElementMismatch is raised.
// optmsk - parse option mask; selects the attribute/PCDATA converters and is tested
//          through PUGI__OPTSET.
// endch  - the buffer's original last character (the buffer-level parse() replaces it with
//          0 before calling this function); used to tell a legitimately final '>' and
//          similar cases from truncated input.
//
// Returns the position where scanning stopped.
// NOTE(review): the PUGI__* macros (THROW_ERROR, CHECK_ERROR, PUSHNODE, POPNODE, ENDSEG,
// SKIPWS, SCANFOR, SCANWHILE) are defined outside this view; the error macros presumably
// record status/offset and return early - confirm against their definitions.
2227  Char8* parse(Char8* s, NodeStruct* xmldoc, unsigned int optmsk, Char8 endch)
2228  {
2229  strconv_attribute_t strconv_attribute = GetStrconv_attribute(optmsk);
2230  strconv_pcdata_t strconv_pcdata = GetStrconv_pcdata(optmsk);
2231 
2232  Char8 ch = 0;
2233  NodeStruct* cursor = xmldoc;
2234  Char8* mark = s;
2235 
2236  while (*s != 0)
2237  {
2238  if (*s == '<')
2239  {
2240  ++s;
2241 
// LOC_TAG is also reached by the goto at the end of the PCDATA branch below, with s
// already positioned just after a '<'.
2242  LOC_TAG:
2243  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2244  {
2245  PUGI__PUSHNODE(NodeElement); // Append a new node to the tree.
2246 
2247  cursor->Name = s;
2248 
2249  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2250  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2251 
2252  if (ch == '>')
2253  {
2254  // end of tag
2255  }
2256  else if (PUGI__IS_CHARTYPE(ch, ct_space))
2257  {
// LOC_ATTRIBUTES is also entered by a goto after ParseQuestion() leaves the cursor on a
// NodeDeclaration, so declaration attributes share this loop.
2258  LOC_ATTRIBUTES:
2259  while (true)
2260  {
2261  PUGI__SKIPWS(); // Eat any whitespace.
2262 
2263  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2264  {
2265  AttributeStruct* a = AppendAttribute_ll(cursor, alloc); // Make space for this GetAttribute.
2266  if (!a) PUGI__THROW_ERROR(StatusOutOfMemory, s);
2267 
2268  a->Name = s; // Save the Offset.
2269 
2270  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2271  PUGI__CHECK_ERROR(StatusBadAttribute, s); //$ redundant, left for performance
2272 
2273  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2274  PUGI__CHECK_ERROR(StatusBadAttribute, s); //$ redundant, left for performance
2275 
2276  if (PUGI__IS_CHARTYPE(ch, ct_space))
2277  {
2278  PUGI__SKIPWS(); // Eat any whitespace.
2279  PUGI__CHECK_ERROR(StatusBadAttribute, s); //$ redundant, left for performance
2280 
2281  ch = *s;
2282  ++s;
2283  }
2284 
2285  if (ch == '=') // '<... #=...'
2286  {
2287  PUGI__SKIPWS(); // Eat any whitespace.
2288 
2289  if (*s == '"' || *s == '\'') // '<... #="...'
2290  {
2291  ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2292  ++s; // Step over the quote.
2293  a->Value = s; // Save the Offset.
2294 
2295  s = strconv_attribute(s, ch);
2296 
2297  if (!s) PUGI__THROW_ERROR(StatusBadAttribute, a->Value);
2298 
2299  // After this line the loop continues from the start;
2300  // Whitespaces, / and > are ok, symbols and EOF are wrong,
2301  // everything else will be detected
2302  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(StatusBadAttribute, s);
2303  }
2304  else PUGI__THROW_ERROR(StatusBadAttribute, s);
2305  }
2306  else PUGI__THROW_ERROR(StatusBadAttribute, s);
2307  }
2308  else if (*s == '/')
2309  {
2310  ++s;
2311 
2312  if (*s == '>')
2313  {
2314  PUGI__POPNODE();
2315  s++;
2316  break;
2317  }
// The terminator sits where the buffer's last character was; accept it as the closing
// '>' only if that original character (endch) really was '>'.
2318  else if (*s == 0 && endch == '>')
2319  {
2320  PUGI__POPNODE();
2321  break;
2322  }
2323  else PUGI__THROW_ERROR(StatusBadStartElement, s);
2324  }
2325  else if (*s == '>')
2326  {
2327  ++s;
2328 
2329  break;
2330  }
2331  else if (*s == 0 && endch == '>')
2332  {
2333  break;
2334  }
2335  else PUGI__THROW_ERROR(StatusBadStartElement, s);
2336  }
2337 
2338  // !!!
2339  }
2340  else if (ch == '/') // '<#.../'
2341  {
2342  if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(StatusBadStartElement, s);
2343 
2344  PUGI__POPNODE(); // Pop.
2345 
2346  s += (*s == '>');
2347  }
2348  else if (ch == 0)
2349  {
2350  // we stepped over null terminator, backtrack & handle closing tag
2351  --s;
2352 
2353  if (endch != '>') PUGI__THROW_ERROR(StatusBadStartElement, s);
2354  }
2355  else PUGI__THROW_ERROR(StatusBadStartElement, s);
2356  }
// Closing tag "</name>": the name is compared character by character with the name of
// the node at the cursor.
2357  else if (*s == '/')
2358  {
2359  ++s;
2360 
2361  Char8* Name = cursor->Name;
2362  if (!Name) PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2363 
2364  while (PUGI__IS_CHARTYPE(*s, ct_symbol))
2365  {
2366  if (*s++ != *Name++) PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2367  }
2368 
// Leftover characters in the stored name mean the closing tag was shorter than the
// opening one; the first test singles out the case where only the replaced final
// character (endch) is missing at end of input.
2369  if (*Name)
2370  {
2371  if (*s == 0 && Name[0] == endch && Name[1] == 0) PUGI__THROW_ERROR(StatusBadEndElement, s);
2372  else PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2373  }
2374 
2375  PUGI__POPNODE(); // Pop.
2376 
2377  PUGI__SKIPWS();
2378 
2379  if (*s == 0)
2380  {
2381  if (endch != '>') PUGI__THROW_ERROR(StatusBadEndElement, s);
2382  }
2383  else
2384  {
2385  if (*s != '>') PUGI__THROW_ERROR(StatusBadEndElement, s);
2386  ++s;
2387  }
2388  }
2389  else if (*s == '?') // '<?...'
2390  {
2391  s = ParseQuestion(s, cursor, optmsk, endch);
2392  if (!s) return s;
2393 
2394  assert(cursor);
// The node type is read from the header bits; a cursor left on a declaration means its
// attributes still have to be parsed.
2395  if ((cursor->header & MemoryPage_type_mask) + 1 == NodeDeclaration) goto LOC_ATTRIBUTES;
2396  }
2397  else if (*s == '!') // '<!...'
2398  {
2399  s = ParseExclamation(s, cursor, optmsk, endch);
2400  if (!s) return s;
2401  }
2402  else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2403  else PUGI__THROW_ERROR(StatusUnrecognizedTag, s);
2404  }
2405  else
2406  {
2407  mark = s; // Save this Offset while searching for a terminator.
2408 
2409  PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2410 
2411  if (*s == '<')
2412  {
2413  // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
2414  assert(mark != s);
2415 
// Whitespace-only text: dropped unless ParseWsPcdata is set, or ParseWsPcdata_single is
// set and the whitespace is directly followed by "</" with the cursor having no
// children yet.
2416  if (!PUGI__OPTSET(ParseWsPcdata | ParseWsPcdata_single))
2417  {
2418  continue;
2419  }
2420  else if (PUGI__OPTSET(ParseWsPcdata_single))
2421  {
2422  if (s[1] != '/' || cursor->GetFirstChild) continue;
2423  }
2424  }
2425 
2426  s = mark;
2427 
// Text becomes a NodePcdata only when the cursor has a parent; otherwise it is skipped
// up to the next '<'.
2428  if (cursor->GetParent)
2429  {
2430  PUGI__PUSHNODE(NodePcdata); // Append a new node on the tree.
2431  cursor->Value = s; // Save the Offset.
2432 
2433  s = strconv_pcdata(s);
2434 
2435  PUGI__POPNODE(); // Pop since this is a standalone.
2436 
2437  if (!*s) break;
2438  }
2439  else
2440  {
2441  PUGI__SCANFOR(*s == '<'); // '...<'
2442  if (!*s) break;
2443 
2444  ++s;
2445  }
2446 
2447  // We're after '<'
2448  goto LOC_TAG;
2449  }
2450  }
2451 
2452  // check that last tag is closed
2453  if (cursor != xmldoc) PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2454 
2455  return s;
2456  }
2457 
2458  static ParseResult parse(Char8* buffer, size_t length, NodeStruct* GetRoot, unsigned int optmsk)
2459  {
2460  DocumentStruct* xmldoc = static_cast<DocumentStruct*>(GetRoot);
2461 
2462  // store buffer for OffSetDebug
2463  xmldoc->buffer = buffer;
2464 
2465  // early-out for empty documents
2466  if (length == 0) return make_ParseResult(StatusOk);
2467 
2468  // create parser on stack
2469  Parser parser(*xmldoc);
2470 
2471  // Save last character and make buffer zero-terminated (speeds up parsing)
2472  Char8 endch = buffer[length - 1];
2473  buffer[length - 1] = 0;
2474 
2475  // perform actual parsing
2476  parser.parse(buffer, xmldoc, optmsk, endch);
2477 
2478  ParseResult Result = make_ParseResult(parser.error_Status, parser.error_Offset ? parser.error_Offset - buffer : 0);
2479  assert(Result.Offset >= 0 && static_cast<size_t>(Result.Offset) <= length);
2480 
2481  // update allocator state
2482  *static_cast<Allocator*>(xmldoc) = parser.alloc;
2483 
2484  // since we removed last character, we have to handle the only possible false positive
2485  if (Result && endch == '<')
2486  {
2487  // there's no possible well-formed document with < at the end
2488  return make_ParseResult(StatusUnrecognizedTag, length);
2489  }
2490 
2491  return Result;
2492  }
2493  };
2494 
2495  // Output facilities
2496  PUGI__FN Encoding GetWrite_native_DocumentEncoding()
2497  {
2498  return EncodingUTF8;
2499  }
2500 
2501  PUGI__FN Encoding GetWrite_DocumentEncoding(Encoding DocumentEncoding)
2502  {
2503  // replace wchar DocumentEncoding with utf implementation
2504  if (DocumentEncoding == Encodingwchar_t) return GetWchar_DocumentEncoding();
2505 
2506  // replace utf16 DocumentEncoding with utf16 with specific endianness
2507  if (DocumentEncoding == EncodingUTF16) return is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
2508 
2509  // replace utf32 DocumentEncoding with utf32 with specific endianness
2510  if (DocumentEncoding == EncodingUTF32) return is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
2511 
2512  // only do autodetection if no explicit DocumentEncoding is requested
2513  if (DocumentEncoding != EncodingAuto) return DocumentEncoding;
2514 
2515  // assume utf8 DocumentEncoding
2516  return EncodingUTF8;
2517  }
2518 
2519  PUGI__FN size_t GetValid_length(const Char8* data, size_t length)
2520  {
2521  assert(length > 4);
2522 
2523  for (size_t i = 1; i <= 4; ++i)
2524  {
2525  uint8_t ch = static_cast<uint8_t>(data[length - i]);
2526 
2527  // either a standalone character or a leading one
2528  if ((ch & 0xc0) != 0x80) return length - i;
2529  }
2530 
2531  // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2532  return length;
2533  }
2534 
2535  PUGI__FN size_t convert_buffer(Char8* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const Char8* data, size_t length, Encoding DocumentEncoding)
2536  {
2537  if (DocumentEncoding == EncodingUTF16BE || DocumentEncoding == EncodingUTF16LE)
2538  {
2539  uint16_t* dest = r_u16;
2540 
2541  // convert to native utf16
2542  uint16_t* end = utf_decoder<utf16_WriterInstance>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2543 
2544  // swap if necessary
2545  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
2546 
2547  if (native_DocumentEncoding != DocumentEncoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2548 
2549  return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2550  }
2551 
2552  if (DocumentEncoding == EncodingUTF32BE || DocumentEncoding == EncodingUTF32LE)
2553  {
2554  uint32_t* dest = r_u32;
2555 
2556  // convert to native utf32
2557  uint32_t* end = utf_decoder<utf32_WriterInstance>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2558 
2559  // swap if necessary
2560  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
2561 
2562  if (native_DocumentEncoding != DocumentEncoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2563 
2564  return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2565  }
2566 
2567  if (DocumentEncoding == EncodingLatin1)
2568  {
2569  uint8_t* dest = r_u8;
2570  uint8_t* end = utf_decoder<latin1_WriterInstance>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2571 
2572  return static_cast<size_t>(end - dest);
2573  }
2574 
2575  assert(!"Invalid DocumentEncoding");
2576  return 0;
2577  }
2578 
2579 
2580  class BufferedWriter
2581  {
2582  BufferedWriter(const BufferedWriter&);
2583  BufferedWriter& operator=(const BufferedWriter&);
2584 
2585  public:
2586  BufferedWriter(Writer& WriterInstance_, Encoding user_DocumentEncoding): WriterInstance(WriterInstance_), bufsize(0), DocumentEncoding(GetWrite_DocumentEncoding(user_DocumentEncoding))
2587  {
2588  PUGI__STATIC_ASSERT(bufcapacity >= 8);
2589  }
2590 
2591  ~BufferedWriter()
2592  {
2593  flush();
2594  }
2595 
2596  void flush()
2597  {
2598  flush(buffer, bufsize);
2599  bufsize = 0;
2600  }
2601 
2602  void flush(const Char8* data, size_t size)
2603  {
2604  if (size == 0) return;
2605 
2606  // fast Path, just Write data
2607  if (DocumentEncoding == GetWrite_native_DocumentEncoding())
2608  WriterInstance.Write(data, size * sizeof(Char8));
2609  else
2610  {
2611  // convert chunk
2612  size_t Result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, DocumentEncoding);
2613  assert(Result <= sizeof(scratch));
2614 
2615  // Write data
2616  WriterInstance.Write(scratch.data_u8, Result);
2617  }
2618  }
2619 
2620  void Write(const Char8* data, size_t length)
2621  {
2622  if (bufsize + length > bufcapacity)
2623  {
2624  // flush the remaining buffer contents
2625  flush();
2626 
2627  // handle large chunks
2628  if (length > bufcapacity)
2629  {
2630  if (DocumentEncoding == GetWrite_native_DocumentEncoding())
2631  {
2632  // fast Path, can just Write data chunk
2633  WriterInstance.Write(data, length * sizeof(Char8));
2634  return;
2635  }
2636 
2637  // need to convert in suitable chunks
2638  while (length > bufcapacity)
2639  {
2640  // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
2641  // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
2642  size_t chunk_size = GetValid_length(data, bufcapacity);
2643 
2644  // convert chunk and Write
2645  flush(data, chunk_size);
2646 
2647  // iterate
2648  data += chunk_size;
2649  length -= chunk_size;
2650  }
2651 
2652  // small tail is copied below
2653  bufsize = 0;
2654  }
2655  }
2656 
2657  memcpy(buffer + bufsize, data, length * sizeof(Char8));
2658  bufsize += length;
2659  }
2660 
2661  void Write(const Char8* data)
2662  {
2663  Write(data, strlength(data));
2664  }
2665 
2666  void Write(Char8 d0)
2667  {
2668  if (bufsize + 1 > bufcapacity) flush();
2669 
2670  buffer[bufsize + 0] = d0;
2671  bufsize += 1;
2672  }
2673 
2674  void Write(Char8 d0, Char8 d1)
2675  {
2676  if (bufsize + 2 > bufcapacity) flush();
2677 
2678  buffer[bufsize + 0] = d0;
2679  buffer[bufsize + 1] = d1;
2680  bufsize += 2;
2681  }
2682 
2683  void Write(Char8 d0, Char8 d1, Char8 d2)
2684  {
2685  if (bufsize + 3 > bufcapacity) flush();
2686 
2687  buffer[bufsize + 0] = d0;
2688  buffer[bufsize + 1] = d1;
2689  buffer[bufsize + 2] = d2;
2690  bufsize += 3;
2691  }
2692 
2693  void Write(Char8 d0, Char8 d1, Char8 d2, Char8 d3)
2694  {
2695  if (bufsize + 4 > bufcapacity) flush();
2696 
2697  buffer[bufsize + 0] = d0;
2698  buffer[bufsize + 1] = d1;
2699  buffer[bufsize + 2] = d2;
2700  buffer[bufsize + 3] = d3;
2701  bufsize += 4;
2702  }
2703 
2704  void Write(Char8 d0, Char8 d1, Char8 d2, Char8 d3, Char8 d4)
2705  {
2706  if (bufsize + 5 > bufcapacity) flush();
2707 
2708  buffer[bufsize + 0] = d0;
2709  buffer[bufsize + 1] = d1;
2710  buffer[bufsize + 2] = d2;
2711  buffer[bufsize + 3] = d3;
2712  buffer[bufsize + 4] = d4;
2713  bufsize += 5;
2714  }
2715 
2716  void Write(Char8 d0, Char8 d1, Char8 d2, Char8 d3, Char8 d4, Char8 d5)
2717  {
2718  if (bufsize + 6 > bufcapacity) flush();
2719 
2720  buffer[bufsize + 0] = d0;
2721  buffer[bufsize + 1] = d1;
2722  buffer[bufsize + 2] = d2;
2723  buffer[bufsize + 3] = d3;
2724  buffer[bufsize + 4] = d4;
2725  buffer[bufsize + 5] = d5;
2726  bufsize += 6;
2727  }
2728 
2729  // utf8 maximum expansion: x4 (-> utf32)
2730  // utf16 maximum expansion: x2 (-> utf32)
2731  // utf32 maximum expansion: x1
2732  enum
2733  {
2734  bufcapacitybytes =
2735  #ifdef XML_MEMORY_OUTPUT_STACK
2736  XML_MEMORY_OUTPUT_STACK
2737  #else
2738  10240
2739  #endif
2740  ,
2741  bufcapacity = bufcapacitybytes / (sizeof(Char8) + 4)
2742  };
2743 
2744  Char8 buffer[bufcapacity];
2745 
2746  union
2747  {
2748  uint8_t data_u8[4 * bufcapacity];
2749  uint16_t data_u16[2 * bufcapacity];
2750  uint32_t data_u32[bufcapacity];
2751  Char8 data_char[bufcapacity];
2752  } scratch;
2753 
2754  Writer& WriterInstance;
2755  size_t bufsize;
2756  Encoding DocumentEncoding;
2757  };
2758 
2759  PUGI__FN void text_output_escaped(BufferedWriter& WriterInstance, const Char8* s, charTypex_t Type)
2760  {
2761  while (*s)
2762  {
2763  const Char8* prev = s;
2764 
2765  // While *s is a usual symbol
2766  while (!PUGI__IS_CHARTYPEX(*s, Type)) ++s;
2767 
2768  WriterInstance.Write(prev, static_cast<size_t>(s - prev));
2769 
2770  switch (*s)
2771  {
2772  case 0: break;
2773  case '&':
2774  WriterInstance.Write('&', 'a', 'm', 'p', ';');
2775  ++s;
2776  break;
2777  case '<':
2778  WriterInstance.Write('&', 'l', 't', ';');
2779  ++s;
2780  break;
2781  case '>':
2782  WriterInstance.Write('&', 'g', 't', ';');
2783  ++s;
2784  break;
2785  case '"':
2786  WriterInstance.Write('&', 'q', 'u', 'o', 't', ';');
2787  ++s;
2788  break;
2789  default: // s is not a usual symbol
2790  {
2791  unsigned int ch = static_cast<unsigned int>(*s++);
2792  assert(ch < 32);
2793 
2794  WriterInstance.Write('&', '#', static_cast<Char8>((ch / 10) + '0'), static_cast<Char8>((ch % 10) + '0'), ';');
2795  }
2796  }
2797  }
2798  }
2799 
2800  PUGI__FN void text_output(BufferedWriter& WriterInstance, const Char8* s, charTypex_t Type, unsigned int flags)
2801  {
2802  if (flags & FormatNoEscapes)
2803  WriterInstance.Write(s);
2804  else
2805  text_output_escaped(WriterInstance, s, Type);
2806  }
2807 
2808  PUGI__FN void text_output_cdata(BufferedWriter& WriterInstance, const Char8* s)
2809  {
2810  do
2811  {
2812  WriterInstance.Write('<', '!', '[', 'C', 'D');
2813  WriterInstance.Write('A', 'T', 'A', '[');
2814 
2815  const Char8* prev = s;
2816 
2817  // look for ]]> sequence - we can't output it as is since it terminates CDATA
2818  while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
2819 
2820  // skip ]] if we stopped at ]]>, > will go to the next CDATA section
2821  if (*s) s += 2;
2822 
2823  WriterInstance.Write(prev, static_cast<size_t>(s - prev));
2824 
2825  WriterInstance.Write(']', ']', '>');
2826  }
2827  while (*s);
2828  }
2829 
2830  PUGI__FN void NodeOutput_attributes(BufferedWriter& WriterInstance, const Node& node, unsigned int flags)
2831  {
2832  const Char8* default_Name = ":anonymous";
2833 
2834  for (Attribute a = node.GetFirstAttribute(); a; a = a.GetNextAttribute())
2835  {
2836  WriterInstance.Write(' ');
2837  WriterInstance.Write(a.Name()[0] ? a.Name() : default_Name);
2838  WriterInstance.Write('=', '"');
2839 
2840  text_output(WriterInstance, a.Value(), ctx_special_attr, flags);
2841 
2842  WriterInstance.Write('"');
2843  }
2844  }
2845 
2846  PUGI__FN void NodeOutput(BufferedWriter& WriterInstance, const Node& node, const Char8* indent, unsigned int flags, unsigned int Depth)
2847  {
2848  const Char8* default_Name = ":anonymous";
2849 
2850  if ((flags & FormatIndent) != 0 && (flags & FormatRaw) == 0)
2851  for (unsigned int i = 0; i < Depth; ++i) WriterInstance.Write(indent);
2852 
2853  switch (node.Type())
2854  {
2855  case NodeDocument:
2856  {
2857  for (Node n = node.GetFirstChild(); n; n = n.GetNextSibling())
2858  NodeOutput(WriterInstance, n, indent, flags, Depth);
2859  break;
2860  }
2861 
2862  case NodeElement:
2863  {
2864  const Char8* Name = node.Name()[0] ? node.Name() : default_Name;
2865 
2866  WriterInstance.Write('<');
2867  WriterInstance.Write(Name);
2868 
2869  NodeOutput_attributes(WriterInstance, node, flags);
2870 
2871  if (flags & FormatRaw)
2872  {
2873  if (!node.GetFirstChild())
2874  WriterInstance.Write(' ', '/', '>');
2875  else
2876  {
2877  WriterInstance.Write('>');
2878 
2879  for (Node n = node.GetFirstChild(); n; n = n.GetNextSibling())
2880  NodeOutput(WriterInstance, n, indent, flags, Depth + 1);
2881 
2882  WriterInstance.Write('<', '/');
2883  WriterInstance.Write(Name);
2884  WriterInstance.Write('>');
2885  }
2886  }
2887  else if (!node.GetFirstChild())
2888  WriterInstance.Write(' ', '/', '>', '\n');
2889  else if (node.GetFirstChild() == node.GetLastChild() && (node.GetFirstChild().Type() == NodePcdata || node.GetFirstChild().Type() == NodeCdata))
2890  {
2891  WriterInstance.Write('>');
2892 
2893  if (node.GetFirstChild().Type() == NodePcdata)
2894  text_output(WriterInstance, node.GetFirstChild().Value(), ctx_special_pcdata, flags);
2895  else
2896  text_output_cdata(WriterInstance, node.GetFirstChild().Value());
2897 
2898  WriterInstance.Write('<', '/');
2899  WriterInstance.Write(Name);
2900  WriterInstance.Write('>', '\n');
2901  }
2902  else
2903  {
2904  WriterInstance.Write('>', '\n');
2905 
2906  for (Node n = node.GetFirstChild(); n; n = n.GetNextSibling())
2907  NodeOutput(WriterInstance, n, indent, flags, Depth + 1);
2908 
2909  if ((flags & FormatIndent) != 0 && (flags & FormatRaw) == 0)
2910  for (unsigned int i = 0; i < Depth; ++i) WriterInstance.Write(indent);
2911 
2912  WriterInstance.Write('<', '/');
2913  WriterInstance.Write(Name);
2914  WriterInstance.Write('>', '\n');
2915  }
2916 
2917  break;
2918  }
2919 
2920  case NodePcdata:
2921  text_output(WriterInstance, node.Value(), ctx_special_pcdata, flags);
2922  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2923  break;
2924 
2925  case NodeCdata:
2926  text_output_cdata(WriterInstance, node.Value());
2927  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2928  break;
2929 
2930  case NodeComment:
2931  WriterInstance.Write('<', '!', '-', '-');
2932  WriterInstance.Write(node.Value());
2933  WriterInstance.Write('-', '-', '>');
2934  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2935  break;
2936 
2937  case NodePi:
2938  case NodeDeclaration:
2939  WriterInstance.Write('<', '?');
2940  WriterInstance.Write(node.Name()[0] ? node.Name() : default_Name);
2941 
2942  if (node.Type() == NodeDeclaration)
2943  {
2944  NodeOutput_attributes(WriterInstance, node, flags);
2945  }
2946  else if (node.Value()[0])
2947  {
2948  WriterInstance.Write(' ');
2949  WriterInstance.Write(node.Value());
2950  }
2951 
2952  WriterInstance.Write('?', '>');
2953  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2954  break;
2955 
2956  case NodeDocType:
2957  WriterInstance.Write('<', '!', 'D', 'O', 'C');
2958  WriterInstance.Write('T', 'Y', 'P', 'E');
2959 
2960  if (node.Value()[0])
2961  {
2962  WriterInstance.Write(' ');
2963  WriterInstance.Write(node.Value());
2964  }
2965 
2966  WriterInstance.Write('>');
2967  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2968  break;
2969 
2970  default:
2971  assert(!"Invalid node Type");
2972  }
2973  }
2974 
2975  inline bool hAsDeclaration(const Node& node)
2976  {
2977  for (Node GetChild = node.GetFirstChild(); GetChild; GetChild = GetChild.GetNextSibling())
2978  {
2979  NodeType Type = GetChild.Type();
2980 
2981  if (Type == NodeDeclaration) return true;
2982  if (Type == NodeElement) return false;
2983  }
2984 
2985  return false;
2986  }
2987 
2988  inline bool allow_InsertChild(NodeType GetParent, NodeType GetChild)
2989  {
2990  if (GetParent != NodeDocument && GetParent != NodeElement) return false;
2991  if (GetChild == NodeDocument || GetChild == NodeNull) return false;
2992  if (GetParent != NodeDocument && (GetChild == NodeDeclaration || GetChild == NodeDocType)) return false;
2993 
2994  return true;
2995  }
2996 
2997  PUGI__FN void recursive_copy_skip(Node& dest, const Node& source, const Node& skip)
2998  {
2999  assert(dest.Type() == source.Type());
3000 
3001  switch (source.Type())
3002  {
3003  case NodeElement:
3004  {
3005  dest.SetName(source.Name());
3006 
3007  for (Attribute a = source.GetFirstAttribute(); a; a = a.GetNextAttribute())
3008  dest.AppendAttribute(a.Name()).SetValue(a.Value());
3009 
3010  for (Node c = source.GetFirstChild(); c; c = c.GetNextSibling())
3011  {
3012  if (c == skip) continue;
3013 
3014  Node cc = dest.AppendChild(c.Type());
3015  assert(cc);
3016 
3017  recursive_copy_skip(cc, c, skip);
3018  }
3019 
3020  break;
3021  }
3022 
3023  case NodePcdata:
3024  case NodeCdata:
3025  case NodeComment:
3026  case NodeDocType:
3027  dest.SetValue(source.Value());
3028  break;
3029 
3030  case NodePi:
3031  dest.SetName(source.Name());
3032  dest.SetValue(source.Value());
3033  break;
3034 
3035  case NodeDeclaration:
3036  {
3037  dest.SetName(source.Name());
3038 
3039  for (Attribute a = source.GetFirstAttribute(); a; a = a.GetNextAttribute())
3040  dest.AppendAttribute(a.Name()).SetValue(a.Value());
3041 
3042  break;
3043  }
3044 
3045  default:
3046  assert(!"Invalid node Type");
3047  }
3048  }
3049 
3050  inline bool is_text_node(NodeStruct* node)
3051  {
3052  NodeType Type = static_cast<NodeType>((node->header & internal::MemoryPage_type_mask) + 1);
3053 
3054  return Type == NodePcdata || Type == NodeCdata;
3055  }
3056 
3057  // get Value with conversion functions
3058  PUGI__FN int GetValue_int(const Char8* Value, int def)
3059  {
3060  if (!Value) return def;
3061 
3062  return static_cast<int>(strtol(Value, 0, 10));
3063  }
3064 
3065  PUGI__FN unsigned int GetValue_uint(const Char8* Value, unsigned int def)
3066  {
3067  if (!Value) return def;
3068 
3069  return static_cast<unsigned int>(strtoul(Value, 0, 10));
3070  }
3071 
3072  PUGI__FN double GetValue_double(const Char8* Value, double def)
3073  {
3074  if (!Value) return def;
3075 
3076  return strtod(Value, 0);
3077  }
3078 
3079  PUGI__FN float GetValue_float(const Char8* Value, float def)
3080  {
3081  if (!Value) return def;
3082 
3083  return static_cast<float>(strtod(Value, 0));
3084  }
3085 
3086  PUGI__FN bool GetValue_bool(const Char8* Value, bool def)
3087  {
3088  if (!Value) return def;
3089 
3090  // only look at first char
3091  Char8 first = *Value;
3092 
3093  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3094  return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3095  }
3096 
3097  // set Value with conversion functions
3098  PUGI__FN bool SetValue_buffer(Char8*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
3099  {
3100  return strcpy_insitu(dest, header, header_mask, buf);
3101  }
3102 
3103  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, int Value)
3104  {
3105  char buf[128];
3106  sprintf(buf, "%d", Value);
3107 
3108  return SetValue_buffer(dest, header, header_mask, buf);
3109  }
3110 
3111  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int Value)
3112  {
3113  char buf[128];
3114  sprintf(buf, "%u", Value);
3115 
3116  return SetValue_buffer(dest, header, header_mask, buf);
3117  }
3118 
3119  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, double Value)
3120  {
3121  char buf[128];
3122  sprintf(buf, "%g", Value);
3123 
3124  return SetValue_buffer(dest, header, header_mask, buf);
3125  }
3126 
3127  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, bool Value)
3128  {
3129  return strcpy_insitu(dest, header, header_mask, Value ? "true" : "false");
3130  }
3131 
3132  // we need to get length of entire file to Load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3133  PUGI__FN ParseStatus GetFile_size(FILE* file, size_t& out_Result)
3134  {
3135  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3136  typedef long length_type;
3137 
3138  fseek(file, 0, SEEK_END);
3139  length_type length = ftell(file);
3140  fseek(file, 0, SEEK_SET);
3141 
3142  // check for I/O errors
3143  if (length < 0) return StatusIOError;
3144 
3145  // check for overflow
3146  size_t Result = static_cast<size_t>(length);
3147 
3148  if (static_cast<length_type>(Result) != length) return StatusOutOfMemory;
3149 
3150  // finalize
3151  out_Result = Result;
3152 
3153  return StatusOk;
3154  }
3155 
3156  PUGI__FN ParseResult LoadFileImpl(Document& doc, FILE* file, unsigned int options, Encoding DocumentEncoding)
3157  {
3158  if (!file) return make_ParseResult(StatusFileNotFound);
3159 
3160  // get file size (can Result in I/O errors)
3161  size_t size = 0;
3162  ParseStatus size_Status = GetFile_size(file, size);
3163 
3164  if (size_Status != StatusOk)
3165  {
3166  fclose(file);
3167  return make_ParseResult(size_Status);
3168  }
3169 
3170  // allocate buffer for the whole file
3171  char* contents = static_cast<char*>(Memory::allocate(size > 0 ? size : 1));
3172 
3173  if (!contents)
3174  {
3175  fclose(file);
3176  return make_ParseResult(StatusOutOfMemory);
3177  }
3178 
3179  // read file in memory
3180  size_t read_size = fread(contents, 1, size, file);
3181  fclose(file);
3182 
3183  if (read_size != size)
3184  {
3185  Memory::deallocate(contents);
3186  return make_ParseResult(StatusIOError);
3187  }
3188 
3189  return doc.LoadBufferInplaceOwn(contents, size, options, DocumentEncoding);
3190  }
3191 
3192  template <typename T> struct StreamChunk
3193  {
3194  static StreamChunk* create()
3195  {
3196  void* memory = Memory::allocate(sizeof(StreamChunk));
3197 
3198  return new (memory) StreamChunk();
3199  }
3200 
3201  static void destroy(void* ptr)
3202  {
3203  StreamChunk* chunk = static_cast<StreamChunk*>(ptr);
3204 
3205  // free chunk chain
3206  while (chunk)
3207  {
3208  StreamChunk* next = chunk->next;
3209  Memory::deallocate(chunk);
3210  chunk = next;
3211  }
3212  }
3213 
3214  StreamChunk(): next(0), size(0)
3215  {
3216  }
3217 
3218  StreamChunk* next;
3219  size_t size;
3220 
3221  T data[MemoryPage_size / sizeof(T)];
3222  };
3223 
3224  template <typename T> PUGI__FN ParseStatus LoadStreamDataNoseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3225  {
3226  buffer_holder chunks(0, StreamChunk<T>::destroy);
3227 
3228  // read file to a chunk list
3229  size_t total = 0;
3230  StreamChunk<T>* last = 0;
3231 
3232  while (!stream.eof())
3233  {
3234  // allocate new chunk
3235  StreamChunk<T>* chunk = StreamChunk<T>::create();
3236  if (!chunk) return StatusOutOfMemory;
3237 
3238  // append chunk to list
3239  if (last) last = last->next = chunk;
3240  else chunks.data = last = chunk;
3241 
3242  // read data to chunk
3243  stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
3244  chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
3245 
3246  // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
3247  if (stream.bad() || (!stream.eof() && stream.fail())) return StatusIOError;
3248 
3249  // guard against huge files (chunk size is small enough to make this overflow check work)
3250  if (total + chunk->size < total) return StatusOutOfMemory;
3251  total += chunk->size;
3252  }
3253 
3254  // copy chunk list to a contiguous buffer
3255  char* buffer = static_cast<char*>(Memory::allocate(total));
3256  if (!buffer) return StatusOutOfMemory;
3257 
3258  char* Write = buffer;
3259 
3260  for (StreamChunk<T>* chunk = static_cast<StreamChunk<T>*>(chunks.data); chunk; chunk = chunk->next)
3261  {
3262  assert(Write + chunk->size <= buffer + total);
3263  memcpy(Write, chunk->data, chunk->size);
3264  Write += chunk->size;
3265  }
3266 
3267  assert(Write == buffer + total);
3268 
3269  // return buffer
3270  *out_buffer = buffer;
3271  *out_size = total;
3272 
3273  return StatusOk;
3274  }
3275 
3276  template <typename T> PUGI__FN ParseStatus LoadStreamDataSeek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3277  {
3278  // get length of remaining data in stream
3279  typename std::basic_istream<T>::pos_type pos = stream.tellg();
3280  stream.seekg(0, std::ios::end);
3281  std::streamoff length = stream.tellg() - pos;
3282  stream.seekg(pos);
3283 
3284  if (stream.fail() || pos < 0) return StatusIOError;
3285 
3286  // guard against huge files
3287  size_t read_length = static_cast<size_t>(length);
3288 
3289  if (static_cast<std::streamsize>(read_length) != length || length < 0) return StatusOutOfMemory;
3290 
3291  // read stream data into memory (guard against stream exceptions with buffer holder)
3292  buffer_holder buffer(Memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), Memory::deallocate);
3293  if (!buffer.data) return StatusOutOfMemory;
3294 
3295  stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
3296 
3297  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
3298  if (stream.bad() || (!stream.eof() && stream.fail())) return StatusIOError;
3299 
3300  // return buffer
3301  size_t actual_length = static_cast<size_t>(stream.gcount());
3302  assert(actual_length <= read_length);
3303 
3304  *out_buffer = buffer.release();
3305  *out_size = actual_length * sizeof(T);
3306 
3307  return StatusOk;
3308  }
3309 
3310  template <typename T> PUGI__FN ParseResult LoadStreamImpl(Document& doc, std::basic_istream<T>& stream, unsigned int options, Encoding DocumentEncoding)
3311  {
3312  void* buffer = 0;
3313  size_t size = 0;
3314 
3315  // Load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
3316  ParseStatus Status = (stream.tellg() < 0) ? LoadStreamDataNoseek(stream, &buffer, &size) : LoadStreamDataSeek(stream, &buffer, &size);
3317  if (Status != StatusOk) return make_ParseResult(Status);
3318 
3319  return doc.LoadBufferInplaceOwn(buffer, size, options, DocumentEncoding);
3320  }
3321 
3322 
3323 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
3324  PUGI__FN FILE* open_file_wide(const wchar_t* Path, const wchar_t* mode)
3325  {
3326  return _wfopen(Path, mode);
3327  }
3328 #else
3329  PUGI__FN char* convert_Path_heap(const wchar_t* str)
3330  {
3331  assert(str);
3332 
3333  // first pass: get length in utf8 characters
3334  size_t length = wcslen(str);
3335  size_t size = AsUtf8_begin(str, length);
3336 
3337  // allocate Resulting string
3338  char* Result = static_cast<char*>(Memory::allocate(size + 1));
3339  if (!Result) return 0;
3340 
3341  // second pass: convert to utf8
3342  AsUtf8_end(Result, size, str, length);
3343 
3344  return Result;
3345  }
3346 
3347  PUGI__FN FILE* open_file_wide(const wchar_t* Path, const wchar_t* mode)
3348  {
3349  // there is no standard function to open wide Paths, so our best bet is to try utf8 Path
3350  char* Path_utf8 = convert_Path_heap(Path);
3351  if (!Path_utf8) return 0;
3352 
3353  // convert mode to ASCII (we mirror _wfopen interface)
3354  char mode_ascii[4] = {0};
3355  for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
3356 
3357  // try to open the utf8 Path
3358  FILE* Result = fopen(Path_utf8, mode_ascii);
3359 
3360  // free dummy buffer
3361  Memory::deallocate(Path_utf8);
3362 
3363  return Result;
3364  }
3365 #endif
3366 
3367  PUGI__FN bool SaveFileImpl(const Document& doc, FILE* file, const Char8* indent, unsigned int flags, Encoding DocumentEncoding)
3368  {
3369  if (!file) return false;
3370 
3371  WriterFile WriterInstance(file);
3372  doc.Save(WriterInstance, indent, flags, DocumentEncoding);
3373 
3374  int Result = ferror(file);
3375 
3376  fclose(file);
3377 
3378  return Result == 0;
3379  }
3380 PUGI__NS_END
3381 
3382 namespace XML
3383 {
3384  #ifndef SWIG_SAFE
3385  PUGI__FN WriterFile::WriterFile(void* FilePtr): TargetFile(FilePtr)
3386  {
3387  }
3388 
3389  PUGI__FN void WriterFile::Write(const void* data, size_t size)
3390  {
3391  size_t Result = fwrite(data, 1, size, static_cast<FILE*>(TargetFile));
3392  (void)!Result; // unfortunately we can't do proper error handling here
3393  }
3394 
3395 
3396  PUGI__FN WriterStream::WriterStream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
3397  {
3398  }
3399 
3400  PUGI__FN WriterStream::WriterStream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
3401  {
3402  }
3403 
3404  PUGI__FN void WriterStream::Write(const void* data, size_t size)
3405  {
3406  if (narrow_stream)
3407  {
3408  assert(!wide_stream);
3409  narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
3410  }
3411  else
3412  {
3413  assert(wide_stream);
3414  assert(size % sizeof(wchar_t) == 0);
3415 
3416  wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
3417  }
3418  }
3419  #endif //SWIG_SAFE
3420 
3421  PUGI__FN TreeWalker::TreeWalker(): TraversalDepth(0)
3422  {
3423  }
3424 
3425  PUGI__FN TreeWalker::~TreeWalker()
3426  {
3427  }
3428 
3429  PUGI__FN int TreeWalker::Depth() const
3430  {
3431  return TraversalDepth;
3432  }
3433 
3434  PUGI__FN bool TreeWalker::OnTraversalBegin(Node&)
3435  {
3436  return true;
3437  }
3438 
3439  PUGI__FN bool TreeWalker::OnTraversalEnd(Node&)
3440  {
3441  return true;
3442  }
3443 
3444  PUGI__FN Attribute::Attribute(): AttributeData(0)
3445  {
3446  }
3447 
3448  PUGI__FN Attribute::Attribute(AttributeStruct* attr): AttributeData(attr)
3449  {
3450  }
3451 
3452  PUGI__FN static void unspecified_bool_Attribute(Attribute***)
3453  {
3454  }
3455 
3456  PUGI__FN Attribute::operator Attribute::unspecified_bool_type() const
3457  {
3458  return AttributeData ? unspecified_bool_Attribute : 0;
3459  }
3460 
3461  PUGI__FN bool Attribute::operator!() const
3462  {
3463  return !AttributeData;
3464  }
3465 
3466  PUGI__FN bool Attribute::operator==(const Attribute& r) const
3467  {
3468  return (AttributeData == r.AttributeData);
3469  }
3470 
3471  PUGI__FN bool Attribute::operator!=(const Attribute& r) const
3472  {
3473  return (AttributeData != r.AttributeData);
3474  }
3475 
3476  PUGI__FN bool Attribute::operator<(const Attribute& r) const
3477  {
3478  return (AttributeData < r.AttributeData);
3479  }
3480 
3481  PUGI__FN bool Attribute::operator>(const Attribute& r) const
3482  {
3483  return (AttributeData > r.AttributeData);
3484  }
3485 
3486  PUGI__FN bool Attribute::operator<=(const Attribute& r) const
3487  {
3488  return (AttributeData <= r.AttributeData);
3489  }
3490 
3491  PUGI__FN bool Attribute::operator>=(const Attribute& r) const
3492  {
3493  return (AttributeData >= r.AttributeData);
3494  }
3495 
3496  PUGI__FN Attribute Attribute::GetNextAttribute() const
3497  {
3498  return AttributeData ? Attribute(AttributeData->GetNextAttribute) : Attribute();
3499  }
3500 
3501  PUGI__FN Attribute Attribute::GetPreviousAttribute() const
3502  {
3503  return AttributeData && AttributeData->prev_attribute_c->GetNextAttribute ? Attribute(AttributeData->prev_attribute_c) : Attribute();
3504  }
3505 
3506  PUGI__FN const Char8* Attribute::AsString(const Char8* def) const
3507  {
3508  return (AttributeData && AttributeData->Value) ? AttributeData->Value : def;
3509  }
3510 
3511  PUGI__FN int Attribute::AsInt(int def) const
3512  {
3513  return internal::GetValue_int(AttributeData ? AttributeData->Value : 0, def);
3514  }
3515 
3516  PUGI__FN unsigned int Attribute::AsUint(unsigned int def) const
3517  {
3518  return internal::GetValue_uint(AttributeData ? AttributeData->Value : 0, def);
3519  }
3520 
3521  PUGI__FN double Attribute::AsDouble(double def) const
3522  {
3523  return internal::GetValue_double(AttributeData ? AttributeData->Value : 0, def);
3524  }
3525 
3526  PUGI__FN Whole Attribute::AsWhole(Whole def) const
3527  { return (AttributeData ? ToWhole(AttributeData->Value) : def); }
3528 
3529  PUGI__FN Integer Attribute::AsInteger(Integer def) const
3530  { return (AttributeData ? ToInteger(AttributeData->Value) : def); }
3531 
3532  PUGI__FN Real Attribute::AsReal(Real def) const
3533  { return (AttributeData ? ToReal(AttributeData->Value) : def); }
3534 
3535  PUGI__FN float Attribute::AsFloat(float def) const
3536  {
3537  return internal::GetValue_float(AttributeData ? AttributeData->Value : 0, def);
3538  }
3539 
3540  PUGI__FN bool Attribute::AsBool(bool def) const
3541  {
3542  return internal::GetValue_bool(AttributeData ? AttributeData->Value : 0, def);
3543  }
3544 
3545  PUGI__FN bool Attribute::Empty() const
3546  {
3547  return !AttributeData;
3548  }
3549 
3550  PUGI__FN const Char8* Attribute::Name() const
3551  {
3552  return (AttributeData && AttributeData->Name) ? AttributeData->Name : "";
3553  }
3554 
3555  PUGI__FN const Char8* Attribute::Value() const
3556  {
3557  return (AttributeData && AttributeData->Value) ? AttributeData->Value : "";
3558  }
3559 
3560  PUGI__FN size_t Attribute::HashValue() const
3561  {
3562  return static_cast<size_t>(reinterpret_cast<uintptr_t>(AttributeData) / sizeof(AttributeStruct));
3563  }
3564 
3565  PUGI__FN AttributeStruct* Attribute::InternalObject() const
3566  {
3567  return AttributeData;
3568  }
3569 
3570  PUGI__FN Attribute& Attribute::operator=(const Char8* rhs)
3571  {
3572  SetValue(rhs);
3573  return *this;
3574  }
3575 
3576  PUGI__FN Attribute& Attribute::operator=(int rhs)
3577  {
3578  SetValue(rhs);
3579  return *this;
3580  }
3581 
3582  PUGI__FN Attribute& Attribute::operator=(unsigned int rhs)
3583  {
3584  SetValue(rhs);
3585  return *this;
3586  }
3587 
3588  PUGI__FN Attribute& Attribute::operator=(double rhs)
3589  {
3590  SetValue(rhs);
3591  return *this;
3592  }
3593 
3594  PUGI__FN Attribute& Attribute::operator=(bool rhs)
3595  {
3596  SetValue(rhs);
3597  return *this;
3598  }
3599 
3600  PUGI__FN bool Attribute::SetName(const Char8* rhs)
3601  {
3602  if (!AttributeData) return false;
3603 
3604  return internal::strcpy_insitu(AttributeData->Name, AttributeData->header, internal::MemoryPage_Name_allocated_mask, rhs);
3605  }
3606 
3607  PUGI__FN bool Attribute::SetValue(const Char8* rhs)
3608  {
3609  if (!AttributeData) return false;
3610 
3611  return internal::strcpy_insitu(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3612  }
3613 
3614  PUGI__FN bool Attribute::SetValue(int rhs)
3615  {
3616  if (!AttributeData) return false;
3617 
3618  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3619  }
3620 
3621  PUGI__FN bool Attribute::SetValue(unsigned int rhs)
3622  {
3623  if (!AttributeData) return false;
3624 
3625  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3626  }
3627 
3628  PUGI__FN bool Attribute::SetValue(double rhs)
3629  {
3630  if (!AttributeData) return false;
3631 
3632  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3633  }
3634 
3635  PUGI__FN bool Attribute::SetValue(bool rhs)
3636  {
3637  if (!AttributeData) return false;
3638 
3639  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3640  }
3641 
3642 #ifdef __BORLANDC__
3643  PUGI__FN bool operator&&(const Attribute& lhs, bool rhs)
3644  {
3645  return (bool)lhs && rhs;
3646  }
3647 
3648  PUGI__FN bool operator||(const Attribute& lhs, bool rhs)
3649  {
3650  return (bool)lhs || rhs;
3651  }
3652 #endif
3653 
3654  PUGI__FN Node::Node(): NodeData(0)
3655  {
3656  }
3657 
3658  PUGI__FN Node::~Node()
3659  {
3660  }
3661 
3662  PUGI__FN Node::Node(NodeStruct* p): NodeData(p)
3663  {
3664  }
3665 
3666  PUGI__FN static void unspecified_bool_Node(Node***)
3667  {
3668  }
3669 
3670  PUGI__FN Node::operator Node::unspecified_bool_type() const
3671  {
3672  return NodeData ? unspecified_bool_Node : 0;
3673  }
3674 
3675  PUGI__FN bool Node::operator!() const
3676  {
3677  return !NodeData;
3678  }
3679 
3680  PUGI__FN Node::iterator Node::begin() const
3681  {
3682  return iterator(NodeData ? NodeData->GetFirstChild : 0, NodeData);
3683  }
3684 
3685  PUGI__FN Node::iterator Node::end() const
3686  {
3687  return iterator(0, NodeData);
3688  }
3689 
3691  {
3692  return attribute_iterator(NodeData ? NodeData->GetFirstAttribute : 0, NodeData);
3693  }
3694 
3696  {
3697  return attribute_iterator(0, NodeData);
3698  }
3699 
3700  PUGI__FN ObjectRange<NodeIterator> Node::GetChildren() const
3701  {
3702  return ObjectRange<NodeIterator>(begin(), end());
3703  }
3704 
3705  PUGI__FN ObjectRange<NamedNodeIterator> Node::GetChildren(const Char8* Name_) const
3706  {
3707  return ObjectRange<NamedNodeIterator>(NamedNodeIterator(GetChild(Name_), Name_), NamedNodeIterator());
3708  }
3709 
3710  PUGI__FN ObjectRange<AttributeIterator> Node::attributes() const
3711  {
3712  return ObjectRange<AttributeIterator>(attributes_begin(), attributes_end());
3713  }
3714 
3715  PUGI__FN bool Node::operator==(const Node& r) const
3716  {
3717  return (NodeData == r.NodeData);
3718  }
3719 
3720  PUGI__FN bool Node::operator!=(const Node& r) const
3721  {
3722  return (NodeData != r.NodeData);
3723  }
3724 
3725  PUGI__FN bool Node::operator<(const Node& r) const
3726  {
3727  return (NodeData < r.NodeData);
3728  }
3729 
3730  PUGI__FN bool Node::operator>(const Node& r) const
3731  {
3732  return (NodeData > r.NodeData);
3733  }
3734 
3735  PUGI__FN bool Node::operator<=(const Node& r) const
3736  {
3737  return (NodeData <= r.NodeData);
3738  }
3739 
3740  PUGI__FN bool Node::operator>=(const Node& r) const
3741  {
3742  return (NodeData >= r.NodeData);
3743  }
3744 
3745  PUGI__FN bool Node::Empty() const
3746  {
3747  return !NodeData;
3748  }
3749 
3750  PUGI__FN const Char8* Node::Name() const
3751  {
3752  return (NodeData && NodeData->Name) ? NodeData->Name : "";
3753  }
3754 
3755  PUGI__FN NodeType Node::Type() const
3756  {
3757  return NodeData ? static_cast<NodeType>((NodeData->header & internal::MemoryPage_type_mask) + 1) : NodeNull;
3758  }
3759 
3760  PUGI__FN const Char8* Node::Value() const
3761  {
3762  return (NodeData && NodeData->Value) ? NodeData->Value : "";
3763  }
3764 
3765  PUGI__FN Node Node::GetChild(const Char8* Name_) const
3766  {
3767  if (!NodeData) return Node();
3768 
3769  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
3770  if (i->Name && internal::strequal(Name_, i->Name)) return Node(i);
3771 
3772  return Node();
3773  }
3774 
3775  PUGI__FN Attribute Node::GetAttribute(const Char8* Name_) const
3776  {
3777  if (!NodeData) return Attribute();
3778 
3779  for (AttributeStruct* i = NodeData->GetFirstAttribute; i; i = i->GetNextAttribute)
3780  if (i->Name && internal::strequal(Name_, i->Name))
3781  return Attribute(i);
3782 
3783  return Attribute();
3784  }
3785 
3786  PUGI__FN Node Node::GetNextSibling(const Char8* Name_) const
3787  {
3788  if (!NodeData) return Node();
3789 
3790  for (NodeStruct* i = NodeData->GetNextSibling; i; i = i->GetNextSibling)
3791  if (i->Name && internal::strequal(Name_, i->Name)) return Node(i);
3792 
3793  return Node();
3794  }
3795 
3796  PUGI__FN Node Node::GetNextSibling() const
3797  {
3798  if (!NodeData) return Node();
3799 
3800  if (NodeData->GetNextSibling) return Node(NodeData->GetNextSibling);
3801  else return Node();
3802  }
3803 
3804  PUGI__FN Node Node::GetPreviousSibling(const Char8* Name_) const
3805  {
3806  if (!NodeData) return Node();
3807 
3808  for (NodeStruct* i = NodeData->prev_sibling_c; i->GetNextSibling; i = i->prev_sibling_c)
3809  if (i->Name && internal::strequal(Name_, i->Name)) return Node(i);
3810 
3811  return Node();
3812  }
3813 
3814  PUGI__FN Node Node::GetPreviousSibling() const
3815  {
3816  if (!NodeData) return Node();
3817 
3818  if (NodeData->prev_sibling_c->GetNextSibling) return Node(NodeData->prev_sibling_c);
3819  else return Node();
3820  }
3821 
3822  PUGI__FN Node Node::GetParent() const
3823  {
3824  return NodeData ? Node(NodeData->GetParent) : Node();
3825  }
3826 
3827  PUGI__FN Node Node::GetRoot() const
3828  {
3829  if (!NodeData) return Node();
3830 
3831  internal::MemoryPage* page = reinterpret_cast<internal::MemoryPage*>(NodeData->header & internal::MemoryPage_pointer_mask);
3832 
3833  return Node(static_cast<internal::DocumentStruct*>(page->allocator));
3834  }
3835 
3836  PUGI__FN NodeText Node::GetText() const
3837  {
3838  return NodeText(NodeData);
3839  }
3840 
3841  PUGI__FN const Char8* Node::GetChildValue() const
3842  {
3843  if (!NodeData) return "";
3844 
3845  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
3846  if (i->Value && internal::is_text_node(i))
3847  return i->Value;
3848 
3849  return "";
3850  }
3851 
3852  PUGI__FN const Char8* Node::GetChildValue(const Char8* Name_) const
3853  {
3854  return GetChild(Name_).GetChildValue();
3855  }
3856 
3857  PUGI__FN Attribute Node::GetFirstAttribute() const
3858  {
3859  return NodeData ? Attribute(NodeData->GetFirstAttribute) : Attribute();
3860  }
3861 
3862  PUGI__FN Attribute Node::GetLastAttribute() const
3863  {
3864  return NodeData && NodeData->GetFirstAttribute ? Attribute(NodeData->GetFirstAttribute->prev_attribute_c) : Attribute();
3865  }
3866 
3867  PUGI__FN Node Node::GetFirstChild() const
3868  {
3869  return NodeData ? Node(NodeData->GetFirstChild) : Node();
3870  }
3871 
3872  PUGI__FN Node Node::GetLastChild() const
3873  {
3874  return NodeData && NodeData->GetFirstChild ? Node(NodeData->GetFirstChild->prev_sibling_c) : Node();
3875  }
3876 
3877  PUGI__FN bool Node::SetName(const Char8* rhs)
3878  {
3879  switch (Type())
3880  {
3881  case NodePi:
3882  case NodeDeclaration:
3883  case NodeElement:
3884  return internal::strcpy_insitu(NodeData->Name, NodeData->header, internal::MemoryPage_Name_allocated_mask, rhs);
3885 
3886  default:
3887  return false;
3888  }
3889  }
3890 
3891  PUGI__FN bool Node::SetValue(const Char8* rhs)
3892  {
3893  switch (Type())
3894  {
3895  case NodePi:
3896  case NodeCdata:
3897  case NodePcdata:
3898  case NodeComment:
3899  case NodeDocType:
3900  return internal::strcpy_insitu(NodeData->Value, NodeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3901 
3902  default:
3903  return false;
3904  }
3905  }
3906 
3907  PUGI__FN Attribute Node::AppendAttribute(const Char8* Name_)
3908  {
3909  if (Type() != NodeElement && Type() != NodeDeclaration) return Attribute();
3910 
3911  Attribute a(internal::AppendAttribute_ll(NodeData, internal::GetAllocator(NodeData)));
3912  a.SetName(Name_);
3913 
3914  return a;
3915  }
3916 
3917  PUGI__FN Attribute Node::PrependAttribute(const Char8* Name_)
3918  {
3919  if (Type() != NodeElement && Type() != NodeDeclaration) return Attribute();
3920 
3921  Attribute a(internal::allocate_attribute(internal::GetAllocator(NodeData)));
3922  if (!a) return Attribute();
3923 
3924  a.SetName(Name_);
3925 
3926  AttributeStruct* head = NodeData->GetFirstAttribute;
3927 
3928  if (head)
3929  {
3930  a.AttributeData->prev_attribute_c = head->prev_attribute_c;
3931  head->prev_attribute_c = a.AttributeData;
3932  }
3933  else
3934  a.AttributeData->prev_attribute_c = a.AttributeData;
3935 
3936  a.AttributeData->GetNextAttribute = head;
3937  NodeData->GetFirstAttribute = a.AttributeData;
3938 
3939  return a;
3940  }
3941 
3942  PUGI__FN Attribute Node::InsertAttributeBefore(const Char8* Name_, const Attribute& attr)
3943  {
3944  if ((Type() != NodeElement && Type() != NodeDeclaration) || attr.Empty()) return Attribute();
3945 
3946  // check that GetAttribute belongs to *this
3947  AttributeStruct* cur = attr.AttributeData;
3948 
3949  while (cur->prev_attribute_c->GetNextAttribute) cur = cur->prev_attribute_c;
3950 
3951  if (cur != NodeData->GetFirstAttribute) return Attribute();
3952 
3953  Attribute a(internal::allocate_attribute(internal::GetAllocator(NodeData)));
3954  if (!a) return Attribute();
3955 
3956  a.SetName(Name_);
3957 
3958  if (attr.AttributeData->prev_attribute_c->GetNextAttribute)
3959  attr.AttributeData->prev_attribute_c->GetNextAttribute = a.AttributeData;
3960  else
3961  NodeData->GetFirstAttribute = a.AttributeData;
3962 
3963  a.AttributeData->prev_attribute_c = attr.AttributeData->prev_attribute_c;
3964  a.AttributeData->GetNextAttribute = attr.AttributeData;
3965  attr.AttributeData->prev_attribute_c = a.AttributeData;
3966 
3967  return a;
3968  }
3969 
3970  PUGI__FN Attribute Node::InsertAttributeAfter(const Char8* Name_, const Attribute& attr)
3971  {
3972  if ((Type() != NodeElement && Type() != NodeDeclaration) || attr.Empty()) return Attribute();
3973 
3974  // check that GetAttribute belongs to *this
3975  AttributeStruct* cur = attr.AttributeData;
3976 
3977  while (cur->prev_attribute_c->GetNextAttribute) cur = cur->prev_attribute_c;
3978 
3979  if (cur != NodeData->GetFirstAttribute) return Attribute();
3980 
3981  Attribute a(internal::allocate_attribute(internal::GetAllocator(NodeData)));
3982  if (!a) return Attribute();
3983 
3984  a.SetName(Name_);
3985 
3986  if (attr.AttributeData->GetNextAttribute)
3987  attr.AttributeData->GetNextAttribute->prev_attribute_c = a.AttributeData;
3988  else
3989  NodeData->GetFirstAttribute->prev_attribute_c = a.AttributeData;
3990 
3991  a.AttributeData->GetNextAttribute = attr.AttributeData->GetNextAttribute;
3992  a.AttributeData->prev_attribute_c = attr.AttributeData;
3993  attr.AttributeData->GetNextAttribute = a.AttributeData;
3994 
3995  return a;
3996  }
3997 
3998  PUGI__FN Attribute Node::AppendCopy(const Attribute& proto)
3999  {
4000  if (!proto) return Attribute();
4001 
4002  Attribute Result = AppendAttribute(proto.Name());
4003  Result.SetValue(proto.Value());
4004 
4005  return Result;
4006  }
4007 
4008  PUGI__FN Attribute Node::PrependCopy(const Attribute& proto)
4009  {
4010  if (!proto) return Attribute();
4011 
4012  Attribute Result = PrependAttribute(proto.Name());
4013  Result.SetValue(proto.Value());
4014 
4015  return Result;
4016  }
4017 
4018  PUGI__FN Attribute Node::InsertCopyAfter(const Attribute& proto, const Attribute& attr)
4019  {
4020  if (!proto) return Attribute();
4021 
4022  Attribute Result = InsertAttributeAfter(proto.Name(), attr);
4023  Result.SetValue(proto.Value());
4024 
4025  return Result;
4026  }
4027 
4028  PUGI__FN Attribute Node::InsertCopyBefore(const Attribute& proto, const Attribute& attr)
4029  {
4030  if (!proto) return Attribute();
4031 
4032  Attribute Result = InsertAttributeBefore(proto.Name(), attr);
4033  Result.SetValue(proto.Value());
4034 
4035  return Result;
4036  }
4037 
4038  PUGI__FN Node Node::AppendChild(NodeType Type_)
4039  {
4040  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4041 
4042  Node n(internal::AppendNode(NodeData, internal::GetAllocator(NodeData), Type_));
4043 
4044  if (Type_ == NodeDeclaration) n.SetName("xml");
4045 
4046  return n;
4047  }
4048 
4049  PUGI__FN Node Node::PrependChild(NodeType Type_)
4050  {
4051  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4052 
4053  Node n(internal::allocate_node(internal::GetAllocator(NodeData), Type_));
4054  if (!n) return Node();
4055 
4056  n.NodeData->GetParent = NodeData;
4057 
4058  NodeStruct* head = NodeData->GetFirstChild;
4059 
4060  if (head)
4061  {
4062  n.NodeData->prev_sibling_c = head->prev_sibling_c;
4063  head->prev_sibling_c = n.NodeData;
4064  }
4065  else
4066  n.NodeData->prev_sibling_c = n.NodeData;
4067 
4068  n.NodeData->GetNextSibling = head;
4069  NodeData->GetFirstChild = n.NodeData;
4070 
4071  if (Type_ == NodeDeclaration) n.SetName("xml");
4072 
4073  return n;
4074  }
4075 
4076  PUGI__FN Node Node::InsertChildBefore(NodeType Type_, const Node& node)
4077  {
4078  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4079  if (!node.NodeData || node.NodeData->GetParent != NodeData) return Node();
4080 
4081  Node n(internal::allocate_node(internal::GetAllocator(NodeData), Type_));
4082  if (!n) return Node();
4083 
4084  n.NodeData->GetParent = NodeData;
4085 
4086  if (node.NodeData->prev_sibling_c->GetNextSibling)
4087  node.NodeData->prev_sibling_c->GetNextSibling = n.NodeData;
4088  else
4089  NodeData->GetFirstChild = n.NodeData;
4090 
4091  n.NodeData->prev_sibling_c = node.NodeData->prev_sibling_c;
4092  n.NodeData->GetNextSibling = node.NodeData;
4093  node.NodeData->prev_sibling_c = n.NodeData;
4094 
4095  if (Type_ == NodeDeclaration) n.SetName("xml");
4096 
4097  return n;
4098  }
4099 
4100  PUGI__FN Node Node::InsertChildAfter(NodeType Type_, const Node& node)
4101  {
4102  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4103  if (!node.NodeData || node.NodeData->GetParent != NodeData) return Node();
4104 
4105  Node n(internal::allocate_node(internal::GetAllocator(NodeData), Type_));
4106  if (!n) return Node();
4107 
4108  n.NodeData->GetParent = NodeData;
4109 
4110  if (node.NodeData->GetNextSibling)
4111  node.NodeData->GetNextSibling->prev_sibling_c = n.NodeData;
4112  else
4113  NodeData->GetFirstChild->prev_sibling_c = n.NodeData;
4114 
4115  n.NodeData->GetNextSibling = node.NodeData->GetNextSibling;
4116  n.NodeData->prev_sibling_c = node.NodeData;
4117  node.NodeData->GetNextSibling = n.NodeData;
4118 
4119  if (Type_ == NodeDeclaration) n.SetName("xml");
4120 
4121  return n;
4122  }
4123 
4124  PUGI__FN Node Node::AppendChild(const Char8* Name_)
4125  {
4126  Node Result = AppendChild(NodeElement);
4127 
4128  Result.SetName(Name_);
4129 
4130  return Result;
4131  }
4132 
4133  PUGI__FN Node Node::PrependChild(const Char8* Name_)
4134  {
4135  Node Result = PrependChild(NodeElement);
4136 
4137  Result.SetName(Name_);
4138 
4139  return Result;
4140  }
4141 
4142  PUGI__FN Node Node::InsertChildAfter(const Char8* Name_, const Node& node)
4143  {
4144  Node Result = InsertChildAfter(NodeElement, node);
4145 
4146  Result.SetName(Name_);
4147 
4148  return Result;
4149  }
4150 
4151  PUGI__FN Node Node::InsertChildBefore(const Char8* Name_, const Node& node)
4152  {
4153  Node Result = InsertChildBefore(NodeElement, node);
4154 
4155  Result.SetName(Name_);
4156 
4157  return Result;
4158  }
4159 
4160  PUGI__FN Node Node::AppendCopy(const Node& proto)
4161  {
4162  Node Result = AppendChild(proto.Type());
4163 
4164  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4165 
4166  return Result;
4167  }
4168 
4169  PUGI__FN Node Node::PrependCopy(const Node& proto)
4170  {
4171  Node Result = PrependChild(proto.Type());
4172 
4173  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4174 
4175  return Result;
4176  }
4177 
4178  PUGI__FN Node Node::InsertCopyAfter(const Node& proto, const Node& node)
4179  {
4180  Node Result = InsertChildAfter(proto.Type(), node);
4181 
4182  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4183 
4184  return Result;
4185  }
4186 
4187  PUGI__FN Node Node::InsertCopyBefore(const Node& proto, const Node& node)
4188  {
4189  Node Result = InsertChildBefore(proto.Type(), node);
4190 
4191  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4192 
4193  return Result;
4194  }
4195 
4196  PUGI__FN bool Node::RemoveAttribute(const Char8* Name_)
4197  {
4198  return RemoveAttribute(GetAttribute(Name_));
4199  }
4200 
4201  PUGI__FN bool Node::RemoveAttribute(const Attribute& a)
4202  {
4203  if (!NodeData || !a.AttributeData) return false;
4204 
4205  // check that GetAttribute belongs to *this
4206  AttributeStruct* attr = a.AttributeData;
4207 
4208  while (attr->prev_attribute_c->GetNextAttribute) attr = attr->prev_attribute_c;
4209 
4210  if (attr != NodeData->GetFirstAttribute) return false;
4211 
4212  if (a.AttributeData->GetNextAttribute) a.AttributeData->GetNextAttribute->prev_attribute_c = a.AttributeData->prev_attribute_c;
4213  else if (NodeData->GetFirstAttribute) NodeData->GetFirstAttribute->prev_attribute_c = a.AttributeData->prev_attribute_c;
4214 
4215  if (a.AttributeData->prev_attribute_c->GetNextAttribute) a.AttributeData->prev_attribute_c->GetNextAttribute = a.AttributeData->GetNextAttribute;
4216  else NodeData->GetFirstAttribute = a.AttributeData->GetNextAttribute;
4217 
4218  internal::destroy_attribute(a.AttributeData, internal::GetAllocator(NodeData));
4219 
4220  return true;
4221  }
4222 
4223  PUGI__FN bool Node::RemoveChild(const Char8* Name_)
4224  {
4225  return RemoveChild(GetChild(Name_));
4226  }
4227 
4228  PUGI__FN bool Node::RemoveChild(const Node& n)
4229  {
4230  if (!NodeData || !n.NodeData || n.NodeData->GetParent != NodeData) return false;
4231 
4232  if (n.NodeData->GetNextSibling) n.NodeData->GetNextSibling->prev_sibling_c = n.NodeData->prev_sibling_c;
4233  else if (NodeData->GetFirstChild) NodeData->GetFirstChild->prev_sibling_c = n.NodeData->prev_sibling_c;
4234 
4235  if (n.NodeData->prev_sibling_c->GetNextSibling) n.NodeData->prev_sibling_c->GetNextSibling = n.NodeData->GetNextSibling;
4236  else NodeData->GetFirstChild = n.NodeData->GetNextSibling;
4237 
4238  internal::destroy_node(n.NodeData, internal::GetAllocator(NodeData));
4239 
4240  return true;
4241  }
4242 
4243  PUGI__FN Node Node::FindChildbyAttribute(const Char8* Name_, const Char8* AttrName, const Char8* AttrValue) const
4244  {
4245  if (!NodeData) return Node();
4246 
4247  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
4248  if (i->Name && internal::strequal(Name_, i->Name))
4249  {
4250  for (AttributeStruct* a = i->GetFirstAttribute; a; a = a->GetNextAttribute)
4251  if (internal::strequal(AttrName, a->Name) && internal::strequal(AttrValue, a->Value))
4252  return Node(i);
4253  }
4254 
4255  return Node();
4256  }
4257 
4258  PUGI__FN Node Node::FindChildbyAttribute(const Char8* AttrName, const Char8* AttrValue) const
4259  {
4260  if (!NodeData) return Node();
4261 
4262  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
4263  for (AttributeStruct* a = i->GetFirstAttribute; a; a = a->GetNextAttribute)
4264  if (internal::strequal(AttrName, a->Name) && internal::strequal(AttrValue, a->Value))
4265  return Node(i);
4266 
4267  return Node();
4268  }
4269 
4270  PUGI__FN String Node::Path(Char8 delimiter) const
4271  {
4272  Node cursor = *this; // Make a copy.
4273 
4274  String Result = cursor.Name();
4275 
4276  while (cursor.GetParent())
4277  {
4278  cursor = cursor.GetParent();
4279 
4280  String temp = cursor.Name();
4281  temp += delimiter;
4282  temp += Result;
4283  Result.swap(temp);
4284  }
4285 
4286  return Result;
4287  }
4288 
4289  PUGI__FN Node Node::FirstElementByPath(const Char8* Path_, Char8 delimiter) const
4290  {
4291  Node found = *this; // Current search context.
4292 
4293  if (!NodeData || !Path_ || !Path_[0]) return found;
4294 
4295  if (Path_[0] == delimiter)
4296  {
4297  // Absolute Path; e.g. '/foo/bar'
4298  found = found.GetRoot();
4299  ++Path_;
4300  }
4301 
4302  const Char8* Path_segment = Path_;
4303 
4304  while (*Path_segment == delimiter) ++Path_segment;
4305 
4306  const Char8* Path_segment_end = Path_segment;
4307 
4308  while (*Path_segment_end && *Path_segment_end != delimiter) ++Path_segment_end;
4309 
4310  if (Path_segment == Path_segment_end) return found;
4311 
4312  const Char8* NextSegment = Path_segment_end;
4313 
4314  while (*NextSegment == delimiter) ++NextSegment;
4315 
4316  if (*Path_segment == '.' && Path_segment + 1 == Path_segment_end)
4317  return found.FirstElementByPath(NextSegment, delimiter);
4318  else if (*Path_segment == '.' && *(Path_segment+1) == '.' && Path_segment + 2 == Path_segment_end)
4319  return found.GetParent().FirstElementByPath(NextSegment, delimiter);
4320  else
4321  {
4322  for (NodeStruct* j = found.NodeData->GetFirstChild; j; j = j->GetNextSibling)
4323  {
4324  if (j->Name && internal::strequalrange(j->Name, Path_segment, static_cast<size_t>(Path_segment_end - Path_segment)))
4325  {
4326  Node subsearch = Node(j).FirstElementByPath(NextSegment, delimiter);
4327 
4328  if (subsearch) return subsearch;
4329  }
4330  }
4331 
4332  return Node();
4333  }
4334  }
4335 
4336  PUGI__FN bool Node::Traverse(TreeWalker& walker)
4337  {
4338  walker.TraversalDepth = -1;
4339 
4340  Node arg_begin = *this;
4341  if (!walker.OnTraversalBegin(arg_begin)) return false;
4342 
4343  Node cur = GetFirstChild();
4344 
4345  if (cur)
4346  {
4347  ++walker.TraversalDepth;
4348 
4349  do
4350  {
4351  Node arg_for_each = cur;
4352  if (!walker.OnEachNode(arg_for_each))
4353  return false;
4354 
4355  if (cur.GetFirstChild())
4356  {
4357  ++walker.TraversalDepth;
4358  cur = cur.GetFirstChild();
4359  }
4360  else if (cur.GetNextSibling())
4361  cur = cur.GetNextSibling();
4362  else
4363  {
4364  // Borland C++ workaround
4365  while (!cur.GetNextSibling() && cur != *this && !cur.GetParent().Empty())
4366  {
4367  --walker.TraversalDepth;
4368  cur = cur.GetParent();
4369  }
4370 
4371  if (cur != *this)
4372  cur = cur.GetNextSibling();
4373  }
4374  }
4375  while (cur && cur != *this);
4376  }
4377 
4378  assert(walker.TraversalDepth == -1);
4379 
4380  Node arg_end = *this;
4381  return walker.OnTraversalEnd(arg_end);
4382  }
4383 
4384  PUGI__FN size_t Node::HashValue() const
4385  {
4386  return static_cast<size_t>(reinterpret_cast<uintptr_t>(NodeData) / sizeof(NodeStruct));
4387  }
4388 
4389  PUGI__FN NodeStruct* Node::InternalObject() const
4390  {
4391  return NodeData;
4392  }
4393 
4394  PUGI__FN void Node::Print(Writer& WriterInstance, const Char8* indent, unsigned int flags, Encoding DocumentEncoding, unsigned int Depth) const
4395  {
4396  if (!NodeData) return;
4397 
4398  internal::BufferedWriter buffered_WriterInstance(WriterInstance, DocumentEncoding);
4399 
4400  internal::NodeOutput(buffered_WriterInstance, *this, indent, flags, Depth);
4401  }
4402 
4403  PUGI__FN void Node::Print(std::basic_ostream<char, std::char_traits<char> >& stream, const Char8* indent, unsigned int flags, Encoding DocumentEncoding, unsigned int Depth) const
4404  {
4405  WriterStream WriterInstance(stream);
4406 
4407  Print(WriterInstance, indent, flags, DocumentEncoding, Depth);
4408  }
4409 
4410  PUGI__FN void Node::Print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const Char8* indent, unsigned int flags, unsigned int Depth) const
4411  {
4412  WriterStream WriterInstance(stream);
4413 
4414  Print(WriterInstance, indent, flags, Encodingwchar_t, Depth);
4415  }
4416 
4417  PUGI__FN ptrdiff_t Node::OffSetDebug() const
4418  {
4419  NodeStruct* r = GetRoot().NodeData;
4420 
4421  if (!r) return -1;
4422 
4423  const Char8* buffer = static_cast<internal::DocumentStruct*>(r)->buffer;
4424 
4425  if (!buffer) return -1;
4426 
4427  switch (Type())
4428  {
4429  case NodeDocument:
4430  return 0;
4431 
4432  case NodeElement:
4433  case NodeDeclaration:
4434  case NodePi:
4435  return (NodeData->header & internal::MemoryPage_Name_allocated_mask) ? -1 : NodeData->Name - buffer;
4436 
4437  case NodePcdata:
4438  case NodeCdata:
4439  case NodeComment:
4440  case NodeDocType:
4441  return (NodeData->header & internal::MemoryPage_Value_allocated_mask) ? -1 : NodeData->Value - buffer;
4442 
4443  default:
4444  return -1;
4445  }
4446  }
4447 
#ifdef __BORLANDC__
// Borland-only helpers: combine a Node (converted to bool) with a plain bool.
PUGI__FN bool operator&&(const Node& lhs, bool rhs)
{
    const bool left = (bool)lhs;
    return left && rhs;
}

PUGI__FN bool operator||(const Node& lhs, bool rhs)
{
    const bool left = (bool)lhs;
    return left || rhs;
}
#endif
4459 
4460  PUGI__FN NodeText::NodeText(NodeStruct* OtherRoot): RootNode(OtherRoot)
4461  {
4462  }
4463 
4464  PUGI__FN NodeStruct* NodeText::Data() const
4465  {
4466  if (!RootNode || internal::is_text_node(RootNode)) return RootNode;
4467 
4468  for (NodeStruct* node = RootNode->GetFirstChild; node; node = node->GetNextSibling)
4469  if (internal::is_text_node(node))
4470  return node;
4471 
4472  return 0;
4473  }
4474 
4475  PUGI__FN NodeStruct* NodeText::DataNew()
4476  {
4477  NodeStruct* d = Data();
4478  if (d) return d;
4479 
4480  return Node(RootNode).AppendChild(NodePcdata).InternalObject();
4481  }
4482 
4483  PUGI__FN NodeText::NodeText(): RootNode(0)
4484  {
4485  }
4486 
4487  PUGI__FN static void unspecified_bool_Text(NodeText***)
4488  {
4489  }
4490 
4491  PUGI__FN NodeText::operator NodeText::unspecified_bool_type() const
4492  {
4493  return Data() ? unspecified_bool_Text : 0;
4494  }
4495 
4496  PUGI__FN bool NodeText::operator!() const
4497  {
4498  return !Data();
4499  }
4500 
4501  PUGI__FN bool NodeText::Empty() const
4502  {
4503  return Data() == 0;
4504  }
4505 
4506  PUGI__FN const Char8* NodeText::GetString() const
4507  {
4508  NodeStruct* d = Data();
4509 
4510  return (d && d->Value) ? d->Value : "";
4511  }
4512 
4513  PUGI__FN const Char8* NodeText::AsString(const Char8* def) const
4514  {
4515  NodeStruct* d = Data();
4516 
4517  return (d && d->Value) ? d->Value : def;
4518  }
4519 
4520  PUGI__FN int NodeText::AsInt(int def) const
4521  {
4522  NodeStruct* d = Data();
4523 
4524  return internal::GetValue_int(d ? d->Value : 0, def);
4525  }
4526 
4527  PUGI__FN unsigned int NodeText::AsUint(unsigned int def) const
4528  {
4529  NodeStruct* d = Data();
4530 
4531  return internal::GetValue_uint(d ? d->Value : 0, def);
4532  }
4533 
4534  PUGI__FN double NodeText::AsDouble(double def) const
4535  {
4536  NodeStruct* d = Data();
4537 
4538  return internal::GetValue_double(d ? d->Value : 0, def);
4539  }
4540 
4541  PUGI__FN float NodeText::AsFloat(float def) const
4542  {
4543  NodeStruct* d = Data();
4544 
4545  return internal::GetValue_float(d ? d->Value : 0, def);
4546  }
4547 
4548  PUGI__FN Real NodeText::AsReal(Real def) const
4549  {
4550  return AsFloat(def);
4551  }
4552 
4553  PUGI__FN Whole NodeText::AsWhole(Whole def) const
4554  {
4555  return AsUint(def);
4556  }
4557 
4558  PUGI__FN Integer NodeText::AsInteger(Integer def) const
4559  {
4560  return AsInt(def);
4561  }
4562 
4563  PUGI__FN bool NodeText::AsBool(bool def) const
4564  {
4565  NodeStruct* d = Data();
4566 
4567  return internal::GetValue_bool(d ? d->Value : 0, def);
4568  }
4569 
4570  PUGI__FN bool NodeText::Set(const Char8* rhs)
4571  {
4572  NodeStruct* dn = DataNew();
4573 
4574  return dn ? internal::strcpy_insitu(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4575  }
4576 
4577  PUGI__FN bool NodeText::Set(int rhs)
4578  {
4579  NodeStruct* dn = DataNew();
4580 
4581  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4582  }
4583 
4584  PUGI__FN bool NodeText::Set(unsigned int rhs)
4585  {
4586  NodeStruct* dn = DataNew();
4587 
4588  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4589  }
4590 
4591  PUGI__FN bool NodeText::Set(double rhs)
4592  {
4593  NodeStruct* dn = DataNew();
4594 
4595  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4596  }
4597 
4598  PUGI__FN bool NodeText::Set(bool rhs)
4599  {
4600  NodeStruct* dn = DataNew();
4601 
4602  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4603  }
4604 
4605  PUGI__FN NodeText& NodeText::operator=(const Char8* rhs)
4606  {
4607  Set(rhs);
4608  return *this;
4609  }
4610 
4611  PUGI__FN NodeText& NodeText::operator=(int rhs)
4612  {
4613  Set(rhs);
4614  return *this;
4615  }
4616 
4617  PUGI__FN NodeText& NodeText::operator=(unsigned int rhs)
4618  {
4619  Set(rhs);
4620  return *this;
4621  }
4622 
4623  PUGI__FN NodeText& NodeText::operator=(double rhs)
4624  {
4625  Set(rhs);
4626  return *this;
4627  }
4628 
4629  PUGI__FN NodeText& NodeText::operator=(bool rhs)
4630  {
4631  Set(rhs);
4632  return *this;
4633  }
4634 
4635  PUGI__FN Node NodeText::data() const
4636  {
4637  return Node(Data());
4638  }
4639 
#ifdef __BORLANDC__
// Borland-only helpers: combine a NodeText (converted to bool) with a plain bool.
// NOTE(review): the parameter type was spelled `Text`, but every other definition here names the
// class `NodeText`; confirm against the header declaration.
PUGI__FN bool operator&&(const NodeText& lhs, bool rhs)
{
    return (bool)lhs && rhs;
}

PUGI__FN bool operator||(const NodeText& lhs, bool rhs)
{
    return (bool)lhs || rhs;
}
#endif
4651 
4652  PUGI__FN NodeIterator::NodeIterator()
4653  {
4654  }
4655 
4656  PUGI__FN NodeIterator::NodeIterator(const Node& node): TargetNode(node), ParentNode(node.GetParent())
4657  {
4658  }
4659 
4660  PUGI__FN NodeIterator::NodeIterator(NodeStruct* ref, NodeStruct* ParentNode): TargetNode(ref), ParentNode(ParentNode)
4661  {
4662  }
4663 
4664  PUGI__FN bool NodeIterator::operator==(const NodeIterator& rhs) const
4665  {
4666  return TargetNode.NodeData == rhs.TargetNode.NodeData && ParentNode.NodeData == rhs.ParentNode.NodeData;
4667  }
4668 
4669  PUGI__FN bool NodeIterator::operator!=(const NodeIterator& rhs) const
4670  {
4671  return TargetNode.NodeData != rhs.TargetNode.NodeData || ParentNode.NodeData != rhs.ParentNode.NodeData;
4672  }
4673 
4674  PUGI__FN Node& NodeIterator::operator*() const
4675  {
4676  assert(TargetNode.NodeData);
4677  return TargetNode;
4678  }
4679 
4680  PUGI__FN Node* NodeIterator::operator->() const
4681  {
4682  assert(TargetNode.NodeData);
4683  return const_cast<Node*>(&TargetNode); // BCC32 workaround
4684  }
4685 
4686  PUGI__FN const NodeIterator& NodeIterator::operator++()
4687  {
4688  assert(TargetNode.NodeData);
4689  TargetNode.NodeData = TargetNode.NodeData->GetNextSibling;
4690  return *this;
4691  }
4692 
4693  PUGI__FN NodeIterator NodeIterator::operator++(int)
4694  {
4695  NodeIterator temp = *this;
4696  ++*this;
4697  return temp;
4698  }
4699 
4700  PUGI__FN const NodeIterator& NodeIterator::operator--()
4701  {
4702  TargetNode = TargetNode.NodeData ? TargetNode.GetPreviousSibling() : ParentNode.GetLastChild();
4703  return *this;
4704  }
4705 
4706  PUGI__FN NodeIterator NodeIterator::operator--(int)
4707  {
4708  NodeIterator temp = *this;
4709  --*this;
4710  return temp;
4711  }
4712 
4714  {
4715  }
4716 
4717  PUGI__FN AttributeIterator::AttributeIterator(const Attribute& attr, const Node& GetParent): TargetAttribute(attr), ParentNode(GetParent)
4718  {
4719  }
4720 
4721  PUGI__FN AttributeIterator::AttributeIterator(AttributeStruct* ref, NodeStruct* GetParent): TargetAttribute(ref), ParentNode(GetParent)
4722  {
4723  }
4724 
4725  PUGI__FN bool AttributeIterator::operator==(const AttributeIterator& rhs) const
4726  {
4727  return TargetAttribute.AttributeData == rhs.TargetAttribute.AttributeData && ParentNode.NodeData == rhs.ParentNode.NodeData;
4728  }
4729 
4730  PUGI__FN bool AttributeIterator::operator!=(const AttributeIterator& rhs) const
4731  {
4732  return TargetAttribute.AttributeData != rhs.TargetAttribute.AttributeData || ParentNode.NodeData != rhs.ParentNode.NodeData;
4733  }
4734 
4735  PUGI__FN Attribute& AttributeIterator::operator*() const
4736  {
4737  assert(TargetAttribute.AttributeData);
4738  return TargetAttribute;
4739  }
4740 
4741  PUGI__FN Attribute* AttributeIterator::operator->() const
4742  {
4743  assert(TargetAttribute.AttributeData);
4744  return const_cast<Attribute*>(&TargetAttribute); // BCC32 workaround
4745  }
4746 
4747  PUGI__FN const AttributeIterator& AttributeIterator::operator++()
4748  {
4749  assert(TargetAttribute.AttributeData);
4750  TargetAttribute.AttributeData = TargetAttribute.AttributeData->GetNextAttribute;
4751  return *this;
4752  }
4753 
4754  PUGI__FN AttributeIterator AttributeIterator::operator++(int)
4755  {
4756  AttributeIterator temp = *this;
4757  ++*this;
4758  return temp;
4759  }
4760 
4761  PUGI__FN const AttributeIterator& AttributeIterator::operator--()
4762  {
4763  TargetAttribute = TargetAttribute.AttributeData ? TargetAttribute.GetPreviousAttribute() : ParentNode.GetLastAttribute();
4764  return *this;
4765  }
4766 
4767  PUGI__FN AttributeIterator AttributeIterator::operator--(int)
4768  {
4769  AttributeIterator temp = *this;
4770  --*this;
4771  return temp;
4772  }
4773 
4774  PUGI__FN NamedNodeIterator::NamedNodeIterator(): TargetName(0)
4775  {
4776  }
4777 
4778  PUGI__FN NamedNodeIterator::NamedNodeIterator(const Node& node, const Char8* Name): TargetNode(node), TargetName(Name)
4779  {
4780  }
4781 
4782  PUGI__FN bool NamedNodeIterator::operator==(const NamedNodeIterator& rhs) const
4783  {
4784  return TargetNode == rhs.TargetNode;
4785  }
4786 
4787  PUGI__FN bool NamedNodeIterator::operator!=(const NamedNodeIterator& rhs) const
4788  {
4789  return TargetNode != rhs.TargetNode;
4790  }
4791 
4792  PUGI__FN Node& NamedNodeIterator::operator*() const
4793  {
4794  assert(TargetNode.NodeData);
4795  return TargetNode;
4796  }
4797 
4798  PUGI__FN Node* NamedNodeIterator::operator->() const
4799  {
4800  assert(TargetNode.NodeData);
4801  return const_cast<Node*>(&TargetNode); // BCC32 workaround
4802  }
4803 
4804  PUGI__FN const NamedNodeIterator& NamedNodeIterator::operator++()
4805  {
4806  assert(TargetNode.NodeData);
4807  TargetNode = TargetNode.GetNextSibling(TargetName);
4808  return *this;
4809  }
4810 
4811  PUGI__FN NamedNodeIterator NamedNodeIterator::operator++(int)
4812  {
4813  NamedNodeIterator temp = *this;
4814  ++*this;
4815  return temp;
4816  }
4817 
4818  PUGI__FN ParseResult::ParseResult(): Status(StatusInternalError), Offset(0), DocumentEncoding(EncodingAuto)
4819  {
4820  }
4821 
4822  PUGI__FN ParseResult::operator bool() const
4823  {
4824  return Status == StatusOk;
4825  }
4826 
4827  PUGI__FN const char* ParseResult::Description() const
4828  {
4829  switch (Status)
4830  {
4831  case StatusOk: return "No error";
4832 
4833  case StatusFileNotFound: return "File was not found";
4834  case StatusIOError: return "Error reading from file/stream";
4835  case StatusOutOfMemory: return "Could not allocate memory";
4836  case StatusInternalError: return "Internal error occurred";
4837 
4838  case StatusUnrecognizedTag: return "Could not determine tag Type";
4839 
4840  case StatusBadProcessingInstruction: return "Error parsing document declaration/processing instruction";
4841  case StatusBadComment: return "Error parsing comment";
4842  case StatusBadCdata: return "Error parsing CDATA section";
4843  case StatusBadDocType: return "Error parsing document Type declaration";
4844  case StatusBadPcdata: return "Error parsing PCDATA section";
4845  case StatusBadStartElement: return "Error parsing start element tag";
4846  case StatusBadAttribute: return "Error parsing element GetAttribute";
4847  case StatusBadEndElement: return "Error parsing end element tag";
4848  case StatusEndElementMismatch: return "Start-end tags mismatch";
4849 
4850  default: return "Unknown error";
4851  }
4852  }
4853 
4854  PUGI__FN Document::Document(): _buffer(0)
4855  {
4856  create();
4857  }
4858 
4859  PUGI__FN Document::~Document()
4860  {
4861  destroy();
4862  }
4863 
4864  PUGI__FN void Document::Reset()
4865  {
4866  destroy();
4867  create();
4868  }
4869 
4870  PUGI__FN void Document::Reset(const Document& proto)
4871  {
4872  Reset();
4873 
4874  for (Node cur = proto.GetFirstChild(); cur; cur = cur.GetNextSibling())
4875  AppendCopy(cur);
4876  }
4877 
4878  PUGI__FN void Document::create()
4879  {
4880  // initialize sentinel page
4881  PUGI__STATIC_ASSERT(offsetof(internal::MemoryPage, data) + sizeof(internal::DocumentStruct) + internal::MemoryPage_alignment <= sizeof(_memory));
4882 
4883  // align upwards to page boundary
4884  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (internal::MemoryPage_alignment - 1)) & ~(internal::MemoryPage_alignment - 1));
4885 
4886  // prepare page structure
4887  internal::MemoryPage* page = internal::MemoryPage::construct(page_memory);
4888 
4889  page->busy_size = internal::MemoryPage_size;
4890 
4891  // allocate new GetRoot
4892  NodeData = new (page->data) internal::DocumentStruct(page);
4893  NodeData->prev_sibling_c = NodeData;
4894 
4895  // setup sentinel page
4896  page->allocator = static_cast<internal::DocumentStruct*>(NodeData);
4897  }
4898 
4899  PUGI__FN void Document::destroy()
4900  {
4901  // destroy static storage
4902  if (_buffer)
4903  {
4904  internal::Memory::deallocate(_buffer);
4905  _buffer = 0;
4906  }
4907 
4908  // destroy dynamic storage, leave sentinel page (it's in static memory)
4909  if (NodeData)
4910  {
4911  internal::MemoryPage* GetRoot_page = reinterpret_cast<internal::MemoryPage*>(NodeData->header & internal::MemoryPage_pointer_mask);
4912  assert(GetRoot_page && !GetRoot_page->prev && !GetRoot_page->memory);
4913 
4914  // destroy all pages
4915  for (internal::MemoryPage* page = GetRoot_page->next; page; )
4916  {
4917  internal::MemoryPage* next = page->next;
4918 
4919  internal::Allocator::deallocate_page(page);
4920 
4921  page = next;
4922  }
4923 
4924  // cleanup GetRoot page
4925  GetRoot_page->allocator = 0;
4926  GetRoot_page->next = 0;
4927  GetRoot_page->busy_size = GetRoot_page->freed_size = 0;
4928 
4929  NodeData = 0;
4930  }
4931  }
4932 
4933  PUGI__FN ParseResult Document::Load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, Encoding DocumentEncoding)
4934  {
4935  Reset();
4936 
4937  return internal::LoadStreamImpl(*this, stream, options, DocumentEncoding);
4938  }
4939 
4940  PUGI__FN ParseResult Document::Load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
4941  {
4942  Reset();
4943 
4944  return internal::LoadStreamImpl(*this, stream, options, Encodingwchar_t);
4945  }
4946 
4947  PUGI__FN ParseResult Document::Load(const Char8* contents, unsigned int options)
4948  {
4949  // Force native DocumentEncoding (skip autodetection)
4950  Encoding DocumentEncoding = EncodingUTF8;
4951 
4952  return LoadBuffer(contents, internal::strlength(contents) * sizeof(Char8), options, DocumentEncoding);
4953  }
4954 
4955  PUGI__FN ParseResult Document::LoadFile(const char* Path_, unsigned int options, Encoding DocumentEncoding)
4956  {
4957  Reset();
4958 
4959  FILE* file = fopen(Path_, "rb");
4960 
4961  return internal::LoadFileImpl(*this, file, options, DocumentEncoding);
4962  }
4963 
4964  PUGI__FN ParseResult Document::LoadFile(const wchar_t* Path_, unsigned int options, Encoding DocumentEncoding)
4965  {
4966  Reset();
4967 
4968  FILE* file = internal::open_file_wide(Path_, L"rb");
4969 
4970  return internal::LoadFileImpl(*this, file, options, DocumentEncoding);
4971  }
4972 
4973  PUGI__FN ParseResult Document::LoadBufferImpl(void* contents, size_t size, unsigned int options, Encoding DocumentEncoding, bool is_mutable, bool own)
4974  {
4975  Reset();
4976 
4977  // check input buffer
4978  assert(contents || size == 0);
4979 
4980  // get actual DocumentEncoding
4981  Encoding buffer_DocumentEncoding = internal::GetBuffer_DocumentEncoding(DocumentEncoding, contents, size);
4982 
4983  // get private buffer
4984  Char8* buffer = 0;
4985  size_t length = 0;
4986 
4987  if (!internal::convert_buffer(buffer, length, buffer_DocumentEncoding, contents, size, is_mutable)) return internal::make_ParseResult(StatusOutOfMemory);
4988 
4989  // delete original buffer if we performed a conversion
4990  if (own && buffer != contents && contents) internal::Memory::deallocate(contents);
4991 
4992  // parse
4993  ParseResult res = internal::Parser::parse(buffer, length, NodeData, options);
4994 
4995  // remember DocumentEncoding
4996  res.DocumentEncoding = buffer_DocumentEncoding;
4997 
4998  // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
4999  if (own || buffer != contents) _buffer = buffer;
5000 
5001  return res;
5002  }
5003 
5004  PUGI__FN ParseResult Document::LoadBuffer(const void* contents, size_t size, unsigned int options, Encoding DocumentEncoding)
5005  {
5006  return LoadBufferImpl(const_cast<void*>(contents), size, options, DocumentEncoding, false, false);
5007  }
5008 
5009  PUGI__FN ParseResult Document::LoadBufferInplace(void* contents, size_t size, unsigned int options, Encoding DocumentEncoding)
5010  {
5011  return LoadBufferImpl(contents, size, options, DocumentEncoding, true, false);
5012  }
5013 
5014  PUGI__FN ParseResult Document::LoadBufferInplaceOwn(void* contents, size_t size, unsigned int options, Encoding DocumentEncoding)
5015  {
5016  return LoadBufferImpl(contents, size, options, DocumentEncoding, true, true);
5017  }
5018 
5019  PUGI__FN void Document::Save(Writer& WriterInstance, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5020  {
5021  internal::BufferedWriter buffered_WriterInstance(WriterInstance, DocumentEncoding);
5022 
5023  if ((flags & FormatWriteBom) && DocumentEncoding != EncodingLatin1)
5024  {
5025  // BOM always represents the codepoint U+FEFF, so just Write it in native DocumentEncoding
5026  buffered_WriterInstance.Write('\xef', '\xbb', '\xbf');
5027  }
5028 
5029  if (!(flags & FormatNoDeclaration) && !internal::hAsDeclaration(*this))
5030  {
5031  buffered_WriterInstance.Write("<?xml version=\"1.0\"");
5032  if (DocumentEncoding == EncodingLatin1) buffered_WriterInstance.Write(" DocumentEncoding=\"ISO-8859-1\"");
5033  buffered_WriterInstance.Write('?', '>');
5034  if (!(flags & FormatRaw)) buffered_WriterInstance.Write('\n');
5035  }
5036 
5037  internal::NodeOutput(buffered_WriterInstance, *this, indent, flags, 0);
5038  }
5039 
5040  PUGI__FN void Document::Save(std::basic_ostream<char, std::char_traits<char> >& stream, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5041  {
5042  WriterStream WriterInstance(stream);
5043 
5044  Save(WriterInstance, indent, flags, DocumentEncoding);
5045  }
5046 
5047  PUGI__FN void Document::Save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const Char8* indent, unsigned int flags) const
5048  {
5049  WriterStream WriterInstance(stream);
5050 
5051  Save(WriterInstance, indent, flags, Encodingwchar_t);
5052  }
5053 
5054  PUGI__FN bool Document::SaveFile(const char* Path_, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5055  {
5056  FILE* file = fopen(Path_, (flags & FormatSaveFileText) ? "w" : "wb");
5057  return internal::SaveFileImpl(*this, file, indent, flags, DocumentEncoding);
5058  }
5059 
5060  PUGI__FN bool Document::SaveFile(const wchar_t* Path_, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5061  {
5062  FILE* file = internal::open_file_wide(Path_, (flags & FormatSaveFileText) ? L"w" : L"wb");
5063  return internal::SaveFileImpl(*this, file, indent, flags, DocumentEncoding);
5064  }
5065 
5066  PUGI__FN Node Document::DocumentElement() const
5067  {
5068  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
5069  if ((i->header & internal::MemoryPage_type_mask) + 1 == NodeElement)
5070  return Node(i);
5071 
5072  return Node();
5073  }
5074 
5075  PUGI__FN std::string MEZZ_LIB AsUtf8(const wchar_t* str)
5076  {
5077  assert(str);
5078 
5079  return internal::AsUtf8_impl(str, wcslen(str));
5080  }
5081 
5082  PUGI__FN std::string MEZZ_LIB AsUtf8(const std::basic_string<wchar_t>& str)
5083  {
5084  return internal::AsUtf8_impl(str.c_str(), str.size());
5085  }
5086 
5087  PUGI__FN std::basic_string<wchar_t> MEZZ_LIB AsWide(const char* str)
5088  {
5089  assert(str);
5090 
5091  return internal::AsWide_impl(str, strlen(str));
5092  }
5093 
5094  PUGI__FN std::basic_string<wchar_t> MEZZ_LIB AsWide(const std::string& str)
5095  {
5096  return internal::AsWide_impl(str.c_str(), str.size());
5097  }
5098 
5099 
5101  {
5102  internal::Memory::allocate = allocate;
5103  internal::Memory::deallocate = deallocate;
5104  }
5105 
5107  {
5108  return internal::Memory::allocate;
5109  }
5110 
5112  {
5113  return internal::Memory::deallocate;
5114  }
5115 }
5116 
5117 
5118 
5119 // STL replacements
5120 PUGI__NS_BEGIN
// Function object: true when the operands compare equal with ==.
struct equal_to
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        return lhs == rhs;
    }
};
5128 
// Function object: true when the operands compare unequal with !=.
struct not_equal_to
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        return lhs != rhs;
    }
};
5136 
// Function object: true when lhs < rhs.
struct less
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        return lhs < rhs;
    }
};
5144 
// Function object: true when lhs <= rhs.
struct less_equal
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        return lhs <= rhs;
    }
};
5152 
// Exchanges two values through a temporary copy.
template <typename T>
void swap(T& lhs, T& rhs)
{
    T held = lhs;

    lhs = rhs;
    rhs = held;
}
5159 
// Returns an iterator to the first element for which no other element is ordered
// before it by pred(candidate, current_best). For an empty range returns end
// (previously the loop started at begin + 1, i.e. beyond end, for an empty range).
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
    if (begin == end) return end;

    I Result = begin;

    for (I it = begin + 1; it != end; ++it)
        if (pred(*it, *Result))
            Result = it;

    return Result;
}
5170 
// Reverses [begin, end) in place by swapping elements from both ends towards the middle.
template <typename I> void reverse(I begin, I end)
{
    for (; begin + 1 < end; ++begin)
    {
        --end;
        swap(*begin, *end);
    }
}
5175 
// Collapses runs of consecutive equal elements in [begin, end) in place and
// returns the iterator one past the last kept element.
template <typename I> I unique(I begin, I end)
{
    // skip the leading part that needs no change
    while (begin + 1 < end && *begin != *(begin + 1)) begin++;

    if (begin == end) return begin;

    // position of the most recently kept element
    I last_kept = begin++;

    // append every element that differs from the last kept one
    for (; begin != end; begin++)
    {
        if (*begin != *last_kept)
            *++last_kept = *begin;
    }

    // last_kept refers to a live element, so the new end is one past it
    return last_kept + 1;
}
5198 
// Copies [begin, end) so that the copy ends at target, working from the last
// element to the first.
template <typename I> void copy_backwards(I begin, I end, I target)
{
    while (begin != end)
    {
        --target;
        --end;
        *target = *end;
    }
}
5203 
// Insertion sort of the non-empty range [begin, end) ordered by pred.
// The last parameter is only used to deduce the element type T.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
    assert(begin != end);

    for (I it = begin + 1; it != end; ++it)
    {
        T val = *it;

        if (pred(val, *begin))
        {
            // new smallest element: shift [begin, it) one slot to the right, back to front
            I dst = it + 1;
            I src = it;

            while (src != begin) *--dst = *--src;

            *begin = val;
        }
        else
        {
            // slide larger elements right until val's slot is found; *begin stops the scan
            I slot = it;

            for (; pred(val, *(slot - 1)); slot--)
                *slot = *(slot - 1);

            *slot = val;
        }
    }
}
5234 
// Three-way partition of [begin, end) around the value at middle (uses == on elements
// in addition to pred). On return [*out_eqbeg, *out_eqend) holds the elements equal to
// the pivot; elements ordered before it by pred are to the left, the others to the right.
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
    I eq_lo = middle;
    I eq_hi = middle + 1;

    // grow the equal range over neighbours that already equal the pivot
    while (eq_lo != begin && *(eq_lo - 1) == *eq_lo) --eq_lo;
    while (eq_hi != end && *eq_hi == *eq_lo) ++eq_hi;

    // lt_hi: end of the scanned left part; gt_lo: start of the unscanned right part
    I lt_hi = eq_lo;
    I gt_lo = eq_hi;

    for (;;)
    {
        // scan right side for an element that belongs on the left
        for (; gt_lo != end; ++gt_lo)
            if (!pred(*eq_lo, *gt_lo))
            {
                if (*gt_lo == *eq_lo) swap(*gt_lo, *eq_hi++);
                else break;
            }

        // scan left side for an element that belongs on the right
        for (; lt_hi != begin; --lt_hi)
            if (!pred(*(lt_hi - 1), *eq_lo))
            {
                if (*eq_lo == *(lt_hi - 1)) swap(*(lt_hi - 1), *--eq_lo);
                else break;
            }

        // both scans exhausted: report the equal range
        if (gt_lo == end && lt_hi == begin)
        {
            *out_eqbeg = eq_lo;
            *out_eqend = eq_hi;
            return;
        }

        // otherwise shift the equal range to make room for the misplaced element(s)
        if (gt_lo == end)
        {
            if (--lt_hi != --eq_lo) swap(*lt_hi, *eq_lo);
            swap(*eq_lo, *--eq_hi);
        }
        else if (lt_hi == begin)
        {
            if (eq_hi != gt_lo) swap(*eq_lo, *eq_hi);
            ++eq_hi;
            swap(*gt_lo++, *eq_lo++);
        }
        else swap(*gt_lo++, *--lt_hi);
    }
}
5288 
// Orders the three referenced elements by pred so that *middle ends up holding the median.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }

    if (pred(*last, *middle))
    {
        swap(*last, *middle);
    }

    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }
}
5295 
// Pivot selection: median of three when last - first <= 40, otherwise a
// median of three medians-of-three sampled across the range.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
    if (last - first <= 40)
    {
        // small chunk
        median3(first, middle, last, pred);
        return;
    }

    // larger chunk: sample nine elements spaced by step
    size_t step = (last - first + 1) / 8;

    median3(first, first + step, first + 2 * step, pred);
    median3(middle - step, middle, middle + step, pred);
    median3(last - 2 * step, last - step, last, pred);
    median3(first + step, middle, last - step, pred);
}
5314 
// Sorts [begin, end) by pred: ranges longer than 32 elements are split with
// median/partition, the rest is finished with insertion_sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
    while (end - begin > 32)
    {
        // choose a pivot and move it to the middle position
        I pivot = begin + (end - begin) / 2;
        median(begin, pivot, end - 1, pred);

        // split into < pivot, == pivot, > pivot
        I equal_first, equal_last;
        partition(begin, pivot, end, pred, &equal_first, &equal_last);

        // recurse into the smaller side, keep looping on the larger one
        if (equal_first - begin > end - equal_last)
        {
            sort(equal_last, end, pred);
            end = equal_first;
        }
        else
        {
            sort(begin, equal_first, pred);
            begin = equal_last;
        }
    }

    // finish the remaining small chunk
    if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
5344 PUGI__NS_END
5345 
5346 // Allocator used for AST and evaluation stacks
5347 PUGI__NS_BEGIN
// One block in a singly linked chain of XPath allocator memory.
// The payload is XML_MEMORY_XPATH_PAGE_SIZE bytes when that macro is defined, otherwise 4096.
struct XPathMemoryBlock
{
    XPathMemoryBlock* next; // next block in the chain

#ifdef XML_MEMORY_XPATH_PAGE_SIZE
    char data[XML_MEMORY_XPATH_PAGE_SIZE];
#else
    char data[4096];
#endif
};
5360 
5361  class XPathAllocator
5362  {
5363  XPathMemoryBlock* _GetRoot;
5364  size_t _GetRoot_size;
5365 
5366  public:
5367 
5368 
5369  XPathAllocator(XPathMemoryBlock* GetRoot, size_t GetRoot_size = 0): _GetRoot(GetRoot), _GetRoot_size(GetRoot_size)
5370  {
5371 
5372  }
5373 
5374  void* allocate_nothrow(size_t size)
5375  {
5376  const size_t block_capacity = sizeof(_GetRoot->data);
5377 
5378  // align size so that we're able to store pointers in subsequent blocks
5379  size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5380 
5381  if (_GetRoot_size + size <= block_capacity)
5382  {
5383  void* buf = _GetRoot->data + _GetRoot_size;
5384  _GetRoot_size += size;
5385  return buf;
5386  }
5387  else
5388  {
5389  size_t block_data_size = (size > block_capacity) ? size : block_capacity;
5390  size_t block_size = block_data_size + offsetof(XPathMemoryBlock, data);
5391 
5392  XPathMemoryBlock* block = static_cast<XPathMemoryBlock*>(Memory::allocate(block_size));
5393  if (!block) return 0;
5394 
5395  block->next = _GetRoot;
5396 
5397  _GetRoot = block;
5398  _GetRoot_size = size;
5399 
5400  return block->data;
5401  }
5402  }
5403 
5404  void* allocate(size_t size)
5405  {
5406  void* Result = allocate_nothrow(size);
5407 
5408  if (!Result)
5409  {
5410  throw std::bad_alloc();
5411  }
5412 
5413  return Result;
5414  }
5415 
5416  void* reallocate(void* ptr, size_t old_size, size_t new_size)
5417  {
5418  // align size so that we're able to store pointers in subsequent blocks
5419  old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5420  new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5421 
5422  // we can only reallocate the last object
5423  assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _GetRoot->data + _GetRoot_size);
5424 
5425  // adjust GetRoot size so that we have not allocated the object at all
5426  bool only_object = (_GetRoot_size == old_size);
5427 
5428  if (ptr) _GetRoot_size -= old_size;
5429 
5430  // allocate a new version (this will obviously reuse the memory if possible)
5431  void* Result = allocate(new_size);
5432  assert(Result);
5433 
5434  // we have a new block
5435  if (Result != ptr && ptr)
5436  {
5437  // copy old data
5438  assert(new_size > old_size);
5439  memcpy(Result, ptr, old_size);
5440 
5441  // free the previous page if it had no other objects
5442  if (only_object)
5443  {
5444  assert(_GetRoot->data == Result);
5445  assert(_GetRoot->next);
5446 
5447  XPathMemoryBlock* next = _GetRoot->next->next;
5448 
5449  if (next)
5450  {
5451  // deallocate the whole page, unless it was the first one
5452  Memory::deallocate(_GetRoot->next);
5453  _GetRoot->next = next;
5454  }
5455  }
5456  }
5457 
5458  return Result;
5459  }
5460 
5461  void revert(const XPathAllocator& state)
5462  {
5463  // free all new pages
5464  XPathMemoryBlock* cur = _GetRoot;
5465 
5466  while (cur != state._GetRoot)
5467  {
5468  XPathMemoryBlock* next = cur->next;
5469 
5470  Memory::deallocate(cur);
5471 
5472  cur = next;
5473  }
5474 
5475  // restore state
5476  _GetRoot = state._GetRoot;
5477  _GetRoot_size = state._GetRoot_size;
5478  }
5479 
5480  void release()
5481  {
5482  XPathMemoryBlock* cur = _GetRoot;
5483  assert(cur);
5484 
5485  while (cur->next)
5486  {
5487  XPathMemoryBlock* next = cur->next;
5488 
5489  Memory::deallocate(cur);
5490 
5491  cur = next;
5492  }
5493  }
5494  };
5495 
5496  struct XPathAllocatorCapture
5497  {
5498  XPathAllocatorCapture(XPathAllocator* alloc): _target(alloc), _state(*alloc)
5499  {
5500  }
5501 
5502  ~XPathAllocatorCapture()
5503  {
5504  _target->revert(_state);
5505  }
5506 
5507  XPathAllocator* _target;
5508  XPathAllocator _state;
5509  };
5510 
5511  struct XPathStack
5512  {
5513  XPathAllocator* Result;
5514  XPathAllocator* temp;
5515  };
5516 
5517  struct XPathStackData
5518  {
5519  XPathMemoryBlock blocks[2];
5520  XPathAllocator Result;
5521  XPathAllocator temp;
5522  XPathStack stack;
5523 
5524  XPathStackData(): Result(blocks + 0), temp(blocks + 1)
5525  {
5526  blocks[0].next = blocks[1].next = 0;
5527 
5528  stack.Result = &Result;
5529  stack.temp = &temp;
5530 
5531  }
5532 
5533  ~XPathStackData()
5534  {
5535  Result.release();
5536  temp.release();
5537  }
5538  };
5539 PUGI__NS_END
5540 
5541 // String class
5542 PUGI__NS_BEGIN
5543  class XPathString
5544  {
5545  const Char8* _buffer;
5546  bool _uses_heap;
5547 
5548  static Char8* duplicate_string(const Char8* string, size_t length, XPathAllocator* alloc)
5549  {
5550  Char8* Result = static_cast<Char8*>(alloc->allocate((length + 1) * sizeof(Char8)));
5551  assert(Result);
5552 
5553  memcpy(Result, string, length * sizeof(Char8));
5554  Result[length] = 0;
5555 
5556  return Result;
5557  }
5558 
5559  static Char8* duplicate_string(const Char8* string, XPathAllocator* alloc)
5560  {
5561  return duplicate_string(string, strlength(string), alloc);
5562  }
5563 
5564  public:
5565  XPathString(): _buffer(""), _uses_heap(false)
5566  {
5567  }
5568 
5569  explicit XPathString(const Char8* str, XPathAllocator* alloc)
5570  {
5571  bool empty_ = (*str == 0);
5572 
5573  _buffer = empty_ ? "" : duplicate_string(str, alloc);
5574  _uses_heap = !empty_;
5575  }
5576 
5577  explicit XPathString(const Char8* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
5578  {
5579  }
5580 
5581  XPathString(const Char8* begin, const Char8* end, XPathAllocator* alloc)
5582  {
5583  assert(begin <= end);
5584 
5585  bool empty_ = (begin == end);
5586 
5587  _buffer = empty_ ? "" : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
5588  _uses_heap = !empty_;
5589  }
5590 
5591  void append(const XPathString& o, XPathAllocator* alloc)
5592  {
5593  // skip empty sources
5594  if (!*o._buffer) return;
5595 
5596  // fast append for constant empty target and constant source
5597  if (!*_buffer && !_uses_heap && !o._uses_heap)
5598  {
5599  _buffer = o._buffer;
5600  }
5601  else
5602  {
5603  // need to make heap copy
5604  size_t tarGetLength = strlength(_buffer);
5605  size_t source_length = strlength(o._buffer);
5606  size_t Result_length = tarGetLength + source_length;
5607 
5608  // allocate new buffer
5609  Char8* Result = static_cast<Char8*>(alloc->reallocate(_uses_heap ? const_cast<Char8*>(_buffer) : 0, (tarGetLength + 1) * sizeof(Char8), (Result_length + 1) * sizeof(Char8)));
5610  assert(Result);
5611 
5612  // append first string to the new buffer in case there was no reallocation
5613  if (!_uses_heap) memcpy(Result, _buffer, tarGetLength * sizeof(Char8));
5614 
5615  // append second string to the new buffer
5616  memcpy(Result + tarGetLength, o._buffer, source_length * sizeof(Char8));
5617  Result[Result_length] = 0;
5618 
5619  // finalize
5620  _buffer = Result;
5621  _uses_heap = true;
5622  }
5623  }
5624 
5625  const Char8* c_str() const
5626  {
5627  return _buffer;
5628  }
5629 
5630  size_t length() const
5631  {
5632  return strlength(_buffer);
5633  }
5634 
5635  Char8* data(XPathAllocator* alloc)
5636  {
5637  // make private heap copy
5638  if (!_uses_heap)
5639  {
5640  _buffer = duplicate_string(_buffer, alloc);
5641  _uses_heap = true;
5642  }
5643 
5644  return const_cast<Char8*>(_buffer);
5645  }
5646 
5647  bool Empty() const
5648  {
5649  return *_buffer == 0;
5650  }
5651 
5652  bool operator==(const XPathString& o) const
5653  {
5654  return strequal(_buffer, o._buffer);
5655  }
5656 
5657  bool operator!=(const XPathString& o) const
5658  {
5659  return !strequal(_buffer, o._buffer);
5660  }
5661 
5662  bool uses_heap() const
5663  {
5664  return _uses_heap;
5665  }
5666  };
5667 
5668  PUGI__FN XPathString XPathStringConst(const Char8* str)
5669  {
5670  return XPathString(str, false);
5671  }
5672 PUGI__NS_END
5673 
5674 PUGI__NS_BEGIN
5675  PUGI__FN bool starts_with(const Char8* string, const Char8* pattern)
5676  {
5677  while (*pattern && *string == *pattern)
5678  {
5679  string++;
5680  pattern++;
5681  }
5682 
5683  return *pattern == 0;
5684  }
5685 
5686  PUGI__FN const Char8* FindChar(const Char8* s, Char8 c)
5687  {
5688  return strchr(s, c);
5689  }
5690 
5691  PUGI__FN const Char8* FindSubstring(const Char8* s, const Char8* p)
5692  {
5693  return strstr(s, p);
5694  }
5695 
5696  // Converts symbol to lower case, if it is an ASCII one
5697  PUGI__FN Char8 tolower_ascii(Char8 ch)
5698  {
5699  return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<Char8>(ch | ' ') : ch;
5700  }
5701 
5702  PUGI__FN XPathString string_Value(const XPathNode& na, XPathAllocator* alloc)
5703  {
5704  if (na.GetAttribute())
5705  return XPathStringConst(na.GetAttribute().Value());
5706  else
5707  {
5708  const Node& n = na.GetNode();
5709 
5710  switch (n.Type())
5711  {
5712  case NodePcdata:
5713  case NodeCdata:
5714  case NodeComment:
5715  case NodePi:
5716  return XPathStringConst(n.Value());
5717 
5718  case NodeDocument:
5719  case NodeElement:
5720  {
5721  XPathString Result;
5722 
5723  Node cur = n.GetFirstChild();
5724 
5725  while (cur && cur != n)
5726  {
5727  if (cur.Type() == NodePcdata || cur.Type() == NodeCdata)
5728  Result.append(XPathStringConst(cur.Value()), alloc);
5729 
5730  if (cur.GetFirstChild())
5731  cur = cur.GetFirstChild();
5732  else if (cur.GetNextSibling())
5733  cur = cur.GetNextSibling();
5734  else
5735  {
5736  while (!cur.GetNextSibling() && cur != n)
5737  cur = cur.GetParent();
5738 
5739  if (cur != n) cur = cur.GetNextSibling();
5740  }
5741  }
5742 
5743  return Result;
5744  }
5745 
5746  default:
5747  return XPathString();
5748  }
5749  }
5750  }
5751 
5752  PUGI__FN unsigned int NodeHeight(Node n)
5753  {
5754  unsigned int Result = 0;
5755 
5756  while (n)
5757  {
5758  ++Result;
5759  n = n.GetParent();
5760  }
5761 
5762  return Result;
5763  }
5764 
5765  PUGI__FN bool NodeIs_before(Node ln, unsigned int lh, Node rn, unsigned int rh)
5766  {
5767  // normalize heights
5768  for (unsigned int i = rh; i < lh; i++) ln = ln.GetParent();
5769  for (unsigned int j = lh; j < rh; j++) rn = rn.GetParent();
5770 
5771  // one node is the ancestor of the other
5772  if (ln == rn) return lh < rh;
5773 
5774  // find common ancestor
5775  while (ln.GetParent() != rn.GetParent())
5776  {
5777  ln = ln.GetParent();
5778  rn = rn.GetParent();
5779  }
5780 
5781  // there is no common ancestor (the shared GetParent is null), nodes are from different documents
5782  if (!ln.GetParent()) return ln < rn;
5783 
5784  // determine sibling order
5785  for (; ln; ln = ln.GetNextSibling())
5786  if (ln == rn)
5787  return true;
5788 
5789  return false;
5790  }
5791 
5792  PUGI__FN bool NodeIs_ancestor(Node GetParent, Node node)
5793  {
5794  while (node && node != GetParent) node = node.GetParent();
5795 
5796  return GetParent && node == GetParent;
5797  }
5798 
5799  PUGI__FN const void* document_order(const XPathNode& xnode)
5800  {
5801  NodeStruct* node = xnode.GetNode().InternalObject();
5802 
5803  if (node)
5804  {
5805  if (node->Name && (node->header & MemoryPage_Name_allocated_mask) == 0) return node->Name;
5806  if (node->Value && (node->header & MemoryPage_Value_allocated_mask) == 0) return node->Value;
5807  return 0;
5808  }
5809 
5810  AttributeStruct* attr = xnode.GetAttribute().InternalObject();
5811 
5812  if (attr)
5813  {
5814  if ((attr->header & MemoryPage_Name_allocated_mask) == 0) return attr->Name;
5815  if ((attr->header & MemoryPage_Value_allocated_mask) == 0) return attr->Value;
5816  return 0;
5817  }
5818 
5819  return 0;
5820  }
5821 
5822  struct document_order_comparator
5823  {
5824  bool operator()(const XPathNode& lhs, const XPathNode& rhs) const
5825  {
5826  // optimized document order based check
5827  const void* lo = document_order(lhs);
5828  const void* ro = document_order(rhs);
5829 
5830  if (lo && ro) return lo < ro;
5831 
5832  // slow comparison
5833  Node ln = lhs.GetNode(), rn = rhs.GetNode();
5834 
5835  // compare attributes
5836  if (lhs.GetAttribute() && rhs.GetAttribute())
5837  {
5838  // shared GetParent
5839  if (lhs.GetParent() == rhs.GetParent())
5840  {
5841  // determine sibling order
5842  for (Attribute a = lhs.GetAttribute(); a; a = a.GetNextAttribute())
5843  if (a == rhs.GetAttribute())
5844  return true;
5845 
5846  return false;
5847  }
5848 
5849  // compare GetAttribute GetParents
5850  ln = lhs.GetParent();
5851  rn = rhs.GetParent();
5852  }
5853  else if (lhs.GetAttribute())
5854  {
5855  // attributes go after the GetParent element
5856  if (lhs.GetParent() == rhs.GetNode()) return false;
5857 
5858  ln = lhs.GetParent();
5859  }
5860  else if (rhs.GetAttribute())
5861  {
5862  // attributes go after the GetParent element
5863  if (rhs.GetParent() == lhs.GetNode()) return true;
5864 
5865  rn = rhs.GetParent();
5866  }
5867 
5868  if (ln == rn) return false;
5869 
5870  unsigned int lh = NodeHeight(ln);
5871  unsigned int rh = NodeHeight(rn);
5872 
5873  return NodeIs_before(ln, lh, rn, rh);
5874  }
5875  };
5876 
5877  struct duplicate_comparator
5878  {
5879  bool operator()(const XPathNode& lhs, const XPathNode& rhs) const
5880  {
5881  if (lhs.GetAttribute()) return rhs.GetAttribute() ? lhs.GetAttribute() < rhs.GetAttribute() : true;
5882  else return rhs.GetAttribute() ? false : lhs.GetNode() < rhs.GetNode();
5883  }
5884  };
5885 
5886  PUGI__FN double gen_nan()
5887  {
5888  #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
5889  union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
5890  u[0].i = 0x7fc00000;
5891  return u[0].f;
5892  #else
5893  // fallback
5894  const volatile double zero = 0.0;
5895  return zero / zero;
5896  #endif
5897  }
5898 
5899  PUGI__FN bool is_nan(double Value)
5900  {
5901  #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
5902  return !!_isnan(Value);
5903  #elif defined(fpclassify) && defined(FP_NAN)
5904  return fpclassify(Value) == FP_NAN;
5905  #else
5906  // fallback
5907  const volatile double v = Value;
5908  return v != v;
5909  #endif
5910  }
5911 
5912  PUGI__FN const Char8* convert_number_to_string_special(double Value)
5913  {
5914  #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
5915  if (_finite(Value)) return (Value == 0) ? "0" : 0;
5916  if (_isnan(Value)) return "NaN";
5917  return Value > 0 ? "Infinity" : "-Infinity";
5918  #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
5919  switch (fpclassify(Value))
5920  {
5921  case FP_NAN:
5922  return "NaN";
5923 
5924  case FP_INFINITE:
5925  return Value > 0 ? "Infinity" : "-Infinity";
5926 
5927  case FP_ZERO:
5928  return "0";
5929 
5930  default:
5931  return 0;
5932  }
5933  #else
5934  // fallback
5935  const volatile double v = Value;
5936 
5937  if (v == 0) return "0";
5938  if (v != v) return "NaN";
5939  if (v * 2 == v) return Value > 0 ? "Infinity" : "-Infinity";
5940  return 0;
5941  #endif
5942  }
5943 
5944  PUGI__FN bool convert_number_to_boolean(double Value)
5945  {
5946  return (Value != 0 && !is_nan(Value));
5947  }
5948 
5949  PUGI__FN void truncate_zeros(char* begin, char* end)
5950  {
5951  while (begin != end && end[-1] == '0') end--;
5952 
5953  *end = 0;
5954  }
5955 
5956  // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
5957 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
5958  PUGI__FN void convert_number_to_mantissa_exponent(double Value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5959  {
5960  // get base Values
5961  int sign, exponent;
5962  _ecvt_s(buffer, buffer_size, Value, DBL_DIG + 1, &exponent, &sign);
5963 
5964  // truncate redundant zeros
5965  truncate_zeros(buffer, buffer + strlen(buffer));
5966 
5967  // fill Results
5968  *out_mantissa = buffer;
5969  *out_exponent = exponent;
5970  }
5971 #else
5972  PUGI__FN void convert_number_to_mantissa_exponent(double Value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5973  {
5974  // get a scientific notation Value with IEEE DBL_DIG decimals
5975  sprintf(buffer, "%.*e", DBL_DIG, Value);
5976  assert(strlen(buffer) < buffer_size);
5977  (void)!buffer_size;
5978 
5979  // get the exponent (possibly negative)
5980  char* exponent_string = strchr(buffer, 'e');
5981  assert(exponent_string);
5982 
5983  int exponent = atoi(exponent_string + 1);
5984 
5985  // extract mantissa string: skip sign
5986  char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
5987  assert(mantissa[0] != '0' && mantissa[1] == '.');
5988 
5989  // divide mantissa by 10 to eliminate integer part
5990  mantissa[1] = mantissa[0];
5991  mantissa++;
5992  exponent++;
5993 
5994  // remove extra mantissa digits and zero-terminate mantissa
5995  truncate_zeros(mantissa, exponent_string);
5996 
5997  // fill Results
5998  *out_mantissa = mantissa;
5999  *out_exponent = exponent;
6000  }
6001 #endif
6002 
6003  PUGI__FN XPathString convert_number_to_string(double Value, XPathAllocator* alloc)
6004  {
6005  // try special number conversion
6006  const Char8* special = convert_number_to_string_special(Value);
6007  if (special) return XPathStringConst(special);
6008 
6009  // get mantissa + exponent form
6010  char mantissa_buffer[64];
6011 
6012  char* mantissa;
6013  int exponent;
6014  convert_number_to_mantissa_exponent(Value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
6015 
6016  // make the number!
6017  Char8 Result[512];
6018  Char8* s = Result;
6019 
6020  // sign
6021  if (Value < 0) *s++ = '-';
6022 
6023  // integer part
6024  if (exponent <= 0)
6025  {
6026  *s++ = '0';
6027  }
6028  else
6029  {
6030  while (exponent > 0)
6031  {
6032  assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
6033  *s++ = *mantissa ? *mantissa++ : '0';
6034  exponent--;
6035  }
6036  }
6037 
6038  // fractional part
6039  if (*mantissa)
6040  {
6041  // decimal point
6042  *s++ = '.';
6043 
6044  // extra zeroes from negative exponent
6045  while (exponent < 0)
6046  {
6047  *s++ = '0';
6048  exponent++;
6049  }
6050 
6051  // extra mantissa digits
6052  while (*mantissa)
6053  {
6054  assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
6055  *s++ = *mantissa++;
6056  }
6057  }
6058 
6059  // zero-terminate
6060  assert(s < Result + sizeof(Result) / sizeof(Result[0]));
6061  *s = 0;
6062 
6063  return XPathString(Result, alloc);
6064  }
6065 
6066  PUGI__FN bool check_Stringo_number_format(const Char8* string)
6067  {
6068  // parse leading whitespace
6069  while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6070 
6071  // parse sign
6072  if (*string == '-') ++string;
6073 
6074  if (!*string) return false;
6075 
6076  // if there is no integer part, there should be a decimal part with at least one digit
6077  if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
6078 
6079  // parse integer part
6080  while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6081 
6082  // parse decimal part
6083  if (*string == '.')
6084  {
6085  ++string;
6086 
6087  while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6088  }
6089 
6090  // parse trailing whitespace
6091  while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6092 
6093  return *string == 0;
6094  }
6095 
6096  PUGI__FN double convert_Stringo_number(const Char8* string)
6097  {
6098  // check string format
6099  if (!check_Stringo_number_format(string)) return gen_nan();
6100 
6101  // parse string
6102  return atof(string);
6103  }
6104 
6105  PUGI__FN bool convert_Stringo_number(const Char8* begin, const Char8* end, double* out_Result)
6106  {
6107  Char8 buffer[32];
6108 
6109  size_t length = static_cast<size_t>(end - begin);
6110  Char8* scratch = buffer;
6111 
6112  if (length >= sizeof(buffer) / sizeof(buffer[0]))
6113  {
6114  // need to make dummy on-heap copy
6115  scratch = static_cast<Char8*>(Memory::allocate((length + 1) * sizeof(Char8)));
6116  if (!scratch) return false;
6117  }
6118 
6119  // copy string to zero-terminated buffer and perform conversion
6120  memcpy(scratch, begin, length * sizeof(Char8));
6121  scratch[length] = 0;
6122 
6123  *out_Result = convert_Stringo_number(scratch);
6124 
6125  // free dummy buffer
6126  if (scratch != buffer) Memory::deallocate(scratch);
6127 
6128  return true;
6129  }
6130 
6131  PUGI__FN double round_nearest(double Value)
6132  {
6133  return floor(Value + 0.5);
6134  }
6135 
6136  PUGI__FN double round_nearest_nzero(double Value)
6137  {
6138  // same as round_nearest, but returns -0 for [-0.5, -0]
6139  // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
6140  return (Value >= -0.5 && Value <= 0) ? ceil(Value) : floor(Value + 0.5);
6141  }
6142 
6143  PUGI__FN const Char8* qualified_Name(const XPathNode& node)
6144  {
6145  return node.GetAttribute() ? node.GetAttribute().Name() : node.GetNode().Name();
6146  }
6147 
6148  PUGI__FN const Char8* local_Name(const XPathNode& node)
6149  {
6150  const Char8* Name = qualified_Name(node);
6151  const Char8* p = FindChar(Name, ':');
6152 
6153  return p ? p + 1 : Name;
6154  }
6155 
6156  struct namespace_uri_predicate
6157  {
6158  const Char8* prefix;
6159  size_t prefix_length;
6160 
6161  namespace_uri_predicate(const Char8* Name)
6162  {
6163  const Char8* pos = FindChar(Name, ':');
6164 
6165  prefix = pos ? Name : 0;
6166  prefix_length = pos ? static_cast<size_t>(pos - Name) : 0;
6167  }
6168 
6169  bool operator()(const Attribute& a) const
6170  {
6171  const Char8* Name = a.Name();
6172 
6173  if (!starts_with(Name, "xmlns")) return false;
6174 
6175  return prefix ? Name[5] == ':' && strequalrange(Name + 6, prefix, prefix_length) : Name[5] == 0;
6176  }
6177  };
6178 
6179  PUGI__FN const Char8* namespace_uri(const Node& node)
6180  {
6181  namespace_uri_predicate pred = node.Name();
6182 
6183  Node p = node;
6184 
6185  while (p)
6186  {
6187  Attribute a = p.FindAttribute(pred);
6188 
6189  if (a) return a.Value();
6190 
6191  p = p.GetParent();
6192  }
6193 
6194  return "";
6195  }
6196 
6197  PUGI__FN const Char8* namespace_uri(const Attribute& attr, const Node& GetParent)
6198  {
6199  namespace_uri_predicate pred = attr.Name();
6200 
6201  // Default namespace does not apply to attributes
6202  if (!pred.prefix) return "";
6203 
6204  Node p = GetParent;
6205 
6206  while (p)
6207  {
6208  Attribute a = p.FindAttribute(pred);
6209 
6210  if (a) return a.Value();
6211 
6212  p = p.GetParent();
6213  }
6214 
6215  return "";
6216  }
6217 
6218  PUGI__FN const Char8* namespace_uri(const XPathNode& node)
6219  {
6220  return node.GetAttribute() ? namespace_uri(node.GetAttribute(), node.GetParent()) : namespace_uri(node.GetNode());
6221  }
6222 
6223  PUGI__FN void normalize_space(Char8* buffer)
6224  {
6225  Char8* Write = buffer;
6226 
6227  for (Char8* it = buffer; *it; )
6228  {
6229  Char8 ch = *it++;
6230 
6231  if (PUGI__IS_CHARTYPE(ch, ct_space))
6232  {
6233  // replace whitespace sequence with single space
6234  while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
6235 
6236  // avoid leading spaces
6237  if (Write != buffer) *Write++ = ' ';
6238  }
6239  else *Write++ = ch;
6240  }
6241 
6242  // remove trailing space
6243  if (Write != buffer && PUGI__IS_CHARTYPE(Write[-1], ct_space)) Write--;
6244 
6245  // zero-terminate
6246  *Write = 0;
6247  }
6248 
6249  PUGI__FN void translate(Char8* buffer, const Char8* from, const Char8* to)
6250  {
6251  size_t to_length = strlength(to);
6252 
6253  Char8* Write = buffer;
6254 
6255  while (*buffer)
6256  {
6257  PUGI__DMC_VOLATILE Char8 ch = *buffer++;
6258 
6259  const Char8* pos = FindChar(from, ch);
6260 
6261  if (!pos)
6262  *Write++ = ch; // do not process
6263  else if (static_cast<size_t>(pos - from) < to_length)
6264  *Write++ = to[pos - from]; // replace
6265  }
6266 
6267  // zero-terminate
6268  *Write = 0;
6269  }
6270 
6271  struct XPathVariableBoole: XPathVariable
6272  {
6273  XPathVariableBoole(): Value(false)
6274  {
6275  }
6276 
6277  bool Value;
6278  Char8 Name[1];
6279  };
6280 
6281  struct XPathVariableNumber: XPathVariable
6282  {
6283  XPathVariableNumber(): Value(0)
6284  {
6285  }
6286 
6287  double Value;
6288  Char8 Name[1];
6289  };
6290 
6291  struct XPathVariableString: XPathVariable
6292  {
6293  XPathVariableString(): Value(0)
6294  {
6295  }
6296 
6297  ~XPathVariableString()
6298  {
6299  if (Value) Memory::deallocate(Value);
6300  }
6301 
6302  Char8* Value;
6303  Char8 Name[1];
6304  };
6305 
6306  struct XPathVariableNodeSet: XPathVariable
6307  {
6308  XPathNodeSet Value;
6309  Char8 Name[1];
6310  };
6311 
// File-local, default-constructed, immutable node set.
// NOTE(review): presumably handed out where a node-set value is requested but unavailable - its users are outside this section, confirm there.
6312  static const XPathNodeSet dummy_NodeSet;
6313 
6314  PUGI__FN unsigned int hash_string(const Char8* str)
6315  {
6316  // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
6317  unsigned int Result = 0;
6318 
6319  while (*str)
6320  {
6321  Result += static_cast<unsigned int>(*str++);
6322  Result += Result << 10;
6323  Result ^= Result >> 6;
6324  }
6325 
6326  Result += Result << 3;
6327  Result ^= Result >> 11;
6328  Result += Result << 15;
6329 
6330  return Result;
6331  }
6332 
6333  template <typename T> PUGI__FN T* new_XPathVariable(const Char8* Name)
6334  {
6335  size_t length = strlength(Name);
6336  if (length == 0) return 0; // empty variable names are invalid
6337 
6338  // $$ we can't use offsetof(T, Name) because T is non-POD, so we just allocate additional length characters
6339  void* memory = Memory::allocate(sizeof(T) + length * sizeof(Char8));
6340  if (!memory) return 0;
6341 
6342  T* Result = new (memory) T();
6343 
6344  memcpy(Result->Name, Name, (length + 1) * sizeof(Char8));
6345 
6346  return Result;
6347  }
6348 
6349  PUGI__FN XPathVariable* new_XPathVariable(XPathValueType Type, const Char8* Name)
6350  {
6351  switch (Type)
6352  {
6353  case XPathTypeNodeSet:
6354  return new_XPathVariable<XPathVariableNodeSet>(Name);
6355 
6356  case XPathTypeNumber:
6357  return new_XPathVariable<XPathVariableNumber>(Name);
6358 
6359  case XPathTypeString:
6360  return new_XPathVariable<XPathVariableString>(Name);
6361 
6362  case XPathTypeBoole:
6363  return new_XPathVariable<XPathVariableBoole>(Name);
6364 
6365  default:
6366  return 0;
6367  }
6368  }
6369 
6370  template <typename T> PUGI__FN void delete_XPathVariable(T* var)
6371  {
6372  var->~T();
6373  Memory::deallocate(var);
6374  }
6375 
6376  PUGI__FN void delete_XPathVariable(XPathValueType Type, XPathVariable* var)
6377  {
6378  switch (Type)
6379  {
6380  case XPathTypeNodeSet:
6381  delete_XPathVariable(static_cast<XPathVariableNodeSet*>(var));
6382  break;
6383 
6384  case XPathTypeNumber:
6385  delete_XPathVariable(static_cast<XPathVariableNumber*>(var));
6386  break;
6387 
6388  case XPathTypeString:
6389  delete_XPathVariable(static_cast<XPathVariableString*>(var));
6390  break;
6391 
6392  case XPathTypeBoole:
6393  delete_XPathVariable(static_cast<XPathVariableBoole*>(var));
6394  break;
6395 
6396  default:
6397  assert(!"Invalid variable Type");
6398  }
6399  }
6400 
6401  PUGI__FN XPathVariable* GetVariable(XPathVariableSet* set, const Char8* begin, const Char8* end)
6402  {
6403  Char8 buffer[32];
6404 
6405  size_t length = static_cast<size_t>(end - begin);
6406  Char8* scratch = buffer;
6407 
6408  if (length >= sizeof(buffer) / sizeof(buffer[0]))
6409  {
6410  // need to make dummy on-heap copy
6411  scratch = static_cast<Char8*>(Memory::allocate((length + 1) * sizeof(Char8)));
6412  if (!scratch) return 0;
6413  }
6414 
6415  // copy string to zero-terminated buffer and perform lookup
6416  memcpy(scratch, begin, length * sizeof(Char8));
6417  scratch[length] = 0;
6418 
6419  XPathVariable* Result = set->Get(scratch);
6420 
6421  // free dummy buffer
6422  if (scratch != buffer) Memory::deallocate(scratch);
6423 
6424  return Result;
6425  }
6426 PUGI__NS_END
6427 
6428 // Internal node set class
6429 PUGI__NS_BEGIN
6430  PUGI__FN XPathNodeSet::CollectionType XPathSort(XPathNode* begin, XPathNode* end, XPathNodeSet::CollectionType Type, bool rev)
6431  {
6432  XPathNodeSet::CollectionType order = rev ? XPathNodeSet::TypeSortedReverse : XPathNodeSet::TypeSorted;
6433 
6434  if (Type == XPathNodeSet::TypeUnsorted)
6435  {
6436  sort(begin, end, document_order_comparator());
6437 
6438  Type = XPathNodeSet::TypeSorted;
6439  }
6440 
6441  if (Type != order) reverse(begin, end);
6442 
6443  return order;
6444  }
6445 
6446  PUGI__FN XPathNode XPathFirst(const XPathNode* begin, const XPathNode* end, XPathNodeSet::CollectionType Type)
6447  {
6448  if (begin == end) return XPathNode();
6449 
6450  switch (Type)
6451  {
6452  case XPathNodeSet::TypeSorted:
6453  return *begin;
6454 
6455  case XPathNodeSet::TypeSortedReverse:
6456  return *(end - 1);
6457 
6458  case XPathNodeSet::TypeUnsorted:
6459  return *min_element(begin, end, document_order_comparator());
6460 
6461  default:
6462  assert(!"Invalid node set Type");
6463  return XPathNode();
6464  }
6465  }
6466 
6467  class XPathNodeSet_raw
6468  {
6469  XPathNodeSet::CollectionType _type;
6470 
6471  XPathNode* _begin;
6472  XPathNode* _end;
6473  XPathNode* _eos;
6474 
6475  public:
6476  XPathNodeSet_raw(): _type(XPathNodeSet::TypeUnsorted), _begin(0), _end(0), _eos(0)
6477  {
6478  }
6479 
6480  XPathNode* begin() const
6481  {
6482  return _begin;
6483  }
6484 
6485  XPathNode* end() const
6486  {
6487  return _end;
6488  }
6489 
6490  bool Empty() const
6491  {
6492  return _begin == _end;
6493  }
6494 
6495  size_t size() const
6496  {
6497  return static_cast<size_t>(_end - _begin);
6498  }
6499 
6500  XPathNode first() const
6501  {
6502  return XPathFirst(_begin, _end, _type);
6503  }
6504 
6505  void push_back(const XPathNode& node, XPathAllocator* alloc)
6506  {
6507  if (_end == _eos)
6508  {
6509  size_t capacity = static_cast<size_t>(_eos - _begin);
6510 
6511  // get new capacity (1.5x rule)
6512  size_t new_capacity = capacity + capacity / 2 + 1;
6513 
6514  // reallocate the old array or allocate a new one
6515  XPathNode* data = static_cast<XPathNode*>(alloc->reallocate(_begin, capacity * sizeof(XPathNode), new_capacity * sizeof(XPathNode)));
6516  assert(data);
6517 
6518  // finalize
6519  _begin = data;
6520  _end = data + capacity;
6521  _eos = data + new_capacity;
6522  }
6523 
6524  *_end++ = node;
6525  }
6526 
6527  void append(const XPathNode* begin_, const XPathNode* end_, XPathAllocator* alloc)
6528  {
6529  size_t size_ = static_cast<size_t>(_end - _begin);
6530  size_t capacity = static_cast<size_t>(_eos - _begin);
6531  size_t count = static_cast<size_t>(end_ - begin_);
6532 
6533  if (size_ + count > capacity)
6534  {
6535  // reallocate the old array or allocate a new one
6536  XPathNode* data = static_cast<XPathNode*>(alloc->reallocate(_begin, capacity * sizeof(XPathNode), (size_ + count) * sizeof(XPathNode)));
6537  assert(data);
6538 
6539  // finalize
6540  _begin = data;
6541  _end = data + size_;
6542  _eos = data + size_ + count;
6543  }
6544 
6545  memcpy(_end, begin_, count * sizeof(XPathNode));
6546  _end += count;
6547  }
6548 
6549  void sort_do()
6550  {
6551  _type = XPathSort(_begin, _end, _type, false);
6552  }
6553 
6554  void truncate(XPathNode* pos)
6555  {
6556  assert(_begin <= pos && pos <= _end);
6557 
6558  _end = pos;
6559  }
6560 
6561  void RemoveDuplicates()
6562  {
6563  if (_type == XPathNodeSet::TypeUnsorted)
6564  sort(_begin, _end, duplicate_comparator());
6565 
6566  _end = unique(_begin, _end);
6567  }
6568 
6569  XPathNodeSet::CollectionType Type() const
6570  {
6571  return _type;
6572  }
6573 
6574  void SetType(XPathNodeSet::CollectionType Value)
6575  {
6576  _type = Value;
6577  }
6578  };
6579 PUGI__NS_END
6580 
6581 PUGI__NS_BEGIN
6582  struct XPathContext
6583  {
6584  XPathNode n;
6585  size_t position, size;
6586 
6587  XPathContext(const XPathNode& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
6588  {
6589  }
6590  };
6591 
// Token kinds produced by XPathLexer. Values are consecutive, starting at 0.
// Enumerators without a note are assigned in the part of the lexer outside this section.
enum lexeme_t
{
    lex_none = 0,           // a lone '!' (not followed by '=')

    // comparison operators
    lex_equal,              // "="
    lex_not_equal,          // "!="
    lex_less,               // "<"
    lex_greater,            // ">"
    lex_less_or_equal,      // "<="
    lex_greater_or_equal,   // ">="

    // arithmetic and set operators
    lex_plus,               // "+"
    lex_minus,              // "-"
    lex_multiply,           // "*"
    lex_union,              // "|"

    lex_var_ref,
    lex_open_brace,
    lex_close_brace,
    lex_quoted_string,
    lex_number,
    lex_slash,
    lex_double_slash,
    lex_open_square_brace,
    lex_close_square_brace,
    lex_string,
    lex_comma,
    lex_axis_attribute,
    lex_dot,
    lex_double_dot,
    lex_double_colon,

    lex_eof                 // end of the query string
};
6622 
6623  struct XPathLexerString
6624  {
6625  const Char8* begin;
6626  const Char8* end;
6627 
6628  XPathLexerString(): begin(0), end(0)
6629  {
6630  }
6631 
6632  bool operator==(const Char8* other) const
6633  {
6634  size_t length = static_cast<size_t>(end - begin);
6635 
6636  return strequalrange(other, begin, length);
6637  }
6638  };
6639 
// Tokenizer for XPath query text.
// Every call to next() scans exactly one lexeme starting at the current position.
// The kind of that lexeme, the position where it started and (for some kinds) its
// text range are then available through current(), current_pos() and contents().
6640  class XPathLexer
6641  {
      // Position in the query just past the most recently scanned lexeme
6642  const Char8* _cur;
      // Position where the most recently scanned lexeme started (after skipped whitespace)
6643  const Char8* _cur_lexeme_pos;
      // Text range of the current lexeme; only written for variable references,
      // numbers, names and quoted strings
6644  XPathLexerString _cur_lexeme_contents;
6645 
      // Kind of the most recently scanned lexeme
6646  lexeme_t _cur_lexeme;
6647 
6648  public:
      // Starts lexing at `query` and immediately scans the first lexeme.
      // The query must be zero-terminated: scanning stops at a 0 character.
6649  explicit XPathLexer(const Char8* query): _cur(query)
6650  {
6651  next();
6652  }
6653 
      // Returns the position just past the current lexeme.
6654  const Char8* state() const
6655  {
6656  return _cur;
6657  }
6658 
      // Scans the next lexeme and updates the current kind/position/contents.
      // When the input cannot be recognized the kind becomes lex_none; in the
      // '!', '$', ':' and default failure branches the position is not advanced
      // past the offending character.
6659  void next()
6660  {
6661  const Char8* cur = _cur;
6662 
      // Skip leading whitespace
6663  while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
6664 
6665  // Save lexeme position for error reporting
6666  _cur_lexeme_pos = cur;
6667 
6668  switch (*cur)
6669  {
      // End of the query text
6670  case 0:
6671  _cur_lexeme = lex_eof;
6672  break;
6673 
      // '>' or '>='
6674  case '>':
6675  if (*(cur+1) == '=')
6676  {
6677  cur += 2;
6678  _cur_lexeme = lex_greater_or_equal;
6679  }
6680  else
6681  {
6682  cur += 1;
6683  _cur_lexeme = lex_greater;
6684  }
6685  break;
6686 
      // '<' or '<='
6687  case '<':
6688  if (*(cur+1) == '=')
6689  {
6690  cur += 2;
6691  _cur_lexeme = lex_less_or_equal;
6692  }
6693  else
6694  {
6695  cur += 1;
6696  _cur_lexeme = lex_less;
6697  }
6698  break;
6699 
      // '!=' only; a lone '!' is not a valid lexeme
6700  case '!':
6701  if (*(cur+1) == '=')
6702  {
6703  cur += 2;
6704  _cur_lexeme = lex_not_equal;
6705  }
6706  else
6707  {
6708  _cur_lexeme = lex_none;
6709  }
6710  break;
6711 
6712  case '=':
6713  cur += 1;
6714  _cur_lexeme = lex_equal;
6715 
6716  break;
6717 
6718  case '+':
6719  cur += 1;
6720  _cur_lexeme = lex_plus;
6721 
6722  break;
6723 
6724  case '-':
6725  cur += 1;
6726  _cur_lexeme = lex_minus;
6727 
6728  break;
6729 
6730  case '*':
6731  cur += 1;
6732  _cur_lexeme = lex_multiply;
6733 
6734  break;
6735 
6736  case '|':
6737  cur += 1;
6738  _cur_lexeme = lex_union;
6739 
6740  break;
6741 
      // Variable reference: '$' followed by a name, optionally "prefix:name".
      // The stored contents exclude the '$' itself.
6742  case '$':
6743  cur += 1;
6744 
6745  if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
6746  {
6747  _cur_lexeme_contents.begin = cur;
6748 
6749  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6750 
6751  if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
6752  {
6753  cur++; // :
6754 
6755  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6756  }
6757 
6758  _cur_lexeme_contents.end = cur;
6759 
6760  _cur_lexeme = lex_var_ref;
6761  }
6762  else
6763  {
6764  _cur_lexeme = lex_none;
6765  }
6766 
6767  break;
6768 
6769  case '(':
6770  cur += 1;
6771  _cur_lexeme = lex_open_brace;
6772 
6773  break;
6774 
6775  case ')':
6776  cur += 1;
6777  _cur_lexeme = lex_close_brace;
6778 
6779  break;
6780 
6781  case '[':
6782  cur += 1;
6783  _cur_lexeme = lex_open_square_brace;
6784 
6785  break;
6786 
6787  case ']':
6788  cur += 1;
6789  _cur_lexeme = lex_close_square_brace;
6790 
6791  break;
6792 
6793  case ',':
6794  cur += 1;
6795  _cur_lexeme = lex_comma;
6796 
6797  break;
6798 
      // '/' or '//'
6799  case '/':
6800  if (*(cur+1) == '/')
6801  {
6802  cur += 2;
6803  _cur_lexeme = lex_double_slash;
6804  }
6805  else
6806  {
6807  cur += 1;
6808  _cur_lexeme = lex_slash;
6809  }
6810  break;
6811 
      // '..', a number that starts with its fractional part (".5"), or a single '.'
6812  case '.':
6813  if (*(cur+1) == '.')
6814  {
6815  cur += 2;
6816  _cur_lexeme = lex_double_dot;
6817  }
6818  else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
6819  {
6820  _cur_lexeme_contents.begin = cur; // .
6821 
6822  ++cur;
6823 
6824  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
6825 
6826  _cur_lexeme_contents.end = cur;
6827 
6828  _cur_lexeme = lex_number;
6829  }
6830  else
6831  {
6832  cur += 1;
6833  _cur_lexeme = lex_dot;
6834  }
6835  break;
6836 
6837  case '@':
6838  cur += 1;
6839  _cur_lexeme = lex_axis_attribute;
6840 
6841  break;
6842 
      // Quoted string: runs up to the matching quote character.
      // The stored contents exclude both quotes; a missing closing quote yields lex_none.
6843  case '"':
6844  case '\'':
6845  {
6846  Char8 terminator = *cur;
6847 
6848  ++cur;
6849 
6850  _cur_lexeme_contents.begin = cur;
6851  while (*cur && *cur != terminator) cur++;
6852  _cur_lexeme_contents.end = cur;
6853 
6854  if (!*cur)
6855  _cur_lexeme = lex_none;
6856  else
6857  {
6858  cur += 1;
6859  _cur_lexeme = lex_quoted_string;
6860  }
6861 
6862  break;
6863  }
6864 
      // '::' only; a lone ':' is not a valid lexeme
6865  case ':':
6866  if (*(cur+1) == ':')
6867  {
6868  cur += 2;
6869  _cur_lexeme = lex_double_colon;
6870  }
6871  else
6872  {
6873  _cur_lexeme = lex_none;
6874  }
6875  break;
6876 
      // Anything else: a number that starts with a digit, a name, or an error
6877  default:
6878  if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
6879  {
6880  _cur_lexeme_contents.begin = cur;
6881 
6882  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
6883 
      // Optional fractional part; the digits after '.' may be absent ("1.")
6884  if (*cur == '.')
6885  {
6886  cur++;
6887 
6888  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
6889  }
6890 
6891  _cur_lexeme_contents.end = cur;
6892 
6893  _cur_lexeme = lex_number;
6894  }
6895  else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
6896  {
6897  _cur_lexeme_contents.begin = cur;
6898 
6899  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6900 
      // A single ':' that is followed by neither '*' nor a symbol is left unconsumed
6901  if (cur[0] == ':')
6902  {
6903  if (cur[1] == '*') // namespace test ncName:*
6904  {
6905  cur += 2; // :*
6906  }
6907  else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
6908  {
6909  cur++; // :
6910 
6911  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6912  }
6913  }
6914 
6915  _cur_lexeme_contents.end = cur;
6916 
6917  _cur_lexeme = lex_string;
6918  }
6919  else
6920  {
6921  _cur_lexeme = lex_none;
6922  }
6923  }
6924 
      // Commit the new scan position
6925  _cur = cur;
6926  }
6927 
      // Returns the kind of the current lexeme.
6928  lexeme_t current() const
6929  {
6930  return _cur_lexeme;
6931  }
6932 
      // Returns the position where the current lexeme started.
6933  const Char8* current_pos() const
6934  {
6935  return _cur_lexeme_pos;
6936  }
6937 
      // Returns the text range of the current lexeme.
      // Only valid for variable references, numbers, names and quoted strings (asserted).
6938  const XPathLexerString& contents() const
6939  {
6940  assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
6941 
6942  return _cur_lexeme_contents;
6943  }
6944  };
6945 
// Kinds of nodes in the compiled XPath expression tree.
// "left", "right" and "third" in the comments refer to the operand sub-expressions.
enum ast_type_t
{
    // --- boolean, relational and arithmetic operators ---
    ast_op_or,                  // left or right
    ast_op_and,                 // left and right
    ast_op_equal,               // left = right
    ast_op_not_equal,           // left != right
    ast_op_less,                // left < right
    ast_op_greater,             // left > right
    ast_op_less_or_equal,       // left <= right
    ast_op_greater_or_equal,    // left >= right
    ast_op_add,                 // left + right
    ast_op_subtract,            // left - right
    ast_op_multiply,            // left * right
    ast_op_divide,              // left div right
    ast_op_mod,                 // left mod right
    ast_op_negate,              // - left
    ast_op_union,               // left | right

    // --- predicates and filters ---
    ast_predicate,              // apply predicate to set; next points to next predicate
    ast_filter,                 // select * from left where right
    ast_filter_posinv,          // select * from left where right; proximity position invariant

    // --- constants and variables ---
    ast_string_constant,        // string constant
    ast_number_constant,        // number constant
    ast_variable,               // variable

    // --- node set functions ---
    ast_func_last,              // last()
    ast_func_position,          // position()
    ast_func_count,             // count(left)
    ast_func_id,                // id(left)
    ast_func_local_Name_0,      // local-name()
    ast_func_local_Name_1,      // local-name(left)
    ast_func_namespace_uri_0,   // namespace-uri()
    ast_func_namespace_uri_1,   // namespace-uri(left)
    ast_func_Name_0,            // name()
    ast_func_Name_1,            // name(left)

    // --- string functions ---
    ast_func_string_0,          // string()
    ast_func_string_1,          // string(left)
    ast_func_concat,            // concat(left, right, siblings)
    ast_func_starts_with,       // starts-with(left, right)
    ast_func_contains,          // contains(left, right)
    ast_func_substring_before,  // substring-before(left, right)
    ast_func_substring_after,   // substring-after(left, right)
    ast_func_substring_2,       // substring(left, right)
    ast_func_substring_3,       // substring(left, right, third)
    ast_func_string_length_0,   // string-length()
    ast_func_string_length_1,   // string-length(left)
    ast_func_normalize_space_0, // normalize-space()
    ast_func_normalize_space_1, // normalize-space(left)
    ast_func_translate,         // translate(left, right, third)

    // --- boolean functions ---
    ast_func_boolean,           // boolean(left)
    ast_func_not,               // not(left)
    ast_func_true,              // true()
    ast_func_false,             // false()
    ast_func_lang,              // lang(left)

    // --- number functions ---
    ast_func_number_0,          // number()
    ast_func_number_1,          // number(left)
    ast_func_sum,               // sum(left)
    ast_func_floor,             // floor(left)
    ast_func_ceiling,           // ceiling(left)
    ast_func_round,             // round(left)

    // --- location steps ---
    ast_step,                   // process set left with step
    ast_step_GetRoot            // select root node
};
7007 
// Axes of a location step. The values follow declaration order, starting at zero.
enum axis_t
{
    axis_ancestor,            // ancestor::
    axis_ancestor_or_self,    // ancestor-or-self::
    axis_attribute,           // attribute::
    axis_GetChild,            // child::
    axis_descendant,          // descendant::
    axis_descendant_or_self,  // descendant-or-self::
    axis_following,           // following::
    axis_following_sibling,   // following-sibling::
    axis_namespace,           // namespace::
    axis_GetParent,           // parent::
    axis_preceding,           // preceding::
    axis_preceding_sibling,   // preceding-sibling::
    axis_self                 // self::
};
7024 
// Node tests that a location step applies to each candidate node or attribute.
enum nodetest_t
{
    nodetest_none,             // no test
    nodetest_Name,             // compare the name against the stored test string
    nodetest_type_node,        // accept any node
    nodetest_type_comment,     // accept comment nodes
    nodetest_type_pi,          // accept processing instructions
    nodetest_type_text,        // accept PCDATA and CDATA nodes
    nodetest_pi,               // accept processing instructions with a matching name
    nodetest_all,              // accept every element (or every attribute on the attribute axis)
    nodetest_all_in_namespace  // accept names that start with the stored test string
};
7037 
7038  template <axis_t N> struct axis_to_type
7039  {
7040  static const axis_t axis;
7041  };
7042 
7043  template <axis_t N> const axis_t axis_to_type<N>::axis = N;
7044 
7045  class XPathAstNode
7046  {
7047  private:
7048  // node type
7049  char _type;
7050  char _retType;
7051 
7052  // for ast_step / ast_predicate
7053  char _axis;
7054  char _test;
7055 
7056  // tree node structure
7057  XPathAstNode* _left;
7058  XPathAstNode* _right;
7059  XPathAstNode* _next;
7060 
7061  union
7062  {
7063  // Value for ast_string_constant
7064  const Char8* string;
7065  // Value for ast_number_constant
7066  double number;
7067  // variable for ast_variable
7068  XPathVariable* variable;
7069  // node test for ast_step (node Name/namespace/node Type/pi target)
7070  const Char8* nodetest;
7071  } _data;
7072 
7073  XPathAstNode(const XPathAstNode&);
7074  XPathAstNode& operator=(const XPathAstNode&);
7075 
// Evaluates an equality-style comparison between two operand expressions.
// The conversion applied before calling `comp` depends on the operands' return types:
//  - neither is a node set: compare as booleans if either is boolean, otherwise as
//    numbers if either is a number, otherwise as strings;
//  - both are node sets: true if `comp` holds for the string values of any pair of nodes;
//  - exactly one is a node set: the other operand decides (boolean, number or string),
//    and for number/string the result is true if `comp` holds for any node of the set.
// In the mixed case the operands may be swapped, so `comp` is expected to be symmetric;
// the visible callers pass equal_to() and not_equal_to().
// Returns false (after asserting) when the type combination is not handled.
7076  template <class Comp> static bool compare_eq(XPathAstNode* lhs, XPathAstNode* rhs, const XPathContext& c, const XPathStack& stack, const Comp& comp)
7077  {
7078  XPathValueType lt = lhs->retType(), rt = rhs->retType();
7079 
      // Case 1: no node sets involved
7080  if (lt != XPathTypeNodeSet && rt != XPathTypeNodeSet)
7081  {
7082  if (lt == XPathTypeBoole || rt == XPathTypeBoole)
7083  return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7084  else if (lt == XPathTypeNumber || rt == XPathTypeNumber)
7085  return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
7086  else if (lt == XPathTypeString || rt == XPathTypeString)
7087  {
      // NOTE(review): XPathAllocatorCapture presumably releases allocations made on
      // stack.Result when it goes out of scope - confirm against its definition
7088  XPathAllocatorCapture cr(stack.Result);
7089 
7090  XPathString ls = lhs->eval_string(c, stack);
7091  XPathString rs = rhs->eval_string(c, stack);
7092 
7093  return comp(ls, rs);
7094  }
7095  }
      // Case 2: both operands are node sets - look for any matching pair of string values
7096  else if (lt == XPathTypeNodeSet && rt == XPathTypeNodeSet)
7097  {
7098  XPathAllocatorCapture cr(stack.Result);
7099 
7100  XPathNodeSet_raw ls = lhs->eval_NodeSet(c, stack);
7101  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7102 
7103  for (const XPathNode* li = ls.begin(); li != ls.end(); ++li)
7104  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7105  {
7106  XPathAllocatorCapture cri(stack.Result);
7107 
7108  if (comp(string_Value(*li, stack.Result), string_Value(*ri, stack.Result)))
7109  return true;
7110  }
7111 
7112  return false;
7113  }
      // Case 3: exactly one operand is a node set
7114  else
7115  {
      // Normalize so that the node set is always on the right-hand side
7116  if (lt == XPathTypeNodeSet)
7117  {
7118  swap(lhs, rhs);
7119  swap(lt, rt);
7120  }
7121 
      // Boolean vs node set: the node set is converted to a boolean as a whole
7122  if (lt == XPathTypeBoole)
7123  return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
      // Number vs node set: compare against the numeric value of each node
7124  else if (lt == XPathTypeNumber)
7125  {
7126  XPathAllocatorCapture cr(stack.Result);
7127 
7128  double l = lhs->eval_number(c, stack);
7129  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7130 
7131  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7132  {
7133  XPathAllocatorCapture cri(stack.Result);
7134 
7135  if (comp(l, convert_Stringo_number(string_Value(*ri, stack.Result).c_str())))
7136  return true;
7137  }
7138 
7139  return false;
7140  }
      // String vs node set: compare against the string value of each node
7141  else if (lt == XPathTypeString)
7142  {
7143  XPathAllocatorCapture cr(stack.Result);
7144 
7145  XPathString l = lhs->eval_string(c, stack);
7146  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7147 
7148  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7149  {
7150  XPathAllocatorCapture cri(stack.Result);
7151 
7152  if (comp(l, string_Value(*ri, stack.Result)))
7153  return true;
7154  }
7155 
7156  return false;
7157  }
7158  }
7159 
      // Reached only when none of the type branches above returned
7160  assert(!"Wrong Types");
7161  return false;
7162  }
7163 
// Evaluates a relational comparison between two operand expressions.
// All values are compared as numbers:
//  - neither operand is a node set: compare the two numeric values;
//  - both are node sets: true if `comp` holds for the numeric values of any pair of nodes;
//  - one is a node set: true if `comp` holds between the other operand's number and the
//    numeric value of any node in the set.
// Operand order is always preserved when calling `comp(left, right)`; the visible
// callers express "greater" by passing the operands in swapped order with less().
7164  template <class Comp> static bool compare_rel(XPathAstNode* lhs, XPathAstNode* rhs, const XPathContext& c, const XPathStack& stack, const Comp& comp)
7165  {
7166  XPathValueType lt = lhs->retType(), rt = rhs->retType();
7167 
7168  if (lt != XPathTypeNodeSet && rt != XPathTypeNodeSet)
7169  return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
      // Node set vs node set
7170  else if (lt == XPathTypeNodeSet && rt == XPathTypeNodeSet)
7171  {
7172  XPathAllocatorCapture cr(stack.Result);
7173 
7174  XPathNodeSet_raw ls = lhs->eval_NodeSet(c, stack);
7175  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7176 
7177  for (const XPathNode* li = ls.begin(); li != ls.end(); ++li)
7178  {
7179  XPathAllocatorCapture cri(stack.Result);
7180 
      // The left value is converted once per outer iteration and reused for every right node
7181  double l = convert_Stringo_number(string_Value(*li, stack.Result).c_str());
7182 
7183  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7184  {
7185  XPathAllocatorCapture crii(stack.Result);
7186 
7187  if (comp(l, convert_Stringo_number(string_Value(*ri, stack.Result).c_str())))
7188  return true;
7189  }
7190  }
7191 
7192  return false;
7193  }
      // Number on the left, node set on the right
7194  else if (lt != XPathTypeNodeSet && rt == XPathTypeNodeSet)
7195  {
7196  XPathAllocatorCapture cr(stack.Result);
7197 
7198  double l = lhs->eval_number(c, stack);
7199  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7200 
7201  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7202  {
7203  XPathAllocatorCapture cri(stack.Result);
7204 
7205  if (comp(l, convert_Stringo_number(string_Value(*ri, stack.Result).c_str())))
7206  return true;
7207  }
7208 
7209  return false;
7210  }
      // Node set on the left, number on the right
7211  else if (lt == XPathTypeNodeSet && rt != XPathTypeNodeSet)
7212  {
7213  XPathAllocatorCapture cr(stack.Result);
7214 
7215  XPathNodeSet_raw ls = lhs->eval_NodeSet(c, stack);
7216  double r = rhs->eval_number(c, stack);
7217 
7218  for (const XPathNode* li = ls.begin(); li != ls.end(); ++li)
7219  {
7220  XPathAllocatorCapture cri(stack.Result);
7221 
7222  if (comp(convert_Stringo_number(string_Value(*li, stack.Result).c_str()), r))
7223  return true;
7224  }
7225 
7226  return false;
7227  }
      // The four branches above cover every combination; this is a safety net
7228  else
7229  {
7230  assert(!"Wrong Types");
7231  return false;
7232  }
7233  }
7234 
7235  void apply_predicate(XPathNodeSet_raw& ns, size_t first, XPathAstNode* expr, const XPathStack& stack)
7236  {
7237  assert(ns.size() >= first);
7238 
7239  size_t i = 1;
7240  size_t size = ns.size() - first;
7241 
7242  XPathNode* last = ns.begin() + first;
7243 
7244  // RemoveIf... or well, sort of
7245  for (XPathNode* it = last; it != ns.end(); ++it, ++i)
7246  {
7247  XPathContext c(*it, i, size);
7248 
7249  if (expr->retType() == XPathTypeNumber)
7250  {
7251  if (expr->eval_number(c, stack) == i)
7252  *last++ = *it;
7253  }
7254  else if (expr->eval_boolean(c, stack))
7255  *last++ = *it;
7256  }
7257 
7258  ns.truncate(last);
7259  }
7260 
7261  void apply_predicates(XPathNodeSet_raw& ns, size_t first, const XPathStack& stack)
7262  {
7263  if (ns.size() == first) return;
7264 
7265  for (XPathAstNode* pred = _right; pred; pred = pred->_next)
7266  {
7267  apply_predicate(ns, first, pred->_left, stack);
7268  }
7269  }
7270 
7271  void step_push(XPathNodeSet_raw& ns, const Attribute& a, const Node& GetParent, XPathAllocator* alloc)
7272  {
7273  if (!a) return;
7274 
7275  const Char8* Name = a.Name();
7276 
7277  // There are no GetAttribute nodes corresponding to attributes that declare namespaces
7278  // That is, "xmlns:..." or "xmlns"
7279  if (starts_with(Name, "xmlns") && (Name[5] == 0 || Name[5] == ':')) return;
7280 
7281  switch (_test)
7282  {
7283  case nodetest_Name:
7284  if (strequal(Name, _data.nodetest)) ns.push_back(XPathNode(a, GetParent), alloc);
7285  break;
7286 
7287  case nodetest_type_node:
7288  case nodetest_all:
7289  ns.push_back(XPathNode(a, GetParent), alloc);
7290  break;
7291 
7292  case nodetest_all_in_namespace:
7293  if (starts_with(Name, _data.nodetest))
7294  ns.push_back(XPathNode(a, GetParent), alloc);
7295  break;
7296 
7297  default:
7298  ;
7299  }
7300  }
7301 
7302  void step_push(XPathNodeSet_raw& ns, const Node& n, XPathAllocator* alloc)
7303  {
7304  if (!n) return;
7305 
7306  switch (_test)
7307  {
7308  case nodetest_Name:
7309  if (n.Type() == NodeElement && strequal(n.Name(), _data.nodetest)) ns.push_back(n, alloc);
7310  break;
7311 
7312  case nodetest_type_node:
7313  ns.push_back(n, alloc);
7314  break;
7315 
7316  case nodetest_type_comment:
7317  if (n.Type() == NodeComment)
7318  ns.push_back(n, alloc);
7319  break;
7320 
7321  case nodetest_type_text:
7322  if (n.Type() == NodePcdata || n.Type() == NodeCdata)
7323  ns.push_back(n, alloc);
7324  break;
7325 
7326  case nodetest_type_pi:
7327  if (n.Type() == NodePi)
7328  ns.push_back(n, alloc);
7329  break;
7330 
7331  case nodetest_pi:
7332  if (n.Type() == NodePi && strequal(n.Name(), _data.nodetest))
7333  ns.push_back(n, alloc);
7334  break;
7335 
7336  case nodetest_all:
7337  if (n.Type() == NodeElement)
7338  ns.push_back(n, alloc);
7339  break;
7340 
7341  case nodetest_all_in_namespace:
7342  if (n.Type() == NodeElement && starts_with(n.Name(), _data.nodetest))
7343  ns.push_back(n, alloc);
7344  break;
7345 
7346  default:
7347  assert(!"Unknown axis");
7348  }
7349  }
7350 
// Collects into `ns` the nodes reachable from context node `n` along the axis encoded
// in the tag type T (read as T::axis). Every candidate goes through step_push, which
// applies this step's node test; the tag argument itself is unused.
// Axes without a case below (for example the namespace axis) trip the assertion.
7351  template <class T> void step_fill(XPathNodeSet_raw& ns, const Node& n, XPathAllocator* alloc, T)
7352  {
7353  const axis_t axis = T::axis;
7354 
7355  switch (axis)
7356  {
      // All attributes of n, in attribute order
7357  case axis_attribute:
7358  {
7359  for (Attribute a = n.GetFirstAttribute(); a; a = a.GetNextAttribute())
7360  step_push(ns, a, n, alloc);
7361 
7362  break;
7363  }
7364 
      // Direct children of n, first to last
7365  case axis_GetChild:
7366  {
7367  for (Node c = n.GetFirstChild(); c; c = c.GetNextSibling())
7368  step_push(ns, c, alloc);
7369 
7370  break;
7371  }
7372 
      // Whole subtree below n (optionally including n itself), visited iteratively:
      // go down to the first child, otherwise to the next sibling, otherwise climb
      // until a next sibling exists or n is reached again
7373  case axis_descendant:
7374  case axis_descendant_or_self:
7375  {
7376  if (axis == axis_descendant_or_self)
7377  step_push(ns, n, alloc);
7378 
7379  Node cur = n.GetFirstChild();
7380 
7381  while (cur && cur != n)
7382  {
7383  step_push(ns, cur, alloc);
7384 
7385  if (cur.GetFirstChild())
7386  cur = cur.GetFirstChild();
7387  else if (cur.GetNextSibling())
7388  cur = cur.GetNextSibling();
7389  else
7390  {
7391  while (!cur.GetNextSibling() && cur != n)
7392  cur = cur.GetParent();
7393 
      // Arriving back at n ends the outer loop
7394  if (cur != n) cur = cur.GetNextSibling();
7395  }
7396  }
7397 
7398  break;
7399  }
7400 
      // Siblings after n, nearest first
7401  case axis_following_sibling:
7402  {
7403  for (Node c = n.GetNextSibling(); c; c = c.GetNextSibling())
7404  step_push(ns, c, alloc);
7405 
7406  break;
7407  }
7408 
      // Siblings before n, nearest first
7409  case axis_preceding_sibling:
7410  {
7411  for (Node c = n.GetPreviousSibling(); c; c = c.GetPreviousSibling())
7412  step_push(ns, c, alloc);
7413 
7414  break;
7415  }
7416 
      // Everything after n, excluding n's own descendants
7417  case axis_following:
7418  {
7419  Node cur = n;
7420 
7421  // exit from this node so that we don't include descendants
7422  while (cur && !cur.GetNextSibling()) cur = cur.GetParent();
7423  cur = cur.GetNextSibling();
7424 
      // cur may be null here when nothing follows n; step_push ignores null nodes
      // and the loop then terminates through the final else branch
7425  for (;;)
7426  {
7427  step_push(ns, cur, alloc);
7428 
7429  if (cur.GetFirstChild())
7430  cur = cur.GetFirstChild();
7431  else if (cur.GetNextSibling())
7432  cur = cur.GetNextSibling();
7433  else
7434  {
7435  while (cur && !cur.GetNextSibling()) cur = cur.GetParent();
7436  cur = cur.GetNextSibling();
7437 
7438  if (!cur) break;
7439  }
7440  }
7441 
7442  break;
7443  }
7444 
      // Everything before n, excluding n's ancestors; visited last-child first
7445  case axis_preceding:
7446  {
7447  Node cur = n;
7448 
      // Find the closest node (n or an ancestor) that has a previous sibling and start there
7449  while (cur && !cur.GetPreviousSibling()) cur = cur.GetParent();
7450  cur = cur.GetPreviousSibling();
7451 
7452  for (;;)
7453  {
      // Descend to the deepest last child before emitting anything
7454  if (cur.GetLastChild())
7455  cur = cur.GetLastChild();
7456  else
7457  {
7458  // leaf node, can't be ancestor
7459  step_push(ns, cur, alloc);
7460 
7461  if (cur.GetPreviousSibling())
7462  cur = cur.GetPreviousSibling();
7463  else
7464  {
      // Climb up, emitting each parent on the way unless it is an ancestor of n
7465  do
7466  {
7467  cur = cur.GetParent();
7468  if (!cur) break;
7469 
7470  if (!NodeIs_ancestor(cur, n)) step_push(ns, cur, alloc);
7471  }
7472  while (!cur.GetPreviousSibling());
7473 
7474  cur = cur.GetPreviousSibling();
7475 
7476  if (!cur) break;
7477  }
7478  }
7479  }
7480 
7481  break;
7482  }
7483 
      // Parent chain of n (optionally starting with n itself), nearest first
7484  case axis_ancestor:
7485  case axis_ancestor_or_self:
7486  {
7487  if (axis == axis_ancestor_or_self)
7488  step_push(ns, n, alloc);
7489 
7490  Node cur = n.GetParent();
7491 
7492  while (cur)
7493  {
7494  step_push(ns, cur, alloc);
7495 
7496  cur = cur.GetParent();
7497  }
7498 
7499  break;
7500  }
7501 
      // Only n itself
7502  case axis_self:
7503  {
7504  step_push(ns, n, alloc);
7505 
7506  break;
7507  }
7508 
      // Only the parent of n, if there is one
7509  case axis_GetParent:
7510  {
7511  if (n.GetParent()) step_push(ns, n.GetParent(), alloc);
7512 
7513  break;
7514  }
7515 
7516  default:
7517  assert(!"Unimplemented axis");
7518  }
7519  }
7520 
// Collects into `ns` the nodes reachable along axis T::axis when the context node is
// the attribute `a` whose owning element is `p`.
// Only the ancestor, ancestor-or-self, descendant-or-self, self, following, parent and
// preceding axes are handled; any other axis trips the assertion.
7521  template <class T> void step_fill(XPathNodeSet_raw& ns, const Attribute& a, const Node& p, XPathAllocator* alloc, T v)
7522  {
7523  const axis_t axis = T::axis;
7524 
7525  switch (axis)
7526  {
      // The owning element and its parent chain; for ancestor-or-self the attribute
      // itself comes first, but only when the node test is node()
7527  case axis_ancestor:
7528  case axis_ancestor_or_self:
7529  {
7530  if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node Type test
7531  step_push(ns, a, p, alloc);
7532 
7533  Node cur = p;
7534 
7535  while (cur)
7536  {
7537  step_push(ns, cur, alloc);
7538 
7539  cur = cur.GetParent();
7540  }
7541 
7542  break;
7543  }
7544 
      // An attribute has no descendants here, so both axes reduce to the attribute itself
7545  case axis_descendant_or_self:
7546  case axis_self:
7547  {
7548  if (_test == nodetest_type_node) // reject attributes based on principal node Type test
7549  step_push(ns, a, p, alloc);
7550 
7551  break;
7552  }
7553 
      // Traversal starts at the owning element and advances before pushing, so p itself
      // is skipped while its children and everything after them are visited
7554  case axis_following:
7555  {
7556  Node cur = p;
7557 
7558  for (;;)
7559  {
7560  if (cur.GetFirstChild())
7561  cur = cur.GetFirstChild();
7562  else if (cur.GetNextSibling())
7563  cur = cur.GetNextSibling();
7564  else
7565  {
7566  while (cur && !cur.GetNextSibling()) cur = cur.GetParent();
7567  cur = cur.GetNextSibling();
7568 
7569  if (!cur) break;
7570  }
7571 
7572  step_push(ns, cur, alloc);
7573  }
7574 
7575  break;
7576  }
7577 
      // The parent of an attribute is its owning element
7578  case axis_GetParent:
7579  {
7580  step_push(ns, p, alloc);
7581 
7582  break;
7583  }
7584 
7585  case axis_preceding:
7586  {
7587  // preceding:: axis does not include GetAttribute nodes and GetAttribute ancestors (they are the same as GetParent's ancestors), so we can reuse node preceding
7588  step_fill(ns, p, alloc, v);
7589  break;
7590  }
7591 
7592  default:
7593  assert(!"Unimplemented axis");
7594  }
7595  }
7596 
7597  template <class T> XPathNodeSet_raw step_do(const XPathContext& c, const XPathStack& stack, T v)
7598  {
7599  const axis_t axis = T::axis;
7600  bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_GetParent || axis == axis_preceding || axis == axis_self);
7601 
7602  XPathNodeSet_raw ns;
7603  ns.SetType((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? XPathNodeSet::TypeSortedReverse : XPathNodeSet::TypeSorted);
7604 
7605  if (_left)
7606  {
7607  XPathNodeSet_raw s = _left->eval_NodeSet(c, stack);
7608 
7609  // self axis preserves the original order
7610  if (axis == axis_self) ns.SetType(s.Type());
7611 
7612  for (const XPathNode* it = s.begin(); it != s.end(); ++it)
7613  {
7614  size_t size = ns.size();
7615 
7616  // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
7617  if (axis != axis_self && size != 0) ns.SetType(XPathNodeSet::TypeUnsorted);
7618 
7619  if (it->GetNode())
7620  step_fill(ns, it->GetNode(), stack.Result, v);
7621  else if (attributes)
7622  step_fill(ns, it->GetAttribute(), it->GetParent(), stack.Result, v);
7623 
7624  apply_predicates(ns, size, stack);
7625  }
7626  }
7627  else
7628  {
7629  if (c.n.GetNode())
7630  step_fill(ns, c.n.GetNode(), stack.Result, v);
7631  else if (attributes)
7632  step_fill(ns, c.n.GetAttribute(), c.n.GetParent(), stack.Result, v);
7633 
7634  apply_predicates(ns, 0, stack);
7635  }
7636 
7637  // GetChild, GetAttribute and self axes always generate unique set of nodes
7638  // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
7639  if (axis != axis_GetChild && axis != axis_attribute && axis != axis_self && ns.Type() == XPathNodeSet::TypeUnsorted)
7640  ns.RemoveDuplicates();
7641 
7642  return ns;
7643  }
7644 
7645  public:
7646  XPathAstNode(ast_type_t Type, XPathValueType retType_, const Char8* Value):
7647  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7648  {
7649  assert(Type == ast_string_constant);
7650  _data.string = Value;
7651  }
7652 
7653  XPathAstNode(ast_type_t Type, XPathValueType retType_, double Value):
7654  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7655  {
7656  assert(Type == ast_number_constant);
7657  _data.number = Value;
7658  }
7659 
7660  XPathAstNode(ast_type_t Type, XPathValueType retType_, XPathVariable* Value):
7661  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7662  {
7663  assert(Type == ast_variable);
7664  _data.variable = Value;
7665  }
7666 
7667  XPathAstNode(ast_type_t Type, XPathValueType retType_, XPathAstNode* left = 0, XPathAstNode* right = 0):
7668  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
7669  {
7670  }
7671 
7672  XPathAstNode(ast_type_t Type, XPathAstNode* left, axis_t axis, nodetest_t test, const Char8* contents):
7673  _type(static_cast<char>(Type)), _retType(XPathTypeNodeSet), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
7674  {
7675  _data.nodetest = contents;
7676  }
7677 
7678  void SetNext(XPathAstNode* Value)
7679  {
7680  _next = Value;
7681  }
7682 
7683  void SetRight(XPathAstNode* Value)
7684  {
7685  _right = Value;
7686  }
7687 
// Evaluates this node in context `c` and returns the result as a boolean.
// Node kinds that produce a boolean directly are handled in their own cases; every
// other kind is evaluated according to its declared return type and then converted
// (number via convert_number_to_boolean, string and node set via "is not empty").
7688  bool eval_boolean(const XPathContext& c, const XPathStack& stack)
7689  {
7690  switch (_type)
7691  {
      // The built-in || and && short-circuit, so the right operand may not be evaluated
7692  case ast_op_or:
7693  return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
7694 
7695  case ast_op_and:
7696  return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
7697 
7698  case ast_op_equal:
7699  return compare_eq(_left, _right, c, stack, equal_to());
7700 
7701  case ast_op_not_equal:
7702  return compare_eq(_left, _right, c, stack, not_equal_to());
7703 
7704  case ast_op_less:
7705  return compare_rel(_left, _right, c, stack, less());
7706 
      // a > b is evaluated as b < a
7707  case ast_op_greater:
7708  return compare_rel(_right, _left, c, stack, less());
7709 
7710  case ast_op_less_or_equal:
7711  return compare_rel(_left, _right, c, stack, less_equal());
7712 
      // a >= b is evaluated as b <= a
7713  case ast_op_greater_or_equal:
7714  return compare_rel(_right, _left, c, stack, less_equal());
7715 
7716  case ast_func_starts_with:
7717  {
7718  XPathAllocatorCapture cr(stack.Result);
7719 
7720  XPathString lr = _left->eval_string(c, stack);
7721  XPathString rr = _right->eval_string(c, stack);
7722 
7723  return starts_with(lr.c_str(), rr.c_str());
7724  }
7725 
7726  case ast_func_contains:
7727  {
7728  XPathAllocatorCapture cr(stack.Result);
7729 
7730  XPathString lr = _left->eval_string(c, stack);
7731  XPathString rr = _right->eval_string(c, stack);
7732 
7733  return FindSubstring(lr.c_str(), rr.c_str()) != 0;
7734  }
7735 
7736  case ast_func_boolean:
7737  return _left->eval_boolean(c, stack);
7738 
7739  case ast_func_not:
7740  return !_left->eval_boolean(c, stack);
7741 
7742  case ast_func_true:
7743  return true;
7744 
7745  case ast_func_false:
7746  return false;
7747 
      // lang(): false for attribute context nodes. Otherwise the nearest xml:lang
      // attribute on the context node or one of its ancestors decides the result.
7748  case ast_func_lang:
7749  {
7750  if (c.n.GetAttribute()) return false;
7751 
7752  XPathAllocatorCapture cr(stack.Result);
7753 
7754  XPathString lang = _left->eval_string(c, stack);
7755 
7756  for (Node n = c.n.GetNode(); n; n = n.GetParent())
7757  {
7758  Attribute a = n.GetAttribute("xml:lang");
7759 
7760  if (a)
7761  {
7762  const Char8* Value = a.Value();
7763 
      // The argument must be a prefix of the attribute value, compared through tolower_ascii
7764  // strnicmp / strncasecmp is not portable
7765  for (const Char8* lit = lang.c_str(); *lit; ++lit)
7766  {
7767  if (tolower_ascii(*lit) != tolower_ascii(*Value)) return false;
7768  ++Value;
7769  }
7770 
      // The prefix must be the whole value or be followed by a '-' suffix separator
7771  return *Value == 0 || *Value == '-';
7772  }
7773  }
7774 
7775  return false;
7776  }
7777 
      // A boolean variable is returned directly; any other variable type deliberately
      // falls through into the generic conversion below
7778  case ast_variable:
7779  {
7780  assert(_retType == _data.variable->Type());
7781 
7782  if (_retType == XPathTypeBoole)
7783  return _data.variable->GetBoole();
7784 
7785  // fallthrough to Type conversion
7786  }
7787 
      // Generic path: evaluate with the node's own return type and convert to boolean
7788  default:
7789  {
7790  switch (_retType)
7791  {
7792  case XPathTypeNumber:
7793  return convert_number_to_boolean(eval_number(c, stack));
7794 
7795  case XPathTypeString:
7796  {
7797  XPathAllocatorCapture cr(stack.Result);
7798 
7799  return !eval_string(c, stack).Empty();
7800  }
7801 
7802  case XPathTypeNodeSet:
7803  {
7804  XPathAllocatorCapture cr(stack.Result);
7805 
7806  return !eval_NodeSet(c, stack).Empty();
7807  }
7808 
      // A boolean-typed node that has no dedicated case above ends up here
7809  default:
7810  assert(!"Wrong expression for return Type boolean");
7811  return false;
7812  }
7813  }
7814  }
7815  }
7816 
// Evaluates this node in context `c` and returns the result as a number.
// Node kinds that produce a number directly are handled in their own cases; every
// other kind is evaluated according to its declared return type and then converted
// (boolean to 1 or 0, string and node set via convert_Stringo_number on the string value).
7817  double eval_number(const XPathContext& c, const XPathStack& stack)
7818  {
7819  switch (_type)
7820  {
7821  case ast_op_add:
7822  return _left->eval_number(c, stack) + _right->eval_number(c, stack);
7823 
7824  case ast_op_subtract:
7825  return _left->eval_number(c, stack) - _right->eval_number(c, stack);
7826 
7827  case ast_op_multiply:
7828  return _left->eval_number(c, stack) * _right->eval_number(c, stack);
7829 
      // Plain floating point division; no special handling of a zero divisor here
7830  case ast_op_divide:
7831  return _left->eval_number(c, stack) / _right->eval_number(c, stack);
7832 
7833  case ast_op_mod:
7834  return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
7835 
7836  case ast_op_negate:
7837  return -_left->eval_number(c, stack);
7838 
7839  case ast_number_constant:
7840  return _data.number;
7841 
      // last() and position() read the values stored in the evaluation context
7842  case ast_func_last:
7843  return static_cast<double>(c.size);
7844 
7845  case ast_func_position:
7846  return static_cast<double>(c.position);
7847 
7848  case ast_func_count:
7849  {
7850  XPathAllocatorCapture cr(stack.Result);
7851 
7852  return static_cast<double>(_left->eval_NodeSet(c, stack).size());
7853  }
7854 
      // string-length() without argument measures the string value of the context node
7855  case ast_func_string_length_0:
7856  {
7857  XPathAllocatorCapture cr(stack.Result);
7858 
7859  return static_cast<double>(string_Value(c.n, stack.Result).length());
7860  }
7861 
7862  case ast_func_string_length_1:
7863  {
7864  XPathAllocatorCapture cr(stack.Result);
7865 
7866  return static_cast<double>(_left->eval_string(c, stack).length());
7867  }
7868 
      // number() without argument converts the string value of the context node
7869  case ast_func_number_0:
7870  {
7871  XPathAllocatorCapture cr(stack.Result);
7872 
7873  return convert_Stringo_number(string_Value(c.n, stack.Result).c_str());
7874  }
7875 
7876  case ast_func_number_1:
7877  return _left->eval_number(c, stack);
7878 
      // sum(): adds up the numeric conversion of every node's string value
7879  case ast_func_sum:
7880  {
7881  XPathAllocatorCapture cr(stack.Result);
7882 
7883  double r = 0;
7884 
7885  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
7886 
7887  for (const XPathNode* it = ns.begin(); it != ns.end(); ++it)
7888  {
7889  XPathAllocatorCapture cri(stack.Result);
7890 
7891  r += convert_Stringo_number(string_Value(*it, stack.Result).c_str());
7892  }
7893 
7894  return r;
7895  }
7896 
7897  case ast_func_floor:
7898  {
7899  double r = _left->eval_number(c, stack);
7900 
      // r == r is false only for NaN, which is returned unchanged
7901  return r == r ? floor(r) : r;
7902  }
7903 
7904  case ast_func_ceiling:
7905  {
7906  double r = _left->eval_number(c, stack);
7907 
      // r == r is false only for NaN, which is returned unchanged
7908  return r == r ? ceil(r) : r;
7909  }
7910 
7911  case ast_func_round:
7912  return round_nearest_nzero(_left->eval_number(c, stack));
7913 
      // A numeric variable is returned directly; any other variable type deliberately
      // falls through into the generic conversion below
7914  case ast_variable:
7915  {
7916  assert(_retType == _data.variable->Type());
7917 
7918  if (_retType == XPathTypeNumber)
7919  return _data.variable->GetNumber();
7920 
7921  // fallthrough to Type conversion
7922  }
7923 
      // Generic path: evaluate with the node's own return type and convert to a number
7924  default:
7925  {
7926  switch (_retType)
7927  {
7928  case XPathTypeBoole:
7929  return eval_boolean(c, stack) ? 1 : 0;
7930 
7931  case XPathTypeString:
7932  {
7933  XPathAllocatorCapture cr(stack.Result);
7934 
7935  return convert_Stringo_number(eval_string(c, stack).c_str());
7936  }
7937 
      // A node set is converted through its string evaluation, same as the string case
7938  case XPathTypeNodeSet:
7939  {
7940  XPathAllocatorCapture cr(stack.Result);
7941 
7942  return convert_Stringo_number(eval_string(c, stack).c_str());
7943  }
7944 
      // A number-typed node that has no dedicated case above ends up here
7945  default:
7946  assert(!"Wrong expression for return Type number");
7947  return 0;
7948  }
7949 
7950  }
7951  }
7952  }
7953 
7954  XPathString eval_string_concat(const XPathContext& c, const XPathStack& stack)
7955  {
7956  assert(_type == ast_func_concat);
7957 
7958  XPathAllocatorCapture ct(stack.temp);
7959 
7960  // count the string number
7961  size_t count = 1;
7962  for (XPathAstNode* nc = _right; nc; nc = nc->_next) count++;
7963 
7964  // gather all strings
7965  XPathString static_buffer[4];
7966  XPathString* buffer = static_buffer;
7967 
7968  // allocate on-heap for large concats
7969  if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
7970  {
7971  buffer = static_cast<XPathString*>(stack.temp->allocate(count * sizeof(XPathString)));
7972  assert(buffer);
7973  }
7974 
7975  // evaluate all strings to temporary stack
7976  XPathStack swapped_stack = {stack.temp, stack.Result};
7977 
7978  buffer[0] = _left->eval_string(c, swapped_stack);
7979 
7980  size_t pos = 1;
7981  for (XPathAstNode* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
7982  assert(pos == count);
7983 
7984  // get total length
7985  size_t length = 0;
7986  for (size_t i = 0; i < count; ++i) length += buffer[i].length();
7987 
7988  // create final string
7989  Char8* Result = static_cast<Char8*>(stack.Result->allocate((length + 1) * sizeof(Char8)));
7990  assert(Result);
7991 
7992  Char8* ri = Result;
7993 
7994  for (size_t j = 0; j < count; ++j)
7995  for (const Char8* bi = buffer[j].c_str(); *bi; ++bi)
7996  *ri++ = *bi;
7997 
7998  *ri = 0;
7999 
8000  return XPathString(Result, true);
8001  }
8002 
8003  XPathString eval_string(const XPathContext& c, const XPathStack& stack)
8004  {
8005  switch (_type)
8006  {
8007  case ast_string_constant:
8008  return XPathStringConst(_data.string);
8009 
8010  case ast_func_local_Name_0:
8011  {
8012  XPathNode na = c.n;
8013 
8014  return XPathStringConst(local_Name(na));
8015  }
8016 
8017  case ast_func_local_Name_1:
8018  {
8019  XPathAllocatorCapture cr(stack.Result);
8020 
8021  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
8022  XPathNode na = ns.first();
8023 
8024  return XPathStringConst(local_Name(na));
8025  }
8026 
8027  case ast_func_Name_0:
8028  {
8029  XPathNode na = c.n;
8030 
8031  return XPathStringConst(qualified_Name(na));
8032  }
8033 
8034  case ast_func_Name_1:
8035  {
8036  XPathAllocatorCapture cr(stack.Result);
8037 
8038  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
8039  XPathNode na = ns.first();
8040 
8041  return XPathStringConst(qualified_Name(na));
8042  }
8043 
8044  case ast_func_namespace_uri_0:
8045  {
8046  XPathNode na = c.n;
8047 
8048  return XPathStringConst(namespace_uri(na));
8049  }
8050 
8051  case ast_func_namespace_uri_1:
8052  {
8053  XPathAllocatorCapture cr(stack.Result);
8054 
8055  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
8056  XPathNode na = ns.first();
8057 
8058  return XPathStringConst(namespace_uri(na));
8059  }
8060 
8061  case ast_func_string_0:
8062  return string_Value(c.n, stack.Result);
8063 
8064  case ast_func_string_1:
8065  return _left->eval_string(c, stack);
8066 
8067  case ast_func_concat:
8068  return eval_string_concat(c, stack);
8069 
8070  case ast_func_substring_before:
8071  {
8072  XPathAllocatorCapture cr(stack.temp);
8073 
8074  XPathStack swapped_stack = {stack.temp, stack.Result};
8075 
8076  XPathString s = _left->eval_string(c, swapped_stack);
8077  XPathString p = _right->eval_string(c, swapped_stack);
8078 
8079  const Char8* pos = FindSubstring(s.c_str(), p.c_str());
8080 
8081  return pos ? XPathString(s.c_str(), pos, stack.Result) : XPathString();
8082  }
8083 
8084  case ast_func_substring_after:
8085  {
8086  XPathAllocatorCapture cr(stack.temp);
8087 
8088  XPathStack swapped_stack = {stack.temp, stack.Result};
8089 
8090  XPathString s = _left->eval_string(c, swapped_stack);
8091  XPathString p = _right->eval_string(c, swapped_stack);
8092 
8093  const Char8* pos = FindSubstring(s.c_str(), p.c_str());
8094  if (!pos) return XPathString();
8095 
8096  const Char8* Result = pos + p.length();
8097 
8098  return s.uses_heap() ? XPathString(Result, stack.Result) : XPathStringConst(Result);
8099  }
8100 
8101  case ast_func_substring_2:
8102  {
8103  XPathAllocatorCapture cr(stack.temp);
8104 
8105  XPathStack swapped_stack = {stack.temp, stack.Result};
8106 
8107  XPathString s = _left->eval_string(c, swapped_stack);
8108  size_t s_length = s.length();
8109 
8110  double first = round_nearest(_right->eval_number(c, stack));
8111 
8112  if (is_nan(first)) return XPathString(); // NaN
8113  else if (first >= s_length + 1) return XPathString();
8114 
8115  size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8116  assert(1 <= pos && pos <= s_length + 1);
8117 
8118  const Char8* rbegin = s.c_str() + (pos - 1);
8119 
8120  return s.uses_heap() ? XPathString(rbegin, stack.Result) : XPathStringConst(rbegin);
8121  }
8122 
8123  case ast_func_substring_3:
8124  {
8125  XPathAllocatorCapture cr(stack.temp);
8126 
8127  XPathStack swapped_stack = {stack.temp, stack.Result};
8128 
8129  XPathString s = _left->eval_string(c, swapped_stack);
8130  size_t s_length = s.length();
8131 
8132  double first = round_nearest(_right->eval_number(c, stack));
8133  double last = first + round_nearest(_right->_next->eval_number(c, stack));
8134 
8135  if (is_nan(first) || is_nan(last)) return XPathString();
8136  else if (first >= s_length + 1) return XPathString();
8137  else if (first >= last) return XPathString();
8138  else if (last < 1) return XPathString();
8139 
8140  size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8141  size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
8142 
8143  assert(1 <= pos && pos <= end && end <= s_length + 1);
8144  const Char8* rbegin = s.c_str() + (pos - 1);
8145  const Char8* rend = s.c_str() + (end - 1);
8146 
8147  return (end == s_length + 1 && !s.uses_heap()) ? XPathStringConst(rbegin) : XPathString(rbegin, rend, stack.Result);
8148  }
8149 
8150  case ast_func_normalize_space_0:
8151  {
8152  XPathString s = string_Value(c.n, stack.Result);
8153 
8154  normalize_space(s.data(stack.Result));
8155 
8156  return s;
8157  }
8158 
8159  case ast_func_normalize_space_1:
8160  {
8161  XPathString s = _left->eval_string(c, stack);
8162 
8163  normalize_space(s.data(stack.Result));
8164 
8165  return s;
8166  }
8167 
8168  case ast_func_translate:
8169  {
8170  XPathAllocatorCapture cr(stack.temp);
8171 
8172  XPathStack swapped_stack = {stack.temp, stack.Result};
8173 
8174  XPathString s = _left->eval_string(c, stack);
8175  XPathString from = _right->eval_string(c, swapped_stack);
8176  XPathString to = _right->_next->eval_string(c, swapped_stack);
8177 
8178  translate(s.data(stack.Result), from.c_str(), to.c_str());
8179 
8180  return s;
8181  }
8182 
8183  case ast_variable:
8184  {
8185  assert(_retType == _data.variable->Type());
8186 
8187  if (_retType == XPathTypeString)
8188  return XPathStringConst(_data.variable->GetString());
8189 
8190  // fallthrough to Type conversion
8191  }
8192 
8193  default:
8194  {
8195  switch (_retType)
8196  {
8197  case XPathTypeBoole:
8198  return XPathStringConst(eval_boolean(c, stack) ? "true" : "false");
8199 
8200  case XPathTypeNumber:
8201  return convert_number_to_string(eval_number(c, stack), stack.Result);
8202 
8203  case XPathTypeNodeSet:
8204  {
8205  XPathAllocatorCapture cr(stack.temp);
8206 
8207  XPathStack swapped_stack = {stack.temp, stack.Result};
8208 
8209  XPathNodeSet_raw ns = eval_NodeSet(c, swapped_stack);
8210  return ns.Empty() ? XPathString() : string_Value(ns.first(), stack.Result);
8211  }
8212 
8213  default:
8214  assert(!"Wrong expression for return Type string");
8215  return XPathString();
8216  }
8217  }
8218  }
8219  }
8220 
8221  XPathNodeSet_raw eval_NodeSet(const XPathContext& c, const XPathStack& stack)
8222  {
8223  switch (_type)
8224  {
8225  case ast_op_union:
8226  {
8227  XPathAllocatorCapture cr(stack.temp);
8228 
8229  XPathStack swapped_stack = {stack.temp, stack.Result};
8230 
8231  XPathNodeSet_raw ls = _left->eval_NodeSet(c, swapped_stack);
8232  XPathNodeSet_raw rs = _right->eval_NodeSet(c, stack);
8233 
8234  // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
8235  rs.SetType(XPathNodeSet::TypeUnsorted);
8236 
8237  rs.append(ls.begin(), ls.end(), stack.Result);
8238  rs.RemoveDuplicates();
8239 
8240  return rs;
8241  }
8242 
8243  case ast_filter:
8244  case ast_filter_posinv:
8245  {
8246  XPathNodeSet_raw set = _left->eval_NodeSet(c, stack);
8247 
8248  // either expression is a number or it contains position() call; sort by document order
8249  if (_type == ast_filter) set.sort_do();
8250 
8251  apply_predicate(set, 0, _right, stack);
8252 
8253  return set;
8254  }
8255 
8256  case ast_func_id:
8257  return XPathNodeSet_raw();
8258 
8259  case ast_step:
8260  {
8261  switch (_axis)
8262  {
8263  case axis_ancestor:
8264  return step_do(c, stack, axis_to_type<axis_ancestor>());
8265 
8266  case axis_ancestor_or_self:
8267  return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
8268 
8269  case axis_attribute:
8270  return step_do(c, stack, axis_to_type<axis_attribute>());
8271 
8272  case axis_GetChild:
8273  return step_do(c, stack, axis_to_type<axis_GetChild>());
8274 
8275  case axis_descendant:
8276  return step_do(c, stack, axis_to_type<axis_descendant>());
8277 
8278  case axis_descendant_or_self:
8279  return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
8280 
8281  case axis_following:
8282  return step_do(c, stack, axis_to_type<axis_following>());
8283 
8284  case axis_following_sibling:
8285  return step_do(c, stack, axis_to_type<axis_following_sibling>());
8286 
8287  case axis_namespace:
8288  // namespaced axis is not supported
8289  return XPathNodeSet_raw();
8290 
8291  case axis_GetParent:
8292  return step_do(c, stack, axis_to_type<axis_GetParent>());
8293 
8294  case axis_preceding:
8295  return step_do(c, stack, axis_to_type<axis_preceding>());
8296 
8297  case axis_preceding_sibling:
8298  return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
8299 
8300  case axis_self:
8301  return step_do(c, stack, axis_to_type<axis_self>());
8302 
8303  default:
8304  assert(!"Unknown axis");
8305  return XPathNodeSet_raw();
8306  }
8307  }
8308 
8309  case ast_step_GetRoot:
8310  {
8311  assert(!_right); // GetRoot step can't have any predicates
8312 
8313  XPathNodeSet_raw ns;
8314 
8315  ns.SetType(XPathNodeSet::TypeSorted);
8316 
8317  if (c.n.GetNode()) ns.push_back(c.n.GetNode().GetRoot(), stack.Result);
8318  else if (c.n.GetAttribute()) ns.push_back(c.n.GetParent().GetRoot(), stack.Result);
8319 
8320  return ns;
8321  }
8322 
8323  case ast_variable:
8324  {
8325  assert(_retType == _data.variable->Type());
8326 
8327  if (_retType == XPathTypeNodeSet)
8328  {
8329  const XPathNodeSet& s = _data.variable->GetNodeSet();
8330 
8331  XPathNodeSet_raw ns;
8332 
8333  ns.SetType(s.Type());
8334  ns.append(s.begin(), s.end(), stack.Result);
8335 
8336  return ns;
8337  }
8338 
8339  // fallthrough to Type conversion
8340  }
8341 
8342  default:
8343  assert(!"Wrong expression for return Type node set");
8344  return XPathNodeSet_raw();
8345  }
8346  }
8347 
8348  bool is_posinv()
8349  {
8350  switch (_type)
8351  {
8352  case ast_func_position:
8353  return false;
8354 
8355  case ast_string_constant:
8356  case ast_number_constant:
8357  case ast_variable:
8358  return true;
8359 
8360  case ast_step:
8361  case ast_step_GetRoot:
8362  return true;
8363 
8364  case ast_predicate:
8365  case ast_filter:
8366  case ast_filter_posinv:
8367  return true;
8368 
8369  default:
8370  if (_left && !_left->is_posinv()) return false;
8371 
8372  for (XPathAstNode* n = _right; n; n = n->_next)
8373  if (!n->is_posinv()) return false;
8374 
8375  return true;
8376  }
8377  }
8378 
8379  XPathValueType retType() const
8380  {
8381  return static_cast<XPathValueType>(_retType);
8382  }
8383  };
8384 
8385  struct XPathParser
8386  {
8387  XPathAllocator* _alloc;
8388  XPathLexer _lexer;
8389 
8390  const Char8* _query;
8391  XPathVariableSet* _variables;
8392 
8393  XPathParseResult* _Result;
8394 
8395  void throw_error(const char* message)
8396  {
8397  _Result->error = message;
8398  _Result->Offset = _lexer.current_pos() - _query;
8399 
8400 
8401  }
8402 
// Signals an out-of-memory condition by throwing std::bad_alloc; never returns.
void throw_error_oom()
{
    std::bad_alloc outOfMemory;

    throw outOfMemory;
}
8407 
8408  void* alloc_node()
8409  {
8410  void* Result = _alloc->allocate_nothrow(sizeof(XPathAstNode));
8411 
8412  if (!Result) throw_error_oom();
8413 
8414  return Result;
8415  }
8416 
8417  const Char8* alloc_string(const XPathLexerString& Value)
8418  {
8419  if (Value.begin)
8420  {
8421  size_t length = static_cast<size_t>(Value.end - Value.begin);
8422 
8423  Char8* c = static_cast<Char8*>(_alloc->allocate_nothrow((length + 1) * sizeof(Char8)));
8424  if (!c) throw_error_oom();
8425 
8426  memcpy(c, Value.begin, length * sizeof(Char8));
8427  c[length] = 0;
8428 
8429  return c;
8430  }
8431  else return 0;
8432  }
8433 
8434  XPathAstNode* ParseFunctionHelper(ast_type_t Type0, ast_type_t Type1, size_t argc, XPathAstNode* args[2])
8435  {
8436  assert(argc <= 1);
8437 
8438  if (argc == 1 && args[0]->retType() != XPathTypeNodeSet) throw_error("Function has to be applied to node set");
8439 
8440  return new (alloc_node()) XPathAstNode(argc == 0 ? Type0 : Type1, XPathTypeString, args[0]);
8441  }
8442 
8443  XPathAstNode* ParseFunction(const XPathLexerString& Name, size_t argc, XPathAstNode* args[2])
8444  {
8445  switch (Name.begin[0])
8446  {
8447  case 'b':
8448  if (Name == "boolean" && argc == 1)
8449  return new (alloc_node()) XPathAstNode(ast_func_boolean, XPathTypeBoole, args[0]);
8450 
8451  break;
8452 
8453  case 'c':
8454  if (Name == "count" && argc == 1)
8455  {
8456  if (args[0]->retType() != XPathTypeNodeSet) throw_error("Function has to be applied to node set");
8457  return new (alloc_node()) XPathAstNode(ast_func_count, XPathTypeNumber, args[0]);
8458  }
8459  else if (Name == "contains" && argc == 2)
8460  return new (alloc_node()) XPathAstNode(ast_func_contains, XPathTypeString, args[0], args[1]);
8461  else if (Name == "concat" && argc >= 2)
8462  return new (alloc_node()) XPathAstNode(ast_func_concat, XPathTypeString, args[0], args[1]);
8463  else if (Name == "ceiling" && argc == 1)
8464  return new (alloc_node()) XPathAstNode(ast_func_ceiling, XPathTypeNumber, args[0]);
8465 
8466  break;
8467 
8468  case 'f':
8469  if (Name == "false" && argc == 0)
8470  return new (alloc_node()) XPathAstNode(ast_func_false, XPathTypeBoole);
8471  else if (Name == "floor" && argc == 1)
8472  return new (alloc_node()) XPathAstNode(ast_func_floor, XPathTypeNumber, args[0]);
8473 
8474  break;
8475 
8476  case 'i':
8477  if (Name == "id" && argc == 1)
8478  return new (alloc_node()) XPathAstNode(ast_func_id, XPathTypeNodeSet, args[0]);
8479 
8480  break;
8481 
8482  case 'l':
8483  if (Name == "last" && argc == 0)
8484  return new (alloc_node()) XPathAstNode(ast_func_last, XPathTypeNumber);
8485  else if (Name == "lang" && argc == 1)
8486  return new (alloc_node()) XPathAstNode(ast_func_lang, XPathTypeBoole, args[0]);
8487  else if (Name == "local-Name" && argc <= 1)
8488  return ParseFunctionHelper(ast_func_local_Name_0, ast_func_local_Name_1, argc, args);
8489 
8490  break;
8491 
8492  case 'n':
8493  if (Name == "Name" && argc <= 1)
8494  return ParseFunctionHelper(ast_func_Name_0, ast_func_Name_1, argc, args);
8495  else if (Name == "namespace-uri" && argc <= 1)
8496  return ParseFunctionHelper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
8497  else if (Name == "normalize-space" && argc <= 1)
8498  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, XPathTypeString, args[0], args[1]);
8499  else if (Name == "not" && argc == 1)
8500  return new (alloc_node()) XPathAstNode(ast_func_not, XPathTypeBoole, args[0]);
8501  else if (Name == "number" && argc <= 1)
8502  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_number_0 : ast_func_number_1, XPathTypeNumber, args[0]);
8503 
8504  break;
8505 
8506  case 'p':
8507  if (Name == "position" && argc == 0)
8508  return new (alloc_node()) XPathAstNode(ast_func_position, XPathTypeNumber);
8509 
8510  break;
8511 
8512  case 'r':
8513  if (Name == "round" && argc == 1)
8514  return new (alloc_node()) XPathAstNode(ast_func_round, XPathTypeNumber, args[0]);
8515 
8516  break;
8517 
8518  case 's':
8519  if (Name == "string" && argc <= 1)
8520  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_string_0 : ast_func_string_1, XPathTypeString, args[0]);
8521  else if (Name == "string-length" && argc <= 1)
8522  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, XPathTypeString, args[0]);
8523  else if (Name == "starts-with" && argc == 2)
8524  return new (alloc_node()) XPathAstNode(ast_func_starts_with, XPathTypeBoole, args[0], args[1]);
8525  else if (Name == "substring-before" && argc == 2)
8526  return new (alloc_node()) XPathAstNode(ast_func_substring_before, XPathTypeString, args[0], args[1]);
8527  else if (Name == "substring-after" && argc == 2)
8528  return new (alloc_node()) XPathAstNode(ast_func_substring_after, XPathTypeString, args[0], args[1]);
8529  else if (Name == "substring" && (argc == 2 || argc == 3))
8530  return new (alloc_node()) XPathAstNode(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, XPathTypeString, args[0], args[1]);
8531  else if (Name == "sum" && argc == 1)
8532  {
8533  if (args[0]->retType() != XPathTypeNodeSet) throw_error("Function has to be applied to node set");
8534  return new (alloc_node()) XPathAstNode(ast_func_sum, XPathTypeNumber, args[0]);
8535  }
8536 
8537  break;
8538 
8539  case 't':
8540  if (Name == "translate" && argc == 3)
8541  return new (alloc_node()) XPathAstNode(ast_func_translate, XPathTypeString, args[0], args[1]);
8542  else if (Name == "true" && argc == 0)
8543  return new (alloc_node()) XPathAstNode(ast_func_true, XPathTypeBoole);
8544 
8545  break;
8546 
8547  default:
8548  break;
8549  }
8550 
8551  throw_error("Unrecognized function or wrong parameter count");
8552 
8553  return 0;
8554  }
8555 
8556  axis_t ParseAxisName(const XPathLexerString& Name, bool& specified)
8557  {
8558  specified = true;
8559 
8560  switch (Name.begin[0])
8561  {
8562  case 'a':
8563  if (Name == "ancestor")
8564  return axis_ancestor;
8565  else if (Name == "ancestor-or-self")
8566  return axis_ancestor_or_self;
8567  else if (Name == "GetAttribute")
8568  return axis_attribute;
8569 
8570  break;
8571 
8572  case 'c':
8573  if (Name == "GetChild")
8574  return axis_GetChild;
8575 
8576  break;
8577 
8578  case 'd':
8579  if (Name == "descendant")
8580  return axis_descendant;
8581  else if (Name == "descendant-or-self")
8582  return axis_descendant_or_self;
8583 
8584  break;
8585 
8586  case 'f':
8587  if (Name == "following")
8588  return axis_following;
8589  else if (Name == "following-sibling")
8590  return axis_following_sibling;
8591 
8592  break;
8593 
8594  case 'n':
8595  if (Name == "namespace")
8596  return axis_namespace;
8597 
8598  break;
8599 
8600  case 'p':
8601  if (Name == "GetParent")
8602  return axis_GetParent;
8603  else if (Name == "preceding")
8604  return axis_preceding;
8605  else if (Name == "preceding-sibling")
8606  return axis_preceding_sibling;
8607 
8608  break;
8609 
8610  case 's':
8611  if (Name == "self")
8612  return axis_self;
8613 
8614  break;
8615 
8616  default:
8617  break;
8618  }
8619 
8620  specified = false;
8621  return axis_GetChild;
8622  }
8623 
8624  nodetest_t ParseNodeTest_type(const XPathLexerString& Name)
8625  {
8626  switch (Name.begin[0])
8627  {
8628  case 'c':
8629  if (Name == "comment")
8630  return nodetest_type_comment;
8631 
8632  break;
8633 
8634  case 'n':
8635  if (Name == "node")
8636  return nodetest_type_node;
8637 
8638  break;
8639 
8640  case 'p':
8641  if (Name == "processing-instruction")
8642  return nodetest_type_pi;
8643 
8644  break;
8645 
8646  case 't':
8647  if (Name == "text")
8648  return nodetest_type_text;
8649 
8650  break;
8651 
8652  default:
8653  break;
8654  }
8655 
8656  return nodetest_none;
8657  }
8658 
8659  // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
8660  XPathAstNode* ParsePrimaryExpression()
8661  {
8662  switch (_lexer.current())
8663  {
8664  case lex_var_ref:
8665  {
8666  XPathLexerString Name = _lexer.contents();
8667 
8668  if (!_variables)
8669  throw_error("Unknown variable: variable set is not provided");
8670 
8671  XPathVariable* var = GetVariable(_variables, Name.begin, Name.end);
8672 
8673  if (!var)
8674  throw_error("Unknown variable: variable set does not contain the given Name");
8675 
8676  _lexer.next();
8677 
8678  return new (alloc_node()) XPathAstNode(ast_variable, var->Type(), var);
8679  }
8680 
8681  case lex_open_brace:
8682  {
8683  _lexer.next();
8684 
8685  XPathAstNode* n = ParseExpression();
8686 
8687  if (_lexer.current() != lex_close_brace)
8688  throw_error("Unmatched braces");
8689 
8690  _lexer.next();
8691 
8692  return n;
8693  }
8694 
8695  case lex_quoted_string:
8696  {
8697  const Char8* Value = alloc_string(_lexer.contents());
8698 
8699  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_string_constant, XPathTypeString, Value);
8700  _lexer.next();
8701 
8702  return n;
8703  }
8704 
8705  case lex_number:
8706  {
8707  double Value = 0;
8708 
8709  if (!convert_Stringo_number(_lexer.contents().begin, _lexer.contents().end, &Value))
8710  throw_error_oom();
8711 
8712  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_number_constant, XPathTypeNumber, Value);
8713  _lexer.next();
8714 
8715  return n;
8716  }
8717 
8718  case lex_string:
8719  {
8720  XPathAstNode* args[2] = {0};
8721  size_t argc = 0;
8722 
8723  XPathLexerString function = _lexer.contents();
8724  _lexer.next();
8725 
8726  XPathAstNode* LastArg = 0;
8727 
8728  if (_lexer.current() != lex_open_brace)
8729  throw_error("Unrecognized function call");
8730  _lexer.next();
8731 
8732  if (_lexer.current() != lex_close_brace)
8733  args[argc++] = ParseExpression();
8734 
8735  while (_lexer.current() != lex_close_brace)
8736  {
8737  if (_lexer.current() != lex_comma)
8738  throw_error("No comma between function arguments");
8739  _lexer.next();
8740 
8741  XPathAstNode* n = ParseExpression();
8742 
8743  if (argc < 2) args[argc] = n;
8744  else LastArg->SetNext(n);
8745 
8746  argc++;
8747  LastArg = n;
8748  }
8749 
8750  _lexer.next();
8751 
8752  return ParseFunction(function, argc, args);
8753  }
8754 
8755  default:
8756  throw_error("Unrecognizable primary expression");
8757 
8758  return 0;
8759  }
8760  }
8761 
8762  // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
8763  // Predicate ::= '[' PredicateExpr ']'
8764  // PredicateExpr ::= Expr
8765  XPathAstNode* ParseFilterExpression()
8766  {
8767  XPathAstNode* n = ParsePrimaryExpression();
8768 
8769  while (_lexer.current() == lex_open_square_brace)
8770  {
8771  _lexer.next();
8772 
8773  XPathAstNode* expr = ParseExpression();
8774 
8775  if (n->retType() != XPathTypeNodeSet) throw_error("Predicate has to be applied to node set");
8776 
8777  bool posinv = expr->retType() != XPathTypeNumber && expr->is_posinv();
8778 
8779  n = new (alloc_node()) XPathAstNode(posinv ? ast_filter_posinv : ast_filter, XPathTypeNodeSet, n, expr);
8780 
8781  if (_lexer.current() != lex_close_square_brace)
8782  throw_error("Unmatched square brace");
8783 
8784  _lexer.next();
8785  }
8786 
8787  return n;
8788  }
8789 
8790  // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
8791  // AxisSpecifier ::= AxisName '::' | '@'?
8792  // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
8793  // NameTest ::= '*' | NCName ':' '*' | QName
8794  // AbbreviatedStep ::= '.' | '..'
8795  XPathAstNode* ParseStep(XPathAstNode* set)
8796  {
8797  if (set && set->retType() != XPathTypeNodeSet)
8798  throw_error("Step has to be applied to node set");
8799 
8800  bool axis_specified = false;
8801  axis_t axis = axis_GetChild; // implied GetChild axis
8802 
8803  if (_lexer.current() == lex_axis_attribute)
8804  {
8805  axis = axis_attribute;
8806  axis_specified = true;
8807 
8808  _lexer.next();
8809  }
8810  else if (_lexer.current() == lex_dot)
8811  {
8812  _lexer.next();
8813 
8814  return new (alloc_node()) XPathAstNode(ast_step, set, axis_self, nodetest_type_node, 0);
8815  }
8816  else if (_lexer.current() == lex_double_dot)
8817  {
8818  _lexer.next();
8819 
8820  return new (alloc_node()) XPathAstNode(ast_step, set, axis_GetParent, nodetest_type_node, 0);
8821  }
8822 
8823  nodetest_t nt_type = nodetest_none;
8824  XPathLexerString nt_Name;
8825 
8826  if (_lexer.current() == lex_string)
8827  {
8828  // node Name test
8829  nt_Name = _lexer.contents();
8830  _lexer.next();
8831 
8832  // was it an axis Name?
8833  if (_lexer.current() == lex_double_colon)
8834  {
8835  // parse axis name
8836  if (axis_specified) throw_error("Two axis specifiers in one step");
8837 
8838  axis = ParseAxisName(nt_Name, axis_specified);
8839 
8840  if (!axis_specified) throw_error("Unknown axis");
8841 
8842  // read actual node test
8843  _lexer.next();
8844 
8845  if (_lexer.current() == lex_multiply)
8846  {
8847  nt_type = nodetest_all;
8848  nt_Name = XPathLexerString();
8849  _lexer.next();
8850  }
8851  else if (_lexer.current() == lex_string)
8852  {
8853  nt_Name = _lexer.contents();
8854  _lexer.next();
8855  }
8856  else throw_error("Unrecognized node test");
8857  }
8858 
8859  if (nt_type == nodetest_none)
8860  {
8861  // node Type test or processing-instruction
8862  if (_lexer.current() == lex_open_brace)
8863  {
8864  _lexer.next();
8865 
8866  if (_lexer.current() == lex_close_brace)
8867  {
8868  _lexer.next();
8869 
8870  nt_type = ParseNodeTest_type(nt_Name);
8871 
8872  if (nt_type == nodetest_none) throw_error("Unrecognized node Type");
8873 
8874  nt_Name = XPathLexerString();
8875  }
8876  else if (nt_Name == "processing-instruction")
8877  {
8878  if (_lexer.current() != lex_quoted_string)
8879  throw_error("Only literals are allowed as arguments to processing-instruction()");
8880 
8881  nt_type = nodetest_pi;
8882  nt_Name = _lexer.contents();
8883  _lexer.next();
8884 
8885  if (_lexer.current() != lex_close_brace)
8886  throw_error("Unmatched brace near processing-instruction()");
8887  _lexer.next();
8888  }
8889  else
8890  throw_error("Unmatched brace near node Type test");
8891 
8892  }
8893  // QName or NCName:*
8894  else
8895  {
8896  if (nt_Name.end - nt_Name.begin > 2 && nt_Name.end[-2] == ':' && nt_Name.end[-1] == '*') // NCName:*
8897  {
8898  nt_Name.end--; // erase *
8899 
8900  nt_type = nodetest_all_in_namespace;
8901  }
8902  else nt_type = nodetest_Name;
8903  }
8904  }
8905  }
8906  else if (_lexer.current() == lex_multiply)
8907  {
8908  nt_type = nodetest_all;
8909  _lexer.next();
8910  }
8911  else throw_error("Unrecognized node test");
8912 
8913  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_step, set, axis, nt_type, alloc_string(nt_Name));
8914 
8915  XPathAstNode* last = 0;
8916 
8917  while (_lexer.current() == lex_open_square_brace)
8918  {
8919  _lexer.next();
8920 
8921  XPathAstNode* expr = ParseExpression();
8922 
8923  XPathAstNode* pred = new (alloc_node()) XPathAstNode(ast_predicate, XPathTypeNodeSet, expr);
8924 
8925  if (_lexer.current() != lex_close_square_brace)
8926  throw_error("Unmatched square brace");
8927  _lexer.next();
8928 
8929  if (last) last->SetNext(pred);
8930  else n->SetRight(pred);
8931 
8932  last = pred;
8933  }
8934 
8935  return n;
8936  }
8937 
8938  // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
8939  XPathAstNode* ParseRelativeLocation_Path(XPathAstNode* set)
8940  {
8941  XPathAstNode* n = ParseStep(set);
8942 
8943  while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
8944  {
8945  lexeme_t l = _lexer.current();
8946  _lexer.next();
8947 
8948  if (l == lex_double_slash)
8949  n = new (alloc_node()) XPathAstNode(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8950 
8951  n = ParseStep(n);
8952  }
8953 
8954  return n;
8955  }
8956 
8957  // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
8958  // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
8959  XPathAstNode* ParseLocationPath()
8960  {
8961  if (_lexer.current() == lex_slash)
8962  {
8963  _lexer.next();
8964 
8965  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_step_GetRoot, XPathTypeNodeSet);
8966 
8967  // relative location Path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone GetRoot Path
8968  lexeme_t l = _lexer.current();
8969 
8970  if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
8971  return ParseRelativeLocation_Path(n);
8972  else
8973  return n;
8974  }
8975  else if (_lexer.current() == lex_double_slash)
8976  {
8977  _lexer.next();
8978 
8979  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_step_GetRoot, XPathTypeNodeSet);
8980  n = new (alloc_node()) XPathAstNode(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8981 
8982  return ParseRelativeLocation_Path(n);
8983  }
8984 
8985  // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
8986  return ParseRelativeLocation_Path(0);
8987  }
8988 
8989  // PathExpr ::= LocationPath
8990  // | FilterExpr
8991  // | FilterExpr '/' RelativeLocationPath
8992  // | FilterExpr '//' RelativeLocationPath
8993  XPathAstNode* ParsePathExpression()
8994  {
8995  // Clarification.
8996  // PathExpr begins with either LocationPath or FilterExpr.
8997  // FilterExpr begins with PrimaryExpr
8998  // PrimaryExpr begins with '$' in case of it being a variable reference,
8999  // '(' in case of it being an expression, string literal, number constant or
9000  // function call.
9001 
9002  if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
9003  _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
9004  _lexer.current() == lex_string)
9005  {
9006  if (_lexer.current() == lex_string)
9007  {
9008  // This is either a function call, or not - if not, we shall proceed with location Path
9009  const Char8* state = _lexer.state();
9010 
9011  while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
9012 
9013  if (*state != '(') return ParseLocationPath();
9014 
9015  // This looks like a function call; however this still can be a node-test. Check it.
9016  if (ParseNodeTest_type(_lexer.contents()) != nodetest_none) return ParseLocationPath();
9017  }
9018 
9019  XPathAstNode* n = ParseFilterExpression();
9020 
9021  if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
9022  {
9023  lexeme_t l = _lexer.current();
9024  _lexer.next();
9025 
9026  if (l == lex_double_slash)
9027  {
9028  if (n->retType() != XPathTypeNodeSet) throw_error("Step has to be applied to node set");
9029 
9030  n = new (alloc_node()) XPathAstNode(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
9031  }
9032 
9033  // select from location Path
9034  return ParseRelativeLocation_Path(n);
9035  }
9036 
9037  return n;
9038  }
9039  else return ParseLocationPath();
9040  }
9041 
9042  // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
9043  XPathAstNode* ParseUnionExpression()
9044  {
9045  XPathAstNode* n = ParsePathExpression();
9046 
9047  while (_lexer.current() == lex_union)
9048  {
9049  _lexer.next();
9050 
9051  XPathAstNode* expr = ParseUnionExpression();
9052 
9053  if (n->retType() != XPathTypeNodeSet || expr->retType() != XPathTypeNodeSet)
9054  throw_error("Union operator has to be applied to node sets");
9055 
9056  n = new (alloc_node()) XPathAstNode(ast_op_union, XPathTypeNodeSet, n, expr);
9057  }
9058 
9059  return n;
9060  }
9061 
9062  // UnaryExpr ::= UnionExpr | '-' UnaryExpr
9063  XPathAstNode* ParseUnaryExpression()
9064  {
9065  if (_lexer.current() == lex_minus)
9066  {
9067  _lexer.next();
9068 
9069  XPathAstNode* expr = ParseUnaryExpression();
9070 
9071  return new (alloc_node()) XPathAstNode(ast_op_negate, XPathTypeNumber, expr);
9072  }
9073  else return ParseUnionExpression();
9074  }
9075 
9076  // MultiplicativeExpr ::= UnaryExpr
9077  // | MultiplicativeExpr '*' UnaryExpr
9078  // | MultiplicativeExpr 'div' UnaryExpr
9079  // | MultiplicativeExpr 'mod' UnaryExpr
9080  XPathAstNode* ParseMultiplicativeExpression()
9081  {
9082  XPathAstNode* n = ParseUnaryExpression();
9083 
9084  while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
9085  (_lexer.contents() == "mod" || _lexer.contents() == "div")))
9086  {
9087  ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
9088  _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
9089  _lexer.next();
9090 
9091  XPathAstNode* expr = ParseUnaryExpression();
9092 
9093  n = new (alloc_node()) XPathAstNode(op, XPathTypeNumber, n, expr);
9094  }
9095 
9096  return n;
9097  }
9098 
9099  // AdditiveExpr ::= MultiplicativeExpr
9100  // | AdditiveExpr '+' MultiplicativeExpr
9101  // | AdditiveExpr '-' MultiplicativeExpr
9102  XPathAstNode* ParseAdditiveExpression()
9103  {
9104  XPathAstNode* n = ParseMultiplicativeExpression();
9105 
9106  while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
9107  {
9108  lexeme_t l = _lexer.current();
9109 
9110  _lexer.next();
9111 
9112  XPathAstNode* expr = ParseMultiplicativeExpression();
9113 
9114  n = new (alloc_node()) XPathAstNode(l == lex_plus ? ast_op_add : ast_op_subtract, XPathTypeNumber, n, expr);
9115  }
9116 
9117  return n;
9118  }
9119 
9120  // RelationalExpr ::= AdditiveExpr
9121  // | RelationalExpr '<' AdditiveExpr
9122  // | RelationalExpr '>' AdditiveExpr
9123  // | RelationalExpr '<=' AdditiveExpr
9124  // | RelationalExpr '>=' AdditiveExpr
9125  XPathAstNode* ParseRelationalExpression()
9126  {
9127  XPathAstNode* n = ParseAdditiveExpression();
9128 
9129  while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
9130  _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
9131  {
9132  lexeme_t l = _lexer.current();
9133  _lexer.next();
9134 
9135  XPathAstNode* expr = ParseAdditiveExpression();
9136 
9137  n = new (alloc_node()) XPathAstNode(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
9138  l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, XPathTypeBoole, n, expr);
9139  }
9140 
9141  return n;
9142  }
9143 
9144  // EqualityExpr ::= RelationalExpr
9145  // | EqualityExpr '=' RelationalExpr
9146  // | EqualityExpr '!=' RelationalExpr
9147  XPathAstNode* ParseEqualityExpression()
9148  {
9149  XPathAstNode* n = ParseRelationalExpression();
9150 
9151  while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
9152  {
9153  lexeme_t l = _lexer.current();
9154 
9155  _lexer.next();
9156 
9157  XPathAstNode* expr = ParseRelationalExpression();
9158 
9159  n = new (alloc_node()) XPathAstNode(l == lex_equal ? ast_op_equal : ast_op_not_equal, XPathTypeBoole, n, expr);
9160  }
9161 
9162  return n;
9163  }
9164 
9165  // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
9166  XPathAstNode* ParseAndExpression()
9167  {
9168  XPathAstNode* n = ParseEqualityExpression();
9169 
9170  while (_lexer.current() == lex_string && _lexer.contents() == "and")
9171  {
9172  _lexer.next();
9173 
9174  XPathAstNode* expr = ParseEqualityExpression();
9175 
9176  n = new (alloc_node()) XPathAstNode(ast_op_and, XPathTypeBoole, n, expr);
9177  }
9178 
9179  return n;
9180  }
9181 
9182  // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
9183  XPathAstNode* ParseOrExpression()
9184  {
9185  XPathAstNode* n = ParseAndExpression();
9186 
9187  while (_lexer.current() == lex_string && _lexer.contents() == "or")
9188  {
9189  _lexer.next();
9190 
9191  XPathAstNode* expr = ParseAndExpression();
9192 
9193  n = new (alloc_node()) XPathAstNode(ast_op_or, XPathTypeBoole, n, expr);
9194  }
9195 
9196  return n;
9197  }
9198 
9199  // Expr ::= OrExpr
9200  XPathAstNode* ParseExpression()
9201  {
9202  return ParseOrExpression();
9203  }
9204 
9205  XPathParser(const Char8* query, XPathVariableSet* variables, XPathAllocator* alloc, XPathParseResult* Result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _Result(Result)
9206  {
9207  }
9208 
9209  XPathAstNode* parse()
9210  {
9211  XPathAstNode* Result = ParseExpression();
9212 
9213  if (_lexer.current() != lex_eof)
9214  {
9215  // there are still unparsed tokens left, error
9216  throw_error("Incorrect query");
9217  }
9218 
9219  return Result;
9220  }
9221 
9222  static XPathAstNode* parse(const Char8* query, XPathVariableSet* variables, XPathAllocator* alloc, XPathParseResult* Result)
9223  {
9224  XPathParser parser(query, variables, alloc, Result);
9225 
9226  return parser.parse();
9227  }
9228  };
9229 
9230  struct XPathQueryImpl
9231  {
9232  static XPathQueryImpl* create()
9233  {
9234  void* memory = Memory::allocate(sizeof(XPathQueryImpl));
9235 
9236  return new (memory) XPathQueryImpl();
9237  }
9238 
9239  static void destroy(void* ptr)
9240  {
9241  if (!ptr) return;
9242 
9243  // free all allocated pages
9244  static_cast<XPathQueryImpl*>(ptr)->alloc.release();
9245 
9246  // free allocator memory (with the first page)
9247  Memory::deallocate(ptr);
9248  }
9249 
9250  XPathQueryImpl(): GetRoot(0), alloc(&block)
9251  {
9252  block.next = 0;
9253  }
9254 
9255  XPathAstNode* GetRoot;
9256  XPathAllocator alloc;
9257  XPathMemoryBlock block;
9258  };
9259 
9260  PUGI__FN XPathString EvaluateString_impl(XPathQueryImpl* impl, const XPathNode& n, XPathStackData& sd)
9261  {
9262  if (!impl) return XPathString();
9263 
9264  XPathContext c(n, 1, 1);
9265 
9266  return impl->GetRoot->eval_string(c, sd.stack);
9267  }
9268 PUGI__NS_END
9269 
9270 namespace XML
9271 {
9272  PUGI__FN XPathNode::XPathNode()
9273  {
9274  }
9275 
9276  PUGI__FN XPathNode::XPathNode(const Node& Node): TargetNode(Node)
9277  {
9278  }
9279 
9280  PUGI__FN XPathNode::XPathNode(const Attribute& attribute_, const Node& GetParent_): TargetNode(attribute_ ? GetParent_ : Node()), _attribute(attribute_)
9281  {
9282  }
9283 
9284  PUGI__FN Node XPathNode::GetNode() const
9285  {
9286  return _attribute ? Node() : TargetNode;
9287  }
9288 
9289  PUGI__FN Attribute XPathNode::GetAttribute() const
9290  {
9291  return _attribute;
9292  }
9293 
9294  PUGI__FN Node XPathNode::GetParent() const
9295  {
9296  return _attribute ? TargetNode : TargetNode.GetParent();
9297  }
9298 
9299  PUGI__FN static void unspecified_bool_XPathNode(XPathNode***)
9300  {
9301  }
9302 
9303  PUGI__FN XPathNode::operator XPathNode::unspecified_bool_type() const
9304  {
9305  return (TargetNode || _attribute) ? unspecified_bool_XPathNode : 0;
9306  }
9307 
9308  PUGI__FN bool XPathNode::operator!() const
9309  {
9310  return !(TargetNode || _attribute);
9311  }
9312 
9313  PUGI__FN bool XPathNode::operator==(const XPathNode& n) const
9314  {
9315  return TargetNode == n.TargetNode && _attribute == n._attribute;
9316  }
9317 
9318  PUGI__FN bool XPathNode::operator!=(const XPathNode& n) const
9319  {
9320  return TargetNode != n.TargetNode || _attribute != n._attribute;
9321  }
9322 
#ifdef __BORLANDC__
// Borland-only overloads so an XPathNode can sit on the left of && and || with a bool.
PUGI__FN bool operator&&(const XPathNode& lhs, bool rhs)
{
    if (!(bool)lhs)
        return false;

    return rhs;
}

PUGI__FN bool operator||(const XPathNode& lhs, bool rhs)
{
    if ((bool)lhs)
        return true;

    return rhs;
}
#endif
9334 
9335  PUGI__FN void XPathNodeSet::_assign(const_iterator begin_, const_iterator end_)
9336  {
9337  assert(begin_ <= end_);
9338 
9339  size_t size_ = static_cast<size_t>(end_ - begin_);
9340 
9341  if (size_ <= 1)
9342  {
9343  // deallocate old buffer
9344  if (Begin != &Storage) internal::Memory::deallocate(Begin);
9345 
9346  // use internal buffer
9347  if (begin_ != end_) Storage = *begin_;
9348 
9349  Begin = &Storage;
9350  End = &Storage + size_;
9351  }
9352  else
9353  {
9354  // make heap copy
9355  XPathNode* storage = static_cast<XPathNode*>(internal::Memory::allocate(size_ * sizeof(XPathNode)));
9356 
9357  if (!storage)
9358  {
9359  throw std::bad_alloc();
9360  }
9361 
9362  memcpy(storage, begin_, size_ * sizeof(XPathNode));
9363 
9364  // deallocate old buffer
9365  if (Begin != &Storage) internal::Memory::deallocate(Begin);
9366 
9367  // finalize
9368  Begin = storage;
9369  End = storage + size_;
9370  }
9371  }
9372 
9373  PUGI__FN XPathNodeSet::XPathNodeSet(): TypeOrder(TypeUnsorted), Begin(&Storage), End(&Storage)
9374  {
9375  }
9376 
9377  PUGI__FN XPathNodeSet::XPathNodeSet(const_iterator begin_, const_iterator end_, CollectionType Type_): TypeOrder(Type_), Begin(&Storage), End(&Storage)
9378  {
9379  _assign(begin_, end_);
9380  }
9381 
9382  PUGI__FN XPathNodeSet::~XPathNodeSet()
9383  {
9384  if (Begin != &Storage) internal::Memory::deallocate(Begin);
9385  }
9386 
9387  PUGI__FN XPathNodeSet::XPathNodeSet(const XPathNodeSet& ns): TypeOrder(ns.TypeOrder), Begin(&Storage), End(&Storage)
9388  {
9389  _assign(ns.Begin, ns.End);
9390  }
9391 
9392  PUGI__FN XPathNodeSet& XPathNodeSet::operator=(const XPathNodeSet& ns)
9393  {
9394  if (this == &ns) return *this;
9395 
9396  TypeOrder = ns.TypeOrder;
9397  _assign(ns.Begin, ns.End);
9398 
9399  return *this;
9400  }
9401 
9403  {
9404  return TypeOrder;
9405  }
9406 
9407  PUGI__FN size_t XPathNodeSet::size() const
9408  {
9409  return End - Begin;
9410  }
9411 
9412  PUGI__FN bool XPathNodeSet::Empty() const
9413  {
9414  return Begin == End;
9415  }
9416 
9417  PUGI__FN const XPathNode& XPathNodeSet::operator[](size_t index) const
9418  {
9419  assert(index < size());
9420  return Begin[index];
9421  }
9422 
9424  {
9425  return Begin;
9426  }
9427 
9429  {
9430  return End;
9431  }
9432 
9433  PUGI__FN void XPathNodeSet::sort(bool reverse)
9434  {
9435  TypeOrder = internal::XPathSort(Begin, End, TypeOrder, reverse);
9436  }
9437 
9438  PUGI__FN XPathNode XPathNodeSet::first() const
9439  {
9440  return internal::XPathFirst(Begin, End, TypeOrder);
9441  }
9442 
9443  PUGI__FN XPathParseResult::XPathParseResult(): error("Internal error"), Offset(0)
9444  {
9445  }
9446 
9447  PUGI__FN XPathParseResult::operator bool() const
9448  {
9449  return error == 0;
9450  }
9451 
9452  PUGI__FN const char* XPathParseResult::Description() const
9453  {
9454  return error ? error : "No error";
9455  }
9456 
9457  PUGI__FN XPathVariable::XPathVariable()
9458  {
9459  }
9460 
9461  PUGI__FN const Char8* XPathVariable::Name() const
9462  {
9463  switch (ValueType)
9464  {
9465  case XPathTypeNodeSet:
9466  return static_cast<const internal::XPathVariableNodeSet*>(this)->Name;
9467 
9468  case XPathTypeNumber:
9469  return static_cast<const internal::XPathVariableNumber*>(this)->Name;
9470 
9471  case XPathTypeString:
9472  return static_cast<const internal::XPathVariableString*>(this)->Name;
9473 
9474  case XPathTypeBoole:
9475  return static_cast<const internal::XPathVariableBoole*>(this)->Name;
9476 
9477  default:
9478  assert(!"Invalid variable Type");
9479  return 0;
9480  }
9481  }
9482 
9483  PUGI__FN XPathValueType XPathVariable::Type() const
9484  {
9485  return ValueType;
9486  }
9487 
9488  PUGI__FN bool XPathVariable::GetBoole() const
9489  {
9490  return (ValueType == XPathTypeBoole) ? static_cast<const internal::XPathVariableBoole*>(this)->Value : false;
9491  }
9492 
9493  PUGI__FN double XPathVariable::GetNumber() const
9494  {
9495  return (ValueType == XPathTypeNumber) ? static_cast<const internal::XPathVariableNumber*>(this)->Value : internal::gen_nan();
9496  }
9497 
9498  PUGI__FN const Char8* XPathVariable::GetString() const
9499  {
9500  const Char8* Value = (ValueType == XPathTypeString) ? static_cast<const internal::XPathVariableString*>(this)->Value : 0;
9501  return Value ? Value : "";
9502  }
9503 
9504  PUGI__FN const XPathNodeSet& XPathVariable::GetNodeSet() const
9505  {
9506  return (ValueType == XPathTypeNodeSet) ? static_cast<const internal::XPathVariableNodeSet*>(this)->Value : internal::dummy_NodeSet;
9507  }
9508 
9509  PUGI__FN bool XPathVariable::Set(bool Value)
9510  {
9511  if (ValueType != XPathTypeBoole) return false;
9512 
9513  static_cast<internal::XPathVariableBoole*>(this)->Value = Value;
9514  return true;
9515  }
9516 
9517  PUGI__FN bool XPathVariable::Set(double Value)
9518  {
9519  if (ValueType != XPathTypeNumber) return false;
9520 
9521  static_cast<internal::XPathVariableNumber*>(this)->Value = Value;
9522  return true;
9523  }
9524 
9525  PUGI__FN bool XPathVariable::Set(const Char8* Value)
9526  {
9527  if (ValueType != XPathTypeString) return false;
9528 
9529  internal::XPathVariableString* var = static_cast<internal::XPathVariableString*>(this);
9530 
9531  // duplicate string
9532  size_t size = (internal::strlength(Value) + 1) * sizeof(Char8);
9533 
9534  Char8* copy = static_cast<Char8*>(internal::Memory::allocate(size));
9535  if (!copy) return false;
9536 
9537  memcpy(copy, Value, size);
9538 
9539  // replace old string
9540  if (var->Value) internal::Memory::deallocate(var->Value);
9541  var->Value = copy;
9542 
9543  return true;
9544  }
9545 
9546  PUGI__FN bool XPathVariable::Set(const XPathNodeSet& Value)
9547  {
9548  if (ValueType != XPathTypeNodeSet) return false;
9549 
9550  static_cast<internal::XPathVariableNodeSet*>(this)->Value = Value;
9551  return true;
9552  }
9553 
9555  {
9556  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
9557  }
9558 
9560  {
9561  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
9562  {
9563  XPathVariable* var = _data[i];
9564 
9565  while (var)
9566  {
9567  XPathVariable* next = var->NextVariable;
9568 
9569  internal::delete_XPathVariable(var->ValueType, var);
9570 
9571  var = next;
9572  }
9573  }
9574  }
9575 
9576  PUGI__FN XPathVariable* XPathVariableSet::Find(const Char8* Name) const
9577  {
9578  const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9579  size_t hash = internal::hash_string(Name) % hash_size;
9580 
9581  // look for existing variable
9582  for (XPathVariable* var = _data[hash]; var; var = var->NextVariable)
9583  if (internal::strequal(var->Name(), Name))
9584  return var;
9585 
9586  return 0;
9587  }
9588 
9589  PUGI__FN XPathVariable* XPathVariableSet::Add(const Char8* Name, XPathValueType Type)
9590  {
9591  const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9592  size_t hash = internal::hash_string(Name) % hash_size;
9593 
9594  // look for existing variable
9595  for (XPathVariable* var = _data[hash]; var; var = var->NextVariable)
9596  if (internal::strequal(var->Name(), Name))
9597  return var->Type() == Type ? var : 0;
9598 
9599  // add new variable
9600  XPathVariable* Result = internal::new_XPathVariable(Type, Name);
9601 
9602  if (Result)
9603  {
9604  Result->ValueType = Type;
9605  Result->NextVariable = _data[hash];
9606 
9607  _data[hash] = Result;
9608  }
9609 
9610  return Result;
9611  }
9612 
9613  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, bool Value)
9614  {
9615  XPathVariable* var = Add(Name, XPathTypeBoole);
9616  return var ? var->Set(Value) : false;
9617  }
9618 
9619  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, double Value)
9620  {
9621  XPathVariable* var = Add(Name, XPathTypeNumber);
9622  return var ? var->Set(Value) : false;
9623  }
9624 
9625  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, const Char8* Value)
9626  {
9627  XPathVariable* var = Add(Name, XPathTypeString);
9628  return var ? var->Set(Value) : false;
9629  }
9630 
9631  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, const XPathNodeSet& Value)
9632  {
9633  XPathVariable* var = Add(Name, XPathTypeNodeSet);
9634  return var ? var->Set(Value) : false;
9635  }
9636 
9637  PUGI__FN XPathVariable* XPathVariableSet::Get(const Char8* Name)
9638  {
9639  return Find(Name);
9640  }
9641 
9642  PUGI__FN const XPathVariable* XPathVariableSet::Get(const Char8* Name) const
9643  {
9644  return Find(Name);
9645  }
9646 
9647  PUGI__FN XPathQuery::XPathQuery(const Char8* query, XPathVariableSet* variables): QueryImplementation(0)
9648  {
9649  internal::XPathQueryImpl* qimpl = internal::XPathQueryImpl::create();
9650 
9651  if (!qimpl)
9652  {
9653  throw std::bad_alloc();
9654  }
9655  else
9656  {
9657  internal::buffer_holder impl_holder(qimpl, internal::XPathQueryImpl::destroy);
9658 
9659  qimpl->GetRoot = internal::XPathParser::parse(query, variables, &qimpl->alloc, &ResultCache);
9660 
9661  if (qimpl->GetRoot)
9662  {
9663  QueryImplementation = static_cast<internal::XPathQueryImpl*>(impl_holder.release());
9664  ResultCache.error = 0;
9665  }
9666  }
9667  }
9668 
9669  PUGI__FN XPathQuery::~XPathQuery()
9670  {
9671  internal::XPathQueryImpl::destroy(QueryImplementation);
9672  }
9673 
9674  PUGI__FN XPathValueType XPathQuery::ReturnType() const
9675  {
9676  if (!QueryImplementation) return XPathTypeNone;
9677 
9678  return static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot->retType();
9679  }
9680 
9681  PUGI__FN bool XPathQuery::EvaluateBoole(const XPathNode& n) const
9682  {
9683  if (!QueryImplementation) return false;
9684 
9685  internal::XPathContext c(n, 1, 1);
9686  internal::XPathStackData sd;
9687 
9688  return static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot->eval_boolean(c, sd.stack);
9689  }
9690 
9691  PUGI__FN double XPathQuery::EvaluateNumber(const XPathNode& n) const
9692  {
9693  if (!QueryImplementation) return internal::gen_nan();
9694 
9695  internal::XPathContext c(n, 1, 1);
9696  internal::XPathStackData sd;
9697 
9698  return static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot->eval_number(c, sd.stack);
9699  }
9700 
9701  PUGI__FN String XPathQuery::EvaluateString(const XPathNode& n) const
9702  {
9703  internal::XPathStackData sd;
9704 
9705  return internal::EvaluateString_impl(static_cast<internal::XPathQueryImpl*>(QueryImplementation), n, sd).c_str();
9706  }
9707 
9708  PUGI__FN size_t XPathQuery::EvaluateString(Char8* buffer, size_t capacity, const XPathNode& n) const
9709  {
9710  internal::XPathStackData sd;
9711 
9712  internal::XPathString r = internal::EvaluateString_impl(static_cast<internal::XPathQueryImpl*>(QueryImplementation), n, sd);
9713 
9714  size_t full_size = r.length() + 1;
9715 
9716  if (capacity > 0)
9717  {
9718  size_t size = (full_size < capacity) ? full_size : capacity;
9719  assert(size > 0);
9720 
9721  memcpy(buffer, r.c_str(), (size - 1) * sizeof(Char8));
9722  buffer[size - 1] = 0;
9723  }
9724 
9725  return full_size;
9726  }
9727 
9728  PUGI__FN XPathNodeSet XPathQuery::EvaluateNodeSet(const XPathNode& n) const
9729  {
9730  if (!QueryImplementation) return XPathNodeSet();
9731 
9732  internal::XPathAstNode* GetRoot = static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot;
9733 
9734  if (GetRoot->retType() != XPathTypeNodeSet)
9735  {
9736  XPathParseResult res;
9737  res.error = "Expression does not evaluate to node set";
9738 
9739  String ErrorMessage(String(res.Description()) + "\nError:" + res.error + "\nAt Offset: " + ToString(res.Offset));
9740 
9742  }
9743 
9744  internal::XPathContext c(n, 1, 1);
9745  internal::XPathStackData sd;
9746 
9747 
9748  internal::XPathNodeSet_raw r = GetRoot->eval_NodeSet(c, sd.stack);
9749 
9750  return XPathNodeSet(r.begin(), r.end(), r.Type());
9751  }
9752 
9753  PUGI__FN const XPathParseResult& XPathQuery::Result() const
9754  {
9755  return ResultCache;
9756  }
9757 
9758  PUGI__FN static void unspecified_bool_XPathQuery(XPathQuery***)
9759  {
9760  }
9761 
9762  PUGI__FN XPathQuery::operator XPathQuery::unspecified_bool_type() const
9763  {
9764  return QueryImplementation ? unspecified_bool_XPathQuery : 0;
9765  }
9766 
9767  PUGI__FN bool XPathQuery::operator!() const
9768  {
9769  return !QueryImplementation;
9770  }
9771 
9772  PUGI__FN XPathNode Node::FindSingleNode(const Char8* query, XPathVariableSet* variables) const
9773  {
9774  XPathQuery q(query, variables);
9775  return FindSingleNode(q);
9776  }
9777 
9778  PUGI__FN XPathNode Node::FindSingleNode(const XPathQuery& query) const
9779  {
9780  XPathNodeSet s = query.EvaluateNodeSet(*this);
9781  return s.Empty() ? XPathNode() : s.first();
9782  }
9783 
9784  PUGI__FN XPathNodeSet Node::FindNodes(const Char8* query, XPathVariableSet* variables) const
9785  {
9786  XPathQuery q(query, variables);
9787  return FindNodes(q);
9788  }
9789 
9790  PUGI__FN XPathNodeSet Node::FindNodes(const XPathQuery& query) const
9791  {
9792  return query.EvaluateNodeSet(*this);
9793  }
9794 }
9795 
9796 #ifdef __BORLANDC__
9797 # pragma option pop
9798 #endif
9799 
9800 // Intel C++ does not properly keep warning state for function templates,
9801 // so popping warning state at the end of translation unit leads to warnings in the middle.
9802 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
9803 # pragma warning(pop)
9804 #endif
9805 
9806 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
9807 #undef PUGI__NO_INLINE
9808 #undef PUGI__STATIC_ASSERT
9809 #undef PUGI__DMC_VOLATILE
9810 #undef PUGI__MSVC_CRT_VERSION
9811 #undef PUGI__NS_BEGIN
9812 #undef PUGI__NS_END
9813 #undef PUGI__FN
9814 #undef PUGI__FN_NO_INLINE
9815 #undef PUGI__IS_CHARTYPE_IMPL
9816 #undef PUGI__IS_CHARTYPE
9817 #undef PUGI__IS_CHARTYPEX
9818 #undef PUGI__SKIPWS
9819 #undef PUGI__OPTSET
9820 #undef PUGI__PUSHNODE
9821 #undef PUGI__POPNODE
9822 #undef PUGI__SCANFOR
9823 #undef PUGI__SCANWHILE
9824 #undef PUGI__ENDSEG
9825 #undef PUGI__THROW_ERROR
9826 #undef PUGI__CHECK_ERROR
9827 } // Mezzanine namespace
9828 
9829 #endif
9830 
9831 /*
9832  * Copyright (c) 2006-2012 Arseny Kapoulkine
9833  *
9834  * Permission is hereby granted, free of charge, to any person
9835  * obtaining a copy of this software and associated documentation
9836  * files (the "Software"), to deal in the Software without
9837  * restriction, including without limitation the rights to use,
9838  * copy, modify, merge, publish, distribute, sublicense, and/or sell
9839  * copies of the Software, and to permit persons to whom the
9840  * Software is furnished to do so, subject to the following
9841  * conditions:
9842  *
9843  * The above copyright notice and this permission notice shall be
9844  * included in all copies or substantial portions of the Software.
9845  *
9846  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
9847  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
9848  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
9849  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
9850  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
9851  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9852  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
9853  * OTHER DEALINGS IN THE SOFTWARE.
9854  */
9855 
9856 /// @endcond
9857 
bool Set(const Char8 *rhs)
Set text.
Node GetNode() const
Get the XML::Node this is referencing.
const unsigned int ParseWnormAttribute
This flag determines if attribute values are normalized using NMTOKENS normalization rules during par...
bool operator!() const
Logical not operator, used as a workaround for the Borland compiler.
size_t HashValue() const
Get a unique identifying value for the Attribute this represents.
Attribute AppendAttribute(const Char8 *Name)
Creates an Attribute and puts it at the end of this Nodes attributes.
bool operator==(const AttributeIterator &rhs) const
Compares this AttributeIterator to another AttributeIterator for equality.
NodeStruct * NodeData
Stores pointers to the Node data and some metadata.
Definition: node.h:98
const XPathNode & operator[](size_t index) const
Indexing operator.
Attribute GetNextAttribute() const
Get the next attribute.
bool operator>(const Attribute &r) const
Compares the internal values to check for inequality.
attribute_iterator attributes_end() const
Get an Attribute iterator that references the one past the last Attribute on this Node...
const Char8 * GetChildValue() const
Retrieve the value of this(or a child's) Nodes PCDATA child Node.
const unsigned int ParseEol
This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
Node FirstElementByPath(const Char8 *Path, Char8 delimiter= '/') const
Search for a node by Path consisting of node names and . or .. elements.
XPathValueType
XPathQuery return type.
const AttributeIterator & operator--()
Decrement the iterator to the next member of the container.
bool operator==(const NamedNodeIterator &rhs) const
Compares this NamedNodeIterator to another NamedNodeIterator for equality.
const unsigned int ParseWconvAttribute
This flag determines if attribute values are normalized using CDATA normalization rules during parsin...
const XPathNode * const_iterator
An iterator trait. Const iterator for XPathNodes.
Definition: xpathnodeset.h:84
AttributeStruct * InternalObject() const
Retrieve a pointer to the internal data.
double GetNumber() const
Get this as a double.
bool AsBool(bool def=false) const
Attempts to convert the value of the attribute to a bool and returns the results.
XPathNodeSet FindNodes(const Char8 *query, XPathVariableSet *variables=0) const
Select a group of nodes by evaluating an XPath query.
Node()
Default constructor. Constructs an empty node.
void Print(Writer &WriterInstance, const Char8 *indent="\t", unsigned int flags=FormatDefault, Encoding DocumentEncoding=EncodingAuto, unsigned int Depth=0) const
Output the XML document using a Writer.
XPathNode()
Default constructor; constructs empty XPath node.
const NodeIterator & operator++()
Increment the iterator to the next member of the container.
Node GetParent() const
Attempt to retrieve the parent of this Node.
bool SetName(const Char8 *rhs)
Set the name of this.
bool Set(bool Value)
Set variable Value; no Type conversion is performed.
Whole AsWhole(Whole def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
bool operator!() const
Used to convert this attribute to the opposite of its normal boolean value.
~Node()
Virtual deconstructor.
TreeWalker()
Default constructor, initializes depth, and can do little else without a fully implemented treewalker...
String ToString(const T &Datum)
Converts whatever to a String as long as a streaming operator is available for it.
Definition: datatypes.h:242
attribute_iterator attributes_begin() const
Get an Attribute iterator that references the first Attribute on this Node.
Character data, i.e. '<![CDATA[text]]>'.
The parser could not determine type of tag.
void sort(bool reverse=false)
Sort the collection in ascending/descending order by document order.
Attribute GetFirstAttribute() const
Get the First Attribute in this Node.
const Char8 * Value() const
Get the Value of this Attribute.
std::basic_string< wchar_t, std::char_traits< wchar_t >, std::allocator< wchar_t > > MEZZ_LIB AsWide(const char *str)
Convert a c-style string to std::wstring containing native encoding (Usually UCS2 on window...
XPathVariable()
Protected default constructor to prevent default construction.
bool operator>(const Node &r) const
Compares the internal values to check for greaterthanness.
void *(* AllocationFunction)(size_t size)
Memory allocation function interface; returns pointer to allocated memory or NULL on failure...
virtual void Write(const void *data, size_t size)
Actually issues the write commands.
Node GetFirstChild() const
Get the first child Node of this Node.
WriterFile(void *FilePtr)
Construct WriterInstance from a FILE* object; void* is used to avoid header dependencies on stdio...
#define MEZZ_EXCEPTION(num, desc)
An easy way to throw exceptions with rich information.
Definition: exception.h:3048
XPathNodeSet()
Default constructor. Constructs empty set.
int Integer
A datatype used to represent any integer close to.
Definition: datatypes.h:154
Node GetParent() const
Get the parent of the XML::Node or XML::Attribute this refers to.
Attribute InsertAttributeAfter(const Char8 *Name, const Attribute &attr)
Creates an Attribute and puts it into the list of this Nodes attributes.
XPathNode first() const
Get first node in the collection by document order.
CollectionType
The different ways a collection may or may not be ordered.
Definition: xpathnodeset.h:76
CollectionType Type() const
Get collection Type.
bool operator==(const XPathNode &n) const
Called when comparing two XPathNode instances for equality.
bool RemoveAttribute(const Attribute &a)
Remove specified Attribute.
const Char8 * AsString(const Char8 *def="") const
Attempts to convert the value of the attribute to a String and returns the results.
NamedNodeIterator()
Default constructor.
bool operator>=(const Node &r) const
Compares the internal values to check for inequality and greaterthanness.
bool operator!() const
Used to convert this node to the opposite of its normal boolean value.
double EvaluateNumber(const XPathNode &n) const
Evaluate expression as double value in the specified context; performs Type conversion if necessary...
bool operator!() const
Used to convert this attribute to the opposite of its normal boolean value.
NodeText & operator=(const Char8 *rhs)
Set text (equivalent to set without error checking)
Parsing error occurred while parsing comment.
A document tree's absolute GetRoot.
bool EvaluateBoole(const XPathNode &n) const
Evaluate expression as boolean value in the specified context; performs Type conversion if necessary...
const_iterator end() const
Get Ending iterator.
bool SetValue(const Char8 *rhs)
Set the value of this.
size_t size() const
Get collection size.
bool operator==(const Node &r) const
Compares the internal values to check equality.
NodeStruct * InternalObject() const
Get internal pointer.
bool operator!=(const NodeIterator &rhs) const
Compares this NodeIterator to another NodeIterator for inequality.
bool SaveFile(const char *Path, const Char8 *indent="\t", unsigned int flags=FormatDefault, Encoding DocumentEncoding=EncodingAuto) const
Save XML to file.
bool Empty() const
Is this storing anything at all?
Node * operator->() const
Get the pointer the Node this points to.
ParseResult Load(std::basic_istream< char, std::char_traits< char > > &stream, unsigned int options=ParseDefault, Encoding DocumentEncoding=EncodingAuto)
Load XML from a stream.
This implements the exception hierarchy for Mezzanine.
const char * Description() const
Get error Description.
Processing instruction, i.e. '<?name?>'.
Parsing error occurred while parsing document type declaration.
Node GetNextSibling() const
Attempt to retrieve the next sibling of this Node.
const char * Description() const
Error message (0 if no error).
Error reading from file or stream.
Node DocumentElement() const
Get document element.
float AsFloat(float def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
XPathValueType Type() const
Get variable type.
XPathNodeSet & operator=(const XPathNodeSet &ns)
Assignment Operator.
UTF8 DocumentEncoding.
const unsigned int ParseWsPcdata
This flag determines if plain character data (NodePcdata) that consist only of whitespace are added t...
Node & operator*() const
Dereferences this Iterator.
This is returned to indicate there were no issues parsing the XML document.
AllocationFunction MEZZ_LIB GetMemoryAllocationFunction()
Get the current allocation function.
float Real
A Datatype used to represent a real floating point number.
Definition: datatypes.h:141
char Char8
A datatype to represent one character.
Definition: datatypes.h:169
Number This corresponds to a double or Real.
const unsigned int FormatSaveFileText
Open file using text mode in XML::Document::SaveFile. This enables special character (i...
void Save(Writer &WriterInstance, const Char8 *indent="\t", unsigned int flags=FormatDefault, Encoding DocumentEncoding=EncodingAuto) const
Save XML document to WriterInstance.
const Char8 * Value() const
Get the Value of this Node.
Node & operator*() const
Dereferences this Iterator.
bool RemoveChild(const Node &n)
Remove specified child element.
bool SetValue(const Char8 *rhs)
Set the value of this.
bool operator<(const Node &r) const
Compares the internal values to check for lessthanness.
AttributeIterator()
Default Constructor, makes a blank iterator.
Node GetLastChild() const
Get the last child Node of this Node.
int AsInt(int def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
Encoding
These flags determine the encoding of input data for an XML document.
bool operator!=(const AttributeIterator &rhs) const
Compares this AttributeIterator to another AttributeIterator for inequality.
double AsDouble(double def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
bool SetName(const Char8 *rhs)
Set the name of this.
virtual ~TreeWalker()
Virtual deconstructor. Tears down a TreeWalker.
void(* DeAllocationFunction)(void *ptr)
Function pointer type for a memory deallocation function interface.
Whole AsWhole(Whole def=0) const
Attempts to convert the value of the attribute to a Whole and returns the results.
std::basic_string< char, std::char_traits< char >, std::allocator< char > > MEZZ_LIB AsUtf8(const wchar_t *str)
Convert a c-style string of wchar_t to std::string containing UTF8.
Plain character data, i.e. 'text'.
Attribute PrependCopy(const Attribute &proto)
Copies an Attribute and puts the copy at the beginning of this Nodes attributes.
const_iterator begin() const
Get Beginning iterator.
Attribute InsertAttributeBefore(const Char8 *Name, const Attribute &attr)
Creates an Attribute and puts it into the list of this Nodes attributes.
Node data() const
Get the data node (NodePcdata or NodeCdata) for this object.
Auto-detect input DocumentEncoding using BOM or < / <? detection; use UTF8 if BOM is not found.
ptrdiff_t OffSetDebug() const
Get node Offset in parsed file/string (in char_t units) for debugging purposes.
Node FindChildbyAttribute(const Char8 *Name, const Char8 *AttrName, const Char8 *AttrValue) const
Find a Node by an Attribute it has.
const NodeIterator & operator--()
Decrement the iterator to the previous member of the container.
Real AsReal(Real def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
String Path(Char8 delimiter= '/') const
Get the absolute path to this Node.
iterator begin() const
Get a Child node iterator that references the first child Node.
const AttributeIterator & operator++()
Increment the iterator to the next member of the container.
unsigned int AsUint(unsigned int def=0) const
Attempts to convert the value of the attribute to an unsigned int and returns the results...
const unsigned int FormatNoDeclaration
Omit default XML declaration even if there is no declaration in the document. This flag is off by def...
void Reset()
Removes all nodes, leaving the empty document.
int AsInt(int def=0) const
Attempts to convert the value of the attribute to an int and returns the results. ...
virtual void Write(const void *data, size_t size)
Construct a Writer from a FILE* object.
iterator end() const
Get a Child node iterator that references one past the last child Node.
ProcessDepth Depth
The current process depth as interpreted by Main.
Definition: mezztest.cpp:82
AttributeIterator attribute_iterator
An iterator for Attribute members on this Node.
Definition: node.h:637
bool Empty() const
Is this storing anything at all?
Parsing error occurred while parsing document declaration/processing instruction. ...
ParseResult LoadBuffer(const void *contents, size_t size, unsigned int options=ParseDefault, Encoding DocumentEncoding=EncodingAuto)
Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the func...
bool operator==(const Attribute &r) const
Compares the internal values to check equality.
bool operator!() const
Logical not operator, used as a workaround for the Borland compiler.
Attribute GetLastAttribute() const
Get the Last Attribute in this Node.
bool operator!=(const Attribute &r) const
Compares the internal values to check inequality.
Node GetRoot() const
Attempt to retrieve the root Node, or the most base Node containing this Node.
bool operator<=(const Attribute &r) const
Compares the internal values to check for inequality.
Parsing error occurred while parsing end element tag.
Integer AsInteger(Integer def=0) const
Attempts to convert the value of the attribute to a Integer and returns the results.
void(* unspecified_bool_type)(Node ***)
Used to prevent casting to numerical types accidentally.
Definition: node.h:103
void MEZZ_LIB SetMemoryManagementFunctions(AllocationFunction allocate, DeAllocationFunction deallocate)
Override default memory management functions. All subsequent allocations/deallocations will be perfor...
XPathValueType ReturnType() const
Get query expression return Type.
Real AsReal(Real def=0) const
Attempts to convert the value of the attribute to a Real and returns the results. ...
NodeType
The types of nodes that could be in the XML Tree.
Node InsertChildAfter(NodeType Type, const Node &node)
Creates a Node and makes it a child of this one, and puts at the middle of the Child Nodes...
bool operator!=(const Node &r) const
Compares the internal values to check inequality.
ParseResult LoadBufferInplace(void *contents, size_t size, unsigned int options=ParseDefault, Encoding DocumentEncoding=EncodingAuto)
Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for...
bool Traverse(TreeWalker &walker)
Perform sophisticated (or whatever) algorithms on this and all descendant Nodes in the XML tree...
Thrown when an XPath query is being parsed but is invalid.
Definition: exception.h:101
const unsigned int ParseDeclaration
This flag determines if document declaration (NodeDeclaration) is added to the DOM tree...
XPathNode FindSingleNode(const Char8 *query, XPathVariableSet *variables=0) const
Select single node by evaluating an XPath query. Returns first node from the resulting node set...
Node PrependChild(NodeType Type=NodeElement)
Creates a Node and makes it a child of this one, and puts at the beginning of the Child Nodes...
const char * error
Error message (0 if no error)
XPathValueType ValueType
What kind of data does this variable store.
Definition: xpathvariable.h:83
bool operator>=(const Attribute &r) const
Compares the internal values to check for inequality.
virtual bool OnTraversalEnd(Node &node)
Called on the root Node of the xml subtree when traversal ends.
Parsing error occurred while parsing start element tag.
Attribute InsertCopyBefore(const Attribute &proto, const Attribute &attr)
Copies an Attribute and puts the copy into the list of this Nodes attributes.
String EvaluateString(const XPathNode &n) const
Evaluate expression as string value in the specified context; performs Type conversion if necessary...
An unknown error, currently nothing should be able to return this status.
File was not found during a loading from filename attempt.
The same document encoding wchar_t has (usually either UTF16 or UTF32)
Attribute GetAttribute() const
Get the XML::Attribute this is referencing.
XPathParseResult()
Default constructor, initializes object to failed state.
Node InsertChildBefore(NodeType Type, const Node &node)
Creates a Node and makes it a child of this one, and puts at the middle of the Child Nodes...
bool operator!=(const NamedNodeIterator &rhs) const
Compares this NamedNodeIterator to another NamedNodeIterator for inequality.
const XPathNodeSet & GetNodeSet() const
Get this as a XPathNodeSet.
XPathNodeSet EvaluateNodeSet(const XPathNode &n) const
Evaluate expression as node set in the specified context.
virtual bool OnTraversalBegin(Node &node)
Called by the root Node of the xml subtree when traversal begins.
Corresponds to the String type.
~XPathVariableSet()
Default Deconstructor, Deletes any XPathVariable it contains.
bool Empty() const
Check if collection is empty.
Document()
Creates an empty document with just a root Node.
Attribute GetPreviousAttribute() const
Get the previous attribute.
float AsFloat(float def=0) const
Attempts to convert the value of the attribute to a float and returns the results.
const Char8 * Name() const
Get the variable name.
Attribute()
Constructs an empty Attribute.
NodeIterator iterator
An iterator for child Nodes that will be easier for members of the std namespace to work with...
Definition: node.h:626
Parsing error occurred while parsing CDATA section.
Attribute & operator=(const Char8 *rhs)
The same as Attribute::SetValue(); without the error return.
unsigned int AsUint(unsigned int def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
Document Type declaration, i.e. '<!DOCTYPE doc>'.
WriterStream(std::basic_ostream< char, std::char_traits< char > > &stream)
A constructor that accepts a stream of characters.
XPathVariable * NextVariable
The Next Variable in a linked structure of XPathVariables.
Definition: xpathvariable.h:87
NodeIterator()
Default Constructor, makes a blank iterator.
bool operator<(const Attribute &r) const
Compares the internal values to check for inequality.
NodeText()
Default constructor. Constructs an empty object.
Real ToReal(const T &Datum)
Converts whatever to a Real as long as the proper streaming operators are available for it...
Definition: datatypes.h:280
Comment tag, i.e. '<!-- text -->'.
XPathVariable * Add(const Char8 *Name, XPathValueType Type)
Add a new variable or get the existing one, if the Types match.
const Char8 * Name() const
Get the name of this Attribute.
DeAllocationFunction MEZZ_LIB GetMemoryDeallocationFunction()
Get the current deallocation function.
Attribute & operator*() const
Dereferences this Iterator.
Unknown Type (query failed to compile)
Node GetPreviousSibling() const
Attempt to retrieve the previous sibling of this Node.
#define MEZZ_LIB
Some platforms require special decorations to denote what is exported/imported in a shared library...
UTF32 with native endianness.
ParseStatus
These statuses are used to help determine what issues, if any the parser had. Returned by Mezzanine::...
Document declaration, i.e. '<?xml version="1.0"?>'.
UTF16 with native endianness.
ParseResult LoadBufferInplaceOwn(void *contents, size_t size, unsigned int options=ParseDefault, Encoding DocumentEncoding=EncodingAuto)
Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for...
const unsigned int ParseEscapes
This flag determines if character and entity references are expanded during parsing. This flag is on by default.
The bulk of the engine components go in this namespace.
Definition: actor.cpp:56
const unsigned int ParseDocType
This flag determines if document type declaration (NodeDoctype) is added to the DOM tree...
ParseResult()
Default constructor, initializes object to failed state.
unsigned long Whole
Whole is an unsigned integer, it will be at least 32bits in size.
Definition: datatypes.h:151
bool Empty() const
Is this storing anything at all?
Parsing error occurred while parsing element attribute.
Integer AsInteger(Integer def=0) const
Get text as a number, or the default Value if conversion did not succeed or object is empty...
const unsigned int FormatRaw
Use raw output mode (no indentation and no line breaks are written). This flag is on by default...
ObjectRange< NodeIterator > GetChildren() const
Get an iterator range for this node's children nodes.
int Depth() const
How many descendants deep are we during traversal.
const unsigned int ParseCdata
This flag determines if CDATA sections (NodeCdata) are added to the DOM tree. This flag is on by defa...
There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or th...
const Char8 * AsString(const Char8 *def="") const
Get text, or the default Value if object is empty.
double AsDouble(double def=0) const
Attempts to convert the value of the attribute to a double and returns the results.
const NamedNodeIterator & operator++()
Increment the iterator to the next member of the container.
Element tag, i.e. '<node/>'.
Parsing error occurred while parsing PCDATA section.
XPathVariableSet()
Default Constructor, Blanks any XPathVariable it contains.
ObjectRange< AttributeIterator > attributes() const
A range of iterators for just the attributes of this node.
const unsigned int ParseComments
This flag determines if comments (NodeComment) are added to the DOM tree. This flag is off by default...
const Char8 * GetString() const
Get text, or "" if object is empty.
bool GetBoole() const
Get this as a bool.
const Char8 * GetString() const
Get this as a c-string.
virtual ~Document()
Tears down a document, and incidentally invalidates all Node and Attribute handles to this document...
bool operator<=(const Node &r) const
Compares the internal values to check for inequality and lessthanness.
const Char8 * Name() const
Get the name of this Node.
NodeText GetText() const
Get text object for the current node.
Node set (XPathNodeSet)
Attribute * operator->() const
Get the pointer to the Attribute this points to.
bool AsBool(bool def=false) const
Get text as bool.
Attribute PrependAttribute(const Char8 *Name)
Creates an Attribute and puts it at the beginning of this Nodes attributes.
size_t HashValue() const
Get hash Value (unique for handles to the same object)
XPathVariable * Get(const Char8 *Name)
Get the named XPathVariable.
Could not allocate memory.
Attribute InsertCopyAfter(const Attribute &proto, const Attribute &attr)
Copies an Attribute and puts the copy into the list of this Nodes attributes.
bool Set(const Char8 *Name, bool Value)
Set contained variable Value; no Type conversion is performed.
Node AppendChild(NodeType Type=NodeElement)
Creates a Node and makes it a child of this one.
ParseResult LoadFile(const char *Path, unsigned int options=ParseDefault, Encoding DocumentEncoding=EncodingAuto)
Load document from file.
NodeType Type() const
Identify what kind of Node this is.
const unsigned int FormatWriteBom
Write encoding-specific Byte Order Mark (BOM) to the output stream. This flag is off by default...
Also called IEC_8859-1 a common encoding on windows, see http://en.wikipedia.org/wiki/ISO/IEC_8859-1 ...
std::string String
A datatype used to represent a series of characters.
Definition: datatypes.h:159
Attribute AppendCopy(const Attribute &proto)
Copies an Attribute and puts the copy at the end of this Nodes attributes.
bool operator==(const NodeIterator &rhs) const
Compares this NodeIterator to another NodeIterator for equality.
const unsigned int ParseWsPcdata_single
This flag determines if plain character data (NodePcdata) that is the only child of the parent node a...
const unsigned int FormatNoEscapes
Don't escape GetAttribute Values and PCDATA contents. This flag is off by default.
Node * operator->() const
Get the pointer to the Node this points to.
Attribute GetAttribute(const Char8 *Name) const
Attempt to get an Attribute on this Node with a given name.
const unsigned int ParsePi
This flag determines if processing instructions (NodePi) are added to the DOM tree. This flag is off by default.
Empty (null) node handle.
Node GetChild(const Char8 *Name) const
Attempt to get a child Node with a given name.
Integer ToInteger(const T &Datum)
Converts whatever to an Integer as long as the proper streaming operators are available for it...
Definition: datatypes.h:258
Whole ToWhole(const T &Datum)
Converts whatever to a Whole as long as the proper streaming operators are available for it...
Definition: datatypes.h:252
bool operator!=(const XPathNode &n) const
Called when comparing two XPathNode instances for inequality.
const XPathParseResult & Result() const
Get parsing Result (used to get compilation errors when XML_NO_EXCEPTIONS is enabled) ...
const unsigned int FormatIndent
Indent the nodes that are written to output stream with as many indentation strings as deep the node ...