Spinning Topp Logo BlackTopp Studios
inc
markupparser.cpp
1 // © Copyright 2010 - 2016 BlackTopp Studios Inc.
2 /* This file is part of The Mezzanine Engine.
3 
4  The Mezzanine Engine is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  The Mezzanine Engine is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with The Mezzanine Engine. If not, see <http://www.gnu.org/licenses/>.
16 */
17 /* The original authors have included a copy of the license specified above in the
18  'Docs' folder. See 'gpl.txt'
19 */
20 /* We welcome the use of the Mezzanine engine to anyone, including companies who wish to
21  Build professional software and charge for their product.
22 
23  However there are some practical restrictions, so if your project involves
24  any of the following you should contact us and we will try to work something
25  out:
26  - DRM or Copy Protection of any kind(except Copyrights)
27  - Software Patents You Do Not Wish to Freely License
28  - Any Kind of Linking to Non-GPL licensed Works
29  - Are Currently In Violation of Another Copyright Holder's GPL License
30  - If You want to change our code and not add a few hundred MB of stuff to
31  your distribution
32 
33  These and other limitations could cause serious legal problems if you ignore
34  them, so it is best to simply contact us or the Free Software Foundation, if
35  you have any questions.
36 
37  Joseph Toppi - toppij@gmail.com
38  John Blackwood - makoenergy02@gmail.com
39 */
40 #ifndef _uimarkupparser_cpp
41 #define _uimarkupparser_cpp
42 
43 #include "UI/markupparser.h"
44 #include "UI/character.h"
45 #include "UI/glyph.h"
46 #include "UI/sprite.h"
47 #include "UI/texttoken.h"
48 
49 #include "unicode.h"
50 #include "exception.h"
51 
52 namespace Mezzanine
53 {
54  namespace UI
55  {
56  ///////////////////////////////////////////////////////////////////////////////
57  // MarkupParser Methods
58 
60  { }
61 
63  { }
64 
66  {
67  TextToken* NewTextToken = new TextToken(Text,TextToken::TT_Text);
68  return NewTextToken;
69  }
70 
72  {
73  // Set up our return
74  TextToken* NewTagToken = NULL;
75  const Integer TextSize = Text.size();
76  // Do some string processing to get some more data
77  UInt32 SlashPos = Text.find_first_of('/');
78  if( SlashPos == 1 /* && TextSize > 3 */ ) {
79  // We now know we have a valid end tag
80  // Get the name
81  String TagName = Text.substr(SlashPos + 1,Text.find_first_of(this->GetMarkupTagEnd()) - SlashPos - 1);
82  NewTagToken = new RangeTagToken(Text,TagName,TextToken::TT_RangeTagEnd);
83  }else if( SlashPos == String::npos ) {
84  // No slashes, so this is a start tag or insert tag
85  // Setup the data we'll be using to parse the relevant data
86  Int32 Position = 0;
87  String SeparatorChars(" =");
88  SeparatorChars.append(1,this->GetMarkupTagEnd());
89 
90  // Get the name
91  Position = Text.find_first_of(SeparatorChars);
92  String TagName = Text.substr(1,Position - 1);
93 
94  // Since we have the tags name, found out if it is a range tag, insert tag, or invalid
95  ConstTagIterator TagIt = this->Tags.find(TagName);
96  if( TagIt != this->Tags.end() ) {
97  // We got a valid tag, but is it a range or insert tag?
98  if( (*TagIt).second->IsRangeTag() ) {
99  NewTagToken = new RangeTagToken(Text,TagName,TextToken::TT_RangeTagStart);
100  }else{
101  NewTagToken = new InsertTagToken(Text,TagName);
102  }
103  }else{
104  // Tag doesn't exist for this parser, mark it as an error
105  NewTagToken = new TextToken(Text,TextToken::TT_Error);
106  }
107 
108  if( NewTagToken->GetTokenType() != TextToken::TT_Error ) {
109  TagToken* CastedTag = static_cast<TagToken*>( NewTagToken );
110  NameValuePairMap TagParams;
111  if( Text[Position] == '=' ) {
112  TagParams["Value"] = Text.substr(Position + 1,TextSize - Position - 1);
113  }else if( Text[Position] == ' ' ) {
114  // Get the parameters
115  String ParamEndChars(" ");
116  ParamEndChars.append(1,this->GetMarkupTagEnd());
117  while( Position < TextSize - 1 )
118  {
119  String ParamName, ParamValue;
120 
121  UInt32 SeparatorPos = Text.find_first_of('=');
122  if( SeparatorPos != String::npos ) ParamName = Text.substr(Position + 1,SeparatorPos - Position - 1);
123  else break;
124 
125  UInt32 PairEndPos = Text.find_first_of(ParamEndChars);
126  if( SeparatorPos != String::npos ) ParamValue = Text.substr(SeparatorPos + 1,PairEndPos - SeparatorPos - 1);
127  else break;
128 
129  TagParams[ParamName] = ParamValue;
130  Position = PairEndPos;
131  }
132  }
133  // Populate the generated parameters
134  CastedTag->Params.swap(TagParams);
135  }
136  }else{
137  // If we're here, there is a slash not where it's supposed to be which we won't support, so error it
138  NewTagToken = new TextToken(Text,TextToken::TT_Error);
139  }
140  // Return the result
141  return NewTagToken;
142  }
143 
144  void MarkupParser::GenerateCharactersFromToken(const TextToken* Token, TextLayer* Layer, const CharacterTraits& Traits, CharacterContainer& Characters) const
145  {
146  // Setup our data
147  Int32 Position = 0;
148  Int32 BytesAdvance = 0;
149  const char* StrBuf = Token->Text.data();
150  // Get generat'in
151  while( static_cast<UInt32>(Position) < Token->Text.size() )
152  {
153  Int32 GlyphID = Unicode::GetIntFromCharacter(BytesAdvance,StrBuf + Position);
154  if( GlyphID == -1 ) {
155  MEZZ_EXCEPTION(ExceptionBase::PARAMETERS_EXCEPTION,"Attempting to parse non-UTF8 encoded markup text. Encode in UTF8 and try again.");
156  }
157 
158  Glyph* TheGlyph = Traits.CharFont->GetGlyph(static_cast<UInt32>(GlyphID));
159  if( TheGlyph == NULL ) {
160  StringStream ExceptionStream;
161  ExceptionStream << "Attempting to parse unknown Glyph ID: " << GlyphID << ". Provided font (" << Traits.CharFont->GetName() << ") does not contain that Glyph.";
163  }
164  /// @todo As the CharacterTraits class expands, so does this logic.
165  Character* NewChar = new Character(TheGlyph,Layer);
166  NewChar->SetCharacterColour(Traits.CharColour);
167  //NewChar->SetHighlightColour(Traits.HLCharColour);
168  Characters.push_back(NewChar);
169 
170  Position += BytesAdvance;
171  }
172  }
173 
174  void MarkupParser::RegenerateTraits(CharacterTraits& Traits, const TagVector& ActiveTags, TextLayer* Layer) const
175  {
176  for( ConstTagVecIterator TagIt = ActiveTags.begin() ; TagIt != ActiveTags.end() ; ++TagIt )
177  {
178  // Ignore the return
179  (*TagIt).second->Process( (*TagIt).first->Params, Traits, Layer );
180  }
181  }
182 
183  ///////////////////////////////////////////////////////////////////////////////
184  // Parsing Methods
185 
186  MarkupParser::CharacterContainer MarkupParser::Parse(const String& Source, const CharacterTraits& InitialTraits, TextLayer* CallingLayer) const
187  {
188  // Tokenize our string
189  TokenString* Tokens = this->Lex(Source);
190 
191  // Do our processing
192  CharacterContainer GeneratedCharacters = this->ParseTextTokens(Tokens,InitialTraits,CallingLayer);
193 
194  // Cleanup and return
195  delete Tokens;
196  Tokens = NULL;
197 
198  return GeneratedCharacters;
199  }
200 
// Processes a collection of text tokens into a list of renderable characters.
// Each token is dispatched on its Type: plain/error text is turned into characters directly, range start
// tags are processed and cached in ActiveTags, range end tags remove their partner from ActiveTags and
// rebuild the traits, and insert tags are processed for a single generated character.
// NOTE(review): the line carrying this definition's signature is absent from this listing. The reference
// listing at the end of this file gives the declaration as
// "virtual CharacterContainer ParseTextTokens(TokenString *Tokens, const CharacterTraits &InitialTraits, TextLayer *CallingLayer) const".
// NOTE(review): the "case" label lines that should precede the blocks opening at listing lines 229, 269 and
// 291 (and possibly one more label before 220) are also absent from this listing - restore them from the
// original file before compiling.
202  {
203  // Character Data
204  CharacterContainer GeneratedCharacters;
205  CharacterTraits CurrentTraits = InitialTraits;
206 
207  // Setup our container that will cache the active tags for our position
208  TagVector ActiveTags;
209 
210  // Lets process some tokens
211  for( TokenString::TokenIterator TokIt = Tokens->BeginToken() ; TokIt != Tokens->EndToken() ; ++TokIt )
212  {
      // Scratch list for this token only; it is spliced onto the result once the token has been handled.
213  CharacterContainer CharacterSegment;
214  switch( (*TokIt)->Type )
215  {
216  // If this is an error token, just treat it as normal text
217  case TextToken::TT_Text:
218  case TextToken::TT_Error:
220  {
221  // Generate and append
222  TextToken* CurrToken = (*TokIt);
223  this->GenerateCharactersFromToken( (*TokIt),CallingLayer,CurrentTraits,CharacterSegment );
224  CurrToken->RenderSize = CharacterSegment.size();
225  GeneratedCharacters.splice(GeneratedCharacters.end(),CharacterSegment);
226  break;
227  }
      // Range start tag handling (judging by the RangeTagToken cast and the ActiveTags push below).
      // Every failure path falls back to rendering the raw tag text as characters.
229  {
230  RangeTagToken* CurrToken = static_cast<RangeTagToken*>( (*TokIt) );
231  // See if this tag is supported by this implementation
232  ConstTagIterator TagIt = this->Tags.find(CurrToken->GetTagName());
233  if( TagIt != Tags.end() ) {
234  // If it is, verify it applies to a range
235  if( (*TagIt).second->IsRangeTag() ) {
236  // Ensure it has a partner
237  if( CurrToken->PartnerTag != NULL ) {
238  // If it's a range token and has a partner then we should be good
239  MarkupTag::ProcessResult Res = (*TagIt).second->Process(CurrToken->Params,CurrentTraits,CallingLayer);
240  if( Res.first ) {
241  if( Res.second ) {
242  // This shouldn't really ever execute
243  CharacterSegment.push_back(Res.second);
244  }
245 
      // Remember the tag so the traits can be rebuilt from the remaining tags when its end tag arrives.
246  ActiveTags.push_back( TokenTagPair( CurrToken,(*TagIt).second ) );
247  }else{
248  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
249  }
250  }else{
251  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
252  }
253  }else{
254  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
255  }
256  }else{
257  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
258  }
259 
260  // If anything was generated, append it
261  if( !CharacterSegment.empty() ) {
262  CurrToken->RenderSize = CharacterSegment.size();
263  GeneratedCharacters.splice(GeneratedCharacters.end(),CharacterSegment);
264  }
265 
266  break;
267  }
      // Range end tag handling (judging by the partner removal below).
269  {
270  RangeTagToken* CurrToken = static_cast<RangeTagToken*>( (*TokIt) );
271  if( CurrToken->PartnerTag != NULL ) {
272  // If the tag has a partner, then it's time it's effects come to an end
      // NOTE(review): erasing from ActiveTags invalidates TagIt, yet the loop keeps incrementing and
      // dereferencing it afterwards - this looks like it needs a "break" right after the erase.
273  for( TagVector::reverse_iterator TagIt = ActiveTags.rbegin() ; TagIt != ActiveTags.rend() ; ++TagIt )
274  {
275  if( (*TagIt).first == CurrToken->PartnerTag )
276  // erase only accepts normal iterators which reverse iterators are offset from, adjust before passing in
277  ActiveTags.erase( --(TagIt.base()) );
278  }
279  // Update our traits based on remaining active tags
280  CurrentTraits = InitialTraits;
281  this->RegenerateTraits(CurrentTraits,ActiveTags,CallingLayer);
282  }else{
283  // End tags always belong to a range, without a partner tag this is just an error
284  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
285  CurrToken->RenderSize = CharacterSegment.size();
286  GeneratedCharacters.splice(GeneratedCharacters.end(),CharacterSegment);
287  }
288  break;
289  }
      // Insert tag handling.
291  {
292  InsertTagToken* CurrToken = static_cast<InsertTagToken*>( (*TokIt) );
293  // See if this tag is supported by this implementation
294  ConstTagIterator TagIt = this->Tags.find(CurrToken->GetTagName());
295  if( TagIt != Tags.end() ) {
      // NOTE(review): in this branch CharacterSegment is filled (by Process or by the fallback) but never
      // spliced into GeneratedCharacters and RenderSize is not set, so the characters appear to be dropped
      // when the segment goes out of scope. Only the unknown-tag branch below splices - confirm intent.
296  MarkupTag::ProcessResult Res = (*TagIt).second->Process(CurrToken->Params,CurrentTraits,CallingLayer);
297  if( Res.first ) {
298  if( Res.second ) {
299  CharacterSegment.push_back(Res.second);
300  }
301  }else{
302  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
303  }
304  }else{
305  this->GenerateCharactersFromToken( CurrToken,CallingLayer,CurrentTraits,CharacterSegment );
306  CurrToken->RenderSize = CharacterSegment.size();
307  GeneratedCharacters.splice(GeneratedCharacters.end(),CharacterSegment);
308  }
309  }// case insert tag
310  }// switch token type
311  }// for each token
312  return GeneratedCharacters;
313  }
314 
315  TokenString* MarkupParser::Lex(const String& Source) const
316  {
317  // Return Data
318  TokenString* RetTokens = new TokenString();
319 
320  size_t Position = 0;
321  // Lets process some data
322  while( Position < Source.size() )
323  {
324  // Find our start tag
325  size_t TagStartPos = Source.find_first_of( this->GetMarkupTagStart(), Position );
326 
327  // Deal with non-tag text if needed
328  if( TagStartPos == String::npos ) {
329  // No tag for the remainder of the string, make our token and then exit
330  RetTokens->PushToken( CreateTextToken( Source.substr(Position) ) );
331  break;
332  }else if( TagStartPos - Position > 0 ) {
333  // We have text between our current position and our tag, make a token for it
334  RetTokens->PushToken( CreateTextToken( Source.substr(Position,TagStartPos - Position) ) );
335  }
336 
337  // If we get this far, we got a tag so find the closing character
338  size_t TagEndPos = Source.find_first_of( this->GetMarkupTagEnd(), TagStartPos );
339 
340  // if we didn't get a match, this isn't a valid token so make a text token instead
341  if( TagEndPos == String::npos ) {
342  RetTokens->PushToken( CreateTextToken( Source.substr(TagStartPos) ) );
343  break;
344  }
345 
346  // Otherwise make a proper tag token and link it if it's an end tag
347  TextToken* NewTagToken = CreateTagToken( Source.substr(TagStartPos,TagEndPos - TagStartPos + 1) );
348  if( NewTagToken->Type == TextToken::TT_RangeTagEnd ) {
349  RangeTagToken* EndToken = static_cast<RangeTagToken*>( NewTagToken );
350  for( TokenString::ReverseTokenIterator TokIt = RetTokens->ReverseBeginToken() ; TokIt != RetTokens->ReverseEndToken() ; ++TokIt )
351  {
352  if( (*TokIt)->Type == TextToken::TT_RangeTagStart ) {
353  RangeTagToken* TagCheck = static_cast<RangeTagToken*>( (*TokIt) );
354  if( TagCheck->PartnerTag == NULL && TagCheck->TagName == EndToken->TagName ) {
355  TagCheck->PartnerTag = EndToken;
356  EndToken->PartnerTag = TagCheck;
357  }
358  }
359  }
360  }
361  RetTokens->PushToken( NewTagToken );
362 
363  // Update our position for the next loop
364  Position = TagEndPos + 1;
365  }
366  // Got our tokens, return 'em
367  return RetTokens;
368  }
369  }//UI
370 }//Mezzanine
371 
372 #endif
int32_t Int32
A 32-bit integer.
Definition: datatypes.h:124
This contains simple tools for indexing with UTF8 characters swiftly.
This class represents a normal text segment from the source string.
Definition: texttoken.h:56
NameValuePairMap Params
The parameters provided for this tag, if any.
Definition: texttoken.h:170
virtual CharacterContainer ParseTextTokens(TokenString *Tokens, const CharacterTraits &InitialTraits, TextLayer *CallingLayer) const
Processes a collection of text tokens into a list of renderable characters.
std::list< Character * > CharacterContainer
Basic container type for the storage of Character instances used during processing by this class...
Definition: markupparser.h:130
virtual ~MarkupParser()
Class destructor.
String TagName
Unconverted version of the tag name.
Definition: texttoken.h:168
This struct represents a markup tag segment from the source string.
Definition: texttoken.h:247
This is a helper class that facilitates operations with collections of tokens generated from Markup P...
Definition: texttoken.h:282
Used to describe a single tag used to insert a special character (such as a sprite).
Definition: texttoken.h:67
Class used to describe a single glyph or character available for text operations. ...
Definition: glyph.h:59
Used to describe any generic error with a token.
Definition: texttoken.h:62
TagVector::const_iterator ConstTagVecIterator
Const Iterator type for TokenTagPair instances being processed by this class.
Definition: markupparser.h:148
virtual Char8 GetMarkupTagStart() const =0
Gets the ID for the character that marks the start of a markup section.
Used to describe the end of a range tag.
Definition: texttoken.h:66
UInt32 RenderSize
The number of rendered characters this token produced.
Definition: texttoken.h:76
#define MEZZ_EXCEPTION(num, desc)
An easy way to throw exceptions with rich information.
Definition: exception.h:3048
int Integer
A datatype used to represent any integer close to.
Definition: datatypes.h:154
ReverseTokenIterator ReverseBeginToken()
Gets a reverse iterator to the last TextToken.
Definition: texttoken.cpp:469
const String & GetTagName() const
Gets the name of the tag this token represents.
Definition: texttoken.cpp:244
virtual TokenString * Lex(const String &Source) const
Converts a string into a series of tokens that can be parsed more readily.
This class stores common data for determining the look of a Character.
This implements the exception hierarchy for Mezzanine.
FontData * CharFont
The font this Character belongs to.
virtual CharacterContainer Parse(const String &Source, const CharacterTraits &InitialTraits, TextLayer *CallingLayer) const
Processes a string encoded in UTF8 into a list of renderable characters.
std::stringstream StringStream
A Datatype used for streaming operations with strings.
Definition: datatypes.h:176
MarkupParser()
Class constructor.
virtual void PushToken(TextToken *ToBePushed)
Appends a new token to the end of this string.
Definition: texttoken.cpp:484
TagContainer::const_iterator ConstTagIterator
Const Iterator type for MarkupTag instances stored by this class.
Definition: markupparser.h:140
TokenContainer::reverse_iterator ReverseTokenIterator
Reverse Iterator type for TextToken instances being stored by this class.
Definition: texttoken.h:292
TagContainer Tags
Map of tags recognized by this parser.
Definition: markupparser.h:157
This class creates and encapsulates a character that can be used in text renders.
Definition: character.h:59
uint32_t UInt32
A 32-bit unsigned integer.
Definition: datatypes.h:126
const String & GetName() const
Gets the name of this font.
Definition: font.cpp:91
virtual void GenerateCharactersFromToken(const TextToken *Token, TextLayer *Layer, const CharacterTraits &Traits, CharacterContainer &Characters) const
Helper method for converting text tokens into characters.
virtual TextToken * CreateTextToken(const String &Text) const
Helper method for creating text tokens.
virtual void RegenerateTraits(CharacterTraits &Traits, const TagVector &ActiveTags, TextLayer *Layer) const
Helper method for regenerating current traits after a tag is disabled.
std::pair< Boole, Character * > ProcessResult
A std::pair used to report the result of a MarkupTag being processed and the character it may have g...
Definition: markupparser.h:76
virtual TextToken::TokenType GetTokenType() const
Gets the type of token this is.
Definition: texttoken.cpp:99
void SetCharacterColour(const ColourValue &Colour)
Sets the fill colour for this character.
Definition: character.cpp:233
Used to describe either a tag token inserting a character, or the start of a range tag...
Definition: texttoken.h:65
Thrown when parameters are checked at runtime and found invalid.
Definition: exception.h:108
TokenIterator EndToken()
Gets an iterator to one past the last TextToken.
Definition: texttoken.cpp:460
TokenIterator BeginToken()
Gets an iterator to the first TextToken.
Definition: texttoken.cpp:457
ColourValue CharColour
The colour to render this Character as.
Int32 GetIntFromCharacter(Int32 &BytesUsed, const char *CurrentCharacter)
Get a number suitable for using in an index from a character string.
Definition: unicode.cpp:86
This struct represents a markup tag segment from the source string.
Definition: texttoken.h:200
virtual Char8 GetMarkupTagEnd() const =0
Gets the ID for the character that marks the end of a markup section.
This struct represents a markup tag segment from the source string.
Definition: texttoken.h:163
std::pair< TagToken *, MarkupTag * > TokenTagPair
An std::pair type used to map a TagToken to the MarkupTag implementation it is calling.
Definition: markupparser.h:142
RangeTagToken * PartnerTag
Pointer to the opening/closing tag for this tag.
Definition: texttoken.h:205
The bulk of the engine components go in this namespace.
Definition: actor.cpp:56
TokenContainer::iterator TokenIterator
Iterator type for TextToken instances being stored by this class.
Definition: texttoken.h:288
ReverseTokenIterator ReverseEndToken()
Gets a reverse iterator to one before the first TextToken.
Definition: texttoken.cpp:472
std::vector< TokenTagPair > TagVector
Basic container type for the storage of TokenTagPair instances used during processing by this class...
Definition: markupparser.h:144
Used to describe a normal text token with plain text.
Definition: texttoken.h:63
TokenType Type
Type of token this is.
Definition: texttoken.h:73
Used to describe a tag token that is syntactically correct, but has another error, such as a range tag missing a partner, or the tag name isn't found.
Definition: texttoken.h:64
String Text
Container for the converted text.
Definition: texttoken.h:79
std::map< String, String > NameValuePairMap
This is a datatype mostly used for describing settings or parameters that can't be declared in advanc...
Definition: datatypes.h:209
std::string String
A datatype used to store a series of characters.
Definition: datatypes.h:159
This is a base class for render layers that render text.
Definition: textlayer.h:64
Glyph * GetGlyph(const UInt32 &GlyphID) const
Gets the glyph corresponding to the provided characters UTF-8 code.
Definition: font.cpp:100
virtual TextToken * CreateTagToken(const String &Text) const
Helper method for creating tag tokens.