Rigs of Rods 2023.09
Soft-body Physics Simulation
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
BBDocument.h
Go to the documentation of this file.
1/*
2 This code is adopted from https://github.com/zethon/bbcpp
3 at commit 852a02dda37a17f458dd68ecf5461141f93edce2.
4 See reprint of original license (MIT) in README.txt
5
6 Changes done in this file:
7 * Added commentary to the private default constructor of BBDocument
8 * Renamed `parseValue` to `parseAttributeValue` for clarity
9 * - added support for quoted attributes
10 * `parseAttributeValue()` removed the `IsAlNum()` and all color/url special character checks because:
11 > '_' was missing from the original code
12 > UTF-8 characters would cause havoc
13 * `parseElementName()` - also support singular `[*]` as list item
14*/
15
16#pragma once
17#include <memory>
18#include <string>
19#include <vector>
20#include <stack>
21#include <stdexcept>
22#include <sstream>
23#include <iostream>
24#include <map>
25#include <iterator>
26#include <cctype>
27#include <cstring>
28
29namespace bbcpp
30{
31
// Returns true when `c` is one of the ASCII decimal digits '0'..'9'.
inline bool IsDigit(char c)
{
    const bool atLeastZero = (c >= '0');
    const bool atMostNine = (c <= '9');
    return atLeastZero && atMostNine;
}
36
// Returns true when `c` is an unaccented Latin letter (a-z or A-Z).
inline bool IsAlpha(char c)
{
    static const char alpha[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // std::strchr() treats the terminating NUL as part of the searched string,
    // so '\0' must be rejected explicitly or it would be reported as a letter.
    return c != '\0' && std::strchr(alpha, c) != nullptr;
}
42
43inline bool IsAlNum(char c)
44{
45 return IsAlpha(c) || IsDigit(c);
46}
47
// Returns true when std::isspace() classifies `c` as whitespace.
inline bool IsSpace(char c)
{
    // std::isspace() takes an int that must be representable as unsigned char,
    // hence the conversion before the call.
    const unsigned char asUnsigned = static_cast<unsigned char>(c);
    return std::isspace(asUnsigned) != 0;
}
52
53class BBNode;
54class BBText;
55class BBElement;
56class BBDocument;
57
58using BBNodePtr = std::shared_ptr<BBNode>;
59using BBTextPtr = std::shared_ptr<BBText>;
60using BBElementPtr = std::shared_ptr<BBElement>;
61
62using BBNodeWeakPtr = std::weak_ptr<BBNode>;
63using BBNodeList = std::vector<BBNodePtr>;
64using BBNodeStack = std::stack<BBNodePtr>;
65using BBDocumentPtr = std::shared_ptr<BBDocument>;
66
67using ParameterMap = std::map<std::string, std::string>;
68
69class BBNode : public std::enable_shared_from_this<BBNode>
70{
71 template<typename NewTypePtrT>
72 NewTypePtrT cast(BBNodePtr node, bool bThrowOnFail)
73 {
74 if (node == nullptr && !bThrowOnFail)
75 {
76 return NewTypePtrT();
77 }
78 else if (node == nullptr)
79 {
80 throw std::invalid_argument("Cannot downcast BBNode, object is null");
81 }
82
83 NewTypePtrT newobj = std::dynamic_pointer_cast<typename NewTypePtrT::element_type, BBNode>(node);
84
85 if (newobj == nullptr && bThrowOnFail)
86 {
87 throw std::invalid_argument("Cannot downcast, object is not correct type");
88 }
89
90 return newobj;
91 }
92
93 template<typename NewTypePtrT>
94 NewTypePtrT cast(BBNodePtr node, bool bThrowOnFail) const
95 {
96 if (node == nullptr && !bThrowOnFail)
97 {
98 return NewTypePtrT();
99 }
100 else if (node == nullptr)
101 {
102 throw std::invalid_argument("Cannot downcast, BBNode object is null");
103 }
104
105 NewTypePtrT newobj = std::dynamic_pointer_cast<typename NewTypePtrT::element_type, BBNode>(node);
106
107 if (newobj == nullptr && bThrowOnFail)
108 {
109 throw std::invalid_argument("Cannot downcast, object is not correct type");
110 }
111
112 return newobj;
113 }
114
115public:
116 enum class NodeType
117 {
118 DOCUMENT,
119 ELEMENT, // [b]bold[/b], [QUOTE], [QUOTE=Username;1234], [QUOTE user=Bob]
120 TEXT, // plain text
122 };
123
124 BBNode(NodeType nodeType, const std::string& name);
125 virtual ~BBNode() = default;
126
127 const std::string& getNodeName() const { return _name; }
128 NodeType getNodeType() const { return _nodeType; }
130
131 const BBNodeList& getChildren() const { return _children; }
132
133 virtual void appendChild(BBNodePtr node)
134 {
135 _children.push_back(node);
136 node->_parent = shared_from_this();
137 }
138
139 template<typename NewTypePtrT>
140 NewTypePtrT downCast(bool bThrowOnFail = true)
141 {
142 return cast<NewTypePtrT>(shared_from_this(), bThrowOnFail);
143 }
144
145 template<typename NewTypePtrT>
146 NewTypePtrT downCast(bool bThrowOnFail = true) const
147 {
148 return cast<NewTypePtrT>(shared_from_this(), bThrowOnFail);
149 }
150
151protected:
152 std::string _name;
156
157 friend class BBText;
158 friend class BBDocument;
159 friend class BBElement;
160};
161
162class BBText : public BBNode
163{
164public:
165 BBText(const std::string& value)
166 : BBNode(BBNode::NodeType::TEXT, value)
167 {
168 // nothing to do
169 }
170
171 virtual ~BBText() = default;
172
173 virtual const std::string getText() const { return _name; }
174
175 void append(const std::string& text)
176 {
177 _name.append(text);
178 }
179};
180
181class BBElement : public BBNode
182{
183public:
185 {
186 SIMPLE, // [b]bold[/b], [code]print("hello")[/code]
187 VALUE, // [QUOTE=Username;12345]This is a quote[/QUOTE] (mostly used by vBulletin)
188 PARAMETER, // [QUOTE user=Bob userid=1234]This is a quote[/QUOTE]
189 CLOSING // [/b], [/code]
190 };
191
192 BBElement(const std::string& name, ElementType et = BBElement::SIMPLE)
193 : BBNode(BBNode::NodeType::ELEMENT, name),
194 _elementType(et)
195 {
196 // nothing to do
197 }
198
199 virtual ~BBElement() = default;
200
201 const ElementType getElementType() const { return _elementType; }
202
203 void setOrAddParameter(const std::string& key, const std::string& value, bool addIfNotExists = true)
204 {
205 _parameters.insert({key,value});
206 }
207
208 std::string findParameter(const std::string& key)
209 {
210 if (_parameters.find(key) == _parameters.end())
211 {
212 return "";
213 }
214
215 return _parameters.at(key);
216 }
217
218 const ParameterMap& getParameters() const { return _parameters; }
219
220private:
223};
224
225class BBDocument : public BBNode
226{
227 // Private for a good reason - this object must be managed by std::shared_ptr,
228 // otherwise parser crashes due to using `shared_from_this()`
229 // See https://stackoverflow.com/questions/25628704/enable-shared-from-this-why-the-crash
231 : BBNode(BBNode::NodeType::DOCUMENT, "#document")
232 {
233 // nothing to do
234 }
235
236 template <typename citerator>
237 citerator parseText(citerator begin, citerator end)
238 {
239 auto endingChar = begin;
240
241 for (auto it = begin; it != end; it++)
242 {
243 if (*it == '[')
244 {
245 endingChar = it;
246 break;
247 }
248 }
249
250 if (endingChar == begin)
251 {
252 endingChar = end;
253 }
254
255 newText(std::string(begin, endingChar));
256
257 return endingChar;
258 }
259
260 template <typename citerator>
261 citerator parseElementName(citerator begin, citerator end, std::string& buf)
262 {
263 auto start = begin;
264 std::stringstream str;
265
266 for (auto it = start; it != end; it++)
267 {
268 // RIGSOFRODS: also support singular [*] as list item.
269 const char c = (char)*it;
270 if (bbcpp::IsAlNum(c) || c == '*')
271 {
272 str << *it;
273 }
274 else
275 {
276 buf.assign(str.str());
277 return it;
278 }
279 }
280
281 return start;
282 }
283
284 template <typename citerator>
285 citerator parseAttributeValue(citerator begin, citerator end, std::string& value)
286 {
287 auto start = begin;
288 while (bbcpp::IsSpace(*start) && start != end)
289 {
290 start++;
291 }
292
293 if (start == end)
294 {
295 // we got to the end and there was nothing but spaces
296 // so return our starting point so the caller can create
297 // a text node with those spaces
298 return end;
299 }
300
301 // RIGSOFRODS: Apparently XenForo allows [] inside alt text:
302 // [ATTACH alt="screenshot_2018-07-16_23-35-33_1_tzC[1].png"]1612[/ATTACH]
303 // .................................................^^^
304 int internal_openbracket_stack = 0;
305
306 // RIGSOFRODS: to properly terminate at closing quotes (if used at all), we must track quotation state.
307 bool quote_mode_known = false;
308 bool is_value_quoted = false;
309
310 std::stringstream temp;
311
312 for (auto it = start; it != end; it++)
313 {
314 // RIGSOFRODS: removed the `IsAlNum()` and all color/url special character checks because:
315 // * '_' was missing from the original code
316 // * UTF-8 characters would cause havoc
317
318 if (*it == '\"') // RIGSOFRODS: added support for quoted attribute values
319 {
320 if (!quote_mode_known)
321 {
322 // Opening quotes - update state and skip the character
323 quote_mode_known = true;
324 is_value_quoted = true;
325 continue;
326 }
327 else if (is_value_quoted)
328 {
329 // Closing quotes - skip the character and return result
330 it++;
331 value.assign(temp.str());
332 return it;
333 }
334 else
335 {
336 // Quotes inside string - just append to value
337 temp << *it;
338 }
339 }
340 else if (*it == '[') // RIGSOFRODS: tolerate [] inside attribute value, if matching
341 {
342 internal_openbracket_stack++;
343 temp << *it;
344 if (!quote_mode_known)
345 {
346 quote_mode_known = true;
347 is_value_quoted = false;
348 }
349 }
350 else if (*it == ']')
351 {
352 if (internal_openbracket_stack == 0)
353 {
354 value.assign(temp.str());
355 return it;
356 }
357 else
358 {
359 internal_openbracket_stack--;
360 temp << *it;
361 }
362 }
363 else
364 {
365 temp << *it;
366 if (!quote_mode_known)
367 {
368 quote_mode_known = true;
369 is_value_quoted = false;
370 }
371 }
372 }
373
374 // if we get here then we're at the end, so we return the starting
375 // point so the callerd can create a text node
376 return end;
377 }
378
379 template <typename citerator>
380 citerator parseKey(citerator begin, citerator end, std::string& keyname)
381 {
382 auto start = begin;
383 while (bbcpp::IsSpace(*start) && start != end)
384 {
385 start++;
386 }
387
388 if (start == end)
389 {
390 // we got to the end and there was nothing but spaces
391 // so return our end point so the caller can create
392 // a text node with those spaces
393 return start;
394 }
395
396 std::stringstream temp;
397
398 // TODO: need to handle spaces after the key name and before
399 // the equal sign (ie. "[style color =red]")
400 for (auto it = start; it != end; it++)
401 {
402 if (bbcpp::IsAlNum(*it))
403 {
404 temp << *it;
405 }
406 else if (*it == '=')
407 {
408 keyname.assign(temp.str());
409 return it;
410 }
411 else
412 {
413 // some invalid character, so return the point where
414 // we stopped parsing
415 return it;
416 }
417 }
418
419 // if we get here then we're at the end, so we return the starting
420 // point so the callerd can create a text node
421 return end;
422 }
423
424 template <typename citerator>
425 citerator parseKeyValuePairs(citerator begin, citerator end, ParameterMap& pairs)
426 {
427 auto current = begin;
428 std::string tempKey;
429 std::string tempVal;
430
431 while (current != end)
432 {
433 current = parseKey(current, end, tempKey);
434 if (tempKey.empty())
435 {
436 pairs.clear();
437 return current;
438 }
439
440 if (*current != '=')
441 {
442 pairs.clear();
443 return current;
444 }
445
446 current = std::next(current);
447 current = parseAttributeValue(current, end, tempVal);
448
449 if (tempKey.empty() || tempVal.empty())
450 {
451 pairs.clear();
452 return current;
453 }
454
455 pairs.insert(std::make_pair(tempKey, tempVal));
456 if (*current == ']')
457 {
458 // this is the only valid condition for key/value pairs so we do
459 // not want to clear `pairs` like in the other cases
460 return current;
461 }
462 }
463
464 return end;
465 }
466
467 template <typename citerator>
468 citerator parseElement(citerator begin, citerator end)
469 {
470 bool closingTag = false;
471
472 // the first non-[ and non-/ character
473 auto nameStart = std::next(begin);
474
475 std::string elementName;
476
477 // this might be a closing tag so mark it
478 if (*nameStart == '/')
479 {
480 closingTag = true;
481 nameStart = std::next(nameStart);
482 }
483
484 auto nameEnd = parseElementName(nameStart, end, elementName);
485
486 // no valid name was found, so bail out
487 if (elementName.empty())
488 {
489 newText(std::string{*begin});
490 return nameEnd;
491 }
492 else if (nameEnd == end)
493 {
494 newText(std::string(begin,end));
495 return end;
496 }
497
498 if (*nameEnd == ']')
499 {
500 // end of element
501 }
502 else if (*nameEnd == '=')
503 {
504 // possibly a QUOTE value element
505 // possibly key-value pairs of a QUOTE
506 ParameterMap pairs;
507
508 auto kvEnd = parseKeyValuePairs(nameStart, end, pairs);
509 if (pairs.size() == 0)
510 {
511 newText(std::string(begin, kvEnd));
512 return kvEnd;
513 }
514 else
515 {
516 newKeyValueElement(elementName, pairs);
517 // TODO: add 'pairs'
518 return std::next(kvEnd);
519 }
520 }
521 else if (*nameEnd == ' ')
522 {
523 // possibly key-value pairs of a QUOTE
524 ParameterMap pairs;
525
526 auto kvEnd = parseKeyValuePairs(nameEnd, end, pairs);
527 if (pairs.size() == 0)
528 {
529 newText(std::string(begin, kvEnd));
530 return kvEnd;
531 }
532 else
533 {
534 newKeyValueElement(elementName, pairs);
535 // TODO: add 'pairs'
536 return std::next(kvEnd);
537 }
538 }
539 else
540 {
541 // some invalid char proceeded the element name, so it's not actually a
542 // valid element, so create it as text and move on
543 newText(std::string(begin,nameEnd));
544 return nameEnd;
545 }
546
547 if (closingTag)
548 {
549 newClosingElement(elementName);
550 }
551 else
552 {
553 newElement(elementName);
554 }
555
556 return std::next(nameEnd);
557 }
558
559public:
561 {
563 return doc;
564 }
565
566 void load(const std::string& bbcode)
567 {
568 load(bbcode.begin(), bbcode.end());
569 }
570
// Parses the character range [begin, end).
//
// At each position a '[' is handed to parseElement(); anything else - or a '['
// for which parseElement() consumed nothing - is handed to parseText(). Parsing
// continues from the position each of them returns until `end` is reached.
template<class Iterator>
void load(Iterator begin, Iterator end)
{
    auto current = begin;

    while (current != end)
    {
        if (*current == '[')
        {
            const Iterator afterElement = parseElement(current, end);
            if (afterElement != current)
            {
                current = afterElement;
                continue;
            }
            // nothing was parsed, treat as text
        }

        current = parseText(current, end);
    }
}
632
633private:
635
636 BBText& newText(const std::string& text = std::string());
637 BBElement& newElement(const std::string& name);
638 BBElement& newClosingElement(const std::string& name);
639 BBElement& newKeyValueElement(const std::string& name, const ParameterMap& pairs);
640};
641
642namespace
643{
644
645std::ostream& operator<<(std::ostream& os, const ParameterMap& params)
646{
647 bool first = true;
648 os << "{ ";
649 for (auto& p : params)
650 {
651 os << (first ? "" : ", ") << "{" << p.first << "=" << p.second << "}";
652 if (first)
653 {
654 first = false;
655 }
656 }
657 return (os << " }");
658}
659
660}
661
662
663
664} // namespace
BBElement & newClosingElement(const std::string &name)
citerator parseElement(citerator begin, citerator end)
Definition BBDocument.h:468
void load(const std::string &bbcode)
Definition BBDocument.h:566
BBElement & newKeyValueElement(const std::string &name, const ParameterMap &pairs)
BBElement & newElement(const std::string &name)
BBNodeStack _stack
Definition BBDocument.h:634
citerator parseKey(citerator begin, citerator end, std::string &keyname)
Definition BBDocument.h:380
citerator parseKeyValuePairs(citerator begin, citerator end, ParameterMap &pairs)
Definition BBDocument.h:425
BBText & newText(const std::string &text=std::string())
citerator parseText(citerator begin, citerator end)
Definition BBDocument.h:237
void load(Iterator begin, Iterator end)
Definition BBDocument.h:572
citerator parseAttributeValue(citerator begin, citerator end, std::string &value)
Definition BBDocument.h:285
static BBDocumentPtr create()
Definition BBDocument.h:560
citerator parseElementName(citerator begin, citerator end, std::string &buf)
Definition BBDocument.h:261
const ParameterMap & getParameters() const
Definition BBDocument.h:218
std::string findParameter(const std::string &key)
Definition BBDocument.h:208
void setOrAddParameter(const std::string &key, const std::string &value, bool addIfNotExists=true)
Definition BBDocument.h:203
ElementType _elementType
Definition BBDocument.h:221
BBElement(const std::string &name, ElementType et=BBElement::SIMPLE)
Definition BBDocument.h:192
virtual ~BBElement()=default
const ElementType getElementType() const
Definition BBDocument.h:201
ParameterMap _parameters
Definition BBDocument.h:222
NewTypePtrT cast(BBNodePtr node, bool bThrowOnFail)
Definition BBDocument.h:72
BBNodePtr getParent() const
Definition BBDocument.h:129
NodeType getNodeType() const
Definition BBDocument.h:128
virtual void appendChild(BBNodePtr node)
Definition BBDocument.h:133
const std::string & getNodeName() const
Definition BBDocument.h:127
virtual ~BBNode()=default
std::string _name
Definition BBDocument.h:152
BBNodeWeakPtr _parent
Definition BBDocument.h:154
BBNodeList _children
Definition BBDocument.h:155
const BBNodeList & getChildren() const
Definition BBDocument.h:131
NewTypePtrT cast(BBNodePtr node, bool bThrowOnFail) const
Definition BBDocument.h:94
NodeType _nodeType
Definition BBDocument.h:153
NewTypePtrT downCast(bool bThrowOnFail=true)
Definition BBDocument.h:140
NewTypePtrT downCast(bool bThrowOnFail=true) const
Definition BBDocument.h:146
virtual const std::string getText() const
Definition BBDocument.h:173
void append(const std::string &text)
Definition BBDocument.h:175
BBText(const std::string &value)
Definition BBDocument.h:165
virtual ~BBText()=default
std::stack< BBNodePtr > BBNodeStack
Definition BBDocument.h:64
std::shared_ptr< BBDocument > BBDocumentPtr
Definition BBDocument.h:65
std::shared_ptr< BBElement > BBElementPtr
Definition BBDocument.h:60
bool IsDigit(char c)
Definition BBDocument.h:32
std::weak_ptr< BBNode > BBNodeWeakPtr
Definition BBDocument.h:62
std::shared_ptr< BBNode > BBNodePtr
Definition BBDocument.h:58
std::map< std::string, std::string > ParameterMap
Definition BBDocument.h:67
bool IsAlpha(char c)
Definition BBDocument.h:37
std::shared_ptr< BBText > BBTextPtr
Definition BBDocument.h:59
std::vector< BBNodePtr > BBNodeList
Definition BBDocument.h:63
bool IsSpace(char c)
Definition BBDocument.h:48
bool IsAlNum(char c)
Definition BBDocument.h:43