CPP-TOOLBOX
Loading...
Searching...
No Matches
cpp_parsing.hpp
Go to the documentation of this file.
1#ifndef CPP_PARSING_HPP
2#define CPP_PARSING_HPP
3
4#include <cctype>
5#include <cstddef>
6#include <iostream>
7#include <memory>
8#include <unordered_set>
9
10#include <ostream>
11#include <string>
12#include <vector>
13
14#include <algorithm>
15#include <functional>
16#include <string>
17#include <unordered_map>
18
19#include <fstream>
20#include <sstream>
21
23
24namespace cpp_parsing {
25
26inline Logger logger("cpp_parsing");
27
// Names of the fundamental C++ types recognised by the parsers in this header.
// Declared `inline` so every translation unit that includes the header shares one
// object; the inline member functions below odr-use this set, which requires that.
inline const std::unordered_set<std::string> cpp_built_in_types = {
    "bool", "char", "double", "float", "long", "short", "int", "void",
};
31
// The C++ signedness specifiers. `inline` gives the header a single shared definition.
inline const std::vector<std::string> cpp_sign_specifier = {"unsigned", "signed"};
// The C++ size specifiers. `inline` gives the header a single shared definition.
inline const std::vector<std::string> cpp_size_specifier = {"short", "long", "long long"};
34
// The class member access specifiers. `inline` gives the header a single shared definition.
inline const std::unordered_set<std::string> access_specifiers = {"public", "protected", "private"};
36
// Spellings of the operator functions that can be overloaded, grouped by category.
// Each spelling appears exactly once: `operator*` serves both multiplication and
// dereference, and `operator<<` / `operator>>` serve both shifting and stream i/o.
// Declared `inline` so the header contributes a single shared definition.
// NOTE(review): shorter spellings precede longer ones that share a prefix (e.g.
// "operator+" before "operator++"); a first-match consumer would need longest-first
// ordering - confirm against the callers.
inline const std::vector<std::string> overloadable_operators = {
    // arithmetic (operator* is also the dereference operator)
    "operator+",
    "operator-",
    "operator*",
    "operator/",
    "operator%",
    "operator++", // prefix/postfix
    "operator--", // prefix/postfix

    // compound assignment
    "operator+=",
    "operator-=",
    "operator*=",
    "operator/=",
    "operator%=",

    // bitwise (operator<< / operator>> are also the stream i/o operators)
    "operator&",
    "operator|",
    "operator^",
    "operator~",
    "operator<<",
    "operator>>",

    // compound bitwise assignment
    "operator&=",
    "operator|=",
    "operator^=",
    "operator<<=",
    "operator>>=",

    // comparison
    "operator==",
    "operator!=",
    "operator<",
    "operator<=",
    "operator>",
    "operator>=",
    "operator<=>", // C++20 spaceship operator

    // logical
    "operator!",
    "operator&&",
    "operator||",

    // assignment
    "operator=",

    // subscript, call, pointer-like
    "operator[]",
    "operator()",
    "operator->",
    "operator->*",

    // comma
    "operator,",

    // memory management
    "operator new",
    "operator new[]",
    "operator delete",
    "operator delete[]",
};
107
// Reserved words that the parsers in this header refuse to accept as a variable or
// type name. The fundamental type names live in `cpp_built_in_types` instead.
// Includes the keywords added in C++20 (concept, consteval, constinit, co_await,
// co_return, co_yield, requires).
// Declared `inline` so the inline parser methods that consult it share one object.
// NOTE(review): `char8_t` is also a C++20 keyword; it is left out because TypeParser
// rejects every word in this set and `char8_t` may currently be relied on to parse
// as a type - confirm and add if appropriate.
inline const std::unordered_set<std::string> cpp_keywords = {
    "alignas",      "alignof",   "and",        "and_eq",       "asm",
    "auto",         "bitand",    "bitor",      "break",        "case",
    "catch",        "char16_t",  "char32_t",   "class",        "compl",
    "concept",      "const",     "consteval",  "constexpr",    "constinit",
    "const_cast",   "continue",  "co_await",   "co_return",    "co_yield",
    "decltype",     "default",   "delete",     "do",           "dynamic_cast",
    "else",         "enum",      "explicit",   "export",       "extern",
    "false",        "for",       "friend",     "goto",         "if",
    "inline",       "mutable",   "namespace",  "new",          "noexcept",
    "not",          "not_eq",    "nullptr",    "operator",     "or",
    "or_eq",        "private",   "protected",  "public",       "register",
    "reinterpret_cast", "requires", "return",  "signed",       "sizeof",
    "static",       "static_assert", "static_cast", "struct",  "switch",
    "template",     "this",      "thread_local", "throw",      "true",
    "try",          "typedef",   "typeid",     "typename",     "union",
    "unsigned",     "using",     "virtual",    "volatile",     "wchar_t",
    "while",        "xor",       "xor_eq"};
125
// Shortens `s` to at most `cutoff` characters for display.
// "..." is appended only when characters were actually dropped, so a string that
// already fits is returned unchanged. A negative `cutoff` is treated as 0.
inline std::string truncate(const std::string &s, int cutoff = 50) {
    const std::size_t limit = cutoff > 0 ? static_cast<std::size_t>(cutoff) : 0;
    if (s.size() <= limit) {
        return s;
    }
    return s.substr(0, limit) + "...";
}
129
// Returns a preview of `input` beginning at `start` and spanning at most `lookahead`
// characters. "..." is appended only when more text follows the preview.
// A negative `start` or `lookahead` is treated as 0; a `start` at or past the end of
// `input` yields an empty string.
inline std::string get_next_part_of_string(const std::string &input, int start, int lookahead = 50) {
    const std::size_t begin = start > 0 ? static_cast<std::size_t>(start) : 0;
    if (begin >= input.size()) {
        return "";
    }

    const std::size_t window = lookahead > 0 ? static_cast<std::size_t>(lookahead) : 0;
    std::string preview = input.substr(begin, window);
    if (input.size() - begin > window) {
        preview += "...";
    }
    return preview;
}
133
135 ParseResult(bool succeeded, std::string parser_name = "", size_t start = 0, size_t end = 0, std::string match = "",
136 std::vector<ParseResult> sub_results = {})
138 sub_results(std::move(sub_results)) {}
139
141 size_t start;
142 size_t end;
143 std::string match;
144 std::string parser_name;
145 std::vector<ParseResult> sub_results;
// Renders this result, and recursively every entry of sub_results, as nested
// "ParseResult { ... }" text and returns it.
// NOTE(review): `mla` is not declared on any line visible here; it is presumably a
// multi-line text accumulator declared at the top of this function - confirm.
146 std::string to_string() const {
148
// A std::function (rather than a plain lambda) so the closure can call itself by name.
149 std::function<void(const ParseResult &)> recurse = [&](const ParseResult &r) {
150 mla.add("ParseResult {");
151 mla.indent();
152 mla.add("succeeded: ", (r.succeeded ? "true" : "false"));
153 mla.add("parser_name: \"", r.parser_name, "\"");
154 mla.add("start: ", r.start, ", end: ", r.end);
155 mla.add("match: \"", r.match, "\"");
156
// The sub_results section is omitted entirely for leaf results.
157 if (!r.sub_results.empty()) {
158 mla.add("sub_results: [");
159 mla.indent();
160 for (const auto &sub : r.sub_results) {
161 recurse(sub);
162 }
163 mla.unindent();
164 mla.add("]");
165 }
166
167 mla.unindent();
168 mla.add("}");
169 };
170
171 recurse(*this);
172 return mla.str();
173 }
174};
175
176ParseResult clean_parse_result(const ParseResult &r);
177
178const cpp_parsing::ParseResult *find_first_by_name(const cpp_parsing::ParseResult *root, const std::string &target);
179
180// Find any node whose parser_name contains substring `substr`. Example: "type_with_optional_reference".
182 const std::string &substr);
183
184// Collect all nodes with parser_name == target (DFS)
185void collect_by_name(const cpp_parsing::ParseResult *root, const std::string &target,
186 std::vector<const cpp_parsing::ParseResult *> &out);
187
188ParseResult parse_source_or_header_file(const std::string &source_code_path);
189
190std::vector<std::pair<std::string, std::string>> bfs_collect_matches(const cpp_parsing::ParseResult *root,
191 const std::vector<std::string> &names);
192
193std::string node_text(const cpp_parsing::ParseResult *node);
194
// Deprecated in favor of ParseResult::to_string(); remove soon.
196std::ostream &print_parse_result(std::ostream &os, const ParseResult &result, int indent = 0);
197std::ostream &operator<<(std::ostream &os, const ParseResult &result);
198
200 public:
201 explicit CharParser(std::string name = "") : name(std::move(name)) {}
202
203 virtual ParseResult parse(const std::string &input, size_t start = 0) const = 0;
204
205 virtual ~CharParser() = default;
206
207 std::string name;
208};
209
210using CharParserPtr = std::shared_ptr<CharParser>;
211
212// helper to create parsers easier
218CharParserPtr until_char(std::vector<char> target_chars, bool inclusive = true, bool ignore_in_strings_and_chars = true,
219 const std::string &name = "");
220
221CharParserPtr literal(const std::string &s);
222
223inline std::vector<CharParserPtr> create_literal_parsers(std::vector<std::string> literals) {
224 std::vector<CharParserPtr> ls;
225 for (const auto &l : literals) {
226 auto lp = literal(l);
227 ls.push_back(lp);
228 }
229 return ls;
230}
231
232CharParserPtr matching_string_pair(const std::string &name = "matching_braces", std::string left = "{",
233 std::string right = "}");
234CharParserPtr nested_string_pair(CharParserPtr parser, const std::string &name = "nested_braces",
235 std::string left = "{", std::string right = "}");
236CharParserPtr repeating(CharParserPtr parser, const std::string &name = "repeating");
237CharParserPtr optional(CharParserPtr parser, const std::string &name = "optional");
239CharParserPtr if_then(std::shared_ptr<CharParser> condition_parser, std::shared_ptr<CharParser> then_parser,
240 const std::string &name = "if_then");
241CharParserPtr any_of(std::vector<CharParserPtr> parsers, const std::string &name = "any_of");
242CharParserPtr not_any_of(std::shared_ptr<CharParser> inner, std::unordered_set<std::string> forbidden,
243 std::string name = "not_any_of");
244
245CharParserPtr sequence(std::vector<CharParserPtr> parsers, const std::string &name = "sequence");
246
247inline void log_start_of_parser(const std::string &name, const std::string &input, size_t start) {
248 logger.debug("at position {}, rest of text: {}", start, get_next_part_of_string(input, start));
249}
250
252 public:
253 using TransformFn = std::function<ParseResult(const ParseResult &)>;
254
255 TransformParser(std::shared_ptr<CharParser> inner, TransformFn fn, std::string name = "")
256 : CharParser(std::move(name)), inner(std::move(inner)), fn(std::move(fn)) {}
257
258 ParseResult parse(const std::string &input, size_t start = 0) const override {
259 auto result = inner->parse(input, start);
260 return fn(result);
261 }
262
263 private:
264 std::shared_ptr<CharParser> inner;
265 TransformFn fn;
266};
267
269 public:
270 explicit DecimalLiteralParser(std::string name = "decimal_literal") : CharParser(std::move(name)) {}
271
272 ParseResult parse(const std::string &input, size_t start = 0) const override {
273 size_t i = start;
274 while (i < input.size() && std::isdigit(static_cast<unsigned char>(input[i]))) {
275 ++i;
276 }
277
278 if (i == start) {
279 // No digits consumed → fail
280 return ParseResult(false, name, start, start, "");
281 }
282
283 std::string matched = input.substr(start, i - start);
284 return ParseResult(true, name, start, i, matched);
285 }
286};
287
289 public:
290 IdentifierParser() : CharParser("identifier") {}
291
292 ParseResult parse(const std::string &input, size_t start) const override {
293 LogSection ls(logger, "{} parser", name);
294 log_start_of_parser(name, input, start);
295
296 size_t pos = start;
297 const size_t len = input.size();
298
299 if (pos >= len) {
300 logger.debug("{} parser failed: start position {} beyond input length {}", name, start, len);
301 return {false, name, start, start, "", {}};
302 }
303
304 char c = input[pos];
305 if (!isIdentifierStartChar(c)) {
306 logger.debug("{} parser failed: first char '{}' at position {} is not valid start char", name, c, pos);
307 return {false, name, start, start, "", {}};
308 }
309
310 ++pos;
311 while (pos < len && isIdentifierContinueChar(input[pos])) {
312 ++pos;
313 }
314
315 std::string matched = input.substr(start, pos - start);
316 logger.debug("{} parser succeeded: matched '{}' from {} to {}", name, matched, start, pos);
317
318 return {true, name, start, pos, std::move(matched), {}};
319 }
320
321 private:
// True when `c` may begin an identifier: '_' or a letter according to std::isalpha.
static bool isIdentifierStartChar(char c) {
    if (c == '_') {
        return true;
    }
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
}
// True when `c` may continue an identifier: '_' or a letter/digit according to std::isalnum.
static bool isIdentifierContinueChar(char c) {
    if (c == '_') {
        return true;
    }
    return std::isalnum(static_cast<unsigned char>(c)) != 0;
}
324};
325
327 public:
328 explicit OptionalParser(std::shared_ptr<CharParser> inner, const std::string &name = "optional")
329 : CharParser(name), inner_parser(std::move(inner)) {}
330
331 ParseResult parse(const std::string &input, size_t start) const override {
332 LogSection ls(logger, "{} parser", name);
333 log_start_of_parser(name, input, start);
334
335 auto result = inner_parser->parse(input, start);
336 logger.debug("{} got out", name);
337
338 if (result.succeeded) {
339 logger.debug("{} parser: inner parser succeeded, returning {}", name, result.to_string());
340 return result;
341 } else {
342 ParseResult result(true, name, start, start, "");
343 logger.debug("OptionalParser: inner parser failed, returning original position ");
344 return result;
345 }
346 }
347
348 private:
349 std::shared_ptr<CharParser> inner_parser;
350};
351
352class IfThenParser : public CharParser {
353 public:
354 IfThenParser(std::shared_ptr<CharParser> condition_parser, std::shared_ptr<CharParser> then_parser,
355 const std::string &name = "if_then")
356 : CharParser(name), condition(std::move(condition_parser)), then_clause(std::move(then_parser)) {}
357
358 ParseResult parse(const std::string &input, size_t start) const override {
359 LogSection ls(logger, "{} parser", name);
360 log_start_of_parser(name, input, start);
361
362 size_t current = start;
363 std::vector<ParseResult> results;
364
365 // Always attempt the condition
366 auto first_result = condition->parse(input, current);
367 results.push_back(first_result);
368
369 if (!first_result.succeeded) {
370 // Return with just the condition result
371 return {false, name, start, first_result.end, text_utils::get_substring(input, start, first_result.end),
372 results};
373 }
374
375 // If condition succeeded, always attempt then_clause
376 current = first_result.end;
377 auto second_result = then_clause->parse(input, current);
378 results.push_back(second_result);
379
380 return {first_result.succeeded && second_result.succeeded, name, start, second_result.end,
381 text_utils::get_substring(input, start, second_result.end), results};
382 }
383
384 private:
385 std::shared_ptr<CharParser> condition;
386 std::shared_ptr<CharParser> then_clause;
387};
388
389// DeferredParser: holds a function returning a parser, the use case is so that we can define recursive parsers without
390// having circular dependencies
392 // Mutable shared_ptr so it can be assigned later
393 std::shared_ptr<CharParser> actual_parser;
394
395 public:
396 DeferredParser() : CharParser("deferred"), actual_parser(nullptr) {}
397
398 // Setter to assign the actual parser later
399 void set_parser(CharParserPtr parser) { actual_parser = std::move(parser); }
400
401 ParseResult parse(const std::string &input, size_t start) const override {
402
403 LogSection ls(logger, "{} parser", name);
404 log_start_of_parser(name, input, start);
405
406 if (!actual_parser) {
407 throw std::runtime_error("DeferredParser: actual parser not set");
408 }
409 return actual_parser->parse(input, start);
410 }
411};
412
414 public:
415 OptionalWhitespaceParser() : CharParser("optional_whitespace") {}
416
417 ParseResult parse(const std::string &input, size_t start) const override {
418 LogSection ls(logger, "{} parser", name);
419 log_start_of_parser(name, input, start);
420
421 size_t i = start;
422 while (i < input.size()) {
423 char c = input[i];
424 if (std::isspace(static_cast<unsigned char>(c))) {
425 logger.debug(" Whitespace at position {}", i);
426 ++i;
427 } else {
428 logger.debug(" Non-whitespace at position {} we got {} instead, stopping", i, c);
429 break;
430 }
431 }
432
433 std::string ws = input.substr(start, i - start);
434 return {true, name, start, i, ws};
435 }
436};
437
439 public:
440 VariableParser() : CharParser("variable") {}
441
442 ParseResult parse(const std::string &input, size_t start) const override {
443 size_t i = start;
444
445 LogSection ls(logger, "{} parser", name);
446 log_start_of_parser(name, input, start);
447
448 if (i >= input.size()) {
449 logger.debug(" Empty input or out of bounds");
450 return {false, name, i, i, ""};
451 }
452
453 char first_char = input[i];
454 if (!(std::isalpha(first_char) || first_char == '_')) {
455 logger.debug(" First character '{}' is not a valid start of variable", first_char);
456 return {false, name, i, i, ""};
457 }
458
459 ++i;
460 while (i < input.size()) {
461 char c = input[i];
462 if (std::isalnum(c) || c == '_') {
463 ++i;
464 } else {
465 break;
466 }
467 }
468
469 std::string var_name = input.substr(start, i - start);
470 logger.debug(" Parsed variable name: '{}'", var_name);
471
472 bool is_cpp_keyword = cpp_keywords.count(var_name) > 0 or cpp_built_in_types.count(var_name) > 0;
473 if (is_cpp_keyword) {
474 logger.debug(" Rejected: '{}' is a C++ keyword", var_name);
475 return {false, name, i, i, ""};
476 }
477
478 return {true, name, start, i, var_name};
479 }
480};
481
482class TypeParser : public CharParser {
483 public:
484 TypeParser() : CharParser("type") {}
485 ParseResult parse(const std::string &input, size_t start) const override {
486 return parse_type_internal(input, start, 0);
487 }
488
489 private:
490 bool is_valid_char(char c) const { return std::isalnum(static_cast<unsigned char>(c)) || c == '_' || c == ':'; }
491
492 ParseResult parse_type_internal(const std::string &input, size_t start, int depth) const {
493 size_t i = start;
494
495 LogSection ls(logger, "{} parser", name);
496 log_start_of_parser(name, input, start);
497
498 while (i < input.size()) {
499 char c = input[i];
500 logger.debug(" At position {}, char = '{}'", i, c);
501
502 if (is_valid_char(c)) {
503 logger.debug(" Valid char, continue");
504 ++i;
505 } else if (c == '<') {
506 logger.debug(" Found '<', parsing type argument list...");
507 ++i; // consume '<'
508
509 while (i < input.size()) {
510 // Parse a type argument recursively
511 auto inner_result = parse_type_internal(input, i, depth + 1);
512 if (!inner_result.succeeded) {
513 logger.debug(" Failed to parse inner type at position {}", i);
514 return {false, name, i, i, ""};
515 }
516
517 i = inner_result.end;
518 logger.debug(" Parsed type argument up to position ", i);
519
520 if (i >= input.size()) {
521 logger.debug(" Unexpected end of input after type argument");
522 return {false, name, i, i, ""};
523 }
524
525 if (input[i] == ',') {
526 if (input[i] == ',') {
527 logger.debug(" Found ',', continuing to next type argument");
528 ++i; // consume ','
529
530 // Skip whitespace after comma
531 while (i < input.size() && std::isspace(static_cast<unsigned char>(input[i]))) {
532 ++i;
533 }
534
535 continue;
536 }
537 } else if (input[i] == '>') {
538 logger.debug(" Found matching '>' at position {}", i);
539 ++i; // consume '>'
540 if (depth == 0) {
541 return {true, name, start, i, text_utils::get_substring(input, start, i)};
542 } else {
543 return {true, name, start, i, text_utils::get_substring(input, start, i)};
544 }
545 } else {
546 logger.debug(" Unexpected character '{}' while parsing type "
547 "argument list ",
548 input[i]);
549 return {false, name, i, i, ""};
550 }
551 }
552
553 logger.debug(" Reached end of input while parsing type arguments");
554 return {false, name, i, i, ""};
555 } else if (c == '>') {
556 if (depth == 0) {
557 logger.debug(" Found '>' at depth 0, stopping parse here at position {}", i);
558 return {true, name, start, i, text_utils::get_substring(input, start, i)};
559 } else {
560 logger.debug(" Found '>' at depth {} ", depth);
561 return {true, name, start, i, text_utils::get_substring(input, start, i)};
562 }
563 } else if (c == ',') {
564 if (depth == 0) {
565 logger.debug(" Found ',' at depth 0, treating as end of type");
566 break;
567 } else {
568 logger.debug(" Found ',' at depth {}, returning to caller", depth);
569 break;
570 }
571 } else {
572 logger.debug(" Invalid character, breaking");
573 break;
574 }
575 }
576
577 std::string type = input.substr(start, i - start);
578 logger.debug(" Parsed type: '{}'", type);
579
580 if (cpp_keywords.count(type)) {
581 logger.debug(" Rejected: '{}' is a c++ keyword", type);
582 return {false, name, start, start, ""};
583 }
584
585 logger.debug("Exiting parse_type at position {} with depth {}", i, depth);
586 return {true, name, start, i, text_utils::get_substring(input, start, i)};
587 }
588};
589
591 public:
592 TypeQualifierSequenceParser() : CharParser("type_qualifier_sequence") {};
593 ParseResult parse(const std::string &input, size_t start) const override {
594 static const std::unordered_set<std::string> qualifiers = {"const", "volatile", "static", "extern",
595 "mutable", "register", "inline", "thread_local",
596 "constexpr", "consteval", "constinit"};
597
598 LogSection ls(logger, "{} parser", name);
599 log_start_of_parser(name, input, start);
600
601 size_t i = start;
602 std::vector<std::string> found_qualifiers;
603
604 while (i < input.size()) {
605 // Skip leading whitespace
606 size_t whitespace_start = i;
607 while (i < input.size() && std::isspace(input[i]))
608 ++i;
609 if (i >= input.size())
610 break;
611
612 // Peek at next word without consuming
613 size_t word_start = i;
614 while (i < input.size() && std::isalpha(input[i]))
615 ++i;
616 std::string word = input.substr(word_start, i - word_start);
617
618 if (word.empty())
619 break;
620
621 logger.debug(" Found word: {}", word);
622
623 if (qualifiers.count(word)) {
624 found_qualifiers.push_back(word);
625 } else {
626 logger.debug(" Word: {} is not a qualifier. Stopping", word);
627 // Reset `i` back to start of this non-qualifier word
628 i = word_start;
629 break;
630 }
631 }
632
633 if (!found_qualifiers.empty()) {
634 logger.debug(" Parsed qualifiers:");
635 for (const auto &q : found_qualifiers)
636 logger.debug(" {}", q);
637 return {true, name, start, i, text_utils::get_substring(input, start, i)};
638 }
639
640 logger.debug(" No qualifiers found");
641 return {false, name, i, i, ""};
642 }
643};
644
645class LiteralParser : public CharParser {
646 public:
647 explicit LiteralParser(std::string literal) : CharParser("literal: " + literal), literal_(std::move(literal)) {}
648
649 ParseResult parse(const std::string &input, size_t start) const override {
650
651 LogSection ls(logger, "{} parser", name);
652 log_start_of_parser(name, input, start);
653
654 if (start + literal_.size() > input.size()) {
655
656 logger.debug(" Not enough input left to match. Needed: {}, available: {}", literal_.size(),
657 (input.size() - start));
658
659 return {false, name, start, start, ""};
660 }
661
662 std::string_view slice = std::string_view(input).substr(start, literal_.size());
663
664 logger.debug(" Comparing {} to {}", slice, literal_);
665
666 if (slice == literal_) {
667 logger.debug(" Match succeeded. Advancing to position {}", start + literal_.size());
668 auto end = start + literal_.size();
669 return {true, name, start, end, text_utils::get_substring(input, start, end)};
670 } else {
671 logger.debug(" Match failed.");
672 return {false, name, start, start, ""};
673 }
674 }
675
676 private:
677 std::string literal_;
678};
679
681 public:
682 MatchingStringPairParser(const std::string &name = "matching_strings", std::string left_str = "{",
683 std::string right_str = "}")
684 : CharParser(name), left_str(std::move(left_str)), right_str(std::move(right_str)) {}
685
686 std::string left_str;
687 std::string right_str;
688
689 ParseResult parse(const std::string &input, size_t start) const override {
690 LogSection ls(logger, "{} parser", name);
691 log_start_of_parser(name, input, start);
692
693 if (start >= input.size() || !starts_with(input, start, left_str)) {
694 logger.debug(" Start sequence is not '{}', aborting", left_str);
695 return {false, name, start, start, ""};
696 }
697
698 size_t depth = 1;
699 bool in_string = false;
700 bool in_char = false;
701 bool escape_next = false;
702
703 size_t i = start + left_str.size();
704 while (i < input.size()) {
705 char c = input[i];
706
707 if (escape_next) {
708 escape_next = false;
709 ++i;
710 continue;
711 }
712
713 if (in_string) {
714 if (c == '\\') {
715 escape_next = true;
716 } else if (c == '"') {
717 in_string = false;
718 }
719 ++i;
720 continue;
721 }
722
723 if (in_char) {
724 if (c == '\\') {
725 escape_next = true;
726 } else if (c == '\'') {
727 in_char = false;
728 }
729 ++i;
730 continue;
731 }
732
733 if (c == '"') {
734 in_string = true;
735 ++i;
736 continue;
737 }
738
739 if (c == '\'') {
740 in_char = true;
741 ++i;
742 continue;
743 }
744
745 if (starts_with(input, i, left_str)) {
746 ++depth;
747 i += left_str.size();
748 continue;
749 }
750
751 if (starts_with(input, i, right_str)) {
752 --depth;
753 i += right_str.size();
754 if (depth == 0) {
755 size_t end = i;
756 logger.debug(" Found matching closing sequence at position {}", end - right_str.size());
757 return {true, name, start, end, text_utils::get_substring(input, start, end)};
758 }
759 continue;
760 }
761
762 ++i;
763 }
764
765 logger.debug(" Matching closing sequence not found");
766 return {false, name, start, input.size(), text_utils::get_substring(input, start, input.size())};
767 }
768
769 private:
// True when the characters of `s` beginning at `pos` start with `prefix`.
// std::string::compare throws std::out_of_range when `pos` exceeds `s.size()`.
static bool starts_with(const std::string &s, size_t pos, const std::string &prefix) {
    const int ordering = s.compare(pos, prefix.size(), prefix);
    return ordering == 0;
}
773};
774
775// NOTE: untested, but leaving it here for later because it's a good idea I might need in the future.
777 public:
778 MatchingPairParser(CharParserPtr left_parser, CharParserPtr right_parser, const std::string &name = "matching_pair")
779 : CharParser(name), left_parser_(std::move(left_parser)), right_parser_(std::move(right_parser)) {}
780
781 ParseResult parse(const std::string &input, size_t start) const override {
782 LogSection ls(logger, "{} parser", name);
783 log_start_of_parser(name, input, start);
784
785 // Parse the left delimiter
786 auto left_result = left_parser_->parse(input, start);
787 if (!left_result.succeeded) {
788 logger.debug(" Left delimiter parse failed");
789 return {false, name, start, start, ""};
790 }
791
792 size_t depth = 1;
793 bool in_string = false;
794 bool in_char = false;
795 bool escape_next = false;
796 size_t i = left_result.end;
797
798 while (i < input.size()) {
799 char c = input[i];
800
801 if (escape_next) {
802 escape_next = false;
803 ++i;
804 continue;
805 }
806
807 if (in_string) {
808 if (c == '\\') {
809 escape_next = true;
810 } else if (c == '"') {
811 in_string = false;
812 }
813 ++i;
814 continue;
815 }
816
817 if (in_char) {
818 if (c == '\\') {
819 escape_next = true;
820 } else if (c == '\'') {
821 in_char = false;
822 }
823 ++i;
824 continue;
825 }
826
827 if (c == '"') {
828 in_string = true;
829 ++i;
830 continue;
831 }
832
833 if (c == '\'') {
834 in_char = true;
835 ++i;
836 continue;
837 }
838
839 // Try parsing another left delimiter
840 {
841 auto inner_left = left_parser_->parse(input, i);
842 if (inner_left.succeeded) {
843 ++depth;
844 i = inner_left.end;
845 continue;
846 }
847 }
848
849 // Try parsing a right delimiter
850 {
851 auto inner_right = right_parser_->parse(input, i);
852 if (inner_right.succeeded) {
853 --depth;
854 i = inner_right.end;
855 if (depth == 0) {
856 logger.debug(" Found matching closing delimiter at {}", i);
857 return {true,
858 name,
859 start,
860 i,
861 text_utils::get_substring(input, start, i),
862 {left_result, inner_right}};
863 }
864 continue;
865 }
866 }
867
868 ++i;
869 }
870
871 logger.debug(" Matching closing delimiter not found");
872 return {false, name, start, input.size(), text_utils::get_substring(input, start, input.size())};
873 }
874
875 private:
876 CharParserPtr left_parser_;
877 CharParserPtr right_parser_;
878};
879
881 public:
882 NestedStringPairParser(CharParserPtr inner_parser, const std::string &name = "nested_string",
883 std::string left_str = "{", std::string right_str = "}")
884 : CharParser(name), inner_parser_(std::move(inner_parser)), left_str(std::move(left_str)),
885 right_str(std::move(right_str)) {}
886
887 std::string left_str, right_str;
888
// Matches a delimited region at `start`, then runs inner_parser_ on the text between
// the delimiters. Succeeds only when both steps succeed; the result spans the whole
// delimited region and carries the outer and inner results as sub_results.
// NOTE(review): `match_parser` is not declared on any line visible here; it is
// presumably a matching-pair parser built from left_str / right_str - confirm.
889 ParseResult parse(const std::string &input, size_t start) const override {
890 LogSection ls(logger, "{} parser", name);
891 log_start_of_parser(name, input, start);
892
894 ParseResult outer_result = match_parser.parse(input, start);
895
896 if (!outer_result.succeeded) {
897 logger.debug(" Outer string parse failed");
898 return {false, name, start, start, ""};
899 }
900
901 // Extract inner content excluding outer delimiters
// Guard first: the size subtraction below would wrap around for a too-short match.
902 if (outer_result.match.size() < left_str.size() + right_str.size()) {
903 logger.debug(" Match too small to contain delimiters");
904 return {false, name, start, start, ""};
905 }
906
907 std::string inner_text =
908 outer_result.match.substr(left_str.size(), outer_result.match.size() - left_str.size() - right_str.size());
// The inner parser sees only the extracted copy, so its positions start from 0.
909 size_t inner_start = 0;
910
911 logger.debug(" Inner content to parse: {}", truncate(inner_text));
912
// Nothing below checks that the inner parser consumed all of inner_text.
913 ParseResult inner_result = inner_parser_->parse(inner_text, inner_start);
914
915 if (!inner_result.succeeded) {
916 logger.debug(" Inner parse failed");
917 return {false, name, start, start, ""};
918 }
919
920 // Adjust inner result's positions relative to the original input
// Only the top-level start/end are shifted here; entries of inner_result.sub_results
// are not adjusted by this code.
921 inner_result.start += outer_result.start + left_str.size();
922 inner_result.end += outer_result.start + left_str.size();
923
924 return {
925 true,
926 name,
927 outer_result.start,
928 outer_result.end,
929 outer_result.match,
930 {outer_result, inner_result} // sub-results: outer + inner
931 };
932 }
933
934 private:
935 CharParserPtr inner_parser_;
936};
937
939 public:
940 explicit UntilCharParser(std::vector<char> target_chars, bool inclusive = true,
941 bool ignore_in_strings_and_chars = true, const std::string &name = "until_char")
942 : CharParser(name), targets(std::move(target_chars)), inclusive_(inclusive),
943 ignore_in_strings_and_chars_(ignore_in_strings_and_chars) {}
944
945 ParseResult parse(const std::string &input, size_t start) const override {
946
947 LogSection ls(logger, "{} parser", name);
948 log_start_of_parser(name, input, start);
949
950 logger.debug("Starting UntilCharParser at position {} , looking for any target "
951 "character {}",
952 start, (ignore_in_strings_and_chars_ ? " outside of strings and chars" : ""));
953
954 bool in_string = false;
955 bool in_char = false;
956 bool escape_next = false;
957
958 for (size_t i = start; i < input.size(); ++i) {
959 char c = input[i];
960 logger.debug(" At position {}, char = {}", i, c);
961
962 if (ignore_in_strings_and_chars_) {
963 if (escape_next) {
964 logger.debug(" (escaped)");
965 escape_next = false;
966 continue;
967 }
968
969 if (in_string) {
970 if (c == '\\') {
971 logger.debug(" (backslash in string, escaping next)");
972 escape_next = true;
973 } else if (c == '"') {
974 logger.debug(" (end of string)");
975 in_string = false;
976 } else {
977 logger.debug(" (inside string)");
978 }
979 continue;
980 }
981
982 if (in_char) {
983 if (c == '\\') {
984 logger.debug(" (backslash in char, escaping next)");
985 escape_next = true;
986 } else if (c == '\'') {
987 logger.debug(" (end of char)");
988 in_char = false;
989 } else {
990 logger.debug(" (inside char)");
991 }
992 continue;
993 }
994
995 if (c == '"') {
996 logger.debug(" (begin string)");
997 in_string = true;
998 continue;
999 } else if (c == '\'') {
1000 logger.debug(" (begin char)");
1001 in_char = true;
1002 continue;
1003 }
1004 }
1005
1006 if (std::find(targets.begin(), targets.end(), c) != targets.end()) {
1007 size_t end = inclusive_ ? i + 1 : i;
1008 logger.debug(" (found target, stopping at position {})", end);
1009 return {true, name, start, end, text_utils::get_substring(input, start, end)};
1010 } else {
1011 std::cout << "\n";
1012 }
1013 }
1014
1015 logger.debug(" None of the target characters found {}",
1016 (ignore_in_strings_and_chars_ ? " outside of strings or chars\n" : "\n"));
1017 return {false, name, start, start, ""};
1018 }
1019
1020 private:
1021 std::vector<char> targets;
1022 bool inclusive_;
1023 bool ignore_in_strings_and_chars_;
1024};
1025
1026// NOTE: the next step was to create an enum parser, and then the ability to serialize that as well in the meta program.
1028 public:
1029 CommaSeparatedTupleParser(CharParserPtr element_parser, std::string name = "comma_separated_tuple")
1030 : CharParser(std::move(name)), element_parser(std::move(element_parser)) {}
1031
1032 ParseResult parse(const std::string &input, size_t start = 0) const override {
1033 // Build the grammar dynamically:
1034 // ( element ( "," element )* )?
1035 auto comma_then_element = sequence({literal(","), element_parser}, "comma_then_element");
1036 auto repeating_comma_elements = repeating(comma_then_element, "repeating_comma_elements");
1037 auto full_sequence = optional(sequence({element_parser, repeating_comma_elements}, "tuple_core"), name);
1038
1039 return full_sequence->parse(input, start);
1040 }
1041
1042 private:
1043 CharParserPtr element_parser;
1044};
1045
1046// a repeating parser attempts to repeatedly parse something until the parsing
1047// fails using the passed in parser
1049 public:
1050 explicit RepeatingParser(std::shared_ptr<CharParser> inner_parser, const std::string &name = "repeating")
1051 : CharParser(name), parser(std::move(inner_parser)) {}
1052
1053 ParseResult parse(const std::string &input, size_t start) const override {
1054 size_t current = start;
1055 bool matched_once = false;
1056
1057 LogSection ls(logger, "{} parser", name);
1058 log_start_of_parser(name, input, start);
1059
1060 std::vector<ParseResult> results;
1061
1062 while (true) {
1063 logger.debug("0");
1064 auto result = parser->parse(input, current);
1065 logger.debug("1");
1066 if (!result.succeeded) {
1067 logger.debug("2");
1068 break;
1069 }
1070 results.push_back(result);
1071 logger.debug("3");
1072 if (result.end == current) {
1073 logger.debug("4");
1074 // Prevent infinite loop if parser makes no progress
1075 break;
1076 }
1077 logger.debug("5");
1078 current = result.end;
1079 matched_once = true;
1080 }
1081 logger.debug("6");
1082
1083 if (matched_once) {
1084 logger.debug("7");
1085 return {true, name, start, current, text_utils::get_substring(input, start, current), results};
1086 } else {
1087 logger.debug("8");
1088 return {false, name, start, start, ""};
1089 }
1090 }
1091
1092 private:
1093 std::shared_ptr<CharParser> parser;
1094};
1095
1096class AnyOfParser : public CharParser {
1097 public:
1098 AnyOfParser(std::vector<std::shared_ptr<CharParser>> sub_parsers, const std::string &name = "any_of")
1099 : CharParser(name), parsers(std::move(sub_parsers)) {}
1100
1101 ParseResult parse(const std::string &input, size_t start) const override {
1102 // logger.debug("{} any of parser started at position {}", name, start);
1103
1104 for (const auto &parser : parsers) {
1105 // logger.debug("{} trying sub-parser '{}'", name, parser->name);
1106 auto result = parser->parse(input, start);
1107 if (result.succeeded) {
1108 // logger.debug("{} sub-parser '{}' succeeded with match '{}'", name, parser->name, result.match);
1109 return result;
1110 } else {
1111 // logger.debug("{} sub-parser '{}' failed", name, parser->name);
1112 }
1113 }
1114
1115 // logger.debug("{} parser failed: no sub-parsers matched at position {}", name, start);
1116 return {false, name, start, start, ""};
1117 }
1118
1119 private:
1120 std::vector<std::shared_ptr<CharParser>> parsers;
1121};
1122
1124 public:
// Takes ownership of the list of parsers that parse() runs in order; `name` is forwarded
// to the CharParser base.
1125 SequenceParser(std::vector<CharParserPtr> parsers, const std::string &name)
1126 : CharParser(name), parsers_(std::move(parsers)) {}
1127
1128 ParseResult parse(const std::string &input, size_t start) const override {
1129
1130 LogSection ls(logger, "{} parser", name);
1131 log_start_of_parser(name, input, start);
1132
1133 size_t current = start;
1134
1135 std::vector<ParseResult> results;
1136
1137 for (const auto &parser : parsers_) {
1138 auto result = parser->parse(input, current);
1139 if (!result.succeeded) {
1140 logger.debug("{}: did not succeed on parser {}", name, parser->name);
1141 return result;
1142 }
1143 results.push_back(result);
1144 current = result.end;
1145 }
1146 return {true, name, start, current, text_utils::get_substring(input, start, current), results};
1147 }
1148
1149 private:
1150 std::vector<CharParserPtr> parsers_;
1151};
1152
1153// === TESTING ===
1154
1155inline void test_parser(const std::string &input, const CharParserPtr &parser) {
1156
1157 logger.info("Testing input: {}", input);
1158 auto result = parser->parse(input, 0);
1159 if (result.succeeded && result.end == input.size()) {
1160 logger.info(">> SUCCESS: matched full string");
1161 } else if (result.succeeded) {
1162 logger.info(">> PARTIAL MATCH: stopped at {}", result.end);
1163 } else {
1164 logger.info(">> FAILURE: no match");
1165 }
1166}
1167
1168inline std::vector<CharParserPtr> whitespace_between(const std::vector<CharParserPtr> &base_parsers) {
1169 std::vector<CharParserPtr> result;
1170 result.reserve(base_parsers.size() * 2 + 1); // start+end whitespace plus between each
1171
1172 result.push_back(optional_whitespace()); // start
1173
1174 for (size_t i = 0; i < base_parsers.size(); ++i) {
1175 result.push_back(base_parsers[i]);
1176 result.push_back(optional_whitespace()); // between and after
1177 }
1178
1179 return result;
1180}
1181
1182// TODO: should this be a class? why or why not.
1184
1185 CharParserPtr after_the_first_element_parser =
1186 optional(repeating(sequence(whitespace_between({literal(","), element_parser}),
1187 "comma_element_" + element_parser->name)),
1188 "after_the_first_element_parser");
1189
1190 // NOTE: we do optional here, because we allow the empty sequence to be valid
1191 CharParserPtr optional_element_parser = optional(
1192 sequence({if_then(sequence(whitespace_between({element_parser})), after_the_first_element_parser,
1193 "one_or_more_element_" + element_parser->name),
1194 // NOTE: this optional accepts a trailing comma, as in `1, 2, 3,`, which is valid in some cases
1195 optional(literal(","))}),
1196 "optional_elements");
1197
1198 return optional_element_parser;
1199}
1200
1201std::string remove_comments_from_file(const std::string &filename);
1202
1203std::unordered_map<std::string, std::vector<std::string>>
1204collect_matches_by_parser_name(const ParseResult &result, const std::vector<std::string> &target_names = {});
1205
1206// inline CharParserPtr template_name =
1207
1208// #ifndef M_PI
1209// #define M_PI 3.14159265358979323846
1210// #endif
1211
1212// TODO: to support the above we need support for until_literal
1214
1215// assignment_parser->name = "assignment";
1216//
1218 literal("<"),
1219 until_char({'>'}),
1220 })}),
1221 "system_include");
1222
1224 sequence(whitespace_between({literal("#include"), sequence({literal("\""), until_char({'"'}, true, false)})}),
1225 "local_include");
1226
1228 sequence(whitespace_between({literal("="), until_char({',', ')'}, false)}), "default_value_for_parameter_suffix");
1229
1230// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/n4950.pdf section 9.2.1
1231// NOTE: not fully implemented yet
1232// inline CharParserPtr declaration_specifier_sequence_parser = ;
1233//
1234
1238
1240
1243 {optional(literal("inline")), optional(literal("const")),
1246 // NOTE: this one represents the fact that short short is a valid type representing
1247 // short short int making int an optional thing.
1249
1251 "type_with_optional_reference for " + base_parser->name);
1252}
1253
1255
1256// TODO: Delete this.
1258 return sequence({literal("std::function<"), get_templated_type_parser(),
1260 "lambda_parameters", "(", ")"),
1261 until_char({'>'})},
1262 "lambda_type");
1263}
1264
1266
1267// NOTE: the order in which this is parsed matters a lot: given something like std::function<void(int)>, if we
1268// run the non-recursive type parser on it we'd only match up to std::function, so we want to try parsers from
1269// most specific to least specific (I think).
1271
1273 sequence(whitespace_between({type, variable(), literal("="), until_char({';'})}), "assignment");
1274
1275// NOTE: here is another example of what I'm talking about above, a function can be interpreted as a declaration because
1276// it follows the same type var ... ; format so this is less specific so we have to match a function declaration first
1278 sequence(whitespace_between({type, variable(), until_char({';'})}), "declaration");
1279
1283 "parameter");
1284
1286 optional(repeating(sequence({literal(","), parameter_parser}, "comma_parameter"), "repeating_parameter_sequence"),
1287 "optional_parameter_sequence");
1288
1289// TODO: need to turn this into its own thing, an optional comma-separated sequence. I want to create a class
1290// out of this: we pass in an element parser, and it does this logic.
1292 optional(if_then(parameter_parser, optional_parameter_sequence, "one_or_more_parameter"), "optional_parameters");
1293
1295 sequence(whitespace_between({literal("("), optional_parameters, literal(")")}), "parameter_tuple");
1296
1297inline std::vector<CharParserPtr> make_operator_literals() {
1298 std::vector<CharParserPtr> result;
1299 result.reserve(overloadable_operators.size());
1300 for (auto &op : overloadable_operators) {
1301 result.push_back(literal(op));
1302 }
1303 return result;
1304}
1305
// Literal parsers for the overloadable operators, initialized from make_operator_literals().
1306inline std::vector<CharParserPtr> operator_literals = make_operator_literals();
1307
1309 sequence({optional(sequence({variable(), literal("::")})), variable()}, "optionally_namespaced_variable");
1310
1313 "function_invocation");
1314
1317
1320 "constructor_def_parser");
1321
1324 type,
1326 }),
1327 "base_function_signature");
1328
1329// TODO: give the optional const thing a name
1332 "function_signature");
1333
1336
1339
1340inline const std::vector<CharParserPtr> access_specifier_parsers = [] {
1341 std::vector<CharParserPtr> result;
1342 result.reserve(access_specifiers.size());
1343 for (const auto &as : access_specifiers) {
1344 result.push_back(literal(as));
1345 }
1346 return result;
1347}();
1348
1350
1353
1354// NOTE: this is hollow
1357 matching_string_pair(), literal(";")}),
1358 "class_def");
1359
1360// NOTE: doesn't yet support nested classes.
1367 literal(";")}),
1368 "class_def");
1369
1376 literal(";")}),
1377 "struct_def");
1378
1382 "enum_class_def");
1383
1385 literal("using"),
1386 variable(),
1387 literal("="),
1388 type,
1389 literal(";"),
1390 }),
1391 "using_statement");
1392
1394 sequence(whitespace_between({literal("struct"), variable(), matching_string_pair(), literal(";")}), "struct_def");
1395
1398 // NOTE: are classes, structs, enums ever even in the source file or headers only?
1400 "source_file_body");
1401
1404 "source_file_namespace_body");
1405
1407 repeating(any_of({local_include_parser, system_include_parser}), "local_or_system_includes_parser");
1408
1411 "source_file");
1412
1415 "source_file");
1416
1417std::unordered_map<std::string, std::vector<std::string>>
1418get_parser_name_to_matches_for_source_file(const std::string &source_code_path);
1419std::vector<std::string> extract_top_level_functions(const std::string &source_code_path);
1420std::vector<std::string> extract_top_level_function_declarations(const std::string &header_code_path);
1421std::vector<std::string> extract_top_level_classes(const std::string &source_code_path);
1422std::vector<std::string> extract_top_level_enum_classes(const std::string &source_code_path);
1423
// Ad-hoc manual smoke test for the parsers in this header.
// The only active check runs test_parser on a std::function type string with the `type`
// parser; test_parser logs whether the input matched fully, partially, or not at all.
// Every other case below is commented out and kept as a record of inputs that were
// tried against the individual parsers.
1424inline void test() {
1425 // test_parser("std::unordered_map<std::string, std::vector<std::string>>",
1426 // type());
1427 // test_parser(" int x ", parameter_parser);
1428 // test_parser(" (int x, int y) ", parameter_tuple_parser);
1429 // test_parser("abc123", identifier());
1430 // test_parser("3bc123", identifier());
1431 // test_parser("const std::unordered_map<std::vector<std::string>, const unsigned int>",
1432 // get_templated_type_parser());
1433
1434 // test_parser("std::function<glm::vec3(double)>", lambda_type_parser());
1435 test_parser("std::function<glm::vec3(double)>", type);
1436 // test_parser("std::function<glm::vec3(double)> f", parameter_parser);
1437
1438 // // TODO: was figuring out why this doesn't work., is the comment removing it no...
1439 // test_parser(" glm::vec3 compute_tangent_finite_difference(std::function<glm::vec3(double)> f, double t, double "
1440 // "delta) { glm::vec3 forward = f(t + delta); glm::vec3 backward = f(t - delta); return (forward - "
1441 // "backward) / static_cast<float>(2.0f * delta); // central difference } ",
1442 // function_def_parser);
1443 //
1444 // test_parser("std::vector<Rectangle> vertical_weighted_subdivision(const Rectangle &rect, const "
1445 // "std::vector<unsigned int> &weights) { return weighted_subdivision(rect, weights); }",
1446 // function_def_parser);
1447 //
1448 // test_parser("Grid::Grid(int rows, int cols, float width, float height, float origin_x, float origin_y, float "
1449 // "origin_z) : rows(rows), cols(cols), grid_width(width), grid_height(height), origin_x(origin_x), "
1450 // "origin_y(origin_y), origin_z(origin_z), rect_width(width / cols), rect_height(height / rows) {}",
1451 // constructor_def_parser);
1452 // test_parser(" int add(int x, int y) ", function_signature_parser);
1453 // test_parser(" int add(int x, int y) { return x + y; } ",
1454 // function_def_parser);
1455 // test_parser(" std::optional<int> opt_mul(int x, int y) ",
1456 // function_signature_parser);
1457 //
1458 // test_parser(" int x = 5;", assignment_parser);
1459 // test_parser(" std::vector<int> x = 5;", assignment_parser);
1460 // test_parser(" std::vector<std::vector<std::string>> x = 6;",
1461 // assignment_parser); // success
1462 //
1463 // test_parser(
1464 // "std::unordered_map<std::string, std::vector<std::string>> "
1465 // "collect_matches_by_parser_name(const "
1466 // "ParseResult &result, const std::vector<std::string> &target_names) ",
1467 // function_signature_parser); // success
1468 //
1469 // test_parser(" std::vector<std::vector<std::string>> x = \"test;test\";",
1470 // assignment_parser);
1471 // test_parser(" _private = count123", assignment_parser);
1472 // test_parser(" CONST_THING = variable_123", assignment_parser);
1473 // test_parser(" foo = bar", assignment_parser);
1474 // test_parser("foo=123", assignment_parser); // 123 is not a variable
1475 // test_parser("int = value", assignment_parser); // "int" is a keyword -> reject
1476 // test_parser(" _var = _x2", assignment_parser);
1477 // test_parser("foo bar", assignment_parser); // fail (missing '=')
1478
1479 // try {
1480 // std::string commentless_code = remove_comments_from_file("main.cpp");
1481 // std::string flattened = text_utils::remove_newlines(commentless_code);
1482 // flattened = text_utils::collapse_whitespace(flattened);
1483 // std::cout << flattened << std::endl;
1484 //
1485 // test_parser(flattened, source_file_parser);
1486 //
1487 // ParseResult root = source_file_parser->parse(flattened, 0);
1488 // std::vector<std::string> target_parsers = {function_def_parser->name,
1489 // assignment_parser->name,
1490 // struct_def_parser->name,
1491 // class_def_parser->name};
1492 // auto match_map = collect_matches_by_parser_name(root, target_parsers);
1493 //
1494 // for (const auto &[name, matches] : match_map) {
1495 // std::cout << "Matches for parser: " << name << "\n";
1496 // for (const auto &match : matches) {
1497 // std::cout << " - " << match << "\n";
1498 // }
1499 // }
1500 //
1501 // } catch (const std::exception &e) {
1502 // std::cerr << "Error: " << e.what() << '\n';
1503 // return 1;
1504 // }
1505 //
1506 // return 0;
1507 // }
1508}
1509
1510} // namespace cpp_parsing
1511
1512#endif
Definition logger.hpp:182
Definition logger.hpp:22
void debug(fmt::format_string< Args... > fmt_str, Args &&...args)
Definition logger.hpp:88
AnyOfParser(std::vector< std::shared_ptr< CharParser > > sub_parsers, const std::string &name="any_of")
Definition cpp_parsing.hpp:1098
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:1101
virtual ~CharParser()=default
std::string name
Definition cpp_parsing.hpp:207
CharParser(std::string name="")
Definition cpp_parsing.hpp:201
virtual ParseResult parse(const std::string &input, size_t start=0) const =0
ParseResult parse(const std::string &input, size_t start=0) const override
Definition cpp_parsing.hpp:1032
CommaSeparatedTupleParser(CharParserPtr element_parser, std::string name="comma_separated_tuple")
Definition cpp_parsing.hpp:1029
ParseResult parse(const std::string &input, size_t start=0) const override
Definition cpp_parsing.hpp:272
DecimalLiteralParser(std::string name="decimal_literal")
Definition cpp_parsing.hpp:270
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:401
void set_parser(CharParserPtr parser)
Definition cpp_parsing.hpp:399
DeferredParser()
Definition cpp_parsing.hpp:396
IdentifierParser()
Definition cpp_parsing.hpp:290
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:292
IfThenParser(std::shared_ptr< CharParser > condition_parser, std::shared_ptr< CharParser > then_parser, const std::string &name="if_then")
Definition cpp_parsing.hpp:354
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:358
LiteralParser(std::string literal)
Definition cpp_parsing.hpp:647
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:649
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:781
MatchingPairParser(CharParserPtr left_parser, CharParserPtr right_parser, const std::string &name="matching_pair")
Definition cpp_parsing.hpp:778
Definition cpp_parsing.hpp:680
std::string right_str
Definition cpp_parsing.hpp:687
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:689
std::string left_str
Definition cpp_parsing.hpp:686
MatchingStringPairParser(const std::string &name="matching_strings", std::string left_str="{", std::string right_str="}")
Definition cpp_parsing.hpp:682
NestedStringPairParser(CharParserPtr inner_parser, const std::string &name="nested_string", std::string left_str="{", std::string right_str="}")
Definition cpp_parsing.hpp:882
std::string right_str
Definition cpp_parsing.hpp:887
std::string left_str
Definition cpp_parsing.hpp:887
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:889
OptionalParser(std::shared_ptr< CharParser > inner, const std::string &name="optional")
Definition cpp_parsing.hpp:328
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:331
OptionalWhitespaceParser()
Definition cpp_parsing.hpp:415
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:417
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:1053
RepeatingParser(std::shared_ptr< CharParser > inner_parser, const std::string &name="repeating")
Definition cpp_parsing.hpp:1050
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:1128
SequenceParser(std::vector< CharParserPtr > parsers, const std::string &name)
Definition cpp_parsing.hpp:1125
TransformParser(std::shared_ptr< CharParser > inner, TransformFn fn, std::string name="")
Definition cpp_parsing.hpp:255
ParseResult parse(const std::string &input, size_t start=0) const override
Definition cpp_parsing.hpp:258
std::function< ParseResult(const ParseResult &)> TransformFn
Definition cpp_parsing.hpp:253
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:485
TypeParser()
Definition cpp_parsing.hpp:484
TypeQualifierSequenceParser()
Definition cpp_parsing.hpp:592
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:593
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:945
UntilCharParser(std::vector< char > target_chars, bool inclusive=true, bool ignore_in_strings_and_chars=true, const std::string &name="until_char")
Definition cpp_parsing.hpp:940
VariableParser()
Definition cpp_parsing.hpp:440
ParseResult parse(const std::string &input, size_t start) const override
Definition cpp_parsing.hpp:442
Definition text_utils.hpp:81
void indent()
Increase indentation level.
Definition text_utils.hpp:86
void add(Args &&...args)
Add a line with current indentation applied.
Definition text_utils.hpp:100
void unindent()
Decrease indentation level (no-op if already at 0).
Definition text_utils.hpp:89
std::string str() const
Get the accumulated text as a single string with newlines.
Definition text_utils.hpp:180
@ s
Definition input_state.hpp:45
@ l
Definition input_state.hpp:38
@ c
Definition input_state.hpp:29
@ r
Definition input_state.hpp:44
@ q
Definition input_state.hpp:43
@ i
Definition input_state.hpp:35
Definition cpp_parsing.cpp:4
const std::vector< CharParserPtr > access_specifier_parsers
Definition cpp_parsing.hpp:1340
std::vector< std::string > extract_top_level_classes(const std::string &source_code_path)
Definition cpp_parsing.cpp:336
CharParserPtr full_non_recursive_type
Definition cpp_parsing.hpp:1265
CharParserPtr function_def_parser
Definition cpp_parsing.hpp:1334
CharParserPtr constructor_def_parser
Definition cpp_parsing.hpp:1318
std::string truncate(const std::string &s, int cutoff=50)
Definition cpp_parsing.hpp:126
std::vector< std::string > extract_top_level_enum_classes(const std::string &source_code_path)
Definition cpp_parsing.cpp:340
CharParserPtr macro_if_statement
Definition cpp_parsing.hpp:1213
std::string remove_comments_from_file(const std::string &filename)
Definition cpp_parsing.cpp:217
const CharParserPtr access_specifier_parser
Definition cpp_parsing.hpp:1349
const CharParserPtr class_inheritance_parser
Definition cpp_parsing.hpp:1351
void collect_by_name(const cpp_parsing::ParseResult *root, const std::string &target, std::vector< const cpp_parsing::ParseResult * > &out)
Definition cpp_parsing.cpp:56
CharParserPtr optionally_namespaced_variable_parser
Definition cpp_parsing.hpp:1308
CharParserPtr base_function_signature_parser
Definition cpp_parsing.hpp:1322
CharParserPtr system_include_parser
Definition cpp_parsing.hpp:1217
CharParserPtr local_or_system_includes_parser
Definition cpp_parsing.hpp:1406
CharParserPtr function_invocation
Definition cpp_parsing.hpp:1311
CharParserPtr add_optional_type_surroundings(CharParserPtr base_parser)
Definition cpp_parsing.hpp:1241
std::vector< CharParserPtr > make_operator_literals()
Definition cpp_parsing.hpp:1297
CharParserPtr parameter_tuple_parser
Definition cpp_parsing.hpp:1294
CharParserPtr optional_parameters
Definition cpp_parsing.hpp:1291
CharParserPtr nested_string_pair(CharParserPtr parser, const std::string &name, std::string left, std::string right)
Definition cpp_parsing.cpp:152
CharParserPtr matching_string_pair(const std::string &name, std::string left, std::string right)
Definition cpp_parsing.cpp:145
std::vector< std::string > extract_top_level_functions(const std::string &source_code_path)
Definition cpp_parsing.cpp:328
CharParserPtr class_def_parser
Definition cpp_parsing.hpp:1355
CharParserPtr if_then(std::shared_ptr< CharParser > condition_parser, std::shared_ptr< CharParser > then_parser, const std::string &name)
Definition cpp_parsing.cpp:166
CharParserPtr declaration_parser
Definition cpp_parsing.hpp:1277
CharParserPtr sequence(std::vector< CharParserPtr > parsers, const std::string &name)
Definition cpp_parsing.cpp:175
CharParserPtr header_file_parser
Definition cpp_parsing.hpp:1413
const cpp_parsing::ParseResult * find_first_name_contains(const cpp_parsing::ParseResult *root, const std::string &substr)
Definition cpp_parsing.cpp:42
CharParserPtr lambda_type_parser()
Definition cpp_parsing.hpp:1257
CharParserPtr type_qualifier_sequence()
Definition cpp_parsing.cpp:136
std::vector< CharParserPtr > create_literal_parsers(std::vector< std::string > literals)
Definition cpp_parsing.hpp:223
CharParserPtr assignment_parser
Definition cpp_parsing.hpp:1272
CharParserPtr until_char(std::vector< char > target_chars, bool inclusive, bool ignore_in_strings_and_chars, const std::string &name)
Definition cpp_parsing.cpp:138
CharParserPtr function_decl_parser
Definition cpp_parsing.hpp:1337
const std::vector< std::string > cpp_size_specifier
Definition cpp_parsing.hpp:33
const std::vector< std::string > overloadable_operators
Definition cpp_parsing.hpp:37
CharParserPtr struct_def_parser
Definition cpp_parsing.hpp:1393
CharParserPtr struct_def_parser_good
Definition cpp_parsing.hpp:1370
CharParserPtr using_statement_parser
Definition cpp_parsing.hpp:1384
CharParserPtr default_value_for_parameter_suffix_parser
Definition cpp_parsing.hpp:1227
CharParserPtr enum_class_def_parser
Definition cpp_parsing.hpp:1379
CharParserPtr source_file_body_parser
Definition cpp_parsing.hpp:1396
std::string node_text(const cpp_parsing::ParseResult *node)
Definition cpp_parsing.cpp:95
void test()
Definition cpp_parsing.hpp:1424
CharParserPtr local_include_parser
Definition cpp_parsing.hpp:1223
std::shared_ptr< CharParser > CharParserPtr
Definition cpp_parsing.hpp:210
const std::unordered_set< std::string > cpp_built_in_types
Definition cpp_parsing.hpp:28
std::vector< std::string > extract_top_level_function_declarations(const std::string &header_code_path)
Definition cpp_parsing.cpp:332
CharParserPtr base_type()
Definition cpp_parsing.cpp:135
const std::unordered_set< std::string > cpp_keywords
Definition cpp_parsing.hpp:108
const cpp_parsing::ParseResult * find_first_by_name(const cpp_parsing::ParseResult *root, const std::string &target)
Definition cpp_parsing.cpp:29
std::vector< CharParserPtr > operator_literals
Definition cpp_parsing.hpp:1306
Logger logger("cpp_parsing")
std::unordered_map< std::string, std::vector< std::string > > collect_matches_by_parser_name(const ParseResult &result, const std::vector< std::string > &target_names)
Definition cpp_parsing.cpp:254
CharParserPtr literal(const std::string &s)
Definition cpp_parsing.cpp:143
CharParserPtr comma_separated_sequence_parser(CharParserPtr element_parser)
Definition cpp_parsing.hpp:1183
std::unordered_map< std::string, std::vector< std::string > > get_parser_name_to_matches_for_source_file(const std::string &source_code_path)
Definition cpp_parsing.cpp:288
CharParserPtr parameter_parser
Definition cpp_parsing.hpp:1280
CharParserPtr type
Definition cpp_parsing.hpp:1270
CharParserPtr optional_reference_or_pointer()
Definition cpp_parsing.hpp:1239
CharParserPtr class_def_parser_good
Definition cpp_parsing.hpp:1361
CharParserPtr optional_whitespace()
Definition cpp_parsing.cpp:132
CharParserPtr optional(CharParserPtr parser, const std::string &name)
Definition cpp_parsing.cpp:160
CharParserPtr initializer_list_parser
Definition cpp_parsing.hpp:1315
CharParserPtr source_file_namespace_body_parser
Definition cpp_parsing.hpp:1402
void log_start_of_parser(const std::string &name, const std::string &input, size_t start)
Definition cpp_parsing.hpp:247
std::vector< CharParserPtr > whitespace_between(const std::vector< CharParserPtr > &base_parsers)
Definition cpp_parsing.hpp:1168
ParseResult clean_parse_result(const ParseResult &r)
Definition cpp_parsing.cpp:6
CharParserPtr variable()
Definition cpp_parsing.cpp:134
CharParserPtr repeating(CharParserPtr parser, const std::string &name)
Definition cpp_parsing.cpp:156
const std::unordered_set< std::string > access_specifiers
Definition cpp_parsing.hpp:35
CharParserPtr source_file_parser
Definition cpp_parsing.hpp:1409
CharParserPtr identifier()
Definition cpp_parsing.cpp:133
std::ostream & operator<<(std::ostream &os, const ParseResult &result)
Definition cpp_parsing.cpp:130
CharParserPtr get_templated_type_parser()
Definition cpp_parsing.cpp:344
std::ostream & print_parse_result(std::ostream &os, const ParseResult &result, int indent)
Definition cpp_parsing.cpp:110
CharParserPtr optionally_namespaced_identifier()
Definition cpp_parsing.hpp:1235
CharParserPtr optional_parameter_sequence
Definition cpp_parsing.hpp:1285
CharParserPtr not_any_of(std::shared_ptr< CharParser > inner, std::unordered_set< std::string > forbidden, std::string name)
Definition cpp_parsing.cpp:179
CharParserPtr any_of(std::vector< CharParserPtr > parsers, const std::string &name)
Definition cpp_parsing.cpp:171
void test_parser(const std::string &input, const CharParserPtr &parser)
Definition cpp_parsing.hpp:1155
CharParserPtr deferred()
Definition cpp_parsing.cpp:164
CharParserPtr function_signature_parser
Definition cpp_parsing.hpp:1330
const std::vector< std::string > cpp_sign_specifier
Definition cpp_parsing.hpp:32
std::string get_next_part_of_string(const std::string &input, int start, int lookahead=50)
Definition cpp_parsing.hpp:130
ParseResult parse_source_or_header_file(const std::string &source_code_path)
Definition cpp_parsing.cpp:272
std::vector< std::pair< std::string, std::string > > bfs_collect_matches(const cpp_parsing::ParseResult *root, const std::vector< std::string > &names)
Definition cpp_parsing.cpp:66
Definition hashing.hpp:8
std::string get_substring(const std::string &input, size_t start, size_t end)
Extract a substring from start to end indices.
Definition text_utils.cpp:236
Definition cpp_parsing.hpp:134
std::string match
Definition cpp_parsing.hpp:143
bool succeeded
Definition cpp_parsing.hpp:140
ParseResult(bool succeeded, std::string parser_name="", size_t start=0, size_t end=0, std::string match="", std::vector< ParseResult > sub_results={})
Definition cpp_parsing.hpp:135
size_t end
Definition cpp_parsing.hpp:142
std::vector< ParseResult > sub_results
Definition cpp_parsing.hpp:145
std::string parser_name
Definition cpp_parsing.hpp:144
size_t start
Definition cpp_parsing.hpp:141
std::string to_string() const
Definition cpp_parsing.hpp:146