Circus 0.0.1-alpha
C++ Serialization Framework
Loading...
Searching...
No Matches
lexer.hpp
Go to the documentation of this file.
1
5
6#pragma once
7#include <cassert>
8#include <cctype>
9#include <iostream>
10#include <variant>
11#include <vector>
12
13#include "../../debug/debug.hpp"
14#include "circus_traits.hpp"
15#include "reader.hpp"
16#include "token.hpp"
17#include "visitor.hpp"
18
19namespace circus {
20
30class lexer__ {
31 std::string _in;
32 std::size_t _beg;
33 std::size_t _end;
34 std::size_t _row;
35 std::size_t _col;
36 std::vector<tokens__> _toks;
37
43 [[nodiscard]] static tokens__::TYPE f_token(char c) noexcept {
44 return (tokens__::TYPE)c;
45 }
46
52 [[nodiscard]] static bool f_reserved(tokens__::TYPE token_type) noexcept {
53 switch (token_type) {
54 case tokens__::TYPE::TK_PAREN_L:
55 case tokens__::TYPE::TK_PAREN_R:
56 case tokens__::TYPE::TK_COMMA:
57 case tokens__::TYPE::TK_COLON:
58 case tokens__::TYPE::TK_BRACE_L:
59 case tokens__::TYPE::TK_BRACE_R:
60 case tokens__::TYPE::TK_DOLLA:
61 case tokens__::TYPE::TK_CURL_L:
62 case tokens__::TYPE::TK_CURL_R:
63 case tokens__::TYPE::TK_STAR:
64 case tokens__::TYPE::TK_SLASH:
65 case tokens__::TYPE::TK_EOF:
66 case tokens__::TYPE::TK_QUOTE_DOUBLE:
67 case tokens__::TYPE::TK_QUOTE_SINGLE:
68 return true;
69 default:
70 return false;
71 }
72 }
73
77 void f_print() const noexcept {
78 for (const auto &t : _toks) {
79 t.print_token();
80 }
81 };
82
87 [[nodiscard]] bool f_eof() const noexcept {
88 return (f_token(_in[_end]) == tokens__::TYPE::TK_EOF);
89 }
90
95 [[nodiscard]] char f_peek_next() const noexcept {
96 return _in[_end + 1];
97 }
98
103 [[nodiscard]] char f_peek() const noexcept {
104 return _in[_end];
105 };
106
112 [[nodiscard]] char f_peek_at(std::size_t offset) const noexcept {
113 return _in[_end + offset];
114 }
115
121 char f_advance() noexcept {
122 if (f_eof())
123 return '\0';
124
125 if (_in[_end] == '\n') {
126 _col = 0;
127 _row++;
128 } else {
129 _col++;
130 }
131
132 return _in[_end++];
133 };
134
139 char f_previous() const noexcept {
140 return _in[_end - 1];
141 }
142
147 std::string to_substr() const noexcept {
148 return _in.substr(_beg, _end - _beg);
149 };
150
157 std::string to_substr(std::size_t new_beg, std::size_t new_end) const noexcept {
158 return _in.substr(new_beg, new_end);
159 };
160
164 void scan_number() noexcept {
165 while (!f_eof() && std::isdigit(f_peek()))
166 f_advance();
167
168 if (!f_eof() && f_peek() == '.') {
169 f_advance();
170 while (!f_eof() && std::isdigit(f_peek()))
171 f_advance();
172 insert(tokens__::TYPE::TK_LITERAL_FLOAT, to_substr(), std::stof(to_substr()));
173 } else {
174 insert(tokens__::TYPE::TK_LITERAL_INT, to_substr(), std::stoi(to_substr()));
175 }
176 };
177
181 void scan_identifier() noexcept {
182 while (!f_eof() && std::isalnum(f_peek())) {
183 f_advance();
184 };
185 insert(tokens__::TYPE::TK_IDENTIFIER, to_substr(), to_substr());
186 };
187
191 void scan_singular_reserve() noexcept {
192 if (f_reserved(f_token(f_previous()))) {
193 insert(f_token(f_previous()), to_substr(), f_previous());
194 }
195 };
196
200 void scan_string() {
201 while (!f_eof() && traits::none_of(f_token(f_advance()), tokens__::TYPE::TK_QUOTE_DOUBLE));
202
203 insert(tokens__::TYPE::TK_LITERAL_STRING, to_substr(), to_substr());
204 };
205
209 void scan_comments() noexcept {
210 if (f_token(f_peek()) == tokens__::TYPE::TK_SLASH) {
211 while (!f_eof() && f_token(f_peek()) != tokens__::TYPE::TK_NEWLINE)
212 f_advance();
213 }
214
215 if (f_token(f_advance()) == tokens__::TYPE::TK_STAR) {
216 const auto curr = f_token(f_peek());
217 while (!f_eof() && traits::none_of(curr, tokens__::TYPE::TK_STAR, tokens__::TYPE::TK_SLASH)) {
218 f_advance();
219 }
220 }
221 };
222
226 void scan_unknown() noexcept {
227 insert(tokens__::TYPE::TK_UNKNOWN, to_substr(), to_substr());
228 };
229
238 template <typename T>
239 [[nodiscard]] constexpr tokens__ create_token(tokens__::TYPE type, std::string embedded, const T &literal) noexcept {
240 tokens__ tok(type, embedded, literal, std::make_pair(_row, _col));
241 return tok;
242 }
243
251 template <typename T>
252 constexpr void insert(tokens__::TYPE type, std::string embedded, const T &lit) noexcept {
253 _toks.push_back(create_token(type, embedded, lit));
254 }
255
259 void process_unit() {
260 char c = f_advance();
261 if (f_token(c) == tokens__::TYPE::TK_SLASH) {
262 scan_comments();
263 }
264 if (f_reserved(f_token(c))) {
265 if (traits::none_of(f_token(c), tokens__::TYPE::TK_SLASH, tokens__::TYPE::TK_QUOTE_DOUBLE))
266 scan_singular_reserve();
267 else if (traits::any_of(f_token(c), tokens__::TYPE::TK_QUOTE_DOUBLE))
268 scan_string();
269 } else if (std::isdigit(c)) {
270 scan_number();
271 } else if (std::isalnum(c)) {
272 scan_identifier();
273 } else {
274 if (!std::isspace(c))
275 scan_unknown();
276 };
277 }
278
283 [[nodiscard]] std::vector<tokens__> f_lex() & noexcept {
284 while (!f_eof()) {
285 process_unit();
286 _beg = _end;
287 };
288 insert(tokens__::TYPE::TK_EOF, to_substr(), f_peek());
289
290#if CIRCUS_DEBUG_PEDANTIC__
291 f_print();
292#endif
293
294 return _toks;
295 };
296
297 public:
302 lexer__() noexcept : _in{},
303 _beg{0},
304 _end{0},
305 _row{1},
306 _col{0},
307 _toks{} {};
308
314 [[nodiscard]] std::vector<tokens__> operator()(const std::string &input) noexcept {
315 _in = input;
316 return f_lex();
317 };
318
319 lexer__(lexer__ &&other) = default;
320 lexer__(const lexer__ &other) = default;
321 lexer__ &operator=(const lexer__ &other) = default;
322
327 const std::string &get_input() const { return _in; };
328
332 ~lexer__() = default;
333};
334
335} // namespace circus
Provides compile-time traits and C++20 concepts for type introspection used throughout the Circus lib...
Lexer for tokenizing input strings into Circus tokens.
Definition lexer.hpp:30
const std::string & get_input() const
Get the input string currently being lexed.
Definition lexer.hpp:327
~lexer__()=default
Destructor.
lexer__() noexcept
Default constructor. Initializes internal state.
Definition lexer.hpp:302
std::vector< tokens__ > operator()(const std::string &input) noexcept
Lex an input string and return tokens.
Definition lexer.hpp:314
Represents a lexical token with type, literal value, and source location.
Definition token.hpp:17