- Started 2D Transform Component and relevant math extensions - Switched sequence to use pointers instead of arrays
111 lines
3.3 KiB
C++
111 lines
3.3 KiB
C++
// =====================================================================================================================
|
|
// fennec, a free and open source game engine
|
|
// Copyright © 2025 Medusa Slockbower
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
// =====================================================================================================================
|
|
|
|
///
|
|
/// \file tokenizer.h
|
|
/// \brief Declares the escape_sequence interface and the tokenizer struct.
|
|
///
|
|
///
|
|
/// \details
|
|
/// \author Medusa Slockbower
|
|
///
|
|
/// \copyright Copyright © 2025 Medusa Slockbower ([GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html))
|
|
///
|
|
///
|
|
|
|
#ifndef FENNEC_LANGPROC_FORMAT_TOKENIZER_H
|
|
#define FENNEC_LANGPROC_FORMAT_TOKENIZER_H
|
|
|
|
#include <fennec/containers/list.h>
|
|
#include <fennec/langproc/strings/string.h>
|
|
|
|
//
|
|
// Escape sequences are tricky: sometimes they must be separated by whitespace,
// other times they need not be. Requiring a list of all possible escape sequences is unrealistic.
|
|
// We need to allow the user of this struct to specify rules for escape sequences. Here are some basic rules:
|
|
//
|
|
// An escape sequence is marked by an escape character, e.g. %, \, {{
|
|
// Multiple escape characters may be used in a single tokenizer and will have different rules
|
|
// Escape characters may also be operators, brackets, or quotes
|
|
// Escape sequences may contain operators, brackets, or quotes
|
|
//
|
|
// Here are a few examples of escape sequences from various formats and languages
|
|
// C: \\, \n, \0, \u200b
|
|
// PrintF: %s, %2.2f
|
|
// Python FMT: {{, }}
|
|
// SPSS: ''
|
|
//
|
|
|
|
namespace fennec
|
|
{
|
|
|
|
struct escape_sequence {
|
|
virtual size_t operator[](const std::string& str, size_t i) = 0;
|
|
};
|
|
|
|
struct tokenizer {
|
|
using escseq = escape_sequence*;
|
|
using escmap = map<char, escape_sequence*>;
|
|
|
|
string delimiter; // markers that separate tokens
|
|
string operators; // operators are treated as individual tokens
|
|
string brackets; // characters that mark brackets
|
|
string quotes; // characters that mark a string sequence, entire string sequence is treated as one token
|
|
escmap escapes; // characters that mark the start of an escape sequence and validate them
|
|
bool numbers; // Anything that resembles a number
|
|
|
|
enum token_ : uint8_t {
|
|
token_text = 0,
|
|
token_integer,
|
|
token_string,
|
|
token_newline,
|
|
token_escaped,
|
|
token_operator,
|
|
token_bracket,
|
|
token_quoted,
|
|
|
|
num_token_types
|
|
};
|
|
|
|
using token = pair<string, uint8_t>;
|
|
|
|
private:
|
|
static constexpr uint8_t token_delimiter = num_token_types;
|
|
|
|
constexpr list<token> operator()(const string& line) {
|
|
list<token> res;
|
|
priority_queue<pair<size_t, uint8_t>> idx;
|
|
|
|
for (size_t i = 0; i < line.size(); ++i) {
|
|
|
|
for (char c : delimiter) {
|
|
idx.emplace()
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private:
|
|
};
|
|
|
|
}
|
|
|
|
#endif // FENNEC_LANGPROC_FORMAT_TOKENIZER_H
|