diff options
Diffstat (limited to 'server/src/mltokenizer.cc')
-rw-r--r-- | server/src/mltokenizer.cc | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/server/src/mltokenizer.cc b/server/src/mltokenizer.cc new file mode 100644 index 0000000..91d5b4b --- /dev/null +++ b/server/src/mltokenizer.cc @@ -0,0 +1,196 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/*************************************************************************** + * mltokenizer.cc + * + * Tue Nov 4 08:46:35 CET 2008 + * Copyright 2008 Bent Bisballe Nyeng + * deva@aasimon.org + ****************************************************************************/ + +/* + * This file is part of Pracro. + * + * Pracro is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Pracro is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Pracro; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ +#include "mltokenizer.h" + +static std::string rereplaceescaping(std::string mlvalue) +{ + std::string output; + size_t i = 0; + while(i < mlvalue.length()) { + if(mlvalue[i] == '\1') { + output += '{'; + i++; + } else if(mlvalue[i] == '\2') { + output += '}'; + i++; + } else { + output += mlvalue[i]; + i++; + } + } + return output; +} + +static std::string replaceescaping(std::string mlvalue) +{ + std::string output; + size_t i = 0; + while(i < mlvalue.length()) { + if(i < mlvalue.length() - 1 && mlvalue[i] == '{' && mlvalue[i + 1] == '{') { + output += '\1'; + i+=2; + } else if(i < mlvalue.length() - 1 && mlvalue[i] == '}' && mlvalue[i + 1] == '}') { + output += '\2'; + i+=2; + } else { + output += mlvalue[i]; + i++; + } + } + return output; +} + +static std::string gettoken(std::string input, size_t start, std::string term) +{ + std::string output; + + size_t i = start; + while(i < input.length()) { + + size_t j = 0; + while(j < term.length()) { + if(input[i] == term[j]) return output; + j++; + } + + output += input[i]; + i++; + } + + return output; +} + +typedef enum { + NAME, + VALUE, + TEXT, + ENDOFITEM, + UNDEFINED +} tokenizerstate_t; + +std::vector< mltoken_t > mltokenize(std::string mlvalue) +{ + std::vector< mltoken_t > tokens; + + mlvalue = replaceescaping(mlvalue); + + tokenizerstate_t state = UNDEFINED; + mltoken_t token; + size_t i = 0; + while(i < mlvalue.length()) { + switch(state) { + case NAME: + token.name = gettoken(mlvalue, i, "|"); + i += token.name.length() + 1; + token.type = MLTT_VALUE; + token.value = ""; + state = VALUE; + break; + + case VALUE: + token.value = gettoken(mlvalue, i, "}\n"); + i += token.value.length() + 1; + + token.value = rereplaceescaping(token.value); + token.type = MLTT_VALUE; + tokens.push_back(token); + + state = UNDEFINED; + break; + + case TEXT: + if(mlvalue[i] == '$') token.value = "$"; + else token.value = gettoken(mlvalue, i, "$\n"); + i += token.value.length(); + + token.value = rereplaceescaping(token.value); + token.type = MLTT_TEXT; + token.name = ""; + if(tokens.size() && tokens.back().type == MLTT_TEXT) tokens.back().value += token.value; + else tokens.push_back(token); + + state = UNDEFINED; + break; + + case ENDOFITEM: + token.value = "\n"; + i++; + + token.type = MLTT_ENDOFITEM; + token.name = ""; + tokens.push_back(token); + + state = UNDEFINED; + break; + + case UNDEFINED: + switch(mlvalue[i]) { + case '$': + if(i < mlvalue.length() - 1 && mlvalue[i + 1] == '{') { i++; break; } // ignore + else { state = TEXT; break; } + case '{': state = NAME; i++; break; + case '\n': state = ENDOFITEM; break; + default: state = TEXT; break; + } + } + } + + if(state != UNDEFINED) { + printf("Oups... missed something in the end!\n"); + tokens.push_back(token); + } + + return tokens; +} + +#ifdef TEST_MLTOKENIZER + +int main() +{ + std::string mlvalue = "$ab}}c\ndef ${na$me|${{va$lue}}}\n12${34}\n"; + + std::vector< mltoken_t > tokens = mltokenize(mlvalue); + std::vector< mltoken_t >::iterator i = tokens.begin(); + while(i != tokens.end()) { + printf("Token:\n"); + printf("\tType: "); + switch(i->type) { + case MLTT_VALUE: printf("VALUE\n"); break; + case MLTT_TEXT: printf("TEXT\n"); break; + case MLTT_ENDOFITEM: printf("ENDOFITEM\n"); break; + case MLTT_UNDEFINED: printf("UNDEFINED\n"); break; + } + printf("\tName: %s\n", i->name.c_str()); + printf("\tValue: %s\n", i->value.c_str()); + printf("\n"); + i++; + } + + return 0; +} + +#endif |