summary | refs | log | tree | commit | diff
path: root/server/src/mltokenizer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/mltokenizer.cc')
-rw-r--r-- server/src/mltokenizer.cc 196
1 files changed, 196 insertions, 0 deletions
diff --git a/server/src/mltokenizer.cc b/server/src/mltokenizer.cc
new file mode 100644
index 0000000..91d5b4b
--- /dev/null
+++ b/server/src/mltokenizer.cc
@@ -0,0 +1,196 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ * mltokenizer.cc
+ *
+ * Tue Nov 4 08:46:35 CET 2008
+ * Copyright 2008 Bent Bisballe Nyeng
+ * deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ * This file is part of Pracro.
+ *
+ * Pracro is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Pracro is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Pracro; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "mltokenizer.h"
+
+/**
+ * Reverse the placeholder substitution done by replaceescaping().
+ *
+ * Every '\1' byte in mlvalue is turned into '{' and every '\2' byte into
+ * '}'; all other characters are copied through unchanged.
+ *
+ * @param mlvalue String that may contain the placeholder bytes.
+ * @return Copy of mlvalue with the placeholders replaced by braces.
+ */
+static std::string rereplaceescaping(std::string mlvalue)
+{
+  std::string restored;
+  for(size_t pos = 0; pos < mlvalue.length(); pos++) {
+    switch(mlvalue[pos]) {
+    case '\1':
+      restored += '{';
+      break;
+    case '\2':
+      restored += '}';
+      break;
+    default:
+      restored += mlvalue[pos];
+      break;
+    }
+  }
+  return restored;
+}
+
+/**
+ * Replace the two-character escapes "{{" and "}}" by single placeholder
+ * bytes ('\1' and '\2' respectively).
+ *
+ * The input is scanned left to right; a pair is consumed as a unit, so
+ * "{{{" yields '\1' followed by a plain '{'. Characters that are not part
+ * of such a pair are copied through unchanged.
+ *
+ * @param mlvalue Raw input string.
+ * @return Copy of mlvalue with the escape pairs replaced by placeholders.
+ */
+static std::string replaceescaping(std::string mlvalue)
+{
+  const size_t len = mlvalue.length();
+  std::string escaped;
+  size_t pos = 0;
+  while(pos < len) {
+    const char cur = mlvalue[pos];
+    // True when the next character exists and repeats the current one.
+    const bool doubled = (pos + 1 < len) && (mlvalue[pos + 1] == cur);
+    if(doubled && cur == '{') {
+      escaped += '\1';
+      pos += 2;
+    } else if(doubled && cur == '}') {
+      escaped += '\2';
+      pos += 2;
+    } else {
+      escaped += cur;
+      pos++;
+    }
+  }
+  return escaped;
+}
+
+/**
+ * Extract the run of characters starting at offset start that ends just
+ * before the first character contained in term, or at the end of input
+ * when no such character occurs.
+ *
+ * @param input String to read from.
+ * @param start Offset of the first character to consider.
+ * @param term  Set of terminating characters; the terminator itself is
+ *              not part of the result.
+ * @return The extracted run; empty when start is at or past the end of
+ *         input or when the character at start is a terminator.
+ */
+static std::string gettoken(std::string input, size_t start, std::string term)
+{
+  // Nothing to scan; also keeps substr() from throwing on a late start.
+  if(start >= input.length()) return std::string();
+
+  const size_t stop = input.find_first_of(term, start);
+  if(stop == std::string::npos) return input.substr(start);
+
+  return input.substr(start, stop - start);
+}
+
+/**
+ * States of the scanner loop in mltokenize().
+ */
+enum tokenizerstate_t {
+  NAME,      // Reading a token name, up to the '|' separator.
+  VALUE,     // Reading a token value, up to '}' or a newline.
+  TEXT,      // Reading plain text, up to '$' or a newline.
+  ENDOFITEM, // Positioned on a newline that becomes an end-of-item token.
+  UNDEFINED  // Between tokens; the next character selects the next state.
+};
+
+/**
+ * Split a string into value, text and end-of-item tokens.
+ *
+ * Scanning rules, as implemented below:
+ *  - A '{' met between tokens (optionally preceded by '$', which is
+ *    dropped) opens a value token: the name runs up to '|', the value up
+ *    to '}' or a newline. The result is an MLTT_VALUE token.
+ *  - A newline met between tokens yields an MLTT_ENDOFITEM token whose
+ *    value is "\n".
+ *  - Everything else is MLTT_TEXT, read up to the next '$' or newline; a
+ *    '$' that does not open a value is text of its own. Consecutive text
+ *    pieces are merged into one token.
+ *  - "{{" and "}}" stand for literal braces; they are hidden behind
+ *    placeholder bytes while scanning and restored in token values.
+ *
+ * If the input ends in the middle of a value token, a message is printed
+ * to stdout. A token whose name was read but whose value is missing is
+ * still returned (with an empty value). If the input ends right after the
+ * opening '{', nothing has been parsed and no token is added.
+ *
+ * @param mlvalue The string to tokenize.
+ * @return The tokens in input order.
+ */
+std::vector< mltoken_t > mltokenize(std::string mlvalue)
+{
+  std::vector< mltoken_t > tokens;
+
+  // Hide escaped braces so they are not taken for token delimiters.
+  mlvalue = replaceescaping(mlvalue);
+
+  tokenizerstate_t state = UNDEFINED;
+  mltoken_t token;
+  size_t i = 0;
+  while(i < mlvalue.length()) {
+    switch(state) {
+    case NAME:
+      // The name runs up to '|'; the extra +1 steps over the separator.
+      token.name = gettoken(mlvalue, i, "|");
+      i += token.name.length() + 1;
+      token.type = MLTT_VALUE;
+      token.value = "";
+      state = VALUE;
+      break;
+
+    case VALUE:
+      // The value runs up to '}' or a newline; +1 steps over either one.
+      token.value = gettoken(mlvalue, i, "}\n");
+      i += token.value.length() + 1;
+
+      token.value = rereplaceescaping(token.value);
+      token.type = MLTT_VALUE;
+      tokens.push_back(token);
+
+      state = UNDEFINED;
+      break;
+
+    case TEXT:
+      // A lone '$' is emitted as one-character text; otherwise read up to
+      // the next '$' or newline (the terminator is left in the input).
+      if(mlvalue[i] == '$') token.value = "$";
+      else token.value = gettoken(mlvalue, i, "$\n");
+      i += token.value.length();
+
+      token.value = rereplaceescaping(token.value);
+      token.type = MLTT_TEXT;
+      token.name = "";
+      // Merge with a directly preceding text token instead of adding one.
+      if(!tokens.empty() && tokens.back().type == MLTT_TEXT) {
+        tokens.back().value += token.value;
+      } else {
+        tokens.push_back(token);
+      }
+
+      state = UNDEFINED;
+      break;
+
+    case ENDOFITEM:
+      token.value = "\n";
+      i++;
+
+      token.type = MLTT_ENDOFITEM;
+      token.name = "";
+      tokens.push_back(token);
+
+      state = UNDEFINED;
+      break;
+
+    case UNDEFINED:
+      // Decide from the current character what comes next. Only the
+      // "${" and '{' cases consume input here.
+      switch(mlvalue[i]) {
+      case '$':
+        if(i < mlvalue.length() - 1 && mlvalue[i + 1] == '{') {
+          i++; // Drop the '$'; the '{' is handled on the next round.
+        } else {
+          state = TEXT;
+        }
+        break;
+      case '{':
+        state = NAME;
+        i++;
+        break;
+      case '\n':
+        state = ENDOFITEM;
+        break;
+      default:
+        state = TEXT;
+        break;
+      }
+      break;
+    }
+  }
+
+  if(state != UNDEFINED) {
+    printf("Oups... missed something in the end!\n");
+    // Only a pending VALUE holds a freshly parsed name. A pending NAME
+    // means the input stopped right after '{': token is then either
+    // uninitialised or a stale copy of the previous token, so adding it
+    // would produce a garbage or duplicate entry.
+    if(state == VALUE) tokens.push_back(token);
+  }
+
+  return tokens;
+}
+
+#ifdef TEST_MLTOKENIZER
+
+/**
+ * Manual test driver (built only when TEST_MLTOKENIZER is defined):
+ * tokenizes a fixed sample string and prints the type, name and value of
+ * every resulting token to stdout.
+ */
+int main()
+{
+  const std::string sample = "$ab}}c\ndef ${na$me|${{va$lue}}}\n12${34}\n";
+  std::vector< mltoken_t > tokens = mltokenize(sample);
+
+  for(std::vector< mltoken_t >::iterator it = tokens.begin();
+      it != tokens.end(); ++it) {
+    printf("Token:\n");
+    printf("\tType: ");
+    switch(it->type) {
+    case MLTT_VALUE:
+      printf("VALUE\n");
+      break;
+    case MLTT_TEXT:
+      printf("TEXT\n");
+      break;
+    case MLTT_ENDOFITEM:
+      printf("ENDOFITEM\n");
+      break;
+    case MLTT_UNDEFINED:
+      printf("UNDEFINED\n");
+      break;
+    }
+    printf("\tName: %s\n", it->name.c_str());
+    printf("\tValue: %s\n", it->value.c_str());
+    printf("\n");
+  }
+
+  return 0;
+}
+
+#endif