1 files changed, 196 insertions, 0 deletions
diff --git a/server/src/mltokenizer.cc b/server/src/mltokenizer.cc
new file mode 100644
index 0000000..91d5b4b
--- /dev/null
+++ b/server/src/mltokenizer.cc
@@ -0,0 +1,196 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ *            mltokenizer.cc
+ *
+ *  Tue Nov  4 08:46:35 CET 2008
+ *  Copyright 2008 Bent Bisballe Nyeng
+ *  deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ *  This file is part of Pracro.
+ *
+ *  Pracro is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Pracro is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Pracro; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+ */
+#include "mltokenizer.h"
+
+// Undo the placeholder substitution done by replaceescaping(): every '\1'
+// byte in mlvalue becomes '{' and every '\2' byte becomes '}'; all other
+// characters are copied through unchanged. Note that the result holds single
+// braces, not the doubled "{{" / "}}" sequences of the raw input.
+static std::string rereplaceescaping(std::string mlvalue)
+{
+  std::string restored;
+  for(size_t pos = 0; pos < mlvalue.length(); pos++) {
+    const char ch = mlvalue[pos];
+    switch(ch) {
+    case '\1': restored += '{'; break;
+    case '\2': restored += '}'; break;
+    default:   restored += ch;  break;
+    }
+  }
+
+  return restored;
+}
+
+// Collapse the escape sequences "{{" and "}}" into the placeholder bytes
+// '\1' and '\2', leaving every other character as it is. Pairs are consumed
+// left to right, so "{{{" gives '\1' followed by a plain '{'.
+static std::string replaceescaping(std::string mlvalue)
+{
+  const size_t len = mlvalue.length();
+  std::string escaped;
+  for(size_t pos = 0; pos < len; pos++) {
+    const char ch = mlvalue[pos];
+    if((ch == '{' || ch == '}') && pos + 1 < len && mlvalue[pos + 1] == ch) {
+      escaped += (ch == '{') ? '\1' : '\2';
+      pos++; // step over the second brace of the pair
+    } else {
+      escaped += ch;
+    }
+  }
+  return escaped;
+}
+
+// Extract one token from input.
+//
+// input: text to scan.
+// start: index of the first character of the token.
+// term:  set of terminator characters; any one of them ends the token.
+//
+// Returns the characters from start up to, but not including, the first
+// terminator. If no terminator follows, the rest of input is returned; if
+// start is at or past the end of input, the result is empty.
+static std::string gettoken(std::string input, size_t start, std::string term)
+{
+  // Guard first: substr() would throw for a start beyond the end.
+  if(start >= input.length()) return std::string();
+
+  const size_t stop = input.find_first_of(term, start);
+  if(stop == std::string::npos) return input.substr(start);
+
+  return input.substr(start, stop - start);
+}
+
+enum tokenizerstate_t { // what mltokenize() expects at the current position
+  NAME,      // name part of a "{name|value}" item, read up to '|'
+  VALUE,     // value part of an item, read up to '}' or a newline
+  TEXT,      // literal text, read up to '$' or a newline
+  ENDOFITEM, // a newline that gets a token of its own
+  UNDEFINED  // between tokens: the next character selects the state
+};
+
+// Split mlvalue into tokens. "{{" and "}}" are escapes for literal braces.
+// "${name|value}" (or a bare '{' where a new token starts) becomes an
+// MLTT_VALUE token, a newline outside an item an MLTT_ENDOFITEM token and
+// the rest MLTT_TEXT, adjacent text pieces being merged. An item left open
+// at the end of input is reported on stdout and emitted as far as it was read.
+std::vector< mltoken_t > mltokenize(std::string mlvalue)
+{
+  std::vector< mltoken_t > tokens;
+
+  mlvalue = replaceescaping(mlvalue); // hide escaped braces while scanning
+
+  tokenizerstate_t state = UNDEFINED;
+  mltoken_t token;
+  size_t i = 0;
+  while(i < mlvalue.length()) {
+    switch(state) {
+    case NAME:
+      token.name = gettoken(mlvalue, i, "|");
+      i += token.name.length() + 1; // the + 1 steps over the '|'
+      state = VALUE;
+      break;
+
+    case VALUE:
+      token.value = gettoken(mlvalue, i, "}\n");
+      i += token.value.length() + 1; // the + 1 steps over '}' or '\n'
+      token.value = rereplaceescaping(token.value);
+      token.type = MLTT_VALUE;
+      tokens.push_back(token);
+      state = UNDEFINED;
+      break;
+
+    case TEXT:
+      if(mlvalue[i] == '$') token.value = "$"; // a lone '$' is literal text
+      else token.value = gettoken(mlvalue, i, "$\n");
+      i += token.value.length();
+      token.value = rereplaceescaping(token.value);
+      token.type = MLTT_TEXT;
+      token.name = "";
+      if(tokens.size() && tokens.back().type == MLTT_TEXT) tokens.back().value += token.value;
+      else tokens.push_back(token);
+      state = UNDEFINED;
+      break;
+
+    case ENDOFITEM:
+      token.value = "\n";
+      i++;
+      token.type = MLTT_ENDOFITEM;
+      token.name = "";
+      tokens.push_back(token);
+      state = UNDEFINED;
+      break;
+
+    case UNDEFINED:
+      switch(mlvalue[i]) {
+      case '$':
+        if(i < mlvalue.length() - 1 && mlvalue[i + 1] == '{') { i++; break; } // ignore
+        else { state = TEXT; break; }
+      case '{':
+        // Clean slate, so a '{' ending the input cannot re-emit the old token.
+        token.name = ""; token.value = ""; token.type = MLTT_VALUE;
+        state = NAME; i++; break;
+      case '\n': state = ENDOFITEM; break;
+      default: state = TEXT; break;
+      }
+    }
+  }
+
+  if(state != UNDEFINED) {
+    printf("Oups... missed something in the end!\n");
+    tokens.push_back(token);
+  }
+
+  return tokens;
+}
+
+#ifdef TEST_MLTOKENIZER
+
+// Self-test driver, only built when TEST_MLTOKENIZER is defined: tokenizes
+// a fixed sample and prints type, name and value of every token to stdout.
+int main()
+{
+  const std::string sample = "$ab}}c\ndef ${na$me|${{va$lue}}}\n12${34}\n";
+  std::vector< mltoken_t > parsed = mltokenize(sample);
+
+  for(size_t n = 0; n < parsed.size(); n++) {
+    const mltoken_t &tok = parsed[n];
+    printf("Token:\n");
+    printf("\tType: ");
+    switch(tok.type) {
+    case MLTT_VALUE: printf("VALUE\n"); break;
+    case MLTT_TEXT: printf("TEXT\n"); break;
+    case MLTT_ENDOFITEM: printf("ENDOFITEM\n"); break;
+    case MLTT_UNDEFINED: printf("UNDEFINED\n"); break;
+    }
+    printf("\tName: %s\n", tok.name.c_str());
+    printf("\tValue: %s\n", tok.value.c_str());
+    printf("\n");
+  }
+  return 0;
+}
+
+#endif