From 73f3fb396dc2ba089b4e32b0bd63dc3e615f8466 Mon Sep 17 00:00:00 2001
From: deva <deva>
Date: Mon, 20 Apr 2009 09:50:30 +0000
Subject: Added utf8 decode.

---
 server/src/Makefile.am    |   2 +
 server/src/queryparser.cc |   2 +-
 server/src/utf8.cc        | 374 ++++++++++++++++++++++++++++++++++++++++++++++
 server/src/utf8.h         |  96 ++++++++++++
 4 files changed, 473 insertions(+), 1 deletion(-)
 create mode 100644 server/src/utf8.cc
 create mode 100644 server/src/utf8.h

diff --git a/server/src/Makefile.am b/server/src/Makefile.am
index 086905a..78c2fce 100644
--- a/server/src/Makefile.am
+++ b/server/src/Makefile.am
@@ -29,6 +29,7 @@ pracrod_SOURCES = \
 	templateparser.cc \
 	transactionparser.cc \
 	tcpsocket.cc \
+	utf8.cc \
 	widgetgenerator.cc \
 	xml_encode_decode.cc
 
@@ -57,6 +58,7 @@ EXTRA_DIST = \
 	templateparser.h \
 	transactionparser.h \
 	tcpsocket.h \
+	utf8.h \
 	widgetgenerator.h \
 	xml_encode_decode.h
 
diff --git a/server/src/queryparser.cc b/server/src/queryparser.cc
index f8d4a09..76b24a6 100644
--- a/server/src/queryparser.cc
+++ b/server/src/queryparser.cc
@@ -59,7 +59,7 @@ void QueryParser::startTag(std::string name, std::map< std::string, std::string>
   }
 
   if(name == "value") {
-    stack.back()->values[attributes["name"]] = attributes["value"];
+    stack.back()->values[attributes["name"]] = utf8.decode(attributes["value"]);
   }
 
 }
diff --git a/server/src/utf8.cc b/server/src/utf8.cc
new file mode 100644
index 0000000..2909a94
--- /dev/null
+++ b/server/src/utf8.cc
@@ -0,0 +1,374 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ *            utf8.cc
+ *
+ *  Tue Feb 27 19:18:23 CET 2007
+ *  Copyright  2006 Bent Bisballe Nyeng
+ *  deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ *  This file is part of Artefact.
+ *
+ *  Artefact is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Artefact is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Artefact; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+ */
+#include "utf8.h"
+
+#include "debug.h"
+
+#include <errno.h>
+
+UTF8::UTF8(std::string encoding)
+  throw(UTF8CreateException)
+{
+  this->encoding = encoding;
+  if(encoding != "ISO-8859-1") throw UTF8CreateException("Encoding not implemented.");
+
+  // ENCODE MAP: one native (ISO-8859-1) byte above 0x7F -> its UTF-8 sequence
+  map_encode["�"] = "€";
+  map_encode["�"] = "";
+  map_encode["�"] = "‚";
+  map_encode["�"] = "ƒ";
+  map_encode["�"] = "„";
+  map_encode["�"] = "…";
+  map_encode["�"] = "†";
+  map_encode["�"] = "‡";
+  map_encode["�"] = "ˆ";
+  map_encode["�"] = "‰";
+  map_encode["�"] = "Š";
+  map_encode["�"] = "‹";
+  map_encode["�"] = "Œ";
+  map_encode["�"] = "";
+  map_encode["�"] = "Ž";
+  map_encode["�"] = "";
+  map_encode["�"] = "";
+  map_encode["�"] = "‘";
+  map_encode["�"] = "’";
+  map_encode["�"] = "“";
+  map_encode["�"] = "”";
+  map_encode["�"] = "•";
+  map_encode["�"] = "–";
+  map_encode["�"] = "—";
+  map_encode["�"] = "˜";
+  map_encode["�"] = "™";
+  map_encode["�"] = "š";
+  map_encode["�"] = "›";
+  map_encode["�"] = "œ";
+  map_encode["�"] = "";
+  map_encode["�"] = "ž";
+  map_encode["�"] = "Ÿ";
+  map_encode["�"] = " ";
+  map_encode["�"] = "¡";
+  map_encode["�"] = "¢";
+  map_encode["�"] = "£";
+  map_encode["�"] = "¤";
+  map_encode["�"] = "¥";
+  map_encode["�"] = "¦";
+  map_encode["�"] = "§";
+  map_encode["�"] = "¨";
+  map_encode["�"] = "©";
+  map_encode["�"] = "ª";
+  map_encode["�"] = "«";
+  map_encode["�"] = "¬";
+  map_encode["�"] = "­";
+  map_encode["�"] = "®";
+  map_encode["�"] = "¯";
+  map_encode["�"] = "°";
+  map_encode["�"] = "±";
+  map_encode["�"] = "²";
+  map_encode["�"] = "³";
+  map_encode["�"] = "´";
+  map_encode["�"] = "µ";
+  map_encode["�"] = "¶";
+  map_encode["�"] = "·";
+  map_encode["�"] = "¸";
+  map_encode["�"] = "¹";
+  map_encode["�"] = "º";
+  map_encode["�"] = "»";
+  map_encode["�"] = "¼";
+  map_encode["�"] = "½";
+  map_encode["�"] = "¾";
+  map_encode["�"] = "¿";
+  map_encode["�"] = "À";
+  map_encode["�"] = "Á";
+  map_encode["�"] = "Â";
+  map_encode["�"] = "Ã";
+  map_encode["�"] = "Ä";
+  map_encode["�"] = "Å";
+  map_encode["�"] = "Æ";
+  map_encode["�"] = "Ç";
+  map_encode["�"] = "È";
+  map_encode["�"] = "É";
+  map_encode["�"] = "Ê";
+  map_encode["�"] = "Ë";
+  map_encode["�"] = "Ì";
+  map_encode["�"] = "Í";
+  map_encode["�"] = "Î";
+  map_encode["�"] = "Ï";
+  map_encode["�"] = "Ð";
+  map_encode["�"] = "Ñ";
+  map_encode["�"] = "Ò";
+  map_encode["�"] = "Ó";
+  map_encode["�"] = "Ô";
+  map_encode["�"] = "Õ";
+  map_encode["�"] = "Ö";
+  map_encode["�"] = "×";
+  map_encode["�"] = "Ø";
+  map_encode["�"] = "Ù";
+  map_encode["�"] = "Ú";
+  map_encode["�"] = "Û";
+  map_encode["�"] = "Ü";
+  map_encode["�"] = "Ý";
+  map_encode["�"] = "Þ";
+  map_encode["�"] = "ß";
+  map_encode["�"] = "à";
+  map_encode["�"] = "á";
+  map_encode["�"] = "â";
+  map_encode["�"] = "ã";
+  map_encode["�"] = "ä";
+  map_encode["�"] = "å";
+  map_encode["�"] = "æ";
+  map_encode["�"] = "ç";
+  map_encode["�"] = "è";
+  map_encode["�"] = "é";
+  map_encode["�"] = "ê";
+  map_encode["�"] = "ë";
+  map_encode["�"] = "ì";
+  map_encode["�"] = "í";
+  map_encode["�"] = "î";
+  map_encode["�"] = "ï";
+  map_encode["�"] = "ð";
+  map_encode["�"] = "ñ";
+  map_encode["�"] = "ò";
+  map_encode["�"] = "ó";
+  map_encode["�"] = "ô";
+  map_encode["�"] = "õ";
+  map_encode["�"] = "ö";
+  map_encode["�"] = "÷";
+  map_encode["�"] = "ø";
+  map_encode["�"] = "ù";
+  map_encode["�"] = "ú";
+  map_encode["�"] = "û";
+  map_encode["�"] = "ü";
+  map_encode["�"] = "ý";
+  map_encode["�"] = "þ";
+  map_encode["�"] = "ÿ";
+
+  // DECODE MAP: multi-byte UTF-8 sequence -> its native (ISO-8859-1) replacement
+  map_decode["€"] = "�";
+  map_decode[""] = "�";
+  map_decode["‚"] = "�";
+  map_decode["ƒ"] = "�";
+  map_decode["„"] = "�";
+  map_decode["…"] = "�";
+  map_decode["†"] = "�";
+  map_decode["‡"] = "�";
+  map_decode["ˆ"] = "�";
+  map_decode["‰"] = "�";
+  map_decode["Š"] = "�";
+  map_decode["‹"] = "�";
+  map_decode["Œ"] = "�";
+  map_decode[""] = "�";
+  map_decode["Ž"] = "�";
+  map_decode[""] = "�";
+  map_decode[""] = "�";
+  map_decode["‘"] = "�";
+  map_decode["’"] = "�";
+  map_decode["“"] = "�";
+  map_decode["”"] = "�";
+  map_decode["•"] = "�";
+  map_decode["–"] = "�";
+  map_decode["—"] = "�";
+  map_decode["˜"] = "�";
+  map_decode["™"] = "�";
+  map_decode["š"] = "�";
+  map_decode["›"] = "�";
+  map_decode["œ"] = "�";
+  map_decode[""] = "�";
+  map_decode["ž"] = "�";
+  map_decode["Ÿ"] = "�";
+  map_decode[" "] = "�";
+  map_decode["¡"] = "�";
+  map_decode["¢"] = "�";
+  map_decode["£"] = "�";
+  map_decode["¤"] = "�";
+  map_decode["¥"] = "�";
+  map_decode["¦"] = "�";
+  map_decode["§"] = "�";
+  map_decode["¨"] = "�";
+  map_decode["©"] = "�";
+  map_decode["ª"] = "�";
+  map_decode["«"] = "�";
+  map_decode["¬"] = "�";
+  map_decode["­"] = "�";
+  map_decode["®"] = "�";
+  map_decode["¯"] = "�";
+  map_decode["°"] = "�";
+  map_decode["±"] = "�";
+  map_decode["²"] = "�";
+  map_decode["³"] = "�";
+  map_decode["´"] = "�";
+  map_decode["µ"] = "�";
+  map_decode["¶"] = "�";
+  map_decode["·"] = "�";
+  map_decode["¸"] = "�";
+  map_decode["¹"] = "�";
+  map_decode["º"] = "�";
+  map_decode["»"] = "�";
+  map_decode["¼"] = "�";
+  map_decode["½"] = "�";
+  map_decode["¾"] = "�";
+  map_decode["¿"] = "�";
+  map_decode["À"] = "�";
+  map_decode["Á"] = "�";
+  map_decode["Â"] = "�";
+  map_decode["Ã"] = "�";
+  map_decode["Ä"] = "�";
+  map_decode["Å"] = "�";
+  map_decode["Æ"] = "�";
+  map_decode["Ç"] = "�";
+  map_decode["È"] = "�";
+  map_decode["É"] = "�";
+  map_decode["Ê"] = "�";
+  map_decode["Ë"] = "�";
+  map_decode["Ì"] = "�";
+  map_decode["Í"] = "�";
+  map_decode["Î"] = "�";
+  map_decode["Ï"] = "�";
+  map_decode["Ð"] = "�";
+  map_decode["Ñ"] = "�";
+  map_decode["Ò"] = "�";
+  map_decode["Ó"] = "�";
+  map_decode["Ô"] = "�";
+  map_decode["Õ"] = "�";
+  map_decode["Ö"] = "�";
+  map_decode["×"] = "�";
+  map_decode["Ø"] = "�";
+  map_decode["Ù"] = "�";
+  map_decode["Ú"] = "�";
+  map_decode["Û"] = "�";
+  map_decode["Ü"] = "�";
+  map_decode["Ý"] = "�";
+  map_decode["Þ"] = "�";
+  map_decode["ß"] = "�";
+  map_decode["à"] = "�";
+  map_decode["á"] = "�";
+  map_decode["â"] = "�";
+  map_decode["ã"] = "�";
+  map_decode["ä"] = "�";
+  map_decode["å"] = "�";
+  map_decode["æ"] = "�";
+  map_decode["ç"] = "�";
+  map_decode["è"] = "�";
+  map_decode["é"] = "�";
+  map_decode["ê"] = "�";
+  map_decode["ë"] = "�";
+  map_decode["ì"] = "�";
+  map_decode["í"] = "�";
+  map_decode["î"] = "�";
+  map_decode["ï"] = "�";
+  map_decode["ð"] = "�";
+  map_decode["ñ"] = "�";
+  map_decode["ò"] = "�";
+  map_decode["ó"] = "�";
+  map_decode["ô"] = "�";
+  map_decode["õ"] = "�";
+  map_decode["ö"] = "�";
+  map_decode["÷"] = "�";
+  map_decode["ø"] = "�";
+  map_decode["ù"] = "�";
+  map_decode["ú"] = "�";
+  map_decode["û"] = "�";
+  map_decode["ü"] = "�";
+  map_decode["ý"] = "�";
+  map_decode["þ"] = "�";
+  map_decode["ÿ"] = "�";
+}
+
+std::string UTF8::encode(std::string s)
+  throw(UTF8EncodeException)
+{
+  // Converts a native (ISO-8859-1) string to UTF-8, one input byte at a time.
+  // Throws UTF8EncodeException when a byte yields an empty replacement.
+  std::string encoded;
+  const int total = (int)s.length();
+  int pos = 0;
+  while(pos < total) {
+    const std::string byte = s.substr(pos, 1);
+    // 7-bit ASCII is copied unchanged; every other byte goes through the table.
+    const bool ascii = (unsigned char)byte[0] <= 0x7F;
+    const std::string utf = ascii ? byte : map_encode[byte];
+    if(utf.empty()) throw UTF8EncodeException("Unknown character in string");
+    encoded += utf;
+    pos++;
+  }
+
+  return encoded;
+}
+
+std::string UTF8::decode(std::string s)
+  throw(UTF8DecodeException)
+{
+  // Converts UTF-8 to the native charset: ASCII bytes are copied unchanged and
+  // multi-byte sequences are translated through map_decode.
+  std::string ret;
+
+  for(std::string::size_type i = 0, width = 0; i < s.length(); i += width) {
+    const unsigned char lead = (unsigned char)s[i];
+    if(lead <= 0x7F) width = 1;                      // 00-7F	1 byte
+    else if(lead >= 0xC2 && lead <= 0xDF) width = 2; // C2-DF	2 bytes
+    else if(lead >= 0xE0 && lead <= 0xEF) width = 3; // E0-EF	3 bytes
+    else if(lead >= 0xF0 && lead <= 0xF4) width = 4; // F0-F4	4 bytes
+    // 80-C1 and F5-FF never start a character; reject instead of reusing the old width.
+    else throw UTF8DecodeException("Unknown character in string");
+    if(width == 1) { ret.append(1, s[i]); continue; }
+    // find() instead of operator[] so that a failed lookup does not insert an entry.
+    std::map< std::string, std::string >::const_iterator hit = map_decode.find(s.substr(i, width));
+    if(hit == map_decode.end() || hit->second.empty()) throw UTF8DecodeException("Unknown character in string");
+    ret.append(hit->second);
+  }
+
+  return ret;
+}
+
+#ifdef TEST_UTF8
+
+int main()
+{
+  // Round-trip self-test (encode twice, decode twice); exit 0 on match, else 1.
+  try {
+    UTF8 utf8("ISO-8859-1");
+    // Non-ASCII bytes are hex-escaped so the test does not depend on the
+    // encoding this source file is saved in.
+    std::string a = "AaBb\xE6\xF8\xE5\xC6\xD8\xC5";
+    printf("a [%s]\n", a.c_str());
+    std::string b = utf8.encode(a);
+    printf("b [%s]\n", b.c_str());
+    b = utf8.encode(b);
+    printf("b [%s]\n", b.c_str());
+    std::string c = utf8.decode(b);
+    printf("c [%s]\n", c.c_str());
+    c = utf8.decode(c);
+    printf("c [%s]\n", c.c_str());
+
+    return a == c ? 0 : 1;
+  } catch( Exception &e ) {
+    // Exception is the base class the UTF8*Exception types in utf8.h derive from.
+    fprintf(stderr, "%s\n", e.what());
+    return 1;
+  }
+}
+
+#endif//TEST_UTF8
diff --git a/server/src/utf8.h b/server/src/utf8.h
new file mode 100644
index 0000000..98f6ff9
--- /dev/null
+++ b/server/src/utf8.h
@@ -0,0 +1,96 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ *            utf8.h
+ *
+ *  Tue Feb 27 19:18:23 CET 2007
+ *  Copyright  2006 Bent Bisballe Nyeng
+ *  deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ *  This file is part of Artefact.
+ *
+ *  Artefact is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Artefact is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with Artefact; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+ */
+#ifndef __ARTEFACT_UTF8_H__
+#define __ARTEFACT_UTF8_H__
+
+#include <string>
+#include <map>
+
+#include "exception.h"
+
+/** Thrown by the UTF8 constructor when the subsystem fails to initialize.
+ * The reason text is appended to a fixed prefix to form the Exception message.
+ */
+class UTF8CreateException: public Exception {
+public:
+  UTF8CreateException(std::string reason) : 
+    Exception("Error during creation of the UTF8 subsystem: " + reason) {}
+};
+
+/** Thrown by UTF8::encode when it fails to encode the given string.
+ * The reason text is appended to a fixed prefix to form the Exception message.
+ */
+class UTF8EncodeException: public Exception {
+public:
+  UTF8EncodeException(std::string reason) : 
+    Exception("Error during UTF8 encoding: " + reason) {}
+};
+
+/** Thrown by UTF8::decode when it fails to decode the given string.
+ * The reason text is appended to a fixed prefix to form the Exception message.
+ */
+class UTF8DecodeException: public Exception {
+public:
+  UTF8DecodeException(std::string reason) : 
+    Exception("Error during UTF8 decoding: " + reason) {}
+};
+
+/**
+ * UTF-8 handler class.\n
+ * It is used to convert between UTF-8 and a native charset. The default
+ * is ISO-8859-1. (Currently only the ISO-8859-1 charset is implemented!)
+ */
+class UTF8 {
+public:
+  /** Constructor; fills the tables used by encode() and decode().
+   * @param encoding Name of the native charset. Default is ISO-8859-1.
+   * @throws UTF8CreateException if encoding is anything but "ISO-8859-1".
+   */
+  UTF8(std::string encoding = "ISO-8859-1") throw(UTF8CreateException);
+  
+  /** Encode a string from native encoding to UTF-8.
+   * @param s The string to encode.
+   * @return The UTF-8 encoded string.
+   * @throws UTF8EncodeException if a byte above 0x7F has no replacement in the encode table.
+   */
+  std::string encode(std::string s) throw(UTF8EncodeException);
+  
+  /** Decode a string from UTF-8 to native encoding.
+   * @param s The UTF-8 string to decode.
+   * @return The decoded string.
+   * @throws UTF8DecodeException if a multi-byte sequence has no replacement in the decode table.
+   */
+  std::string decode(std::string s) throw(UTF8DecodeException);
+  
+private:
+  std::string encoding; // name of the native charset, as passed to the constructor
+  
+  std::map< std::string, std::string > map_encode; // native byte -> UTF-8 sequence, read by encode()
+  std::map< std::string, std::string > map_decode; // UTF-8 sequence -> native byte, read by decode()
+};
+
+#endif/*__ARTEFACT_UTF8_H__*/
-- 
cgit v1.2.3