diff options
| author | deva <deva> | 2009-04-20 09:50:30 +0000 | 
|---|---|---|
| committer | deva <deva> | 2009-04-20 09:50:30 +0000 | 
| commit | 73f3fb396dc2ba089b4e32b0bd63dc3e615f8466 (patch) | |
| tree | fc7aba453c7dbec309aef32991684c91cd51083f /server/src | |
| parent | 46983af88f19c184c9e0ac96ebcc62827d73b944 (diff) | |
Added utf8 decode.
Diffstat (limited to 'server/src')
| -rw-r--r-- | server/src/Makefile.am | 2 | ||||
| -rw-r--r-- | server/src/queryparser.cc | 2 | ||||
| -rw-r--r-- | server/src/utf8.cc | 374 | ||||
| -rw-r--r-- | server/src/utf8.h | 96 | 
4 files changed, 473 insertions, 1 deletions
| diff --git a/server/src/Makefile.am b/server/src/Makefile.am index 086905a..78c2fce 100644 --- a/server/src/Makefile.am +++ b/server/src/Makefile.am @@ -29,6 +29,7 @@ pracrod_SOURCES = \  	templateparser.cc \  	transactionparser.cc \  	tcpsocket.cc \ +	utf8.cc \  	widgetgenerator.cc \  	xml_encode_decode.cc @@ -57,6 +58,7 @@ EXTRA_DIST = \  	templateparser.h \  	transactionparser.h \  	tcpsocket.h \ +	utf8.h \  	widgetgenerator.h \  	xml_encode_decode.h diff --git a/server/src/queryparser.cc b/server/src/queryparser.cc index f8d4a09..76b24a6 100644 --- a/server/src/queryparser.cc +++ b/server/src/queryparser.cc @@ -59,7 +59,7 @@ void QueryParser::startTag(std::string name, std::map< std::string, std::string>    }    if(name == "value") { -    stack.back()->values[attributes["name"]] = attributes["value"]; +    stack.back()->values[attributes["name"]] = utf8.decode(attributes["value"]);    }  } diff --git a/server/src/utf8.cc b/server/src/utf8.cc new file mode 100644 index 0000000..2909a94 --- /dev/null +++ b/server/src/utf8.cc @@ -0,0 +1,374 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/*************************************************************************** + *            utf8.cc + * + *  Tue Feb 27 19:18:23 CET 2007 + *  Copyright  2006 Bent Bisballe Nyeng + *  deva@aasimon.org + ****************************************************************************/ + +/* + *  This file is part of Artefact. + * + *  Artefact is free software; you can redistribute it and/or modify + *  it under the terms of the GNU General Public License as published by + *  the Free Software Foundation; either version 2 of the License, or + *  (at your option) any later version. + * + *  Artefact is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + *  GNU General Public License for more details. + * + *  You should have received a copy of the GNU General Public License + *  along with Artefact; if not, write to the Free Software + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA. + */ +#include "utf8.h" + +#include "debug.h" + +#include <errno.h> + +UTF8::UTF8(std::string encoding) +  throw(UTF8CreateException) +{ +  this->encoding = encoding; +  if(encoding != "ISO-8859-1") throw UTF8CreateException("Encoding not implemented."); + +  // ENCODE MAP +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = "
"; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = ""; +  map_encode[""] = " "; +  map_encode[""] = "¡"; +  map_encode[""] = "¢"; +  map_encode[""] = "£"; +  map_encode[""] = "¤"; +  map_encode[""] = "¥"; +  map_encode[""] = "¦"; +  map_encode[""] = "§"; +  map_encode[""] = "¨"; +  map_encode[""] = "©"; +  map_encode[""] = "ª"; +  map_encode[""] = "«"; +  map_encode[""] = "¬"; +  map_encode[""] = ""; +  map_encode[""] = "®"; +  map_encode[""] = "¯"; +  map_encode[""] = "°"; +  map_encode[""] = "±"; +  map_encode[""] = "²"; +  map_encode[""] = "³"; +  map_encode[""] = "´"; +  map_encode[""] = "µ"; +  map_encode[""] = "¶"; +  map_encode[""] = "·"; +  map_encode[""] = "¸"; +  map_encode[""] = "¹"; +  map_encode[""] = "º"; +  map_encode[""] = "»"; +  map_encode[""] = "¼"; +  map_encode[""] = "½"; +  map_encode[""] = "¾"; +  map_encode[""] = "¿"; +  map_encode[""] = "À"; +  map_encode[""] = "Á"; +  map_encode[""] = "Â"; +  map_encode[""] = "Ã"; +  map_encode[""] = "Ä"; +  map_encode[""] = "Å"; +  map_encode[""] = "Æ"; +  map_encode[""] = "Ç"; +  map_encode[""] = "È"; +  map_encode[""] = "É"; +  map_encode[""] = "Ê"; +  map_encode[""] = "Ë"; +  map_encode[""] = "Ì"; +  map_encode[""] = "Í"; +  map_encode[""] = "Î"; +  map_encode[""] = "Ï"; +  map_encode[""] = "Ð"; +  map_encode[""] = "Ñ"; +  map_encode[""] = "Ò"; +  map_encode[""] = "Ó"; +  map_encode[""] = "Ô"; +  map_encode[""] = "Õ"; +  map_encode[""] = "Ö"; +  map_encode[""] = "×"; +  map_encode[""] = "Ø"; +  map_encode[""] = "Ù"; +  map_encode[""] = "Ú"; +  map_encode[""] = "Û"; +  map_encode[""] = "Ü"; +  map_encode[""] = "Ý"; +  map_encode[""] = "Þ"; +  map_encode[""] = "ß"; +  map_encode[""] = "à"; +  map_encode[""] = "á"; +  map_encode[""] = "â"; +  map_encode[""] = "ã"; +  map_encode[""] = "ä"; +  map_encode[""] = "å"; +  map_encode[""] = "æ"; +  map_encode[""] = "ç"; +  map_encode[""] = "è"; +  map_encode[""] = "é"; +  map_encode[""] = "ê"; +  map_encode[""] = "ë"; +  map_encode[""] = "ì"; +  map_encode[""] = "í"; +  map_encode[""] = "î"; +  map_encode[""] = "ï"; +  map_encode[""] = "ð"; +  map_encode[""] = "ñ"; +  map_encode[""] = "ò"; +  map_encode[""] = "ó"; +  map_encode[""] = "ô"; +  map_encode[""] = "õ"; +  map_encode[""] = "ö"; +  map_encode[""] = "÷"; +  map_encode[""] = "ø"; +  map_encode[""] = "ù"; +  map_encode[""] = "ú"; +  map_encode[""] = "û"; +  map_encode[""] = "ü"; +  map_encode[""] = "ý"; +  map_encode[""] = "þ"; +  map_encode[""] = "ÿ"; + +  // DECODE MAP +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode["
"] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[""] = ""; +  map_decode[" "] = ""; +  map_decode["¡"] = ""; +  map_decode["¢"] = ""; +  map_decode["£"] = ""; +  map_decode["¤"] = ""; +  map_decode["¥"] = ""; +  map_decode["¦"] = ""; +  map_decode["§"] = ""; +  map_decode["¨"] = ""; +  map_decode["©"] = ""; +  map_decode["ª"] = ""; +  map_decode["«"] = ""; +  map_decode["¬"] = ""; +  map_decode[""] = ""; +  map_decode["®"] = ""; +  map_decode["¯"] = ""; +  map_decode["°"] = ""; +  map_decode["±"] = ""; +  map_decode["²"] = ""; +  map_decode["³"] = ""; +  map_decode["´"] = ""; +  map_decode["µ"] = ""; +  map_decode["¶"] = ""; +  map_decode["·"] = ""; +  map_decode["¸"] = ""; +  map_decode["¹"] = ""; +  map_decode["º"] = ""; +  map_decode["»"] = ""; +  map_decode["¼"] = ""; +  map_decode["½"] = ""; +  map_decode["¾"] = ""; +  map_decode["¿"] = ""; +  map_decode["À"] = ""; +  map_decode["Á"] = ""; +  map_decode["Â"] = ""; +  map_decode["Ã"] = ""; +  map_decode["Ä"] = ""; +  map_decode["Å"] = ""; +  map_decode["Æ"] = ""; +  map_decode["Ç"] = ""; +  map_decode["È"] = ""; +  map_decode["É"] = ""; +  map_decode["Ê"] = ""; +  map_decode["Ë"] = ""; +  map_decode["Ì"] = ""; +  map_decode["Í"] = ""; +  map_decode["Î"] = ""; +  map_decode["Ï"] = ""; +  map_decode["Ð"] = ""; +  map_decode["Ñ"] = ""; +  map_decode["Ò"] = ""; +  map_decode["Ó"] = ""; +  map_decode["Ô"] = ""; +  map_decode["Õ"] = ""; +  map_decode["Ö"] = ""; +  map_decode["×"] = ""; +  map_decode["Ø"] = ""; +  map_decode["Ù"] = ""; +  map_decode["Ú"] = ""; +  map_decode["Û"] = ""; +  map_decode["Ü"] = ""; +  map_decode["Ý"] = ""; +  map_decode["Þ"] = ""; +  map_decode["ß"] = ""; +  map_decode["à"] = ""; +  map_decode["á"] = ""; +  map_decode["â"] = ""; +  map_decode["ã"] = ""; +  map_decode["ä"] = ""; +  map_decode["å"] = ""; +  map_decode["æ"] = ""; +  map_decode["ç"] = ""; +  map_decode["è"] = ""; +  map_decode["é"] = ""; +  map_decode["ê"] = ""; +  map_decode["ë"] = ""; +  map_decode["ì"] = ""; +  map_decode["í"] = ""; +  map_decode["î"] = ""; +  map_decode["ï"] = ""; +  map_decode["ð"] = ""; +  map_decode["ñ"] = ""; +  map_decode["ò"] = ""; +  map_decode["ó"] = ""; +  map_decode["ô"] = ""; +  map_decode["õ"] = ""; +  map_decode["ö"] = ""; +  map_decode["÷"] = ""; +  map_decode["ø"] = ""; +  map_decode["ù"] = ""; +  map_decode["ú"] = ""; +  map_decode["û"] = ""; +  map_decode["ü"] = ""; +  map_decode["ý"] = ""; +  map_decode["þ"] = ""; +  map_decode["ÿ"] = ""; +} + +std::string UTF8::encode(std::string s) +  throw(UTF8EncodeException) +{ +  std::string ret; + +  for(int i = 0; i < (int)s.length(); i++) { +    std::string c; + +    if((unsigned char)s[i] <= 0x7F) c = s.substr(i, 1); +    else c = map_encode[s.substr(i, 1)]; + +    if(c.length() == 0) throw UTF8EncodeException("Unknown character in string"); + +    ret.append(c); + +  } + +  return ret; +  +} + +std::string UTF8::decode(std::string s) +  throw(UTF8DecodeException) +{ +  std::string ret; + +  int width = 1; +  for(int i = 0; i < (int)s.length(); i+=width) { +    if(/*(unsigned char)s[i]>=0x00&&*/(unsigned char)s[i] <= 0x7F) width = 1; // 00-7F	1 byte +    if((unsigned char)s[i] >= 0xC2 && (unsigned char)s[i] <= 0xDF) width = 2; // C2-DF	2 bytes +    if((unsigned char)s[i] >= 0xE0 && (unsigned char)s[i] <= 0xEF) width = 3; // E0-EF	3 bytes +    if((unsigned char)s[i] >= 0xF0 && (unsigned char)s[i] <= 0xF4) width = 4; // F0-F4	4 bytes + +    std::string c; + +    if(width == 1) c = s.substr(i, 1); +    else c = map_decode[s.substr(i, width)]; + +    if(c.length() == 0) throw UTF8DecodeException("Unknown character in string"); + +    ret.append(c); +  } + +  return ret; +} + +#ifdef TEST_UTF8 + +int main() +{ +  try { +    UTF8 utf8("ISO-8859-1"); + +    std::string a = "AaBb"; +    printf("a [%s]\n", a.c_str()); +    std::string b = utf8.encode(a); +    printf("b [%s]\n", b.c_str()); +    b = utf8.encode(b); +    printf("b [%s]\n", b.c_str()); +    std::string c = utf8.decode(b); +    printf("c [%s]\n", c.c_str()); +    c = utf8.decode(c); +    printf("c [%s]\n", c.c_str()); + +    if(a == c) return 0; +    else return 1; +  } catch( Pentominos::Exception &e ) { +    fprintf(stderr, "%s\n", e.what()); +    return 1; +  } + +  return 0; +} + +#endif//TEST_UTF8 diff --git a/server/src/utf8.h b/server/src/utf8.h new file mode 100644 index 0000000..98f6ff9 --- /dev/null +++ b/server/src/utf8.h @@ -0,0 +1,96 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/*************************************************************************** + *            utf8.h + * + *  Tue Feb 27 19:18:23 CET 2007 + *  Copyright  2006 Bent Bisballe Nyeng + *  deva@aasimon.org + ****************************************************************************/ + +/* + *  This file is part of Artefact. + * + *  Artefact is free software; you can redistribute it and/or modify + *  it under the terms of the GNU General Public License as published by + *  the Free Software Foundation; either version 2 of the License, or + *  (at your option) any later version. + * + *  Artefact is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + *  GNU General Public License for more details. + * + *  You should have received a copy of the GNU General Public License + *  along with Artefact; if not, write to the Free Software + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA. + */ +#ifndef __ARTEFACT_UTF8_H__ +#define __ARTEFACT_UTF8_H__ + +#include <string> +#include <map> + +#include "exception.h" + +/** + * This exception is thrown by UTF8 when the subsystem fails to initialize. + */ +class UTF8CreateException: public Exception { +public: +  UTF8CreateException(std::string reason) :  +    Exception("Error during creation of the UTF8 subsystem: " + reason) {} +}; + +/** + * This exception is thrown by UTF8 when the subsystem fails encode the gives string. + */ +class UTF8EncodeException: public Exception { +public: +  UTF8EncodeException(std::string reason) :  +    Exception("Error during UTF8 encoding: " + reason) {} +}; + +/** + * This exception is thrown by UTF8 when the subsystem fails decode the gives string. + */ +class UTF8DecodeException: public Exception { +public: +  UTF8DecodeException(std::string reason) :  +    Exception("Error during UTF8 decoding: " + reason) {} +}; + +/** + * UTF-8 handler class.\n + * It is used to convert between UTF-8 and some native charset Default + * is ISO-8859-1. (Currently only the ISO-8859-1 charset is implemented!) + */ +class UTF8 { +public: +  /** +   * Constructor. +   * @param encoding A string containing native charset. Default is ISO-8859-1 +   */ +  UTF8(std::string encoding = "ISO-8859-1") throw(UTF8CreateException); +   +  /** +   * Encode a string from native encoding to UTF-8 +   * @param s The string to encode. +   * @return The UTF-8 encoded string. +   */ +  std::string encode(std::string s) throw(UTF8EncodeException); +   +  /** +   * Decode a string from UTF-8 to native encoding. +   * @param s The UTF-8 string to decode. +   * @return The decoded string. +   */ +  std::string decode(std::string s) throw(UTF8DecodeException); +   +private: +  std::string encoding; +   +  std::map< std::string, std::string > map_encode; +  std::map< std::string, std::string > map_decode; +}; + +#endif/*__ARTEFACT_UTF8_H__*/ | 
