diff options
-rw-r--r-- | server/src/Makefile.am | 7 | ||||
-rw-r--r-- | server/src/saxparser.cc | 226 | ||||
-rw-r--r-- | server/src/saxparser.h | 82 |
3 files changed, 302 insertions, 13 deletions
diff --git a/server/src/Makefile.am b/server/src/Makefile.am index dbc481a..f970117 100644 --- a/server/src/Makefile.am +++ b/server/src/Makefile.am @@ -102,6 +102,7 @@ EXTRA_DIST = \ ################ TESTFILES = \ + test_saxparser \ test_versionstr \ test_macrolist \ test_queryhandlerpentominos \ @@ -132,6 +133,12 @@ test: $(TESTFILES) test_clean: rm -f $(TESTFILES) $(TESTLOGS) +TEST_SAXPARSER_FILES = \ + saxparser.cc \ + $(BASICFILES) +test_saxparser: $(TEST_SAXPARSER_FILES) + @../../tools/test $(TEST_SAXPARSER_FILES) $(BASICFLAGS) $(PARSERFLAGS) + TEST_VERSIONSTR_FILES = \ versionstr.cc \ $(BASICFILES) diff --git a/server/src/saxparser.cc b/server/src/saxparser.cc index f728928..ee03de1 100644 --- a/server/src/saxparser.cc +++ b/server/src/saxparser.cc @@ -92,7 +92,7 @@ SAXParser::SAXParser() SAXParser::~SAXParser() { - XML_ParserFree(p); + if(p) XML_ParserFree(p); } int SAXParser::parse() @@ -113,6 +113,14 @@ int SAXParser::parse() return 0; } +static bool iswhitespace(char *buf, size_t size) +{ + for(size_t i = 0; i < size; i++) + if(buf[i] != ' ' && buf[i] != '\n' && buf[i] != '\t' && buf[i] != '\r') + return false; + return true; +} + bool SAXParser::parse(char *data, size_t size) { PRACRO_DEBUG(sax, "parse %d bytes\n", size); @@ -122,6 +130,8 @@ bool SAXParser::parse(char *data, size_t size) if(! XML_Parse(p, data, size, false) ) { if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true; + if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true; + if(done && XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true; parseError(data, size, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p)); return false; } @@ -129,6 +139,8 @@ bool SAXParser::parse(char *data, size_t size) if(done) { if(! 
XML_Parse(p, data, 0, true) ) { if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true; + if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true; + if(XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true; parseError(data, 0, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p)); return false; } @@ -155,17 +167,58 @@ unsigned int SAXParser::usedBytes() #ifdef TEST_SAXPARSER -/** - * Compile with: g++ -DTEST_SAXPARSER sax_parser.cc -lexpat -otext_saxparser - * Run with: ./test_saxparser [xmlfile] - */ + +#define XMLFILE "/tmp/saxparsertest.xml" + +#include "exception.h" + #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> - -class MyParser :public SAXParser { +#include <string.h> +#include <stdio.h> +#include <memory.h> + +static char xml[] = +"<?xml version='1.0' encoding='UTF-8'?>\n" +"<pracro version=\"1.0\" user=\"testuser\" cpr=\"1505050505\">\n" +" <commit version=\"\" macro=\"referral\" course=\"amd_forunders\">\n" +" <field value=\"Some docs\" name=\"referral.doctor\"/>\n" +" <field value=\"DIMS\" name=\"referral.diagnosecode\"/>\n" +" <field value=\"Avs\" name=\"referral.diagnose\"/>\n" +" </commit>\n" +"</pracro>\n \t\n\r" + ; + +static char xml_notrailingwhitespace[] = +"<?xml version='1.0' encoding='UTF-8'?>\n" +"<pracro version=\"1.0\" user=\"testuser\" cpr=\"1505050505\">\n" +" <commit version=\"\" macro=\"referral\" course=\"amd_forunders\">\n" +" <field value=\"Some docs\" name=\"referral.doctor\"/>\n" +" <field value=\"DIMS\" name=\"referral.diagnosecode\"/>\n" +" <field value=\"Avs\" name=\"referral.diagnose\"/>\n" +" </commit>\n" +"</pracro>" + ; + +static char xml_fail[] = +"<?xml version='1.0' encoding='UTF-8'?>\n" +"<pracro version=\"1.0\" user\"testuser\" cpr=\"1505050505\">\n" +" <request macro=\"test\" course=\"test\"/>\n" +"</pracro>\n" + ; + +static char xml_fail2[] = +"<?xml version='1.0' encoding='UTF-8'?>\n" +"<pracro version=\"1.0\" user\"testuser\" 
cpr=\"1505050505\">\n" +" <request macro=\"test\" course=\"test\"/>\n" +"</pracro>\n" +"this is junk\n" + ; + +class MyFileParser :public SAXParser { public: - MyParser(char *file) { + MyFileParser(const char *file) { fd = open(file, O_RDONLY); } @@ -175,17 +228,164 @@ public: void startTag(std::string name, std::map< std::string, std::string> attributes) { - printf("<%s>\n", name.c_str()); + //printf("<%s>\n", name.c_str()); + } + + void parseError(char *buf, size_t len, std::string error, int lineno) + { + throw Exception(error); } private: int fd; }; -int main(int argc, char *argv[]) { - if(argc < 2) return 1; - MyParser parser(argv[1]); - parser.parse(); +class MyBufferParser :public SAXParser { +public: + void startTag(std::string name, std::map< std::string, std::string> attributes) + { + //printf("<%s>\n", name.c_str()); + } + + void parseError(char *buf, size_t len, std::string error, int lineno) + { + throw Exception(error); + } +}; + +int main(int argc, char *argv[]) +{ + FILE *fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml); + fclose(fp); + + // Test callback parser + { + MyFileParser parser(XMLFILE); + parser.parse(); + } + + // Test buffer parser + for(size_t sz = 1; sz < 1000; sz++) { + bool test = false; + MyBufferParser parser; + std::string buf = xml; + size_t pos = 0; + while(pos < buf.length()) { + std::string substr = buf.substr(pos, sz); + + try { + test |= parser.parse((char*)substr.c_str(), substr.length()); + } catch(Exception &e) { + printf("Buffer parser failed on size %d: %s [%s]\n", sz, e.what(), substr.c_str()); + } + pos += sz; + } + + if(!test) { + printf("Buffer parser failed on size %d\n", sz); + return 1; + } + } + + fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml_notrailingwhitespace); + fprintf(fp, xml_notrailingwhitespace); + fclose(fp); + // Test buffer parser with multiple documents in 
the same buffer + { + fp = fopen(XMLFILE, "r"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + for(size_t sz = 1; sz < 1000; sz++) { + MyBufferParser *parser = NULL; + rewind(fp); + size_t numdocs = 0; + char *buf = new char[sz + 1]; + memset(buf, 0, sz + 1); + size_t size; + while( (size = fread(buf, 1, sz, fp)) > 0) { + while(size) { + if(parser == NULL) { + parser = new MyBufferParser(); + } + if(parser->parse(buf, size)) { + + // Got one + numdocs++; + + size = size - parser->usedBytes(); + strcpy(buf, buf + parser->usedBytes()); + delete parser; parser = NULL; + } else { + size = 0; + memset(buf, 0, sz + 1); + } + } + } + if(numdocs != 2) { + printf("Failed to parse two documents.\n"); + return 1; + } + if(parser) delete parser; parser = NULL; + delete[] buf; + } + fclose(fp); + } + + fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml_fail); + fclose(fp); + + // Test failure + { + MyFileParser parser(XMLFILE); + try { + parser.parse(); + } catch(Exception &e) { + goto goon; + } + printf("This test should fail...\n"); + return 1; + } + goon: + + fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml_fail2); + fclose(fp); + + // Test failure + { + MyFileParser parser(XMLFILE); + try { + parser.parse(); + } catch(Exception &e) { + goto goonagain; + } + printf("This test should fail...\n"); + return 1; + } + goonagain: + + unlink(XMLFILE); } #endif/*TEST_SAXPARSER*/ diff --git a/server/src/saxparser.h b/server/src/saxparser.h index da33440..9f2faa2 100644 --- a/server/src/saxparser.h +++ b/server/src/saxparser.h @@ -31,21 +31,92 @@ #include <map> #include <expat.h> +/** + * This class implements a SAX Parser, utilising the eXpat XML parser library. + * It uses virtual methods for the callbacks, and transforms tagnames and + * attributes into C++ values (std::string and std::vector). 
+ */ class SAXParser { public: + /** + * Constructor. + * It initialises the eXpat library. + */ SAXParser(); + + /** + * Destructor. + * It frees the eXpat library resources. + */ virtual ~SAXParser(); + /** + * Call this method to use the reimplemented readData method for input. + * The entire document is parsed through this single call. + * @return An integer with value 0 on success, or 1 on failure. + * @see int readData(char *data, size_t size) + */ int parse(); + /** + * Character data callback method. + * Reimplement this to get character callbacks. + * This callback might be called several times, if a character block is big. In + * that case it might be necessary to buffer the received bytes. + * @param data A std::string containing the character data. + */ virtual void characterData(std::string &data) {} + + /** + * Start tag callback method. + * Reimplement this to get start tag callbacks. + * It is called each time a new start tag is seen. + * @param name A std::string containing the tag name. + * @param attributes A std::map of std::string to std::string containing all + * attributes for the tag. + */ virtual void startTag(std::string name, std::map< std::string, std::string> attributes) {} + + /** + * End tag callback method. + * Reimplement this to get end tag callbacks. + * It is called each time an end tag is seen. + * @param name A std::string containing the tag name. + */ virtual void endTag(std::string name) {} + /** + * Error callback method. + * Reimplement this to handle error messages. + * A default implementation prints out the current buffer, line number and error + * message to the screen. + * @param buf A char* containing the current buffer being parsed. + * @param len A size_t containing the length of the current buffer being parsed. + * @param error A std::string containing the error message. + * @param lineno An integer containing the line number on which the error occurred. 
+ */ virtual void parseError(char *buf, size_t len, std::string error, int lineno); + /** + * Buffer parse method. + * Use this method to parse an external buffer with xml data. + * This method can be called several times (i.e. in a read loop). + * @param buf A char* containing the buffer to parse. + * @param size A size_t containing the size of the buffer to parse. + * @return A boolean with the value true if a complete document has been seen. + * false otherwise. + * @see bool parse(char *buf, size_t size) + */ bool parse(char *buf, size_t size); + /** + * Get the number of bytes used from the last buffer. + * If the buffer parse method is used, and the buffer comes from a stream of xml + * documents, this method can be used to figure out how many bytes from the stream + * should be replayed to another parser. + * @return an integer containing the number of bytes used from the last buffer. + * @see bool parse(char *buf, size_t size) + */ unsigned int usedBytes(); // private stuff that needs to be public! @@ -53,6 +124,17 @@ public: bool done; protected: + /** + * Read data callback method. + * This method is used when the parse() method is used. + * It can be used to connect the parser with e.g. a file. + * @param data A char* containing the buffer to be filled. + * @param size A size_t containing the maximum number of bytes to be filled (i.e. + * the size of data) + * @return An integer containing the actual number of bytes filled. 0 if no more + * bytes are available. + * @see int parse() + */ virtual int readData(char *data, size_t size) { return 0; } XML_Parser p; |