diff options
Diffstat (limited to 'cpp/src/Freeze/Print.cpp')
-rw-r--r-- | cpp/src/Freeze/Print.cpp | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/cpp/src/Freeze/Print.cpp b/cpp/src/Freeze/Print.cpp new file mode 100644 index 00000000000..2fedd9101d7 --- /dev/null +++ b/cpp/src/Freeze/Print.cpp @@ -0,0 +1,420 @@ +#include <util/PlatformUtils.hpp> +#include <util/XMLString.hpp> +#include <util/XMLUniDefs.hpp> +#include <framework/XMLFormatter.hpp> +#include <util/TranscodingException.hpp> + +#include <dom/DOMString.hpp> + +#include <dom/DOM.hpp> +#include <parsers/DOMParser.hpp> + +#include <sax/ErrorHandler.hpp> +#include <sax/SAXParseException.hpp> + +#include <iostream> +#include <sstream> +#include <string> + +#include <Freeze/Print.h> + +using namespace std; + +static XMLFormatter* gFormatter = 0; +static XMLFormatter::UnRepFlags gUnRepFlags = XMLFormatter::UnRep_CharRef; +static XMLCh* gEncodingName = 0; + +// --------------------------------------------------------------------------- +// Local const data +// +// Note: This is the 'safe' way to do these strings. If you compiler supports +// L"" style strings, and portability is not a concern, you can use +// those types constants directly. +// --------------------------------------------------------------------------- +static const XMLCh gEndElement[] = { chOpenAngle, chForwardSlash, chNull }; +static const XMLCh gEndPI[] = { chQuestion, chCloseAngle, chNull}; +static const XMLCh gStartPI[] = { chOpenAngle, chQuestion, chNull }; +static const XMLCh gXMLDecl1[] = +{ + chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l + , chSpace, chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i + , chLatin_o, chLatin_n, chEqual, chDoubleQuote, chNull +}; +static const XMLCh gXMLDecl2[] = +{ + chDoubleQuote, chSpace, chLatin_e, chLatin_n, chLatin_c + , chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual + , chDoubleQuote, chNull +}; +static const XMLCh gXMLDecl3[] = +{ + chDoubleQuote, chSpace, chLatin_s, chLatin_t, chLatin_a + , chLatin_n, chLatin_d, chLatin_a, chLatin_l, chLatin_o + , chLatin_n, chLatin_e, chEqual, chDoubleQuote, chNull +}; +static const XMLCh gXMLDecl4[] = +{ + chDoubleQuote, chQuestion, chCloseAngle + , chLF, chNull +}; + +static const XMLCh gStartCDATA[] = +{ + chOpenAngle, chBang, chOpenSquare, chLatin_C, chLatin_D, + chLatin_A, chLatin_T, chLatin_A, chOpenSquare, chNull +}; + +static const XMLCh gEndCDATA[] = +{ + chCloseSquare, chCloseSquare, chCloseAngle, chNull +}; +static const XMLCh gStartComment[] = +{ + chOpenAngle, chBang, chDash, chDash, chNull +}; + +static const XMLCh gEndComment[] = +{ + chDash, chDash, chCloseAngle, chNull +}; + +static const XMLCh gStartDoctype[] = +{ + chOpenAngle, chBang, chLatin_D, chLatin_O, chLatin_C, chLatin_T, + chLatin_Y, chLatin_P, chLatin_E, chSpace, chNull +}; +static const XMLCh gPublic[] = +{ + chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I, + chLatin_C, chSpace, chDoubleQuote, chNull +}; +static const XMLCh gSystem[] = +{ + chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E, + chLatin_M, chSpace, chDoubleQuote, chNull +}; +static const XMLCh gStartEntity[] = +{ + chOpenAngle, chBang, chLatin_E, chLatin_N, chLatin_T, chLatin_I, + chLatin_T, chLatin_Y, chSpace, chNull +}; +static const XMLCh gNotation[] = +{ + chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A, + chSpace, chDoubleQuote, chNull +}; + +class DOMPrintFormatTarget : public XMLFormatTarget +{ +public: + DOMPrintFormatTarget() {}; + ~DOMPrintFormatTarget() {}; + + // ----------------------------------------------------------------------- + // Implementations of the format target interface + // ----------------------------------------------------------------------- + + void writeChars(const XMLByte* const toWrite, + const unsigned int count, + XMLFormatter * const formatter) + { + // Surprisingly, Solaris was the only platform on which + // required the char* cast to print out the string correctly. + // Without the cast, it was printing the pointer value in hex. + // Quite annoying, considering every other platform printed + // the string with the explicit cast to char* below. + cout.write((char *) toWrite, (int) count); + }; + +private: + // ----------------------------------------------------------------------- + // Unimplemented methods. + // ----------------------------------------------------------------------- + DOMPrintFormatTarget(const DOMPrintFormatTarget& other); + void operator=(const DOMPrintFormatTarget& rhs); +}; + + +void +printinit() +{ + gEncodingName = XMLString::transcode("UTF-8"); + + DOMPrintFormatTarget* formatTarget = new DOMPrintFormatTarget(); + gFormatter = new XMLFormatter(gEncodingName, formatTarget, + XMLFormatter::NoEscapes, gUnRepFlags); + +} + +ostream& operator<< (ostream& target, const DOMString& s) +{ + char *p = s.transcode(); + target << p; + delete [] p; + return target; +} + + +XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s) +{ + unsigned int lent = s.length(); + + if (lent <= 0) + return strm; + + XMLCh* buf = new XMLCh[lent + 1]; + XMLString::copyNString(buf, s.rawBuffer(), lent); + buf[lent] = 0; + strm << buf; + delete [] buf; + return strm; +} + + +// --------------------------------------------------------------------------- +// ostream << DOM_Node +// +// Stream out a DOM node, and, recursively, all of its children. This +// function is the heart of writing a DOM tree out as XML source. Give it +// a document node and it will do the whole thing. +// --------------------------------------------------------------------------- +ostream& operator<<(ostream& target, DOM_Node& toWrite) +{ + // Get the name and value out for convenience + DOMString nodeName = toWrite.getNodeName(); + DOMString nodeValue = toWrite.getNodeValue(); + unsigned long lent = nodeValue.length(); + + switch (toWrite.getNodeType()) + { + case DOM_Node::TEXT_NODE: + { + gFormatter->formatBuf(nodeValue.rawBuffer(), + lent, XMLFormatter::CharEscapes); + break; + } + + + case DOM_Node::PROCESSING_INSTRUCTION_NODE : + { + *gFormatter << XMLFormatter::NoEscapes << gStartPI << nodeName; + if (lent > 0) + { + *gFormatter << chSpace << nodeValue; + } + *gFormatter << XMLFormatter::NoEscapes << gEndPI; + break; + } + + + case DOM_Node::DOCUMENT_NODE : + { + + DOM_Node child = toWrite.getFirstChild(); + while( child != 0) + { + target << child; + // add linefeed in requested output encoding + *gFormatter << chLF; + target << flush; + child = child.getNextSibling(); + } + break; + } + + + case DOM_Node::ELEMENT_NODE : + { + // The name has to be representable without any escapes + *gFormatter << XMLFormatter::NoEscapes + << chOpenAngle << nodeName; + + // Output the element start tag. + + // Output any attributes on this element + DOM_NamedNodeMap attributes = toWrite.getAttributes(); + int attrCount = attributes.getLength(); + for (int i = 0; i < attrCount; i++) + { + DOM_Node attribute = attributes.item(i); + + // + // Again the name has to be completely representable. But the + // attribute can have refs and requires the attribute style + // escaping. + // + *gFormatter << XMLFormatter::NoEscapes + << chSpace << attribute.getNodeName() + << chEqual << chDoubleQuote + << XMLFormatter::AttrEscapes + << attribute.getNodeValue() + << XMLFormatter::NoEscapes + << chDoubleQuote; + } + + // + // Test for the presence of children, which includes both + // text content and nested elements. + // + DOM_Node child = toWrite.getFirstChild(); + if (child != 0) + { + // There are children. Close start-tag, and output children. + // No escapes are legal here + *gFormatter << XMLFormatter::NoEscapes << chCloseAngle; + + while( child != 0) + { + target << child; + child = child.getNextSibling(); + } + + // + // Done with children. Output the end tag. + // + *gFormatter << XMLFormatter::NoEscapes << gEndElement + << nodeName << chCloseAngle; + } + else + { + // + // There were no children. Output the short form close of + // the element start tag, making it an empty-element tag. + // + *gFormatter << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle; + } + break; + } + + + case DOM_Node::ENTITY_REFERENCE_NODE: + { + DOM_Node child; +#if 0 + for (child = toWrite.getFirstChild(); + child != 0; + child = child.getNextSibling()) + { + target << child; + } +#else + // + // Instead of printing the refernece tree + // we'd output the actual text as it appeared in the xml file. + // This would be the case when -e option was chosen + // + *gFormatter << XMLFormatter::NoEscapes << chAmpersand + << nodeName << chSemiColon; +#endif + break; + } + + + case DOM_Node::CDATA_SECTION_NODE: + { + *gFormatter << XMLFormatter::NoEscapes << gStartCDATA + << nodeValue << gEndCDATA; + break; + } + + + case DOM_Node::COMMENT_NODE: + { + *gFormatter << XMLFormatter::NoEscapes << gStartComment + << nodeValue << gEndComment; + break; + } + + + case DOM_Node::DOCUMENT_TYPE_NODE: + { + DOM_DocumentType doctype = (DOM_DocumentType &)toWrite;; + + *gFormatter << XMLFormatter::NoEscapes << gStartDoctype + << nodeName; + + DOMString id = doctype.getPublicId(); + if (id != 0) + { + *gFormatter << XMLFormatter::NoEscapes << chSpace << gPublic + << id << chDoubleQuote; + id = doctype.getSystemId(); + if (id != 0) + { + *gFormatter << XMLFormatter::NoEscapes << chSpace + << chDoubleQuote << id << chDoubleQuote; + } + } + else + { + id = doctype.getSystemId(); + if (id != 0) + { + *gFormatter << XMLFormatter::NoEscapes << chSpace << gSystem + << id << chDoubleQuote; + } + } + + id = doctype.getInternalSubset(); + if (id !=0) + *gFormatter << XMLFormatter::NoEscapes << chOpenSquare + << id << chCloseSquare; + + *gFormatter << XMLFormatter::NoEscapes << chCloseAngle; + break; + } + + + case DOM_Node::ENTITY_NODE: + { + *gFormatter << XMLFormatter::NoEscapes << gStartEntity + << nodeName; + + DOMString id = ((DOM_Entity &)toWrite).getPublicId(); + if (id != 0) + *gFormatter << XMLFormatter::NoEscapes << gPublic + << id << chDoubleQuote; + + id = ((DOM_Entity &)toWrite).getSystemId(); + if (id != 0) + *gFormatter << XMLFormatter::NoEscapes << gSystem + << id << chDoubleQuote; + + id = ((DOM_Entity &)toWrite).getNotationName(); + if (id != 0) + *gFormatter << XMLFormatter::NoEscapes << gNotation + << id << chDoubleQuote; + + *gFormatter << XMLFormatter::NoEscapes << chCloseAngle << chLF; + + break; + } + + + case DOM_Node::XML_DECL_NODE: + { + DOMString str; + + *gFormatter << gXMLDecl1 << ((DOM_XMLDecl &)toWrite).getVersion(); + + *gFormatter << gXMLDecl2 << gEncodingName; + + str = ((DOM_XMLDecl &)toWrite).getStandalone(); + if (str != 0) + *gFormatter << gXMLDecl3 << str; + + *gFormatter << gXMLDecl4; + + break; + } + + + default: + cerr << "Unrecognized node type = " + << (long)toWrite.getNodeType() << endl; + } + return target; +} + + + |