Spaces:
Running
Running
| """An XML Reader is the SAX 2 name for an XML parser. XML Parsers | |
| should be based on this code. """ | |
| from . import handler | |
| from ._exceptions import SAXNotSupportedException, SAXNotRecognizedException | |
| # ===== XMLREADER ===== | |
| class XMLReader: | |
| """Interface for reading an XML document using callbacks. | |
| XMLReader is the interface that an XML parser's SAX2 driver must | |
| implement. This interface allows an application to set and query | |
| features and properties in the parser, to register event handlers | |
| for document processing, and to initiate a document parse. | |
| All SAX interfaces are assumed to be synchronous: the parse | |
| methods must not return until parsing is complete, and readers | |
| must wait for an event-handler callback to return before reporting | |
| the next event.""" | |
| def __init__(self): | |
| self._cont_handler = handler.ContentHandler() | |
| self._dtd_handler = handler.DTDHandler() | |
| self._ent_handler = handler.EntityResolver() | |
| self._err_handler = handler.ErrorHandler() | |
| def parse(self, source): | |
| "Parse an XML document from a system identifier or an InputSource." | |
| raise NotImplementedError("This method must be implemented!") | |
| def getContentHandler(self): | |
| "Returns the current ContentHandler." | |
| return self._cont_handler | |
| def setContentHandler(self, handler): | |
| "Registers a new object to receive document content events." | |
| self._cont_handler = handler | |
| def getDTDHandler(self): | |
| "Returns the current DTD handler." | |
| return self._dtd_handler | |
| def setDTDHandler(self, handler): | |
| "Register an object to receive basic DTD-related events." | |
| self._dtd_handler = handler | |
| def getEntityResolver(self): | |
| "Returns the current EntityResolver." | |
| return self._ent_handler | |
| def setEntityResolver(self, resolver): | |
| "Register an object to resolve external entities." | |
| self._ent_handler = resolver | |
| def getErrorHandler(self): | |
| "Returns the current ErrorHandler." | |
| return self._err_handler | |
| def setErrorHandler(self, handler): | |
| "Register an object to receive error-message events." | |
| self._err_handler = handler | |
| def setLocale(self, locale): | |
| """Allow an application to set the locale for errors and warnings. | |
| SAX parsers are not required to provide localization for errors | |
| and warnings; if they cannot support the requested locale, | |
| however, they must raise a SAX exception. Applications may | |
| request a locale change in the middle of a parse.""" | |
| raise SAXNotSupportedException("Locale support not implemented") | |
| def getFeature(self, name): | |
| "Looks up and returns the state of a SAX2 feature." | |
| raise SAXNotRecognizedException("Feature '%s' not recognized" % name) | |
| def setFeature(self, name, state): | |
| "Sets the state of a SAX2 feature." | |
| raise SAXNotRecognizedException("Feature '%s' not recognized" % name) | |
| def getProperty(self, name): | |
| "Looks up and returns the value of a SAX2 property." | |
| raise SAXNotRecognizedException("Property '%s' not recognized" % name) | |
| def setProperty(self, name, value): | |
| "Sets the value of a SAX2 property." | |
| raise SAXNotRecognizedException("Property '%s' not recognized" % name) | |
| class IncrementalParser(XMLReader): | |
| """This interface adds three extra methods to the XMLReader | |
| interface that allow XML parsers to support incremental | |
| parsing. Support for this interface is optional, since not all | |
| underlying XML parsers support this functionality. | |
| When the parser is instantiated it is ready to begin accepting | |
| data from the feed method immediately. After parsing has been | |
| finished with a call to close the reset method must be called to | |
| make the parser ready to accept new data, either from feed or | |
| using the parse method. | |
| Note that these methods must _not_ be called during parsing, that | |
| is, after parse has been called and before it returns. | |
| By default, the class also implements the parse method of the XMLReader | |
| interface using the feed, close and reset methods of the | |
| IncrementalParser interface as a convenience to SAX 2.0 driver | |
| writers.""" | |
| def __init__(self, bufsize=2**16): | |
| self._bufsize = bufsize | |
| XMLReader.__init__(self) | |
| def parse(self, source): | |
| from . import saxutils | |
| source = saxutils.prepare_input_source(source) | |
| self.prepareParser(source) | |
| file = source.getCharacterStream() | |
| if file is None: | |
| file = source.getByteStream() | |
| buffer = file.read(self._bufsize) | |
| while buffer: | |
| self.feed(buffer) | |
| buffer = file.read(self._bufsize) | |
| self.close() | |
| def feed(self, data): | |
| """This method gives the raw XML data in the data parameter to | |
| the parser and makes it parse the data, emitting the | |
| corresponding events. It is allowed for XML constructs to be | |
| split across several calls to feed. | |
| feed may raise SAXException.""" | |
| raise NotImplementedError("This method must be implemented!") | |
| def prepareParser(self, source): | |
| """This method is called by the parse implementation to allow | |
| the SAX 2.0 driver to prepare itself for parsing.""" | |
| raise NotImplementedError("prepareParser must be overridden!") | |
| def close(self): | |
| """This method is called when the entire XML document has been | |
| passed to the parser through the feed method, to notify the | |
| parser that there are no more data. This allows the parser to | |
| do the final checks on the document and empty the internal | |
| data buffer. | |
| The parser will not be ready to parse another document until | |
| the reset method has been called. | |
| close may raise SAXException.""" | |
| raise NotImplementedError("This method must be implemented!") | |
| def reset(self): | |
| """This method is called after close has been called to reset | |
| the parser so that it is ready to parse new documents. The | |
| results of calling parse or feed after close without calling | |
| reset are undefined.""" | |
| raise NotImplementedError("This method must be implemented!") | |
| # ===== LOCATOR ===== | |
| class Locator: | |
| """Interface for associating a SAX event with a document | |
| location. A locator object will return valid results only during | |
| calls to DocumentHandler methods; at any other time, the | |
| results are unpredictable.""" | |
| def getColumnNumber(self): | |
| "Return the column number where the current event ends." | |
| return -1 | |
| def getLineNumber(self): | |
| "Return the line number where the current event ends." | |
| return -1 | |
| def getPublicId(self): | |
| "Return the public identifier for the current event." | |
| return None | |
| def getSystemId(self): | |
| "Return the system identifier for the current event." | |
| return None | |
| # ===== INPUTSOURCE ===== | |
| class InputSource: | |
| """Encapsulation of the information needed by the XMLReader to | |
| read entities. | |
| This class may include information about the public identifier, | |
| system identifier, byte stream (possibly with character encoding | |
| information) and/or the character stream of an entity. | |
| Applications will create objects of this class for use in the | |
| XMLReader.parse method and for returning from | |
| EntityResolver.resolveEntity. | |
| An InputSource belongs to the application, the XMLReader is not | |
| allowed to modify InputSource objects passed to it from the | |
| application, although it may make copies and modify those.""" | |
| def __init__(self, system_id = None): | |
| self.__system_id = system_id | |
| self.__public_id = None | |
| self.__encoding = None | |
| self.__bytefile = None | |
| self.__charfile = None | |
| def setPublicId(self, public_id): | |
| "Sets the public identifier of this InputSource." | |
| self.__public_id = public_id | |
| def getPublicId(self): | |
| "Returns the public identifier of this InputSource." | |
| return self.__public_id | |
| def setSystemId(self, system_id): | |
| "Sets the system identifier of this InputSource." | |
| self.__system_id = system_id | |
| def getSystemId(self): | |
| "Returns the system identifier of this InputSource." | |
| return self.__system_id | |
| def setEncoding(self, encoding): | |
| """Sets the character encoding of this InputSource. | |
| The encoding must be a string acceptable for an XML encoding | |
| declaration (see section 4.3.3 of the XML recommendation). | |
| The encoding attribute of the InputSource is ignored if the | |
| InputSource also contains a character stream.""" | |
| self.__encoding = encoding | |
| def getEncoding(self): | |
| "Get the character encoding of this InputSource." | |
| return self.__encoding | |
| def setByteStream(self, bytefile): | |
| """Set the byte stream (a Python file-like object which does | |
| not perform byte-to-character conversion) for this input | |
| source. | |
| The SAX parser will ignore this if there is also a character | |
| stream specified, but it will use a byte stream in preference | |
| to opening a URI connection itself. | |
| If the application knows the character encoding of the byte | |
| stream, it should set it with the setEncoding method.""" | |
| self.__bytefile = bytefile | |
| def getByteStream(self): | |
| """Get the byte stream for this input source. | |
| The getEncoding method will return the character encoding for | |
| this byte stream, or None if unknown.""" | |
| return self.__bytefile | |
| def setCharacterStream(self, charfile): | |
| """Set the character stream for this input source. (The stream | |
| must be a Python 2.0 Unicode-wrapped file-like that performs | |
| conversion to Unicode strings.) | |
| If there is a character stream specified, the SAX parser will | |
| ignore any byte stream and will not attempt to open a URI | |
| connection to the system identifier.""" | |
| self.__charfile = charfile | |
| def getCharacterStream(self): | |
| "Get the character stream for this input source." | |
| return self.__charfile | |
| # ===== ATTRIBUTESIMPL ===== | |
| class AttributesImpl: | |
| def __init__(self, attrs): | |
| """Non-NS-aware implementation. | |
| attrs should be of the form {name : value}.""" | |
| self._attrs = attrs | |
| def getLength(self): | |
| return len(self._attrs) | |
| def getType(self, name): | |
| return "CDATA" | |
| def getValue(self, name): | |
| return self._attrs[name] | |
| def getValueByQName(self, name): | |
| return self._attrs[name] | |
| def getNameByQName(self, name): | |
| if name not in self._attrs: | |
| raise KeyError(name) | |
| return name | |
| def getQNameByName(self, name): | |
| if name not in self._attrs: | |
| raise KeyError(name) | |
| return name | |
| def getNames(self): | |
| return list(self._attrs.keys()) | |
| def getQNames(self): | |
| return list(self._attrs.keys()) | |
| def __len__(self): | |
| return len(self._attrs) | |
| def __getitem__(self, name): | |
| return self._attrs[name] | |
| def keys(self): | |
| return list(self._attrs.keys()) | |
| def __contains__(self, name): | |
| return name in self._attrs | |
| def get(self, name, alternative=None): | |
| return self._attrs.get(name, alternative) | |
| def copy(self): | |
| return self.__class__(self._attrs) | |
| def items(self): | |
| return list(self._attrs.items()) | |
| def values(self): | |
| return list(self._attrs.values()) | |
| # ===== ATTRIBUTESNSIMPL ===== | |
| class AttributesNSImpl(AttributesImpl): | |
| def __init__(self, attrs, qnames): | |
| """NS-aware implementation. | |
| attrs should be of the form {(ns_uri, lname): value, ...}. | |
| qnames of the form {(ns_uri, lname): qname, ...}.""" | |
| self._attrs = attrs | |
| self._qnames = qnames | |
| def getValueByQName(self, name): | |
| for (nsname, qname) in self._qnames.items(): | |
| if qname == name: | |
| return self._attrs[nsname] | |
| raise KeyError(name) | |
| def getNameByQName(self, name): | |
| for (nsname, qname) in self._qnames.items(): | |
| if qname == name: | |
| return nsname | |
| raise KeyError(name) | |
| def getQNameByName(self, name): | |
| return self._qnames[name] | |
| def getQNames(self): | |
| return list(self._qnames.values()) | |
| def copy(self): | |
| return self.__class__(self._attrs, self._qnames) | |
| def _test(): | |
| XMLReader() | |
| IncrementalParser() | |
| Locator() | |
| if __name__ == "__main__": | |
| _test() | |