Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
# Create pickled datafile
# Parse first, inside a ``with`` block, so the input file is always closed and
# the output file is not truncated when parsing fails.
with open("edugain.xml", "r", encoding="utf-8") as source:
    t = objectify.parse(source)

# pickle.dumps() returns bytes. Decoding them as latin1 maps every byte value
# to exactly one character, so the reader can get the original bytes back with
# .encode('latin1').
p = pickle.dumps(t).decode('latin1')

# The ``with`` block guarantees the pickle text is flushed and the file closed.
with open("edugain.pkl", "w") as sink:
    sink.write(p)

# Read pickled object back in pyFF
def parse_xml(io, base_url=None):
    """Rebuild a pre-parsed tree from the text form of its pickle.

    Args:
        io: The pickle as text, i.e. the pickled bytes decoded as latin1
            (the format written by the "Create pickled datafile" script).
        base_url: Accepted for signature compatibility with the SAX-based
            ``parse_xml`` variant; not used here.

    Returns:
        Whatever object was pickled (the parsed tree in the script above).
    """
    # SECURITY: pickle.loads() can execute arbitrary code while unpickling.
    # Only feed it data files you created yourself, never fetched metadata.
    return pickle.loads(io.encode('latin1'))

In the metadata parser:
t = parse_xml(content)  # instead of parse_xml(unicode_stream(content))

xml.sax etree.ElementTree parser

This code uses the event-based xml.sax parser to create an etree.ElementTree object for pyFF. At the time of writing, pyFF refuses to validate the result, although the metadata it produces appears to be correct.

Code Block
import xml.sax
class XML(xml.sax.handler.ContentHandler):
    """SAX content handler that builds an ``etree`` element tree.

    The parser is expected to run without namespace processing, so element
    and attribute names arrive as raw ``prefix:local`` strings and ``xmlns``
    declarations arrive as ordinary attributes. This handler resolves them
    itself and creates elements with ``etree.SubElement`` (``etree`` is
    presumably ``lxml.etree``: ``getparent()`` and the ``nsmap`` keyword are
    used).

    Attributes:
        current: The element currently being filled. It starts as a
            placeholder ``root`` element and is back at that placeholder once
            the document has been parsed, so the document element is
            ``current[0]``.
        nsmap: Prefix -> namespace URI mapping in scope for ``current``.
            The default namespace is stored under the key ``None``.
    """

    # Namespace that the reserved ``xml`` prefix (e.g. ``xml:lang``) is bound to.
    XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

    def __init__(self):
        super().__init__()
        self.current = etree.Element("root")
        self.nsmap = {}
        # Namespace maps of the enclosing elements, restored in endElement().
        self._scopes = []
        # Character data received since the last start/end tag.
        self._text = []

    @staticmethod
    def _qualify(qname, scope, use_default):
        """Turn a raw ``prefix:local`` name into ``{uri}local`` notation.

        ``use_default`` selects whether an unprefixed name is placed in the
        default namespace (true for elements, false for attributes).
        """
        parts = qname.split(':')
        if len(parts) == 2:
            prefix, local = parts
            uri = scope.get(prefix)
            if uri is None:
                # ``xml`` is predeclared; for any other undeclared prefix keep
                # the historical fallback of using the prefix itself as URI.
                uri = XML.XML_NAMESPACE if prefix == "xml" else prefix
            return f"{{{uri}}}{local}"
        default = scope.get(None) if use_default else None
        if default and len(parts) == 1:
            return f"{{{default}}}{qname}"
        return parts[-1]

    def _flush_text(self):
        """Attach buffered character data to the tree and reset the buffer."""
        text = "".join(self._text).strip()
        self._text = []
        if not text:
            return
        if len(self.current):
            # Text that follows a child element belongs to that child's tail.
            last = self.current[-1]
            last.tail = (last.tail or "") + text
        else:
            self.current.text = text

    def startElement(self, name, attrs):
        self._flush_text()
        # Collect the declarations first so that they apply to the element
        # itself and to all of its attributes, whatever the attribute order.
        scope = dict(self.nsmap)
        plain = []
        for key, value in attrs.items():
            parts = key.split(':')
            if key == 'xmlns':
                if value:
                    scope[None] = value
                else:
                    # xmlns="" removes the default namespace for this subtree.
                    scope.pop(None, None)
            elif len(parts) == 2 and parts[0] == 'xmlns':
                scope[parts[1]] = value
            else:
                plain.append((key, value))
        # Unprefixed attributes are never in the default namespace.
        attributes = {
            self._qualify(key, scope, False): value for key, value in plain
        }
        tag = self._qualify(name, scope, True)
        element = etree.SubElement(self.current, tag, attributes, nsmap=scope)
        # Only switch scope once the element really exists.
        self._scopes.append(self.nsmap)
        self.nsmap = scope
        self.current = element

    def endElement(self, name):
        self._flush_text()
        self.current = self.current.getparent()
        # Declarations made on the closed element go out of scope.
        if self._scopes:
            self.nsmap = self._scopes.pop()

    def characters(self, data):
        # A parser may deliver one run of text in several chunks, so only
        # buffer here; the text is stripped and stored on the next tag event.
        self._text.append(data)

def parse_xml(io, base_url=None):
    """Parse XML with the event-based xml.sax parser into an ElementTree.

    Args:
        io: The input handed to the SAX parser's ``parse()`` method.
        base_url: Accepted but not used by this implementation.

    Returns:
        An ``etree.ElementTree`` wrapping the first child of the handler's
        ``current`` element.
    """
    reader = xml.sax.make_parser()
    builder = XML()
    reader.setContentHandler(builder)
    reader.parse(io)
    # The handler hangs the document below a placeholder element, so the
    # real document element is that placeholder's first child.
    document_element = builder.current[0]
    return etree.ElementTree(document_element)