...
Another way of profiling pyFF's memory usage is simply to watch RES in top or htop for a long-running pyFF process that has a 60s refresh interval. I normally use this pipeline:
| Code Block |
|---|
- when update:
    - load:
        - edugain.xml
- when request:
    - select:
    - pipe:
        - when accept application/samlmetadata+xml application/xml:
            - first
            - finalize:
                cacheDuration: PT12H
                validUntil: P10D
            - sign:
                key: cert/sign.key
                cert: cert/sign.crt
            - emit application/samlmetadata+xml
            - break
        - when accept application/json:
            - discojson
            - emit application/json
            - break |
to load the edugain feed that has been downloaded using
...
| Code Block |
|---|
from lxml import etree, objectify
import pickle
# Create pickled datafile
# Parse first and open the output only afterwards, so that a failed parse does
# not leave a truncated edugain.pkl behind; ``with`` closes both files promptly.
with open("edugain.xml", "r", encoding="utf-8") as source:
    t = objectify.parse(source)
# pickle.dumps() returns bytes. latin1 maps every byte value to exactly one
# code point, so a reader can recover the bytes with .encode('latin1').
p = pickle.dumps(t).decode('latin1')
# No explicit encoding here: the file is written in the platform default
# encoding, so whatever reads it back has to decode it the same way.
with open("edugain.pkl", "w") as sink:
    sink.write(p)
# Read pickled object back in pyFF
def parse_xml(io, base_url=None):
    """Unpickle an object from *io*, a str holding latin1-decoded pickle bytes.
    *base_url* is accepted but not used.
    """
    # SECURITY: pickle.loads() can execute arbitrary code while loading. Only
    # feed it data files you created yourself, never content from the network.
    return pickle.loads(io.encode('latin1'))
# In the metadata parser:
t = parse_xml(content) #Instead of parse_xml(unicode_stream(content)) |
Using un/pickling, pyFF starts out using ~800 MB of RES, which slowly grows to a steady 1.2-1.5 GB.
xml.sax etree.ElementTree parser
...
| Code Block |
|---|
import xml.sax
class XML(xml.sax.handler.ContentHandler):
    """SAX content handler that rebuilds the parsed document as an etree tree.
    Elements are appended below a synthetic "root" element held in ``current``;
    once parsing has finished, the document's real root is its first child.
    Prefixed element names are rewritten to ``{namespace}local`` using the
    ``xmlns:prefix`` declarations seen so far (an unknown prefix is kept as-is
    inside the braces). Attribute prefixes are dropped, and an unprefixed
    ``xmlns`` declaration is passed through as an ordinary attribute.
    Text is stripped of leading/trailing whitespace; text that follows a child
    element is appended to the parent's text rather than stored as a tail.
    """
    def __init__(self):
        super().__init__()
        # Synthetic parent, so the document root can be created with
        # SubElement like every other element.
        self.current = etree.Element("root")
        # prefix -> namespace URI; one map for the whole document, never popped.
        self.nsmap = {}
        # Character data received for ``current`` but not yet stored on it.
        self._pending = []
    def _flush_text(self):
        """Store the buffered character data on ``current`` and clear the buffer."""
        text = "".join(self._pending).strip()
        self._pending.clear()
        if text:
            self.current.text = (self.current.text or "") + text
    def startElement(self, name, attrs):
        """Create a child of ``current`` for the opening tag and descend into it."""
        # Anything buffered so far belongs to the parent, not to the new element.
        self._flush_text()
        attributes = {}
        for qname, value in attrs.items():
            parts = qname.split(':')
            if len(parts) == 2 and parts[0] == 'xmlns':
                # Namespace declaration: remember it instead of copying it over.
                self.nsmap[parts[-1]] = value
            else:
                attributes[parts[-1]] = value
        parts = name.split(':')
        if len(parts) == 2:
            namespace = self.nsmap.get(parts[0], parts[0])
            tag = f"{{{namespace}}}{parts[-1]}"
        else:
            tag = parts[-1]
        self.current = etree.SubElement(self.current, tag, attributes, nsmap=self.nsmap)
    def endElement(self, name):
        """Store the element's text and step back up to its parent."""
        self._flush_text()
        self.current = self.current.getparent()
    def characters(self, data):
        """Buffer one chunk of character data for the current element."""
        # A SAX parser may split one run of text into several chunks, so the
        # chunks are collected here and joined in _flush_text(); assigning each
        # chunk to .text directly would keep only the last one.
        self._pending.append(data)
def parse_xml(io, base_url=None):
    """Parse *io* with an xml.sax parser and return it as an etree.ElementTree.
    *base_url* is accepted but not used.
    """
    builder = XML()
    reader = xml.sax.make_parser()
    reader.setContentHandler(builder)
    reader.parse(io)
    # The handler builds the document below a synthetic container element, so
    # the document's own root is that container's first child.
    document_root = builder.current[0]
    return etree.ElementTree(document_root)
Using the xml.sax parser, pyFF starts out using ~800 MB of RES, which slowly grows to a steady 1.2-1.5 GB.