"""
Support for reading the Ant-style XML files often used for report JUnit results (and also for some non-JUnit test
execution engines).
"""
import pysys
from pysys.constants import *
from pysys.utils.fileutils import *
import logging
import calendar
import xml.etree.ElementTree as ET # Python 3.3+ will automatically use the fast C version if available
log = logging.getLogger('pysys.java.junitxml')
[docs]class JUnitXMLParser:
""" A fast, minimal parser for Ant-style JUnit XML files.
Note that there are a number of dialects of this file format with different handling of things like stdout
(per testsuite or per testcase) and timezone (UTC or local timezone) so check the details carefully if using
this for anything other than the JUnit 5 console launcher. If you need something more advanced there are
other Python-based JUnit XML parsers out there with more features."""
outcomeDetailsExcludeLinesRegex = r'^\t+(at (java[.]|sun[.]|org[.]junit|org.apache.tools.ant)|\.\.\. [0-9]+ more).*\n'
"""
A regular expression specifying lines that should be stripped out of the outcomeDetails stack traces.
"""
isTimestampLocalTime = None
"""
Set to True to force timestamp to be interpreted as local time (like JUnit5),
or False to force to interpret as UTC/GMT (like Ant). Default is to select based on test suite name.
"""
def __init__(self, path):
self.path = os.path.normpath(path)
# This approach allows subclasses to extend if they want to, and is also very efficient
self.unmarshaller = {
'testsuite': self._testsuite,
'testcase': self._testcase,
'system-out': self._systemOut,
'system-err': self._systemErr,
'failure': self._outcome,
'error': self._outcome,
'skipped': self._outcome,
}
[docs] def parse(self):
"""
Parses this file and returns a tuple of (testsuite: dict[str,obj], testcases: list[dict[str,obj]])
representing the contents of this file.
The testsuite dictionary contains keys:
* ``tests: int`` - total number of tests executed (depending on the dialect of the file format this may or
may not include skipped tests).
* ``durationSecs: float`` - The time elapsed while executing all tests.
* ``timestamp: float`` - The start time as a POSIX timestamp which can be passed to datetime.fromtimestamp().
* ``stdout: str`` (optional) - Any text written to stdout by all testcases in the suite
for dialects such as Ant which don't provide this per-testcase. (stripped of leading/trailing whitespace)
* ``stderr: str`` (optional) - Any text written to stderr by all testcases in the suite
for dialects such as Ant which don't provide this per-testcase. (stripped of leading/trailing whitespace)
* other keys vary depending on the dialect
Each testcase dictionary contains keys:
* ``classname: str`` - The qualified class name containing this testcase. May include $ if has a nested
class.
* ``name: str`` - The name of the testcase, typically a method name possibly with a suffix e.g. ``foo()[2]``.
* ``durationSecs: float`` - The time taken to execute this testcase.
* ``stdout: str`` (optional) - Any text written to stdout by the testcase (stripped of leading/trailing whitespace),
for dialects such as JUnit5 console launcher that provide this per testcase.
* ``stderr: str`` (optional) - Any text written to stderr by the testcase (stripped of leading/trailing whitespace),
for dialects such as JUnit5 console launcher that provide this per testcase
* ``outcome: str`` - passed/failure/error/skipped.
* ``outcomeType: str`` (optional) - Java class of the outcome type, if present.
* ``outcomeReason: str`` (except if test passed) - reason string or '' if not known.
* ``outcomeDetails: str`` (optional) - multi-line details string for the outcome, typically a stack trace.
To avoid excessive verbosity lines involving org.junit.* or java.* packages are excluded.
* ``outcomeDetailsFull: str`` (optional) - multi-line details string for the outcome, without exclusions.
* ``comparisonExpected/comparisonActual: str`` (optional) - actual and expected comparison values from the outcomeReason, if known. """
log.debug('Parsing JUnitXML: %s', self.path)
self.results = []
self.suite = {}
self.currenttest = {}
unmarshaller = self.unmarshaller
try:
with open(toLongPathSafe(self.path), 'rb') as fileptr:
nodepath = []
for action, elem in ET.iterparse(fileptr, events=['start','end']):
if action =='start':
nodepath.append(elem.tag)
else:
u = unmarshaller.get(elem.tag)
if u is not None: u(elem, nodepath)
nodepath.pop()
# This check is to make sure we've not missed anything while parsing;
# note that Ant seems to set tests to the total excluding skipped ones whereas JUnit5 launcher includes
# skipped ones; hence not doing an exact check here
assert len(self.results) >= self.suite['tests'], 'Suite contains %d tests but found %d testcase elements'%(
self.suite['tests'], len(self.results))
except Exception as ex: # pragma: no cover
raise Exception('Failed to parse JUnit XML %s: %s'%(self.path, ex))
# The order seems to be random, so sort it
self.results.sort(key=lambda r: (r.get('classname'), r.get('name')))
if len(self.results)==0:
if 'uniqueId' in self.suite:
# e.g. if you have JUnit 5 but not JUnit 4 (or vice-versa) you'll get an empty file; the above is a
# a way to detect that the suite is from the JUnit5 launcher rather than an actual Ant/JUnit4 test class/suite
log.debug('Ignoring suite "%s" with no results generated by the JUnit5 launcher', self.suite.get('name'))
else:
assert self.suite.get('skipped') >= 1, 'Test suite "%s" contains no results, but skipped=0: %s'%(self.suite.get('name'), self.path)
# Create a fake result so that it shows up. NB: JUnit 5 always generates a result so this is just for JUnit 4 and/or Ant
self.results.append({
"classname": self.suite['name'],
"durationSecs": 0.0,
"name": "class",
"outcome": "skipped",
"outcomeReason": "Test suite is skipped",
})
return self.suite, self.results
def _testsuite(self, elem, nodepath):
assert len(nodepath)==1, 'Only expecting testsuite elements as the root node, but got: %s'%nodepath
self.suite.update(elem.attrib)
# overwrite some with more specific values
for k in ['tests', 'failures', 'errors', 'skipped', 'aborted']: # errors vs aborted are used in different dialects
if k in self.suite: self.suite[k] = int(elem.attrib[k])
self.suite['durationSecs'] = float(self.suite.pop('time', '0'))
if self.isTimestampLocalTime is None:
self.isTimestampLocalTime = self.suite.get('name') in ['JUnit Jupiter', 'JUnit Vintage']
self.suite['timestamp'] = (time.mktime if self.isTimestampLocalTime else calendar.timegm)(time.strptime(elem.attrib['timestamp'], '%Y-%m-%dT%H:%M:%S'))
def _testcase(self, elem, nodepath):
currenttest = self.currenttest
currenttest['name'] = elem.attrib.get('name')
currenttest['classname'] = elem.attrib.get('classname')
currenttest['durationSecs'] = float(elem.attrib.get('time') or '0')
# Now we know the classname try to find the line in the stack trace from that class
classnameUnqualified = (currenttest['classname'] or '').split('.')[-1].split('$')[0]
for l in currenttest.get('outcomeDetails', '').split('\n'):
m = re.match(r'[ \t]+at .*[(]%s[^:]*:([0-9]+)'%classnameUnqualified, l)
if m is not None:
currenttest['testFileLine'] = int(m.group(1))
break
currenttest.setdefault('outcome', 'passed')
self.results.append(currenttest)
self.currenttest = {}
def _systemErr(self, elem, nodepath):
text = elem.text.strip()
if not text: return
item = self.suite if nodepath[-2]=='testsuite' else self.currenttest
if 'stderr' in item: text = item['stderr']+'\n'+text
item['stderr'] = text
def _systemOut(self, elem, nodepath):
text = elem.text.strip()
if not text: return
item = self.suite if nodepath[-2]=='testsuite' else self.currenttest
# instead of real system output it could contain special output from the JUnit 5 Jupiter engine
m = re.match('unique-id: (.*)\ndisplay-name: (.+)', text, flags=re.MULTILINE)
if m:
item['uniqueId'] = m.group(1)
item['displayName'] = m.group(2)
return
if 'stdout' in item: text = item['stdout']+'\n'+text
item['stdout'] = text
def _outcome(self, elem, nodepath):
t = self.currenttest
t['outcome'] = elem.tag
if elem.attrib.get('type'): t['outcomeType'] = elem.attrib['type']
if elem.attrib.get('message'):
t['outcomeReason'] = elem.attrib['message'].strip()
m = re.match('expected: ?<(.*)> but was: ?<(.*)>$', t['outcomeReason'])
if m is not None and m.group(1)!=m.group(2):
t['comparisonExpected'], t['comparisonActual'] = m.group(1), m.group(2)
if t['outcome'] == 'error' and t.get('outcomeType') and t['outcomeType'] not in t['outcomeReason']:
t['outcomeReason'] = '%s: %s'%(t['outcomeType'], t['outcomeReason'])
if elem.text:
details = elem.text.lstrip()
if details:
t['outcomeDetailsFull'] = details
t['outcomeDetails'] = re.sub(self.outcomeDetailsExcludeLinesRegex, '', details, flags=re.MULTILINE).strip()
else:
t['outcomeReason'] = elem.text.strip()