| Event method | Fired when |
| startDocument() | Parsing begins |
| endDocument() | Parsing completes |
| startElement(uri, localName, qName, attrs) | Opening tag encountered |
| endElement(uri, localName, qName) | Closing tag encountered |
| characters(char[], start, length) | Text content between tags (may arrive in several chunks) |
| ignorableWhitespace(char[], start, length) | Ignorable whitespace between elements (in element-only content) |
| startPrefixMapping(prefix, uri) | Namespace declaration encountered |
| fatalError(SAXParseException) | Fatal parse error (an ErrorHandler callback) |
SAX does not build any tree. There is no way to navigate backward or access sibling elements. Your handler must maintain its own state (e.g., a stack, current element name) to track context.
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import java.util.*;
/**
 * SAX handler that collects every {@code <book>} element into a map holding its
 * {@code id} and {@code genre} attributes plus the trimmed text of its
 * {@code <title>}, {@code <author>} and {@code <year>} children.
 *
 * <p>Elements are matched by local name when the parser reports one (namespace-aware
 * parsing) and by qualified name otherwise, so the handler gives the same result for
 * {@code <book>} and {@code <lib:book>} regardless of the prefix a document uses.
 */
public class BookSaxHandler extends DefaultHandler {
    /** Completed books, in document order. */
    private final List<Map<String, String>> books = new ArrayList<>();
    /** The book currently being populated, or {@code null} when outside a book element. */
    private Map<String, String> current;
    /** Accumulates character data for the element currently being read. */
    private final StringBuilder text = new StringBuilder();

    /**
     * Called when an opening tag is encountered. Clears the text buffer and, for a
     * book element, starts a new record seeded with its {@code id} and {@code genre}
     * attributes (stored as {@code null} when an attribute is absent).
     */
    @Override
    public void startElement(String uri, String localName, String qName,
                             Attributes attrs) {
        text.setLength(0); // reset text buffer
        if ("book".equals(elementName(localName, qName))) {
            current = new HashMap<>();
            current.put("id", attrs.getValue("id"));
            current.put("genre", attrs.getValue("genre"));
        }
    }

    /** Called with text content; may be called multiple times for one element. */
    @Override
    public void characters(char[] ch, int start, int length) {
        text.append(ch, start, length);
    }

    /**
     * Called when a closing tag is encountered. Inside a book, stores the trimmed
     * text of title/author/year under the element's name; on the closing book tag,
     * publishes the finished record.
     */
    @Override
    public void endElement(String uri, String localName, String qName) {
        if (current != null) {
            String name = elementName(localName, qName);
            switch (name) {
                case "title", "author", "year" -> current.put(name, text.toString().trim());
                case "book" -> {
                    books.add(current);
                    current = null;
                }
                default -> {
                    // other elements carry nothing this handler records
                }
            }
        }
        text.setLength(0);
    }

    /** Returns the books collected so far (the handler's own list, not a copy). */
    public List<Map<String, String>> getBooks() {
        return books;
    }

    /**
     * Picks the name to match an element on: the local name when it is present,
     * otherwise the qualified name. Used so matching does not depend on the prefix.
     */
    private static String elementName(String localName, String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }

    /** Main: wire up the parser, parse {@code library.xml} and print each book. */
    public static void main(String[] args) throws Exception {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        SAXParser parser = factory.newSAXParser();
        BookSaxHandler handler = new BookSaxHandler();
        parser.parse(new java.io.File("library.xml"), handler);
        handler.getBooks().forEach(b ->
            System.out.printf("Book %s: %s by %s%n",
                b.get("id"), b.get("title"), b.get("author")));
    }
}
The characters() callback may be called multiple times for a single text node — always accumulate into a StringBuilder and read the result only in endElement().
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * Error handler that aborts parsing on any error: warnings are written to
 * {@code System.err}, while recoverable and fatal errors are rethrown unchanged.
 */
public class StrictErrorHandler implements ErrorHandler {

    /** Non-recoverable problem: always rethrown to the caller. */
    @Override
    public void fatalError(SAXParseException e) throws SAXException {
        throw e;
    }

    /**
     * Recoverable problem: rethrown so that parsing is aborted
     * (logging instead of throwing would let the parser continue).
     */
    @Override
    public void error(SAXParseException e) throws SAXException {
        throw e;
    }

    /** Prints the warning's line, column and message to stderr; parsing continues. */
    @Override
    public void warning(SAXParseException e) {
        final int line = e.getLineNumber();
        final int column = e.getColumnNumber();
        final String message = e.getMessage();
        System.err.printf("WARNING at line %d col %d: %s%n", line, column, message);
    }
}
// Register on the XMLReader
SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
XMLReader reader = saxParser.getXMLReader();
// Keep a reference to the content handler so its collected books can be read after parsing.
BookSaxHandler handler = new BookSaxHandler();
reader.setErrorHandler(new StrictErrorHandler());
reader.setContentHandler(handler);
// try-with-resources closes the file stream even when parse() throws.
try (FileInputStream in = new FileInputStream("library.xml")) {
    reader.parse(new InputSource(in));
}
SAX shines when you need to extract a small subset of data from a very large XML file. This example counts books without loading them all into memory.
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.*;
import javax.xml.parsers.*;
import java.io.*;
/** Streaming helpers that extract small facts from XML without building a tree. */
public class LargeXmlProcessor {

    /**
     * Sentinel thrown by a handler to stop parsing early. Catching this type (instead
     * of comparing exception messages) cannot be confused with a genuine parse failure.
     */
    private static final class StopParsingException extends SAXException {
        private static final long serialVersionUID = 1L;

        StopParsingException() {
            super("STOP");
        }
    }

    /**
     * Counts {@code <book>} elements in a potentially very large file.
     *
     * @param xmlFile the XML file to scan
     * @return the number of elements whose qualified name is {@code book}
     * @throws Exception if the parser cannot be created or the file cannot be parsed
     */
    public static int countBooks(File xmlFile) throws Exception {
        int[] count = {0}; // effectively-final wrapper so the anonymous handler can mutate it
        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        parser.parse(xmlFile, new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName,
                                     String qName, Attributes attrs) {
                if ("book".equals(qName)) {
                    count[0]++;
                }
            }
        });
        return count[0];
    }

    /**
     * Returns the trimmed text of the first {@code <title>} element, stopping the parse
     * as soon as that element closes. Text inside child elements of the title (for
     * example {@code <title>Hello <em>World</em></title>}) is included.
     *
     * @param xmlStream the XML input; it is read only up to the end of the first title
     * @return the title text, or {@code null} if the document contains no title element
     * @throws Exception if the parser cannot be created or the input fails to parse
     *         before a title has been completed
     */
    public static String findFirstTitle(InputStream xmlStream) throws Exception {
        String[] found = {null};
        try {
            SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
            parser.parse(xmlStream, new DefaultHandler() {
                /** 0 outside the title; 1 on the title itself; greater inside its children. */
                private int titleDepth = 0;
                private final StringBuilder sb = new StringBuilder();

                @Override
                public void startElement(String u, String l, String q, Attributes a) {
                    if (titleDepth > 0) {
                        titleDepth++; // child of the title: keep accumulating its text
                    } else if ("title".equals(q)) {
                        titleDepth = 1;
                        sb.setLength(0);
                    }
                }

                @Override
                public void characters(char[] ch, int start, int length) {
                    if (titleDepth > 0) {
                        sb.append(ch, start, length);
                    }
                }

                @Override
                public void endElement(String u, String l, String q) throws SAXException {
                    if (titleDepth > 0 && --titleDepth == 0) {
                        found[0] = sb.toString().trim();
                        throw new StopParsingException(); // abort parsing
                    }
                }
            });
        } catch (StopParsingException expected) {
            // Deliberate early exit: the first title has been captured in found[0].
        }
        return found[0];
    }
}
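Throwing a SAXException from a handler callback is the standard technique for early termination. The parser stops immediately and the exception propagates to the caller, where you catch it and recognize it as your own stop signal — for example by its message or, more robustly, by throwing a dedicated SAXException subclass and catching that type.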
// Prevent XXE attacks: configure the factory before creating any parser from it.
SAXParserFactory factory = SAXParserFactory.newInstance();
// Switched on: DOCTYPE declarations are disallowed.
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
// Switched off: external general/parameter entities and loading of external DTDs.
String[] disabledFeatures = {
    "http://xml.org/sax/features/external-general-entities",
    "http://xml.org/sax/features/external-parameter-entities",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd"
};
for (String feature : disabledFeatures) {
    factory.setFeature(feature, false);
}
SAXParser parser = factory.newSAXParser();
parser.parse(untrustedInput, new MyHandler());
| Aspect | SAX (push) | StAX (pull) |
| Control | Parser drives — calls your handler | You drive — call next() |
| Early exit | Throw exception | Simply stop calling next() |
| Write XML | No | Yes — XMLStreamWriter |
| Complexity | Medium — must track state manually | Lower — logic flows naturally |
| Performance | Slightly faster (no pull overhead) | Similar |
| Best for | Simple streaming extraction | Complex conditional parsing logic |