| Parser | Memory | Access | Modify | Best for |
| --- | --- | --- | --- | --- |
| DOM | Entire document in RAM | Random — navigate any node | Yes | Small-to-medium docs needing random access or modification |
| SAX | O(1) — streaming | Forward-only events | No | Large documents, read-only extraction |
| StAX | O(1) — streaming | Forward-only cursor | No (read) / Yes (write) | Large documents, pull-style control |
import java.io.*;
import java.nio.charset.StandardCharsets;
import javax.xml.parsers.*;
import org.w3c.dom.*;
/**
 * Demonstrates reading an XML document with the DOM API: parse an in-memory
 * string, then walk the resulting tree to print each book's id, title and author.
 */
public class DomReadExample {
    /**
     * Parses the embedded library document and prints the root tag name followed by
     * one line per {@code <book>} element.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be configured or the XML cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String xml = """
                <library>
                <book id="1" genre="fiction">
                <title>Clean Code</title>
                <author>Robert C. Martin</author>
                <year>2008</year>
                </book>
                <book id="2" genre="technical">
                <title>Effective Java</title>
                <author>Joshua Bloch</author>
                <year>2018</year>
                </book>
                </library>
                """;

        // 1. Obtain a DocumentBuilder (thread-unsafe — create one per thread or synchronise)
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true); // enable namespace support
        DocumentBuilder builder = factory.newDocumentBuilder();

        // 2. Parse from string. The document has no XML declaration, so the parser
        //    assumes UTF-8; encode explicitly instead of relying on the platform
        //    default charset, which would corrupt non-ASCII text on some systems.
        Document doc = builder.parse(
                new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));

        // 3. Normalise text nodes (merges adjacent text nodes)
        Element root = doc.getDocumentElement();
        root.normalize();

        // 4. Read the root element
        System.out.println("Root: " + root.getTagName()); // library

        // 5. Iterate over every <book> descendant of the root
        NodeList books = root.getElementsByTagName("book");
        for (int i = 0; i < books.getLength(); i++) {
            Element book = (Element) books.item(i);
            String id = book.getAttribute("id");
            String title = book.getElementsByTagName("title").item(0).getTextContent();
            String author = book.getElementsByTagName("author").item(0).getTextContent();
            System.out.printf("Book %s: %s by %s%n", id, title, author);
        }
    }
}
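Neither DocumentBuilderFactory nor DocumentBuilder is guaranteed to be thread-safe. Create a new DocumentBuilder per thread, or use a pool. The static DocumentBuilderFactory.newInstance() call is itself safe to invoke from any thread, but if you cache the resulting factory in a static field, configure it fully up front and synchronise access to it (or keep one factory per thread).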
Element root = doc.getDocumentElement();

// Direct children only. Whitespace between tags appears as text nodes,
// so anything that is not an element is skipped.
NodeList children = root.getChildNodes();
int childCount = children.getLength();
for (int idx = 0; idx < childCount; idx++) {
    Node candidate = children.item(idx);
    if (candidate.getNodeType() != Node.ELEMENT_NODE) {
        continue;
    }
    Element childElement = (Element) candidate;
    System.out.println("Tag: " + childElement.getTagName());
}

// Every descendant with the given tag name, at any depth
NodeList allBooks = doc.getElementsByTagName("book");

// Upwards: the containing node
Node parent = someNode.getParentNode();

// Sideways: the neighbouring nodes on either side
Node next = someNode.getNextSibling();
Node prev = someNode.getPreviousSibling();

// Enumerate every attribute on an element
NamedNodeMap attrs = element.getAttributes();
int attrCount = attrs.getLength();
for (int idx = 0; idx < attrCount; idx++) {
    Attr attribute = (Attr) attrs.item(idx);
    String name = attribute.getName();
    String value = attribute.getValue();
    System.out.println(name + " = " + value);
}

// Concatenated text of the node and all of its descendants
String text = element.getTextContent();

// Look up a single attribute by name, and test for presence
String genre = element.getAttribute("genre");
boolean hasAttr = element.hasAttribute("id");
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.*;
/**
 * Demonstrates building a DOM tree from scratch and serialising it to an
 * indented XML string with a {@link Transformer}.
 */
public class DomWriteExample {
    /**
     * Builds a two-book library document in memory and prints it as indented XML.
     *
     * @param args ignored
     * @throws Exception if the parser or transformer cannot be created, or serialisation fails
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        Document doc = builderFactory.newDocumentBuilder().newDocument();

        // The root must be attached to the document before it is serialised
        Element library = doc.createElement("library");
        doc.appendChild(library);

        library.appendChild(
                createBook(doc, "1", "fiction", "Clean Code", "Robert C. Martin", "2008"));
        library.appendChild(
                createBook(doc, "2", "technical", "Effective Java", "Joshua Bloch", "2018"));

        // Identity transformer: copies the DOM to the output, applying the properties below
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

        StringWriter xmlOut = new StringWriter();
        serializer.transform(new DOMSource(doc), new StreamResult(xmlOut));
        System.out.println(xmlOut.toString());
    }

    /**
     * Creates a detached {@code <book>} element with {@code id} and {@code genre}
     * attributes and {@code <title>}, {@code <author>} and {@code <year>} children.
     *
     * @param doc    document that owns the new nodes
     * @param id     value of the {@code id} attribute
     * @param genre  value of the {@code genre} attribute
     * @param title  text of the {@code <title>} child
     * @param author text of the {@code <author>} child
     * @param year   text of the {@code <year>} child
     * @return the new element, not yet attached to any parent
     */
    private static Element createBook(Document doc, String id, String genre,
            String title, String author, String year) {
        Element book = doc.createElement("book");
        book.setAttribute("id", id);
        book.setAttribute("genre", genre);
        appendTextChild(doc, book, "title", title);
        appendTextChild(doc, book, "author", author);
        appendTextChild(doc, book, "year", year);
        return book;
    }

    /** Appends a child element named {@code tag} holding {@code text} to {@code parent}. */
    private static void appendTextChild(Document doc, Element parent, String tag, String text) {
        Element child = doc.createElement(tag);
        child.setTextContent(text);
        parent.appendChild(child);
    }
}
Document doc = builder.parse(xmlFile);

// Add a new child element
Element root = doc.getDocumentElement();
Element newBook = doc.createElement("book");
newBook.setAttribute("id", "3");
newBook.setTextContent("New Book");
root.appendChild(newBook);

// Modify an existing element's text
NodeList titles = doc.getElementsByTagName("title");
titles.item(0).setTextContent("Updated Title");

// Add/change an attribute
Element book = (Element) doc.getElementsByTagName("book").item(0);
book.setAttribute("available", "true");

// Remove a node
Node toRemove = doc.getElementsByTagName("book").item(1);
toRemove.getParentNode().removeChild(toRemove);

// Replace a node. In a parsed document the root's first child is usually a
// whitespace text node rather than an element, so skip ahead to the first
// element child instead of replacing whatever getFirstChild() returns.
Element replacement = doc.createElement("book");
replacement.setTextContent("Replacement");
Node firstElementChild = root.getFirstChild();
while (firstElementChild != null
        && firstElementChild.getNodeType() != Node.ELEMENT_NODE) {
    firstElementChild = firstElementChild.getNextSibling();
}
if (firstElementChild != null) {
    root.replaceChild(replacement, firstElementChild);
}

// Clone a node (deep = true clones children too)
Node clone = book.cloneNode(true);
root.appendChild(clone);
// Namespace awareness is a factory setting, so switch it on before asking for a builder
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(xmlFile);

// Look elements up by (namespace URI, local name) rather than by prefixed tag name
final String booksNs = "http://example.com/books";
NodeList items = doc.getElementsByTagNameNS(booksNs, "book");
Element el = (Element) items.item(0);
String localName = el.getLocalName();
String namespaceUri = el.getNamespaceURI();
String prefix = el.getPrefix();
System.out.println("Local name : " + localName); // book
System.out.println("Namespace : " + namespaceUri); // http://example.com/books
System.out.println("Prefix : " + prefix); // e.g., "bk"

// Create an element and an attribute bound to the same namespace
Element nsEl = doc.createElementNS(booksNs, "bk:book");
nsEl.setAttributeNS(booksNs, "bk:id", "42");
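If you call getElementsByTagNameNS() without first setting factory.setNamespaceAware(true), no elements will be found, because the parser does not record namespace URIs or local names (getLocalName() and getNamespaceURI() return null). getElementsByTagName() still works in that mode, but it matches only the literal qualified name, prefix included (e.g. "bk:book"). Always enable namespace awareness when your XML uses namespaces.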
By default, Java's XML parsers may follow external entity references (XXE), which can expose server files or trigger SSRF. Always disable external entities in production.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

// Most restrictive option: reject any document that contains a DOCTYPE
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

// Or selectively switch off each way an external entity or DTD can be loaded
String[] externalEntityFeatures = {
    "http://xml.org/sax/features/external-general-entities",
    "http://xml.org/sax/features/external-parameter-entities",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd",
};
for (String feature : externalEntityFeatures) {
    factory.setFeature(feature, false);
}
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);

// Features must be set before the builder is created
DocumentBuilder builder = factory.newDocumentBuilder();
XXE (XML External Entity) injection had its own category (A4) in the OWASP Top 10 2017 and is covered under Security Misconfiguration (A05) in the 2021 edition. Any application that parses untrusted XML without disabling external entities is potentially vulnerable to arbitrary file reads or server-side request forgery.