DOM Parser in Java — Reading & Writing XML Trees

Parser	Memory	Access	Modify	Best for
DOM	Entire document in RAM	Random — navigate any node	Yes	Small-to-medium docs needing random access or modification
SAX	O(1) — streaming	Forward-only events	No	Large documents, read-only extraction
StAX	O(1) — streaming	Forward-only cursor	No (read) / Yes (write)	Large documents, pull-style control

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Parses a small in-memory XML document with the DOM API and prints the root
 * tag name followed by one line per {@code <book>} element.
 */
public class DomReadExample {

    /**
     * Builds the sample document, parses it and prints its contents to
     * standard output.
     *
     * @param args unused
     * @throws Exception if the parser cannot be configured or the XML cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String xml = """
            <library>
              <book id="1" genre="fiction">
                <title>Clean Code</title>
                <author>Robert C. Martin</author>
                <year>2008</year>
              </book>
              <book id="2" genre="technical">
                <title>Effective Java</title>
                <author>Joshua Bloch</author>
                <year>2018</year>
              </book>
            </library>
            """;

        // 1. Obtain a DocumentBuilder (not guaranteed thread-safe — create one
        //    per thread or synchronise access)
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true); // enable namespace support
        DocumentBuilder builder = factory.newDocumentBuilder();

        // 2. Parse from string. The bytes are encoded explicitly as UTF-8:
        //    the document has no XML declaration, so the parser assumes UTF-8,
        //    whereas the no-arg getBytes() would use the platform default charset.
        Document doc = builder.parse(
                new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));

        // 3. Normalise text nodes (merges adjacent text nodes)
        doc.getDocumentElement().normalize();

        // 4. Read the root element
        Element root = doc.getDocumentElement();
        System.out.println("Root: " + root.getTagName()); // library

        // 5. Iterate every <book> element below the root
        //    (getElementsByTagName searches all descendants, not only direct children)
        NodeList books = root.getElementsByTagName("book");
        for (int i = 0; i < books.getLength(); i++) {
            Element book = (Element) books.item(i);
            String id = book.getAttribute("id");
            String title = firstDescendantText(book, "title");
            String author = firstDescendantText(book, "author");
            System.out.printf("Book %s: %s by %s%n", id, title, author);
        }
    }

    /**
     * Returns the text content of the first descendant of {@code parent} named
     * {@code tagName}, or an empty string when there is no such element
     * (instead of failing with a NullPointerException).
     */
    private static String firstDescendantText(Element parent, String tagName) {
        Node first = parent.getElementsByTagName(tagName).item(0);
        return first == null ? "" : first.getTextContent();
    }
}

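Neither DocumentBuilderFactory nor DocumentBuilder is guaranteed to be thread-safe. Create a new DocumentBuilder per thread, or use a pool. The static DocumentBuilderFactory.newInstance() method can be called from any thread, but a factory instance should be cached in a static field only if it is fully configured before it is shared and access to it is synchronised; otherwise keep one factory per thread.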

Element root = doc.getDocumentElement(); // Get direct children (includes text nodes with whitespace!) NodeList children = root.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { Element el = (Element) child; System.out.println("Tag: " + el.getTagName()); } } // Get all descendants with a tag name NodeList allBooks = doc.getElementsByTagName("book"); // Navigate up Node parent = someNode.getParentNode(); // Navigate siblings Node next = someNode.getNextSibling(); Node prev = someNode.getPreviousSibling(); // Read attributes NamedNodeMap attrs = element.getAttributes(); for (int i = 0; i < attrs.getLength(); i++) { Attr attr = (Attr) attrs.item(i); System.out.println(attr.getName() + " = " + attr.getValue()); } // Text content of a node (includes all descendant text) String text = element.getTextContent(); // Read a specific attribute String genre = element.getAttribute("genre"); boolean hasAttr = element.hasAttribute("id");

import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Builds a {@code <library>} document from scratch with the DOM API and prints
 * it, indented, to standard output.
 */
public class DomWriteExample {

    /**
     * Creates the document, adds two books, serialises the tree and prints it.
     *
     * @param args unused
     * @throws Exception if the parser or transformer cannot be created, or
     *                   serialisation fails
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();
        Document doc = builder.newDocument();

        // Create root element
        Element root = doc.createElement("library");
        doc.appendChild(root);

        // Build each book with the helper and attach it to the root
        Element book1 = createBook(doc, "1", "fiction",
                "Clean Code", "Robert C. Martin", "2008");
        Element book2 = createBook(doc, "2", "technical",
                "Effective Java", "Joshua Bloch", "2018");
        root.appendChild(book1);
        root.appendChild(book2);

        // Serialise to string
        Transformer tf = TransformerFactory.newInstance().newTransformer();
        tf.setOutputProperty(OutputKeys.INDENT, "yes");
        // Implementation-specific property name that sets the indent width
        tf.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        tf.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

        // StringWriter lives in java.io, which the original import list omitted
        StringWriter sw = new StringWriter();
        tf.transform(new DOMSource(doc), new StreamResult(sw));
        System.out.println(sw);
    }

    /**
     * Creates a detached {@code <book>} element with {@code id} and
     * {@code genre} attributes and {@code <title>}, {@code <author>} and
     * {@code <year>} children, in that order.
     *
     * @param doc    document that owns the new elements
     * @param id     value of the {@code id} attribute
     * @param genre  value of the {@code genre} attribute
     * @param title  text of the {@code <title>} child
     * @param author text of the {@code <author>} child
     * @param year   text of the {@code <year>} child
     * @return the new element; the caller is responsible for appending it
     */
    private static Element createBook(Document doc, String id, String genre,
            String title, String author, String year) {
        Element book = doc.createElement("book");
        book.setAttribute("id", id);
        book.setAttribute("genre", genre);
        appendTextElement(doc, book, "title", title);
        appendTextElement(doc, book, "author", author);
        appendTextElement(doc, book, "year", year);
        return book;
    }

    /** Appends a new {@code tagName} element containing {@code text} to {@code parent}. */
    private static void appendTextElement(Document doc, Element parent,
            String tagName, String text) {
        Element child = doc.createElement(tagName);
        child.setTextContent(text);
        parent.appendChild(child);
    }
}

Document doc = builder.parse(xmlFile); // Add a new child element Element root = doc.getDocumentElement(); Element newBook = doc.createElement("book"); newBook.setAttribute("id", "3"); newBook.setTextContent("New Book"); root.appendChild(newBook); // Modify an existing element's text NodeList titles = doc.getElementsByTagName("title"); titles.item(0).setTextContent("Updated Title"); // Add/change an attribute Element book = (Element) doc.getElementsByTagName("book").item(0); book.setAttribute("available", "true"); // Remove a node Node toRemove = doc.getElementsByTagName("book").item(1); toRemove.getParentNode().removeChild(toRemove); // Replace a node Element replacement = doc.createElement("book"); replacement.setTextContent("Replacement"); root.replaceChild(replacement, root.getFirstChild()); // Clone a node (deep = true clones children too) Node clone = book.cloneNode(true); root.appendChild(clone);

// Enable namespace awareness BEFORE creating the builder DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(xmlFile); // Access namespace-aware elements NodeList items = doc.getElementsByTagNameNS( "http://example.com/books", "book"); Element el = (Element) items.item(0); System.out.println("Local name : " + el.getLocalName()); // book System.out.println("Namespace : " + el.getNamespaceURI()); // http://example.com/books System.out.println("Prefix : " + el.getPrefix()); // e.g., "bk" // Create namespace-aware elements Element nsEl = doc.createElementNS("http://example.com/books", "bk:book"); nsEl.setAttributeNS("http://example.com/books", "bk:id", "42");

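If you call getElementsByTagNameNS() without first setting factory.setNamespaceAware(true), namespace-prefixed elements will not be found, because the parser does not record namespace URIs or local names. Plain getElementsByTagName() matches the full qualified name (for example "bk:book"), so searching for "book" will miss prefixed elements as well. Always enable namespace awareness when your XML uses namespaces.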

By default, Java's XML parsers may resolve external entity references, which enables XML External Entity (XXE) attacks that can expose server files or trigger SSRF. Always disable external entities in production.

DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); // Disable DOCTYPE declarations entirely (most restrictive) factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); // Or selectively disable external entities factory.setFeature("http://xml.org/sax/features/external-general-entities", false); factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); factory.setXIncludeAware(false); factory.setExpandEntityReferences(false); DocumentBuilder builder = factory.newDocumentBuilder();

XXE (XML External Entity) injection was its own category (A4) in the OWASP Top 10 2017 and is covered under Security Misconfiguration (A05) in the 2021 edition. Any application that parses untrusted XML without disabling external entities is potentially vulnerable to arbitrary file reads or server-side request forgery.