Contents

Pattern.compile() compiles a regex string into a reusable Pattern; this is the expensive step and should be done once (e.g., as a static final constant). pattern.matcher(input) creates a Matcher which applies the pattern to a specific string. find() searches anywhere in the string; matches() requires the entire string to match; lookingAt() matches from the start but not necessarily the whole string.

import java.util.regex.*; // String.matches() — checks if the ENTIRE string matches (convenience, not reusable) boolean ok = "hello123".matches("[a-z]+\\d+"); // true // Double backslash \\ in Java string = single \ in regex // Pattern.compile() — compile once, use many times Pattern digits = Pattern.compile("\\d+"); Pattern words = Pattern.compile("\\b[A-Za-z]+\\b"); // Matcher.find() — finds next match anywhere in the string String text = "Order 123 shipped, invoice 456 pending"; Matcher m = digits.matcher(text); while (m.find()) { System.out.println("Found: " + m.group() + " at [" + m.start() + "," + m.end() + ")"); } // Found: 123 at [6,9) // Found: 456 at [27,30) // Matcher.matches() — the ENTIRE string must match Matcher full = Pattern.compile("\\d{3}").matcher("123"); System.out.println(full.matches()); // true Matcher notFull = Pattern.compile("\\d{3}").matcher("123abc"); System.out.println(notFull.matches()); // false // Matcher.lookingAt() — matches from beginning (not necessarily the whole string) Matcher beginning = Pattern.compile("\\d+").matcher("123abc"); System.out.println(beginning.lookingAt()); // true // Reset a matcher to reuse with a new input m.reset("Invoice 789 sent"); while (m.find()) System.out.println(m.group()); // 789

Parentheses in a regex create a capturing group; matcher.group(n) retrieves the nth captured text (group 0 is the entire match). Named groups (?<name>pattern) (Java 7+) are retrieved by name via matcher.group("name") — clearer and more maintainable than numbered groups. Non-capturing groups (?:pattern) group without incrementing the group counter.

// Numbered groups — (pattern) creates group 1, 2, ... String date = "2025-03-15"; Pattern datePattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})"); Matcher dm = datePattern.matcher(date); if (dm.matches()) { String year = dm.group(1); // "2025" String month = dm.group(2); // "03" String day = dm.group(3); // "15" System.out.printf("Year: %s, Month: %s, Day: %s%n", year, month, day); } // Named groups — (?<name>pattern) — Java 7+ Pattern named = Pattern.compile( "(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})"); Matcher nm = named.matcher(date); if (nm.matches()) { System.out.println(nm.group("year")); // "2025" System.out.println(nm.group("month")); // "03" System.out.println(nm.group("day")); // "15" } // Collecting all matches with groups String input = "Alice:90, Bob:85, Charlie:92"; Pattern kvp = Pattern.compile("(?<name>[A-Za-z]+):(?<score>\\d+)"); Matcher kvm = kvp.matcher(input); while (kvm.find()) { System.out.printf("%s scored %s%n", kvm.group("name"), kvm.group("score")); } // Alice scored 90 // Bob scored 85 // Charlie scored 92 // Non-capturing group — (?:pattern) — groups without capturing Pattern nc = Pattern.compile("(?:https?|ftp)://([\\w./-]+)"); Matcher ncm = nc.matcher("https://example.com/page"); if (ncm.find()) { System.out.println(ncm.group(1)); // "example.com/page" (group 1, not https) }

String.replaceAll(regex, replacement) replaces every match; replaceFirst() replaces only the first. Use $1, $2 (or ${name} for named groups) in the replacement string to refer back to captured groups. Java 9 added Matcher.replaceAll(Function), which lets you transform each match programmatically. split() splits on a regex delimiter; a positive limit argument caps the number of tokens, and the last token holds the rest of the input unsplit.

// String.replaceAll — replace all matches (regex) String cleaned = "hello world java".replaceAll("\\s+", " "); System.out.println(cleaned); // "hello world java" // String.replaceFirst — replace first match only String first = "aaa bbb aaa".replaceFirst("aaa", "XXX"); System.out.println(first); // "XXX bbb aaa" // Back-references in replacement ($1 for group 1) String swapped = "2025-03-15".replaceAll("(\\d{4})-(\\d{2})-(\\d{2})", "$3/$2/$1"); System.out.println(swapped); // "15/03/2025" // Named group back-reference (${name}) String reordered = "2025-03-15".replaceAll( "(?<y>\\d{4})-(?<m>\\d{2})-(?<d>\\d{2})", "${d}/${m}/${y}"); System.out.println(reordered); // "15/03/2025" // Matcher.replaceAll with Function (Java 9+) String result = Pattern.compile("\\d+") .matcher("order 5 of 10 items") .replaceAll(mr -> String.valueOf(Integer.parseInt(mr.group()) * 2)); System.out.println(result); // "order 10 of 20 items" // String.split with regex String csv = "a,b,,c,d"; String[] parts = csv.split(","); // ["a","b","","c","d"] String[] limit = csv.split(",", 3); // ["a","b",",,c,d"] — max 3 parts String[] noEmpty = csv.split(",+"); // ["a","b","c","d"] — skip empty

Flags change how the regex engine interprets the pattern. CASE_INSENSITIVE makes letter matching case-insensitive; MULTILINE makes ^ and $ match at the start and end of each line instead of only at the start and end of the whole input; DOTALL makes . match line terminators as well. Flags can be combined with | in Pattern.compile(), or embedded inline in the pattern as (?i), (?m), (?s).

// CASE_INSENSITIVE Pattern p = Pattern.compile("hello", Pattern.CASE_INSENSITIVE); System.out.println(p.matcher("HELLO WORLD").find()); // true // Inline flag (?i) — applies within the pattern System.out.println("HELLO".matches("(?i)hello")); // true // MULTILINE — ^ and $ match start/end of each line, not just the whole input String multi = "first\nsecond\nthird"; Pattern ml = Pattern.compile("^\\w+$", Pattern.MULTILINE); Matcher mm = ml.matcher(multi); while (mm.find()) System.out.println(mm.group()); // first, second, third // DOTALL — . matches newlines too (default: . does NOT match \n) Pattern dot = Pattern.compile("begin.*end", Pattern.DOTALL); System.out.println(dot.matcher("begin\nmiddle\nend").find()); // true // Combining flags with | Pattern combined = Pattern.compile("pattern", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // LITERAL — treat the pattern as a literal string (no special chars) Pattern literal = Pattern.compile("a.b", Pattern.LITERAL); System.out.println(literal.matcher("a.b").matches()); // true System.out.println(literal.matcher("axb").matches()); // false (dot is literal) // Pattern.quote — escape a string for use as a literal in a regex String userInput = "1+1=2"; String safe = Pattern.quote(userInput); // "\\Q1+1=2\\E" "formula: 1+1=2".replaceAll(safe, "ANSWER"); // safe replacement

These patterns cover the most common real-world validation and extraction tasks. Declare them as static final constants so Pattern.compile() runs only once. Note that truly RFC-compliant email or URL validation requires more complex patterns — these simplified versions handle the vast majority of real-world input without being brittle.

// Email (simplified) Pattern EMAIL = Pattern.compile("^[\\w.+-]+@[\\w-]+\\.[a-zA-Z]{2,}$"); System.out.println(EMAIL.matcher("user@example.com").matches()); // true System.out.println(EMAIL.matcher("invalid@").matches()); // false // US phone number variants Pattern PHONE = Pattern.compile("\\(?\\d{3}\\)?[-.\\s]?\\d{3}[-.\\s]?\\d{4}"); System.out.println(PHONE.matcher("(555) 123-4567").find()); // true System.out.println(PHONE.matcher("555.123.4567").find()); // true // IPv4 address (0-255 per octet) Pattern IPV4 = Pattern.compile( "\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}" + "(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b"); // URL extraction Pattern URL = Pattern.compile("https?://[\\w./?=&%-]+"); Matcher um = URL.matcher("Visit https://cscode.io and https://example.com"); while (um.find()) System.out.println(um.group()); // Alphanumeric identifier Pattern ID = Pattern.compile("^[A-Za-z][A-Za-z0-9_]{2,29}$"); // Numbers (int or float) Pattern NUMBER = Pattern.compile("-?\\d+(\\.\\d+)?"); // Whitespace normalization String normalized = " Hello World ".trim().replaceAll("\\s+", " "); System.out.println(normalized); // "Hello World" Always compile Pattern as a static final constant when it is used repeatedly. Pattern.compile() is expensive — calling it inside a loop or on every request creates unnecessary overhead. Matcher objects are cheap and can be created per-call.