X4O: Fixed html void element auto folding to allowed tags only

This commit is contained in:
Willem Cazander 2025-11-08 16:12:01 +01:00
parent c71f8c0a82
commit 045f6d07f6
2 changed files with 77 additions and 32 deletions

View file

@ -24,7 +24,10 @@ package org.x4o.sax3;
import java.io.IOException; import java.io.IOException;
import java.io.Writer; import java.io.Writer;
import java.util.Arrays;
import java.util.Calendar; import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import org.x4o.sax3.io.ContentCloseable; import org.x4o.sax3.io.ContentCloseable;
import org.x4o.sax3.io.SAX3PropertyConfig; import org.x4o.sax3.io.SAX3PropertyConfig;
@ -40,8 +43,11 @@ import org.xml.sax.helpers.AttributesImpl;
*/ */
public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3WriterXml> { public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3WriterXml> {
static public final List<String> HTML_VOID_TAGS = Collections.unmodifiableList(Tag.valuesVoidElement());
public SAX3WriterHtml(Writer out, String encoding) { public SAX3WriterHtml(Writer out, String encoding) {
super(new SAX3WriterXml(out, encoding), "", SAX3XMLConstants.NULL_NS_URI); super(new SAX3WriterXml(out, encoding), "", SAX3XMLConstants.NULL_NS_URI);
getPropertyConfig().setProperty(SAX3WriterXml.OUTPUT_FOLD_EMPTY_TAGS, HTML_VOID_TAGS);
} }
public SAX3PropertyConfig getPropertyConfig() { public SAX3PropertyConfig getPropertyConfig() {
@ -247,7 +253,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
/* Deprecated TAGS */ /* Deprecated TAGS */
frameset, frameset,
frame, frame(true),
noframes, noframes,
tt, tt,
font, font,
@ -259,14 +265,17 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
acronym, acronym,
applet, applet,
iframe, iframe,
menuitem(true),
keygen(true),
command(true),
/* HTML 4 TAGS */ /* HTML 4 TAGS */
html, html,
head, head,
title, title,
meta, meta(true),
link, link(true),
base, base(true),
body, body,
script, script,
style, style,
@ -282,9 +291,9 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
span, span,
p, p,
pre, pre,
img, img(true),
hr, hr(true),
br, br(true),
b, b,
em, em,
strong, strong,
@ -308,12 +317,12 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
abbr, abbr,
address, address,
area, area(true),
bdo, bdo,
blockquote, blockquote,
cite, cite,
code, code,
col, col(true),
colgroup, colgroup,
del, del,
dfn, dfn,
@ -324,7 +333,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
map, map,
menu, menu,
object, object,
param, param(true),
optgroup, optgroup,
q, q,
s, s,
@ -335,7 +344,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
form, form,
fieldset, fieldset,
input, input(true),
option, option,
label, label,
button, button,
@ -346,16 +355,14 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
canvas, canvas,
audio, audio,
video, video,
source, source(true),
embed, embed(true),
track, track(true),
datalist, datalist,
keygen,
output, output,
article, article,
aside, aside,
bdi, bdi,
command,
details, details,
dialog, dialog,
summary, summary,
@ -373,7 +380,26 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
rp, rp,
section, section,
time, time,
wbr,; wbr(true),
;
private final boolean voidElement;
/**
 * Creates a tag that is not flagged as a void element.
 */
Tag() {
	this(false);
}
/**
 * Creates a tag with an explicit void element flag.
 *
 * @param voidElement {@code true} when this tag is a void element
 */
Tag(boolean voidElement) {
	this.voidElement = voidElement;
}
/**
 * Tells whether this tag was declared as a void element.
 *
 * @return the void element flag given at construction
 */
public boolean voidElement() {
	return this.voidElement;
}
/**
 * Lists the names of all tags that are flagged as void elements.
 *
 * @return the enum constant names of the void element tags, in declaration order
 */
public static List<String> valuesVoidElement() {
	return Arrays.stream(values())
			.filter(Tag::voidElement)
			.map(Tag::name)
			.toList();
}
} }
private final static String DOCTYPE_NAME = "HTML PUBLIC"; private final static String DOCTYPE_NAME = "HTML PUBLIC";

View file

@ -34,6 +34,7 @@ import java.io.Writer;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
@ -65,6 +66,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
private boolean printReturn = false; private boolean printReturn = false;
private String lastElement = null; private String lastElement = null;
private Stack<String> elements = null; private Stack<String> elements = null;
private Set<String> foldEmptyTags = null;
//@formatter:off //@formatter:off
private final static String PROPERTY_CONTEXT_PREFIX = SAX3PropertyConfig.X4O_PROPERTIES_PREFIX + "content/"; // TODO: change to "writer/xml" private final static String PROPERTY_CONTEXT_PREFIX = SAX3PropertyConfig.X4O_PROPERTIES_PREFIX + "content/"; // TODO: change to "writer/xml"
@ -77,6 +79,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
public final static String OUTPUT_COMMENT_AUTO_SPACE = PROPERTY_CONTEXT_PREFIX + "output/comment-auto-space"; public final static String OUTPUT_COMMENT_AUTO_SPACE = PROPERTY_CONTEXT_PREFIX + "output/comment-auto-space";
public final static String OUTPUT_LINE_BREAK_WIDTH = PROPERTY_CONTEXT_PREFIX + "output/line-break-width"; public final static String OUTPUT_LINE_BREAK_WIDTH = PROPERTY_CONTEXT_PREFIX + "output/line-break-width";
public final static String OUTPUT_LINE_PER_ATTRIBUTE = PROPERTY_CONTEXT_PREFIX + "output/line-per-attribute"; public final static String OUTPUT_LINE_PER_ATTRIBUTE = PROPERTY_CONTEXT_PREFIX + "output/line-per-attribute";
public final static String OUTPUT_FOLD_EMPTY_TAGS = PROPERTY_CONTEXT_PREFIX + "output/fold-empty-tags";
public final static String PROLOG_LICENCE_FILE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-file"; public final static String PROLOG_LICENCE_FILE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-file";
public final static String PROLOG_LICENCE_RESOURCE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-resource"; public final static String PROLOG_LICENCE_RESOURCE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-resource";
public final static String PROLOG_LICENCE_ENCODING = PROPERTY_CONTEXT_PREFIX + "prolog/licence-encoding"; public final static String PROLOG_LICENCE_ENCODING = PROPERTY_CONTEXT_PREFIX + "prolog/licence-encoding";
@ -96,6 +99,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
new PropertyConfigItem(OUTPUT_COMMENT_AUTO_SPACE, Boolean.class, true), new PropertyConfigItem(OUTPUT_COMMENT_AUTO_SPACE, Boolean.class, true),
new PropertyConfigItem(OUTPUT_LINE_BREAK_WIDTH, Integer.class, -1), new PropertyConfigItem(OUTPUT_LINE_BREAK_WIDTH, Integer.class, -1),
new PropertyConfigItem(OUTPUT_LINE_PER_ATTRIBUTE, Boolean.class, false), new PropertyConfigItem(OUTPUT_LINE_PER_ATTRIBUTE, Boolean.class, false),
new PropertyConfigItem(OUTPUT_FOLD_EMPTY_TAGS, List.class ), // if null|empty then all empty tags are folded
new PropertyConfigItem(PROLOG_LICENCE_ENCODING, String.class, SAX3XMLConstants.XML_DEFAULT_ENCODING), new PropertyConfigItem(PROLOG_LICENCE_ENCODING, String.class, SAX3XMLConstants.XML_DEFAULT_ENCODING),
new PropertyConfigItem(PROLOG_LICENCE_FILE, File.class ), new PropertyConfigItem(PROLOG_LICENCE_FILE, File.class ),
new PropertyConfigItem(PROLOG_LICENCE_RESOURCE, String.class ), new PropertyConfigItem(PROLOG_LICENCE_RESOURCE, String.class ),
@ -429,27 +433,29 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
throw new SAXException("Unexpected end tag: " + localName + " should be: " + elements.peek()); throw new SAXException("Unexpected end tag: " + localName + " should be: " + elements.peek());
} }
elements.pop(); elements.pop();
if (startElement != null) {
String tag = startElement.toString();
write(tag.substring(0, tag.length() - 1));// rm normal close
write(SAX3XMLConstants.TAG_CLOSE_EMPTY);
startElement = null;
indent--;
return;
}
indent--; indent--;
if (printReturn || !localName.equals(lastElement)) { if (startElement != null) {
write(getPropertyConfig().getPropertyString(OUTPUT_CHAR_NEWLINE)); // no child element of other content, thus use the empty body closing tag
writeIndent(); if (allowEndElementFolding(localName)) {
String tag = startElement.toString();
write(tag.substring(0, tag.length() - 1));// rm normal close
write(SAX3XMLConstants.TAG_CLOSE_EMPTY);
startElement = null;
return;
}
// void element not allowed, so print start + empty body on same line
autoCloseStartElement();
} else { } else {
printReturn = true; if (printReturn || !localName.equals(lastElement)) {
write(getPropertyConfig().getPropertyString(OUTPUT_CHAR_NEWLINE));
writeIndent();
} else {
printReturn = true;
}
} }
if (localName == null) { if (localName == null) {
localName = "null"; localName = "null";
} }
write(SAX3XMLConstants.TAG_OPEN_END); write(SAX3XMLConstants.TAG_OPEN_END);
if (SAX3XMLConstants.NULL_NS_URI.equals(uri) || uri == null) { if (SAX3XMLConstants.NULL_NS_URI.equals(uri) || uri == null) {
write(localName); write(localName);
@ -470,6 +476,19 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
} }
} }
/**
 * Decides whether an element without content may be written in the folded
 * (empty tag) form.
 *
 * The tag names are read from the {@code OUTPUT_FOLD_EMPTY_TAGS} property on
 * the first call and cached in {@code foldEmptyTags}; later calls reuse that
 * cached set.
 *
 * @param tag the local name of the element being closed
 * @return {@code true} when no tag names are configured, or when the given
 *         tag is one of the configured names
 */
private boolean allowEndElementFolding(String tag) {
	if (foldEmptyTags == null) {
		// First use: build the lookup set from the configured property value.
		foldEmptyTags = new HashSet<>();
		boolean configured = getPropertyConfig().getProperty(OUTPUT_FOLD_EMPTY_TAGS) != null;
		if (configured) {
			foldEmptyTags.addAll(getPropertyConfig().getPropertyList(OUTPUT_FOLD_EMPTY_TAGS));
		}
	}
	// An empty set means there is no restriction, so every tag may be folded.
	return foldEmptyTags.isEmpty() || foldEmptyTags.contains(tag);
}
/** /**
* Starts the prefix mapping of an xml namespace uri. * Starts the prefix mapping of an xml namespace uri.
* *