X4O: Fixed html void element auto folding to allowed tags only

This commit is contained in:
Willem Cazander 2025-11-08 16:12:01 +01:00
parent c71f8c0a82
commit 045f6d07f6
2 changed files with 77 additions and 32 deletions

View file

@ -24,7 +24,10 @@ package org.x4o.sax3;
import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import org.x4o.sax3.io.ContentCloseable;
import org.x4o.sax3.io.SAX3PropertyConfig;
@ -40,8 +43,11 @@ import org.xml.sax.helpers.AttributesImpl;
*/
public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3WriterXml> {
/**
 * The names of all {@link Tag} constants which are flagged as void element, wrapped as an unmodifiable list.
 */
static public final List<String> HTML_VOID_TAGS = Collections.unmodifiableList(Tag.valuesVoidElement());
/**
 * Creates the html writer on top of a new {@link SAX3WriterXml}.
 *
 * After construction the {@code SAX3WriterXml.OUTPUT_FOLD_EMPTY_TAGS} property is set to
 * {@link #HTML_VOID_TAGS}.
 *
 * @param out	Passed as the output to the wrapped {@link SAX3WriterXml}.
 * @param encoding	Passed as the encoding to the wrapped {@link SAX3WriterXml}.
 */
public SAX3WriterHtml(Writer out, String encoding) {
super(new SAX3WriterXml(out, encoding), "", SAX3XMLConstants.NULL_NS_URI);
// limit the folding of empty tags to the html void tag names
getPropertyConfig().setProperty(SAX3WriterXml.OUTPUT_FOLD_EMPTY_TAGS, HTML_VOID_TAGS);
}
public SAX3PropertyConfig getPropertyConfig() {
@ -247,7 +253,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
/* Deprecated TAGS */
frameset,
frame,
frame(true),
noframes,
tt,
font,
@ -259,14 +265,17 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
acronym,
applet,
iframe,
menuitem(true),
keygen(true),
command(true),
/* HTML 4 TAGS */
html,
head,
title,
meta,
link,
base,
meta(true),
link(true),
base(true),
body,
script,
style,
@ -282,9 +291,9 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
span,
p,
pre,
img,
hr,
br,
img(true),
hr(true),
br(true),
b,
em,
strong,
@ -308,12 +317,12 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
abbr,
address,
area,
area(true),
bdo,
blockquote,
cite,
code,
col,
col(true),
colgroup,
del,
dfn,
@ -324,7 +333,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
map,
menu,
object,
param,
param(true),
optgroup,
q,
s,
@ -335,7 +344,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
form,
fieldset,
input,
input(true),
option,
label,
button,
@ -346,16 +355,14 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
canvas,
audio,
video,
source,
embed,
track,
source(true),
embed(true),
track(true),
datalist,
keygen,
output,
article,
aside,
bdi,
command,
details,
dialog,
summary,
@ -373,7 +380,26 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
rp,
section,
time,
wbr,;
wbr(true),
;
/** True for tags declared as {@code name(true)}, false for tags declared without argument. */
private final boolean voidElement;
/**
 * Creates a tag which is not flagged as void element.
 */
private Tag() {
this(false);
}
/**
 * Creates a tag.
 *
 * @param voidElement	True to flag this tag as a void element.
 */
private Tag(boolean voidElement) {
this.voidElement = voidElement;
}
/**
 * @return	True when this tag is flagged as a void element.
 */
public boolean voidElement() {
return voidElement;
}
/**
 * Collects the names of all tags which are flagged as void element.
 *
 * @return	An unmodifiable list with the enum constant names, in declaration order.
 */
public static List<String> valuesVoidElement() {
	return Arrays.stream(values())
			.filter(Tag::voidElement)
			.map(Tag::name)
			.toList();
}
}
private final static String DOCTYPE_NAME = "HTML PUBLIC";

View file

@ -34,6 +34,7 @@ import java.io.Writer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@ -65,6 +66,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
private boolean printReturn = false;
private String lastElement = null;
private Stack<String> elements = null;
private Set<String> foldEmptyTags = null;
//@formatter:off
private final static String PROPERTY_CONTEXT_PREFIX = SAX3PropertyConfig.X4O_PROPERTIES_PREFIX + "content/"; // TODO: change to "writer/xml"
@ -77,6 +79,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
public final static String OUTPUT_COMMENT_AUTO_SPACE = PROPERTY_CONTEXT_PREFIX + "output/comment-auto-space";
public final static String OUTPUT_LINE_BREAK_WIDTH = PROPERTY_CONTEXT_PREFIX + "output/line-break-width";
public final static String OUTPUT_LINE_PER_ATTRIBUTE = PROPERTY_CONTEXT_PREFIX + "output/line-per-attribute";
public final static String OUTPUT_FOLD_EMPTY_TAGS = PROPERTY_CONTEXT_PREFIX + "output/fold-empty-tags";
public final static String PROLOG_LICENCE_FILE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-file";
public final static String PROLOG_LICENCE_RESOURCE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-resource";
public final static String PROLOG_LICENCE_ENCODING = PROPERTY_CONTEXT_PREFIX + "prolog/licence-encoding";
@ -96,6 +99,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
new PropertyConfigItem(OUTPUT_COMMENT_AUTO_SPACE, Boolean.class, true),
new PropertyConfigItem(OUTPUT_LINE_BREAK_WIDTH, Integer.class, -1),
new PropertyConfigItem(OUTPUT_LINE_PER_ATTRIBUTE, Boolean.class, false),
new PropertyConfigItem(OUTPUT_FOLD_EMPTY_TAGS, List.class ), // if null|empty then all empty tags are folded
new PropertyConfigItem(PROLOG_LICENCE_ENCODING, String.class, SAX3XMLConstants.XML_DEFAULT_ENCODING),
new PropertyConfigItem(PROLOG_LICENCE_FILE, File.class ),
new PropertyConfigItem(PROLOG_LICENCE_RESOURCE, String.class ),
@ -429,27 +433,29 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
throw new SAXException("Unexpected end tag: " + localName + " should be: " + elements.peek());
}
elements.pop();
if (startElement != null) {
String tag = startElement.toString();
write(tag.substring(0, tag.length() - 1));// rm normal close
write(SAX3XMLConstants.TAG_CLOSE_EMPTY);
startElement = null;
indent--;
return;
}
indent--;
if (printReturn || !localName.equals(lastElement)) {
write(getPropertyConfig().getPropertyString(OUTPUT_CHAR_NEWLINE));
writeIndent();
if (startElement != null) {
// no child element or other content, thus use the empty body closing tag
if (allowEndElementFolding(localName)) {
String tag = startElement.toString();
write(tag.substring(0, tag.length() - 1));// rm normal close
write(SAX3XMLConstants.TAG_CLOSE_EMPTY);
startElement = null;
return;
}
// void element not allowed, so print start + empty body on same line
autoCloseStartElement();
} else {
printReturn = true;
if (printReturn || !localName.equals(lastElement)) {
write(getPropertyConfig().getPropertyString(OUTPUT_CHAR_NEWLINE));
writeIndent();
} else {
printReturn = true;
}
}
if (localName == null) {
localName = "null";
}
write(SAX3XMLConstants.TAG_OPEN_END);
if (SAX3XMLConstants.NULL_NS_URI.equals(uri) || uri == null) {
write(localName);
@ -470,6 +476,19 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
}
}
/**
 * Checks if an element without content may be written in the folded (empty tag) form.
 *
 * The tag names of the OUTPUT_FOLD_EMPTY_TAGS property are copied into a set on the first call
 * and that set is reused on later calls. When the set is empty every tag may be folded.
 *
 * @param tag	The name of the element which is being ended.
 * @return	True when the element may be folded.
 */
private boolean allowEndElementFolding(String tag) {
	if (foldEmptyTags == null) {
		// first call; take a snapshot of the configured tag names
		foldEmptyTags = new HashSet<>();
		boolean configured = getPropertyConfig().getProperty(OUTPUT_FOLD_EMPTY_TAGS) != null;
		if (configured) {
			foldEmptyTags.addAll(getPropertyConfig().getPropertyList(OUTPUT_FOLD_EMPTY_TAGS));
		}
	}
	// an empty set means no restriction, else only the listed tags are folded
	return foldEmptyTags.isEmpty() || foldEmptyTags.contains(tag);
}
/**
* Starts the prefix mapping of an xml namespace uri.
*