X4O: Fixed html void element auto folding to allowed tags only
This commit is contained in:
parent
c71f8c0a82
commit
045f6d07f6
2 changed files with 77 additions and 32 deletions
|
|
@ -24,7 +24,10 @@ package org.x4o.sax3;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.x4o.sax3.io.ContentCloseable;
|
import org.x4o.sax3.io.ContentCloseable;
|
||||||
import org.x4o.sax3.io.SAX3PropertyConfig;
|
import org.x4o.sax3.io.SAX3PropertyConfig;
|
||||||
|
|
@ -40,8 +43,11 @@ import org.xml.sax.helpers.AttributesImpl;
|
||||||
*/
|
*/
|
||||||
public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3WriterXml> {
|
public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3WriterXml> {
|
||||||
|
|
||||||
|
static public final List<String> HTML_VOID_TAGS = Collections.unmodifiableList(Tag.valuesVoidElement());
|
||||||
|
|
||||||
public SAX3WriterHtml(Writer out, String encoding) {
|
public SAX3WriterHtml(Writer out, String encoding) {
|
||||||
super(new SAX3WriterXml(out, encoding), "", SAX3XMLConstants.NULL_NS_URI);
|
super(new SAX3WriterXml(out, encoding), "", SAX3XMLConstants.NULL_NS_URI);
|
||||||
|
getPropertyConfig().setProperty(SAX3WriterXml.OUTPUT_FOLD_EMPTY_TAGS, HTML_VOID_TAGS);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SAX3PropertyConfig getPropertyConfig() {
|
public SAX3PropertyConfig getPropertyConfig() {
|
||||||
|
|
@ -247,7 +253,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
|
|
||||||
/* Deprecated TAGS */
|
/* Deprecated TAGS */
|
||||||
frameset,
|
frameset,
|
||||||
frame,
|
frame(true),
|
||||||
noframes,
|
noframes,
|
||||||
tt,
|
tt,
|
||||||
font,
|
font,
|
||||||
|
|
@ -259,14 +265,17 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
acronym,
|
acronym,
|
||||||
applet,
|
applet,
|
||||||
iframe,
|
iframe,
|
||||||
|
menuitem(true),
|
||||||
|
keygen(true),
|
||||||
|
command(true),
|
||||||
|
|
||||||
/* HTML 4 TAGS */
|
/* HTML 4 TAGS */
|
||||||
html,
|
html,
|
||||||
head,
|
head,
|
||||||
title,
|
title,
|
||||||
meta,
|
meta(true),
|
||||||
link,
|
link(true),
|
||||||
base,
|
base(true),
|
||||||
body,
|
body,
|
||||||
script,
|
script,
|
||||||
style,
|
style,
|
||||||
|
|
@ -282,9 +291,9 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
span,
|
span,
|
||||||
p,
|
p,
|
||||||
pre,
|
pre,
|
||||||
img,
|
img(true),
|
||||||
hr,
|
hr(true),
|
||||||
br,
|
br(true),
|
||||||
b,
|
b,
|
||||||
em,
|
em,
|
||||||
strong,
|
strong,
|
||||||
|
|
@ -308,12 +317,12 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
|
|
||||||
abbr,
|
abbr,
|
||||||
address,
|
address,
|
||||||
area,
|
area(true),
|
||||||
bdo,
|
bdo,
|
||||||
blockquote,
|
blockquote,
|
||||||
cite,
|
cite,
|
||||||
code,
|
code,
|
||||||
col,
|
col(true),
|
||||||
colgroup,
|
colgroup,
|
||||||
del,
|
del,
|
||||||
dfn,
|
dfn,
|
||||||
|
|
@ -324,7 +333,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
map,
|
map,
|
||||||
menu,
|
menu,
|
||||||
object,
|
object,
|
||||||
param,
|
param(true),
|
||||||
optgroup,
|
optgroup,
|
||||||
q,
|
q,
|
||||||
s,
|
s,
|
||||||
|
|
@ -335,7 +344,7 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
|
|
||||||
form,
|
form,
|
||||||
fieldset,
|
fieldset,
|
||||||
input,
|
input(true),
|
||||||
option,
|
option,
|
||||||
label,
|
label,
|
||||||
button,
|
button,
|
||||||
|
|
@ -346,16 +355,14 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
canvas,
|
canvas,
|
||||||
audio,
|
audio,
|
||||||
video,
|
video,
|
||||||
source,
|
source(true),
|
||||||
embed,
|
embed(true),
|
||||||
track,
|
track(true),
|
||||||
datalist,
|
datalist,
|
||||||
keygen,
|
|
||||||
output,
|
output,
|
||||||
article,
|
article,
|
||||||
aside,
|
aside,
|
||||||
bdi,
|
bdi,
|
||||||
command,
|
|
||||||
details,
|
details,
|
||||||
dialog,
|
dialog,
|
||||||
summary,
|
summary,
|
||||||
|
|
@ -373,7 +380,26 @@ public class SAX3WriterHtml extends SAX3WriterEnum<SAX3WriterHtml.Tag, SAX3Write
|
||||||
rp,
|
rp,
|
||||||
section,
|
section,
|
||||||
time,
|
time,
|
||||||
wbr,;
|
wbr(true),
|
||||||
|
;
|
||||||
|
|
||||||
|
private final boolean voidElement;
|
||||||
|
|
||||||
|
/**
 * Creates a tag that is not a void element; delegates to
 * {@link #Tag(boolean)} with {@code false}.
 */
private Tag() {
|
||||||
|
this(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a tag and records whether it is flagged as a void element.
 *
 * @param voidElement the flag value later returned by {@link #voidElement()}.
 */
private Tag(boolean voidElement) {
|
||||||
|
this.voidElement = voidElement;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @return the void element flag this tag was constructed with.
 */
public boolean voidElement() {
|
||||||
|
return voidElement;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the enum names of all tags flagged as void element.
 *
 * @return the {@code name()} of every constant whose {@link #voidElement()} is true,
 *         in {@code values()} order.
 */
static public List<String> valuesVoidElement() {
|
||||||
|
return Arrays.stream(values()).filter(v -> v.voidElement()).map(v -> v.name()).toList();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static String DOCTYPE_NAME = "HTML PUBLIC";
|
private final static String DOCTYPE_NAME = "HTML PUBLIC";
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import java.io.Writer;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
@ -65,6 +66,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
|
||||||
private boolean printReturn = false;
|
private boolean printReturn = false;
|
||||||
private String lastElement = null;
|
private String lastElement = null;
|
||||||
private Stack<String> elements = null;
|
private Stack<String> elements = null;
|
||||||
|
private Set<String> foldEmptyTags = null;
|
||||||
|
|
||||||
//@formatter:off
|
//@formatter:off
|
||||||
private final static String PROPERTY_CONTEXT_PREFIX = SAX3PropertyConfig.X4O_PROPERTIES_PREFIX + "content/"; // TODO: change to "writer/xml"
|
private final static String PROPERTY_CONTEXT_PREFIX = SAX3PropertyConfig.X4O_PROPERTIES_PREFIX + "content/"; // TODO: change to "writer/xml"
|
||||||
|
|
@ -77,6 +79,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
|
||||||
public final static String OUTPUT_COMMENT_AUTO_SPACE = PROPERTY_CONTEXT_PREFIX + "output/comment-auto-space";
|
public final static String OUTPUT_COMMENT_AUTO_SPACE = PROPERTY_CONTEXT_PREFIX + "output/comment-auto-space";
|
||||||
public final static String OUTPUT_LINE_BREAK_WIDTH = PROPERTY_CONTEXT_PREFIX + "output/line-break-width";
|
public final static String OUTPUT_LINE_BREAK_WIDTH = PROPERTY_CONTEXT_PREFIX + "output/line-break-width";
|
||||||
public final static String OUTPUT_LINE_PER_ATTRIBUTE = PROPERTY_CONTEXT_PREFIX + "output/line-per-attribute";
|
public final static String OUTPUT_LINE_PER_ATTRIBUTE = PROPERTY_CONTEXT_PREFIX + "output/line-per-attribute";
|
||||||
|
public final static String OUTPUT_FOLD_EMPTY_TAGS = PROPERTY_CONTEXT_PREFIX + "output/fold-empty-tags";
|
||||||
public final static String PROLOG_LICENCE_FILE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-file";
|
public final static String PROLOG_LICENCE_FILE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-file";
|
||||||
public final static String PROLOG_LICENCE_RESOURCE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-resource";
|
public final static String PROLOG_LICENCE_RESOURCE = PROPERTY_CONTEXT_PREFIX + "prolog/licence-resource";
|
||||||
public final static String PROLOG_LICENCE_ENCODING = PROPERTY_CONTEXT_PREFIX + "prolog/licence-encoding";
|
public final static String PROLOG_LICENCE_ENCODING = PROPERTY_CONTEXT_PREFIX + "prolog/licence-encoding";
|
||||||
|
|
@ -96,6 +99,7 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
|
||||||
new PropertyConfigItem(OUTPUT_COMMENT_AUTO_SPACE, Boolean.class, true),
|
new PropertyConfigItem(OUTPUT_COMMENT_AUTO_SPACE, Boolean.class, true),
|
||||||
new PropertyConfigItem(OUTPUT_LINE_BREAK_WIDTH, Integer.class, -1),
|
new PropertyConfigItem(OUTPUT_LINE_BREAK_WIDTH, Integer.class, -1),
|
||||||
new PropertyConfigItem(OUTPUT_LINE_PER_ATTRIBUTE, Boolean.class, false),
|
new PropertyConfigItem(OUTPUT_LINE_PER_ATTRIBUTE, Boolean.class, false),
|
||||||
|
new PropertyConfigItem(OUTPUT_FOLD_EMPTY_TAGS, List.class ), // if null or empty, then all empty tags are folded
|
||||||
new PropertyConfigItem(PROLOG_LICENCE_ENCODING, String.class, SAX3XMLConstants.XML_DEFAULT_ENCODING),
|
new PropertyConfigItem(PROLOG_LICENCE_ENCODING, String.class, SAX3XMLConstants.XML_DEFAULT_ENCODING),
|
||||||
new PropertyConfigItem(PROLOG_LICENCE_FILE, File.class ),
|
new PropertyConfigItem(PROLOG_LICENCE_FILE, File.class ),
|
||||||
new PropertyConfigItem(PROLOG_LICENCE_RESOURCE, String.class ),
|
new PropertyConfigItem(PROLOG_LICENCE_RESOURCE, String.class ),
|
||||||
|
|
@ -429,27 +433,29 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
|
||||||
throw new SAXException("Unexpected end tag: " + localName + " should be: " + elements.peek());
|
throw new SAXException("Unexpected end tag: " + localName + " should be: " + elements.peek());
|
||||||
}
|
}
|
||||||
elements.pop();
|
elements.pop();
|
||||||
|
|
||||||
if (startElement != null) {
|
|
||||||
String tag = startElement.toString();
|
|
||||||
write(tag.substring(0, tag.length() - 1));// rm normal close
|
|
||||||
write(SAX3XMLConstants.TAG_CLOSE_EMPTY);
|
|
||||||
startElement = null;
|
|
||||||
indent--;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
indent--;
|
indent--;
|
||||||
if (printReturn || !localName.equals(lastElement)) {
|
if (startElement != null) {
|
||||||
write(getPropertyConfig().getPropertyString(OUTPUT_CHAR_NEWLINE));
|
// no child element or other content, thus use the empty body closing tag
|
||||||
writeIndent();
|
if (allowEndElementFolding(localName)) {
|
||||||
|
String tag = startElement.toString();
|
||||||
|
write(tag.substring(0, tag.length() - 1));// rm normal close
|
||||||
|
write(SAX3XMLConstants.TAG_CLOSE_EMPTY);
|
||||||
|
startElement = null;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// folding is not allowed for this tag, so print start + empty body on same line
|
||||||
|
autoCloseStartElement();
|
||||||
} else {
|
} else {
|
||||||
printReturn = true;
|
if (printReturn || !localName.equals(lastElement)) {
|
||||||
|
write(getPropertyConfig().getPropertyString(OUTPUT_CHAR_NEWLINE));
|
||||||
|
writeIndent();
|
||||||
|
} else {
|
||||||
|
printReturn = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (localName == null) {
|
if (localName == null) {
|
||||||
localName = "null";
|
localName = "null";
|
||||||
}
|
}
|
||||||
|
|
||||||
write(SAX3XMLConstants.TAG_OPEN_END);
|
write(SAX3XMLConstants.TAG_OPEN_END);
|
||||||
if (SAX3XMLConstants.NULL_NS_URI.equals(uri) || uri == null) {
|
if (SAX3XMLConstants.NULL_NS_URI.equals(uri) || uri == null) {
|
||||||
write(localName);
|
write(localName);
|
||||||
|
|
@ -470,6 +476,19 @@ public class AbstractContentWriterHandler implements ContentHandler, Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decides if an element without content may be written in the folded (empty tag) form.
 *
 * The allowed tag names are read from the OUTPUT_FOLD_EMPTY_TAGS property on the
 * first call and kept in the foldEmptyTags field.
 * NOTE(review): nothing in this method resets that field, so a later change of the
 * property looks like it is not picked up here - confirm against the rest of the class.
 *
 * @param tag the tag name to look up in the allowed set.
 * @return true if no tags are configured (every empty element folds) or if the
 *         configured set contains the given tag.
 */
private boolean allowEndElementFolding(String tag) {
|
||||||
|
// build the lookup set only once, on first use
if (foldEmptyTags == null) {
|
||||||
|
foldEmptyTags = new HashSet<>();
|
||||||
|
// an unset property leaves the set empty
if (getPropertyConfig().getProperty(OUTPUT_FOLD_EMPTY_TAGS) != null) {
|
||||||
|
foldEmptyTags.addAll(getPropertyConfig().getPropertyList(OUTPUT_FOLD_EMPTY_TAGS));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// an empty set means no restriction: all empty elements are folded
if (foldEmptyTags.isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return foldEmptyTags.contains(tag);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Starts the prefix mapping of an xml namespace uri.
|
* Starts the prefix mapping of an xml namespace uri.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue