Improved XDBX support

This commit is contained in:
Willem Cazander 2024-12-21 16:58:18 +01:00
parent 27c5d1d63c
commit 6164ea32d4
6 changed files with 319 additions and 25 deletions

View file

@ -320,9 +320,8 @@ public class AbstractContentWriterHandler implements ContentHandler {
startElement.append("=\"");
startElement.append(uri);
startElement.append('"');
startElementNamespaceAll(uri);
}
startElementNamespaceAll(uri);
}
public void startElementNamespaceAll(String uri) throws SAXException {

View file

@ -61,7 +61,6 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
private OutputStream out = null;
private Map<String,String> prefixMapping = null;
private List<String> printedMappings = null;
//private StringBuilder startElement = null;
private Stack<String> elements = null;
private Map<String,Integer> stringIdx = null;
@ -77,6 +76,7 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
public final static String PROLOG_LICENCE_ENABLE = PROPERTY_CONTEXT_PREFIX+"prolog/licence-enable";
public final static String PROLOG_USER_COMMENT = PROPERTY_CONTEXT_PREFIX+"prolog/user-comment";
public final static String PROLOG_USER_COMMENT_ENABLE = PROPERTY_CONTEXT_PREFIX+"prolog/user-comment-enable";
public final static String ROOT_START_NAMESPACE_ALL = PROPERTY_CONTEXT_PREFIX+"root/start-namespace-all";
static {
DEFAULT_PROPERTY_CONFIG = new PropertyConfig(true,null,PROPERTY_CONTEXT_PREFIX,
@ -89,7 +89,8 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
new PropertyConfigItem(PROLOG_LICENCE_RESOURCE, String.class ),
new PropertyConfigItem(PROLOG_LICENCE_ENABLE, Boolean.class, true),
new PropertyConfigItem(PROLOG_USER_COMMENT, String.class ),
new PropertyConfigItem(PROLOG_USER_COMMENT_ENABLE, Boolean.class, true)
new PropertyConfigItem(PROLOG_USER_COMMENT_ENABLE, Boolean.class, true),
new PropertyConfigItem(ROOT_START_NAMESPACE_ALL, Boolean.class, true)
);
}
@ -151,7 +152,7 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
write(XDBXConstants.HEADER_MARKER);
write(XDBXConstants.HEADER_LENGHT);
write(XDBXConstants.HEADER_VERSION);
write(XDBXConstants.HEADER_ENC_DOC_SID);
writeHeaderFlags(out, XDBXConstants.FLAG_STRING_ID + XDBXConstants.FLAG_STRING_ID_IDX);
boolean printDeclaration = getPropertyConfig().getPropertyBoolean(OUTPUT_DECLARATION);
if (printDeclaration) {
writeTag(XDBXContentTag.XML_VERSION); /// tODO: move to declaration method..
@ -287,12 +288,17 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
}
}
}
startElementTag(uri,localName);
startElementTag(uri,localName,name);
startElementNamespace(uri);
startElementAttributes(atts);
elements.push(localName);
if (XMLConstants.NULL_NS_URI.equals(uri) | uri==null) {
elements.push(localName);
} else {
elements.push(uri + ":" + name);
}
}
public void startElementTag(String uri, String localName) throws SAXException {
public void startElementTag(String uri, String localName,String name) throws SAXException {
boolean localNameIdx = xdbxStringId(localName);
if (XMLConstants.NULL_NS_URI.equals(uri) | uri==null) {
if (localNameIdx) {
@ -314,14 +320,49 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
writeTag(XDBXContentTag.ELEMENT_III);
} else {
writeTag(XDBXContentTag.ELEMENT_SII);
writeLengthValue(localName);
writeLengthValue(name);
}
writeVariableInteger(xdbxStringStore(localName));
writeVariableInteger(xdbxStringStore(name));
writeVariableInteger(xdbxStringStore(prefix));
writeVariableInteger(xdbxStringStore(uri));
}
}
/**
 * Writes the namespace declaration for the given uri when it has not been
 * written before, then delegates to {@link #startElementNamespaceAll(String)}.
 *
 * @param uri the namespace uri of the element being started; {@code null} and
 *            the empty namespace are not declared.
 * @throws SAXException when no prefix is registered for the uri.
 */
public void startElementNamespace(String uri) throws SAXException {
    boolean namespaced = uri != null && !XMLConstants.NULL_NS_URI.equals(uri);
    if (namespaced && !printedMappings.contains(uri)) {
        String nsPrefix = prefixMapping.get(uri);
        if (nsPrefix == null) {
            throw new SAXException("preFixUri: "+uri+" is not started.");
        }
        printedMappings.add(uri);
        // Keep store/write calls interleaved in this order: prefix first, then uri.
        writeTag(XDBXContentTag.NS_DECL_II);
        writeVariableInteger(xdbxStringStore(nsPrefix));
        writeVariableInteger(xdbxStringStore(uri));
    }
    // Always runs, also when nothing was declared above.
    startElementNamespaceAll(uri);
}
/**
 * Writes a namespace declaration for every registered prefix mapping that
 * has not been written yet. Does nothing when the
 * {@code ROOT_START_NAMESPACE_ALL} property is disabled.
 *
 * @param uri the namespace uri of the element being started; it is not
 *            consulted here because all pending mappings are written.
 * @throws SAXException when a registered uri has no prefix; the message
 *                      names the uri of the offending mapping.
 */
public void startElementNamespaceAll(String uri) throws SAXException {
    if (!propertyConfig.getPropertyBoolean(ROOT_START_NAMESPACE_ALL)) {
        return;
    }
    for (Map.Entry<String,String> mapping : prefixMapping.entrySet()) {
        String mappedUri = mapping.getKey();
        if (printedMappings.contains(mappedUri)) {
            continue; // already declared earlier in the document
        }
        String prefix = mapping.getValue();
        if (prefix == null) {
            // Report the mapping that is actually broken, not the element uri.
            throw new SAXException("preFixUri: "+mappedUri+" is not started.");
        }
        printedMappings.add(mappedUri);
        writeTag(XDBXContentTag.NS_DECL_II);
        writeVariableInteger(xdbxStringStore(prefix));
        writeVariableInteger(xdbxStringStore(mappedUri));
    }
}
private void startElementAttributes(Attributes atts) throws SAXException {
for (int i=0;i<atts.getLength();i++) {
String attributeUri = atts.getURI(i);
@ -375,8 +416,15 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
* @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
*/
public void endElement(String uri, String localName, String name) throws SAXException {
if (elements.size()>0 && elements.peek().equals((localName))==false) {
throw new SAXException("Unexpected end tag: "+localName+" should be: "+elements.peek());
if (XMLConstants.NULL_NS_URI.equals(uri) | uri==null) {
if (elements.size()>0 && elements.peek().equals(localName)==false) {
throw new SAXException("Unexpected end tag: "+localName+" should be: "+elements.peek());
}
} else {
String qName = uri + ":" + name;
if (elements.size()>0 && elements.peek().equals(qName)==false) {
throw new SAXException("Unexpected end tag: "+qName+" should be: "+elements.peek());
}
}
elements.pop();
writeTag(XDBXContentTag.END_ELEMENT);
@ -404,10 +452,6 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
writeTagLengthValue(XDBXContentTag.STRING_ID, uri);
writeVariableInteger(uriIdxNum);
}
writeTag(XDBXContentTag.NS_DECL_II);
writeVariableInteger(xdbxStringStore(prefix));
writeVariableInteger(xdbxStringStore(uri));
}
/**
@ -426,7 +470,6 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
}
}
if (uri!=null) {
printedMappings.remove(uri);
prefixMapping.remove(uri);
}
}
@ -621,6 +664,13 @@ public class AbstractXDBXWriterHandler implements ContentHandler {
}
}
/**
 * Emits the header flags as four successive {@code write(...)} calls,
 * starting with the value shifted right by 24 bits and ending with the
 * unshifted value.
 *
 * @param out   not used by this method; output goes through {@code write(...)}.
 * @param flags the combined header flag bits.
 * @throws SAXException propagated from {@code write(...)}.
 */
public void writeHeaderFlags(OutputStream out, int flags) throws SAXException {
    for (int shift = 24; shift >= 0; shift -= 8) {
        write(flags >> shift);
    }
}
/**
 * Writes the tag number of the given content tag via {@code write(...)}.
 *
 * @param tag the content tag whose number is written.
 * @throws SAXException propagated from {@code write(...)}.
 */
protected void writeTag(XDBXContentTag tag) throws SAXException {
write(tag.getTagNumber());
}

View file

@ -34,5 +34,8 @@ public final class XDBXConstants {
static public final byte HEADER_LENGHT = 0x05;
static public final byte HEADER_VERSION = 0x01;
static public final byte[] HEADER_ENC_DOC_SID = new byte[] {(byte) 0x00,0x00,0x00, 0x02};
static public final int FLAG_XML_SEQUENCE = 0x1;
static public final int FLAG_STRING_ID = 0x2;
static public final int FLAG_STRING_ID_IDX = 0x20;
static public final int FLAG_VALIDATED = 0x80;
}

View file

@ -37,7 +37,7 @@ import org.xml.sax.helpers.AttributesImpl;
* XDBXReaderXml reads XDBX binary XML and writes it as SAX events.
*
* @author Willem Cazander
* @version 1.0 Dev 20, 2024
* @version 1.0 Dec 20, 2024
*/
public class XDBXReaderXml {
@ -47,22 +47,39 @@ public class XDBXReaderXml {
private String docXmlVersion;
private String docEncoding;
private Stack<XDBXElement> elementStack;
private boolean failOnUnsupportedTag = true;
/**
 * Creates a reader with an empty string index and an empty element stack.
 *
 * @param out the content writer kept by this reader as its output.
 */
public XDBXReaderXml(ContentWriter out) {
    this.elementStack = new Stack<>();
    this.stringIdx = new HashMap<>();
    this.out = out;
}
// TODO: replace with proper PropertyConfig support
/**
 * Switches off the fail-on-unsupported-tag flag of this reader.
 *
 * @return this reader, so the call can be chained.
 */
public XDBXReaderXml withNoFailOnUnsupportedTag() {
    this.failOnUnsupportedTag = false;
    return this;
}
public void parse(InputStream in) throws IOException, SAXException {
if (!Arrays.equals(XDBXConstants.HEADER_MARKER, in.readNBytes(2))) {
throw new SAXException("Wrong magic marker");
}
int headerLength = in.read();
byte[] header = in.readNBytes(headerLength);
if (header.length > 0 && XDBXConstants.HEADER_VERSION != header[0]) {
if (XDBXConstants.HEADER_LENGHT != headerLength) {
throw new SAXException("Wrong header length");
}
int headerVersion = in.read();
if (XDBXConstants.HEADER_VERSION != headerVersion) {
throw new SAXException("Wrong magic version");
}
int flags = readHeaderFlags(in);
if (0 != (flags & XDBXConstants.FLAG_XML_SEQUENCE)) {
throw new SAXException("XML Sequence mode is not supported");
}
if (0 == (flags & XDBXConstants.FLAG_STRING_ID)) {
throw new SAXException("None StringID mode is not supported");
}
out.startDocument();
int next = in.read();
while (next != -1) {
@ -71,7 +88,7 @@ public class XDBXReaderXml {
}
out.endDocument();
}
private void parseToken(int token, InputStream in) throws IOException, SAXException {
if (XDBXContentTag.STRING_ID.getTagNumber() == token) {
String value = readLengthValue(in);
@ -208,6 +225,12 @@ public class XDBXReaderXml {
elementStack.peek().atts.addAttribute(uri, attrName, prefix, "", attrValue);
return;
}
if (XDBXContentTag.DOCUMENT_END.getTagNumber() == token) {
return; // NOP is done by caller
}
if (failOnUnsupportedTag) {
throw new SAXException("Unsupported tag: " + (char)token + " 0x" + Integer.toHexString(token).toUpperCase());
}
}
protected void flushDocDeclaration() throws SAXException {
@ -225,7 +248,7 @@ public class XDBXReaderXml {
}
XDBXElement element = elementStack.peek();
if (element.started) {
throw new SAXException("Can't flush element twice");
return;
}
element.started = true;
out.startElement(element.uri, element.localName, element.name, element.atts);
@ -249,6 +272,15 @@ public class XDBXReaderXml {
return result + (in.read() << 14);
}
/**
 * Reads the four header flag bytes and combines them into one int; the
 * first byte read becomes the most significant byte.
 *
 * @param in the stream positioned at the first flag byte.
 * @return the combined flag bits.
 * @throws IOException when reading fails; a {@link java.io.EOFException}
 *                     when the stream ends before four bytes were read.
 */
public int readHeaderFlags(InputStream in) throws IOException {
    int result = 0;
    for (int i = 0; i < 4; i++) {
        int octet = in.read();
        if (octet < 0) {
            // read() signals end of stream with -1; never fold that into the flags.
            throw new java.io.EOFException("Unexpected end of stream while reading header flags");
        }
        result = (result << 8) | octet;
    }
    return result;
}
/**
 * @return the current value of the {@code docXmlVersion} field.
 */
public String getDocXmlVersion() {
    return this.docXmlVersion;
}

View file

@ -24,11 +24,18 @@ package org.x4o.xml.io.sax.xdbx;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.StringWriter;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.x4o.xml.io.sax.ext.ContentWriterXml;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
/**
@ -62,4 +69,37 @@ public class XDBXReaderXmlTest {
Assertions.assertTrue(output.length() > 0);
Assertions.assertTrue(output.equals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<test attr=\"foobar\"/>"), output);
}
/**
 * Round trip: parses {@code ../pom.xml} into XDBX, reads the XDBX back as
 * XML text and checks that the result still contains the artifactId and
 * that the binary form is smaller than the source file.
 */
@Test
public void testReadWritePom() throws Exception {
    File pomFile = new File("../pom.xml");

    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    saxFactory.setNamespaceAware(true);
    SAXParser saxParser = saxFactory.newSAXParser();
    XMLReader saxReader = saxParser.getXMLReader();

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    XDBXWriterXml writer = new XDBXWriterXml(baos);
    saxReader.setContentHandler(writer);
    // Close the file handle as soon as parsing is done.
    try (FileInputStream pomIn = new FileInputStream(pomFile)) {
        saxReader.parse(new InputSource(pomIn));
    }
    byte[] xdbx = baos.toByteArray();

    StringWriter outputXmlStr = new StringWriter();
    ContentWriterXml outputXml = new ContentWriterXml(outputXmlStr);
    XDBXReaderXml reader = new XDBXReaderXml(outputXml);
    ByteArrayInputStream bais = new ByteArrayInputStream(xdbx);
    reader.parse(bais);

    String output = outputXmlStr.toString();
    Assertions.assertNotNull(output);
    Assertions.assertTrue(output.length() > 0);
    Assertions.assertTrue(output.contains("<artifactId>nx01-x4o-driver</artifactId>"), output);

    int sizePom;
    try (FileInputStream pomIn = new FileInputStream(pomFile)) {
        sizePom = pomIn.readAllBytes().length;
    }
    int sizeXDBX = xdbx.length;
    System.out.println("size-pom.xml:: " + sizePom);
    System.out.println("size-pom.xdbx: " + sizeXDBX);
    Assertions.assertTrue(sizePom > sizeXDBX, "XDBX is not smaller");
}
}

View file

@ -22,10 +22,13 @@
*/
package org.x4o.xml.io.sax.xdbx;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.StringWriter;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.x4o.xml.io.sax.ext.ContentWriterXml;
import org.xml.sax.helpers.AttributesImpl;
/**
@ -37,7 +40,7 @@ import org.xml.sax.helpers.AttributesImpl;
public class XDBXWriterXmlTest {
@Test
public void testCharactersSimple() throws Exception {
public void testExample1DefaultEncoding() throws Exception {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
XDBXWriterXml writer = new XDBXWriterXml(baos);
writer.getPropertyConfig().setProperty(XDBXWriterXml.OUTPUT_DECLARATION, false);
@ -131,4 +134,171 @@ public class XDBXWriterXmlTest {
Assertions.assertEquals((byte)'z', output[outIdx++]);
Assertions.assertEquals((byte)'Z', output[outIdx++]);
}
/**
 * Writes a small document with a prefix mapping and repeated element and
 * attribute names, checks the produced bytes one by one, and then reads the
 * bytes back into XML text.
 *
 * The end events for the namespaced {@code age} element carry the same
 * uri/localName/qName as their start events, as the SAX contract requires.
 */
@Test
public void testExample3StringIds() throws Exception {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    XDBXWriterXml writer = new XDBXWriterXml(baos);
    writer.getPropertyConfig().setProperty(XDBXWriterXml.OUTPUT_DECLARATION, false);

    AttributesImpl atts;
    writer.startDocument();
    writer.startPrefixMapping("foo", "bar");
    writer.startElement("", "root", "", new AttributesImpl());

    writer.startElement("", "Person", "", new AttributesImpl());
    atts = new AttributesImpl();
    atts.addAttribute ("", "mgr", "", "", "NO");
    writer.startElement("", "name", "", atts);
    writer.characters("Bill");
    writer.endElement("", "name", "");
    writer.startElement("bar", "age", "foo:age", new AttributesImpl());
    writer.characters("35");
    writer.endElement("bar", "age", "foo:age");
    writer.endElement("", "Person", "");

    writer.startElement("", "Person", "", new AttributesImpl());
    atts = new AttributesImpl();
    atts.addAttribute ("", "mgr", "", "", "NO");
    writer.startElement("", "name", "", atts);
    writer.characters("Joe");
    writer.endElement("", "name", "");
    writer.startElement("bar", "age", "foo:age", new AttributesImpl());
    writer.characters("45");
    writer.endElement("bar", "age", "foo:age");
    writer.endElement("", "Person", "");

    writer.endElement("", "root", "");
    writer.endDocument();

    byte[] output = baos.toByteArray();
    // Assertions start at offset 8, i.e. after the bytes written by startDocument.
    int outIdx = 8;
    Assertions.assertNotNull(output);
    Assertions.assertTrue(output.length > 0);

    // prefix mapping strings "foo" and "bar"
    Assertions.assertEquals((byte)'I', output[outIdx++]);
    Assertions.assertEquals((byte) 3, output[outIdx++]);
    Assertions.assertEquals((byte)'f', output[outIdx++]);
    Assertions.assertEquals((byte)'o', output[outIdx++]);
    Assertions.assertEquals((byte)'o', output[outIdx++]);
    Assertions.assertEquals((byte) 1, output[outIdx++]);
    Assertions.assertEquals((byte)'I', output[outIdx++]);
    Assertions.assertEquals((byte) 3, output[outIdx++]);
    Assertions.assertEquals((byte)'b', output[outIdx++]);
    Assertions.assertEquals((byte)'a', output[outIdx++]);
    Assertions.assertEquals((byte)'r', output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);

    // root element followed by the namespace declaration
    Assertions.assertEquals((char)'X', (char)output[outIdx++]);
    Assertions.assertEquals((byte) 4, output[outIdx++]);
    Assertions.assertEquals((byte)'r', output[outIdx++]);
    Assertions.assertEquals((byte)'o', output[outIdx++]);
    Assertions.assertEquals((byte)'o', output[outIdx++]);
    Assertions.assertEquals((byte)'t', output[outIdx++]);
    Assertions.assertEquals((byte) 3, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((char)'m', (char)output[outIdx++]);
    Assertions.assertEquals((byte) 1, output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);

    // first Person
    Assertions.assertEquals((char)'X', (char)output[outIdx++]);
    Assertions.assertEquals((byte) 6, output[outIdx++]);
    Assertions.assertEquals((byte)'P', output[outIdx++]);
    Assertions.assertEquals((byte)'e', output[outIdx++]);
    Assertions.assertEquals((byte)'r', output[outIdx++]);
    Assertions.assertEquals((byte)'s', output[outIdx++]);
    Assertions.assertEquals((byte)'o', output[outIdx++]);
    Assertions.assertEquals((byte)'n', output[outIdx++]);
    Assertions.assertEquals((byte) 4, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((char)'X', (char)output[outIdx++]);
    Assertions.assertEquals((byte) 4, output[outIdx++]);
    Assertions.assertEquals((byte)'n', output[outIdx++]);
    Assertions.assertEquals((byte)'a', output[outIdx++]);
    Assertions.assertEquals((byte)'m', output[outIdx++]);
    Assertions.assertEquals((byte)'e', output[outIdx++]);
    Assertions.assertEquals((byte) 5, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((char)'Y', (char)output[outIdx++]);
    Assertions.assertEquals((byte) 3, output[outIdx++]);
    Assertions.assertEquals((byte)'m', output[outIdx++]);
    Assertions.assertEquals((byte)'g', output[outIdx++]);
    Assertions.assertEquals((byte)'r', output[outIdx++]);
    Assertions.assertEquals((byte) 6, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((byte) 0, output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);
    Assertions.assertEquals((byte)'N', output[outIdx++]);
    Assertions.assertEquals((byte)'O', output[outIdx++]);
    Assertions.assertEquals((byte)'T', output[outIdx++]);
    Assertions.assertEquals((byte) 4, output[outIdx++]);
    Assertions.assertEquals((byte)'B', output[outIdx++]);
    Assertions.assertEquals((byte)'i', output[outIdx++]);
    Assertions.assertEquals((byte)'l', output[outIdx++]);
    Assertions.assertEquals((byte)'l', output[outIdx++]);
    Assertions.assertEquals((byte)'z', output[outIdx++]);
    Assertions.assertEquals((char)'X', (char)output[outIdx++]);
    Assertions.assertEquals((byte) 3, output[outIdx++]);
    Assertions.assertEquals((byte)'a', output[outIdx++]);
    Assertions.assertEquals((byte)'g', output[outIdx++]);
    Assertions.assertEquals((byte)'e', output[outIdx++]);
    Assertions.assertEquals((byte) 7, output[outIdx++]);
    Assertions.assertEquals((byte) 1, output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);
    Assertions.assertEquals((byte) 'T', output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);
    Assertions.assertEquals((byte)'3', output[outIdx++]);
    Assertions.assertEquals((byte)'5', output[outIdx++]);
    Assertions.assertEquals((byte)'z', output[outIdx++]);
    Assertions.assertEquals((byte)'z', output[outIdx++]);

    // second Person, names are referenced by index
    Assertions.assertEquals((byte)'e', output[outIdx++]);
    Assertions.assertEquals((byte) 4, output[outIdx++]);
    Assertions.assertEquals((byte)'e', output[outIdx++]);
    Assertions.assertEquals((byte) 5, output[outIdx++]);
    Assertions.assertEquals((byte)'a', output[outIdx++]);
    Assertions.assertEquals((byte) 6, output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);
    Assertions.assertEquals((byte)'N', output[outIdx++]);
    Assertions.assertEquals((byte)'O', output[outIdx++]);
    Assertions.assertEquals((byte)'T', output[outIdx++]);
    Assertions.assertEquals((byte) 3, output[outIdx++]);
    Assertions.assertEquals((byte)'J', output[outIdx++]);
    Assertions.assertEquals((byte)'o', output[outIdx++]);
    Assertions.assertEquals((byte)'e', output[outIdx++]);
    Assertions.assertEquals((byte)'z', output[outIdx++]);
    Assertions.assertEquals((byte)'x', output[outIdx++]);
    Assertions.assertEquals((byte) 7, output[outIdx++]);
    Assertions.assertEquals((byte) 1, output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);
    Assertions.assertEquals((byte)'T', output[outIdx++]);
    Assertions.assertEquals((byte) 2, output[outIdx++]);
    Assertions.assertEquals((byte)'4', output[outIdx++]);
    Assertions.assertEquals((byte)'5', output[outIdx++]);
    Assertions.assertEquals((byte)'z', output[outIdx++]);
    Assertions.assertEquals((byte)'z', output[outIdx++]);

    // end of root and end of document
    Assertions.assertEquals((byte)'z', output[outIdx++]);
    Assertions.assertEquals((byte)'Z', output[outIdx++]);

    // read the bytes back and check the XML text
    StringWriter outputXmlStr = new StringWriter();
    ContentWriterXml outputXml = new ContentWriterXml(outputXmlStr);
    XDBXReaderXml reader = new XDBXReaderXml(outputXml);
    ByteArrayInputStream bais = new ByteArrayInputStream(output);
    reader.parse(bais);
    String outputStr = outputXmlStr.toString();
    Assertions.assertTrue(outputStr.contains("<root"));
    Assertions.assertTrue(outputStr.contains("xmlns:foo=\"bar\""));
    Assertions.assertTrue(outputStr.contains("<name mgr=\"NO\">Bill</name>"));
    Assertions.assertTrue(outputStr.contains("<foo:age>45</foo:age>"));
}
}