diff --git a/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDict.java b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDict.java new file mode 100644 index 0000000..cec91d7 --- /dev/null +++ b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDict.java @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2004-2014, Willem Cazander + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided + * that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+package love.distributedrebirth.nx01.mushroom.mais.fc18.unicode;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+/**
+ * Dictionary of unicode literals grouped by unicode block, filled from an XML document.
+ *
+ * <p>Two catch-all blocks ("Unknown-1" and "Unknown-2") are registered by the constructor.
+ * Every {@code block} element seen by {@link #parseXML(InputStream)} is appended after them,
+ * and every parsed {@code character} element is stored under the first registered block whose
+ * start/end range contains its decimal code point.
+ *
+ * @author Willem Cazander
+ * @version 1.0 Jun 01, 2025
+ */
+public class UnicodeDict {
+
+    private final List<UnicodeDictLiteralBlock> blocks = new ArrayList<>();
+    private final Map<UnicodeDictLiteralBlock, List<UnicodeDictLiteral>> blockChars = new HashMap<>();
+
+    /**
+     * Creates a dictionary that only knows the two built-in "Unknown" blocks.
+     */
+    public UnicodeDict() {
+        blocks.add(createBlock("Unknown-1", "1D800", "1E7FF"));
+        blocks.add(createBlock("Unknown-2", "1F900", "1FFFF"));
+    }
+
+    /**
+     * Builds a block from a name and an inclusive range given as hexadecimal code points.
+     */
+    private static UnicodeDictLiteralBlock createBlock(String name, String startHex, String endHex) {
+        UnicodeDictLiteralBlock block = new UnicodeDictLiteralBlock();
+        block.setName(name);
+        block.setStart(Integer.parseInt(startHex, 16));
+        block.setEnd(Integer.parseInt(endHex, 16));
+        return block;
+    }
+
+    /**
+     * Returns the live list of known blocks: the built-in ones first, then parsed ones in document order.
+     *
+     * @return the internal block list, never {@code null}
+     */
+    public List<UnicodeDictLiteralBlock> getBlocks() {
+        return blocks;
+    }
+
+    /**
+     * Returns the literals that were stored under the given block.
+     *
+     * @param block the block instance, as obtained from {@link #getBlocks()}
+     * @return the literals of that block, or {@code null} when no literal was stored under it
+     */
+    public List<UnicodeDictLiteral> getLiterals(UnicodeDictLiteralBlock block) {
+        return blockChars.get(block);
+    }
+
+    /**
+     * Finds the first registered block whose inclusive range contains the code point.
+     *
+     * @param dec the decimal code point to look up
+     * @return the first matching block
+     * @throws IllegalArgumentException when no registered block contains the code point
+     */
+    private UnicodeDictLiteralBlock findLiteralBlock(Integer dec) {
+        for (UnicodeDictLiteralBlock block : blocks) {
+            if (dec >= block.getStart() && dec <= block.getEnd()) {
+                return block;
+            }
+        }
+        throw new IllegalArgumentException("Unmapped decimal unicode number: " + dec);
+    }
+
+    /**
+     * Parses the XML document and adds its blocks and literals to this dictionary.
+     *
+     * <p>Blocks are matched at the moment a {@code character} element ends, so a block has to be
+     * declared in the document before the characters that belong to it. Calling this method more
+     * than once appends to the existing content.
+     *
+     * <p>NOTE(review): the parser is created with the factory defaults. If the input can come from
+     * an untrusted source, DTD and external entity processing should be restricted - confirm.
+     *
+     * @param input the stream to read the XML document from
+     * @throws ParserConfigurationException when no SAX parser can be created
+     * @throws SAXException when the document cannot be parsed
+     * @throws IOException when reading from the stream fails
+     * @throws IllegalArgumentException when a character lies outside every known block
+     */
+    public void parseXML(InputStream input) throws ParserConfigurationException, SAXException, IOException {
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        SAXParser parser = factory.newSAXParser();
+        XMLReader reader = parser.getXMLReader();
+        reader.setContentHandler(new UnicodeDictContentHandler());
+        reader.parse(new InputSource(input));
+    }
+
+    /**
+     * SAX handler that turns {@code block}, {@code character}, {@code unicodedata},
+     * {@code latex} and {@code description} elements into dictionary content.
+     */
+    class UnicodeDictContentHandler implements ContentHandler {
+
+        /** Text collected since the most recent start tag. */
+        final StringBuilder bufChar = new StringBuilder();
+        /** Literal of the {@code character} element being read, {@code null} when there is none or it is skipped. */
+        UnicodeDictLiteral literal;
+
+        @Override
+        public void characters(char[] ch, int start, int length) throws SAXException {
+            // Append the slice directly, no temporary String is needed.
+            bufChar.append(ch, start, length);
+        }
+
+        @Override
+        public void endDocument() throws SAXException {
+        }
+
+        @Override
+        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
+            if (literal == null) {
+                return;
+            }
+            if ("character".equals(qName)) {
+                UnicodeDictLiteralBlock block = findLiteralBlock(literal.getDec());
+                List<UnicodeDictLiteral> charList = blockChars.get(block);
+                if (charList == null) {
+                    charList = new ArrayList<>();
+                    blockChars.put(block, charList);
+                }
+                charList.add(literal);
+                // The literal is complete; elements outside a character must not modify it anymore.
+                literal = null;
+            } else if ("latex".equals(qName)) {
+                literal.setLatex(bufChar.toString());
+            } else if ("description".equals(qName)) {
+                literal.setDescription(bufChar.toString());
+            }
+        }
+
+        @Override
+        public void endPrefixMapping(String prefix) throws SAXException {
+        }
+
+        @Override
+        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        }
+
+        @Override
+        public void processingInstruction(String target, String data) throws SAXException {
+        }
+
+        @Override
+        public void setDocumentLocator(Locator locator) {
+        }
+
+        @Override
+        public void skippedEntity(String name) throws SAXException {
+        }
+
+        @Override
+        public void startDocument() throws SAXException {
+        }
+
+        @Override
+        public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
+            // Every start tag begins a fresh text run; reuse the buffer instead of reallocating it.
+            bufChar.setLength(0);
+            if ("character".equals(qName)) {
+                String dec = atts.getValue("dec");
+                // A value with '-' is not a single decimal number (presumably a code point
+                // sequence - confirm); such entries, and entries without "dec", are skipped.
+                if (dec == null || dec.contains("-")) {
+                    literal = null;
+                    return;
+                }
+                literal = new UnicodeDictLiteral();
+                literal.setId(atts.getValue("id"));
+                literal.setDec(Integer.parseInt(dec));
+                literal.setImage(atts.getValue("image"));
+                literal.setMode(atts.getValue("mode"));
+                literal.setType(atts.getValue("type"));
+            } else if ("unicodedata".equals(qName)) {
+                if (literal == null) {
+                    return;
+                }
+                literal.setDataCategory(atts.getValue("category"));
+                // NOTE(review): the W3C unicode.xml data appears to spell these attributes
+                // "combclass" and "bidi" - confirm. The original names are still tried first.
+                literal.setDataCombClass(firstAttribute(atts, "combClass", "combclass"));
+                literal.setDataBibi(firstAttribute(atts, "bibi", "bidi"));
+                literal.setDataDecomp(atts.getValue("decomp"));
+                literal.setDataNumeric(atts.getValue("numeric"));
+                literal.setDataMirror(atts.getValue("mirror"));
+                literal.setDataComment(atts.getValue("comment"));
+                literal.setDataMathClass(atts.getValue("mathclass"));
+            } else if ("block".equals(qName)) {
+                // Block boundaries are written as hexadecimal code points.
+                UnicodeDictLiteralBlock block = new UnicodeDictLiteralBlock();
+                block.setStart(Integer.parseInt(atts.getValue("start"), 16));
+                block.setEnd(Integer.parseInt(atts.getValue("end"), 16));
+                block.setName(atts.getValue("name"));
+                blocks.add(block);
+            }
+        }
+
+        @Override
+        public void startPrefixMapping(String prefix, String uri) throws SAXException {
+        }
+
+        /**
+         * Returns the value of the attribute {@code name}, or of {@code alternateName} when the first is absent.
+         */
+        private String firstAttribute(Attributes atts, String name, String alternateName) {
+            String value = atts.getValue(name);
+            return value != null ? value : atts.getValue(alternateName);
+        }
+    }
+}
diff --git a/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictLiteral.java b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictLiteral.java
new file mode 100644
index 0000000..ccab99f
--- /dev/null
+++ b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictLiteral.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2004-2014, Willem Cazander
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted provided
+ * that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this list of conditions and the
+ *   following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package love.distributedrebirth.nx01.mushroom.mais.fc18.unicode; + +/** + * Unicode character literal. 
+ *
+ * <p>Plain mutable value holder without validation; every property is {@code null} until its
+ * setter has been called. Within this package {@code UnicodeDict} creates one instance per
+ * parsed {@code character} element.
+ *
+ * @author Willem Cazander
+ * @version 1.0 Jun 01, 2025
+ */
+public class UnicodeDictLiteral {
+
+    // Identity of the literal.
+    private String id;
+    private Integer dec;
+
+    // Presentation hints.
+    private String image;
+    private String mode;
+    private String type;
+
+    // Free text.
+    private String latex;
+    private String description;
+
+    // The "data" group of properties.
+    private String dataCategory;
+    private String dataCombClass;
+    private String dataBibi;
+    private String dataDecomp;
+    private String dataNumeric;
+    private String dataMirror;
+    private String dataComment;
+    private String dataMathClass;
+
+    /**
+     * Creates a literal with all properties unset.
+     */
+    public UnicodeDictLiteral() {
+    }
+
+    /** @return the id, or {@code null} when unset */
+    public String getId() {
+        return this.id;
+    }
+
+    /** @param id the id to store */
+    public void setId(final String id) {
+        this.id = id;
+    }
+
+    /** @return the decimal code point number, or {@code null} when unset */
+    public Integer getDec() {
+        return this.dec;
+    }
+
+    /** @param dec the decimal code point number to store */
+    public void setDec(final Integer dec) {
+        this.dec = dec;
+    }
+
+    /** @return the image value, or {@code null} when unset */
+    public String getImage() {
+        return this.image;
+    }
+
+    /** @param image the image value to store */
+    public void setImage(final String image) {
+        this.image = image;
+    }
+
+    /** @return the mode value, or {@code null} when unset */
+    public String getMode() {
+        return this.mode;
+    }
+
+    /** @param mode the mode value to store */
+    public void setMode(final String mode) {
+        this.mode = mode;
+    }
+
+    /** @return the type value, or {@code null} when unset */
+    public String getType() {
+        return this.type;
+    }
+
+    /** @param type the type value to store */
+    public void setType(final String type) {
+        this.type = type;
+    }
+
+    /** @return the latex text, or {@code null} when unset */
+    public String getLatex() {
+        return this.latex;
+    }
+
+    /** @param latex the latex text to store */
+    public void setLatex(final String latex) {
+        this.latex = latex;
+    }
+
+    /** @return the description text, or {@code null} when unset */
+    public String getDescription() {
+        return this.description;
+    }
+
+    /** @param description the description text to store */
+    public void setDescription(final String description) {
+        this.description = description;
+    }
+
+    /** @return the data category, or {@code null} when unset */
+    public String getDataCategory() {
+        return this.dataCategory;
+    }
+
+    /** @param dataCategory the data category to store */
+    public void setDataCategory(final String dataCategory) {
+        this.dataCategory = dataCategory;
+    }
+
+    /** @return the data combining class, or {@code null} when unset */
+    public String getDataCombClass() {
+        return this.dataCombClass;
+    }
+
+    /** @param dataCombClass the data combining class to store */
+    public void setDataCombClass(final String dataCombClass) {
+        this.dataCombClass = dataCombClass;
+    }
+
+    /**
+     * NOTE(review): "bibi" presumably stands for the bidi class - confirm.
+     *
+     * @return the data "bibi" value, or {@code null} when unset
+     */
+    public String getDataBibi() {
+        return this.dataBibi;
+    }
+
+    /** @param dataBibi the data "bibi" value to store */
+    public void setDataBibi(final String dataBibi) {
+        this.dataBibi = dataBibi;
+    }
+
+    /** @return the data decomposition value, or {@code null} when unset */
+    public String getDataDecomp() {
+        return this.dataDecomp;
+    }
+
+    /** @param dataDecomp the data decomposition value to store */
+    public void setDataDecomp(final String dataDecomp) {
+        this.dataDecomp = dataDecomp;
+    }
+
+    /** @return the data numeric value, or {@code null} when unset */
+    public String getDataNumeric() {
+        return this.dataNumeric;
+    }
+
+    /** @param dataNumeric the data numeric value to store */
+    public void setDataNumeric(final String dataNumeric) {
+        this.dataNumeric = dataNumeric;
+    }
+
+    /** @return the data mirror value, or {@code null} when unset */
+    public String getDataMirror() {
+        return this.dataMirror;
+    }
+
+    /** @param dataMirror the data mirror value to store */
+    public void setDataMirror(final String dataMirror) {
+        this.dataMirror = dataMirror;
+    }
+
+    /** @return the data comment, or {@code null} when unset */
+    public String getDataComment() {
+        return this.dataComment;
+    }
+
+    /** @param dataComment the data comment to store */
+    public void setDataComment(final String dataComment) {
+        this.dataComment = dataComment;
+    }
+
+    /** @return the data math class, or {@code null} when unset */
+    public String getDataMathClass() {
+        return this.dataMathClass;
+    }
+
+    /** @param dataMathClass the data math class to store */
+    public void setDataMathClass(final String dataMathClass) {
+        this.dataMathClass = dataMathClass;
+    }
+}
diff --git a/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictLiteralBlock.java b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictLiteralBlock.java
new file mode 100644
index 0000000..85644c3
--- /dev/null
+++ b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictLiteralBlock.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2004-2014, Willem Cazander
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted provided
+ * that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this list of conditions and the
+ *   following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and
+ *   the following disclaimer in the documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package love.distributedrebirth.nx01.mushroom.mais.fc18.unicode;
+
+/**
+ * A named unicode block, described by the first and last code point it covers.
+ *
+ * <p>Plain mutable value holder without validation; all three properties are {@code null}
+ * until their setter has been called. The class does not override {@code equals} or
+ * {@code hashCode}, so instances compare by identity.
+ *
+ * @author Willem Cazander
+ * @version 1.0 Jun 01, 2025
+ */
+public class UnicodeDictLiteralBlock {
+
+    /** Display name of the block. */
+    private String name;
+    /** First code point of the block. */
+    private Integer start;
+    /** Last code point of the block. */
+    private Integer end;
+
+    /**
+     * Creates a block with all properties unset.
+     */
+    public UnicodeDictLiteralBlock() {
+    }
+
+    /** @return the block name, or {@code null} when unset */
+    public String getName() {
+        return this.name;
+    }
+
+    /** @param name the block name to store */
+    public void setName(final String name) {
+        this.name = name;
+    }
+
+    /** @return the first code point of the block, or {@code null} when unset */
+    public Integer getStart() {
+        return this.start;
+    }
+
+    /** @param start the first code point of the block */
+    public void setStart(final Integer start) {
+        this.start = start;
+    }
+
+    /** @return the last code point of the block, or {@code null} when unset */
+    public Integer getEnd() {
+        return this.end;
+    }
+
+    /** @param end the last code point of the block */
+    public void setEnd(final Integer end) {
+        this.end = end;
+    }
+}
diff --git a/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictTest.java b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictTest.java
new file mode 100644
index 0000000..9548cf0
--- /dev/null
+++ b/nx01-mushroom-mais-fc18/src/main/java/love/distributedrebirth/nx01/mushroom/mais/fc18/unicode/UnicodeDictTest.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004-2014, Willem Cazander
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided + * that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+package love.distributedrebirth.nx01.mushroom.mais.fc18.unicode;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
+
+/**
+ * Command line check that parses a unicode XML file (e.g. "unicode.xml") and prints the
+ * number of literals found per block.
+ *
+ * @author Willem Cazander
+ * @version 1.0 Jun 01, 2025
+ */
+public class UnicodeDictTest {
+
+    /**
+     * Parses the file named by the first argument and prints one line per non-empty block.
+     *
+     * <p>Exits with status 1 when no argument is given.
+     *
+     * @param args the command line; {@code args[0]} is the path of the XML file to parse
+     * @throws Exception when the file cannot be read or parsed
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length == 0) {
+            System.err.println("No argument file given");
+            System.exit(1);
+            return;
+        }
+        UnicodeDict dict = new UnicodeDict();
+        try (InputStream in = new FileInputStream(new File(args[0]))) {
+            dict.parseXML(in);
+        }
+        System.out.println("=======================================");
+        for (UnicodeDictLiteralBlock block : dict.getBlocks()) {
+            List<UnicodeDictLiteral> charList = dict.getLiterals(block);
+            // getLiterals returns null for a block that received no literal.
+            if (charList == null) {
+                continue;
+            }
+            System.out.println("Block size=" + charList.size() + " name=" + block.getName());
+            // Uncomment to dump every literal of the block:
+            //for (UnicodeDictLiteral literal : charList) {
+            //    System.out.println("Literal: " + literal.getId() + " desc=" + literal.getDescription());
+            //}
+        }
+    }
+}