From fc0b9343f72e91162cd570393213edf7629f4a28 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 24 Dec 2024 14:48:13 +0100 Subject: [PATCH] Updated CDC1604DashP6 to support ESC codes and fixed line endings test --- .../main/java/org/x4o/o2o/CDC1604DashP6.java | 28 +++--- .../java/org/x4o/o2o/CDC1604DashP6DEC.java | 85 +++++++++++++++++++ .../java/org/x4o/o2o/CDC1604DashP6Test.java | 19 +++++ 3 files changed, 121 insertions(+), 11 deletions(-) create mode 100644 nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6DEC.java diff --git a/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6.java b/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6.java index e6895be..77b0d74 100644 --- a/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6.java +++ b/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6.java @@ -36,8 +36,8 @@ import java.util.PrimitiveIterator; * @version 1.0 Dec 22, 2024 */ public enum CDC1604DashP6 { - // [NUL][WN][WG][WL] [WS]^?! {[(< }])> - // ~+-* ,.:; '"`′ =#$% + // [NUL][WN][WG][WE] [WL][WS]?! {[(< }])> + // ~+-* ,.:; '"`^ =#$% // |_\/ @ABC DEFG HIJK // LMNO PQRS TUVW XYZ& @@ -51,9 +51,10 @@ public enum CDC1604DashP6 { _WORD_NUMBER('\u0001'), /// Word glue to flag for automatic casing or separator in renderer. _WORD_GLUE('\u0002'), + /// Escape to terminal control code. 
(note only needed for 6 bit and 8 bit systems) + _WORD_ESC('\u0003'), _WORD_LINE('\n'), _WORD_SPACE(' '), - _CARET('^'), _QUESTION_MARK('?'), _EXCLAMATION_MARK('!'), _CURLY_BRACKET_LEFT('{'), @@ -76,7 +77,7 @@ public enum CDC1604DashP6 { _APOSTROPHE('\''), _QUOTATION_MARK('\"'), _BACKTICK_MARK('`'), - _PRIME_SIGN('′'), + _CARET('^'), _EQUALS_SIGN('='), _NUMBER_SIGN('#'), _DOLLAR_SIGN('$'), @@ -149,13 +150,18 @@ public enum CDC1604DashP6 { } static public List convertFromUnicode(String text, boolean strict) { - PrimitiveIterator.OfInt i = text.codePoints().iterator(); + String textSingleNewLines = text.replaceAll("\r\n", "\n"); + PrimitiveIterator.OfInt i = textSingleNewLines.codePoints().iterator(); List result = new ArrayList<>(text.length()); CDC1604DashP6[] cdcChars = values(); CDC1604DashP6 cdcNumberTerminator = null; while (i.hasNext()) { int codePoint = i.next(); boolean found = false; + if ('\r' == codePoint) { + result.add(CDC1604DashP6._WORD_LINE); + continue; + } for (CDC1604DashP6 v : cdcChars) { if (v.codePoint == codePoint) { result.add(v); @@ -163,12 +169,6 @@ public enum CDC1604DashP6 { cdcNumberTerminator = null; break; } - if (v.codePointLower == codePoint) { - result.add(v); - found = true; - cdcNumberTerminator = null; - break; - } if ('0' == codePoint) { if (!CDC1604DashP6._A.equals(cdcNumberTerminator)) { result.add(CDC1604DashP6._WORD_NUMBER); @@ -195,6 +195,12 @@ public enum CDC1604DashP6 { if (!found && !strict) { String codePointStr = Character.toString(codePoint); for (CDC1604DashP6 v : cdcChars) { + if (v.codePointLower == codePoint) { + result.add(v); + found = true; + cdcNumberTerminator = null; + break; + } if (v.aliases == null) { continue; } diff --git a/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6DEC.java b/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6DEC.java new file mode 100644 index 0000000..c054e12 --- /dev/null +++ b/nx01-x4o-o2o/src/main/java/org/x4o/o2o/CDC1604DashP6DEC.java @@ -0,0 +1,85 @@ +/* + * Copyright 
(c) 2004-2014, Willem Cazander + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted provided + * that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.x4o.o2o; + +/** + * Direct Escaped Control of text terminal with CDC1604DashP6 code points. + * + * A 6 or 8 bit computer uses CDC1604DashP6._WORD_ESC to reach these 64 control codes. + * + * An 18 or 144 bit computer uses these directly as the second page of 64 code points. (thus numbers 64 to 127) + * + * @author Willem Cazander + * @version 1.0 Dec 24, 2024 + */ +public enum CDC1604DashP6DEC { + + + /// Reserved so that 6 and 8 bit systems can use P6._NUL for termination. + _RESERVED0(), + /// Encode 21 bit unicode-1 code point ?? 
+ _RESERVED1_UNICODE1(), + _RESERVED2(), + _RESERVED3(), + /// Unicode-4 is 72 bit stroke blocks, which are 12 P6 code points to align. + /// _WORD_ESC + _TEST_UNICODE4 + + manyStrokeBlocks + _RESERVED4_UNICODE4(), + _RESERVED5(), + // block line for first 6 P6 code points to keep non-printable chars free on 6 bit computers. + + _ESC_TERMINATOR(), // ESC \ 0x9C ST String Terminator + + // TODO: Re-impl simple like VT52 see https://en.wikipedia.org/wiki/ANSI_escape_code + // - ESC [ + // - any number (including none) of "parameter bytes" in the range 0x30–0x3F (ASCII 0–9:;<=>?), + // - any number of "intermediate bytes" in the range 0x20–0x2F (ASCII space and !"#$%&'()*+,-./), + // - finally by a single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~) + + + /// Moves cursor up. (test) + /// On 8 bit ansi system this takes ESC+[+1+A = 4 bytes/32bit to move 1 up + /// with terminator codes; + /// On 6 bit P6DEC system this takes _ESC+_CSI_CURSOR_UP+_WM+_I+_A+_ESC+_ESC_TERMINATOR = 7*P6 or 42 bits + /// On 18 bit P6DEC system this takes _CSI_CURSOR_UP+_WM+_I+_A+_ESC_TERMINATOR = 5*18 or 90 bits + /// with unsigned native packet integer; (but limits value) + /// On 6 bit P6DEC system this takes _ESC+_CSI_CURSOR_UP_NP+0x1+_ESC+_ESC_TERMINATOR = 5*P6 or 30 bits + /// On 18 bit P6DEC system this takes _CSI_CURSOR_UP_NP+0x1+_ESC_TERMINATOR = 3*18 or 54 bits + /// TODO: Think of better encoding system + _CSI_CURSOR_UP('A'), + _CSI_CURSOR_UP_NP() + ; + + private final Character csiPostfix; + + private CDC1604DashP6DEC() { + this(null); + } + + private CDC1604DashP6DEC(Character csiPostfix) { + this.csiPostfix = csiPostfix; + } + + public Character csiPostFix() { + return csiPostfix; + } +} diff --git a/nx01-x4o-o2o/src/test/java/org/x4o/o2o/CDC1604DashP6Test.java b/nx01-x4o-o2o/src/test/java/org/x4o/o2o/CDC1604DashP6Test.java index dad45b6..901a18b 100644 --- a/nx01-x4o-o2o/src/test/java/org/x4o/o2o/CDC1604DashP6Test.java +++ 
b/nx01-x4o-o2o/src/test/java/org/x4o/o2o/CDC1604DashP6Test.java @@ -113,4 +113,23 @@ public class CDC1604DashP6Test { String out = CDC1604DashP6.convertToUnicode(cdc); Assertions.assertEquals("B3J", out); } + + @Test + public void testLineEndings() throws Exception { + List cdc = CDC1604DashP6.convertFromUnicode("A\nB\rC\r\nD\n"); + Assertions.assertNotNull(cdc); + Assertions.assertFalse(cdc.isEmpty()); + Assertions.assertEquals(8, cdc.size()); + Assertions.assertEquals(CDC1604DashP6._A, cdc.get(0)); + Assertions.assertEquals(CDC1604DashP6._WORD_LINE, cdc.get(1)); + Assertions.assertEquals(CDC1604DashP6._B, cdc.get(2)); + Assertions.assertEquals(CDC1604DashP6._WORD_LINE, cdc.get(3)); + Assertions.assertEquals(CDC1604DashP6._C, cdc.get(4)); + Assertions.assertEquals(CDC1604DashP6._WORD_LINE, cdc.get(5)); + Assertions.assertEquals(CDC1604DashP6._D, cdc.get(6)); + Assertions.assertEquals(CDC1604DashP6._WORD_LINE, cdc.get(7)); + + String out = CDC1604DashP6.convertToUnicode(cdc); + Assertions.assertEquals("A\nB\nC\nD\n", out); + } }