Updated CDC1604DashP6 to support ESC codes and fixed line endings test

This commit is contained in:
Willem Cazander 2024-12-24 14:48:13 +01:00
parent f6926b1724
commit fc0b9343f7
3 changed files with 121 additions and 11 deletions

View file

@ -36,8 +36,8 @@ import java.util.PrimitiveIterator;
* @version 1.0 Dec 22, 2024 * @version 1.0 Dec 22, 2024
*/ */
public enum CDC1604DashP6 { public enum CDC1604DashP6 {
// [NUL][WN][WG][WL] [WS]^?! {[(< }])> // [NUL][WN][WG][WE] [WL][WS]?! {[(< }])>
// ~+-* ,.:; '"` =#$% // ~+-* ,.:; '"`^ =#$%
// |_\/ @ABC DEFG HIJK // |_\/ @ABC DEFG HIJK
// LMNO PQRS TUVW XYZ& // LMNO PQRS TUVW XYZ&
@ -51,9 +51,10 @@ public enum CDC1604DashP6 {
_WORD_NUMBER('\u0001'), _WORD_NUMBER('\u0001'),
/// Word glue to flag for automatic casing or separator in renderer. /// Word glue to flag for automatic casing or separator in renderer.
_WORD_GLUE('\u0002'), _WORD_GLUE('\u0002'),
/// Escape to terminal control code. (note only needed for 6 bit and 8 bit systems)
_WORD_ESC('\u0003'),
_WORD_LINE('\n'), _WORD_LINE('\n'),
_WORD_SPACE(' '), _WORD_SPACE(' '),
_CARET('^'),
_QUESTION_MARK('?'), _QUESTION_MARK('?'),
_EXCLAMATION_MARK('!'), _EXCLAMATION_MARK('!'),
_CURLY_BRACKET_LEFT('{'), _CURLY_BRACKET_LEFT('{'),
@ -76,7 +77,7 @@ public enum CDC1604DashP6 {
_APOSTROPHE('\''), _APOSTROPHE('\''),
_QUOTATION_MARK('\"'), _QUOTATION_MARK('\"'),
_BACKTICK_MARK('`'), _BACKTICK_MARK('`'),
_PRIME_SIGN(''), _CARET('^'),
_EQUALS_SIGN('='), _EQUALS_SIGN('='),
_NUMBER_SIGN('#'), _NUMBER_SIGN('#'),
_DOLLAR_SIGN('$'), _DOLLAR_SIGN('$'),
@ -149,13 +150,18 @@ public enum CDC1604DashP6 {
} }
static public List<CDC1604DashP6> convertFromUnicode(String text, boolean strict) { static public List<CDC1604DashP6> convertFromUnicode(String text, boolean strict) {
PrimitiveIterator.OfInt i = text.codePoints().iterator(); String textSingleNewLines = text.replaceAll("\r\n", "\n");
PrimitiveIterator.OfInt i = textSingleNewLines.codePoints().iterator();
List<CDC1604DashP6> result = new ArrayList<>(text.length()); List<CDC1604DashP6> result = new ArrayList<>(text.length());
CDC1604DashP6[] cdcChars = values(); CDC1604DashP6[] cdcChars = values();
CDC1604DashP6 cdcNumberTerminator = null; CDC1604DashP6 cdcNumberTerminator = null;
while (i.hasNext()) { while (i.hasNext()) {
int codePoint = i.next(); int codePoint = i.next();
boolean found = false; boolean found = false;
if ('\r' == codePoint) {
result.add(CDC1604DashP6._WORD_LINE);
continue;
}
for (CDC1604DashP6 v : cdcChars) { for (CDC1604DashP6 v : cdcChars) {
if (v.codePoint == codePoint) { if (v.codePoint == codePoint) {
result.add(v); result.add(v);
@ -163,12 +169,6 @@ public enum CDC1604DashP6 {
cdcNumberTerminator = null; cdcNumberTerminator = null;
break; break;
} }
if (v.codePointLower == codePoint) {
result.add(v);
found = true;
cdcNumberTerminator = null;
break;
}
if ('0' == codePoint) { if ('0' == codePoint) {
if (!CDC1604DashP6._A.equals(cdcNumberTerminator)) { if (!CDC1604DashP6._A.equals(cdcNumberTerminator)) {
result.add(CDC1604DashP6._WORD_NUMBER); result.add(CDC1604DashP6._WORD_NUMBER);
@ -195,6 +195,12 @@ public enum CDC1604DashP6 {
if (!found && !strict) { if (!found && !strict) {
String codePointStr = Character.toString(codePoint); String codePointStr = Character.toString(codePoint);
for (CDC1604DashP6 v : cdcChars) { for (CDC1604DashP6 v : cdcChars) {
if (v.codePointLower == codePoint) {
result.add(v);
found = true;
cdcNumberTerminator = null;
break;
}
if (v.aliases == null) { if (v.aliases == null) {
continue; continue;
} }

View file

@ -0,0 +1,85 @@
/*
* Copyright (c) 2004-2014, Willem Cazander
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.x4o.o2o;
/**
 * Direct Escaped Control of a text terminal, expressed in CDC1604DashP6 code points.
 *
 * A 6 or 8 bit computer reaches these control codes by prefixing them with
 * {@code CDC1604DashP6._WORD_ESC}; the set is meant to hold 64 control codes,
 * of which only the first few are defined so far.
 *
 * An 18 or 144 bit computer uses these codes directly as the second page of 64
 * code points (thus numbers 64 up to 128, exclusive).
 *
 * The declaration order of the constants is significant: each constant's position
 * is its control code number, so new codes must not be inserted in the middle
 * without care.
 *
 * @author Willem Cazander
 * @version 1.0 Dec 24, 2024
 */
public enum CDC1604DashP6DEC {

    /// Reserved so that 6 and 8 bit systems can use P6._NUL for termination.
    _RESERVED0,

    /// Encode a 21 bit unicode-1 code point ?? (open design question)
    _RESERVED1_UNICODE1,

    _RESERVED2,

    _RESERVED3,

    /// Unicode-4 consists of 72 bit stroke blocks, which align to 12 P6 code points each.
    /// Sequence: `_WORD_ESC + _RESERVED4_UNICODE4 + <stroke-amount> + manyStrokeBlocks`
    _RESERVED4_UNICODE4,

    _RESERVED5,

    // The six reserved codes above block out the first 6 P6 code points, which keeps
    // the non-printable chars free on 6 bit computers.

    /// Terminates an escape sequence; counterpart of ANSI "ESC \" (0x9C, ST, String Terminator).
    _ESC_TERMINATOR,

    // TODO: Re-impl simple like VT52 see https://en.wikipedia.org/wiki/ANSI_escape_code
    // An ANSI CSI sequence is built from:
    // - ESC [
    // - any number (including none) of "parameter bytes" in the range 0x30-0x3F (ASCII 0-9:;<=>?),
    // - any number of "intermediate bytes" in the range 0x20-0x2F (ASCII space and !"#$%&'()*+,-./),
    // - finally by a single "final byte" in the range 0x40-0x7E (ASCII @A-Z[\]^_`a-z{|}~)

    /// Moves cursor up. (test)
    ///
    /// On an 8 bit ANSI system this takes ESC+[+1+A = 4 bytes/32 bit to move 1 up.
    ///
    /// With terminator codes:
    /// - a 6 bit P6DEC system takes _ESC+_CSI_CURSOR_UP+_WM+_I+_A+_ESC+_ESC_TERMINATOR = 7*P6 or 42 bits
    /// - an 18 bit P6DEC system takes _CSI_CURSOR_UP+_WM+_I+_A+_ESC_TERMINATOR = 5*18 or 90 bits
    ///
    /// With an unsigned native packed integer (which limits the value range):
    /// - a 6 bit P6DEC system takes _ESC+_CSI_CURSOR_UP_NP+0x1+_ESC+_ESC_TERMINATOR = 5*P6 or 30 bits
    /// - an 18 bit P6DEC system takes _CSI_CURSOR_UP_NP+0x1+_ESC_TERMINATOR = 3*18 or 54 bits
    ///
    /// NOTE(review): "_WM" is copied from the original note; it presumably means the
    /// word-number marker - confirm the intended code point name.
    /// TODO: Think of better encoding system
    _CSI_CURSOR_UP('A'),

    /// Cursor-up variant carrying its amount as a native packed integer; has no CSI postfix.
    _CSI_CURSOR_UP_NP;

    /// ANSI CSI "final byte" that corresponds to this control code, or null when there is none.
    private final Character csiPostfix;

    /// Creates a control code that has an ANSI CSI final byte equivalent.
    CDC1604DashP6DEC(Character csiPostfix) {
        this.csiPostfix = csiPostfix;
    }

    /// Creates a control code without an ANSI CSI final byte equivalent.
    CDC1604DashP6DEC() {
        this.csiPostfix = null;
    }

    /**
     * Returns the ANSI CSI final byte associated with this control code.
     *
     * @return the postfix character, or {@code null} for codes declared without one
     */
    public Character csiPostFix() {
        return csiPostfix;
    }
}

View file

@ -113,4 +113,23 @@ public class CDC1604DashP6Test {
String out = CDC1604DashP6.convertToUnicode(cdc); String out = CDC1604DashP6.convertToUnicode(cdc);
Assertions.assertEquals("B3J", out); Assertions.assertEquals("B3J", out);
} }
@Test
public void testLineEndings() throws Exception {
    // Input mixes LF, a lone CR and CRLF; every one of them must end up as exactly
    // one _WORD_LINE, and rendering back to unicode must use plain LF only.
    List<CDC1604DashP6> cdc = CDC1604DashP6.convertFromUnicode("A\nB\rC\r\nD\n");
    Assertions.assertNotNull(cdc);
    Assertions.assertFalse(cdc.isEmpty());

    CDC1604DashP6[] expected = {
        CDC1604DashP6._A, CDC1604DashP6._WORD_LINE,
        CDC1604DashP6._B, CDC1604DashP6._WORD_LINE,
        CDC1604DashP6._C, CDC1604DashP6._WORD_LINE,
        CDC1604DashP6._D, CDC1604DashP6._WORD_LINE,
    };
    Assertions.assertEquals(expected.length, cdc.size());
    for (int idx = 0; idx < expected.length; idx++) {
        Assertions.assertEquals(expected[idx], cdc.get(idx));
    }

    String out = CDC1604DashP6.convertToUnicode(cdc);
    Assertions.assertEquals("A\nB\nC\nD\n", out);
}
} }