diff --git a/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java b/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java index f546ef6..eef8e2a 100644 --- a/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java +++ b/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java @@ -24,6 +24,7 @@ package org.x4o.fc18; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.PrimitiveIterator; import java.util.Set; import java.util.stream.Collectors; @@ -46,6 +47,7 @@ import org.x4o.fc18.cake2.zero33.FCDotDEC2701DashPX0; public class FourCornerUnicodeImport { private boolean convertDiacritics = false; + private boolean convertWhitespaces = false; private boolean failOnMissing = false; private List missingCharIndicator = null; private static final Set WHITE_SPACES = Set.of( @@ -53,18 +55,19 @@ public class FourCornerUnicodeImport { '\u2005','\u2004','\u2003','\u2002','\u2001','\u2000','\u00A0','\u0020') .stream().map(v -> (int)v.charValue()).collect(Collectors.toUnmodifiableSet()); - public FourCornerUnicodeImport(boolean convertDiacritics, boolean failOnMissing, List missingCharIndicator) { + public FourCornerUnicodeImport(boolean convertWhitespaces, boolean convertDiacritics, boolean failOnMissing, List missingCharIndicator) { + this.convertWhitespaces = convertWhitespaces; this.convertDiacritics = convertDiacritics; this.failOnMissing = failOnMissing; this.missingCharIndicator = missingCharIndicator; } static public FourCornerUnicodeImport strict() { - return new FourCornerUnicodeImport(false, true, null); + return new FourCornerUnicodeImport(false, false, true, null); } static public FourCornerUnicodeImport lossy() { - return new FourCornerUnicodeImport(true, false, List.of(FCDotCDC1604DashP6._RAKA_QUESTION)); // TODO: change + return new FourCornerUnicodeImport(true, true, false, List.of(FCDotCDC1604DashP6._RAKA_QUESTION)); // TODO: change } public List convertToInt18(String text) { @@ -147,6 +150,15 @@ public class FourCornerUnicodeImport { } private boolean handleDataGramWordSpacerTypes(ImportState ctx) { + if (' ' == ctx.codePoint) { + ctx.reset(); + ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10); + ctx.output.add(FCDotDEC0801DashE10.E10_UWU0101_S1); + return true; + } + if (convertWhitespaces == false) { + return false; + } if (WHITE_SPACES.contains(ctx.codePoint)) { ctx.reset(); ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10); @@ -156,33 +168,75 @@ public class FourCornerUnicodeImport { return false; } + private void sendTypeWriterNewLine(ImportState ctx) { + ctx.reset(); + ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19); + ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL); + } + private boolean handleTypeWriterStructureTypes(ImportState ctx) { + while (true) { + Optional result = handleTypeWriterStructureTypesLoop(ctx); + if (result.isEmpty()) { + return true; // EOF continue; + } + if (result.get().booleanValue()) { + continue; + } + return false; + } + } + + private Optional handleTypeWriterStructureTypesLoop(ImportState ctx) { + // Convert Amiga and Xenix line endings if ('\n' == ctx.codePoint) { - ctx.reset(); - ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19); - ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL); - return true; - } - // support import from z/OS as this is "the EBCDIC New line" - if ('\u0085' == ctx.codePoint) { - ctx.reset(); - ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19); - ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL); - return true; - } - if ('\r' == ctx.codePoint) { - ctx.reset(); - ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19); - ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL); + sendTypeWriterNewLine(ctx); + // Convert Acorn BBC and RISC OS line endings if (!ctx.input.hasNext()) { - return true; // = no next is continue to next, to exit while above + return Optional.empty(); } ctx.codePoint = ctx.input.nextInt(); - if ('\n' == ctx.codePoint) { - return true; // eat \n to have one newline + if ('\r' == ctx.codePoint) { + if (!ctx.input.hasNext()) { + return Optional.empty(); + } + ctx.codePoint = ctx.input.nextInt(); // eat \r to have one newline } + return Optional.of(true); // check more endings } - return false; + // Convert escaped new lines (and eats escapes too) + if ('\u001B' == ctx.codePoint) { + if (!ctx.input.hasNext()) { + return Optional.empty(); + } + ctx.codePoint = ctx.input.nextInt(); + // Convert z/OS and OS/400 escaped converted from a real "the EBCDIC New line" + if ('\u0085' == ctx.codePoint) { + sendTypeWriterNewLine(ctx); + if (!ctx.input.hasNext()) { + return Optional.empty(); + } + ctx.codePoint = ctx.input.nextInt(); + } + return Optional.of(true); // check more endings + } + // Convert old Apple new lines + if ('\r' == ctx.codePoint) { + sendTypeWriterNewLine(ctx); + if (!ctx.input.hasNext()) { + return Optional.empty(); + } + // Convert MSX and Atari TOS new lines + ctx.codePoint = ctx.input.nextInt(); + if ('\n' == ctx.codePoint) { + if (!ctx.input.hasNext()) { + return Optional.empty(); + } + ctx.codePoint = ctx.input.nextInt(); // eat \n to have one newline + } + return Optional.of(true); // check more endings + } + return Optional.of(false); // no match continue } private boolean handleDictionary(ImportState ctx) { diff --git a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java index cd4bb94..12de17e 100644 --- a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java +++ b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java @@ -248,12 +248,12 @@ public class FourCornerUnicodeImportTest { @Test public void testLineEndings() throws Exception { - List cdc = FourCornerUnicodeImport.strict().convertToX06("A\nB\rC\r\nD\nE\u0085"); + List cdc = FourCornerUnicodeImport.lossy().convertToX06("A\nB\rC\r\nD\nE\u001B\u0085F\n\r\n\r\r\n"); Iterator cdi = cdc.iterator(); Assertions.assertNotNull(cdc); Assertions.assertTrue(cdi.hasNext()); Assertions.assertFalse(cdc.isEmpty()); - Assertions.assertEquals(15, cdc.size()); + Assertions.assertEquals(22, cdc.size()); Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdi.next()); Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next()); Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next()); @@ -269,8 +269,15 @@ public class FourCornerUnicodeImportTest { Assertions.assertEquals(FCDotCDC1604DashP6.NX05_E, cdi.next()); Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next()); Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next()); + Assertions.assertEquals(FCDotCDC1604DashP6.NX06_F, cdi.next()); + Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next()); + Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next()); + Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next()); + Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next()); + Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next()); + Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next()); String out = FourCornerUnicodeDisplay.text().renderFromX06(cdc); - Assertions.assertEquals("A\nB\nC\nD\nE\n", out); + Assertions.assertEquals("A\nB\nC\nD\nE\nF\n\n\n", out); } }