From 980af7d13645e73b57404b814294911c61af5d98 Mon Sep 17 00:00:00 2001
From: Willem
Date: Thu, 23 Jan 2025 20:06:35 +0100
Subject: [PATCH] Added all white space and EBCDIC support in unicode importer

---
Review notes (this area is ignored when the patch is applied):
- testLineEndings: the iterator is now taken after assertNotNull(cdc), so a
  null result is reported by the assertion instead of a NullPointerException.
- The comment on the U+0085 branch names the code point explicitly.
- Hunks were edited by hand; the "index" blob ids below still describe the
  patch as originally generated.

 .../org/x4o/fc18/FourCornerUnicodeImport.java | 15 +++++++-
 .../x4o/fc18/FourCornerUnicodeImportTest.java | 36 +++++++++++--------
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java b/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java
index 02d43a3..f546ef6 100644
--- a/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java
+++ b/nx01-x4o-fc18/src/main/java/org/x4o/fc18/FourCornerUnicodeImport.java
@@ -25,6 +25,8 @@ package org.x4o.fc18;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.PrimitiveIterator;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.x4o.fc18.cake2.FourCornerX00PetitVide;
 import org.x4o.fc18.cake2.FourCornerX06BaklavaPointSequence;
@@ -46,6 +48,10 @@ public class FourCornerUnicodeImport {
 	private boolean convertDiacritics = false;
 	private boolean failOnMissing = false;
 	private List missingCharIndicator = null;
+	private static final Set WHITE_SPACES = Set.of(
+			'\u3000','\u205F','\u202F','\u200A','\u2009','\u2008','\u2007','\u2006',
+			'\u2005','\u2004','\u2003','\u2002','\u2001','\u2000','\u00A0','\u0020')
+			.stream().map(v -> (int)v.charValue()).collect(Collectors.toUnmodifiableSet());
 
 	public FourCornerUnicodeImport(boolean convertDiacritics, boolean failOnMissing, List missingCharIndicator) {
 		this.convertDiacritics = convertDiacritics;
@@ -141,7 +147,7 @@ public class FourCornerUnicodeImport {
 	}
 
 	private boolean handleDataGramWordSpacerTypes(ImportState ctx) {
-		if (' ' == ctx.codePoint) {
+		if (WHITE_SPACES.contains(ctx.codePoint)) {
 			ctx.reset();
 			ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10);
 			ctx.output.add(FCDotDEC0801DashE10.E10_UWU0101_S1);
@@ -157,6 +163,13 @@ public class FourCornerUnicodeImport {
 			ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
 			return true;
 		}
+		// U+0085 NEL (next line) is "the EBCDIC New line"; emit a normal new line for it to support import from z/OS
+		if ('\u0085' == ctx.codePoint) {
+			ctx.reset();
+			ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
+			ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
+			return true;
+		}
 		if ('\r' == ctx.codePoint) {
 			ctx.reset();
 			ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
diff --git a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java
index 4234233..cd4bb94 100644
--- a/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java
+++ b/nx01-x4o-fc18/src/test/java/org/x4o/fc18/FourCornerUnicodeImportTest.java
@@ -22,6 +22,7 @@
  */
 package org.x4o.fc18;
 
+import java.util.Iterator;
 import java.util.List;
 
 import org.junit.jupiter.api.Assertions;
@@ -247,24 +248,29 @@ public class FourCornerUnicodeImportTest {
 
 	@Test
 	public void testLineEndings() throws Exception {
-		List cdc = FourCornerUnicodeImport.strict().convertToX06("A\nB\rC\r\nD\n");
+		List cdc = FourCornerUnicodeImport.strict().convertToX06("A\nB\rC\r\nD\nE\u0085");
 		Assertions.assertNotNull(cdc);
+		Iterator cdi = cdc.iterator();
+		Assertions.assertTrue(cdi.hasNext());
 		Assertions.assertFalse(cdc.isEmpty());
-		Assertions.assertEquals(12, cdc.size());
-		Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdc.get(0));
-		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(1));
-		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(2));
-		Assertions.assertEquals(FCDotCDC1604DashP6.NX02_B, cdc.get(3));
-		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(4));
-		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(5));
-		Assertions.assertEquals(FCDotCDC1604DashP6.NX03_C, cdc.get(6));
-		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(7));
-		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(8));
-		Assertions.assertEquals(FCDotCDC1604DashP6.NX04_D, cdc.get(9));
-		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(10));
-		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(11));
+		Assertions.assertEquals(15, cdc.size());
+		Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdi.next());
+		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
+		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
+		Assertions.assertEquals(FCDotCDC1604DashP6.NX02_B, cdi.next());
+		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
+		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
+		Assertions.assertEquals(FCDotCDC1604DashP6.NX03_C, cdi.next());
+		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
+		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
+		Assertions.assertEquals(FCDotCDC1604DashP6.NX04_D, cdi.next());
+		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
+		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
+		Assertions.assertEquals(FCDotCDC1604DashP6.NX05_E, cdi.next());
+		Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
+		Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
 		String out = FourCornerUnicodeDisplay.text().renderFromX06(cdc);
-		Assertions.assertEquals("A\nB\nC\nD\n", out);
+		Assertions.assertEquals("A\nB\nC\nD\nE\n", out);
 	}
 
 }