Added support for all white space characters and the EBCDIC new line in the Unicode importer

This commit is contained in:
Willem Cazander 2025-01-23 20:06:35 +01:00
parent 27309073f5
commit 980af7d136
2 changed files with 35 additions and 16 deletions

View file

@ -25,6 +25,8 @@ package org.x4o.fc18;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.PrimitiveIterator; import java.util.PrimitiveIterator;
import java.util.Set;
import java.util.stream.Collectors;
import org.x4o.fc18.cake2.FourCornerX00PetitVide; import org.x4o.fc18.cake2.FourCornerX00PetitVide;
import org.x4o.fc18.cake2.FourCornerX06BaklavaPointSequence; import org.x4o.fc18.cake2.FourCornerX06BaklavaPointSequence;
@ -46,6 +48,10 @@ public class FourCornerUnicodeImport {
private boolean convertDiacritics = false; private boolean convertDiacritics = false;
private boolean failOnMissing = false; private boolean failOnMissing = false;
private List<FCDotCDC1604DashP6> missingCharIndicator = null; private List<FCDotCDC1604DashP6> missingCharIndicator = null;
/**
 * Unicode code points that this importer accepts as white space.
 * Stored as {@code Integer} code points so the set can be queried
 * directly with an {@code int} code point value.
 *
 * NOTE(review): U+1680 OGHAM SPACE MARK is also in Unicode category Zs
 * but is not listed here - confirm whether that omission is intentional.
 */
private static final Set<Integer> WHITE_SPACES = Set.of(
    0x0020, // SPACE
    0x00A0, // NO-BREAK SPACE
    0x2000, // EN QUAD
    0x2001, // EM QUAD
    0x2002, // EN SPACE
    0x2003, // EM SPACE
    0x2004, // THREE-PER-EM SPACE
    0x2005, // FOUR-PER-EM SPACE
    0x2006, // SIX-PER-EM SPACE
    0x2007, // FIGURE SPACE
    0x2008, // PUNCTUATION SPACE
    0x2009, // THIN SPACE
    0x200A, // HAIR SPACE
    0x202F, // NARROW NO-BREAK SPACE
    0x205F, // MEDIUM MATHEMATICAL SPACE
    0x3000  // IDEOGRAPHIC SPACE
);
public FourCornerUnicodeImport(boolean convertDiacritics, boolean failOnMissing, List<FCDotCDC1604DashP6> missingCharIndicator) { public FourCornerUnicodeImport(boolean convertDiacritics, boolean failOnMissing, List<FCDotCDC1604DashP6> missingCharIndicator) {
this.convertDiacritics = convertDiacritics; this.convertDiacritics = convertDiacritics;
@ -141,7 +147,7 @@ public class FourCornerUnicodeImport {
} }
private boolean handleDataGramWordSpacerTypes(ImportState ctx) { private boolean handleDataGramWordSpacerTypes(ImportState ctx) {
if (' ' == ctx.codePoint) { if (WHITE_SPACES.contains(ctx.codePoint)) {
ctx.reset(); ctx.reset();
ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10); ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10);
ctx.output.add(FCDotDEC0801DashE10.E10_UWU0101_S1); ctx.output.add(FCDotDEC0801DashE10.E10_UWU0101_S1);
@ -157,6 +163,13 @@ public class FourCornerUnicodeImport {
ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL); ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
return true; return true;
} }
// Support import from z/OS: U+0085 (NEL, NEXT LINE) is "the EBCDIC New line".
if ('\u0085' == ctx.codePoint) {
ctx.reset();
ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
return true;
}
if ('\r' == ctx.codePoint) { if ('\r' == ctx.codePoint) {
ctx.reset(); ctx.reset();
ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19); ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);

View file

@ -22,6 +22,7 @@
*/ */
package org.x4o.fc18; package org.x4o.fc18;
import java.util.Iterator;
import java.util.List; import java.util.List;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
@ -247,24 +248,29 @@ public class FourCornerUnicodeImportTest {
@Test @Test
public void testLineEndings() throws Exception { public void testLineEndings() throws Exception {
List<FourCornerX06BaklavaPointSequence> cdc = FourCornerUnicodeImport.strict().convertToX06("A\nB\rC\r\nD\n"); List<FourCornerX06BaklavaPointSequence> cdc = FourCornerUnicodeImport.strict().convertToX06("A\nB\rC\r\nD\nE\u0085");
Iterator<FourCornerX06BaklavaPointSequence> cdi = cdc.iterator();
Assertions.assertNotNull(cdc); Assertions.assertNotNull(cdc);
Assertions.assertTrue(cdi.hasNext());
Assertions.assertFalse(cdc.isEmpty()); Assertions.assertFalse(cdc.isEmpty());
Assertions.assertEquals(12, cdc.size()); Assertions.assertEquals(15, cdc.size());
Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdc.get(0)); Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdi.next());
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(1)); Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(2)); Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
Assertions.assertEquals(FCDotCDC1604DashP6.NX02_B, cdc.get(3)); Assertions.assertEquals(FCDotCDC1604DashP6.NX02_B, cdi.next());
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(4)); Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(5)); Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
Assertions.assertEquals(FCDotCDC1604DashP6.NX03_C, cdc.get(6)); Assertions.assertEquals(FCDotCDC1604DashP6.NX03_C, cdi.next());
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(7)); Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(8)); Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
Assertions.assertEquals(FCDotCDC1604DashP6.NX04_D, cdc.get(9)); Assertions.assertEquals(FCDotCDC1604DashP6.NX04_D, cdi.next());
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdc.get(10)); Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdc.get(11)); Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
Assertions.assertEquals(FCDotCDC1604DashP6.NX05_E, cdi.next());
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
String out = FourCornerUnicodeDisplay.text().renderFromX06(cdc); String out = FourCornerUnicodeDisplay.text().renderFromX06(cdc);
Assertions.assertEquals("A\nB\nC\nD\n", out); Assertions.assertEquals("A\nB\nC\nD\nE\n", out);
} }
} }