Add more line-ending handling to the import read loop
This commit is contained in:
parent
122efdc496
commit
f630b919f2
|
@ -24,6 +24,7 @@ package org.x4o.fc18;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.PrimitiveIterator;
|
import java.util.PrimitiveIterator;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -46,6 +47,7 @@ import org.x4o.fc18.cake2.zero33.FCDotDEC2701DashPX0;
|
||||||
public class FourCornerUnicodeImport {
|
public class FourCornerUnicodeImport {
|
||||||
|
|
||||||
private boolean convertDiacritics = false;
|
private boolean convertDiacritics = false;
|
||||||
|
private boolean convertWhitespaces = false;
|
||||||
private boolean failOnMissing = false;
|
private boolean failOnMissing = false;
|
||||||
private List<FCDotCDC1604DashP6> missingCharIndicator = null;
|
private List<FCDotCDC1604DashP6> missingCharIndicator = null;
|
||||||
private static final Set<Integer> WHITE_SPACES = Set.of(
|
private static final Set<Integer> WHITE_SPACES = Set.of(
|
||||||
|
@ -53,18 +55,19 @@ public class FourCornerUnicodeImport {
|
||||||
'\u2005','\u2004','\u2003','\u2002','\u2001','\u2000','\u00A0','\u0020')
|
'\u2005','\u2004','\u2003','\u2002','\u2001','\u2000','\u00A0','\u0020')
|
||||||
.stream().map(v -> (int)v.charValue()).collect(Collectors.toUnmodifiableSet());
|
.stream().map(v -> (int)v.charValue()).collect(Collectors.toUnmodifiableSet());
|
||||||
|
|
||||||
public FourCornerUnicodeImport(boolean convertDiacritics, boolean failOnMissing, List<FCDotCDC1604DashP6> missingCharIndicator) {
|
public FourCornerUnicodeImport(boolean convertWhitespaces, boolean convertDiacritics, boolean failOnMissing, List<FCDotCDC1604DashP6> missingCharIndicator) {
|
||||||
|
this.convertWhitespaces = convertWhitespaces;
|
||||||
this.convertDiacritics = convertDiacritics;
|
this.convertDiacritics = convertDiacritics;
|
||||||
this.failOnMissing = failOnMissing;
|
this.failOnMissing = failOnMissing;
|
||||||
this.missingCharIndicator = missingCharIndicator;
|
this.missingCharIndicator = missingCharIndicator;
|
||||||
}
|
}
|
||||||
|
|
||||||
static public FourCornerUnicodeImport strict() {
|
static public FourCornerUnicodeImport strict() {
|
||||||
return new FourCornerUnicodeImport(false, true, null);
|
return new FourCornerUnicodeImport(false, false, true, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
static public FourCornerUnicodeImport lossy() {
|
static public FourCornerUnicodeImport lossy() {
|
||||||
return new FourCornerUnicodeImport(true, false, List.of(FCDotCDC1604DashP6._RAKA_QUESTION)); // TODO: change
|
return new FourCornerUnicodeImport(true, true, false, List.of(FCDotCDC1604DashP6._RAKA_QUESTION)); // TODO: change
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Integer> convertToInt18(String text) {
|
public List<Integer> convertToInt18(String text) {
|
||||||
|
@ -147,6 +150,15 @@ public class FourCornerUnicodeImport {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean handleDataGramWordSpacerTypes(ImportState ctx) {
|
private boolean handleDataGramWordSpacerTypes(ImportState ctx) {
|
||||||
|
if (' ' == ctx.codePoint) {
|
||||||
|
ctx.reset();
|
||||||
|
ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10);
|
||||||
|
ctx.output.add(FCDotDEC0801DashE10.E10_UWU0101_S1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (convertWhitespaces == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (WHITE_SPACES.contains(ctx.codePoint)) {
|
if (WHITE_SPACES.contains(ctx.codePoint)) {
|
||||||
ctx.reset();
|
ctx.reset();
|
||||||
ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10);
|
ctx.output.add(FCDotDEC2701DashPX0.ESC_DEC0801_E10);
|
||||||
|
@ -156,33 +168,75 @@ public class FourCornerUnicodeImport {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void sendTypeWriterNewLine(ImportState ctx) {
|
||||||
|
ctx.reset();
|
||||||
|
ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
|
||||||
|
ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
|
||||||
|
}
|
||||||
|
|
||||||
private boolean handleTypeWriterStructureTypes(ImportState ctx) {
|
private boolean handleTypeWriterStructureTypes(ImportState ctx) {
|
||||||
|
while (true) {
|
||||||
|
Optional<Boolean> result = handleTypeWriterStructureTypesLoop(ctx);
|
||||||
|
if (result.isEmpty()) {
|
||||||
|
return true; // EOF continue;
|
||||||
|
}
|
||||||
|
if (result.get().booleanValue()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Optional<Boolean> handleTypeWriterStructureTypesLoop(ImportState ctx) {
|
||||||
|
// Convert Amiga and Xenix line endings
|
||||||
if ('\n' == ctx.codePoint) {
|
if ('\n' == ctx.codePoint) {
|
||||||
ctx.reset();
|
sendTypeWriterNewLine(ctx);
|
||||||
ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
|
// Convert Acorn BBC and RISC OS line endings
|
||||||
ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// support import from z/OS as this is "the EBCDIC New line"
|
|
||||||
if ('\u0085' == ctx.codePoint) {
|
|
||||||
ctx.reset();
|
|
||||||
ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
|
|
||||||
ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if ('\r' == ctx.codePoint) {
|
|
||||||
ctx.reset();
|
|
||||||
ctx.output.add(FCDotDEC2701DashPX0.ESC68_DEC0801_E19);
|
|
||||||
ctx.output.add(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL);
|
|
||||||
if (!ctx.input.hasNext()) {
|
if (!ctx.input.hasNext()) {
|
||||||
return true; // = no next is continue to next, to exit while above
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
ctx.codePoint = ctx.input.nextInt();
|
ctx.codePoint = ctx.input.nextInt();
|
||||||
if ('\n' == ctx.codePoint) {
|
if ('\r' == ctx.codePoint) {
|
||||||
return true; // eat \n to have one newline
|
if (!ctx.input.hasNext()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
ctx.codePoint = ctx.input.nextInt(); // eat \r to have one newline
|
||||||
}
|
}
|
||||||
|
return Optional.of(true); // check more endings
|
||||||
}
|
}
|
||||||
return false;
|
// Convert escaped new lines (and eats escapes too)
|
||||||
|
if ('\u001B' == ctx.codePoint) {
|
||||||
|
if (!ctx.input.hasNext()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
ctx.codePoint = ctx.input.nextInt();
|
||||||
|
// Convert z/OS and OS/400 escaped converted from a real "the EBCDIC New line"
|
||||||
|
if ('\u0085' == ctx.codePoint) {
|
||||||
|
sendTypeWriterNewLine(ctx);
|
||||||
|
if (!ctx.input.hasNext()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
ctx.codePoint = ctx.input.nextInt();
|
||||||
|
}
|
||||||
|
return Optional.of(true); // check more endings
|
||||||
|
}
|
||||||
|
// Convert old Apple new lines
|
||||||
|
if ('\r' == ctx.codePoint) {
|
||||||
|
sendTypeWriterNewLine(ctx);
|
||||||
|
if (!ctx.input.hasNext()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
// Convert MSX and Atari TOS new lines
|
||||||
|
ctx.codePoint = ctx.input.nextInt();
|
||||||
|
if ('\n' == ctx.codePoint) {
|
||||||
|
if (!ctx.input.hasNext()) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
ctx.codePoint = ctx.input.nextInt(); // eat \n to have one newline
|
||||||
|
}
|
||||||
|
return Optional.of(true); // check more endings
|
||||||
|
}
|
||||||
|
return Optional.of(false); // no match continue
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean handleDictionary(ImportState ctx) {
|
private boolean handleDictionary(ImportState ctx) {
|
||||||
|
|
|
@ -248,12 +248,12 @@ public class FourCornerUnicodeImportTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLineEndings() throws Exception {
|
public void testLineEndings() throws Exception {
|
||||||
List<FourCornerX06BaklavaPointSequence> cdc = FourCornerUnicodeImport.strict().convertToX06("A\nB\rC\r\nD\nE\u0085");
|
List<FourCornerX06BaklavaPointSequence> cdc = FourCornerUnicodeImport.lossy().convertToX06("A\nB\rC\r\nD\nE\u001B\u0085F\n\r\n\r\r\n");
|
||||||
Iterator<FourCornerX06BaklavaPointSequence> cdi = cdc.iterator();
|
Iterator<FourCornerX06BaklavaPointSequence> cdi = cdc.iterator();
|
||||||
Assertions.assertNotNull(cdc);
|
Assertions.assertNotNull(cdc);
|
||||||
Assertions.assertTrue(cdi.hasNext());
|
Assertions.assertTrue(cdi.hasNext());
|
||||||
Assertions.assertFalse(cdc.isEmpty());
|
Assertions.assertFalse(cdc.isEmpty());
|
||||||
Assertions.assertEquals(15, cdc.size());
|
Assertions.assertEquals(22, cdc.size());
|
||||||
Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdi.next());
|
Assertions.assertEquals(FCDotCDC1604DashP6.NX01_A, cdi.next());
|
||||||
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
||||||
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
||||||
|
@ -269,8 +269,15 @@ public class FourCornerUnicodeImportTest {
|
||||||
Assertions.assertEquals(FCDotCDC1604DashP6.NX05_E, cdi.next());
|
Assertions.assertEquals(FCDotCDC1604DashP6.NX05_E, cdi.next());
|
||||||
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
||||||
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotCDC1604DashP6.NX06_F, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotDEC2701DashPX0.ESC68_DEC0801_E19, cdi.next());
|
||||||
|
Assertions.assertEquals(FCDotDEC0801DashE19.TYPE_WRITER_TTY0001_NL, cdi.next());
|
||||||
|
|
||||||
String out = FourCornerUnicodeDisplay.text().renderFromX06(cdc);
|
String out = FourCornerUnicodeDisplay.text().renderFromX06(cdc);
|
||||||
Assertions.assertEquals("A\nB\nC\nD\nE\n", out);
|
Assertions.assertEquals("A\nB\nC\nD\nE\nF\n\n\n", out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue