Added reading Spanish words from kanji data
This commit is contained in:
parent
12785fff61
commit
ca80b2e2ae
|
@ -72,14 +72,14 @@ public class KanjiDict {
|
||||||
String mLang;
|
String mLang;
|
||||||
|
|
||||||
private void addLiteral() {
|
private void addLiteral() {
|
||||||
if (literal.kuTen208 != null) {
|
if (literal.getKuTen208() != null) {
|
||||||
mapKuTen208.put(literal.kuTen208, literal);
|
mapKuTen208.put(literal.getKuTen208(), literal);
|
||||||
}
|
}
|
||||||
if (literal.kuTen213 != null) {
|
if (literal.getKuTen213() != null) {
|
||||||
mapKuTen213.put(literal.kuTen213, literal);
|
mapKuTen213.put(literal.getKuTen213(), literal);
|
||||||
}
|
}
|
||||||
if (literal.fourCorner != null) {
|
if (literal.getFourCorner() != null) {
|
||||||
mapFourCorner.put(literal.fourCorner, literal);
|
mapFourCorner.put(literal.getFourCorner(), literal);
|
||||||
}
|
}
|
||||||
literal = null;
|
literal = null;
|
||||||
}
|
}
|
||||||
|
@ -99,18 +99,17 @@ public class KanjiDict {
|
||||||
addLiteral();
|
addLiteral();
|
||||||
} else if ("cp_value".equals(qName)) {
|
} else if ("cp_value".equals(qName)) {
|
||||||
if ("ucs".equals(cpType)) {
|
if ("ucs".equals(cpType)) {
|
||||||
literal.codePoint = Integer.parseInt(bufChar.toString(), 16);
|
literal.setCodePoint(Integer.parseInt(bufChar.toString(), 16));
|
||||||
} else if ("jis208".equals(cpType)) {
|
} else if ("jis208".equals(cpType)) {
|
||||||
literal.kuTen208 = bufChar.toString();
|
literal.setKuTen208(bufChar.toString());
|
||||||
} else if ("jis213".equals(cpType)) {
|
} else if ("jis213".equals(cpType)) {
|
||||||
literal.kuTen213 = bufChar.toString();
|
literal.setKuTen213(bufChar.toString());
|
||||||
}
|
}
|
||||||
} else if ("q_code".equals(qName)) {
|
} else if ("q_code".equals(qName)) {
|
||||||
if ("four_corner".equals(qcType)) {
|
if ("four_corner".equals(qcType)) {
|
||||||
literal.fourCorner = Integer.parseInt(bufChar.toString().replaceAll("\\.", ""));
|
literal.setFourCorner(Integer.parseInt(bufChar.toString().replaceAll("\\.", "")));
|
||||||
}
|
}
|
||||||
} else if ("meaning".equals(qName)) {
|
} else if ("meaning".equals(qName)) {
|
||||||
if (mLang == null) {
|
|
||||||
String mean = bufChar.toString();
|
String mean = bufChar.toString();
|
||||||
if (mean.startsWith("(")) {
|
if (mean.startsWith("(")) {
|
||||||
mean = mean.replaceAll("\\(", "");
|
mean = mean.replaceAll("\\(", "");
|
||||||
|
@ -131,8 +130,7 @@ public class KanjiDict {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!mean.isEmpty()) {
|
if (!mean.isEmpty()) {
|
||||||
literal.meaning.add(mean);
|
literal.addMeaning(mLang ,mean);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
mLang = null;
|
mLang = null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,9 @@
|
||||||
package org.x4o.fc18.cake2.fcdoc.kanji;
|
package org.x4o.fc18.cake2.fcdoc.kanji;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Kanji KuTen and FourCorner and codePoint and meaning of a literal.
|
* Kanji KuTen and FourCorner and codePoint and meaning of a literal.
|
||||||
|
@ -33,9 +35,62 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class KanjiDictLiteral {
|
public class KanjiDictLiteral {
|
||||||
|
|
||||||
String kuTen208;
|
private String kuTen208;
|
||||||
String kuTen213;
|
private String kuTen213;
|
||||||
Integer fourCorner;
|
private Integer fourCorner;
|
||||||
Integer codePoint;
|
private Integer codePoint;
|
||||||
final List<String> meaning = new ArrayList<>();
|
private final Map<String, List<String>> meaning = new HashMap<>();
|
||||||
|
|
||||||
|
public KanjiDictLiteral() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getKuTen208() {
|
||||||
|
return kuTen208;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKuTen208(String kuTen208) {
|
||||||
|
this.kuTen208 = kuTen208;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getKuTen213() {
|
||||||
|
return kuTen213;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setKuTen213(String kuTen213) {
|
||||||
|
this.kuTen213 = kuTen213;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getFourCorner() {
|
||||||
|
return fourCorner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFourCorner(Integer fourCorner) {
|
||||||
|
this.fourCorner = fourCorner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getCodePoint() {
|
||||||
|
return codePoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCodePoint(Integer codePoint) {
|
||||||
|
this.codePoint = codePoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, List<String>> getMeaning() {
|
||||||
|
return meaning;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getMeaningDefault() {
|
||||||
|
return meaning.get("");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addMeaning(String locale, String text) {
|
||||||
|
String key = locale==null?"":locale;
|
||||||
|
List<String> data = meaning.get(key);
|
||||||
|
if (data == null) {
|
||||||
|
data = new ArrayList<>();
|
||||||
|
meaning.put(key, data);
|
||||||
|
}
|
||||||
|
data.add(text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,9 +26,8 @@ import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -52,10 +51,18 @@ public class KanjiDictTest {
|
||||||
System.out.println("=======================================");
|
System.out.println("=======================================");
|
||||||
for (Integer key : dict.mapFourCorner.keySet()) {
|
for (Integer key : dict.mapFourCorner.keySet()) {
|
||||||
KanjiDictLiteral lit = dict.mapFourCorner.get(key);
|
KanjiDictLiteral lit = dict.mapFourCorner.get(key);
|
||||||
int meanCnt = lit.meaning.size();
|
int meanCnt = lit.getMeaningDefault().size();
|
||||||
String meaning = lit.meaning.stream().map(v -> v.replaceAll(" ", "_").toUpperCase()).collect(Collectors.joining(" "));
|
String meaning = lit.getMeaningDefault().stream().map(v -> v.replaceAll(" ", "\u3000")).collect(Collectors.joining(" - "));
|
||||||
System.out.print(new StringBuilder().appendCodePoint(lit.codePoint).toString());
|
System.out.print(new StringBuilder().appendCodePoint(lit.getCodePoint()).toString());
|
||||||
System.out.println(String.format(" %s FC %5d = %2d %s", lit.kuTen208, lit.fourCorner, meanCnt, meaning));
|
System.out.println(String.format(" %s FC %5d = %2d %s", lit.getKuTen208(), lit.getFourCorner(), meanCnt, meaning));
|
||||||
|
for (String locale : lit.getMeaning().keySet()) {
|
||||||
|
if (!"es".equals(locale)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
List<String> texts = lit.getMeaning().get(locale);
|
||||||
|
String text = texts.stream().map(v -> v.replaceAll(" ", "\u3000")).collect(Collectors.joining(" - "));
|
||||||
|
System.out.println("\t\tLOCALE: " + locale + " TEXT: " + text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int meanEmpty = 0;
|
int meanEmpty = 0;
|
||||||
|
@ -65,24 +72,24 @@ public class KanjiDictTest {
|
||||||
Map<String, KanjiDictLiteral> uniqMean = new HashMap<>();
|
Map<String, KanjiDictLiteral> uniqMean = new HashMap<>();
|
||||||
for (Integer key : dict.mapFourCorner.keySet()) {
|
for (Integer key : dict.mapFourCorner.keySet()) {
|
||||||
KanjiDictLiteral lit = dict.mapFourCorner.get(key);
|
KanjiDictLiteral lit = dict.mapFourCorner.get(key);
|
||||||
if (lit.meaning.isEmpty()) {
|
if (lit.getMeaningDefault().isEmpty()) {
|
||||||
meanEmpty++;
|
meanEmpty++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
String meanLast = lit.meaning.get(lit.meaning.size() - 1);
|
String meanLast = lit.getMeaningDefault().get(lit.getMeaningDefault().size() - 1);
|
||||||
if (!uniqMean.containsKey(meanLast)) {
|
if (!uniqMean.containsKey(meanLast)) {
|
||||||
uniqMean.put(meanLast, lit);
|
uniqMean.put(meanLast, lit);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (lit.meaning.size() > 1) {
|
if (lit.getMeaningDefault().size() > 1) {
|
||||||
String meanSecond = lit.meaning.get(1);
|
String meanSecond = lit.getMeaningDefault().get(1);
|
||||||
if (!uniqMean.containsKey(meanSecond)) {
|
if (!uniqMean.containsKey(meanSecond)) {
|
||||||
uniqMean.put(meanSecond, lit);
|
uniqMean.put(meanSecond, lit);
|
||||||
meanUse2++;
|
meanUse2++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String meanFirst = lit.meaning.get(0);
|
String meanFirst = lit.getMeaningDefault().get(0);
|
||||||
if (!uniqMean.containsKey(meanFirst)) {
|
if (!uniqMean.containsKey(meanFirst)) {
|
||||||
uniqMean.put(meanFirst, lit);
|
uniqMean.put(meanFirst, lit);
|
||||||
meanUse1++;
|
meanUse1++;
|
||||||
|
|
Loading…
Reference in a new issue