diff options
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java')
-rw-r--r-- | JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java new file mode 100644 index 0000000..b0a4eaa --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java @@ -0,0 +1,129 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.bitext; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Iterator; + +/** + * Reader of simple tab-delimited bilingual files. + * + * @author Marcin Miłkowski + */ +public class TabBitextReader implements BitextReader { + + protected BufferedReader in; + protected StringPair nextPair; + protected String nextLine; + private String prevLine; + + private int lineCount = -1; + protected int sentencePos; + + public TabBitextReader(final String filename, final String encoding) { + try { + if (encoding == null) { + in = new BufferedReader(new InputStreamReader(new FileInputStream(filename))); + } else { + in = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding)); + } + nextLine = in.readLine(); + prevLine = ""; + nextPair = tab2StringPair(nextLine); + } catch(IOException e) { + throw new IllegalArgumentException(e); + } + } + + protected StringPair tab2StringPair(final String line) { + if (line == null) { + return null; + } + final String[] fields = line.split("\t"); + return new StringPair(fields[0], fields[1]); + } + + @Override + public Iterator<StringPair> iterator() { + return new TabReader(); + } + + class TabReader implements Iterator<StringPair> { + + public boolean hasNext() { + return nextLine != null; + } + + public StringPair next() { + try { + final StringPair result = nextPair; + sentencePos = nextPair.getSource().length() + 1; + if (nextLine != null) { + prevLine = nextLine; + nextLine = in.readLine(); + nextPair = tab2StringPair(nextLine); + lineCount++; + if (nextLine == null) { + in.close(); + } + } + return result; + } catch(IOException e) { + throw new IllegalArgumentException(e); + } + } + + // The file is read-only. + public void remove() { + throw new UnsupportedOperationException(); + } + } + + @Override + public int getColumnCount() { + return sentencePos; + } + + @Override + public int getTargetColumnCount() { + return 1; + } + + @Override + public int getLineCount() { + return lineCount; + } + + @Override + public int getSentencePosition() { + return sentencePos; + } + + @Override + public String getCurrentLine() { + return prevLine; + } + + + +} |