sen のアーカイブに入っていたサンプルコードをちょっとだけ手直し。
PreProcessor や PostProcessor を生成するためのクラスを作った。
# でも、残念ながら今の自分には不要なもの……
// Edited by NI-Lab. 2007-01-12
/*
* StreamTaggerDemo2.java - StreamTaggerDemo2 is demonstration program for Sen.
*
* Copyright (C) 2002 Takashi Okamoto, Tsuyoshi Fukui Takashi Okamoto
* <tora@debian.org> Tsuyoshi Fukui <fukui556@oki.com>
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
import java.io.*;
import java.util.*;
import javax.xml.parsers.*;
import net.java.sen.*;
import net.java.sen.processor.*;
import org.w3c.dom.*;
import org.xml.sax.*;
/**
 * Creates Sen pre-/post-processors from the settings found in a
 * sen-processor.xml configuration file.
 *
 * <p>The configuration file is parsed exactly once, from the constructor. The
 * {@code create*} methods afterwards build processor instances from the rule
 * text and dictionary path collected during that parse.
 */
public class ProcessorFactory {

    /**
     * Text of the top-level {@code <compound>} element that switches
     * {@link #isCompound} off. Written with Unicode escapes so that the source
     * file stays encoding-independent (it is a three-character Japanese word,
     * roughly "constituent word").
     */
    private static final String CONSTITUENT_WORD = "\u69cb\u6210\u8a9e";

    /** Path (or system identifier) of the configuration file given to the constructor. */
    private final String senProcessorConfigFile;

    /** Concatenated text of all top-level {@code <composit>} elements, one per line. */
    private String compositRule = "";

    /** Cleared when the top-level {@code <compound>} element equals {@link #CONSTITUENT_WORD}. */
    private boolean isCompound = true;

    /** Path taken from {@code <dictionary><compound>}, or {@code null} if absent. */
    private String compoundFile = null;

    /** Concatenated text of all top-level {@code <remark>} elements, one per line. */
    private String remarkRule = "";

    /**
     * Entry point kept for compatibility; it intentionally does nothing.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
    }

    /**
     * Parses the given configuration file and remembers its settings.
     *
     * @param senProcessorConfigFile path to the sen-processor.xml file
     * @throws NullPointerException if {@code senProcessorConfigFile} is {@code null}
     * @throws IllegalArgumentException if the file cannot be read or parsed, or
     *         if a required element in it is empty
     * @throws RuntimeException if the compound dictionary refers to a class that
     *         cannot be loaded
     */
    public ProcessorFactory(String senProcessorConfigFile) {
        if (senProcessorConfigFile == null) {
            throw new NullPointerException("senProcessorConfigFile");
        }
        this.senProcessorConfigFile = senProcessorConfigFile;
        readConfig();
    }

    /**
     * Creates a compound-word post-processor for the configured compound
     * dictionary.
     *
     * <p>The dictionary path is passed through unchanged; it is {@code null}
     * when the configuration has no {@code <dictionary><compound>} entry.
     *
     * @return a new processor instance
     */
    public CompoundWordPostProcessor createCompoundWordPostProcessor() {
        return new CompoundWordPostProcessor(compoundFile);
    }

    /**
     * Creates a composit post-processor loaded with the configured
     * {@code <composit>} rules.
     *
     * @return a new processor instance
     * @throws IOException if the processor fails while reading the rules
     * @throws IllegalStateException if the configuration has no composit rules
     */
    public CompositPostProcessor createCompositPostProcessor() throws IOException {
        requireRules(compositRule, "composit");
        CompositPostProcessor processor = new CompositPostProcessor();
        processor.readRules(new BufferedReader(new StringReader(compositRule)));
        return processor;
    }

    /**
     * Creates a remark pre-processor loaded with the configured
     * {@code <remark>} rules.
     *
     * @return a new processor instance
     * @throws IOException if the processor fails while reading the rules
     * @throws IllegalStateException if the configuration has no remark rules
     */
    public RemarkPreProcessor createRemarkPreProcessor() throws IOException {
        requireRules(remarkRule, "remark");
        RemarkPreProcessor processor = new RemarkPreProcessor();
        processor.readRules(new BufferedReader(new StringReader(remarkRule)));
        return processor;
    }

    /**
     * Creates a remark post-processor. No rules are handed to it, but it is only
     * available when remark rules are configured.
     *
     * @return a new processor instance
     * @throws IllegalStateException if the configuration has no remark rules
     */
    public RemarkPostProcessor createRemarkPostProcessor() {
        requireRules(remarkRule, "remark");
        return new RemarkPostProcessor();
    }

    /** Fails with a descriptive message when no rule text was collected for {@code element}. */
    private void requireRules(String rules, String element) {
        if (rules == null || rules.equals("")) {
            throw new IllegalStateException("no <" + element
                    + "> rules are configured in " + senProcessorConfigFile);
        }
    }

    /**
     * Parses the configuration file and fills in the rule and dictionary
     * fields.
     */
    private void readConfig() {
        String parent = resolveBaseDir(senProcessorConfigFile);
        StringBuilder composit = new StringBuilder(compositRule);
        StringBuilder remark = new StringBuilder(remarkRule);
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document doc = builder.parse(new InputSource(senProcessorConfigFile));
            // Use the document element rather than the first child: the first
            // child may be a comment or processing instruction preceding the root.
            NodeList nl = doc.getDocumentElement().getChildNodes();
            for (int i = 0; i < nl.getLength(); i++) {
                org.w3c.dom.Node n = nl.item(i);
                if (n.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
                    continue;
                }
                String nn = n.getNodeName();
                if (nn.equals("composit")) {
                    composit.append(requiredText(n)).append('\n');
                } else if (nn.equals("compound")) {
                    if (CONSTITUENT_WORD.equals(requiredText(n))) {
                        isCompound = false;
                    }
                } else if (nn.equals("remark")) {
                    remark.append(requiredText(n)).append('\n');
                } else if (nn.equals("dictionary")) {
                    readDictionary(n, parent);
                }
                // Any other element is ignored.
            }
            compositRule = composit.toString();
            remarkRule = remark.toString();
            if (!isCompound) {
                verifyCompoundFile();
            }
        } catch (ParserConfigurationException e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        } catch (SAXException e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        } catch (IOException e) {
            // Also covers FileNotFoundException for a missing config/dictionary file.
            throw new IllegalArgumentException(e.getMessage(), e);
        }
    }

    /**
     * Returns the directory that relative dictionary paths are resolved
     * against: the parent of the directory containing the configuration file.
     * A bare file name is resolved against the current working directory first
     * instead of failing with a NullPointerException.
     *
     * @return the base directory, or {@code null} if the path has none
     */
    private static String resolveBaseDir(String configPath) {
        File configFile = new File(configPath);
        File configDir = configFile.getParentFile();
        if (configDir == null) {
            configDir = configFile.getAbsoluteFile().getParentFile();
        }
        return configDir == null ? null : configDir.getParent();
    }

    /**
     * Returns the node value of the first child of {@code element}.
     *
     * @throws IllegalArgumentException if the element has no children
     */
    private static String requiredText(org.w3c.dom.Node element) {
        org.w3c.dom.Node first = element.getFirstChild();
        if (first == null) {
            throw new IllegalArgumentException(
                    "element '" + element.getNodeName() + "' is empty");
        }
        return first.getNodeValue();
    }

    /** Reads the tags nested in a {@code <dictionary>} element. */
    private void readDictionary(org.w3c.dom.Node dictionary, String parent) {
        NodeList dnl = dictionary.getChildNodes();
        for (int j = 0; j < dnl.getLength(); j++) {
            org.w3c.dom.Node dn = dnl.item(j);
            if (dn.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
                continue;
            }
            // Every nested element must have content, even ones not used here.
            String dvalue = requiredText(dn);
            if (dn.getNodeName().equals("compound")) {
                compoundFile = SenUtils.getPath(dvalue, parent);
            }
        }
    }

    /**
     * Checks that the compound dictionary can be deserialized as a
     * {@link HashMap}. The loaded table itself is discarded.
     *
     * <p>NOTE(review): this uses Java-native deserialization, so the dictionary
     * file must come from a trusted source.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void verifyCompoundFile() throws IOException {
        if (compoundFile == null) {
            throw new IllegalArgumentException(
                    "no <dictionary><compound> path is configured in "
                            + senProcessorConfigFile);
        }
        InputStream raw = new FileInputStream(compoundFile);
        try {
            ObjectInputStream is = new ObjectInputStream(raw);
            // The cast is the check: a non-HashMap payload is rejected here.
            HashMap<?, ?> ignored = (HashMap<?, ?>) is.readObject();
        } catch (ClassNotFoundException e1) {
            throw new RuntimeException(e1);
        } finally {
            // Closing the underlying stream releases the file handle even if
            // the ObjectInputStream could not be constructed.
            raw.close();
        }
    }
}
いつかまたどこかで使うかもしれないのでここに残しておく。
tags: zlashdot Java Java MeCabSen
Posted by NI-Lab. (@nilab)