使用标准corenlp软件包获取coreferences [英] Getting coreferences with Standard corenlp package

查看:99
本文介绍了使用标准corenlp软件包获取coreferences的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试获取文本中的共指关系.我是corenlp包的新手.我尝试了下面的代码,但它不起作用;我也愿意接受其他方法.

I'm trying to get coreferences in a text. I'm new to the corenlp package. I tried the code below, which doesn't work, but I'm open to other methods as well.

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package corenlp;
import edu.stanford.nlp.ling.CoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CorefGraphAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TreeAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.semgraph.SemanticGraph;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.IntTuple;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Timing;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import java.util.Properties;
/**
 *
 * @author Karthi
 */
public class Main {


        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
    Properties props = new Properties();
    FileInputStream in = new FileInputStream("Main.properties");

    props.load(in);
    in.close();
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "The doctor can consult with other doctors about this patient. If that is the case, the name of the doctor and the names of the consultants have to be maintained. Otherwise, only the name of the doctor is kept. "; // Add your text here!

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);
    System.out.println(document);
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = (List<CoreMap>) document.get(SentencesAnnotation.class);
    System.out.println(sentences);
    for(CoreMap sentence: sentences) {
      // traversing the words in the current sentence
      // a CoreLabel is a CoreMap with additional token-specific methods
      for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
        // this is the text of the token
        String word = token.get(TextAnnotation.class);
        // this is the POS tag of the token
        String pos = token.get(PartOfSpeechAnnotation.class);
        // this is the NER label of the token
        String ne = token.get(NamedEntityTagAnnotation.class);
      }

      // this is the parse tree of the current sentence
      Tree tree = sentence.get(TreeAnnotation.class);
System.out.println(tree);
      // this is the Stanford dependency graph of the current sentence
      SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
      System.out.println(dependencies);
    }

    // this is the coreference link graph
    // each link stores an arc in the graph; the first element in the Pair is the source, the second is the target
    // each node is stored as <sentence id, token id>. Both offsets start at 1!
    List<Pair<IntTuple, IntTuple>> graph = document.get(CorefGraphAnnotation.class);
    System.out.println(graph);

    }

}

这是我得到的错误:

Loading POS Model [// For POS model] ... Loading default properties from trained tagger // For POS model
Error: No such trained tagger config file found.
java.io.FileNotFoundException: \\ For POS model (The specified path is invalid)
        at java.io.FileInputStream.open(Native Method)
        at java.io.FileInputStream.<init>(FileInputStream.java:106)
        at java.io.FileInputStream.<init>(FileInputStream.java:66)
        at edu.stanford.nlp.tagger.maxent.TaggerConfig.getTaggerDataInputStream(TaggerConfig.java:741)
        at edu.stanford.nlp.tagger.maxent.TaggerConfig.<init>(TaggerConfig.java:178)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:228)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.loadModel(POSTaggerAnnotator.java:57)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.<init>(POSTaggerAnnotator.java:44)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:441)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:434)
        at edu.stanford.nlp.pipeline.AnnotatorPool.get(AnnotatorPool.java:62)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.construct(StanfordCoreNLP.java:309)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:347)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:337)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:329)
        at corenlp.Main.main(Main.java:66)
Exception in thread "main" java.lang.RuntimeException: java.io.FileNotFoundException: \\ For POS model (The specified path is invalid)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:443)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:434)
        at edu.stanford.nlp.pipeline.AnnotatorPool.get(AnnotatorPool.java:62)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.construct(StanfordCoreNLP.java:309)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:347)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:337)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP.<init>(StanfordCoreNLP.java:329)
        at corenlp.Main.main(Main.java:66)
Caused by: java.io.FileNotFoundException: \\ For POS model (The specified path is invalid)
        at java.io.FileInputStream.open(Native Method)
        at java.io.FileInputStream.<init>(FileInputStream.java:106)
        at java.io.FileInputStream.<init>(FileInputStream.java:66)
        at edu.stanford.nlp.tagger.maxent.TaggerConfig.getTaggerDataInputStream(TaggerConfig.java:741)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(MaxentTagger.java:643)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:268)
        at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:228)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.loadModel(POSTaggerAnnotator.java:57)
        at edu.stanford.nlp.pipeline.POSTaggerAnnotator.<init>(POSTaggerAnnotator.java:44)
        at edu.stanford.nlp.pipeline.StanfordCoreNLP$4.create(StanfordCoreNLP.java:441)
        ... 7 more
Java Result: 1

推荐答案

此错误仅表示程序未找到运行所需的数据模型.它们需要在您的类路径上.如果您位于分发目录中,则可以使用类似下面的命令来完成此操作:

This error simply means the program is not finding the data models it needs to run. They need to be on your classpath. If you're in the distribution directory, you can do this with a command like:

java -cp stanford-corenlp-2010-11-12.jar:stanford-corenlp-models-2010-11-06.jar:xom.jar:jgrapht.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma,ner,parse,dcoref -file input.txt

第二个jar包含模型.如果您使用的是Windows,请用分号替换上面的冒号.

The second jar contains the models. If you're using Windows, replace the colons above with semicolons.

这篇关于使用标准corenlp软件包获取coreferences的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆