如何使用SentiWordNet [英] How to use SentiWordNet
问题描述
我需要对包含推文的一些csv文件进行情感分析。我正在使用 SentiWordNet 进行情绪分析。
I need to do sentiment analysis on some csv files containing tweets. I'm using SentiWordNet to do the sentiment analysis.
我拿到了他们网站上提供的以下示例Java代码,但不确定如何使用它。我要分析的csv文件的路径是 C:\Users\MyName\Desktop\tweets.csv
。 SentiWordNet_3.0.0.txt
的路径是 C:\Users\MyName\Desktop\SentiWordNet_3.0.0\home\swn\www\admin\dump\SentiWordNet_3.0.0_20130122.txt 。我是java的新手,请帮忙,谢谢!以下示例java代码的链接是这个。
I got the following piece of sample java code they provided on their site. I'm not sure how to use it. The path of the csv file that I want to analyze is C:\Users\MyName\Desktop\tweets.csv
. The path of the SentiWordNet_3.0.0.txt
is C:\Users\MyName\Desktop\SentiWordNet_3.0.0\home\swn\www\admin\dump\SentiWordNet_3.0.0_20130122.txt
. I'm new to java, pls help, thanks! The link to the sample java code below is this.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
/**
 * Loads a SentiWordNet 3.0 lexicon file and maps every {@code term#pos} key to a coarse
 * sentiment label.
 *
 * <p>Each data line of the lexicon is tab-separated:
 * {@code POS, ID, PosScore, NegScore, SynsetTerms, Gloss}. {@code SynsetTerms} is a
 * space-separated list of {@code term#senseRank} tokens. A synset's score is
 * {@code PosScore - NegScore}; a term's final score is the average of its sense scores
 * weighted by {@code 1 / senseRank}.
 *
 * <p>If the file cannot be read or parsed, the stack trace is printed and the dictionary is
 * left empty, so {@link #extract(String, String)} returns {@code null} for every lookup.
 */
public class SWN3 {
    /** Lexicon location used by the no-argument constructor, relative to the working directory. */
    private static final String DEFAULT_PATH_TO_SWN =
            "data" + File.separator + "SentiWordNet_3.0.0.txt";

    private final String pathToSWN;
    private final HashMap<String, String> _dict = new HashMap<String, String>();

    /** Loads the lexicon from the default location {@code data/SentiWordNet_3.0.0.txt}. */
    public SWN3() {
        this(DEFAULT_PATH_TO_SWN);
    }

    /**
     * Loads the lexicon from an explicit location.
     *
     * @param pathToSWN path of the SentiWordNet text file
     */
    public SWN3(String pathToSWN) {
        this.pathToSWN = pathToSWN;
        load();
    }

    /**
     * Returns the sentiment label of a term for a given part of speech.
     *
     * @param word the term as spelled in the lexicon
     * @param pos the part-of-speech tag from the first lexicon column
     * @return one of {@code strong_positive}, {@code positive}, {@code weak_positive},
     *         {@code weak_negative}, {@code negative}, {@code strong_negative}, the empty
     *         string when the score is exactly zero, or {@code null} when the term is unknown
     */
    public String extract(String word, String pos) {
        return _dict.get(word + "#" + pos);
    }

    /** Reads the lexicon file and fills {@code _dict}; failures are reported, not thrown. */
    private void load() {
        HashMap<String, Vector<Double>> sensesByTerm = new HashMap<String, Vector<Double>>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(pathToSWN));
            String line;
            while ((line = reader.readLine()) != null) {
                addSynset(line, sensesByTerm);
            }
            for (String term : sensesByTerm.keySet()) {
                _dict.put(term, label(weightedScore(sensesByTerm.get(term))));
            }
        } catch (Exception e) {
            // Best effort, as before: report the problem and leave the dictionary empty.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /** Records the score of one lexicon line under every term listed in that line. */
    private static void addSynset(String line, HashMap<String, Vector<Double>> sensesByTerm) {
        String trimmed = line.trim();
        // The distributed file starts with '#' comment lines; they are not data.
        if (trimmed.length() == 0 || trimmed.charAt(0) == '#') {
            return;
        }
        String[] data = line.split("\t");
        if (data.length < 5) {
            return;
        }
        double score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
        for (String token : data[4].split(" ")) {
            int hash = token.lastIndexOf('#');
            if (hash <= 0) {
                continue;
            }
            int index = Integer.parseInt(token.substring(hash + 1)) - 1;
            if (index < 0) {
                continue;
            }
            String key = token.substring(0, hash) + "#" + data[0];
            Vector<Double> senses = sensesByTerm.get(key);
            if (senses == null) {
                senses = new Vector<Double>();
                sensesByTerm.put(key, senses);
            }
            while (senses.size() <= index) {
                senses.add(0.0);
            }
            // set, not add(index, ...): inserting would shift senses that were already stored
            // whenever a lower sense rank is read after a higher one.
            senses.set(index, score);
        }
    }

    /** Averages the sense scores, weighting the sense at rank r by 1/r. */
    private static double weightedScore(Vector<Double> senseScores) {
        double weighted = 0.0;
        double norm = 0.0;
        for (int i = 0; i < senseScores.size(); i++) {
            double weight = 1.0 / (i + 1);
            weighted += weight * senseScores.get(i);
            norm += weight;
        }
        return weighted / norm;
    }

    /** Maps a score to its label; the ranges are contiguous so no non-zero score is unlabeled. */
    private static String label(double score) {
        if (score >= 0.75) {
            return "strong_positive";
        }
        if (score > 0.25) {
            return "positive";
        }
        if (score > 0) {
            return "weak_positive";
        }
        if (score <= -0.75) {
            return "strong_negative";
        }
        if (score < -0.25) {
            return "negative";
        }
        if (score < 0) {
            return "weak_negative";
        }
        return "";
    }
}
新代码:
/**
 * Loads a SentiWordNet 3.0 lexicon file, keeps a numeric sentiment score per
 * {@code term#pos} key, and classifies free text by summing the scores of its words.
 *
 * <p>Each data line of the lexicon is tab-separated:
 * {@code POS, ID, PosScore, NegScore, SynsetTerms, Gloss}. A synset's score is
 * {@code PosScore - NegScore}; a term's score is the average of its sense scores weighted by
 * {@code 1 / senseRank}.
 *
 * <p>If the file cannot be read or parsed, the stack trace is printed and the dictionary is
 * left empty, so every word scores {@code 0}.
 */
public class SWN3 {
    // NOTE(review): the accompanying description names the file SentiWordNet_3.0.0_20130122.txt;
    // confirm that this path matches the file actually on disk.
    private static final String DEFAULT_PATH_TO_SWN = "C:\\Users\\MyName\\Desktop\\SentiWordNet_3.0.0\\home\\swn\\www\\admin\\dump\\SentiWordNet_3.0.0.txt";

    /** Part-of-speech suffixes summed by {@link #extract(String)}. */
    private static final String[] POS_TAGS = {"n", "a", "r", "v"};

    private final String pathToSWN;
    private final HashMap<String, Double> _dict = new HashMap<String, Double>();

    /** Loads the lexicon from the hard-coded default location. */
    public SWN3() {
        this(DEFAULT_PATH_TO_SWN);
    }

    /**
     * Loads the lexicon from an explicit location.
     *
     * @param pathToSWN path of the SentiWordNet text file
     */
    public SWN3(String pathToSWN) {
        this.pathToSWN = pathToSWN;
        load();
    }

    /**
     * Returns the sum of the scores stored for {@code word} under the {@code n}, {@code a},
     * {@code r} and {@code v} part-of-speech tags.
     *
     * @param word the term as spelled in the lexicon
     * @return the summed score; {@code 0.0} when the word has no entry (never {@code null})
     */
    public Double extract(String word) {
        double total = 0.0;
        for (String pos : POS_TAGS) {
            Double score = _dict.get(word + "#" + pos);
            if (score != null) {
                total += score;
            }
        }
        return total;
    }

    /**
     * Classifies a tweet from the summed score of its words.
     *
     * <p>The text is split on whitespace and every non-letter character is removed from each
     * token before lookup. The total is not divided by the number of words.
     *
     * @param twit the text to classify; {@code null} is treated as empty
     * @return {@code "very positive"} (total &gt;= 0.75), {@code "positive"} (total &gt; 0.25),
     *         {@code "very negative"} (total &lt;= -0.75), {@code "negative"} (total &lt; 0),
     *         otherwise {@code "neutral"}
     */
    public String classifytweet(String twit) {
        if (twit == null) {
            return "neutral";
        }
        double totalScore = 0;
        for (String raw : twit.split("\\s+")) {
            String word = raw.replaceAll("([^a-zA-Z\\s])", "");
            totalScore += extract(word);
        }
        if (totalScore >= 0.75) {
            return "very positive";
        }
        if (totalScore > 0.25) {
            return "positive";
        }
        if (totalScore <= -0.75) {
            return "very negative";
        }
        if (totalScore < 0) {
            return "negative";
        }
        return "neutral";
    }

    /**
     * Classifies each command-line argument as one tweet and prints the result.
     * With no arguments nothing is loaded and nothing is printed.
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            return;
        }
        SWN3 swn = new SWN3();
        for (String tweet : args) {
            System.out.println(swn.classifytweet(tweet) + "\t" + tweet);
        }
    }

    /** Reads the lexicon file and fills {@code _dict}; failures are reported, not thrown. */
    private void load() {
        HashMap<String, Vector<Double>> sensesByTerm = new HashMap<String, Vector<Double>>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(pathToSWN));
            String line;
            while ((line = reader.readLine()) != null) {
                addSynset(line, sensesByTerm);
            }
            for (String term : sensesByTerm.keySet()) {
                _dict.put(term, weightedScore(sensesByTerm.get(term)));
            }
        } catch (Exception e) {
            // Best effort, as before: report the problem and leave the dictionary empty.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /** Records the score of one lexicon line under every term listed in that line. */
    private static void addSynset(String line, HashMap<String, Vector<Double>> sensesByTerm) {
        String trimmed = line.trim();
        // The distributed file starts with '#' comment lines; they are not data.
        if (trimmed.length() == 0 || trimmed.charAt(0) == '#') {
            return;
        }
        String[] data = line.split("\t");
        if (data.length < 5) {
            return;
        }
        double score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
        for (String token : data[4].split(" ")) {
            int hash = token.lastIndexOf('#');
            if (hash <= 0) {
                continue;
            }
            int index = Integer.parseInt(token.substring(hash + 1)) - 1;
            if (index < 0) {
                continue;
            }
            String key = token.substring(0, hash) + "#" + data[0];
            Vector<Double> senses = sensesByTerm.get(key);
            if (senses == null) {
                senses = new Vector<Double>();
                sensesByTerm.put(key, senses);
            }
            while (senses.size() <= index) {
                senses.add(0.0);
            }
            // set, not add(index, ...): inserting would shift senses that were already stored
            // whenever a lower sense rank is read after a higher one.
            senses.set(index, score);
        }
    }

    /** Averages the sense scores, weighting the sense at rank r by 1/r. */
    private static double weightedScore(Vector<Double> senseScores) {
        double weighted = 0.0;
        double norm = 0.0;
        for (int i = 0; i < senseScores.size(); i++) {
            double weight = 1.0 / (i + 1);
            weighted += weight * senseScores.get(i);
            norm += weight;
        }
        return weighted / norm;
    }
}
推荐答案
首先删除文件开头的所有“垃圾”内容(包括描述、说明等)。
First of all, start by deleting all the "garbage" at the beginning of the file (which includes the description, instructions, etc.).
一种可能的用法是更改 SWN3
使方法提取
,返回 Double
:
One possible usage is to change SWN3
and make the method extract
in it return a Double
:
/**
 * Returns the sum of the scores stored for {@code word} under the {@code n}, {@code a},
 * {@code r} and {@code v} part-of-speech suffixes.
 *
 * @param word the term to look up
 * @return the summed score; {@code 0.0} when none of the four keys is present
 */
public Double extract(String word)
{
    // Accumulate in a primitive: avoids the deprecated Double(double) constructor and
    // re-boxing the running total on every addition.
    double total = 0.0;
    String[] posTags = {"n", "a", "r", "v"};
    for (String pos : posTags) {
        // Look each key up once and reuse the result for both the null check and the sum.
        Double score = _dict.get(word + "#" + pos);
        if (score != null) {
            total += score;
        }
    }
    return total;
}
然后,给定一个你要标记的字符串,你可以将它拆分成只含单词的片段(去掉符号和未知字符),并对每个单词使用 extract
方法返回的结果,你可以决定字符串的平均权重是多少:
Then, giving a String that you want to tag, you can split it so it'll have only words (with no signs and unknown chars) and using the result returned from extract
method on each word, you can decide what is the average weight of the String:
// Sum the score of every whitespace-separated token in the tweet, then bucket the total.
String[] words = twit.split("\\s+");
double totalScore = 0, averageScore;
for (String word : words) {
    // Drop every character that is not an ASCII letter before the lookup.
    word = word.replaceAll("([^a-zA-Z\\s])", "");
    // Look the word up once and reuse the result for the null check and the sum.
    Double wordScore = _sw.extract(word);
    if (wordScore == null)
        continue;
    totalScore += wordScore;
}
// Despite its name this holds the summed score; it is not divided by the word count.
averageScore = totalScore;
if (averageScore >= 0.75)
    return "very positive";
else if (averageScore > 0.25)
    return "positive";
else if (averageScore <= -0.75)
    return "very negative";
else if (averageScore < 0)
    // Covers the whole range (-0.75, 0), including (-0.75, -0.5), which previously fell
    // through to "neutral".
    return "negative";
return "neutral";
我发现这种方式更容易,对我来说也没问题。
I found this way easier and it works fine for me.
更新:
我更改了 _dict
到 _dict = new HashMap< String,Double>();
所以它会有一个 String
key和一个 Double
值。
I changed _dict
to _dict = new HashMap<String, Double>();
So it will have a String
key and a Double
value.
所以我把 _dict.put(word, sent);
替换为 _dict.put(word, score);
So I replaced _dict.put(word, sent);
with _dict.put(word, score);
这篇关于如何使用SentiWordNet的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!