如何操作JSON树的叶子 [英] How to manipulate leaves of a JSON tree

查看:125
本文介绍了如何操作JSON树的叶子的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想使用 Java 把 JSON 树中的罕见词替换为 _RARE_。



我的罕见词表包含

  late 
populate
convicts

所以对于下面的JSON

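  ["S", ["PP", ["ADP", "In"], ["NP", ["DET", "the"], ["NP", ["ADJ", "late"], ["NOUN", "1700<s"]]]], ["S", ["NP", ["ADJ", "British"], ["NOUN", "convicts"]], ["S", ["VP", ["VERB", "were"], ["VP", ["VERB", "used"], ["S+VP", ["PRT", "to"], ["VP", ["VERB", "populate"], ["WHNP", ["DET", "which"], ["NOUN", "colony"]]]]]], [".", "?"]]]]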

我应该得到



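  ["S", ["PP", ["ADP", "In"], ["NP", ["DET", "the"], ["NP", ["ADJ", "_RARE_"], ["NOUN", "1700<s"]]]], ["S", ["NP", ["ADJ", "British"], ["NOUN", "_RARE_"]], ["S", ["VP", ["VERB", "were"], ["VP", ["VERB", "used"], ["S+VP", ["PRT", "to"], ["VP", ["VERB", "populate"], ["WHNP", ["DET", "which"], ["NOUN", "colony"]]]]]], [".", "?"]]]]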



注意

  ["ADJ","late"]

被替换为

  ["ADJ","_RARE_"]

我的代码到目前为止,如下所示:



我递归地遍历这棵树,一旦发现罕见词,就创建一个新的 JSON 数组,并尝试用它替换树中现有的节点。请参阅下面代码中 tree = newRareArray 这一行的注释——这一步不起作用,我就卡在这里:在这个函数之外,树保持不变。

  public static void traverseTreeAndReplaceWithRare(JsonArray tree){

//System.out.println(tree.getAsJsonArray());

(int x = 0; x< tree.getAsJsonArray()。size(); x ++)
{
if(!tree.get(x).isJsonArray ())
{
if(tree.size()== 2)
{
//请注意它会在同一个单词两次获得
String word = tree.get(1).toString();
word = word.replaceAll(\,); //删除双引号

if(rareWords.contains(word))
{
JsonParser parser = new JsonParser();

//这完美的工作
System.out.println(Orig:+ tree);
JsonElement jsonElement = parser.parse ([+ tree.get(0)+,+_ RARE _+]);

JsonArray newRareArray = jsonElement.getAsJsonArray();

//这个工作完美
System.out.println(New:+ newRareArray);

tree = newRareArray; //这不工作
}

}
continue;
}
traverseTreeAndReplaceWithRare(tree.get(x).getAsJsonArray());
}
}

调用上述方法的代码如下,我使用的是 Google 的 gson:

  // Parse the JSON text held in strJSON and view its root element as an array.
  // Presumably `tree` is then handed to traverseTreeAndReplaceWithRare (the call
  // itself is not shown in the question).
  JsonParser parser = new JsonParser(); 
JsonElement jsonElement = parser.parse(strJSON);
JsonArray tree = jsonElement.getAsJsonArray();


解决方案

下面是用 C++ 实现的一种直接做法:

#include <fstream>
#include "JSON.hpp"
#include <boost/algorithm/string/regex.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/phoenix.hpp>

// Loads the rare-word list from "testcases/rarewords.txt", one word per line.
//
// Leading and trailing whitespace (including the '\r' left behind by CRLF line
// endings) is stripped from every line, and lines that are empty after
// trimming are skipped, so a stray blank line can never become a "word" that
// later matches empty JSON strings.
//
// Returns the words in file order; the result is empty when the file cannot
// be opened (the first getline fails immediately).
static std::vector<std::wstring> readRareWordList()
{
    const wchar_t* const blanks = L" \t\r\n\v\f";
    std::vector<std::wstring> result;

    std::wifstream ifs("testcases/rarewords.txt");
    std::wstring line;
    while (std::getline(ifs, line)) {
        const auto first = line.find_first_not_of(blanks);
        if (first == std::wstring::npos)
            continue; // nothing but whitespace on this line

        const auto last = line.find_last_not_of(blanks);
        result.push_back(line.substr(first, last - first + 1));
    }

    return result;
}

// Visitor that walks a parsed JSON value and overwrites every string that is
// exactly equal to one of the rare words with the marker "_RARE_".
// Usage: boost::apply_visitor(RareWords(), value);
struct RareWords : boost::static_visitor<> {

    /////////////////////////////////////
    // do nothing by default
    template <typename T> void operator()(T&&) const { /* leave all other things unchanged */ }

    /////////////////////////////////////
    // recurse arrays and objects
    void operator()(JSON::Object& obj) const {
        for (auto& v : obj.values) {
            //RareWords::operator()(v.first); /* to replace in field names (?!) */
            boost::apply_visitor(*this, v.second);
        }
    }

    // The first element of every array is skipped: in the sample data it is the
    // node label (e.g. "ADJ"), which must stay untouched even if it happens to
    // equal a rare word.
    void operator()(JSON::Array& arr) const {
        bool atLabel = true;
        for (auto& v : arr.values) {
            if (atLabel) {
                atLabel = false;
                continue;
            }
            boost::apply_visitor(*this, v);
        }
    }

    /////////////////////////////////////
    // do replacements on strings (whole-string, case-sensitive match)
    void operator()(JSON::String& s) const {
        // Loaded once, on the first string visited.
        static const std::vector<std::wstring> rareWords = readRareWordList();
        // The question asks for "_RARE_" (single underscores).
        static const std::wstring replacement = L"_RARE_";

        for (const auto& word : rareWords) {
            if (word == s.value) {
                s.value = replacement;
                return; // replaced; no need to scan the rest of the list
            }
        }
    }
};

int main()
{
auto document = JSON :: readFrom(std :: ifstream(testcases / test3.json));

boost :: apply_visitor(RareWords(),document);

std :: cout<<<文件;
}

这里假设您想对所有字符串值进行替换,并且只匹配整个字符串。您可以通过修改字符串访问器中的比较方式(例如改用正则表达式并设置相应的标志),轻松实现不区分大小写、匹配字符串内部的单词等功能。代码已根据评论略作调整。



包括 JSON.hpp/cpp 在内的完整代码在这里: https://github.com/sehe/spirit-v2-json/tree/16093940


I want to replace rare words with _RARE_ in a JSON tree using Java.

My rareWords list contains

late  
populate
convicts

So for JSON below

["S", ["PP", ["ADP", "In"], ["NP", ["DET", "the"], ["NP", ["ADJ", "late"], ["NOUN", "1700<s"]]]], ["S", ["NP", ["ADJ", "British"], ["NOUN", "convicts"]], ["S", ["VP", ["VERB", "were"], ["VP", ["VERB", "used"], ["S+VP", ["PRT", "to"], ["VP", ["VERB", "populate"], ["WHNP", ["DET", "which"], ["NOUN", "colony"]]]]]], [".", "?"]]]]

I should get

["S", ["PP", ["ADP", "In"], ["NP", ["DET", "the"], ["NP", ["ADJ", "_RARE_"], ["NOUN", "1700<s"]]]], ["S", ["NP", ["ADJ", "British"], ["NOUN", "_RARE_"]], ["S", ["VP", ["VERB", "were"], ["VP", ["VERB", "used"], ["S+VP", ["PRT", "to"], ["VP", ["VERB", "populate"], ["WHNP", ["DET", "which"], ["NOUN", "colony"]]]]]], [".", "?"]]]]

Notice how

["ADJ","late"]

was replaced by

["ADJ","_RARE_"]

My code so far is like below:

I recursively iterate over the tree and, as soon as a rare word is found, I create a new JSON array and try to replace the existing tree's node with it. See the line marked "// this Doesn't work" below; that is where I got stuck. The tree remains unchanged outside of this function.

/**
 * Recursively walks {@code tree}. Whenever a two-element array holds a
 * non-array element, the second element is compared against {@code rareWords}
 * (defined elsewhere; presumably a collection of strings) and, on a hit, a
 * replacement array {@code [label, "_RARE_"]} is built.
 *
 * This is the code the question is about: the final assignment only rebinds
 * the local parameter {@code tree}, so the array held by the caller is never
 * modified and the tree is unchanged once the method returns.
 *
 * @param tree the (sub)tree to walk
 */
public static void traverseTreeAndReplaceWithRare(JsonArray tree){   

        //System.out.println(tree.getAsJsonArray()); 

        for (int x = 0; x < tree.getAsJsonArray().size(); x++)
        {
            // Non-array elements are the leaves (label / word strings).
            if(!tree.get(x).isJsonArray())
            {
                if(tree.size()==2)
                {   
                //beware it will get here twice for same word
                // (once for index 0 and once for index 1; both read tree.get(1))
                 String word= tree.get(1).toString();  
                 word=word.replaceAll("\"", ""); // removing double quotes

                 if(rareWords.contains(word))
                 {
                 JsonParser parser = new JsonParser();                   

                             //This works perfectly 
                             System.out.println("Orig:"+tree);
                 // Build the replacement by re-parsing "[<label>,_RARE_]".
                 JsonElement jsonElement = parser.parse("["+tree.get(0)+","+"_RARE_"+"]");

                 JsonArray newRareArray = jsonElement.getAsJsonArray();

                             //This works perfectly 
                             System.out.println("New:"+newRareArray);

                 // Rebinds only the local parameter; the caller's array is untouched.
                 tree=newRareArray; // this Doesn't work
                 }                 

                }               
                continue;   
            }
            // Array element: descend into the subtree.
            traverseTreeAndReplaceWithRare(tree.get(x).getAsJsonArray());
        }
    }

Code for calling the above (I use Google's Gson):

// Parse the JSON text held in strJSON and view its root element as an array.
// Presumably `tree` is then handed to traverseTreeAndReplaceWithRare (the call
// itself is not shown in the question).
JsonParser parser = new JsonParser();
JsonElement jsonElement = parser.parse(strJSON);
JsonArray tree = jsonElement.getAsJsonArray();  

解决方案

Here's a straightforward approach in C++:

#include <fstream>
#include "JSON.hpp"
#include <boost/algorithm/string/regex.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/phoenix.hpp>

// Loads the rare-word list from "testcases/rarewords.txt", one word per line.
//
// Leading and trailing whitespace (including the '\r' left behind by CRLF line
// endings) is stripped from every line, and lines that are empty after
// trimming are skipped, so a stray blank line can never become a "word" that
// later matches empty JSON strings.
//
// Returns the words in file order; the result is empty when the file cannot
// be opened (the first getline fails immediately).
static std::vector<std::wstring> readRareWordList()
{
    const wchar_t* const blanks = L" \t\r\n\v\f";
    std::vector<std::wstring> result;

    std::wifstream ifs("testcases/rarewords.txt");
    std::wstring line;
    while (std::getline(ifs, line)) {
        const auto first = line.find_first_not_of(blanks);
        if (first == std::wstring::npos)
            continue; // nothing but whitespace on this line

        const auto last = line.find_last_not_of(blanks);
        result.push_back(line.substr(first, last - first + 1));
    }

    return result;
}

// Visitor that walks a parsed JSON value and overwrites every string that is
// exactly equal to one of the rare words with the marker "_RARE_".
// Usage: boost::apply_visitor(RareWords(), value);
struct RareWords : boost::static_visitor<> {

    /////////////////////////////////////
    // do nothing by default
    template <typename T> void operator()(T&&) const { /* leave all other things unchanged */ }

    /////////////////////////////////////
    // recurse arrays and objects
    void operator()(JSON::Object& obj) const {
        for (auto& v : obj.values) {
            //RareWords::operator()(v.first); /* to replace in field names (?!) */
            boost::apply_visitor(*this, v.second);
        }
    }

    // The first element of every array is skipped: in the sample data it is the
    // node label (e.g. "ADJ"), which must stay untouched even if it happens to
    // equal a rare word.
    void operator()(JSON::Array& arr) const {
        bool atLabel = true;
        for (auto& v : arr.values) {
            if (atLabel) {
                atLabel = false;
                continue;
            }
            boost::apply_visitor(*this, v);
        }
    }

    /////////////////////////////////////
    // do replacements on strings (whole-string, case-sensitive match)
    void operator()(JSON::String& s) const {
        // Loaded once, on the first string visited.
        static const std::vector<std::wstring> rareWords = readRareWordList();
        // The question asks for "_RARE_" (single underscores).
        static const std::wstring replacement = L"_RARE_";

        for (const auto& word : rareWords) {
            if (word == s.value) {
                s.value = replacement;
                return; // replaced; no need to scan the rest of the list
            }
        }
    }
};

int main()
{
    auto document = JSON::readFrom(std::ifstream("testcases/test3.json"));

    boost::apply_visitor(RareWords(), document);

    std::cout << document;
}

This assumes you wanted to do replacements on all string values, and only matches whole strings. You could easily make this case insensitive, match words inside strings etc. by changing the comparison in the string visitor (for example, to a regex with suitable flags). Slightly adapted in response to the comments.

The full code including JSON.hpp/cpp is here: https://github.com/sehe/spirit-v2-json/tree/16093940

这篇关于如何操作JSON树的叶子的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆