ElasticSearch和jdbc-映射,分析器,过滤器设置 [英] ElasticSearch and jdbc - mapping, analyzer, filter setup
问题描述
我搜索了很多stackoverflow问题,ElasticSearch文档,论坛,但都失败了.
I have searched a lot of Stack Overflow questions, ElasticSearch docs, and forums, but everything failed.
我尝试为MySQL数据库设置ElasticSearch JDBC,并在搜索中实现部分单词匹配(例如,当您键入"bicycl"时,脚本必须能搜索到"bicycle"). 我尝试使用nGram,但是我做错了什么... 我只需要在字符串字段上实现nGram.
I am trying to set up ElasticSearch JDBC with a MySQL database and add partial-word search to my search (for example, when you type 'bicycl' the script has to find 'bicycle'). I tried to use nGram, but I am doing something wrong... All I need is to implement nGram on string fields.
这是我的主要sql配置:
Here is my main sql configuration:
# Registers a river definition named "query_1" by PUT-ing its _meta document
# to the Elasticsearch node at localhost:9200.
# The JSON payload has four top-level keys:
#   "type"         - the river type ("jdbc")
#   "jdbc"         - connection URL, credentials, SQL statement, polling/schedule, target index and type
#   "index"        - target index/type plus the analysis settings (nGram filter, analyzers, tokenizer)
#   "type_mapping" - per-field analyzer assignments for searcher/query_1
# NOTE(review): this is the configuration exactly as posted in the question; the
# answer further down states that "index" and "type_mapping" must be nested inside
# "jdbc" and that the mapping is missing a "properties" key.
curl -XPUT 'localhost:9200/_river/query_1/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"url" : "jdbc:mysql://localhost:3306/testowa",
"user" : "root",
"password" : "****",
"sql" : "SELECT p.products_id as _id, p.products_id, tr.tax_class_id, m.manufacturers_id, p.products_status, products_temporarily_unavailable, ptc.categories_id, ctt.categories_disabled, ctt.category_tags, ctt.categories_name, pd.products_name, manufacturers_name, pd.products_description, p.products_model, p.products_code, pd.products_search_tags, pd.products_description_seo_tag FROM products_description pd, products_to_categories ptc, tax_rates tr, manufacturers m, categories_tree_table ctt, products p LEFT JOIN specials ON specials.products_id = p.products_id AND status = 1 LEFT JOIN products_gratis pg ON pg.ref_products_id = p.products_id WHERE pd.products_id = p.products_id AND ptc.products_id = p.products_id AND p.products_tax_class_id = tr.tax_class_id AND p.manufacturers_id = m.manufacturers_id AND (p.products_status = 1 or products_temporarily_unavailable = 1) AND pd.language_id = 1 AND m.language_id = 1 AND p.products_is_archive = 0 AND ptc.categories_id = ctt.categories_id AND ctt.categories_disabled != 1",
"poll": "10s",
"strategy": "simple",
"schedule" : "0 1-59 0-23 ? * *",
"autocommit" : true,
"index" : "searcher",
"type" : "query_1"
},
"index" : {
"index" : "searcher",
"type" : "query_1",
"settings" : {
"analysis" : {
"filter" : {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer" : {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "my_ngram_tokenizer",
"filter": [
"lowercase",
"asciifolding",
"nGram_filter"
]
},
"my_search_analyzer" : {
"type" : "custom",
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "nGram"]
}
},
"tokenizer" : {
"my_ngram_tokenizer" : {
"type" : "nGram",
"min_gram" : "3",
"max_gram" : "20",
"token_chars": [ "letter", "digit" ]
}
}
}
}
},
"type_mapping" : {
"searcher" : {
"query_1" : {
"_all" : {
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_name" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"categories_name" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"manufacturers_name" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_description" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_code" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_model" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_search_tags" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
},
"products_description_seo_tag" : {
"type" : "string",
"analyzer" : "polish",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "my_search_analyzer"
}
}
}
}
}'
我做错了什么?
推荐答案
第一个问题出在您的JDBC river定义上. `index` 和 `type_mapping` 需要放在 `jdbc` 结构内部,而不是外部(另外,您的类型映射在 `_all` 字段之后遗漏了 `properties` 关键字),因此,更正这些之后,应该得到类似下面的配置:
The first thing that is wrong is your JDBC river specification. The `index` and `type_mapping` need to go inside the `jdbc` structure, not outside of it (also, your type mapping is missing a `properties` keyword after the `_all` field), so once you correct this, it should yield something like this:
# Registers the JDBC river "query_1" by PUT-ing its _meta document to the
# Elasticsearch node at localhost:9200.
#
# Layout notes (kept out of the payload because JSON has no comment syntax and
# inline annotations would make the request body unparseable):
#   - "index_settings": index settings; the analyzers, filters and tokenizers go here.
#   - "type_mapping":   your type mapping goes here, with the field definitions
#                       under a "properties" key.
#   Both keys live inside the "jdbc" object rather than beside it.
curl -XPUT 'localhost:9200/_river/query_1/_meta' -d '{
  "type": "jdbc",
  "jdbc": {
    "url": "jdbc:mysql://localhost:3306/testowa",
    "user": "root",
    "password": "****",
    "sql": "SELECT p.products_id as _id, p.products_id, tr.tax_class_id, m.manufacturers_id, p.products_status, products_temporarily_unavailable, ptc.categories_id, ctt.categories_disabled, ctt.category_tags, ctt.categories_name, pd.products_name, manufacturers_name, pd.products_description, p.products_model, p.products_code, pd.products_search_tags, pd.products_description_seo_tag FROM products_description pd, products_to_categories ptc, tax_rates tr, manufacturers m, categories_tree_table ctt, products p LEFT JOIN specials ON specials.products_id = p.products_id AND status = 1 LEFT JOIN products_gratis pg ON pg.ref_products_id = p.products_id WHERE pd.products_id = p.products_id AND ptc.products_id = p.products_id AND p.products_tax_class_id = tr.tax_class_id AND p.manufacturers_id = m.manufacturers_id AND (p.products_status = 1 or products_temporarily_unavailable = 1) AND pd.language_id = 1 AND m.language_id = 1 AND p.products_is_archive = 0 AND ptc.categories_id = ctt.categories_id AND ctt.categories_disabled != 1",
    "poll": "10s",
    "strategy": "simple",
    "schedule": "0 1-59 0-23 ? * *",
    "autocommit": true,
    "index": "searcher",
    "index_settings": {
      "analysis": {
        "filter": {
          "nGram_filter": {
            "type": "nGram",
            "min_gram": 2,
            "max_gram": 20,
            "token_chars": [
              "letter",
              "digit",
              "punctuation",
              "symbol"
            ]
          }
        },
        "analyzer": {
          "nGram_analyzer": {
            "type": "custom",
            "tokenizer": "my_ngram_tokenizer",
            "filter": [
              "lowercase",
              "asciifolding",
              "nGram_filter"
            ]
          },
          "my_search_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "standard",
              "lowercase",
              "nGram"
            ]
          }
        },
        "tokenizer": {
          "my_ngram_tokenizer": {
            "type": "nGram",
            "min_gram": "3",
            "max_gram": "20",
            "token_chars": [
              "letter",
              "digit"
            ]
          }
        }
      }
    },
    "type": "query_1",
    "type_mapping": {
      "query_1": {
        "_all": {
          "analyzer": "polish",
          "index_analyzer": "nGram_analyzer",
          "search_analyzer": "my_search_analyzer"
        },
        "properties": {
          "products_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "categories_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "manufacturers_name": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_description": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_code": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_model": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_search_tags": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          },
          "products_description_seo_tag": {
            "type": "string",
            "analyzer": "polish",
            "index_analyzer": "nGram_analyzer",
            "search_analyzer": "my_search_analyzer"
          }
        }
      }
    }
  }
}'
这篇关于ElasticSearch和jdbc-映射,分析器,过滤器设置的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!