搜索多个单词elasticsearch haystack [英] Search for multiple words elasticsearch haystack
问题描述
我正在使用 Django、Haystack 和 Elasticsearch。
我的search_index.py:
从干草堆导入索引
从模型导入广告
类AdvertisementIndex(indexes.SearchIndex,indexs.Indexable):
文本= indexs.CharField(document = True,use_template = True)
make = indexs.CharField()
section = indexs.CharField()
子类别= indexes.CharField()
content = index.CharField(model_attr ='content')
图片= index.CharField(model_attr ='images')
def get_model(self):
return Advertisement
def index_queryset(self,using = None):
返回self.get_model()。objects.filter(is_published = True).select_related('make')。select_related('section') .select_related('subcategory')
搜索表单:
< form action = / search method = get>
< input type = text-search name = q>
< input type = submit value =>
< / form>
模板:
{%块内容%}
{%为page.object_list%的结果}
< p> {{result.object.title}}}< / p> ;
< p> {{result.object.content}}< / p>
< p> {{result.object.images}}< / p>
< p> {{result.object.make}}< / p>
< p> {{result.object.section}}< / p>
< p> {{result.object.subcategory}}< / p>
{%empty%}
<没有结果。
{%endfor%}
{%endblock%}
查看
通过 curl -XGET "http://localhost:9200/_search?q=fender+boss" 直接查询时,我可以得到同时包含 "boss" 和 "fender" 的所有结果。
但是当我在搜索框中输入 "boss fender" 时,却没有任何结果。通过搜索表单,我只能得到单个单词的结果,例如 "boss"。
如何使人们能够搜索多个单词?
这个月我也遇到了这个问题。
要执行正确的查询,您需要重写(override)一些 Haystack 对象。我发现《Extending Haystack's Elasticsearch backend》这篇文章很有帮助。刚开始看起来相当复杂,但是一旦了解了它的工作原理……它就能正常工作 :-)
该博客文章介绍了如何实现 Elasticsearch 的嵌套(nested)查询……而我实现的是一个基本的 multi_match 查询。
#-*-编码:utf-8-*- __future__导入的
绝对导入django.conf导入
haystack.backends.elasticsearch_backend导入
导入(
ElasticsearchSearchBackend,ElasticsearchSearchEngine,ElasticsearchSearchQuery)$ b来自haystack.query的$ b导入SearchQuerySet
类ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
DEFAULT_ANALYZER = snowball
def __init __(self,connection_alias, ** connection_options):
super(ElasticsearchEngineBackendCustom,self).__ init __(connection_options)别名,** connection_options)
user_settings = getattr(settings,'ELASTICSEARCH_INDEX_SETTINGS',{})
if user_settings:
setattr(self,'DEFAULT_SETTINGS',user_settings)
user_analyzer = getattr(设置,'ELASTICSEARCH_DEFAULT_ANALYZER','')
if user_analyzer:
setattr(self,'DEFAULT_ANALYZER',user_analyzer)
def build_search_kwargs (self,query_string,sort_by = None,start_offset = 0,end_offset = None,
fields ='',高亮=假,facets = None,
date_facets = None,query_facets = None,
狭窄的查询=无,拼写查询=无,
以内=无,dwithin =无,distance_point =无,
模型=无,limit_to_registered_models =无,
result_class =无,multi_match =无):
out =超级(Elasticsea rchEngineBackendCustom,self).build_search_kwargs(query_string,sort_by,start_offset,
end_offset,
字段,高亮,刻面,
date_facets,query_facets,
窄查询,拼写查询,
在,dwithin,distance_point,
模型,limit_to_registered_models,
result_class)
如果multi_match:
out ['query'] = {
'multi_match': {
'que ry':multi_match ['query'],
'fields':multi_match ['fields'],
'tie_breaker':multi_match ['tie_breaker'],
'minimum_should_match':multi_match [ 'minimum_should_match'],
}
}
返回
def build_schema(self,fields):
content_field_name,映射=超级(ElasticsearchEngineBackendCustom,self).build_schema(fields)
for field_name,field_class in fields.items():
field_mapping = mapping [field_class.index_fieldname]
如果field_mapping ['type'] =='string'and field_class.indexed:
如果不是hasattr(field_class,'facet_for')或field_class.field_type in('ngram','edge_ngram'):
field_mapping ['analyzer'] = getattr(field_class,'analyzer',self.DEFAULT_ANALYZER)
mapping.update({field_class.index_fieldname:field_mapping})
返回content_field_name,映射
def multi_match_run(自身,查询,字段,minimum_should_match,tie_breaker):
从elasticsearch_dsl import搜索
从elasticsearch_dsl.query import MultiMatch
raw = Search()。using(self.conn).query(
MultiMatch(query = u'{}'。format(query),fields = fields,minimum_should_match = minimum_should_match,tie_breaker = tie_breaker)
).execute()
return self._process_results(raw)
class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
def multi_match(self ,查询,字段,minimum_should_match,tie_breaker):
结果= self.backend.multi_match_run(查询,字段,minimum_should_match,tie_breaker)
self._results = results.get('results',[])
self._hit_count = results.get('hits',0)
def add_multi_match_query(self,query,fields,minimum_should_match,tie_breaker):
self.multi_match_query = {
'query':查询,
'fields':字段,
'minimum_should_match':minimum_should_match,
'tie_breaker':tie_breaker
}
def build_params(self,spelling_query = None,** kwargs):
search_kwargs = super(ElasticsearchSearchQueryCustom,self).build_params(spelling_query,** kwargs)
如果self.multi_match_query :
search_kwargs ['multi_match'] = self.multi_match_query
return search_kwargs
class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
def multi_match(自我,查询,字段,最小值_匹配= 35%,tie_breaker = 0.3):
clone = self._clone()
clone.query.add_multi_match_query(查询,字段,minimum_should_match,tie_breaker)
clone.query.multi_match(查询,字段,minimum_should_match,tie_breaker)
返回克隆
类Elastics earchEngineCustom(ElasticsearchSearchEngine):
后端= ElasticsearchEngineBackendCustom
查询= ElasticsearchSearchQueryCustom
如您所见,我使用了 elasticsearch-dsl
来执行查询(MultiMatch)。下面这句话概括了那篇博客文章: ElasticsearchSearchQuerySetCustom().multi_match(...)
调用依赖于 ElasticsearchSearchQueryCustom,
而后者又依赖于 ElasticsearchEngineBackendCustom
。
然后在您的设置中放入elasticsearch配置,例如:
ELASTICSEARCH_DEFAULT_ANALYZER ='italian'
ELASTICSEARCH_INDEX_SETTINGS = {
设置:{[...]}
}
您可以从 Language Analyzers 文档中获取适用于 ELASTICSEARCH_INDEX_SETTINGS 的语言配置。
您还需要覆盖 SearchForm
:
#-*-编码:utf-8-*-
from __future__ import absolute_import
从haystack.forms import来自.backend的SearchForm
import ElasticsearchSearchQuerySetSetCustom
class SearchFormCustom(SearchForm):
def search(self):
query = self。 searchqueryset.query.clean(self.cleaned_data.get('q'))
如果不是self.is_valid()或没有查询:
返回self.no_query_found()
sqs = ElasticsearchSearchQuerySetCustom()。multi_match(query,['title ^ 8','text ^ 0.5'])
return sqs
字段 title
和 text
必须存在于您的索引中,脱字符(^)用于对字段进行加权(boost)。
您需要重写 Haystack 的 url 模式才能使用自定义表单:
urlpatterns =模式(
'search.views',
url('^ $',search_view_factory(form_class = SearchFormCustom),name ='haystack-search'),
)
就是这样,HTH:-)
注意不要在模板中使用 result.object.something
,而应改用索引中的字段,例如 result.title
,因为 result.object.title
会访问数据库!请参见 Haystack 最佳实践(Haystack Best Practices)
I am using Django, Haystack and Elasticsearch.
My search_index.py:
from haystack import indexes
from models import Advertisement
class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Search index definition for the ``Advertisement`` model."""

    # Primary document field; its content is rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    make = indexes.CharField()
    section = indexes.CharField()
    subcategory = indexes.CharField()
    # Fields populated from the model attribute of the same name.
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class this index is built for."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return published advertisements with their related objects pre-joined."""
        published = self.get_model().objects.filter(is_published=True)
        # A single select_related call with several names follows the same
        # relations as chaining one call per name.
        return published.select_related('make', 'section', 'subcategory')
search Form:
<form action="/search" method="get">
<input type="text-search" name="q">
<input type="submit" value="">
</form>
template:
{% block content %}
{% for result in page.object_list %}
<p>{{ result.object.title }}</p>
<p>{{ result.object.content }}</p>
<p>{{ result.object.images }}</p>
<p>{{ result.object.make }}</p>
<p>{{ result.object.section }}</p>
<p>{{ result.object.subcategory }}</p>
{% empty %}
<p>No result.</p>
{% endfor %}
{% endblock %}
When I query Elasticsearch directly with curl -XGET "http://localhost:9200/_search?q=fender+boss"
I get all the documents that contain "boss" and "fender".
But when I type "boss fender" in the search box, I get no results. From the search form I can only get results for a single word, for example "boss". How can I make it possible to search for multiple words?
I ran into this issue this month.
In order to perform the correct query you'll need to override some Haystack objects. I found the article "Extending Haystack's Elasticsearch backend" very helpful. It looks quite complicated at the beginning, but once you understand how it works... it works :-)
The blog article teaches how to implement Elasticsearch's nested query... well... I've implemented a basic multi_match query instead.
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery)
from haystack.query import SearchQuerySet
class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with settings-driven configuration and multi_match support.

    On construction two optional Django settings are read:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` -- when truthy, stored on the instance
      as ``DEFAULT_SETTINGS``.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` -- when truthy, stored on the instance
      as ``DEFAULT_ANALYZER`` (the class default is ``"snowball"``).
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the optional settings overrides."""
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the parent's search kwargs, optionally swapping in a multi_match query.

        All arguments except ``multi_match`` are forwarded positionally to the
        parent implementation.

        ``multi_match``, when truthy, must be a mapping with the keys
        ``'query'``, ``'fields'``, ``'tie_breaker'`` and
        ``'minimum_should_match'``; a missing key raises ``KeyError``. The
        ``'query'`` entry of the parent's result is then replaced with a
        ``multi_match`` clause built from those values.

        Returns the (possibly modified) dict produced by the parent.
        """
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset,
            end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if multi_match:
            out['query'] = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }

        return out

    def build_schema(self, fields):
        """Build the parent's schema and set an analyzer on indexed string fields.

        For every field whose mapping type is ``'string'`` and which is
        indexed, the analyzer is set to the field's own ``analyzer`` attribute
        when it has one, otherwise to ``self.DEFAULT_ANALYZER``. Fields that
        have a ``facet_for`` attribute are skipped unless their ``field_type``
        is ``'ngram'`` or ``'edge_ngram'``.

        Returns the ``(content_field_name, mapping)`` pair from the parent,
        with ``mapping`` updated in place.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_class in fields.values():
            # field_mapping is the dict stored inside `mapping`, so mutating it
            # is enough; no separate mapping.update() is required.
            field_mapping = mapping[field_class.index_fieldname]

            if field_mapping['type'] == 'string' and field_class.indexed:
                if not hasattr(field_class, 'facet_for') or field_class.field_type in ('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a MultiMatch query through elasticsearch-dsl and process the response.

        The raw response is handed to ``self._process_results`` and that
        method's return value is returned unchanged.
        """
        # Imported locally so elasticsearch_dsl is only needed when a
        # multi_match query is actually executed.
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        # Scope the search to this backend's index: a Search() without an
        # index queries every index reachable through the connection.
        # NOTE(review): assumes the parent backend exposes `self.index_name`
        # (the configured INDEX_NAME) -- confirm for your haystack version.
        search = Search().using(self.conn).index(self.index_name)

        raw = search.query(
            MultiMatch(query=u'{}'.format(query), fields=fields,
                       minimum_should_match=minimum_should_match, tie_breaker=tie_breaker)
        ).execute()

        return self._process_results(raw)
class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and execute a multi_match query."""

    # Class-level default so build_params() also works on queries for which
    # add_multi_match_query() was never called; without it the attribute
    # lookup in build_params() raises AttributeError.
    multi_match_query = None

    def _clone(self, *args, **kwargs):
        """Clone the query, carrying the stored multi_match parameters along.

        NOTE(review): assumes the parent ``_clone`` does not copy attributes it
        does not know about -- confirm against your haystack version.
        """
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(*args, **kwargs)
        clone.multi_match_query = self.multi_match_query
        return clone

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the multi_match query on the backend and cache results and hit count."""
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Store the multi_match parameters so build_params() can forward them."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Build the parent's search params and add ``multi_match`` when one is stored."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query
        return search_kwargs
class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """SearchQuerySet exposing a ``multi_match`` entry point."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone of this queryset with a multi_match query registered and run.

        The parameters are first stored on the clone's query object and the
        query is then executed on it immediately.
        """
        match_args = (query, fields, minimum_should_match, tie_breaker)

        new_sqs = self._clone()
        # Register the parameters first, then execute the query.
        new_sqs.query.add_multi_match_query(*match_args)
        new_sqs.query.multi_match(*match_args)
        return new_sqs
class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine wiring together the custom query and backend classes."""

    query = ElasticsearchSearchQueryCustom
    backend = ElasticsearchEngineBackendCustom
As you can see, I used elasticsearch-dsl
to perform the query (MultiMatch). This sentence summarizes the blog post: the ElasticsearchSearchQuerySetCustom().multi_match(...)
call depends on ElasticsearchSearchQueryCustom,
which in turn depends on ElasticsearchEngineBackendCustom
.
Then put in your settings the elasticsearch configuration, e.g:
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'
ELASTICSEARCH_INDEX_SETTINGS = {
"settings": {[...]}
}
You can grab your language(s) for ELASTICSEARCH_INDEX_SETTINGS
from Language Analyzers
You'll need to override also the SearchForm
:
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from haystack.forms import SearchForm
from .backend import ElasticsearchSearchQuerySetCustom
class SearchFormCustom(SearchForm):
    """Search form that runs the user's query as a boosted multi_match search."""

    def search(self):
        """Return a multi_match queryset for ``q``, or the no-query result.

        ``self.no_query_found()`` is returned when the form is invalid or when
        the query is empty, either before or after cleaning.
        """
        # Validate first: cleaned_data only exists once the form has been
        # validated, so reading it beforehand raises AttributeError.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # The caret suffix boosts a field: title matches weigh more than text.
        return ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])
The fields title
and text
must be in your index and the caret char is used to perform boost on fields.
You'll need to override the Haystack URL patterns in order to use the custom form:
# Route the empty path to the view produced by search_view_factory, using
# SearchFormCustom as its form class; the route is named 'haystack-search'.
urlpatterns = patterns(
'search.views',
url('^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),
)
That's it, HTH :-)
Take care not to use result.object.something
in your templates; use the fields stored on your index instead, e.g. result.title
, because result.object.title
hits the database! See Haystack Best Practices.
这篇关于搜索多个单词elasticsearch haystack的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!