使用PHP将用户类型的全文搜索查询解析到MySQL的WHERE子句中 [英] parsing user-typed Full Text Search queries into WHERE clause of MySQL using PHP

查看:68
本文介绍了使用PHP将用户类型的全文搜索查询解析到MySQL的WHERE子句中的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想将用户输入的FTS查询转换为MySQL的WHERE子句。所以这个功能就像Gmail的搜索一样。所以用户可以输入:

  from:me AND(to:john or to:jenny)dinner 

尽管我不认为这很重要,但表结构将如下所示:

 消息
- id
- from
- to
- title
- description
- time_created

MessageComment
- id
- message_id
- comment
- time_created

由于这是一个常见问题,我认为可能已经存在解决方案。有没有?

有类似这样的问题这里,但它是用于SQL Server的。

解决方案

以下代码由类Tokenizer,Token和QueryBuilder。
这可能不是迄今为止最优雅的解决方案,但它实际上可以满足您的要求:

 < 
// QueryBuilder语法:
// =====================
// SearchRule:= SimpleSearchRule {KeyWord}
// SimpleSearchRule:= Expression | SimpleSearchRule {'OR'表达式}
//表达式:= SimpleExpression | Expression {'AND'SimpleExpression}
// SimpleExpression:='('SimpleSearchRule')'| FieldExpression

$ input ='from:me AND(to:john or to:jenny)dinner party';

$ fieldMapping = array(
'id'=>'id',
'from'=>'from',
'to'=> ;'to',
'title'=>'title',
'description'=>'description',
'time_created'=>'time_created'
);
$ fullTextFields = array('title','description');

$ qb =新的QueryBuilder($ fieldMapping,$ fullTextFields);
尝试{
echo $ qb-> parseSearchRule($ input);
} catch(Exception $ error){
echo'解析搜索查询时发生错误:< br />'。
}

类令牌{
const KEYWORD ='KEYWORD',
OPEN_PAR ='OPEN_PAR',
CLOSE_PAR ='CLOSE_PAR',
FIELD ='FIELD',
AND_OP ='AND_OP',
OR_OP ='OR_OP';
public $ type;
public $ chars;
public $ position;

函数__construct($ type,$ chars,$ position){
$ this-> type = $ type;
$ this-> chars = $ chars;
$ this-> position = $ position;
}

函数__toString(){
return'Token [type ='。$ this-> type。',chars ='。$ this-> chars。 ',position ='。$ this-> position。']';
}
}

class Tokenizer {
private $ tokens = array();
私人$输入;
private $ currentPosition;

函数__construct($ input){
$ this-> input = trim($ input);
$ this-> currentPosition = 0;

$ b / **
* @return令牌
* /
函数getToken(){
if(count($ this- >令牌)== 0){
$ token = $ this-> nextToken();
if($ token == null){
return null;
}
array_push($ this-> tokens,$ token);
}
返回$ this->令牌[0];


函数consumeToken(){
$ token = $ this-> getToken();
if($ token == null){
return null;
}
array_shift($ this-> tokens);
返回$ token;


保护函数nextToken(){
$ reservedCharacters ='\:\s\(\)';
$ fieldExpr ='/^([^'.$reservedCharacters.']+)\:([^'.$reservedCharacters.']+)/';
$ keyWord ='/^([^'.$reservedCharacters.']+)/';
$ andOperator ='/ ^ AND\s /';
$ orOperator ='/ ^ OR \s /';
//删除空格..
$ whiteSpaces ='/ ^ \s + /';
$ remaining = substr($ this-> input,$ this-> currentPosition);
if(preg_match($ whiteSpaces,$ remaining,$ matches)){
$ this-> currentPosition + = strlen($ matches [0]);
$ remaining = substr($ this-> input,$ this-> currentPosition);
}
if($ remaining ==''){
return null;
}
switch(substr($ remaining,0,1)){
case'(':
return new Token(Token :: OPEN_PAR,'(',$ this - > currentPosition ++);
case')':
返回新的令牌(Token :: CLOSE_PAR,')',$ this-> currentPosition ++);

if(preg_match($ fieldExpr,$ remaining,$ matches)){
$ token = new Token(Token :: FIELD,$ matches [0],$ this->当前位置);
$ this-> currentPosition + = strlen($ matches [0]);
} else if(preg_match($ andOperator,$ remaining,$ matches)){
$ token = new Token(Token :: AND_OP,'AND',$ this-> currentPosition);
$ this-> currentPosition + = 3;
} else if(preg_match($ orOperator,$ remaining,$ matches)){
$ token = new Token(Token :: OR_OP,'OR',$ this-> currentPosition);
$ this-> currentPosition + = 2;
} else if(preg_match($ keyWord,$ remaining,$ matches)){
$ token = new Token(Token :: KEYWORD,$ matches [0],$ this-> currentPosition);
$ this-> currentPosition + = strlen($ matches [0]);
}否则抛出新的异常('Unable to tokenize:'。$ remaining);
返回$ token;
}
}

类QueryBuilder {
private $ fieldMapping;
private $ fulltextFields;

函数__construct($ fieldMapping,$ fulltextFields){
$ this-> fieldMapping = $ fieldMapping;
$ this-> fulltextFields = $ fulltextFields;


函数parseSearchRule($ input){
$ t = new Tokenizer($ input);
$ token = $ t-> getToken();
if($ token == null){
return'';
}
$ token = $ t-> getToken();
if($ token-> type!= Token :: KEYWORD){
$ searchRule = $ this-> parseSimpleSearchRule($ t);
} else {
$ searchRule ='';
}
$ keywords ='';
while($ token = $ t-> consumeToken()){
if($ token-> type!= Token :: KEYWORD){
throw new Exception('Only Only在搜索规则结束时允许');
}
if($ keywords!=''){
$ keywords。='';
}
$关键字。= $ token-> chars;

if($ keywords!=''){
$ matchClause ='MATCH(`'。(implode('`,`',$ this-> fulltextFields))。 '')AGAINST(';
$ keywords = $ matchClause.'\''。mysql_real_escape_string($ keywords)。'\'IN BOOLEAN MODE)';
if($ searchRule ==''){
$ searchRule = $ keywords;
} else {
$ searchRule ='('。$ searchRule。')AND('。$ keywords。')';
}
}
返回$ searchRule;


protected function parseSimpleSearchRule(Tokenizer $ t){
$ expressions = array();
do {
$ repeat = false;
$表达式[] = $ this-> parseExpression($ t);
$ token = $ t-> getToken();
if($ token-> type == Token :: OR_OP){
$ t-> consumeToken();
$ repeat = true;
}
} while($ repeat);
返回implode('OR',$表达式);


保护函数parseExpression(Tokenizer $ t){
$ expressions = array();
do {
$ repeat = false;
$表达式[] = $ this-> parseSimpleExpression($ t);
$ token = $ t-> getToken();
if($ token-> type == Token :: AND_OP){
$ t-> consumeToken();
$ repeat = true;
}
} while($ repeat);
返回implode('AND',$表达式);


protected function parseSimpleExpression(Tokenizer $ t){
$ token = $ t-> consumeToken();
if($ token-> type == Token :: OPEN_PAR){
$ spr = $ this-> parseSimpleSearchRule($ t);
$ token = $ t-> consumeToken();
if($ token == null || $ token-> type!= Token :: CLOSE_PAR){
throw new Exception('Expected closing brackets,found:'。$ token-> chars );
}
return'('。$ spr。')';
} else if($ token-> type == Token :: FIELD){
$ fieldVal = explode(':',$ token-> chars,2);
if(isset($ this-> fieldMapping [$ fieldVal [0]])){
return'`'。$ this-> fieldMapping [$ fieldVal [0]]。'`= \ '\'' 'mysql_real_escape_string($ fieldVal [1])。';
}
抛出新的异常('Unknown field selected:'。$ token-> chars);
} else {
抛出新的异常('Expected opening parenthesis or field-expression,found:'。$ token-> chars);
}
}
}
?>

更合适的解决方案是首先构建一个分析树,然后将其转换为查询进一步分析。

I want to convert user typed FTS queries in to MySQL's WHERE clause. So the functionality will be something like Gmail's search. So users will be able to type:

from:me AND (to:john OR to:jenny) dinner

Although I don't think it is important, the table structure will be something like:

Message
 - id
 - from
 - to
 - title
 - description
 - time_created

MessageComment
 - id
 - message_id
 - comment
 - time_created

Since this is a common problem, I thought there may be already existing solution. Is there any?

P.S. There is a similar question like this here, but it is for SQL Server.

解决方案

The following code consists of the classes Tokenizer, Token and QueryBuilder. It is probably not the most elegant solution ever, but it actually does what you were asking:

<?
// QueryBuilder Grammar:
// =====================
// SearchRule       := SimpleSearchRule { KeyWord }
// SimpleSearchRule := Expression | SimpleSearchRule { 'OR' Expression }
// Expression       := SimpleExpression | Expression { 'AND' SimpleExpression }
// SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression

$input = 'from:me AND (to:john OR to:jenny) dinner party';

$fieldMapping = array(
    'id' => 'id',
    'from' => 'from',
    'to' => 'to',
    'title' => 'title',
    'description' => 'description',
    'time_created' => 'time_created'
);
$fullTextFields = array('title','description');

$qb = new QueryBuilder($fieldMapping, $fullTextFields);
try {
    echo $qb->parseSearchRule($input);
} catch(Exception $error) {
    echo 'Error occurred while parsing search query: <br/>'.$error->getMessage();
}

class Token {
    const   KEYWORD = 'KEYWORD',
            OPEN_PAR='OPEN_PAR',
            CLOSE_PAR='CLOSE_PAR',
            FIELD='FIELD',
            AND_OP='AND_OP',
            OR_OP='OR_OP';
    public $type;
    public $chars;
    public $position;

    function __construct($type,$chars,$position) {
        $this->type = $type;
        $this->chars = $chars;
        $this->position = $position;
    }

    function __toString() {
        return 'Token[ type='.$this->type.', chars='.$this->chars.', position='.$this->position.' ]';
    }
}

class Tokenizer {
    private $tokens = array();
    private $input;
    private $currentPosition;

    function __construct($input) {
        $this->input = trim($input);
        $this->currentPosition = 0;
    }

    /**
     * @return Token
     */
    function getToken() {
        if(count($this->tokens)==0) {
            $token = $this->nextToken();
            if($token==null) {
                return null;
            }
            array_push($this->tokens, $token);
        }
        return $this->tokens[0];
    }

    function consumeToken() {
        $token = $this->getToken();
        if($token==null) {
            return null;
        }
        array_shift($this->tokens);
        return $token;
    }

    protected function nextToken() {
        $reservedCharacters = '\:\s\(\)';
        $fieldExpr = '/^([^'.$reservedCharacters.']+)\:([^'.$reservedCharacters.']+)/';
        $keyWord = '/^([^'.$reservedCharacters.']+)/';
        $andOperator = '/^AND\s/';
        $orOperator = '/^OR\s/';
        // Remove whitespaces ..
        $whiteSpaces = '/^\s+/';
        $remaining = substr($this->input,$this->currentPosition);
        if(preg_match($whiteSpaces, $remaining, $matches)) {
            $this->currentPosition += strlen($matches[0]);
            $remaining = substr($this->input,$this->currentPosition);
        }
        if($remaining=='') {
            return null;
        }
        switch(substr($remaining,0,1)) {
            case '(':
                return new Token(Token::OPEN_PAR,'(',$this->currentPosition++);
            case ')':
                return new Token(Token::CLOSE_PAR,')',$this->currentPosition++);
        }
        if(preg_match($fieldExpr, $remaining, $matches)) {
            $token = new Token(Token::FIELD, $matches[0], $this->currentPosition);
            $this->currentPosition += strlen($matches[0]);
        } else if(preg_match($andOperator, $remaining, $matches)) {
            $token = new Token(Token::AND_OP, 'AND', $this->currentPosition);
            $this->currentPosition += 3;
        } else if(preg_match($orOperator, $remaining, $matches)) {
            $token = new Token(Token::OR_OP, 'OR', $this->currentPosition);
            $this->currentPosition += 2;
        } else if(preg_match($keyWord, $remaining, $matches)) {
            $token = new Token(Token::KEYWORD, $matches[0], $this->currentPosition);
            $this->currentPosition += strlen($matches[0]);
        } else throw new Exception('Unable to tokenize: '.$remaining);
        return $token;
    }
}

class QueryBuilder {
    private $fieldMapping;
    private $fulltextFields;

    function __construct($fieldMapping, $fulltextFields) {
        $this->fieldMapping = $fieldMapping;
        $this->fulltextFields = $fulltextFields;
    }

    function parseSearchRule($input) {
        $t = new Tokenizer($input);
        $token = $t->getToken();
        if($token==null) {
            return '';
        }
        $token = $t->getToken();
        if($token->type!=Token::KEYWORD) {
            $searchRule = $this->parseSimpleSearchRule($t);
        } else {
            $searchRule = '';
        }
        $keywords = '';
        while($token = $t->consumeToken()) {
            if($token->type!=Token::KEYWORD) {
                throw new Exception('Only keywords allowed at end of search rule.');
            }
            if($keywords!='') {
                $keywords .= ' ';
            }
            $keywords .= $token->chars;
        }
        if($keywords!='') {
            $matchClause = 'MATCH (`'.(implode('`,`',$this->fulltextFields)).'`) AGAINST (';
            $keywords = $matchClause.'\''.mysql_real_escape_string($keywords).'\' IN BOOLEAN MODE)';
            if($searchRule=='') {
                $searchRule = $keywords;
            } else {
                $searchRule = '('.$searchRule.') AND ('.$keywords.')';
            }
        }
        return $searchRule;
    }

    protected function parseSimpleSearchRule(Tokenizer $t) {
        $expressions = array();
        do {
            $repeat = false;
            $expressions[] = $this->parseExpression($t);
            $token = $t->getToken();
            if($token->type==Token::OR_OP) {
                $t->consumeToken();
                $repeat = true;
            }
        } while($repeat);
        return implode(' OR ', $expressions);
    }

    protected function parseExpression(Tokenizer $t) {
        $expressions = array();
        do {
            $repeat = false;
            $expressions[] = $this->parseSimpleExpression($t);
            $token = $t->getToken();
            if($token->type==Token::AND_OP) {
                $t->consumeToken();
                $repeat = true;
            }
        } while($repeat);
        return implode(' AND ', $expressions);
    }

    protected function parseSimpleExpression(Tokenizer $t) {
        $token = $t->consumeToken();
        if($token->type==Token::OPEN_PAR) {
            $spr = $this->parseSimpleSearchRule($t);
            $token = $t->consumeToken();
            if($token==null || $token->type!=Token::CLOSE_PAR) {
                throw new Exception('Expected closing parenthesis, found: '.$token->chars);
            }
            return '('.$spr.')';
        } else if($token->type==Token::FIELD) {
            $fieldVal = explode(':', $token->chars,2);
            if(isset($this->fieldMapping[$fieldVal[0]])) {
                return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.mysql_real_escape_string($fieldVal[1]).'\'';
            }
            throw new Exception('Unknown field selected: '.$token->chars);
        } else {
            throw new Exception('Expected opening parenthesis or field-expression, found: '.$token->chars);
        }
    }
}
?>

A more proper solution would first build a parse tree, and then transform it into a query, after some further analysis.

这篇关于使用PHP将用户类型的全文搜索查询解析到MySQL的WHERE子句中的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆