解析SQL语法,设计模式 [英] Parsing SQL like syntax, design pattern

查看:173
本文介绍了解析SQL语法,设计模式的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试使用模拟sql语法来构建一个简单的sql like接口,用于键值存储。
这些值基本上是POJO



一个例子是

 从OBJ_POOL选择ABC,其中ABX = 45 AND ABY> '88'AND A.B.Z!='abc'; 

OBJ_POOL只是同一个类的POJO列表。在这个例子中,A将是基类。

 类A 
类B
String C
整数X
字符串Y
字符串Z

现在ABC是等效的.getB()。getC()



我正在使用Antlr来解析上述语句以获取AST,然后 hoping 使用Apache BeanUtils来反映获取/设置字段名称。



我写了这个语法建立一个AST
现在我面临两个问题


  1. 访问者应该如何执行where子句? ABX = 45表示所有对象的字段X为45,应该如何进行过滤呢?有没有什么好办法呢?

  2. 有没有办法遍历生成的AST而不让访客代码与自定义逻辑(存储访问,属性getter / setters等..)

第二个问题是更令人担忧,因为可能有很多



简而言之,任何建议/链接/设计模式可以很好地解析sql select statment的一小部分,将不胜感激。 p>

谢谢

解决方案

你可以这样做,就像我在< a href =http://bkiers.blogspot.com/2011/03/creating-your-own-programming-language.html =noreferrer>我的博客帖子(因为我知道你读那些,我不会详细介绍)。在这种情况下唯一的区别是每行数据都有自己的作用域。传递此范围的简单方法是将其作为 eval(...)方法的参数提供。



下面是一个关于如何实现的快速演示。请注意,我根据我的博客文章快速地将这些内容合并在一起:并不是所有的功能都可用(请参阅许多 TODO ,并且可能(小)bug好的,使用你自己的风险!)



除了ANTLR v3.3,您需要以下3个文件用于此演示:



Select.g



 语法选择; 

options {
output = AST;
}

tokens {
//想象的标记
ROOT;
ATTR_LIST;
UNARY_MINUS;

//文字标记
Eq ='=';
NEq ='!=';
LT ='<';
LTEq ='< =';
GT ='>';
GTEq ='> =';
减号=' - ';
Not ='!';
Select ='select';
From ='from';
其中='where';
And ='AND';
Or ='OR';
}

parse
:select_stat EOF - > ^(ROOT select_stat)
;

select_stat
:选择attr_list从Id where_stat';' - > ^(选择attr_list Id where_stat)
;

attr_list
:Id(','Id)* - > ^(ATTR_LIST Id +)
;

where_stat
:其中expr - > expr
| - > ^(Eq Int [1] Int [1])
//否'where',插入'1 = 1',它始终为true
;

expr
:or_expr
;

or_expr
:and_expr(或^和_expr)*
;

and_expr
:eq_expr(And ^ eq_expr)*
;

eq_expr
:rel_expr((Eq | NEq)^ rel_expr)*
;

rel_expr
:unary_expr((LT | LTEq | GT | GTEq)^ unary_expr)?
;

unary_expr
:减号 - > ^(UNARY_MINUS atom)
|不是原子 - > ^(不原子)
| atom
;

atom
:Str
| Int
| Id
| '('expr')' - > expr
;

Id:('a'..'z'|'A'..'Z'|'_')('a'..'z'|'A'..'Z '|'_'| Digit)*;
Str:'\''('\'\''|〜('\''|'\r'|'\\\
'))*'\''
{
//剥离周围的引号,并用'
setText($ text.substring(1,$ text.length() - 1)替换''.replace(', ));
}
;
Int:Digit +;
空格:(''|'\t'|'\r'|'\\\
'){skip();};

片段数字:'0'..'9';



SelectWalker.g



 树语法SelectWalker; 

options {
tokenVocab = Select;
ASTLabelType = CommonTree;
}

@header {
import java.util.List;
import java.util.Map;
import java.util.Set;
}

@members {

private Map< String,List&B>>数据池;

public SelectWalker(CommonTreeNodeStream nodes,Map< String,List& B>数据){
super(nodes);
dataPool = data;
}
}

查询返回[List< List< Object>>
:^(ROOT select_stat){$ result =(List< List< Object>>)$ select_stat.node.eval(null);}
;

select_stat返回[节点节点]
:^(选择attr_list Id expr)
{$ node = new SelectNode($ attr_list.attributes,dataPool.get($ Id.text ),$ expr.node);}
;

attr_list返回[List< String>属性]
@init {$ attributes = new ArrayList< String>();}
:^(ATTR_LIST(Id {$ attributes.add($ Id.text);})+)
;

expr返回[节点节点]
:^(或a = expr b = expr){$ node = null; / * TODO * /}
| ^(And a = expr b = expr){$ node = new AndNode($ a.node,$ b.node);}
| ^(Eq a = expr b = expr){$ node = new EqNode($ a.node,$ b.node);}
| ^(NEq a = expr b = expr){$ node = new NEqNode($ a.node,$ b.node);}
| ^(LT a = expr b = expr){$ node = null; / * TODO * /}
| ^(LTEq a = expr b = expr){$ node = null; / * TODO * /}
| ^(GT a = expr b = expr){$ node = new GTNode($ a.node,$ b.node);}
| ^(GTEq a = expr b = expr){$ node = null; / * TODO * /}
| ^(UNARY_MINUS a = expr){$ node = null; / * TODO * /}
| ^(not a = expr){$ node = null; / * TODO * /}
| Str {$ node = new AtomNode($ Str.text);}
| Int {$ node = new AtomNode(Integer.valueOf($ Int.text));}
| Id {$ node = new IdNode($ Id.text);}
;



Main.java



(是的,将所有这些Java类放在同一个文件中: Main.java

  import org.antlr.runtime。*; 
import org.antlr.runtime.tree。*;
import org.antlr.stringtemplate。*;
import java.util。*;

public class Main {

static Map< String,List& B>> getData(){
Map< String,List< B>> map = new HashMap< String,List< B>();
列表< B> data = new ArrayList< B>();
data.add(new B(id_1,345,89,abd));
data.add(new B(id_2,45,89,abd));
data.add(new B(id_3,1,89,abd));
data.add(new B(id_4,45,8,abd));
data.add(new B(id_5,4589,abc));
data.add(new B(id_6,45,99,abC));
map.put(poolX,data);
返回地图;
}

public static void main(String [] args)throws异常{
String src =从poolX中选择C,Y,其中X = 45 AND Y> '88 'AND Z!='abc';;
SelectLexer lexer = new SelectLexer(new ANTLRStringStream(src));
SelectParser parser = new SelectParser(new CommonTokenStream(lexer));
CommonTree树=(CommonTree)parser.parse()。getTree();
SelectWalker walker = new SelectWalker(new CommonTreeNodeStream(tree),getData());
列表<列表<对象>> result = walker.query();
for(List< Object> row:result){
System.out.println(row);
}
}
}

class B {

String C;
整数X;
字符串Y;
String Z;

B(String c,Integer x,String y,String z){
C = c;
X = x;
Y = y;
Z = z;
}

对象getAttribute(String属性){
if(attribute.equals(C))return C;
if(attribute.equals(X))返回X;
if(attribute.equals(Y))返回Y;
if(attribute.equals(Z))return Z;
抛出新的RuntimeException(未知属性:B.+属性);
//或使用您的Apache Bean-util API,或者甚至在这里反映,而不是上述...
}
}

接口节点{
对象eval(B b);
}

class AtomNode implements Node {

final Object value;

AtomNode(Object v){
value = v;
}

public Object eval(B b){
return value;
}
}

抽象类BinNode实现Node {

final Node left;
最终节点权限;

BinNode(Node l,Node r){
left = l;
right = r;
}

public abstract Object eval(B b);
}

class AndNode extends BinNode {

AndNode(Node l,Node r){
super(l,r);
}

@Override
public Object eval(B b){
return(Boolean)super.left.eval(b)&& (布尔型)super.right.eval(B);
}
}

class EqNode extends BinNode {

EqNode(Node l,Node r){
super(l,r) ;
}

@Override
public Object eval(B b){
return super.left.eval(b).equals(super.right.eval(b ));
}
}

class NEqNode extends BinNode {

NEqNode(Node l,Node r){
super(l,r) ;
}

@Override
public Object eval(B b){
return!super.left.eval(b).equals(super.right.eval( b));
}
}

class GTNode extends BinNode {

GTNode(Node l,Node r){
super(l,r) ;
}

@Override
public Object eval(B b){
return((Comparable)super.left.eval(b))。compareTo((Comparable )super.right.eval(b))> 0;
}
}

class IdNode implements Node {

final String id;

IdNode(String i){
id = i;
}

@Override
public Object eval(B b){
return b.getAttribute(id);
}
}

class SelectNode implements Node {

final List< String>属性;
final列表< B>数据;
最终节点表达式;

SelectNode(List< String> a,List< B> d,Node e){
attributes = a;
data = d;
expression = e;
}

@Override
public Object eval(B ignored){
列表<对象>> result = new ArrayList< List< Object>>(); (b b:data)
{
if((Boolean)expression.eval(b)){
//'b'passed,check where attributes to include
List< ;对象> row = new ArrayList< Object>();
for(String attr:attributes){
row.add(b.getAttribute(attr));
}
result.add(row);
}
}
return result;
}
}

如果您现在生成词法分析器,解析器和树步行器并运行Main类:

  java -cp antlr-3.3.jar org.antlr.Tool Select.g 
java -cp antlr-3.3.jar org.antlr.Tool SelectWalker.g
javac -cp antlr-3.3.jar * .java
java -cp。:antlr-3.3 .jar主

您将看到查询的输出:

 从poolX中选择C,Y,其中X = 45 AND Y> '88'AND Z!='abc'; 

输入:

  CXYZ 
id_134589abd
id_24589abd
id_31 89abd
id_4 458abd
id_54589abc
id_64599abC

是:

  [id_2,89] 
[id_6,99]

请注意,如果其中语句被省略,则表达式 1 = 1 将自动插入,导致查询:

 从poolX中选择C,Y; 

打印以下内容:

  [id_1,89] 
[id_2,89]
[id_3,89]
[id_4,8]
[id_5,89]
[ id_6,99]


I am trying mock sql syntax to build a simple sql like interface to a key-value storage. The values are essentially POJOs

An example would be

select A.B.C from OBJ_POOL where A.B.X = 45 AND A.B.Y > '88' AND A.B.Z != 'abc';

OBJ_POOL is just a list of POJOs of the same class. In this example A would be the base class.

Class A
    Class B
        String C
        Integer X
        String Y
        String Z

Now A.B.C is equivalent A.getB().getC()

I am using Antlr to parse the above statement to get an AST, and then hoping to use Apache BeanUtils to reflectively get/set the field names.

I wrote the grammar thats builds an AST Now I am facing two problems

  1. How should the visitor be implemented for the where clause ? A.B.X = 45 implies all objects having field X as 45, how should the filtering happen is there any nice way to do this ?
  2. Is there any way to traverse the generated AST without cluttering the visitor code with custom logic (storage access,property getters/setters etc..)

The second problem is more worrying since there might be many things that the statement might do.

In a nutshell any suggestions/links/design-patterns to nicely parse a small subset of the sql select statment would be greatly appreciated

Thanks

解决方案

You can do this much like how I demonstrated in my blog posts (and since I know you read those, I won't go in much detail). The only difference in this case is that each of your rows of data has its own scope. An easy way to pass this scope along is by providing it as a parameter to the eval(...) method.

Below is a quick demo of how this could be implemented. Note that I quickly hacked this together based on my blog posts: not all functionality is available (see the many TODO's, and there are likely (small) bugs in it as well. Use at your own risk!).

Besides ANTLR v3.3, you need the following 3 files for this demo:

Select.g

grammar Select;

options {
  output=AST;
}

tokens {
  // imaginary tokens
  ROOT;
  ATTR_LIST;
  UNARY_MINUS;

  // literal tokens
  Eq     = '=';
  NEq    = '!=';
  LT     = '<';
  LTEq   = '<=';
  GT     = '>';
  GTEq   = '>=';
  Minus  = '-';
  Not    = '!';
  Select = 'select';
  From   = 'from';
  Where  = 'where';
  And    = 'AND';
  Or     = 'OR';
}

parse
 : select_stat EOF -> ^(ROOT select_stat)
 ;

select_stat
 : Select attr_list From Id where_stat ';' -> ^(Select attr_list Id where_stat)
 ;

attr_list
 : Id (',' Id)* -> ^(ATTR_LIST Id+)
 ;

where_stat
 : Where expr -> expr
 |            -> ^(Eq Int["1"] Int["1"]) 
                 // no 'where', insert '1=1' which is always true
 ;

expr
 : or_expr
 ;

or_expr
 : and_expr (Or^ and_expr)*
 ;

and_expr
 : eq_expr (And^ eq_expr)*
 ;

eq_expr
 : rel_expr ((Eq | NEq)^ rel_expr)*
 ;

rel_expr
 : unary_expr ((LT | LTEq | GT | GTEq)^ unary_expr)?
 ;

unary_expr
 : Minus atom -> ^(UNARY_MINUS atom)
 | Not atom   -> ^(Not atom)
 | atom
 ;

atom
 : Str
 | Int
 | Id
 | '(' expr ')' -> expr
 ;

Id    : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | Digit)*;
Str   : '\'' ('\'\'' | ~('\'' | '\r' | '\n'))* '\'' 
        {
         // strip the surrounding quotes and replace '' with '
         setText($text.substring(1, $text.length() - 1).replace("''", "'"));
        }
      ;
Int   : Digit+;
Space : (' ' | '\t' | '\r' | '\n') {skip();};

fragment Digit : '0'..'9';

SelectWalker.g

tree grammar SelectWalker;

options {
  tokenVocab=Select;
  ASTLabelType=CommonTree;
}

@header {
  import java.util.List;
  import java.util.Map;
  import java.util.Set;
}

@members {

  private Map<String, List<B>> dataPool;

  public SelectWalker(CommonTreeNodeStream nodes, Map<String, List<B>> data) {
    super(nodes);
    dataPool = data;
  }
}

query returns [List<List<Object>> result]
 : ^(ROOT select_stat) {$result = (List<List<Object>>)$select_stat.node.eval(null);}
 ;

select_stat returns [Node node]
 : ^(Select attr_list Id expr) 
    {$node = new SelectNode($attr_list.attributes, dataPool.get($Id.text), $expr.node);}
 ;

attr_list returns [List<String> attributes]
@init{$attributes = new ArrayList<String>();}
 : ^(ATTR_LIST (Id {$attributes.add($Id.text);})+)
 ;

expr returns [Node node]
 : ^(Or a=expr b=expr)   {$node = null; /* TODO */}
 | ^(And a=expr b=expr)  {$node = new AndNode($a.node, $b.node);}
 | ^(Eq a=expr b=expr)   {$node = new EqNode($a.node, $b.node);}
 | ^(NEq a=expr b=expr)  {$node = new NEqNode($a.node, $b.node);}
 | ^(LT a=expr b=expr)   {$node = null; /* TODO */}
 | ^(LTEq a=expr b=expr) {$node = null; /* TODO */}
 | ^(GT a=expr b=expr)   {$node = new GTNode($a.node, $b.node);}
 | ^(GTEq a=expr b=expr) {$node = null; /* TODO */}
 | ^(UNARY_MINUS a=expr) {$node = null; /* TODO */}
 | ^(Not a=expr)         {$node = null; /* TODO */}
 | Str                   {$node = new AtomNode($Str.text);}
 | Int                   {$node = new AtomNode(Integer.valueOf($Int.text));}
 | Id                    {$node = new IdNode($Id.text);}
 ;

Main.java

(yes, stick all these Java classes in the same file: Main.java)

import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
import org.antlr.stringtemplate.*;
import java.util.*;

public class Main {

  static Map<String, List<B>> getData() {
    Map<String, List<B>> map = new HashMap<String, List<B>>();
    List<B> data = new ArrayList<B>();
    data.add(new B("id_1", 345, "89", "abd"));
    data.add(new B("id_2", 45, "89", "abd"));
    data.add(new B("id_3", 1, "89", "abd"));
    data.add(new B("id_4", 45, "8", "abd"));
    data.add(new B("id_5", 45, "89", "abc"));
    data.add(new B("id_6", 45, "99", "abC"));
    map.put("poolX", data);
    return map;
  }

  public static void main(String[] args) throws Exception {
    String src = "select C, Y from poolX where X = 45 AND Y > '88' AND Z != 'abc';";
    SelectLexer lexer = new SelectLexer(new ANTLRStringStream(src));
    SelectParser parser = new SelectParser(new CommonTokenStream(lexer));
    CommonTree tree = (CommonTree)parser.parse().getTree();  
    SelectWalker walker = new SelectWalker(new CommonTreeNodeStream(tree), getData());  
    List<List<Object>> result = walker.query();
    for(List<Object> row : result) {
      System.out.println(row);
    }
  }
}

class B {

  String C;
  Integer X;
  String Y;
  String Z;

  B(String c, Integer x, String y, String z) {
    C = c;
    X = x;
    Y = y;
    Z = z;
  }

  Object getAttribute(String attribute) {
    if(attribute.equals("C")) return C;
    if(attribute.equals("X")) return X;
    if(attribute.equals("Y")) return Y;
    if(attribute.equals("Z")) return Z;
    throw new RuntimeException("Unknown attribute: B." + attribute);
    // or use your Apache Bean-util API, or even reflection here instead of the above...
  }
}

interface Node {
  Object eval(B b);
}

class AtomNode implements Node {

  final Object value;

  AtomNode(Object v) {
    value = v;
  }

  public Object eval(B b) {
    return value;
  }
}

abstract class BinNode implements Node {

  final Node left;
  final Node right;

  BinNode(Node l, Node r) {
    left = l;
    right = r;
  }

  public abstract Object eval(B b);
}

class AndNode extends BinNode {

  AndNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return (Boolean)super.left.eval(b) && (Boolean)super.right.eval(b);
  }
}

class EqNode extends BinNode {

  EqNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return super.left.eval(b).equals(super.right.eval(b));
  }
}

class NEqNode extends BinNode {

  NEqNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return !super.left.eval(b).equals(super.right.eval(b));
  }
}

class GTNode extends BinNode {

  GTNode(Node l, Node r) {
    super(l, r);
  }

  @Override
  public Object eval(B b) {
    return ((Comparable)super.left.eval(b)).compareTo((Comparable)super.right.eval(b)) > 0;
  }
}

class IdNode implements Node {

  final String id;

  IdNode(String i) {
    id = i;
  }

  @Override
  public Object eval(B b) {
    return b.getAttribute(id);
  }
}

class SelectNode implements Node {

  final List<String> attributes;
  final List<B> data;
  final Node expression;

  SelectNode(List<String> a, List<B> d, Node e) {
    attributes = a;
    data = d;
    expression = e;
  }

  @Override
  public Object eval(B ignored) {
    List<List<Object>> result = new ArrayList<List<Object>>();
    for(B b : data) {
      if((Boolean)expression.eval(b)) {
        // 'b' passed, check which attributes to include
        List<Object> row = new ArrayList<Object>();
        for(String attr : attributes) {
          row.add(b.getAttribute(attr));
        }
        result.add(row);
      }
    }
    return result;
  }
}

If you now generate the lexer, parser and tree walker and run the Main class:

java -cp antlr-3.3.jar org.antlr.Tool Select.g 
java -cp antlr-3.3.jar org.antlr.Tool SelectWalker.g 
javac -cp antlr-3.3.jar *.java
java -cp .:antlr-3.3.jar Main

you will see that the output for the query:

select C, Y from poolX where X = 45 AND Y > '88' AND Z != 'abc';

with input:

C           X       Y       Z
"id_1"      345     "89"    "abd"
"id_2"      45      "89"    "abd"
"id_3"      1       "89"    "abd"
"id_4       45      "8"     "abd"
"id_5"      45      "89"    "abc"
"id_6"      45      "99"    "abC"

is:

[id_2, 89]
[id_6, 99]

And note that if the where statement is omitted, the expression 1 = 1 is automatically inserted, causing the query:

select C, Y from poolX;

to print the following:

[id_1, 89]
[id_2, 89]
[id_3, 89]
[id_4, 8]
[id_5, 89]
[id_6, 99]

这篇关于解析SQL语法,设计模式的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆