关系跨越SQLAlchemy中的四个表 [英] Relationship spanning four tables in SQLAlchemy

查看:97
本文介绍了关系跨越SQLAlchemy中的四个表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试建立一个跨越四个表的关系.我根据此问题中的代码简化了代码,以匹配我的数据库.

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    b_id = Column(Integer, ForeignKey('b.id'))


    # FIXME: This fails with:
    #   "Relationship A.ds could not determine any unambiguous local/remote column pairs based on
    #    join condition and remote_side arguments.  Consider using the remote() annotation to
    #    accurately mark those elements of the join condition that are on the remote side of the relationship."
    #
    # ds = relationship("D", primaryjoin="and_(A.b_id == B.id, B.id == C.b_id, D.id == C.d_id)", viewonly=True)

    def dq(self):
        return sess.query(D).filter(and_(D.id == C.d_id,
                                         C.b_id == B.id,
                                         B.id == A.id,
                                         A.id == self.id))


class B(Base):
    __tablename__ = 'b'

    id = Column(Integer, primary_key=True)


class C(Base):
    __tablename__ = 'c'

    b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
    d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)


class D(Base):
    __tablename__ = 'd'

    id = Column(Integer, primary_key=True)


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

sess = Session(e)

sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.flush()


a1 = sess.query(A).first()
print a1.dq().all()
#print a1.ds

所以我的问题是'ds'关系的联接语法.当前的错误提到添加remote(),但是我还没有开始使用它.我也尝试使用secondaryjoin但没有运气. dq中的查询有效,我最终可以通过在代码中使用过滤器来解决该问题-我仍然很好奇如何在可能的情况下构建relathioship吗?

解决方案

我不是sqlalchemy专家,这是我的理解.

我认为sqlalchemy的关系API中混淆的主要根源是,参数primaryjoinsecondarysecondaryjoin的真正含义是什么.对我来说,它们是:

        primaryjoin              secondaryjoin(optional)
source -------------> secondary -------------------------> dest
 (A)                                                        (D)

现在,我们需要弄清楚中间部分应该是什么.尽管自定义联接在sqlalchemy中非常复杂,但是您仍然需要了解您的要求,即原始SQL.一种可能的解决方案是:

SELECT a.*, d.id 
FROM a JOIN (b JOIN c ON c.b_id = b.id JOIN d ON d.id = c.d_id) /* secondary */
ON a.b_id = b.id /* primaryjoin */ 
WHERE a.id = 1;

在这种情况下,源a与辅助" (b JOIN c .. JOIN d ..)连接,并且由于D已经在secondary中,因此没有辅助连接到D.我们有

ds1 = relationship(
    'D',
    primaryjoin='A.b_id == B.id',
    secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
    viewonly=True,  # almost always a better to add this
)

另一个解决方案可能是:

SELECT a.*, d.id 
FROM a JOIN (b JOIN c ON c.b_id = b.id) /* secondary */
ON a.b_id = b.id /* primaryjoin */
JOIN d ON c.d_id = d.id /* secondaryjoin */
WHERE a.id = 1;

此处a联接次级(b JOIN c..),次级联接dc.d_id = d.id,因此:

ds2 = relationship(
    'D',
    primaryjoin='A.b_id == B.id',
    secondary='join(B, C, B.id == C.b_id)',
    secondaryjoin='C.d_id == D.id',
    viewonly=True,  # almost always a better to add this
)

经验法则是将较长的联接路径放在辅助节点中,并将其链接到源和目标.

在性能方面,ds1ds2导致查询计划比dq稍微简单一些,但是我认为它们之间没有太大区别.计划者总是更了解.

这是更新的代码供您参考.请注意如何使用sess.query(A).options(joinedload('ds1'))急切加载关系:

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    b_id = Column(Integer, ForeignKey('b.id'))

    ds1 = relationship(
        'D',
        primaryjoin='A.b_id == B.id',
        secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
        viewonly=True,  # almost always a better to add this
    )
    ds2 = relationship(
        'D',
        secondary='join(B, C, B.id == C.b_id)',
        primaryjoin='A.b_id == B.id',
        secondaryjoin='C.d_id == D.id',
        viewonly=True,  # almost always a better to add this
    )

    def dq(self):
        return sess.query(D).filter(and_(D.id == C.d_id,
                                         C.b_id == B.id,
                                         B.id == A.id,
                                         A.id == self.id))


class B(Base):
    __tablename__ = 'b'

    id = Column(Integer, primary_key=True)


class C(Base):
    __tablename__ = 'c'

    b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
    d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)


class D(Base):
    __tablename__ = 'd'

    id = Column(Integer, primary_key=True)

    def __repr__(self):
        return str(self.id)


e = create_engine("sqlite://", echo=True)
Base.metadata.drop_all(e)
Base.metadata.create_all(e)

sess = Session(e)

sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(B(id=2))
sess.flush()
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.add(A(id=2, b_id=2))
sess.commit()


def get_ids(ds):
    return {d.id for d in ds}


a1 = sess.query(A).options(joinedload('ds1')).filter_by(id=1).first()
print('{} a1.ds1: {}'.format('=' * 30, a1.ds1))
assert get_ids(a1.dq()) == get_ids(a1.ds1)


a1 = sess.query(A).options(joinedload('ds2')).filter_by(id=1).first()
print('{} a1.ds2: {}'.format('=' * 30, a1.ds2))
assert get_ids(a1.dq()) == get_ids(a1.ds2)

a2 = sess.query(A).options(joinedload('ds2')).filter_by(id=2).first()
print('{} a2.ds1: {}; a2.ds2 {};'.format('=' * 30, a2.ds1, a2.ds2))
assert a2.ds1 == a2.ds2 == []

I am trying to make a relationship work that spans four tables. I simplified my code based on the code in this question to match my db.

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    b_id = Column(Integer, ForeignKey('b.id'))


    # FIXME: This fails with:
    #   "Relationship A.ds could not determine any unambiguous local/remote column pairs based on
    #    join condition and remote_side arguments.  Consider using the remote() annotation to
    #    accurately mark those elements of the join condition that are on the remote side of the relationship."
    #
    # ds = relationship("D", primaryjoin="and_(A.b_id == B.id, B.id == C.b_id, D.id == C.d_id)", viewonly=True)

    def dq(self):
        return sess.query(D).filter(and_(D.id == C.d_id,
                                         C.b_id == B.id,
                                         B.id == A.id,
                                         A.id == self.id))


class B(Base):
    __tablename__ = 'b'

    id = Column(Integer, primary_key=True)


class C(Base):
    __tablename__ = 'c'

    b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
    d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)


class D(Base):
    __tablename__ = 'd'

    id = Column(Integer, primary_key=True)


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

sess = Session(e)

sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.flush()


a1 = sess.query(A).first()
print a1.dq().all()
#print a1.ds

so my problem is the syntax for the join for the 'ds' relationship. The current error mentions adding remote(), but I have not gotten it to work. I also tried using secondaryjoin without luck. The query in 'dq' work and I was eventually able to work around it by using filters in my code - still I am curious how to construct the relathioship if possible ?

解决方案

I'm no sqlalchemy expert, and here's my understanding.

I think the main source of confusion in the sqlalchemy's relationship API is, what do the parameters primaryjoin, secondary, secondaryjoin really mean. To me, here they are:

        primaryjoin              secondaryjoin(optional)
source -------------> secondary -------------------------> dest
 (A)                                                        (D)

Now we need to figure out what the intermediate parts should be. Despite the fact that custom joins in sqlalchemy is unexpectedly complicated, you do need to understand what you're asking for, that is, the raw SQL. One possible solution is:

SELECT a.*, d.id 
FROM a JOIN (b JOIN c ON c.b_id = b.id JOIN d ON d.id = c.d_id) /* secondary */
ON a.b_id = b.id /* primaryjoin */ 
WHERE a.id = 1;

In this case the source a joins with the "secondary" (b JOIN c .. JOIN d ..), and there's no secondary join to D since it's already in the secondary. We have

ds1 = relationship(
    'D',
    primaryjoin='A.b_id == B.id',
    secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
    viewonly=True,  # almost always a better to add this
)

Another solution might be:

SELECT a.*, d.id 
FROM a JOIN (b JOIN c ON c.b_id = b.id) /* secondary */
ON a.b_id = b.id /* primaryjoin */
JOIN d ON c.d_id = d.id /* secondaryjoin */
WHERE a.id = 1;

Here a joins the secondary (b JOIN c..), and the secondary joins d with c.d_id = d.id, thus:

ds2 = relationship(
    'D',
    primaryjoin='A.b_id == B.id',
    secondary='join(B, C, B.id == C.b_id)',
    secondaryjoin='C.d_id == D.id',
    viewonly=True,  # almost always a better to add this
)

The rule of thumb is you put long join paths in the secondary, and link it to the source and dest.

Performance-wise, ds1 and ds2 leads to slightly simpler query plan than dq, but I don't think there's much difference between them. The planner always knows better.

Here's the updated code for your reference. Note how you can eagerly load the relationship with sess.query(A).options(joinedload('ds1')):

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    b_id = Column(Integer, ForeignKey('b.id'))

    ds1 = relationship(
        'D',
        primaryjoin='A.b_id == B.id',
        secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
        viewonly=True,  # almost always a better to add this
    )
    ds2 = relationship(
        'D',
        secondary='join(B, C, B.id == C.b_id)',
        primaryjoin='A.b_id == B.id',
        secondaryjoin='C.d_id == D.id',
        viewonly=True,  # almost always a better to add this
    )

    def dq(self):
        return sess.query(D).filter(and_(D.id == C.d_id,
                                         C.b_id == B.id,
                                         B.id == A.id,
                                         A.id == self.id))


class B(Base):
    __tablename__ = 'b'

    id = Column(Integer, primary_key=True)


class C(Base):
    __tablename__ = 'c'

    b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
    d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)


class D(Base):
    __tablename__ = 'd'

    id = Column(Integer, primary_key=True)

    def __repr__(self):
        return str(self.id)


e = create_engine("sqlite://", echo=True)
Base.metadata.drop_all(e)
Base.metadata.create_all(e)

sess = Session(e)

sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(B(id=2))
sess.flush()
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.add(A(id=2, b_id=2))
sess.commit()


def get_ids(ds):
    return {d.id for d in ds}


a1 = sess.query(A).options(joinedload('ds1')).filter_by(id=1).first()
print('{} a1.ds1: {}'.format('=' * 30, a1.ds1))
assert get_ids(a1.dq()) == get_ids(a1.ds1)


a1 = sess.query(A).options(joinedload('ds2')).filter_by(id=1).first()
print('{} a1.ds2: {}'.format('=' * 30, a1.ds2))
assert get_ids(a1.dq()) == get_ids(a1.ds2)

a2 = sess.query(A).options(joinedload('ds2')).filter_by(id=2).first()
print('{} a2.ds1: {}; a2.ds2 {};'.format('=' * 30, a2.ds1, a2.ds2))
assert a2.ds1 == a2.ds2 == []

这篇关于关系跨越SQLAlchemy中的四个表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆