关系跨越SQLAlchemy中的四个表 [英] Relationship spanning four tables in SQLAlchemy
问题描述
我正在尝试建立一个跨越四个表的关系.我根据此问题中的代码简化了代码,以匹配我的数据库.>
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(Integer, ForeignKey('b.id'))
# FIXME: This fails with:
# "Relationship A.ds could not determine any unambiguous local/remote column pairs based on
# join condition and remote_side arguments. Consider using the remote() annotation to
# accurately mark those elements of the join condition that are on the remote side of the relationship."
#
# ds = relationship("D", primaryjoin="and_(A.b_id == B.id, B.id == C.b_id, D.id == C.d_id)", viewonly=True)
def dq(self):
return sess.query(D).filter(and_(D.id == C.d_id,
C.b_id == B.id,
B.id == A.id,
A.id == self.id))
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
class C(Base):
__tablename__ = 'c'
b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)
class D(Base):
__tablename__ = 'd'
id = Column(Integer, primary_key=True)
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
sess = Session(e)
sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.flush()
a1 = sess.query(A).first()
print a1.dq().all()
#print a1.ds
所以我的问题是'ds'关系的联接语法.当前的错误提到添加remote(),但是我还没有开始使用它.我也尝试使用secondaryjoin但没有运气. dq中的查询有效,我最终可以通过在代码中使用过滤器来解决该问题-我仍然很好奇如何在可能的情况下构建relathioship吗?
我不是sqlalchemy专家,这是我的理解.
我认为sqlalchemy的关系API中混淆的主要根源是,参数primaryjoin
,secondary
,secondaryjoin
的真正含义是什么.对我来说,它们是:
primaryjoin secondaryjoin(optional)
source -------------> secondary -------------------------> dest
(A) (D)
现在,我们需要弄清楚中间部分应该是什么.尽管自定义联接在sqlalchemy中非常复杂,但是您仍然需要了解您的要求,即原始SQL.一种可能的解决方案是:
SELECT a.*, d.id
FROM a JOIN (b JOIN c ON c.b_id = b.id JOIN d ON d.id = c.d_id) /* secondary */
ON a.b_id = b.id /* primaryjoin */
WHERE a.id = 1;
在这种情况下,源a
与辅助" (b JOIN c .. JOIN d ..)
连接,并且由于D
已经在secondary
中,因此没有辅助连接到D
.我们有
ds1 = relationship(
'D',
primaryjoin='A.b_id == B.id',
secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
viewonly=True, # almost always a better to add this
)
另一个解决方案可能是:
SELECT a.*, d.id
FROM a JOIN (b JOIN c ON c.b_id = b.id) /* secondary */
ON a.b_id = b.id /* primaryjoin */
JOIN d ON c.d_id = d.id /* secondaryjoin */
WHERE a.id = 1;
此处a
联接次级(b JOIN c..)
,次级联接d
和c.d_id = d.id
,因此:
ds2 = relationship(
'D',
primaryjoin='A.b_id == B.id',
secondary='join(B, C, B.id == C.b_id)',
secondaryjoin='C.d_id == D.id',
viewonly=True, # almost always a better to add this
)
经验法则是将较长的联接路径放在辅助节点中,并将其链接到源和目标.
在性能方面,ds1
和ds2
导致查询计划比dq
稍微简单一些,但是我认为它们之间没有太大区别.计划者总是更了解.
这是更新的代码供您参考.请注意如何使用sess.query(A).options(joinedload('ds1'))
急切加载关系:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(Integer, ForeignKey('b.id'))
ds1 = relationship(
'D',
primaryjoin='A.b_id == B.id',
secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
viewonly=True, # almost always a better to add this
)
ds2 = relationship(
'D',
secondary='join(B, C, B.id == C.b_id)',
primaryjoin='A.b_id == B.id',
secondaryjoin='C.d_id == D.id',
viewonly=True, # almost always a better to add this
)
def dq(self):
return sess.query(D).filter(and_(D.id == C.d_id,
C.b_id == B.id,
B.id == A.id,
A.id == self.id))
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
class C(Base):
__tablename__ = 'c'
b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)
class D(Base):
__tablename__ = 'd'
id = Column(Integer, primary_key=True)
def __repr__(self):
return str(self.id)
e = create_engine("sqlite://", echo=True)
Base.metadata.drop_all(e)
Base.metadata.create_all(e)
sess = Session(e)
sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(B(id=2))
sess.flush()
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.add(A(id=2, b_id=2))
sess.commit()
def get_ids(ds):
return {d.id for d in ds}
a1 = sess.query(A).options(joinedload('ds1')).filter_by(id=1).first()
print('{} a1.ds1: {}'.format('=' * 30, a1.ds1))
assert get_ids(a1.dq()) == get_ids(a1.ds1)
a1 = sess.query(A).options(joinedload('ds2')).filter_by(id=1).first()
print('{} a1.ds2: {}'.format('=' * 30, a1.ds2))
assert get_ids(a1.dq()) == get_ids(a1.ds2)
a2 = sess.query(A).options(joinedload('ds2')).filter_by(id=2).first()
print('{} a2.ds1: {}; a2.ds2 {};'.format('=' * 30, a2.ds1, a2.ds2))
assert a2.ds1 == a2.ds2 == []
I am trying to make a relationship work that spans four tables. I simplified my code based on the code in this question to match my db.
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(Integer, ForeignKey('b.id'))
# FIXME: This fails with:
# "Relationship A.ds could not determine any unambiguous local/remote column pairs based on
# join condition and remote_side arguments. Consider using the remote() annotation to
# accurately mark those elements of the join condition that are on the remote side of the relationship."
#
# ds = relationship("D", primaryjoin="and_(A.b_id == B.id, B.id == C.b_id, D.id == C.d_id)", viewonly=True)
def dq(self):
return sess.query(D).filter(and_(D.id == C.d_id,
C.b_id == B.id,
B.id == A.id,
A.id == self.id))
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
class C(Base):
__tablename__ = 'c'
b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)
class D(Base):
__tablename__ = 'd'
id = Column(Integer, primary_key=True)
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
sess = Session(e)
sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.flush()
a1 = sess.query(A).first()
print a1.dq().all()
#print a1.ds
so my problem is the syntax for the join for the 'ds' relationship. The current error mentions adding remote(), but I have not gotten it to work. I also tried using secondaryjoin without luck. The query in 'dq' work and I was eventually able to work around it by using filters in my code - still I am curious how to construct the relathioship if possible ?
I'm no sqlalchemy expert, and here's my understanding.
I think the main source of confusion in the sqlalchemy's relationship API is, what do the parameters primaryjoin
, secondary
, secondaryjoin
really mean. To me, here they are:
primaryjoin secondaryjoin(optional)
source -------------> secondary -------------------------> dest
(A) (D)
Now we need to figure out what the intermediate parts should be. Despite the fact that custom joins in sqlalchemy is unexpectedly complicated, you do need to understand what you're asking for, that is, the raw SQL. One possible solution is:
SELECT a.*, d.id
FROM a JOIN (b JOIN c ON c.b_id = b.id JOIN d ON d.id = c.d_id) /* secondary */
ON a.b_id = b.id /* primaryjoin */
WHERE a.id = 1;
In this case the source a
joins with the "secondary" (b JOIN c .. JOIN d ..)
, and there's no secondary join to D
since it's already in the secondary
. We have
ds1 = relationship(
'D',
primaryjoin='A.b_id == B.id',
secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
viewonly=True, # almost always a better to add this
)
Another solution might be:
SELECT a.*, d.id
FROM a JOIN (b JOIN c ON c.b_id = b.id) /* secondary */
ON a.b_id = b.id /* primaryjoin */
JOIN d ON c.d_id = d.id /* secondaryjoin */
WHERE a.id = 1;
Here a
joins the secondary (b JOIN c..)
, and the secondary joins d
with c.d_id = d.id
, thus:
ds2 = relationship(
'D',
primaryjoin='A.b_id == B.id',
secondary='join(B, C, B.id == C.b_id)',
secondaryjoin='C.d_id == D.id',
viewonly=True, # almost always a better to add this
)
The rule of thumb is you put long join paths in the secondary, and link it to the source and dest.
Performance-wise, ds1
and ds2
leads to slightly simpler query plan than dq
, but I don't think there's much difference between them. The planner always knows better.
Here's the updated code for your reference. Note how you can eagerly load the relationship with sess.query(A).options(joinedload('ds1'))
:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(Integer, ForeignKey('b.id'))
ds1 = relationship(
'D',
primaryjoin='A.b_id == B.id',
secondary='join(B, C, B.id == C.b_id).join(D, C.d_id == D.id)',
viewonly=True, # almost always a better to add this
)
ds2 = relationship(
'D',
secondary='join(B, C, B.id == C.b_id)',
primaryjoin='A.b_id == B.id',
secondaryjoin='C.d_id == D.id',
viewonly=True, # almost always a better to add this
)
def dq(self):
return sess.query(D).filter(and_(D.id == C.d_id,
C.b_id == B.id,
B.id == A.id,
A.id == self.id))
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
class C(Base):
__tablename__ = 'c'
b_id = Column(Integer, ForeignKey('b.id'), primary_key=True)
d_id = Column(Integer, ForeignKey('d.id'), primary_key=True)
class D(Base):
__tablename__ = 'd'
id = Column(Integer, primary_key=True)
def __repr__(self):
return str(self.id)
e = create_engine("sqlite://", echo=True)
Base.metadata.drop_all(e)
Base.metadata.create_all(e)
sess = Session(e)
sess.add(D(id=1))
sess.add(D(id=2))
sess.add(B(id=1))
sess.add(B(id=2))
sess.flush()
sess.add(C(b_id=1, d_id=1))
sess.add(C(b_id=1, d_id=2))
sess.add(A(id=1, b_id=1))
sess.add(A(id=2, b_id=2))
sess.commit()
def get_ids(ds):
return {d.id for d in ds}
a1 = sess.query(A).options(joinedload('ds1')).filter_by(id=1).first()
print('{} a1.ds1: {}'.format('=' * 30, a1.ds1))
assert get_ids(a1.dq()) == get_ids(a1.ds1)
a1 = sess.query(A).options(joinedload('ds2')).filter_by(id=1).first()
print('{} a1.ds2: {}'.format('=' * 30, a1.ds2))
assert get_ids(a1.dq()) == get_ids(a1.ds2)
a2 = sess.query(A).options(joinedload('ds2')).filter_by(id=2).first()
print('{} a2.ds1: {}; a2.ds2 {};'.format('=' * 30, a2.ds1, a2.ds2))
assert a2.ds1 == a2.ds2 == []
这篇关于关系跨越SQLAlchemy中的四个表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!