Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 66 additions & 5 deletions src/qana/RelationGraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <limits>
#include <memory>
#include <stdexcept>
#include <unordered_set>

// LSST headers
#include "lsst/log/Log.h"
Expand Down Expand Up @@ -79,10 +80,12 @@ using query::QueryTemplate;
using query::SelectStmt;
using query::TableRef;
using query::TableRefList;
using query::ValueExpr;
using query::ValueExprPtr;
using query::ValueExprPtrVector;
using query::ValueFactor;
using query::ValueFactorPtr;
using query::WhereClause;

std::ostream& operator<<(std::ostream& out, Vertex const& vertex) {
out << "Vertex("
Expand Down Expand Up @@ -155,6 +158,13 @@ void Vertex::insert(Edge const& e) {
}
}

std::shared_ptr<CompPredicate> Vertex::makeSubChunkCompPredicate() const {
auto const table = tr.hasAlias() ? tr.getAlias() : info->table;
return std::make_shared<CompPredicate>(ValueExpr::newColumnExpr("", table, "", SUB_CHUNK_COLUMN),
CompPredicate::EQUALS_OP,
ValueExpr::newSimple(ValueFactor::newConstFactor(SUBCHUNK_TAG)));
}

// ----------------------------------------------------------------
// RelationGraph implementation

Expand Down Expand Up @@ -1029,12 +1039,14 @@ void RelationGraph::rewrite(SelectStmtPtrVector& outputs, QueryMapping& mapping)
// Find directors for which overlap is required. At the same time, rewrite
// all table references as their corresponding chunk templates.
std::vector<Vertex*> overlapRefs;
LOGS(_log, LOG_LVL_INFO, "1: rewrite _vertices.size()=" << _vertices.size());
for (ListIter i = _vertices.begin(), e = _vertices.end(); i != e; ++i) {
i->rewriteAsChunkTemplate();
if (i->info->kind == TableInfo::DIRECTOR && i->overlap > 0.0) {
overlapRefs.push_back(&(*i));
}
}
LOGS(_log, LOG_LVL_INFO, "2: rewrite overlapRefs.size()=" << overlapRefs.size());
if (overlapRefs.empty()) {
// There is no need for sub-chunking, so leave it off for now.
//
Expand All @@ -1058,31 +1070,80 @@ void RelationGraph::rewrite(SelectStmtPtrVector& outputs, QueryMapping& mapping)
"references that require overlap");
}

// Add sub-chunk constraint for all director & match tables
// Collect full table names (or aliases) in a set to avoid duplicates in setting sub-chunk constraints
std::unordered_set<std::string> tableNames;
for (ListIter i = _vertices.begin(), e = _vertices.end(); i != e; ++i) {
std::string kindName;
switch (i->info->kind) {
case TableInfo::DIRECTOR:
kindName = "DIRECTOR";
break;
case TableInfo::CHILD:
kindName = "CHILD";
break;
case TableInfo::MATCH:
kindName = "MATCH";
break;
default:
kindName = "UNKNOWN";
break;
}
LOGS(_log, LOG_LVL_INFO,
"3: rewrite i->tr.getAlias()=" << i->tr.getAlias() << " i->info->table=" << i->info->table
<< " overlap=" << i->overlap << " kind=" << kindName);
if (i->info->kind == TableInfo::DIRECTOR || i->info->kind == TableInfo::MATCH) {
std::string tableName;
if (i->tr.hasAlias()) {
tableName = i->tr.getAlias();
} else {
tableName = i->info->database + "." + i->info->table;
}
if (tableNames.count(tableName) == 0) {
tableNames.insert(tableName);
auto predicate = i->makeSubChunkCompPredicate();
BoolFactor::Ptr bfactor = std::make_shared<BoolFactor>();
bfactor->_terms.push_back(predicate);
_query->getWhereClause(true).prependAndTerm(bfactor);
}
}
}

// Rewrite director table references not requiring overlap as their
// corresponding sub-chunk templates, and record the names of all
// sub-chunked tables.
for (ListIter i = _vertices.begin(), e = _vertices.end(); i != e; ++i) {
if (i->info->kind == TableInfo::DIRECTOR) {
if (i->overlap == 0.0) {
i->rewriteAsSubChunkTemplate();
i->rewriteAsChunkTemplate();
}
DbTable dbTable(i->info->database, i->info->table);
LOGS(_log, LOG_LVL_TRACE, "rewrite db=" << dbTable.db << " table=" << dbTable.table);
LOGS(_log, LOG_LVL_INFO,
"4: rewrite db=" << i->info->database << " table=" << i->info->table
<< " overlap=" << i->overlap);
mapping.insertSubChunkTable(dbTable);
}
}
unsigned n = static_cast<unsigned>(overlapRefs.size());
unsigned numPermutations = 1 << n;
// Each director requiring overlap must be rewritten as both a sub-chunk
// template and an overlap sub-chunk template. There are 2ⁿ different
// Each director requiring overlap must be rewritten as both a chunk
// template and chunk overlap template. There are 2ⁿ different
// template permutations for n directors requiring overlap; generate them
// all.
for (unsigned p = 0; p < numPermutations; ++p) {
for (unsigned i = 0; i < n; ++i) {
if ((p & (1 << i)) != 0) {
overlapRefs[i]->rewriteAsOverlapTemplate();
LOGS(_log, LOG_LVL_INFO,
"5.o: rewrite db=" << overlapRefs[i]->info->database
<< " table=" << overlapRefs[i]->info->table
<< " overlap=" << overlapRefs[i]->overlap);
} else {
overlapRefs[i]->rewriteAsSubChunkTemplate();
overlapRefs[i]->rewriteAsChunkTemplate();
LOGS(_log, LOG_LVL_INFO,
"5.c: rewrite db=" << overlapRefs[i]->info->database
<< " table=" << overlapRefs[i]->info->table
<< " overlap=" << overlapRefs[i]->overlap);
}
}
// Given the use of shared_ptr by the IR classes, we could shallow
Expand Down
14 changes: 6 additions & 8 deletions src/qana/RelationGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@
namespace lsst::qserv {
namespace query {
class ColumnRef;
class CompPredicate;
class QueryContext;
class SelectStmt;
} // namespace query
Expand Down Expand Up @@ -577,19 +578,16 @@ struct Vertex {
tr.setTable(info->getChunkTemplate());
}

/// `rewriteAsSubChunkTemplate` rewrites `tr` to contain a sub-chunk
/// specific name pattern.
void rewriteAsSubChunkTemplate() {
tr.setDb(info->getSubChunkDb());
tr.setTable(info->getSubChunkTemplate());
}

/// `rewriteAsOverlapTemplate` rewrites `tr` to contain an overlap
/// sub-chunk specific name pattern.
void rewriteAsOverlapTemplate() {
tr.setDb(info->getSubChunkDb());
tr.setDb(info->database);
tr.setTable(info->getOverlapTemplate());
}

/// `makeSubChunkCompPredicate` creates a shared pointer to a new
/// comparison predicate for the sub-chunk column.
std::shared_ptr<query::CompPredicate> makeSubChunkCompPredicate() const;
};

std::ostream& operator<<(std::ostream& out, Vertex const& vertex);
Expand Down
8 changes: 2 additions & 6 deletions src/qana/TableInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
#include <vector>

// Qserv headers
#include "global/constants.h" // for SUBCHUNKDB_PREFIX
#include "global/constants.h"

// Forward declarations
namespace lsst::qserv::query {
Expand Down Expand Up @@ -134,12 +134,8 @@ struct TableInfo {
return false;
}

std::string const getSubChunkDb() const { return SUBCHUNKDB_PREFIX + database + "_" + CHUNK_TAG; }
std::string const getChunkTemplate() const { return table + "_" + CHUNK_TAG; }
std::string const getSubChunkTemplate() const { return table + "_" + CHUNK_TAG + "_" + SUBCHUNK_TAG; }
std::string const getOverlapTemplate() const {
return table + "FullOverlap_" + CHUNK_TAG + "_" + SUBCHUNK_TAG;
}
std::string const getOverlapTemplate() const { return table + "FullOverlap_" + CHUNK_TAG; }

virtual void dump(std::ostream& os) const;
};
Expand Down
58 changes: 37 additions & 21 deletions src/qproc/testQueryAnaGeneral.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,16 +199,20 @@ BOOST_AUTO_TEST_CASE(RestrictorNeighborCount) {
"where qserv_areaspec_box(6,6,7,7) AND rFlux_PS<0.005 AND "
"scisql_angSep(o1.ra_Test,o1.decl_Test,o2.ra_Test,o2.decl_Test) < 0.001;";
std::string expected_100_subchunk_core =
"SELECT count(*) AS `QS1_COUNT` FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
"`o1`,`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2` "
"WHERE scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 AND "
"SELECT count(*) AS `QS1_COUNT` FROM `LSST`.`Object_100` AS "
"`o1`,`LSST`.`Object_100` AS `o2` "
"WHERE `o2`.`subChunkId`=%S\007S% "
"AND `o1`.`subChunkId`=%S\007S% "
"AND scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 AND "
"scisql_s2PtInBox(`o2`.`ra_Test`,`o2`.`decl_Test`,6,6,7,7)=1 AND "
"`o1`.`rFlux_PS`<0.005 AND "
"scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.001";
std::string expected_100_subchunk_overlap =
"SELECT count(*) AS `QS1_COUNT` FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
"`o1`,`Subchunks_LSST_100`.`ObjectFullOverlap_100_%S\007S%` AS `o2` "
"WHERE scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 "
"SELECT count(*) AS `QS1_COUNT` FROM `LSST`.`Object_100` AS "
"`o1`,`LSST`.`ObjectFullOverlap_100` AS `o2` "
"WHERE `o2`.`subChunkId`=%S\007S% "
"AND `o1`.`subChunkId`=%S\007S% "
"AND scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,6,6,7,7)=1 "
"AND scisql_s2PtInBox(`o2`.`ra_Test`,`o2`.`decl_Test`,6,6,7,7)=1 "
"AND `o1`.`rFlux_PS`<0.005 AND "
"scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.001";
Expand Down Expand Up @@ -253,9 +257,11 @@ BOOST_AUTO_TEST_CASE(Triple) {
"0.024 > scisql_angSep(o1.ra_Test,o1.decl_Test,o2.ra_Test,o2.decl_Test) and "
"Source.objectIdSourceTest=o2.objectIdObjTest;";
std::string expected =
"SELECT * FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
"`o1`,`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2`,`LSST`.`Source_100` AS `LSST.Source` "
"WHERE `o1`.`id`!=`o2`.`id` AND "
"SELECT * FROM `LSST`.`Object_100` AS "
"`o1`,`LSST`.`Object_100` AS `o2`,`LSST`.`Source_100` AS `LSST.Source` "
"WHERE `o2`.`subChunkId`=%S\007S% "
"AND `o1`.`subChunkId`=%S\007S% "
"AND `o1`.`id`!=`o2`.`id` AND "
"0.024>scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`) AND "
"`LSST.Source`.`objectIdSourceTest`=`o2`.`objectIdObjTest`";

Expand Down Expand Up @@ -509,9 +515,11 @@ BOOST_AUTO_TEST_CASE(ObjectSelfJoinDistance) {
"scisql_angSep(o1.ra_Test,o1.decl_Test,o2.ra_Test,o2.decl_Test) < 0.02";
std::string expected =
"SELECT count(*) AS `QS1_COUNT` "
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o1`,"
"`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2` "
"WHERE scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,5.5,5.5,6.1,6.1)=1 "
"FROM `LSST`.`Object_100` AS `o1`,"
"`LSST`.`Object_100` AS `o2` "
"WHERE `o2`.`subChunkId`=%S\007S% "
"AND `o1`.`subChunkId`=%S\007S% "
"AND scisql_s2PtInBox(`o1`.`ra_Test`,`o1`.`decl_Test`,5.5,5.5,6.1,6.1)=1 "
"AND scisql_s2PtInBox(`o2`.`ra_Test`,`o2`.`decl_Test`,5.5,5.5,6.1,6.1)=1 "
"AND scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.02";
qsTest.sqlConfig =
Expand Down Expand Up @@ -951,9 +959,11 @@ BOOST_AUTO_TEST_CASE(FuncExprPred) {
"(scisql_fluxToAbMag(o2.gFlux_PS)-scisql_fluxToAbMag(o2.rFlux_PS)) ) < 1;";
expected =
"SELECT `o1`.`objectId` AS `o1.objectId`,`o2`.`objectId` AS `objectId2` "
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS "
"`o1`,`Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o2` "
"WHERE scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.00001 "
"FROM `LSST`.`Object_100` AS "
"`o1`,`LSST`.`Object_100` AS `o2` "
"WHERE `o2`.`subChunkId`=%S\007S% "
"AND `o1`.`subChunkId`=%S\007S% "
"AND scisql_angSep(`o1`.`ra_Test`,`o1`.`decl_Test`,`o2`.`ra_Test`,`o2`.`decl_Test`)<0.00001 "
"AND `o1`.`objectId`<>`o2`.`objectId` AND "
"ABS((scisql_fluxToAbMag(`o1`.`gFlux_PS`)-scisql_fluxToAbMag(`o1`.`rFlux_PS`))-(scisql_"
"fluxToAbMag(`o2`.`gFlux_PS`)-scisql_fluxToAbMag(`o2`.`rFlux_PS`)))<1";
Expand Down Expand Up @@ -1294,18 +1304,24 @@ BOOST_AUTO_TEST_CASE(Case01_1081) {
"WHERE closestToObj = 1 OR closestToObj is NULL;";
std::string expected_100_subchunk_core =
"SELECT count(*) AS `QS1_COUNT` "
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o` "
"FROM `LSST`.`Object_100` AS `o` "
"INNER JOIN `LSST`.`RefObjMatch_100` AS `o2t` ON `o`.`objectIdObjTest`=`o2t`.`objectId` "
"INNER JOIN `Subchunks_LSST_100`.`SimRefObject_100_%S\007S%` AS `t` ON "
"INNER JOIN `LSST`.`SimRefObject_100` AS `t` ON "
"`o2t`.`refObjectId`=`t`.`refObjectId` "
"WHERE `o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL";
"WHERE `t`.`subChunkId`=%S\007S% "
"AND `o2t`.`subChunkId`=%S\007S% "
"AND `o`.`subChunkId`=%S\007S% "
"AND (`o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL)";
std::string expected_100_subchunk_overlap =
"SELECT count(*) AS `QS1_COUNT` "
"FROM `Subchunks_LSST_100`.`Object_100_%S\007S%` AS `o` "
"FROM `LSST`.`Object_100` AS `o` "
"INNER JOIN `LSST`.`RefObjMatch_100` AS `o2t` ON `o`.`objectIdObjTest`=`o2t`.`objectId` "
"INNER JOIN `Subchunks_LSST_100`.`SimRefObjectFullOverlap_100_%S\007S%` AS `t` ON "
"INNER JOIN `LSST`.`SimRefObjectFullOverlap_100` AS `t` ON "
"`o2t`.`refObjectId`=`t`.`refObjectId` "
"WHERE `o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL";
"WHERE `t`.`subChunkId`=%S\007S% "
"AND `o2t`.`subChunkId`=%S\007S% "
"AND `o`.`subChunkId`=%S\007S% "
"AND (`o`.`closestToObj`=1 OR `o`.`closestToObj` IS NULL)";
qsTest.sqlConfig =
SqlConfig(SqlConfig::MockDbTableColumns({{"LSST",
{{"Object", {"objectIdObjTest", "closestToObj"}},
Expand Down
5 changes: 5 additions & 0 deletions src/query/SelectStmt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ void SelectStmt::setFromListAsTable(std::string const& t) {
_fromList = std::make_shared<FromList>(tr);
}

WhereClause& SelectStmt::getWhereClause(bool createIfMissing) {
if (createIfMissing && !_whereClause) _whereClause = std::make_shared<WhereClause>();
return *_whereClause;
}

bool SelectStmt::operator==(const SelectStmt& rhs) const {
return (util::ptrCompare<FromList>(_fromList, rhs._fromList) &&
util::ptrCompare<SelectList>(_selectList, rhs._selectList) &&
Expand Down
2 changes: 1 addition & 1 deletion src/query/SelectStmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class SelectStmt {

bool hasWhereClause() const { return static_cast<bool>(_whereClause); }
WhereClause const& getWhereClause() const { return *_whereClause; }
WhereClause& getWhereClause() { return *_whereClause; }
WhereClause& getWhereClause(bool createIfMissing = false);
void setWhereClause(std::shared_ptr<WhereClause> w) { _whereClause = w; }

/**
Expand Down
21 changes: 20 additions & 1 deletion src/query/WhereClause.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,26 @@ void WhereClause::prependAndTerm(std::shared_ptr<BoolTerm> t) {
throw std::logic_error("Term of first OR term is not an AND term; there is no global AND term");
}
} else {
throw std::logic_error("There is more than term in the root OR term; can't pick a global AND term");
// We've hit the following case:
//
// WHERE ... OR .. OR ...
//
// A solution is to construct the new root and the global AndTerm.
// Push down the current root as the only child of the AndTerm.
// In essence, we're replacing the following constraints:
//
// WHERE ... OR .. OR ...
//
// With:
//
// WHERE <BoolTerm> AND (... OR .. OR ...)
//
auto newRootOrTerm = std::make_shared<OrTerm>();
andTerm = std::make_shared<AndTerm>();
andTerm->addBoolTerm(_rootOrTerm);
newRootOrTerm->addBoolTerm(andTerm);
_rootOrTerm = newRootOrTerm;
andTerm = std::dynamic_pointer_cast<AndTerm>(_rootOrTerm->_terms[0]);
}

if (!andTerm->merge(*t, AndTerm::PREPEND)) {
Expand Down
8 changes: 4 additions & 4 deletions src/wbase/Task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ Task::Task(TaskMsgPtr const& t, int fragmentNumber, shared_ptr<UserQueryInfo> co
_scanInfo.sortTablesSlowestFirst();
_scanInteractive = t->scaninteractive();
_maxTableSize = t->maxtablesize_mb() * ::MB_SIZE_BYTES;

#if 0
// Create sets and vectors for 'aquiring' subchunk temporary tables.
proto::TaskMsg_Fragment const& fragment(t->fragment(_queryFragmentNum));
DbTableSet dbTbls_;
Expand Down Expand Up @@ -203,6 +203,7 @@ Task::Task(TaskMsgPtr const& t, int fragmentNumber, shared_ptr<UserQueryInfo> co
<< " subChunks=" << util::printable(subchunksVect_));
}
_dbTblsAndSubchunks = make_unique<DbTblsAndSubchunks>(dbTbls_, subchunksVect_);
#endif
if (_sendChannel == nullptr) {
throw util::Bug(ERR_LOC, "Task::Task _sendChannel==null " + getIdStr());
}
Expand All @@ -221,7 +222,6 @@ Task::~Task() {

vector<Task::Ptr> Task::createTasks(shared_ptr<proto::TaskMsg> const& taskMsg,
shared_ptr<wbase::FileChannelShared> const& sendChannel,
shared_ptr<wdb::ChunkResourceMgr> const& chunkResourceMgr,
mysql::MySqlConfig const& mySqlConfig,
shared_ptr<wcontrol::SqlConnMgr> const& sqlConnMgr,
shared_ptr<wpublish::QueriesAndChunks> const& queriesAndChunks,
Expand Down Expand Up @@ -259,8 +259,8 @@ vector<Task::Ptr> Task::createTasks(shared_ptr<proto::TaskMsg> const& taskMsg,
}
for (auto task : vect) {
// newQueryRunner sets the `_taskQueryRunner` pointer in `task`.
task->setTaskQueryRunner(wdb::QueryRunner::newQueryRunner(task, chunkResourceMgr, mySqlConfig,
sqlConnMgr, queriesAndChunks));
task->setTaskQueryRunner(
wdb::QueryRunner::newQueryRunner(task, mySqlConfig, sqlConnMgr, queriesAndChunks));
}
sendChannel->setTaskCount(vect.size());

Expand Down
Loading