|
23 | 23 | """ |
24 | 24 | Test cases for combinatorial algorithms. |
25 | 25 | """ |
| 26 | +import io |
26 | 27 | import itertools |
27 | 28 | import unittest |
| 29 | +from collections import Counter |
| 30 | +from collections import defaultdict |
| 31 | + |
| 32 | +import msprime |
28 | 33 |
|
29 | 34 | import tskit |
30 | 35 | import tskit.combinatorics as comb |
@@ -413,3 +418,166 @@ def test_is_symmetrical(self): |
413 | 418 | self.assertFalse(three_leaf_asym.is_symmetrical()) |
414 | 419 | six_leaf_sym = RankTree(children=[three_leaf_asym, three_leaf_asym]) |
415 | 420 | self.assertTrue(six_leaf_sym.is_symmetrical()) |
| 421 | + |
| 422 | + |
class TestCountTopologies(unittest.TestCase):
    """
    Tests for ``comb.count_topologies`` and ``comb.count_topologies_incremental``.

    Results are indexed by a ``frozenset`` of population ids and compared
    against ``Counter`` objects keyed by the value of ``Tree.rank()``.
    """

    def verify_topologies(self, ts, expected=None):
        """
        Check the topology counts of every tree in ``ts`` for every non-empty
        combination of the populations in ``ts``.

        For each tree and each population combination this asserts that:

        - if the tree has exactly one root, ``comb.count_topologies`` matches
          the brute-force counts from ``subsample_topologies``;
        - if ``expected`` is given, the counts equal
          ``expected[tree_index][frozenset(pops)]``;
        - ``comb.count_topologies_incremental`` gives the same counts as
          ``comb.count_topologies``.

        :param ts: The tree sequence to check.
        :param expected: Optional sequence with one mapping per tree, each
            mapping a frozenset of population ids to the expected Counter.
        """
        populations = [pop.id for pop in ts.populations()]
        # Combinations are kept as ordered tuples: the order fixes the order of
        # the samples handed to simplify() in subsample_topologies.
        pop_combos = [
            pops
            for num_pops in range(1, ts.num_populations + 1)
            for pops in itertools.combinations(populations, num_pops)
        ]
        topologies = [comb.count_topologies(t) for t in ts.trees()]
        inc_topologies = list(comb.count_topologies_incremental(ts))
        for i, t in enumerate(ts.trees()):
            single_root = len(t.roots) == 1
            # The single-tree sequence is only needed for the brute-force
            # comparison, so build it once per tree and only when it is used.
            just_t = ts.keep_intervals([t.interval]) if single_root else None
            for pops in pop_combos:
                key = frozenset(pops)
                actual_topologies = topologies[i][key]
                if single_root:
                    subsampled_topologies = self.subsample_topologies(just_t, pops)
                    self.assertEqual(actual_topologies, subsampled_topologies)
                if expected is not None:
                    self.assertEqual(actual_topologies, expected[i][key])
                self.assertEqual(actual_topologies, inc_topologies[i][key])

    def subsample_topologies(self, ts, populations):
        """
        Count topologies by brute force: for every way of choosing one sample
        from each of the given populations, simplify ``ts`` to those samples
        and tally the rank of each resulting single-root tree.

        :param ts: The tree sequence to subsample.
        :param populations: Ordered population ids to draw one sample from each.
        :return: A Counter mapping ``Tree.rank()`` values to their counts.
        """
        samples_per_pop = [ts.samples(population=p) for p in populations]
        topologies = Counter()
        for subsample in itertools.product(*samples_per_pop):
            for pop_tree in ts.simplify(samples=subsample).trees():
                # regions before and after keep interval have all samples as roots
                # so don't count those
                # The single tree of interest should have one root
                if len(pop_tree.roots) == 1:
                    topologies[pop_tree.rank()] += 1
        return topologies

    def test_single_population(self):
        """
        With one population, every tree has one ``(0, 0)`` entry per sample.
        """
        n = 10
        # Fixed seed so that a failure can be reproduced.
        ts = msprime.simulate(n, recombination_rate=10, random_seed=1)
        expected = defaultdict(Counter)
        expected[frozenset([0])] = Counter({(0, 0): n})
        self.verify_topologies(ts, [expected] * ts.num_trees)

    def test_three_populations(self):
        """
        A hand-built single tree with two samples in each of three populations.
        """
        # The tables are parsed with strict=False, so columns are split on any
        # whitespace and the omitted metadata column is accepted.
        nodes = io.StringIO(
            """\
        id is_sample time population individual metadata
        0 1 0.000000 0 -1
        1 1 0.000000 1 -1
        2 1 0.000000 1 -1
        3 1 0.000000 2 -1
        4 1 0.000000 2 -1
        5 1 0.000000 0 -1
        6 0 1.000000 0 -1
        7 0 2.000000 0 -1
        8 0 2.000000 0 -1
        9 0 3.000000 0 -1
        10 0 4.000000 0 -1
        """
        )
        edges = io.StringIO(
            """\
        left right parent child
        0.000000 1.000000 6 4
        0.000000 1.000000 6 5
        0.000000 1.000000 7 1
        0.000000 1.000000 7 2
        0.000000 1.000000 8 3
        0.000000 1.000000 8 6
        0.000000 1.000000 9 7
        0.000000 1.000000 9 8
        0.000000 1.000000 10 0
        0.000000 1.000000 10 9
        """
        )
        ts = tskit.load_text(
            nodes, edges, sequence_length=1, strict=False, base64_metadata=False
        )

        expected = defaultdict(Counter)
        expected[frozenset([0])] = Counter({(0, 0): 2})
        expected[frozenset([1])] = Counter({(0, 0): 2})
        expected[frozenset([2])] = Counter({(0, 0): 2})
        expected[frozenset([0, 1])] = Counter({(0, 0): 4})
        expected[frozenset([0, 2])] = Counter({(0, 0): 4})
        expected[frozenset([1, 2])] = Counter({(0, 0): 4})
        expected[frozenset([0, 1, 2])] = Counter({(1, 0): 4, (1, 1): 4})
        self.verify_topologies(ts, [expected])

    def test_multiple_roots(self):
        """
        Two unconnected samples in different populations: only the
        single-population entries are expected to be non-empty.
        """
        tables = tskit.TableCollection(sequence_length=1.0)
        tables.populations.add_row()
        tables.populations.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=1)

        # Not samples so they are ignored
        tables.nodes.add_row(time=1)
        tables.nodes.add_row(time=1, population=1)

        expected = defaultdict(Counter)
        expected[frozenset([0])] = Counter({(0, 0): 1})
        expected[frozenset([1])] = Counter({(0, 0): 1})
        self.verify_topologies(tables.tree_sequence(), [expected])

    def test_no_full_topology(self):
        """
        Samples from populations 0 and 1 share a parent while the sample from
        population 2 has no edges, so no combination involving population 2
        and another population is expected to have a topology.
        """
        tables = tskit.TableCollection(sequence_length=1.0)
        tables.populations.add_row()
        tables.populations.add_row()
        tables.populations.add_row()
        child1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
        child2 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=1)
        parent = tables.nodes.add_row(time=1)
        tables.edges.add_row(left=0, right=1, parent=parent, child=child1)
        tables.edges.add_row(left=0, right=1, parent=parent, child=child2)

        # Left as root so there is no topology with all three populations
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=2)

        expected = defaultdict(Counter)
        for pop_combo in [[0], [1], [2], [0, 1]]:
            expected[frozenset(pop_combo)] = Counter({(0, 0): 1})
        self.verify_topologies(tables.tree_sequence(), [expected])

    def test_polytomies(self):
        """
        Three samples, one per population, all children of a single parent:
        every population combination is expected to give one ``(0, 0)`` entry.
        """
        tables = tskit.TableCollection(sequence_length=1.0)
        tables.populations.add_row()
        tables.populations.add_row()
        tables.populations.add_row()
        c1 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
        c2 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=1)
        c3 = tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=2)
        p = tables.nodes.add_row(time=1)
        tables.edges.add_row(left=0, right=1, parent=p, child=c1)
        tables.edges.add_row(left=0, right=1, parent=p, child=c2)
        tables.edges.add_row(left=0, right=1, parent=p, child=c3)

        expected = defaultdict(Counter)
        for pop_combos in [[0], [1], [2], [0, 1], [0, 2], [1, 2], [0, 1, 2]]:
            expected[frozenset(pop_combos)] = Counter({(0, 0): 1})
        self.verify_topologies(tables.tree_sequence(), [expected])

    def test_msprime_migrations(self):
        """
        Cross-check the counts on simulated tree sequences with two, three and
        four populations of five samples each.
        """
        for num_populations in range(2, 5):
            # subTest names the failing population count in the report.
            with self.subTest(num_populations=num_populations):
                samples = [5] * num_populations
                ts = self.simulate_multiple_populations(
                    samples, random_seed=num_populations
                )
                self.verify_topologies(ts)

    def simulate_multiple_populations(self, sample_sizes, random_seed=None):
        """
        Simulate a tree sequence with one population per entry of
        ``sample_sizes``, with recombination and migration between populations.

        The migration matrix is zero everywhere except entry ``[i][i + 1]``,
        which is ``0.2 / (2 * (d - 1))`` for ``d`` populations.

        :param sample_sizes: Number of samples to draw from each population.
        :param random_seed: Optional seed passed on to ``msprime.simulate``;
            ``None`` leaves the choice of seed to msprime.
        :return: The simulated tree sequence.
        """
        d = len(sample_sizes)
        M = 0.2
        # A single population has no other population to migrate to, so the
        # rate is unused; avoid dividing by zero in that case.
        m = M / (2 * (d - 1)) if d > 1 else 0

        migration_matrix = [
            [m if k == i + 1 else 0 for k in range(d)] for i in range(d)
        ]

        pop_configurations = [
            msprime.PopulationConfiguration(sample_size=size) for size in sample_sizes
        ]
        return msprime.simulate(
            population_configurations=pop_configurations,
            migration_matrix=migration_matrix,
            recombination_rate=0.05,
            random_seed=random_seed,
        )
0 commit comments