# array_tree_tests.py
import sys, os
import unittest
import tempfile
# Put the parent of this file's directory at the front of ``sys.path``
# (presumably so the ``bx`` package imported below resolves from the checkout).
try:
    sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
except (NameError, TypeError):
    # ``__file__`` is undefined or not a path (e.g. the code was exec'd or
    # pasted into an interactive session): fall back to the parent of the
    # current working directory.
    sys.path.insert(0, os.path.dirname(os.path.abspath(".")))

from bx.arrays.array_tree import ArrayTree, FileArrayTree, FileArrayTreeDict, array_tree_dict_from_reader
from bx.arrays.bed import BedReader
from bx.arrays.wiggle import WiggleReader

class TestArrayTree(unittest.TestCase):
    def setUp(self):
        tree = ArrayTree(10000, 10) # max value of 10000, each block has 10 numbers
        for i in range(5000):
            tree[i] = i
        
        # Insert extra copies to test frequency
        for i in range(3000):
            tree[i] = i
        
        tree.set_range(5000, 9001, 100)
        tree.root.build_summary()
        
        d = {'test': tree}
        f = tempfile.TemporaryFile()
        FileArrayTreeDict.dict_to_file( d, f )
        f.seek(0)
        self.filearraytreedict = FileArrayTreeDict(f)
        self.filearraytree = self.filearraytreedict['test']
        
    def test_get_summary(self):
        f = self.filearraytree
        lvl1 = f.get_summary(0, 1)
36
        self.assertEqual( [float(_) for _ in lvl1.sums/lvl1.counts], [4.5, 14.5, 24.5, 34.5, 44.5, 54.5, 64.5, 74.5, 84.5, 94.5])
37
        lvl2 = f.get_summary(0, 2)
38
        self.assertEqual( [float(_) for _ in lvl2.sums/lvl2.counts], [49.5, 149.5, 249.5, 349.5, 449.5, 549.5, 649.5, 749.5, 849.5, 949.5])
39
        lvl3 = f.get_summary(0, 3)
40
        self.assertEqual( [float(_) for _ in lvl3.sums/lvl3.counts], [499.5, 1499.5, 2499.5, 3499.5, 4499.5, 100.0, 100.0, 100.0, 100.0, 100.0])
41
        lvl2_2 = f.get_summary(3000, 2)
42
        self.assertEqual( [float(_) for _ in lvl2_2.sums/lvl2_2.counts], [3049.5, 3149.5, 3249.5, 3349.5, 3449.5, 3549.5, 3649.5, 3749.5, 3849.5, 3949.5])
43 44 45 46 47 48
        
    def test_get_leaf(self):
        f = self.filearraytree
        from_start = [int(i) for i in f.get_leaf(0)]
        from_middle = [int(i) for i in f.get_leaf(5)]
        self.assertEqual(from_start, from_middle)
49
        self.assertEqual(from_start, list(range(10)))
50 51
        
        from_start = [int(i) for i in f.get_leaf(4999)]
52
        self.assertEqual(from_start, list(range(4990, 5000)))
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
        
        from_start = [int(i) for i in f.get_leaf(9600)]
        self.assertEqual(from_start, [])
        
    def test_big(self):
        tree = ArrayTree(2147483647, 1000) # What we use for tracks
        for i in range(5000):
            tree[i] = i
        
        # Insert extra copies to test frequency
        for i in range(3000):
            tree[i] = i
        
        tree.set_range(5000, 9001, 100)
        tree.set_range(14000000, 15000000, 200)
        tree.root.build_summary()
        
        d = {'test': tree}
        f = tempfile.TemporaryFile()
        FileArrayTreeDict.dict_to_file( d, f )
        f.seek(0)
        at = FileArrayTreeDict(f)['test']
        
        lvl1 = at.get_summary(14000000, 1)
77
        avgs = [float(_) for _ in lvl1.sums/lvl1.counts]
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
        self.assertEqual( len(avgs), 1000 )
        self.assertEqual( avgs, [ 200 for i in range(0, 1000)] )
    
    
#    def create_bed(self):
#        reader = BedReader( open( "22.bed.txt" ) )
#        temp = tempfile.TemporaryFile()
#        
#        d = array_tree_dict_from_reader( reader, {}, block_size = 1000 )
#
#        for array_tree in d.itervalues():
#            array_tree.root.build_summary()
#
#        FileArrayTreeDict.dict_to_file( d, open("tree.at", "w"), no_leaves=True ) # just summaries
#        
#    def test_bed(self):
#        # self.create_bed()
#        print "bed"
#        at = FileArrayTreeDict( open( "tree.at" ) )['chr22']
#        print map(, at.get_summary(14000000, 1).frequencies)
        
    
    def test_get_frequencies(self):
        f = self.filearraytree
102 103 104 105
        self.assertEqual( [float(_) for _ in f.get_summary(0, 1).frequencies], ([20] * 10) )
        self.assertEqual( [float(_) for _ in f.get_summary(4000, 1).frequencies], ([10] * 10) )
        self.assertEqual( [float(_) for _ in f.get_summary(0, 2).frequencies], ([200] * 10) )
        self.assertEqual( [int(_) for _ in f.get_summary(0, 3).frequencies], [2000, 2000, 2000, 1000, 1000, 1000, 1000, 1000, 1000, 1] )
106 107 108 109 110 111 112 113 114 115 116
    
    def test_wrong_dictkey(self):
        self.assertRaises(KeyError, self.filearraytreedict.__getitem__, "non-existing")
        
    def test_higher_level_than_tree(self):
        f = self.filearraytree
        self.assertEqual(3, f.levels)
        self.assertRaises(ValueError, f.get_summary, 0, 4)
        

# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()