Commit 7f0bcf16 authored by T-B-F

update DMPM script

parent 4317b464
......@@ -3,46 +3,127 @@
import struct
import sys
#Domain Match Probability Matrix
class DMPM:
    """Domain Match Probability Matrix.

    Read a domain similarity matrix stored in a binary sparse-row format.
    Rows and columns are addressed by the identification numbers of the
    domain models. A matrix is created according to a cutoff: values below
    the cutoff are not stored, and -1 is returned when they are queried.

    Attributes
    ----------
    name : bytes
        raw 10-byte name field read from the file header
    n_domains : int
        number of domains (rows) declared in the file header
    n_vals : int
        number of stored values declared in the file header
    ids : dict
        maps a domain identification number to its row/column index
    row_ids : tuple of int
        for each row index, offset of its first entry in col_ids/values
    col_ids : tuple of int
        column index of each stored value
    values : list of int
        stored similarity values (mutable, see set_val)
    """

    def __init__(self, path):
        """Load a DMP matrix from a binary file.

        Parameters
        ----------
        path : string
            input file, path to the binary matrix

        Raises
        ------
        struct.error
            if the file is shorter than its header announces
        """
        self.n_domains = 0
        self.n_vals = 0
        with open(path, "rb") as in_f:
            # Header: 10-byte name, then two native ints.
            self.name = struct.unpack('10s', in_f.read(10))[0]
            self.n_domains = struct.unpack('i', in_f.read(4))[0]
            self.n_vals = struct.unpack('i', in_f.read(4))[0]
            ids = self._read_array(in_f, 'i', self.n_domains)
            self.ids = {dom_id: idx for idx, dom_id in enumerate(ids)}
            self.row_ids = self._read_array(in_f, 'i', self.n_domains)
            self.col_ids = self._read_array(in_f, 'i', self.n_vals)
            # Kept as a list because set_val modifies entries in place.
            self.values = list(self._read_array(in_f, 'h', self.n_vals))

    @staticmethod
    def _read_array(in_f, code, count):
        """Read `count` consecutive items of struct type `code` as a tuple."""
        fmt = '%d%s' % (count, code)
        return struct.unpack(fmt, in_f.read(struct.calcsize(fmt)))

    def _locate(self, idi, idj):
        """Return the offset in values of the (idi, idj) entry, or -1."""
        if idi not in self.ids or idj not in self.ids:
            return -1
        # The row is always taken from the smaller identification number.
        if idi < idj:
            row, col = self.ids[idi], self.ids[idj]
        else:
            row, col = self.ids[idj], self.ids[idi]
        start = self.row_ids[row]
        # row_ids has no trailing sentinel: the last row runs up to n_vals.
        # The test is on the row index, not on the offset stored for it.
        if row != self.n_domains - 1:
            end = self.row_ids[row + 1]
        else:
            end = self.n_vals
        try:
            return self.col_ids.index(col, start, end)
        except ValueError:
            return -1

    def get_val(self, idi, idj):
        """Return a value of the matrix.

        Parameters
        ----------
        idi : int
            domain identification number
        idj : int
            domain identification number

        Return
        ------
        value : int
            the similarity value, or -1 if not found
            (unknown id, or similarity below the matrix cutoff)

        Raises
        ------
        ValueError
            if both identification numbers are equal
        """
        if idi == idj:
            raise ValueError("both id are indentical")
        pos = self._locate(idi, idj)
        if pos < 0:
            return -1
        return self.values[pos]

    def set_val(self, idi, idj, val):
        """Change a value inside the matrix.

        Only entries already stored in the matrix can be changed.

        Parameters
        ----------
        idi : int
            domain identification number
        idj : int
            domain identification number
        val : int
            new value

        Return
        ------
        value : int
            0 if the value has been changed, -1 otherwise
            (unknown id, or entry not stored in the matrix)
        """
        pos = self._locate(idi, idj)
        if pos < 0:
            return -1
        self.values[pos] = val
        return 0
def main():
    """Print the stored similarities between one domain and ids 1..19999.

    Command line arguments: the path to the binary matrix, then the
    identification number of the query domain. One line
    "<query id> <other id> <value>" is printed for every pair whose value
    is stored in the matrix; the process then exits with status 0.
    """
    x = DMPM(sys.argv[1])
    pos1 = int(sys.argv[2])
    for pos2 in range(1, 20000):
        # get_val raises ValueError when both ids are equal.
        if pos1 == pos2:
            continue
        val = x.get_val(pos1, pos2)
        if val > -1:
            # Reuse the value instead of running the lookup a second time.
            print(pos1, pos2, val)
    sys.exit(0)
# Run the command line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment