Skip to content

Juicer gallery example #96

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/juicer_example/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Using Juicer to Run PASTIS on a .hic File
=========================================
Binary file not shown.
26 changes: 26 additions & 0 deletions examples/juicer_example/data/counts.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Chr01 1 1 0
Chr01 2 2 1
Chr01 3 3 2
Chr01 4 4 3
Chr01 5 5 4
Chr01 6 6 5
Chr01 7 7 6
Chr01 8 8 7
Chr01 9 9 8
Chr01 10 10 9
Chr01 11 11 10
Chr01 12 12 11
Chr01 13 13 12
Chr01 14 14 13
Chr01 15 15 14
Chr01 16 16 15
Chr01 17 17 16
Chr01 18 18 17
Chr01 19 19 18
Chr01 20 20 19
Chr01 21 21 20
Chr01 22 22 21
Chr01 23 23 22
Chr01 24 24 23
Chr01 25 25 24
Chr01 26 26 25
Binary file added examples/juicer_example/data/counts.npy
Binary file not shown.
89 changes: 89 additions & 0 deletions examples/juicer_example/juicer_for_pastis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
Using a .hic file (output of Juicer) with PASTIS
================================================

This example showcases how to convert a .hic file output by Juicer into a numpy
array, output a .bed file for the lengths, and then run PASTIS.
"""


import numpy as np
import strawC
import os
from scipy import sparse


##############################################################################
# Returns a numpy array with counts data from the given .hic file.
# val_type : "observed" for raw values; "oe" for observed/expected values
# norm : the normalization to apply ("NONE" for no normalization)
# hic_file : the path to the .hic file we want to get counts data for
# width : the size of the returned counts matrix
# chrom1 : the first chromosome
# chrom2 : the second chromosome
# resolution : the BP resolution
# --------------------------------------------------------------------------
def grabRegion(val_type, norm, hic_file, width, chrom1, chrom2, resolution):
chrom_range1 = str(chrom1)
chrom_range2 = str(chrom2)
result = strawC.strawC(val_type, norm, hic_file, chrom_range1,
chrom_range2, 'BP', resolution)
row_indices, col_indices, data = list(), list(), list()
for record in result:
row_indices.append(record.binX)
col_indices.append(record.binY)
data.append(record.counts)
if chrom1 == chrom2 and record.binX != record.binY:
row_indices.append(record.binY)
col_indices.append(record.binX)
data.append(record.counts)
row_indices = (np.asarray(row_indices)) / resolution
col_indices = (np.asarray(col_indices)) / resolution
matrix = sparse.coo_matrix((data, (row_indices.astype(int),
col_indices.astype(int))),
shape=(width, width)).toarray()
matrix[np.isnan(matrix)] = 0
matrix[np.isinf(matrix)] = 0
return matrix


##############################################################################
# Define the path to our .hic file.
# ---------------------------------
FILE_PATH = './data/Pfalciparum_trophozoite_Ay2014.hic'


##############################################################################
# Use the grabRegion method to get counts data for the first chromosome
# in the .hic file at a resolution of 25000. We won't apply any normalization.
# ----------------------------------------------------------------------------
counts = grabRegion('observed', 'NONE', FILE_PATH, 26, 1, 1, 25000)


##############################################################################
# Output a .bed file of lengths. Let's call the file "counts.bed".
# ----------------------------------------------------------------
with open("./data/counts.bed", "w") as f:
the_length = len(counts)
for i in range(the_length):
curr_line = 'Chr01\t' + str(i + 1) + '\t' + str(i + 1) + '\t'
curr_line += str(i)
print(curr_line, file=f)


##############################################################################
# Save the counts as a .npy file. Let's call the file "counts.npy".
# -----------------------------------------------------------------
np.save("./data/counts.npy", counts)


##############################################################################
# Make the following call using os to run PASTIS with the counts data.
# The call could be made in command line as well. Note that there are other
# settings we could run PASTIS with; this is simply one set of those settings.
# ----------------------------------------------------------------------------
settings = "pastis-poisson --seed 0 --counts ./data/counts.npy --outdir"
settings += " ./results --lengths ./data/counts.bed --ploidy 1"
settings += " --filter_threshold 0.04 --multiscale_rounds 1 --max_iter 50000"
settings += " --dont-normalize --alpha 3"
os.system(settings)