Demo
Contents
Demo#
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
# Add the current directory to the import search path (position 1, right
# after the reserved entry) -- presumably so the local POTFUL module resolves.
sys.path.insert(1, '.')
import pprint
# Pretty-printer for the commented-out nested-result dumps later in the demo.
pp = pprint.PrettyPrinter(depth=4, width=80, compact=True)
from POTFUL import POTFUL
# Creating the object prints the POTFUL banner and the output-folder notice
# shown below.
POT = POTFUL()
POTFUL
StX;. @:X;.
: 88S ;X88@ ..
8888X .X88X;. 8S @
;88 :.. 88 :.: X8X
.;SS8. 8888@ .X88Xt..
::. .88 :.. 88 ..
.. :88X. . 8X8.
:;. ..;:. t88%8 8:
.. ... ;SS888XX88;
;X%t8.8.8.8.8.8.8.8.8.8.8.8888@8888X8;.
888@X8@@@@@@@@@@@@@@@@@@@@@88SX@@t;...
@8@8t888@88@@888@888@888@;8@88:::..
%88@%;;:.:::.:::::::::::%888Xt. .
.888@t@;;:;:;;;;;:;;;;%8;88t8
:88@8@8888888888888888@8888
;Xt8888@8@8@888@888@88888.
.:%S88888888888888888S%;:.
..:;;;;t;t;t;t;;:....
................
. .
Results will be saved in 'POTFUL_OUT/' folder
LOAD Auxiliary Files#
# Register the three auxiliary reference files used by the later steps:
# the WGCNA colour map, the TF-target table and the TF family table.
POT.Load_Auxiliary_Files(
    WGCNA_COLOR_MAP="Auxiliary_File/WGCNA_COLOR_MAP.csv",
    TF_Targets="Auxiliary_File/masterTF-target.txt",
    TF_Family="Auxiliary_File/Arabidopsis_TF and family.csv",
)
{'TF_Family': 'Auxiliary_File/Arabidopsis_TF and family.csv',
'TF_Targets': 'Auxiliary_File/masterTF-target.txt',
'WGCNA_COLOR_MAP': 'Auxiliary_File/WGCNA_COLOR_MAP.csv'}
🍲Auxiliary File None
{'TF_Family': 'Auxiliary_File/Arabidopsis_TF and family.csv',
'TF_Targets': 'Auxiliary_File/masterTF-target.txt',
'WGCNA_COLOR_MAP': 'Auxiliary_File/WGCNA_COLOR_MAP.csv'}
LOAD ‘Uncut’ Files#
# Register the WGCNA node/edge tables and the GRN table for the "Uncut" sample.
POT.Load_Files(
    Sample_name="Uncut",
    NODE_File="2_WGCNA_data/WGCNA_GSE74488_Uncut/Nodes_Uncut.txt",
    EDGE_File="2_WGCNA_data/WGCNA_GSE74488_Uncut/Edges_Uncut.txt",
    GRN_File="3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv",
)
Uncut
🍲Sample Uncut
3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv
defaultdict(<class 'dict'>,
{'Uncut': {'GRN': '3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv',
'WGCNA': {'EDGE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Edges_Uncut.txt',
'NODE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Nodes_Uncut.txt'}}})
🍲 Files None
defaultdict(<class 'dict'>,
{'Uncut': {'GRN': '3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv',
'WGCNA': {'EDGE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Edges_Uncut.txt',
'NODE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Nodes_Uncut.txt'}}})
LOAD ‘3hpc’ Files#
# Register the WGCNA node/edge tables and the GRN table for the "3hpc" sample.
POT.Load_Files(
    Sample_name="3hpc",
    NODE_File="2_WGCNA_data/WGCNA_GSE74488_3hpc/Nodes_3hpc.txt",
    EDGE_File="2_WGCNA_data/WGCNA_GSE74488_3hpc/Edges_3hpc.txt",
    GRN_File="3_GRN_data/GSE74488_3hpc_arboreto_regnet.tsv",
)
3hpc
🍲Sample 3hpc
3_GRN_data/GSE74488_3hpc_arboreto_regnet.tsv
defaultdict(<class 'dict'>,
{'3hpc': {'GRN': '3_GRN_data/GSE74488_3hpc_arboreto_regnet.tsv',
'WGCNA': {'EDGE': '2_WGCNA_data/WGCNA_GSE74488_3hpc/Edges_3hpc.txt',
'NODE': '2_WGCNA_data/WGCNA_GSE74488_3hpc/Nodes_3hpc.txt'}},
'Uncut': {'GRN': '3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv',
'WGCNA': {'EDGE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Edges_Uncut.txt',
'NODE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Nodes_Uncut.txt'}}})
🍲 Files None
defaultdict(<class 'dict'>,
{'3hpc': {'GRN': '3_GRN_data/GSE74488_3hpc_arboreto_regnet.tsv',
'WGCNA': {'EDGE': '2_WGCNA_data/WGCNA_GSE74488_3hpc/Edges_3hpc.txt',
'NODE': '2_WGCNA_data/WGCNA_GSE74488_3hpc/Nodes_3hpc.txt'}},
'Uncut': {'GRN': '3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv',
'WGCNA': {'EDGE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Edges_Uncut.txt',
'NODE': '2_WGCNA_data/WGCNA_GSE74488_Uncut/Nodes_Uncut.txt'}}})
Define Samples#
# Keep a handle on the loaded sample names; later cells address samples by
# position (index 0 is "Uncut" and index 1 is "3hpc" in this run).
Samples = POT.Samples
for i, sample in enumerate(Samples):
    print(i, sample)
0 Uncut
1 3hpc
Create GMT files for both samples for enrichment analysis#
# Build the GMT files for the loaded samples; the call prints each GMT path
# followed by a number (see the output below).
POT.WGCNA_Bucket_GMT()
GMT_base/POTFUL-Uncut.gmt 8921
GMT_base/POTFUL-3hpc.gmt 4756
# Show where each sample's GMT file was recorded.
for sample in Samples:
    print(POT.File[sample]['GMT'])
GMT_base/POTFUL-Uncut.gmt
GMT_base/POTFUL-3hpc.gmt
Plot WGCNA module barplot (Uncut)#
# WGCNA module bar plot for the first sample (Uncut).
fig = POT.Plots[Samples[0]]['WGCNA_BarPlot']

# Save the figure exactly as POTFUL produced it, before any restyling.
fig.write_image(POT.OutDir + f"0_{Samples[0]}.png")

# Apply every layout tweak in one call: fixed canvas size, transparent
# backgrounds, title, axis labels and a uniform font.
fig.update_layout(
    autosize=False,
    width=400,
    height=500,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=f"{Samples[0]} WGCNA module bar plot",
    xaxis=dict(title="WGCNA Modules"),
    yaxis=dict(title="Number of Genes"),
    font=dict(family="Arial", size=12, color="black"),
)

# Draw a black frame around the plotting area (mirror=True repeats each
# axis line on the opposite side).
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True)

# Export the styled figure as a 2x-scaled PNG and as an SVG.
fig.write_image(POT.OutDir + f"1_{Samples[0]}.png", scale=2)
fig.write_image(POT.OutDir + f"1_{Samples[0]}.svg")
Plot WGCNA module barplot (3hpc)#
# WGCNA module bar plot for the second sample (3hpc).
fig = POT.Plots[Samples[1]]['WGCNA_BarPlot']

# Apply every layout tweak in one call: fixed canvas size, transparent
# backgrounds, a descriptive two-line title (<br> is a line break in the
# title text), axis labels and a uniform font.
fig.update_layout(
    autosize=False,
    width=400,
    height=500,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title="3 hr post decapitation<br>WGCNA module bar plot",
    xaxis=dict(title="WGCNA Modules"),
    yaxis=dict(title="Number of Genes"),
    font=dict(family="Arial", size=12, color="black"),
)

# Draw a black frame around the plotting area (mirror=True repeats each
# axis line on the opposite side).
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=2, linecolor='black', mirror=True)

# Export the styled figure as a 2x-scaled PNG and as an SVG.
fig.write_image(POT.OutDir + f"2_{Samples[1]}.png", scale=2)
fig.write_image(POT.OutDir + f"2_{Samples[1]}.svg")
WGCNA module-wise enrichment: Uncut vs 3hpc#
# Module-wise enrichment between the two samples (3hpc passed first, Uncut
# second), then print the resulting table.
POT.WGCNA_Module_Enrichment(Samples[1], Samples[0])
print(POT.Data["Enrichment_Dotplot"])

# Style the enrichment dot plot: fixed canvas size and a uniform font.
fig = POT.Plots["Enrichment_Dotplot"]
fig.update_layout(
    autosize=False,
    width=490,
    height=500,
    font=dict(family="Arial", size=12, color="black"),
)

# Name the exports after the samples actually compared, so the filenames
# cannot drift from the call above (yields "3hpc__UncutEnri_dot.*" here).
fig.write_image(POT.OutDir + f"{Samples[1]}__{Samples[0]}Enri_dot.png", scale=2)
fig.write_image(POT.OutDir + f"{Samples[1]}__{Samples[0]}Enri_dot.svg")
3hpc Uncut
Total Number of genes in 12088
Gene_set (Uncut) Term (3hpc) Overlap P-value Adjusted P-value \
4 red greenyellow 20/175 1.438619e-03 1.726343e-02
7 magenta purple 21/166 2.583919e-08 2.583919e-07
10 brown turquoise 73/914 2.693691e-08 3.232430e-07
5 black pink 45/349 1.475838e-12 1.475838e-11
Odds Ratio Significance Genes
4 2.165923 * AT2G44995;AT1G63400;AT1G63330;AT1G02450;AT3G15...
7 4.481927 *** AT5G19210;AT5G16230;AT2G44195;AT3G28150;AT3G06...
10 2.105779 *** AT2G18380;AT2G26840;AT2G33847;AT3G03580;AT1G36...
5 3.550101 *** AT3G60840;AT5G55520;AT1G16630;AT2G21710;AT5G36...
# Display the enrichment table (the same object printed in the previous cell)
# as the cell's rich output.
POT.Data['Enrichment_Dotplot']
Gene_set (Uncut) | Term (3hpc) | Overlap | P-value | Adjusted P-value | Odds Ratio | Significance | Genes | |
---|---|---|---|---|---|---|---|---|
4 | red | greenyellow | 20/175 | 1.438619e-03 | 1.726343e-02 | 2.165923 | * | AT2G44995;AT1G63400;AT1G63330;AT1G02450;AT3G15... |
7 | magenta | purple | 21/166 | 2.583919e-08 | 2.583919e-07 | 4.481927 | *** | AT5G19210;AT5G16230;AT2G44195;AT3G28150;AT3G06... |
10 | brown | turquoise | 73/914 | 2.693691e-08 | 3.232430e-07 | 2.105779 | *** | AT2G18380;AT2G26840;AT2G33847;AT3G03580;AT1G36... |
5 | black | pink | 45/349 | 1.475838e-12 | 1.475838e-11 | 3.550101 | *** | AT3G60840;AT5G55520;AT1G16630;AT2G21710;AT5G36... |
TF Network overlap#
# Run TF_reg on each sample. Per the log below, it reports the number of
# TF-target (TFT) pairs in the reference TF-target file and in the sample's
# GRN file, followed by the number of pairs common to both.
POT.TF_reg(Samples[0])
POT.TF_reg(Samples[1])
Uncut Auxiliary_File/masterTF-target.txt 3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv
Total number of TFT pairs in Auxiliary_File/masterTF-target.txt :2357032
Scaled Dataset Using Pandas
Total number of TFT pairs in 3_GRN_data/GSE74488_Uncut_arboreto_regnet.tsv :4115241
Total number of common pairs:171069
Total number of common pairs:171069
3hpc Auxiliary_File/masterTF-target.txt 3_GRN_data/GSE74488_3hpc_arboreto_regnet.tsv
Total number of TFT pairs in Auxiliary_File/masterTF-target.txt :2357032
Scaled Dataset Using Pandas
AT3G11450 AT3G50550
# POT.TF_reg(Samples[0], Filter=0)
# POT.TF_reg(Samples[1], Filter=0)
TF Co-expression Network overlap#
# Run merge_reg_coexp on each sample. Per the log below, it reads the
# sample's WGCNA edge file, reports the co-expressed pair count and the
# common pairs, and names a <sample>_Coexp_GRN.tsv file alongside a DiGraph
# summary.
POT.merge_reg_coexp(Samples[0])
POT.merge_reg_coexp(Samples[1])
2_WGCNA_data/WGCNA_GSE74488_Uncut/Edges_Uncut.txt
Total number of coexpressed combo-pairs:1126488
Total number of common pairs:1095
Total number of common pairs:1095
POTFUL_OUT/Uncut_Coexp_GRN.tsv DiGraph with 1015 nodes and 1095 edges
2_WGCNA_data/WGCNA_GSE74488_3hpc/Edges_3hpc.txt
Total number of coexpressed combo-pairs:333250
Total number of common pairs:229
Total number of common pairs:229
POTFUL_OUT/3hpc_Coexp_GRN.tsv DiGraph with 266 nodes and 229 edges
Network centrality analysis#
# Run the centrality analysis on each sample's network; the log below lists
# the Degree, Betweenness and clust dictionaries for each graph.
POT.network_centrality(Samples[0])
POT.network_centrality(Samples[1])
DiGraph with 1015 nodes and 1095 edges
DiGraph with 1015 nodes and 1095 edges
Degree Dict True
Betweenness Dict True
Degree Dict True
clust Dict True
DiGraph with 266 nodes and 229 edges
DiGraph with 266 nodes and 229 edges
Degree Dict True
Betweenness Dict True
Degree Dict True
clust Dict True
Save GraphML file#
# Write a GraphML export for each sample (per the section heading); each call
# prints the summary of the graph it handled.
POT.generate_graphml_out(Samples[0])
POT.generate_graphml_out(Samples[1])
DiGraph with 1015 nodes and 1095 edges
DiGraph with 266 nodes and 229 edges
# POT.Data['Uncut']["Network"].nodes
Generate network visualization html file#
# Generate the HTML network visualization for each sample.
# NOTE(review): the result appears to be kept under
# POT.Plots[<sample>]['Network_Viz'] (see the commented-out .show() calls
# below) -- confirm against the POTFUL API.
POT.Graph_vis(Samples[0])
POT.Graph_vis(Samples[1])
Display html graph file (Uncut)#
# POT.Plots[Samples[0]]['Network_Viz'].show(POT.OutDir+'Uncut.html')
Display html graph file (3hpc)#
# POT.Plots[Samples[1]]['Network_Viz'].show(POT.OutDir+'3hpc.html')
# Compare the two sample networks; the call prints how many nodes they share
# and the set of overlapping gene IDs.
POT.network_overlap(Samples[0], Samples[1])
🍲 👌 There are 20 nodes overlapping between pair of Graphs
{'AT1G58340', 'AT2G45420', 'AT1G51220', 'AT5G41920', 'AT4G36900', 'AT5G46590', 'AT2G18380', 'AT2G45660', 'AT1G75390', 'AT3G04030', 'AT3G10113', 'AT4G08940', 'AT3G03200', 'AT1G75388', 'AT5G62320', 'AT5G42070', 'AT1G18330', 'AT3G01530', 'AT1G75820', 'AT2G42150'}
# pp.pprint(POT.Data[Samples[0]][Samples[1]])
Display html graph file (Uncut and 3hpc overlap)#
# POT.Plots['Uncut_3hpc_Overlap_Network_Viz'].show(POT.OutDir+'Overlap.html')
List all plots#
# Show the names of everything currently registered in POT.Plots.
list(POT.Plots)
['Uncut', '3hpc', 'Enrichment_Dotplot', 'Uncut_3hpc_Overlap_Network_Viz']
import session_info
# Record package, Jupyter and interpreter versions for reproducibility;
# html=False selects the plain-text report shown below.
session_info.show(html=False)
-----
POTFUL 0.1.1
pandas 1.4.3
plotly 5.9.0
session_info 1.0.0
-----
IPython 8.4.0
jupyter_client 7.3.4
jupyter_core 4.10.0
jupyterlab 3.4.3
notebook 6.4.12
-----
Python 3.10.4 (main, Mar 31 2022, 08:41:55) [GCC 7.5.0]
Linux-5.15.62.1-microsoft-standard-WSL2-x86_64-with-glibc2.31
-----
Session information updated at 2022-10-07 06:22
END