{
"cells": [
{
"cell_type": "markdown",
"id": "09774124-8ae3-4102-934a-d38780b53eac",
"metadata": {},
"source": [
"# Download expression data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e951c648-a373-470a-a5aa-ed678de40b05",
"metadata": {},
"outputs": [],
"source": [
"import GEOparse\n",
"import pandas as pd\n",
"import numpy as np\n",
"import re\n",
"from pathlib import Path\n",
"import pylab as pl\n",
"import seaborn as sns\n",
"# Global figure defaults: canvas size plus tick, axis-label and legend font sizes.\n",
"pl.rcParams.update({\n",
"    'figure.figsize': (14, 10),\n",
"    'ytick.labelsize': 12,\n",
"    'xtick.labelsize': 11,\n",
"    'axes.labelsize': 23,\n",
"    'legend.fontsize': 20,\n",
"})\n",
"sns.set_style('ticks')\n",
"# Unpack the first four colours of seaborn's Set1 palette.\n",
"c1, c2, c3, c4 = sns.color_palette(\"Set1\", n_colors=4)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6cb55a19-bbb6-4bab-978d-0fcedc753268",
"metadata": {},
"outputs": [],
"source": [
"# If GEOparse is not installed in this kernel's environment, run: %pip install GEOparse"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cecc6a63-5b5f-436f-8178-0ae082aef0d7",
"metadata": {},
"outputs": [],
"source": [
"# Folder for the per-timepoint expression tables exported by this notebook.\n",
"Dir_Expression = \"1_Expression_data/\"\n",
"Path(Dir_Expression).mkdir(exist_ok=True, parents=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1ece80d2-9ed3-4843-b4c5-af811b39ed17",
"metadata": {},
"outputs": [],
"source": [
"# Folder for the WGCNA input tables exported by this notebook.\n",
"Dir_WGCNA = \"2_WGCNA_data/\"\n",
"Path(Dir_WGCNA).mkdir(exist_ok=True, parents=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5f0f662d-5430-4d99-8c3b-e87d097512f9",
"metadata": {},
"outputs": [],
"source": [
"# Folder for GRN data (created here; no cell in this notebook writes to it).\n",
"Dir_GRN = \"3_GRN_data/\"\n",
"Path(Dir_GRN).mkdir(exist_ok=True, parents=True)"
]
},
{
"cell_type": "markdown",
"id": "7bdcb850-eea9-408c-96ba-7659c2b6f58f",
"metadata": {},
"source": [
"## Download data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "452e20a4-9428-48b3-8f47-5513aa4d231a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\n",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n",
" 3 6608k 3 208k 0 0 360k 0 0:00:18 --:--:-- 0:00:18 360k\n",
" 77 6608k 77 5152k 0 0 3232k 0 0:00:02 0:00:01 0:00:01 3232k\n",
"100 6608k 100 6608k 0 0 3570k 0 0:00:01 0:00:01 --:--:-- 3570k\n",
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\n",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0curl: (6) Could not resolve host: GSE74488_sc_expression.csv.gz\n"
]
}
],
"source": [
"# Supplementary single-cell expression matrix of GEO series GSE74488.\n",
"URL = 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE74nnn/GSE74488/suppl/GSE74488_sc_expression.csv.gz'\n",
"# Use -o to name the output file explicitly. curl's -O takes no argument, so a file\n",
"# name placed after it is parsed as a second URL and curl exits with error 6.\n",
"# -f fails on HTTP errors instead of saving an error page; -L follows redirects.\n",
"!curl -fL -o GSE74488_sc_expression.csv.gz \"{URL}\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "692b975e-9a42-4cb0-b331-3a3d06f97baf",
"metadata": {},
"outputs": [],
"source": [
"# Load the downloaded matrix; the gzip compression is inferred from the .gz suffix.\n",
"df = pd.read_csv(\"GSE74488_sc_expression.csv.gz\", compression=\"infer\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0c6e9fa6-0177-4220-add0-07ecaec531cb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Locus | \n",
" wolsc_kb2_4_1 | \n",
" wolsc_kb2_4_10 | \n",
" wolsc_kb2_4_11 | \n",
" wolsc_kb2_4_13 | \n",
" wolsc_kb2_4_14 | \n",
" wolsc_kb2_4_15 | \n",
" wolsc_kb2_4_18 | \n",
" wolsc_kb2_4_19 | \n",
" wolsc_kb2_4_22 | \n",
" ... | \n",
" wolsc_kb3_2_1 | \n",
" sc_0113_pa_19 | \n",
" sc_0113_pa_3 | \n",
" sc_0113_pa_44 | \n",
" sc_0113_pa_52 | \n",
" sc_0113_pa_58 | \n",
" sc_0113_pa_59 | \n",
" sc_0113_pa_60 | \n",
" sc_0113_pa_68 | \n",
" sc_0113_pa_83 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AT1G01010 | \n",
" 0.000000 | \n",
" 7.702431 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" AT1G01020 | \n",
" 8.378906 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 4.298833 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 2.275709 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.614329 | \n",
" 0.0 | \n",
" 4.642478 | \n",
" 3.406784 | \n",
"
\n",
" \n",
" 2 | \n",
" AT1G01030 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" AT1G01040 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" AT1G01046 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 239 columns
\n",
"
"
],
"text/plain": [
" Locus wolsc_kb2_4_1 wolsc_kb2_4_10 wolsc_kb2_4_11 wolsc_kb2_4_13 \\\n",
"0 AT1G01010 0.000000 7.702431 0.0 0.000000 \n",
"1 AT1G01020 8.378906 0.000000 0.0 4.298833 \n",
"2 AT1G01030 0.000000 0.000000 0.0 0.000000 \n",
"3 AT1G01040 0.000000 0.000000 0.0 0.000000 \n",
"4 AT1G01046 0.000000 0.000000 0.0 0.000000 \n",
"\n",
" wolsc_kb2_4_14 wolsc_kb2_4_15 wolsc_kb2_4_18 wolsc_kb2_4_19 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"\n",
" wolsc_kb2_4_22 ... wolsc_kb3_2_1 sc_0113_pa_19 sc_0113_pa_3 \\\n",
"0 0.0 ... 0.0 0.000000 0.0 \n",
"1 0.0 ... 0.0 2.275709 0.0 \n",
"2 0.0 ... 0.0 0.000000 0.0 \n",
"3 0.0 ... 0.0 0.000000 0.0 \n",
"4 0.0 ... 0.0 0.000000 0.0 \n",
"\n",
" sc_0113_pa_44 sc_0113_pa_52 sc_0113_pa_58 sc_0113_pa_59 sc_0113_pa_60 \\\n",
"0 0.0 0.0 0.0 0.000000 0.0 \n",
"1 0.0 0.0 0.0 3.614329 0.0 \n",
"2 0.0 0.0 0.0 0.000000 0.0 \n",
"3 0.0 0.0 0.0 0.000000 0.0 \n",
"4 0.0 0.0 0.0 0.000000 0.0 \n",
"\n",
" sc_0113_pa_68 sc_0113_pa_83 \n",
"0 0.000000 0.000000 \n",
"1 4.642478 3.406784 \n",
"2 0.000000 0.000000 \n",
"3 0.000000 0.000000 \n",
"4 0.000000 0.000000 \n",
"\n",
"[5 rows x 239 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the expression matrix.\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "79f69437-84cc-42c3-b6b3-8676753bba89",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\n",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n",
"100 26662 100 26662 0 0 32796 0 --:--:-- --:--:-- --:--:-- 32834\n",
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"\n",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0curl: (6) Could not resolve host: mmc4.xlsx\n"
]
}
],
"source": [
"# Supplementary table mmc4.xlsx of the article with DOI 10.1016/j.cell.2016.04.046.\n",
"URL = \"https://www.cell.com/cms/10.1016/j.cell.2016.04.046/attachment/ccb8f6e8-4822-4e06-9400-2eccfd98dd56/mmc4.xlsx\"\n",
"# Use -o to name the output file explicitly. curl's -O takes no argument, so a file\n",
"# name placed after it is parsed as a second URL and curl exits with error 6.\n",
"# -f fails on HTTP errors instead of saving an error page; -L follows redirects.\n",
"!curl -fL -o mmc4.xlsx \"{URL}\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8d8cf849-9d92-4d5d-bc81-9f1302aba4a5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"46hpc 86\n",
"3hpc 67\n",
"16hpc 55\n",
"Uncut 30\n",
"Name: Timepoint, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the per-cell annotations (column names are on the second sheet row) and\n",
"# keep only the three columns used below.\n",
"Metadata = (\n",
"    pd.read_excel('mmc4.xlsx', header=1)\n",
"    .loc[:, [\"Cell\", \"Timepoint\", \"Identity\"]]\n",
")\n",
"# Number of cells per timepoint.\n",
"Metadata[\"Timepoint\"].value_counts()"
]
},
{
"cell_type": "markdown",
"id": "1c90bb7e-5e1e-44d7-9e00-f4780f1c0ed1",
"metadata": {},
"source": [
"## Uncut"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8ef1763a-1451-425a-96c3-ed3d2b5483ed",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(30, 3)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Cell | \n",
" Timepoint | \n",
" Identity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" wolsc_kb2_4_10 | \n",
" Uncut | \n",
" Pericycle | \n",
"
\n",
" \n",
" 1 | \n",
" wolsc_kb2_4_1 | \n",
" Uncut | \n",
" Vasculature | \n",
"
\n",
" \n",
" 2 | \n",
" wolsc_kb2_4_18 | \n",
" Uncut | \n",
" Vasculature | \n",
"
\n",
" \n",
" 3 | \n",
" wolsc_kb2_4_22 | \n",
" Uncut | \n",
" Vasculature | \n",
"
\n",
" \n",
" 4 | \n",
" wolsc_kb2_4_26 | \n",
" Uncut | \n",
" Vasculature | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Cell Timepoint Identity\n",
"0 wolsc_kb2_4_10 Uncut Pericycle\n",
"1 wolsc_kb2_4_1 Uncut Vasculature\n",
"2 wolsc_kb2_4_18 Uncut Vasculature\n",
"3 wolsc_kb2_4_22 Uncut Vasculature\n",
"4 wolsc_kb2_4_26 Uncut Vasculature"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Subset the metadata to the cells whose Timepoint is \"Uncut\".\n",
"Metadata_Uncut = Metadata.loc[Metadata[\"Timepoint\"].eq(\"Uncut\")]\n",
"print(Metadata_Uncut.shape)\n",
"# Map each cell ID to \"<cell ID>_<identity>\"; used to relabel expression columns.\n",
"Dic_uncut = {\n",
"    cell: cell + \"_\" + identity\n",
"    for cell, identity in zip(Metadata_Uncut[\"Cell\"], Metadata_Uncut[\"Identity\"])\n",
"}\n",
"Metadata_Uncut.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "6c612bec-a390-4beb-ade7-e2d69e1c57e4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gene_ID | \n",
" Gene_name | \n",
" wolsc_kb2_4_10_Pericycle | \n",
" wolsc_kb2_4_1_Vasculature | \n",
" wolsc_kb2_4_18_Vasculature | \n",
" wolsc_kb2_4_22_Vasculature | \n",
" wolsc_kb2_4_26_Vasculature | \n",
" wolsc_kb2_4_27_Vasculature | \n",
" wolsc_kb2_4_30_Vasculature | \n",
" wolsc_kb2_4_41_Vasculature | \n",
" ... | \n",
" wolsc_kb2_4_11_Unknown | \n",
" wolsc_kb2_4_13_Unknown | \n",
" wolsc_kb2_4_14_Unknown | \n",
" wolsc_kb2_4_15_Unknown | \n",
" wolsc_kb2_4_19_Unknown | \n",
" wolsc_kb2_4_24_Unknown | \n",
" wolsc_kb2_4_66_Unknown | \n",
" wolsc_kb2_4_76_Unknown | \n",
" wolsc_kb2_4_78_Unknown | \n",
" wolsc_kb2_4_80_Unknown | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AT1G01010 | \n",
" AT1G01010 | \n",
" 7.702431 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" AT1G01020 | \n",
" AT1G01020 | \n",
" 0.000000 | \n",
" 8.378906 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 4.298833 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 11.596565 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" AT1G01030 | \n",
" AT1G01030 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" AT1G01040 | \n",
" AT1G01040 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" AT1G01046 | \n",
" AT1G01046 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 32 columns
\n",
"
"
],
"text/plain": [
" Gene_ID Gene_name wolsc_kb2_4_10_Pericycle wolsc_kb2_4_1_Vasculature \\\n",
"0 AT1G01010 AT1G01010 7.702431 0.000000 \n",
"1 AT1G01020 AT1G01020 0.000000 8.378906 \n",
"2 AT1G01030 AT1G01030 0.000000 0.000000 \n",
"3 AT1G01040 AT1G01040 0.000000 0.000000 \n",
"4 AT1G01046 AT1G01046 0.000000 0.000000 \n",
"\n",
" wolsc_kb2_4_18_Vasculature wolsc_kb2_4_22_Vasculature \\\n",
"0 0.0 0.0 \n",
"1 0.0 0.0 \n",
"2 0.0 0.0 \n",
"3 0.0 0.0 \n",
"4 0.0 0.0 \n",
"\n",
" wolsc_kb2_4_26_Vasculature wolsc_kb2_4_27_Vasculature \\\n",
"0 0.0 0.0 \n",
"1 0.0 0.0 \n",
"2 0.0 0.0 \n",
"3 0.0 0.0 \n",
"4 0.0 0.0 \n",
"\n",
" wolsc_kb2_4_30_Vasculature wolsc_kb2_4_41_Vasculature ... \\\n",
"0 0.0 0.0 ... \n",
"1 0.0 0.0 ... \n",
"2 0.0 0.0 ... \n",
"3 0.0 0.0 ... \n",
"4 0.0 0.0 ... \n",
"\n",
" wolsc_kb2_4_11_Unknown wolsc_kb2_4_13_Unknown wolsc_kb2_4_14_Unknown \\\n",
"0 0.0 0.000000 0.0 \n",
"1 0.0 4.298833 0.0 \n",
"2 0.0 0.000000 0.0 \n",
"3 0.0 0.000000 0.0 \n",
"4 0.0 0.000000 0.0 \n",
"\n",
" wolsc_kb2_4_15_Unknown wolsc_kb2_4_19_Unknown wolsc_kb2_4_24_Unknown \\\n",
"0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 \n",
"\n",
" wolsc_kb2_4_66_Unknown wolsc_kb2_4_76_Unknown wolsc_kb2_4_78_Unknown \\\n",
"0 0.0 0.0 0.000000 \n",
"1 0.0 0.0 11.596565 \n",
"2 0.0 0.0 0.000000 \n",
"3 0.0 0.0 0.000000 \n",
"4 0.0 0.0 0.000000 \n",
"\n",
" wolsc_kb2_4_80_Unknown \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 0.0 \n",
"\n",
"[5 rows x 32 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Raw expression table for the Uncut cells: Locus plus one column per cell.\n",
"df_Uncut = df[['Locus', *Metadata_Uncut['Cell']]]\n",
"df_Uncut.to_csv(Dir_Expression + \"Expr_Uncut.csv\", index=False)\n",
"# WGCNA input: Gene_ID and Gene_name (a copy of the ID) lead, the cells follow.\n",
"df_Uncut = (\n",
"    df_Uncut\n",
"    .rename(columns={\"Locus\": \"Gene_ID\"})\n",
"    .assign(Gene_name=lambda frame: frame['Gene_ID'])\n",
")\n",
"df_Uncut_cols = ['Gene_ID', 'Gene_name'] + [\n",
"    col for col in df_Uncut.columns if col not in ('Gene_ID', 'Gene_name')\n",
"]\n",
"# Relabel each cell column as \"<cell ID>_<identity>\" before exporting.\n",
"df_Uncut = df_Uncut[df_Uncut_cols].rename(columns=Dic_uncut)\n",
"df_Uncut.to_csv(Dir_WGCNA + \"WGCNA_input_Uncut.csv\", index=False)\n",
"df_Uncut.head()"
]
},
{
"cell_type": "markdown",
"id": "a407b5f4-81f6-4645-9bad-3b6325b045bf",
"metadata": {},
"source": [
"## 3hpc"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8b2bdb8e-596d-4665-bb61-246af634e4d3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(67, 3)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Cell | \n",
" Timepoint | \n",
" Identity | \n",
"
\n",
" \n",
" \n",
" \n",
" 30 | \n",
" sc_1228_pa_30 | \n",
" 3hpc | \n",
" Epidermis\\LRC | \n",
"
\n",
" \n",
" 31 | \n",
" wolsc_kb2_3_13 | \n",
" 3hpc | \n",
" Mixed distal | \n",
"
\n",
" \n",
" 32 | \n",
" wolsc_kb2_3_14 | \n",
" 3hpc | \n",
" Mixed distal | \n",
"
\n",
" \n",
" 33 | \n",
" wolsc_kb2_3_2 | \n",
" 3hpc | \n",
" Mixed distal | \n",
"
\n",
" \n",
" 34 | \n",
" wolsc_kb2_3_27 | \n",
" 3hpc | \n",
" Mixed distal | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Cell Timepoint Identity\n",
"30 sc_1228_pa_30 3hpc Epidermis\\LRC\n",
"31 wolsc_kb2_3_13 3hpc Mixed distal\n",
"32 wolsc_kb2_3_14 3hpc Mixed distal\n",
"33 wolsc_kb2_3_2 3hpc Mixed distal\n",
"34 wolsc_kb2_3_27 3hpc Mixed distal"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Subset the metadata to the cells whose Timepoint is \"3hpc\".\n",
"Metadata_3hpc = Metadata.loc[Metadata[\"Timepoint\"].eq(\"3hpc\")]\n",
"print(Metadata_3hpc.shape)\n",
"# Map each cell ID to \"<cell ID>_<identity>\"; used to relabel expression columns.\n",
"Dic_3hpc = {\n",
"    cell: cell + \"_\" + identity\n",
"    for cell, identity in zip(Metadata_3hpc[\"Cell\"], Metadata_3hpc[\"Identity\"])\n",
"}\n",
"Metadata_3hpc.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "a9f7c0eb-dce6-4cda-90e4-ed3af6568ed6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gene_ID | \n",
" Gene_name | \n",
" sc_1228_pa_30_Epidermis\\LRC | \n",
" wolsc_kb2_3_13_Mixed distal | \n",
" wolsc_kb2_3_14_Mixed distal | \n",
" wolsc_kb2_3_2_Mixed distal | \n",
" wolsc_kb2_3_27_Mixed distal | \n",
" wolsc_kb2_3_51_Mixed distal | \n",
" sc_1228_pa_14_Mixed distal | \n",
" sc_1228_pa_86_Mixed distal | \n",
" ... | \n",
" sc_1228_pb_5_Unknown | \n",
" sc_1228_pb_70_Unknown | \n",
" sc_1228_pb_78_Unknown | \n",
" sc_1228_pb_86_Unknown | \n",
" sc_1228_pb_93_Unknown | \n",
" sc_1228_pa_36_Unknown | \n",
" sc_1228_pa_57_Unknown | \n",
" sc_1228_pa_77_Unknown | \n",
" sc_1228_pa_78_Unknown | \n",
" sc_1228_pa_85_Unknown | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AT1G01010 | \n",
" AT1G01010 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 3.829904 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" AT1G01020 | \n",
" AT1G01020 | \n",
" 0.0 | \n",
" 7.092747 | \n",
" 5.949744 | \n",
" 7.912041 | \n",
" 0.0 | \n",
" 6.881387 | \n",
" 3.328156 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.212596 | \n",
"
\n",
" \n",
" 2 | \n",
" AT1G01030 | \n",
" AT1G01030 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" AT1G01040 | \n",
" AT1G01040 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.963564 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" AT1G01046 | \n",
" AT1G01046 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 69 columns
\n",
"
"
],
"text/plain": [
" Gene_ID Gene_name sc_1228_pa_30_Epidermis\\LRC \\\n",
"0 AT1G01010 AT1G01010 0.0 \n",
"1 AT1G01020 AT1G01020 0.0 \n",
"2 AT1G01030 AT1G01030 0.0 \n",
"3 AT1G01040 AT1G01040 0.0 \n",
"4 AT1G01046 AT1G01046 0.0 \n",
"\n",
" wolsc_kb2_3_13_Mixed distal wolsc_kb2_3_14_Mixed distal \\\n",
"0 0.000000 0.000000 \n",
"1 7.092747 5.949744 \n",
"2 0.000000 0.000000 \n",
"3 0.000000 0.000000 \n",
"4 0.000000 0.000000 \n",
"\n",
" wolsc_kb2_3_2_Mixed distal wolsc_kb2_3_27_Mixed distal \\\n",
"0 3.829904 0.0 \n",
"1 7.912041 0.0 \n",
"2 0.000000 0.0 \n",
"3 0.000000 0.0 \n",
"4 0.000000 0.0 \n",
"\n",
" wolsc_kb2_3_51_Mixed distal sc_1228_pa_14_Mixed distal \\\n",
"0 0.000000 0.000000 \n",
"1 6.881387 3.328156 \n",
"2 0.000000 0.000000 \n",
"3 0.000000 0.000000 \n",
"4 0.000000 0.000000 \n",
"\n",
" sc_1228_pa_86_Mixed distal ... sc_1228_pb_5_Unknown \\\n",
"0 0.0 ... 0.0 \n",
"1 0.0 ... 0.0 \n",
"2 0.0 ... 0.0 \n",
"3 0.0 ... 0.0 \n",
"4 0.0 ... 0.0 \n",
"\n",
" sc_1228_pb_70_Unknown sc_1228_pb_78_Unknown sc_1228_pb_86_Unknown \\\n",
"0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 \n",
"\n",
" sc_1228_pb_93_Unknown sc_1228_pa_36_Unknown sc_1228_pa_57_Unknown \\\n",
"0 0.0 0.0 0.000000 \n",
"1 0.0 0.0 0.000000 \n",
"2 0.0 0.0 0.000000 \n",
"3 0.0 0.0 3.963564 \n",
"4 0.0 0.0 0.000000 \n",
"\n",
" sc_1228_pa_77_Unknown sc_1228_pa_78_Unknown sc_1228_pa_85_Unknown \n",
"0 0.0 0.0 0.000000 \n",
"1 0.0 0.0 2.212596 \n",
"2 0.0 0.0 0.000000 \n",
"3 0.0 0.0 0.000000 \n",
"4 0.0 0.0 0.000000 \n",
"\n",
"[5 rows x 69 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Raw expression table for the 3hpc cells: Locus plus one column per cell.\n",
"df_3hpc = df[['Locus', *Metadata_3hpc['Cell']]]\n",
"df_3hpc.to_csv(Dir_Expression + \"Expr_3hpc.csv\", index=False)\n",
"# WGCNA input: Gene_ID and Gene_name (a copy of the ID) lead, the cells follow.\n",
"df_3hpc = (\n",
"    df_3hpc\n",
"    .rename(columns={\"Locus\": \"Gene_ID\"})\n",
"    .assign(Gene_name=lambda frame: frame['Gene_ID'])\n",
")\n",
"df_3hpc_cols = ['Gene_ID', 'Gene_name'] + [\n",
"    col for col in df_3hpc.columns if col not in ('Gene_ID', 'Gene_name')\n",
"]\n",
"# Relabel each cell column as \"<cell ID>_<identity>\" before exporting.\n",
"df_3hpc = df_3hpc[df_3hpc_cols].rename(columns=Dic_3hpc)\n",
"df_3hpc.to_csv(Dir_WGCNA + \"WGCNA_input_3hpc.csv\", index=False)\n",
"df_3hpc.head()"
]
},
{
"cell_type": "markdown",
"id": "7a77a4bd-9e1c-46f1-a1e1-4134bccdd13c",
"metadata": {},
"source": [
"END"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}