{ "cells": [ { "cell_type": "markdown", "id": "09774124-8ae3-4102-934a-d38780b53eac", "metadata": {}, "source": [ "# Download expression data" ] }, { "cell_type": "code", "execution_count": 1, "id": "e951c648-a373-470a-a5aa-ed678de40b05", "metadata": {}, "outputs": [], "source": [ "import GEOparse\n", "import pandas as pd\n", "import numpy as np\n", "import re\n", "from pathlib import Path\n", "import pylab as pl\n", "import seaborn as sns\n", "# Matplotlib defaults: figure size plus tick, axis-label and legend font sizes\n", "pl.rcParams.update({\n", "    'figure.figsize': (14, 10),\n", "    'ytick.labelsize': 12,\n", "    'xtick.labelsize': 11,\n", "    'axes.labelsize': 23,\n", "    'legend.fontsize': 20,\n", "})\n", "sns.set_style('ticks')\n", "# First four colours of seaborn's Set1 palette\n", "c1, c2, c3, c4 = sns.color_palette(\"Set1\", 4)" ] }, { "cell_type": "code", "execution_count": 2, "id": "6cb55a19-bbb6-4bab-978d-0fcedc753268", "metadata": {}, "outputs": [], "source": [ "# !pip install GEOparse" ] }, { "cell_type": "code", "execution_count": 3, "id": "cecc6a63-5b5f-436f-8178-0ae082aef0d7", "metadata": {}, "outputs": [], "source": [ "# Folder that receives the per-timepoint Expr_*.csv tables\n", "Dir_Expression = \"1_Expression_data/\"\n", "Path(Dir_Expression).mkdir(exist_ok=True, parents=True)" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ece80d2-9ed3-4843-b4c5-af811b39ed17", "metadata": {}, "outputs": [], "source": [ "# Folder that receives the WGCNA_input_*.csv tables\n", "Dir_WGCNA = \"2_WGCNA_data/\"\n", "Path(Dir_WGCNA).mkdir(exist_ok=True, parents=True)" ] }, { "cell_type": "code", "execution_count": 5, "id": "5f0f662d-5430-4d99-8c3b-e87d097512f9", "metadata": {}, "outputs": [], "source": [ "# Folder for GRN files; no cell in this notebook writes to it - TODO confirm where it is used\n", "Dir_GRN = \"3_GRN_data/\"\n", "Path(Dir_GRN).mkdir(exist_ok=True, parents=True)" ] }, { "cell_type": "markdown", "id": "7bdcb850-eea9-408c-96ba-7659c2b6f58f", "metadata": {}, "source": [ "## Download data" ] }, { "cell_type": "code", "execution_count": 6, "id": "452e20a4-9428-48b3-8f47-5513aa4d231a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left 
Speed\n", "\n", " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", " 3 6608k 3 208k 0 0 360k 0 0:00:18 --:--:-- 0:00:18 360k\n", " 77 6608k 77 5152k 0 0 3232k 0 0:00:02 0:00:01 0:00:01 3232k\n", "100 6608k 100 6608k 0 0 3570k 0 0:00:01 0:00:01 --:--:-- 3570k\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "\n", " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0curl: (6) Could not resolve host: GSE74488_sc_expression.csv.gz\n" ] } ], "source": [ "# Single-cell expression matrix from the supplementary files of GEO series GSE74488\n", "URL = 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE74nnn/GSE74488/suppl/GSE74488_sc_expression.csv.gz'\n", "# curl's -O takes no argument, so a filename written after it is parsed as a second URL\n", "# (that is the 'Could not resolve host' error). -o names the local file explicitly,\n", "# -f aborts on HTTP errors instead of saving the error page, -L follows redirects.\n", "!curl -fL -o GSE74488_sc_expression.csv.gz \"{URL}\"" ] }, { "cell_type": "code", "execution_count": 7, "id": "692b975e-9a42-4cb0-b331-3a3d06f97baf", "metadata": {}, "outputs": [], "source": [ "# Load the downloaded table; pandas infers gzip compression from the .gz suffix\n", "df = pd.read_csv(\"GSE74488_sc_expression.csv.gz\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "0c6e9fa6-0177-4220-add0-07ecaec531cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Locuswolsc_kb2_4_1wolsc_kb2_4_10wolsc_kb2_4_11wolsc_kb2_4_13wolsc_kb2_4_14wolsc_kb2_4_15wolsc_kb2_4_18wolsc_kb2_4_19wolsc_kb2_4_22...wolsc_kb3_2_1sc_0113_pa_19sc_0113_pa_3sc_0113_pa_44sc_0113_pa_52sc_0113_pa_58sc_0113_pa_59sc_0113_pa_60sc_0113_pa_68sc_0113_pa_83
0AT1G010100.0000007.7024310.00.0000000.00.00.00.00.0...0.00.0000000.00.00.00.00.0000000.00.0000000.000000
1AT1G010208.3789060.0000000.04.2988330.00.00.00.00.0...0.02.2757090.00.00.00.03.6143290.04.6424783.406784
2AT1G010300.0000000.0000000.00.0000000.00.00.00.00.0...0.00.0000000.00.00.00.00.0000000.00.0000000.000000
3AT1G010400.0000000.0000000.00.0000000.00.00.00.00.0...0.00.0000000.00.00.00.00.0000000.00.0000000.000000
4AT1G010460.0000000.0000000.00.0000000.00.00.00.00.0...0.00.0000000.00.00.00.00.0000000.00.0000000.000000
\n", "

5 rows × 239 columns

\n", "
" ], "text/plain": [ " Locus wolsc_kb2_4_1 wolsc_kb2_4_10 wolsc_kb2_4_11 wolsc_kb2_4_13 \\\n", "0 AT1G01010 0.000000 7.702431 0.0 0.000000 \n", "1 AT1G01020 8.378906 0.000000 0.0 4.298833 \n", "2 AT1G01030 0.000000 0.000000 0.0 0.000000 \n", "3 AT1G01040 0.000000 0.000000 0.0 0.000000 \n", "4 AT1G01046 0.000000 0.000000 0.0 0.000000 \n", "\n", " wolsc_kb2_4_14 wolsc_kb2_4_15 wolsc_kb2_4_18 wolsc_kb2_4_19 \\\n", "0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 \n", "\n", " wolsc_kb2_4_22 ... wolsc_kb3_2_1 sc_0113_pa_19 sc_0113_pa_3 \\\n", "0 0.0 ... 0.0 0.000000 0.0 \n", "1 0.0 ... 0.0 2.275709 0.0 \n", "2 0.0 ... 0.0 0.000000 0.0 \n", "3 0.0 ... 0.0 0.000000 0.0 \n", "4 0.0 ... 0.0 0.000000 0.0 \n", "\n", " sc_0113_pa_44 sc_0113_pa_52 sc_0113_pa_58 sc_0113_pa_59 sc_0113_pa_60 \\\n", "0 0.0 0.0 0.0 0.000000 0.0 \n", "1 0.0 0.0 0.0 3.614329 0.0 \n", "2 0.0 0.0 0.0 0.000000 0.0 \n", "3 0.0 0.0 0.0 0.000000 0.0 \n", "4 0.0 0.0 0.0 0.000000 0.0 \n", "\n", " sc_0113_pa_68 sc_0113_pa_83 \n", "0 0.000000 0.000000 \n", "1 4.642478 3.406784 \n", "2 0.000000 0.000000 \n", "3 0.000000 0.000000 \n", "4 0.000000 0.000000 \n", "\n", "[5 rows x 239 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "79f69437-84cc-42c3-b6b3-8676753bba89", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "\n", " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", "100 26662 100 26662 0 0 32796 0 --:--:-- --:--:-- --:--:-- 32834\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "\n", " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0curl: (6) Could not resolve host: mmc4.xlsx\n" ] } ], "source": [ "# Attachment mmc4.xlsx hosted on cell.com (article DOI 10.1016/j.cell.2016.04.046)\n", "URL = \"https://www.cell.com/cms/10.1016/j.cell.2016.04.046/attachment/ccb8f6e8-4822-4e06-9400-2eccfd98dd56/mmc4.xlsx\"\n", "# curl's -O takes no argument, so a filename written after it is parsed as a second URL\n", "# (that is the 'Could not resolve host' error). -o names the local file explicitly,\n", "# -f aborts on HTTP errors instead of saving the error page, -L follows redirects.\n", "!curl -fL -o mmc4.xlsx \"{URL}\"" ] }, { "cell_type": "code", "execution_count": 10, "id": "8d8cf849-9d92-4d5d-bc81-9f1302aba4a5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "46hpc 86\n", "3hpc 67\n", "16hpc 55\n", "Uncut 30\n", "Name: Timepoint, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# header=1: column names are read from the second row of the sheet\n", "Metadata = pd.read_excel('mmc4.xlsx', header=1)\n", "# Keep only the three columns the cells below rely on\n", "Metadata = Metadata[[\"Cell\", \"Timepoint\", \"Identity\"]]\n", "Metadata.Timepoint.value_counts()" ] }, { "cell_type": "markdown", "id": "1c90bb7e-5e1e-44d7-9e00-f4780f1c0ed1", "metadata": {}, "source": [ "## Uncut" ] }, { "cell_type": "code", "execution_count": 11, "id": "8ef1763a-1451-425a-96c3-ed3d2b5483ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(30, 3)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CellTimepointIdentity
0wolsc_kb2_4_10UncutPericycle
1wolsc_kb2_4_1UncutVasculature
2wolsc_kb2_4_18UncutVasculature
3wolsc_kb2_4_22UncutVasculature
4wolsc_kb2_4_26UncutVasculature
\n", "
" ], "text/plain": [ " Cell Timepoint Identity\n", "0 wolsc_kb2_4_10 Uncut Pericycle\n", "1 wolsc_kb2_4_1 Uncut Vasculature\n", "2 wolsc_kb2_4_18 Uncut Vasculature\n", "3 wolsc_kb2_4_22 Uncut Vasculature\n", "4 wolsc_kb2_4_26 Uncut Vasculature" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Metadata rows belonging to the Uncut timepoint\n", "Metadata_Uncut = Metadata.loc[Metadata[\"Timepoint\"].eq(\"Uncut\")]\n", "print(Metadata_Uncut.shape)\n", "# Map every cell ID to the label \"<Cell>_<Identity>\"\n", "Dic_uncut = {cell: cell + \"_\" + identity for cell, identity in zip(Metadata_Uncut[\"Cell\"], Metadata_Uncut[\"Identity\"])}\n", "Metadata_Uncut.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "6c612bec-a390-4beb-ade7-e2d69e1c57e4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Gene_IDGene_namewolsc_kb2_4_10_Pericyclewolsc_kb2_4_1_Vasculaturewolsc_kb2_4_18_Vasculaturewolsc_kb2_4_22_Vasculaturewolsc_kb2_4_26_Vasculaturewolsc_kb2_4_27_Vasculaturewolsc_kb2_4_30_Vasculaturewolsc_kb2_4_41_Vasculature...wolsc_kb2_4_11_Unknownwolsc_kb2_4_13_Unknownwolsc_kb2_4_14_Unknownwolsc_kb2_4_15_Unknownwolsc_kb2_4_19_Unknownwolsc_kb2_4_24_Unknownwolsc_kb2_4_66_Unknownwolsc_kb2_4_76_Unknownwolsc_kb2_4_78_Unknownwolsc_kb2_4_80_Unknown
0AT1G01010AT1G010107.7024310.0000000.00.00.00.00.00.0...0.00.0000000.00.00.00.00.00.00.0000000.0
1AT1G01020AT1G010200.0000008.3789060.00.00.00.00.00.0...0.04.2988330.00.00.00.00.00.011.5965650.0
2AT1G01030AT1G010300.0000000.0000000.00.00.00.00.00.0...0.00.0000000.00.00.00.00.00.00.0000000.0
3AT1G01040AT1G010400.0000000.0000000.00.00.00.00.00.0...0.00.0000000.00.00.00.00.00.00.0000000.0
4AT1G01046AT1G010460.0000000.0000000.00.00.00.00.00.0...0.00.0000000.00.00.00.00.00.00.0000000.0
\n", "

5 rows × 32 columns

\n", "
" ], "text/plain": [ " Gene_ID Gene_name wolsc_kb2_4_10_Pericycle wolsc_kb2_4_1_Vasculature \\\n", "0 AT1G01010 AT1G01010 7.702431 0.000000 \n", "1 AT1G01020 AT1G01020 0.000000 8.378906 \n", "2 AT1G01030 AT1G01030 0.000000 0.000000 \n", "3 AT1G01040 AT1G01040 0.000000 0.000000 \n", "4 AT1G01046 AT1G01046 0.000000 0.000000 \n", "\n", " wolsc_kb2_4_18_Vasculature wolsc_kb2_4_22_Vasculature \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 0.0 0.0 \n", "\n", " wolsc_kb2_4_26_Vasculature wolsc_kb2_4_27_Vasculature \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 0.0 0.0 \n", "\n", " wolsc_kb2_4_30_Vasculature wolsc_kb2_4_41_Vasculature ... \\\n", "0 0.0 0.0 ... \n", "1 0.0 0.0 ... \n", "2 0.0 0.0 ... \n", "3 0.0 0.0 ... \n", "4 0.0 0.0 ... \n", "\n", " wolsc_kb2_4_11_Unknown wolsc_kb2_4_13_Unknown wolsc_kb2_4_14_Unknown \\\n", "0 0.0 0.000000 0.0 \n", "1 0.0 4.298833 0.0 \n", "2 0.0 0.000000 0.0 \n", "3 0.0 0.000000 0.0 \n", "4 0.0 0.000000 0.0 \n", "\n", " wolsc_kb2_4_15_Unknown wolsc_kb2_4_19_Unknown wolsc_kb2_4_24_Unknown \\\n", "0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 \n", "\n", " wolsc_kb2_4_66_Unknown wolsc_kb2_4_76_Unknown wolsc_kb2_4_78_Unknown \\\n", "0 0.0 0.0 0.000000 \n", "1 0.0 0.0 11.596565 \n", "2 0.0 0.0 0.000000 \n", "3 0.0 0.0 0.000000 \n", "4 0.0 0.0 0.000000 \n", "\n", " wolsc_kb2_4_80_Unknown \n", "0 0.0 \n", "1 0.0 \n", "2 0.0 \n", "3 0.0 \n", "4 0.0 \n", "\n", "[5 rows x 32 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Locus column plus the Uncut cell columns, saved with the original headers\n", "df_Uncut = df[['Locus'] + Metadata_Uncut['Cell'].tolist()]\n", "df_Uncut.to_csv(Dir_Expression + \"Expr_Uncut.csv\", index=False)\n", "# Rename Locus to Gene_ID and duplicate it as Gene_name\n", "df_Uncut = df_Uncut.rename(columns={\"Locus\": \"Gene_ID\"}).assign(Gene_name=lambda frame: frame['Gene_ID'])\n", "# Gene columns first; the cell columns keep their existing order\n", "df_Uncut_cols = ['Gene_ID', 'Gene_name'] + [col for col in df_Uncut.columns if col not in ('Gene_ID', 'Gene_name')]\n", "# Relabel the cell columns through Dic_uncut, then write the WGCNA input table\n", "df_Uncut = df_Uncut[df_Uncut_cols].rename(columns=Dic_uncut)\n", "df_Uncut.to_csv(Dir_WGCNA + \"WGCNA_input_Uncut.csv\", index=False)\n", "df_Uncut.head()" ] }, { "cell_type": "markdown", "id": "a407b5f4-81f6-4645-9bad-3b6325b045bf", "metadata": {}, "source": [ "## 3hpc" ] }, { "cell_type": "code", "execution_count": 13, "id": "8b2bdb8e-596d-4665-bb61-246af634e4d3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(67, 3)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CellTimepointIdentity
30sc_1228_pa_303hpcEpidermis\\LRC
31wolsc_kb2_3_133hpcMixed distal
32wolsc_kb2_3_143hpcMixed distal
33wolsc_kb2_3_23hpcMixed distal
34wolsc_kb2_3_273hpcMixed distal
\n", "
" ], "text/plain": [ " Cell Timepoint Identity\n", "30 sc_1228_pa_30 3hpc Epidermis\\LRC\n", "31 wolsc_kb2_3_13 3hpc Mixed distal\n", "32 wolsc_kb2_3_14 3hpc Mixed distal\n", "33 wolsc_kb2_3_2 3hpc Mixed distal\n", "34 wolsc_kb2_3_27 3hpc Mixed distal" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Metadata rows belonging to the 3hpc timepoint\n", "Metadata_3hpc = Metadata.loc[Metadata[\"Timepoint\"].eq(\"3hpc\")]\n", "print(Metadata_3hpc.shape)\n", "# Map every cell ID to the label \"<Cell>_<Identity>\"\n", "Dic_3hpc = {cell: cell + \"_\" + identity for cell, identity in zip(Metadata_3hpc[\"Cell\"], Metadata_3hpc[\"Identity\"])}\n", "Metadata_3hpc.head()" ] }, { "cell_type": "code", "execution_count": 14, "id": "a9f7c0eb-dce6-4cda-90e4-ed3af6568ed6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Gene_IDGene_namesc_1228_pa_30_Epidermis\\LRCwolsc_kb2_3_13_Mixed distalwolsc_kb2_3_14_Mixed distalwolsc_kb2_3_2_Mixed distalwolsc_kb2_3_27_Mixed distalwolsc_kb2_3_51_Mixed distalsc_1228_pa_14_Mixed distalsc_1228_pa_86_Mixed distal...sc_1228_pb_5_Unknownsc_1228_pb_70_Unknownsc_1228_pb_78_Unknownsc_1228_pb_86_Unknownsc_1228_pb_93_Unknownsc_1228_pa_36_Unknownsc_1228_pa_57_Unknownsc_1228_pa_77_Unknownsc_1228_pa_78_Unknownsc_1228_pa_85_Unknown
0AT1G01010AT1G010100.00.0000000.0000003.8299040.00.0000000.0000000.0...0.00.00.00.00.00.00.0000000.00.00.000000
1AT1G01020AT1G010200.07.0927475.9497447.9120410.06.8813873.3281560.0...0.00.00.00.00.00.00.0000000.00.02.212596
2AT1G01030AT1G010300.00.0000000.0000000.0000000.00.0000000.0000000.0...0.00.00.00.00.00.00.0000000.00.00.000000
3AT1G01040AT1G010400.00.0000000.0000000.0000000.00.0000000.0000000.0...0.00.00.00.00.00.03.9635640.00.00.000000
4AT1G01046AT1G010460.00.0000000.0000000.0000000.00.0000000.0000000.0...0.00.00.00.00.00.00.0000000.00.00.000000
\n", "

5 rows × 69 columns

\n", "
" ], "text/plain": [ " Gene_ID Gene_name sc_1228_pa_30_Epidermis\\LRC \\\n", "0 AT1G01010 AT1G01010 0.0 \n", "1 AT1G01020 AT1G01020 0.0 \n", "2 AT1G01030 AT1G01030 0.0 \n", "3 AT1G01040 AT1G01040 0.0 \n", "4 AT1G01046 AT1G01046 0.0 \n", "\n", " wolsc_kb2_3_13_Mixed distal wolsc_kb2_3_14_Mixed distal \\\n", "0 0.000000 0.000000 \n", "1 7.092747 5.949744 \n", "2 0.000000 0.000000 \n", "3 0.000000 0.000000 \n", "4 0.000000 0.000000 \n", "\n", " wolsc_kb2_3_2_Mixed distal wolsc_kb2_3_27_Mixed distal \\\n", "0 3.829904 0.0 \n", "1 7.912041 0.0 \n", "2 0.000000 0.0 \n", "3 0.000000 0.0 \n", "4 0.000000 0.0 \n", "\n", " wolsc_kb2_3_51_Mixed distal sc_1228_pa_14_Mixed distal \\\n", "0 0.000000 0.000000 \n", "1 6.881387 3.328156 \n", "2 0.000000 0.000000 \n", "3 0.000000 0.000000 \n", "4 0.000000 0.000000 \n", "\n", " sc_1228_pa_86_Mixed distal ... sc_1228_pb_5_Unknown \\\n", "0 0.0 ... 0.0 \n", "1 0.0 ... 0.0 \n", "2 0.0 ... 0.0 \n", "3 0.0 ... 0.0 \n", "4 0.0 ... 0.0 \n", "\n", " sc_1228_pb_70_Unknown sc_1228_pb_78_Unknown sc_1228_pb_86_Unknown \\\n", "0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 \n", "\n", " sc_1228_pb_93_Unknown sc_1228_pa_36_Unknown sc_1228_pa_57_Unknown \\\n", "0 0.0 0.0 0.000000 \n", "1 0.0 0.0 0.000000 \n", "2 0.0 0.0 0.000000 \n", "3 0.0 0.0 3.963564 \n", "4 0.0 0.0 0.000000 \n", "\n", " sc_1228_pa_77_Unknown sc_1228_pa_78_Unknown sc_1228_pa_85_Unknown \n", "0 0.0 0.0 0.000000 \n", "1 0.0 0.0 2.212596 \n", "2 0.0 0.0 0.000000 \n", "3 0.0 0.0 0.000000 \n", "4 0.0 0.0 0.000000 \n", "\n", "[5 rows x 69 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Locus column plus the 3hpc cell columns, saved with the original headers\n", "df_3hpc = df[['Locus'] + Metadata_3hpc['Cell'].tolist()]\n", "df_3hpc.to_csv(Dir_Expression + \"Expr_3hpc.csv\", index=False)\n", "# Rename Locus to Gene_ID and duplicate it as Gene_name\n", "df_3hpc = df_3hpc.rename(columns={\"Locus\": \"Gene_ID\"}).assign(Gene_name=lambda frame: frame['Gene_ID'])\n", "# Gene columns first; the cell columns keep their existing order\n", "df_3hpc_cols = ['Gene_ID', 'Gene_name'] + [col for col in df_3hpc.columns if col not in ('Gene_ID', 'Gene_name')]\n", "# Relabel the cell columns through Dic_3hpc, then write the WGCNA input table\n", "df_3hpc = df_3hpc[df_3hpc_cols].rename(columns=Dic_3hpc)\n", "df_3hpc.to_csv(Dir_WGCNA + \"WGCNA_input_3hpc.csv\", index=False)\n", "df_3hpc.head()" ] }, { "cell_type": "markdown", "id": "7a77a4bd-9e1c-46f1-a1e1-4134bccdd13c", "metadata": {}, "source": [ "END" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 }