Part 1: Over-representation and Enrichment Analysis#

import pickle
import os
import sys
import matplotlib.pyplot as plt
import urllib
import urllib as ul
import urllib.request
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import json
import networkx as nx
import ast
from prettytable import PrettyTable
import gseapy as gp
from palettable import wesanderson

# Silence library warnings so the notebook output stays readable
import warnings
warnings.filterwarnings('ignore')

# Remove every pandas display limit so wide/long tables are printed in full
for display_option in ('display.max_colwidth', 'display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Make the directory above the working directory importable.
# NOTE: '__file__' is a string literal here (not the module attribute), so the
# path is resolved relative to the current working directory.
print(os.getcwd())
current_dir = os.path.dirname(os.path.abspath('__file__'))
parent_dir = os.path.abspath(os.path.join(current_dir, '..'))
sys.path.append(parent_dir)
print(parent_dir)
/Users/chaeeunlee/Documents/VSC_workspaces/ISMB_Test/Section2
/Users/chaeeunlee/Documents/VSC_workspaces/ISMB_Test

Load networks from Section 1

  • Gene correlation network

  • Patient network from TCGA gene expression data

  • Patient network from TCGA DNA methylation data

intermediate_data_dir = '/data/intermediate/'
raw_data_dir = '/data/raw/'

# Path to the gene co-expression network (.gml) written by Section 1.
# Only the gene network is loaded in this cell.
G_gxp_path = f'{intermediate_data_dir}section2_networks_v1/gene_coexpression_network.gml'

# Parse the GML file into a NetworkX graph and keep its node labels as a list
G_gxp = nx.read_gml(G_gxp_path)  # Gene correlation network
G_gxp_nodes_list = list(G_gxp)  # iterating a graph yields its nodes

# Path to the raw TCGA gene expression pickle
# tcga_dnam_path = 'section2_data/ISMB_TCGA_DNAm.pkl'  # TCGA DNA methylation data
tcga_gxp_path = f'{raw_data_dir}ISMB_TCGA_GE.pkl'  # TCGA Gene expression data

# NOTE(review): unpickling can execute arbitrary code; only load this file
# from a trusted source.
tcga_gxp = pd.read_pickle(tcga_gxp_path)

# Expression table with gene symbols, indexed by the 'GENES' column
tcga_gxp_df = pd.read_csv(f'{intermediate_data_dir}tcga_ge_df_symbols_t.csv').set_index('GENES')

# Metadata stored under the 'datMeta' key of the pickled object
tcga_gxp_meta = tcga_gxp['datMeta']

# Quick size check of the loaded network
print(f"Number of nodes in gene correlation network: {len(G_gxp_nodes_list)}")
Number of nodes in gene correlation network: 300

Custom function draw_network_with_node_attrs() draws a network with nodes colored and/or shaped based on their attributes. If communities are provided, nodes are colored by their community memberships. A legend is added to indicate the mapping of attributes to colors and shapes.

from functions import draw_network_with_node_attrs

# Args:
#     G (networkx.Graph): The graph to be drawn.
#     node_attributes (dict): A dictionary where keys are node names and values are dictionaries of attributes.
#     communities (List[List[Any]], optional): A list where each sublist contains the nodes belonging to a community. Default is None.
#     title (str, optional): The title of the plot. Default is 'Network Visualization'.
#     color_attr (str, optional): Node attribute to color nodes by. Default is None.
#     shape_attr (str, optional): Node attribute to shape nodes by. Default is None.
#     figsize (tuple, optional): The size of the figure. Default is (20, 10).
#     layout (str, optional): The layout algorithm for positioning nodes ('spring', 'circular', etc.). Default is 'spring'.
#     cmap_name (str, optional): The name of the colormap to use for coloring. Default is 'tab20'.
#     with_labels (bool, optional): Whether to draw labels for the nodes. Default is False.
# Report the dimensions of the expression table (header line, then the shape tuple)
### YOUR CODE HERE ###
print("\nShape of the dataframe 'tcga_gxp_df' (rows, columns):", tcga_gxp_df.shape, sep="\n")

# Report the column labels of the expression table (header line, then the Index)
### YOUR CODE HERE ###
print("\nList of columns in the dataframe 'tcga_gxp_df':", tcga_gxp_df.columns, sep="\n")
Shape of the dataframe 'tcga_gxp_df' (rows, columns):
(20244, 498)

List of columns in the dataframe 'tcga_gxp_df':
Index(['TCGA-38-7271', 'TCGA-55-7914', 'TCGA-95-7043', 'TCGA-73-4658',
       'TCGA-86-8076', 'TCGA-55-7726', 'TCGA-44-6147', 'TCGA-50-5932',
       'TCGA-44-2661', 'TCGA-86-7954',
       ...
       'TCGA-97-A4M7', 'TCGA-62-A46R', 'TCGA-50-5055', 'TCGA-38-4628',
       'TCGA-86-7713', 'TCGA-86-8073', 'TCGA-MN-A4N4', 'TCGA-53-7626',
       'TCGA-44-A47G', 'TCGA-55-6969'],
      dtype='object', length=498)

Network Annotation

We will use the Cancer Gene Census (CGC) from the Catalogue Of Somatic Mutations In Cancer (COSMIC) to annotate the genes in our network.

  • 'Tier'

    • To be classified into Tier 1, a gene must possess a documented activity relevant to cancer, along with evidence of mutations in cancer which change the activity of the gene product in a way that promotes oncogenic transformation.

    • Tier 2 consists of genes with strong indications of a role in cancer but with less extensive available evidence.

  • 'Hallmark'

    • New overviews of cancer gene function focused on hallmarks of cancer pull together manually curated information on the function of proteins coded by cancer genes and summarise the data in simple graphical form. They present a condensed overview of most relevant facts with quick access to the literature source, and define whether a gene has a stimulating or suppressive effect via individual cancer hallmarks.

# Load the COSMIC Cancer Gene Census export into a DataFrame.
# NOTE(review): this path is relative to the working directory, unlike the
# absolute '/data/...' directories used for the other inputs — confirm it is intended.
cancer_genes_path = 'section2_data/Census_allFri Jul  5 14_32_40 2024.csv'
cancer_genes_df = pd.read_csv(cancer_genes_path)
# List the available annotation columns (e.g. 'Tier', 'Hallmark')
print(cancer_genes_df.columns)
# Final bare expression: the notebook displays the first two rows
cancer_genes_df.head(2)
Index(['Gene Symbol', 'Name', 'Entrez GeneId', 'Genome Location', 'Tier',
       'Hallmark', 'Chr Band', 'Somatic', 'Germline', 'Tumour Types(Somatic)',
       'Tumour Types(Germline)', 'Cancer Syndrome', 'Tissue Type',
       'Molecular Genetics', 'Role in Cancer', 'Mutation Types',
       'Translocation Partner', 'Other Germline Mut', 'Other Syndrome',
       'Synonyms'],
      dtype='object')
Gene Symbol Name Entrez GeneId Genome Location Tier Hallmark Chr Band Somatic Germline Tumour Types(Somatic) Tumour Types(Germline) Cancer Syndrome Tissue Type Molecular Genetics Role in Cancer Mutation Types Translocation Partner Other Germline Mut Other Syndrome Synonyms
0 A1CF APOBEC1 complementation factor 29974.0 10:50799421-50885675 2 NaN 10q11.23 yes NaN melanoma NaN NaN E NaN oncogene Mis NaN NaN NaN ACF,ACF64,ACF65,APOBEC1CF,ASP,CCDS73133.1,ENSG00000148584.14,NM_001198819.1,NP_001185748
1 ABI1 abl-interactor 1 10006.0 10:26746593-26860935 1 Yes 10p12.1 yes NaN AML NaN NaN L Dom TSG, fusion T KMT2A NaN NaN ABI-1,CCDS7150.1,E3B1,ENSG00000136754.17,NM_005470.3,NP_005461,NP_001334958.1,Q8IZP0,SSH3BP1
# Keep only Tier 1 census entries and collect their gene symbols
cancer_genes_df = cancer_genes_df[cancer_genes_df['Tier'] == 1]
cancer_genes = cancer_genes_df['Gene Symbol'].tolist()

# Build the lookup set once so each membership test below is O(1)
# instead of scanning the whole list for every node.
cancer_gene_set = set(cancer_genes)

# Create the dictionary with node labels as keys and boolean as values:
# True when the node's label is a Tier 1 cancer gene symbol.

### YOUR CODE HERE ###
found_in_cancer_genes = {node: node in cancer_gene_set for node in G_gxp.nodes()}

# Node attributes are stored as {attribute_name: {node: value}}
node_attributes = {'cancer_gene': found_in_cancer_genes}
node_attributes
{'cancer_gene': {'A2M': False,
  'ABI3BP': False,
  'ACAP1': False,
  'ACP5': False,
  'ACTA2': False,
  'ACTG2': False,
  'ACVRL1': False,
  'ADAM12': False,
  'ADAMTS12': False,
  'ADAMTS2': False,
  'ADGRA2': False,
  'ADH1B': False,
  'AEBP1': False,
  'AIF1': False,
  'ALOX5': False,
  'ALOX5AP': False,
  'ANGPTL2': False,
  'ANTXR1': False,
  'AOC3': False,
  'APBB1IP': False,
  'APOC1': False,
  'APOL3': False,
  'ARHGAP30': False,
  'ARHGAP9': False,
  'ARHGEF6': False,
  'BCL6B': False,
  'BGN': False,
  'BIN2': False,
  'BTK': True,
  'C1QA': False,
  'C1QB': False,
  'C1QC': False,
  'C1orf162': False,
  'C3AR1': False,
  'C5AR1': False,
  'C7': False,
  'CALCRL': False,
  'CASP1': False,
  'CCDC80': False,
  'CCL5': False,
  'CCN4': False,
  'CCR1': False,
  'CCR5': False,
  'CD14': False,
  'CD163': False,
  'CD2': False,
  'CD300A': False,
  'CD34': False,
  'CD37': False,
  'CD3E': False,
  'CD4': False,
  'CD52': False,
  'CD53': False,
  'CD6': False,
  'CD74': True,
  'CD84': False,
  'CD86': False,
  'CD8A': False,
  'CD93': False,
  'CDH5': False,
  'CHRDL1': False,
  'CLEC14A': False,
  'CMKLR1': False,
  'COL10A1': False,
  'COL11A1': False,
  'COL15A1': False,
  'COL1A1': True,
  'COL1A2': False,
  'COL3A1': False,
  'COL4A1': False,
  'COL4A2': False,
  'COL5A1': False,
  'COL5A2': False,
  'COL6A3': False,
  'CORO1A': False,
  'CSF1R': False,
  'CSF2RB': False,
  'CTHRC1': False,
  'CXCL10': False,
  'CXCL9': False,
  'CYBB': False,
  'CYTH4': False,
  'CYTIP': False,
  'CYYR1': False,
  'DAB2': False,
  'DCHS1': False,
  'DCN': False,
  'DIPK2B': False,
  'DOCK2': False,
  'DOCK8': False,
  'DOK2': False,
  'DOK3': False,
  'EDNRB': False,
  'EMILIN1': False,
  'EMILIN2': False,
  'EPB41L2': False,
  'ERG': True,
  'EVI2B': False,
  'F13A1': False,
  'FAM78A': False,
  'FAP': False,
  'FBN1': False,
  'FCER1G': False,
  'FCGR2A': False,
  'FCGR3A': False,
  'FERMT3': False,
  'FGD2': False,
  'FGD5': False,
  'FGL2': False,
  'FGR': False,
  'FHL1': False,
  'FLI1': True,
  'FLT4': True,
  'FMO2': False,
  'FNDC1': False,
  'FOLR2': False,
  'FPR1': False,
  'FPR3': False,
  'FSTL1': False,
  'FYB1': False,
  'GAS7': True,
  'GBP1': False,
  'GBP4': False,
  'GBP5': False,
  'GIMAP4': False,
  'GIMAP6': False,
  'GIMAP7': False,
  'GIMAP8': False,
  'GLIPR2': False,
  'GMFG': False,
  'GREM1': False,
  'GYPC': False,
  'HAVCR2': False,
  'HCK': False,
  'HCP5': False,
  'HEG1': False,
  'HK3': False,
  'HLA-A': True,
  'HLA-B': False,
  'HLA-C': False,
  'HLA-DMA': False,
  'HLA-DMB': False,
  'HLA-DOA': False,
  'HLA-DPA1': False,
  'HLA-DPB1': False,
  'HLA-DQA1': False,
  'HLA-DQB1': False,
  'HLA-DRA': False,
  'HLA-DRB1': False,
  'HLA-DRB5': False,
  'HLA-F': False,
  'HTRA3': False,
  'IGSF6': False,
  'IKZF1': True,
  'IL10RA': False,
  'IL16': False,
  'IL2RB': False,
  'IL2RG': False,
  'IL33': False,
  'IL7R': True,
  'INMT': False,
  'IRAG1': False,
  'IRF8': False,
  'ISLR': False,
  'ITGA11': False,
  'ITGA4': False,
  'ITGA8': False,
  'ITGAL': False,
  'ITGAM': False,
  'ITGAX': False,
  'ITGB2': False,
  'JAML': False,
  'KCNAB2': False,
  'KLHL6': False,
  'LAIR1': False,
  'LAMA2': False,
  'LAMA4': False,
  'LAPTM5': False,
  'LCK': True,
  'LCP1': False,
  'LCP2': False,
  'LDB2': False,
  'LHFPL6': False,
  'LILRB2': False,
  'LILRB4': False,
  'LIPA': False,
  'LMCD1': False,
  'LMOD1': False,
  'LRRC15': False,
  'LRRC32': False,
  'LSP1': False,
  'LST1': False,
  'LTBP2': False,
  'LUM': False,
  'MARCO': False,
  'MFAP4': False,
  'MMP11': False,
  'MMP2': False,
  'MMRN2': False,
  'MNDA': False,
  'MPEG1': False,
  'MRC1': False,
  'MS4A4A': False,
  'MS4A6A': False,
  'MS4A7': False,
  'MSR1': False,
  'MSRB3': False,
  'MXRA5': False,
  'MYH11': True,
  'MYLK': False,
  'MYO1F': False,
  'NCF2': False,
  'NCF4': False,
  'NCKAP1L': False,
  'NFAM1': False,
  'NOTCH4': False,
  'OLFML2B': False,
  'PARVG': False,
  'PCDH12': False,
  'PCDH17': False,
  'PDGFRA': True,
  'PDGFRB': True,
  'PECAM1': False,
  'PIK3AP1': False,
  'PIK3CG': False,
  'PIK3R5': False,
  'PLEK': False,
  'PLXDC2': False,
  'PLXNC1': False,
  'PODN': False,
  'POSTN': False,
  'PPP1R16B': False,
  'PRELP': False,
  'PREX1': False,
  'PRKCB': False,
  'PRRX1': True,
  'PSMB9': False,
  'PTAFR': False,
  'PTGER4': False,
  'PTPN7': False,
  'PTPRB': True,
  'PTPRC': True,
  'RAMP2': False,
  'RAMP3': False,
  'RASAL3': False,
  'RASSF2': False,
  'RCSD1': False,
  'RNASE6': False,
  'ROBO4': False,
  'S1PR1': False,
  'SAMHD1': False,
  'SAMSN1': False,
  'SASH3': False,
  'SCN7A': False,
  'SELPLG': False,
  'SH2D3C': False,
  'SHANK3': False,
  'SIGLEC1': False,
  'SIRPA': False,
  'SLA': False,
  'SLC15A3': False,
  'SLC1A3': False,
  'SLC7A7': False,
  'SLCO2B1': False,
  'SLIT2': False,
  'SLIT3': False,
  'SPARC': False,
  'SPARCL1': False,
  'SPI1': False,
  'SPN': False,
  'SPON1': False,
  'SSC5D': False,
  'STARD8': False,
  'STAT1': False,
  'SULF1': False,
  'SVEP1': False,
  'SYNPO2': False,
  'TAGLN': False,
  'TAP1': False,
  'TBC1D10C': False,
  'TCF4': False,
  'THBS2': False,
  'THY1': False,
  'TIE1': False,
  'TLR4': False,
  'TNFSF13B': False,
  'TRAC': False,
  'TRBC1': False,
  'TRBC2': False,
  'TREM2': False,
  'TRPV2': False,
  'TYROBP': False,
  'UBE2L6': False,
  'UNC5B': False,
  'VCAN': False,
  'VSIG4': False,
  'VWF': False,
  'WAS': True,
  'ZEB1': False,
  'ZEB2': False}}

We are going to use 'KEGG_2021_Human' as the gene set. KEGG (Kyoto Encyclopedia of Genes and Genomes) is a knowledge base for systematic analysis of gene functions, linking genomic information with higher order functional information (Kanehisa et al., 2000).

# Name of the gene-set library used for every enrichment call below.
# Alternative gene sets like 'MSigDB_Hallmark_2020' can also be used.
gene_sets = 'KEGG_2021_Human'
# You can also retrieve and display the list of available gene sets
# NOTE(review): presumably this queries the Enrichr service and needs network access — confirm.
gene_set_list = gp.get_library_name()
print(gene_set_list)
['ARCHS4_Cell-lines', 'ARCHS4_IDG_Coexp', 'ARCHS4_Kinases_Coexp', 'ARCHS4_TFs_Coexp', 'ARCHS4_Tissues', 'Achilles_fitness_decrease', 'Achilles_fitness_increase', 'Aging_Perturbations_from_GEO_down', 'Aging_Perturbations_from_GEO_up', 'Allen_Brain_Atlas_10x_scRNA_2021', 'Allen_Brain_Atlas_down', 'Allen_Brain_Atlas_up', 'Azimuth_2023', 'Azimuth_Cell_Types_2021', 'BioCarta_2013', 'BioCarta_2015', 'BioCarta_2016', 'BioPlanet_2019', 'BioPlex_2017', 'CCLE_Proteomics_2020', 'CORUM', 'COVID-19_Related_Gene_Sets', 'COVID-19_Related_Gene_Sets_2021', 'Cancer_Cell_Line_Encyclopedia', 'CellMarker_2024', 'CellMarker_Augmented_2021', 'ChEA_2013', 'ChEA_2015', 'ChEA_2016', 'ChEA_2022', 'Chromosome_Location', 'Chromosome_Location_hg19', 'ClinVar_2019', 'DSigDB', 'Data_Acquisition_Method_Most_Popular_Genes', 'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019', 'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019', 'Descartes_Cell_Types_and_Tissue_2021', 'Diabetes_Perturbations_GEO_2022', 'DisGeNET', 'Disease_Perturbations_from_GEO_down', 'Disease_Perturbations_from_GEO_up', 'Disease_Signatures_from_GEO_down_2014', 'Disease_Signatures_from_GEO_up_2014', 'DrugMatrix', 'Drug_Perturbations_from_GEO_2014', 'Drug_Perturbations_from_GEO_down', 'Drug_Perturbations_from_GEO_up', 'ENCODE_Histone_Modifications_2013', 'ENCODE_Histone_Modifications_2015', 'ENCODE_TF_ChIP-seq_2014', 'ENCODE_TF_ChIP-seq_2015', 'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X', 'ESCAPE', 'Elsevier_Pathway_Collection', 'Enrichr_Libraries_Most_Popular_Genes', 'Enrichr_Submissions_TF-Gene_Coocurrence', 'Enrichr_Users_Contributed_Lists_2020', 'Epigenomics_Roadmap_HM_ChIP-seq', 'FANTOM6_lncRNA_KD_DEGs', 'GO_Biological_Process_2013', 'GO_Biological_Process_2015', 'GO_Biological_Process_2017', 'GO_Biological_Process_2017b', 'GO_Biological_Process_2018', 'GO_Biological_Process_2021', 'GO_Biological_Process_2023', 'GO_Cellular_Component_2013', 'GO_Cellular_Component_2015', 'GO_Cellular_Component_2017', 'GO_Cellular_Component_2017b', 
'GO_Cellular_Component_2018', 'GO_Cellular_Component_2021', 'GO_Cellular_Component_2023', 'GO_Molecular_Function_2013', 'GO_Molecular_Function_2015', 'GO_Molecular_Function_2017', 'GO_Molecular_Function_2017b', 'GO_Molecular_Function_2018', 'GO_Molecular_Function_2021', 'GO_Molecular_Function_2023', 'GTEx_Aging_Signatures_2021', 'GTEx_Tissue_Expression_Down', 'GTEx_Tissue_Expression_Up', 'GTEx_Tissues_V8_2023', 'GWAS_Catalog_2019', 'GWAS_Catalog_2023', 'GeDiPNet_2023', 'GeneSigDB', 'Gene_Perturbations_from_GEO_down', 'Gene_Perturbations_from_GEO_up', 'Genes_Associated_with_NIH_Grants', 'Genome_Browser_PWMs', 'GlyGen_Glycosylated_Proteins_2022', 'HDSigDB_Human_2021', 'HDSigDB_Mouse_2021', 'HMDB_Metabolites', 'HMS_LINCS_KinomeScan', 'HomoloGene', 'HuBMAP_ASCT_plus_B_augmented_w_RNAseq_Coexpression', 'HuBMAP_ASCTplusB_augmented_2022', 'HumanCyc_2015', 'HumanCyc_2016', 'Human_Gene_Atlas', 'Human_Phenotype_Ontology', 'IDG_Drug_Targets_2022', 'InterPro_Domains_2019', 'Jensen_COMPARTMENTS', 'Jensen_DISEASES', 'Jensen_TISSUES', 'KEA_2013', 'KEA_2015', 'KEGG_2013', 'KEGG_2015', 'KEGG_2016', 'KEGG_2019_Human', 'KEGG_2019_Mouse', 'KEGG_2021_Human', 'KOMP2_Mouse_Phenotypes_2022', 'Kinase_Perturbations_from_GEO_down', 'Kinase_Perturbations_from_GEO_up', 'L1000_Kinase_and_GPCR_Perturbations_down', 'L1000_Kinase_and_GPCR_Perturbations_up', 'LINCS_L1000_CRISPR_KO_Consensus_Sigs', 'LINCS_L1000_Chem_Pert_Consensus_Sigs', 'LINCS_L1000_Chem_Pert_down', 'LINCS_L1000_Chem_Pert_up', 'LINCS_L1000_Ligand_Perturbations_down', 'LINCS_L1000_Ligand_Perturbations_up', 'Ligand_Perturbations_from_GEO_down', 'Ligand_Perturbations_from_GEO_up', 'MAGMA_Drugs_and_Diseases', 'MAGNET_2023', 'MCF7_Perturbations_from_GEO_down', 'MCF7_Perturbations_from_GEO_up', 'MGI_Mammalian_Phenotype_2013', 'MGI_Mammalian_Phenotype_2017', 'MGI_Mammalian_Phenotype_Level_3', 'MGI_Mammalian_Phenotype_Level_4', 'MGI_Mammalian_Phenotype_Level_4_2019', 'MGI_Mammalian_Phenotype_Level_4_2021', 'MSigDB_Computational', 
'MSigDB_Hallmark_2020', 'MSigDB_Oncogenic_Signatures', 'Metabolomics_Workbench_Metabolites_2022', 'Microbe_Perturbations_from_GEO_down', 'Microbe_Perturbations_from_GEO_up', 'MoTrPAC_2023', 'Mouse_Gene_Atlas', 'NCI-60_Cancer_Cell_Lines', 'NCI-Nature_2015', 'NCI-Nature_2016', 'NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions', 'NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions', 'NIH_Funded_PIs_2017_Human_AutoRIF', 'NIH_Funded_PIs_2017_Human_GeneRIF', 'NURSA_Human_Endogenous_Complexome', 'OMIM_Disease', 'OMIM_Expanded', 'Old_CMAP_down', 'Old_CMAP_up', 'Orphanet_Augmented_2021', 'PFOCR_Pathways', 'PFOCR_Pathways_2023', 'PPI_Hub_Proteins', 'PanglaoDB_Augmented_2021', 'Panther_2015', 'Panther_2016', 'Pfam_Domains_2019', 'Pfam_InterPro_Domains', 'PheWeb_2019', 'PhenGenI_Association_2021', 'Phosphatase_Substrates_from_DEPOD', 'ProteomicsDB_2020', 'Proteomics_Drug_Atlas_2023', 'RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO', 'RNAseq_Automatic_GEO_Signatures_Human_Down', 'RNAseq_Automatic_GEO_Signatures_Human_Up', 'RNAseq_Automatic_GEO_Signatures_Mouse_Down', 'RNAseq_Automatic_GEO_Signatures_Mouse_Up', 'Rare_Diseases_AutoRIF_ARCHS4_Predictions', 'Rare_Diseases_AutoRIF_Gene_Lists', 'Rare_Diseases_GeneRIF_ARCHS4_Predictions', 'Rare_Diseases_GeneRIF_Gene_Lists', 'Reactome_2013', 'Reactome_2015', 'Reactome_2016', 'Reactome_2022', 'Rummagene_kinases', 'Rummagene_signatures', 'Rummagene_transcription_factors', 'SILAC_Phosphoproteomics', 'SubCell_BarCode', 'SynGO_2022', 'SynGO_2024', 'SysMyo_Muscle_Gene_Sets', 'TF-LOF_Expression_from_GEO', 'TF_Perturbations_Followed_by_Expression', 'TG_GATES_2020', 'TRANSFAC_and_JASPAR_PWMs', 'TRRUST_Transcription_Factors_2019', 'Table_Mining_of_CRISPR_Studies', 'Tabula_Muris', 'Tabula_Sapiens', 'TargetScan_microRNA', 'TargetScan_microRNA_2017', 'The_Kinase_Library_2023', 'Tissue_Protein_Expression_from_Human_Proteome_Map', 'Tissue_Protein_Expression_from_ProteomicsDB', 'Transcription_Factor_PPIs', 'UK_Biobank_GWAS_v1', 
'Virus-Host_PPI_P-HIPSTer_2020', 'VirusMINT', 'Virus_Perturbations_from_GEO_down', 'Virus_Perturbations_from_GEO_up', 'WikiPathway_2021_Human', 'WikiPathway_2023_Human', 'WikiPathways_2013', 'WikiPathways_2015', 'WikiPathways_2016', 'WikiPathways_2019_Human', 'WikiPathways_2019_Mouse', 'dbGaP', 'huMAP', 'lncHUB_lncRNA_Co-Expression', 'miRTarBase_2017']

Over Representation Analysis (ORA)#

Over-representation analysis (ORA) is a method used to identify which predefined gene sets are disproportionately represented in a given set of genes compared to what would be expected by random chance (Huang et al., 2009). We recommend using ORA only when Gene Set Enrichment Analysis (GSEA) is not suitable. Although we are using the gseapy library for ORA in this tutorial, it’s important to note that ORA and GSEA are distinct methods.

ORA on gene correlation network#

# Run ORA using every node (gene) of the gene correlation network as the query list

### YOUR CODE HERE ###
enr_all_nodes = gp.enrichr(
    gene_list=G_gxp_nodes_list,
    gene_sets=[gene_sets],
    organism='human',
)  # outdir is left at the library default here
# Show the first 10 rows of the enrichment table

### YOUR CODE HERE ###
enr_all_nodes.results.head(10)
Gene_set Term Overlap P-value Adjusted P-value Old P-value Old Adjusted P-value Odds Ratio Combined Score Genes
0 KEGG_2021_Human Cell adhesion molecules 32/148 4.963642e-28 9.778376e-26 0 0 20.158518 1267.370905 CD86;ITGAM;SELPLG;ITGB2;ITGAL;SPN;CDH5;HLA-DMA;HLA-DMB;HLA-DOA;CD34;HLA-DQA1;HLA-DPA1;HLA-DRB5;ITGA4;HLA-B;HLA-C;HLA-A;HLA-F;CD2;VCAN;CD4;PTPRC;CD6;CD8A;ITGA8;PECAM1;HLA-DPB1;HLA-DRA;SIGLEC1;HLA-DRB1;HLA-DQB1
1 KEGG_2021_Human Phagosome 29/152 7.718499e-24 7.602722e-22 0 0 17.032160 906.424701 ITGAM;NCF2;NCF4;ITGB2;THBS2;CORO1A;FCGR3A;HLA-DMA;HLA-DMB;MRC1;CD14;HLA-DOA;HLA-DQA1;HLA-DPA1;MSR1;HLA-DRB5;HLA-B;CYBB;HLA-C;TAP1;HLA-A;HLA-F;MARCO;FCGR2A;HLA-DPB1;HLA-DRA;TLR4;HLA-DRB1;HLA-DQB1
2 KEGG_2021_Human Staphylococcus aureus infection 24/95 5.021667e-23 3.297562e-21 0 0 24.040416 1234.371891 C1QB;C1QA;HLA-DRB5;ITGAM;SELPLG;ITGB2;C5AR1;PTAFR;FPR1;FPR3;ITGAL;FCGR3A;HLA-DMA;HLA-DMB;FCGR2A;C3AR1;HLA-DPB1;HLA-DRA;HLA-DOA;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1;C1QC
3 KEGG_2021_Human Leishmaniasis 21/77 4.634501e-21 2.282492e-19 0 0 26.403226 1236.219058 HLA-DRB5;ITGAM;ITGA4;NCF2;PRKCB;STAT1;ITGB2;NCF4;CYBB;FCGR3A;HLA-DMA;HLA-DMB;FCGR2A;HLA-DPB1;HLA-DRA;HLA-DOA;TLR4;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1
4 KEGG_2021_Human Viral myocarditis 18/60 4.647925e-19 1.831283e-17 0 0 29.875380 1261.120325 CD86;HLA-DRB5;LAMA2;ITGB2;HLA-B;HLA-C;HLA-A;ITGAL;HLA-F;HLA-DMA;HLA-DMB;HLA-DPB1;HLA-DRA;HLA-DOA;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1
5 KEGG_2021_Human Hematopoietic cell lineage 21/99 1.392113e-18 4.570771e-17 0 0 18.934932 778.523171 CSF1R;HLA-DRB5;ITGAM;ITGA4;CD3E;CD2;HLA-DMA;CD4;HLA-DMB;CD8A;HLA-DPB1;HLA-DRA;CD37;CD14;HLA-DOA;IL7R;CD34;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1
6 KEGG_2021_Human Allograft rejection 15/38 3.498581e-18 9.846007e-17 0 0 45.027460 1809.841566 CD86;HLA-DRB5;HLA-B;HLA-C;HLA-A;HLA-F;HLA-DMA;HLA-DMB;HLA-DPB1;HLA-DRA;HLA-DOA;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1
7 KEGG_2021_Human Graft-versus-host disease 15/42 2.114525e-17 5.207019e-16 0 0 38.348928 1472.411543 CD86;HLA-DRB5;HLA-B;HLA-C;HLA-A;HLA-F;HLA-DMA;HLA-DMB;HLA-DPB1;HLA-DRA;HLA-DOA;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1
8 KEGG_2021_Human Type I diabetes mellitus 15/43 3.203949e-17 7.013088e-16 0 0 36.977444 1404.387131 CD86;HLA-DRB5;HLA-B;HLA-C;HLA-A;HLA-F;HLA-DMA;HLA-DMB;HLA-DPB1;HLA-DRA;HLA-DOA;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1
9 KEGG_2021_Human Antigen processing and presentation 18/78 8.383314e-17 1.651513e-15 0 0 20.893617 773.433715 CD74;HLA-DRB5;HLA-B;HLA-C;TAP1;HLA-A;HLA-F;HLA-DMA;CD4;HLA-DMB;CD8A;HLA-DPB1;HLA-DRA;HLA-DOA;HLA-DQA1;HLA-DRB1;HLA-DPA1;HLA-DQB1

ORA on gene clusters#

Clustering on Gene Correlation Network

  • We use a community detection algorithm to identify communities in the network:

    • The greedy_modularity_communities() function in networkx implements a community detection algorithm that optimises modularity using a greedy approach. It iteratively merges pairs of nodes or communities that result in the largest increase in modularity until no further improvement is possible. Modularity measures the density of links inside communities compared to links between communities, aiming to maximise this value to identify densely connected groups within the network.

# Partition the gene correlation network with greedy modularity maximisation
detect_communities = nx.algorithms.community.modularity_max.greedy_modularity_communities
communities = detect_communities(G_gxp)

# Report how many communities were found

### YOUR CODE HERE ###
n_communities = len(communities)
print(f'The network has {n_communities} communities.\n')
The network has 6 communities.
# Induce one subgraph of the gene network per detected community
subgraphs = [G_gxp.subgraph(members) for members in communities]

# Report each community's size (numbered from 1), then draw it
for community_no, community_graph in enumerate(subgraphs, start=1):
    print(f'Community {community_no} has {community_graph.number_of_nodes()} nodes.')

    ### YOUR CODE HERE ###
    nx.draw(community_graph, with_labels=True)

    plt.show()  # Forces the plot to be displayed
    
    
Community 1 has 101 nodes.
../_images/130a1e1136cacc09b425a688b0796c514f145cecaf909970789b399341ad723b.png
Community 2 has 67 nodes.
../_images/4c28e796fdcac42a4c59b614a0986041beb8b361738b3804baf25701e73966bd.png
Community 3 has 64 nodes.
../_images/773b2e0a3506a3deaf4bf68c98af65830483a92c5cb826fa4c4a66d496b4d390.png
Community 4 has 51 nodes.
../_images/adc126b0951b23fe24e641a7f2d0868648d8b770b6697df734b899246282209d.png
Community 5 has 15 nodes.
../_images/bfae9dc87c6af5db54346d9cabd6aa12a56b45aac8941faa1f9c7a5058d12b21.png
Community 6 has 2 nodes.
../_images/884f36fd032ab2f04e019f905cab4e2074827232345c5f4087055723ce0fdc0a.png

Visualising the Communities

# Draw the whole gene network: colour encodes community membership,
# marker shape encodes the 'cancer_gene' node attribute

### YOUR CODE HERE ###
draw_network_with_node_attrs(
    G_gxp,
    title='Gene Network',
    node_attributes=node_attributes,
    communities=communities,
    color_attr='community',
    shape_attr='cancer_gene',
    with_labels=True,
)
../_images/ff4e3064d0cbe15f3b2deec30d558d2786c8a9c8093ae87f141b9356d9c45c89.png

Performing ORA on individual clusters can help in understanding distinct biological significance of each cluster, revealing how certain pathways or functions are associated with specific subsets of genes.

def communityORA(genes):
    """Run over-representation analysis for one list of genes.

    Calls ``gp.enrichr`` with the module-level ``gene_sets`` library,
    ``organism='human'`` and ``outdir=None``.

    Args:
        genes: Gene identifiers forwarded as ``gene_list``.

    Returns:
        The object returned by ``gp.enrichr``.
    """
    return gp.enrichr(
        gene_list=genes,
        gene_sets=[gene_sets],
        organism='human',
        outdir=None,
    )

# 1. Convert communities to lists for ORA 2. Sort the list of communities by their length in descending order
communities = sorted((list(community) for community in communities), key=len, reverse=True)

# Perform ORA for three selected communities (the three largest after sorting)
community1_enr = communityORA(communities[0])
community2_enr = communityORA(communities[1])
community3_enr = communityORA(communities[2])

# Collect up to the first 10 terms of each results table, by position.
# A community may yield fewer than 10 enriched terms, so take what is
# available instead of indexing rows 0-9 unconditionally (which would raise).
top_n = 10
top_terms = [
    enr.results['Term'].head(top_n).tolist()
    for enr in (community1_enr, community2_enr, community3_enr)
]

# Print the top results for each community; shorter columns are padded with blanks
x = PrettyTable()
x.field_names = ["Community 1", "Community 2", "Community 3"]
for i in range(max(len(terms) for terms in top_terms)):
    x.add_row([terms[i] if i < len(terms) else '' for terms in top_terms])
print(x)
+----------------------------------------------+------------------------------------------------------+-------------------------------------------------+
|                 Community 1                  |                     Community 2                      |                   Community 3                   |
+----------------------------------------------+------------------------------------------------------+-------------------------------------------------+
|       Staphylococcus aureus infection        |           Protein digestion and absorption           |             Cell adhesion molecules             |
|                  Phagosome                   |                    Focal adhesion                    |       Complement and coagulation cascades       |
|                Leishmaniasis                 |               ECM-receptor interaction               |             ECM-receptor interaction            |
|                 Tuberculosis                 |              Relaxin signaling pathway               |                  Focal adhesion                 |
|             Rheumatoid arthritis             |              PI3K-Akt signaling pathway              |          Human papillomavirus infection         |
|                    Asthma                    | AGE-RAGE signaling pathway in diabetic complications |               Tyrosine metabolism               |
|         Systemic lupus erythematosus         |                      Amoebiasis                      |        Vascular smooth muscle contraction       |
| Intestinal immune network for IgA production |            Human papillomavirus infection            |     Neuroactive ligand-receptor interaction     |
|           Cell adhesion molecules            |          Vascular smooth muscle contraction          | Arrhythmogenic right ventricular cardiomyopathy |
|             Allograft rejection              |               Proteoglycans in cancer                |            PI3K-Akt signaling pathway           |
+----------------------------------------------+------------------------------------------------------+-------------------------------------------------+

Gene Set Enrichment Analysis (GSEA)#

Gene Set Enrichment Analysis (GSEA) is a genome-wide expression analysis method designed to interpret expression profiles by focusing on pre-defined gene sets (Subramanian et al., 2005). These gene sets are curated based on prior biological knowledge, such as published information about biochemical pathways or patterns of coexpression observed in previous experimental studies. The genes are ordered in a ranked list according to their differential expression between the classes. The primary objective of GSEA is to assess whether the genes within a given gene set tend to occur toward the top (or bottom) of the ranked list. This ranking is based on the correlation between gene expression and a particular phenotypic class distinction. By evaluating the distribution of gene set members within the ranked list, GSEA identifies whether the set is correlated with the phenotypic class, thus providing insights into underlying biological mechanisms. This method contrasts with traditional single-gene analysis by focusing on the collective behavior of gene sets, thereby uncovering biologically significant patterns that might be overlooked when examining individual genes in isolation. We use gseapy to perform GSEA with the gene set library KEGG_2021_Human.

# Display the first five rows of the dataframe "tcga_gxp_df"
# (in the output below, rows are labelled by gene symbol under "GENES" and
# each column is a TCGA patient ID)
tcga_gxp_df.head(5)
TCGA-38-7271 TCGA-55-7914 TCGA-95-7043 TCGA-73-4658 TCGA-86-8076 TCGA-55-7726 TCGA-44-6147 TCGA-50-5932 TCGA-44-2661 TCGA-86-7954 TCGA-73-4662 TCGA-44-7671 TCGA-78-8660 TCGA-62-A46P TCGA-55-6978 TCGA-50-6592 TCGA-38-4625 TCGA-80-5611 TCGA-86-8054 TCGA-55-6986 TCGA-L9-A5IP TCGA-69-7764 TCGA-49-6744 TCGA-75-5125 TCGA-38-4626 TCGA-69-7763 TCGA-86-8279 TCGA-93-8067 TCGA-97-8179 TCGA-55-A48Y TCGA-86-8055 TCGA-91-6835 TCGA-55-6982 TCGA-55-A4DF TCGA-44-6774 TCGA-50-5066 TCGA-05-5423 TCGA-67-3774 TCGA-97-A4M2 TCGA-95-7567 TCGA-49-AAR0 TCGA-44-2656 TCGA-53-7813 TCGA-O1-A52J TCGA-35-4122 TCGA-55-8092 TCGA-49-6761 TCGA-49-4507 TCGA-55-7816 TCGA-78-7145 TCGA-55-6983 TCGA-53-7624 TCGA-97-A4M3 TCGA-50-5068 TCGA-78-8648 TCGA-44-6778 TCGA-80-5608 TCGA-86-8281 TCGA-MP-A4T6 TCGA-55-8085 TCGA-62-8399 TCGA-97-A4M5 TCGA-97-7547 TCGA-05-5429 TCGA-55-7994 TCGA-55-8094 TCGA-05-4425 TCGA-44-4112 TCGA-49-6767 TCGA-49-4490 TCGA-MP-A4T9 TCGA-50-5942 TCGA-MP-A4SV TCGA-49-AAR4 TCGA-05-4397 TCGA-44-A47A TCGA-86-8359 TCGA-78-7539 TCGA-MP-A4T8 TCGA-99-8032 TCGA-50-6595 TCGA-55-6968 TCGA-44-8120 TCGA-55-8302 TCGA-99-8025 TCGA-64-1679 TCGA-95-8039 TCGA-44-A479 TCGA-44-6148 TCGA-NJ-A55O TCGA-MP-A5C7 TCGA-64-5778 TCGA-55-6971 TCGA-49-AARN TCGA-44-A47B TCGA-55-5899 TCGA-49-AAQV TCGA-99-8028 TCGA-75-6205 TCGA-97-8552 TCGA-50-8459 TCGA-05-5425 TCGA-78-7150 TCGA-86-A4P7 TCGA-49-4512 TCGA-55-8206 TCGA-55-8614 TCGA-64-5815 TCGA-L9-A50W TCGA-73-4675 TCGA-55-7995 TCGA-05-4433 TCGA-55-7727 TCGA-44-2668 TCGA-44-A4SU TCGA-55-7907 TCGA-69-7765 TCGA-49-4487 TCGA-44-2662 TCGA-67-6216 TCGA-55-7283 TCGA-86-8280 TCGA-91-6840 TCGA-78-7154 TCGA-49-4488 TCGA-93-7348 TCGA-62-A470 TCGA-78-7147 TCGA-50-5936 TCGA-55-6984 TCGA-50-5941 TCGA-69-7978 TCGA-78-7220 TCGA-55-8616 TCGA-44-A4SS TCGA-55-7570 TCGA-78-7146 TCGA-44-3398 TCGA-05-5420 TCGA-50-5072 TCGA-05-4396 TCGA-05-4405 TCGA-50-5935 TCGA-38-4629 TCGA-55-8619 TCGA-05-4410 TCGA-73-4676 TCGA-97-8172 TCGA-44-7661 TCGA-05-4384 TCGA-44-2655 TCGA-80-5607 TCGA-67-3770 
TCGA-91-6836 TCGA-95-7562 TCGA-55-8511 TCGA-44-6776 TCGA-95-7948 TCGA-91-7771 TCGA-50-5944 TCGA-MN-A4N5 TCGA-73-4677 TCGA-78-7540 TCGA-91-6829 TCGA-78-8640 TCGA-62-8398 TCGA-55-8512 TCGA-83-5908 TCGA-55-6987 TCGA-93-A4JP TCGA-73-A9RS TCGA-L4-A4E5 TCGA-86-8074 TCGA-86-8358 TCGA-78-7158 TCGA-91-8497 TCGA-49-AARO TCGA-78-7159 TCGA-55-7227 TCGA-86-7714 TCGA-L9-A7SV TCGA-78-7143 TCGA-91-8499 TCGA-49-AAR3 TCGA-55-8620 TCGA-69-8255 TCGA-75-6207 TCGA-62-A46Y TCGA-NJ-A4YF TCGA-91-6830 TCGA-62-8395 TCGA-49-4486 TCGA-44-6145 TCGA-86-A4P8 TCGA-78-7537 TCGA-44-3919 TCGA-35-4123 TCGA-62-8394 TCGA-69-7761 TCGA-62-A46U TCGA-97-8547 TCGA-97-7554 TCGA-50-6673 TCGA-95-7039 TCGA-95-7944 TCGA-55-8301 TCGA-78-7152 TCGA-05-4390 TCGA-44-7659 TCGA-97-7941 TCGA-49-4514 TCGA-55-A490 TCGA-55-8508 TCGA-MP-A4TE TCGA-97-A4M1 TCGA-75-6206 TCGA-86-8671 TCGA-78-7160 TCGA-64-1681 TCGA-49-4494 TCGA-50-5946 TCGA-55-7913 TCGA-44-6779 TCGA-49-AARE TCGA-05-4403 TCGA-99-AA5R TCGA-73-4659 TCGA-50-8457 TCGA-75-5147 TCGA-55-8507 TCGA-78-7536 TCGA-95-A4VK TCGA-38-4627 TCGA-67-6215 TCGA-69-7973 TCGA-05-5715 TCGA-75-7030 TCGA-44-2666 TCGA-62-A472 TCGA-55-6985 TCGA-J2-A4AG TCGA-97-A4LX TCGA-55-6543 TCGA-97-7938 TCGA-35-3615 TCGA-44-2657 TCGA-55-8505 TCGA-97-8175 TCGA-49-4510 TCGA-MP-A4TK TCGA-50-5044 TCGA-49-AAR2 TCGA-05-4250 TCGA-97-7937 TCGA-64-5774 TCGA-86-8674 TCGA-50-6590 TCGA-64-5779 TCGA-75-5146 TCGA-44-6144 TCGA-78-8662 TCGA-55-7911 TCGA-86-8585 TCGA-38-A44F TCGA-64-1680 TCGA-97-A4M6 TCGA-75-6214 TCGA-55-6980 TCGA-97-7553 TCGA-55-A48X TCGA-64-1677 TCGA-73-7498 TCGA-38-6178 TCGA-44-7670 TCGA-62-A471 TCGA-49-4505 TCGA-NJ-A7XG TCGA-55-6981 TCGA-91-6848 TCGA-55-8090 TCGA-55-7725 TCGA-55-8207 TCGA-44-6146 TCGA-05-4434 TCGA-55-6979 TCGA-05-4427 TCGA-55-8615 TCGA-50-5939 TCGA-05-4418 TCGA-67-6217 TCGA-49-6745 TCGA-55-1595 TCGA-49-6742 TCGA-05-4402 TCGA-05-4382 TCGA-55-7576 TCGA-67-3773 TCGA-78-7633 TCGA-50-6597 TCGA-44-2659 TCGA-95-7947 TCGA-55-7724 TCGA-J2-A4AD TCGA-55-8091 TCGA-55-1592 TCGA-73-4670 
TCGA-55-1594 TCGA-55-8621 TCGA-50-5051 TCGA-49-4501 TCGA-J2-8194 TCGA-44-8119 TCGA-55-8203 TCGA-97-8177 TCGA-55-7573 TCGA-55-8089 TCGA-38-4630 TCGA-78-7166 TCGA-38-4631 TCGA-55-1596 TCGA-91-A4BD TCGA-67-3771 TCGA-J2-8192 TCGA-55-A48Z TCGA-97-8176 TCGA-86-6851 TCGA-50-5931 TCGA-NJ-A4YI TCGA-97-7552 TCGA-MP-A4T4 TCGA-L4-A4E6 TCGA-44-5643 TCGA-MP-A4TD TCGA-05-4244 TCGA-50-5930 TCGA-44-6777 TCGA-05-4430 TCGA-05-4426 TCGA-44-6775 TCGA-05-4420 TCGA-55-8506 TCGA-44-7672 TCGA-62-8402 TCGA-86-8056 TCGA-05-4422 TCGA-75-7027 TCGA-78-7535 TCGA-50-8460 TCGA-75-7031 TCGA-L9-A444 TCGA-73-4666 TCGA-86-7953 TCGA-86-A4D0 TCGA-NJ-A4YQ TCGA-91-8496 TCGA-67-3772 TCGA-55-7281 TCGA-05-4424 TCGA-69-A59K TCGA-75-7025 TCGA-55-8514 TCGA-95-8494 TCGA-05-4395 TCGA-93-A4JQ TCGA-44-8117 TCGA-55-8204 TCGA-50-5933 TCGA-MN-A4N1 TCGA-55-7903 TCGA-86-8669 TCGA-55-6970 TCGA-50-6594 TCGA-86-8075 TCGA-MP-A4TF TCGA-69-7760 TCGA-78-7161 TCGA-91-6849 TCGA-99-8033 TCGA-55-8205 TCGA-55-8510 TCGA-91-6828 TCGA-50-5049 TCGA-99-7458 TCGA-49-AARR TCGA-MP-A4TC TCGA-49-AARQ TCGA-93-A4JN TCGA-95-A4VP TCGA-69-8453 TCGA-55-8513 TCGA-55-6975 TCGA-86-8278 TCGA-75-6203 TCGA-49-6743 TCGA-55-7574 TCGA-78-8655 TCGA-05-4389 TCGA-86-7701 TCGA-91-6831 TCGA-55-7728 TCGA-97-A4M0 TCGA-50-6593 TCGA-86-6562 TCGA-62-A46S TCGA-86-A4JF TCGA-J2-A4AE TCGA-55-8087 TCGA-78-7542 TCGA-78-7148 TCGA-55-A493 TCGA-91-A4BC TCGA-05-4432 TCGA-55-6712 TCGA-4B-A93V TCGA-93-7347 TCGA-86-8668 TCGA-MP-A4TI TCGA-69-8254 TCGA-64-1676 TCGA-62-A46V TCGA-78-7167 TCGA-55-7284 TCGA-78-7162 TCGA-75-6212 TCGA-97-7546 TCGA-44-5644 TCGA-55-8299 TCGA-75-6211 TCGA-MP-A4SW TCGA-78-7149 TCGA-S2-AA1A TCGA-95-A4VN TCGA-55-8096 TCGA-05-4398 TCGA-97-8171 TCGA-55-A492 TCGA-71-8520 TCGA-44-3396 TCGA-55-A57B TCGA-L9-A443 TCGA-55-A4DG TCGA-67-4679 TCGA-64-5781 TCGA-93-A4JO TCGA-NJ-A4YP TCGA-69-7974 TCGA-MP-A4T7 TCGA-55-6642 TCGA-49-AAR9 TCGA-69-8253 TCGA-05-4249 TCGA-44-7669 TCGA-71-6725 TCGA-73-7499 TCGA-MP-A4TH TCGA-55-8208 TCGA-78-7156 TCGA-75-5122 TCGA-50-7109 
TCGA-NJ-A55R TCGA-53-A4EZ TCGA-NJ-A4YG TCGA-86-A456 TCGA-38-4632 TCGA-MP-A4TJ TCGA-97-8174 TCGA-MP-A4SY TCGA-62-8397 TCGA-L9-A8F4 TCGA-75-5126 TCGA-MP-A4TA TCGA-86-7711 TCGA-50-5045 TCGA-05-4417 TCGA-44-7660 TCGA-69-7979 TCGA-55-A491 TCGA-L9-A743 TCGA-55-A494 TCGA-44-7662 TCGA-55-7910 TCGA-69-7980 TCGA-55-8097 TCGA-73-4668 TCGA-NJ-A55A TCGA-86-8672 TCGA-86-8673 TCGA-78-7153 TCGA-97-A4M7 TCGA-62-A46R TCGA-50-5055 TCGA-38-4628 TCGA-86-7713 TCGA-86-8073 TCGA-MN-A4N4 TCGA-53-7626 TCGA-44-A47G TCGA-55-6969
GENES
TSPAN6 11.3668 11.5434 11.4110 12.2149 11.2882 11.2084 11.1717 12.1281 11.9908 12.2601 11.6963 11.4364 10.8184 11.5513 11.2016 11.5201 12.3420 11.9378 12.6268 12.3495 10.0858 10.1934 11.4793 11.8151 10.9083 10.9789 12.3310 11.0959 12.2844 12.2692 12.4793 11.6572 10.4046 11.7654 11.5185 12.6804 12.6905 11.6912 10.3112 10.6976 10.3748 11.7572 11.9412 11.4387 13.5286 11.3330 10.4583 10.6897 9.8078 12.0396 10.7139 10.9416 10.6148 12.3594 10.2641 10.3341 12.1122 12.6116 11.6780 11.3810 11.2908 11.6346 12.7436 12.0022 11.3310 12.1321 10.9049 10.8718 13.6990 11.2008 12.5260 11.3536 10.9310 11.1425 11.6935 10.7492 11.5179 11.4193 12.0642 10.9989 10.1955 9.8547 12.1975 11.8144 11.3360 11.8734 11.6592 10.9656 11.3940 10.0536 11.2797 12.5194 11.1115 11.3718 11.0992 10.5568 12.4242 10.8483 10.2531 12.1970 11.0435 11.5506 11.8431 10.0167 11.1656 11.6305 12.1896 11.0415 11.2189 11.8492 11.3662 10.4561 11.6218 10.8172 12.8664 11.5225 11.3115 11.5578 10.2480 12.5978 11.3813 12.2305 12.8613 12.2530 11.6121 11.4228 10.9524 12.5578 10.0787 10.5769 12.1344 11.0673 11.5503 11.5436 11.9898 11.0150 11.7263 12.6511 12.7900 11.3529 10.8869 11.7057 10.8965 12.3545 10.2583 11.3722 10.6833 10.0881 10.4599 10.7081 12.0397 11.7018 12.8565 10.2880 12.7978 11.4706 10.8476 11.0805 11.9734 12.0851 12.1795 11.5371 10.6095 11.0963 10.3040 11.8762 11.8054 11.2086 11.3926 11.8264 10.1947 10.4312 12.0766 9.8166 11.1579 9.9593 11.9509 12.1032 11.3955 12.1417 10.2608 11.8510 14.0084 10.9162 10.3611 11.0810 11.3337 12.2993 10.7740 11.7357 10.5070 11.8349 10.1911 9.9479 11.2712 12.0021 12.6501 12.4788 10.4014 9.9802 11.2009 11.4114 11.7631 11.2826 10.2483 10.8454 11.3710 11.8123 10.4946 12.8853 12.0419 9.7825 11.3508 12.2113 11.2109 10.7897 10.9245 11.1760 12.6706 12.6263 11.1184 12.0847 12.0260 12.1872 11.1890 10.5380 11.1885 10.6572 13.0727 11.4913 11.3143 11.0925 13.0532 10.3230 11.2059 12.4663 11.5834 10.9066 10.7345 12.4693 11.5575 10.9213 12.4820 11.2840 11.5141 10.0187 10.8694 12.1195 11.9295 11.0260 
10.9569 11.7140 11.9341 11.8186 12.2848 11.6558 9.3353 11.0606 12.0642 9.8124 11.7632 10.2884 12.5910 11.3659 12.7780 12.6628 11.5601 10.2000 12.0653 11.7465 12.3923 11.4238 11.8239 11.9585 12.2011 12.1427 12.2740 10.5354 10.6766 11.2521 10.8593 11.5535 10.6908 10.5088 12.1103 10.6678 11.3580 11.2907 12.4034 11.2444 11.5573 10.6814 11.1324 12.5396 10.8427 11.0895 10.6759 11.6387 11.7932 10.2316 11.7782 11.6473 11.4396 11.4026 12.0780 11.4128 13.0523 10.6577 11.9695 12.1888 12.0151 11.7835 11.2857 11.6833 11.8953 11.2430 12.4815 11.4569 12.1892 12.9238 11.3341 10.4122 12.3289 12.5716 11.8607 10.6942 9.9505 9.8565 11.1532 11.0083 9.7749 11.8172 11.5265 11.7539 9.6730 11.0506 12.0793 12.3549 11.7789 12.2414 11.0330 11.7827 12.6700 10.3898 11.4081 11.9690 11.8487 11.3940 11.7816 10.4786 11.3588 11.5056 11.5659 10.9077 12.8481 13.2597 12.1043 11.8132 10.1705 12.6113 12.7393 10.8212 11.0022 9.9679 10.4334 12.5928 10.5347 11.5644 11.3865 11.6430 11.5043 12.6001 12.6972 11.6720 13.9840 12.3083 11.9275 10.5632 10.5139 11.6059 11.0129 10.9546 11.0942 11.9468 11.2219 11.4285 11.4685 11.9371 10.7517 11.0095 9.9635 12.0445 11.4242 10.2734 10.5699 12.1478 12.6972 11.1324 9.7751 10.5446 9.7483 12.8170 11.8853 12.4547 10.2181 11.4485 11.1449 10.6456 11.8871 10.8452 10.8434 11.1721 11.5949 12.3783 10.1246 11.0102 10.7886 12.4142 13.2311 11.3050 11.1573 10.8217 11.2405 12.1907 11.6480 10.7273 10.8609 12.3249 11.5895 11.3989 12.5339 10.8605 12.1548 11.3018 11.1675 11.8632 11.6934 11.9824 11.7301 11.1696 9.6553 10.9355 11.4443 11.6764 10.8236 10.9707 10.3842 11.2515 11.6288 10.6570 11.5372 11.0214 11.6107 12.2799 9.9885 10.8204 11.5185 11.2179 11.4793 10.5852 11.8688 10.9679 11.1557 12.5560 11.0278 11.2249 11.7138 12.0610 11.0386 11.7022 10.7661 11.5559 12.3662 11.6695 12.3629 10.0493 12.6781 11.1947 10.2288 11.1006 10.7090 11.5433 10.6578 12.2400 12.2886 11.0114 11.1331 12.7791 11.4336 11.4533 11.8079 11.3954 12.2414 10.7676 11.6560 10.8912 11.3200 10.5764
DPM1 10.3673 10.5282 11.2018 10.3249 10.2095 11.6034 10.5684 10.5895 10.8699 10.8979 10.2597 10.2811 10.5428 11.4632 10.4191 11.1089 11.7516 11.0717 11.3598 10.5437 10.1693 10.4140 10.5354 10.7697 11.1321 10.9468 11.0926 10.7382 10.8152 10.3957 10.4709 10.1559 9.9805 11.4481 10.4410 12.3449 12.0215 9.8339 9.8155 11.1057 10.6229 10.3072 10.9156 10.6567 11.9269 11.2991 10.1423 10.3601 10.1592 10.7380 10.4286 10.4356 10.3590 12.4895 10.1547 10.5626 10.0477 10.5382 9.9913 10.2894 10.8054 10.1605 10.0190 11.5081 10.0984 11.3601 10.6430 10.6521 11.1387 11.3382 10.2255 10.0836 10.4917 10.0090 11.9545 10.0906 11.3592 9.9883 10.3982 10.5526 10.2111 10.2224 10.3080 10.4408 10.1729 10.0785 10.3417 11.0045 9.9599 10.3394 10.2760 11.3405 10.1568 10.4120 10.2278 10.8406 10.5518 10.6743 10.1342 10.6351 10.0925 11.4544 10.6302 10.0922 10.3143 10.0360 10.5034 10.0045 10.0630 9.8141 10.3974 10.2130 10.9346 10.9549 10.2702 10.4995 10.5450 11.4992 10.0573 9.6467 10.1493 10.0933 11.0881 12.0558 11.0665 10.3363 10.5444 10.3785 10.1670 10.3885 10.8217 10.4592 11.4573 10.3285 10.6120 11.0075 10.4944 11.1837 11.7792 10.1174 10.3780 9.6809 10.1563 11.0345 9.9656 9.6733 10.8477 9.6414 10.4871 10.1464 10.5615 10.7224 10.3989 10.2802 10.9204 10.2679 10.4497 10.7129 11.1628 10.4072 11.0103 10.5530 10.7263 10.4121 10.6011 10.4475 10.4723 11.0430 10.3518 10.2918 11.4870 10.6665 10.6233 10.2224 9.8892 9.8752 10.2606 10.4278 10.1204 10.5046 10.5811 9.9467 11.3648 10.6513 10.7957 11.3752 10.8785 10.6078 12.4100 10.6967 10.1885 11.0354 10.5423 10.0410 9.7252 10.0059 11.3611 10.4094 10.7109 10.2990 9.6637 10.2146 10.3319 10.9650 11.0105 10.4496 9.5375 11.0195 10.4263 10.0014 11.7075 9.7232 10.4508 10.6472 10.4813 10.2382 9.8675 9.7808 10.7801 10.9638 10.7165 10.8844 9.6931 10.1148 10.5937 10.0443 10.1051 9.8038 10.9006 10.7497 11.1005 9.6334 10.6470 9.6086 10.4662 9.7965 9.6259 10.6728 10.4062 10.4734 10.2548 10.3992 10.7673 10.5584 10.6872 10.1128 10.5001 10.7337 10.6823 10.3774 10.5507 9.5723 
11.0563 11.3820 10.7984 10.2953 10.4317 10.9132 10.7706 10.3170 9.8159 10.7112 10.5911 10.5216 11.0934 10.1872 11.4650 10.0845 10.6251 10.2238 11.8974 10.0741 10.2667 11.4990 10.2143 10.7719 10.4364 9.9946 10.6327 9.9117 10.7585 10.6781 10.8452 10.2105 10.5266 10.7068 9.5639 10.3362 10.1308 10.2051 10.3487 10.4209 10.3560 9.9935 10.3846 10.2926 10.5870 10.2013 10.1682 10.0883 11.1945 10.6828 10.6559 10.3926 10.7314 11.3593 12.3765 10.0253 9.8573 10.7042 9.9889 10.8183 10.5255 10.8122 9.9752 10.8340 10.7147 10.3240 11.6552 11.4148 11.1563 11.1943 10.2782 10.4931 10.3662 10.2788 10.6498 10.3249 10.2040 10.5631 10.0752 11.5878 10.1247 9.9811 10.3253 9.9141 10.6591 10.3958 10.5550 11.0409 10.5295 10.5735 11.0035 10.3892 9.4086 10.6769 10.6982 10.5302 10.5096 10.2051 10.4566 9.7456 10.7550 10.3593 10.7888 11.0977 10.4838 10.1326 10.8935 10.1556 10.2567 10.6765 11.0303 10.0184 11.2173 11.0190 10.6367 10.4657 10.7996 10.3412 10.5155 10.9056 10.4717 11.0922 10.7262 10.4186 10.4778 10.8818 10.6244 9.9601 10.3842 10.8874 9.8919 10.2086 10.3251 10.6865 10.7666 10.0151 10.1534 9.8252 10.3318 10.4129 10.1555 10.6197 10.1391 10.2610 10.6608 10.4759 10.8768 9.4212 10.1342 9.6859 10.7873 9.9960 10.3806 10.5536 9.9261 11.0626 10.3189 10.3556 10.8261 10.7788 10.4143 10.6831 9.6615 9.8619 9.6148 10.3166 12.1833 10.8128 10.2940 10.3546 10.1513 9.9935 10.6634 10.6151 10.3034 11.0388 9.6084 10.1049 10.3442 10.0398 10.2584 10.6060 9.9462 10.6935 10.1466 10.9851 9.7722 10.2135 10.4614 10.0556 10.9664 10.8430 10.4458 10.4976 10.7279 10.0624 11.0496 11.2922 10.4160 10.8245 10.8518 11.4070 9.4562 10.7687 10.2546 10.4619 11.0633 10.1659 10.0678 10.2079 10.5422 11.4943 10.3799 9.8810 10.8335 10.4836 10.4381 11.1644 10.8843 10.8769 10.9079 9.8726 10.8641 10.8005 10.4343 10.3455 9.4051 10.9179 11.6558 10.2703 9.5834 11.0609 10.2996 10.9338 10.2698 10.3567 9.8932 10.7229 10.7633 11.0195 10.5073 10.5503 11.0739 10.1742 9.9137 10.9022
SCYL3 9.7884 9.7292 9.4449 9.2340 9.8186 9.0884 10.2428 10.1989 9.8516 9.7973 10.4597 9.5387 9.1616 9.6713 9.1914 9.1989 9.0392 9.7859 9.5704 9.3099 9.9071 10.2033 9.4620 8.8882 8.9993 9.2914 10.1648 9.6722 9.8902 9.5831 9.6185 9.3662 9.4164 9.3445 9.0884 9.9466 10.5563 9.5650 9.5591 9.8518 9.2797 9.9954 10.1827 9.6889 9.2378 10.1650 9.2111 8.3509 9.3441 9.2187 9.2483 9.7711 9.4156 10.4581 8.9691 9.2670 9.5723 10.6370 10.3945 10.0023 9.4692 9.3656 10.1750 10.6553 9.5655 9.6904 9.1352 10.5748 9.0598 9.3388 9.0138 10.1941 9.7808 9.2933 9.5845 10.2226 9.7462 9.5927 9.3558 9.2934 9.5384 9.4034 9.1338 9.0750 9.5706 8.9542 9.3475 9.1997 9.3012 10.0962 10.6254 9.6891 9.6099 9.7300 9.3831 8.8079 9.7910 8.8135 9.0254 9.3585 8.9728 9.6625 9.4022 9.1939 8.8231 10.0313 9.0316 8.7873 9.5787 9.4269 9.5178 10.0869 9.9300 9.7401 9.8480 9.3712 9.7986 9.4122 8.5703 9.3572 9.4105 9.8800 9.4892 9.5534 9.9758 9.7279 9.8356 9.5767 9.4541 10.2115 9.6805 8.8872 9.8853 9.4723 9.6099 9.4472 8.9678 9.5603 10.3938 10.0234 9.7362 9.9193 9.1959 8.6483 9.2934 9.9753 8.8689 10.2857 9.1127 10.0076 10.2159 8.6584 9.8026 9.4232 10.0368 9.4994 9.1809 9.6941 9.4753 9.7173 9.6417 9.9378 9.5271 9.2557 10.0794 9.6618 9.4519 9.9135 9.8384 9.5287 9.9231 9.5156 9.7516 9.2277 11.8243 9.4455 9.2924 9.3730 9.2214 9.6726 9.1222 9.8190 9.1053 9.3210 7.9588 9.3426 9.7862 9.6415 8.9893 9.3399 9.3363 10.2430 8.8740 9.4316 9.7870 9.2739 9.8320 9.0703 10.4094 9.2132 8.9828 9.2478 9.0566 9.0567 9.1049 10.0076 9.5664 9.2488 10.0838 9.7055 8.8724 9.1715 9.8818 9.5381 9.6877 9.9796 9.7347 9.7612 9.4238 9.0414 9.6469 10.0605 9.5776 9.2431 9.6940 9.5104 8.7772 9.6834 9.7049 9.5464 10.1800 10.1731 8.9434 10.1129 9.5120 9.9568 9.3915 9.6556 9.6057 9.2214 9.3705 9.4333 9.4012 9.7564 9.8985 9.3117 9.2219 9.1313 9.1250 9.3155 8.5723 8.9149 8.8896 10.1626 9.3895 10.0364 8.5380 9.5676 10.0258 9.3249 9.4584 9.4167 9.1765 9.3333 9.9249 10.2258 9.3517 9.3738 9.4879 9.8991 9.2620 10.0403 9.3082 8.6391 9.4582 9.2066 9.6627 8.9497 
8.4621 8.9667 10.2945 9.5630 10.3852 9.7173 9.4638 8.7810 9.3086 9.1673 9.4087 9.7618 8.9834 9.8248 9.1231 9.4784 8.9424 10.0797 9.7576 10.4031 9.3747 9.6217 10.1896 9.4505 9.9546 9.1939 9.7137 9.3683 9.1750 9.1421 9.4496 9.7904 9.9654 9.4372 9.9617 9.2584 9.9010 9.5597 9.9597 9.9692 9.3522 9.7765 8.7170 9.6048 9.5941 9.7829 9.4945 9.8494 8.8990 9.6106 10.0654 9.4878 9.3299 8.8287 9.3280 9.8321 9.3744 9.2514 9.0631 9.2725 9.5093 9.1426 9.4783 9.1251 9.4141 8.7055 10.5948 9.4177 9.2547 9.1221 9.8962 9.6861 9.6418 9.2168 9.5201 10.1573 11.0186 9.9912 8.6901 9.4823 9.8922 9.7491 9.9393 8.9300 9.7732 9.4625 10.4722 9.2756 8.5539 9.4335 9.9991 9.6167 9.9887 9.3647 9.5085 10.1092 9.5667 9.4444 9.7623 10.0350 8.7668 9.2513 8.9995 9.6443 9.7327 9.2836 9.1662 9.4191 9.7447 9.1672 9.4019 9.4430 9.4113 9.4753 9.4045 9.4526 9.5222 10.1365 9.6688 9.3138 9.0650 9.9093 10.1098 9.0354 10.1722 9.8626 9.6334 9.4093 10.5626 9.3962 9.6598 8.8236 8.8226 9.9742 9.4260 9.4048 9.1882 9.2809 9.3073 10.7647 9.6210 9.8062 10.4229 10.1405 9.7423 9.3376 9.6765 9.5753 9.3374 9.5080 9.3468 9.6630 9.9180 9.6452 9.6021 9.1445 10.7054 10.3428 9.5334 8.8020 9.4802 9.6960 10.3752 10.3503 9.6493 9.6399 9.8303 9.2408 9.2355 9.4554 9.8303 9.4898 10.1210 9.4890 9.5164 9.9131 10.0144 9.3105 10.5826 8.8982 8.9016 9.7922 9.1497 9.8255 9.0385 9.5056 9.3626 10.0143 9.2308 9.8938 9.1274 8.7230 9.3022 9.4158 9.0778 9.3721 9.6511 9.2463 9.7447 9.0294 9.7439 9.6689 9.4864 9.4727 9.5110 8.8101 9.7125 8.7629 8.2262 9.4804 9.2713 9.7087 9.3311 9.7850 10.4121 9.0277 9.5578 9.6307 9.3312 9.3425
FIRRM 8.2552 7.9951 8.3546 7.7537 7.9844 8.7984 8.8721 8.3550 8.1877 9.2347 9.8346 7.8180 9.0867 7.4249 7.2293 9.1192 10.0768 9.4085 10.0984 7.4506 9.3028 8.9049 8.0116 8.5089 7.1778 7.2466 9.0061 8.9549 9.0305 8.8959 8.5437 7.0665 7.0667 9.0817 8.4102 9.6505 9.6165 8.0169 7.4297 9.2275 8.1630 8.6609 8.6540 8.5377 10.2647 9.1622 6.8973 7.7967 7.6247 9.1262 6.8227 9.6407 7.7675 8.8511 7.9248 7.1463 8.5977 8.7141 8.4354 9.3176 9.1287 7.7488 7.4630 9.1531 9.0223 8.8943 8.0016 8.9260 9.3683 7.3502 7.9927 7.5116 9.1125 8.6168 10.1938 8.4295 8.3625 8.2439 8.6173 8.4754 6.9050 7.3625 7.4183 8.6270 8.2785 8.6444 8.0100 8.3573 7.2562 8.2185 8.4154 9.1849 7.8837 8.2931 8.8700 8.8411 8.7850 7.2378 8.3485 7.1572 6.9420 9.2182 9.6594 7.7147 7.1512 7.6799 8.5778 7.9356 7.2182 7.3954 8.9182 8.2565 8.4605 9.2185 8.1700 8.8033 8.3172 9.7413 6.4640 8.4542 8.4359 8.2728 9.2869 9.2661 8.2354 8.4339 8.5297 8.7293 6.9337 8.1152 8.3272 8.0495 9.6303 8.3736 9.0144 9.8250 9.5564 8.4491 9.3827 9.7339 7.6906 8.7026 6.5987 8.8552 7.3927 8.3048 7.3380 7.7705 8.6787 7.9360 8.1440 8.2584 7.6809 6.7938 10.2544 8.6699 6.8795 7.6993 8.0260 7.6870 9.1015 7.9151 7.7997 8.4602 9.3955 9.3654 7.1228 10.1851 8.9583 8.6005 9.1412 8.2863 9.0798 9.8016 8.4744 7.2335 8.1654 9.1363 7.8797 8.3459 8.1668 8.8832 9.6468 9.1314 8.0587 8.6297 8.7160 7.9313 8.2261 8.4090 7.5129 8.0114 6.7926 7.5435 7.5338 8.4134 9.6792 9.4323 9.0489 8.3288 7.7048 8.5637 8.3277 8.5330 8.9807 8.9977 8.2473 9.6170 7.6683 7.3489 7.9927 7.1645 8.9068 8.3126 7.8546 7.8164 7.7755 9.0864 8.3691 7.6968 9.3912 9.6854 9.1414 8.3699 7.2528 7.3840 8.3109 7.9273 8.9461 9.0077 9.9350 8.1543 8.1164 8.5358 9.3662 8.7857 7.2239 8.3636 7.7923 8.2715 8.4018 8.0629 7.7019 8.0887 8.0258 7.4817 8.4937 8.3959 7.6639 8.4993 8.4815 7.8020 9.0679 9.0500 9.1413 8.1932 8.8357 8.7674 8.1115 7.1208 8.8311 8.8040 8.6506 7.1148 7.9377 8.2413 8.8884 7.3040 8.5518 8.7430 8.8249 7.8675 7.9440 8.9658 9.2448 6.9924 7.9411 7.4517 8.9327 7.2081 8.2532 8.2563 7.7443 8.7259 
9.3832 8.8253 8.0031 8.2955 8.2765 7.8229 8.6316 8.5860 6.9472 8.3785 8.4556 9.1862 7.5927 8.6970 5.8352 7.6451 9.2664 8.0690 8.7478 7.4938 8.7066 9.4710 9.3642 7.7739 8.0841 8.1780 7.4742 8.6408 8.6898 7.7883 8.4558 9.1530 10.1528 8.5159 9.8086 9.7207 7.6168 8.8016 7.8785 8.5502 8.8609 8.8188 6.1655 8.1194 7.5525 8.4967 7.3293 8.9952 8.4818 9.1144 7.1372 8.1151 8.3783 7.9145 8.7581 8.7234 8.6154 8.4363 8.5036 6.9473 8.7350 9.5664 7.9311 7.4117 8.5619 7.7587 9.8614 9.5839 9.6539 8.6016 8.4839 8.4853 7.8965 8.7199 9.0767 7.6906 7.8826 8.7882 8.3477 8.9140 9.6005 8.8265 6.7687 8.6109 9.0711 8.1448 9.3025 9.5287 9.0400 9.1511 8.6888 7.5787 7.6862 9.4371 8.6166 8.6654 7.1258 8.8318 7.7291 7.0919 8.4898 8.6201 9.6507 8.3449 7.9233 7.3756 7.0956 8.0906 7.5837 9.4268 8.7405 8.4197 9.1549 8.5749 6.7188 7.2544 8.5006 8.2515 9.3439 7.7788 9.3662 7.7287 7.9233 9.7096 8.2986 8.7221 8.4881 9.5706 8.3269 9.1220 7.3841 7.6842 8.9006 8.1361 8.7213 8.9814 8.3672 7.7418 7.4675 8.0250 8.5535 8.8129 8.5775 9.0771 7.9622 7.5304 7.6512 8.9814 8.5059 9.0425 8.6382 7.6665 9.1114 8.6390 7.8171 8.0749 7.9138 7.9378 8.5424 8.6728 8.9180 8.5680 8.4723 8.2765 9.7289 7.1520 8.3952 9.6248 7.8581 9.5527 7.6923 9.2327 7.5736 8.4610 7.8178 7.9795 8.2111 8.0822 7.2275 9.7294 8.3773 7.8637 8.7273 7.0051 8.8181 8.4091 9.6044 9.5028 7.9725 7.9351 8.9627 9.6060 9.2114 8.3101 8.3419 10.1380 9.0545 8.7019 7.3637 9.0045 7.8968 8.2508 8.2384 8.0894 8.1065 8.5633 7.7298 9.3048 10.0026 8.2811 8.7883 8.0179 7.8703 8.8147
FGR 10.6173 8.4858 7.3211 11.0656 10.5213 9.3386 9.3440 8.7403 10.6243 9.8945 10.1067 8.2663 10.5393 9.3087 11.7055 10.1633 9.9218 10.7935 8.6680 9.3646 8.4917 8.3683 10.5502 10.4143 13.7653 9.8602 8.9363 9.0198 9.2149 9.1690 9.9654 10.4230 11.6101 10.4511 9.6184 9.9466 10.1096 10.0554 12.5322 9.0607 11.0522 10.5198 8.5024 11.5016 10.8670 8.8416 12.2780 8.8244 11.6315 9.7211 11.8538 8.3346 8.5788 9.9279 12.0813 12.3180 9.6880 8.6507 10.0116 9.9325 9.8269 10.7108 9.0030 7.6588 10.6705 7.2463 10.0586 8.2823 9.8099 11.0707 10.4863 8.3067 9.7500 10.6279 9.5733 10.6769 9.8680 10.4574 8.1190 8.9752 11.6027 12.6021 9.4494 10.2858 8.6901 9.5814 9.9495 9.8711 11.3526 11.0880 7.6519 9.8892 10.6668 8.6482 9.6278 9.3992 8.9880 11.3194 10.8624 10.9816 11.6053 10.5979 7.9816 10.6734 10.0487 11.1478 8.7977 10.6032 9.0852 9.8649 10.7973 11.6060 8.7619 10.3262 8.5420 9.4730 9.5663 10.6520 12.2258 9.6665 9.4678 10.2449 9.3221 8.5334 9.9490 8.9464 10.0200 8.6182 12.3758 9.1607 10.6504 10.8448 8.2533 9.1937 10.5314 8.0932 9.0344 10.3383 11.5452 9.3794 7.9106 9.2890 11.5038 10.5689 11.5897 9.9777 12.7109 9.8607 10.6532 10.3516 9.3908 9.6290 9.9126 12.3441 9.3238 9.7591 11.6668 7.4797 10.3716 9.5435 9.3356 9.5930 10.9491 8.8594 10.3561 9.6110 10.3872 9.7308 11.2202 10.9032 8.9799 8.3602 9.2764 8.4090 8.3675 10.6002 10.9577 9.2030 10.3880 9.4056 8.3870 9.6441 9.7136 10.5115 9.5767 9.7196 9.4578 10.1899 9.4236 9.9364 8.9602 7.7962 12.3873 11.9105 8.7270 9.9545 9.8101 8.9173 10.3324 12.3838 9.6721 9.5631 9.7512 8.7838 10.0118 10.7761 9.7616 9.7293 9.5544 9.9237 9.0296 10.8014 10.3112 8.6518 10.3076 10.6595 11.3323 9.8601 9.7748 9.4944 7.5205 8.9309 9.4078 9.6848 11.1939 11.7875 10.2860 9.8802 9.7145 9.9715 9.3146 9.1964 9.8973 10.0729 8.3242 10.7904 9.7788 8.8759 9.5566 9.6591 9.9914 10.9376 10.5745 9.0358 8.9110 11.7532 8.7806 10.1075 9.3523 10.6877 9.4762 9.5429 10.0884 8.0550 8.0269 7.6216 10.2748 8.8910 9.2807 12.6989 8.5133 9.2839 11.4400 10.9657 8.1342 10.1091 8.2446 11.4480 11.8178 
9.6027 9.3934 9.5944 9.3491 8.3665 8.7138 10.0874 7.3313 11.4358 10.6241 9.8658 10.1871 10.5421 7.4162 10.8045 10.7947 10.0003 9.5008 10.2772 9.7556 9.2886 10.2222 9.3398 11.4583 9.4793 11.1001 9.5743 9.4919 8.2160 9.1174 9.3648 9.3430 10.2621 8.1118 9.7819 10.5958 9.1164 9.0904 11.5633 8.7876 9.4450 9.5219 9.4730 9.6956 10.5655 9.3805 11.0442 8.2006 8.7170 9.3075 8.8891 10.7038 10.1909 10.0962 9.5766 9.6280 10.2669 12.1436 10.2015 10.4543 10.6049 12.8714 9.2953 9.3366 10.4120 13.2448 11.4904 10.0490 9.7272 10.4148 9.0093 9.4597 11.0705 10.3704 10.0069 8.9187 8.5024 11.1808 10.2892 10.0072 10.2733 10.1902 10.1827 8.2901 10.3673 11.2238 9.7679 11.1662 10.1624 10.4986 9.4949 9.8046 10.5421 8.6035 10.0874 8.8794 9.3127 12.1308 8.7331 9.3541 9.8539 9.6341 9.2426 9.6600 9.0266 7.6209 7.7846 11.4869 9.6619 11.0722 11.3274 11.8255 11.0363 9.7664 10.6127 9.8380 9.8804 9.4436 10.2768 12.4871 12.3841 11.3146 9.2367 12.1103 9.2876 10.3038 10.1554 10.1882 9.1459 11.8745 13.0418 10.0448 10.4839 9.4837 9.6713 10.2333 10.7622 9.1548 8.0891 8.7510 10.8129 9.4356 10.0938 9.8184 9.2556 10.0784 9.5976 11.5936 9.9069 10.2541 8.7086 7.8255 11.3981 9.5923 9.6872 9.5713 8.1365 10.1498 7.9491 10.4613 8.3216 9.9512 10.6102 10.5580 10.5036 8.2208 8.9027 8.7322 11.0152 9.7544 9.0746 8.9877 9.9202 8.5830 10.6752 8.9076 10.7197 10.5941 8.6956 6.9987 9.4047 9.6932 9.7027 9.7226 10.1130 10.0054 11.5320 8.2472 11.6150 9.4476 8.2822 9.3303 10.4898 10.7294 10.2548 10.8896 9.7065 9.3320 9.9604 10.3624 11.3919 9.2426 10.1716 11.0515 9.8812 9.2150 9.3057 10.0473 10.6724 7.2922 9.7134 9.0199 9.8077 9.4394 9.4059 9.3254 10.4796 9.4242 8.0340 10.4030 10.2213 10.3702 8.7169 8.1769 9.9471 8.9532 10.6328 11.3029 9.6115
# Display the first three rows of the metadata dataframe "tcga_gxp_meta"
tcga_gxp_meta.head(3)
patient race gender sample_type cigarettes_per_day Smoked sizeFactor replaceable
_row
TCGA-38-7271 TCGA-38-7271 white female Primary Tumor 1.3699 Smoker 0.5841 True
TCGA-55-7914 TCGA-55-7914 white female Primary Tumor 0.274 Smoker 0.9873 True
TCGA-95-7043 TCGA-95-7043 white female Primary Tumor 2.1918 Smoker 0.5439 True

Assign classes based on phenotypic attributes in metadata (e.g., smoking status)

# Map each smoking status found in the metadata to its class label.
# Samples with any other status are left out of the class assignment.
class_by_status = {'Smoker': 'smoker', 'Never': 'control'}

# Create a dictionary to store the class assignments for each sample
classes = {}

# Iterate through the columns (samples). Selecting gene rows first
# (e.g. ".loc[G_gxp_nodes_list]") does not change the columns, so the sample
# IDs are read directly from the dataframe without copying a row subset.
for sample in tcga_gxp_df.columns:
    # Look up the smoking status once per sample
    smoking_status = tcga_gxp_meta.loc[sample, 'Smoked']
    if smoking_status in class_by_status:
        classes[sample] = class_by_status[smoking_status]
classes
{'TCGA-38-7271': 'smoker',
 'TCGA-55-7914': 'smoker',
 'TCGA-95-7043': 'smoker',
 'TCGA-73-4658': 'smoker',
 'TCGA-86-8076': 'control',
 'TCGA-55-7726': 'smoker',
 'TCGA-44-6147': 'control',
 'TCGA-50-5932': 'control',
 'TCGA-44-2661': 'control',
 'TCGA-86-7954': 'smoker',
 'TCGA-73-4662': 'smoker',
 'TCGA-44-7671': 'control',
 'TCGA-78-8660': 'smoker',
 'TCGA-62-A46P': 'smoker',
 'TCGA-55-6978': 'control',
 'TCGA-50-6592': 'control',
 'TCGA-38-4625': 'smoker',
 'TCGA-80-5611': 'smoker',
 'TCGA-86-8054': 'smoker',
 'TCGA-55-6986': 'control',
 'TCGA-L9-A5IP': 'control',
 'TCGA-69-7764': 'smoker',
 'TCGA-49-6744': 'smoker',
 'TCGA-75-5125': 'smoker',
 'TCGA-38-4626': 'smoker',
 'TCGA-69-7763': 'smoker',
 'TCGA-86-8279': 'control',
 'TCGA-93-8067': 'smoker',
 'TCGA-97-8179': 'smoker',
 'TCGA-55-A48Y': 'smoker',
 'TCGA-86-8055': 'smoker',
 'TCGA-91-6835': 'smoker',
 'TCGA-55-6982': 'control',
 'TCGA-55-A4DF': 'smoker',
 'TCGA-44-6774': 'smoker',
 'TCGA-50-5066': 'control',
 'TCGA-05-5423': 'smoker',
 'TCGA-67-3774': 'control',
 'TCGA-97-A4M2': 'smoker',
 'TCGA-95-7567': 'control',
 'TCGA-49-AAR0': 'smoker',
 'TCGA-44-2656': 'smoker',
 'TCGA-53-7813': 'smoker',
 'TCGA-O1-A52J': 'smoker',
 'TCGA-35-4122': 'smoker',
 'TCGA-55-8092': 'smoker',
 'TCGA-49-6761': 'smoker',
 'TCGA-49-4507': 'smoker',
 'TCGA-55-7816': 'control',
 'TCGA-78-7145': 'smoker',
 'TCGA-55-6983': 'smoker',
 'TCGA-53-7624': 'smoker',
 'TCGA-97-A4M3': 'smoker',
 'TCGA-50-5068': 'control',
 'TCGA-78-8648': 'smoker',
 'TCGA-44-6778': 'smoker',
 'TCGA-80-5608': 'smoker',
 'TCGA-86-8281': 'smoker',
 'TCGA-MP-A4T6': 'smoker',
 'TCGA-55-8085': 'control',
 'TCGA-62-8399': 'smoker',
 'TCGA-97-A4M5': 'smoker',
 'TCGA-97-7547': 'smoker',
 'TCGA-05-5429': 'control',
 'TCGA-55-7994': 'smoker',
 'TCGA-55-8094': 'smoker',
 'TCGA-05-4425': 'smoker',
 'TCGA-44-4112': 'smoker',
 'TCGA-49-6767': 'smoker',
 'TCGA-49-4490': 'smoker',
 'TCGA-MP-A4T9': 'smoker',
 'TCGA-50-5942': 'smoker',
 'TCGA-MP-A4SV': 'smoker',
 'TCGA-49-AAR4': 'smoker',
 'TCGA-05-4397': 'smoker',
 'TCGA-44-A47A': 'smoker',
 'TCGA-86-8359': 'smoker',
 'TCGA-78-7539': 'smoker',
 'TCGA-MP-A4T8': 'smoker',
 'TCGA-99-8032': 'smoker',
 'TCGA-50-6595': 'control',
 'TCGA-55-6968': 'control',
 'TCGA-44-8120': 'smoker',
 'TCGA-55-8302': 'smoker',
 'TCGA-99-8025': 'smoker',
 'TCGA-64-1679': 'smoker',
 'TCGA-95-8039': 'control',
 'TCGA-44-A479': 'smoker',
 'TCGA-44-6148': 'smoker',
 'TCGA-NJ-A55O': 'smoker',
 'TCGA-MP-A5C7': 'smoker',
 'TCGA-64-5778': 'smoker',
 'TCGA-55-6971': 'smoker',
 'TCGA-49-AARN': 'control',
 'TCGA-44-A47B': 'smoker',
 'TCGA-55-5899': 'control',
 'TCGA-49-AAQV': 'control',
 'TCGA-99-8028': 'smoker',
 'TCGA-75-6205': 'control',
 'TCGA-97-8552': 'control',
 'TCGA-50-8459': 'smoker',
 'TCGA-05-5425': 'smoker',
 'TCGA-78-7150': 'smoker',
 'TCGA-86-A4P7': 'control',
 'TCGA-49-4512': 'control',
 'TCGA-55-8206': 'control',
 'TCGA-55-8614': 'smoker',
 'TCGA-64-5815': 'smoker',
 'TCGA-L9-A50W': 'smoker',
 'TCGA-73-4675': 'smoker',
 'TCGA-55-7995': 'smoker',
 'TCGA-05-4433': 'smoker',
 'TCGA-55-7727': 'control',
 'TCGA-44-2668': 'smoker',
 'TCGA-44-A4SU': 'smoker',
 'TCGA-55-7907': 'smoker',
 'TCGA-69-7765': 'control',
 'TCGA-49-4487': 'smoker',
 'TCGA-44-2662': 'smoker',
 'TCGA-67-6216': 'control',
 'TCGA-55-7283': 'smoker',
 'TCGA-86-8280': 'control',
 'TCGA-91-6840': 'smoker',
 'TCGA-78-7154': 'smoker',
 'TCGA-49-4488': 'control',
 'TCGA-93-7348': 'smoker',
 'TCGA-62-A470': 'smoker',
 'TCGA-78-7147': 'smoker',
 'TCGA-50-5936': 'control',
 'TCGA-55-6984': 'control',
 'TCGA-50-5941': 'smoker',
 'TCGA-69-7978': 'smoker',
 'TCGA-78-7220': 'smoker',
 'TCGA-55-8616': 'control',
 'TCGA-44-A4SS': 'smoker',
 'TCGA-55-7570': 'control',
 'TCGA-78-7146': 'smoker',
 'TCGA-44-3398': 'smoker',
 'TCGA-05-5420': 'smoker',
 'TCGA-50-5072': 'smoker',
 'TCGA-05-4396': 'smoker',
 'TCGA-05-4405': 'smoker',
 'TCGA-50-5935': 'control',
 'TCGA-38-4629': 'smoker',
 'TCGA-55-8619': 'control',
 'TCGA-05-4410': 'smoker',
 'TCGA-73-4676': 'smoker',
 'TCGA-97-8172': 'smoker',
 'TCGA-44-7661': 'smoker',
 'TCGA-05-4384': 'smoker',
 'TCGA-44-2655': 'smoker',
 'TCGA-80-5607': 'smoker',
 'TCGA-67-3770': 'control',
 'TCGA-91-6836': 'smoker',
 'TCGA-95-7562': 'smoker',
 'TCGA-55-8511': 'smoker',
 'TCGA-44-6776': 'smoker',
 'TCGA-95-7948': 'smoker',
 'TCGA-91-7771': 'smoker',
 'TCGA-50-5944': 'control',
 'TCGA-MN-A4N5': 'control',
 'TCGA-73-4677': 'control',
 'TCGA-78-7540': 'control',
 'TCGA-91-6829': 'smoker',
 'TCGA-78-8640': 'smoker',
 'TCGA-62-8398': 'smoker',
 'TCGA-55-8512': 'smoker',
 'TCGA-83-5908': 'smoker',
 'TCGA-55-6987': 'control',
 'TCGA-93-A4JP': 'control',
 'TCGA-73-A9RS': 'smoker',
 'TCGA-L4-A4E5': 'smoker',
 'TCGA-86-8074': 'smoker',
 'TCGA-86-8358': 'smoker',
 'TCGA-78-7158': 'smoker',
 'TCGA-91-8497': 'control',
 'TCGA-49-AARO': 'control',
 'TCGA-78-7159': 'smoker',
 'TCGA-55-7227': 'control',
 'TCGA-86-7714': 'smoker',
 'TCGA-L9-A7SV': 'smoker',
 'TCGA-78-7143': 'control',
 'TCGA-91-8499': 'smoker',
 'TCGA-49-AAR3': 'control',
 'TCGA-55-8620': 'smoker',
 'TCGA-69-8255': 'smoker',
 'TCGA-75-6207': 'smoker',
 'TCGA-62-A46Y': 'control',
 'TCGA-NJ-A4YF': 'smoker',
 'TCGA-91-6830': 'smoker',
 'TCGA-62-8395': 'control',
 'TCGA-49-4486': 'smoker',
 'TCGA-44-6145': 'smoker',
 'TCGA-86-A4P8': 'control',
 'TCGA-78-7537': 'smoker',
 'TCGA-44-3919': 'control',
 'TCGA-35-4123': 'smoker',
 'TCGA-62-8394': 'control',
 'TCGA-69-7761': 'smoker',
 'TCGA-62-A46U': 'control',
 'TCGA-97-8547': 'control',
 'TCGA-97-7554': 'smoker',
 'TCGA-50-6673': 'control',
 'TCGA-95-7039': 'smoker',
 'TCGA-95-7944': 'smoker',
 'TCGA-55-8301': 'smoker',
 'TCGA-78-7152': 'smoker',
 'TCGA-05-4390': 'smoker',
 'TCGA-44-7659': 'smoker',
 'TCGA-97-7941': 'smoker',
 'TCGA-49-4514': 'smoker',
 'TCGA-55-A490': 'smoker',
 'TCGA-55-8508': 'control',
 'TCGA-MP-A4TE': 'smoker',
 'TCGA-97-A4M1': 'smoker',
 'TCGA-75-6206': 'smoker',
 'TCGA-86-8671': 'control',
 'TCGA-78-7160': 'smoker',
 'TCGA-64-1681': 'control',
 'TCGA-49-4494': 'smoker',
 'TCGA-50-5946': 'control',
 'TCGA-55-7913': 'smoker',
 'TCGA-44-6779': 'smoker',
 'TCGA-49-AARE': 'smoker',
 'TCGA-05-4403': 'control',
 'TCGA-99-AA5R': 'smoker',
 'TCGA-73-4659': 'smoker',
 'TCGA-50-8457': 'smoker',
 'TCGA-75-5147': 'control',
 'TCGA-55-8507': 'smoker',
 'TCGA-78-7536': 'smoker',
 'TCGA-95-A4VK': 'smoker',
 'TCGA-38-4627': 'control',
 'TCGA-67-6215': 'control',
 'TCGA-69-7973': 'smoker',
 'TCGA-05-5715': 'control',
 'TCGA-75-7030': 'control',
 'TCGA-44-2666': 'smoker',
 'TCGA-62-A472': 'smoker',
 'TCGA-55-6985': 'smoker',
 'TCGA-J2-A4AG': 'smoker',
 'TCGA-97-A4LX': 'smoker',
 'TCGA-55-6543': 'smoker',
 'TCGA-97-7938': 'smoker',
 'TCGA-35-3615': 'control',
 'TCGA-44-2657': 'smoker',
 'TCGA-55-8505': 'control',
 'TCGA-97-8175': 'control',
 'TCGA-49-4510': 'smoker',
 'TCGA-MP-A4TK': 'smoker',
 'TCGA-50-5044': 'control',
 'TCGA-49-AAR2': 'control',
 'TCGA-05-4250': 'smoker',
 'TCGA-97-7937': 'smoker',
 'TCGA-64-5774': 'smoker',
 'TCGA-86-8674': 'smoker',
 'TCGA-50-6590': 'smoker',
 'TCGA-64-5779': 'smoker',
 'TCGA-75-5146': 'control',
 'TCGA-44-6144': 'smoker',
 'TCGA-78-8662': 'smoker',
 'TCGA-55-7911': 'smoker',
 'TCGA-86-8585': 'control',
 'TCGA-38-A44F': 'smoker',
 'TCGA-64-1680': 'smoker',
 'TCGA-97-A4M6': 'control',
 'TCGA-75-6214': 'smoker',
 'TCGA-55-6980': 'control',
 'TCGA-97-7553': 'smoker',
 'TCGA-55-A48X': 'smoker',
 'TCGA-64-1677': 'smoker',
 'TCGA-73-7498': 'smoker',
 'TCGA-38-6178': 'control',
 'TCGA-44-7670': 'smoker',
 'TCGA-62-A471': 'smoker',
 'TCGA-49-4505': 'smoker',
 'TCGA-NJ-A7XG': 'control',
 'TCGA-55-6981': 'control',
 'TCGA-91-6848': 'control',
 'TCGA-55-8090': 'control',
 'TCGA-55-7725': 'smoker',
 'TCGA-55-8207': 'control',
 'TCGA-44-6146': 'smoker',
 'TCGA-05-4434': 'smoker',
 'TCGA-55-6979': 'control',
 'TCGA-05-4427': 'smoker',
 'TCGA-55-8615': 'smoker',
 'TCGA-50-5939': 'smoker',
 'TCGA-05-4418': 'smoker',
 'TCGA-67-6217': 'control',
 'TCGA-49-6745': 'smoker',
 'TCGA-55-1595': 'control',
 'TCGA-49-6742': 'smoker',
 'TCGA-05-4402': 'control',
 'TCGA-05-4382': 'smoker',
 'TCGA-55-7576': 'control',
 'TCGA-67-3773': 'control',
 'TCGA-78-7633': 'smoker',
 'TCGA-50-6597': 'control',
 'TCGA-44-2659': 'smoker',
 'TCGA-95-7947': 'smoker',
 'TCGA-55-7724': 'smoker',
 'TCGA-J2-A4AD': 'smoker',
 'TCGA-55-8091': 'control',
 'TCGA-55-1592': 'control',
 'TCGA-73-4670': 'smoker',
 'TCGA-55-1594': 'control',
 'TCGA-55-8621': 'smoker',
 'TCGA-50-5051': 'smoker',
 'TCGA-49-4501': 'control',
 'TCGA-J2-8194': 'smoker',
 'TCGA-44-8119': 'smoker',
 'TCGA-55-8203': 'smoker',
 'TCGA-97-8177': 'control',
 'TCGA-55-7573': 'control',
 'TCGA-55-8089': 'smoker',
 'TCGA-38-4630': 'control',
 'TCGA-78-7166': 'smoker',
 'TCGA-38-4631': 'smoker',
 'TCGA-55-1596': 'smoker',
 'TCGA-91-A4BD': 'control',
 'TCGA-67-3771': 'control',
 'TCGA-J2-8192': 'control',
 'TCGA-55-A48Z': 'smoker',
 'TCGA-97-8176': 'smoker',
 'TCGA-86-6851': 'smoker',
 'TCGA-50-5931': 'smoker',
 'TCGA-NJ-A4YI': 'smoker',
 'TCGA-97-7552': 'smoker',
 'TCGA-MP-A4T4': 'smoker',
 'TCGA-L4-A4E6': 'smoker',
 'TCGA-44-5643': 'smoker',
 'TCGA-MP-A4TD': 'smoker',
 'TCGA-05-4244': 'smoker',
 'TCGA-50-5930': 'control',
 'TCGA-44-6777': 'smoker',
 'TCGA-05-4430': 'smoker',
 'TCGA-05-4426': 'smoker',
 'TCGA-44-6775': 'smoker',
 'TCGA-05-4420': 'smoker',
 'TCGA-55-8506': 'control',
 'TCGA-44-7672': 'smoker',
 'TCGA-62-8402': 'control',
 'TCGA-86-8056': 'smoker',
 'TCGA-05-4422': 'smoker',
 'TCGA-75-7027': 'smoker',
 'TCGA-78-7535': 'smoker',
 'TCGA-50-8460': 'control',
 'TCGA-75-7031': 'smoker',
 'TCGA-L9-A444': 'smoker',
 'TCGA-73-4666': 'smoker',
 'TCGA-86-7953': 'control',
 'TCGA-86-A4D0': 'control',
 'TCGA-NJ-A4YQ': 'smoker',
 'TCGA-91-8496': 'control',
 'TCGA-67-3772': 'control',
 'TCGA-55-7281': 'smoker',
 'TCGA-05-4424': 'smoker',
 'TCGA-69-A59K': 'smoker',
 'TCGA-75-7025': 'control',
 'TCGA-55-8514': 'smoker',
 'TCGA-95-8494': 'control',
 'TCGA-05-4395': 'control',
 'TCGA-93-A4JQ': 'control',
 'TCGA-44-8117': 'smoker',
 'TCGA-55-8204': 'control',
 'TCGA-50-5933': 'control',
 'TCGA-MN-A4N1': 'control',
 'TCGA-55-7903': 'smoker',
 'TCGA-86-8669': 'smoker',
 'TCGA-55-6970': 'control',
 'TCGA-50-6594': 'control',
 'TCGA-86-8075': 'control',
 'TCGA-MP-A4TF': 'smoker',
 'TCGA-69-7760': 'control',
 'TCGA-78-7161': 'smoker',
 'TCGA-91-6849': 'smoker',
 'TCGA-99-8033': 'control',
 'TCGA-55-8205': 'smoker',
 'TCGA-55-8510': 'control',
 'TCGA-91-6828': 'control',
 'TCGA-50-5049': 'control',
 'TCGA-99-7458': 'smoker',
 'TCGA-49-AARR': 'control',
 'TCGA-MP-A4TC': 'smoker',
 'TCGA-49-AARQ': 'smoker',
 'TCGA-93-A4JN': 'smoker',
 'TCGA-95-A4VP': 'smoker',
 'TCGA-69-8453': 'smoker',
 'TCGA-55-8513': 'control',
 'TCGA-55-6975': 'control',
 'TCGA-86-8278': 'control',
 'TCGA-75-6203': 'control',
 'TCGA-49-6743': 'smoker',
 'TCGA-55-7574': 'smoker',
 'TCGA-78-8655': 'smoker',
 'TCGA-05-4389': 'smoker',
 'TCGA-86-7701': 'control',
 'TCGA-91-6831': 'control',
 'TCGA-55-7728': 'smoker',
 'TCGA-97-A4M0': 'smoker',
 'TCGA-50-6593': 'control',
 'TCGA-86-6562': 'control',
 'TCGA-62-A46S': 'smoker',
 'TCGA-86-A4JF': 'control',
 'TCGA-J2-A4AE': 'control',
 'TCGA-55-8087': 'control',
 'TCGA-78-7542': 'smoker',
 'TCGA-78-7148': 'smoker',
 'TCGA-55-A493': 'smoker',
 'TCGA-91-A4BC': 'smoker',
 'TCGA-05-4432': 'smoker',
 'TCGA-55-6712': 'control',
 'TCGA-4B-A93V': 'smoker',
 'TCGA-93-7347': 'smoker',
 'TCGA-86-8668': 'control',
 'TCGA-MP-A4TI': 'smoker',
 'TCGA-69-8254': 'smoker',
 'TCGA-64-1676': 'smoker',
 'TCGA-62-A46V': 'control',
 'TCGA-78-7167': 'smoker',
 'TCGA-55-7284': 'control',
 'TCGA-78-7162': 'smoker',
 'TCGA-75-6212': 'control',
 'TCGA-97-7546': 'smoker',
 'TCGA-44-5644': 'smoker',
 'TCGA-55-8299': 'smoker',
 'TCGA-75-6211': 'smoker',
 'TCGA-MP-A4SW': 'smoker',
 'TCGA-78-7149': 'smoker',
 'TCGA-S2-AA1A': 'smoker',
 'TCGA-95-A4VN': 'smoker',
 'TCGA-55-8096': 'smoker',
 'TCGA-05-4398': 'smoker',
 'TCGA-97-8171': 'smoker',
 'TCGA-55-A492': 'smoker',
 'TCGA-71-8520': 'control',
 'TCGA-44-3396': 'smoker',
 'TCGA-55-A57B': 'control',
 'TCGA-L9-A443': 'smoker',
 'TCGA-55-A4DG': 'smoker',
 'TCGA-67-4679': 'control',
 'TCGA-64-5781': 'smoker',
 'TCGA-93-A4JO': 'smoker',
 'TCGA-NJ-A4YP': 'smoker',
 'TCGA-69-7974': 'smoker',
 'TCGA-MP-A4T7': 'smoker',
 'TCGA-55-6642': 'control',
 'TCGA-49-AAR9': 'control',
 'TCGA-69-8253': 'smoker',
 'TCGA-05-4249': 'smoker',
 'TCGA-44-7669': 'smoker',
 'TCGA-71-6725': 'control',
 'TCGA-73-7499': 'control',
 'TCGA-MP-A4TH': 'smoker',
 'TCGA-55-8208': 'smoker',
 'TCGA-78-7156': 'smoker',
 'TCGA-75-5122': 'smoker',
 'TCGA-50-7109': 'smoker',
 'TCGA-NJ-A55R': 'smoker',
 'TCGA-53-A4EZ': 'smoker',
 'TCGA-NJ-A4YG': 'smoker',
 'TCGA-86-A456': 'control',
 'TCGA-38-4632': 'smoker',
 'TCGA-MP-A4TJ': 'control',
 'TCGA-97-8174': 'smoker',
 'TCGA-MP-A4SY': 'smoker',
 'TCGA-62-8397': 'control',
 'TCGA-L9-A8F4': 'smoker',
 'TCGA-75-5126': 'smoker',
 'TCGA-MP-A4TA': 'smoker',
 'TCGA-86-7711': 'smoker',
 'TCGA-50-5045': 'control',
 'TCGA-05-4417': 'smoker',
 'TCGA-44-7660': 'smoker',
 'TCGA-69-7979': 'smoker',
 'TCGA-55-A491': 'smoker',
 'TCGA-L9-A743': 'smoker',
 'TCGA-55-A494': 'smoker',
 'TCGA-44-7662': 'smoker',
 'TCGA-55-7910': 'smoker',
 'TCGA-69-7980': 'smoker',
 'TCGA-55-8097': 'smoker',
 'TCGA-73-4668': 'smoker',
 'TCGA-NJ-A55A': 'smoker',
 'TCGA-86-8672': 'control',
 'TCGA-86-8673': 'smoker',
 'TCGA-78-7153': 'smoker',
 'TCGA-97-A4M7': 'smoker',
 'TCGA-62-A46R': 'smoker',
 'TCGA-50-5055': 'control',
 'TCGA-38-4628': 'control',
 'TCGA-86-7713': 'control',
 'TCGA-86-8073': 'smoker',
 'TCGA-MN-A4N4': 'control',
 'TCGA-53-7626': 'smoker',
 'TCGA-44-A47G': 'smoker',
 'TCGA-55-6969': 'control'}
# Preview the first two gene rows of the expression matrix restricted to the network genes
tcga_gxp_df.loc[G_gxp_nodes_list].iloc[:2]
TCGA-38-7271 TCGA-55-7914 TCGA-95-7043 TCGA-73-4658 TCGA-86-8076 TCGA-55-7726 TCGA-44-6147 TCGA-50-5932 TCGA-44-2661 TCGA-86-7954 TCGA-73-4662 TCGA-44-7671 TCGA-78-8660 TCGA-62-A46P TCGA-55-6978 TCGA-50-6592 TCGA-38-4625 TCGA-80-5611 TCGA-86-8054 TCGA-55-6986 TCGA-L9-A5IP TCGA-69-7764 TCGA-49-6744 TCGA-75-5125 TCGA-38-4626 TCGA-69-7763 TCGA-86-8279 TCGA-93-8067 TCGA-97-8179 TCGA-55-A48Y TCGA-86-8055 TCGA-91-6835 TCGA-55-6982 TCGA-55-A4DF TCGA-44-6774 TCGA-50-5066 TCGA-05-5423 TCGA-67-3774 TCGA-97-A4M2 TCGA-95-7567 TCGA-49-AAR0 TCGA-44-2656 TCGA-53-7813 TCGA-O1-A52J TCGA-35-4122 TCGA-55-8092 TCGA-49-6761 TCGA-49-4507 TCGA-55-7816 TCGA-78-7145 TCGA-55-6983 TCGA-53-7624 TCGA-97-A4M3 TCGA-50-5068 TCGA-78-8648 TCGA-44-6778 TCGA-80-5608 TCGA-86-8281 TCGA-MP-A4T6 TCGA-55-8085 TCGA-62-8399 TCGA-97-A4M5 TCGA-97-7547 TCGA-05-5429 TCGA-55-7994 TCGA-55-8094 TCGA-05-4425 TCGA-44-4112 TCGA-49-6767 TCGA-49-4490 TCGA-MP-A4T9 TCGA-50-5942 TCGA-MP-A4SV TCGA-49-AAR4 TCGA-05-4397 TCGA-44-A47A TCGA-86-8359 TCGA-78-7539 TCGA-MP-A4T8 TCGA-99-8032 TCGA-50-6595 TCGA-55-6968 TCGA-44-8120 TCGA-55-8302 TCGA-99-8025 TCGA-64-1679 TCGA-95-8039 TCGA-44-A479 TCGA-44-6148 TCGA-NJ-A55O TCGA-MP-A5C7 TCGA-64-5778 TCGA-55-6971 TCGA-49-AARN TCGA-44-A47B TCGA-55-5899 TCGA-49-AAQV TCGA-99-8028 TCGA-75-6205 TCGA-97-8552 TCGA-50-8459 TCGA-05-5425 TCGA-78-7150 TCGA-86-A4P7 TCGA-49-4512 TCGA-55-8206 TCGA-55-8614 TCGA-64-5815 TCGA-L9-A50W TCGA-73-4675 TCGA-55-7995 TCGA-05-4433 TCGA-55-7727 TCGA-44-2668 TCGA-44-A4SU TCGA-55-7907 TCGA-69-7765 TCGA-49-4487 TCGA-44-2662 TCGA-67-6216 TCGA-55-7283 TCGA-86-8280 TCGA-91-6840 TCGA-78-7154 TCGA-49-4488 TCGA-93-7348 TCGA-62-A470 TCGA-78-7147 TCGA-50-5936 TCGA-55-6984 TCGA-50-5941 TCGA-69-7978 TCGA-78-7220 TCGA-55-8616 TCGA-44-A4SS TCGA-55-7570 TCGA-78-7146 TCGA-44-3398 TCGA-05-5420 TCGA-50-5072 TCGA-05-4396 TCGA-05-4405 TCGA-50-5935 TCGA-38-4629 TCGA-55-8619 TCGA-05-4410 TCGA-73-4676 TCGA-97-8172 TCGA-44-7661 TCGA-05-4384 TCGA-44-2655 TCGA-80-5607 TCGA-67-3770 
TCGA-91-6836 TCGA-95-7562 TCGA-55-8511 TCGA-44-6776 TCGA-95-7948 TCGA-91-7771 TCGA-50-5944 TCGA-MN-A4N5 TCGA-73-4677 TCGA-78-7540 TCGA-91-6829 TCGA-78-8640 TCGA-62-8398 TCGA-55-8512 TCGA-83-5908 TCGA-55-6987 TCGA-93-A4JP TCGA-73-A9RS TCGA-L4-A4E5 TCGA-86-8074 TCGA-86-8358 TCGA-78-7158 TCGA-91-8497 TCGA-49-AARO TCGA-78-7159 TCGA-55-7227 TCGA-86-7714 TCGA-L9-A7SV TCGA-78-7143 TCGA-91-8499 TCGA-49-AAR3 TCGA-55-8620 TCGA-69-8255 TCGA-75-6207 TCGA-62-A46Y TCGA-NJ-A4YF TCGA-91-6830 TCGA-62-8395 TCGA-49-4486 TCGA-44-6145 TCGA-86-A4P8 TCGA-78-7537 TCGA-44-3919 TCGA-35-4123 TCGA-62-8394 TCGA-69-7761 TCGA-62-A46U TCGA-97-8547 TCGA-97-7554 TCGA-50-6673 TCGA-95-7039 TCGA-95-7944 TCGA-55-8301 TCGA-78-7152 TCGA-05-4390 TCGA-44-7659 TCGA-97-7941 TCGA-49-4514 TCGA-55-A490 TCGA-55-8508 TCGA-MP-A4TE TCGA-97-A4M1 TCGA-75-6206 TCGA-86-8671 TCGA-78-7160 TCGA-64-1681 TCGA-49-4494 TCGA-50-5946 TCGA-55-7913 TCGA-44-6779 TCGA-49-AARE TCGA-05-4403 TCGA-99-AA5R TCGA-73-4659 TCGA-50-8457 TCGA-75-5147 TCGA-55-8507 TCGA-78-7536 TCGA-95-A4VK TCGA-38-4627 TCGA-67-6215 TCGA-69-7973 TCGA-05-5715 TCGA-75-7030 TCGA-44-2666 TCGA-62-A472 TCGA-55-6985 TCGA-J2-A4AG TCGA-97-A4LX TCGA-55-6543 TCGA-97-7938 TCGA-35-3615 TCGA-44-2657 TCGA-55-8505 TCGA-97-8175 TCGA-49-4510 TCGA-MP-A4TK TCGA-50-5044 TCGA-49-AAR2 TCGA-05-4250 TCGA-97-7937 TCGA-64-5774 TCGA-86-8674 TCGA-50-6590 TCGA-64-5779 TCGA-75-5146 TCGA-44-6144 TCGA-78-8662 TCGA-55-7911 TCGA-86-8585 TCGA-38-A44F TCGA-64-1680 TCGA-97-A4M6 TCGA-75-6214 TCGA-55-6980 TCGA-97-7553 TCGA-55-A48X TCGA-64-1677 TCGA-73-7498 TCGA-38-6178 TCGA-44-7670 TCGA-62-A471 TCGA-49-4505 TCGA-NJ-A7XG TCGA-55-6981 TCGA-91-6848 TCGA-55-8090 TCGA-55-7725 TCGA-55-8207 TCGA-44-6146 TCGA-05-4434 TCGA-55-6979 TCGA-05-4427 TCGA-55-8615 TCGA-50-5939 TCGA-05-4418 TCGA-67-6217 TCGA-49-6745 TCGA-55-1595 TCGA-49-6742 TCGA-05-4402 TCGA-05-4382 TCGA-55-7576 TCGA-67-3773 TCGA-78-7633 TCGA-50-6597 TCGA-44-2659 TCGA-95-7947 TCGA-55-7724 TCGA-J2-A4AD TCGA-55-8091 TCGA-55-1592 TCGA-73-4670 
TCGA-55-1594 TCGA-55-8621 TCGA-50-5051 TCGA-49-4501 TCGA-J2-8194 TCGA-44-8119 TCGA-55-8203 TCGA-97-8177 TCGA-55-7573 TCGA-55-8089 TCGA-38-4630 TCGA-78-7166 TCGA-38-4631 TCGA-55-1596 TCGA-91-A4BD TCGA-67-3771 TCGA-J2-8192 TCGA-55-A48Z TCGA-97-8176 TCGA-86-6851 TCGA-50-5931 TCGA-NJ-A4YI TCGA-97-7552 TCGA-MP-A4T4 TCGA-L4-A4E6 TCGA-44-5643 TCGA-MP-A4TD TCGA-05-4244 TCGA-50-5930 TCGA-44-6777 TCGA-05-4430 TCGA-05-4426 TCGA-44-6775 TCGA-05-4420 TCGA-55-8506 TCGA-44-7672 TCGA-62-8402 TCGA-86-8056 TCGA-05-4422 TCGA-75-7027 TCGA-78-7535 TCGA-50-8460 TCGA-75-7031 TCGA-L9-A444 TCGA-73-4666 TCGA-86-7953 TCGA-86-A4D0 TCGA-NJ-A4YQ TCGA-91-8496 TCGA-67-3772 TCGA-55-7281 TCGA-05-4424 TCGA-69-A59K TCGA-75-7025 TCGA-55-8514 TCGA-95-8494 TCGA-05-4395 TCGA-93-A4JQ TCGA-44-8117 TCGA-55-8204 TCGA-50-5933 TCGA-MN-A4N1 TCGA-55-7903 TCGA-86-8669 TCGA-55-6970 TCGA-50-6594 TCGA-86-8075 TCGA-MP-A4TF TCGA-69-7760 TCGA-78-7161 TCGA-91-6849 TCGA-99-8033 TCGA-55-8205 TCGA-55-8510 TCGA-91-6828 TCGA-50-5049 TCGA-99-7458 TCGA-49-AARR TCGA-MP-A4TC TCGA-49-AARQ TCGA-93-A4JN TCGA-95-A4VP TCGA-69-8453 TCGA-55-8513 TCGA-55-6975 TCGA-86-8278 TCGA-75-6203 TCGA-49-6743 TCGA-55-7574 TCGA-78-8655 TCGA-05-4389 TCGA-86-7701 TCGA-91-6831 TCGA-55-7728 TCGA-97-A4M0 TCGA-50-6593 TCGA-86-6562 TCGA-62-A46S TCGA-86-A4JF TCGA-J2-A4AE TCGA-55-8087 TCGA-78-7542 TCGA-78-7148 TCGA-55-A493 TCGA-91-A4BC TCGA-05-4432 TCGA-55-6712 TCGA-4B-A93V TCGA-93-7347 TCGA-86-8668 TCGA-MP-A4TI TCGA-69-8254 TCGA-64-1676 TCGA-62-A46V TCGA-78-7167 TCGA-55-7284 TCGA-78-7162 TCGA-75-6212 TCGA-97-7546 TCGA-44-5644 TCGA-55-8299 TCGA-75-6211 TCGA-MP-A4SW TCGA-78-7149 TCGA-S2-AA1A TCGA-95-A4VN TCGA-55-8096 TCGA-05-4398 TCGA-97-8171 TCGA-55-A492 TCGA-71-8520 TCGA-44-3396 TCGA-55-A57B TCGA-L9-A443 TCGA-55-A4DG TCGA-67-4679 TCGA-64-5781 TCGA-93-A4JO TCGA-NJ-A4YP TCGA-69-7974 TCGA-MP-A4T7 TCGA-55-6642 TCGA-49-AAR9 TCGA-69-8253 TCGA-05-4249 TCGA-44-7669 TCGA-71-6725 TCGA-73-7499 TCGA-MP-A4TH TCGA-55-8208 TCGA-78-7156 TCGA-75-5122 TCGA-50-7109 
TCGA-NJ-A55R TCGA-53-A4EZ TCGA-NJ-A4YG TCGA-86-A456 TCGA-38-4632 TCGA-MP-A4TJ TCGA-97-8174 TCGA-MP-A4SY TCGA-62-8397 TCGA-L9-A8F4 TCGA-75-5126 TCGA-MP-A4TA TCGA-86-7711 TCGA-50-5045 TCGA-05-4417 TCGA-44-7660 TCGA-69-7979 TCGA-55-A491 TCGA-L9-A743 TCGA-55-A494 TCGA-44-7662 TCGA-55-7910 TCGA-69-7980 TCGA-55-8097 TCGA-73-4668 TCGA-NJ-A55A TCGA-86-8672 TCGA-86-8673 TCGA-78-7153 TCGA-97-A4M7 TCGA-62-A46R TCGA-50-5055 TCGA-38-4628 TCGA-86-7713 TCGA-86-8073 TCGA-MN-A4N4 TCGA-53-7626 TCGA-44-A47G TCGA-55-6969
GENES
A2M 17.7492 14.8513 14.1691 16.7238 15.6783 14.7566 16.4368 15.5476 15.5478 15.1337 15.6250 16.0494 15.7359 14.9778 16.9704 14.4270 14.7574 13.1638 15.0112 15.3896 13.5478 15.4359 16.3375 14.5970 16.0819 16.9319 14.3000 14.5112 15.9039 15.4757 15.4426 17.2575 17.6236 13.3706 16.4484 14.1244 14.5753 16.7414 16.3902 14.8686 15.3076 15.8770 13.0567 15.7282 14.829 15.0254 16.7964 13.0682 15.1424 15.1625 16.6138 12.2388 15.6623 14.2498 17.5393 17.4247 14.7999 15.0940 13.7890 14.1624 16.7473 16.5660 16.7337 14.3118 14.4655 12.8692 14.1556 14.9833 12.7165 17.4857 15.8425 16.964 14.4254 13.7352 13.4139 13.9368 14.9159 15.3027 14.6733 16.3115 17.0258 16.7013 16.722 14.7179 15.1723 15.0197 15.8363 14.8260 17.8420 15.4716 14.4838 14.3332 15.6629 14.5422 14.2511 14.6566 13.7502 15.7397 15.8105 16.1431 16.3924 15.3659 14.8809 16.5745 14.6630 16.5782 14.9216 15.7101 14.4933 14.6708 14.5511 16.3445 15.1999 14.3022 14.7421 15.5912 15.9459 15.1002 18.2897 14.3569 16.3395 16.9130 15.4322 16.0323 13.4262 15.0630 14.6226 13.8946 17.6279 14.6974 15.9820 15.5927 15.0039 16.0281 15.1942 14.4601 14.0031 15.2518 14.7647 15.3536 15.7359 15.9672 17.2774 14.8217 17.0212 15.5794 17.5069 17.8445 16.2630 16.5506 15.4779 15.6777 14.9127 16.8185 15.2691 15.0146 16.6405 13.8126 17.5794 17.3197 14.7516 15.8082 14.7878 16.7701 14.0833 14.7712 15.5576 14.7258 15.4638 17.4923 13.4521 13.8513 14.3137 14.1886 13.6341 17.0835 14.9391 15.2270 16.3281 15.9192 13.8003 14.6677 13.7759 14.3897 13.7689 16.4515 14.2397 14.4881 13.8527 17.1695 16.2304 13.7651 17.3095 17.0505 14.7285 15.7889 13.5973 14.4440 15.6426 14.3974 13.3799 16.3218 16.9491 15.4852 15.7973 15.4777 15.0131 15.0569 14.4570 16.7897 14.2399 14.9289 15.1314 13.1720 16.1986 16.2457 16.1031 16.4563 15.7976 13.6831 12.8610 12.3294 15.1704 13.7260 16.2771 17.0507 16.2292 17.3887 15.4193 15.259 14.1418 15.1655 16.8226 14.1746 15.5775 15.6248 16.8797 16.5588 14.3745 15.0979 16.3014 15.6822 15.0171 16.5745 14.3839 17.1775 15.0978 13.4816 15.1277 15.2175 
14.8708 13.4964 15.2071 13.6416 13.8050 13.2714 14.5213 15.9484 15.4828 17.3208 14.4355 14.3595 14.4131 15.8961 14.2801 15.8648 12.6088 17.3259 17.1891 16.1923 13.6240 16.6941 15.9264 14.1762 12.7273 15.1329 12.9985 16.9632 15.1549 15.1316 15.5867 16.8995 15.5169 15.3575 15.7770 14.7245 13.9972 15.7515 13.6726 16.1269 15.7639 15.6623 16.9873 15.3609 15.4537 15.1843 15.2846 16.0575 15.0674 16.1746 14.8903 17.0877 15.0221 15.3074 15.5240 14.4830 14.0902 16.8029 14.7046 15.9543 15.6982 14.6841 14.6716 16.1095 16.0664 15.5669 12.4795 13.3304 13.2511 16.5557 14.0841 14.5986 15.7071 14.2093 14.8985 15.0374 17.2365 15.2448 16.9875 14.9036 17.1617 13.1778 16.1368 14.2608 17.6897 17.3752 16.1986 13.8965 14.7652 14.6927 13.7874 15.6397 13.8542 15.9806 14.3174 14.6960 13.6764 16.3227 14.3093 15.0063 14.5286 14.7218 14.4223 14.6543 15.1059 15.4205 14.7765 16.9348 14.8239 16.5049 14.8803 13.5093 13.4129 15.1136 15.3474 15.0633 18.1110 13.9956 14.1408 14.9650 15.9939 13.5645 15.4537 12.9446 14.1119 15.5059 15.5438 14.1158 16.0608 15.9465 16.8357 16.0185 16.2639 16.6432 14.4569 11.5836 16.0794 16.1072 16.7999 16.8323 18.1497 14.3592 17.1339 15.6252 16.3708 15.3610 14.0132 15.0929 17.4388 14.8807 15.0280 15.9169 15.3922 15.4448 15.0793 15.7103 15.7895 12.5608 15.6912 13.3739 15.6099 14.8925 15.0511 13.0357 15.8722 16.7687 15.5090 15.9287 13.1503 14.3885 15.2336 16.5047 17.1747 15.4653 17.3308 13.3067 16.4980 14.4621 14.4477 15.7449 16.6074 15.3400 15.9399 14.6528 13.2941 15.3209 14.4929 14.9726 16.4492 14.7416 15.9716 16.5621 14.3996 15.5921 14.4844 15.8914 15.2985 15.7348 12.3797 15.2954 15.7355 15.1683 13.7763 13.7535 16.1393 15.6355 15.8596 15.2963 14.9885 15.5841 12.5830 14.3182 15.4776 14.1320 15.3026 18.3547 14.3675 14.8228 15.2490 15.9896 13.8662 14.2694 15.3254 16.1937 13.0504 15.8276 14.9460 15.7715 13.3415 15.1307 13.9971 15.9812 16.2250 14.9622 16.6345 15.1575 13.7451 14.9930 15.9553 13.9511 16.3097 14.3934 15.8254 16.3773 14.9411 16.7343 15.6622 14.8136
ABI3BP 12.6159 8.9312 8.2451 10.5611 11.2882 8.9391 10.7873 10.1560 11.1039 10.0530 10.7197 10.6712 10.5062 8.2397 12.6539 10.2625 10.8400 9.4187 8.6577 10.1019 10.0242 10.5334 11.2775 9.5783 11.5432 10.6676 8.6036 8.8966 10.2376 10.1490 9.6755 13.1418 12.7984 10.1663 10.2419 10.7272 9.8510 10.6950 11.7937 8.5714 9.8863 10.4407 7.8951 10.5151 9.547 10.4863 11.9074 7.6185 14.3384 8.8334 11.9356 9.0709 9.7561 11.3914 11.6642 13.4321 8.6784 9.8084 8.9677 9.6907 10.3430 11.3433 11.3343 11.3761 10.5414 6.9270 10.4237 8.2354 8.5079 12.6952 10.9720 11.629 10.2084 10.0822 6.1454 9.2347 7.9694 10.2411 7.6824 9.2462 12.9248 11.6406 10.377 9.5459 8.9056 10.2460 10.2240 10.8068 13.1822 10.1594 8.7249 10.6341 11.2504 9.4652 8.9951 10.4505 8.7119 11.8054 10.8921 11.1612 10.4548 9.6977 7.9258 11.1846 9.1827 11.4688 9.1985 10.3842 9.0297 8.1098 11.2019 10.9371 10.9153 8.2604 9.9064 10.7199 10.1085 11.6238 13.7768 9.4157 10.7625 11.7291 9.7973 11.1356 8.8244 10.2639 9.6699 9.6883 13.5132 9.4951 11.2227 10.8474 9.4169 10.6349 10.2622 6.8163 9.5239 10.6370 11.4661 10.2148 9.0446 9.9071 13.4253 8.6305 12.0100 10.6986 12.7484 12.5177 11.7388 10.3128 10.6166 10.2970 10.4951 12.5125 8.6870 10.4654 12.6276 8.3694 11.7399 12.2204 9.1713 9.9988 10.5186 8.9567 9.2422 10.8016 10.7716 9.2137 10.5551 12.7693 7.7820 7.7075 9.2342 8.9878 8.2327 12.0587 9.9618 8.8208 10.3802 10.2689 8.1117 10.2066 8.5543 9.2591 10.0813 8.9050 8.3484 9.4515 7.4349 12.0988 10.5217 8.9010 12.6064 12.4913 9.8797 10.4977 8.9450 9.8954 10.9516 10.8331 9.2421 10.3416 10.7844 10.2926 9.7751 10.4743 10.1512 7.0093 9.4981 10.8996 8.4799 8.0840 9.0071 6.4036 11.4361 10.2983 12.2270 10.9392 10.7662 8.6026 7.3891 5.6262 11.0838 9.0693 10.3967 12.0477 9.6985 11.8003 9.7524 9.017 8.3161 10.0954 10.2069 8.6394 9.4585 9.2558 11.5364 9.5775 9.3982 9.9884 11.1988 11.2918 9.8260 11.3400 8.9516 12.5380 9.4044 8.6686 9.5300 10.8429 8.3845 8.2944 8.5766 7.6220 7.6484 7.1378 10.0754 10.1622 10.7386 12.8088 8.3202 9.4829 10.8396 11.3067 
9.5241 11.1666 7.4765 12.9172 11.9350 11.3572 9.7022 11.2203 10.1683 9.1144 7.9692 9.4305 7.9523 12.7612 11.1430 9.3550 10.9902 11.6206 9.0270 11.0929 10.1344 9.3116 6.7414 11.0101 8.4553 11.1199 10.5439 8.9935 11.7319 10.3858 9.2744 10.0113 10.4554 10.8063 10.2956 10.7281 10.2993 11.6108 9.4874 10.3349 9.9762 7.8757 9.5314 11.5937 8.2156 10.5122 9.6563 9.0181 9.9131 10.1954 11.0526 12.5184 8.1022 8.9134 7.2356 8.5382 9.3614 10.0599 10.4890 9.6740 8.7624 11.5303 12.5717 8.9482 11.8377 9.8178 12.3426 9.4793 10.5783 9.8376 13.0470 11.1891 9.7962 8.5505 10.2857 6.6387 7.1575 11.3020 8.6016 10.3898 8.4191 8.5738 6.3949 10.9164 9.0341 10.1176 9.1327 9.2566 6.9800 11.7267 10.5429 10.6252 9.5768 11.2865 9.7393 11.6074 9.1567 7.7773 7.3868 11.0117 9.1628 8.8453 13.1054 7.3094 8.5167 10.3683 10.8076 6.9769 9.5976 7.4164 7.3489 8.6464 10.2409 9.1145 10.9262 10.9726 12.5602 11.2850 11.4983 11.5493 8.8067 9.2208 10.2202 10.6133 11.3729 11.4105 12.6875 8.6143 12.2625 9.2824 11.4505 9.4915 9.2143 10.7416 13.5765 8.8868 10.1617 9.7710 9.7944 10.2475 7.9222 11.2748 10.3609 9.8119 9.5118 9.7784 7.4632 9.1770 11.0987 6.8703 11.6108 11.1451 11.0958 10.5435 8.4857 8.3803 10.1181 11.7452 11.8352 9.9583 12.3163 7.0327 10.9467 7.7775 9.6133 11.3094 11.8133 9.2078 9.6997 8.7748 7.7552 9.2860 8.7750 10.0849 10.9496 9.6826 10.4227 11.0711 7.6728 10.9617 8.9738 10.1860 10.0194 9.0119 9.1946 10.0269 10.1513 10.9726 6.8993 9.8632 11.2975 11.2498 11.1145 7.3740 9.7054 9.5193 8.3847 9.0782 10.6320 8.6243 11.9386 11.5741 8.6329 10.1168 10.4155 9.9671 8.8834 8.9239 10.7302 10.9387 7.1663 6.5785 9.3139 10.9673 6.5078 7.5276 7.7454 10.3851 10.4498 8.7765 11.6719 8.2062 6.7474 8.3941 10.4733 9.9251 12.1192 8.3221 8.5200 10.6718 8.5181 12.2646 11.6573 9.0565
# Run GSEA on the network genes, using the sample -> class mapping as phenotype labels
gs_res = gp.gsea(
    data=tcga_gxp_df.loc[G_gxp_nodes_list],  # genes (rows) x samples (columns)
    gene_sets=gene_sets,                     # gene set collection to test
    cls=classes,                             # class label of each sample
    permutation_num=100,                     # number of permutations
    outdir=None,                             # no output directory is given
    method='signal_to_noise',                # metric used to rank the genes
    threads=4,
    seed=7,                                  # fixed seed for reproducible permutations
)

# Show the first rows of the GSEA results table

### YOUR CODE HERE ###
gs_res.res2d.head()
Name Term ES NES NOM p-val FDR q-val FWER p-val Tag % Gene % Lead_genes
0 gsea Toxoplasmosis 0.672196 1.87385 0.017544 0.131839 0.05 11/19 14.33% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;ALOX5;HLA-DOA
1 gsea Influenza A 0.708245 1.7236 0.117647 0.477917 0.11 11/17 14.33% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;CASP1;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA
2 gsea Th1 and Th2 cell differentiation 0.735164 1.659187 0.096154 0.399637 0.18 10/16 14.33% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA
3 gsea Th17 cell differentiation 0.735164 1.659187 0.096154 0.399637 0.18 10/16 14.33% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA
4 gsea Human T-cell leukemia virus 1 infection 0.645243 1.649653 0.083333 0.323006 0.18 13/22 18.67% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;SPI1;HLA-DOA;ITGB2;HLA-F

Visualising GSEA Results#

Once you have performed GSEA, the next step is to visualise the results. Visualisation helps in interpreting the biological roles of the enriched gene sets. Here, we visualise GSEA results with Barcode Enrichment Plot, Heatmap, Clustermap, and Dot Plot.

Barcode Enrichment Plot

Barcode Enrichment Plot shows the positions of members of a given gene set in the ranked list of genes for the top enriched terms. The genes are ranked left to right from the largest to the smallest ranking score (here, the signal-to-noise metric). The ranked scores are represented by a shaded bar, forming a pattern like a barcode.

# Pull the term names out of the GSEA results table.
# Each term is a pathway or functional category from the tested gene set collection.

### YOUR CODE HERE ###
terms = gs_res.res2d['Term']

# Draw the enrichment plot for the first 5 terms of the results table,
# hiding the ranking panel and placing the legend to the right of the axes
axs = gs_res.plot(
    terms.iloc[:5],
    show_ranking=False,
    legend_kws={'loc': (1.05, 0)},
)
../_images/68cae591387adc9f09777ffc9132322c6c4e2ba2f0b9b12ea9dae42b888f8162.png

You can view and extract leading-edge genes from GSEA results. Leading-edge genes are the subset of genes that contribute most to the enrichment score.

# Show the term name next to its leading-edge genes for the first 10 results
gs_res.res2d.loc[:, ['Term', 'Lead_genes']].head(10)
Term Lead_genes
0 Toxoplasmosis HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;ALOX5;HLA-DOA
1 Influenza A HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;CASP1;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA
2 Th1 and Th2 cell differentiation HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA
3 Th17 cell differentiation HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA
4 Human T-cell leukemia virus 1 infection HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;SPI1;HLA-DOA;ITGB2;HLA-F
5 Leishmaniasis HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;NCF4;NCF2;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA;ITGB2
6 Phagosome HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;NCF4;NCF2;HLA-DQB1;HLA-DQA1;HLA-DRB5;MSR1;HLA-DOA;MARCO;ITGB2;HLA-F;MRC1;HLA-B;ITGAM;TLR4
7 Viral myocarditis HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA;ITGB2;HLA-F;LAMA2;HLA-B;CD86
8 Rheumatoid arthritis HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5;HLA-DOA;ITGB2;ACP5;TLR4;CD86
9 Epstein-Barr virus infection HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;BTK;HLA-DRB5;HLA-DOA;HLA-F

Heatmap Visualisation

gseapy provides a heatmap function to visualise the expression levels of the leading-edge genes. The heatmap provides a visual representation of how these genes are expressed across different samples in relation to their assigned phenotypic classes.

# Bring gseapy's heatmap helper into scope
from gseapy import heatmap

# Row index (in the results table) of the term to visualise
i = 0

# Leading-edge genes of that term; they are stored as one ';'-separated string

### YOUR CODE HERE ###
genes = gs_res.res2d.loc[i, 'Lead_genes'].split(";")
print(genes)
['HLA-DRB1', 'HLA-DMA', 'HLA-DPB1', 'HLA-DRA', 'HLA-DMB', 'HLA-DPA1', 'HLA-DQB1', 'HLA-DQA1', 'HLA-DRB5', 'ALOX5', 'HLA-DOA']
# Draw the expression heatmap for the leading-edge genes of the selected term
ax = heatmap(
    df=gs_res.heatmat.loc[genes],  # expression rows of the leading-edge genes
    z_score=0,
    title=terms[i],                # use the term name as the plot title
    figsize=(15, 4),
)

# Replace the sample IDs on the x-axis with their class labels
xtick_labels = [classes[tick.get_text()] for tick in ax.get_xticklabels()]
ax.set_xticklabels(xtick_labels, rotation=45, ha='right')
ax.plot()
[]
../_images/2ad9e026ca3f82edcb53159b9c52302e718153da93904ab88871a775c04c4f33.png

Clustermap Visualisation

The function clustermap from seaborn is used to create a clustered heatmap. It not only shows the expression levels of the leading-edge genes but also clusters them based on similarity, providing additional insights into gene expression patterns. The cluster map includes dendrograms, which show the hierarchical clustering of both genes and samples, helping to identify groups of co-expressed genes and similar samples.

# Bring seaborn's clustermap into scope
from seaborn import clustermap

# Row index (in the results table) of the term to visualise
i = 2

# Leading-edge genes of that term; they are stored as one ';'-separated string
genes = gs_res.res2d.Lead_genes[i].split(";")

# Take the expression rows of those genes and relabel the sample columns
# with their class assignments

### YOUR CODE HERE ###
data = gs_res.heatmat.loc[genes].rename(columns=classes)

# Hierarchically cluster genes and samples and draw the clustered heatmap
ax = clustermap(
    data=data,
    method='average',                # linkage method for the clustering
    metric='euclidean',              # distance metric
    z_score=0,                       # standardise each row
    figsize=(14, 4),                 # overall figure size
    dendrogram_ratio=0.2,            # share of the figure given to the dendrograms
    colors_ratio=0.03,               # share of the figure given to the colour annotations
    cbar_pos=(0.02, 0.1, 0.05, 0.1)  # position of the colour bar axes in the figure
)
../_images/717aec685b39aca5709559cafb20fd58e7a4f77a77c12e3e09ee9edd07365a85.png

Dot Plot Visualisation

Use the dotplot function in gseapy to create a visual representation of the GSEA results. Here the “FDR q-val” column (the false discovery rate adjusted p-values) determines the dot colours, while the dot sizes reflect the fraction of each gene set's genes found in the leading edge (the “Tag %” column). The normalised enrichment score (NES) is displayed on the x-axis.

# Bring in gseapy's dot plot helper
from gseapy import dotplot

# Summarise the GSEA results table as a dot plot
ax = dotplot(
    gs_res.res2d,
    column="FDR q-val",  # Results column mapped to the dot colours
    cutoff=1,  # Threshold on that column for showing a term; 1 effectively disables the filter
    size=5,  # Scaling factor applied to the dot sizes
    cmap=plt.cm.viridis,  # Colour map for the dots
    figsize=(4, 5),  # Size of the figure
    title=gene_sets[0],  # Title the plot with the chosen gene set name
)
../_images/cc8a47a57ce494e04acd4e1267bc16bbccd9a4b63e21ac9d868faf4d249af5dd.png

GSEA on Clusters#

Similarly to ORA, GSEA can also be performed on individual communities after clustering. This allows for a more granular analysis, revealing pathways and functions that are enriched within particular subgroups of the data.

# Define a function to perform GSEA on a given list of genes (community)
def communityGSEA(genes, gene_sets='KEGG_2021_Human', classes=classes, *,
                  permutation_num=100, min_size=1, threads=4, seed=7):
    """Run GSEA on the expression rows of a single community of genes.

    Parameters
    ----------
    genes : iterable of row labels
        Labels selecting the rows of the module-level ``tcga_gxp_df`` to
        analyse. Any iterable (list, set, index, ...) is accepted.
    gene_sets : optional
        Forwarded to ``gp.gsea`` as ``gene_sets``.
    classes : optional
        Forwarded to ``gp.gsea`` as ``cls``. The default is the module-level
        ``classes`` object as it exists when this function is defined.
    permutation_num : int, optional
        Forwarded to ``gp.gsea`` as ``permutation_num``.
    min_size : int, optional
        Forwarded to ``gp.gsea`` as ``min_size``.
    threads : int, optional
        Forwarded to ``gp.gsea`` as ``threads``.
    seed : int, optional
        Forwarded to ``gp.gsea`` as ``seed`` so repeated runs are comparable.

    Returns
    -------
    The result object returned by ``gp.gsea``; its ``res2d`` attribute holds
    the per-term results table.
    """
    # Materialise the labels as a list: pandas rejects unordered containers
    # such as sets as ``.loc`` indexers.
    community_expression = tcga_gxp_df.loc[list(genes)]
    return gp.gsea(
        data=community_expression,
        gene_sets=gene_sets,
        cls=classes,
        permutation_num=permutation_num,
        outdir=None,
        method='signal_to_noise',
        threads=threads,
        seed=seed,
        min_size=min_size,
    )

# Run GSEA separately on each of the first three communities

### YOUR CODE HERE ###
community1_gsea, community2_gsea, community3_gsea = [
    communityGSEA(communities[idx]) for idx in range(3)
]
# Preview the first five rows of the first community's results table

### YOUR CODE HERE ###
community1_gsea.res2d.head(n=5)
Name Term ES NES NOM p-val FDR q-val FWER p-val Tag % Gene % Lead_genes
0 gsea Epstein-Barr virus infection 0.853817 1.789511 0.0 0.324881 0.14 10/11 16.83% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;HLA-DQA1;BTK;HLA-DRB5
1 gsea Leishmaniasis 0.667361 1.767749 0.040816 0.181183 0.17 11/18 16.83% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;NCF4;NCF2;HLA-DQB1;HLA-DQA1;HLA-DRB5
2 gsea Influenza A 0.805713 1.723862 0.0 0.191596 0.2 10/12 16.83% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;CASP1;HLA-DPA1;HLA-DQB1;HLA-DQA1;HLA-DRB5
3 gsea Phagosome 0.595196 1.717777 0.041667 0.149945 0.21 16/22 34.65% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;NCF4;NCF2;HLA-DQB1;HLA-DQA1;HLA-DRB5;MSR1;HLA-DOA;MARCO;ITGB2;MRC1
4 gsea Cell adhesion molecules 0.649813 1.710744 0.04 0.127453 0.22 10/16 16.83% HLA-DRB1;HLA-DMA;HLA-DPB1;HLA-DRA;HLA-DMB;HLA-DPA1;HLA-DQB1;SELPLG;HLA-DQA1;HLA-DRB5
# Pull the "Term" column (the enriched term names) out of the first
# community's results table
terms = community1_gsea.res2d["Term"]

# Draw the enrichment plot for the first five terms in the table
top_terms = terms[:5]
axs = community1_gsea.plot(top_terms, show_ranking=False, legend_kws={'loc': (1.05, 0)})
../_images/7fc1f16fb1d70904d906e0ded3bd518c36e938a5de872297aed46fcf8d44564b.png