Perturbation
This guide demonstrates how to curate a complex, real-world perturbation dataset from McFarland et al. 2020 using the
wetlab schema.
# Uncomment to install the required packages if they are missing from this environment.
# !pip install 'lamindb[jupyter,aws,bionty]' wetlab
# Initialize a LaminDB instance stored under ./test-perturbation with the bionty and wetlab schema modules.
!lamin init --storage ./test-perturbation --schema bionty,wetlab
→ connected lamindb: testuser1/test-perturbation
import lamindb as ln
import bionty as bt
import wetlab as wl
import pandas as pd
# Do not truncate columns when displaying DataFrames; the obs table shown below is wide.
pd.set_option("display.max_columns", None)
# Set the transform uid first so that tracking registers this notebook under it.
ln.context.uid = "K6sInKIQW5nt0002"
# Start tracking: this creates the Transform and a Run record for the notebook.
ln.context.track()
Show code cell output
→ connected lamindb: testuser1/test-perturbation
→ notebook imports: bionty==0.50.2 lamindb==0.76.8 pandas==2.2.3 wetlab==0.33.0
→ created Transform(uid='K6sInKIQW5nt0002') & created Run(started_at='2024-09-25 20:01:53 UTC')
# How this dataset was prepared is documented at
# https://lamin.ai/laminlabs/lamindata/transform/13VINnFk89PE0004
source_artifact = ln.Artifact.using("laminlabs/lamindata").get(
    uid="Xk7Qaik9vBLV4PKf0000"
)
adata = source_artifact.load()
# Preview the first three rows of the per-cell metadata.
adata.obs.head(3)
Show code cell output
depmap_id | cancer | cell_det_rate | cell_line | cell_quality | channel | disease | dose_unit | dose_value | doublet_CL1 | doublet_CL2 | doublet_GMM_prob | doublet_dev_imp | doublet_z_margin | hash_assignment | hash_tag | num_SNPs | organism | perturbation | perturbation_type | sex | singlet_ID | singlet_dev | singlet_dev_z | singlet_margin | singlet_z_margin | time | tissue_type | tot_reads | nperts | ngenes | ncounts | percent_mito | percent_ribo | chembl-ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AACTGGTGTCTCTCTG | ACH-000390 | True | 0.093159 | LUDLU-1 | normal | nan | lung cancer | µM | 0.1 | LUDLU1_LUNG | TE14_OESOPHAGUS | 2.269468e-10 | 0.009426 | 0.403316 | nan | nan | 481 | human | trametinib | drug | Male | LUDLU1_LUNG | 0.655877 | 14.860933 | 0.462273 | 12.351139 | 24 | cell_line | 787 | 1 | 3045 | 12895.0 | 3.202792 | 24.955409 | CHEMBL2103875 |
ATAGGCTCAGATTTCG | ACH-000444 | True | 0.145728 | LU99 | normal | 2 | lung cancer | µM | 0.5 | LU99_LUNG | MCAS_OVARY | 8.562908e-04 | 0.010173 | 0.188284 | nan | nan | 1003 | human | afatinib | drug | Male | LU99_LUNG | 0.762847 | 10.648094 | 0.474590 | 8.164565 | 24 | cell_line | 1597 | 1 | 4763 | 23161.0 | 7.473771 | 18.051898 | CHEMBL1173655 |
GCCAAATCAAGCCGTC | ACH-000396 | True | 0.117330 | J82 | normal | nan | urinary bladder carcinoma | µM | 0.1 | J82_URINARY_TRACT | IGR1_SKIN | 6.490367e-08 | 0.009686 | 1.185862 | nan | nan | 647 | human | dabrafenib | drug | Male | J82_URINARY_TRACT | 0.651059 | 14.740111 | 0.404508 | 11.188513 | 24 | cell_line | 1159 | 1 | 3834 | 18062.0 | 2.762706 | 22.085040 | CHEMBL2028663 |
# Pin gene validation to the release-112 human gene source.
gene_source = bt.Source.filter(
    entity="bionty.Gene", version="release-112", organism="human"
).one()
curate = ln.Curator.from_anndata(
    adata,
    var_index=bt.Gene.ensembl_gene_id,
    organism="human",
    sources={"var_index": gene_source},
)
# First validation pass: no categorical obs columns are configured yet.
curate.validate()
• 35 non-validated values are not saved in Feature.name: ['perturbation_type', 'singlet_dev_z', 'disease', 'singlet_ID', 'sex', 'singlet_z_margin', 'doublet_CL1', 'hash_tag', 'cell_quality', 'organism', 'cell_det_rate', 'num_SNPs', 'singlet_margin', 'depmap_id', 'doublet_CL2', 'channel', 'percent_ribo', 'tissue_type', 'singlet_dev', 'ngenes', 'percent_mito', 'doublet_GMM_prob', 'hash_assignment', 'ncounts', 'time', 'cell_line', 'chembl-ID', 'dose_value', 'doublet_dev_imp', 'perturbation', 'tot_reads', 'nperts', 'doublet_z_margin', 'dose_unit', 'cancer']!
→ to lookup values, use lookup().columns
→ to save, run add_new_from_columns
✓ created 1 Organism record from Bionty matching name: 'human'
• mapping var_index on Gene.ensembl_gene_id
! found 1279 validated terms: ['ENSG00000102316', 'ENSG00000109472', 'ENSG00000080007', 'ENSG00000203926', 'ENSG00000127419', 'ENSG00000108960', 'ENSG00000126870', 'ENSG00000121797', 'ENSG00000243927', 'ENSG00000143473', 'ENSG00000115665', 'ENSG00000180613', 'ENSG00000167283', 'ENSG00000160472', 'ENSG00000110768', 'ENSG00000124507', 'ENSG00000257381', 'ENSG00000129451', 'ENSG00000228237', 'ENSG00000137033', 'ENSG00000120868', 'ENSG00000079616', 'ENSG00000177082', 'ENSG00000227392', 'ENSG00000251620', 'ENSG00000174804', 'ENSG00000057935', 'ENSG00000251493', 'ENSG00000164182', 'ENSG00000112033', 'ENSG00000236388', 'ENSG00000116039', 'ENSG00000131095', 'ENSG00000170956', 'ENSG00000104870', 'ENSG00000259494', 'ENSG00000116906', 'ENSG00000173599', 'ENSG00000187080', 'ENSG00000167258', 'ENSG00000131473', 'ENSG00000107742', 'ENSG00000144791', 'ENSG00000198286', 'ENSG00000196267', 'ENSG00000165966', 'ENSG00000124383', 'ENSG00000154957', 'ENSG00000196689', 'ENSG00000133627', 'ENSG00000049656', 'ENSG00000163918', 'ENSG00000163517', 'ENSG00000166133', 'ENSG00000003393', 'ENSG00000186628', 'ENSG00000136002', 'ENSG00000163629', 'ENSG00000136319', 'ENSG00000189159', 'ENSG00000166770', 'ENSG00000167548', 'ENSG00000134780', 'ENSG00000257127', 'ENSG00000205571', 'ENSG00000128309', 'ENSG00000111145', 'ENSG00000164587', 'ENSG00000096395', 'ENSG00000188820', 'ENSG00000148408', 'ENSG00000111554', 'ENSG00000140943', 'ENSG00000133101', 'ENSG00000148655', 'ENSG00000073969', 'ENSG00000231671', 'ENSG00000198919', 'ENSG00000107331', 'ENSG00000120440', 'ENSG00000111801', 'ENSG00000267013', 'ENSG00000259060', 'ENSG00000117395', 'ENSG00000198938', 'ENSG00000204595', 'ENSG00000183304', 'ENSG00000165102', 'ENSG00000162460', 'ENSG00000150471', 'ENSG00000137634', 'ENSG00000106688', 'ENSG00000254413', 'ENSG00000112282', 'ENSG00000101146', 'ENSG00000162004', 'ENSG00000175768', 'ENSG00000171067', 'ENSG00000103365', 'ENSG00000206052', 'ENSG00000168288', 'ENSG00000240476', 'ENSG00000116833', 
'ENSG00000111331', 'ENSG00000215252', 'ENSG00000143355', 'ENSG00000270800', 'ENSG00000161692', 'ENSG00000089053', 'ENSG00000189007', 'ENSG00000182986', 'ENSG00000107859', 'ENSG00000099203', 'ENSG00000171861', 'ENSG00000125744', 'ENSG00000204880', 'ENSG00000239877', 'ENSG00000229619', 'ENSG00000273155', 'ENSG00000166741', 'ENSG00000117139', 'ENSG00000153975', 'ENSG00000101188', 'ENSG00000146039', 'ENSG00000196666', 'ENSG00000146733', 'ENSG00000251022', 'ENSG00000163218', 'ENSG00000169857', 'ENSG00000206432', 'ENSG00000128617', 'ENSG00000142173', 'ENSG00000179115', 'ENSG00000117318', 'ENSG00000131459', 'ENSG00000178403', 'ENSG00000038295', 'ENSG00000148288', 'ENSG00000242612', 'ENSG00000082781', 'ENSG00000139684', 'ENSG00000186272', 'ENSG00000167604', 'ENSG00000111049', 'ENSG00000149357', 'ENSG00000132975', 'ENSG00000166823', 'ENSG00000102871', 'ENSG00000205155', 'ENSG00000183153', 'ENSG00000120438', 'ENSG00000157045', 'ENSG00000179751', 'ENSG00000140259', 'ENSG00000269058', 'ENSG00000171817', 'ENSG00000130383', 'ENSG00000155438', 'ENSG00000152433', 'ENSG00000204463', 'ENSG00000146521', 'ENSG00000174950', 'ENSG00000144741', 'ENSG00000174403', 'ENSG00000062096', 'ENSG00000120656', 'ENSG00000009790', 'ENSG00000166037', 'ENSG00000147650', 'ENSG00000127325', 'ENSG00000139209', 'ENSG00000153317', 'ENSG00000132475', 'ENSG00000006453', 'ENSG00000160813', 'ENSG00000138629', 'ENSG00000166450', 'ENSG00000175634', 'ENSG00000163254', 'ENSG00000261206', 'ENSG00000167608', 'ENSG00000213619', 'ENSG00000109171', 'ENSG00000116685', 'ENSG00000251258', 'ENSG00000062038', 'ENSG00000110497', 'ENSG00000069509', 'ENSG00000231944', 'ENSG00000166171', 'ENSG00000082512', 'ENSG00000162377', 'ENSG00000236311', 'ENSG00000146410', 'ENSG00000105369', 'ENSG00000137275', 'ENSG00000132846', 'ENSG00000160200', 'ENSG00000092148', 'ENSG00000184895', 'ENSG00000076351', 'ENSG00000011198', 'ENSG00000134531', 'ENSG00000163157', 'ENSG00000130584', 'ENSG00000174547', 'ENSG00000099994', 'ENSG00000146963', 
'ENSG00000229557', 'ENSG00000197683', 'ENSG00000185033', 'ENSG00000175548', 'ENSG00000072133', 'ENSG00000205085', 'ENSG00000137040', 'ENSG00000180138', 'ENSG00000173905', 'ENSG00000137166', 'ENSG00000177294', 'ENSG00000114942', 'ENSG00000112658', 'ENSG00000150594', 'ENSG00000164631', 'ENSG00000151650', 'ENSG00000223638', 'ENSG00000229086', 'ENSG00000198542', 'ENSG00000196436', 'ENSG00000175854', 'ENSG00000255181', 'ENSG00000101004', 'ENSG00000217930', 'ENSG00000077935', 'ENSG00000172340', 'ENSG00000023191', 'ENSG00000189164', 'ENSG00000177990', 'ENSG00000179873', 'ENSG00000187772', 'ENSG00000155729', 'ENSG00000124440', 'ENSG00000142694', 'ENSG00000105875', 'ENSG00000158828', 'ENSG00000111371', 'ENSG00000159337', 'ENSG00000176153', 'ENSG00000168702', 'ENSG00000235718', 'ENSG00000134873', 'ENSG00000141096', 'ENSG00000116786', 'ENSG00000054938', 'ENSG00000239225', 'ENSG00000086967', 'ENSG00000171431', 'ENSG00000188263', 'ENSG00000170807', 'ENSG00000007129', 'ENSG00000168234', 'ENSG00000173141', 'ENSG00000244395', 'ENSG00000100181', 'ENSG00000118898', 'ENSG00000185960', 'ENSG00000168269', 'ENSG00000084112', 'ENSG00000096872', 'ENSG00000175066', 'ENSG00000203952', 'ENSG00000196767', 'ENSG00000138381', 'ENSG00000142661', 'ENSG00000198815', 'ENSG00000204538', 'ENSG00000118200', 'ENSG00000147655', 'ENSG00000145194', 'ENSG00000213088', 'ENSG00000177272', 'ENSG00000126464', 'ENSG00000175087', 'ENSG00000011021', 'ENSG00000125834', 'ENSG00000197766', 'ENSG00000119514', 'ENSG00000165471', 'ENSG00000172469', 'ENSG00000176142', 'ENSG00000173786', 'ENSG00000247473', 'ENSG00000119943', 'ENSG00000188342', 'ENSG00000121481', 'ENSG00000080815', 'ENSG00000165506', 'ENSG00000161835', 'ENSG00000184117', 'ENSG00000188649', 'ENSG00000114480', 'ENSG00000134333', 'ENSG00000110900', 'ENSG00000082293', 'ENSG00000151640', 'ENSG00000059377', 'ENSG00000125514', 'ENSG00000148468', 'ENSG00000148110', 'ENSG00000039139', 'ENSG00000116151', 'ENSG00000197620', 'ENSG00000154025', 'ENSG00000151690', 
'ENSG00000188523', 'ENSG00000196961', 'ENSG00000048740', 'ENSG00000225473', 'ENSG00000020256', 'ENSG00000106683', 'ENSG00000105204', 'ENSG00000136352', 'ENSG00000183281', 'ENSG00000254377', 'ENSG00000180596', 'ENSG00000138347', 'ENSG00000157335', 'ENSG00000227051', 'ENSG00000187624', 'ENSG00000244462', 'ENSG00000250091', 'ENSG00000142207', 'ENSG00000164663', 'ENSG00000003756', 'ENSG00000170523', 'ENSG00000141431', 'ENSG00000106404', 'ENSG00000254656', 'ENSG00000174348', 'ENSG00000130827', 'ENSG00000145391', 'ENSG00000169499', 'ENSG00000233349', 'ENSG00000130700', 'ENSG00000159128', 'ENSG00000242715', 'ENSG00000140526', 'ENSG00000212747', 'ENSG00000214827', 'ENSG00000181982', 'ENSG00000223914', 'ENSG00000204351', 'ENSG00000185261', 'ENSG00000151388', 'ENSG00000184345', 'ENSG00000172782', 'ENSG00000005302', 'ENSG00000127515', 'ENSG00000136938', 'ENSG00000169750', 'ENSG00000213588', 'ENSG00000186862', 'ENSG00000151445', 'ENSG00000106609', 'ENSG00000120436', 'ENSG00000180872', 'ENSG00000116977', 'ENSG00000100522', 'ENSG00000133706', 'ENSG00000248920', 'ENSG00000184436', 'ENSG00000125247', 'ENSG00000164621', 'ENSG00000104218', 'ENSG00000111834', 'ENSG00000234323', 'ENSG00000260286', 'ENSG00000082068', 'ENSG00000205111', 'ENSG00000166261', 'ENSG00000197580', 'ENSG00000168214', 'ENSG00000018625', 'ENSG00000236637', 'ENSG00000165006', 'ENSG00000113575', 'ENSG00000140396', 'ENSG00000145337', 'ENSG00000135677', 'ENSG00000151379', 'ENSG00000169155', 'ENSG00000164761', 'ENSG00000234527', 'ENSG00000149633', 'ENSG00000079974', 'ENSG00000108474', 'ENSG00000103061', 'ENSG00000253457', 'ENSG00000120057', 'ENSG00000176230', 'ENSG00000043514', 'ENSG00000167483', 'ENSG00000145781', 'ENSG00000134775', 'ENSG00000086991', 'ENSG00000138107', 'ENSG00000149150', 'ENSG00000003989', 'ENSG00000063438', 'ENSG00000175643', 'ENSG00000198062', 'ENSG00000188766', 'ENSG00000089094', 'ENSG00000109814', 'ENSG00000173467', 'ENSG00000110066', 'ENSG00000125879', 'ENSG00000004838', 'ENSG00000109113', 
'ENSG00000120594', 'ENSG00000108094', 'ENSG00000169016', 'ENSG00000125148', 'ENSG00000257591', 'ENSG00000242689', 'ENSG00000141198', 'ENSG00000080709', 'ENSG00000142864', 'ENSG00000156675', 'ENSG00000157557', 'ENSG00000148215', 'ENSG00000111247', 'ENSG00000173715', 'ENSG00000205220', 'ENSG00000213799', 'ENSG00000173702', 'ENSG00000188729', 'ENSG00000099800', 'ENSG00000181555', 'ENSG00000234684', 'ENSG00000111481', 'ENSG00000185630', 'ENSG00000136840', 'ENSG00000164047', 'ENSG00000139726', 'ENSG00000243477', 'ENSG00000183401', 'ENSG00000168993', 'ENSG00000239839', 'ENSG00000179284', 'ENSG00000140992', 'ENSG00000135363', 'ENSG00000115425', 'ENSG00000121380', 'ENSG00000119041', 'ENSG00000123243', 'ENSG00000139192', 'ENSG00000165632', 'ENSG00000114054', 'ENSG00000049860', 'ENSG00000248698', 'ENSG00000168591', 'ENSG00000132561', 'ENSG00000139180', 'ENSG00000166987', 'ENSG00000167384', 'ENSG00000156140', 'ENSG00000119686', 'ENSG00000228567', 'ENSG00000182810', 'ENSG00000234230', 'ENSG00000125144', 'ENSG00000126775', 'ENSG00000240770', 'ENSG00000205126', 'ENSG00000172840', 'ENSG00000103381', 'ENSG00000178057', 'ENSG00000108064', 'ENSG00000120322', 'ENSG00000139428', 'ENSG00000173473', 'ENSG00000181458', 'ENSG00000101474', 'ENSG00000165209', 'ENSG00000046651', 'ENSG00000268061', 'ENSG00000221963', 'ENSG00000165325', 'ENSG00000100884', 'ENSG00000238189', 'ENSG00000241962', 'ENSG00000148803', 'ENSG00000175206', 'ENSG00000120805', 'ENSG00000049249', 'ENSG00000090776', 'ENSG00000070731', 'ENSG00000148339', 'ENSG00000113194', 'ENSG00000110628', 'ENSG00000147873', 'ENSG00000164600', 'ENSG00000126705', 'ENSG00000196502', 'ENSG00000154473', 'ENSG00000143921', 'ENSG00000105609', 'ENSG00000184076', 'ENSG00000119283', 'ENSG00000196361', 'ENSG00000109775', 'ENSG00000179869', 'ENSG00000123213', 'ENSG00000169836', 'ENSG00000184650', 'ENSG00000138459', 'ENSG00000163734', 'ENSG00000106028', 'ENSG00000100652', 'ENSG00000251442', 'ENSG00000167491', 'ENSG00000170748', 'ENSG00000166965', 
'ENSG00000203661', 'ENSG00000108061', 'ENSG00000203857', 'ENSG00000131398', 'ENSG00000143632', 'ENSG00000167914', 'ENSG00000221988', 'ENSG00000246016', 'ENSG00000139330', 'ENSG00000101773', 'ENSG00000233718', 'ENSG00000204481', 'ENSG00000173918', 'ENSG00000234186', 'ENSG00000186451', 'ENSG00000114124', 'ENSG00000178741', 'ENSG00000196407', 'ENSG00000119986', 'ENSG00000161551', 'ENSG00000105698', 'ENSG00000164756', 'ENSG00000205403', 'ENSG00000115091', 'ENSG00000173208', 'ENSG00000197651', 'ENSG00000203721', 'ENSG00000156253', 'ENSG00000196341', 'ENSG00000131116', 'ENSG00000141696', 'ENSG00000081692', 'ENSG00000126368', 'ENSG00000088002', 'ENSG00000090266', 'ENSG00000162909', 'ENSG00000186684', 'ENSG00000188038', 'ENSG00000153446', 'ENSG00000106771', 'ENSG00000078295', 'ENSG00000112053', 'ENSG00000117153', 'ENSG00000198843', 'ENSG00000164010', 'ENSG00000144120', 'ENSG00000187581', 'ENSG00000070770', 'ENSG00000148341', 'ENSG00000146360', 'ENSG00000137936', 'ENSG00000123338', 'ENSG00000120733', 'ENSG00000139154', 'ENSG00000268654', 'ENSG00000254415', 'ENSG00000198755', 'ENSG00000101435', 'ENSG00000118369', 'ENSG00000189067', 'ENSG00000118004', 'ENSG00000160808', 'ENSG00000178988', 'ENSG00000179144', 'ENSG00000184814', 'ENSG00000171634', 'ENSG00000136682', 'ENSG00000114544', 'ENSG00000115008', 'ENSG00000153246', 'ENSG00000269858', 'ENSG00000159212', 'ENSG00000214842', 'ENSG00000134757', 'ENSG00000131849', 'ENSG00000157087', 'ENSG00000268658', 'ENSG00000205129', 'ENSG00000212856', 'ENSG00000164002', 'ENSG00000147099', 'ENSG00000100321', 'ENSG00000163528', 'ENSG00000215403', 'ENSG00000165113', 'ENSG00000137522', 'ENSG00000125637', 'ENSG00000215644', 'ENSG00000181036', 'ENSG00000175274', 'ENSG00000205544', 'ENSG00000233975', 'ENSG00000261618', 'ENSG00000233818', 'ENSG00000108846', 'ENSG00000235268', 'ENSG00000143502', 'ENSG00000126709', 'ENSG00000170289', 'ENSG00000134215', 'ENSG00000240972', 'ENSG00000125730', 'ENSG00000204961', 'ENSG00000183783', 'ENSG00000172243', 
'ENSG00000196787', 'ENSG00000085449', 'ENSG00000186020', 'ENSG00000198185', 'ENSG00000153015', 'ENSG00000248330', 'ENSG00000137752', 'ENSG00000232401', 'ENSG00000233701', 'ENSG00000181104', 'ENSG00000160050', 'ENSG00000089195', 'ENSG00000006576', 'ENSG00000073536', 'ENSG00000107874', 'ENSG00000076043', 'ENSG00000163527', 'ENSG00000121310', 'ENSG00000186081', 'ENSG00000035115', 'ENSG00000245146', 'ENSG00000135241', 'ENSG00000124356', 'ENSG00000175322', 'ENSG00000100095', 'ENSG00000101391', 'ENSG00000064703', 'ENSG00000144451', 'ENSG00000131471', 'ENSG00000102103', 'ENSG00000099381', 'ENSG00000144214', 'ENSG00000197885', 'ENSG00000108384', 'ENSG00000151208', 'ENSG00000204764', 'ENSG00000249641', 'ENSG00000186318', 'ENSG00000151694', 'ENSG00000140807', 'ENSG00000115694', 'ENSG00000184445', 'ENSG00000184009', 'ENSG00000007944', 'ENSG00000054654', 'ENSG00000085998', 'ENSG00000164252', 'ENSG00000174197', 'ENSG00000205726', 'ENSG00000168477', 'ENSG00000167257', 'ENSG00000160993', 'ENSG00000177202', 'ENSG00000115526', 'ENSG00000125337', 'ENSG00000205022', 'ENSG00000168612', 'ENSG00000111252', 'ENSG00000172081', 'ENSG00000170627', 'ENSG00000235412', 'ENSG00000100938', 'ENSG00000158639', 'ENSG00000122565', 'ENSG00000197496', 'ENSG00000126883', 'ENSG00000163235', 'ENSG00000151090', 'ENSG00000236371', 'ENSG00000141837', 'ENSG00000235385', 'ENSG00000167104', 'ENSG00000076003', 'ENSG00000236053', 'ENSG00000198785', 'ENSG00000119614', 'ENSG00000198954', 'ENSG00000205930', 'ENSG00000159556', 'ENSG00000250021', 'ENSG00000103319', 'ENSG00000139132', 'ENSG00000224122', 'ENSG00000107404', 'ENSG00000124091', 'ENSG00000151224', 'ENSG00000120694', 'ENSG00000234438', 'ENSG00000233070', 'ENSG00000156804', 'ENSG00000064270', 'ENSG00000100934', 'ENSG00000117984', 'ENSG00000176909', 'ENSG00000154143', 'ENSG00000248405', 'ENSG00000126261', 'ENSG00000234602', 'ENSG00000121067', 'ENSG00000167757', 'ENSG00000162373', 'ENSG00000115282', 'ENSG00000112144', 'ENSG00000154162', 'ENSG00000136240', 
'ENSG00000124253', 'ENSG00000177614', 'ENSG00000151135', 'ENSG00000161914', 'ENSG00000172020', 'ENSG00000140319', 'ENSG00000178055', 'ENSG00000182111', 'ENSG00000155066', 'ENSG00000152804', 'ENSG00000157703', 'ENSG00000233421', 'ENSG00000110047', 'ENSG00000094963', 'ENSG00000163785', 'ENSG00000100151', 'ENSG00000197177', 'ENSG00000180776', 'ENSG00000106144', 'ENSG00000176904', 'ENSG00000184302', 'ENSG00000204464', 'ENSG00000111817', 'ENSG00000176742', 'ENSG00000103021', 'ENSG00000238057', 'ENSG00000154328', 'ENSG00000261485', 'ENSG00000230453', 'ENSG00000231500', 'ENSG00000136758', 'ENSG00000005075', 'ENSG00000213533', 'ENSG00000261832', 'ENSG00000196704', 'ENSG00000204301', 'ENSG00000177485', 'ENSG00000002822', 'ENSG00000204323', 'ENSG00000115274', 'ENSG00000106560', 'ENSG00000261043', 'ENSG00000174957', 'ENSG00000224186', 'ENSG00000100908', 'ENSG00000187098', 'ENSG00000256683', 'ENSG00000104907', 'ENSG00000115561', 'ENSG00000175115', 'ENSG00000125753', 'ENSG00000256769', 'ENSG00000228889', 'ENSG00000122787', 'ENSG00000143612', 'ENSG00000010278', 'ENSG00000166130', 'ENSG00000261498', 'ENSG00000171867', 'ENSG00000091483', 'ENSG00000084207', 'ENSG00000188266', 'ENSG00000204138', 'ENSG00000175514', 'ENSG00000125629', 'ENSG00000129824', 'ENSG00000183617', 'ENSG00000111653', 'ENSG00000196366', 'ENSG00000071859', 'ENSG00000133124', 'ENSG00000187838', 'ENSG00000147799', 'ENSG00000109991', 'ENSG00000242516', 'ENSG00000136156', 'ENSG00000143147', 'ENSG00000223501', 'ENSG00000173200', 'ENSG00000106415', 'ENSG00000142606', 'ENSG00000165669', 'ENSG00000112232', 'ENSG00000175354', 'ENSG00000139151', 'ENSG00000135093', 'ENSG00000188092', 'ENSG00000135912', 'ENSG00000114166', 'ENSG00000261235', 'ENSG00000170801', 'ENSG00000221882', 'ENSG00000135334', 'ENSG00000152503', 'ENSG00000183741', 'ENSG00000171466', 'ENSG00000255974', 'ENSG00000117419', 'ENSG00000132321', 'ENSG00000182022', 'ENSG00000224405', 'ENSG00000104888', 'ENSG00000179397', 'ENSG00000105707', 'ENSG00000165914', 
'ENSG00000170921', 'ENSG00000049089', 'ENSG00000108786', 'ENSG00000155906', 'ENSG00000232225', 'ENSG00000185985', 'ENSG00000134802', 'ENSG00000169418', 'ENSG00000163281', 'ENSG00000197586', 'ENSG00000204435', 'ENSG00000196166', 'ENSG00000177462', 'ENSG00000248397', 'ENSG00000171488', 'ENSG00000179407', 'ENSG00000120341', 'ENSG00000100811', 'ENSG00000239713', 'ENSG00000105486', 'ENSG00000163312', 'ENSG00000101639', 'ENSG00000162073', 'ENSG00000163389', 'ENSG00000171942', 'ENSG00000236822', 'ENSG00000089639', 'ENSG00000166016', 'ENSG00000109861', 'ENSG00000141562', 'ENSG00000224559', 'ENSG00000124237', 'ENSG00000084444', 'ENSG00000092295', 'ENSG00000155111', 'ENSG00000235641', 'ENSG00000243709', 'ENSG00000139329', 'ENSG00000091583', 'ENSG00000154319', 'ENSG00000205174', 'ENSG00000085265', 'ENSG00000171824', 'ENSG00000213057', 'ENSG00000178591', 'ENSG00000157191', 'ENSG00000249348', 'ENSG00000188676', 'ENSG00000232307', 'ENSG00000155511', 'ENSG00000164334', 'ENSG00000198039', 'ENSG00000042304', 'ENSG00000172264', 'ENSG00000173976', 'ENSG00000163374', 'ENSG00000134321', 'ENSG00000188162', 'ENSG00000246100', 'ENSG00000104818', 'ENSG00000157985', 'ENSG00000178694', 'ENSG00000112305', 'ENSG00000159579', 'ENSG00000164920', 'ENSG00000214706', 'ENSG00000144749', 'ENSG00000145220', 'ENSG00000067048', 'ENSG00000227356', 'ENSG00000130758', 'ENSG00000165516', 'ENSG00000131873', 'ENSG00000125863', 'ENSG00000010932', 'ENSG00000131462', 'ENSG00000008083', 'ENSG00000157219', 'ENSG00000224459', 'ENSG00000125746', 'ENSG00000116809', 'ENSG00000106927', 'ENSG00000168904', 'ENSG00000112599', 'ENSG00000097021', 'ENSG00000068305', 'ENSG00000250349', 'ENSG00000147100', 'ENSG00000197046', 'ENSG00000175646', 'ENSG00000125878', 'ENSG00000197283', 'ENSG00000235699', 'ENSG00000184967', 'ENSG00000227695', 'ENSG00000176194', 'ENSG00000110046', 'ENSG00000188582', 'ENSG00000253873', 'ENSG00000162755', 'ENSG00000161652', 'ENSG00000091010', 'ENSG00000196944', 'ENSG00000155970', 'ENSG00000122679', 
'ENSG00000106302', 'ENSG00000134595', 'ENSG00000110583', 'ENSG00000170889', 'ENSG00000232192', 'ENSG00000095321', 'ENSG00000156471', 'ENSG00000135406', 'ENSG00000164619', 'ENSG00000189350', 'ENSG00000183251', 'ENSG00000058272', 'ENSG00000177150', 'ENSG00000106635', 'ENSG00000109424', 'ENSG00000133943', 'ENSG00000081051', 'ENSG00000147234', 'ENSG00000132274', 'ENSG00000258710', 'ENSG00000251173', 'ENSG00000125388', 'ENSG00000225234', 'ENSG00000119509', 'ENSG00000006283', 'ENSG00000250317', 'ENSG00000234056', 'ENSG00000188000', 'ENSG00000109099', 'ENSG00000182185', 'ENSG00000167601', 'ENSG00000109670', 'ENSG00000249459', 'ENSG00000137404', 'ENSG00000162630', 'ENSG00000117758', 'ENSG00000105287', 'ENSG00000134909', 'ENSG00000175806', 'ENSG00000158201', 'ENSG00000001460', 'ENSG00000203362', 'ENSG00000185477', 'ENSG00000187189', 'ENSG00000101310', 'ENSG00000163762', 'ENSG00000136457', 'ENSG00000065518', 'ENSG00000141391', 'ENSG00000172466', 'ENSG00000128656', 'ENSG00000155966', 'ENSG00000151704', 'ENSG00000157796', 'ENSG00000163406', 'ENSG00000120833', 'ENSG00000105928', 'ENSG00000197745', 'ENSG00000111877', 'ENSG00000071282', 'ENSG00000234478', 'ENSG00000110921', 'ENSG00000227659', 'ENSG00000134258', 'ENSG00000146776', 'ENSG00000137714', 'ENSG00000169906', 'ENSG00000155324', 'ENSG00000072518', 'ENSG00000237489', 'ENSG00000241211', 'ENSG00000174483', 'ENSG00000115355', 'ENSG00000124145', 'ENSG00000131143', 'ENSG00000078814', 'ENSG00000128185', 'ENSG00000215529', 'ENSG00000125848', 'ENSG00000143033', 'ENSG00000100221', 'ENSG00000137413', 'ENSG00000179262', 'ENSG00000169126', 'ENSG00000179271', 'ENSG00000177476', 'ENSG00000181513', 'ENSG00000153157', 'ENSG00000125868', 'ENSG00000146674', 'ENSG00000242242', 'ENSG00000103507', 'ENSG00000181722', 'ENSG00000132026', 'ENSG00000125046', 'ENSG00000171606', 'ENSG00000166800', 'ENSG00000106992', 'ENSG00000136720', 'ENSG00000143595', 'ENSG00000180233', 'ENSG00000101463', 'ENSG00000035403', 'ENSG00000081913', 'ENSG00000205250', 
'ENSG00000180815', 'ENSG00000139287', 'ENSG00000138594', 'ENSG00000224051', 'ENSG00000127564', 'ENSG00000198829', 'ENSG00000180354', 'ENSG00000255804', 'ENSG00000001167', 'ENSG00000135632', 'ENSG00000128989', 'ENSG00000130997', 'ENSG00000188909', 'ENSG00000250748', 'ENSG00000227488', 'ENSG00000172785', 'ENSG00000121068', 'ENSG00000031081', 'ENSG00000107938', 'ENSG00000120533', 'ENSG00000197694', 'ENSG00000167797', 'ENSG00000127252', 'ENSG00000203909', 'ENSG00000142534', 'ENSG00000204516', 'ENSG00000215568', 'ENSG00000162413', 'ENSG00000178734', 'ENSG00000184557', 'ENSG00000100418', 'ENSG00000180353', 'ENSG00000106266', 'ENSG00000105270', 'ENSG00000177885', 'ENSG00000164045', 'ENSG00000167588', 'ENSG00000188906', 'ENSG00000143032', 'ENSG00000135914', 'ENSG00000189280', 'ENSG00000154016', 'ENSG00000104983', 'ENSG00000096433', 'ENSG00000172404', 'ENSG00000138756', 'ENSG00000126254', 'ENSG00000162885', 'ENSG00000270011', 'ENSG00000205846', 'ENSG00000260903', 'ENSG00000180822', 'ENSG00000078795', 'ENSG00000172273', 'ENSG00000196227', 'ENSG00000258839', 'ENSG00000179213', 'ENSG00000114573', 'ENSG00000008196', 'ENSG00000183734', 'ENSG00000168907', 'ENSG00000089041', 'ENSG00000212657', 'ENSG00000169154', 'ENSG00000175329', 'ENSG00000165837', 'ENSG00000137252', 'ENSG00000100721', 'ENSG00000239886', 'ENSG00000162383', 'ENSG00000214013', 'ENSG00000172380', 'ENSG00000144668', 'ENSG00000223547', 'ENSG00000138071', 'ENSG00000073417', 'ENSG00000174255', 'ENSG00000197016', 'ENSG00000172115', 'ENSG00000122711', 'ENSG00000196420', 'ENSG00000102243', 'ENSG00000075151', 'ENSG00000198001', 'ENSG00000186583', 'ENSG00000165626', 'ENSG00000233610', 'ENSG00000132950', 'ENSG00000143061', 'ENSG00000164830', 'ENSG00000143622', 'ENSG00000111885', 'ENSG00000107317', 'ENSG00000250133', 'ENSG00000231310', 'ENSG00000156006', 'ENSG00000140988', 'ENSG00000175018', 'ENSG00000058085', 'ENSG00000174914', 'ENSG00000105472', 'ENSG00000142149', 'ENSG00000183067', 'ENSG00000004975', 'ENSG00000205858', 
'ENSG00000164885', 'ENSG00000186448', 'ENSG00000186493', 'ENSG00000204379', 'ENSG00000144227', 'ENSG00000168876', 'ENSG00000142910', 'ENSG00000262074', 'ENSG00000159184', 'ENSG00000113648', 'ENSG00000223882', 'ENSG00000198454', 'ENSG00000175482', 'ENSG00000254535', 'ENSG00000057019', 'ENSG00000204475', 'ENSG00000186395', 'ENSG00000184481', 'ENSG00000076356', 'ENSG00000101981', 'ENSG00000157978', 'ENSG00000177335', 'ENSG00000205078', 'ENSG00000224418', 'ENSG00000122877', 'ENSG00000237232', 'ENSG00000204444', 'ENSG00000248079', 'ENSG00000154760', 'ENSG00000146477', 'ENSG00000003137', 'ENSG00000164128', 'ENSG00000000003', 'ENSG00000149781', 'ENSG00000198400', 'ENSG00000124003', 'ENSG00000175591', 'ENSG00000179165', 'ENSG00000174827', 'ENSG00000259070', 'ENSG00000125910', 'ENSG00000055332', 'ENSG00000105669', 'ENSG00000115461', 'ENSG00000120008', 'ENSG00000105205', 'ENSG00000143867', 'ENSG00000147439', 'ENSG00000135175', 'ENSG00000181718', 'ENSG00000184905', 'ENSG00000126233', 'ENSG00000258701', 'ENSG00000198301', 'ENSG00000108379', 'ENSG00000150667', 'ENSG00000163995', 'ENSG00000079999', 'ENSG00000149743', 'ENSG00000166961', 'ENSG00000221923', 'ENSG00000183624', 'ENSG00000162402', 'ENSG00000163636', 'ENSG00000110717', 'ENSG00000168843', 'ENSG00000138395', 'ENSG00000085415', 'ENSG00000126391', 'ENSG00000169684', 'ENSG00000185896', 'ENSG00000181027', 'ENSG00000000971', 'ENSG00000091106', 'ENSG00000140675', 'ENSG00000196074', 'ENSG00000105971', 'ENSG00000225255', 'ENSG00000118985', 'ENSG00000172828', 'ENSG00000054277', 'ENSG00000214681', 'ENSG00000250334', 'ENSG00000139343', 'ENSG00000162552', 'ENSG00000164185', 'ENSG00000107882', 'ENSG00000251655', 'ENSG00000136535', 'ENSG00000087269', 'ENSG00000267374', 'ENSG00000182117', 'ENSG00000214941', 'ENSG00000134153', 'ENSG00000251015', 'ENSG00000171916', 'ENSG00000177398', 'ENSG00000205899', 'ENSG00000167397', 'ENSG00000166685', 'ENSG00000248866', 'ENSG00000075399', 'ENSG00000179083', 'ENSG00000117262', 'ENSG00000119912', 
'ENSG00000101888', 'ENSG00000243207', 'ENSG00000238121', 'ENSG00000139874', 'ENSG00000241769', 'ENSG00000250106', 'ENSG00000268006', 'ENSG00000159792', 'ENSG00000157510', 'ENSG00000228369', 'ENSG00000171811', 'ENSG00000164953', 'ENSG00000143786', 'ENSG00000203667', 'ENSG00000164611', 'ENSG00000081307', 'ENSG00000152457', 'ENSG00000163319', 'ENSG00000183638', 'ENSG00000145425', 'ENSG00000179044']
→ save terms via .add_validated_from_var_index()
False
# Several kinds of perturbation were applied to the cells; we will curate each kind separately.
adata.obs["perturbation_type"].value_counts()
Show code cell output
perturbation_type
drug 855
CRISPR 145
Name: count, dtype: int64
Curate non-perturbation metadata
# Map each categorical obs column to the registry field its values are validated against.
categoricals = {
    "depmap_id": bt.CellLine.ontology_id,
    "cell_line": bt.CellLine.name,
    "disease": bt.Disease.name,
    "organism": bt.Organism.name,
    "perturbation_type": ln.ULabel.name,
    "sex": bt.Phenotype.name,
    "time": ln.ULabel.name,
    "tissue_type": ln.ULabel.name,
}

# Look up each source record once; the depmap source serves both cell-line
# columns, so reuse the record instead of repeating the identical query.
gene_source = bt.Source.filter(
    entity="bionty.Gene", version="release-112", organism="human"
).one()
depmap_source = bt.Source.filter(name="depmap").one()
sources = {
    "var_index": gene_source,
    "depmap_id": depmap_source,
    "cell_line": depmap_source,
}

# Rebuild the curator, this time with the categorical columns and their sources.
curate = ln.Curator.from_anndata(
    adata,
    var_index=bt.Gene.ensembl_gene_id,
    categoricals=categoricals,
    organism="human",
    sources=sources,
)
curate.validate()
Show code cell output
✓ added 8 records with Feature.name for columns: 'depmap_id', 'cell_line', 'disease', 'organism', 'perturbation_type', 'sex', 'time', 'tissue_type'
• 27 non-validated values are not saved in Feature.name: ['singlet_dev_z', 'singlet_ID', 'singlet_z_margin', 'doublet_CL1', 'cell_quality', 'hash_tag', 'cell_det_rate', 'num_SNPs', 'singlet_margin', 'doublet_CL2', 'channel', 'percent_ribo', 'singlet_dev', 'ngenes', 'percent_mito', 'doublet_GMM_prob', 'hash_assignment', 'ncounts', 'chembl-ID', 'dose_value', 'doublet_dev_imp', 'perturbation', 'tot_reads', 'nperts', 'doublet_z_margin', 'dose_unit', 'cancer']!
→ to lookup values, use lookup().columns
→ to save, run add_new_from_columns
• mapping var_index on Gene.ensembl_gene_id
! found 1279 validated terms: ['ENSG00000102316', 'ENSG00000109472', 'ENSG00000080007', 'ENSG00000203926', 'ENSG00000127419', 'ENSG00000108960', 'ENSG00000126870', 'ENSG00000121797', 'ENSG00000243927', 'ENSG00000143473', 'ENSG00000115665', 'ENSG00000180613', 'ENSG00000167283', 'ENSG00000160472', 'ENSG00000110768', 'ENSG00000124507', 'ENSG00000257381', 'ENSG00000129451', 'ENSG00000228237', 'ENSG00000137033', 'ENSG00000120868', 'ENSG00000079616', 'ENSG00000177082', 'ENSG00000227392', 'ENSG00000251620', 'ENSG00000174804', 'ENSG00000057935', 'ENSG00000251493', 'ENSG00000164182', 'ENSG00000112033', 'ENSG00000236388', 'ENSG00000116039', 'ENSG00000131095', 'ENSG00000170956', 'ENSG00000104870', 'ENSG00000259494', 'ENSG00000116906', 'ENSG00000173599', 'ENSG00000187080', 'ENSG00000167258', 'ENSG00000131473', 'ENSG00000107742', 'ENSG00000144791', 'ENSG00000198286', 'ENSG00000196267', 'ENSG00000165966', 'ENSG00000124383', 'ENSG00000154957', 'ENSG00000196689', 'ENSG00000133627', 'ENSG00000049656', 'ENSG00000163918', 'ENSG00000163517', 'ENSG00000166133', 'ENSG00000003393', 'ENSG00000186628', 'ENSG00000136002', 'ENSG00000163629', 'ENSG00000136319', 'ENSG00000189159', 'ENSG00000166770', 'ENSG00000167548', 'ENSG00000134780', 'ENSG00000257127', 'ENSG00000205571', 'ENSG00000128309', 'ENSG00000111145', 'ENSG00000164587', 'ENSG00000096395', 'ENSG00000188820', 'ENSG00000148408', 'ENSG00000111554', 'ENSG00000140943', 'ENSG00000133101', 'ENSG00000148655', 'ENSG00000073969', 'ENSG00000231671', 'ENSG00000198919', 'ENSG00000107331', 'ENSG00000120440', 'ENSG00000111801', 'ENSG00000267013', 'ENSG00000259060', 'ENSG00000117395', 'ENSG00000198938', 'ENSG00000204595', 'ENSG00000183304', 'ENSG00000165102', 'ENSG00000162460', 'ENSG00000150471', 'ENSG00000137634', 'ENSG00000106688', 'ENSG00000254413', 'ENSG00000112282', 'ENSG00000101146', 'ENSG00000162004', 'ENSG00000175768', 'ENSG00000171067', 'ENSG00000103365', 'ENSG00000206052', 'ENSG00000168288', 'ENSG00000240476', 'ENSG00000116833', 
'ENSG00000111331', 'ENSG00000215252', 'ENSG00000143355', 'ENSG00000270800', 'ENSG00000161692', 'ENSG00000089053', 'ENSG00000189007', 'ENSG00000182986', 'ENSG00000107859', 'ENSG00000099203', 'ENSG00000171861', 'ENSG00000125744', 'ENSG00000204880', 'ENSG00000239877', 'ENSG00000229619', 'ENSG00000273155', 'ENSG00000166741', 'ENSG00000117139', 'ENSG00000153975', 'ENSG00000101188', 'ENSG00000146039', 'ENSG00000196666', 'ENSG00000146733', 'ENSG00000251022', 'ENSG00000163218', 'ENSG00000169857', 'ENSG00000206432', 'ENSG00000128617', 'ENSG00000142173', 'ENSG00000179115', 'ENSG00000117318', 'ENSG00000131459', 'ENSG00000178403', 'ENSG00000038295', 'ENSG00000148288', 'ENSG00000242612', 'ENSG00000082781', 'ENSG00000139684', 'ENSG00000186272', 'ENSG00000167604', 'ENSG00000111049', 'ENSG00000149357', 'ENSG00000132975', 'ENSG00000166823', 'ENSG00000102871', 'ENSG00000205155', 'ENSG00000183153', 'ENSG00000120438', 'ENSG00000157045', 'ENSG00000179751', 'ENSG00000140259', 'ENSG00000269058', 'ENSG00000171817', 'ENSG00000130383', 'ENSG00000155438', 'ENSG00000152433', 'ENSG00000204463', 'ENSG00000146521', 'ENSG00000174950', 'ENSG00000144741', 'ENSG00000174403', 'ENSG00000062096', 'ENSG00000120656', 'ENSG00000009790', 'ENSG00000166037', 'ENSG00000147650', 'ENSG00000127325', 'ENSG00000139209', 'ENSG00000153317', 'ENSG00000132475', 'ENSG00000006453', 'ENSG00000160813', 'ENSG00000138629', 'ENSG00000166450', 'ENSG00000175634', 'ENSG00000163254', 'ENSG00000261206', 'ENSG00000167608', 'ENSG00000213619', 'ENSG00000109171', 'ENSG00000116685', 'ENSG00000251258', 'ENSG00000062038', 'ENSG00000110497', 'ENSG00000069509', 'ENSG00000231944', 'ENSG00000166171', 'ENSG00000082512', 'ENSG00000162377', 'ENSG00000236311', 'ENSG00000146410', 'ENSG00000105369', 'ENSG00000137275', 'ENSG00000132846', 'ENSG00000160200', 'ENSG00000092148', 'ENSG00000184895', 'ENSG00000076351', 'ENSG00000011198', 'ENSG00000134531', 'ENSG00000163157', 'ENSG00000130584', 'ENSG00000174547', 'ENSG00000099994', 'ENSG00000146963', 
'ENSG00000229557', 'ENSG00000197683', 'ENSG00000185033', 'ENSG00000175548', 'ENSG00000072133', 'ENSG00000205085', 'ENSG00000137040', 'ENSG00000180138', 'ENSG00000173905', 'ENSG00000137166', 'ENSG00000177294', 'ENSG00000114942', 'ENSG00000112658', 'ENSG00000150594', 'ENSG00000164631', 'ENSG00000151650', 'ENSG00000223638', 'ENSG00000229086', 'ENSG00000198542', 'ENSG00000196436', 'ENSG00000175854', 'ENSG00000255181', 'ENSG00000101004', 'ENSG00000217930', 'ENSG00000077935', 'ENSG00000172340', 'ENSG00000023191', 'ENSG00000189164', 'ENSG00000177990', 'ENSG00000179873', 'ENSG00000187772', 'ENSG00000155729', 'ENSG00000124440', 'ENSG00000142694', 'ENSG00000105875', 'ENSG00000158828', 'ENSG00000111371', 'ENSG00000159337', 'ENSG00000176153', 'ENSG00000168702', 'ENSG00000235718', 'ENSG00000134873', 'ENSG00000141096', 'ENSG00000116786', 'ENSG00000054938', 'ENSG00000239225', 'ENSG00000086967', 'ENSG00000171431', 'ENSG00000188263', 'ENSG00000170807', 'ENSG00000007129', 'ENSG00000168234', 'ENSG00000173141', 'ENSG00000244395', 'ENSG00000100181', 'ENSG00000118898', 'ENSG00000185960', 'ENSG00000168269', 'ENSG00000084112', 'ENSG00000096872', 'ENSG00000175066', 'ENSG00000203952', 'ENSG00000196767', 'ENSG00000138381', 'ENSG00000142661', 'ENSG00000198815', 'ENSG00000204538', 'ENSG00000118200', 'ENSG00000147655', 'ENSG00000145194', 'ENSG00000213088', 'ENSG00000177272', 'ENSG00000126464', 'ENSG00000175087', 'ENSG00000011021', 'ENSG00000125834', 'ENSG00000197766', 'ENSG00000119514', 'ENSG00000165471', 'ENSG00000172469', 'ENSG00000176142', 'ENSG00000173786', 'ENSG00000247473', 'ENSG00000119943', 'ENSG00000188342', 'ENSG00000121481', 'ENSG00000080815', 'ENSG00000165506', 'ENSG00000161835', 'ENSG00000184117', 'ENSG00000188649', 'ENSG00000114480', 'ENSG00000134333', 'ENSG00000110900', 'ENSG00000082293', 'ENSG00000151640', 'ENSG00000059377', 'ENSG00000125514', 'ENSG00000148468', 'ENSG00000148110', 'ENSG00000039139', 'ENSG00000116151', 'ENSG00000197620', 'ENSG00000154025', 'ENSG00000151690', 
'ENSG00000188523', 'ENSG00000196961', 'ENSG00000048740', 'ENSG00000225473', 'ENSG00000020256', 'ENSG00000106683', 'ENSG00000105204', 'ENSG00000136352', 'ENSG00000183281', 'ENSG00000254377', 'ENSG00000180596', 'ENSG00000138347', 'ENSG00000157335', 'ENSG00000227051', 'ENSG00000187624', 'ENSG00000244462', 'ENSG00000250091', 'ENSG00000142207', 'ENSG00000164663', 'ENSG00000003756', 'ENSG00000170523', 'ENSG00000141431', 'ENSG00000106404', 'ENSG00000254656', 'ENSG00000174348', 'ENSG00000130827', 'ENSG00000145391', 'ENSG00000169499', 'ENSG00000233349', 'ENSG00000130700', 'ENSG00000159128', 'ENSG00000242715', 'ENSG00000140526', 'ENSG00000212747', 'ENSG00000214827', 'ENSG00000181982', 'ENSG00000223914', 'ENSG00000204351', 'ENSG00000185261', 'ENSG00000151388', 'ENSG00000184345', 'ENSG00000172782', 'ENSG00000005302', 'ENSG00000127515', 'ENSG00000136938', 'ENSG00000169750', 'ENSG00000213588', 'ENSG00000186862', 'ENSG00000151445', 'ENSG00000106609', 'ENSG00000120436', 'ENSG00000180872', 'ENSG00000116977', 'ENSG00000100522', 'ENSG00000133706', 'ENSG00000248920', 'ENSG00000184436', 'ENSG00000125247', 'ENSG00000164621', 'ENSG00000104218', 'ENSG00000111834', 'ENSG00000234323', 'ENSG00000260286', 'ENSG00000082068', 'ENSG00000205111', 'ENSG00000166261', 'ENSG00000197580', 'ENSG00000168214', 'ENSG00000018625', 'ENSG00000236637', 'ENSG00000165006', 'ENSG00000113575', 'ENSG00000140396', 'ENSG00000145337', 'ENSG00000135677', 'ENSG00000151379', 'ENSG00000169155', 'ENSG00000164761', 'ENSG00000234527', 'ENSG00000149633', 'ENSG00000079974', 'ENSG00000108474', 'ENSG00000103061', 'ENSG00000253457', 'ENSG00000120057', 'ENSG00000176230', 'ENSG00000043514', 'ENSG00000167483', 'ENSG00000145781', 'ENSG00000134775', 'ENSG00000086991', 'ENSG00000138107', 'ENSG00000149150', 'ENSG00000003989', 'ENSG00000063438', 'ENSG00000175643', 'ENSG00000198062', 'ENSG00000188766', 'ENSG00000089094', 'ENSG00000109814', 'ENSG00000173467', 'ENSG00000110066', 'ENSG00000125879', 'ENSG00000004838', 'ENSG00000109113', 
'ENSG00000120594', 'ENSG00000108094', 'ENSG00000169016', 'ENSG00000125148', 'ENSG00000257591', 'ENSG00000242689', 'ENSG00000141198', 'ENSG00000080709', 'ENSG00000142864', 'ENSG00000156675', 'ENSG00000157557', 'ENSG00000148215', 'ENSG00000111247', 'ENSG00000173715', 'ENSG00000205220', 'ENSG00000213799', 'ENSG00000173702', 'ENSG00000188729', 'ENSG00000099800', 'ENSG00000181555', 'ENSG00000234684', 'ENSG00000111481', 'ENSG00000185630', 'ENSG00000136840', 'ENSG00000164047', 'ENSG00000139726', 'ENSG00000243477', 'ENSG00000183401', 'ENSG00000168993', 'ENSG00000239839', 'ENSG00000179284', 'ENSG00000140992', 'ENSG00000135363', 'ENSG00000115425', 'ENSG00000121380', 'ENSG00000119041', 'ENSG00000123243', 'ENSG00000139192', 'ENSG00000165632', 'ENSG00000114054', 'ENSG00000049860', 'ENSG00000248698', 'ENSG00000168591', 'ENSG00000132561', 'ENSG00000139180', 'ENSG00000166987', 'ENSG00000167384', 'ENSG00000156140', 'ENSG00000119686', 'ENSG00000228567', 'ENSG00000182810', 'ENSG00000234230', 'ENSG00000125144', 'ENSG00000126775', 'ENSG00000240770', 'ENSG00000205126', 'ENSG00000172840', 'ENSG00000103381', 'ENSG00000178057', 'ENSG00000108064', 'ENSG00000120322', 'ENSG00000139428', 'ENSG00000173473', 'ENSG00000181458', 'ENSG00000101474', 'ENSG00000165209', 'ENSG00000046651', 'ENSG00000268061', 'ENSG00000221963', 'ENSG00000165325', 'ENSG00000100884', 'ENSG00000238189', 'ENSG00000241962', 'ENSG00000148803', 'ENSG00000175206', 'ENSG00000120805', 'ENSG00000049249', 'ENSG00000090776', 'ENSG00000070731', 'ENSG00000148339', 'ENSG00000113194', 'ENSG00000110628', 'ENSG00000147873', 'ENSG00000164600', 'ENSG00000126705', 'ENSG00000196502', 'ENSG00000154473', 'ENSG00000143921', 'ENSG00000105609', 'ENSG00000184076', 'ENSG00000119283', 'ENSG00000196361', 'ENSG00000109775', 'ENSG00000179869', 'ENSG00000123213', 'ENSG00000169836', 'ENSG00000184650', 'ENSG00000138459', 'ENSG00000163734', 'ENSG00000106028', 'ENSG00000100652', 'ENSG00000251442', 'ENSG00000167491', 'ENSG00000170748', 'ENSG00000166965', 
'ENSG00000203661', 'ENSG00000108061', 'ENSG00000203857', 'ENSG00000131398', 'ENSG00000143632', 'ENSG00000167914', 'ENSG00000221988', 'ENSG00000246016', 'ENSG00000139330', 'ENSG00000101773', 'ENSG00000233718', 'ENSG00000204481', 'ENSG00000173918', 'ENSG00000234186', 'ENSG00000186451', 'ENSG00000114124', 'ENSG00000178741', 'ENSG00000196407', 'ENSG00000119986', 'ENSG00000161551', 'ENSG00000105698', 'ENSG00000164756', 'ENSG00000205403', 'ENSG00000115091', 'ENSG00000173208', 'ENSG00000197651', 'ENSG00000203721', 'ENSG00000156253', 'ENSG00000196341', 'ENSG00000131116', 'ENSG00000141696', 'ENSG00000081692', 'ENSG00000126368', 'ENSG00000088002', 'ENSG00000090266', 'ENSG00000162909', 'ENSG00000186684', 'ENSG00000188038', 'ENSG00000153446', 'ENSG00000106771', 'ENSG00000078295', 'ENSG00000112053', 'ENSG00000117153', 'ENSG00000198843', 'ENSG00000164010', 'ENSG00000144120', 'ENSG00000187581', 'ENSG00000070770', 'ENSG00000148341', 'ENSG00000146360', 'ENSG00000137936', 'ENSG00000123338', 'ENSG00000120733', 'ENSG00000139154', 'ENSG00000268654', 'ENSG00000254415', 'ENSG00000198755', 'ENSG00000101435', 'ENSG00000118369', 'ENSG00000189067', 'ENSG00000118004', 'ENSG00000160808', 'ENSG00000178988', 'ENSG00000179144', 'ENSG00000184814', 'ENSG00000171634', 'ENSG00000136682', 'ENSG00000114544', 'ENSG00000115008', 'ENSG00000153246', 'ENSG00000269858', 'ENSG00000159212', 'ENSG00000214842', 'ENSG00000134757', 'ENSG00000131849', 'ENSG00000157087', 'ENSG00000268658', 'ENSG00000205129', 'ENSG00000212856', 'ENSG00000164002', 'ENSG00000147099', 'ENSG00000100321', 'ENSG00000163528', 'ENSG00000215403', 'ENSG00000165113', 'ENSG00000137522', 'ENSG00000125637', 'ENSG00000215644', 'ENSG00000181036', 'ENSG00000175274', 'ENSG00000205544', 'ENSG00000233975', 'ENSG00000261618', 'ENSG00000233818', 'ENSG00000108846', 'ENSG00000235268', 'ENSG00000143502', 'ENSG00000126709', 'ENSG00000170289', 'ENSG00000134215', 'ENSG00000240972', 'ENSG00000125730', 'ENSG00000204961', 'ENSG00000183783', 'ENSG00000172243', 
'ENSG00000196787', 'ENSG00000085449', 'ENSG00000186020', 'ENSG00000198185', 'ENSG00000153015', 'ENSG00000248330', 'ENSG00000137752', 'ENSG00000232401', 'ENSG00000233701', 'ENSG00000181104', 'ENSG00000160050', 'ENSG00000089195', 'ENSG00000006576', 'ENSG00000073536', 'ENSG00000107874', 'ENSG00000076043', 'ENSG00000163527', 'ENSG00000121310', 'ENSG00000186081', 'ENSG00000035115', 'ENSG00000245146', 'ENSG00000135241', 'ENSG00000124356', 'ENSG00000175322', 'ENSG00000100095', 'ENSG00000101391', 'ENSG00000064703', 'ENSG00000144451', 'ENSG00000131471', 'ENSG00000102103', 'ENSG00000099381', 'ENSG00000144214', 'ENSG00000197885', 'ENSG00000108384', 'ENSG00000151208', 'ENSG00000204764', 'ENSG00000249641', 'ENSG00000186318', 'ENSG00000151694', 'ENSG00000140807', 'ENSG00000115694', 'ENSG00000184445', 'ENSG00000184009', 'ENSG00000007944', 'ENSG00000054654', 'ENSG00000085998', 'ENSG00000164252', 'ENSG00000174197', 'ENSG00000205726', 'ENSG00000168477', 'ENSG00000167257', 'ENSG00000160993', 'ENSG00000177202', 'ENSG00000115526', 'ENSG00000125337', 'ENSG00000205022', 'ENSG00000168612', 'ENSG00000111252', 'ENSG00000172081', 'ENSG00000170627', 'ENSG00000235412', 'ENSG00000100938', 'ENSG00000158639', 'ENSG00000122565', 'ENSG00000197496', 'ENSG00000126883', 'ENSG00000163235', 'ENSG00000151090', 'ENSG00000236371', 'ENSG00000141837', 'ENSG00000235385', 'ENSG00000167104', 'ENSG00000076003', 'ENSG00000236053', 'ENSG00000198785', 'ENSG00000119614', 'ENSG00000198954', 'ENSG00000205930', 'ENSG00000159556', 'ENSG00000250021', 'ENSG00000103319', 'ENSG00000139132', 'ENSG00000224122', 'ENSG00000107404', 'ENSG00000124091', 'ENSG00000151224', 'ENSG00000120694', 'ENSG00000234438', 'ENSG00000233070', 'ENSG00000156804', 'ENSG00000064270', 'ENSG00000100934', 'ENSG00000117984', 'ENSG00000176909', 'ENSG00000154143', 'ENSG00000248405', 'ENSG00000126261', 'ENSG00000234602', 'ENSG00000121067', 'ENSG00000167757', 'ENSG00000162373', 'ENSG00000115282', 'ENSG00000112144', 'ENSG00000154162', 'ENSG00000136240', 
'ENSG00000124253', 'ENSG00000177614', 'ENSG00000151135', 'ENSG00000161914', 'ENSG00000172020', 'ENSG00000140319', 'ENSG00000178055', 'ENSG00000182111', 'ENSG00000155066', 'ENSG00000152804', 'ENSG00000157703', 'ENSG00000233421', 'ENSG00000110047', 'ENSG00000094963', 'ENSG00000163785', 'ENSG00000100151', 'ENSG00000197177', 'ENSG00000180776', 'ENSG00000106144', 'ENSG00000176904', 'ENSG00000184302', 'ENSG00000204464', 'ENSG00000111817', 'ENSG00000176742', 'ENSG00000103021', 'ENSG00000238057', 'ENSG00000154328', 'ENSG00000261485', 'ENSG00000230453', 'ENSG00000231500', 'ENSG00000136758', 'ENSG00000005075', 'ENSG00000213533', 'ENSG00000261832', 'ENSG00000196704', 'ENSG00000204301', 'ENSG00000177485', 'ENSG00000002822', 'ENSG00000204323', 'ENSG00000115274', 'ENSG00000106560', 'ENSG00000261043', 'ENSG00000174957', 'ENSG00000224186', 'ENSG00000100908', 'ENSG00000187098', 'ENSG00000256683', 'ENSG00000104907', 'ENSG00000115561', 'ENSG00000175115', 'ENSG00000125753', 'ENSG00000256769', 'ENSG00000228889', 'ENSG00000122787', 'ENSG00000143612', 'ENSG00000010278', 'ENSG00000166130', 'ENSG00000261498', 'ENSG00000171867', 'ENSG00000091483', 'ENSG00000084207', 'ENSG00000188266', 'ENSG00000204138', 'ENSG00000175514', 'ENSG00000125629', 'ENSG00000129824', 'ENSG00000183617', 'ENSG00000111653', 'ENSG00000196366', 'ENSG00000071859', 'ENSG00000133124', 'ENSG00000187838', 'ENSG00000147799', 'ENSG00000109991', 'ENSG00000242516', 'ENSG00000136156', 'ENSG00000143147', 'ENSG00000223501', 'ENSG00000173200', 'ENSG00000106415', 'ENSG00000142606', 'ENSG00000165669', 'ENSG00000112232', 'ENSG00000175354', 'ENSG00000139151', 'ENSG00000135093', 'ENSG00000188092', 'ENSG00000135912', 'ENSG00000114166', 'ENSG00000261235', 'ENSG00000170801', 'ENSG00000221882', 'ENSG00000135334', 'ENSG00000152503', 'ENSG00000183741', 'ENSG00000171466', 'ENSG00000255974', 'ENSG00000117419', 'ENSG00000132321', 'ENSG00000182022', 'ENSG00000224405', 'ENSG00000104888', 'ENSG00000179397', 'ENSG00000105707', 'ENSG00000165914', 
'ENSG00000170921', 'ENSG00000049089', 'ENSG00000108786', 'ENSG00000155906', 'ENSG00000232225', 'ENSG00000185985', 'ENSG00000134802', 'ENSG00000169418', 'ENSG00000163281', 'ENSG00000197586', 'ENSG00000204435', 'ENSG00000196166', 'ENSG00000177462', 'ENSG00000248397', 'ENSG00000171488', 'ENSG00000179407', 'ENSG00000120341', 'ENSG00000100811', 'ENSG00000239713', 'ENSG00000105486', 'ENSG00000163312', 'ENSG00000101639', 'ENSG00000162073', 'ENSG00000163389', 'ENSG00000171942', 'ENSG00000236822', 'ENSG00000089639', 'ENSG00000166016', 'ENSG00000109861', 'ENSG00000141562', 'ENSG00000224559', 'ENSG00000124237', 'ENSG00000084444', 'ENSG00000092295', 'ENSG00000155111', 'ENSG00000235641', 'ENSG00000243709', 'ENSG00000139329', 'ENSG00000091583', 'ENSG00000154319', 'ENSG00000205174', 'ENSG00000085265', 'ENSG00000171824', 'ENSG00000213057', 'ENSG00000178591', 'ENSG00000157191', 'ENSG00000249348', 'ENSG00000188676', 'ENSG00000232307', 'ENSG00000155511', 'ENSG00000164334', 'ENSG00000198039', 'ENSG00000042304', 'ENSG00000172264', 'ENSG00000173976', 'ENSG00000163374', 'ENSG00000134321', 'ENSG00000188162', 'ENSG00000246100', 'ENSG00000104818', 'ENSG00000157985', 'ENSG00000178694', 'ENSG00000112305', 'ENSG00000159579', 'ENSG00000164920', 'ENSG00000214706', 'ENSG00000144749', 'ENSG00000145220', 'ENSG00000067048', 'ENSG00000227356', 'ENSG00000130758', 'ENSG00000165516', 'ENSG00000131873', 'ENSG00000125863', 'ENSG00000010932', 'ENSG00000131462', 'ENSG00000008083', 'ENSG00000157219', 'ENSG00000224459', 'ENSG00000125746', 'ENSG00000116809', 'ENSG00000106927', 'ENSG00000168904', 'ENSG00000112599', 'ENSG00000097021', 'ENSG00000068305', 'ENSG00000250349', 'ENSG00000147100', 'ENSG00000197046', 'ENSG00000175646', 'ENSG00000125878', 'ENSG00000197283', 'ENSG00000235699', 'ENSG00000184967', 'ENSG00000227695', 'ENSG00000176194', 'ENSG00000110046', 'ENSG00000188582', 'ENSG00000253873', 'ENSG00000162755', 'ENSG00000161652', 'ENSG00000091010', 'ENSG00000196944', 'ENSG00000155970', 'ENSG00000122679', 
'ENSG00000106302', 'ENSG00000134595', 'ENSG00000110583', 'ENSG00000170889', 'ENSG00000232192', 'ENSG00000095321', 'ENSG00000156471', 'ENSG00000135406', 'ENSG00000164619', 'ENSG00000189350', 'ENSG00000183251', 'ENSG00000058272', 'ENSG00000177150', 'ENSG00000106635', 'ENSG00000109424', 'ENSG00000133943', 'ENSG00000081051', 'ENSG00000147234', 'ENSG00000132274', 'ENSG00000258710', 'ENSG00000251173', 'ENSG00000125388', 'ENSG00000225234', 'ENSG00000119509', 'ENSG00000006283', 'ENSG00000250317', 'ENSG00000234056', 'ENSG00000188000', 'ENSG00000109099', 'ENSG00000182185', 'ENSG00000167601', 'ENSG00000109670', 'ENSG00000249459', 'ENSG00000137404', 'ENSG00000162630', 'ENSG00000117758', 'ENSG00000105287', 'ENSG00000134909', 'ENSG00000175806', 'ENSG00000158201', 'ENSG00000001460', 'ENSG00000203362', 'ENSG00000185477', 'ENSG00000187189', 'ENSG00000101310', 'ENSG00000163762', 'ENSG00000136457', 'ENSG00000065518', 'ENSG00000141391', 'ENSG00000172466', 'ENSG00000128656', 'ENSG00000155966', 'ENSG00000151704', 'ENSG00000157796', 'ENSG00000163406', 'ENSG00000120833', 'ENSG00000105928', 'ENSG00000197745', 'ENSG00000111877', 'ENSG00000071282', 'ENSG00000234478', 'ENSG00000110921', 'ENSG00000227659', 'ENSG00000134258', 'ENSG00000146776', 'ENSG00000137714', 'ENSG00000169906', 'ENSG00000155324', 'ENSG00000072518', 'ENSG00000237489', 'ENSG00000241211', 'ENSG00000174483', 'ENSG00000115355', 'ENSG00000124145', 'ENSG00000131143', 'ENSG00000078814', 'ENSG00000128185', 'ENSG00000215529', 'ENSG00000125848', 'ENSG00000143033', 'ENSG00000100221', 'ENSG00000137413', 'ENSG00000179262', 'ENSG00000169126', 'ENSG00000179271', 'ENSG00000177476', 'ENSG00000181513', 'ENSG00000153157', 'ENSG00000125868', 'ENSG00000146674', 'ENSG00000242242', 'ENSG00000103507', 'ENSG00000181722', 'ENSG00000132026', 'ENSG00000125046', 'ENSG00000171606', 'ENSG00000166800', 'ENSG00000106992', 'ENSG00000136720', 'ENSG00000143595', 'ENSG00000180233', 'ENSG00000101463', 'ENSG00000035403', 'ENSG00000081913', 'ENSG00000205250', 
'ENSG00000180815', 'ENSG00000139287', 'ENSG00000138594', 'ENSG00000224051', 'ENSG00000127564', 'ENSG00000198829', 'ENSG00000180354', 'ENSG00000255804', 'ENSG00000001167', 'ENSG00000135632', 'ENSG00000128989', 'ENSG00000130997', 'ENSG00000188909', 'ENSG00000250748', 'ENSG00000227488', 'ENSG00000172785', 'ENSG00000121068', 'ENSG00000031081', 'ENSG00000107938', 'ENSG00000120533', 'ENSG00000197694', 'ENSG00000167797', 'ENSG00000127252', 'ENSG00000203909', 'ENSG00000142534', 'ENSG00000204516', 'ENSG00000215568', 'ENSG00000162413', 'ENSG00000178734', 'ENSG00000184557', 'ENSG00000100418', 'ENSG00000180353', 'ENSG00000106266', 'ENSG00000105270', 'ENSG00000177885', 'ENSG00000164045', 'ENSG00000167588', 'ENSG00000188906', 'ENSG00000143032', 'ENSG00000135914', 'ENSG00000189280', 'ENSG00000154016', 'ENSG00000104983', 'ENSG00000096433', 'ENSG00000172404', 'ENSG00000138756', 'ENSG00000126254', 'ENSG00000162885', 'ENSG00000270011', 'ENSG00000205846', 'ENSG00000260903', 'ENSG00000180822', 'ENSG00000078795', 'ENSG00000172273', 'ENSG00000196227', 'ENSG00000258839', 'ENSG00000179213', 'ENSG00000114573', 'ENSG00000008196', 'ENSG00000183734', 'ENSG00000168907', 'ENSG00000089041', 'ENSG00000212657', 'ENSG00000169154', 'ENSG00000175329', 'ENSG00000165837', 'ENSG00000137252', 'ENSG00000100721', 'ENSG00000239886', 'ENSG00000162383', 'ENSG00000214013', 'ENSG00000172380', 'ENSG00000144668', 'ENSG00000223547', 'ENSG00000138071', 'ENSG00000073417', 'ENSG00000174255', 'ENSG00000197016', 'ENSG00000172115', 'ENSG00000122711', 'ENSG00000196420', 'ENSG00000102243', 'ENSG00000075151', 'ENSG00000198001', 'ENSG00000186583', 'ENSG00000165626', 'ENSG00000233610', 'ENSG00000132950', 'ENSG00000143061', 'ENSG00000164830', 'ENSG00000143622', 'ENSG00000111885', 'ENSG00000107317', 'ENSG00000250133', 'ENSG00000231310', 'ENSG00000156006', 'ENSG00000140988', 'ENSG00000175018', 'ENSG00000058085', 'ENSG00000174914', 'ENSG00000105472', 'ENSG00000142149', 'ENSG00000183067', 'ENSG00000004975', 'ENSG00000205858', 
'ENSG00000164885', 'ENSG00000186448', 'ENSG00000186493', 'ENSG00000204379', 'ENSG00000144227', 'ENSG00000168876', 'ENSG00000142910', 'ENSG00000262074', 'ENSG00000159184', 'ENSG00000113648', 'ENSG00000223882', 'ENSG00000198454', 'ENSG00000175482', 'ENSG00000254535', 'ENSG00000057019', 'ENSG00000204475', 'ENSG00000186395', 'ENSG00000184481', 'ENSG00000076356', 'ENSG00000101981', 'ENSG00000157978', 'ENSG00000177335', 'ENSG00000205078', 'ENSG00000224418', 'ENSG00000122877', 'ENSG00000237232', 'ENSG00000204444', 'ENSG00000248079', 'ENSG00000154760', 'ENSG00000146477', 'ENSG00000003137', 'ENSG00000164128', 'ENSG00000000003', 'ENSG00000149781', 'ENSG00000198400', 'ENSG00000124003', 'ENSG00000175591', 'ENSG00000179165', 'ENSG00000174827', 'ENSG00000259070', 'ENSG00000125910', 'ENSG00000055332', 'ENSG00000105669', 'ENSG00000115461', 'ENSG00000120008', 'ENSG00000105205', 'ENSG00000143867', 'ENSG00000147439', 'ENSG00000135175', 'ENSG00000181718', 'ENSG00000184905', 'ENSG00000126233', 'ENSG00000258701', 'ENSG00000198301', 'ENSG00000108379', 'ENSG00000150667', 'ENSG00000163995', 'ENSG00000079999', 'ENSG00000149743', 'ENSG00000166961', 'ENSG00000221923', 'ENSG00000183624', 'ENSG00000162402', 'ENSG00000163636', 'ENSG00000110717', 'ENSG00000168843', 'ENSG00000138395', 'ENSG00000085415', 'ENSG00000126391', 'ENSG00000169684', 'ENSG00000185896', 'ENSG00000181027', 'ENSG00000000971', 'ENSG00000091106', 'ENSG00000140675', 'ENSG00000196074', 'ENSG00000105971', 'ENSG00000225255', 'ENSG00000118985', 'ENSG00000172828', 'ENSG00000054277', 'ENSG00000214681', 'ENSG00000250334', 'ENSG00000139343', 'ENSG00000162552', 'ENSG00000164185', 'ENSG00000107882', 'ENSG00000251655', 'ENSG00000136535', 'ENSG00000087269', 'ENSG00000267374', 'ENSG00000182117', 'ENSG00000214941', 'ENSG00000134153', 'ENSG00000251015', 'ENSG00000171916', 'ENSG00000177398', 'ENSG00000205899', 'ENSG00000167397', 'ENSG00000166685', 'ENSG00000248866', 'ENSG00000075399', 'ENSG00000179083', 'ENSG00000117262', 'ENSG00000119912', 
'ENSG00000101888', 'ENSG00000243207', 'ENSG00000238121', 'ENSG00000139874', 'ENSG00000241769', 'ENSG00000250106', 'ENSG00000268006', 'ENSG00000159792', 'ENSG00000157510', 'ENSG00000228369', 'ENSG00000171811', 'ENSG00000164953', 'ENSG00000143786', 'ENSG00000203667', 'ENSG00000164611', 'ENSG00000081307', 'ENSG00000152457', 'ENSG00000163319', 'ENSG00000183638', 'ENSG00000145425', 'ENSG00000179044']
→ save terms via .add_validated_from_var_index()
• mapping depmap_id on CellLine.ontology_id
! found 183 validated terms: ['ACH-000390', 'ACH-000444', 'ACH-000396', 'ACH-000997', 'ACH-000723', 'ACH-000504', 'ACH-001190', 'ACH-000834', 'ACH-000880', 'ACH-000717', 'ACH-000824', 'ACH-000713', 'ACH-000219', 'ACH-000762', 'ACH-000022', 'ACH-000750', 'ACH-000603', 'ACH-000228', 'ACH-000423', 'ACH-000553', 'ACH-000001', 'ACH-000270', 'ACH-000374', 'ACH-000649', 'ACH-000174', 'ACH-000791', 'ACH-000764', 'ACH-000873', 'ACH-000510', 'ACH-000376', 'ACH-000211', 'ACH-000875', 'ACH-000749', 'ACH-000347', 'ACH-000416', 'ACH-000565', 'ACH-000463', 'ACH-000397', 'ACH-000200', 'ACH-000086', 'ACH-000537', 'ACH-000479', 'ACH-000666', 'ACH-000897', 'ACH-000927', 'ACH-000415', 'ACH-000900', 'ACH-000189', 'ACH-000982', 'ACH-000288', 'ACH-000244', 'ACH-000323', 'ACH-000916', 'ACH-000704', 'ACH-000488', 'ACH-000235', 'ACH-000142', 'ACH-000685', 'ACH-000021', 'ACH-000159', 'ACH-000884', 'ACH-001307', 'ACH-000265', 'ACH-000756', 'ACH-000047', 'ACH-000252', 'ACH-000471', 'ACH-000977', 'ACH-000888', 'ACH-000209', 'ACH-000502', 'ACH-000212', 'ACH-000652', 'ACH-000367', 'ACH-000434', 'ACH-000950', 'ACH-000549', 'ACH-000947', 'ACH-000833', 'ACH-000452', 'ACH-000255', 'ACH-000822', 'ACH-000670', 'ACH-000896', 'ACH-000589', 'ACH-000886', 'ACH-000018', 'ACH-000672', 'ACH-000428', 'ACH-000911', 'ACH-000274', 'ACH-000785', 'ACH-000868', 'ACH-000866', 'ACH-000458', 'ACH-000882', 'ACH-000954', 'ACH-000958', 'ACH-000738', 'ACH-000734', 'ACH-000849', 'ACH-000973', 'ACH-000961', 'ACH-000098', 'ACH-000486', 'ACH-000605', 'ACH-000837', 'ACH-000460', 'ACH-000368', 'ACH-000123', 'ACH-000329', 'ACH-000186', 'ACH-000939', 'ACH-000657', 'ACH-000622', 'ACH-000015', 'ACH-000956', 'ACH-000014', 'ACH-000037', 'ACH-000966', 'ACH-000335', 'ACH-000579', 'ACH-000769', 'ACH-000644', 'ACH-000445', 'ACH-000701', 'ACH-000545', 'ACH-000595', 'ACH-000149', 'ACH-000178', 'ACH-000164', 'ACH-000023', 'ACH-000292', 'ACH-000517', 'ACH-000117', 'ACH-000527', 'ACH-000572', 'ACH-000048', 'ACH-000967', 'ACH-000324', 
'ACH-000601', 'ACH-000680', 'ACH-000026', 'ACH-000411', 'ACH-000569', 'ACH-000669', 'ACH-000277', 'ACH-000941', 'ACH-000495', 'ACH-000096', 'ACH-000085', 'ACH-000268', 'ACH-000302', 'ACH-000393', 'ACH-000618', 'ACH-000624', 'ACH-000774', 'ACH-000552', 'ACH-000407', 'ACH-000936', 'ACH-000681', 'ACH-000971', 'ACH-000237', 'ACH-000976', 'ACH-000776', 'ACH-000650', 'ACH-000803', 'ACH-000535', 'ACH-000341', 'ACH-000090', 'ACH-000662', 'ACH-000748', 'ACH-000661', 'ACH-001239', 'ACH-000826', 'ACH-000035', 'ACH-000343', 'ACH-000847', 'ACH-000163', 'ACH-000906', 'ACH-000903', 'ACH-000449', 'ACH-000842']
→ save terms via .add_validated_from('depmap_id')
• mapping cell_line on CellLine.name
! found 30 validated terms: ['YD-10B', 'UM-UC-1', 'YD-38', 'LOX IMVI', 'SH-10-TC', 'SF-295', 'SNU-761', 'KNS-81', 'BICR 6', 'L3.3', 'SNU-1079', 'TCC-PAN2', 'SNU-1076', 'UO-31', 'HCC-1195', 'JHH-5', 'SNU-8', 'SNU-1105', 'COV434', 'IGROV1', 'KNS-60', 'RMUG-S', 'SNU-410', '253J-BV', 'SNU-245', 'SNU-1077', 'SNU-1041', 'RERF-LC-Ad2', 'SNU-685', 'WM1799']
→ save terms via .add_validated_from('cell_line')
! 153 terms are not validated: 'LUDLU-1 cell', 'RCB1900 cell', 'J82 cell', 'HCT-15 cell', 'SNB75 cell', 'SK-MEL-2 cell', 'AGS cell', 'COLO-680N cell', 'KYSE-510 cell', 'Caov-3 cell', 'A-375 cell', 'PA-TU-8988S cell', 'BEN cell', 'BICR 31 cell', 'SK-MEL-3 cell', 'RCB1905 cell', 'NIH:OVCAR-3 cell', 'HPAC cell', 'HCC1143 cell', '786-O cell', ...
→ fix typos, remove non-existent values, or save terms via .add_new_from('cell_line')
• mapping disease on Disease.name
! found 21 validated terms: ['lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'malignant pancreatic neoplasm', 'breast cancer', 'kidney cancer', 'thyroid cancer', 'uterine corpus cancer', 'liver cancer', 'prostate cancer', 'bile duct cancer', 'sarcoma', 'neuroblastoma', 'rhabdoid tumor', 'bone cancer']
→ save terms via .add_validated_from('disease')
✓ organism is validated against Organism.name
• mapping perturbation_type on ULabel.name
! 2 terms are not validated: 'drug', 'CRISPR'
→ fix typos, remove non-existent values, or save terms via .add_new_from('perturbation_type')
• mapping sex on Phenotype.name
! 3 terms are not validated: 'Male', 'Female', 'Unknown'
→ fix typos, remove non-existent values, or save terms via .add_new_from('sex')
• mapping time on ULabel.name
! 4 terms are not validated: '24', '72, 96', '3, 6, 12, 24, 48', '6'
→ fix typos, remove non-existent values, or save terms via .add_new_from('time')
• mapping tissue_type on ULabel.name
! 1 terms is not validated: 'cell_line'
→ fix typos, remove non-existent values, or save terms via .add_new_from('tissue_type')
False
# Persist the terms reported by the validation step above, keeping the original call order:
# matches against the public ontologies are saved via add_validated_*,
# labels without a match are registered as new records via add_new_from.
curate.add_validated_from_var_index()
curate.add_validated_from("depmap_id")
for new_label_column in ("perturbation_type", "sex", "time", "tissue_type"):
    curate.add_new_from(new_label_column)
curate.add_validated_from("disease")
curate.add_new_from("cell_line")
Show code cell output
✓ added 2 records with ULabel.name for perturbation_type: 'drug', 'CRISPR'
✓ added 3 records with Phenotype.name for sex: 'Male', 'Female', 'Unknown'
✓ added 4 records with ULabel.name for time: '24', '72, 96', '3, 6, 12, 24, 48', '6'
✓ added 1 record with ULabel.name for tissue_type: 'cell_line'
Modeling and curating perturbation metadata¶
The dataset has two types of perturbations: CRISPR and Compounds. We will create their records and associated targets separately.
# Split the per-cell metadata by perturbation type so each kind can be curated on its own.
perturbation_type = adata.obs["perturbation_type"]
crispr_metadata = adata.obs[perturbation_type == "CRISPR"]
drug_metadata = adata.obs[perturbation_type == "drug"]
The `wetlab` schema has two major components:
1. Treatments: `wetlab.EnvironmentalTreatment` to model perturbations such as heat, `wetlab.GeneticTreatment` to model perturbations such as CRISPR, and `wetlab.CompoundTreatment` to model, for example, drugs. Several treatments together can be modeled using `wetlab.CombinationTreatment`.
2. Targets: known targets of treatments can be modeled through `wetlab.TreatmentTarget`, which can link to one or several `bionty.Gene`, `bionty.Protein`, or `bionty.Pathway` records.
Genetic perturbations¶
Genetic perturbations can be modeled in two ways, depending on the available information, by populating:
- a `wetlab.GeneticTreatment` record, if details of the system such as the guide RNA name or sequence and the on- and off-target scores are known.
- a `wetlab.TreatmentTarget` record that links to `bionty.Gene` records.
# Preview the first three rows of the CRISPR subset of the cell metadata.
crispr_metadata.head(3)
Show code cell output
depmap_id | cancer | cell_det_rate | cell_line | cell_quality | channel | disease | dose_unit | dose_value | doublet_CL1 | doublet_CL2 | doublet_GMM_prob | doublet_dev_imp | doublet_z_margin | hash_assignment | hash_tag | num_SNPs | organism | perturbation | perturbation_type | sex | singlet_ID | singlet_dev | singlet_dev_z | singlet_margin | singlet_z_margin | time | tissue_type | tot_reads | nperts | ngenes | ncounts | percent_mito | percent_ribo | chembl-ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
TAGTTGGAGATCGATA | ACH-000723 | True | 0.132708 | YD-10B | low_quality | nan | head and neck cancer | nan | NaN | YD10B_UPPER_AERODIGESTIVE_TRACT | 647V_URINARY_TRACT | NaN | 0.156492 | 1.556214 | nan | nan | 874 | human | sggpx4-2 | CRISPR | Male | YD10B_UPPER_AERODIGESTIVE_TRACT | 0.292802 | 3.272682 | 0.016459 | 0.330120 | 72, 96 | cell_line | 2105 | 1 | 4341 | 20693.0 | 0.695887 | 16.242208 | NaN |
CATCGGGGTTCATGGT | ACH-000219 | True | 0.087860 | A-375 | normal | nan | skin cancer | nan | NaN | A375_SKIN | DAOY_CENTRAL_NERVOUS_SYSTEM | 2.496623e-07 | 0.007701 | 0.088255 | nan | nan | 524 | human | sglacz | CRISPR | Female | A375_SKIN | 0.671925 | 13.649916 | 0.464200 | 11.962996 | 72, 96 | cell_line | 1035 | 1 | 2919 | 13771.0 | 2.730375 | 40.592550 | NaN |
AAATGCCTCGTGGACC-1 | ACH-000762 | True | 0.075085 | YD-38 | normal | nan | head and neck cancer | nan | NaN | YD38_UPPER_AERODIGESTIVE_TRACT | IGR1_SKIN | 2.366136e-02 | 0.032628 | 0.158193 | nan | nan | 407 | human | sggpx4-2 | CRISPR | Male | YD38_UPPER_AERODIGESTIVE_TRACT | 0.571537 | 7.417734 | 0.331906 | 5.552359 | 72, 96 | cell_line | 829 | 1 | 2456 | 10996.0 | 2.528192 | 39.841761 | NaN |
# List the distinct perturbation labels that occur in the CRISPR subset.
list(crispr_metadata["perturbation"].unique())
Show code cell output
['sggpx4-2', 'sglacz', 'sggpx4-1', 'sgor2j2']
What are the associated targets?
The following targets are the direct targets of the perturbations, and while they may affect a pathway, we only curate the direct targets for simplicity.
sgGPX4-1: Gene/Protein - GPX4 (Glutathione Peroxidase 4)
sgGPX4-2: Gene/Protein - GPX4 (Glutathione Peroxidase 4)
sgLACZ: Gene/Protein - LACZ (β-galactosidase)
sgOR2J2: Gene/Protein - OR2J2 (Olfactory receptor family 2 subfamily J member 2)
Since the perturbation metadata contains the guide RNA names, we model the genetic perturbations using both `wetlab.GeneticTreatment` and `wetlab.TreatmentTarget`.
# Each entry is (guide RNA name, targeted gene symbol, human-readable target name).
# NOTE(review): adata.obs["perturbation"] holds lowercase labels
# ('sggpx4-1', 'sggpx4-2', 'sglacz', 'sgor2j2'); confirm the mixed casing used
# here is what downstream validation of that column expects.
treatments = [
    ("sgGPX4-1", "GPX4", "Glutathione Peroxidase 4"),
    ("sgGPX4-2", "GPX4", "Glutathione Peroxidase 4"),
    ("sgor2j2", "or2j2", "Olfactory receptor family 2 subfamily J member 2"),
    ("sgLACZ", "lacz", "beta-galactosidase control"),  # Control from E. coli
]
organism = bt.Organism.lookup().human

# Create one GeneticTreatment per guide RNA and link it to a TreatmentTarget
# that in turn points at the targeted Gene record.
genetic_treatments = []
for name, symbol, target_name in treatments:
    treatment = wl.GeneticTreatment(system="CRISPR KO", name=name).save()
    if symbol != "lacz":
        # Look the gene up in the public source; the lookup may hand back either
        # a single record or a list of records, so take the first hit of a list.
        gene_result = bt.Gene.from_source(symbol=symbol, organism=organism)
        gene = gene_result[0] if isinstance(gene_result, list) else gene_result
        gene = gene.save()
    else:
        # The lacZ control is created directly instead of via the public source
        # lookup (presumably because it is not part of the human gene source).
        gene = bt.Gene(symbol=symbol, organism=organism).save()
    # Both GPX4 guides share one target name, so the second save() returns the
    # already existing TreatmentTarget instead of creating a duplicate.
    target = wl.TreatmentTarget(name=target_name).save()
    target.genes.add(gene)
    treatment.targets.add(target)
    genetic_treatments.append(treatment)
Show code cell output
✓ created 1 Gene record from Bionty matching symbol: 'GPX4'
! record with similar name exists! did you mean to load it?
uid | name | system | sequence | on_target_score | off_target_score | run_id | created_by_id | updated_at | |
---|---|---|---|---|---|---|---|---|---|
id | |||||||||
1 | f9mDuZzSAByk | sgGPX4-1 | CRISPR KO | None | None | None | 1 | 1 | 2024-09-25 20:02:36.016813+00:00 |
→ returning existing TreatmentTarget record with same name: 'Glutathione Peroxidase 4'
✓ created 1 Gene record from Bionty matching synonyms: 'or2j2'
! ambiguous validation in Bionty for 1 record: 'OR2J2'
Compound perturbations¶
Although the targets are known for many compounds, we skip annotating them here to keep the guide brief.
What are the compound targets?
AZD5591: Unknown
Afatinib: Proteins - EGFR (Epidermal Growth Factor Receptor), HER2 (Human Epidermal growth factor Receptor 2)
BRD3379: Unknown
Bortezomib: Protein complex - Proteasome (specifically the 26S proteasome subunit)
Dabrafenib: Gene/Protein - BRAF (V600E mutation in the BRAF gene, which codes for a protein kinase)
Everolimus: Protein - mTOR (Mammalian Target of Rapamycin)
Gemcitabine: Pathway/Process - DNA synthesis (inhibition of ribonucleotide reductase and incorporation into DNA)
Idasanutlin: Protein - MDM2 (Mouse Double Minute 2 homolog)
JQ1: Protein - BRD4 (Bromodomain-containing protein 4)
Navitoclax: Proteins - BCL-2, BCL-XL (B-cell lymphoma 2 and B-cell lymphoma-extra large)
Prexasertib: Protein - CHK1 (Checkpoint kinase 1)
Taselisib: Protein/Pathway - PI3K (Phosphoinositide 3-kinase)
Trametinib: Proteins - MEK1/2 (Mitogen-Activated Protein Kinase Kinase 1 and 2)
control: Not applicable
# Drug perturbations are annotated against the ChEBI drug ontology,
# whose public records also carry a chembl_id column.
drug_sources = bt.Source.filter(entity="Drug", name="chebi")
chebi_source = drug_sources.one()
wl.Compound.add_source(chebi_source)
compounds = wl.Compound.public()
compounds_df = compounds.df()
compounds_df.head(3)
Show code cell output
→ due to lack of write access, LaminDB won't manage storage location: s3://bionty-assets/
• path in storage 's3://bionty-assets' with key 'df_all__chebi__2024-07-27__Drug.parquet'
→ source added!
name | definition | synonyms | parents | chembl_id | |
---|---|---|---|---|---|
ontology_id | |||||
CHEBI:10 | (+)-Atherospermoline | None | (+)-Atherospermoline | [CHEBI:133004] | CHEMBL500609 |
CHEBI:100 | (-)-medicarpin | The (-)-Enantiomer Of Medicarpin. | (-)-Medicarpin|(-)-medicarpin|(6aR,11aR)-9-met... | [CHEBI:16114] | CHEMBL238845 |
CHEBI:10000 | Vismione D | None | Vismione D | [CHEBI:46955] | CHEMBL487795 |
# Preview the observation metadata held in `drug_metadata`; the rows displayed
# below all have perturbation type "drug".
drug_metadata.head(3)
Show code cell output
depmap_id | cancer | cell_det_rate | cell_line | cell_quality | channel | disease | dose_unit | dose_value | doublet_CL1 | doublet_CL2 | doublet_GMM_prob | doublet_dev_imp | doublet_z_margin | hash_assignment | hash_tag | num_SNPs | organism | perturbation | perturbation_type | sex | singlet_ID | singlet_dev | singlet_dev_z | singlet_margin | singlet_z_margin | time | tissue_type | tot_reads | nperts | ngenes | ncounts | percent_mito | percent_ribo | chembl-ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AACTGGTGTCTCTCTG | ACH-000390 | True | 0.093159 | LUDLU-1 | normal | nan | lung cancer | µM | 0.1 | LUDLU1_LUNG | TE14_OESOPHAGUS | 2.269468e-10 | 0.009426 | 0.403316 | nan | nan | 481 | human | trametinib | drug | Male | LUDLU1_LUNG | 0.655877 | 14.860933 | 0.462273 | 12.351139 | 24 | cell_line | 787 | 1 | 3045 | 12895.0 | 3.202792 | 24.955409 | CHEMBL2103875 |
ATAGGCTCAGATTTCG | ACH-000444 | True | 0.145728 | LU99 | normal | 2 | lung cancer | µM | 0.5 | LU99_LUNG | MCAS_OVARY | 8.562908e-04 | 0.010173 | 0.188284 | nan | nan | 1003 | human | afatinib | drug | Male | LU99_LUNG | 0.762847 | 10.648094 | 0.474590 | 8.164565 | 24 | cell_line | 1597 | 1 | 4763 | 23161.0 | 7.473771 | 18.051898 | CHEMBL1173655 |
GCCAAATCAAGCCGTC | ACH-000396 | True | 0.117330 | J82 | normal | nan | urinary bladder carcinoma | µM | 0.1 | J82_URINARY_TRACT | IGR1_SKIN | 6.490367e-08 | 0.009686 | 1.185862 | nan | nan | 647 | human | dabrafenib | drug | Male | J82_URINARY_TRACT | 0.651059 | 14.740111 | 0.404508 | 11.188513 | 24 | cell_line | 1159 | 1 | 3834 | 18062.0 | 2.762706 | 22.085040 | CHEMBL2028663 |
# Build Compound records from the perturbation names, matched on the `name`
# field. As the cell output shows, names matched by name or synonym yield
# records, while non-validated names are only reported and not created.
compounds = wl.Compound.from_values(drug_metadata["perturbation"], field="name")
Show code cell output
✓ created 7 Compound records from Bionty matching name: 'trametinib', 'afatinib', 'dabrafenib', 'gemcitabine', 'navitoclax', 'bortezomib', 'everolimus'
✓ created 1 Compound record from Bionty matching synonyms: 'jq1'
! did not create Compound records for 6 non-validated names: 'azd5591', 'brd3379', 'control', 'idasanutlin', 'prexasertib', 'taselisib'
# These names could not be validated against ChEBI, so we create plain
# Compound records for them by hand and save them together with the
# validated ones.
unvalidated_names = (
    "azd5591",
    "brd3379",
    "control",
    "idasanutlin",
    "prexasertib",
    "taselisib",
)
for compound_name in unvalidated_names:
    compounds.append(wl.Compound(name=compound_name))

ln.save(compounds)
# "jq1" was matched through a synonym, so replace it in the metadata with the
# name stored on the registered Compound record.
is_drug = adata.obs["perturbation_type"] == "drug"
drug_metadata = adata.obs[is_drug].copy()
jq1_name = wl.Compound.search("jq1").one().name
drug_metadata["perturbation"] = drug_metadata["perturbation"].cat.rename_categories(
    {"jq1": jq1_name}
)

# One treatment per distinct (compound, dose unit, dose value) combination.
treatment_columns = ["perturbation", "dose_unit", "dose_value"]
unique_treatments = drug_metadata[treatment_columns].drop_duplicates()

compound_treatments = []
for _, dose in unique_treatments.iterrows():
    compound = wl.Compound.get(name=dose["perturbation"])
    compound_treatments.append(
        wl.CompoundTreatment(
            name=compound.name,
            concentration=dose["dose_value"],
            concentration_unit=dose["dose_unit"],
        )
    )

ln.save(compound_treatments)
Register curated artifact¶
# Save the curated dataset as an artifact; the validation result for each
# annotated column is printed in the cell output.
artifact = curate.save_artifact(description="McFarland AnnData")
Show code cell output
✓ var_index is validated against Gene.ensembl_gene_id
✓ depmap_id is validated against CellLine.ontology_id
✓ cell_line is validated against CellLine.name
✓ disease is validated against Disease.name
✓ organism is validated against Organism.name
✓ perturbation_type is validated against ULabel.name
✓ sex is validated against Phenotype.name
✓ time is validated against ULabel.name
✓ tissue_type is validated against ULabel.name
• path content will be copied to default storage upon `save()` with key `None` ('.lamindb/YBpOBsmYG8lR61MN0000.h5ad')
✓ storing artifact 'YBpOBsmYG8lR61MN0000' at '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-perturbation/.lamindb/YBpOBsmYG8lR61MN0000.h5ad'
• parsing feature names of X stored in slot 'var'
✓ 1279 terms (100.00%) are validated for ensembl_gene_id
✓ linked: FeatureSet(uid='LRsTRvS8Nn8myxlKIaoI', n=1279, dtype='float', registry='bionty.Gene', hash='U5j5OOswqK8HTUlUhYLK6A', created_by_id=1, run_id=1)
• parsing feature names of slot 'obs'
✓ 8 terms (22.90%) are validated for name
! 27 terms (77.10%) are not validated for name: cancer, cell_det_rate, cell_quality, channel, dose_unit, dose_value, doublet_CL1, doublet_CL2, doublet_GMM_prob, doublet_dev_imp, doublet_z_margin, hash_assignment, hash_tag, num_SNPs, perturbation, singlet_ID, singlet_dev, singlet_dev_z, singlet_margin, singlet_z_margin, ...
✓ linked: FeatureSet(uid='zFNlVbR7twomRsvzWczY', n=8, registry='Feature', hash='kk0rtw7JSCuLw3NfrL1SjQ', created_by_id=1, run_id=1)
✓ saved 2 feature sets for slots: 'var','obs'
✓ loaded 20 Disease records matching name: 'lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'breast cancer', 'kidney cancer', 'thyroid cancer', 'uterine corpus cancer', 'liver cancer', 'prostate cancer', 'bile duct cancer', 'sarcoma', 'neuroblastoma', 'rhabdoid tumor', 'bone cancer'
✓ loaded 1 Disease record matching synonyms: 'pancreatic cancer'
# Attach the genetic and compound treatment records to the artifact, then print
# a summary of its provenance, labels, and features.
artifact.genetic_treatments.set(genetic_treatments)
artifact.compound_treatments.set(compound_treatments)
artifact.describe()
Show code cell output
Artifact(uid='YBpOBsmYG8lR61MN0000', is_latest=True, description='McFarland AnnData', suffix='.h5ad', type='dataset', size=2373992, hash='736vOApj6DkONTUEh1diVw', n_observations=1000, _hash_type='md5', _accessor='AnnData', visibility=1, _key_is_virtual=True, updated_at='2024-09-25 20:02:51 UTC')
Provenance
.storage = '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-perturbation'
.transform = 'Perturbation'
.run = '2024-09-25 20:01:53 UTC'
.created_by = 'testuser1'
Labels
.genetic_treatments = 'sgGPX4-1', 'sgGPX4-2', 'sgor2j2', 'sgLACZ'
.compound_treatments = 'trametinib', 'afatinib', 'dabrafenib', 'gemcitabine', 'navitoclax', 'bortezomib', 'brd3379', 'JQ1', 'azd5591', 'control', ...
.organisms = 'human'
.diseases = 'lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'malignant pancreatic neoplasm', ...
.cell_lines = 'LUDLU-1', 'LU99', 'J82', 'HCT-15', 'YD-10B', 'SNB75', 'SK-MEL-2', 'UM-UC-1', 'AGS', 'COLO-680N', ...
.phenotypes = 'Male', 'Female', 'Unknown'
.ulabels = 'drug', 'CRISPR', '24', '72, 96', '3, 6, 12, 24, 48', '6', 'cell_line'
Features
'cell_line' = 'LUDLU-1', 'LU99', 'J82', 'HCT-15', 'YD-10B', 'SNB75', 'SK-MEL-2', 'UM-UC-1', 'AGS', 'COLO-680N', ...
'disease' = 'lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'malignant pancreatic neoplasm', ...
'organism' = 'human'
'perturbation_type' = 'drug', 'CRISPR'
'sex' = 'Male', 'Female', 'Unknown'
'time' = '24', '72, 96', '3, 6, 12, 24, 48', '6'
'tissue_type' = 'cell_line'
Feature sets
'var' = 'MAGED2', 'CPE', 'DDX43', 'SPANXA2', 'TMEM175', 'MMD', 'DYNC2I1', 'CCRL2', 'MRPS6', 'KCNH1', 'SLC5A7', 'GSX2', 'ATP5MG', 'TMEM190', 'GTF2H1', 'PACSIN1', 'MIR3179-2', 'KLK10', 'EFCAB14-AS1', 'IL33'
'obs' = 'depmap_id', 'cell_line', 'disease', 'organism', 'perturbation_type', 'sex', 'time', 'tissue_type'
# Clean up the test instance: remove its local storage directory, then delete
# the instance itself.
!rm -r test-perturbation
!lamin delete --force test-perturbation
Show code cell output
• deleting instance testuser1/test-perturbation