Perturbation

This guide demonstrates how to curate a complex, real-world perturbation dataset from McFarland et al. 2020 using the wetlab schema.

# !pip install 'lamindb[jupyter,aws,bionty]' wetlab
!lamin init --storage ./test-perturbation --schema bionty,wetlab
→ connected lamindb: testuser1/test-perturbation
import lamindb as ln
import bionty as bt
import wetlab as wl
import pandas as pd

# Show every column when displaying DataFrames (None removes the column limit).
pd.set_option("display.max_columns", None)

# Pin this notebook's transform uid, then start tracking; the logged output
# below shows that track() creates the Transform and a Run record.
ln.context.uid = "K6sInKIQW5nt0002"
ln.context.track()
Hide code cell output
→ connected lamindb: testuser1/test-perturbation
→ notebook imports: bionty==0.50.2 lamindb==0.76.8 pandas==2.2.3 wetlab==0.33.0
→ created Transform(uid='K6sInKIQW5nt0002') & created Run(started_at='2024-09-25 20:01:53 UTC')
# See https://lamin.ai/laminlabs/lamindata/transform/13VINnFk89PE0004 to learn how this dataset was prepared
# Fetch the artifact by uid from the laminlabs/lamindata instance and load it.
adata = ln.Artifact.using("laminlabs/lamindata").get(uid="Xk7Qaik9vBLV4PKf0000").load()
# Preview the first three rows of the obs metadata.
adata.obs.head(3)
Hide code cell output
depmap_id cancer cell_det_rate cell_line cell_quality channel disease dose_unit dose_value doublet_CL1 doublet_CL2 doublet_GMM_prob doublet_dev_imp doublet_z_margin hash_assignment hash_tag num_SNPs organism perturbation perturbation_type sex singlet_ID singlet_dev singlet_dev_z singlet_margin singlet_z_margin time tissue_type tot_reads nperts ngenes ncounts percent_mito percent_ribo chembl-ID
AACTGGTGTCTCTCTG ACH-000390 True 0.093159 LUDLU-1 normal nan lung cancer µM 0.1 LUDLU1_LUNG TE14_OESOPHAGUS 2.269468e-10 0.009426 0.403316 nan nan 481 human trametinib drug Male LUDLU1_LUNG 0.655877 14.860933 0.462273 12.351139 24 cell_line 787 1 3045 12895.0 3.202792 24.955409 CHEMBL2103875
ATAGGCTCAGATTTCG ACH-000444 True 0.145728 LU99 normal 2 lung cancer µM 0.5 LU99_LUNG MCAS_OVARY 8.562908e-04 0.010173 0.188284 nan nan 1003 human afatinib drug Male LU99_LUNG 0.762847 10.648094 0.474590 8.164565 24 cell_line 1597 1 4763 23161.0 7.473771 18.051898 CHEMBL1173655
GCCAAATCAAGCCGTC ACH-000396 True 0.117330 J82 normal nan urinary bladder carcinoma µM 0.1 J82_URINARY_TRACT IGR1_SKIN 6.490367e-08 0.009686 1.185862 nan nan 647 human dabrafenib drug Male J82_URINARY_TRACT 0.651059 14.740111 0.404508 11.188513 24 cell_line 1159 1 3834 18062.0 2.762706 22.085040 CHEMBL2028663
# Resolve the human bionty.Gene source at version release-112 up front so the
# curator call below stays readable.
gene_source = bt.Source.filter(
    entity="bionty.Gene",
    version="release-112",
    organism="human",
).one()

# Build a curator that maps the var index onto Ensembl gene IDs, validated
# against the source resolved above.
curate = ln.Curator.from_anndata(
    adata,
    var_index=bt.Gene.ensembl_gene_id,
    organism="human",
    sources={"var_index": gene_source},
)

curate.validate()
35 non-validated values are not saved in Feature.name: ['perturbation_type', 'singlet_dev_z', 'disease', 'singlet_ID', 'sex', 'singlet_z_margin', 'doublet_CL1', 'hash_tag', 'cell_quality', 'organism', 'cell_det_rate', 'num_SNPs', 'singlet_margin', 'depmap_id', 'doublet_CL2', 'channel', 'percent_ribo', 'tissue_type', 'singlet_dev', 'ngenes', 'percent_mito', 'doublet_GMM_prob', 'hash_assignment', 'ncounts', 'time', 'cell_line', 'chembl-ID', 'dose_value', 'doublet_dev_imp', 'perturbation', 'tot_reads', 'nperts', 'doublet_z_margin', 'dose_unit', 'cancer']!
      → to lookup values, use lookup().columns
      → to save, run add_new_from_columns
✓ created 1 Organism record from Bionty matching name: 'human'
• mapping var_index on Gene.ensembl_gene_id
!    found 1279 validated terms: ['ENSG00000102316', 'ENSG00000109472', 'ENSG00000080007', 'ENSG00000203926', 'ENSG00000127419', 'ENSG00000108960', 'ENSG00000126870', 'ENSG00000121797', 'ENSG00000243927', 'ENSG00000143473', 'ENSG00000115665', 'ENSG00000180613', 'ENSG00000167283', 'ENSG00000160472', 'ENSG00000110768', 'ENSG00000124507', 'ENSG00000257381', 'ENSG00000129451', 'ENSG00000228237', 'ENSG00000137033', 'ENSG00000120868', 'ENSG00000079616', 'ENSG00000177082', 'ENSG00000227392', 'ENSG00000251620', 'ENSG00000174804', 'ENSG00000057935', 'ENSG00000251493', 'ENSG00000164182', 'ENSG00000112033', 'ENSG00000236388', 'ENSG00000116039', 'ENSG00000131095', 'ENSG00000170956', 'ENSG00000104870', 'ENSG00000259494', 'ENSG00000116906', 'ENSG00000173599', 'ENSG00000187080', 'ENSG00000167258', 'ENSG00000131473', 'ENSG00000107742', 'ENSG00000144791', 'ENSG00000198286', 'ENSG00000196267', 'ENSG00000165966', 'ENSG00000124383', 'ENSG00000154957', 'ENSG00000196689', 'ENSG00000133627', 'ENSG00000049656', 'ENSG00000163918', 'ENSG00000163517', 'ENSG00000166133', 'ENSG00000003393', 'ENSG00000186628', 'ENSG00000136002', 'ENSG00000163629', 'ENSG00000136319', 'ENSG00000189159', 'ENSG00000166770', 'ENSG00000167548', 'ENSG00000134780', 'ENSG00000257127', 'ENSG00000205571', 'ENSG00000128309', 'ENSG00000111145', 'ENSG00000164587', 'ENSG00000096395', 'ENSG00000188820', 'ENSG00000148408', 'ENSG00000111554', 'ENSG00000140943', 'ENSG00000133101', 'ENSG00000148655', 'ENSG00000073969', 'ENSG00000231671', 'ENSG00000198919', 'ENSG00000107331', 'ENSG00000120440', 'ENSG00000111801', 'ENSG00000267013', 'ENSG00000259060', 'ENSG00000117395', 'ENSG00000198938', 'ENSG00000204595', 'ENSG00000183304', 'ENSG00000165102', 'ENSG00000162460', 'ENSG00000150471', 'ENSG00000137634', 'ENSG00000106688', 'ENSG00000254413', 'ENSG00000112282', 'ENSG00000101146', 'ENSG00000162004', 'ENSG00000175768', 'ENSG00000171067', 'ENSG00000103365', 'ENSG00000206052', 'ENSG00000168288', 'ENSG00000240476', 'ENSG00000116833', 
'ENSG00000111331', 'ENSG00000215252', 'ENSG00000143355', 'ENSG00000270800', 'ENSG00000161692', 'ENSG00000089053', 'ENSG00000189007', 'ENSG00000182986', 'ENSG00000107859', 'ENSG00000099203', 'ENSG00000171861', 'ENSG00000125744', 'ENSG00000204880', 'ENSG00000239877', 'ENSG00000229619', 'ENSG00000273155', 'ENSG00000166741', 'ENSG00000117139', 'ENSG00000153975', 'ENSG00000101188', 'ENSG00000146039', 'ENSG00000196666', 'ENSG00000146733', 'ENSG00000251022', 'ENSG00000163218', 'ENSG00000169857', 'ENSG00000206432', 'ENSG00000128617', 'ENSG00000142173', 'ENSG00000179115', 'ENSG00000117318', 'ENSG00000131459', 'ENSG00000178403', 'ENSG00000038295', 'ENSG00000148288', 'ENSG00000242612', 'ENSG00000082781', 'ENSG00000139684', 'ENSG00000186272', 'ENSG00000167604', 'ENSG00000111049', 'ENSG00000149357', 'ENSG00000132975', 'ENSG00000166823', 'ENSG00000102871', 'ENSG00000205155', 'ENSG00000183153', 'ENSG00000120438', 'ENSG00000157045', 'ENSG00000179751', 'ENSG00000140259', 'ENSG00000269058', 'ENSG00000171817', 'ENSG00000130383', 'ENSG00000155438', 'ENSG00000152433', 'ENSG00000204463', 'ENSG00000146521', 'ENSG00000174950', 'ENSG00000144741', 'ENSG00000174403', 'ENSG00000062096', 'ENSG00000120656', 'ENSG00000009790', 'ENSG00000166037', 'ENSG00000147650', 'ENSG00000127325', 'ENSG00000139209', 'ENSG00000153317', 'ENSG00000132475', 'ENSG00000006453', 'ENSG00000160813', 'ENSG00000138629', 'ENSG00000166450', 'ENSG00000175634', 'ENSG00000163254', 'ENSG00000261206', 'ENSG00000167608', 'ENSG00000213619', 'ENSG00000109171', 'ENSG00000116685', 'ENSG00000251258', 'ENSG00000062038', 'ENSG00000110497', 'ENSG00000069509', 'ENSG00000231944', 'ENSG00000166171', 'ENSG00000082512', 'ENSG00000162377', 'ENSG00000236311', 'ENSG00000146410', 'ENSG00000105369', 'ENSG00000137275', 'ENSG00000132846', 'ENSG00000160200', 'ENSG00000092148', 'ENSG00000184895', 'ENSG00000076351', 'ENSG00000011198', 'ENSG00000134531', 'ENSG00000163157', 'ENSG00000130584', 'ENSG00000174547', 'ENSG00000099994', 'ENSG00000146963', 
'ENSG00000229557', 'ENSG00000197683', 'ENSG00000185033', 'ENSG00000175548', 'ENSG00000072133', 'ENSG00000205085', 'ENSG00000137040', 'ENSG00000180138', 'ENSG00000173905', 'ENSG00000137166', 'ENSG00000177294', 'ENSG00000114942', 'ENSG00000112658', 'ENSG00000150594', 'ENSG00000164631', 'ENSG00000151650', 'ENSG00000223638', 'ENSG00000229086', 'ENSG00000198542', 'ENSG00000196436', 'ENSG00000175854', 'ENSG00000255181', 'ENSG00000101004', 'ENSG00000217930', 'ENSG00000077935', 'ENSG00000172340', 'ENSG00000023191', 'ENSG00000189164', 'ENSG00000177990', 'ENSG00000179873', 'ENSG00000187772', 'ENSG00000155729', 'ENSG00000124440', 'ENSG00000142694', 'ENSG00000105875', 'ENSG00000158828', 'ENSG00000111371', 'ENSG00000159337', 'ENSG00000176153', 'ENSG00000168702', 'ENSG00000235718', 'ENSG00000134873', 'ENSG00000141096', 'ENSG00000116786', 'ENSG00000054938', 'ENSG00000239225', 'ENSG00000086967', 'ENSG00000171431', 'ENSG00000188263', 'ENSG00000170807', 'ENSG00000007129', 'ENSG00000168234', 'ENSG00000173141', 'ENSG00000244395', 'ENSG00000100181', 'ENSG00000118898', 'ENSG00000185960', 'ENSG00000168269', 'ENSG00000084112', 'ENSG00000096872', 'ENSG00000175066', 'ENSG00000203952', 'ENSG00000196767', 'ENSG00000138381', 'ENSG00000142661', 'ENSG00000198815', 'ENSG00000204538', 'ENSG00000118200', 'ENSG00000147655', 'ENSG00000145194', 'ENSG00000213088', 'ENSG00000177272', 'ENSG00000126464', 'ENSG00000175087', 'ENSG00000011021', 'ENSG00000125834', 'ENSG00000197766', 'ENSG00000119514', 'ENSG00000165471', 'ENSG00000172469', 'ENSG00000176142', 'ENSG00000173786', 'ENSG00000247473', 'ENSG00000119943', 'ENSG00000188342', 'ENSG00000121481', 'ENSG00000080815', 'ENSG00000165506', 'ENSG00000161835', 'ENSG00000184117', 'ENSG00000188649', 'ENSG00000114480', 'ENSG00000134333', 'ENSG00000110900', 'ENSG00000082293', 'ENSG00000151640', 'ENSG00000059377', 'ENSG00000125514', 'ENSG00000148468', 'ENSG00000148110', 'ENSG00000039139', 'ENSG00000116151', 'ENSG00000197620', 'ENSG00000154025', 'ENSG00000151690', 
'ENSG00000188523', 'ENSG00000196961', 'ENSG00000048740', 'ENSG00000225473', 'ENSG00000020256', 'ENSG00000106683', 'ENSG00000105204', 'ENSG00000136352', 'ENSG00000183281', 'ENSG00000254377', 'ENSG00000180596', 'ENSG00000138347', 'ENSG00000157335', 'ENSG00000227051', 'ENSG00000187624', 'ENSG00000244462', 'ENSG00000250091', 'ENSG00000142207', 'ENSG00000164663', 'ENSG00000003756', 'ENSG00000170523', 'ENSG00000141431', 'ENSG00000106404', 'ENSG00000254656', 'ENSG00000174348', 'ENSG00000130827', 'ENSG00000145391', 'ENSG00000169499', 'ENSG00000233349', 'ENSG00000130700', 'ENSG00000159128', 'ENSG00000242715', 'ENSG00000140526', 'ENSG00000212747', 'ENSG00000214827', 'ENSG00000181982', 'ENSG00000223914', 'ENSG00000204351', 'ENSG00000185261', 'ENSG00000151388', 'ENSG00000184345', 'ENSG00000172782', 'ENSG00000005302', 'ENSG00000127515', 'ENSG00000136938', 'ENSG00000169750', 'ENSG00000213588', 'ENSG00000186862', 'ENSG00000151445', 'ENSG00000106609', 'ENSG00000120436', 'ENSG00000180872', 'ENSG00000116977', 'ENSG00000100522', 'ENSG00000133706', 'ENSG00000248920', 'ENSG00000184436', 'ENSG00000125247', 'ENSG00000164621', 'ENSG00000104218', 'ENSG00000111834', 'ENSG00000234323', 'ENSG00000260286', 'ENSG00000082068', 'ENSG00000205111', 'ENSG00000166261', 'ENSG00000197580', 'ENSG00000168214', 'ENSG00000018625', 'ENSG00000236637', 'ENSG00000165006', 'ENSG00000113575', 'ENSG00000140396', 'ENSG00000145337', 'ENSG00000135677', 'ENSG00000151379', 'ENSG00000169155', 'ENSG00000164761', 'ENSG00000234527', 'ENSG00000149633', 'ENSG00000079974', 'ENSG00000108474', 'ENSG00000103061', 'ENSG00000253457', 'ENSG00000120057', 'ENSG00000176230', 'ENSG00000043514', 'ENSG00000167483', 'ENSG00000145781', 'ENSG00000134775', 'ENSG00000086991', 'ENSG00000138107', 'ENSG00000149150', 'ENSG00000003989', 'ENSG00000063438', 'ENSG00000175643', 'ENSG00000198062', 'ENSG00000188766', 'ENSG00000089094', 'ENSG00000109814', 'ENSG00000173467', 'ENSG00000110066', 'ENSG00000125879', 'ENSG00000004838', 'ENSG00000109113', 
'ENSG00000120594', 'ENSG00000108094', 'ENSG00000169016', 'ENSG00000125148', 'ENSG00000257591', 'ENSG00000242689', 'ENSG00000141198', 'ENSG00000080709', 'ENSG00000142864', 'ENSG00000156675', 'ENSG00000157557', 'ENSG00000148215', 'ENSG00000111247', 'ENSG00000173715', 'ENSG00000205220', 'ENSG00000213799', 'ENSG00000173702', 'ENSG00000188729', 'ENSG00000099800', 'ENSG00000181555', 'ENSG00000234684', 'ENSG00000111481', 'ENSG00000185630', 'ENSG00000136840', 'ENSG00000164047', 'ENSG00000139726', 'ENSG00000243477', 'ENSG00000183401', 'ENSG00000168993', 'ENSG00000239839', 'ENSG00000179284', 'ENSG00000140992', 'ENSG00000135363', 'ENSG00000115425', 'ENSG00000121380', 'ENSG00000119041', 'ENSG00000123243', 'ENSG00000139192', 'ENSG00000165632', 'ENSG00000114054', 'ENSG00000049860', 'ENSG00000248698', 'ENSG00000168591', 'ENSG00000132561', 'ENSG00000139180', 'ENSG00000166987', 'ENSG00000167384', 'ENSG00000156140', 'ENSG00000119686', 'ENSG00000228567', 'ENSG00000182810', 'ENSG00000234230', 'ENSG00000125144', 'ENSG00000126775', 'ENSG00000240770', 'ENSG00000205126', 'ENSG00000172840', 'ENSG00000103381', 'ENSG00000178057', 'ENSG00000108064', 'ENSG00000120322', 'ENSG00000139428', 'ENSG00000173473', 'ENSG00000181458', 'ENSG00000101474', 'ENSG00000165209', 'ENSG00000046651', 'ENSG00000268061', 'ENSG00000221963', 'ENSG00000165325', 'ENSG00000100884', 'ENSG00000238189', 'ENSG00000241962', 'ENSG00000148803', 'ENSG00000175206', 'ENSG00000120805', 'ENSG00000049249', 'ENSG00000090776', 'ENSG00000070731', 'ENSG00000148339', 'ENSG00000113194', 'ENSG00000110628', 'ENSG00000147873', 'ENSG00000164600', 'ENSG00000126705', 'ENSG00000196502', 'ENSG00000154473', 'ENSG00000143921', 'ENSG00000105609', 'ENSG00000184076', 'ENSG00000119283', 'ENSG00000196361', 'ENSG00000109775', 'ENSG00000179869', 'ENSG00000123213', 'ENSG00000169836', 'ENSG00000184650', 'ENSG00000138459', 'ENSG00000163734', 'ENSG00000106028', 'ENSG00000100652', 'ENSG00000251442', 'ENSG00000167491', 'ENSG00000170748', 'ENSG00000166965', 
'ENSG00000203661', 'ENSG00000108061', 'ENSG00000203857', 'ENSG00000131398', 'ENSG00000143632', 'ENSG00000167914', 'ENSG00000221988', 'ENSG00000246016', 'ENSG00000139330', 'ENSG00000101773', 'ENSG00000233718', 'ENSG00000204481', 'ENSG00000173918', 'ENSG00000234186', 'ENSG00000186451', 'ENSG00000114124', 'ENSG00000178741', 'ENSG00000196407', 'ENSG00000119986', 'ENSG00000161551', 'ENSG00000105698', 'ENSG00000164756', 'ENSG00000205403', 'ENSG00000115091', 'ENSG00000173208', 'ENSG00000197651', 'ENSG00000203721', 'ENSG00000156253', 'ENSG00000196341', 'ENSG00000131116', 'ENSG00000141696', 'ENSG00000081692', 'ENSG00000126368', 'ENSG00000088002', 'ENSG00000090266', 'ENSG00000162909', 'ENSG00000186684', 'ENSG00000188038', 'ENSG00000153446', 'ENSG00000106771', 'ENSG00000078295', 'ENSG00000112053', 'ENSG00000117153', 'ENSG00000198843', 'ENSG00000164010', 'ENSG00000144120', 'ENSG00000187581', 'ENSG00000070770', 'ENSG00000148341', 'ENSG00000146360', 'ENSG00000137936', 'ENSG00000123338', 'ENSG00000120733', 'ENSG00000139154', 'ENSG00000268654', 'ENSG00000254415', 'ENSG00000198755', 'ENSG00000101435', 'ENSG00000118369', 'ENSG00000189067', 'ENSG00000118004', 'ENSG00000160808', 'ENSG00000178988', 'ENSG00000179144', 'ENSG00000184814', 'ENSG00000171634', 'ENSG00000136682', 'ENSG00000114544', 'ENSG00000115008', 'ENSG00000153246', 'ENSG00000269858', 'ENSG00000159212', 'ENSG00000214842', 'ENSG00000134757', 'ENSG00000131849', 'ENSG00000157087', 'ENSG00000268658', 'ENSG00000205129', 'ENSG00000212856', 'ENSG00000164002', 'ENSG00000147099', 'ENSG00000100321', 'ENSG00000163528', 'ENSG00000215403', 'ENSG00000165113', 'ENSG00000137522', 'ENSG00000125637', 'ENSG00000215644', 'ENSG00000181036', 'ENSG00000175274', 'ENSG00000205544', 'ENSG00000233975', 'ENSG00000261618', 'ENSG00000233818', 'ENSG00000108846', 'ENSG00000235268', 'ENSG00000143502', 'ENSG00000126709', 'ENSG00000170289', 'ENSG00000134215', 'ENSG00000240972', 'ENSG00000125730', 'ENSG00000204961', 'ENSG00000183783', 'ENSG00000172243', 
'ENSG00000196787', 'ENSG00000085449', 'ENSG00000186020', 'ENSG00000198185', 'ENSG00000153015', 'ENSG00000248330', 'ENSG00000137752', 'ENSG00000232401', 'ENSG00000233701', 'ENSG00000181104', 'ENSG00000160050', 'ENSG00000089195', 'ENSG00000006576', 'ENSG00000073536', 'ENSG00000107874', 'ENSG00000076043', 'ENSG00000163527', 'ENSG00000121310', 'ENSG00000186081', 'ENSG00000035115', 'ENSG00000245146', 'ENSG00000135241', 'ENSG00000124356', 'ENSG00000175322', 'ENSG00000100095', 'ENSG00000101391', 'ENSG00000064703', 'ENSG00000144451', 'ENSG00000131471', 'ENSG00000102103', 'ENSG00000099381', 'ENSG00000144214', 'ENSG00000197885', 'ENSG00000108384', 'ENSG00000151208', 'ENSG00000204764', 'ENSG00000249641', 'ENSG00000186318', 'ENSG00000151694', 'ENSG00000140807', 'ENSG00000115694', 'ENSG00000184445', 'ENSG00000184009', 'ENSG00000007944', 'ENSG00000054654', 'ENSG00000085998', 'ENSG00000164252', 'ENSG00000174197', 'ENSG00000205726', 'ENSG00000168477', 'ENSG00000167257', 'ENSG00000160993', 'ENSG00000177202', 'ENSG00000115526', 'ENSG00000125337', 'ENSG00000205022', 'ENSG00000168612', 'ENSG00000111252', 'ENSG00000172081', 'ENSG00000170627', 'ENSG00000235412', 'ENSG00000100938', 'ENSG00000158639', 'ENSG00000122565', 'ENSG00000197496', 'ENSG00000126883', 'ENSG00000163235', 'ENSG00000151090', 'ENSG00000236371', 'ENSG00000141837', 'ENSG00000235385', 'ENSG00000167104', 'ENSG00000076003', 'ENSG00000236053', 'ENSG00000198785', 'ENSG00000119614', 'ENSG00000198954', 'ENSG00000205930', 'ENSG00000159556', 'ENSG00000250021', 'ENSG00000103319', 'ENSG00000139132', 'ENSG00000224122', 'ENSG00000107404', 'ENSG00000124091', 'ENSG00000151224', 'ENSG00000120694', 'ENSG00000234438', 'ENSG00000233070', 'ENSG00000156804', 'ENSG00000064270', 'ENSG00000100934', 'ENSG00000117984', 'ENSG00000176909', 'ENSG00000154143', 'ENSG00000248405', 'ENSG00000126261', 'ENSG00000234602', 'ENSG00000121067', 'ENSG00000167757', 'ENSG00000162373', 'ENSG00000115282', 'ENSG00000112144', 'ENSG00000154162', 'ENSG00000136240', 
'ENSG00000124253', 'ENSG00000177614', 'ENSG00000151135', 'ENSG00000161914', 'ENSG00000172020', 'ENSG00000140319', 'ENSG00000178055', 'ENSG00000182111', 'ENSG00000155066', 'ENSG00000152804', 'ENSG00000157703', 'ENSG00000233421', 'ENSG00000110047', 'ENSG00000094963', 'ENSG00000163785', 'ENSG00000100151', 'ENSG00000197177', 'ENSG00000180776', 'ENSG00000106144', 'ENSG00000176904', 'ENSG00000184302', 'ENSG00000204464', 'ENSG00000111817', 'ENSG00000176742', 'ENSG00000103021', 'ENSG00000238057', 'ENSG00000154328', 'ENSG00000261485', 'ENSG00000230453', 'ENSG00000231500', 'ENSG00000136758', 'ENSG00000005075', 'ENSG00000213533', 'ENSG00000261832', 'ENSG00000196704', 'ENSG00000204301', 'ENSG00000177485', 'ENSG00000002822', 'ENSG00000204323', 'ENSG00000115274', 'ENSG00000106560', 'ENSG00000261043', 'ENSG00000174957', 'ENSG00000224186', 'ENSG00000100908', 'ENSG00000187098', 'ENSG00000256683', 'ENSG00000104907', 'ENSG00000115561', 'ENSG00000175115', 'ENSG00000125753', 'ENSG00000256769', 'ENSG00000228889', 'ENSG00000122787', 'ENSG00000143612', 'ENSG00000010278', 'ENSG00000166130', 'ENSG00000261498', 'ENSG00000171867', 'ENSG00000091483', 'ENSG00000084207', 'ENSG00000188266', 'ENSG00000204138', 'ENSG00000175514', 'ENSG00000125629', 'ENSG00000129824', 'ENSG00000183617', 'ENSG00000111653', 'ENSG00000196366', 'ENSG00000071859', 'ENSG00000133124', 'ENSG00000187838', 'ENSG00000147799', 'ENSG00000109991', 'ENSG00000242516', 'ENSG00000136156', 'ENSG00000143147', 'ENSG00000223501', 'ENSG00000173200', 'ENSG00000106415', 'ENSG00000142606', 'ENSG00000165669', 'ENSG00000112232', 'ENSG00000175354', 'ENSG00000139151', 'ENSG00000135093', 'ENSG00000188092', 'ENSG00000135912', 'ENSG00000114166', 'ENSG00000261235', 'ENSG00000170801', 'ENSG00000221882', 'ENSG00000135334', 'ENSG00000152503', 'ENSG00000183741', 'ENSG00000171466', 'ENSG00000255974', 'ENSG00000117419', 'ENSG00000132321', 'ENSG00000182022', 'ENSG00000224405', 'ENSG00000104888', 'ENSG00000179397', 'ENSG00000105707', 'ENSG00000165914', 
'ENSG00000170921', 'ENSG00000049089', 'ENSG00000108786', 'ENSG00000155906', 'ENSG00000232225', 'ENSG00000185985', 'ENSG00000134802', 'ENSG00000169418', 'ENSG00000163281', 'ENSG00000197586', 'ENSG00000204435', 'ENSG00000196166', 'ENSG00000177462', 'ENSG00000248397', 'ENSG00000171488', 'ENSG00000179407', 'ENSG00000120341', 'ENSG00000100811', 'ENSG00000239713', 'ENSG00000105486', 'ENSG00000163312', 'ENSG00000101639', 'ENSG00000162073', 'ENSG00000163389', 'ENSG00000171942', 'ENSG00000236822', 'ENSG00000089639', 'ENSG00000166016', 'ENSG00000109861', 'ENSG00000141562', 'ENSG00000224559', 'ENSG00000124237', 'ENSG00000084444', 'ENSG00000092295', 'ENSG00000155111', 'ENSG00000235641', 'ENSG00000243709', 'ENSG00000139329', 'ENSG00000091583', 'ENSG00000154319', 'ENSG00000205174', 'ENSG00000085265', 'ENSG00000171824', 'ENSG00000213057', 'ENSG00000178591', 'ENSG00000157191', 'ENSG00000249348', 'ENSG00000188676', 'ENSG00000232307', 'ENSG00000155511', 'ENSG00000164334', 'ENSG00000198039', 'ENSG00000042304', 'ENSG00000172264', 'ENSG00000173976', 'ENSG00000163374', 'ENSG00000134321', 'ENSG00000188162', 'ENSG00000246100', 'ENSG00000104818', 'ENSG00000157985', 'ENSG00000178694', 'ENSG00000112305', 'ENSG00000159579', 'ENSG00000164920', 'ENSG00000214706', 'ENSG00000144749', 'ENSG00000145220', 'ENSG00000067048', 'ENSG00000227356', 'ENSG00000130758', 'ENSG00000165516', 'ENSG00000131873', 'ENSG00000125863', 'ENSG00000010932', 'ENSG00000131462', 'ENSG00000008083', 'ENSG00000157219', 'ENSG00000224459', 'ENSG00000125746', 'ENSG00000116809', 'ENSG00000106927', 'ENSG00000168904', 'ENSG00000112599', 'ENSG00000097021', 'ENSG00000068305', 'ENSG00000250349', 'ENSG00000147100', 'ENSG00000197046', 'ENSG00000175646', 'ENSG00000125878', 'ENSG00000197283', 'ENSG00000235699', 'ENSG00000184967', 'ENSG00000227695', 'ENSG00000176194', 'ENSG00000110046', 'ENSG00000188582', 'ENSG00000253873', 'ENSG00000162755', 'ENSG00000161652', 'ENSG00000091010', 'ENSG00000196944', 'ENSG00000155970', 'ENSG00000122679', 
'ENSG00000106302', 'ENSG00000134595', 'ENSG00000110583', 'ENSG00000170889', 'ENSG00000232192', 'ENSG00000095321', 'ENSG00000156471', 'ENSG00000135406', 'ENSG00000164619', 'ENSG00000189350', 'ENSG00000183251', 'ENSG00000058272', 'ENSG00000177150', 'ENSG00000106635', 'ENSG00000109424', 'ENSG00000133943', 'ENSG00000081051', 'ENSG00000147234', 'ENSG00000132274', 'ENSG00000258710', 'ENSG00000251173', 'ENSG00000125388', 'ENSG00000225234', 'ENSG00000119509', 'ENSG00000006283', 'ENSG00000250317', 'ENSG00000234056', 'ENSG00000188000', 'ENSG00000109099', 'ENSG00000182185', 'ENSG00000167601', 'ENSG00000109670', 'ENSG00000249459', 'ENSG00000137404', 'ENSG00000162630', 'ENSG00000117758', 'ENSG00000105287', 'ENSG00000134909', 'ENSG00000175806', 'ENSG00000158201', 'ENSG00000001460', 'ENSG00000203362', 'ENSG00000185477', 'ENSG00000187189', 'ENSG00000101310', 'ENSG00000163762', 'ENSG00000136457', 'ENSG00000065518', 'ENSG00000141391', 'ENSG00000172466', 'ENSG00000128656', 'ENSG00000155966', 'ENSG00000151704', 'ENSG00000157796', 'ENSG00000163406', 'ENSG00000120833', 'ENSG00000105928', 'ENSG00000197745', 'ENSG00000111877', 'ENSG00000071282', 'ENSG00000234478', 'ENSG00000110921', 'ENSG00000227659', 'ENSG00000134258', 'ENSG00000146776', 'ENSG00000137714', 'ENSG00000169906', 'ENSG00000155324', 'ENSG00000072518', 'ENSG00000237489', 'ENSG00000241211', 'ENSG00000174483', 'ENSG00000115355', 'ENSG00000124145', 'ENSG00000131143', 'ENSG00000078814', 'ENSG00000128185', 'ENSG00000215529', 'ENSG00000125848', 'ENSG00000143033', 'ENSG00000100221', 'ENSG00000137413', 'ENSG00000179262', 'ENSG00000169126', 'ENSG00000179271', 'ENSG00000177476', 'ENSG00000181513', 'ENSG00000153157', 'ENSG00000125868', 'ENSG00000146674', 'ENSG00000242242', 'ENSG00000103507', 'ENSG00000181722', 'ENSG00000132026', 'ENSG00000125046', 'ENSG00000171606', 'ENSG00000166800', 'ENSG00000106992', 'ENSG00000136720', 'ENSG00000143595', 'ENSG00000180233', 'ENSG00000101463', 'ENSG00000035403', 'ENSG00000081913', 'ENSG00000205250', 
'ENSG00000180815', 'ENSG00000139287', 'ENSG00000138594', 'ENSG00000224051', 'ENSG00000127564', 'ENSG00000198829', 'ENSG00000180354', 'ENSG00000255804', 'ENSG00000001167', 'ENSG00000135632', 'ENSG00000128989', 'ENSG00000130997', 'ENSG00000188909', 'ENSG00000250748', 'ENSG00000227488', 'ENSG00000172785', 'ENSG00000121068', 'ENSG00000031081', 'ENSG00000107938', 'ENSG00000120533', 'ENSG00000197694', 'ENSG00000167797', 'ENSG00000127252', 'ENSG00000203909', 'ENSG00000142534', 'ENSG00000204516', 'ENSG00000215568', 'ENSG00000162413', 'ENSG00000178734', 'ENSG00000184557', 'ENSG00000100418', 'ENSG00000180353', 'ENSG00000106266', 'ENSG00000105270', 'ENSG00000177885', 'ENSG00000164045', 'ENSG00000167588', 'ENSG00000188906', 'ENSG00000143032', 'ENSG00000135914', 'ENSG00000189280', 'ENSG00000154016', 'ENSG00000104983', 'ENSG00000096433', 'ENSG00000172404', 'ENSG00000138756', 'ENSG00000126254', 'ENSG00000162885', 'ENSG00000270011', 'ENSG00000205846', 'ENSG00000260903', 'ENSG00000180822', 'ENSG00000078795', 'ENSG00000172273', 'ENSG00000196227', 'ENSG00000258839', 'ENSG00000179213', 'ENSG00000114573', 'ENSG00000008196', 'ENSG00000183734', 'ENSG00000168907', 'ENSG00000089041', 'ENSG00000212657', 'ENSG00000169154', 'ENSG00000175329', 'ENSG00000165837', 'ENSG00000137252', 'ENSG00000100721', 'ENSG00000239886', 'ENSG00000162383', 'ENSG00000214013', 'ENSG00000172380', 'ENSG00000144668', 'ENSG00000223547', 'ENSG00000138071', 'ENSG00000073417', 'ENSG00000174255', 'ENSG00000197016', 'ENSG00000172115', 'ENSG00000122711', 'ENSG00000196420', 'ENSG00000102243', 'ENSG00000075151', 'ENSG00000198001', 'ENSG00000186583', 'ENSG00000165626', 'ENSG00000233610', 'ENSG00000132950', 'ENSG00000143061', 'ENSG00000164830', 'ENSG00000143622', 'ENSG00000111885', 'ENSG00000107317', 'ENSG00000250133', 'ENSG00000231310', 'ENSG00000156006', 'ENSG00000140988', 'ENSG00000175018', 'ENSG00000058085', 'ENSG00000174914', 'ENSG00000105472', 'ENSG00000142149', 'ENSG00000183067', 'ENSG00000004975', 'ENSG00000205858', 
'ENSG00000164885', 'ENSG00000186448', 'ENSG00000186493', 'ENSG00000204379', 'ENSG00000144227', 'ENSG00000168876', 'ENSG00000142910', 'ENSG00000262074', 'ENSG00000159184', 'ENSG00000113648', 'ENSG00000223882', 'ENSG00000198454', 'ENSG00000175482', 'ENSG00000254535', 'ENSG00000057019', 'ENSG00000204475', 'ENSG00000186395', 'ENSG00000184481', 'ENSG00000076356', 'ENSG00000101981', 'ENSG00000157978', 'ENSG00000177335', 'ENSG00000205078', 'ENSG00000224418', 'ENSG00000122877', 'ENSG00000237232', 'ENSG00000204444', 'ENSG00000248079', 'ENSG00000154760', 'ENSG00000146477', 'ENSG00000003137', 'ENSG00000164128', 'ENSG00000000003', 'ENSG00000149781', 'ENSG00000198400', 'ENSG00000124003', 'ENSG00000175591', 'ENSG00000179165', 'ENSG00000174827', 'ENSG00000259070', 'ENSG00000125910', 'ENSG00000055332', 'ENSG00000105669', 'ENSG00000115461', 'ENSG00000120008', 'ENSG00000105205', 'ENSG00000143867', 'ENSG00000147439', 'ENSG00000135175', 'ENSG00000181718', 'ENSG00000184905', 'ENSG00000126233', 'ENSG00000258701', 'ENSG00000198301', 'ENSG00000108379', 'ENSG00000150667', 'ENSG00000163995', 'ENSG00000079999', 'ENSG00000149743', 'ENSG00000166961', 'ENSG00000221923', 'ENSG00000183624', 'ENSG00000162402', 'ENSG00000163636', 'ENSG00000110717', 'ENSG00000168843', 'ENSG00000138395', 'ENSG00000085415', 'ENSG00000126391', 'ENSG00000169684', 'ENSG00000185896', 'ENSG00000181027', 'ENSG00000000971', 'ENSG00000091106', 'ENSG00000140675', 'ENSG00000196074', 'ENSG00000105971', 'ENSG00000225255', 'ENSG00000118985', 'ENSG00000172828', 'ENSG00000054277', 'ENSG00000214681', 'ENSG00000250334', 'ENSG00000139343', 'ENSG00000162552', 'ENSG00000164185', 'ENSG00000107882', 'ENSG00000251655', 'ENSG00000136535', 'ENSG00000087269', 'ENSG00000267374', 'ENSG00000182117', 'ENSG00000214941', 'ENSG00000134153', 'ENSG00000251015', 'ENSG00000171916', 'ENSG00000177398', 'ENSG00000205899', 'ENSG00000167397', 'ENSG00000166685', 'ENSG00000248866', 'ENSG00000075399', 'ENSG00000179083', 'ENSG00000117262', 'ENSG00000119912', 
'ENSG00000101888', 'ENSG00000243207', 'ENSG00000238121', 'ENSG00000139874', 'ENSG00000241769', 'ENSG00000250106', 'ENSG00000268006', 'ENSG00000159792', 'ENSG00000157510', 'ENSG00000228369', 'ENSG00000171811', 'ENSG00000164953', 'ENSG00000143786', 'ENSG00000203667', 'ENSG00000164611', 'ENSG00000081307', 'ENSG00000152457', 'ENSG00000163319', 'ENSG00000183638', 'ENSG00000145425', 'ENSG00000179044']
      → save terms via .add_validated_from_var_index()
False
# The cells were subjected to several types of perturbations, which we will curate separately.
# Count how many cells fall under each perturbation type.
adata.obs.perturbation_type.value_counts()
Hide code cell output
perturbation_type
drug      855
CRISPR    145
Name: count, dtype: int64

Curate non-perturbation metadata

# Map each categorical obs column to the registry field it is validated against.
categoricals = {
    "depmap_id": bt.CellLine.ontology_id,
    "cell_line": bt.CellLine.name,
    "disease": bt.Disease.name,
    "organism": bt.Organism.name,
    "perturbation_type": ln.ULabel.name,
    "sex": bt.Phenotype.name,
    "time": ln.ULabel.name,
    "tissue_type": ln.ULabel.name,
}

# Resolve each source exactly once. The depmap source backs both cell line
# columns, so it is looked up a single time and reused instead of running the
# same query twice.
gene_source = bt.Source.filter(entity="bionty.Gene", version="release-112", organism="human").one()
depmap_source = bt.Source.filter(name="depmap").one()
sources = {
    "var_index": gene_source,
    "depmap_id": depmap_source,
    "cell_line": depmap_source,
}

# Rebuild the curator, now with the categorical columns and their sources.
curate = ln.Curator.from_anndata(
    adata,
    var_index=bt.Gene.ensembl_gene_id,
    categoricals=categoricals,
    organism="human",
    sources=sources,
)

curate.validate()
Hide code cell output
✓    added 8 records with Feature.name for columns: 'depmap_id', 'cell_line', 'disease', 'organism', 'perturbation_type', 'sex', 'time', 'tissue_type'
27 non-validated values are not saved in Feature.name: ['singlet_dev_z', 'singlet_ID', 'singlet_z_margin', 'doublet_CL1', 'cell_quality', 'hash_tag', 'cell_det_rate', 'num_SNPs', 'singlet_margin', 'doublet_CL2', 'channel', 'percent_ribo', 'singlet_dev', 'ngenes', 'percent_mito', 'doublet_GMM_prob', 'hash_assignment', 'ncounts', 'chembl-ID', 'dose_value', 'doublet_dev_imp', 'perturbation', 'tot_reads', 'nperts', 'doublet_z_margin', 'dose_unit', 'cancer']!
      → to lookup values, use lookup().columns
      → to save, run add_new_from_columns
• mapping var_index on Gene.ensembl_gene_id
!    found 1279 validated terms: ['ENSG00000102316', 'ENSG00000109472', 'ENSG00000080007', 'ENSG00000203926', 'ENSG00000127419', 'ENSG00000108960', 'ENSG00000126870', 'ENSG00000121797', 'ENSG00000243927', 'ENSG00000143473', 'ENSG00000115665', 'ENSG00000180613', 'ENSG00000167283', 'ENSG00000160472', 'ENSG00000110768', 'ENSG00000124507', 'ENSG00000257381', 'ENSG00000129451', 'ENSG00000228237', 'ENSG00000137033', 'ENSG00000120868', 'ENSG00000079616', 'ENSG00000177082', 'ENSG00000227392', 'ENSG00000251620', 'ENSG00000174804', 'ENSG00000057935', 'ENSG00000251493', 'ENSG00000164182', 'ENSG00000112033', 'ENSG00000236388', 'ENSG00000116039', 'ENSG00000131095', 'ENSG00000170956', 'ENSG00000104870', 'ENSG00000259494', 'ENSG00000116906', 'ENSG00000173599', 'ENSG00000187080', 'ENSG00000167258', 'ENSG00000131473', 'ENSG00000107742', 'ENSG00000144791', 'ENSG00000198286', 'ENSG00000196267', 'ENSG00000165966', 'ENSG00000124383', 'ENSG00000154957', 'ENSG00000196689', 'ENSG00000133627', 'ENSG00000049656', 'ENSG00000163918', 'ENSG00000163517', 'ENSG00000166133', 'ENSG00000003393', 'ENSG00000186628', 'ENSG00000136002', 'ENSG00000163629', 'ENSG00000136319', 'ENSG00000189159', 'ENSG00000166770', 'ENSG00000167548', 'ENSG00000134780', 'ENSG00000257127', 'ENSG00000205571', 'ENSG00000128309', 'ENSG00000111145', 'ENSG00000164587', 'ENSG00000096395', 'ENSG00000188820', 'ENSG00000148408', 'ENSG00000111554', 'ENSG00000140943', 'ENSG00000133101', 'ENSG00000148655', 'ENSG00000073969', 'ENSG00000231671', 'ENSG00000198919', 'ENSG00000107331', 'ENSG00000120440', 'ENSG00000111801', 'ENSG00000267013', 'ENSG00000259060', 'ENSG00000117395', 'ENSG00000198938', 'ENSG00000204595', 'ENSG00000183304', 'ENSG00000165102', 'ENSG00000162460', 'ENSG00000150471', 'ENSG00000137634', 'ENSG00000106688', 'ENSG00000254413', 'ENSG00000112282', 'ENSG00000101146', 'ENSG00000162004', 'ENSG00000175768', 'ENSG00000171067', 'ENSG00000103365', 'ENSG00000206052', 'ENSG00000168288', 'ENSG00000240476', 'ENSG00000116833', 
'ENSG00000111331', 'ENSG00000215252', 'ENSG00000143355', 'ENSG00000270800', 'ENSG00000161692', 'ENSG00000089053', 'ENSG00000189007', 'ENSG00000182986', 'ENSG00000107859', 'ENSG00000099203', 'ENSG00000171861', 'ENSG00000125744', 'ENSG00000204880', 'ENSG00000239877', 'ENSG00000229619', 'ENSG00000273155', 'ENSG00000166741', 'ENSG00000117139', 'ENSG00000153975', 'ENSG00000101188', 'ENSG00000146039', 'ENSG00000196666', 'ENSG00000146733', 'ENSG00000251022', 'ENSG00000163218', 'ENSG00000169857', 'ENSG00000206432', 'ENSG00000128617', 'ENSG00000142173', 'ENSG00000179115', 'ENSG00000117318', 'ENSG00000131459', 'ENSG00000178403', 'ENSG00000038295', 'ENSG00000148288', 'ENSG00000242612', 'ENSG00000082781', 'ENSG00000139684', 'ENSG00000186272', 'ENSG00000167604', 'ENSG00000111049', 'ENSG00000149357', 'ENSG00000132975', 'ENSG00000166823', 'ENSG00000102871', 'ENSG00000205155', 'ENSG00000183153', 'ENSG00000120438', 'ENSG00000157045', 'ENSG00000179751', 'ENSG00000140259', 'ENSG00000269058', 'ENSG00000171817', 'ENSG00000130383', 'ENSG00000155438', 'ENSG00000152433', 'ENSG00000204463', 'ENSG00000146521', 'ENSG00000174950', 'ENSG00000144741', 'ENSG00000174403', 'ENSG00000062096', 'ENSG00000120656', 'ENSG00000009790', 'ENSG00000166037', 'ENSG00000147650', 'ENSG00000127325', 'ENSG00000139209', 'ENSG00000153317', 'ENSG00000132475', 'ENSG00000006453', 'ENSG00000160813', 'ENSG00000138629', 'ENSG00000166450', 'ENSG00000175634', 'ENSG00000163254', 'ENSG00000261206', 'ENSG00000167608', 'ENSG00000213619', 'ENSG00000109171', 'ENSG00000116685', 'ENSG00000251258', 'ENSG00000062038', 'ENSG00000110497', 'ENSG00000069509', 'ENSG00000231944', 'ENSG00000166171', 'ENSG00000082512', 'ENSG00000162377', 'ENSG00000236311', 'ENSG00000146410', 'ENSG00000105369', 'ENSG00000137275', 'ENSG00000132846', 'ENSG00000160200', 'ENSG00000092148', 'ENSG00000184895', 'ENSG00000076351', 'ENSG00000011198', 'ENSG00000134531', 'ENSG00000163157', 'ENSG00000130584', 'ENSG00000174547', 'ENSG00000099994', 'ENSG00000146963', 
'ENSG00000229557', 'ENSG00000197683', 'ENSG00000185033', 'ENSG00000175548', 'ENSG00000072133', 'ENSG00000205085', 'ENSG00000137040', 'ENSG00000180138', 'ENSG00000173905', 'ENSG00000137166', 'ENSG00000177294', 'ENSG00000114942', 'ENSG00000112658', 'ENSG00000150594', 'ENSG00000164631', 'ENSG00000151650', 'ENSG00000223638', 'ENSG00000229086', 'ENSG00000198542', 'ENSG00000196436', 'ENSG00000175854', 'ENSG00000255181', 'ENSG00000101004', 'ENSG00000217930', 'ENSG00000077935', 'ENSG00000172340', 'ENSG00000023191', 'ENSG00000189164', 'ENSG00000177990', 'ENSG00000179873', 'ENSG00000187772', 'ENSG00000155729', 'ENSG00000124440', 'ENSG00000142694', 'ENSG00000105875', 'ENSG00000158828', 'ENSG00000111371', 'ENSG00000159337', 'ENSG00000176153', 'ENSG00000168702', 'ENSG00000235718', 'ENSG00000134873', 'ENSG00000141096', 'ENSG00000116786', 'ENSG00000054938', 'ENSG00000239225', 'ENSG00000086967', 'ENSG00000171431', 'ENSG00000188263', 'ENSG00000170807', 'ENSG00000007129', 'ENSG00000168234', 'ENSG00000173141', 'ENSG00000244395', 'ENSG00000100181', 'ENSG00000118898', 'ENSG00000185960', 'ENSG00000168269', 'ENSG00000084112', 'ENSG00000096872', 'ENSG00000175066', 'ENSG00000203952', 'ENSG00000196767', 'ENSG00000138381', 'ENSG00000142661', 'ENSG00000198815', 'ENSG00000204538', 'ENSG00000118200', 'ENSG00000147655', 'ENSG00000145194', 'ENSG00000213088', 'ENSG00000177272', 'ENSG00000126464', 'ENSG00000175087', 'ENSG00000011021', 'ENSG00000125834', 'ENSG00000197766', 'ENSG00000119514', 'ENSG00000165471', 'ENSG00000172469', 'ENSG00000176142', 'ENSG00000173786', 'ENSG00000247473', 'ENSG00000119943', 'ENSG00000188342', 'ENSG00000121481', 'ENSG00000080815', 'ENSG00000165506', 'ENSG00000161835', 'ENSG00000184117', 'ENSG00000188649', 'ENSG00000114480', 'ENSG00000134333', 'ENSG00000110900', 'ENSG00000082293', 'ENSG00000151640', 'ENSG00000059377', 'ENSG00000125514', 'ENSG00000148468', 'ENSG00000148110', 'ENSG00000039139', 'ENSG00000116151', 'ENSG00000197620', 'ENSG00000154025', 'ENSG00000151690', 
'ENSG00000188523', 'ENSG00000196961', 'ENSG00000048740', 'ENSG00000225473', 'ENSG00000020256', 'ENSG00000106683', 'ENSG00000105204', 'ENSG00000136352', 'ENSG00000183281', 'ENSG00000254377', 'ENSG00000180596', 'ENSG00000138347', 'ENSG00000157335', 'ENSG00000227051', 'ENSG00000187624', 'ENSG00000244462', 'ENSG00000250091', 'ENSG00000142207', 'ENSG00000164663', 'ENSG00000003756', 'ENSG00000170523', 'ENSG00000141431', 'ENSG00000106404', 'ENSG00000254656', 'ENSG00000174348', 'ENSG00000130827', 'ENSG00000145391', 'ENSG00000169499', 'ENSG00000233349', 'ENSG00000130700', 'ENSG00000159128', 'ENSG00000242715', 'ENSG00000140526', 'ENSG00000212747', 'ENSG00000214827', 'ENSG00000181982', 'ENSG00000223914', 'ENSG00000204351', 'ENSG00000185261', 'ENSG00000151388', 'ENSG00000184345', 'ENSG00000172782', 'ENSG00000005302', 'ENSG00000127515', 'ENSG00000136938', 'ENSG00000169750', 'ENSG00000213588', 'ENSG00000186862', 'ENSG00000151445', 'ENSG00000106609', 'ENSG00000120436', 'ENSG00000180872', 'ENSG00000116977', 'ENSG00000100522', 'ENSG00000133706', 'ENSG00000248920', 'ENSG00000184436', 'ENSG00000125247', 'ENSG00000164621', 'ENSG00000104218', 'ENSG00000111834', 'ENSG00000234323', 'ENSG00000260286', 'ENSG00000082068', 'ENSG00000205111', 'ENSG00000166261', 'ENSG00000197580', 'ENSG00000168214', 'ENSG00000018625', 'ENSG00000236637', 'ENSG00000165006', 'ENSG00000113575', 'ENSG00000140396', 'ENSG00000145337', 'ENSG00000135677', 'ENSG00000151379', 'ENSG00000169155', 'ENSG00000164761', 'ENSG00000234527', 'ENSG00000149633', 'ENSG00000079974', 'ENSG00000108474', 'ENSG00000103061', 'ENSG00000253457', 'ENSG00000120057', 'ENSG00000176230', 'ENSG00000043514', 'ENSG00000167483', 'ENSG00000145781', 'ENSG00000134775', 'ENSG00000086991', 'ENSG00000138107', 'ENSG00000149150', 'ENSG00000003989', 'ENSG00000063438', 'ENSG00000175643', 'ENSG00000198062', 'ENSG00000188766', 'ENSG00000089094', 'ENSG00000109814', 'ENSG00000173467', 'ENSG00000110066', 'ENSG00000125879', 'ENSG00000004838', 'ENSG00000109113', 
'ENSG00000120594', 'ENSG00000108094', 'ENSG00000169016', 'ENSG00000125148', 'ENSG00000257591', 'ENSG00000242689', 'ENSG00000141198', 'ENSG00000080709', 'ENSG00000142864', 'ENSG00000156675', 'ENSG00000157557', 'ENSG00000148215', 'ENSG00000111247', 'ENSG00000173715', 'ENSG00000205220', 'ENSG00000213799', 'ENSG00000173702', 'ENSG00000188729', 'ENSG00000099800', 'ENSG00000181555', 'ENSG00000234684', 'ENSG00000111481', 'ENSG00000185630', 'ENSG00000136840', 'ENSG00000164047', 'ENSG00000139726', 'ENSG00000243477', 'ENSG00000183401', 'ENSG00000168993', 'ENSG00000239839', 'ENSG00000179284', 'ENSG00000140992', 'ENSG00000135363', 'ENSG00000115425', 'ENSG00000121380', 'ENSG00000119041', 'ENSG00000123243', 'ENSG00000139192', 'ENSG00000165632', 'ENSG00000114054', 'ENSG00000049860', 'ENSG00000248698', 'ENSG00000168591', 'ENSG00000132561', 'ENSG00000139180', 'ENSG00000166987', 'ENSG00000167384', 'ENSG00000156140', 'ENSG00000119686', 'ENSG00000228567', 'ENSG00000182810', 'ENSG00000234230', 'ENSG00000125144', 'ENSG00000126775', 'ENSG00000240770', 'ENSG00000205126', 'ENSG00000172840', 'ENSG00000103381', 'ENSG00000178057', 'ENSG00000108064', 'ENSG00000120322', 'ENSG00000139428', 'ENSG00000173473', 'ENSG00000181458', 'ENSG00000101474', 'ENSG00000165209', 'ENSG00000046651', 'ENSG00000268061', 'ENSG00000221963', 'ENSG00000165325', 'ENSG00000100884', 'ENSG00000238189', 'ENSG00000241962', 'ENSG00000148803', 'ENSG00000175206', 'ENSG00000120805', 'ENSG00000049249', 'ENSG00000090776', 'ENSG00000070731', 'ENSG00000148339', 'ENSG00000113194', 'ENSG00000110628', 'ENSG00000147873', 'ENSG00000164600', 'ENSG00000126705', 'ENSG00000196502', 'ENSG00000154473', 'ENSG00000143921', 'ENSG00000105609', 'ENSG00000184076', 'ENSG00000119283', 'ENSG00000196361', 'ENSG00000109775', 'ENSG00000179869', 'ENSG00000123213', 'ENSG00000169836', 'ENSG00000184650', 'ENSG00000138459', 'ENSG00000163734', 'ENSG00000106028', 'ENSG00000100652', 'ENSG00000251442', 'ENSG00000167491', 'ENSG00000170748', 'ENSG00000166965', 
'ENSG00000203661', 'ENSG00000108061', 'ENSG00000203857', 'ENSG00000131398', 'ENSG00000143632', 'ENSG00000167914', 'ENSG00000221988', 'ENSG00000246016', 'ENSG00000139330', 'ENSG00000101773', 'ENSG00000233718', 'ENSG00000204481', 'ENSG00000173918', 'ENSG00000234186', 'ENSG00000186451', 'ENSG00000114124', 'ENSG00000178741', 'ENSG00000196407', 'ENSG00000119986', 'ENSG00000161551', 'ENSG00000105698', 'ENSG00000164756', 'ENSG00000205403', 'ENSG00000115091', 'ENSG00000173208', 'ENSG00000197651', 'ENSG00000203721', 'ENSG00000156253', 'ENSG00000196341', 'ENSG00000131116', 'ENSG00000141696', 'ENSG00000081692', 'ENSG00000126368', 'ENSG00000088002', 'ENSG00000090266', 'ENSG00000162909', 'ENSG00000186684', 'ENSG00000188038', 'ENSG00000153446', 'ENSG00000106771', 'ENSG00000078295', 'ENSG00000112053', 'ENSG00000117153', 'ENSG00000198843', 'ENSG00000164010', 'ENSG00000144120', 'ENSG00000187581', 'ENSG00000070770', 'ENSG00000148341', 'ENSG00000146360', 'ENSG00000137936', 'ENSG00000123338', 'ENSG00000120733', 'ENSG00000139154', 'ENSG00000268654', 'ENSG00000254415', 'ENSG00000198755', 'ENSG00000101435', 'ENSG00000118369', 'ENSG00000189067', 'ENSG00000118004', 'ENSG00000160808', 'ENSG00000178988', 'ENSG00000179144', 'ENSG00000184814', 'ENSG00000171634', 'ENSG00000136682', 'ENSG00000114544', 'ENSG00000115008', 'ENSG00000153246', 'ENSG00000269858', 'ENSG00000159212', 'ENSG00000214842', 'ENSG00000134757', 'ENSG00000131849', 'ENSG00000157087', 'ENSG00000268658', 'ENSG00000205129', 'ENSG00000212856', 'ENSG00000164002', 'ENSG00000147099', 'ENSG00000100321', 'ENSG00000163528', 'ENSG00000215403', 'ENSG00000165113', 'ENSG00000137522', 'ENSG00000125637', 'ENSG00000215644', 'ENSG00000181036', 'ENSG00000175274', 'ENSG00000205544', 'ENSG00000233975', 'ENSG00000261618', 'ENSG00000233818', 'ENSG00000108846', 'ENSG00000235268', 'ENSG00000143502', 'ENSG00000126709', 'ENSG00000170289', 'ENSG00000134215', 'ENSG00000240972', 'ENSG00000125730', 'ENSG00000204961', 'ENSG00000183783', 'ENSG00000172243', 
'ENSG00000196787', 'ENSG00000085449', 'ENSG00000186020', 'ENSG00000198185', 'ENSG00000153015', 'ENSG00000248330', 'ENSG00000137752', 'ENSG00000232401', 'ENSG00000233701', 'ENSG00000181104', 'ENSG00000160050', 'ENSG00000089195', 'ENSG00000006576', 'ENSG00000073536', 'ENSG00000107874', 'ENSG00000076043', 'ENSG00000163527', 'ENSG00000121310', 'ENSG00000186081', 'ENSG00000035115', 'ENSG00000245146', 'ENSG00000135241', 'ENSG00000124356', 'ENSG00000175322', 'ENSG00000100095', 'ENSG00000101391', 'ENSG00000064703', 'ENSG00000144451', 'ENSG00000131471', 'ENSG00000102103', 'ENSG00000099381', 'ENSG00000144214', 'ENSG00000197885', 'ENSG00000108384', 'ENSG00000151208', 'ENSG00000204764', 'ENSG00000249641', 'ENSG00000186318', 'ENSG00000151694', 'ENSG00000140807', 'ENSG00000115694', 'ENSG00000184445', 'ENSG00000184009', 'ENSG00000007944', 'ENSG00000054654', 'ENSG00000085998', 'ENSG00000164252', 'ENSG00000174197', 'ENSG00000205726', 'ENSG00000168477', 'ENSG00000167257', 'ENSG00000160993', 'ENSG00000177202', 'ENSG00000115526', 'ENSG00000125337', 'ENSG00000205022', 'ENSG00000168612', 'ENSG00000111252', 'ENSG00000172081', 'ENSG00000170627', 'ENSG00000235412', 'ENSG00000100938', 'ENSG00000158639', 'ENSG00000122565', 'ENSG00000197496', 'ENSG00000126883', 'ENSG00000163235', 'ENSG00000151090', 'ENSG00000236371', 'ENSG00000141837', 'ENSG00000235385', 'ENSG00000167104', 'ENSG00000076003', 'ENSG00000236053', 'ENSG00000198785', 'ENSG00000119614', 'ENSG00000198954', 'ENSG00000205930', 'ENSG00000159556', 'ENSG00000250021', 'ENSG00000103319', 'ENSG00000139132', 'ENSG00000224122', 'ENSG00000107404', 'ENSG00000124091', 'ENSG00000151224', 'ENSG00000120694', 'ENSG00000234438', 'ENSG00000233070', 'ENSG00000156804', 'ENSG00000064270', 'ENSG00000100934', 'ENSG00000117984', 'ENSG00000176909', 'ENSG00000154143', 'ENSG00000248405', 'ENSG00000126261', 'ENSG00000234602', 'ENSG00000121067', 'ENSG00000167757', 'ENSG00000162373', 'ENSG00000115282', 'ENSG00000112144', 'ENSG00000154162', 'ENSG00000136240', 
'ENSG00000124253', 'ENSG00000177614', 'ENSG00000151135', 'ENSG00000161914', 'ENSG00000172020', 'ENSG00000140319', 'ENSG00000178055', 'ENSG00000182111', 'ENSG00000155066', 'ENSG00000152804', 'ENSG00000157703', 'ENSG00000233421', 'ENSG00000110047', 'ENSG00000094963', 'ENSG00000163785', 'ENSG00000100151', 'ENSG00000197177', 'ENSG00000180776', 'ENSG00000106144', 'ENSG00000176904', 'ENSG00000184302', 'ENSG00000204464', 'ENSG00000111817', 'ENSG00000176742', 'ENSG00000103021', 'ENSG00000238057', 'ENSG00000154328', 'ENSG00000261485', 'ENSG00000230453', 'ENSG00000231500', 'ENSG00000136758', 'ENSG00000005075', 'ENSG00000213533', 'ENSG00000261832', 'ENSG00000196704', 'ENSG00000204301', 'ENSG00000177485', 'ENSG00000002822', 'ENSG00000204323', 'ENSG00000115274', 'ENSG00000106560', 'ENSG00000261043', 'ENSG00000174957', 'ENSG00000224186', 'ENSG00000100908', 'ENSG00000187098', 'ENSG00000256683', 'ENSG00000104907', 'ENSG00000115561', 'ENSG00000175115', 'ENSG00000125753', 'ENSG00000256769', 'ENSG00000228889', 'ENSG00000122787', 'ENSG00000143612', 'ENSG00000010278', 'ENSG00000166130', 'ENSG00000261498', 'ENSG00000171867', 'ENSG00000091483', 'ENSG00000084207', 'ENSG00000188266', 'ENSG00000204138', 'ENSG00000175514', 'ENSG00000125629', 'ENSG00000129824', 'ENSG00000183617', 'ENSG00000111653', 'ENSG00000196366', 'ENSG00000071859', 'ENSG00000133124', 'ENSG00000187838', 'ENSG00000147799', 'ENSG00000109991', 'ENSG00000242516', 'ENSG00000136156', 'ENSG00000143147', 'ENSG00000223501', 'ENSG00000173200', 'ENSG00000106415', 'ENSG00000142606', 'ENSG00000165669', 'ENSG00000112232', 'ENSG00000175354', 'ENSG00000139151', 'ENSG00000135093', 'ENSG00000188092', 'ENSG00000135912', 'ENSG00000114166', 'ENSG00000261235', 'ENSG00000170801', 'ENSG00000221882', 'ENSG00000135334', 'ENSG00000152503', 'ENSG00000183741', 'ENSG00000171466', 'ENSG00000255974', 'ENSG00000117419', 'ENSG00000132321', 'ENSG00000182022', 'ENSG00000224405', 'ENSG00000104888', 'ENSG00000179397', 'ENSG00000105707', 'ENSG00000165914', 
'ENSG00000170921', 'ENSG00000049089', 'ENSG00000108786', 'ENSG00000155906', 'ENSG00000232225', 'ENSG00000185985', 'ENSG00000134802', 'ENSG00000169418', 'ENSG00000163281', 'ENSG00000197586', 'ENSG00000204435', 'ENSG00000196166', 'ENSG00000177462', 'ENSG00000248397', 'ENSG00000171488', 'ENSG00000179407', 'ENSG00000120341', 'ENSG00000100811', 'ENSG00000239713', 'ENSG00000105486', 'ENSG00000163312', 'ENSG00000101639', 'ENSG00000162073', 'ENSG00000163389', 'ENSG00000171942', 'ENSG00000236822', 'ENSG00000089639', 'ENSG00000166016', 'ENSG00000109861', 'ENSG00000141562', 'ENSG00000224559', 'ENSG00000124237', 'ENSG00000084444', 'ENSG00000092295', 'ENSG00000155111', 'ENSG00000235641', 'ENSG00000243709', 'ENSG00000139329', 'ENSG00000091583', 'ENSG00000154319', 'ENSG00000205174', 'ENSG00000085265', 'ENSG00000171824', 'ENSG00000213057', 'ENSG00000178591', 'ENSG00000157191', 'ENSG00000249348', 'ENSG00000188676', 'ENSG00000232307', 'ENSG00000155511', 'ENSG00000164334', 'ENSG00000198039', 'ENSG00000042304', 'ENSG00000172264', 'ENSG00000173976', 'ENSG00000163374', 'ENSG00000134321', 'ENSG00000188162', 'ENSG00000246100', 'ENSG00000104818', 'ENSG00000157985', 'ENSG00000178694', 'ENSG00000112305', 'ENSG00000159579', 'ENSG00000164920', 'ENSG00000214706', 'ENSG00000144749', 'ENSG00000145220', 'ENSG00000067048', 'ENSG00000227356', 'ENSG00000130758', 'ENSG00000165516', 'ENSG00000131873', 'ENSG00000125863', 'ENSG00000010932', 'ENSG00000131462', 'ENSG00000008083', 'ENSG00000157219', 'ENSG00000224459', 'ENSG00000125746', 'ENSG00000116809', 'ENSG00000106927', 'ENSG00000168904', 'ENSG00000112599', 'ENSG00000097021', 'ENSG00000068305', 'ENSG00000250349', 'ENSG00000147100', 'ENSG00000197046', 'ENSG00000175646', 'ENSG00000125878', 'ENSG00000197283', 'ENSG00000235699', 'ENSG00000184967', 'ENSG00000227695', 'ENSG00000176194', 'ENSG00000110046', 'ENSG00000188582', 'ENSG00000253873', 'ENSG00000162755', 'ENSG00000161652', 'ENSG00000091010', 'ENSG00000196944', 'ENSG00000155970', 'ENSG00000122679', 
'ENSG00000106302', 'ENSG00000134595', 'ENSG00000110583', 'ENSG00000170889', 'ENSG00000232192', 'ENSG00000095321', 'ENSG00000156471', 'ENSG00000135406', 'ENSG00000164619', 'ENSG00000189350', 'ENSG00000183251', 'ENSG00000058272', 'ENSG00000177150', 'ENSG00000106635', 'ENSG00000109424', 'ENSG00000133943', 'ENSG00000081051', 'ENSG00000147234', 'ENSG00000132274', 'ENSG00000258710', 'ENSG00000251173', 'ENSG00000125388', 'ENSG00000225234', 'ENSG00000119509', 'ENSG00000006283', 'ENSG00000250317', 'ENSG00000234056', 'ENSG00000188000', 'ENSG00000109099', 'ENSG00000182185', 'ENSG00000167601', 'ENSG00000109670', 'ENSG00000249459', 'ENSG00000137404', 'ENSG00000162630', 'ENSG00000117758', 'ENSG00000105287', 'ENSG00000134909', 'ENSG00000175806', 'ENSG00000158201', 'ENSG00000001460', 'ENSG00000203362', 'ENSG00000185477', 'ENSG00000187189', 'ENSG00000101310', 'ENSG00000163762', 'ENSG00000136457', 'ENSG00000065518', 'ENSG00000141391', 'ENSG00000172466', 'ENSG00000128656', 'ENSG00000155966', 'ENSG00000151704', 'ENSG00000157796', 'ENSG00000163406', 'ENSG00000120833', 'ENSG00000105928', 'ENSG00000197745', 'ENSG00000111877', 'ENSG00000071282', 'ENSG00000234478', 'ENSG00000110921', 'ENSG00000227659', 'ENSG00000134258', 'ENSG00000146776', 'ENSG00000137714', 'ENSG00000169906', 'ENSG00000155324', 'ENSG00000072518', 'ENSG00000237489', 'ENSG00000241211', 'ENSG00000174483', 'ENSG00000115355', 'ENSG00000124145', 'ENSG00000131143', 'ENSG00000078814', 'ENSG00000128185', 'ENSG00000215529', 'ENSG00000125848', 'ENSG00000143033', 'ENSG00000100221', 'ENSG00000137413', 'ENSG00000179262', 'ENSG00000169126', 'ENSG00000179271', 'ENSG00000177476', 'ENSG00000181513', 'ENSG00000153157', 'ENSG00000125868', 'ENSG00000146674', 'ENSG00000242242', 'ENSG00000103507', 'ENSG00000181722', 'ENSG00000132026', 'ENSG00000125046', 'ENSG00000171606', 'ENSG00000166800', 'ENSG00000106992', 'ENSG00000136720', 'ENSG00000143595', 'ENSG00000180233', 'ENSG00000101463', 'ENSG00000035403', 'ENSG00000081913', 'ENSG00000205250', 
'ENSG00000180815', 'ENSG00000139287', 'ENSG00000138594', 'ENSG00000224051', 'ENSG00000127564', 'ENSG00000198829', 'ENSG00000180354', 'ENSG00000255804', 'ENSG00000001167', 'ENSG00000135632', 'ENSG00000128989', 'ENSG00000130997', 'ENSG00000188909', 'ENSG00000250748', 'ENSG00000227488', 'ENSG00000172785', 'ENSG00000121068', 'ENSG00000031081', 'ENSG00000107938', 'ENSG00000120533', 'ENSG00000197694', 'ENSG00000167797', 'ENSG00000127252', 'ENSG00000203909', 'ENSG00000142534', 'ENSG00000204516', 'ENSG00000215568', 'ENSG00000162413', 'ENSG00000178734', 'ENSG00000184557', 'ENSG00000100418', 'ENSG00000180353', 'ENSG00000106266', 'ENSG00000105270', 'ENSG00000177885', 'ENSG00000164045', 'ENSG00000167588', 'ENSG00000188906', 'ENSG00000143032', 'ENSG00000135914', 'ENSG00000189280', 'ENSG00000154016', 'ENSG00000104983', 'ENSG00000096433', 'ENSG00000172404', 'ENSG00000138756', 'ENSG00000126254', 'ENSG00000162885', 'ENSG00000270011', 'ENSG00000205846', 'ENSG00000260903', 'ENSG00000180822', 'ENSG00000078795', 'ENSG00000172273', 'ENSG00000196227', 'ENSG00000258839', 'ENSG00000179213', 'ENSG00000114573', 'ENSG00000008196', 'ENSG00000183734', 'ENSG00000168907', 'ENSG00000089041', 'ENSG00000212657', 'ENSG00000169154', 'ENSG00000175329', 'ENSG00000165837', 'ENSG00000137252', 'ENSG00000100721', 'ENSG00000239886', 'ENSG00000162383', 'ENSG00000214013', 'ENSG00000172380', 'ENSG00000144668', 'ENSG00000223547', 'ENSG00000138071', 'ENSG00000073417', 'ENSG00000174255', 'ENSG00000197016', 'ENSG00000172115', 'ENSG00000122711', 'ENSG00000196420', 'ENSG00000102243', 'ENSG00000075151', 'ENSG00000198001', 'ENSG00000186583', 'ENSG00000165626', 'ENSG00000233610', 'ENSG00000132950', 'ENSG00000143061', 'ENSG00000164830', 'ENSG00000143622', 'ENSG00000111885', 'ENSG00000107317', 'ENSG00000250133', 'ENSG00000231310', 'ENSG00000156006', 'ENSG00000140988', 'ENSG00000175018', 'ENSG00000058085', 'ENSG00000174914', 'ENSG00000105472', 'ENSG00000142149', 'ENSG00000183067', 'ENSG00000004975', 'ENSG00000205858', 
'ENSG00000164885', 'ENSG00000186448', 'ENSG00000186493', 'ENSG00000204379', 'ENSG00000144227', 'ENSG00000168876', 'ENSG00000142910', 'ENSG00000262074', 'ENSG00000159184', 'ENSG00000113648', 'ENSG00000223882', 'ENSG00000198454', 'ENSG00000175482', 'ENSG00000254535', 'ENSG00000057019', 'ENSG00000204475', 'ENSG00000186395', 'ENSG00000184481', 'ENSG00000076356', 'ENSG00000101981', 'ENSG00000157978', 'ENSG00000177335', 'ENSG00000205078', 'ENSG00000224418', 'ENSG00000122877', 'ENSG00000237232', 'ENSG00000204444', 'ENSG00000248079', 'ENSG00000154760', 'ENSG00000146477', 'ENSG00000003137', 'ENSG00000164128', 'ENSG00000000003', 'ENSG00000149781', 'ENSG00000198400', 'ENSG00000124003', 'ENSG00000175591', 'ENSG00000179165', 'ENSG00000174827', 'ENSG00000259070', 'ENSG00000125910', 'ENSG00000055332', 'ENSG00000105669', 'ENSG00000115461', 'ENSG00000120008', 'ENSG00000105205', 'ENSG00000143867', 'ENSG00000147439', 'ENSG00000135175', 'ENSG00000181718', 'ENSG00000184905', 'ENSG00000126233', 'ENSG00000258701', 'ENSG00000198301', 'ENSG00000108379', 'ENSG00000150667', 'ENSG00000163995', 'ENSG00000079999', 'ENSG00000149743', 'ENSG00000166961', 'ENSG00000221923', 'ENSG00000183624', 'ENSG00000162402', 'ENSG00000163636', 'ENSG00000110717', 'ENSG00000168843', 'ENSG00000138395', 'ENSG00000085415', 'ENSG00000126391', 'ENSG00000169684', 'ENSG00000185896', 'ENSG00000181027', 'ENSG00000000971', 'ENSG00000091106', 'ENSG00000140675', 'ENSG00000196074', 'ENSG00000105971', 'ENSG00000225255', 'ENSG00000118985', 'ENSG00000172828', 'ENSG00000054277', 'ENSG00000214681', 'ENSG00000250334', 'ENSG00000139343', 'ENSG00000162552', 'ENSG00000164185', 'ENSG00000107882', 'ENSG00000251655', 'ENSG00000136535', 'ENSG00000087269', 'ENSG00000267374', 'ENSG00000182117', 'ENSG00000214941', 'ENSG00000134153', 'ENSG00000251015', 'ENSG00000171916', 'ENSG00000177398', 'ENSG00000205899', 'ENSG00000167397', 'ENSG00000166685', 'ENSG00000248866', 'ENSG00000075399', 'ENSG00000179083', 'ENSG00000117262', 'ENSG00000119912', 
'ENSG00000101888', 'ENSG00000243207', 'ENSG00000238121', 'ENSG00000139874', 'ENSG00000241769', 'ENSG00000250106', 'ENSG00000268006', 'ENSG00000159792', 'ENSG00000157510', 'ENSG00000228369', 'ENSG00000171811', 'ENSG00000164953', 'ENSG00000143786', 'ENSG00000203667', 'ENSG00000164611', 'ENSG00000081307', 'ENSG00000152457', 'ENSG00000163319', 'ENSG00000183638', 'ENSG00000145425', 'ENSG00000179044']
      → save terms via .add_validated_from_var_index()
• mapping depmap_id on CellLine.ontology_id
!    found 183 validated terms: ['ACH-000390', 'ACH-000444', 'ACH-000396', 'ACH-000997', 'ACH-000723', 'ACH-000504', 'ACH-001190', 'ACH-000834', 'ACH-000880', 'ACH-000717', 'ACH-000824', 'ACH-000713', 'ACH-000219', 'ACH-000762', 'ACH-000022', 'ACH-000750', 'ACH-000603', 'ACH-000228', 'ACH-000423', 'ACH-000553', 'ACH-000001', 'ACH-000270', 'ACH-000374', 'ACH-000649', 'ACH-000174', 'ACH-000791', 'ACH-000764', 'ACH-000873', 'ACH-000510', 'ACH-000376', 'ACH-000211', 'ACH-000875', 'ACH-000749', 'ACH-000347', 'ACH-000416', 'ACH-000565', 'ACH-000463', 'ACH-000397', 'ACH-000200', 'ACH-000086', 'ACH-000537', 'ACH-000479', 'ACH-000666', 'ACH-000897', 'ACH-000927', 'ACH-000415', 'ACH-000900', 'ACH-000189', 'ACH-000982', 'ACH-000288', 'ACH-000244', 'ACH-000323', 'ACH-000916', 'ACH-000704', 'ACH-000488', 'ACH-000235', 'ACH-000142', 'ACH-000685', 'ACH-000021', 'ACH-000159', 'ACH-000884', 'ACH-001307', 'ACH-000265', 'ACH-000756', 'ACH-000047', 'ACH-000252', 'ACH-000471', 'ACH-000977', 'ACH-000888', 'ACH-000209', 'ACH-000502', 'ACH-000212', 'ACH-000652', 'ACH-000367', 'ACH-000434', 'ACH-000950', 'ACH-000549', 'ACH-000947', 'ACH-000833', 'ACH-000452', 'ACH-000255', 'ACH-000822', 'ACH-000670', 'ACH-000896', 'ACH-000589', 'ACH-000886', 'ACH-000018', 'ACH-000672', 'ACH-000428', 'ACH-000911', 'ACH-000274', 'ACH-000785', 'ACH-000868', 'ACH-000866', 'ACH-000458', 'ACH-000882', 'ACH-000954', 'ACH-000958', 'ACH-000738', 'ACH-000734', 'ACH-000849', 'ACH-000973', 'ACH-000961', 'ACH-000098', 'ACH-000486', 'ACH-000605', 'ACH-000837', 'ACH-000460', 'ACH-000368', 'ACH-000123', 'ACH-000329', 'ACH-000186', 'ACH-000939', 'ACH-000657', 'ACH-000622', 'ACH-000015', 'ACH-000956', 'ACH-000014', 'ACH-000037', 'ACH-000966', 'ACH-000335', 'ACH-000579', 'ACH-000769', 'ACH-000644', 'ACH-000445', 'ACH-000701', 'ACH-000545', 'ACH-000595', 'ACH-000149', 'ACH-000178', 'ACH-000164', 'ACH-000023', 'ACH-000292', 'ACH-000517', 'ACH-000117', 'ACH-000527', 'ACH-000572', 'ACH-000048', 'ACH-000967', 'ACH-000324', 
'ACH-000601', 'ACH-000680', 'ACH-000026', 'ACH-000411', 'ACH-000569', 'ACH-000669', 'ACH-000277', 'ACH-000941', 'ACH-000495', 'ACH-000096', 'ACH-000085', 'ACH-000268', 'ACH-000302', 'ACH-000393', 'ACH-000618', 'ACH-000624', 'ACH-000774', 'ACH-000552', 'ACH-000407', 'ACH-000936', 'ACH-000681', 'ACH-000971', 'ACH-000237', 'ACH-000976', 'ACH-000776', 'ACH-000650', 'ACH-000803', 'ACH-000535', 'ACH-000341', 'ACH-000090', 'ACH-000662', 'ACH-000748', 'ACH-000661', 'ACH-001239', 'ACH-000826', 'ACH-000035', 'ACH-000343', 'ACH-000847', 'ACH-000163', 'ACH-000906', 'ACH-000903', 'ACH-000449', 'ACH-000842']
      → save terms via .add_validated_from('depmap_id')
• mapping cell_line on CellLine.name
!    found 30 validated terms: ['YD-10B', 'UM-UC-1', 'YD-38', 'LOX IMVI', 'SH-10-TC', 'SF-295', 'SNU-761', 'KNS-81', 'BICR 6', 'L3.3', 'SNU-1079', 'TCC-PAN2', 'SNU-1076', 'UO-31', 'HCC-1195', 'JHH-5', 'SNU-8', 'SNU-1105', 'COV434', 'IGROV1', 'KNS-60', 'RMUG-S', 'SNU-410', '253J-BV', 'SNU-245', 'SNU-1077', 'SNU-1041', 'RERF-LC-Ad2', 'SNU-685', 'WM1799']
      → save terms via .add_validated_from('cell_line')
!    153 terms are not validated: 'LUDLU-1 cell', 'RCB1900 cell', 'J82 cell', 'HCT-15 cell', 'SNB75 cell', 'SK-MEL-2 cell', 'AGS cell', 'COLO-680N cell', 'KYSE-510 cell', 'Caov-3 cell', 'A-375 cell', 'PA-TU-8988S cell', 'BEN cell', 'BICR 31 cell', 'SK-MEL-3 cell', 'RCB1905 cell', 'NIH:OVCAR-3 cell', 'HPAC cell', 'HCC1143 cell', '786-O cell', ...
      → fix typos, remove non-existent values, or save terms via .add_new_from('cell_line')
• mapping disease on Disease.name
!    found 21 validated terms: ['lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'malignant pancreatic neoplasm', 'breast cancer', 'kidney cancer', 'thyroid cancer', 'uterine corpus cancer', 'liver cancer', 'prostate cancer', 'bile duct cancer', 'sarcoma', 'neuroblastoma', 'rhabdoid tumor', 'bone cancer']
      → save terms via .add_validated_from('disease')
✓ organism is validated against Organism.name
• mapping perturbation_type on ULabel.name
!    2 terms are not validated: 'drug', 'CRISPR'
      → fix typos, remove non-existent values, or save terms via .add_new_from('perturbation_type')
• mapping sex on Phenotype.name
!    3 terms are not validated: 'Male', 'Female', 'Unknown'
      → fix typos, remove non-existent values, or save terms via .add_new_from('sex')
• mapping time on ULabel.name
!    4 terms are not validated: '24', '72, 96', '3, 6, 12, 24, 48', '6'
      → fix typos, remove non-existent values, or save terms via .add_new_from('time')
• mapping tissue_type on ULabel.name
!    1 terms is not validated: 'cell_line'
      → fix typos, remove non-existent values, or save terms via .add_new_from('tissue_type')
False
# Save the terms that the validation report marked as validated for the
# var index and for the DepMap IDs.
curate.add_validated_from_var_index()
curate.add_validated_from("depmap_id")

# These columns had no validated terms in the report, so their values are
# registered as new records (same order as the individual calls).
for obs_column in ("perturbation_type", "sex", "time", "tissue_type"):
    curate.add_new_from(obs_column)

curate.add_validated_from("disease")
# cell_line has both validated and non-validated names in the report;
# presumably add_new_from covers both -- TODO confirm.
curate.add_new_from("cell_line")
Hide code cell output
✓ added 2 records with ULabel.name for perturbation_type: 'drug', 'CRISPR'
✓ added 3 records with Phenotype.name for sex: 'Male', 'Female', 'Unknown'
✓ added 4 records with ULabel.name for time: '24', '72, 96', '3, 6, 12, 24, 48', '6'
✓ added 1 record with ULabel.name for tissue_type: 'cell_line'

Modeling and curating perturbation metadata

The dataset has two types of perturbations: CRISPR and compounds (labeled "drug" in the perturbation_type column). We will create their records and associated targets separately.

# Split the per-cell metadata by the value of the perturbation_type column.
perturbation_type = adata.obs["perturbation_type"]
crispr_metadata = adata.obs.loc[perturbation_type == "CRISPR"]
drug_metadata = adata.obs.loc[perturbation_type == "drug"]

The wetlab schema has two major components:

  1. wetlab.EnvironmentalTreatment to model perturbations such as heat, wetlab.GeneticTreatment to model perturbations such as CRISPR, and wetlab.CompoundTreatment to model, for example, drugs. Several treatments together can be modeled using wetlab.CombinationTreatment.

  2. Known targets of treatments can be modeled through wetlab.TreatmentTarget, which can link to one or several bionty.Gene, bionty.Protein, or bionty.Pathway records.

Genetic perturbations

Genetic perturbations can be modeled in two ways, depending on the available information, by populating a:

  1. wetlab.GeneticTreatment record if details of the system, such as the guide RNA name or sequence and the on- and off-target scores, are known.

  2. wetlab.TreatmentTarget record that links to bionty.Gene records.

# Preview the first three rows of the CRISPR subset of adata.obs.
crispr_metadata.head(3)
Hide code cell output
depmap_id cancer cell_det_rate cell_line cell_quality channel disease dose_unit dose_value doublet_CL1 doublet_CL2 doublet_GMM_prob doublet_dev_imp doublet_z_margin hash_assignment hash_tag num_SNPs organism perturbation perturbation_type sex singlet_ID singlet_dev singlet_dev_z singlet_margin singlet_z_margin time tissue_type tot_reads nperts ngenes ncounts percent_mito percent_ribo chembl-ID
TAGTTGGAGATCGATA ACH-000723 True 0.132708 YD-10B low_quality nan head and neck cancer nan NaN YD10B_UPPER_AERODIGESTIVE_TRACT 647V_URINARY_TRACT NaN 0.156492 1.556214 nan nan 874 human sggpx4-2 CRISPR Male YD10B_UPPER_AERODIGESTIVE_TRACT 0.292802 3.272682 0.016459 0.330120 72, 96 cell_line 2105 1 4341 20693.0 0.695887 16.242208 NaN
CATCGGGGTTCATGGT ACH-000219 True 0.087860 A-375 normal nan skin cancer nan NaN A375_SKIN DAOY_CENTRAL_NERVOUS_SYSTEM 2.496623e-07 0.007701 0.088255 nan nan 524 human sglacz CRISPR Female A375_SKIN 0.671925 13.649916 0.464200 11.962996 72, 96 cell_line 1035 1 2919 13771.0 2.730375 40.592550 NaN
AAATGCCTCGTGGACC-1 ACH-000762 True 0.075085 YD-38 normal nan head and neck cancer nan NaN YD38_UPPER_AERODIGESTIVE_TRACT IGR1_SKIN 2.366136e-02 0.032628 0.158193 nan nan 407 human sggpx4-2 CRISPR Male YD38_UPPER_AERODIGESTIVE_TRACT 0.571537 7.417734 0.331906 5.552359 72, 96 cell_line 829 1 2456 10996.0 2.528192 39.841761 NaN
# Distinct guide names present in the CRISPR subset, in order of appearance.
crispr_metadata["perturbation"].unique().tolist()
Hide code cell output
['sggpx4-2', 'sglacz', 'sggpx4-1', 'sgor2j2']
What are the associated targets?

The following targets are the direct targets of the perturbations, and while they may affect a pathway, we only curate the direct targets for simplicity.

  1. sgGPX4-1: Gene/Protein - GPX4 (Glutathione Peroxidase 4)

  2. sgGPX4-2: Gene/Protein - GPX4 (Glutathione Peroxidase 4)

  3. sgLACZ: Gene/Protein - LACZ (β-galactosidase)

  4. sgOR2J2: Gene/Protein - OR2J2 (Olfactory receptor family 2 subfamily J member 2)

Since the perturbation metadata contains the guide RNA names, we model the genetic perturbations using both wetlab.GeneticTreatment and wetlab.TreatmentTarget.

# Each entry: (guide RNA name, target gene symbol, target name).
# NOTE(review): the captured output reports 'or2j2' as matched via synonyms
# and flagged as ambiguous; consider the official symbol 'OR2J2' -- confirm
# before changing, since stored records depend on these strings.
treatments = [
    ("sgGPX4-1", "GPX4", "Glutathione Peroxidase 4"),
    ("sgGPX4-2", "GPX4", "Glutathione Peroxidase 4"),
    ("sgor2j2", "or2j2", "Olfactory receptor family 2 subfamily J member 2"),
    ("sgLACZ", "lacz", "beta-galactosidase control"),  # Control from E. coli
]
organism = bt.Organism.lookup().human


def _saved_gene_for(gene_symbol):
    """Return a saved bt.Gene record for ``gene_symbol``.

    The "lacz" control is created directly as a custom record; every other
    symbol is looked up with ``bt.Gene.from_source``. That lookup may hand
    back a list, in which case the first entry is used.
    """
    if gene_symbol == "lacz":
        return bt.Gene(symbol=gene_symbol, organism=organism).save()
    matches = bt.Gene.from_source(symbol=gene_symbol, organism=organism)
    record = matches[0] if isinstance(matches, list) else matches
    return record.save()


genetic_treatments = []
for guide_name, gene_symbol, target_label in treatments:
    # One GeneticTreatment per guide; guides hitting the same gene share the
    # target name passed to TreatmentTarget.
    treatment = wl.GeneticTreatment(system="CRISPR KO", name=guide_name).save()
    gene = _saved_gene_for(gene_symbol)
    target = wl.TreatmentTarget(name=target_label).save()
    # Link gene -> target -> treatment, then collect the treatment.
    target.genes.add(gene)
    treatment.targets.add(target)
    genetic_treatments.append(treatment)
Hide code cell output
✓ created 1 Gene record from Bionty matching symbol: 'GPX4'
! record with similar name exists! did you mean to load it?
uid name system sequence on_target_score off_target_score run_id created_by_id updated_at
id
1 f9mDuZzSAByk sgGPX4-1 CRISPR KO None None None 1 1 2024-09-25 20:02:36.016813+00:00
→ returning existing TreatmentTarget record with same name: 'Glutathione Peroxidase 4'
✓ created 1 Gene record from Bionty matching synonyms: 'or2j2'
! ambiguous validation in Bionty for 1 record: 'OR2J2'

Compound perturbations

Although the targets are known for many compounds, we skip annotating them here to keep the guide brief.

What are the compound targets?
  1. AZD5591: Unknown

  2. Afatinib: Proteins - EGFR (Epidermal Growth Factor Receptor), HER2 (Human Epidermal growth factor Receptor 2)

  3. BRD3379: Unknown

  4. Bortezomib: Protein complex - Proteasome (specifically the 26S proteasome subunit)

  5. Dabrafenib: Gene/Protein - BRAF (V600E mutation in the BRAF gene, which codes for a protein kinase)

  6. Everolimus: Protein - mTOR (Mammalian Target of Rapamycin)

  7. Gemcitabine: Pathway/Process - DNA synthesis (inhibition of ribonucleotide reductase and incorporation into DNA)

  8. Idasanutlin: Protein - MDM2 (Mouse Double Minute 2 homolog)

  9. JQ1: Protein - BRD4 (Bromodomain-containing protein 4)

  10. Navitoclax: Proteins - BCL-2, BCL-XL (B-cell lymphoma 2 and B-cell lymphoma-extra large)

  11. Prexasertib: Protein - CHK1 (Checkpoint kinase 1)

  12. Taselisib: Protein/Pathway - PI3K (Phosphoinositide 3-kinase)

  13. Trametinib: Proteins - MEK1/2 (Mitogen-Activated Protein Kinase Kinase 1 and 2)

  14. control: Not applicable

# Drug perturbations are curated against the ChEBI drug source (its table
# also carries ChEMBL IDs), so register it as a source for wl.Compound.
drug_sources = bt.Source.filter(entity="Drug", name="chebi")
chebi_source = drug_sources.one()
wl.Compound.add_source(chebi_source)

# Preview the first three rows of the public compound table.
compounds = wl.Compound.public()
compounds.df().head(3)
Hide code cell output
→ due to lack of write access, LaminDB won't manage storage location: s3://bionty-assets/
• path in storage 's3://bionty-assets' with key 'df_all__chebi__2024-07-27__Drug.parquet'
→ source added!
name definition synonyms parents chembl_id
ontology_id
CHEBI:10 (+)-Atherospermoline None (+)-Atherospermoline [CHEBI:133004] CHEMBL500609
CHEBI:100 (-)-medicarpin The (-)-Enantiomer Of Medicarpin. (-)-Medicarpin|(-)-medicarpin|(6aR,11aR)-9-met... [CHEBI:16114] CHEMBL238845
CHEBI:10000 Vismione D None Vismione D [CHEBI:46955] CHEMBL487795
# Preview the first three rows of the drug perturbation metadata
drug_metadata.head(3)
Hide code cell output
depmap_id cancer cell_det_rate cell_line cell_quality channel disease dose_unit dose_value doublet_CL1 doublet_CL2 doublet_GMM_prob doublet_dev_imp doublet_z_margin hash_assignment hash_tag num_SNPs organism perturbation perturbation_type sex singlet_ID singlet_dev singlet_dev_z singlet_margin singlet_z_margin time tissue_type tot_reads nperts ngenes ncounts percent_mito percent_ribo chembl-ID
AACTGGTGTCTCTCTG ACH-000390 True 0.093159 LUDLU-1 normal nan lung cancer µM 0.1 LUDLU1_LUNG TE14_OESOPHAGUS 2.269468e-10 0.009426 0.403316 nan nan 481 human trametinib drug Male LUDLU1_LUNG 0.655877 14.860933 0.462273 12.351139 24 cell_line 787 1 3045 12895.0 3.202792 24.955409 CHEMBL2103875
ATAGGCTCAGATTTCG ACH-000444 True 0.145728 LU99 normal 2 lung cancer µM 0.5 LU99_LUNG MCAS_OVARY 8.562908e-04 0.010173 0.188284 nan nan 1003 human afatinib drug Male LU99_LUNG 0.762847 10.648094 0.474590 8.164565 24 cell_line 1597 1 4763 23161.0 7.473771 18.051898 CHEMBL1173655
GCCAAATCAAGCCGTC ACH-000396 True 0.117330 J82 normal nan urinary bladder carcinoma µM 0.1 J82_URINARY_TRACT IGR1_SKIN 6.490367e-08 0.009686 1.185862 nan nan 647 human dabrafenib drug Male J82_URINARY_TRACT 0.651059 14.740111 0.404508 11.188513 24 cell_line 1159 1 3834 18062.0 2.762706 22.085040 CHEMBL2028663
# Create Compound records for every perturbation name that can be matched by name
perturbation_names = drug_metadata["perturbation"]
compounds = wl.Compound.from_values(perturbation_names, field="name")
Hide code cell output
✓ created 7 Compound records from Bionty matching name: 'trametinib', 'afatinib', 'dabrafenib', 'gemcitabine', 'navitoclax', 'bortezomib', 'everolimus'
✓ created 1 Compound record from Bionty matching synonyms: 'jq1'
! did not create Compound records for 6 non-validated names: 'azd5591', 'brd3379', 'control', 'idasanutlin', 'prexasertib', 'taselisib'
# These names were reported as non-validated against chebi, so we create
# Compound records for them by hand and save everything in one go
unvalidated_names = (
    "azd5591",
    "brd3379",
    "control",
    "idasanutlin",
    "prexasertib",
    "taselisib",
)
compounds.extend(wl.Compound(name=name) for name in unvalidated_names)
ln.save(compounds)

# Restrict the observations to drug perturbations and work on a copy
is_drug = adata.obs["perturbation_type"] == "drug"
drug_metadata = adata.obs[is_drug].copy()

# "jq1" was only matched through a synonym; replace it in the metadata with
# the name of the Compound record that the search returns
jq1_name = wl.Compound.search("jq1").one().name
drug_metadata["perturbation"] = drug_metadata["perturbation"].cat.rename_categories(
    {"jq1": jq1_name}
)

# One row per distinct combination of compound, dose unit and dose value
treatment_columns = ["perturbation", "dose_unit", "dose_value"]
unique_treatments = drug_metadata[treatment_columns].drop_duplicates()

# Build one CompoundTreatment per unique (perturbation, dose unit, dose value) row.
# Each perturbation name is looked up in the Compound registry and the record's
# name is used as the treatment name.
# NOTE(review): the Compound record itself is not attached to the treatment here,
# only its name is reused -- confirm whether an explicit link is intended.
compound_treatments = [
    wl.CompoundTreatment(
        name=wl.Compound.get(name=treatment_row["perturbation"]).name,
        concentration=treatment_row["dose_value"],
        concentration_unit=treatment_row["dose_unit"],
    )
    for _, treatment_row in unique_treatments.iterrows()
]

ln.save(compound_treatments)

Register curated artifact

# Save the curated AnnData as an artifact with a human-readable description
# (`curate` is defined outside this section of the notebook)
artifact = curate.save_artifact(description="McFarland AnnData")
Hide code cell output
✓ var_index is validated against Gene.ensembl_gene_id
✓ depmap_id is validated against CellLine.ontology_id
✓ cell_line is validated against CellLine.name
✓ disease is validated against Disease.name
✓ organism is validated against Organism.name
✓ perturbation_type is validated against ULabel.name
✓ sex is validated against Phenotype.name
✓ time is validated against ULabel.name
✓ tissue_type is validated against ULabel.name
• path content will be copied to default storage upon `save()` with key `None` ('.lamindb/YBpOBsmYG8lR61MN0000.h5ad')
✓ storing artifact 'YBpOBsmYG8lR61MN0000' at '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-perturbation/.lamindb/YBpOBsmYG8lR61MN0000.h5ad'
• parsing feature names of X stored in slot 'var'
1279 terms (100.00%) are validated for ensembl_gene_id
✓    linked: FeatureSet(uid='LRsTRvS8Nn8myxlKIaoI', n=1279, dtype='float', registry='bionty.Gene', hash='U5j5OOswqK8HTUlUhYLK6A', created_by_id=1, run_id=1)
• parsing feature names of slot 'obs'
8 terms (22.90%) are validated for name
!    27 terms (77.10%) are not validated for name: cancer, cell_det_rate, cell_quality, channel, dose_unit, dose_value, doublet_CL1, doublet_CL2, doublet_GMM_prob, doublet_dev_imp, doublet_z_margin, hash_assignment, hash_tag, num_SNPs, perturbation, singlet_ID, singlet_dev, singlet_dev_z, singlet_margin, singlet_z_margin, ...
✓    linked: FeatureSet(uid='zFNlVbR7twomRsvzWczY', n=8, registry='Feature', hash='kk0rtw7JSCuLw3NfrL1SjQ', created_by_id=1, run_id=1)
✓ saved 2 feature sets for slots: 'var','obs'
✓ loaded 20 Disease records matching name: 'lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'breast cancer', 'kidney cancer', 'thyroid cancer', 'uterine corpus cancer', 'liver cancer', 'prostate cancer', 'bile duct cancer', 'sarcoma', 'neuroblastoma', 'rhabdoid tumor', 'bone cancer'
✓ loaded 1 Disease record matching synonyms: 'pancreatic cancer'
# Link the genetic and compound treatment records to the artifact
artifact.genetic_treatments.set(genetic_treatments)
artifact.compound_treatments.set(compound_treatments)
# Print a summary of the artifact: provenance, labels, features and feature sets
artifact.describe()
Hide code cell output
Artifact(uid='YBpOBsmYG8lR61MN0000', is_latest=True, description='McFarland AnnData', suffix='.h5ad', type='dataset', size=2373992, hash='736vOApj6DkONTUEh1diVw', n_observations=1000, _hash_type='md5', _accessor='AnnData', visibility=1, _key_is_virtual=True, updated_at='2024-09-25 20:02:51 UTC')
  Provenance
    .storage = '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-perturbation'
    .transform = 'Perturbation'
    .run = '2024-09-25 20:01:53 UTC'
    .created_by = 'testuser1'
  Labels
    .genetic_treatments = 'sgGPX4-1', 'sgGPX4-2', 'sgor2j2', 'sgLACZ'
    .compound_treatments = 'trametinib', 'afatinib', 'dabrafenib', 'gemcitabine', 'navitoclax', 'bortezomib', 'brd3379', 'JQ1', 'azd5591', 'control', ...
    .organisms = 'human'
    .diseases = 'lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'malignant pancreatic neoplasm', ...
    .cell_lines = 'LUDLU-1', 'LU99', 'J82', 'HCT-15', 'YD-10B', 'SNB75', 'SK-MEL-2', 'UM-UC-1', 'AGS', 'COLO-680N', ...
    .phenotypes = 'Male', 'Female', 'Unknown'
    .ulabels = 'drug', 'CRISPR', '24', '72, 96', '3, 6, 12, 24, 48', '6', 'cell_line'
  Features
    'cell_line' = 'LUDLU-1', 'LU99', 'J82', 'HCT-15', 'YD-10B', 'SNB75', 'SK-MEL-2', 'UM-UC-1', 'AGS', 'COLO-680N', ...
    'disease' = 'lung cancer', 'urinary bladder carcinoma', 'colorectal cancer', 'head and neck cancer', 'brain cancer', 'skin cancer', 'gastric cancer', 'esophageal cancer', 'ovarian cancer', 'malignant pancreatic neoplasm', ...
    'organism' = 'human'
    'perturbation_type' = 'drug', 'CRISPR'
    'sex' = 'Male', 'Female', 'Unknown'
    'time' = '24', '72, 96', '3, 6, 12, 24, 48', '6'
    'tissue_type' = 'cell_line'
  Feature sets
    'var' = 'MAGED2', 'CPE', 'DDX43', 'SPANXA2', 'TMEM175', 'MMD', 'DYNC2I1', 'CCRL2', 'MRPS6', 'KCNH1', 'SLC5A7', 'GSX2', 'ATP5MG', 'TMEM190', 'GTF2H1', 'PACSIN1', 'MIR3179-2', 'KLK10', 'EFCAB14-AS1', 'IL33'
    'obs' = 'depmap_id', 'cell_line', 'disease', 'organism', 'perturbation_type', 'sex', 'time', 'tissue_type'
# clean up test instance: remove the local storage directory, then delete the instance
!rm -r test-perturbation
!lamin delete --force test-perturbation
Hide code cell output
• deleting instance testuser1/test-perturbation