PubChem_SMARTS

Search for chemical structures in PubChem via a SMARTS substructure query and compile results
% Vincent F. Scalfani, Serena C. Ralph, and Jason E. Bara
% The University of Alabama
% Tested with MATLAB R2020a, running Ubuntu 18.04 on March 27, 2020.

Define the PubChem API base URL

% Base URL of the PubChem PUG REST compound endpoint (kept as a char vector
% so it can be concatenated with [api '...'] later in the script)
api = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/';
% MATLAB web options: start from the defaults, then allow 30 seconds per request
options = weboptions;
options.Timeout = 30;

Define SMARTS queries

% SMARTS pattern syntax can be explored at: https://smartsview.zbh.uni-hamburg.de/
% Three vinyl imidazolium substructure queries; they differ only in the number
% of [CR0H2] spacer atoms between the ring nitrogen and the terminal alkene.
alkene_direct_query  = '[CR0H2][n+]1[cH1][cH1]n([CR0H1]=[CR0H2])[cH1]1';
alkene_spacer1_query = '[CR0H2][n+]1[cH1][cH1]n([CR0H2][CR0H1]=[CR0H2])[cH1]1';
alkene_spacer2_query = '[CR0H2][n+]1[cH1][cH1]n([CR0H2][CR0H2][CR0H1]=[CR0H2])[cH1]1';
% assemble the queries into a 1-by-N cell array (no semicolon, so it is displayed)
SMARTSq = {alkene_direct_query, alkene_spacer1_query, alkene_spacer2_query}
SMARTSq = 1×3 cell
'[CR0H2][n+]1[cH1][cH1]n([CR… '[CR0H2][n+]1[cH1][cH1]n([CR… '[CR0H2][n+]1[cH1][cH1]n([CR…
To customize the search, add your own SMARTS queries to the cell array above; it can hold as many queries as desired.

Perform a SMARTS query Search

% Run every SMARTS query in SMARTSq against PubChem and gather all matching
% CIDs into hit_CIDsALL (an N-by-1 cell array, one CID per cell).
% Uses: api (base URL), options (weboptions), SMARTSq (cell array of queries).
num_queries = numel(SMARTSq);

% generate URLs for SMARTS query searches
% (re-created on every run so no stale entries survive from a previous run
% that used a longer query list)
% NOTE(review): the SMARTS text is placed in the URL path unescaped; patterns
% containing characters such as '#' or '/' presumably need URL-encoding or a
% POST request -- confirm against the PUG REST documentation.
SMARTSq_url = cell(1, num_queries);
for h = 1:num_queries
    SMARTSq_url{h} = [api 'fastsubstructure/smarts/' char(SMARTSq(h)) '/cids/JSON'];
end

% perform substructure searches for each query link in SMARTSq_url
% a query whose request fails keeps an empty slot in hit_CIDs
hit_CIDs = cell(1, num_queries);
for i = 1:num_queries
    try
        hit_CIDs{i} = webread(SMARTSq_url{i}, options);
    catch ME
        disp('not found')
        % show why the request failed instead of discarding the error
        disp(ME.message)
    end
    % be nice to PubChem Server, add a 1 second pause (also after a failure)
    pause(1)
end

% Transfer JSON data to a cell array with all CIDs.
% Works for any number of SMARTS queries and skips queries that failed or
% whose response carries no IdentifierList.CID field.
hit_CIDsALL = zeros(0, 1);
for i = 1:num_queries
    response = hit_CIDs{i};
    if isstruct(response) && isfield(response, 'IdentifierList') ...
            && isfield(response.IdentifierList, 'CID')
        query_CIDs = response.IdentifierList.CID;
        % (:) forces a column so a single-hit response concatenates cleanly
        hit_CIDsALL = [hit_CIDsALL; query_CIDs(:)]; %#ok<AGROW>
    end
end
% no semicolon: the resulting cell array is displayed
hit_CIDsALL = num2cell(hit_CIDsALL)
hit_CIDsALL = 558×1 cell
 1
1    2881855
2    46178576
3    23724184
4    88235276
5    87575063
6    87560886
7    87559770
8    87382573
9    87327009
10    86657888
11    86657886
12    86657884
13    86657883
14    86657882
15    59435292
16    46178574
17    24766551
18    23196178
19    21803878
20    9976734
21    9855582
22    2881640
23    2881449
24    2881324
25    2881232
26    139254006
27    138675234
28    138675233
29    138404213
30    137479843
31    135361018
32    132275640
33    131723642
34    131723640
35    129850437
36    129850146
37    129827117
38    129823385
39    129256594
40    129256592
41    126722418
42    123943929
43    123798562
44    123788188
45    123754516
46    123657304
47    123621135
48    123514315
49    123512037
50    123465275
51    123449932
52    123445047
53    123420504
54    123250902
55    123217410
56    123181799
57    122625623
58    121235111
59    118320395
60    118320372
61    118320369
62    118041797
63    117828803
64    117828802
65    117828792
66    117828791
67    117828790
68    117828788
69    117828787
70    117828785
71    117750367
72    117600780
73    117600622
74    117600609
75    117600067
76    117600066
77    117599986
78    117599967
79    102147231
80    101375236
81    101375234
82    90886858
83    89940651
84    89855326
85    89855303
86    89855181
87    89854970
88    89854892
89    89854733
90    89713026
91    89684330
92    89424495
93    89400386
94    88639114
95    88542759
96    88521620
97    88521618
98    88376613
99    88266806
100    88262503
% set a CID limit to 25 max
The CID limit of 25 was added as an initial safety measure to keep the run time short while testing. This limit can be increased.
% count the entries in hit_CIDsALL (no semicolon, so the count is displayed)
number_hit_CIDsALL = length(hit_CIDsALL)
number_hit_CIDsALL = 558
% Maximum number of CIDs to carry forward; raise this value to process more hits.
% Defined once so the comparison and the slice below cannot drift apart.
max_CIDs = 25;
if number_hit_CIDsALL > max_CIDs
    % keep only the first max_CIDs entries (no semicolon, so the result is displayed)
    hit_CIDsALL = hit_CIDsALL(1:max_CIDs)
else
    disp('Number of CIDs not changed')
end
hit_CIDsALL = 25×1 cell
 1
1    2881855
2    46178576
3    23724184
4    88235276
5    87575063
6    87560886
7    87559770
8    87382573
9    87327009
10    86657888
11    86657886
12    86657884
13    86657883
14    86657882
15    59435292
16    46178574
17    24766551
18    23196178
19    21803878
20    9976734
21    9855582
22    2881640
23    2881449
24    2881324
25    2881232

Retrieve Identifier and Property Data

% Create an identifier/property dataset from the SMARTS substructure search results
% Retrieve the following data for each CID:
% InChI, Canonical SMILES, MW, IUPAC Name, Heavy Atom Count, Covalent Unit Count, Charge
%
% Uses: hit_CIDsALL (CIDs in column 1) and options (weboptions).
% Result: columns 2..(1 + number of properties) of hit_CIDsALL are filled with
% the text returned by PubChem, or 'not found' when a request fails.

% define the api base once; it does not change between iterations
api = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/';

% PubChem property names, in the order they are stored. The property at
% position p is written to column p+1, so for example MolecularWeight
% (position 3) is placed in column 4: {1,4} for the first CID, {2,4} for the
% second, {3,4} for the third, etc.
property_names = {'InChI', 'CanonicalSMILES', 'MolecularWeight', 'IUPACName', ...
    'HeavyAtomCount', 'CovalentUnitCount', 'Charge'};
% Additional property data can be collected by appending the property name to
% the list above, for example, if you want TPSA data add 'TPSA'; it is then
% requested from [api 'cid/' num2str(CID) '/property/TPSA/TXT'] and stored in
% the next column (column 9). Remember to add a matching entry to the
% 'VariableNames' list used when the table is built.
num_properties = numel(property_names);

% count rows explicitly: length() would return the column count once the
% cell array is wider than it is tall
num_CIDs = size(hit_CIDsALL, 1);

% process each CID one-by-one
for r = 1:num_CIDs
    % the CID always lives in column 1
    CID = hit_CIDsALL{r,1};
    for p = 1:num_properties
        % define api call for this property
        property_url = [api 'cid/' num2str(CID) '/property/' property_names{p} '/TXT'];
        % retrieve identifier or property data
        try
            property_value = webread(property_url, options);
        catch
            property_value = 'not found';
            fprintf('CID %d: %s not found\n', CID, property_names{p});
        end
        % be polite to PubChem server
        pause(0.5)
        % add property data to hit_CIDsALL data array
        hit_CIDsALL{r, p + 1} = property_value;
    end
end

Compile Data into a Table

% column headings for the table, one per column of hit_CIDsALL
table_column_names = {'CID', 'InChI', 'CanSMI', 'MW', ...
    'IUPACName', 'HeavyAtomCount', 'CovalentUnitCount', 'Charge'};
% turn every cell into a string, then strip leading and trailing white space
hit_CIDsALLstring = string(hit_CIDsALL);
hit_CIDsALLstring = strtrim(hit_CIDsALLstring);
% build the table (no semicolon, so it is displayed)
SMARTSq_table = array2table(hit_CIDsALLstring, 'VariableNames', table_column_names)
SMARTSq_table = 25×8 table
 CID  InChI  CanSMI  MW  IUPACName  HeavyAtomCount  CovalentUnitCount  Charge
1"2881855""InChI=1S/C15H17N2O3.BrH/c1-4-16-7-8-17(11-16)10-13(18)12-5-6-14(19-2)15(9-12)20-3;/h4-9,11H,1,10H2,2-3H3;1H/q+1;/p-1""COC1=C(C=C(C=C1)C(=O)C[N+]2=CN(C=C2)C=C)OC.[Br-]""353.210000""1-(3,4-dimethoxyphenyl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""21""2""0"
2"46178576""InChI=1S/C21H39N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-15-16-17-18-23-20-19-22(4-2)21-23;/h4,19-21H,2-3,5-18H2,1H3;1H/q+1;/p-1""CCCCCCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""399.500000""1-ethenyl-3-hexadecylimidazol-3-ium;bromide""24""2""0"
3"23724184""InChI=1S/C11H10BrN2OS.BrH/c1-2-13-5-6-14(8-13)7-9(15)10-3-4-11(12)16-10;/h2-6,8H,1,7H2;1H/q+1;/p-1""C=CN1C=C[N+](=C1)CC(=O)C2=CC=C(S2)Br.[Br-]""378.080000""1-(5-bromothiophen-2-yl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""17""2""0"
4"88235276""InChI=1S/C7H9N3O.ClH/c1-2-9-3-4-10(6-9)5-7(8)11;/h2-4,6H,1,5H2,(H-,8,11);1H""C=CN1C=C[N+](=C1)CC(=O)N.[Cl-]""187.630000""2-(3-ethenylimidazol-1-ium-1-yl)acetamide;chloride""12""2""0"
5"87575063""InChI=1S/C8H11N2.ClH/c1-3-5-10-7-6-9(4-2)8-10;/h3-4,6-8H,1-2,5H2;1H/q+1;/p-1""C=CC[N+]1=CN(C=C1)C=C.[Cl-]""170.640000""1-ethenyl-3-prop-2-enylimidazol-3-ium;chloride""11""2""0"
6"87560886""InChI=1S/C9H15N2.BrH/c1-3-5-6-11-8-7-10(4-2)9-11;/h4,7-9H,2-3,5-6H2,1H3;1H/q+1;/p-1""CCCC[N+]1=CN(C=C1)C=C.[Br-]""231.130000""1-butyl-3-ethenylimidazol-1-ium;bromide""12""2""0"
7"87559770""InChI=1S/C9H15N2.ClH/c1-3-5-6-11-8-7-10(4-2)9-11;/h4,7-9H,2-3,5-6H2,1H3;1H/q+1;/p-1""CCCC[N+]1=CN(C=C1)C=C.[Cl-]""186.680000""1-butyl-3-ethenylimidazol-1-ium;chloride""12""2""0"
8"87382573""InChI=1S/C11H19N2.BrH/c1-3-5-6-7-8-13-10-9-12(4-2)11-13;/h4,9-11H,2-3,5-8H2,1H3;1H/q+1;/p-1""CCCCCC[N+]1=CN(C=C1)C=C.[Br-]""259.190000""1-ethenyl-3-hexylimidazol-3-ium;bromide""14""2""0"
9"87327009""InChI=1S/C7H11N2.BrH/c1-3-8-5-6-9(4-2)7-8;/h3,5-7H,1,4H2,2H3;1H/q+1;/p-1""CC[N+]1=CN(C=C1)C=C.[Br-]""203.080000""1-ethenyl-3-ethylimidazol-3-ium;bromide""10""2""0"
10"86657888""InChI=1S/C23H43N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-25-22-21-24(4-2)23-25;/h4,21-23H,2-3,5-20H2,1H3;1H/q+1;/p-1""CCCCCCCCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""427.500000""1-ethenyl-3-octadecylimidazol-3-ium;bromide""26""2""0"
11"86657886""InChI=1S/C19H35N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-15-16-21-18-17-20(4-2)19-21;/h4,17-19H,2-3,5-16H2,1H3;1H/q+1;/p-1""CCCCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""371.400000""1-ethenyl-3-tetradecylimidazol-3-ium;bromide""22""2""0"
12"86657884""InChI=1S/C15H27N2.BrH/c1-3-5-6-7-8-9-10-11-12-17-14-13-16(4-2)15-17;/h4,13-15H,2-3,5-12H2,1H3;1H/q+1;/p-1""CCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""315.290000""1-decyl-3-ethenylimidazol-1-ium;bromide""18""2""0"
13"86657883""InChI=1S/C13H23N2/c1-3-5-6-7-8-9-10-15-12-11-14(4-2)13-15/h4,11-13H,2-3,5-10H2,1H3/q+1""CCCCCCCC[N+]1=CN(C=C1)C=C""207.330000""1-ethenyl-3-octylimidazol-3-ium""15""1""1"
14"86657882""InChI=1S/C13H23N2.BrH/c1-3-5-6-7-8-9-10-15-12-11-14(4-2)13-15;/h4,11-13H,2-3,5-10H2,1H3;1H/q+1;/p-1""CCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""287.240000""1-ethenyl-3-octylimidazol-3-ium;bromide""16""2""0"
15"59435292""InChI=1S/C8H12N2O3S/c1-2-9-5-6-10(8-9)4-3-7-14(11,12)13/h2,5-6,8H,1,3-4,7H2""C=CN1C=C[N+](=C1)CCCS(=O)(=O)[O-]""216.260000""3-(3-ethenylimidazol-1-ium-1-yl)propane-1-sulfonate""14""1""0"
16"46178574""InChI=1S/C11H19N2.ClH/c1-3-5-6-7-8-13-10-9-12(4-2)11-13;/h4,9-11H,2-3,5-8H2,1H3;1H/q+1;/p-1""CCCCCC[N+]1=CN(C=C1)C=C.[Cl-]""214.730000""1-ethenyl-3-hexylimidazol-3-ium;chloride""14""2""0"
17"24766551""InChI=1S/C9H15N2/c1-3-5-6-11-8-7-10(4-2)9-11/h4,7-9H,2-3,5-6H2,1H3/q+1""CCCC[N+]1=CN(C=C1)C=C""151.230000""1-butyl-3-ethenylimidazol-1-ium""11""1""1"
18"23196178""InChI=1S/C17H31N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-19-16-15-18(4-2)17-19;/h4,15-17H,2-3,5-14H2,1H3;1H/q+1;/p-1""CCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""343.300000""1-dodecyl-3-ethenylimidazol-1-ium;bromide""20""2""0"
19"21803878""InChI=1S/C17H31N2/c1-3-5-6-7-8-9-10-11-12-13-14-19-16-15-18(4-2)17-19/h4,15-17H,2-3,5-14H2,1H3/q+1""CCCCCCCCCCCC[N+]1=CN(C=C1)C=C""263.400000""1-dodecyl-3-ethenylimidazol-1-ium""19""1""1"
20"9976734""InChI=1S/C7H11N2/c1-3-8-5-6-9(4-2)7-8/h3,5-7H,1,4H2,2H3/q+1""CC[N+]1=CN(C=C1)C=C""123.180000""1-ethenyl-3-ethylimidazol-3-ium""9""1""1"
21"9855582""InChI=1S/C7H8N3.ClH/c1-2-9-5-6-10(7-9)4-3-8;/h2,5-7H,1,4H2;1H/q+1;/p-1""C=CN1C=C[N+](=C1)CC#N.[Cl-]""169.610000""2-(3-ethenylimidazol-1-ium-1-yl)acetonitrile;chloride""11""2""0"
22"2881640""InChI=1S/C13H12ClN2O.BrH/c1-2-15-7-8-16(10-15)9-13(17)11-3-5-12(14)6-4-11;/h2-8,10H,1,9H2;1H/q+1;/p-1""C=CN1C=C[N+](=C1)CC(=O)C2=CC=C(C=C2)Cl.[Br-]""327.600000""1-(4-chlorophenyl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""18""2""0"
23"2881449""InChI=1S/C11H11N2OS.BrH/c1-2-12-5-6-13(9-12)8-10(14)11-4-3-7-15-11;/h2-7,9H,1,8H2;1H/q+1;/p-1""C=CN1C=C[N+](=C1)CC(=O)C2=CC=CS2.[Br-]""299.190000""2-(3-ethenylimidazol-1-ium-1-yl)-1-thiophen-2-ylethanone;bromide""16""2""0"
24"2881324""InChI=1S/C13H12BrN2O.BrH/c1-2-15-7-8-16(10-15)9-13(17)11-3-5-12(14)6-4-11;/h2-8,10H,1,9H2;1H/q+1;/p-1""C=CN1C=C[N+](=C1)CC(=O)C2=CC=C(C=C2)Br.[Br-]""372.050000""1-(4-bromophenyl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""18""2""0"
25"2881232""InChI=1S/C14H15N2O2.BrH/c1-3-15-8-9-16(11-15)10-14(17)12-4-6-13(18-2)7-5-12;/h3-9,11H,1,10H2,2H3;1H/q+1;/p-1""COC1=CC=C(C=C1)C(=O)C[N+]2=CN(C=C2)C=C.[Br-]""323.180000""2-(3-ethenylimidazol-1-ium-1-yl)-1-(4-methoxyphenyl)ethanone;bromide""19""2""0"
% reorder the table columns so the SMILES and IUPAC name come first
rearranged_columns = {'CanSMI' 'IUPACName' 'CID' 'InChI' 'MW' ...
    'HeavyAtomCount' 'CovalentUnitCount' 'Charge'};
% select all rows in the new column order (no semicolon, so it is displayed)
SMARTSq_table2 = SMARTSq_table(:, rearranged_columns)
SMARTSq_table2 = 25×8 table
 CanSMI  IUPACName  CID  InChI  MW  HeavyAtomCount  CovalentUnitCount  Charge
1"COC1=C(C=C(C=C1)C(=O)C[N+]2=CN(C=C2)C=C)OC.[Br-]""1-(3,4-dimethoxyphenyl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""2881855""InChI=1S/C15H17N2O3.BrH/c1-4-16-7-8-17(11-16)10-13(18)12-5-6-14(19-2)15(9-12)20-3;/h4-9,11H,1,10H2,2-3H3;1H/q+1;/p-1""353.210000""21""2""0"
2"CCCCCCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-ethenyl-3-hexadecylimidazol-3-ium;bromide""46178576""InChI=1S/C21H39N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-15-16-17-18-23-20-19-22(4-2)21-23;/h4,19-21H,2-3,5-18H2,1H3;1H/q+1;/p-1""399.500000""24""2""0"
3"C=CN1C=C[N+](=C1)CC(=O)C2=CC=C(S2)Br.[Br-]""1-(5-bromothiophen-2-yl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""23724184""InChI=1S/C11H10BrN2OS.BrH/c1-2-13-5-6-14(8-13)7-9(15)10-3-4-11(12)16-10;/h2-6,8H,1,7H2;1H/q+1;/p-1""378.080000""17""2""0"
4"C=CN1C=C[N+](=C1)CC(=O)N.[Cl-]""2-(3-ethenylimidazol-1-ium-1-yl)acetamide;chloride""88235276""InChI=1S/C7H9N3O.ClH/c1-2-9-3-4-10(6-9)5-7(8)11;/h2-4,6H,1,5H2,(H-,8,11);1H""187.630000""12""2""0"
5"C=CC[N+]1=CN(C=C1)C=C.[Cl-]""1-ethenyl-3-prop-2-enylimidazol-3-ium;chloride""87575063""InChI=1S/C8H11N2.ClH/c1-3-5-10-7-6-9(4-2)8-10;/h3-4,6-8H,1-2,5H2;1H/q+1;/p-1""170.640000""11""2""0"
6"CCCC[N+]1=CN(C=C1)C=C.[Br-]""1-butyl-3-ethenylimidazol-1-ium;bromide""87560886""InChI=1S/C9H15N2.BrH/c1-3-5-6-11-8-7-10(4-2)9-11;/h4,7-9H,2-3,5-6H2,1H3;1H/q+1;/p-1""231.130000""12""2""0"
7"CCCC[N+]1=CN(C=C1)C=C.[Cl-]""1-butyl-3-ethenylimidazol-1-ium;chloride""87559770""InChI=1S/C9H15N2.ClH/c1-3-5-6-11-8-7-10(4-2)9-11;/h4,7-9H,2-3,5-6H2,1H3;1H/q+1;/p-1""186.680000""12""2""0"
8"CCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-ethenyl-3-hexylimidazol-3-ium;bromide""87382573""InChI=1S/C11H19N2.BrH/c1-3-5-6-7-8-13-10-9-12(4-2)11-13;/h4,9-11H,2-3,5-8H2,1H3;1H/q+1;/p-1""259.190000""14""2""0"
9"CC[N+]1=CN(C=C1)C=C.[Br-]""1-ethenyl-3-ethylimidazol-3-ium;bromide""87327009""InChI=1S/C7H11N2.BrH/c1-3-8-5-6-9(4-2)7-8;/h3,5-7H,1,4H2,2H3;1H/q+1;/p-1""203.080000""10""2""0"
10"CCCCCCCCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-ethenyl-3-octadecylimidazol-3-ium;bromide""86657888""InChI=1S/C23H43N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-20-25-22-21-24(4-2)23-25;/h4,21-23H,2-3,5-20H2,1H3;1H/q+1;/p-1""427.500000""26""2""0"
11"CCCCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-ethenyl-3-tetradecylimidazol-3-ium;bromide""86657886""InChI=1S/C19H35N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-15-16-21-18-17-20(4-2)19-21;/h4,17-19H,2-3,5-16H2,1H3;1H/q+1;/p-1""371.400000""22""2""0"
12"CCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-decyl-3-ethenylimidazol-1-ium;bromide""86657884""InChI=1S/C15H27N2.BrH/c1-3-5-6-7-8-9-10-11-12-17-14-13-16(4-2)15-17;/h4,13-15H,2-3,5-12H2,1H3;1H/q+1;/p-1""315.290000""18""2""0"
13"CCCCCCCC[N+]1=CN(C=C1)C=C""1-ethenyl-3-octylimidazol-3-ium""86657883""InChI=1S/C13H23N2/c1-3-5-6-7-8-9-10-15-12-11-14(4-2)13-15/h4,11-13H,2-3,5-10H2,1H3/q+1""207.330000""15""1""1"
14"CCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-ethenyl-3-octylimidazol-3-ium;bromide""86657882""InChI=1S/C13H23N2.BrH/c1-3-5-6-7-8-9-10-15-12-11-14(4-2)13-15;/h4,11-13H,2-3,5-10H2,1H3;1H/q+1;/p-1""287.240000""16""2""0"
15"C=CN1C=C[N+](=C1)CCCS(=O)(=O)[O-]""3-(3-ethenylimidazol-1-ium-1-yl)propane-1-sulfonate""59435292""InChI=1S/C8H12N2O3S/c1-2-9-5-6-10(8-9)4-3-7-14(11,12)13/h2,5-6,8H,1,3-4,7H2""216.260000""14""1""0"
16"CCCCCC[N+]1=CN(C=C1)C=C.[Cl-]""1-ethenyl-3-hexylimidazol-3-ium;chloride""46178574""InChI=1S/C11H19N2.ClH/c1-3-5-6-7-8-13-10-9-12(4-2)11-13;/h4,9-11H,2-3,5-8H2,1H3;1H/q+1;/p-1""214.730000""14""2""0"
17"CCCC[N+]1=CN(C=C1)C=C""1-butyl-3-ethenylimidazol-1-ium""24766551""InChI=1S/C9H15N2/c1-3-5-6-11-8-7-10(4-2)9-11/h4,7-9H,2-3,5-6H2,1H3/q+1""151.230000""11""1""1"
18"CCCCCCCCCCCC[N+]1=CN(C=C1)C=C.[Br-]""1-dodecyl-3-ethenylimidazol-1-ium;bromide""23196178""InChI=1S/C17H31N2.BrH/c1-3-5-6-7-8-9-10-11-12-13-14-19-16-15-18(4-2)17-19;/h4,15-17H,2-3,5-14H2,1H3;1H/q+1;/p-1""343.300000""20""2""0"
19"CCCCCCCCCCCC[N+]1=CN(C=C1)C=C""1-dodecyl-3-ethenylimidazol-1-ium""21803878""InChI=1S/C17H31N2/c1-3-5-6-7-8-9-10-11-12-13-14-19-16-15-18(4-2)17-19/h4,15-17H,2-3,5-14H2,1H3/q+1""263.400000""19""1""1"
20"CC[N+]1=CN(C=C1)C=C""1-ethenyl-3-ethylimidazol-3-ium""9976734""InChI=1S/C7H11N2/c1-3-8-5-6-9(4-2)7-8/h3,5-7H,1,4H2,2H3/q+1""123.180000""9""1""1"
21"C=CN1C=C[N+](=C1)CC#N.[Cl-]""2-(3-ethenylimidazol-1-ium-1-yl)acetonitrile;chloride""9855582""InChI=1S/C7H8N3.ClH/c1-2-9-5-6-10(7-9)4-3-8;/h2,5-7H,1,4H2;1H/q+1;/p-1""169.610000""11""2""0"
22"C=CN1C=C[N+](=C1)CC(=O)C2=CC=C(C=C2)Cl.[Br-]""1-(4-chlorophenyl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""2881640""InChI=1S/C13H12ClN2O.BrH/c1-2-15-7-8-16(10-15)9-13(17)11-3-5-12(14)6-4-11;/h2-8,10H,1,9H2;1H/q+1;/p-1""327.600000""18""2""0"
23"C=CN1C=C[N+](=C1)CC(=O)C2=CC=CS2.[Br-]""2-(3-ethenylimidazol-1-ium-1-yl)-1-thiophen-2-ylethanone;bromide""2881449""InChI=1S/C11H11N2OS.BrH/c1-2-12-5-6-13(9-12)8-10(14)11-4-3-7-15-11;/h2-7,9H,1,8H2;1H/q+1;/p-1""299.190000""16""2""0"
24"C=CN1C=C[N+](=C1)CC(=O)C2=CC=C(C=C2)Br.[Br-]""1-(4-bromophenyl)-2-(3-ethenylimidazol-1-ium-1-yl)ethanone;bromide""2881324""InChI=1S/C13H12BrN2O.BrH/c1-2-15-7-8-16(10-15)9-13(17)11-3-5-12(14)6-4-11;/h2-8,10H,1,9H2;1H/q+1;/p-1""372.050000""18""2""0"
25"COC1=CC=C(C=C1)C(=O)C[N+]2=CN(C=C2)C=C.[Br-]""2-(3-ethenylimidazol-1-ium-1-yl)-1-(4-methoxyphenyl)ethanone;bromide""2881232""InChI=1S/C14H15N2O2.BrH/c1-3-15-8-9-16(11-15)10-14(17)12-4-6-13(18-2)7-5-12;/h3-9,11H,1,10H2,2H3;1H/q+1;/p-1""323.180000""19""2""0"
% data export as tabbed text file
% prompt user to select folder for data export
save_folder = uigetdir;
if isequal(save_folder, 0)
    % uigetdir returns 0 when the dialog is cancelled or closed; calling
    % cd(0) would raise an error, so skip the export instead
    disp('No folder selected; results were not exported')
else
    % change directory to selected folder
    cd(save_folder)
    % write the rearranged table as a tab-delimited text file in that folder
    writetable(SMARTSq_table2,'MATLAB_SMARTSq_results.txt','Delimiter','tab')
end

Retrieve Images of CID Compounds from SMARTS query match

% loop through CIDs and show images
% Uses: hit_CIDsALL (CIDs in column 1). Opens one figure per CID whose PNG
% could be retrieved; failures are reported and the loop continues.

% define the api base once; it does not change between iterations
api = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/';

% hit_CIDsALL may hold property columns by now, so count rows explicitly:
% length() would return the column count when there are fewer rows than columns
num_CIDs = size(hit_CIDsALL, 1);

for r = 1:num_CIDs
    % read the CID from column 1; linear indexing {r} would run into other
    % cells if the loop bound exceeded the number of rows
    CID = hit_CIDsALL{r,1};
    CID_url = [api 'cid/' num2str(CID) '/PNG'];
    try
        % retrieve CID PNG image and display
        [CID_img,map] = imread(CID_url);
        figure;
        imshow(CID_img,map)
        title(num2str(CID));
        % flush graphics so the figure and its title appear before the next request
        drawnow;
    catch
        disp('CID image not found')
        disp('Execution will continue')
    end
    % be polite to PubChem server (also after a failed request)
    pause(0.5);
end