% Base URLs of the two PubChem web services used in this script:
% PUG REST (compound records) and the SDQ agent (JSON output).
api = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/';
sdq = 'https://pubchem.ncbi.nlm.nih.gov/sdq/sdqagent.cgi?outfmt=json&query=';

% Use longer-than-default web timeouts (in seconds) so that a slow server
% response does not abort the request on the MATLAB side. The SDQ options
% additionally request that the response be read as JSON.
options_sdq = weboptions('ContentType', 'json', 'Timeout', 60);
options_api = weboptions('Timeout', 30);

% Query compound: 1-Butyl-3-methyl-imidazolium, CID 2734162.
% The CID is kept as text.
CID_SS_query = '2734162';
% Process each CID one-by-one. For every row j of SS_CIDs this loop
%   1. requests the isomeric SMILES from PUG REST and stores it in column 2,
%   2. requests the per-collection counts from SDQ and stores selected
%      counts (odd columns 3-13) next to the collection label reported by
%      the server (even columns 4-14), so the labels can be checked by eye.

% The service base URLs never change between iterations; define them once.
api = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/';
sdq = 'https://pubchem.ncbi.nlm.nih.gov/sdq/sdqagent.cgi?outfmt=json&query=';

% Results are stored as SS_CIDs{j,k}, i.e. one record per row, so iterate
% over the row count. length() would return the larger dimension, which is
% the column count (14) when the array already holds fewer than 14 rows.
for j = 1:size(SS_CIDs, 1)

    % NOTE(review): assumes column 1 of SS_CIDs holds the CID (it is the
    % 'CID' column of the table built from this array) -- confirm against
    % the code that creates SS_CIDs.
    CID = SS_CIDs{j,1};

    % define api call for isomeric SMILES
    CID_IsoSMILES_url = [api 'cid/' num2str(CID) '/property/IsomericSMILES/TXT'];

    % retrieve isomeric SMILES; fall back to a placeholder only if the
    % request fails, instead of overwriting every successful result.
    try
        CID_IsoSMILES = webread(CID_IsoSMILES_url, options_api);
    catch
        % left unsuppressed so a failed lookup is visible in the output
        CID_IsoSMILES = 'not found'
    end

    % add isomeric SMILES data to SS_CIDs data array
    % j increases by 1 on each iteration, so the first CID Isomeric SMILES
    % gets added to {1,2}, the second to {2,2}, third to {3,2}, etc.
    SS_CIDs{j,2} = CID_IsoSMILES;

    % define sdq call to retrieve count data for this CID
    litCountQ_url = [sdq '{"hide":"*","collection":"*","where":{"ands":{"cid":"' num2str(CID) '"}}}'];
    litCountQ = webread(litCountQ_url, options_sdq);

    % add selected collection count data to SS_CIDs data array
    % The column index on the left (e.g., {j,3}) is where the value is
    % stored; the row index on the right (e.g., SDQOutputSet{2,1}) selects
    % one collection entry of the SDQ response. These positions are
    % hard-coded, so the collection label is stored beside each count as a
    % manual validation check.
    SS_CIDs{j,3}  = litCountQ.SDQOutputSet{2,1}.totalCount;   % substance count
    SS_CIDs{j,4}  = litCountQ.SDQOutputSet{2,1}.collection;
    SS_CIDs{j,5}  = litCountQ.SDQOutputSet{4,1}.totalCount;   % patent count
    SS_CIDs{j,6}  = litCountQ.SDQOutputSet{4,1}.collection;
    SS_CIDs{j,7}  = litCountQ.SDQOutputSet{7,1}.totalCount;   % pubmed count
    SS_CIDs{j,8}  = litCountQ.SDQOutputSet{7,1}.collection;
    SS_CIDs{j,9}  = litCountQ.SDQOutputSet{13,1}.totalCount;  % thiemechemistry count
    SS_CIDs{j,10} = litCountQ.SDQOutputSet{13,1}.collection;
    SS_CIDs{j,11} = litCountQ.SDQOutputSet{15,1}.totalCount;  % springernature count
    SS_CIDs{j,12} = litCountQ.SDQOutputSet{15,1}.collection;
    SS_CIDs{j,13} = litCountQ.SDQOutputSet{14,1}.totalCount;  % wiley count
    SS_CIDs{j,14} = litCountQ.SDQOutputSet{14,1}.collection;

    % Two requests are sent per iteration; pause briefly so the loop stays
    % below PubChem's request-rate limit for programmatic access.
    pause(0.5)
end
% Turn the cell array into a string array, then strip leading and trailing
% white space from every element.
SS_CIDs_string = string(SS_CIDs);
SS_CIDs_string = strtrim(SS_CIDs_string);

% Build the validation table: each count column is followed by the
% collection label that was returned with it, so the labels can be checked
% against the column they sit in (e.g., every row of patent_lab should
% read 'patent').
SSq_bibtable_validate = array2table(SS_CIDs_string);
SSq_bibtable_validate.Properties.VariableNames = { ...
    'CID', ...
    'Isomeric_SMILES', ...
    'num_substances',      'substances_lab', ...
    'num_patent',          'patent_lab', ...
    'num_pubmed',          'pubmed_lab', ...
    'num_thiemechemistry', 'thiemechemistry_lab', ...
    'num_springernature',  'springernature_lab', ...
    'num_wiley',           'wiley_lab'};

% display the table for visual inspection
SSq_bibtable_validate
SSq_bibtable_validate = 25×14 table
| CID | Isomeric_SMILES | num_substances | substances_lab | num_patent | patent_lab | num_pubmed | pubmed_lab | num_thiemechemistry | thiemechemistry_lab | num_springernature | springernature_lab | num_wiley | wiley_lab |
---|
1 | "12971008" | "CCCN1C=C[N+](=C1)C.[I-]" | "69" | "substance" | "104" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "142" | "springernature" | "5" | "wiley" |
---|
2 | "304622" | "CCCCN1C=CN=C1C" | "51" | "substance" | "126" | "patent" | "7" | "pubmed" | "0" | "thiemechemistry" | "7" | "springernature" | "0" | "wiley" |
---|
3 | "61347" | "CCCCN1C=CN=C1" | "103" | "substance" | "1259" | "patent" | "21" | "pubmed" | "13" | "thiemechemistry" | "116" | "springernature" | "5" | "wiley" |
---|
4 | "11448496" | "CCCCN1C=C[N+](=C1)C.[I-]" | "58" | "substance" | "95" | "patent" | "0" | "pubmed" | "2" | "thiemechemistry" | "133" | "springernature" | "2" | "wiley" |
---|
5 | "11424151" | "CCCCN1C=C[N+](=C1)C.C(#N)[S-]" | "49" | "substance" | "143" | "patent" | "2" | "pubmed" | "0" | "thiemechemistry" | "32" | "springernature" | "1" | "wiley" |
---|
6 | "11171745" | "CCCCN1C=C[N+](=C1)C.C(=[N-])=NC#N" | "56" | "substance" | "4" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "77" | "springernature" | "0" | "wiley" |
---|
7 | "2734161" | "CCCCN1C=C[N+](=C1)C.[Cl-]" | "113" | "substance" | "772" | "patent" | "323" | "pubmed" | "2" | "thiemechemistry" | "1182" | "springernature" | "9" | "wiley" |
---|
8 | "118785" | "CCCN1C=CN=C1" | "94" | "substance" | "1263" | "patent" | "3" | "pubmed" | "2" | "thiemechemistry" | "36" | "springernature" | "0" | "wiley" |
---|
9 | "2734236" | "CCCCN1C=C[N+](=C1)C.[Br-]" | "84" | "substance" | "227" | "patent" | "323" | "pubmed" | "1" | "thiemechemistry" | "380" | "springernature" | "22" | "wiley" |
---|
10 | "2734162" | "CCCCN1C=C[N+](=C1)C" | "60" | "substance" | "2263" | "patent" | "668" | "pubmed" | "30" | "thiemechemistry" | "0" | "springernature" | "1" | "wiley" |
---|
11 | "529334" | "CCCCCN1C=CN=C1" | "63" | "substance" | "1934" | "patent" | "1" | "pubmed" | "1" | "thiemechemistry" | "4" | "springernature" | "0" | "wiley" |
---|
12 | "11788435" | "CCCCN1C=C[N+](=C1)C.[OH-]" | "26" | "substance" | "26" | "patent" | "3" | "pubmed" | "3" | "thiemechemistry" | "117" | "springernature" | "11" | "wiley" |
---|
13 | "11245926" | "CCCCN1C=C[N+](=C1)C.[Br-].BrBr" | "9" | "substance" | "0" | "patent" | "0" | "pubmed" | "1" | "thiemechemistry" | "1" | "springernature" | "0" | "wiley" |
---|
14 | "11160028" | "CCCN1C=C[N+](=C1)C.[Br-]" | "24" | "substance" | "21" | "patent" | "1" | "pubmed" | "0" | "thiemechemistry" | "11" | "springernature" | "0" | "wiley" |
---|
15 | "5245884" | "CCCN1C=C[N+](=C1)C" | "30" | "substance" | "350" | "patent" | "2" | "pubmed" | "1" | "thiemechemistry" | "0" | "springernature" | "1" | "wiley" |
---|
16 | "2734168" | "CCCCN1C=C[N+](=C1C)C" | "32" | "substance" | "564" | "patent" | "29" | "pubmed" | "2" | "thiemechemistry" | "0" | "springernature" | "1" | "wiley" |
---|
17 | "91210418" | "CCCCN1C=C[N+](=C1I)C" | "2" | "substance" | "1" | "patent" | "1" | "pubmed" | "0" | "thiemechemistry" | "0" | "springernature" | "0" | "wiley" |
---|
18 | "87560886" | "CCCC[N+]1=CN(C=C1)C=C.[Br-]" | "22" | "substance" | "4" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "11" | "springernature" | "2" | "wiley" |
---|
19 | "87559770" | "CCCC[N+]1=CN(C=C1)C=C.[Cl-]" | "9" | "substance" | "2" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "9" | "springernature" | "2" | "wiley" |
---|
20 | "87106874" | "CCCCCN1C=C[N+](=C1)CCCCC" | "3" | "substance" | "21" | "patent" | "1" | "pubmed" | "0" | "thiemechemistry" | "0" | "springernature" | "0" | "wiley" |
---|
21 | "24766551" | "CCCC[N+]1=CN(C=C1)C=C" | "8" | "substance" | "8" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "0" | "springernature" | "0" | "wiley" |
---|
22 | "17870330" | "CN(C)CCCN1C=CN=C1" | "14" | "substance" | "48" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "0" | "springernature" | "0" | "wiley" |
---|
23 | "16720567" | "CCCCN1C=C[N+](=C1)CCC.[Br-]" | "6" | "substance" | "1" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "1" | "springernature" | "0" | "wiley" |
---|
24 | "15557008" | "CCCC1=NC=CN1CC" | "7" | "substance" | "7" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "2" | "springernature" | "0" | "wiley" |
---|
25 | "15255204" | "CCCCN1C=C[N+](=C1)CCCC.[Cl-]" | "4" | "substance" | "6" | "patent" | "0" | "pubmed" | "0" | "thiemechemistry" | "0" | "springernature" | "1" | "wiley" |
---|
% Keep only the identifier columns and the numerical count columns for
% export; the *_lab validation columns are left out. Isomeric_SMILES is
% placed ahead of CID in the exported table.
SSq_bibtable = SSq_bibtable_validate(:, { ...
    'Isomeric_SMILES', ...
    'CID', ...
    'num_substances', ...
    'num_patent', ...
    'num_pubmed', ...
    'num_thiemechemistry', ...
    'num_springernature', ...
    'num_wiley'});

% display the table that will be exported
SSq_bibtable
SSq_bibtable = 25×8 table
| Isomeric_SMILES | CID | num_substances | num_patent | num_pubmed | num_thiemechemistry | num_springernature | num_wiley |
---|
1 | "CCCN1C=C[N+](=C1)C.[I-]" | "12971008" | "69" | "104" | "0" | "0" | "142" | "5" |
---|
2 | "CCCCN1C=CN=C1C" | "304622" | "51" | "126" | "7" | "0" | "7" | "0" |
---|
3 | "CCCCN1C=CN=C1" | "61347" | "103" | "1259" | "21" | "13" | "116" | "5" |
---|
4 | "CCCCN1C=C[N+](=C1)C.[I-]" | "11448496" | "58" | "95" | "0" | "2" | "133" | "2" |
---|
5 | "CCCCN1C=C[N+](=C1)C.C(#N)[S-]" | "11424151" | "49" | "143" | "2" | "0" | "32" | "1" |
---|
6 | "CCCCN1C=C[N+](=C1)C.C(=[N-])=NC#N" | "11171745" | "56" | "4" | "0" | "0" | "77" | "0" |
---|
7 | "CCCCN1C=C[N+](=C1)C.[Cl-]" | "2734161" | "113" | "772" | "323" | "2" | "1182" | "9" |
---|
8 | "CCCN1C=CN=C1" | "118785" | "94" | "1263" | "3" | "2" | "36" | "0" |
---|
9 | "CCCCN1C=C[N+](=C1)C.[Br-]" | "2734236" | "84" | "227" | "323" | "1" | "380" | "22" |
---|
10 | "CCCCN1C=C[N+](=C1)C" | "2734162" | "60" | "2263" | "668" | "30" | "0" | "1" |
---|
11 | "CCCCCN1C=CN=C1" | "529334" | "63" | "1934" | "1" | "1" | "4" | "0" |
---|
12 | "CCCCN1C=C[N+](=C1)C.[OH-]" | "11788435" | "26" | "26" | "3" | "3" | "117" | "11" |
---|
13 | "CCCCN1C=C[N+](=C1)C.[Br-].BrBr" | "11245926" | "9" | "0" | "0" | "1" | "1" | "0" |
---|
14 | "CCCN1C=C[N+](=C1)C.[Br-]" | "11160028" | "24" | "21" | "1" | "0" | "11" | "0" |
---|
15 | "CCCN1C=C[N+](=C1)C" | "5245884" | "30" | "350" | "2" | "1" | "0" | "1" |
---|
16 | "CCCCN1C=C[N+](=C1C)C" | "2734168" | "32" | "564" | "29" | "2" | "0" | "1" |
---|
17 | "CCCCN1C=C[N+](=C1I)C" | "91210418" | "2" | "1" | "1" | "0" | "0" | "0" |
---|
18 | "CCCC[N+]1=CN(C=C1)C=C.[Br-]" | "87560886" | "22" | "4" | "0" | "0" | "11" | "2" |
---|
19 | "CCCC[N+]1=CN(C=C1)C=C.[Cl-]" | "87559770" | "9" | "2" | "0" | "0" | "9" | "2" |
---|
20 | "CCCCCN1C=C[N+](=C1)CCCCC" | "87106874" | "3" | "21" | "1" | "0" | "0" | "0" |
---|
21 | "CCCC[N+]1=CN(C=C1)C=C" | "24766551" | "8" | "8" | "0" | "0" | "0" | "0" |
---|
22 | "CN(C)CCCN1C=CN=C1" | "17870330" | "14" | "48" | "0" | "0" | "0" | "0" |
---|
23 | "CCCCN1C=C[N+](=C1)CCC.[Br-]" | "16720567" | "6" | "1" | "0" | "0" | "1" | "0" |
---|
24 | "CCCC1=NC=CN1CC" | "15557008" | "7" | "7" | "0" | "0" | "2" | "0" |
---|
25 | "CCCCN1C=C[N+](=C1)CCCC.[Cl-]" | "15255204" | "4" | "6" | "0" | "0" | "0" | "1" |
---|
% export data as tabbed text file
% Prompt the user to select a folder for the data export. The dialog is
% only opened when the MATLAB desktop is running; without it, or if the
% user cancels the dialog (uigetdir returns 0), the file is written to the
% current folder.
export_folder = 0;
if usejava('desktop')
    export_folder = uigetdir(pwd, 'Select folder for data export');
end
if isequal(export_folder, 0)
    export_folder = pwd;
end

% Write into the selected folder via a full path rather than changing
% directory, so the current working folder of the session is left as-is.
writetable(SSq_bibtable, ...
    fullfile(export_folder, 'MATLAB_SDQ_Bibliometrics_results.txt'), ...
    'Delimiter', 'tab')