Apr-18-2018, 10:38 AM
import csv
from collections import defaultdict
import datetime
import time

from Bio import Entrez
from Bio import Medline
import requests
import pandas as pd

##please install before using this script: pip install biopython

# Contact address sent to NCBI with every request.
# NOTE(review): this value looks like a redacted placeholder -- replace it
# with a real address before running against NCBI.
ENTREZ_EMAIL = '[email protected]'

# Input CSV and tab-separated output used when the script is run directly.
GRANT_CSV_PATH = "C:\\Users\\ragarwa3\\Desktop\\pubmed\\Funding_Report_2017_1.csv"
OUTPUT_PATH = "C:\\Users\\ragarwa3\\Desktop\\pubmed\\Funding_Report_2017_PI_mapping_file011.txt"


def _call_with_retries(request, attempts=3, delay=2.0):
    """Call ``request()`` and retry a few times on network-level failures.

    ``ssl.SSLError`` and the ``urllib`` errors are all ``OSError``
    subclasses, so one ``except`` clause covers them. The last failure is
    re-raised unchanged so callers still see the original exception.
    """
    for attempt in range(1, attempts + 1):
        try:
            return request()
        except OSError:
            if attempt == attempts:
                raise
            time.sleep(delay)


def _parse_medline(handle):
    """Yield Medline records from *handle*, closing it when parsing ends."""
    try:
        for record in Medline.parse(handle):
            yield record
    finally:
        handle.close()


def search(query):
    """Run a PubMed ``esearch`` for *query* and return the parsed result.

    The returned object is whatever ``Entrez.read`` produces; the script
    reads its ``'IdList'`` entry (PubMed ids, at most 1000, sorted by
    relevance).
    """
    Entrez.email = ENTREZ_EMAIL
    handle = _call_with_retries(
        lambda: Entrez.esearch(db='pubmed',
                               sort='relevance',
                               retmax='1000',
                               retmode='xml',
                               term=query)
    )
    try:
        return Entrez.read(handle)
    finally:
        handle.close()


def fetch_details(id_list):
    """Return an iterator over the Medline records for the given PubMed ids.

    An empty *id_list* yields no records and sends no request. Records are
    parsed lazily, so the network handle stays open until the iterator is
    exhausted or discarded.
    """
    if not id_list:
        return iter(())
    ids = ','.join(id_list)
    Entrez.email = ENTREZ_EMAIL
    handle = _call_with_retries(
        lambda: Entrez.efetch(db='pubmed',
                              retmode='text',
                              rettype='medline',
                              id=ids)
    )
    return _parse_medline(handle)


def read_grant_file(grant):
    """Read the grant CSV and return all author cells as one ';'-joined string.

    The header row is skipped. For every data row the text of column 21 is
    collected (presumably ';'-separated investigator names -- confirm against
    the report layout) together with column 0, which is used as the row key.
    A mapping of row key to author name(s) is printed for inspection.

    Rows with fewer than 22 columns (for example blank lines) are skipped.

    Returns:
        The author cells concatenated in file order. Each non-empty cell is
        terminated with ';' so names from adjacent rows never fuse together.
    """
    key_col = 0
    author_col = 21
    entries = []  # (row key, author cell) pairs in file order

    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(grant, "r", encoding="ISO-8859-1", newline="") as grant_file:
        reader = csv.reader(grant_file)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if len(row) <= author_col:
                continue
            entries.append((row[key_col].rstrip(), row[author_col].rstrip()))

    mapping = {}
    for key, author_cell in entries:
        names = author_cell.rstrip(";").split(";")
        if len(names) == 1:
            # A single author is stored as a one-element list, several
            # authors as plain strings under "<key>_<n>". This mapping is
            # only printed, so the original shape is kept.
            mapping[key] = names
        else:
            for index, person in enumerate(names):
                mapping[key + "_" + str(index)] = person
    print(mapping)

    pieces = []
    for _, author_cell in entries:
        if author_cell and not author_cell.endswith(";"):
            author_cell += ";"
        pieces.append(author_cell)
    return "".join(pieces)


def distinct(names):
    """Return the unique items of *names* in first-seen order."""
    seen = set()
    unique = []
    for name in names:
        if name not in seen:
            seen.add(name)
            unique.append(name)
    return unique


def _format_record(record):
    """Build one tab-separated output line for a Medline *record*.

    Columns: PMID, title (TI), publication date (DP), journal title (JT),
    grant sources (GR), then the de-duplicated author list (AU) joined with
    ', '. Missing fields are written as 'None'; a missing author list is
    written as '?'.
    """
    fields = [record.get(tag) for tag in ('PMID', 'TI', 'DP', 'JT', 'GR')]
    line = "\t".join(str(value) for value in fields)
    record_authors = distinct(record.get('AU', '?'))
    return line + "\t" + ', '.join(record_authors) + "\n"


def main(grant_path=GRANT_CSV_PATH, output_path=OUTPUT_PATH):
    """Query PubMed for every distinct author in the grant file.

    Writes one line per publication record to *output_path*.
    """
    author_field = read_grant_file(grant_path).rstrip(";")
    # Strip and drop empty entries so each author is queried exactly once.
    names = distinct(
        name.strip() for name in author_field.split(";") if name.strip()
    )
    print("Number of distinct authors:" + str(len(names)))

    with open(output_path, "w+", encoding="UTF-8") as out_file:
        for name in names:
            results = search(name + '[AU]')
            for record in fetch_details(results['IdList']):
                out_file.write(_format_record(record))


if __name__ == '__main__':
    main()

# Error output reported when running the script:
# Traceback (most recent call last):
File "C:\Users\ragarwa3\Desktop\pubmed\nih_grant.py", line 67, in <module>
records = fetch_details(id_list)
File "C:\Users\ragarwa3\Desktop\pubmed\nih_grant.py", line 26, in fetch_details
id=ids)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\site-packages\Bio\Entrez\__init__.py", line 180, in efetch
return _open(cgi, variables, post=post)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\site-packages\Bio\Entrez\__init__.py", line 528, in _open
handle = _urlopen(cgi)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 526, in open
response = self._open(req, data)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 544, in _open
'_open', req)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1361, in https_open
context=self._context, check_hostname=self._check_hostname)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 1321, in do_open
r = h.getresponse()
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 1331, in getresponse
response.begin()
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 297, in begin
version, status, reason = self._read_status()
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\http\client.py", line 258, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\socket.py", line 586, in readinto
return self._sock.recv_into(b)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\ssl.py", line 1009, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\ssl.py", line 871, in read
return self._sslobj.read(len, buffer)
File "C:\Users\ragarwa3\AppData\Local\Programs\Python\Python36\lib\ssl.py", line 631, in read
v = self._sslobj.read(len, buffer)
ssl.SSLError: [SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:2217)