Jan-03-2018, 02:06 PM
I need help understanding the best practice for removing illegal characters from file and path names.
Windows (FAT32, NTFS): Any Unicode except NUL, \, /, :, *, ?, ", <, >, |
I found my way
Windows (FAT32, NTFS): Any Unicode except NUL, \, /, :, *, ?, ", <, >, |
# Download a movie's poster from IMDb and save it under the movie's title.
# import libraries
import os
import re
from contextlib import closing

try:  # Python 2 module names, as used by the original script
    from urllib2 import urlopen
    from urlparse import urlparse
except ImportError:  # Python 3 equivalents
    from urllib.request import urlopen
    from urllib.parse import urlparse

# Characters Windows rejects in a file name: control characters (NUL included)
# and \ / : * ? " < > |
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\x00-\x1f\\/:*?"<>|]')


def clean_filename(name):
    """Return *name* with every character Windows forbids in file names removed."""
    return _ILLEGAL_FILENAME_CHARS.sub('', name)


def main(spage='http://www.imdb.com/title/tt2527336/?ref_=rlm'):
    """Fetch the IMDb page *spage*, download its poster and rename it to the title.

    The poster is first written to the current directory under the name it has
    in its URL, then renamed to ``<movie title><original extension>``.
    """
    # bs4 is third-party; importing it here keeps clean_filename usable without it.
    from bs4 import BeautifulSoup

    # get name movie
    with closing(urlopen(spage)) as page:
        soup = BeautifulSoup(page, 'html.parser')
    movie = soup.find('h1', attrs={'itemprop': 'name'})
    title = movie.get_text(strip=True)
    print(title)

    # get cover movie
    cover = soup.find(attrs={'class': 'poster'})
    cover_url = cover.find('img')['src']
    imgn = os.path.basename(urlparse(cover_url).path)
    # Both the HTTP response and the local file are closed even if a step fails.
    with closing(urlopen(cover_url)) as img, open(imgn, 'wb') as local_file:
        local_file.write(img.read())

    # get ext
    ext = os.path.splitext(imgn)[1]

    # get path & file name
    old_file = os.path.abspath(imgn)
    folder, filename = os.path.split(old_file)
    print(folder)
    print(filename)

    # rename file
    # The raw title can hold characters such as ':' that are not valid in a
    # Windows file name, so it is cleaned before being used as one.
    new_file = os.path.join(folder, clean_filename(title) + ext)
    print(old_file)
    print(new_file)
    if new_file != old_file:
        # On Windows os.rename refuses to overwrite, so clear a poster left
        # behind by an earlier run first.
        if os.path.exists(new_file):
            os.remove(new_file)
        os.rename(old_file, new_file)
    # No os.remove(old_file) here: the rename already moved the file, so
    # removing the old path would only raise.


if __name__ == '__main__':
    main()
I found my way

# Download a movie's poster from IMDb and save it under the movie's title,
# with the characters Windows does not allow in file names stripped out.
# import libraries
import os
import re
from contextlib import closing

try:  # Python 2 module names, as used by the original script
    from urllib2 import urlopen
    from urlparse import urlparse
except ImportError:  # Python 3 equivalents
    from urllib.request import urlopen
    from urllib.parse import urlparse

# Characters Windows rejects in a file name: control characters (NUL included)
# and \ / : * ? " < > |
# A single character class avoids the quoting mistakes of a hand-written list
# ('\' does not terminate a string literal, and 'NUL' is three letters, not
# the NUL character).
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\x00-\x1f\\/:*?"<>|]')


def clean_filename(name):
    """Return *name* with every character Windows forbids in file names removed."""
    return _ILLEGAL_FILENAME_CHARS.sub('', name)


def main(spage='http://www.imdb.com/title/tt2527336/?ref_=rlm'):
    """Fetch the IMDb page *spage*, download its poster and rename it to the title.

    The poster is first written to the current directory under the name it has
    in its URL, then renamed to ``<cleaned movie title><original extension>``.
    """
    # bs4 is third-party; importing it here keeps clean_filename usable without it.
    from bs4 import BeautifulSoup

    # get name movie
    with closing(urlopen(spage)) as page:
        soup = BeautifulSoup(page, 'html.parser')
    movie = soup.find('h1', attrs={'itemprop': 'name'})
    title = movie.get_text(strip=True)
    print(title)

    # clean string
    ctitle = clean_filename(title)
    print(ctitle)

    # get cover movie
    cover = soup.find(attrs={'class': 'poster'})
    cover_url = cover.find('img')['src']
    imgn = os.path.basename(urlparse(cover_url).path)
    # Both the HTTP response and the local file are closed even if a step fails.
    with closing(urlopen(cover_url)) as img, open(imgn, 'wb') as local_file:
        local_file.write(img.read())

    # get ext
    ext = os.path.splitext(imgn)[1]

    # get path & file name
    old_file = os.path.abspath(imgn)
    folder, filename = os.path.split(old_file)
    print(folder)
    print(filename)

    # rename file
    new_file = os.path.join(folder, ctitle + ext)
    print(old_file)
    print(new_file)
    if new_file != old_file:
        # On Windows os.rename refuses to overwrite, so clear a poster left
        # behind by an earlier run first.
        if os.path.exists(new_file):
            os.remove(new_file)
        os.rename(old_file, new_file)
    # No os.remove(old_file) here: the rename already moved the file, so
    # removing the old path would only raise.


if __name__ == '__main__':
    main()