>>> url = 'https://www.facebook.com/xxxxxx?test1q223'
>>> url = url.split('?')[:-1][0]
>>> url
'https://www.facebook.com/xxxxxx'
>>>
There are also ways to do it using the stdlib's
urllib.parse module:
>>> from urllib.parse import urlparse
>>> parsed = urlparse('https://www.facebook.com/xxxxxx?test1q223')
>>> parsed._replace(query='').geturl()
'https://www.facebook.com/xxxxxx'
Note that _replace starts with an underscore, but it is not private: it is a documented method of named tuples (the underscore only prevents clashes with field names), so it is safe to rely on.
You might also want to clear params and fragment in the same way, depending on your exact needs.
Hi, thanks.
If I want to do this in a loop, is something like the following possible?
I put the URLs in a text file and write the cleaned URLs to a new file.
Example: 'orgginal.txt' would contain
https://www.facebook.com/xxxxxx?test1q223, and out.txt would contain
https://www.facebook.com/xxxxxx
# Copy every URL from 'orgginal.txt' to 'out.txt' with its query string removed.
with open('orgginal.txt') as f, open('out.txt', 'w') as f_out:
    for line in f:
        line = line.strip()
        # Keep the text before the first '?'. Unlike split('?')[:-1][0], this
        # does not raise IndexError for lines without a '?' (or blank lines).
        line = line.split('?', 1)[0]
        f_out.write('{}\n'.format(line))
I have figured it out:
# Copy every URL from 'orgginal.txt' to 'out.txt' with its query string
# removed, echoing each line before and after the change.
with open('orgginal.txt') as f, open('out.txt', 'w') as f_out:
    for line in f:
        line = line.strip()
        print(line)
        # Keep the text before the first '?'. Unlike split('?')[:-1][0], this
        # does not raise IndexError for lines without a '?' (or blank lines).
        line = line.split('?', 1)[0]
        print(line)
        f_out.write('{}\n'.format(line))
or
from urllib.parse import urlparse

# Copy every URL from 'orgginal.txt' to 'out.txt' with its query string
# removed, echoing each cleaned URL.
with open('orgginal.txt') as f, open('out.txt', 'w') as f_out:
    for line in f:
        line = line.strip()
        parsed = urlparse(line)
        # _replace returns a copy of the parse result with the query emptied;
        # geturl() reassembles it into a string.
        newline = parsed._replace(query='').geturl()
        print(newline)
        # Write the cleaned URL (not the original line) so out.txt is not left empty.
        f_out.write('{}\n'.format(newline))
You really need to study up on slicing, see:
https://www.python-course.eu/python3_seq..._types.php
This will handle all cases:
url1 = 'https://www.facebook.com/xxxxxx/test1q223/'
url2 = 'https://www.facebook.com/xxxxxx/?test1q223'
url3 = 'https://www.facebook.com/xxxxxx/test1q223'


def change_url(url):
    """Normalise the tail of *url*.

    - If *url* ends with '/', exactly one trailing '/' is removed.
    - Otherwise, if the last '/'-separated segment starts with '?', that
      leading '?' is removed.
    - Any other URL (including the empty string) is returned unchanged.
    """
    # endswith() is safe on an empty string, where url[-1] raises IndexError.
    if url.endswith('/'):
        return url[:-1]
    urlx = url.split('/')
    if urlx[-1].startswith('?'):
        urlx[-1] = urlx[-1][1:]
        return '/'.join(urlx)
    return url


print(f'url1: {change_url(url1)}')
print(f'url2: {change_url(url2)}')
print(f'url3: {change_url(url3)}')
results:
Output:
url1: https://www.facebook.com/xxxxxx/test1q223
url2: https://www.facebook.com/xxxxxx/test1q223
url3: https://www.facebook.com/xxxxxx/test1q223
Hi, I have a question about this script.
Why didn't my first URL (url1) get its last segment "/test1q223" cut off?
url1 = 'https://www.facebook.com/xxxxxx/test1q223/'
url2 = 'https://www.facebook.com/xxxxxx/?test1q223'
url3 = 'https://www.facebook.com/xxxxxx/test1q223'
url4 = 'https://www.facebook.com/xxxxxx/test1q223'


def change_url(url):
    """Return *url* with its last '/'-separated segment removed.

    The URL is split on '/', the final piece is dropped and the rest is
    joined back together. Note that a URL ending in '/' has an *empty* final
    piece, so for such a URL only the trailing slash disappears (this is why
    url1 keeps its 'test1q223' segment).

    URLs that have no path segment after the host (fewer than four pieces,
    e.g. 'https://www.facebook.com') or an empty host are returned unchanged.
    """
    urlx = url.split('/')
    # A last piece such as '?test1q223' is a bare query string: drop it.
    if urlx[-1].startswith('?'):
        return '/'.join(urlx[:-1])
    # Require scheme, '', host and at least one further piece. The length
    # check avoids the IndexError that indexing urlx[2] / urlx[3] directly
    # raises on shorter URLs.
    if len(urlx) >= 4 and urlx[2] != '':
        return '/'.join(urlx[:-1])
    return url


print(f'url1: {change_url(url1)}')
print(f'url2: {change_url(url2)}')
print(f'url3: {change_url(url3)}')
print(f'url4: {change_url(url4)}')
#output is :
url1:
https://www.facebook.com/xxxxxx/test1q223
url2:
https://www.facebook.com/xxxxxx
url3:
https://www.facebook.com/xxxxxx
url4:
https://www.facebook.com/xxxxxx
Quote:Why does my first url1 didn't cut off the last string" /test1q223 "?
Break it down step by step (uses f-string which requires python 3.6 or newer)
>>> url1 = 'https://www.facebook.com/xxxxxx/test1q223/'
>>> url2 = 'https://www.facebook.com/xxxxxx/?test1q223'
>>> url3 = 'https://www.facebook.com/xxxxxx/test1q223'
>>> def change_url(url):
... urlx = url.split('/')
... print(f'url: {url}, urlx: {urlx}')
... if url[-1] == '/':
... print(f'returning url[-1]: {url[-1]}')
... return url[:-1]
... if urlx[-1].startswith('?'):
... print(f'urlx[-1][1:]: {urlx[-1][1:]}')
... print(f"returning '/'.join(urlx): {'/'.join(urlx)}")
... return '/'.join(urlx)
... # No change needed
... return url
...
>>> print(f'url1: {change_url(url1)}')
url: https://www.facebook.com/xxxxxx/test1q223/, urlx: ['https:', '', 'www.facebook.com', 'xxxxxx', 'test1q223', '']
returning url[-1]: /
url1: https://www.facebook.com/xxxxxx/test1q223
>>> # ------------------------------------------
...
>>> print(f'url2: {change_url(url2)}')
url: https://www.facebook.com/xxxxxx/?test1q223, urlx: ['https:', '', 'www.facebook.com', 'xxxxxx', '?test1q223']
urlx[-1][1:]: test1q223
returning '/'.join(urlx): https://www.facebook.com/xxxxxx/?test1q223
url2: https://www.facebook.com/xxxxxx/?test1q223
>>> # ------------------------------------------
...
>>> print(f'url3: {change_url(url3)}')
url: https://www.facebook.com/xxxxxx/test1q223, urlx: ['https:', '', 'www.facebook.com', 'xxxxxx', 'test1q223']
url3: https://www.facebook.com/xxxxxx/test1q223
>>>