Thanks.
Actually, `value` is the sentence I'm going to add.
import getpass
import os
import signal
import sys
import time

import requests as req
from bs4 import BeautifulSoup
# Site the script talks to. Start with the dev site; once the script is
# confirmed to work there, change the URL to the real site.
base_url = "https://dev.tatoeba.org"  # No trailing slash

# Progress state, also read by the interruption handler:
#   add_count     - 1-based position of the sentence being processed
#   last_sentence - text of the most recent sentence that was sent
add_count, last_sentence = 1, ""
def log_interruption(signal, frame):
    """Report progress and exit when the script is interrupted.

    Registered as the SIGINT handler, so it runs when the user presses
    Ctrl-C. It prints the module-level ``add_count`` and ``last_sentence``
    so the user can check where the run stopped, then exits with status 0.

    Args:
        signal: Signal number supplied by the interpreter (unused). The name
            shadows the ``signal`` module inside this function only.
        frame: Stack frame supplied by the interpreter (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    print(
        '\n[INFO]Script manually interrupted.\n'
        f'{add_count} sentences added.\n'
        f'The last inserted sentence is probably: {last_sentence} Please check.'
    )
    sys.exit(0)
# ---------------------------------------------------------------------------
# Bulk-add sentences through the website's own forms: log in, confirm that
# the "Add sentences" page is reachable, then submit the input file one line
# at a time with a pause between requests.
# ---------------------------------------------------------------------------

# Pause between submissions, in seconds. Please do not go under 6 seconds.
DELAY_SECONDS = 6


def _abort(message):
    """Print *message* and stop the script with a non-zero exit status."""
    print(message)
    sys.exit(1)


def _page_title(page):
    """Return the text of the page's <title>, or '' when it has none."""
    title = page.title
    if title is None or title.string is None:
        return ""
    return str(title.string)


def _input_value(page, name):
    """Return the ``value`` of the <input> called *name*, aborting if absent."""
    field = page.find('input', {"name": name})
    if field is None or not field.has_attr('value'):
        _abort(f'[ERROR]Could not find the "{name}" field on the login page.')
    return field['value']


# Ctrl-C prints a progress report (see log_interruption) instead of a traceback.
signal.signal(signal.SIGINT, log_interruption)

# Credentials come from the environment, falling back to an interactive
# prompt, so that no password has to be stored in this file.
username = os.environ.get("TATOEBA_USERNAME") or input("Tatoeba username: ")
password = os.environ.get("TATOEBA_PASSWORD") or getpass.getpass("Tatoeba password: ")

# 0. Use a session object to handle cookies
session = req.Session()

# 1. GET the login page to get necessary information
response = session.get(f"{base_url}/eng/users/login")
# Check the status before parsing: on a failed request the token lookups
# below would otherwise be what reports the problem.
if response.status_code != 200:
    _abort("[ERROR]Could not reach login page. That's not supposed to happen...")
soup = BeautifulSoup(response.text, features="html.parser")
token_fields = _input_value(soup, "_Token[fields]")
token_unlocked = _input_value(soup, "_Token[unlocked]")
try:
    csrfToken = session.cookies['csrfToken']
except KeyError:
    _abort("[ERROR]The login page did not set a csrfToken cookie.")

# 2. POST the login page with necessary information
login_data = {
    '_csrfToken': csrfToken,
    'username': username,
    'password': password,
    'rememberMe': 1,
    '_Token[fields]': token_fields,
    '_Token[unlocked]': token_unlocked,
}
response = session.post(f"{base_url}/eng/users/check_login", login_data)
if response.status_code != 200:
    soup = BeautifulSoup(response.text, features="html.parser")
    _abort("[ERROR]Could not log in: " + _page_title(soup))

# 3. GET the Add sentences page just to check if everything went well
response = session.get(f"{base_url}/eng/sentences/add")
soup = BeautifulSoup(response.text, features="html.parser")
if response.status_code != 200 or "Add sentences - Tatoeba" not in _page_title(soup):
    _abort('[ERROR]Could not display the "Add sentences" page. Problem at login?' + _page_title(soup))

# 4. POST to add sentences
session.headers.update({'X-CSRF-Token': csrfToken})
language = "por"  # Modify this to the language in which you want to add sentences

# One sentence per line. The encoding is explicit so non-ASCII text is read
# the same way on every platform; "utf-8-sig" also tolerates a leading BOM.
# NOTE(review): assumes the input file is UTF-8 - confirm for your file.
with open('sentences_to_add.csv', 'r', encoding='utf-8-sig') as f:  # Modify the filename or use the same name
    sentences = [line.rstrip() for line in f]
# Blank lines would otherwise be submitted as empty sentences.
sentences = [sentence for sentence in sentences if sentence]

added_total = 0  # sentences the site confirmed as added (duplicates excluded)
for sentence in sentences:
    add_data = {
        'value': sentence,
        'selectedLang': language,
    }
    response = session.post(f"{base_url}/eng/sentences/add_an_other_sentence", add_data)
    last_sentence = sentence
    soup = BeautifulSoup(response.text, features="html.parser")
    if response.status_code != 200 or "Sentences" not in _page_title(soup):
        _abort(f'[Error]Could not POST the sentence: {sentence}' + _page_title(soup))

    if "Your sentence was not added because the following already exists." in response.text:
        print(f'({add_count}) {sentence} => Skipped because duplicate found.')
    elif "linkToSentence" in response.text:
        print(f'({add_count}){sentence} => Added.')
        added_total += 1
    else:
        print(f'({add_count})[WARNING]Something was unexpected. The request may have been ignored: {sentence}')

    time.sleep(DELAY_SECONDS)
    add_count += 1

print(f'END - Added {added_total} sentences.')