""" This script splices the original text of a webpage with that of a designated translation language. It uses google translate, a list of translate codes is available here: https://ctrlq.org/code/19899-google-translate-languages The function takes 2 arguments, language_splicer(link, language). Destination language defaults to Spanish if not specified. """ import time, bs4, os, requests, webbrowser, sys from googletrans import Translator os.chdir('C:\\Users\\Rory\\Documents\\Python\\Spanish_splicer') def language_splicer(link, language='es'): start = time.time() print('started') res = requests.get(link) try: res.raise_for_status() except: print('Failed to get page') sys.exit() soup = bs4.BeautifulSoup(res.content, 'html.parser') a = str(soup) print('page obtained') h1 = soup.find_all('h1') h2 = soup.find_all('h2') h3 = soup.find_all('h3') h4 = soup.find_all('h4') h5 = soup.find_all('h5') p = soup.find_all('p') textOriginal = h1 + h2 + h3 + h4 + h5 + p print("len of textOriginal: " + str(len(textOriginal))) print("Predicted time to complete: " + str(0.3888*len(textOriginal))+ "-" + str(0.5259*len(textOriginal))+" s") JustText = [] for i in textOriginal: JustText.append(str(i.get_text())) translator = Translator() newLanguageText = [] # print("len of JustText: " + str(len(JustText))) # AllText = " || ".join(JustText) # print(AllText) # AllTrans = translator.translate(AllText, dest=language).text # newLanguageText = AllTrans.split(" || ") # print(newLanguageText) # print("len of newLanguageText: " + str(len(newLanguageText))) for i in range(0, len(JustText)): try: new = translator.translate(str(JustText[i]), dest=language).text except: new = 'Translation unavailable.' newLanguageText.append(new) Spliced = [] for i in range(0, len(JustText)): original = str(textOriginal[i]) splice = original[:-5] + ' || ' + newLanguageText[i] + original[-5:] Spliced.append(splice) print("len of Spliced: " + str(len(Spliced))) for i in range(0, len(Spliced)): a = a.replace(str(textOriginal[i]), Spliced[i]) file = open('page.html', 'w', encoding="utf-8") file.write(a) file.close() webbrowser.open('page.html') end = time.time() print('Time elapsed: ' + str(end-start)+' s') print('finished') # Run the script language_splicer('https://www.bbc.co.uk/news/uk-england-london-48011838', 'es') # And the same one in Japanese language_splicer('https://www.bbc.co.uk/news/uk-england-london-48011838', 'ja')