#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import pywikibot
from pywikibot import pagegenerators, page, diff
from pywikibot import editor as editarticle
from pywikibot.specialbots import BaseUnlinkBot
from solve_disambiguation import EditOption, ShowPageOption
from pywikibot.bot import (
CurrentPageBot, SingleSiteBot, ExistingPageBot, NoRedirectPageBot, AutomaticTWSummaryBot, Bot, StandardOption)
import re
#########################################################################
import codecs
from itertools import chain
import os
import re
import pywikibot
from pywikibot import editor as editarticle
from pywikibot.tools import first_lower, first_upper as firstcap
from pywikibot import pagegenerators, config, i18n
from pywikibot.bot import (
SingleSiteBot,
StandardOption, HighlightContextOption, ListOption, OutputProxyOption,
)
from pywikibot.tools.formatter import SequenceOutputter
#########################################################################
#TODO: etymology 1 matching
#Put pronunciation after pics and WP template in English section.
#Start tk window at top of page
#Create an exceptions list
#Put AU file after IPA (if line contains "IPA") but before "{{rhymes" and after "audio" lines
#Make sure only the English pronunciation section is used, e.g. combo used the Spanish pronunciation section
#Re-record g'day / check on File:EN-AU ck1 cab off the rank.ogg
#after Alternate forms
#Turn off comma and bracket removal in other script
#Example to work on: 'battle axe'
#Re-record: "First in", "g'day", "it's the truth"
#Already done: "bag out"
#Idea: split entry into lines, remove whitespace
#Launch notepad++ if an exception occurs
class EditOption(StandardOption):

    """Option that opens a text editor so the user can change a text."""

    def __init__(self, option, shortcut, text, start, title):
        """Initializer.

        @param option: option name, passed on to StandardOption
        @type option: str
        @param shortcut: option shortcut, passed on to StandardOption
        @type shortcut: str
        @param text: the text offered to the user for editing
        @type text: str
        @param start: position handed to the editor as ``jumpIndex``
        @type start: int
        @param title: string handed to the editor as ``highlight``
        @type title: str
        @rtype: None
        """
        super(EditOption, self).__init__(option, shortcut)
        self._text = text
        self._start = start
        self._title = title
        # Set up front so that `stop` and callers reading `new_text` never
        # hit an AttributeError when `result` has not run (or failed early).
        self.new_text = None

    @property
    def stop(self):
        """Return whether the editor produced a non-empty, changed text.

        False until `result` has been called, and False when the editor
        returned nothing or returned the text unchanged.

        @rtype: bool
        """
        return bool(self.new_text) and self.new_text != self._text

    def result(self, value):
        """Open a text editor, store its outcome and defer to the parent.

        The value returned by the editor is kept in ``self.new_text``.

        @param value: passed through unchanged to StandardOption.result
        @return: whatever StandardOption.result returns for ``value``
        """
        editor = editarticle.TextEditor()
        self.new_text = editor.edit(self._text, jumpIndex=self._start,
                                    highlight=self._title)
        return super(EditOption, self).result(value)
class ShowPageOption(StandardOption):

    """Option that displays a page's text in the text editor."""

    def __init__(self, option, shortcut, start, page):
        """Initializer.

        @param option: option name, passed on to StandardOption
        @param shortcut: option shortcut, passed on to StandardOption
        @param start: position handed to the editor as ``jumpIndex``
        @param page: page to display; a redirect is replaced by its target
        """
        # NOTE(review): the third positional argument is presumably
        # StandardOption's `stop` flag - confirm against pywikibot.
        super(ShowPageOption, self).__init__(option, shortcut, False)
        self._start = start
        target = page.getRedirectTarget() if page.isRedirectPage() else page
        self._page = target

    def result(self, value):
        """Open a text editor on the stored page; nothing is returned.

        @param value: unused
        """
        viewer = editarticle.TextEditor()
        shown = self._page
        viewer.edit(shown.text, jumpIndex=self._start,
                    highlight=shown.title())
# --- Script set-up ---------------------------------------------------------
# Files are looked up on Wikimedia Commons; entries are edited on the
# English Wiktionary.
siteCom = pywikibot.Site("commons", "commons")
siteWikt = pywikibot.Site("en", "wiktionary")

# Running counter of the files visited by the collection loop (starts at 1).
fileCount = 1

# Window of the upload log to scan.
# NOTE(review): `start` is the later timestamp here, which matches a log that
# is read newest-first - confirm against the pywikibot logevents documentation.
startDate = pywikibot.Timestamp(2019, 1, 18, 6, 10)  ##end date 9, 18 2019,1,6,6,32
endDate = pywikibot.Timestamp(2019, 1, 18, 1, 11)  #start date 9,8 2019,1,5,0,0 #05:34, 7 January 2019

cat = pywikibot.Category(siteCom, "Australian English pronunciation")

# Source 1: files uploaded by this user inside the window above.
uploadsList = pagegenerators.LogeventsPageGenerator(
    logtype='upload', total=2000, user="Commander Keane", site=siteCom,
    start=startDate, end=endDate)
# Source 2: every page in the pronunciation category.
categoryList = pagegenerators.CategorizedPageGenerator(cat)

# Both sources are handed to one GeneratorFactory with '-intersect' set in
# between; only the result of the final call is used as `generator`.
# NOTE(review): this relies on getCombinedGenerator() remembering each `gen`
# it is given, so that the second call can intersect both - confirm against
# the installed pywikibot version.
generator_factory = pagegenerators.GeneratorFactory()
generator_factory.getCombinedGenerator(gen=uploadsList)
generator_factory.handleArg('-intersect')
generator = generator_factory.getCombinedGenerator(gen=categoryList)

# Titles of files that no Wiktionary page uses yet; filled by the loop below.
listUnused = []
# Collect every file from `generator` that is not yet used on a Wiktionary.
for page in generator:
    fileCount = fileCount + 1

    # NOTE(review): str(page) is pywikibot's string rendering of the page, not
    # necessarily a bare entry title. It is used below as a Wiktionary page
    # title and later inside "en-au-<title>.ogg", so the word may still have
    # to be cut out of it - confirm what this string looks like.
    shortFilename = str(page)

    # Used already if any global-usage entry mentions a Wiktionary project;
    # any() stops at the first hit instead of walking the whole usage list.
    wiktLink = any("wiktionary" in str(link) for link in page.globalusage())
    print(shortFilename)

    if not wiktLink:
        # Also count it as used when the entry of that name already carries
        # an Australian audio caption.
        page99 = pywikibot.Page(siteWikt, shortFilename)
        wiktLink = "Audio (AU)" in page99.text

    if not wiktLink:
        listUnused.append(shortFilename)

print(listUnused)
# A level-2 wikitext heading ("==Language=="), tested against a line whose
# spaces were removed; "[^=]" keeps "===Subsection===" lines from matching.
_LANGUAGE_HEADING = re.compile(r"==[^=]")


def _english_entry_bounds(stripped_lines):
    """Find the English entry in a page that was split into lines.

    @param stripped_lines: page lines with all spaces removed
    @type stripped_lines: list of str
    @return: (index of the "==English==" heading, index of the first line
        after that entry), or None when there is no English heading
    @rtype: tuple or None
    """
    heading = None
    for index, line in enumerate(stripped_lines):
        if not _LANGUAGE_HEADING.match(line):
            continue
        if heading is not None:
            # The first language heading after English closes the entry, so
            # sections of other languages are never searched.
            return heading, index
        if line.startswith("==English=="):
            heading = index
    if heading is None:
        return None
    return heading, len(stripped_lines)


def _row_in_pronunciation_section(stripped_lines, first, end):
    """Pick the line after which audio goes in an existing Pronunciation section.

    The audio line follows the last line of the section that contains "IPA"
    or "Audio"; without such a line it follows the heading itself.

    @param stripped_lines: page lines with all spaces removed
    @param first: index of the first line to search
    @param end: index one past the last line to search
    @return: line index, or None when no Pronunciation heading was found
    @rtype: int or None
    """
    heading = None
    for index in range(first, end):
        if "===Pronunciation===" in stripped_lines[index]:
            # No break: with several Pronunciation headings the last is used.
            heading = index
    if heading is None:
        return None

    row = heading
    for index in range(heading + 1, end):
        line = stripped_lines[index]
        if "===" in line:
            # Next heading reached: the Pronunciation section is over.
            break
        if "IPA" in line or "Audio" in line:
            row = index
    return row


def _row_for_new_section(stripped_lines, first, end):
    """Pick the line after which a new Pronunciation section is inserted.

    The section is placed directly before the heading that follows the last
    "Alternative ..." or "Etymology" heading (or the English heading when
    neither exists).

    @param stripped_lines: page lines with all spaces removed
    @param first: index of the "==English==" heading
    @param end: index one past the last line of the English entry
    @return: line index
    @rtype: int
    """
    anchor = first
    for index in range(first, end):
        line = stripped_lines[index]
        if "===Alternative" in line or "===Etymology===" in line:
            anchor = index
    for index in range(anchor + 1, end):
        if "===" in stripped_lines[index]:
            return index - 1
    # LAST RESORT - no later heading in the entry: place the section at the
    # top of the English entry, directly below its heading.
    return first


def _insert_after(lines, row, addition):
    """Return the page text with `addition` placed after line number `row`.

    @param lines: page lines including their line endings
    @param row: index of the line that precedes the addition
    @param addition: text to insert, expected to end with a newline
    @rtype: str
    """
    head = lines[:row + 1]
    if head and not head[-1].endswith(("\n", "\r")):
        # The page's last line may lack a line ending; do not glue onto it.
        head.append("\n")
    return "".join(head) + addition + "".join(lines[row + 1:])


def _text_with_audio(text, file_title):
    """Build the proposed page text containing the en-AU audio line.

    @param text: current wikitext of the entry
    @param file_title: word used both as entry title and in the file name
    @return: proposed wikitext, or None when the page has no English entry
    @rtype: str or None
    """
    lines = text.splitlines(True)
    # Headings are matched on space-free copies so "== English ==" and
    # "==English==" are treated alike; the originals are what gets joined.
    stripped = [line.replace(" ", "") for line in lines]
    bounds = _english_entry_bounds(stripped)
    if bounds is None:
        return None
    first, end = bounds

    audio_line = "* {{audio|en|en-au-" + file_title + ".ogg|Audio (AU)}}\n"
    row = _row_in_pronunciation_section(stripped, first, end)
    if row is not None:
        return _insert_after(lines, row, audio_line)

    row = _row_for_new_section(stripped, first, end)
    new_section = "===Pronunciation===\n" + audio_line + "\n"
    return _insert_after(lines, row, new_section)


def _review_and_save(wikt_page, proposed_text, file_title):
    """Show the proposal in the editor and save what the user returns.

    Nothing is saved when the editor fails, returns None, or returns a text
    equal to the page's current text.

    @param wikt_page: the entry page to update
    @param proposed_text: text pre-loaded into the editor
    @param file_title: word named in the edit summary
    @rtype: None
    """
    current_text = wikt_page.text
    try:  # https://stackoverflow.com/questions/53417668/getting-a-tkinter-tclerror-when-i-try-to-cut-and-paste-in-a-custom-tkinter-text
        edit = EditOption(u'edit page', u'e', proposed_text, 0,
                          wikt_page.title())
        edit.result(6)
    except Exception as error:
        # Editor (GUI) failures must not abort the whole run; report which
        # error occurred and move on to the next page.
        print("exception occurred: {0!r}".format(error))
        return

    final_text = edit.new_text
    if final_text is None or final_text == current_text:
        return
    pywikibot.output(u'\nThe following changes have been made:\n')
    pywikibot.showDiff(current_text, final_text)
    pywikibot.output(u'')
    wikt_page.text = final_text
    wikt_page.save(u"Adding EN-AU audio file: " + file_title)


# Add the audio line to the entry of every collected, still unused file.
for fileTitle in listUnused:
    wiktPage = pywikibot.Page(siteWikt, fileTitle)
    text = wiktPage.text
    # NOTE(review): the trailing ' ***' presumably disables this "already
    # done" check on purpose (the plain 'Audio (AU)' test is applied while
    # collecting the titles) - confirm before changing the marker.
    if 'Audio (AU) ***' in text:
        continue

    proposedText = _text_with_audio(text, fileTitle)
    if proposedText is None:
        print("no English entry found on: " + fileTitle)
        continue
    _review_and_save(wiktPage, proposedText, fileTitle)