# User:R. Hillgentleman/yueStat.py
#
# 出自維基百科,自由嘅百科全書
#yueStat3.py
#STAT THE yue.wikipedia

import re
import wikipedia
site=wikipedia.getSite()

#page to be read; page to be written; additional comment
pagename = ur'クイズタイムショック'
sandname = ur'Wikipedia:沙盒5'
comment = ur''

# OPEN THE SANDBOX
sand = wikipedia.Page(site, pagename) # OR ur'wikipedia:\u6C99\u76D2'
text =sand.get()
wikipedia.output('...sandbox opened')


# OPEN A FILE - sandboxstat.txt
#file = open('c:/python25/pywikipedia1/pywikipedia/yue/sandboxStat.txt', 'w+')

# OPEN ANOTHER SANDBOX
sand1 = wikipedia.Page(site, sandname) # OR ur'wikipedia:\u6C99\u76D21'
wikipedia.output('...sandbox1 opened')


# COUNT EVERY DISTINCT CHARACTER OF THE PAGE TEXT
# Each pass takes the first remaining character, counts its occurrences and
# strips them all from the text, so the loop ends once the text is empty.
# Results:
#   text1 - list of [character, count] pairs in order of first appearance
#   n     - number of distinct characters found
text1 = []
n = 0
while text != '':
    print('%d' % n)
    char = text[0]
    wikipedia.output(char)
    # Plain string methods treat every character literally, so no regular
    # expression escaping is required and the recorded key is the character
    # itself rather than a backslash-escaped pattern such as '\.'.
    num = text.count(char)
    text = text.replace(char, '')
    text1.append([char, num])
    n += 1
    wikipedia.output(char + u" occured")
    print('%d times' % num)

# Comparator used to rank entries by descending frequency.
def compareFreq(a, b):
    """Compare two [item, count] entries so that larger counts sort first.

    a, b -- indexable entries whose element 1 is the numeric count.

    Returns count(b) - count(a): negative when a has the larger count,
    positive when b has the larger count, zero when the counts are equal.
    """
    count_a = a[1]
    count_b = b[1]
    return count_b - count_a
# Most frequent characters first. The sort is stable, so characters with the
# same count keep their order of first appearance.
text1.sort(compareFreq)

# BUILD THE REPORT TEXT
# The header links to the analysed page, states how many distinct characters
# it uses, signs the post and adds the statistics category.
# - Plain (non-raw) unicode literals are used so that "\n" is a real line
#   break; in a raw ur'' literal it would be written out as backslash + n.
# - n already equals the number of distinct characters (it is incremented
#   once per character found), so it is reported as is, without "+1".
outputText = (u'[[' + pagename
              + u']]用咗%d隻字同符號。 -~~~~\n[[category:維基百科統計]]\n\n' % n)

# One "character,count" line per distinct character. Only the first n
# entries are real characters; any zero-count placeholders sort to the end
# and are skipped by the slice.
outputText += u''.join([entry[0] + u',%d\n' % entry[1]
                        for entry in text1[:n]])

# SAVE THE REPORT TO THE TARGET PAGE
# stopme() sits in a finally clause so it is still called when the save
# raises.
try:
    sand1.put(outputText,
              u'機械人:[[' + pagename
              + u']]統計 - [[user:R. Hillgentleman/yueStat.py]]' + comment)
finally:
    wikipedia.stopme()

#########################################
# SOME COMMENTED OUT CRAP
#
#ge = re.compile(ur'嘅') # or ur'\u5605'
#br = re.compile(r'\{\{')
#bl = re.compile(r'\}\}')
#newstr , n = ge.subn('',text) # replace every ur'嘅' by empty string
#newstr1 , n1= br.subn('',newstr)
#newstr2 , n2= bl.subn('',newstr1)
#wikipedia.output( 'the number of of GE in sandbox is: ')
#print n
#print ('numbers of {{,}}in sandbox are:')
#print n1 , n2
#wikipedia.stopme() 
############################################