# User:R. Hillgentleman/CIAworldfactbooklist1.py
# (wiki page chrome: "Reading settings")
import wikipedia
import re
import urllib
# Obtain a site object from the pywikipedia framework (presumably the default
# configured wiki -- confirm against the framework's user config).
# NOTE(review): `site` is not referenced anywhere else in the visible code; it
# may be kept only for the call's setup side effects -- verify before removing.
site=wikipedia.getSite()
##### GET THE LIST OF WORLD FACTBOOK COUNTRY PAGES FROM ANY FACTBOOK PAGE
### WHAT THE REGEXES LOOK FOR ################################
"""
LOOK FOR <option> ELEMENTS LIKE THESE:
<option value="xx.html">World</option>
<option value="af.html">Afghanistan</option>
<option value="ax.html">Akrotiri</option>
YIELDS A TRIPLE (n, list1, list2) = (number of codes, dictionary of codes, dictionary of countries).
BOTH DICTIONARIES ARE KEYED BY INTEGER POSITION, STARTING AT 0.
"""
# One entry of the Factbook's country <select> menu.  The two-letter code and
# the display name are captured by the SAME match, so the two result
# dictionaries can never drift out of step with each other.
_FACTBOOK_OPTION = re.compile(
    r'<option value="([a-z][a-z])\.html">(.+?)</option>')


def CIAworldfactbooklist(countrycode='ee', page_text=None):
    """Return the country codes and names listed on a World Factbook page.

    The page is scanned for entries of the form
    ``<option value="af.html">Afghanistan</option>``.

    Parameters:
        countrycode: two-letter code of the Factbook page to download.  Only
            used when ``page_text`` is None.  Defaults to ``'ee'``, the page
            this script has always fetched.
        page_text: already-downloaded HTML to parse instead of fetching a
            page.  When None (the default) the page is downloaded.

    Returns:
        A tuple ``(n, list1, list2)``:
        ``n`` is the number of entries found, ``list1`` maps position
        (0..n-1) to the two-letter code, and ``list2`` maps the same
        position to the country name.  Despite the names, both are
        dictionaries, not lists.

    Only entries for which both a code and a name are found are reported, so
    ``list1[i]`` and ``list2[i]`` always describe the same entry.
    """
    if page_text is None:
        thePage = ('https://www.cia.gov/library/publications/'
                   'the-world-factbook/geos/' + countrycode + '.html')
        x = urllib.urlopen(thePage)
        try:
            page_text = x.read()
        finally:
            # Release the connection even if read() fails.
            x.close()

    # Each match is a (code, country) pair, in page order.
    pairs = _FACTBOOK_OPTION.findall(page_text)

    list1 = dict(enumerate(code for code, _country in pairs))
    list2 = dict(enumerate(country for _code, country in pairs))
    return (len(pairs), list1, list2)
# Demo driver: fetch the list and print the first entries as "code name".
try:
    numbercrap, a, b = CIAworldfactbooklist()
    # Show at most 50 entries, but never index past what was actually found
    # (a fixed range(0, 50) raised KeyError on shorter lists).
    for i in range(min(50, numbercrap, len(b))):
        # Same output as `print a[i],b[i]`; this form parses on Python 2 and 3.
        print("%s %s" % (a[i], b[i]))
finally:
    # Always run the framework's shutdown call, even if the fetch or the
    # printing above fails.
    wikipedia.stopme()