'''
IF WIKI STUB CREATOR -- LAST UPDATED 06-JUN-2016
To be used with a text file: ifdblinks.txt which should contain urls,
one per line, of IFDB pages (single works or competition pages or
search results). A # may be used to comment out a line.
To double check the IFIDs and TUIDs, these numbers will be turned
into links and added to a different file (ifid_tuid_links.txt)
so that they can be auto-checked using the idchecker script.
The wiki stubs that are generated in the wiki_pages_for_pywikibot.txt
file may need to be tweaked (see "known limitations" below) but can then
be used with pywikibot's pagefromfile script to
automatically upload the pages to the wiki. See the notes on the
"page_from_file_variables" function.
When you're done uploading the pages, you may want to either blank out
the output files or delete them in order to avoid confusion when new
entries are added.
KNOWN LIMITATIONS:
This script does not collect
* multiple game authors (only one will be listed)
* accented letters and non-ascii symbols
(they will show up as question marks)
* the names of review authors.
All of the above must be fixed by hand.
This script does not check for unusual title capitalization. Ideally
the wiki page would include a {{DEFAULTSORT:}} tag for such titles.
This script does however check for The, A, and a few other words at
the beginning of the title and writes a {{DEFAULTSORT:}} label with
that word removed.
The information this script parses is not exhaustive. It does not
currently support all comps, for instance.
'''
import datetime
import re
import sys  # needed: sys.exit() is called later in this file but was never imported
import webbrowser
from urllib.request import urlopen

from bs4 import BeautifulSoup
new = 2
# Set the tags that will appear in the wiki stubs file to tell
# pywikibot's pagefromfile script where each page, and each page
# title, begins and ends. Whichever values are used here must
# also be used in pagefromfile.py. Also be sure to configure
# pagefromfile such that the title that appears between the title
# markers will not be included in the wiki page.
def page_from_file_variables():
format = {
"page_begin_marker": "{*begin*}",
"page_end_marker": "{*end*}",
"title_begin_marker": "{****",
"title_end_marker": "****}"
}
return format
# Go through the user-provided urls and add them to either the list of game pages or the list of search pages
def sort_user_provided_urls():
game_page_list=[ ]
search_page_list=[ ]
with open("ifdblinks.txt") as urllist:
print ("Sorting through user-provided urls...");
for line in urllist:
if "#" not in line:
if "." in line:
#strip newline chacters from entry
url_proper=str(line.strip("\n"))
if "ifdb.tads.org" in url_proper: #Is it a valid IFDB link?
if "viewgame?id=" in url_proper:
game_page_list.append(url_proper)
print ("\nAdded <" + url_proper + "> from the user's list to my list of game pages.")
else:
search_page_list.append(url_proper)
print ("\nAdded <" + url_proper + "> from the user's list to my list of search pages.")
else:
print ("Ignored <" + url_proper + "> because it is not a valid IFDB link.")
return game_page_list, search_page_list
# Go through the search pages, collecting urls for
# pages of individual works and adding these to
# list of game page urls
def collect_urls_from_search_pages(game_page_list, search_page_list):
for entry in search_page_list:
print ("\nNow checking search page <" + entry +"> for links.")
search_page = urlopen(entry)
search_soup = BeautifulSoup(search_page, "html.parser")
url_prefix = "http://ifdb.tads.org/"
for random_link in search_soup.find_all("a"):
random_url=random_link.get("href")
if "viewgame?id=" in random_url:
found_duplicate=False
full_url=url_prefix+random_url
for entry in game_page_list:
if found_duplicate==True:
break
if full_url==entry:
print ("Ignored duplicate link <" + full_url + ">.")
found_duplicate=True
if found_duplicate==False:
game_page_list.append(full_url)
print("Found game page link <" + full_url + ">.")
else:
print ("Ignored non-game-page link <" + random_url + ">.")
return game_page_list
def collect_number_comp_entries():
total_comp_entries_dict= {
"ifcomp": "#"
}
print("\nIf all works on this list are from the same competition, please enter the competition key + one space + total number of entries in the comp. For example: \nifcomp 50\n\nAvailable competition keys are:",)
for the_key in total_comp_entries_dict:
print(the_key + " "),
comp_and_entries_input=input("\nTo skip this, please press enter.\n\nCOMP AND TOTAL ENTRIES>")
if (comp_and_entries_input != "") and (" " in comp_and_entries_input):
split_comp_from_total=comp_and_entries_input.split(" ")
comp_key=split_comp_from_total[0]
total_entries=split_comp_from_total[1]
if comp_key in total_comp_entries_dict:
total_comp_entries_dict[comp_key]=total_entries
return total_comp_entries_dict
def ask_about_opening_reviews():
open_review_links=False
print("\nWould you like the option to open external review links so you can enter in review author names? (You will still be prompted for each individual link.) If so, please type Y.")
reviews_response=input(">")
if ("y" or "Y") in reviews_response:
print("External review links may be opened. You will likely still need to rearrange the links in the wiki file to put them on order by review author surname.")
open_review_links=True
else:
print("External review links won't be opened. Instead, placeholder text will be used for authors of external reviews.")
return open_review_links
def make_soup(page):
print("\nGathering data from page <" + page + ">...")
return BeautifulSoup(urlopen(page), "html.parser")
def new_game_vars():
dict = {
"title": "",
"sorttitle": "",
"author": "",
"authoralias": "",
"coverart": "",
"genres": [ ],
"lang": "",
"date": "",
"year": "",
"version": "",
"license": "",
"cruelty": "",
"authsys": "",
"platform": "",
"ifid": "",
"tuid": "",
"url": "",
"usernotes": "",
}
reviews= {}
ifcomp={
"year": "",
"overall": "",
"Miss Congeniality": ""
}
springthing = {
"year": "",
"category": "",
"rank": ""
}
xyzzy={
"year": "",
"Best Game": "",
"Best Implementation": "",
"Best Individual NPC": "",
"Best Individual Puzzle": "",
"Best Individual PC": "",
"Best NPCs": "",
"Best Puzzles": "",
"Best Setting": "",
"Best Story": "",
"Best Supplemental Materials": "",
"Best Technological Development": "",
"Best Use of Innovation": "",
"Best Use of Medium": "",
"Best Use of Multimedia": "",
"Best Writing": ""
}
return dict, reviews, ifcomp, springthing, xyzzy
available_genres = {
'Humor': 'comedy',
'Espionage': 'espionage',
'Fantasy': 'fantasy',
'Horror': 'horror',
'Mystery':'mystery',
'Religious': 'religious',
'Romance': 'romance',
'RPG': 'rpg',
'Science Fiction': 'sci-fi',
'Slice of life': 'slice of life',
'Superhero': 'superhero',
'Surreal': 'surreal',
'Western': 'western'
}
def make_ascii_friendly(some_text):
ascii_friendly_text=some_text
for a_character in some_text:
character_num=ord(a_character)
if character_num>127:
ascii_friendly_text=some_text.replace(a_character, "?")
print(r" (NON-ASCII CHARACTER(S) RENDERED AS '?') ")
return ascii_friendly_text
initial_articles=["A ", "An ", "The ", "La ", "Le ", "Les "]
def collect_header_data(game_soup, dict):
header_soup_list=[ ]
for every_string in game_soup.h1.parent.stripped_strings:
friendly_string=make_ascii_friendly(str(every_string))
header_soup_list.append("'" + friendly_string + "'")
print("\nTITLE/AUTHOR/GENRE DATA FROM PAGE: " + str(" ".join(header_soup_list)))
# Determine if there is cover art (for the babel template)
cover_image = game_soup.find("img", title="Cover Art - click to view at full size")
if cover_image != None:
dict["coverart"]="yes"
# Find the title
game_title = str(game_soup.h1.string)
dict["title"] = make_ascii_friendly(game_title)
game_sort_title=game_title #Default is that they match
for article in initial_articles:
if game_title.startswith(article):
split_off_article=game_title.split(" ", 1)
game_sort_title=str(split_off_article[1])
break
dict["sorttitle"] = make_ascii_friendly(game_sort_title)
# Clear the title so that when we search for the year, the title
# won't get mistaken for the year (e.g. if title is "1983".)
game_soup.h1.clear()
# Find the author
game_full_author=game_soup.h1.parent.a.string #the entire author name (including any aliases) is listed in ifwiki, but only the name proper is bracketed.
if " (as " in game_full_author: # if there's an alias
split_before_alias=game_full_author.split(" (as ")
game_author=str(split_before_alias[0])
alias_with_close_paren=str(split_before_alias[1])
split_after_alias=alias_with_close_paren.split(")")
game_author_alias = str('as "' + str(split_after_alias[0]) + '"')
dict["authoralias"] = make_ascii_friendly(game_author_alias)
else:
# Don't really want to deal with multiple authors in this script.
# Easier to fix by hand.
game_author=str(game_full_author)
dict["author"] = make_ascii_friendly(game_author)
# Find Genres
genre_soup_list=[ ]
for each_string in game_soup.h1.parent.span.stripped_strings:
genre_soup_list.append(repr(each_string))
genre_soup_text=str(" ".join(genre_soup_list))
for genre_name in available_genres:
if genre_name in genre_soup_text:
matching_genre=str(available_genres[genre_name])
dict["genres"].append(matching_genre)
# Look for year amidst the genre info (in case year is not
# mentioned in the details section)
for one_string in genre_soup_list:
one_string=one_string.replace("'", "") #strip the quotes
if len(one_string)==4:
if one_string.isdigit():
dict["year"]=one_string
break
return dict
unwanted_tags=re.compile(r"||||
||
", "") award_text=award_text_with_most_p_tags.replace(r"
]", "") split_awards_by_line=award_text.split(",") for each_line in split_awards_by_line: award_lines.append(each_line) print ("\nAWARD DATA FROM PAGE: ") for line in award_lines: print("\n" + make_ascii_friendly(line)) if " - " in line: split_before_comp_name=line.split(" - ") comp_name_and_year=str(split_before_comp_name[1]) #Get the comp name comp_name="" comp_year="" if "Annual Interactive Fiction Competition (" in comp_name_and_year: comp_name="ifcomp"; split_before_comp_year=comp_name_and_year.split(" (") comp_year_with_close_paren=split_before_comp_year[1] comp_year=comp_year_with_close_paren.replace(")", "") ifcomp["year"]=comp_year elif "XYZZY Awards" in comp_name_and_year: comp_name="xyzzy" split_after_xyzzy_year=comp_name_and_year.split(" ", 1) xyzzy_year=str(split_after_xyzzy_year[0]) xyzzy["year"]=xyzzy_year elif "Spring Thing " in comp_name_and_year: comp_name="springthing" split_after_thing=comp_name_and_year.split("Thing ") spring_thing_year=str(split_after_thing[1]) springthing["year"]=spring_thing_year #If the comp isn't one we are prepared to handle, skip it altogether. if comp_name == "": continue # # Get the comp year #Not the same for each comp. 
split_before_comp_year=comp_name_and_year.split(" (") # comp_year_with_close_paren=split_before_comp_year[1] # comp_year=comp_year_with_close_paren.replace(")", "") # Make a list of category-rank pairs categories_with_ranks=split_before_comp_name[0] category_rank_pairs_list = [ ] # If there's more than one category, split them if "; " in categories_with_ranks: split_between_categories=categories_with_ranks.split("; ") for category_rank_pair in split_between_categories: category_rank_pairs_list.append(category_rank_pair) else: category_rank_pairs_list.append(categories_with_ranks) # Parse the category-rank pairs according to comp if comp_name=="ifcomp": ifcomp=parse_ifcomp_awards(ifcomp, category_rank_pairs_list); elif comp_name=="xyzzy": xyzzy=parse_xyzzy_awards(xyzzy, category_rank_pairs_list) elif comp_name=="springthing": springthing=parse_spring_thing_awards(springthing, category_rank_pairs_list) return ifcomp, springthing, xyzzy #Parse IFcomp categories and ranks def parse_ifcomp_awards(ifcomp, category_rank_pairs_list): for category_rank_pair in category_rank_pairs_list: #Find category category="" if "Miss Congeniality" not in category_rank_pair: category="overall" elif "Miss Congeniality" in category_rank_pair: category="Miss Congeniality" #Add rank to ifcomp object split_pair_on_first_space=category_rank_pair.split(" ", 1) rank=str(split_pair_on_first_space[0]) ifcomp[category]=rank return ifcomp def parse_spring_thing_awards(springthing, category_rank_pairs_list): for category_rank_pair in category_rank_pairs_list: #Find category category="" if "Back Garden" in category_rank_pair: category="Back Garden" elif "Main Festival" in category_rank_pair: category="Main Festival" else: category_input=input("What category of Spring Thing was this game entered in? 
Please type 'Main Festival' or 'Back Garden.'>") category=category_input springthing["category"]=category #Add rank to ifcomp object rank="" if category=="Back Garden": rank="Entrant" elif "Entrant" in category_rank_pair: rank="Entrant" else: rank_input=input("Which Spring Thing award(s) did this entry win? Please enter text as it should appear on the wiki page, e.g. 'Audience Choice and Alumni's Choice'>") if rank_input != "": rank=rank_input else: rank="Participant" springthing["category"]=category springthing["rank"]=rank return springthing xyzzy_categories=["Best Game", "Best Implementation","Best Individual NPC","Best Individual Puzzle", "Best Individual PC", "Best NPCs", "Best Puzzles", "Best Setting", "Best Story", "Best Supplemental Materials", "Best Technological Development", "Best Use of Innovation", "Best Use of Medium", "Best Use of Multimedia", "Best Writing"] # Parse xyzzy categories & ranks. def parse_xyzzy_awards(xyzzy, category_rank_pairs_list): for category_rank_pair in category_rank_pairs_list: # Find category category="" rank="" split_rank_from_category=category_rank_pair.split(", ") category=str(split_rank_from_category[1]) # Find rank-value rank=str(split_rank_from_category[0]) if "Nominee" in rank: rank_value="Finalist" elif "Winner" in rank: rank_value="Winner" # Add award to xyzzy object if (category != "") and (rank_value != ""): for category_name in xyzzy_categories: if category_name in category: xyzzy[category_name]=rank_value break return xyzzy def collect_review_links(game_soup, dict, reviews, open_review_links): print("\nREVIEW LINKS FROM PAGE: ") all_links=game_soup.find_all("a") for link in all_links: if link.string=="See the full review": review_url=link.get("href") print("\n" + review_url) if open_review_links==True: open_link=input('Open this link in a browser? Type Y to open.>') if ("y" or "Y") in open_link: webbrowser.open(review_url,new=new) review_author=input("Link opened. 
Please enter review author name.>") if review_author != "": reviews[review_url]=review_author else: reviews[review_url]="Reviewer Needed" continue reviews[review_url]="Reviewer Needed" return dict, reviews # Print the parser data on the screen so the user can check it # for accuracy def print_parsed_data(dict, reviews, total_comp_entries, ifcomp, springthing, xyzzy): print ('\nPARSED DATA: ') print ('Cover art: ' + dict["coverart"]) print ('Title: ' + dict["title"]) print ('Sort as: ' + dict["sorttitle"]) print ('Author: ' + dict["author"]) print ('Alias: ' + dict["authoralias"]) print ('Genre(s): ' + str(dict["genres"])) #Leaving out str will give an error print ('Language: ' + dict["lang"]) print ('Date: ' + dict["date"]) print ('Year: ' + dict["year"]) print ('Version: ' + dict["version"]) print ('License: ' + dict["license"]) print ('Dev. Tool: ' + dict["authsys"]) print ('Platform: ' + dict["platform"]) print ('Cruelty: ' + dict["cruelty"]) print ('IFID: ' + dict["ifid"]) print ('TUID: ' + dict["tuid"]) if ifcomp["year"]!="": print ('IFComp: ' + ifcomp["overall"] + ' place out of ' + total_comp_entries["ifcomp"] + ' entries overall in IFComp ' + ifcomp["year"]) if ifcomp["Miss Congeniality"]!="": print(ifcomp["Miss Congeniality"] + ' place for Miss Congeniality.') else: print (r'IFComp: N/A') print("\n") if springthing["year"]!="": print('Spring Thing: ' + springthing["year"]) print(springthing["rank"] + ", " + springthing["category"] + " in " + springthing["year"]) else: print (r'Spring Thing: N/A') print("\n") if xyzzy["year"]!="": print('XYZZY Awards: ' + xyzzy["year"]) for the_key, the_value in xyzzy.items(): if the_key=="year": continue elif the_value!="": print(the_value + ", " + the_key) else: print (r'XYZZY Awards: N/A') print("\n") if reviews != {}: print("Reviews:") for (review_url, review_author) in reviews.items(): print("\n" + r"* [" + review_url + r" Review] - by [[" + review_author + "]].") # Check for confirmation before writing to file 
dict["usernotes"]=input("\nCheck accuracy and enter notes if desired. Press ENTER to continue. Type only the word 'quit' (all lowercase) to immediately quit the program.\nNOTES>") if dict["usernotes"]=="quit": sys.exit("Program cancelled by the user.") return(dict, ifcomp, springthing, xyzzy) def brackets_or_TBD(x): if (x=="") or (x=="Unknown"): return "TBD" elif x=="Web browser": #This is the one platform that should not be in brackets. return x else: return str(r"[[" + x + r"]]") def is_known(sometext): if sometext!="" and sometext!="Unknown": return True else: return False def tableprefix(): return r'
' def tablesuffix(): return r' |