Python: Parse YouTube Uploaded Videos to MediaWiki Tables
--D. Thiebaut (talk) 17:23, 1 March 2014 (EST)
# parseYouTubePage.py
# D. Thiebaut
# 3/1/14
# Log in to YouTube, then go to http://www.youtube.com/my_videos?o=U
# to see the Uploads (which can also be reached from the Video Manager menu option).
#
# Copy the HTML source of that page and paste it into the text string below.
# The script extracts all the videos and prints a MediaWiki table of all the
# thumbnail images and links.
#
# Raw HTML source of the YouTube "Uploads" page; replace the placeholder
# between the triple quotes with the real page source.
text = """
[ Put very long source code of YouTube page with list of uploaded videos. The URL is of the form:
http://www.youtube.com/my_videos?o=U ]
"""

# Keywords to skip.  If one of these keywords is found in the title of a
# video, that video is left out of the table.  Matching is case-insensitive.
skipTitles = []  # e.g. ["packing", "qt5"]

# Keywords to keep.  If this list is not empty, only videos whose title
# contains at least one of these keywords are kept.  Leave the list empty to
# keep all videos.  Matching is case-insensitive.
keepTitles = []  # e.g. ["CSC", "packing", "ibook", "two-bit"]

# Number of columns in the generated MediaWiki table.
noColumns = 2


def parse_video_line(line):
    """Extract one video entry from a single line of the page source.

    At the time the script was written, the useful information was on lines
    containing both "vm-video-title-content" and "yt-uix-sessionlink"; this
    may change over time.

    Returns a tuple (videoNumber, title, imageURL, videoURL), or None when
    the line is not a video-title line or lacks one of the delimiters needed
    to locate the video id and the title.
    """
    if "vm-video-title-content" not in line or "yt-uix-sessionlink" not in line:
        return None

    #--- find the video number: the text between 'watch?v=' and the next '"' ---
    id_key = "watch?v="
    key_pos = line.find( id_key )
    if key_pos == -1:
        return None
    id_start = key_pos + len( id_key )
    id_end = line.find( "\"", id_start )
    if id_end == -1:
        return None
    videoNumber = line[id_start:id_end]

    #--- grab the title: the text between the '>' that follows the
    #--- data-sessionlink attribute and the next '<' ---
    attr_pos = line.find( "data-sessionlink=" )
    if attr_pos == -1:
        return None
    tag_end = line.find( ">", attr_pos )
    if tag_end == -1:
        return None
    title_end = line.find( "<", tag_end )
    if title_end == -1:
        return None
    title = line[tag_end + 1:title_end]

    #--- create the image and video URLs ---
    imageURL = "http://img.youtube.com/vi/%s/mqdefault.jpg" % videoNumber
    videoURL = "http://www.youtube.com/watch?v=%s" % videoNumber
    return ( videoNumber, title, imageURL, videoURL )


def parse_videos(source):
    """Return the list of video tuples found in the page source, in page order."""
    videos = []
    for line in source.split( "\n" ):
        video = parse_video_line( line )
        if video is not None:
            videos.append( video )
    return videos


def title_matches(title, keywords):
    """Return True if title contains any of the keywords, ignoring case.

    Both the title and each keyword are lowercased, so a keyword written
    with capital letters (e.g. "CSC") still matches.
    """
    lowered = title.lower()
    return any( keyword.lower() in lowered for keyword in keywords )


def select_videos(videos, skip_keywords, keep_keywords):
    """Filter the video tuples by title.

    A video is dropped when its title matches a skip keyword.  When
    keep_keywords is not empty, a video is also dropped unless its title
    matches at least one keep keyword.
    """
    selected = []
    for video in videos:
        title = video[1]
        if title_matches( title, skip_keywords ):
            continue
        if keep_keywords and not title_matches( title, keep_keywords ):
            continue
        selected.append( video )
    return selected


def format_table(videos, columns):
    """Build the MediaWiki table as a list of strings, one per print() call.

    Each video becomes one cell holding the thumbnail link and the title
    link.  A "|-" row separator is emitted after every `columns` cells, and
    the last row is padded with blank cells when it is incomplete.

    Raises ValueError if columns is smaller than 1, since a row could then
    never be completed.
    """
    if columns < 1:
        raise ValueError( "columns must be at least 1" )

    #--- header ---
    rows = [ "{| class=\"wikitable\"" ]
    count = 0

    #--- one cell per video ---
    for videoNumber, title, imageURL, videoURL in videos:
        rows.append( "|\n<center>[%s %s]<br />[%s <b>%s</b>]<br /><br /></center>"
                     % ( videoURL, imageURL, videoURL, title ) )
        #--- decide if end of row or not ---
        count += 1
        if count == columns:
            count = 0
            rows.append( "|-" )

    #--- close table with blank column entries in last row if necessary ---
    if count != 0:
        rows.extend( [ "|\n " ] * ( columns - count ) )

    #--- close mediawiki table ---
    rows.append( "|}\n\n\n" )
    return rows


def main():
    """Parse the pasted page source and print the MediaWiki table."""
    videos = select_videos( parse_videos( text ), skipTitles, keepTitles )
    for row in format_table( videos, noColumns ):
        print( row )


if __name__ == "__main__":
    main()