Python: Parse YouTube Uploaded Videos to MediaWiki Tables
--D. Thiebaut (talk) 17:23, 1 March 2014 (EST)
# parseYouTubePage.py
# D. Thiebaut
# 3/1/14
# Log in to YouTube and go to http://www.youtube.com/my_videos?o=U
# to see the Uploads page (which can also be reached from the Video Manager
# menu option).
#
# Take the source of that page and paste it into the text string below.
# The program will take all the videos and create a MediaWiki table of all
# the images and links.
#
# Raw HTML source of the YouTube uploads page. main() splits this string on
# newlines and scans each line for video title links.
text="""
[... put the source of the video manager output page here... ]
"""
def main(source=None, skip_titles=("TAs",), keep_titles=(), columns=2):
    """Print a MediaWiki table of the videos found in a YouTube uploads page.

    Each kept video becomes one table cell holding a link whose label is the
    thumbnail URL, followed by a link whose label is the bold title.

    Parameters:
        source: HTML source of the uploads page.  When None (the default),
            the module-level ``text`` string is used.
        skip_titles: keywords; a video whose title contains any of them is
            left out of the table.
        keep_titles: keywords; when non-empty, only videos whose title
            contains at least one of them are kept.
        columns: number of cells per table row (must be at least 1).

    Keyword matching is a case-insensitive substring test.

    Raises:
        ValueError: if ``columns`` is smaller than 1.
    """
    if columns < 1:
        # With fewer than one column a row could never be completed.
        raise ValueError("columns must be at least 1")
    if source is None:
        # Fall back to the page source pasted into the module-level string.
        source = text

    # Titles are compared in lower case, so the keywords must be lower-cased
    # as well; otherwise a keyword such as "TAs" could never match.
    skip_words = [word.lower() for word in skip_titles]
    keep_words = [word.lower() for word in keep_titles]

    #--- get the video entries from the Web page from YouTube ---
    videos = []
    for line in source.split("\n"):
        entry = _parse_video_line(line)
        if entry is not None:
            videos.append(entry)

    #--- print a mediawiki table ---
    print("{| class=\"wikitable\"")
    filled = 0  # number of cells already printed in the current row
    for video_id, title in videos:
        lowered = title.lower()

        #--- skip titles that are not wanted ---
        if any(word in lowered for word in skip_words):
            continue

        #--- skip if a keep list is given and the title is not on it ---
        if keep_words and not any(word in lowered for word in keep_words):
            continue

        image_url = "http://img.youtube.com/vi/%s/mqdefault.jpg" % video_id
        video_url = "http://www.youtube.com/watch?v=%s" % video_id
        print("|\n<center>[%s %s]<br />[%s <b>%s</b>]<br /><br /></center>"
              % (video_url, image_url, video_url, title))

        filled += 1
        if filled == columns:
            # The row is complete: start a new one.
            filled = 0
            print("|-")

    # Pad an incomplete last row with empty cells.
    while filled != 0 and filled != columns:
        print("|\n ")
        filled += 1

    print("|}\n\n\n")


def _parse_video_line(line):
    """Return (video_id, title) for a video title link line, or None.

    A line qualifies when it contains both the "vm-video-title-content" and
    "yt-uix-sessionlink" markers.  The video id is the text between
    "watch?v=" and the next double quote; the title is the text between the
    first ">" after "data-sessionlink=" and the next "<".  Lines on which any
    of these delimiters is missing are rejected instead of being sliced with
    a -1 index.
    """
    if "vm-video-title-content" not in line or "yt-uix-sessionlink" not in line:
        return None

    marker = "watch?v="
    id_start = line.find(marker)
    if id_start == -1:
        return None
    id_start += len(marker)
    id_end = line.find("\"", id_start)
    if id_end == -1:
        return None

    attr_start = line.find("data-sessionlink=")
    if attr_start == -1:
        return None
    tag_end = line.find(">", attr_start)
    if tag_end == -1:
        return None
    title_end = line.find("<", tag_end)
    if title_end == -1:
        return None

    return line[id_start:id_end], line[tag_end + 1:title_end]
# Generate and print the table. There is no __main__ guard, so this runs
# whenever the file is executed or imported.
main()