Python: Parse YouTube Uploaded Videos to MediaWiki Tables

From dftwiki3
Revision as of 08:37, 8 March 2014 by Thiebaut (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

--D. Thiebaut (talk) 17:23, 1 March 2014 (EST)


# parseYouTubePage.py
# D. Thiebaut
# 3/1/14
# Log in to YouTube, then go to http://www.youtube.com/my_videos?o=U
# to see the Uploads (which can also be reached from the Video Manager menu option).
#
# Take the source of that page and paste it into the text string below.
# The script takes all the videos and creates a MediaWiki table of all the images and links.
#

# Raw HTML source of the YouTube uploads page.  main() splits this string on
# newlines and scans each line for video entries, so paste the page source
# between the triple quotes, replacing the placeholder line.
text="""

[... put the source of the video manager output page here... ]



"""

def _extractVideos( source ):
    """Collect the videos listed in the HTML source of a YouTube uploads page.

    Only lines containing both "vm-video-title-content" and
    "yt-uix-sessionlink" are examined.  A line that matches but lacks the
    "watch?v=" link, its closing quote, or the "data-sessionlink=" attribute
    is skipped rather than turned into a garbage entry.

    Returns a list of ( title, imageURL, videoURL ) tuples, in page order.
    """
    marker = "watch?v="
    videos = []
    for line in source.split( "\n" ):
        if "vm-video-title-content" not in line or "yt-uix-sessionlink" not in line:
            continue

        #--- video id: text between "watch?v=" and the next double quote ---
        idStart = line.find( marker )
        if idStart == -1:
            continue
        idStart += len( marker )
        idEnd = line.find( "\"", idStart )
        if idEnd == -1:
            continue
        videoNumber = line[idStart:idEnd]

        #--- title: text between the ">" closing the tag that holds the
        #--- data-sessionlink attribute and the next "<"
        attribute = line.find( "data-sessionlink=" )
        if attribute == -1:
            continue
        tagEnd = line.find( ">", attribute )
        if tagEnd == -1:
            continue
        titleEnd = line.find( "<", tagEnd )
        if titleEnd == -1:
            continue
        title = line[tagEnd+1:titleEnd]

        imageURL = "http://img.youtube.com/vi/%s/mqdefault.jpg" % videoNumber
        videoURL = "http://www.youtube.com/watch?v=%s" % videoNumber
        videos.append( ( title, imageURL, videoURL ) )
    return videos


def _containsKeyword( title, keywords ):
    """Return True if any keyword occurs in title, ignoring case.

    Both sides are lower-cased: comparing a lower-cased title against a
    mixed-case keyword such as "TAs" could never match.
    """
    lowered = title.lower()
    return any( keyword.lower() in lowered for keyword in keywords )


def main( source=None ):
    """Print a MediaWiki table of the videos found in a YouTube uploads page.

    source: HTML source of the uploads page.  When None (the default), the
            module-level string `text` is used.

    Each kept video becomes one table cell holding a thumbnail link and a
    bold title link; cells are laid out noColumns per row, and the last row
    is padded with "&nbsp;" cells.  Nothing is returned; the table is
    written to standard output.
    """
    # Keywords to skip.  If one of these keywords is found in the title of
    # a video, that entry is skipped.  Matching ignores case.
    skipTitles = [ "TAs" ]  # other examples: "packing", "qt5"
    # Keywords to keep.  When this list is not empty, only videos whose
    # title contains one of the keywords are kept.
    keepTitles = []         # examples: "CSC", "packing", "ibook", "two-bit", "Qt"

    #--- get the videos from the Web page from YouTube ---
    if source is None:
        source = text
    videos = _extractVideos( source )

    #--- print a mediawiki table ---
    noColumns = 2
    print( "{| class=\"wikitable\"" )
    count = 0
    for title, imageURL, videoURL in videos:

        #--- skip titles that are not wanted ---
        if _containsKeyword( title, skipTitles ):
            continue

        #--- skip if not keep title (an empty keep-list keeps everything) ---
        if keepTitles and not _containsKeyword( title, keepTitles ):
            continue

        #--- create 1 new entry on the current row ---
        print( "|\n<center>[%s %s]<br />[%s <b>%s</b>]<br /><br /></center>" % ( videoURL, imageURL, videoURL, title ) )
        count += 1

        #--- if we've reached the end of this row, create a new table row ---
        if count == noColumns:
            count = 0
            print( "|-" )

    #--- add enough entries in last row to match the number of columns required ---
    while count != 0 and count != noColumns:
        print( "|\n&nbsp;" )
        count += 1

    #--- close wiki table ---
    print( "|}\n\n\n" )

# Script entry point: the call is unguarded, so it runs whenever this file is
# executed or imported.
main()