Python: Parse YouTube Uploaded Videos to MediaWiki Tables

From dftwiki3
Revision as of 17:23, 1 March 2014 by Thiebaut (talk | contribs) (Created page with "--~~~~ ---- <source lang="python"> # parseYouTubePage.py # D. Thiebaut # 3/1/14 # Login to YouTube, go to http://www.youtube.com/my_videos?o=U # to see the Uploads (which can ...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

--D. Thiebaut (talk) 17:23, 1 March 2014 (EST)


# parseYouTubePage.py
# D. Thiebaut
# 3/1/14
# Log in to YouTube, then go to http://www.youtube.com/my_videos?o=U
# to see the Uploads (which can also be reached from the Video Manager menu option).
#
# Take the source of that page and paste it into the text string below.
# The script will find all the videos in it and print a MediaWiki table of
# their thumbnail images and links.
#

text="""

  [ Put very long source code of YouTube page with list of uploaded videos.  The URL is of the form:
   http://www.youtube.com/my_videos?o=U ]
  
"""
# Keywords to skip.  If one of these keywords is found in the title of a
# video, that video is left out of the table.  Matching ignores case.
skipTitles = []  #["packing", "qt5"]

# Keywords to keep.  If the title of a video contains one of these keywords,
# the video is kept.  Leave the list empty to keep all videos.  Matching
# ignores case.
keepTitles = [] # ["CSC", "packing", "ibook", "two-bit" ]


def parseVideos( pageSource ):
    """Extract the uploaded videos from the HTML source of the YouTube page.

    At the time of this writing, the important information was on lines
    containing both "vm-video-title-content" and "yt-uix-sessionlink".
    This may change in time...

    Lines that carry both markers but lack the expected "watch?v=" link,
    its closing quote, or the "data-sessionlink=" attribute are ignored
    rather than producing a garbage entry.

    Returns a list of ( videoNumber, title, imageURL, videoURL ) tuples, in
    the order in which the videos appear in pageSource.
    """
    linkMarker = "watch?v="
    found = []
    for line in pageSource.split( "\n" ):
        if "vm-video-title-content" not in line or "yt-uix-sessionlink" not in line:
            continue

        #--- find the video number: text between "watch?v=" and the next quote ---
        idStart = line.find( linkMarker )
        if idStart == -1:
            continue
        idStart += len( linkMarker )
        idEnd = line.find( "\"", idStart )
        if idEnd == -1:
            continue
        videoNumber = line[idStart:idEnd]

        #--- grab the title: text between the ">" closing the tag that ---
        #--- holds data-sessionlink and the next "<"                   ---
        attrStart = line.find( "data-sessionlink=" )
        if attrStart == -1:
            continue
        tagEnd = line.find( ">", attrStart )
        if tagEnd == -1:
            continue
        titleEnd = line.find( "<", tagEnd )
        if titleEnd == -1:
            # no closing tag on this line: the title runs to the end of the line
            titleEnd = len( line )
        title = line[tagEnd+1:titleEnd]

        #--- create the image and video URLs ---
        imageURL = "http://img.youtube.com/vi/%s/mqdefault.jpg" % videoNumber
        videoURL = "http://www.youtube.com/watch?v=%s" % videoNumber

        found.append( ( videoNumber, title, imageURL, videoURL ) )
    return found


def isWanted( title, skipKeywords, keepKeywords ):
    """Tell whether a video with this title belongs in the table.

    A title is rejected if it contains any of skipKeywords.  Otherwise it is
    accepted if keepKeywords is empty, or if it contains at least one of
    keepKeywords.  Both the title and the keywords are lower-cased before
    comparing, so keywords may be written in any case.
    """
    lowered = title.lower()
    if any( keyword.lower() in lowered for keyword in skipKeywords ):
        return False
    if not keepKeywords:
        # an empty keep-list means "keep everything"
        return True
    return any( keyword.lower() in lowered for keyword in keepKeywords )


def wikiTableLines( videos, noColumns, skipKeywords, keepKeywords ):
    """Build the MediaWiki table for the given videos.

    videos is a list of ( videoNumber, title, imageURL, videoURL ) tuples.
    Each wanted video (see isWanted) becomes one cell; a row separator is
    emitted after every noColumns cells, and an incomplete last row is
    padded with blank cells.

    Returns the list of strings to print, one print() call per string.
    """
    #--- header ---
    lines = [ "{| class=\"wikitable\"" ]
    count = 0

    #--- one cell per video ---
    for videoNumber, title, imageURL, videoURL in videos:
        if not isWanted( title, skipKeywords, keepKeywords ):
            continue

        #--- generate wiki code ---
        lines.append( "|\n<center>[%s %s]<br />[%s <b>%s</b>]<br /><br /></center>"
                      % ( videoURL, imageURL, videoURL, title ) )

        #--- decide if end of row or not ---
        count += 1
        if count == noColumns:
            count = 0
            lines.append( "|-" )

    #--- fill the last row with blank cells if necessary.  The "<" test  ---
    #--- (rather than "!=") guarantees termination even if noColumns < 1 ---
    while 0 < count < noColumns:
        lines.append( "|\n&nbsp;" )
        count += 1

    #--- close mediawiki table ---
    lines.append( "|}\n\n\n" )
    return lines


#--- parse the lines from the source Web page from YouTube ---
videos = parseVideos( text )

#--- print a mediawiki table ---
#--- make it 2 columns wide ---
noColumns = 2

print( "\n".join( wikiTableLines( videos, noColumns, skipTitles, keepTitles ) ) )