Hi everyone,
Since the changes on canalplus.fr, the previous scripts have been struggling and flvstreamer can no longer fetch the videos properly (at least not for me).
So, following the advice in lg2009's post (see above), I switched to youtube-dl, which works perfectly!
I also took the opportunity to redo everything with BeautifulSoup.
I'm not a developer, so go easy on the comments about the code 😉
A few side notes: the code includes some tweaks to make it work nicely with Insync.
Insync lets me push the videos to my Google Drive.
Then, on my Android phone, Autosync Pro picks the videos up from Google Drive in the background so I can watch them offline in the metro:
http://play.google.com/store/apps/details?id=com.ttxapps.drivesync
There you go! Now it's your turn to improve it 🙂
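A couple of practical notes before the script: it runs under Python 2, needs BeautifulSoup 4 (pip install beautifulsoup4) and expects youtube-dl to be available on the PATH. For each new video it finds, it simply runs a command of the form youtube-dl <video page url> -o "<tmp dir>/<prog>-%(upload_date)s-<title>.%(ext)s" and then moves the finished file into the output directory.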
#!/usr/bin/python
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (C) 2014 Ziss #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 2 of the License, or #
# any later version. #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
###########################################################################
#
#Changelog:
#V 0.4
#25-07-2014 : Creation
from bs4 import BeautifulSoup
import os, sys, urllib2, subprocess, string, unicodedata, shlex, shutil
###########################################################################
# Options
# Directories and paths
output_dir = "/media/ziss/Data1TB/Insync/DL"
output_tmp_dir = "/media/ziss/Data1TB/.tmp/" # useful when syncing to the cloud: youtube-dl's temporary .frag files never get uploaded
HIST = os.path.join(output_dir , ".webDL_hist")
doNotDLList = ["Best Of", "Best-Of", "Best of", "Best-of", "La semaine", "Semaine du"]
want_sub_dirs = True # True: one sub-directory per programme; False: everything in the same directory
###########################################################################
bad_chars = '(){}<>\\/\"%$*|~+!#'
def cleanStr(s): # Can be improved for sure...
    s = ' '.join(unicode(s).split())                                    # collapse whitespace
    s = str(unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')) # strip accents
    s = s.replace("'", " ")                                             # apostrophes become spaces
    return s.translate(string.maketrans("", ""), bad_chars)             # drop the characters listed in bad_chars
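# e.g. cleanStr(u"Le Petit Journal (suite) du 24/07") -> "Le Petit Journal suite du 2407"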
def parseLWT(soup): # YouTube channel page: grab video links and titles
    #print (soup.prettify())
    vids = []
    for row in soup.find_all('h3', "yt-lockup-title"):
        # drop the &list=... playlist part of the link and keep the part of the title after ': '
        vids.append(["www.youtube.com" + row.a['href'].split('&list')[0], row.a['title'].split(': ')[1]])
    return vids
def parseCanalG(soup): # general case: should work for all Canal+ programmes except 'Pepites sur le Net'
    vids = []
    for row in soup.find_all('h4', 'h4noRedMea'):
        vids.append([row.a['href'], row.a.contents[0]])
    return vids
def parseCanalGClean(soup): # same as parseCanalG, but skips the titles matching doNotDLList
    vids = []
    for row in soup.find_all('h4', 'h4noRedMea'):
        if not any(word in row.a.contents[0] for word in doNotDLList):
            vids.append([row.a['href'], row.a.contents[0]])
    return vids
def parseCanalPSN(soup): # 'Pepites sur le Net' uses a different markup
    vids = []
    for row in soup.find_all('h4'):
        if row.has_attr('title') and "pites sur le Net" in row['title']: # partial match to dodge the accented 'e'; should be improved here
            vids.append([row.a['href'], row.a.contents[0]])
    return vids
"""
def parseCanalZap(soup): # no longer used: the Zapping entry now goes through parseCanalGClean
    vids = []
    for row in soup.find_all('h4'):
        if row.has_attr('title') and "Zapping" in row['title'] and "Semaine" not in row.a.contents[0]: # I don't need the weekly best-of
            vids.append([row.a['href'], row.a.contents[0]])
    return vids
"""
# Base URLs: [short tag used in file/directory names, page URL, parser function]
baseUrls = [
    ["LWT", "http://www.youtube.com/user/LastWeekTonight/videos", parseLWT], # Last Week Tonight
    ["PSN", "http://www.canalplus.fr/c-divertissement/pid1778-c-pepites-sur-le-net.html", parseCanalPSN], # Pepites sur le Net
    ["Zap", "http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html", parseCanalGClean], # Zapping
    ["GPr", "http://www.canalplus.fr/c-divertissement/pid4801-c-gaspard-proust.html", parseCanalG], # Gaspard Proust
    ["LPJ", "http://www.canalplus.fr/c-divertissement/c-le-petit-journal/pid6515-l-emission.html", parseCanalGClean], # Le Petit Journal
    ["LGI", "http://www.canalplus.fr/c-divertissement/pid1784-c-les-guignols.html", parseCanalGClean] # Les Guignols de l'Info
]
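# To follow another programme, just append an entry here: a short tag for the
# file names / sub-directory, the page URL and the parser matching its markup,
# e.g. (hypothetical URL):
#   baseUrls.append(["XYZ", "http://www.canalplus.fr/...-some-other-show.html", parseCanalG])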
###########################################################################
# Execute a command and write its stdout to the given file object (helper, not used below)
def execute(params, file):
    p = subprocess.Popen(params, stdout=file)
    p.wait()
    return p.returncode
def addHistory(name): # append a downloaded URL to the history file
    file = open(HIST, 'a')
    file.write(name + '\n')
    file.close()
def checkHistory(name): # return 1 if the URL is already in the history file, 0 otherwise
    findvalue = 0
    file = open(HIST, 'r')
    for line in file:
        if line == name + '\n':
            findvalue = 1
    file.close()
    return findvalue
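# The history file is just one URL per line; remove a line (or delete the file)
# to force the corresponding video(s) to be downloaded again.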
# MAIN
if __name__ == "__main__":
    if not os.path.exists(output_dir): # the history file lives in output_dir
        os.makedirs(output_dir)
    if not os.path.exists(HIST): # create an empty history file on the first run
        file = open(HIST, 'w')
        file.close()
    if not os.path.exists(output_tmp_dir): # check if the directory exists, if not create it
        os.makedirs(output_tmp_dir)
    for prog, url, parser in baseUrls:
        vids = parser(BeautifulSoup(urllib2.urlopen(url).read()))
        for vidUrl, title in vids:
            if checkHistory(vidUrl) == 0:
                if want_sub_dirs: # one sub-directory per programme, or everything together
                    output_dir_prog = os.path.join(output_dir, prog)
                else:
                    output_dir_prog = output_dir
                if not os.path.exists(output_dir_prog): # check if the directory exists, if not create it
                    os.makedirs(output_dir_prog)
                filename = prog + "-%(upload_date)s-" + cleanStr(title) + ".%(ext)s" # format the file names as you like; see the youtube-dl help for %(upload_date)s and %(ext)s
                cmd = "youtube-dl " + vidUrl + " -o \"" + os.path.join(output_tmp_dir, filename) + "\""
                print (cmd)
                args = shlex.split(cmd)
                p = subprocess.Popen(args)
                p.wait()
                if p.returncode == 0:
                    #print ("youtube-dl success")
                    # move the file only once the download has finished, so the cloud sync never sees partial files
                    source = os.listdir(output_tmp_dir)
                    for f in source:
                        try:
                            shutil.move(os.path.join(output_tmp_dir, f), output_dir_prog) # shutil.move works across drives and filesystems, os.rename() does not
                        except IOError, e:
                            print ("Unable to move file. %s" % e)
                            sys.exit(1)
                    #print ("Success")
                    addHistory(vidUrl) # remember the URL so it is not downloaded again next time
                else:
                    print ("FAILED!!! youtube-dl returncode: " + str(p.returncode))
            #else:
            #    print (cleanStr(title) + " is already Downloaded")
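By the way, if canalplus.fr changes its layout again, the first thing to check is whether the parsers still find anything. A minimal sketch to test that by hand, reusing the same 'h4noRedMea' markup assumption as the parsers above (the Zapping page is just an example):

from bs4 import BeautifulSoup
import urllib2

url = "http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html"
soup = BeautifulSoup(urllib2.urlopen(url).read())
for row in soup.find_all('h4', 'h4noRedMea'):
    print (row.a['href'] + " | " + row.a.contents[0])

If that prints nothing, the markup has changed and the parsers need updating before touching anything else.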