From 3fb5f8efef231dd7784be880934cd106603ab6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joshua=20Ismael=20Haase=20Hern=C3=A1ndez?= Date: Mon, 11 Apr 2011 01:09:25 -0500 Subject: bash-port ready for testing. --- clean_repo.py | 45 +++++++++++ config.py | 10 ++- config.sh | 17 +++-- filter.py | 68 +++++++++++++---- get_license.sh | 10 +-- main.sh | 20 +++-- mkpending.py | 45 +++++++++++ pato2.py | 209 ++++------------------------------------------------ test/test_filter.py | 6 +- 9 files changed, 198 insertions(+), 232 deletions(-) create mode 100644 clean_repo.py create mode 100644 mkpending.py diff --git a/clean_repo.py b/clean_repo.py new file mode 100644 index 0000000..29d446d --- /dev/null +++ b/clean_repo.py @@ -0,0 +1,45 @@ +#! /usr/bin/python +#-*- encoding: utf-8 -*- +from repm.filter import * +import argparse + +def remove_from_blacklist(path_to_db, blacklisted_names, + debug=config["debug"]): + """ Check the blacklist and remove packages on the db""" + + pkgs=[pkg for pkg in pkginfo_from_db(path_to_db) if + pkg["name"] in blacklisted_names] + if pkgs: + lista=" ".join(pkgs) + cmd = "repo-remove " + path_to_db + " " + lista + printf(cmd) + a = check_output(cmd) + if debug: + printf(a) + return pkgs, cmd + +def cleanup_nonfree_in_dir(directory, blacklisted_names): + pkgs=pkginfo_from_files_in_dir(directory) + for package in pkgs: + if package["name"] in blacklisted_names: + os.remove(package["location"]) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Clean a repo db and packages") + parser.add_argument("-b", "--database", type=str, + help="dabatase to clean") + parser.add_argument("-d", "--directory", type=str, + help="directory to clean") + args=parser.parse_args() + + if args.directory: + cleanup_nonfree_in_dir(args.database, listado(config["blacklist"])) + + if args.database: + pkgs=pkginfo_from_db(args.database) + remove_from_blacklist(args.database, pkgs, + tuple(listado(config["blacklist"]) + + listado(config["pending"]))) + if not args.directory and not args.database: + parser.print_help() diff --git a/config.py b/config.py index 8cf07a2..24ecfaf 100644 --- a/config.py +++ b/config.py @@ -1,4 +1,4 @@ -#!/usr/bin/pythonn +#!/usr/bin/python # -*- coding: utf-8 -*- try: from subprocess import check_output @@ -7,22 +7,24 @@ except(ImportError): import os stringvars=("mirror", "mirrorpath", "logname", "tempdir", "docs_dir", - "repodir", "rsync_blacklist") + "repodir", "rsync_blacklist") listvars=("repo_list", "dir_list", "arch_list", "other",) boolvars=("output", "debug",) config=dict() -def exit_if_none: +def exit_if_none(var): if os.environ.get(var) is None: exit("%s is not defined" % var) for var in stringvars: exit_if_none(var) - config[var]=os.environ.get(var) + config[var]=os.environ.get(var) + for var in listvars: exit_if_none(var) config[var]=tuple(os.environ.get(var).split(":")) + for var in boolvars: exit_if_none(var) if os.environ.get(var) == "True": diff --git a/config.sh b/config.sh index 60bd4ea..741dee4 100755 --- a/config.sh +++ b/config.sh @@ -1,22 +1,25 @@ #!/bin/sh # -*- coding: utf-8 -*- - # Mirror options mirror="mirrors.eu.kernel.org" mirrorpath="::mirrors/archlinux" -# Directories and files - +# Directories ## Optionals paraboladir=~/parabolagnulinux.org logtime=$(date -u +%Y%m%d-%H:%M) - ## Must be defined logname=${paraboladir}/${logtime}-repo-maintainer.log tempdir=~/tmp/ docs_dir=${paraboladir}/docs repodir=${paraboladir}/repo +# End Directories + +# Files +blacklist=${docs_dir}/blacklist.txt +whitelist=${docs_dir}/whitelist.txt +pending=${docs_dir}/pending rsync_blacklist=${docs_dir}/rsyncBlacklist # Repos, arches, and dirs for repo @@ -33,16 +36,18 @@ debug="False" rsync_update_command="rsync -av --delay-updates --exclude='*.{abs|db}.tar.*' " rsync_post_command="rsync -av --delete --exclude='*.abs.tar.*' " - function run_python_cmd { env \ mirror=${mirror} \ mirrorpath=${mirrorpath} \ logname=${logname} \ tempdir=${tempdir} \ - rsync_blacklist=${rsync_blacklist} \ docs_dir=${docs_dir} \ repodir=${repodir} \ + blacklist=${blacklist} \ + whitelist=${whitelist} \ + pending=${pending} \ + rsync_blacklist=${rsync_blacklist} \ repo_list=${repo_list} \ dir_list=${dir_list} \ arch_list=${arch_list} \ diff --git a/filter.py b/filter.py index 668822b..1a0fa6f 100644 --- a/filter.py +++ b/filter.py @@ -4,6 +4,13 @@ from glob import glob from repm.config import * from repm.pato2 import * +def listado(filename): + """Obtiene una lista de paquetes de un archivo.""" + archivo = open(filename,"r") + lista = archivo.read().split("\n") + archivo.close() + return [pkg.split(":")[0].rstrip() for pkg in lista if pkg] + def pkginfo_from_filename(filename): """ Generates a Package object with info from a filename, filename can be relative or absolute @@ -116,10 +123,48 @@ def pkginfo_from_files_in_dir(directory): return tuple(package_list) def pkginfo_from_db(path_to_db): - """ """ + """ Get PKGINFO from db. + + Parameters: + ---------- + path_to_db -> str Path to file -def generate_exclude_list_from_blacklist(packages_iterable, blacklisted_names, - exclude_file=rsync_blacklist, debug=verbose): + Output: + ---------- + None """ + package_list=list() + + if not os.path.isfile(path_to_db): + raise NonValidFile(path_to_db + "is not a file") + + check_output("mkdir -p " + archdb) + + try: + db_open_tar = tarfile.open(db_tar_file, 'r:gz') + except tarfile.ReadError: + printf("No valid db_file %s or not readable" % db_tar_file) + return(tuple()) + else: + printf("No db_file %s" % db_tar_file) + return(tuple()) + + for file in db_open_tar.getmembers(): + db_open_tar.extract(file, archdb) + db_open_tar.close() + # Get info from file + for dir_ in glob(archdb + "/*"): + if isdir(dir_) and isfile(dir_ + "/desc"): + package_list.append(pkginfo_from_desc( + os.path.join(dir_,"desc"))) + check_output("rm -r %s/*" % archdb) + if verbose_: + printf(package_list) + return package_list + +def generate_exclude_list_from_blacklist(packages_iterable, + blacklisted_names, + exclude_file=config["rsync_blacklist"], + debug=config["debug"]): """ Generate an exclude list for rsync Parameters: @@ -132,16 +177,12 @@ def generate_exclude_list_from_blacklist(packages_iterable, blacklisted_names, Output: ---------- None """ - a=list() - - for package in packages_iterable: - if not isinstance(package, Package): - raise ValueError(" %s is not a Package object " % package) - if package["name"] in blacklisted_names: - a.append(package["location"]) + pkgs=[pkg["location"] for pkg in packages_iterable + if isinstance(pkg, Package) + and pkg["name"] in blacklisted_names] if debug: - return a + return pkgs try: fsock = open(exclude_file,"w") try: @@ -149,9 +190,10 @@ def generate_exclude_list_from_blacklist(packages_iterable, blacklisted_names, finally: fsock.close() except IOError: - printf("%s wasnt written" % blacklist_file) + printf("%s wasnt written" % exclude_file) if __name__ == "__main__": - a=run_rsync(rsync_list_command) + cmd=generate_rsync_command(rsync_list_command) + a=run_rsync(cmd) packages=pkginfo_from_rsync_output(a) generate_exclude_list_from_blacklist(packages,listado(blacklist)) diff --git a/get_license.sh b/get_license.sh index a7241a1..0da58cb 100755 --- a/get_license.sh +++ b/get_license.sh @@ -31,12 +31,12 @@ rm -rf $dir/* tempdir=$(mktemp -d) cd $tempdir -a=($(cut -d: -f1 $docs/pending*.txt)) -echo ${a[@]} +pending=($(cut -d: -f1 $docs/pending*.txt)) +echo ${pending[@]} -for x in ${a[@]}; do - b=( $(ls $repo/*/os/*/$x*) ) - for y in ${b[@]}; do +for pkg in ${pending[@]}; do + pkg_in_repo=( $(ls ${repo}/*/os/*/${pkg}*) ) + for y in ${pkg_in_repo[@]}; do echo "chmod +r $y" chmod +r $y echo "tar -xf $y usr/share/licenses" diff --git a/main.sh b/main.sh index 2d59094..1a2c6c4 100644 --- a/main.sh +++ b/main.sh @@ -23,13 +23,17 @@ ${rsync_update_command} --exclude-from=${rsync_blacklist} \ ${mirror}${mirropath}/{$(echo ${repo_list} | tr ':' ',')} ${repodir} msg "Syncing each repo and cleaning" +msg2 "Remove pending files" +stdnull "rm -rf ${pending}*" for repo in $(echo ${repo_list} | tr ':' ' '); do - msg2 "Syncing ${repo}" - ${rsync_post_command} --exclude-from=${rsync_blacklist} \ - ${mirror}${mirropath}/${repo} ${repodir}/${repo} - msg2 "Cleaning ${repo}" - clean-repo.py -d ${repodir}/${repo} \ - -b ${repodir}/${repo}/${repo}.db.tar.gz - msg2 "Making pending list for ${repo}" - run_python_cmd "mkpending.py -r ${repo} -d ${repodir}/${repo}" + for arch in $(echo ${arch_list} | tr ':' ' '); do + msg2 "Syncing ${repo} ${arch}" + ${rsync_post_command} --exclude-from=${rsync_blacklist} \ + ${mirror}${mirropath}/${repo} ${repodir}/${repo} + msg2 "Making pending list for ${repo} ${arch}" + run_python_cmd "mkpending.py -r ${repo} -b ${repodir}/${repo}/os/${arch}" + msg2 "Cleaning ${repo} ${arch}" + run_python_cmd "clean-repo.py -b ${repodir}/${repo}/os/${arch}/${repo}.db.tar.gz -d ${repodir}/${repo}/os/${arch}/" + get_license.sh + done done diff --git a/mkpending.py b/mkpending.py new file mode 100644 index 0000000..43a5fb2 --- /dev/null +++ b/mkpending.py @@ -0,0 +1,45 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from repm.filter import * + +def make_pending(path_to_db): + """ Determine wich packages are pending for license auditing.""" + packages_iterable=pkginfo_from_db(path_to_db) + search = tuple(listado(config["blacklist"]) + + listado(config["whitelist"])) + + pkgs=[pkg for pkg in packages_iterable + if "custom" in pkg["license"] + and pkg["name"] not in search] + return pkgs + +def write_pending(packages_iterable, repo, prefix=config["pending"]): + """ Write a pending file with the info of the packages """ + filename=prefix + "-" + repo + ".txt" + try: + fsock=open(filename, "a") + except(IOError): + print("Can't read %s" % filename) + finally: + fsock.close() + if os.path.isfile(filename): + pkgs=[pkg for pkg in packages_iterable if pkg["name"] not in + listado(filename)] + fsock.write("\n".join([pkg["name"] + ":" + pkg["license"] + for pkg in pkgs]) + "\n") + fsock.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Clean a repo db and packages") + parser.add_argument("-b", "--dababase", type=str, required=True + help="database to check") + parser.add_argument("-r", "--repo", type=str, required=True + help="repo of database") + args=parser.parse_args() + + if args.database and args.repo: + pkgs=make_pending(args.database) + write_pending(pkgs, args.repo) + else: + parser.print_help() diff --git a/pato2.py b/pato2.py index 0d77d6b..4cdb536 100644 --- a/pato2.py +++ b/pato2.py @@ -27,159 +27,23 @@ from repm.config import * from repm.filter import * import tarfile -from glob import glob from os.path import isdir, isfile -def printf(text,output_=output): +def printf(text,output=config["output"]): """Guarda el texto en la variable log y puede imprimir en pantalla.""" - log_file = open(logname, 'a') + log_file = open(config["logname"], 'a') log_file.write("\n" + str(text) + "\n") log_file.close() - if output_: print (str(text) + "\n") - -def listado(filename_): - """Obtiene una lista de paquetes de un archivo.""" - archivo = open(filename_,"r") - lista = archivo.read().split("\n") - archivo.close() - return [pkg.split(":")[0].rstrip() for pkg in lista if pkg] - -def db(repo_,arch_): - """Construye un nombre para sincronizar una base de datos.""" - return "/%s/os/%s/%s.db.tar.gz" % (repo_, arch_, repo_) - -def packages(repo_, arch_, expr="*"): - """ Get packages on a repo, arch folder """ - return tuple( glob( repodir + "/" + repo_ + "/os/" + arch_ + "/" + expr ) ) - -def sync_all_repo(debug=verbose): - cmd=generate_rsync_command(rsync_list_command) - rsout=run_rsync(cmd) - pkgs=pkginfo_from_rsync_output(rsout) - generate_exclude_list_from_blacklist(pkgs,listado(blacklist),debug=False) - cmd=generate_rsync_command(rsync_update_command,blacklist_file=rsync_blacklist) - a=run_rsync(cmd) - cmd=generate_rsync_command(rsync_post_command,blacklist_file=rsync_blacklist) - b=run_rsync(cmd) - if debug: - printf(a) - printf(b) - -def get_from_desc(desc, var,db_tar_file=False): - """ Get a var from desc file """ - desc = desc.split("\n") - return desc[desc.index(var)+1] - -def get_info(repo_,arch_,db_tar_file=False,verbose_=verbose): - """ Makes a list of package name, file and license """ - info=list() - # Extract DB tar.gz - commands.getoutput("mkdir -p " + archdb) - if not db_tar_file: - db_tar_file = repodir + db(repo_,arch_) - if isfile(db_tar_file): - try: - db_open_tar = tarfile.open(db_tar_file, 'r:gz') - except tarfile.ReadError: - printf("No valid db_file %s" % db_tar_file) - return(tuple()) - else: - printf("No db_file %s" % db_tar_file) - return(tuple()) - for file in db_open_tar.getmembers(): - db_open_tar.extract(file, archdb) - db_open_tar.close() - # Get info from file - for dir_ in glob(archdb + "/*"): - if isdir(dir_) and isfile(dir_ + "/desc"): - pkg_desc_file = open(dir_ + "/desc", "r") - desc = pkg_desc_file.read() - pkg_desc_file.close() - info.append(( get_from_desc(desc,"%NAME%"), - dir_.split("/")[-1], - get_from_desc(desc,"%LICENSE%") )) - if verbose_: printf(info) - commands.getoutput("rm -r %s/*" % archdb) - return tuple(info) - -def make_pending(repo_,arch_,info_): - """ Si los paquetes no están en blacklist ni whitelist y la licencia contiene "custom" los agrega a pending""" - search = tuple( listado(blacklist) + listado (whitelist) ) - if verbose: printf("blaclist + whitelist= " + str(search) ) - lista_=list() - for (name,pkg_,license_) in info_: - if "custom" in license_: - if name not in search: - lista_.append( (name, license_ ) ) - elif not name: - printf( pkg_ + " package has no %NAME% attibute " ) - if verbose: printf( lista_ ) - a=open( pending + "-" + repo_ + ".txt", "w" ).write( - "\n".join([name + ":" + license_ for (name,license_) in lista_]) + "\n") - -def remove_from_blacklist(repo_,arch_,info_,blacklist_): - """ Check the blacklist and remove packages on the db""" - lista_=list() - pack_=list() - for (name_, pkg_, license_) in info_: - if name_ in blacklist_: - lista_.append(name_) - for p in packages(repo_,arch_,pkg_ + "*"): - pack_.append(p) - if lista_: - lista_=" ".join(lista_) - com_ = "repo-remove " + repodir + db(repo_,arch_) + " " + lista_ - printf(com_) - a = commands.getoutput(com_) - if verbose: printf(a) - -def cleanup_nonfree_in_dir(directory,blacklisted_names): - pkgs=pkginfo_from_files_in_dir(directory) - for package in pkgs: - if package["name"] in blacklisted_names: - os.remove(package["location"]) - -def link(repo_,arch_,file_): - """ Makes a link in the repo for the package """ - cmd_="ln -f " + file_ + " " + repodir + "/" + repo_ + "/os/" + arch_ - a=commands.getoutput(cmd_) - if verbose: - printf(cmd_ + a) - -def add_free_repo(verbose_=verbose): - cmd_=os.path.join(home,"/usr/bin/sync-free") - printf(cmd_) - a=commands.getoutput(cmd_) - if verbose_: printf(a) - for repo_ in repo_list: - for arch_ in arch_list: - lista_=list() - for file_ in glob(freedir + repo_ + "/os/" + arch_ + "/*.pkg.tar.*"): - lista_.append(file_) - for dir_ in other: - for file_ in glob(freedir + repo_ + "/os/" + dir_ + "/*.pkg.tar.*"): - lista_.append(file_) - - printf(lista_) - - if lista_: - lista_=" ".join(lista_) - if verbose: printf(lista_) - cmd_="repo-add " + repodir + db(repo_,arch_) + " " + lista_ - printf(cmd_) - a=commands.getoutput(cmd_) - if verbose: printf(a) - -def get_licenses(verbose_=verbose): - """ Extract the license from packages in repo_,arch_ and in pending_ file""" - cmd_=home + "/usr/bin/get_license.sh" - printf(cmd_) - a=commands.getoutput(cmd_) - if verbose_: printf(a) - -def generate_rsync_command(base_command, dir_list=(repo_list + dir_list), destdir=repodir, - source=mirror+mirrorpath, blacklist_file=False): - """ Generates an rsync command for executing it by combining all parameters. + if output_: + print (str(text) + "\n") + +def generate_rsync_command(base_command, + dir_list=(config["repo_list"] + + config["dir_list"]), + destdir=config["repodir"], + source=config["mirror"] +config["mirrorpath"]): + """ Generates an rsync command for executing + it by combining all parameters. Parameters: ---------- @@ -192,57 +56,16 @@ def generate_rsync_command(base_command, dir_list=(repo_list + dir_list), destdi Return: ---------- rsync_command -> str """ - from os.path import isfile, isdir - - if blacklist_file and not isfile(blacklist_file): - print(blacklist_file + " is not a file") - raise NonValidFile - if not os.path.isdir(destdir): print(destdir + " is not a directory") raise NonValidDir dir_list="{" + ",".join(dir_list) + "}" + return " ".join((base_command, os.path.join(source, dir_list), + destdir)) - if blacklist_file: - return " ".join((base_command, "--exclude-from="+blacklist_file, - os.path.join(source, dir_list), destdir)) - return " ".join((base_command, os.path.join(source, dir_list), destdir)) - -def run_rsync(command,debug=verbose): +def run_rsync(command,debug=config["debug"]): """ Runs rsync and gets returns it's output """ if debug: printf("rsync_command: " + command) - return commands.getoutput(command) - -if __name__ == "__main__": - from time import time - start_time = time() - def minute(): - return str(round((time() - start_time)/60, 1)) - - printf(" Cleaning %s folder " % (tmp) ) - commands.getoutput("rm -r %s/*" % tmp) - printf(" Syncing repo") - sync_all_repo(True) - - printf(" Updating databases and pending files lists: minute %s \n" % minute() ) - for repo in repo_list: - for arch in arch_list: - printf( "\n" + repo + "-" + arch + "\n" ) - printf( "Get info: minute %s " % minute() ) - info=get_info(repo,arch) - printf( "Make pending: minute %s" % minute() ) - make_pending(repo,arch,info) - printf( "Update DB: minute %s" % minute() ) - remove_from_blacklist( - repo, arch, info, tuple( listado(blacklist) + listado(pending + "-" + repo + ".txt") ) ) - - printf("Adding Parabola Packages: minute %s\n" % minute() ) - add_free_repo(True) - - printf("Extracting licenses in pending: minute %s" % minute() ) - get_licenses() - - printf("\n\nDelay: %s minutes \n" % minute()) - + return check_output(command) diff --git a/test/test_filter.py b/test/test_filter.py index 1906b87..5601d57 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -169,21 +169,21 @@ class pkginfo_from_db(unittest.TestCase): "release" : "2", "arch" : "x86_64", "license" : ("LGPL",), - "location": "acl-2.2.49-2-x86_64.pkg.tar.xz" + "location": "acl-2.2.49-2-x86_64.pkg.tar.xz", "depends" : ("attr>=2.4.41"),} example_package_list[1].package_info={ "name" : "glibc", "version" : "2.13", "release" : "4", "arch" : "x86_64", "license" : ("GPL","LGPL"), - "location": "glibc-2.13-4-x86_64.pkg.tar.xz" + "location": "glibc-2.13-4-x86_64.pkg.tar.xz", "depends" : ("linux-api-headers>=2.6.37","tzdata",),} example_package_list[2].package_info={ "name" : "", "version" : "2.2.26", "release" : "1", "arch" : "x86_64", "license" : False, - "location": "" + "location": "", "depends" : False,} -- cgit v1.1-4-g5e80