diff --git a/Makefile b/Makefile index 60bb9db..0b5738b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ PKGNAME = createrepo +ALIASES = mergerepo modifyrepo genpkgmetadata.py mergerepo.py modifyrepo.py VERSION=$(shell awk '/Version:/ { print $$2 }' ${PKGNAME}.spec) RELEASE=$(shell awk '/Release:/ { print $$2 }' ${PKGNAME}.spec) CVSTAG=createrepo-$(subst .,_,$(VERSION)-$(RELEASE)) @@ -26,6 +27,8 @@ docdir = includedir = ${prefix}/include oldincludedir = /usr/include mandir = ${prefix}/share/man +compdir = $(shell pkg-config --variable=completionsdir bash-completion) +compdir := $(or $(compdir), "/etc/bash_completion.d") pkgdatadir = $(datadir)/$(PKGNAME) pkglibdir = $(libdir)/$(PKGNAME) @@ -33,7 +36,7 @@ pkgincludedir = $(includedir)/$(PKGNAME) top_builddir = # all dirs -DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(sysconfdir)/bash_completion.d \ +DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(compdir) \ $(DESTDIR)$(pkgdatadir) $(DESTDIR)$(mandir) @@ -65,7 +68,8 @@ check: install: all installdirs $(INSTALL_MODULES) $(srcdir)/$(MODULES) $(DESTDIR)$(pkgdatadir) - $(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(sysconfdir)/bash_completion.d + $(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(compdir)/$(PKGNAME) + (cd $(DESTDIR)$(compdir); for n in $(ALIASES); do ln -s $(PKGNAME) $$n; done) for subdir in $(SUBDIRS) ; do \ $(MAKE) -C $$subdir install VERSION=$(VERSION) PKGNAME=$(PKGNAME); \ done diff --git a/createrepo.bash b/createrepo.bash index 54ac8b2..14b43d8 100644 --- a/createrepo.bash +++ b/createrepo.bash @@ -1,11 +1,22 @@ # bash completion for createrepo and friends +_cr_compress_type() +{ + COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \ + | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) ) +} + +_cr_checksum_type() +{ + COMPREPLY=( $( compgen -W 'md5 sha1 sha256 sha512' -- "$1" ) ) +} + _cr_createrepo() { COMPREPLY=() case $3 in - --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\ + --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\ --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size) return 0 ;; @@ -18,8 +29,8 @@ _cr_createrepo() COMPREPLY=( $( compgen -f -o plusdirs -X '!*.xml' -- "$2" ) ) return 0 ;; - -s|--sumtype) - COMPREPLY=( $( compgen -W 'md5 sha1 sha256 sha512' -- "$2" ) ) + -s|--checksum) + _cr_checksum_type "$2" return 0 ;; -i|--pkglist|--read-pkgs-list) @@ -30,10 +41,24 @@ _cr_createrepo() COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) ) return 0 ;; + --retain-old-md) + COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) ) + return 0 + ;; --num-deltas) COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) ) return 0 ;; + --workers) + local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null ) + [[ -z $max || $max -lt $min ]] && max=$min + COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) ) + return 0 + ;; + --compress-type) + _cr_compress_type "$1" "$2" + return 0 + ;; esac if [[ $2 == -* ]] ; then @@ -42,9 +67,9 @@ _cr_createrepo() --cachedir --checkts --no-database --update --update-md-path --skip-stat --split --pkglist --includepkg --outputdir --skip-symlinks --changelog-limit --unique-md-filenames - --simple-md-filenames --distro --content --repo --revision --deltas - --oldpackagedirs --num-deltas --read-pkgs-list - --max-delta-rpm-size --workers' -- "$2" ) ) + --simple-md-filenames --retain-old-md --distro --content --repo + --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list + --max-delta-rpm-size --workers --compress-type' -- "$2" ) ) else COMPREPLY=( $( compgen -d -- "$2" ) ) fi @@ -63,10 +88,14 @@ _cr_mergerepo() COMPREPLY=( $( compgen -d -- "$2" ) ) return 0 ;; + --compress-type) + _cr_compress_type "" "$2" + return 0 + ;; esac COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database - --outputdir --nogroups --noupdateinfo' -- "$2" ) ) + --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) ) } && complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py @@ -78,17 +107,27 @@ _cr_modifyrepo() --version|-h|--help|--mdtype) return 0 ;; + --compress-type) + _cr_compress_type "" "$2" + return 0 + ;; + -s|--checksum) + _cr_checksum_type "$2" + return 0 + ;; esac if [[ $2 == -* ]] ; then - COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) ) + COMPREPLY=( $( compgen -W '--version --help --mdtype --remove + --compress --no-compress --compress-type --checksum + --unique-md-filenames --simple-md-filenames' -- "$2" ) ) return 0 fi local i argnum=1 for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do if [[ ${COMP_WORDS[i]} != -* && - ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then + ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then argnum=$(( argnum+1 )) fi done diff --git a/createrepo.spec b/createrepo.spec index 1e491cd..9a2179b 100644 --- a/createrepo.spec +++ b/createrepo.spec @@ -1,5 +1,17 @@ %{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")} +%if ! 0%{?rhel} +# we don't have this in rhel yet... +BuildRequires: bash-completion +%endif + +# disable broken /usr/lib/rpm/brp-python-bytecompile +%define __os_install_post %{nil} +%define compdir %(pkg-config --variable=completionsdir bash-completion) +%if "%{compdir}" == "" +%define compdir "/etc/bash_completion.d" +%endif + Summary: Creates a common metadata repository Name: createrepo Version: 0.9.9 @@ -11,7 +23,7 @@ URL: http://createrepo.baseurl.org/ BuildRoot: %{_tmppath}/%{name}-%{version}root BuildArchitectures: noarch Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python -Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm +Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma %description This utility will generate a common metadata repository from a directory of @@ -32,7 +44,7 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install %defattr(-, root, root) %dir %{_datadir}/%{name} %doc ChangeLog README COPYING COPYING.lib -%{_sysconfdir}/bash_completion.d/ +%(dirname %{compdir}) %{_datadir}/%{name}/* %{_bindir}/%{name} %{_bindir}/modifyrepo @@ -43,6 +55,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install %{python_sitelib}/createrepo %changelog +* Fri Sep 9 2011 Seth Vidal +- add lzma dep + * Wed Jan 26 2011 Seth Vidal - bump to 0.9.9 - add worker.py diff --git a/createrepo/__init__.py b/createrepo/__init__.py index 8f2538e..1b18a9f 100644 --- a/createrepo/__init__.py +++ b/createrepo/__init__.py @@ -26,15 +26,16 @@ import tempfile import stat import fcntl import subprocess +from select import select -from yum import misc, Errors, to_unicode -from yum.repoMDObject import RepoMD, RepoMDError, RepoData +from yum import misc, Errors +from yum.repoMDObject import RepoMD, RepoData from yum.sqlutils import executeSQL from yum.packageSack import MetaSack -from yum.packages import YumAvailablePackage, YumLocalPackage +from yum.packages import YumAvailablePackage import rpmUtils.transaction -from utils import _, errorprint, MDError +from utils import _, errorprint, MDError, lzma, _available_compression import readMetadata try: import sqlite3 as sqlite @@ -46,8 +47,9 @@ try: except ImportError: pass -from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \ +from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \ checksum_and_rename, split_list_into_equal_chunks +from utils import num_cpus_online import deltarpms __version__ = '0.9.9' @@ -74,7 +76,7 @@ class MetaDataConfig(object): self.deltadir = None self.delta_relative = 'drpms/' self.oldpackage_paths = [] # where to look for the old packages - - self.deltafile = 'prestodelta.xml.gz' + self.deltafile = 'prestodelta.xml' self.num_deltas = 1 # number of older versions to delta (max) self.max_delta_rpm_size = 100000000 self.update_md_path = None @@ -86,9 +88,9 @@ class MetaDataConfig(object): self.skip_symlinks = False self.pkglist = [] self.database_only = False - self.primaryfile = 'primary.xml.gz' - self.filelistsfile = 'filelists.xml.gz' - self.otherfile = 'other.xml.gz' + self.primaryfile = 'primary.xml' + self.filelistsfile = 'filelists.xml' + self.otherfile = 'other.xml' self.repomdfile = 'repomd.xml' self.tempdir = '.repodata' self.finaldir = 'repodata' @@ -108,8 +110,10 @@ class MetaDataConfig(object): self.collapse_glibc_requires = True self.workers = 1 # number of workers to fork off to grab metadata from the pkgs self.worker_cmd = '/usr/share/createrepo/worker.py' - #self.worker_cmd = './worker.py' # helpful when testing + self.retain_old_md = 0 + self.compress_type = 'compat' + class SimpleMDCallBack(object): def errorlog(self, thing): @@ -141,10 +145,23 @@ class MetaDataGenerator: self.files = [] self.rpmlib_reqs = {} self.read_pkgs = [] + self.compat_compress = False if not self.conf.directory and not self.conf.directories: raise MDError, "No directory given on which to run." - + + if self.conf.compress_type == 'compat': + self.compat_compress = True + self.conf.compress_type = None + + if not self.conf.compress_type: + self.conf.compress_type = 'gz' + + if self.conf.compress_type not in utils._available_compression: + raise MDError, "Compression %s not available: Please choose from: %s" \ + % (self.conf.compress_type, ', '.join(utils._available_compression)) + + if not self.conf.directories: # just makes things easier later self.conf.directories = [self.conf.directory] if not self.conf.directory: # ensure we have both in the config object @@ -290,14 +307,13 @@ class MetaDataGenerator: def extension_visitor(filelist, dirname, names): for fn in names: + fn = os.path.join(dirname, fn) if os.path.isdir(fn): continue if self.conf.skip_symlinks and os.path.islink(fn): continue elif fn[-extlen:].lower() == '%s' % (ext): - relativepath = dirname.replace(startdir, "", 1) - relativepath = relativepath.lstrip("/") - filelist.append(os.path.join(relativepath, fn)) + filelist.append(fn[len(startdir):]) filelist = [] startdir = directory + '/' @@ -311,7 +327,7 @@ class MetaDataGenerator: def checkTimeStamps(self): """check the timestamp of our target dir. If it is not newer than the repodata return False, else True""" - if self.conf.checkts: + if self.conf.checkts and self.conf.mdtimestamp: dn = os.path.join(self.conf.basedir, self.conf.directory) files = self.getFileList(dn, '.rpm') files = self.trimRpms(files) @@ -410,9 +426,11 @@ class MetaDataGenerator: def _setupPrimary(self): # setup the primary metadata file + # FIXME - make this be conf.compress_type once y-m-p is fixed + fpz = self.conf.primaryfile + '.' + 'gz' primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, - self.conf.primaryfile) - fo = _gzipOpen(primaryfilepath, 'w') + fpz) + fo = compressOpen(primaryfilepath, 'w', 'gz') fo.write('\n') fo.write('' % @@ -421,9 +439,11 @@ class MetaDataGenerator: def _setupFilelists(self): # setup the filelist file + # FIXME - make this be conf.compress_type once y-m-p is fixed + fpz = self.conf.filelistsfile + '.' + 'gz' filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, - self.conf.filelistsfile) - fo = _gzipOpen(filelistpath, 'w') + fpz) + fo = compressOpen(filelistpath, 'w', 'gz') fo.write('\n') fo.write('' % self.pkgcount) @@ -431,9 +451,11 @@ class MetaDataGenerator: def _setupOther(self): # setup the other file + # FIXME - make this be conf.compress_type once y-m-p is fixed + fpz = self.conf.otherfile + '.' + 'gz' otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, - self.conf.otherfile) - fo = _gzipOpen(otherfilepath, 'w') + fpz) + fo = compressOpen(otherfilepath, 'w', 'gz') fo.write('\n') fo.write('' % @@ -442,9 +464,10 @@ class MetaDataGenerator: def _setupDelta(self): # setup the other file + fpz = self.conf.deltafile + '.' + self.conf.compress_type deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, - self.conf.deltafile) - fo = _gzipOpen(deltafilepath, 'w') + fpz) + fo = compressOpen(deltafilepath, 'w', self.conf.compress_type) fo.write('\n') fo.write('\n') return fo @@ -520,6 +543,7 @@ class MetaDataGenerator: # go on their merry way newpkgs = [] + keptpkgs = [] if self.conf.update: # if we're in --update mode then only act on the new/changed pkgs for pkg in pkglist: @@ -530,39 +554,13 @@ class MetaDataGenerator: old_pkg = pkg if pkg.find("://") != -1: old_pkg = os.path.basename(pkg) - nodes = self.oldData.getNodes(old_pkg) - if nodes is not None: # we have a match in the old metadata + old_po = self.oldData.getNodes(old_pkg) + if old_po: # we have a match in the old metadata if self.conf.verbose: self.callback.log(_("Using data from old metadata for %s") % pkg) - (primarynode, filenode, othernode) = nodes - - for node, outfile in ((primarynode, self.primaryfile), - (filenode, self.flfile), - (othernode, self.otherfile)): - if node is None: - break - - if self.conf.baseurl: - anode = node.children - while anode is not None: - if anode.type != "element": - anode = anode.next - continue - if anode.name == "location": - anode.setProp('xml:base', self.conf.baseurl) - anode = anode.next - - output = node.serialize('UTF-8', self.conf.pretty) - if output: - outfile.write(output) - else: - if self.conf.verbose: - self.callback.log(_("empty serialize on write to" \ - "%s in %s") % (outfile, pkg)) - outfile.write('\n') - - self.oldData.freeNodes(pkg) + keptpkgs.append((pkg, old_po)) + #FIXME - if we're in update and we have deltas enabled # check the presto data for this pkg and write its info back out # to our deltafile @@ -584,32 +582,45 @@ class MetaDataGenerator: po = None if isinstance(pkg, YumAvailablePackage): po = pkg - self.read_pkgs.append(po.localpath) + self.read_pkgs.append(po.localPkg()) # if we're dealing with remote pkgs - pitch it over to doing # them one at a time, for now. elif pkg.find('://') != -1: - po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) + po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir) self.read_pkgs.append(pkg) if po: - self.primaryfile.write(po.xml_dump_primary_metadata()) - self.flfile.write(po.xml_dump_filelists_metadata()) - self.otherfile.write(po.xml_dump_other_metadata( - clog_limit=self.conf.changelog_limit)) + keptpkgs.append((pkg, po)) continue pkgfiles.append(pkg) - - + + keptpkgs.sort(reverse=True) + # keptkgs is a list of (filename, po), pkgfiles is a list if filenames. + # Need to write them in sorted(filename) order. We loop over pkgfiles, + # inserting keptpkgs in right spots (using the upto argument). + def save_keptpkgs(upto): + while keptpkgs and (upto is None or keptpkgs[-1][0] < upto): + filename, po = keptpkgs.pop() + # reset baseurl in the old pkg + po.basepath = self.conf.baseurl + self.primaryfile.write(po.xml_dump_primary_metadata()) + self.flfile.write(po.xml_dump_filelists_metadata()) + self.otherfile.write(po.xml_dump_other_metadata( + clog_limit=self.conf.changelog_limit)) + if pkgfiles: # divide that list by the number of workers and fork off that many # workers to tmpdirs # waitfor the workers to finish and as each one comes in # open the files they created and write them out to our metadata # add up the total pkg counts and return that value - worker_tmp_path = tempfile.mkdtemp() - worker_chunks = utils.split_list_into_equal_chunks(pkgfiles, self.conf.workers) + self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later + if self.conf.workers < 1: + self.conf.workers = num_cpus_online() + pkgfiles.sort() + worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers) worker_cmd_dict = {} worker_jobs = {} base_worker_cmdline = [self.conf.worker_cmd, @@ -617,7 +628,8 @@ class MetaDataGenerator: '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires, '--pkgoptions=_cachedir=%s' % self.conf.cachedir, '--pkgoptions=_baseurl=%s' % self.conf.baseurl, - '--globalopts=clog_limit=%s' % self.conf.changelog_limit,] + '--globalopts=clog_limit=%s' % self.conf.changelog_limit, + '--globalopts=sumtype=%s' % self.conf.sumtype, ] if self.conf.quiet: base_worker_cmdline.append('--quiet') @@ -626,15 +638,14 @@ class MetaDataGenerator: base_worker_cmdline.append('--verbose') for worker_num in range(self.conf.workers): - # make the worker directory + pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num + f = open(pkl, 'w') + f.write('\n'.join(worker_chunks[worker_num])) + f.close() + workercmdline = [] workercmdline.extend(base_worker_cmdline) - thisdir = worker_tmp_path + '/' + str(worker_num) - if checkAndMakeDir(thisdir): - workercmdline.append('--tmpmdpath=%s' % thisdir) - else: - raise MDError, "Unable to create worker path: %s" % thisdir - workercmdline.extend(worker_chunks[worker_num]) + workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num)) worker_cmd_dict[worker_num] = workercmdline @@ -647,49 +658,60 @@ class MetaDataGenerator: stderr=subprocess.PIPE) worker_jobs[num] = job - gimmebreak = 0 - while gimmebreak != len(worker_jobs.keys()): - gimmebreak = 0 - for (num,job) in worker_jobs.items(): - if job.poll() is not None: - gimmebreak+=1 - line = job.stdout.readline() - if line: + files = self.primaryfile, self.flfile, self.otherfile + def log_messages(num): + job = worker_jobs[num] + while True: + # check stdout and stderr + for stream in select((job.stdout, job.stderr), (), ())[0]: + line = stream.readline() + if line: break + else: + return # EOF, EOF + if stream is job.stdout: + if line.startswith('*** '): + # get data, save to local files + for out, size in zip(files, line[4:].split()): + out.write(stream.read(int(size))) + return self.callback.log('Worker %s: %s' % (num, line.rstrip())) - line = job.stderr.readline() - if line: + else: self.callback.errorlog('Worker %s: %s' % (num, line.rstrip())) + + for i, pkg in enumerate(pkgfiles): + # insert cached packages + save_keptpkgs(pkg) + + # save output to local files + log_messages(i % self.conf.workers) + + for (num, job) in worker_jobs.items(): + # process remaining messages on stderr + log_messages(num) + + if job.wait() != 0: + msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode + self.callback.errorlog(msg) + raise MDError, msg - if not self.conf.quiet: self.callback.log("Workers Finished") - # finished with workers - # go to their dirs and add the contents - if not self.conf.quiet: - self.callback.log("Gathering worker results") - for num in range(self.conf.workers): - for (fn, fo) in (('primary.xml', self.primaryfile), - ('filelists.xml', self.flfile), - ('other.xml', self.otherfile)): - fnpath = worker_tmp_path + '/' + str(num) + '/' + fn - if os.path.exists(fnpath): - fo.write(open(fnpath, 'r').read()) - for pkgfile in pkgfiles: if self.conf.deltas: - po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) - self._do_delta_rpm_package(po) + try: + po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) + self._do_delta_rpm_package(po) + except MDError, e: + errorprint(e) + continue self.read_pkgs.append(pkgfile) + save_keptpkgs(None) # append anything left return self.current_pkg def closeMetadataDocs(self): - if not self.conf.quiet: - self.callback.log('') - - # save them up to the tmp locations: if not self.conf.quiet: self.callback.log(_('Saving Primary metadata')) @@ -784,7 +806,6 @@ class MetaDataGenerator: return self._old_package_dict self._old_package_dict = {} - opl = [] for d in self.conf.oldpackage_paths: for f in self.getFileList(d, '.rpm'): fp = d + '/' + f @@ -833,7 +854,7 @@ class MetaDataGenerator: return ' '.join(results) def _createRepoDataObject(self, mdfile, mdtype, compress=True, - compress_type='gzip', attribs={}): + compress_type=None, attribs={}): """return random metadata as RepoData object to be added to RepoMD mdfile = complete path to file mdtype = the metadata type to use @@ -843,15 +864,13 @@ class MetaDataGenerator: sfile = os.path.basename(mdfile) fo = open(mdfile, 'r') outdir = os.path.join(self.conf.outputdir, self.conf.tempdir) + if not compress_type: + compress_type = self.conf.compress_type if compress: - if compress_type == 'gzip': - sfile = '%s.gz' % sfile - outfn = os.path.join(outdir, sfile) - output = GzipFile(filename = outfn, mode='wb') - elif compress_type == 'bzip2': - sfile = '%s.bz2' % sfile - outfn = os.path.join(outdir, sfile) - output = BZ2File(filename = outfn, mode='wb') + sfile = '%s.%s' % (sfile, compress_type) + outfn = os.path.join(outdir, sfile) + output = compressOpen(outfn, mode='wb', compress_type=compress_type) + else: outfn = os.path.join(outdir, sfile) output = open(outfn, 'w') @@ -874,14 +893,13 @@ class MetaDataGenerator: thisdata = RepoData() thisdata.type = mdtype - baseloc = None thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile)) thisdata.checksum = (self.conf.sumtype, csum) if compress: thisdata.openchecksum = (self.conf.sumtype, open_csum) thisdata.size = str(os.stat(outfn).st_size) - thisdata.timestamp = str(os.stat(outfn).st_mtime) + thisdata.timestamp = str(int(os.stat(outfn).st_mtime)) for (k, v) in attribs.items(): setattr(thisdata, k, str(v)) @@ -925,9 +943,14 @@ class MetaDataGenerator: rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None) for (rpm_file, ftype) in workfiles: + # when we fix y-m-p and non-gzipped xml files - then we can make this just add + # self.conf.compress_type + if ftype in ('other', 'filelists', 'primary'): + rpm_file = rpm_file + '.' + 'gz' + elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression: + rpm_file = rpm_file + '.' + self.conf.compress_type complete_path = os.path.join(repopath, rpm_file) - - zfo = _gzipOpen(complete_path) + zfo = compressOpen(complete_path) # This is misc.checksum() done locally so we can get the size too. data = misc.Checksums([sumtype]) while data.read(zfo, 2**16): @@ -966,14 +989,20 @@ class MetaDataGenerator: good_name = '%s.sqlite' % ftype resultpath = os.path.join(repopath, good_name) + # compat compression for rhel5 compatibility from fedora :( + compress_type = self.conf.compress_type + if self.compat_compress: + compress_type = 'bz2' + # rename from silly name to not silly name os.rename(tmp_result_path, resultpath) - compressed_name = '%s.bz2' % good_name + compressed_name = '%s.%s' % (good_name, compress_type) result_compressed = os.path.join(repopath, compressed_name) db_csums[ftype] = misc.checksum(sumtype, resultpath) # compress the files - bzipFile(resultpath, result_compressed) + + compressFile(resultpath, result_compressed, compress_type) # csum the compressed file db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed) @@ -983,8 +1012,8 @@ class MetaDataGenerator: os.unlink(resultpath) if self.conf.unique_md_filenames: - csum_compressed_name = '%s-%s.bz2' % ( - db_compressed_sums[ftype], good_name) + csum_compressed_name = '%s-%s.%s' % ( + db_compressed_sums[ftype], good_name, compress_type) csum_result_compressed = os.path.join(repopath, csum_compressed_name) os.rename(result_compressed, csum_result_compressed) @@ -1001,7 +1030,7 @@ class MetaDataGenerator: data.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, compressed_name)) data.checksum = (sumtype, db_compressed_sums[ftype]) - data.timestamp = str(db_stat.st_mtime) + data.timestamp = str(int(db_stat.st_mtime)) data.size = str(db_stat.st_size) data.opensize = str(un_stat.st_size) data.openchecksum = (sumtype, db_csums[ftype]) @@ -1020,7 +1049,13 @@ class MetaDataGenerator: data.openchecksum = (sumtype, uncsum) if self.conf.unique_md_filenames: - res_file = '%s-%s.xml.gz' % (csum, ftype) + if ftype in ('primary', 'filelists', 'other'): + compress = 'gz' + else: + compress = self.conf.compress_type + + main_name = '.'.join(rpm_file.split('.')[:-1]) + res_file = '%s-%s.%s' % (csum, main_name, compress) orig_file = os.path.join(repopath, rpm_file) dest_file = os.path.join(repopath, res_file) os.rename(orig_file, dest_file) @@ -1046,7 +1081,7 @@ class MetaDataGenerator: if self.conf.additional_metadata: - for md_type, mdfile in self.conf.additional_metadata.items(): + for md_type, md_file in self.conf.additional_metadata.items(): mdcontent = self._createRepoDataObject(md_file, md_type) repomd.repoData[mdcontent.type] = mdcontent @@ -1110,23 +1145,43 @@ class MetaDataGenerator: raise MDError, _( 'Could not remove old metadata file: %s: %s') % (oldfile, e) - # Move everything else back from olddir (eg. repoview files) - try: - old_contents = os.listdir(output_old_dir) - except (OSError, IOError), e: - old_contents = [] - + old_to_remove = [] + old_pr = [] + old_fl = [] + old_ot = [] + old_pr_db = [] + old_fl_db = [] + old_ot_db = [] for f in os.listdir(output_old_dir): oldfile = os.path.join(output_old_dir, f) finalfile = os.path.join(output_final_dir, f) - if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2', - 'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2', - 'other.xml.gz','filelists.xml.gz'): - os.remove(oldfile) # kill off the old ones - continue - if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2', - 'primary.sqlite.bz2'): - os.remove(oldfile) + + for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr), + ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl), + ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)): + fn = '.'.join(f.split('.')[:-1]) + if fn.endswith(end): + lst.append(oldfile) + break + + # make a list of the old metadata files we don't want to remove. + for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db): + sortlst = sorted(lst, key=lambda x: os.path.getmtime(x), + reverse=True) + for thisf in sortlst[self.conf.retain_old_md:]: + old_to_remove.append(thisf) + + for f in os.listdir(output_old_dir): + oldfile = os.path.join(output_old_dir, f) + finalfile = os.path.join(output_final_dir, f) + fn = '.'.join(f.split('.')[:-1]) + if fn in ('filelists.sqlite', 'other.sqlite', + 'primary.sqlite') or oldfile in old_to_remove: + try: + os.remove(oldfile) + except (OSError, IOError), e: + raise MDError, _( + 'Could not remove old metadata file: %s: %s') % (oldfile, e) continue if os.path.exists(finalfile): @@ -1147,14 +1202,19 @@ class MetaDataGenerator: msg += _('Error was %s') % e raise MDError, msg - try: - os.rmdir(output_old_dir) - except OSError, e: - self.errorlog(_('Could not remove old metadata dir: %s') - % self.conf.olddir) - self.errorlog(_('Error was %s') % e) - self.errorlog(_('Please clean up this directory manually.')) + self._cleanup_tmp_repodata_dir() + self._cleanup_update_tmp_dir() + self._write_out_read_pkgs_list() + + def _cleanup_update_tmp_dir(self): + if not self.conf.update: + return + + shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True) + shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True) + + def _write_out_read_pkgs_list(self): # write out the read_pkgs_list file with self.read_pkgs if self.conf.read_pkgs_list: try: @@ -1167,6 +1227,23 @@ class MetaDataGenerator: % self.conf.read_pkgs_list) self.errorlog(_('Error was %s') % e) + def _cleanup_tmp_repodata_dir(self): + output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir) + output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir) + for dirbase in (self.conf.olddir, self.conf.tempdir): + dirpath = os.path.join(self.conf.outputdir, dirbase) + if os.path.exists(dirpath): + try: + os.rmdir(dirpath) + except OSError, e: + self.errorlog(_('Could not remove temp metadata dir: %s') + % dirbase) + self.errorlog(_('Error was %s') % e) + self.errorlog(_('Please clean up this directory manually.')) + # our worker tmp path + if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path): + shutil.rmtree(self._worker_tmp_path, ignore_errors=True) + def setup_sqlite_dbs(self, initdb=True): """sets up the sqlite dbs w/table schemas and db_infos""" destdir = os.path.join(self.conf.outputdir, self.conf.tempdir) @@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator): (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url) return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment))) - def getFileList(self, directory, ext): - - extlen = len(ext) - - def extension_visitor(arg, dirname, names): - for fn in names: - if os.path.isdir(fn): - continue - elif fn[-extlen:].lower() == '%s' % (ext): - reldir = os.path.basename(dirname) - if reldir == os.path.basename(directory): - reldir = "" - arg.append(os.path.join(reldir, fn)) - - rpmlist = [] - os.path.walk(directory, extension_visitor, rpmlist) - return rpmlist - def doPkgMetadata(self): """all the heavy lifting for the package metadata""" if len(self.conf.directories) == 1: @@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator): thisdir = os.path.join(self.conf.basedir, mydir) filematrix[mydir] = self.getFileList(thisdir, '.rpm') + + # pkglist is a bit different for split media, as we have to know + # which dir. it belongs to. So we walk the dir. and then filter. + # We could be faster by not walking the dir. ... but meh. + if self.conf.pkglist: + pkglist = set(self.conf.pkglist) + pkgs = [] + for fname in filematrix[mydir]: + if fname not in pkglist: + continue + pkgs.append(fname) + filematrix[mydir] = pkgs + self.trimRpms(filematrix[mydir]) self.pkgcount += len(filematrix[mydir]) @@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator): self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano) try: self.openMetadataDocs() - original_basedir = self.conf.basedir for mydir in self.conf.directories: self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano) self.writeMetadataDocs(filematrix[mydir], mydir) diff --git a/createrepo/merge.py b/createrepo/merge.py index b3b2ea1..1ac43bb 100644 --- a/createrepo/merge.py +++ b/createrepo/merge.py @@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir import yum.update_md import rpmUtils.arch import operator +from utils import MDError import createrepo import tempfile @@ -84,6 +85,8 @@ class RepoMergeBase: # in the repolist count = 0 for r in self.repolist: + if r[0] == '/': + r = 'file://' + r # just fix the file repos, this is silly. count +=1 rid = 'repo%s' % count n = self.yumbase.add_enable_repo(rid, baseurls=[r], @@ -92,7 +95,10 @@ class RepoMergeBase: n._merge_rank = count #setup our sacks - self.yumbase._getSacks(archlist=self.archlist) + try: + self.yumbase._getSacks(archlist=self.archlist) + except yum.Errors.RepoError, e: + raise MDError, "Could not setup merge repo pkgsack: %s" % e myrepos = self.yumbase.repos.listEnabled() @@ -102,11 +108,16 @@ class RepoMergeBase: def write_metadata(self, outputdir=None): mytempdir = tempfile.mkdtemp() if self.groups: - comps_fn = mytempdir + '/groups.xml' - compsfile = open(comps_fn, 'w') - compsfile.write(self.yumbase.comps.xml()) - compsfile.close() - self.mdconf.groupfile=comps_fn + try: + comps_fn = mytempdir + '/groups.xml' + compsfile = open(comps_fn, 'w') + compsfile.write(self.yumbase.comps.xml()) + compsfile.close() + except yum.Errors.GroupsError, e: + # groups not being available shouldn't be a fatal error + pass + else: + self.mdconf.groupfile=comps_fn if self.updateinfo: ui_fn = mytempdir + '/updateinfo.xml' diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py index 27d3690..54863cb 100644 --- a/createrepo/readMetadata.py +++ b/createrepo/readMetadata.py @@ -16,11 +16,25 @@ # Copyright 2006 Red Hat import os -import libxml2 import stat from utils import errorprint, _ -from yum import repoMDObject +import yum +from yum import misc +from yum.Errors import YumBaseError +import tempfile +class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite): + # special for special people like us. + def _return_remote_location(self): + + if self.basepath: + msg = """\n""" % ( + misc.to_xml(self.basepath, attrib=True), + misc.to_xml(self.relativepath, attrib=True)) + else: + msg = """\n""" % misc.to_xml(self.relativepath, attrib=True) + + return msg class MetadataIndex(object): @@ -30,178 +44,72 @@ class MetadataIndex(object): opts = {} self.opts = opts self.outputdir = outputdir + realpath = os.path.realpath(outputdir) repodatadir = self.outputdir + '/repodata' - myrepomdxml = repodatadir + '/repomd.xml' - if os.path.exists(myrepomdxml): - repomd = repoMDObject.RepoMD('garbageid', myrepomdxml) - b = repomd.getData('primary').location[1] - f = repomd.getData('filelists').location[1] - o = repomd.getData('other').location[1] - basefile = os.path.join(self.outputdir, b) - filelistfile = os.path.join(self.outputdir, f) - otherfile = os.path.join(self.outputdir, o) - else: - basefile = filelistfile = otherfile = "" - - self.files = {'base' : basefile, - 'filelist' : filelistfile, - 'other' : otherfile} - self.scan() + self._repo = yum.yumRepo.YumRepository('garbageid') + self._repo.baseurl = 'file://' + realpath + self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo") + self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p") + self._repo.metadata_expire = 1 + self._repo.gpgcheck = 0 + self._repo.repo_gpgcheck = 0 + self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld) + self.pkg_tups_by_path = {} + try: + self.scan() + except YumBaseError, e: + print "Could not find valid repo at: %s" % self.outputdir + def scan(self): - """Read in and index old repo data""" - self.basenodes = {} - self.filesnodes = {} - self.othernodes = {} - self.pkg_ids = {} + """Read in old repodata""" if self.opts.get('verbose'): print _("Scanning old repo data") - for fn in self.files.values(): - if not os.path.exists(fn): - #cannot scan - errorprint(_("Warning: Old repodata file missing: %s") % fn) - return - root = libxml2.parseFile(self.files['base']).getRootElement() - self._scanPackageNodes(root, self._handleBase) - if self.opts.get('verbose'): - print _("Indexed %i base nodes" % len(self.basenodes)) - root = libxml2.parseFile(self.files['filelist']).getRootElement() - self._scanPackageNodes(root, self._handleFiles) - if self.opts.get('verbose'): - print _("Indexed %i filelist nodes" % len(self.filesnodes)) - root = libxml2.parseFile(self.files['other']).getRootElement() - self._scanPackageNodes(root, self._handleOther) - if self.opts.get('verbose'): - print _("Indexed %i other nodes" % len(self.othernodes)) - #reverse index pkg ids to track references - self.pkgrefs = {} - for relpath, pkgid in self.pkg_ids.iteritems(): - self.pkgrefs.setdefault(pkgid,[]).append(relpath) - - def _scanPackageNodes(self, root, handler): - node = root.children - while node is not None: - if node.type != "element": - node = node.next + self._repo.sack.populate(self._repo, 'all', None, False) + for thispo in self._repo.sack: + mtime = thispo.filetime + size = thispo.size + relpath = thispo.relativepath + do_stat = self.opts.get('do_stat', True) + if mtime is None: + print _("mtime missing for %s") % relpath continue - if node.name == "package": - handler(node) - node = node.next - - def _handleBase(self, node): - top = node - node = node.children - pkgid = None - mtime = None - size = None - relpath = None - do_stat = self.opts.get('do_stat', True) - while node is not None: - if node.type != "element": - node = node.next + if size is None: + print _("size missing for %s") % relpath continue - if node.name == "checksum": - pkgid = node.content - elif node.name == "time": - mtime = int(node.prop('file')) - elif node.name == "size": - size = int(node.prop('package')) - elif node.name == "location": - relpath = node.prop('href') - node = node.next - if relpath is None: - print _("Incomplete data for node") - return - if pkgid is None: - print _("pkgid missing for %s") % relpath - return - if mtime is None: - print _("mtime missing for %s") % relpath - return - if size is None: - print _("size missing for %s") % relpath - return - if do_stat: - filepath = os.path.join(self.opts['pkgdir'], relpath) - try: - st = os.stat(filepath) - except OSError: - #file missing -- ignore - return - if not stat.S_ISREG(st.st_mode): - #ignore non files - return - #check size and mtime - if st.st_size != size: - if self.opts.get('verbose'): - print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) - return - if int(st.st_mtime) != mtime: - if self.opts.get('verbose'): - print _("Modification time changed for %s") % filepath - return - #otherwise we index - self.basenodes[relpath] = top - self.pkg_ids[relpath] = pkgid - - def _handleFiles(self, node): - pkgid = node.prop('pkgid') - if pkgid: - self.filesnodes[pkgid] = node - - def _handleOther(self, node): - pkgid = node.prop('pkgid') - if pkgid: - self.othernodes[pkgid] = node + if do_stat: + filepath = os.path.join(self.opts['pkgdir'], relpath) + try: + st = os.stat(filepath) + except OSError: + #file missing -- ignore + continue + if not stat.S_ISREG(st.st_mode): + #ignore non files + continue + #check size and mtime + if st.st_size != size: + if self.opts.get('verbose'): + print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) + continue + if int(st.st_mtime) != mtime: + if self.opts.get('verbose'): + print _("Modification time changed for %s") % filepath + continue + + self.pkg_tups_by_path[relpath] = thispo.pkgtup + - def getNodes(self, relpath): - """Return base, filelist, and other nodes for file, if they exist - Returns a tuple of nodes, or None if not found + def getNodes(self, relpath): + """return a package object based on relative path of pkg """ - bnode = self.basenodes.get(relpath,None) - if bnode is None: - return None - pkgid = self.pkg_ids.get(relpath,None) - if pkgid is None: - print _("No pkgid found for: %s") % relpath - return None - fnode = self.filesnodes.get(pkgid,None) - if fnode is None: - return None - onode = self.othernodes.get(pkgid,None) - if onode is None: - return None - return bnode, fnode, onode - - def freeNodes(self,relpath): - #causing problems - """Free up nodes corresponding to file, if possible""" - bnode = self.basenodes.get(relpath,None) - if bnode is None: - print "Missing node for %s" % relpath - return - bnode.unlinkNode() - bnode.freeNode() - del self.basenodes[relpath] - pkgid = self.pkg_ids.get(relpath,None) - if pkgid is None: - print _("No pkgid found for: %s") % relpath - return None - del self.pkg_ids[relpath] - dups = self.pkgrefs.get(pkgid) - dups.remove(relpath) - if len(dups): - #still referenced - return - del self.pkgrefs[pkgid] - for nodes in self.filesnodes, self.othernodes: - node = nodes.get(pkgid) - if node is not None: - node.unlinkNode() - node.freeNode() - del nodes[pkgid] + if relpath in self.pkg_tups_by_path: + pkgtup = self.pkg_tups_by_path[relpath] + return self._repo.sack.searchPkgTuple(pkgtup)[0] + return None + if __name__ == "__main__": cwd = os.getcwd() @@ -209,9 +117,9 @@ if __name__ == "__main__": 'pkgdir': cwd} idx = MetadataIndex(cwd, opts) - for fn in idx.basenodes.keys(): - a,b,c, = idx.getNodes(fn) - a.serialize() - b.serialize() - c.serialize() - idx.freeNodes(fn) + for fn in idx.pkg_tups_by_path: + po = idx.getNodes(fn) + print po.xml_dump_primary_metadata() + print po.xml_dump_filelists_metadata() + print po.xml_dump_other_metadata() + diff --git a/createrepo/utils.py b/createrepo/utils.py index 995c3b9..b0d92ec 100644 --- a/createrepo/utils.py +++ b/createrepo/utils.py @@ -23,6 +23,12 @@ import bz2 import gzip from gzip import write32u, FNAME from yum import misc +_available_compression = ['gz', 'bz2'] +try: + import lzma + _available_compression.append('xz') +except ImportError: + lzma = None def errorprint(stuff): print >> sys.stderr, stuff @@ -34,22 +40,14 @@ def _(args): class GzipFile(gzip.GzipFile): def _write_gzip_header(self): + # Generate a header that is easily reproduced with gzip -9 -n on + # an unix-like system self.fileobj.write('\037\213') # magic header self.fileobj.write('\010') # compression method - if hasattr(self, 'name'): - fname = self.name[:-3] - else: - fname = self.filename[:-3] - flags = 0 - if fname: - flags = FNAME - self.fileobj.write(chr(flags)) - write32u(self.fileobj, long(0)) - self.fileobj.write('\002') - self.fileobj.write('\377') - if fname: - self.fileobj.write(fname + '\000') - + self.fileobj.write('\000') # flags + write32u(self.fileobj, long(0)) # timestamp + self.fileobj.write('\002') # max compression + self.fileobj.write('\003') # UNIX def _gzipOpen(filename, mode="rb", compresslevel=9): return GzipFile(filename, mode, compresslevel) @@ -69,6 +67,75 @@ def bzipFile(source, dest): s_fn.close() +def xzFile(source, dest): + if not 'xz' in _available_compression: + raise MDError, "Cannot use xz for compression, library/module is not available" + + s_fn = open(source, 'rb') + destination = lzma.LZMAFile(dest, 'w') + + while True: + data = s_fn.read(1024000) + + if not data: break + destination.write(data) + + destination.close() + s_fn.close() + +def gzFile(source, dest): + + s_fn = open(source, 'rb') + destination = GzipFile(dest, 'w') + + while True: + data = s_fn.read(1024000) + + if not data: break + destination.write(data) + + destination.close() + s_fn.close() + + +class Duck: + def __init__(self, **attr): + self.__dict__ = attr + + +def compressFile(source, dest, compress_type): + """Compress an existing file using any compression type from source to dest""" + + if compress_type == 'xz': + xzFile(source, dest) + elif compress_type == 'bz2': + bzipFile(source, dest) + elif compress_type == 'gz': + gzFile(source, dest) + else: + raise MDError, "Unknown compression type %s" % compress_type + +def compressOpen(fn, mode='rb', compress_type=None): + + if not compress_type: + # we are readonly and we don't give a compress_type - then guess based on the file extension + compress_type = fn.split('.')[-1] + if compress_type not in _available_compression: + compress_type = 'gz' + + if compress_type == 'xz': + fh = lzma.LZMAFile(fn, mode) + if mode == 'w': + fh = Duck(write=lambda s, write=fh.write: s != '' and write(s), + close=fh.close) + return fh + elif compress_type == 'bz2': + return bz2.BZ2File(fn, mode) + elif compress_type == 'gz': + return _gzipOpen(fn, mode) + else: + raise MDError, "Unknown compression type %s" % compress_type + def returnFD(filename): try: fdno = os.open(filename, os.O_RDONLY) @@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist): return result def split_list_into_equal_chunks(seq, num_chunks): - avg = len(seq) / float(num_chunks) - out = [] - last = 0.0 - while last < len(seq): - out.append(seq[int(last):int(last + avg)]) - last += avg - + """it's used on sorted input which is then merged in order""" + out = [[] for i in range(num_chunks)] + for i, item in enumerate(seq): + out[i % num_chunks].append(item) return out +def num_cpus_online(unknown=1): + if not hasattr(os, "sysconf"): + return unknown + + if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"): + return unknown + + ncpus = os.sysconf("SC_NPROCESSORS_ONLN") + try: + if int(ncpus) > 0: + return ncpus + except: + pass + + return unknown + class MDError(Exception): def __init__(self, value=None): diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py index ac06196..f87ac6d 100644 --- a/createrepo/yumbased.py +++ b/createrepo/yumbased.py @@ -16,6 +16,11 @@ import os +def _get_umask(): + oumask = os.umask(0) + os.umask(oumask) + return oumask +_b4rpm_oumask = _get_umask() import rpm import types @@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage): csumo = os.fdopen(csumo, 'w', -1) csumo.write(checksum) csumo.close() + # tempfile forces 002 ... we want to undo that, so that users + # can share the cache. BZ 833350. + os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask) os.rename(tmpfilename, csumfile) except: pass diff --git a/docs/createrepo.8 b/docs/createrepo.8 index e3c4c3b..eefd4bf 100644 --- a/docs/createrepo.8 +++ b/docs/createrepo.8 @@ -37,6 +37,10 @@ cache of checksums of packages in the repository. In consecutive runs of createrepo over the same repository of files that do not have a complete change out of all packages this decreases the processing time dramatically. .br +.IP "\fB\--basedir\fP" +Basedir for path to directories in the repodata, default is the current working +directory. +.br .IP "\fB\--update\fP" If metadata already exists in the outputdir and an rpm is unchanged (based on file size and mtime) since the metadata was generated, reuse @@ -49,11 +53,15 @@ skip the stat() call on a --update, assumes if the filename is the same then the file is still the same (only use this if you're fairly trusting or gullible). .br +.IP "\fB\--update-md-path\fP" +Use the existing repodata for --update, from this path. +.br .IP "\fB\-C --checkts\fP" Don't generate repo metadata, if their timestamps are newer than its rpms. This option decreases the processing time drastically again, if you happen to run it on an unmodified repo, but it is (currently) mutual exclusive -with the --split option. +with the --split option. NOTE: This command will not notice when +packages have been removed from repo. Use --update to handle that. .br .IP "\fB\--split\fP" Run in split media mode. Rather than pass a single directory, take a set of @@ -61,7 +69,7 @@ directories corresponding to different volumes in a media set. .br .IP "\fB\-p --pretty\fP" Output xml files in pretty format. -.IP "\fB\-V --version\fP" +.IP "\fB\--version\fP" Output version. .IP "\fB\-h --help\fP" Show help menu. @@ -89,6 +97,10 @@ Include the file's checksum in the metadata filename, helps HTTP caching (defaul .IP "\fB\--simple-md-filenames\fP" Do not include the file's checksum in the metadata filename. +.IP "\fB\--retain-old-md\fP" +Keep around the latest (by timestamp) N copies of the old repodata (so clients +with older repomd.xml files can still access it). Default is 0. + .IP "\fB\--distro\fP" Specify distro tags. Can be specified more than once. Optional syntax specifying a cpeid(http://cpe.mitre.org/) --distro=cpeid,distrotag @@ -104,7 +116,16 @@ Tells createrepo to generate deltarpms and the delta metadata paths to look for older pkgs to delta against. Can be specified multiple times .IP "\fB\--num-deltas\fP int" the number of older versions to make deltas against. Defaults to 1 - +.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST +output the paths to the pkgs actually read useful with --update +.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE +max size of an rpm that to run deltarpm against (in bytes) +.IP "\fB\--workers\fP WORKERS +number of workers to spawn to read rpms +.IP "\fB\--compress-type\fP +specify which compression method to use: compat (default), +xz (may not be available), gz, bz2. +.IP .SH "EXAMPLES" Here is an example of a repository with a groups file. Note that the diff --git a/genpkgmetadata.py b/genpkgmetadata.py index 8c98191..4528bf2 100755 --- a/genpkgmetadata.py +++ b/genpkgmetadata.py @@ -22,7 +22,7 @@ import os import sys import re -from optparse import OptionParser +from optparse import OptionParser,SUPPRESS_HELP import time import createrepo @@ -37,6 +37,12 @@ def parse_args(args, conf): Sanity check all the things being passed in. """ + def_workers = os.nice(0) + if def_workers > 0: + def_workers = 1 # We are niced, so just use a single worker. + else: + def_workers = 0 # zoooom.... + _def = yum.misc._default_checksums[0] _avail = yum.misc._available_checksums parser = OptionParser(version = "createrepo %s" % createrepo.__version__) @@ -95,11 +101,13 @@ def parse_args(args, conf): parser.add_option("--changelog-limit", dest="changelog_limit", default=None, help="only import the last N changelog entries") parser.add_option("--unique-md-filenames", dest="unique_md_filenames", - help="include the file's checksum in the filename, helps with proxies", + help="include the file's checksum in the filename, helps with proxies (default)", default=True, action="store_true") - parser.add_option("--simple-md-filenames", dest="simple_md_filenames", - help="do not include the file's checksum in the filename, helps with proxies", - default=False, action="store_true") + parser.add_option("--simple-md-filenames", dest="unique_md_filenames", + help="do not include the file's checksum in the filename", + action="store_false") + parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md', + help="keep around the latest (by timestamp) N copies of the old repodata") parser.add_option("--distro", default=[], action="append", help="distro tag and optional cpeid: --distro" "'cpeid,textname'") parser.add_option("--content", default=[], dest='content_tags', @@ -119,10 +127,15 @@ def parse_args(args, conf): parser.add_option("--max-delta-rpm-size", default=100000000, dest='max_delta_rpm_size', type='int', help="max size of an rpm that to run deltarpm against (in bytes)") - - parser.add_option("--workers", default=1, + parser.add_option("--workers", default=def_workers, dest='workers', type='int', help="number of workers to spawn to read rpms") + parser.add_option("--xz", default=False, + action="store_true", + help=SUPPRESS_HELP) + parser.add_option("--compress-type", default='compat', dest="compress_type", + help="which compression type to use") + (opts, argsleft) = parser.parse_args(args) if len(argsleft) > 1 and not opts.split: @@ -138,6 +151,9 @@ def parse_args(args, conf): else: directories = argsleft + if opts.workers >= 128: + errorprint(_('Warning: More than 128 workers is a lot. Limiting.')) + opts.workers = 128 if opts.sumtype == 'sha1': errorprint(_('Warning: It is more compatible to use sha instead of sha1')) @@ -150,11 +166,13 @@ def parse_args(args, conf): errorprint(_('--split and --checkts options are mutually exclusive')) sys.exit(1) - if opts.simple_md_filenames: - opts.unique_md_filenames = False - if opts.nodatabase: opts.database = False + + # xz is just a shorthand for compress_type + if opts.xz and opts.compress_type == 'compat': + opts.compress_type='xz' + # let's switch over to using the conf object - put all the opts into it for opt in parser.option_list: @@ -240,6 +258,7 @@ def main(args): if mdgen.checkTimeStamps(): if mdgen.conf.verbose: print _('repo is up to date') + mdgen._cleanup_tmp_repodata_dir() sys.exit(0) if conf.profile: diff --git a/mergerepo.py b/mergerepo.py index 05e5f5e..80cb1a8 100755 --- a/mergerepo.py +++ b/mergerepo.py @@ -18,6 +18,7 @@ import sys import createrepo.merge +from createrepo.utils import MDError from optparse import OptionParser #TODO: @@ -47,6 +48,9 @@ def parse_args(args): help="Do not merge group(comps) metadata") parser.add_option("", "--noupdateinfo", default=False, action="store_true", help="Do not merge updateinfo metadata") + parser.add_option("--compress-type", default=None, dest="compress_type", + help="which compression type to use") + (opts, argsleft) = parser.parse_args(args) if len(opts.repos) < 2: @@ -77,9 +81,14 @@ def main(args): rmbase.groups = False if opts.noupdateinfo: rmbase.updateinfo = False - - rmbase.merge_repos() - rmbase.write_metadata() - + if opts.compress_type: + rmbase.mdconf.compress_type = opts.compress_type + try: + rmbase.merge_repos() + rmbase.write_metadata() + except MDError, e: + print >> sys.stderr, "Could not merge repos: %s" % e + sys.exit(1) + if __name__ == "__main__": main(sys.argv[1:]) diff --git a/modifyrepo.py b/modifyrepo.py index 17094a4..bffe99a 100755 --- a/modifyrepo.py +++ b/modifyrepo.py @@ -1,11 +1,15 @@ #!/usr/bin/python -# This tools is used to insert arbitrary metadata into an RPM repository. +# This tool is used to manipulate arbitrary metadata in a RPM repository. # Example: # ./modifyrepo.py updateinfo.xml myrepo/repodata +# or +# ./modifyrepo.py --remove updateinfo.xml myrepo/repodata # or in Python: # >>> from modifyrepo import RepoMetadata # >>> repomd = RepoMetadata('myrepo/repodata') # >>> repomd.add('updateinfo.xml') +# or +# >>> repomd.remove('updateinfo.xml') # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -20,11 +24,13 @@ # (C) Copyright 2006 Red Hat, Inc. # Luke Macken # modified by Seth Vidal 2008 +# modified by Daniel Mach 2011 import os import sys from createrepo import __version__ -from createrepo.utils import checksum_and_rename, GzipFile, MDError +from createrepo.utils import checksum_and_rename, compressOpen, MDError +from createrepo.utils import _available_compression from yum.misc import checksum from yum.repoMDObject import RepoMD, RepoMDError, RepoData @@ -38,7 +44,7 @@ class RepoMetadata: """ Parses the repomd.xml file existing in the given repo directory. """ self.repodir = os.path.abspath(repo) self.repomdxml = os.path.join(self.repodir, 'repomd.xml') - self.checksum_type = 'sha256' + self.compress_type = _available_compression[-1] # best available if not os.path.exists(self.repomdxml): raise MDError, '%s not found' % self.repomdxml @@ -49,6 +55,35 @@ class RepoMetadata: except RepoMDError, e: raise MDError, 'Could not parse %s' % self.repomdxml + def _get_mdtype(self, mdname, mdtype=None): + """ Get mdtype from existing mdtype or from a mdname. """ + if mdtype: + return mdtype + return mdname.split('.')[0] + + def _print_repodata(self, repodata): + """ Print repodata details. """ + print " type =", repodata.type + print " location =", repodata.location[1] + print " checksum =", repodata.checksum[1] + print " timestamp =", repodata.timestamp + print " open-checksum =", repodata.openchecksum[1] + + def _write_repomd(self): + """ Write the updated repomd.xml. """ + outmd = file(self.repomdxml, 'w') + outmd.write(self.repoobj.dump_xml()) + outmd.close() + print "Wrote:", self.repomdxml + + def _remove_repodata_file(self, repodata): + """ Remove a file specified in repodata location """ + try: + os.remove(repodata.location[1]) + except OSError, ex: + if ex.errno != 2: + # continue on a missing file + raise MDError("could not remove file %s" % repodata.location[1]) def add(self, metadata, mdtype=None): """ Insert arbitrary metadata into this repository. @@ -63,8 +98,8 @@ class RepoMetadata: mdname = 'updateinfo.xml' elif isinstance(metadata, str): if os.path.exists(metadata): - if metadata.endswith('.gz'): - oldmd = GzipFile(filename=metadata, mode='rb') + if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'): + oldmd = compressOpen(metadata, mode='rb') else: oldmd = file(metadata, 'r') md = oldmd.read() @@ -75,27 +110,32 @@ class RepoMetadata: else: raise MDError, 'invalid metadata type' + do_compress = False ## Compress the metadata and move it into the repodata - if not mdname.endswith('.gz'): - mdname += '.gz' - if not mdtype: - mdtype = mdname.split('.')[0] - + if self.compress and mdname.split('.')[-1] not in ('gz', 'bz2', 'xz'): + do_compress = True + mdname += '.' + self.compress_type + mdtype = self._get_mdtype(mdname, mdtype) + destmd = os.path.join(self.repodir, mdname) - newmd = GzipFile(filename=destmd, mode='wb') + if do_compress: + newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type) + else: + newmd = open(destmd, 'wb') + newmd.write(md) newmd.close() print "Wrote:", destmd open_csum = checksum(self.checksum_type, metadata) - csum, destmd = checksum_and_rename(destmd, self.checksum_type) + if self.unique_md_filenames: + csum, destmd = checksum_and_rename(destmd, self.checksum_type) + else: + csum = checksum(self.checksum_type, destmd) base_destmd = os.path.basename(destmd) - - ## Remove any stale metadata - if mdtype in self.repoobj.repoData: - del self.repoobj.repoData[mdtype] - + # Remove any stale metadata + old_rd = self.repoobj.repoData.pop(mdtype, None) new_rd = RepoData() new_rd.type = mdtype @@ -103,20 +143,30 @@ class RepoMetadata: new_rd.checksum = (self.checksum_type, csum) new_rd.openchecksum = (self.checksum_type, open_csum) new_rd.size = str(os.stat(destmd).st_size) - new_rd.timestamp = str(os.stat(destmd).st_mtime) + new_rd.timestamp = str(int(os.stat(destmd).st_mtime)) self.repoobj.repoData[new_rd.type] = new_rd - - print " type =", new_rd.type - print " location =", new_rd.location[1] - print " checksum =", new_rd.checksum[1] - print " timestamp =", new_rd.timestamp - print " open-checksum =", new_rd.openchecksum[1] - - ## Write the updated repomd.xml - outmd = file(self.repomdxml, 'w') - outmd.write(self.repoobj.dump_xml()) - outmd.close() - print "Wrote:", self.repomdxml + self._print_repodata(new_rd) + self._write_repomd() + + if old_rd is not None and old_rd.location[1] != new_rd.location[1]: + # remove the old file when overwriting metadata + # with the same mdtype but different location + self._remove_repodata_file(old_rd) + + def remove(self, metadata, mdtype=None): + """ Remove metadata from this repository. """ + mdname = metadata + mdtype = self._get_mdtype(mdname, mdtype) + + old_rd = self.repoobj.repoData.pop(mdtype, None) + if old_rd is None: + print "Metadata not found: %s" % mdtype + return + + self._remove_repodata_file(old_rd) + print "Removed:" + self._print_repodata(old_rd) + self._write_repomd() def main(args): @@ -124,7 +174,23 @@ def main(args): # query options parser.add_option("--mdtype", dest='mdtype', help="specific datatype of the metadata, will be derived from the filename if not specified") - parser.usage = "modifyrepo [options] " + parser.add_option("--remove", action="store_true", + help="remove specified file from repodata") + parser.add_option("--compress", action="store_true", default=True, + help="compress the new repodata before adding it to the repo (default)") + parser.add_option("--no-compress", action="store_false", dest="compress", + help="do not compress the new repodata before adding it to the repo") + parser.add_option("--compress-type", dest='compress_type', default='gz', + help="compression format to use") + parser.add_option("-s", "--checksum", default='sha256', dest='sumtype', + help="specify the checksum type to use (default: sha256)") + parser.add_option("--unique-md-filenames", dest="unique_md_filenames", + help="include the file's checksum in the filename, helps with proxies (default)", + default=True, action="store_true") + parser.add_option("--simple-md-filenames", dest="unique_md_filenames", + help="do not include the file's checksum in the filename", + action="store_false") + parser.usage = "modifyrepo [options] [--remove] " (opts, argsleft) = parser.parse_args(args) if len(argsleft) != 2: @@ -137,11 +203,32 @@ def main(args): except MDError, e: print "Could not access repository: %s" % str(e) return 1 + + + repomd.checksum_type = opts.sumtype + repomd.unique_md_filenames = opts.unique_md_filenames + repomd.compress = opts.compress + if opts.compress_type not in _available_compression: + print "Compression %s not available: Please choose from: %s" % (opts.compress_type, ', '.join(_available_compression)) + return 1 + repomd.compress_type = opts.compress_type + + # remove + if opts.remove: + try: + repomd.remove(metadata) + except MDError, ex: + print "Could not remove metadata: %s" % (metadata, str(ex)) + return 1 + return + + # add try: repomd.add(metadata, mdtype=opts.mdtype) except MDError, e: print "Could not add metadata from file %s: %s" % (metadata, str(e)) return 1 + if __name__ == '__main__': ret = main(sys.argv[1:]) diff --git a/worker.py b/worker.py index eb35ef7..b67b5bd 100755 --- a/worker.py +++ b/worker.py @@ -5,6 +5,7 @@ import yum import createrepo import os import rpmUtils +import re from optparse import OptionParser @@ -23,6 +24,8 @@ def main(args): parser = OptionParser() parser.add_option('--tmpmdpath', default=None, help="path where the outputs should be dumped for this worker") + parser.add_option('--pkglist', default=None, + help="file to read the pkglist from in lieu of all of them on the cli") parser.add_option("--pkgoptions", default=[], action='append', help="pkgoptions in the format of key=value") parser.add_option("--quiet", default=False, action='store_true', @@ -36,10 +39,6 @@ def main(args): opts, pkgs = parser.parse_args(args) external_data = {'_packagenumber': 1} globalopts = {} - if not opts.tmpmdpath: - print >> sys.stderr, "tmpmdpath required for destination files" - sys.exit(1) - for strs in opts.pkgoptions: k,v = strs.split('=') @@ -61,18 +60,39 @@ def main(args): v = None globalopts[k] = v + # turn off buffering on stdout + sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) reldir = external_data['_reldir'] ts = rpmUtils.transaction.initReadOnlyTransaction() - pri = open(opts.tmpmdpath + '/primary.xml' , 'w') - fl = open(opts.tmpmdpath + '/filelists.xml' , 'w') - other = open(opts.tmpmdpath + '/other.xml' , 'w') - - + if opts.tmpmdpath: + files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w') + for i in ('primary', 'filelists', 'other')] + def output(*xml): + for fh, buf in zip(files, xml): + fh.write(buf) + else: + def output(*xml): + buf = ' '.join(str(len(i)) for i in xml) + sys.stdout.write('*** %s\n' % buf) + for buf in xml: + sys.stdout.write(buf) + + if opts.pkglist: + for line in open(opts.pkglist,'r').readlines(): + line = line.strip() + if re.match('^\s*\#.*', line) or re.match('^\s*$', line): + continue + pkgs.append(line) + + clog_limit=globalopts.get('clog_limit', None) + if clog_limit is not None: + clog_limit = int(clog_limit) for pkgfile in pkgs: pkgpath = reldir + '/' + pkgfile if not os.path.exists(pkgpath): print >> sys.stderr, "File not found: %s" % pkgpath + output() continue try: @@ -80,20 +100,17 @@ def main(args): print "reading %s" % (pkgfile) pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, - external_data=external_data) - pri.write(pkg.xml_dump_primary_metadata()) - fl.write(pkg.xml_dump_filelists_metadata()) - other.write(pkg.xml_dump_other_metadata(clog_limit= - globalopts.get('clog_limit', None))) + sumtype=globalopts.get('sumtype', None), + external_data=external_data) + output(pkg.xml_dump_primary_metadata(), + pkg.xml_dump_filelists_metadata(), + pkg.xml_dump_other_metadata(clog_limit=clog_limit)) except yum.Errors.YumBaseError, e: print >> sys.stderr, "Error: %s" % e + output() continue else: external_data['_packagenumber']+=1 - pri.close() - fl.close() - other.close() - if __name__ == "__main__": main(sys.argv[1:])