SOURCES/createrepo-head.patch

diff --git a/Makefile b/Makefile
index 60bb9db..0b5738b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
 PKGNAME = createrepo
+ALIASES = mergerepo modifyrepo genpkgmetadata.py mergerepo.py modifyrepo.py
 VERSION=$(shell awk '/Version:/ { print $$2 }' ${PKGNAME}.spec)
 RELEASE=$(shell awk '/Release:/ { print $$2 }' ${PKGNAME}.spec)
 CVSTAG=createrepo-$(subst .,_,$(VERSION)-$(RELEASE))
@@ -26,6 +27,8 @@ docdir =
 includedir = ${prefix}/include
 oldincludedir = /usr/include
 mandir = ${prefix}/share/man
+compdir = $(shell pkg-config --variable=completionsdir bash-completion)
+compdir := $(or $(compdir), "/etc/bash_completion.d")
 
 pkgdatadir = $(datadir)/$(PKGNAME)
 pkglibdir = $(libdir)/$(PKGNAME)
@@ -33,7 +36,7 @@ pkgincludedir = $(includedir)/$(PKGNAME)
 top_builddir = 
 
 # all dirs
-DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(sysconfdir)/bash_completion.d \
+DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(compdir) \
 	$(DESTDIR)$(pkgdatadir) $(DESTDIR)$(mandir)
 
 
@@ -65,7 +68,8 @@ check:
 
 install: all installdirs
 	$(INSTALL_MODULES) $(srcdir)/$(MODULES) $(DESTDIR)$(pkgdatadir)
-	$(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(sysconfdir)/bash_completion.d
+	$(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(compdir)/$(PKGNAME)
+	(cd $(DESTDIR)$(compdir); for n in $(ALIASES); do ln -s $(PKGNAME) $$n; done)
	for subdir in $(SUBDIRS) ; do \
	  $(MAKE) -C $$subdir install VERSION=$(VERSION) PKGNAME=$(PKGNAME); \
	done
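
The compdir logic above asks pkg-config where bash-completion wants completion
files installed and falls back to /etc/bash_completion.d when the query yields
nothing. A minimal Python sketch of the same resolution logic (illustrative
only, not part of the patch):

    import subprocess

    def completions_dir(default='/etc/bash_completion.d'):
        # Equivalent of: pkg-config --variable=completionsdir bash-completion
        try:
            p = subprocess.Popen(
                ['pkg-config', '--variable=completionsdir', 'bash-completion'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out = p.communicate()[0].decode('utf-8').strip()
        except OSError:  # pkg-config not installed
            out = ''
        # mirrors: compdir := $(or $(compdir), "/etc/bash_completion.d")
        return out or default
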
diff --git a/createrepo.bash b/createrepo.bash
index 54ac8b2..14b43d8 100644
--- a/createrepo.bash
+++ b/createrepo.bash
@@ -1,11 +1,22 @@
 # bash completion for createrepo and friends
 
+_cr_compress_type()
+{
+    COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \
+        | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) )
+}
+
+_cr_checksum_type()
+{
+    COMPREPLY=( $( compgen -W 'md5 sha1 sha256 sha512' -- "$1" ) )
+}
+
 _cr_createrepo()
 {
     COMPREPLY=()
 
     case $3 in
-        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\
+        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\
         --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size)
             return 0
             ;;
@@ -18,8 +29,8 @@ _cr_createrepo()
             COMPREPLY=( $( compgen -f -o plusdirs -X '!*.xml' -- "$2" ) )
             return 0
             ;;
-        -s|--sumtype)
-            COMPREPLY=( $( compgen -W 'md5 sha1 sha256 sha512' -- "$2" ) )
+        -s|--checksum)
+            _cr_checksum_type "$2"
             return 0
             ;;
         -i|--pkglist|--read-pkgs-list)
@@ -30,10 +41,24 @@ _cr_createrepo()
             COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) )
             return 0
             ;;
+        --retain-old-md)
+            COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) )
+            return 0
+            ;;
         --num-deltas)
             COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) )
             return 0
             ;;
+        --workers)
+            local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null )
+            [[ -z $max || $max -lt $min ]] && max=$min
+            COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) )
+            return 0
+            ;;
+        --compress-type)
+            _cr_compress_type "$1" "$2"
+            return 0
+            ;;
     esac
 
     if [[ $2 == -* ]] ; then
@@ -42,9 +67,9 @@ _cr_createrepo()
             --cachedir --checkts --no-database --update --update-md-path
             --skip-stat --split --pkglist --includepkg --outputdir
             --skip-symlinks --changelog-limit --unique-md-filenames
-            --simple-md-filenames --distro --content --repo --revision --deltas
-            --oldpackagedirs --num-deltas --read-pkgs-list
-            --max-delta-rpm-size --workers' -- "$2" ) )
+            --simple-md-filenames --retain-old-md --distro --content --repo
+            --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list
+            --max-delta-rpm-size --workers --compress-type' -- "$2" ) )
     else
         COMPREPLY=( $( compgen -d -- "$2" ) )
     fi
@@ -63,10 +88,14 @@ _cr_mergerepo()
             COMPREPLY=( $( compgen -d -- "$2" ) )
             return 0
             ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
     esac
 
     COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database
-        --outputdir --nogroups --noupdateinfo' -- "$2" ) )
+        --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) )
 } &&
 complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py
 
@@ -78,17 +107,27 @@ _cr_modifyrepo()
         --version|-h|--help|--mdtype)
             return 0
             ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
+        -s|--checksum)
+            _cr_checksum_type "$2"
+            return 0
+            ;;
     esac
 
     if [[ $2 == -* ]] ; then
-        COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) )
+        COMPREPLY=( $( compgen -W '--version --help --mdtype --remove
+            --compress --no-compress --compress-type --checksum
+            --unique-md-filenames --simple-md-filenames' -- "$2" ) )
         return 0
     fi
 
     local i argnum=1
     for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do
         if [[ ${COMP_WORDS[i]} != -* &&
-                    ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then
+              ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then
             argnum=$(( argnum+1 ))
         fi
     done
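
The _cr_compress_type helper above deliberately feeds an invalid value to
--compress-type and scrapes the resulting error message for the advertised
compression types (the message format it relies on is the one raised in
createrepo/__init__.py below). A rough Python equivalent of that scrape, as an
illustrative sketch (assumes the tool is on PATH and errors in that format):

    import re
    import subprocess

    def compress_types(prog='createrepo'):
        # e.g. "Compression FOO not available: Please choose from: gz, bz2, xz"
        p = subprocess.Popen([prog, '--compress-type=FOO', '/'],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        text = p.communicate()[0].decode('utf-8', 'replace')
        m = re.search(r'[Cc]ompression.*:\s*(.+)', text)
        return m.group(1).replace(',', ' ').split() if m else []
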
diff --git a/createrepo.spec b/createrepo.spec
index 1e491cd..9a2179b 100644
--- a/createrepo.spec
+++ b/createrepo.spec
@@ -1,5 +1,17 @@
 %{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")}
 
+%if ! 0%{?rhel}
+# we don't have this in rhel yet...
+BuildRequires: bash-completion
+%endif
+
+# disable broken /usr/lib/rpm/brp-python-bytecompile
+%define __os_install_post %{nil}
+%define compdir %(pkg-config --variable=completionsdir bash-completion)
+%if "%{compdir}" == ""
+%define compdir "/etc/bash_completion.d"
+%endif
+
 Summary: Creates a common metadata repository
 Name: createrepo
 Version: 0.9.9
@@ -11,7 +23,7 @@ URL: http://createrepo.baseurl.org/
 BuildRoot: %{_tmppath}/%{name}-%{version}root
 BuildArchitectures: noarch
 Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
-Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm
+Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma
 
 %description
 This utility will generate a common metadata repository from a directory of
@@ -32,7 +44,7 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install
 %defattr(-, root, root)
 %dir %{_datadir}/%{name}
 %doc ChangeLog README COPYING COPYING.lib
-%{_sysconfdir}/bash_completion.d/
+%(dirname %{compdir})
 %{_datadir}/%{name}/*
 %{_bindir}/%{name}
 %{_bindir}/modifyrepo
@@ -43,6 +55,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install
 %{python_sitelib}/createrepo
 
 %changelog
+* Fri Sep  9 2011 Seth Vidal <skvidal at fedoraproject.org>
+- add lzma dep
+
 * Wed Jan 26 2011 Seth Vidal <skvidal at fedoraproject.org>
 - bump to 0.9.9
 - add worker.py
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 8f2538e..1b18a9f 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -26,15 +26,16 @@ import tempfile
 import stat
 import fcntl
 import subprocess
+from select import select
 
-from yum import misc, Errors, to_unicode
-from yum.repoMDObject import RepoMD, RepoMDError, RepoData
+from yum import misc, Errors
+from yum.repoMDObject import RepoMD, RepoData
 from yum.sqlutils import executeSQL
 from yum.packageSack import MetaSack
-from yum.packages import YumAvailablePackage, YumLocalPackage
+from yum.packages import YumAvailablePackage
 
 import rpmUtils.transaction
-from utils import _, errorprint, MDError
+from utils import _, errorprint, MDError, lzma, _available_compression
 import readMetadata
 try:
     import sqlite3 as sqlite
@@ -46,8 +47,9 @@ try:
 except ImportError:
     pass
 
-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
                   checksum_and_rename, split_list_into_equal_chunks
+from utils import num_cpus_online
 import deltarpms
 
 __version__ = '0.9.9'
@@ -74,7 +76,7 @@ class MetaDataConfig(object):
         self.deltadir = None
         self.delta_relative = 'drpms/'
         self.oldpackage_paths = [] # where to look for the old packages -
-        self.deltafile = 'prestodelta.xml.gz'
+        self.deltafile = 'prestodelta.xml'
         self.num_deltas = 1 # number of older versions to delta (max)
         self.max_delta_rpm_size = 100000000
         self.update_md_path = None
@@ -86,9 +88,9 @@ class MetaDataConfig(object):
         self.skip_symlinks = False
         self.pkglist = []
         self.database_only = False
-        self.primaryfile = 'primary.xml.gz'
-        self.filelistsfile = 'filelists.xml.gz'
-        self.otherfile = 'other.xml.gz'
+        self.primaryfile = 'primary.xml'
+        self.filelistsfile = 'filelists.xml'
+        self.otherfile = 'other.xml'
         self.repomdfile = 'repomd.xml'
         self.tempdir = '.repodata'
         self.finaldir = 'repodata'
@@ -108,8 +110,10 @@ class MetaDataConfig(object):
         self.collapse_glibc_requires = True
         self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
         self.worker_cmd = '/usr/share/createrepo/worker.py'
-        
         #self.worker_cmd = './worker.py' # helpful when testing
+        self.retain_old_md = 0
+        self.compress_type = 'compat'
+
 
 class SimpleMDCallBack(object):
     def errorlog(self, thing):
@@ -141,10 +145,23 @@ class MetaDataGenerator:
         self.files = []
         self.rpmlib_reqs = {}
         self.read_pkgs = []
+        self.compat_compress = False
 
         if not self.conf.directory and not self.conf.directories:
             raise MDError, "No directory given on which to run."
-
+
+        if self.conf.compress_type == 'compat':
+            self.compat_compress = True
+            self.conf.compress_type = None
+
+        if not self.conf.compress_type:
+            self.conf.compress_type = 'gz'
+
+        if self.conf.compress_type not in utils._available_compression:
+            raise MDError, "Compression %s not available: Please choose from: %s" \
+                 % (self.conf.compress_type, ', '.join(utils._available_compression))
+
+
         if not self.conf.directories: # just makes things easier later
         if not self.conf.directory: # ensure we have both in the config object
@@ -290,14 +307,13 @@ class MetaDataGenerator:
 
         def extension_visitor(filelist, dirname, names):
             for fn in names:
+                fn = os.path.join(dirname, fn)
                 if os.path.isdir(fn):
                     continue
                 if self.conf.skip_symlinks and os.path.islink(fn):
                     continue
                 elif fn[-extlen:].lower() == '%s' % (ext):
-                    relativepath = dirname.replace(startdir, "", 1)
-                    relativepath = relativepath.lstrip("/")
-                    filelist.append(os.path.join(relativepath, fn))
+                    filelist.append(fn[len(startdir):])
 
         filelist = []
         startdir = directory + '/'
@@ -311,7 +327,7 @@ 
     def checkTimeStamps(self):
         """check the timestamp of our target dir. If it is not newer than
            the repodata return False, else True"""
-        if self.conf.checkts:
+        if self.conf.checkts and self.conf.mdtimestamp:
             dn = os.path.join(self.conf.basedir, self.conf.directory)
             files = self.getFileList(dn, '.rpm')
             files = self.trimRpms(files)
@@ -410,9 +426,11 @@ class MetaDataGenerator:
 
     def _setupPrimary(self):
         # setup the primary metadata file
+        # FIXME - make this be conf.compress_type once y-m-p is fixed
+        fpz = self.conf.primaryfile + '.' + 'gz'
         primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                       self.conf.primaryfile)
-        fo = _gzipOpen(primaryfilepath, 'w')
+                                       fpz)
+        fo = compressOpen(primaryfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
             ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
@@ -421,9 +439,11 @@ class MetaDataGenerator:
 
     def _setupFilelists(self):
         # setup the filelist file
+        # FIXME - make this be conf.compress_type once y-m-p is fixed
+        fpz = self.conf.filelistsfile + '.' + 'gz'
         filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                    self.conf.filelistsfile)
-        fo = _gzipOpen(filelistpath, 'w')
+                                    fpz)
+        fo = compressOpen(filelistpath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
                  ' packages="%s">' % self.pkgcount)
@@ -431,9 +451,11 @@ class MetaDataGenerator:
 
     def _setupOther(self):
         # setup the other file
+        # FIXME - make this be conf.compress_type once y-m-p is fixed
+        fpz = self.conf.otherfile + '.' + 'gz'
         otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.otherfile)
-        fo = _gzipOpen(otherfilepath, 'w')
+                                     fpz)
+        fo = compressOpen(otherfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
                  ' packages="%s">' %
@@ -442,9 +464,10 @@ class MetaDataGenerator:
 
     def _setupDelta(self):
         # setup the other file
+        fpz = self.conf.deltafile + '.' + self.conf.compress_type
         deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.deltafile)
-        fo = _gzipOpen(deltafilepath, 'w')
+                                     fpz)
+        fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<prestodelta>\n')
         return fo
@@ -520,6 +543,7 @@ class MetaDataGenerator:
         # go on their merry way
 
         newpkgs = []
+        keptpkgs = []
         if self.conf.update:
             # if we're in --update mode then only act on the new/changed pkgs
             for pkg in pkglist:
@@ -530,39 +554,13 @@ class MetaDataGenerator:
                 old_pkg = pkg
                 if pkg.find("://") != -1:
                     old_pkg = os.path.basename(pkg)
-                nodes = self.oldData.getNodes(old_pkg)
-                if nodes is not None: # we have a match in the old metadata
+                old_po = self.oldData.getNodes(old_pkg)
+                if old_po: # we have a match in the old metadata
                     if self.conf.verbose:
                         self.callback.log(_("Using data from old metadata for %s")
                                             % pkg)
-                    (primarynode, filenode, othernode) = nodes
-
-                    for node, outfile in ((primarynode, self.primaryfile),
-                                          (filenode, self.flfile),
-                                          (othernode, self.otherfile)):
-                        if node is None:
-                            break
-
-                        if self.conf.baseurl:
-                            anode = node.children
-                            while anode is not None:
-                                if anode.type != "element":
-                                    anode = anode.next
-                                    continue
-                                if anode.name == "location":
-                                    anode.setProp('xml:base', self.conf.baseurl)
-                                anode = anode.next
-
-                        output = node.serialize('UTF-8', self.conf.pretty)
-                        if output:
-                            outfile.write(output)
-                        else:
-                            if self.conf.verbose:
-                                self.callback.log(_("empty serialize on write to" \
-                                                    "%s in %s") % (outfile, pkg))
-                        outfile.write('\n')
-
-                    self.oldData.freeNodes(pkg)
+                    keptpkgs.append((pkg, old_po))
+
                     #FIXME - if we're in update and we have deltas enabled
                     # check the presto data for this pkg and write its info back out
                     # to our deltafile
@@ -584,32 +582,45 @@ class MetaDataGenerator:
             po = None
             if isinstance(pkg, YumAvailablePackage):
                 po = pkg
-                self.read_pkgs.append(po.localpath)
+                self.read_pkgs.append(po.localPkg())
 
             # if we're dealing with remote pkgs - pitch it over to doing
             # them one at a time, for now. 
             elif pkg.find('://') != -1:
-                po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir)
                 self.read_pkgs.append(pkg)
             
             if po:
-                self.primaryfile.write(po.xml_dump_primary_metadata())
-                self.flfile.write(po.xml_dump_filelists_metadata())
-                self.otherfile.write(po.xml_dump_other_metadata(
-                                     clog_limit=self.conf.changelog_limit))
+                keptpkgs.append((pkg, po))
                 continue
                 
             pkgfiles.append(pkg)
-            
-       
+
+        keptpkgs.sort(reverse=True)
+        # keptpkgs is a list of (filename, po), pkgfiles is a list of filenames.
+        # Need to write them in sorted(filename) order.  We loop over pkgfiles,
+        # inserting keptpkgs in right spots (using the upto argument).
+        def save_keptpkgs(upto):
+            while keptpkgs and (upto is None or keptpkgs[-1][0] < upto):
+                filename, po = keptpkgs.pop()
+                # reset baseurl in the old pkg
+                po.basepath = self.conf.baseurl
+                self.primaryfile.write(po.xml_dump_primary_metadata())
+                self.flfile.write(po.xml_dump_filelists_metadata())
+                self.otherfile.write(po.xml_dump_other_metadata(
+                    clog_limit=self.conf.changelog_limit))
+
         if pkgfiles:
             # divide that list by the number of workers and fork off that many
             # workers to tmpdirs
             # waitfor the workers to finish and as each one comes in
             # open the files they created and write them out to our metadata
             # add up the total pkg counts and return that value
-            worker_tmp_path = tempfile.mkdtemp()
-            worker_chunks = utils.split_list_into_equal_chunks(pkgfiles,  self.conf.workers)
+            self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later
+            if self.conf.workers < 1:
+                self.conf.workers = num_cpus_online()
+            pkgfiles.sort()
+            worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers)
             worker_cmd_dict = {}
             worker_jobs = {}
             base_worker_cmdline = [self.conf.worker_cmd, 
@@ -617,7 +628,8 @@ class MetaDataGenerator:
                     '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires, 
                     '--pkgoptions=_cachedir=%s' % self.conf.cachedir,
                     '--pkgoptions=_baseurl=%s' % self.conf.baseurl,
-                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,]
+                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,
+                    '--globalopts=sumtype=%s' % self.conf.sumtype, ]
             
             if self.conf.quiet:
                 base_worker_cmdline.append('--quiet')
@@ -626,15 +638,14 @@ class MetaDataGenerator:
                 base_worker_cmdline.append('--verbose')
                 
             for worker_num in range(self.conf.workers):
-                # make the worker directory
+                pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
+                f = open(pkl, 'w') 
+                f.write('\n'.join(worker_chunks[worker_num]))
+                f.close()
+                
                 workercmdline = []
                 workercmdline.extend(base_worker_cmdline)
-                thisdir = worker_tmp_path + '/' + str(worker_num)
-                if checkAndMakeDir(thisdir):
-                    workercmdline.append('--tmpmdpath=%s' % thisdir)
-                else:
-                    raise MDError, "Unable to create worker path: %s" % thisdir
-                workercmdline.extend(worker_chunks[worker_num])
+                workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num))
                 worker_cmd_dict[worker_num] = workercmdline
             
                 
@@ -647,49 +658,60 @@ class MetaDataGenerator:
                                         stderr=subprocess.PIPE)
                 worker_jobs[num] = job
             
-            gimmebreak = 0
-            while gimmebreak != len(worker_jobs.keys()):
-                gimmebreak = 0
-                for (num,job) in worker_jobs.items():
-                    if job.poll() is not None:
-                        gimmebreak+=1
-                    line = job.stdout.readline()
-                    if line:
+            files = self.primaryfile, self.flfile, self.otherfile
+            def log_messages(num):
+                job = worker_jobs[num]
+                while True:
+                    # check stdout and stderr
+                    for stream in select((job.stdout, job.stderr), (), ())[0]:
+                        line = stream.readline()
+                        if line: break
+                    else:
+                        return # EOF, EOF
+                    if stream is job.stdout:
+                        if line.startswith('*** '):
+                            # get data, save to local files
+                            for out, size in zip(files, line[4:].split()):
+                                out.write(stream.read(int(size)))
+                            return
                         self.callback.log('Worker %s: %s' % (num, line.rstrip()))
-                    line = job.stderr.readline()
-                    if line:
+                    else:
                         self.callback.errorlog('Worker %s: %s' % (num, line.rstrip()))
+
+            for i, pkg in enumerate(pkgfiles):
+                # insert cached packages
+                save_keptpkgs(pkg)
+
+                # save output to local files
+                log_messages(i % self.conf.workers)
+
+            for (num, job) in worker_jobs.items():
+                # process remaining messages on stderr
+                log_messages(num)
+
+                if job.wait() != 0:
+                    msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode
+                    self.callback.errorlog(msg)
+                    raise MDError, msg
                     
-                
             if not self.conf.quiet:
                 self.callback.log("Workers Finished")
-            # finished with workers
-            # go to their dirs and add the contents
-            if not self.conf.quiet:
-                self.callback.log("Gathering worker results")
-            for num in range(self.conf.workers):
-                for (fn, fo) in (('primary.xml', self.primaryfile), 
-                           ('filelists.xml', self.flfile),
-                           ('other.xml', self.otherfile)):
-                    fnpath = worker_tmp_path + '/' + str(num) + '/' + fn
-                    if os.path.exists(fnpath):
-                        fo.write(open(fnpath, 'r').read())
-
                     
             for pkgfile in pkgfiles:
                 if self.conf.deltas:
-                    po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
-                    self._do_delta_rpm_package(po)
+                    try:
+                        po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                        self._do_delta_rpm_package(po)
+                    except MDError, e:
+                        errorprint(e)
+                        continue
                 self.read_pkgs.append(pkgfile)
 
+        save_keptpkgs(None) # append anything left
         return self.current_pkg
 
 
     def closeMetadataDocs(self):
-        if not self.conf.quiet:
-            self.callback.log('')
-
-
         # save them up to the tmp locations:
         if not self.conf.quiet:
             self.callback.log(_('Saving Primary metadata'))
@@ -784,7 +806,6 @@ class MetaDataGenerator:
             return self._old_package_dict
 
         self._old_package_dict = {}
-        opl = []
        for d in self.conf.oldpackage_paths:
             for f in self.getFileList(d, '.rpm'):
                 fp = d + '/' + f
@@ -833,7 +854,7 @@ class MetaDataGenerator:
         return ' '.join(results)
 
     def _createRepoDataObject(self, mdfile, mdtype, compress=True, 
-                              compress_type='gzip', attribs={}):
+                              compress_type=None, attribs={}):
         """return random metadata as RepoData object to be  added to RepoMD
            mdfile = complete path to file
            mdtype = the metadata type to use
@@ -843,15 +864,13 @@ class MetaDataGenerator:
         sfile = os.path.basename(mdfile)
         fo = open(mdfile, 'r')
         outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        if not compress_type:
+            compress_type = self.conf.compress_type
         if compress:
-            if compress_type == 'gzip':
-                sfile = '%s.gz' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = GzipFile(filename = outfn, mode='wb')
-            elif compress_type == 'bzip2':
-                sfile = '%s.bz2' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = BZ2File(filename = outfn, mode='wb')
+            sfile = '%s.%s' % (sfile, compress_type)
+            outfn = os.path.join(outdir, sfile)
+            output = compressOpen(outfn, mode='wb', compress_type=compress_type)
+                
         else:
             outfn  = os.path.join(outdir, sfile)
             output = open(outfn, 'w')
@@ -874,14 +893,13 @@ class MetaDataGenerator:
 
         thisdata = RepoData()
         thisdata.type = mdtype
-        baseloc = None
         thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile))
         thisdata.checksum = (self.conf.sumtype, csum)
         if compress:
             thisdata.openchecksum  = (self.conf.sumtype, open_csum)
         
         thisdata.size = str(os.stat(outfn).st_size)
-        thisdata.timestamp = str(os.stat(outfn).st_mtime)
+        thisdata.timestamp = str(int(os.stat(outfn).st_mtime))
         for (k, v) in attribs.items():
             setattr(thisdata, k, str(v))
         
@@ -925,9 +943,14 @@ class MetaDataGenerator:
             rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)
 
         for (rpm_file, ftype) in workfiles:
+            # when we fix y-m-p and non-gzipped xml files - then we can make this just add
+            # self.conf.compress_type
+            if ftype in ('other', 'filelists', 'primary'):
+                rpm_file = rpm_file + '.' + 'gz'
+            elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression:
+                rpm_file = rpm_file + '.' + self.conf.compress_type
             complete_path = os.path.join(repopath, rpm_file)
-
-            zfo = _gzipOpen(complete_path)
+            zfo = compressOpen(complete_path)
             # This is misc.checksum() done locally so we can get the size too.
             data = misc.Checksums([sumtype])
             while data.read(zfo, 2**16):
@@ -966,14 +989,20 @@ class MetaDataGenerator:
                     good_name = '%s.sqlite' % ftype
                     resultpath = os.path.join(repopath, good_name)
 
+                    # compat compression for rhel5 compatibility from fedora :(
+                    compress_type = self.conf.compress_type
+                    if self.compat_compress:
+                        compress_type = 'bz2'
+                        
                     # rename from silly name to not silly name
                     os.rename(tmp_result_path, resultpath)
-                    compressed_name = '%s.bz2' % good_name
+                    compressed_name = '%s.%s' % (good_name, compress_type)
                     result_compressed = os.path.join(repopath, compressed_name)
                     db_csums[ftype] = misc.checksum(sumtype, resultpath)
 
                     # compress the files
-                    bzipFile(resultpath, result_compressed)
+
+                    compressFile(resultpath, result_compressed, compress_type)
                     # csum the compressed file
                     db_compressed_sums[ftype] = misc.checksum(sumtype,
                                                              result_compressed)
@@ -983,8 +1012,8 @@ class MetaDataGenerator:
                     os.unlink(resultpath)
 
                     if self.conf.unique_md_filenames:
-                        csum_compressed_name = '%s-%s.bz2' % (
-                                           db_compressed_sums[ftype], good_name)
+                        csum_compressed_name = '%s-%s.%s' % (
+                                           db_compressed_sums[ftype], good_name, compress_type)
                         csum_result_compressed =  os.path.join(repopath,
                                                            csum_compressed_name)
                         os.rename(result_compressed, csum_result_compressed)
@@ -1001,7 +1030,7 @@ class MetaDataGenerator:
                     data.location = (self.conf.baseurl, 
                               os.path.join(self.conf.finaldir, compressed_name))
                     data.checksum = (sumtype, db_compressed_sums[ftype])
-                    data.timestamp = str(db_stat.st_mtime)
+                    data.timestamp = str(int(db_stat.st_mtime))
                     data.size = str(db_stat.st_size)
                     data.opensize = str(un_stat.st_size)
                     data.openchecksum = (sumtype, db_csums[ftype])
@@ -1020,7 +1049,13 @@ class MetaDataGenerator:
             data.openchecksum = (sumtype, uncsum)
 
             if self.conf.unique_md_filenames:
-                res_file = '%s-%s.xml.gz' % (csum, ftype)
+                if ftype in ('primary', 'filelists', 'other'):
+                    compress = 'gz'
+                else:
+                    compress = self.conf.compress_type
+                
+                main_name = '.'.join(rpm_file.split('.')[:-1])
+                res_file = '%s-%s.%s' % (csum, main_name, compress)
                 orig_file = os.path.join(repopath, rpm_file)
                 dest_file = os.path.join(repopath, res_file)
                 os.rename(orig_file, dest_file)
@@ -1046,7 +1081,7 @@ class MetaDataGenerator:
             
 
         if self.conf.additional_metadata:
-            for md_type, mdfile in self.conf.additional_metadata.items():
+            for md_type, md_file in self.conf.additional_metadata.items():
                 mdcontent = self._createRepoDataObject(md_file, md_type)
                 repomd.repoData[mdcontent.type] = mdcontent
                 
@@ -1110,23 +1145,43 @@ class MetaDataGenerator:
                     raise MDError, _(
                     'Could not remove old metadata file: %s: %s') % (oldfile, e)
 
-        # Move everything else back from olddir (eg. repoview files)
-        try:
-            old_contents = os.listdir(output_old_dir)
-        except (OSError, IOError), e:
-            old_contents = []
-            
+        old_to_remove = []
+        old_pr = []
+        old_fl = []
+        old_ot = []
+        old_pr_db = []
+        old_fl_db = []
+        old_ot_db = []
         for f in os.listdir(output_old_dir):
             oldfile = os.path.join(output_old_dir, f)
             finalfile = os.path.join(output_final_dir, f)
-            if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2',
-                    'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2',
-                    'other.xml.gz','filelists.xml.gz'):
-                os.remove(oldfile) # kill off the old ones
-                continue
-            if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
-                     'primary.sqlite.bz2'):
-                os.remove(oldfile)
+
+            for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
+                           ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
+                           ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
+                fn = '.'.join(f.split('.')[:-1])
+                if fn.endswith(end):
+                    lst.append(oldfile)
+                    break
+
+        # make a list of the old metadata files we don't want to remove.
+        for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db):
+            sortlst = sorted(lst, key=lambda x: os.path.getmtime(x),
+                             reverse=True)
+            for thisf in sortlst[self.conf.retain_old_md:]:
+                old_to_remove.append(thisf)
+
+        for f in os.listdir(output_old_dir):
+            oldfile = os.path.join(output_old_dir, f)
+            finalfile = os.path.join(output_final_dir, f)
+            fn = '.'.join(f.split('.')[:-1])
+            if fn in ('filelists.sqlite', 'other.sqlite',
+                     'primary.sqlite') or oldfile in old_to_remove:
+                try:
+                    os.remove(oldfile)
+                except (OSError, IOError), e:
+                    raise MDError, _(
+                    'Could not remove old metadata file: %s: %s') % (oldfile, e)
                 continue
 
             if os.path.exists(finalfile):
@@ -1147,14 +1202,19 @@ class MetaDataGenerator:
                     msg += _('Error was %s') % e
                     raise MDError, msg
 
-        try:
-            os.rmdir(output_old_dir)
-        except OSError, e:
-            self.errorlog(_('Could not remove old metadata dir: %s')
-                          % self.conf.olddir)
-            self.errorlog(_('Error was %s') % e)
-            self.errorlog(_('Please clean up this directory manually.'))
+        self._cleanup_tmp_repodata_dir()
+        self._cleanup_update_tmp_dir()
+        self._write_out_read_pkgs_list()
+
 
+    def _cleanup_update_tmp_dir(self):
+        if not self.conf.update:
+            return
+
+        shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True)
+        shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True)
+
+    def _write_out_read_pkgs_list(self):
         # write out the read_pkgs_list file with self.read_pkgs
         if self.conf.read_pkgs_list:
             try:
@@ -1167,6 +1227,23 @@ class MetaDataGenerator:
                               % self.conf.read_pkgs_list)
                 self.errorlog(_('Error was %s') % e)
 
+    def _cleanup_tmp_repodata_dir(self):
+        output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir)
+        output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        for dirbase in (self.conf.olddir, self.conf.tempdir):
+            dirpath = os.path.join(self.conf.outputdir, dirbase)
+            if os.path.exists(dirpath):
+                try:
+                    os.rmdir(dirpath)
+                except OSError, e:
+                    self.errorlog(_('Could not remove temp metadata dir: %s')
+                                  % dirbase)
+                    self.errorlog(_('Error was %s') % e)
+                    self.errorlog(_('Please clean up this directory manually.'))
+        # our worker tmp path
+        if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path):
+            shutil.rmtree(self._worker_tmp_path, ignore_errors=True)
+
     def setup_sqlite_dbs(self, initdb=True):
         """sets up the sqlite dbs w/table schemas and db_infos"""
         destdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
@@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
         (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
         return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))
 
-    def getFileList(self, directory, ext):
-
-        extlen = len(ext)
-
-        def extension_visitor(arg, dirname, names):
-            for fn in names:
-                if os.path.isdir(fn):
-                    continue
-                elif fn[-extlen:].lower() == '%s' % (ext):
-                    reldir = os.path.basename(dirname)
-                    if reldir == os.path.basename(directory):
-                        reldir = ""
-                    arg.append(os.path.join(reldir, fn))
-
-        rpmlist = []
-        os.path.walk(directory, extension_visitor, rpmlist)
-        return rpmlist
-
     def doPkgMetadata(self):
         """all the heavy lifting for the package metadata"""
         if len(self.conf.directories) == 1:
@@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator):
                     thisdir = os.path.join(self.conf.basedir, mydir)
 
             filematrix[mydir] = self.getFileList(thisdir, '.rpm')
+
+            #  pkglist is a bit different for split media, as we have to know
+            # which dir. it belongs to. So we walk the dir. and then filter.
+            # We could be faster by not walking the dir. ... but meh.
+            if self.conf.pkglist:
+                pkglist = set(self.conf.pkglist)
+                pkgs = []
+                for fname in filematrix[mydir]:
+                    if fname not in pkglist:
+                        continue
+                    pkgs.append(fname)
+                filematrix[mydir] = pkgs
+
             self.trimRpms(filematrix[mydir])
             self.pkgcount += len(filematrix[mydir])
 
@@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
         self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
         try:
             self.openMetadataDocs()
-            original_basedir = self.conf.basedir
             for mydir in self.conf.directories:
                 self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
                 self.writeMetadataDocs(filematrix[mydir], mydir)
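
The reworked worker loop above replaces per-worker temp directories with a
simple framing protocol on stdout: ordinary lines are forwarded to the log,
while a line starting with '*** ' announces the byte sizes of the primary,
filelists and other XML fragments that follow, which the parent splices into
its own metadata files in sorted package order (interleaved with cached
entries via save_keptpkgs). A condensed, hypothetical sketch of the parent's
reader side (Python 2, matching the codebase; `job` is a subprocess.Popen
with piped stdout/stderr; not the patch's own code, same framing as its
log_messages):

    from select import select

    def read_one_record(job, outfiles, log, errlog, num):
        # Drain log lines until one '*** n1 n2 n3' record has been spliced,
        # or both streams hit EOF.
        while True:
            for stream in select((job.stdout, job.stderr), (), ())[0]:
                line = stream.readline()
                if line:
                    break
            else:
                return  # EOF on both streams
            if stream is job.stdout and line.startswith('*** '):
                for out, size in zip(outfiles, line[4:].split()):
                    out.write(stream.read(int(size)))  # exact-length payloads
                return
            dest = log if stream is job.stdout else errlog
            dest('Worker %s: %s' % (num, line.rstrip()))
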
diff --git a/createrepo/merge.py b/createrepo/merge.py
index b3b2ea1..1ac43bb 100644
--- a/createrepo/merge.py
+++ b/createrepo/merge.py
@@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir
 import yum.update_md
 import rpmUtils.arch
 import operator
+from utils import MDError
 import createrepo
 import tempfile
 
@@ -84,6 +85,8 @@ class RepoMergeBase:
         # in the repolist
         count = 0
         for r in self.repolist:
+            if r[0] == '/':
+                r = 'file://' + r # just fix the file repos, this is silly.
             count +=1
             rid = 'repo%s' % count
             n = self.yumbase.add_enable_repo(rid, baseurls=[r],
@@ -92,7 +95,10 @@ class RepoMergeBase:
             n._merge_rank = count
 
         #setup our sacks
-        self.yumbase._getSacks(archlist=self.archlist)
+        try:
+            self.yumbase._getSacks(archlist=self.archlist)
+        except yum.Errors.RepoError, e:
+            raise MDError, "Could not setup merge repo pkgsack: %s" % e
 
         myrepos = self.yumbase.repos.listEnabled()
 
@@ -102,11 +108,16 @@ class RepoMergeBase:
     def write_metadata(self, outputdir=None):
         mytempdir = tempfile.mkdtemp()
         if self.groups:
-            comps_fn = mytempdir + '/groups.xml'
-            compsfile = open(comps_fn, 'w')
-            compsfile.write(self.yumbase.comps.xml())
-            compsfile.close()
-            self.mdconf.groupfile=comps_fn
+            try:
+                comps_fn = mytempdir + '/groups.xml'
+                compsfile = open(comps_fn, 'w')
+                compsfile.write(self.yumbase.comps.xml())
+                compsfile.close()
+            except yum.Errors.GroupsError, e:
+                # groups not being available shouldn't be a fatal error
+                pass
+            else:
+                self.mdconf.groupfile=comps_fn
 
         if self.updateinfo:
             ui_fn = mytempdir + '/updateinfo.xml'
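
add_enable_repo expects baseurls, so the first merge.py hunk above normalizes
bare filesystem paths into file:// URLs before registering each merge source.
A tiny standalone illustration of that normalization (example values are
hypothetical):

    def as_repo_url(r):
        # local paths become file:// URLs; real URLs pass through untouched
        return 'file://' + r if r.startswith('/') else r

    assert as_repo_url('/srv/repo1') == 'file:///srv/repo1'
    assert as_repo_url('http://example.com/repo') == 'http://example.com/repo'
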
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index 27d3690..54863cb 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -16,11 +16,25 @@
 # Copyright 2006 Red Hat
 
 import os
-import libxml2
 import stat
 from utils import errorprint, _
 
-from yum import repoMDObject
+import yum
+from yum import misc
+from yum.Errors import YumBaseError
+import tempfile
+class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite):
+    # special for special people like us.
+    def _return_remote_location(self):
+
+        if self.basepath:
+            msg = """<location xml:base="%s" href="%s"/>\n""" % (
+                                     misc.to_xml(self.basepath, attrib=True),
+                                     misc.to_xml(self.relativepath, attrib=True))
+        else:
+            msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
+
+        return msg  
 
 
 class MetadataIndex(object):
@@ -30,178 +44,72 @@ class MetadataIndex(object):
             opts = {}
         self.opts = opts
         self.outputdir = outputdir
+        realpath = os.path.realpath(outputdir)
         repodatadir = self.outputdir + '/repodata'
-        myrepomdxml = repodatadir + '/repomd.xml'
-        if os.path.exists(myrepomdxml):
-            repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
-            b = repomd.getData('primary').location[1]
-            f = repomd.getData('filelists').location[1]
-            o = repomd.getData('other').location[1]
-            basefile = os.path.join(self.outputdir, b)
-            filelistfile = os.path.join(self.outputdir, f)
-            otherfile = os.path.join(self.outputdir, o)
-        else:
-            basefile = filelistfile = otherfile = ""
-
-        self.files = {'base' : basefile,
-                      'filelist' : filelistfile,
-                      'other' : otherfile}
-        self.scan()
+        self._repo = yum.yumRepo.YumRepository('garbageid')
+        self._repo.baseurl = 'file://' + realpath
+        self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo")
+        self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p")
+        self._repo.metadata_expire = 1
+        self._repo.gpgcheck = 0
+        self._repo.repo_gpgcheck = 0
+        self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
+        self.pkg_tups_by_path = {}
+        try:
+            self.scan()
+        except YumBaseError, e:
+            print "Could not find valid repo at: %s" % self.outputdir
+        
 
     def scan(self):
-        """Read in and index old repo data"""
-        self.basenodes = {}
-        self.filesnodes = {}
-        self.othernodes = {}
-        self.pkg_ids = {}
+        """Read in old repodata"""
         if self.opts.get('verbose'):
             print _("Scanning old repo data")
-        for fn in self.files.values():
-            if not os.path.exists(fn):
-                #cannot scan
-                errorprint(_("Warning: Old repodata file missing: %s") % fn)
-                return
-        root = libxml2.parseFile(self.files['base']).getRootElement()
-        self._scanPackageNodes(root, self._handleBase)
-        if self.opts.get('verbose'):
-            print _("Indexed %i base nodes" % len(self.basenodes))
-        root = libxml2.parseFile(self.files['filelist']).getRootElement()
-        self._scanPackageNodes(root, self._handleFiles)
-        if self.opts.get('verbose'):
-            print _("Indexed %i filelist nodes" % len(self.filesnodes))
-        root = libxml2.parseFile(self.files['other']).getRootElement()
-        self._scanPackageNodes(root, self._handleOther)
-        if self.opts.get('verbose'):
-            print _("Indexed %i other nodes" % len(self.othernodes))
-        #reverse index pkg ids to track references
-        self.pkgrefs = {}
-        for relpath, pkgid in self.pkg_ids.iteritems():
-            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
-    def _scanPackageNodes(self, root, handler):
-        node = root.children
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+        self._repo.sack.populate(self._repo, 'all', None, False)
+        for thispo in self._repo.sack:
+            mtime = thispo.filetime
+            size = thispo.size
+            relpath = thispo.relativepath
+            do_stat = self.opts.get('do_stat', True)
+            if mtime is None:
+                print _("mtime missing for %s") % relpath
                 continue
-            if node.name == "package":
-                handler(node)
-            node = node.next
-
-    def _handleBase(self, node):
-        top = node
-        node = node.children
-        pkgid = None
-        mtime = None
-        size = None
-        relpath = None
-        do_stat = self.opts.get('do_stat', True)
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+            if size is None:
+                print _("size missing for %s") % relpath
                 continue
-            if node.name == "checksum":
-                pkgid = node.content
-            elif node.name == "time":
-                mtime = int(node.prop('file'))
-            elif node.name == "size":
-                size = int(node.prop('package'))
-            elif node.name == "location":
-                relpath = node.prop('href')
-            node = node.next
-        if relpath is None:
-            print _("Incomplete data for node")
-            return
-        if pkgid is None:
-            print _("pkgid missing for %s") % relpath
-            return
-        if mtime is None:
-            print _("mtime missing for %s") % relpath
-            return
-        if size is None:
-            print _("size missing for %s") % relpath
-            return
-        if do_stat:
-            filepath = os.path.join(self.opts['pkgdir'], relpath)
-            try:
-                st = os.stat(filepath)
-            except OSError:
-                #file missing -- ignore
-                return
-            if not stat.S_ISREG(st.st_mode):
-                #ignore non files
-                return
-            #check size and mtime
-            if st.st_size != size:
-                if self.opts.get('verbose'):
-                    print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
-                return
-            if int(st.st_mtime) != mtime:
-                if self.opts.get('verbose'):
-                    print _("Modification time changed for %s") % filepath
-                return
-        #otherwise we index
-        self.basenodes[relpath] = top
-        self.pkg_ids[relpath] = pkgid
-
-    def _handleFiles(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.filesnodes[pkgid] = node
-
-    def _handleOther(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.othernodes[pkgid] = node
+            if do_stat:
+                filepath = os.path.join(self.opts['pkgdir'], relpath)
+                try:
+                    st = os.stat(filepath)
+                except OSError:
+                    #file missing -- ignore
+                    continue
+                if not stat.S_ISREG(st.st_mode):
+                    #ignore non files
+                    continue
+                #check size and mtime
+                if st.st_size != size:
+                    if self.opts.get('verbose'):
+                        print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+                    continue
+                if int(st.st_mtime) != mtime:
+                    if self.opts.get('verbose'):
+                        print _("Modification time changed for %s") % filepath
+                    continue
+
+            self.pkg_tups_by_path[relpath] = thispo.pkgtup
+
 
-    def getNodes(self, relpath):
-        """Return base, filelist, and other nodes for file, if they exist
 
-        Returns a tuple of nodes, or None if not found
+    def getNodes(self, relpath):
+        """return a package object based on relative path of pkg
         """
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            return None
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        fnode = self.filesnodes.get(pkgid,None)
-        if fnode is None:
-            return None
-        onode = self.othernodes.get(pkgid,None)
-        if onode is None:
-            return None
-        return bnode, fnode, onode
-
-    def freeNodes(self,relpath):
-        #causing problems
-        """Free up nodes corresponding to file, if possible"""
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            print "Missing node for %s" % relpath
-            return
-        bnode.unlinkNode()
-        bnode.freeNode()
-        del self.basenodes[relpath]
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        del self.pkg_ids[relpath]
-        dups = self.pkgrefs.get(pkgid)
-        dups.remove(relpath)
-        if len(dups):
-            #still referenced
-            return
-        del self.pkgrefs[pkgid]
-        for nodes in self.filesnodes, self.othernodes:
-            node = nodes.get(pkgid)
-            if node is not None:
-                node.unlinkNode()
-                node.freeNode()
-                del nodes[pkgid]
+        if relpath in self.pkg_tups_by_path:
+            pkgtup = self.pkg_tups_by_path[relpath]
+            return self._repo.sack.searchPkgTuple(pkgtup)[0]
+        return None
 
+    
 
 if __name__ == "__main__":
     cwd = os.getcwd()
@@ -209,9 +117,9 @@ if __name__ == "__main__":
             'pkgdir': cwd}
 
     idx = MetadataIndex(cwd, opts)
-    for fn in idx.basenodes.keys():
-        a,b,c, = idx.getNodes(fn)
164e0b
-        a.serialize()
164e0b
-        b.serialize()
164e0b
-        c.serialize()
164e0b
-        idx.freeNodes(fn)
164e0b
+    for fn in idx.pkg_tups_by_path:
164e0b
+        po = idx.getNodes(fn)
164e0b
+        print po.xml_dump_primary_metadata()
164e0b
+        print po.xml_dump_filelists_metadata()
164e0b
+        print po.xml_dump_other_metadata()
164e0b
+
164e0b
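Editor's note: a minimal usage sketch of the reworked index, mirroring the
__main__ block above; the relative package path is a made-up example:

    import os
    opts = {'verbose': True, 'pkgdir': os.getcwd(), 'do_stat': True}
    idx = MetadataIndex(os.getcwd(), opts)
    po = idx.getNodes('Packages/foo-1.0-1.noarch.rpm')  # hypothetical relpath
    if po is not None:
        print po.xml_dump_primary_metadata()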
diff --git a/createrepo/utils.py b/createrepo/utils.py
164e0b
index 995c3b9..b0d92ec 100644
164e0b
--- a/createrepo/utils.py
164e0b
+++ b/createrepo/utils.py
164e0b
@@ -23,6 +23,12 @@ import bz2
164e0b
 import gzip
164e0b
 from gzip import write32u, FNAME
164e0b
 from yum import misc
164e0b
+_available_compression = ['gz', 'bz2']
164e0b
+try:
164e0b
+    import lzma
164e0b
+    _available_compression.append('xz')
164e0b
+except ImportError:
164e0b
+    lzma = None
164e0b
 
164e0b
 def errorprint(stuff):
164e0b
     print >> sys.stderr, stuff
164e0b
@@ -34,22 +40,14 @@ def _(args):
164e0b
 
164e0b
 class GzipFile(gzip.GzipFile):
164e0b
     def _write_gzip_header(self):
164e0b
+        # Generate a header that is easily reproduced with gzip -9 -n on
164e0b
+        # a Unix-like system
164e0b
         self.fileobj.write('\037\213')             # magic header
164e0b
         self.fileobj.write('\010')                 # compression method
164e0b
-        if hasattr(self, 'name'):
164e0b
-            fname = self.name[:-3]
164e0b
-        else:
164e0b
-            fname = self.filename[:-3]
164e0b
-        flags = 0
164e0b
-        if fname:
164e0b
-            flags = FNAME
164e0b
-        self.fileobj.write(chr(flags))
164e0b
-        write32u(self.fileobj, long(0))
164e0b
-        self.fileobj.write('\002')
164e0b
-        self.fileobj.write('\377')
164e0b
-        if fname:
164e0b
-            self.fileobj.write(fname + '\000')
164e0b
-
164e0b
+        self.fileobj.write('\000')                 # flags
164e0b
+        write32u(self.fileobj, long(0))            # timestamp
164e0b
+        self.fileobj.write('\002')                 # max compression
164e0b
+        self.fileobj.write('\003')                 # UNIX
164e0b
 
164e0b
 def _gzipOpen(filename, mode="rb", compresslevel=9):
164e0b
     return GzipFile(filename, mode, compresslevel)
164e0b
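Editor's note: the rewritten header is a fixed 10 bytes, which is what makes
repeated runs byte-for-byte reproducible; a small sanity sketch (an assumption
of this edit, not part of the patch):

    # magic(2) + method(1) + flags(1) + mtime(4) + XFL(1) + OS(1) == 10 bytes,
    # matching what `gzip -9 -n` writes on a Unix-like system
    header = '\037\213' '\010' '\000' '\000\000\000\000' '\002' '\003'
    assert len(header) == 10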
@@ -69,6 +67,75 @@ def bzipFile(source, dest):
164e0b
     s_fn.close()
164e0b
 
164e0b
 
164e0b
+def xzFile(source, dest):
164e0b
+    if 'xz' not in _available_compression:
164e0b
+        raise MDError, "Cannot use xz for compression, library/module is not available"
164e0b
+        
164e0b
+    s_fn = open(source, 'rb')
164e0b
+    destination = lzma.LZMAFile(dest, 'w')
164e0b
+
164e0b
+    while True:
164e0b
+        data = s_fn.read(1024000)
164e0b
+
164e0b
+        if not data: break
164e0b
+        destination.write(data)
164e0b
+
164e0b
+    destination.close()
164e0b
+    s_fn.close()
164e0b
+
164e0b
+def gzFile(source, dest):
164e0b
+        
164e0b
+    s_fn = open(source, 'rb')
164e0b
+    destination = GzipFile(dest, 'w')
164e0b
+
164e0b
+    while True:
164e0b
+        data = s_fn.read(1024000)
164e0b
+
164e0b
+        if not data: break
164e0b
+        destination.write(data)
164e0b
+
164e0b
+    destination.close()
164e0b
+    s_fn.close()
164e0b
+
164e0b
+
164e0b
+class Duck:
164e0b
+    def __init__(self, **attr):
164e0b
+        self.__dict__ = attr
164e0b
+
164e0b
+
164e0b
+def compressFile(source, dest, compress_type):
164e0b
+    """Compress an existing file using any compression type from source to dest"""
164e0b
+    
164e0b
+    if compress_type == 'xz':
164e0b
+        xzFile(source, dest)
164e0b
+    elif compress_type == 'bz2':
164e0b
+        bzipFile(source, dest)
164e0b
+    elif compress_type == 'gz':
164e0b
+        gzFile(source, dest)
164e0b
+    else:
164e0b
+        raise MDError, "Unknown compression type %s" % compress_type
164e0b
+    
164e0b
+def compressOpen(fn, mode='rb', compress_type=None):
164e0b
+    
164e0b
+    if not compress_type:
164e0b
+        # no compress_type given (read case) - guess it from the file extension
164e0b
+        compress_type = fn.split('.')[-1]
164e0b
+        if compress_type not in _available_compression:
164e0b
+            compress_type = 'gz'
164e0b
+            
164e0b
+    if compress_type == 'xz':
164e0b
+        fh = lzma.LZMAFile(fn, mode)
164e0b
+        if mode == 'w':
164e0b
+            fh = Duck(write=lambda s, write=fh.write: s != '' and write(s),
164e0b
+                      close=fh.close)
164e0b
+        return fh
164e0b
+    elif compress_type == 'bz2':
164e0b
+        return bz2.BZ2File(fn, mode)
164e0b
+    elif compress_type == 'gz':
164e0b
+        return _gzipOpen(fn, mode)
164e0b
+    else:
164e0b
+        raise MDError, "Unknown compression type %s" % compress_type
164e0b
+    
164e0b
 def returnFD(filename):
164e0b
     try:
164e0b
         fdno = os.open(filename, os.O_RDONLY)
164e0b
@@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist):
164e0b
     return result
164e0b
 
164e0b
 def split_list_into_equal_chunks(seq, num_chunks):
164e0b
-    avg = len(seq) / float(num_chunks)
164e0b
-    out = []
164e0b
-    last = 0.0
164e0b
-    while last < len(seq):
164e0b
-        out.append(seq[int(last):int(last + avg)])
164e0b
-        last += avg
164e0b
-
164e0b
+    """it's used on sorted input which is then merged in order"""
164e0b
+    out = [[] for i in range(num_chunks)]
164e0b
+    for i, item in enumerate(seq):
164e0b
+        out[i % num_chunks].append(item)
164e0b
     return out
164e0b
 
164e0b
+def num_cpus_online(unknown=1):
164e0b
+    if not hasattr(os, "sysconf"):
164e0b
+        return unknown
164e0b
+
164e0b
+    if "SC_NPROCESSORS_ONLN" not in os.sysconf_names:
164e0b
+        return unknown
164e0b
+
164e0b
+    ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
164e0b
+    try:
164e0b
+        if int(ncpus) > 0:
164e0b
+            return ncpus
164e0b
+    except:
164e0b
+        pass
164e0b
+
164e0b
+    return unknown
164e0b
+
164e0b
 
164e0b
 class MDError(Exception):
164e0b
     def __init__(self, value=None):
164e0b
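Editor's note: a hedged usage sketch for the new compression helpers (file
names are made up); the Duck wrapper above appears to exist to swallow empty
writes on the lzma file object when writing:

    from createrepo.utils import compressFile, compressOpen, MDError
    try:
        compressFile('repodata/other.xml', 'repodata/other.xml.xz', 'xz')
    except MDError:
        pass  # xz needs the optional lzma module
    fh = compressOpen('repodata/other.xml.xz')  # type guessed from the extension
    data = fh.read()
    fh.close()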
diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
164e0b
index ac06196..f87ac6d 100644
164e0b
--- a/createrepo/yumbased.py
164e0b
+++ b/createrepo/yumbased.py
164e0b
@@ -16,6 +16,11 @@
164e0b
 
164e0b
 
164e0b
 import os
164e0b
+def _get_umask():
164e0b
+    oumask = os.umask(0)
164e0b
+    os.umask(oumask)
164e0b
+    return oumask
164e0b
+_b4rpm_oumask = _get_umask()
164e0b
 import rpm
164e0b
 import types
164e0b
 
164e0b
@@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage):
164e0b
                 csumo = os.fdopen(csumo, 'w', -1)
164e0b
                 csumo.write(checksum)
164e0b
                 csumo.close()
164e0b
+                #  tempfile forces 002 ... we want to undo that, so that users
164e0b
+                # can share the cache. BZ 833350.
164e0b
+                os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask)
164e0b
                 os.rename(tmpfilename, csumfile)
164e0b
             except:
164e0b
                 pass
164e0b
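Editor's note: a small sketch of the chmod arithmetic above, assuming a
conventional umask such as 022; the tempfile is created with a restrictive
mode, and XOR-ing the saved umask out of 0666 restores what a plain open()
would have produced:

    oumask = 022          # e.g. the process umask captured before importing rpm
    mode = 0666 ^ oumask  # 0644, what open() would have created
    assert mode == 0644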
diff --git a/docs/createrepo.8 b/docs/createrepo.8
164e0b
index e3c4c3b..eefd4bf 100644
164e0b
--- a/docs/createrepo.8
164e0b
+++ b/docs/createrepo.8
164e0b
@@ -37,6 +37,10 @@ cache of checksums of packages in the repository. In consecutive runs of
164e0b
 createrepo over the same repository of files that do not have a complete
164e0b
 change out of all packages this decreases the processing time dramatically.
164e0b
 .br
164e0b
+.IP "\fB\--basedir\fP"
164e0b
+Basedir for the paths to directories in the repodata; defaults to the current working
164e0b
+directory.
164e0b
+.br
164e0b
 .IP "\fB\--update\fP"
164e0b
 If metadata already exists in the outputdir and an rpm is unchanged
164e0b
 (based on file size and mtime) since the metadata was generated, reuse
164e0b
@@ -49,11 +53,15 @@ skip the stat() call on a --update, assumes if the filename is the same
164e0b
 then the file is still the same (only use this if you're fairly trusting or
164e0b
 gullible).
164e0b
 .br
164e0b
+.IP "\fB\--update-md-path\fP"
164e0b
+Use the existing repodata for --update, from this path.
164e0b
+.br
164e0b
 .IP "\fB\-C --checkts\fP"
164e0b
 Don't generate repo metadata, if their timestamps are newer than its rpms.
164e0b
 This option decreases the processing time drastically again, if you happen
164e0b
 to run it on an unmodified repo, but it is (currently) mutual exclusive
164e0b
-with the --split option.
164e0b
+with the --split option. NOTE: This option will not notice when
164e0b
+packages have been removed from the repo. Use --update to handle that.
164e0b
 .br
164e0b
 .IP "\fB\--split\fP"
164e0b
 Run in split media mode. Rather than pass a single directory, take a set of
164e0b
@@ -61,7 +69,7 @@ directories corresponding to different volumes in a media set.
164e0b
 .br
164e0b
 .IP "\fB\-p --pretty\fP"
164e0b
 Output xml files in pretty format.
164e0b
-.IP "\fB\-V --version\fP"
164e0b
+.IP "\fB\--version\fP"
164e0b
 Output version.
164e0b
 .IP "\fB\-h --help\fP"
164e0b
 Show help menu.
164e0b
@@ -89,6 +97,10 @@ Include the file's checksum in the metadata filename, helps HTTP caching (defaul
164e0b
 .IP "\fB\--simple-md-filenames\fP"
164e0b
 Do not include the file's checksum in the metadata filename.
164e0b
 
164e0b
+.IP "\fB\--retain-old-md\fP"
164e0b
+Keep around the latest (by timestamp) N copies of the old repodata (so clients
164e0b
+with older repomd.xml files can still access it). Default is 0.
164e0b
+
164e0b
 .IP "\fB\--distro\fP"
164e0b
 Specify distro tags. Can be specified more than once. Optional syntax specifying a
164e0b
 cpeid(http://cpe.mitre.org/) --distro=cpeid,distrotag
164e0b
@@ -104,7 +116,16 @@ Tells createrepo to generate deltarpms and the delta metadata
164e0b
 paths to look for older pkgs to delta against. Can be specified multiple times
164e0b
 .IP "\fB\--num-deltas\fP int"
164e0b
 the number of older versions to make deltas against. Defaults to 1
164e0b
-
164e0b
+.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST
164e0b
+output the paths to the pkgs actually read useful with  --update
164e0b
+.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE
164e0b
+max size of an rpm that to run deltarpm against (in bytes)
164e0b
+.IP "\fB\--workers\fP WORKERS
164e0b
+number of workers to spawn to read rpms
164e0b
+.IP "\fB\--compress-type\fP
164e0b
+specify which compression method to use: compat (default),
164e0b
+xz (may not be available), gz, bz2.
164e0b
+.IP
164e0b
 
164e0b
 .SH "EXAMPLES"
164e0b
 Here is an example of a repository with a groups file. Note that the
164e0b
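Editor's note: a hedged command line exercising the options documented in this
man-page hunk (the repository path is hypothetical):

    createrepo --update --retain-old-md 2 --workers 4 --compress-type xz /srv/myrepo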
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
164e0b
index 8c98191..4528bf2 100755
164e0b
--- a/genpkgmetadata.py
164e0b
+++ b/genpkgmetadata.py
164e0b
@@ -22,7 +22,7 @@
164e0b
 import os
164e0b
 import sys
164e0b
 import re
164e0b
-from optparse import OptionParser
164e0b
+from optparse import OptionParser,SUPPRESS_HELP
164e0b
 import time
164e0b
 
164e0b
 import createrepo
164e0b
@@ -37,6 +37,12 @@ def parse_args(args, conf):
164e0b
        Sanity check all the things being passed in.
164e0b
     """
164e0b
 
164e0b
+    def_workers = os.nice(0)
164e0b
+    if def_workers > 0:
164e0b
+        def_workers = 1 # We are niced, so just use a single worker.
164e0b
+    else:
164e0b
+        def_workers = 0 # zoooom....
164e0b
+
164e0b
     _def   = yum.misc._default_checksums[0]
164e0b
     _avail = yum.misc._available_checksums
164e0b
     parser = OptionParser(version = "createrepo %s" % createrepo.__version__)
164e0b
@@ -95,11 +101,13 @@ def parse_args(args, conf):
164e0b
     parser.add_option("--changelog-limit", dest="changelog_limit",
164e0b
         default=None, help="only import the last N changelog entries")
164e0b
     parser.add_option("--unique-md-filenames", dest="unique_md_filenames",
164e0b
-        help="include the file's checksum in the filename, helps with proxies",
164e0b
+        help="include the file's checksum in the filename, helps with proxies (default)",
164e0b
         default=True, action="store_true")
164e0b
-    parser.add_option("--simple-md-filenames", dest="simple_md_filenames",
164e0b
-        help="do not include the file's checksum in the filename, helps with proxies",
164e0b
-        default=False, action="store_true")
164e0b
+    parser.add_option("--simple-md-filenames", dest="unique_md_filenames",
164e0b
+        help="do not include the file's checksum in the filename",
164e0b
+        action="store_false")
164e0b
+    parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md',
164e0b
+        help="keep around the latest (by timestamp) N copies of the old repodata")
164e0b
     parser.add_option("--distro", default=[], action="append",
164e0b
         help="distro tag and optional cpeid: --distro" "'cpeid,textname'")
164e0b
     parser.add_option("--content", default=[], dest='content_tags',
164e0b
@@ -119,10 +127,15 @@ def parse_args(args, conf):
164e0b
     parser.add_option("--max-delta-rpm-size", default=100000000,
164e0b
         dest='max_delta_rpm_size', type='int',
164e0b
         help="max size of an rpm that to run deltarpm against (in bytes)")
164e0b
-
164e0b
-    parser.add_option("--workers", default=1,
164e0b
+    parser.add_option("--workers", default=def_workers,
164e0b
         dest='workers', type='int',
164e0b
         help="number of workers to spawn to read rpms")
164e0b
+    parser.add_option("--xz", default=False,
164e0b
+        action="store_true",
164e0b
+        help=SUPPRESS_HELP)
164e0b
+    parser.add_option("--compress-type", default='compat', dest="compress_type",
164e0b
+        help="which compression type to use")
164e0b
+        
164e0b
     
164e0b
     (opts, argsleft) = parser.parse_args(args)
164e0b
     if len(argsleft) > 1 and not opts.split:
164e0b
@@ -138,6 +151,9 @@ def parse_args(args, conf):
164e0b
     else:
164e0b
         directories = argsleft
164e0b
 
164e0b
+    if opts.workers >= 128:
164e0b
+        errorprint(_('Warning: More than 128 workers is a lot. Limiting.'))
164e0b
+        opts.workers = 128
164e0b
     if opts.sumtype == 'sha1':
164e0b
         errorprint(_('Warning: It is more compatible to use sha instead of sha1'))
164e0b
 
164e0b
@@ -150,11 +166,13 @@ def parse_args(args, conf):
164e0b
         errorprint(_('--split and --checkts options are mutually exclusive'))
164e0b
         sys.exit(1)
164e0b
 
164e0b
-    if opts.simple_md_filenames:
164e0b
-        opts.unique_md_filenames = False
164e0b
-    
164e0b
     if opts.nodatabase:
164e0b
         opts.database = False
164e0b
+    
164e0b
+    # --xz is just shorthand for --compress-type=xz
164e0b
+    if opts.xz and opts.compress_type == 'compat':
164e0b
+        opts.compress_type = 'xz'
164e0b
+        
164e0b
         
164e0b
     # let's switch over to using the conf object - put all the opts into it
164e0b
     for opt in parser.option_list:
164e0b
@@ -240,6 +258,7 @@ def main(args):
164e0b
             if mdgen.checkTimeStamps():
164e0b
                 if mdgen.conf.verbose:
164e0b
                     print _('repo is up to date')
164e0b
+                mdgen._cleanup_tmp_repodata_dir()
164e0b
                 sys.exit(0)
164e0b
 
164e0b
         if conf.profile:
164e0b
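Editor's note: the niced-worker default above, in isolation; the assumption
(not visible in this hunk) is that a value of 0 is later expanded to one
worker per online CPU via num_cpus_online():

    import os
    def_workers = 1 if os.nice(0) > 0 else 0  # niced: stay out of the way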
diff --git a/mergerepo.py b/mergerepo.py
164e0b
index 05e5f5e..80cb1a8 100755
164e0b
--- a/mergerepo.py
164e0b
+++ b/mergerepo.py
164e0b
@@ -18,6 +18,7 @@
164e0b
 
164e0b
 import sys
164e0b
 import createrepo.merge
164e0b
+from createrepo.utils import MDError
164e0b
 from optparse import OptionParser
164e0b
 
164e0b
 #TODO:
164e0b
@@ -47,6 +48,9 @@ def parse_args(args):
164e0b
                       help="Do not merge group(comps) metadata")
164e0b
     parser.add_option("", "--noupdateinfo", default=False, action="store_true",
164e0b
                       help="Do not merge updateinfo metadata")
164e0b
+    parser.add_option("--compress-type", default=None, dest="compress_type",
164e0b
+                      help="which compression type to use")
164e0b
+                      
164e0b
     (opts, argsleft) = parser.parse_args(args)
164e0b
 
164e0b
     if len(opts.repos) < 2:
164e0b
@@ -77,9 +81,14 @@ def main(args):
164e0b
         rmbase.groups = False
164e0b
     if opts.noupdateinfo:
164e0b
         rmbase.updateinfo = False
164e0b
-
164e0b
-    rmbase.merge_repos()
164e0b
-    rmbase.write_metadata()
164e0b
-
164e0b
+    if opts.compress_type:
164e0b
+        rmbase.mdconf.compress_type = opts.compress_type
164e0b
+    try:
164e0b
+        rmbase.merge_repos()
164e0b
+        rmbase.write_metadata()
164e0b
+    except MDError, e:
164e0b
+        print >> sys.stderr, "Could not merge repos: %s" % e
164e0b
+        sys.exit(1)
164e0b
+        
164e0b
 if __name__ == "__main__":
164e0b
     main(sys.argv[1:])
164e0b
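Editor's note: a hedged invocation of the new mergerepo flag; the repo URLs
are placeholders, and --repo is assumed to accumulate into opts.repos (the
script only checks that at least two repos are given):

    mergerepo --repo=file:///srv/repo1 --repo=file:///srv/repo2 --compress-type=gz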
diff --git a/modifyrepo.py b/modifyrepo.py
164e0b
index 17094a4..bffe99a 100755
164e0b
--- a/modifyrepo.py
164e0b
+++ b/modifyrepo.py
164e0b
@@ -1,11 +1,15 @@
164e0b
 #!/usr/bin/python
164e0b
-# This tools is used to insert arbitrary metadata into an RPM repository.
164e0b
+# This tool is used to manipulate arbitrary metadata in an RPM repository.
164e0b
 # Example:
164e0b
 #           ./modifyrepo.py updateinfo.xml myrepo/repodata
164e0b
+#           or
164e0b
+#           ./modifyrepo.py --remove updateinfo.xml myrepo/repodata
164e0b
 # or in Python:
164e0b
 #           >>> from modifyrepo import RepoMetadata
164e0b
 #           >>> repomd = RepoMetadata('myrepo/repodata')
164e0b
 #           >>> repomd.add('updateinfo.xml')
164e0b
+#           or
164e0b
+#           >>> repomd.remove('updateinfo.xml')
164e0b
 #
164e0b
 # This program is free software; you can redistribute it and/or modify
164e0b
 # it under the terms of the GNU General Public License as published by
164e0b
@@ -20,11 +24,13 @@
164e0b
 # (C) Copyright 2006  Red Hat, Inc.
164e0b
 # Luke Macken <lmacken@redhat.com>
164e0b
 # modified by Seth Vidal 2008
164e0b
+# modified by Daniel Mach 2011
164e0b
 
164e0b
 import os
164e0b
 import sys
164e0b
 from createrepo import __version__
164e0b
-from createrepo.utils import checksum_and_rename, GzipFile, MDError
164e0b
+from createrepo.utils import checksum_and_rename, compressOpen, MDError
164e0b
+from createrepo.utils import _available_compression
164e0b
 from yum.misc import checksum
164e0b
 
164e0b
 from yum.repoMDObject import RepoMD, RepoMDError, RepoData
164e0b
@@ -38,7 +44,7 @@ class RepoMetadata:
164e0b
         """ Parses the repomd.xml file existing in the given repo directory. """
164e0b
         self.repodir = os.path.abspath(repo)
164e0b
         self.repomdxml = os.path.join(self.repodir, 'repomd.xml')
164e0b
-        self.checksum_type = 'sha256'
164e0b
+        self.compress_type = _available_compression[-1] # best available
164e0b
 
164e0b
         if not os.path.exists(self.repomdxml):
164e0b
             raise MDError, '%s not found' % self.repomdxml
164e0b
@@ -49,6 +55,35 @@ class RepoMetadata:
164e0b
         except RepoMDError, e:
164e0b
             raise MDError, 'Could not parse %s' % self.repomdxml
164e0b
 
164e0b
+    def _get_mdtype(self, mdname, mdtype=None):
164e0b
+        """ Get mdtype from existing mdtype or from a mdname. """
164e0b
+        if mdtype:
164e0b
+            return mdtype
164e0b
+        return mdname.split('.')[0]
164e0b
+
164e0b
+    def _print_repodata(self, repodata):
164e0b
+        """ Print repodata details. """
164e0b
+        print "           type =", repodata.type
164e0b
+        print "       location =", repodata.location[1]
164e0b
+        print "       checksum =", repodata.checksum[1]
164e0b
+        print "      timestamp =", repodata.timestamp
164e0b
+        print "  open-checksum =", repodata.openchecksum[1]
164e0b
+
164e0b
+    def _write_repomd(self):
164e0b
+        """ Write the updated repomd.xml. """
164e0b
+        outmd = file(self.repomdxml, 'w')
164e0b
+        outmd.write(self.repoobj.dump_xml())
164e0b
+        outmd.close()
164e0b
+        print "Wrote:", self.repomdxml
164e0b
+
164e0b
+    def _remove_repodata_file(self, repodata):
164e0b
+        """ Remove a file specified in repodata location """
164e0b
+        try:
164e0b
+            os.remove(repodata.location[1])
164e0b
+        except OSError, ex:
164e0b
+            if ex.errno != 2:
164e0b
+                # errno 2 (ENOENT, missing file) is fine; anything else is an error
164e0b
+                raise MDError("could not remove file %s" % repodata.location[1])
164e0b
 
164e0b
     def add(self, metadata, mdtype=None):
164e0b
         """ Insert arbitrary metadata into this repository.
164e0b
@@ -63,8 +98,8 @@ class RepoMetadata:
164e0b
             mdname = 'updateinfo.xml'
164e0b
         elif isinstance(metadata, str):
164e0b
             if os.path.exists(metadata):
164e0b
-                if metadata.endswith('.gz'):
164e0b
-                    oldmd = GzipFile(filename=metadata, mode='rb')
164e0b
+                if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'):
164e0b
+                    oldmd = compressOpen(metadata, mode='rb')
164e0b
                 else:
164e0b
                     oldmd = file(metadata, 'r')
164e0b
                 md = oldmd.read()
164e0b
@@ -75,27 +110,32 @@ class RepoMetadata:
164e0b
         else:
164e0b
             raise MDError, 'invalid metadata type'
164e0b
 
164e0b
+        do_compress = False
164e0b
         ## Compress the metadata and move it into the repodata
164e0b
-        if not mdname.endswith('.gz'):
164e0b
-            mdname += '.gz'
164e0b
-        if not mdtype:
164e0b
-            mdtype = mdname.split('.')[0]
164e0b
-            
164e0b
+        if self.compress and mdname.split('.')[-1] not in ('gz', 'bz2', 'xz'):
164e0b
+            do_compress = True
164e0b
+            mdname += '.' + self.compress_type
164e0b
+        mdtype = self._get_mdtype(mdname, mdtype)
164e0b
+
164e0b
         destmd = os.path.join(self.repodir, mdname)
164e0b
-        newmd = GzipFile(filename=destmd, mode='wb')
164e0b
+        if do_compress:
164e0b
+            newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type)
164e0b
+        else:
164e0b
+            newmd = open(destmd, 'wb')
164e0b
+            
164e0b
         newmd.write(md)
164e0b
         newmd.close()
164e0b
         print "Wrote:", destmd
164e0b
 
164e0b
         open_csum = checksum(self.checksum_type, metadata)
164e0b
-        csum, destmd = checksum_and_rename(destmd, self.checksum_type)
164e0b
+        if self.unique_md_filenames:
164e0b
+            csum, destmd = checksum_and_rename(destmd, self.checksum_type)
164e0b
+        else:
164e0b
+            csum = checksum(self.checksum_type, destmd)
164e0b
         base_destmd = os.path.basename(destmd)
164e0b
 
164e0b
-
164e0b
-        ## Remove any stale metadata
164e0b
-        if mdtype in self.repoobj.repoData:
164e0b
-            del self.repoobj.repoData[mdtype]
164e0b
-            
164e0b
+        # Remove any stale metadata
164e0b
+        old_rd = self.repoobj.repoData.pop(mdtype, None)
164e0b
 
164e0b
         new_rd = RepoData()
164e0b
         new_rd.type = mdtype
164e0b
@@ -103,20 +143,30 @@ class RepoMetadata:
164e0b
         new_rd.checksum = (self.checksum_type, csum)
164e0b
         new_rd.openchecksum = (self.checksum_type, open_csum)
164e0b
         new_rd.size = str(os.stat(destmd).st_size)
164e0b
-        new_rd.timestamp = str(os.stat(destmd).st_mtime)
164e0b
+        new_rd.timestamp = str(int(os.stat(destmd).st_mtime))
164e0b
         self.repoobj.repoData[new_rd.type] = new_rd
164e0b
-        
164e0b
-        print "           type =", new_rd.type
164e0b
-        print "       location =", new_rd.location[1]
164e0b
-        print "       checksum =", new_rd.checksum[1]
164e0b
-        print "      timestamp =", new_rd.timestamp
164e0b
-        print "  open-checksum =", new_rd.openchecksum[1]
164e0b
-
164e0b
-        ## Write the updated repomd.xml
164e0b
-        outmd = file(self.repomdxml, 'w')
164e0b
-        outmd.write(self.repoobj.dump_xml())
164e0b
-        outmd.close()
164e0b
-        print "Wrote:", self.repomdxml
164e0b
+        self._print_repodata(new_rd)
164e0b
+        self._write_repomd()
164e0b
+
164e0b
+        if old_rd is not None and old_rd.location[1] != new_rd.location[1]:
164e0b
+            # remove the old file when overwriting metadata
164e0b
+            # with the same mdtype but different location
164e0b
+            self._remove_repodata_file(old_rd)
164e0b
+
164e0b
+    def remove(self, metadata, mdtype=None):
164e0b
+        """ Remove metadata from this repository. """
164e0b
+        mdname = metadata
164e0b
+        mdtype = self._get_mdtype(mdname, mdtype)
164e0b
+
164e0b
+        old_rd = self.repoobj.repoData.pop(mdtype, None)
164e0b
+        if old_rd is None:
164e0b
+            print "Metadata not found: %s" % mdtype
164e0b
+            return
164e0b
+
164e0b
+        self._remove_repodata_file(old_rd)
164e0b
+        print "Removed:"
164e0b
+        self._print_repodata(old_rd)
164e0b
+        self._write_repomd()
164e0b
 
164e0b
 
164e0b
 def main(args):
164e0b
@@ -124,7 +174,23 @@ def main(args):
164e0b
     # query options
164e0b
     parser.add_option("--mdtype", dest='mdtype',
164e0b
                       help="specific datatype of the metadata, will be derived from the filename if not specified")
164e0b
-    parser.usage = "modifyrepo [options] <input_metadata> <output repodata>"
164e0b
+    parser.add_option("--remove", action="store_true",
164e0b
+                      help="remove specified file from repodata")
164e0b
+    parser.add_option("--compress", action="store_true", default=True,
164e0b
+                      help="compress the new repodata before adding it to the repo (default)")
164e0b
+    parser.add_option("--no-compress", action="store_false", dest="compress",
164e0b
+                      help="do not compress the new repodata before adding it to the repo")
164e0b
+    parser.add_option("--compress-type", dest='compress_type', default='gz',
164e0b
+                      help="compression format to use")
164e0b
+    parser.add_option("-s", "--checksum", default='sha256', dest='sumtype',
164e0b
+        help="specify the checksum type to use (default: sha256)")
164e0b
+    parser.add_option("--unique-md-filenames", dest="unique_md_filenames",
164e0b
+        help="include the file's checksum in the filename, helps with proxies (default)",
164e0b
+        default=True, action="store_true")
164e0b
+    parser.add_option("--simple-md-filenames", dest="unique_md_filenames",
164e0b
+        help="do not include the file's checksum in the filename",
164e0b
+        action="store_false")
164e0b
+    parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output repodata>"
164e0b
     
164e0b
     (opts, argsleft) = parser.parse_args(args)
164e0b
     if len(argsleft) != 2:
164e0b
@@ -137,11 +203,32 @@ def main(args):
164e0b
     except MDError, e:
164e0b
         print "Could not access repository: %s" % str(e)
164e0b
         return 1
164e0b
+
164e0b
+
164e0b
+    repomd.checksum_type = opts.sumtype
164e0b
+    repomd.unique_md_filenames = opts.unique_md_filenames
164e0b
+    repomd.compress = opts.compress
164e0b
+    if opts.compress_type not in _available_compression:
164e0b
+        print "Compression %s not available: Please choose from: %s" % (opts.compress_type, ', '.join(_available_compression))
164e0b
+        return 1
164e0b
+    repomd.compress_type = opts.compress_type
164e0b
+
164e0b
+    # remove
164e0b
+    if opts.remove:
164e0b
+        try:
164e0b
+            repomd.remove(metadata)
164e0b
+        except MDError, ex:
164e0b
+            print "Could not remove metadata: %s" % (metadata, str(ex))
164e0b
+            return 1
164e0b
+        return
164e0b
+
164e0b
+    # add
164e0b
     try:
164e0b
         repomd.add(metadata, mdtype=opts.mdtype)
164e0b
     except MDError, e:
164e0b
         print "Could not add metadata from file %s: %s" % (metadata, str(e))
164e0b
         return 1
164e0b
+    
164e0b
 
164e0b
 if __name__ == '__main__':
164e0b
     ret = main(sys.argv[1:])
164e0b
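Editor's note: two hedged invocations matching the usage comments in the
patched header; xz is only accepted when the lzma module is importable:

    modifyrepo --compress-type=xz updateinfo.xml myrepo/repodata
    modifyrepo --remove updateinfo.xml myrepo/repodata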
diff --git a/worker.py b/worker.py
164e0b
index eb35ef7..b67b5bd 100755
164e0b
--- a/worker.py
164e0b
+++ b/worker.py
164e0b
@@ -5,6 +5,7 @@ import yum
164e0b
 import createrepo
164e0b
 import os
164e0b
 import rpmUtils
164e0b
+import re
164e0b
 from optparse import OptionParser
164e0b
 
164e0b
 
164e0b
@@ -23,6 +24,8 @@ def main(args):
164e0b
     parser = OptionParser()
164e0b
     parser.add_option('--tmpmdpath', default=None, 
164e0b
                 help="path where the outputs should be dumped for this worker")
164e0b
+    parser.add_option('--pkglist', default=None, 
164e0b
+                help="file to read the pkglist from in lieu of all of them on the cli")
164e0b
     parser.add_option("--pkgoptions", default=[], action='append',
164e0b
                 help="pkgoptions in the format of key=value")
164e0b
     parser.add_option("--quiet", default=False, action='store_true',
164e0b
@@ -36,10 +39,6 @@ def main(args):
164e0b
     opts, pkgs = parser.parse_args(args)
164e0b
     external_data = {'_packagenumber': 1}
164e0b
     globalopts = {}
164e0b
-    if not opts.tmpmdpath:
164e0b
-        print >> sys.stderr, "tmpmdpath required for destination files"
164e0b
-        sys.exit(1)
164e0b
-    
164e0b
     
164e0b
     for strs in opts.pkgoptions:
164e0b
         k,v = strs.split('=')
164e0b
@@ -61,18 +60,39 @@ def main(args):
164e0b
             v = None
164e0b
         globalopts[k] = v
164e0b
 
164e0b
+    # turn off buffering on stdout
164e0b
+    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
164e0b
     
164e0b
     reldir = external_data['_reldir']
164e0b
     ts = rpmUtils.transaction.initReadOnlyTransaction()
164e0b
-    pri = open(opts.tmpmdpath + '/primary.xml' , 'w')
164e0b
-    fl = open(opts.tmpmdpath  + '/filelists.xml' , 'w')
164e0b
-    other = open(opts.tmpmdpath  + '/other.xml' , 'w')
164e0b
-    
164e0b
-    
164e0b
+    if opts.tmpmdpath:
164e0b
+        files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w')
164e0b
+                 for i in ('primary', 'filelists', 'other')]
164e0b
+        def output(*xml):
164e0b
+            for fh, buf in zip(files, xml):
164e0b
+                fh.write(buf)
164e0b
+    else:
164e0b
+        def output(*xml):
164e0b
+            buf = ' '.join(str(len(i)) for i in xml)
164e0b
+            sys.stdout.write('*** %s\n' % buf)
164e0b
+            for buf in xml:
164e0b
+                sys.stdout.write(buf)
164e0b
+
164e0b
+    if opts.pkglist:
164e0b
+        for line in open(opts.pkglist,'r').readlines():
164e0b
+            line = line.strip()
164e0b
+            if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
164e0b
+                continue
164e0b
+            pkgs.append(line)
164e0b
+
164e0b
+    clog_limit = globalopts.get('clog_limit', None)
164e0b
+    if clog_limit is not None:
164e0b
+        clog_limit = int(clog_limit)
164e0b
     for pkgfile in pkgs:
164e0b
         pkgpath = reldir + '/' + pkgfile
164e0b
         if not os.path.exists(pkgpath):
164e0b
             print >> sys.stderr, "File not found: %s" % pkgpath
164e0b
+            output()
164e0b
             continue
164e0b
 
164e0b
         try:
164e0b
@@ -80,20 +100,17 @@ def main(args):
164e0b
                 print "reading %s" % (pkgfile)
164e0b
 
164e0b
             pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, 
164e0b
-                                                        external_data=external_data)
164e0b
-            pri.write(pkg.xml_dump_primary_metadata())
164e0b
-            fl.write(pkg.xml_dump_filelists_metadata())
164e0b
-            other.write(pkg.xml_dump_other_metadata(clog_limit=
164e0b
-                                            globalopts.get('clog_limit', None)))
164e0b
+                                sumtype=globalopts.get('sumtype', None), 
164e0b
+                                external_data=external_data)
164e0b
+            output(pkg.xml_dump_primary_metadata(),
164e0b
+                   pkg.xml_dump_filelists_metadata(),
164e0b
+                   pkg.xml_dump_other_metadata(clog_limit=clog_limit))
164e0b
         except yum.Errors.YumBaseError, e:
164e0b
             print >> sys.stderr, "Error: %s" % e
164e0b
+            output()
164e0b
             continue
164e0b
         else:
164e0b
             external_data['_packagenumber']+=1
164e0b
         
164e0b
-    pri.close()
164e0b
-    fl.close()
164e0b
-    other.close()
164e0b
-    
164e0b
 if __name__ == "__main__":
164e0b
     main(sys.argv[1:])
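Editor's note: when --tmpmdpath is omitted the worker now streams results over
stdout; a minimal reader sketch under that assumption (the consumer inside
createrepo proper may differ):

    def read_package(pipe):
        # each package is announced by a '*** <len> <len> <len>' line, followed
        # by that many bytes of primary/filelists/other XML; a bare '***' line
        # means the package was skipped
        line = pipe.readline()
        if not line.startswith('***'):
            raise ValueError('unexpected worker output: %r' % line)
        sizes = [int(n) for n in line.split()[1:]]
        return [pipe.read(n) for n in sizes]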