Package rhnpush :: Module archive
[hide private]
[frames] | no frames]

Source Code for Module rhnpush.archive

  1  # 
  2  # Copyright (c) 2008--2018 Red Hat, Inc. 
  3  # 
  4  # This software is licensed to you under the GNU General Public License, 
  5  # version 2 (GPLv2). There is NO WARRANTY for this software, express or 
  6  # implied, including the implied warranties of MERCHANTABILITY or FITNESS 
  7  # FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2 
  8  # along with this software; if not, see 
  9  # http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. 
 10  # 
 11  # Red Hat trademarks are not licensed under GPLv2. No permission is 
 12  # granted to use or replicate Red Hat trademarks that are incorporated 
 13  # in this software or its documentation. 
 14  # 
 15   
 16  """Archive Parsing module""" 
 17   
import os
import select
import shutil
import subprocess
import sys
import tarfile
import tempfile
import zipfile

try:
    from shlex import quote as _shell_quote
except ImportError:  # Python 2 keeps the same helper in the pipes module
    from pipes import quote as _shell_quote
 26   
 27  if not hasattr(zipfile, 'ZIP64_LIMIT'): 
 28      sys.stderr.write("%s requires zipfile with ZIP64 support.\n" % sys.argv[0]) 
 29      sys.exit(3) 
 30   
 31  # exceptions ------------------------------------------------------------- 
 32   
 33   
class ArchiveException(Exception):
    """Base class for the errors raised by this module."""
36 37
class DecompressionError(ArchiveException):
    """ArchiveException subclass for decompression failures."""
40 41
class UnknownArchiveError(ArchiveException):
    """ArchiveException subclass raised when the archive type cannot be identified."""
44 45
class InvalidArchiveError(ArchiveException):
    """ArchiveException subclass raised when an archive cannot be expanded."""
# base archive parsing class ---------------------------------------------


# pylint: disable=bad-option-value,useless-object-inheritance
class ArchiveParser(object):

    """Explode a zip or (compressed) tar archive and parse the files and
    directories contained therein.

    This is an abstract base class: subclasses provide ``_get_archive_dir``
    and ``_explode_cmd`` (or override ``_explode`` altogether).  The archive
    is expanded into a private scratch directory which is removed again when
    the parser object is destroyed.
    """

    # constructor --------------------------------------------------------

    def __init__(self, archive, tempdir="/tmp/"):
        """Initialize an archive parser and explode the archive.

        archive -- path of the archive file; it must exist
        tempdir -- directory under which the scratch directory is created
                   and into which zip() writes its output
        """
        assert os.path.exists(archive)

        self._archive = archive
        self._archive_dir = None

        # bug 164756: allow optional working directory
        self._parent_dir = tempdir

        # The module-wide default is still redirected, as this constructor
        # has always done, so code relying on that side effect keeps working.
        tempfile.tempdir = tempdir

        # mkdtemp() picks the name and creates the directory in one step,
        # readable only by the current user; the former mktemp() + mkdir()
        # pair left a window in which somebody else could claim the name.
        self._temp_dir = tempfile.mkdtemp(dir=tempdir)

        self._explode()

    # destructor ---------------------------------------------------------

    def __del__(self):
        """Cleanup temporary files and directories"""

        # _temp_dir is missing when the constructor failed early
        if hasattr(self, "_temp_dir") and os.path.isdir(self._temp_dir):
            shutil.rmtree(self._temp_dir, ignore_errors=True)

    # methods called by constructor --------------------------------------

    def _get_archive_dir(self):
        """[internal] find the archive's top level directory name"""

        raise NotImplementedError("ArchiveParser: abstract base class method '_get_archive_dir'")

    def _explode_cmd(self):
        """[internal] find the appropriate command to open the archive"""

        raise NotImplementedError("ArchiveParser: abstract base class method '_explode_cmd'")

    def _explode(self):
        """[internal] Explode an archive for neutral parsing

        Runs the shell command returned by _explode_cmd() and checks that
        it produced self._archive_dir.  Raises InvalidArchiveError when no
        command is available or the directory did not appear.
        """

        cmd = self._explode_cmd()

        assert self._archive is not None        # assigned in __init__
        assert self._archive_dir is not None    # expected to be set by _explode_cmd

        if not cmd:
            raise InvalidArchiveError("Could not find command to open archive: %s" % self._archive)

        _my_popen(cmd)

        if not os.path.isdir(self._archive_dir):
            raise InvalidArchiveError("Archive did not expand to %s" % self._archive_dir)

    # private helper methods ---------------------------------------------

    def _find(self, filename):
        """[internal] Returns the relative path to a file in the archive

        The path is relative to self._archive_dir; None is returned when
        nothing matches.  An entry matches when its relative path ends with
        `filename`, so this is a suffix test, not a basename comparison.
        """

        pending = os.listdir(self._archive_dir)

        while pending:
            # pop() takes the most recently added entry, so the contents of
            # a directory are visited before its remaining siblings
            # (depth-first order).
            entry = pending.pop()
            path = os.path.join(self._archive_dir, entry)

            if os.path.isdir(path):
                pending.extend(os.path.join(entry, e) for e in os.listdir(path))
            elif entry.endswith(filename):
                return entry

        return None

    # public api ---------------------------------------------------------

    def list(self, prefix=""):
        """Return a tuple of directories and files in the archive at the given
        directory: prefix"""

        dirname = os.path.join(self._archive_dir, prefix)
        assert os.path.isdir(dirname)

        dirs = []
        files = []

        for name in os.listdir(dirname):
            if os.path.isdir(os.path.join(dirname, name)):
                dirs.append(name)
            else:
                files.append(name)

        return (dirs, files)

    def contains(self, filename):
        """Returns true iff the file is contained in the archive"""
        return self._find(filename) is not None

    def read(self, filename):
        """Returns the contents of the file, or None on error
            The first match found by _find() is the one returned
        """

        f = self._find(filename)
        if f:
            return self.direct_read(f)

        return None

    def direct_read(self, filename):
        """ Returns the contents of the file; filename is a path relative to
            the archive's top level directory, which is added automatically.
            None is returned when the file is missing, unreadable or cannot
            be read.
        """
        # pylint: disable=W0703
        f = os.path.join(os.path.abspath(self._archive_dir), filename)

        if not (os.path.isfile(f) and os.access(f, os.R_OK)):
            return None

        try:
            # the with block closes the handle even when read() fails
            with open(f) as fd:
                return fd.read()
        except Exception:
            # reading is best effort: any failure is reported as None
            return None

    def zip(self, prefix=""):
        """Create a zip archive of a (sub-)directory of the archive

        prefix -- directory below the archive's top level directory; the
                  default "" selects the top level directory itself

        Returns the path of the zip file, which is written to the tempdir
        given to the constructor and named after the zipped directory, or
        None if no file was produced.
        """

        # normpath drops a trailing separator; without it the default prefix
        # gave an empty directory name and therefore an empty ".zip" file
        dirname = os.path.normpath(os.path.join(self._archive_dir, prefix))
        zip_dir = os.path.basename(dirname)
        parent_dir = os.path.dirname(dirname)

        zip_file = os.path.join(self._parent_dir, "%s.zip" % zip_dir)
        fd = zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED)
        try:
            for base, _dirs, files in os.walk(dirname):
                # Member names are made relative to the parent directory
                # explicitly, so the process working directory is left alone.
                arc_base = os.path.relpath(base, parent_dir)
                fd.write(base, arc_base)
                for f in files:
                    fd.write(os.path.join(base, f), os.path.join(arc_base, f))
        finally:
            # closing is what writes the zip's central directory
            fd.close()

        if os.path.isfile(zip_file):
            return zip_file

        return None

    def cpio(self, prefix):
        """Create a cpio archive of a (sub-)directory of the archive

        Runs ``pkgtrans -s`` on the exploded archive and returns the path of
        the resulting "<prefix>.pkg" file in the scratch directory, or None
        if the file was not created.
        """

        cpio_file = os.path.join(self._temp_dir, "%s.pkg" % prefix)

        # every argument is quoted because the command goes through a shell
        cmd = "pkgtrans -s %s %s %s" % (_shell_quote(self._archive_dir),
                                        _shell_quote(cpio_file),
                                        _shell_quote(prefix))
        _my_popen(cmd)

        if os.path.isfile(cpio_file):
            return cpio_file

        return None
# parser for zip archives ------------------------------------------------
class ZipParser(ArchiveParser):

    """ArchiveParser for zip archives, unpacked with the zipfile module."""

    def __init__(self, archive, tempdir="/tmp/"):
        """Open the zip file, then let the base class create the scratch
        directory and explode the archive into it."""
        # must exist before the base constructor runs, since it calls _explode()
        self.zip_file = zipfile.ZipFile(archive, 'r')
        ArchiveParser.__init__(self, archive, tempdir)

    def _get_archive_dir(self):
        """[internal] Return the first path component of the first entry.

        Returns "" when that entry has no usable component.  Raises
        InvalidArchiveError for an archive without any entries.
        """
        names = self.zip_file.namelist()
        if not names:
            raise InvalidArchiveError("Archive contains no entries: %s" % self._archive)

        # Only the leading component names the top level directory; the
        # first entry may just as well be "pkg/sub/file" as "pkg/".
        parts = [part for part in names[0].split('/') if part and part != '.']
        if parts:
            return parts[0]
        return ""

    def _explode(self):
        """Explode zip archive

        Raises InvalidArchiveError when extraction fails.
        """
        self._archive_dir = os.path.join(self._temp_dir,
                                         self._get_archive_dir()).rstrip('/')

        try:
            self.zip_file.extractall(self._temp_dir)
        except Exception:
            e = sys.exc_info()[1]
            raise InvalidArchiveError("Archive did not expand to %s: %s" %
                                      (self._archive_dir, str(e)))

    def _explode_cmd(self):
        """Unused: _explode() is overridden and needs no external command."""
# parser for tar archives ------------------------------------------------
class TarParser(ArchiveParser):

    """ArchiveParser for tar archives, unpacked with the tarfile module."""

    def __init__(self, archive, tempdir="/tmp/"):
        """Open the tar file, then let the base class create the scratch
        directory and explode the archive into it."""
        # must exist before the base constructor runs, since it calls _explode()
        self.tar_file = tarfile.open(archive, 'r')
        ArchiveParser.__init__(self, archive, tempdir)

    def _get_archive_dir(self):
        """[internal] Return the first path component of the first member.

        Returns "" when that member has no usable component (for example
        "."), which makes the scratch directory itself the archive
        directory.  Raises InvalidArchiveError for an archive without
        members.
        """
        names = self.tar_file.getnames()
        if not names:
            raise InvalidArchiveError("Archive contains no entries: %s" % self._archive)

        # Only the leading component names the top level directory; the
        # first member may just as well be "./pkg/file" as "pkg".
        parts = [part for part in names[0].split('/') if part and part != '.']
        if parts:
            return parts[0]
        return ""

    def _reject_unsafe_members(self):
        """[internal] Raise InvalidArchiveError if a member name would be
        extracted outside the scratch directory (absolute path or "..").

        NOTE(review): only member names are checked; link targets are not.
        """
        root = os.path.realpath(self._temp_dir)
        for name in self.tar_file.getnames():
            target = os.path.realpath(os.path.join(root, name))
            if target != root and not target.startswith(root + os.sep):
                raise InvalidArchiveError(
                    "Archive member would be extracted outside of %s: %s" % (root, name))

    def _explode(self):
        """Explode tar archive

        Raises InvalidArchiveError when a member name points outside the
        scratch directory or when extraction fails.
        """
        self._archive_dir = os.path.join(self._temp_dir, self._get_archive_dir())

        # checked up front so nothing is written before the archive is refused
        self._reject_unsafe_members()

        try:
            self.tar_file.extractall(path=self._temp_dir)
        except Exception:
            e = sys.exc_info()[1]
            raise InvalidArchiveError("Archive did not expand to %s: %s" %
                                      (self._archive_dir, str(e)))

    def _explode_cmd(self):
        """Unused: _explode() is overridden and needs no external command."""
# parser for cpio archives -----------------------------------------------
class CpioParser(ArchiveParser):

    """ArchiveParser for archives that are unpacked with ``pkgtrans``."""

    def _get_archive_dir(self):
        """[internal] Use the first five characters of the archive's file
        name as the directory name."""
        return os.path.basename(self._archive)[0:5]  # arbitrary

    def _explode_cmd(self):
        """Return the appropriate command for exploding a cpio archive

        Sets self._archive_dir as a side effect.  Raises ArchiveException
        when 'pkgtrans' is not found on the $PATH.
        """

        self._archive_dir = os.path.join(self._temp_dir, self._get_archive_dir())

        if not _has_executable("pkgtrans"):
            raise ArchiveException("cannot open %s, 'pkgtrans' not found" % self._archive)

        # The command changes directory first, so relative paths would be
        # resolved against the scratch directory: make them absolute.  They
        # are quoted because the command is run by a shell.
        temp_dir = _shell_quote(os.path.abspath(self._temp_dir))
        archive_dir = _shell_quote(os.path.abspath(self._archive_dir))
        archive = _shell_quote(os.path.abspath(self._archive))

        return "cd %s; mkdir %s; pkgtrans %s %s all" % \
            (temp_dir, archive_dir, archive, archive_dir)
# internal helper methods ------------------------------------------------
318 -def _has_executable(exc):
319 """Return true if the executable is found in the $PATH""" 320 321 # flag the error condition, this will evaluate to False 322 if "PATH" not in os.environ: 323 return None 324 325 # this is posix specific 326 dirs = os.environ["PATH"].split(':') 327 328 for dirname in dirs: 329 path = os.path.join(dirname, exc) 330 if os.access(path, os.X_OK): 331 return True 332 333 return False
334 335
336 -def _my_popen(cmd):
337 """Execute a command as a subprocess and return its exit status""" 338 339 # pylint: disable=E1101 340 popen = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, 341 stderr=subprocess.PIPE, close_fds=True, shell=True) 342 popen.stdin.close() 343 344 txt = "" 345 while 1: 346 rd, _wr, ex = select.select([popen.stdout, popen.stderr], [], [popen.stdout, popen.stderr], 5) 347 if ex: 348 txt += popen.stdout.read() 349 txt += popen.stderr.read() 350 break 351 if rd: 352 txt += rd[0].read() 353 break 354 355 status = popen.wait() 356 if status != 0: 357 raise Exception("%s exited with status %s and error\n%s" % (cmd, status, txt))
# NOTE: these next two functions rely on file magic to determine the
# compression and archive types. Some file magic information can be found here:
# http://www.astro.keele.ac.uk/oldusers/rno/Computing/File_magic.html
365 -def _decompress(archive):
366 """[internal] Decompress compressed archives and return the new archive name""" 367 368 cmd = "" 369 sfx_list = None 370 371 # determine which type of compression we're dealing with, if any 372 fd = open(archive, 'r') 373 magic = fd.read(2) 374 fd.close() 375 376 if magic == "BZ": 377 cmd = "bunzip2" 378 sfx_list = (".bz2", ".bz") 379 380 elif magic == "\x1F\x9D": 381 cmd = "uncompress" 382 sfx_list = (".Z",) 383 384 elif magic == "\x1F\x8B": 385 cmd = "gunzip" 386 sfx_list = (".gz",) 387 388 # decompress the archive if it is compressed 389 if cmd: 390 391 if not _has_executable(cmd): 392 raise ArchiveException("Cannot decompress %s, '%s' not found" % (archive, cmd)) 393 394 print("Decompressing archive") 395 396 _my_popen("%s %s" % (cmd, archive)) 397 398 # remove the now invalid suffix from the archive name 399 for sfx in sfx_list: 400 if archive[-len(sfx):] == sfx: 401 archive = archive[:-len(sfx)] 402 break 403 404 return archive
# archive parser factory -------------------------------------------------
def get_archive_parser(archive, tempdir="/tmp/"):
    """Factory function that returns an ArchiveParser object for the given archive

    archive -- path of the archive; it is decompressed first if necessary
    tempdir -- working directory handed on to the parser

    The archive type is detected from file magic.  Raises
    UnknownArchiveError when no known type matches.
    """

    # decompress the archive
    archive = _decompress(archive)
    parser_class = None

    # Binary mode: the magic is compared as raw bytes and the seek offset
    # is a plain byte position.  The with block closes the file on errors.
    with open(archive, 'rb') as fd:
        # the checks are not exclusive: a later match replaces an earlier one
        if fd.read(4) == b"PK\x03\x04":
            parser_class = ZipParser

        fd.seek(0)
        if fd.read(20) == b"# PaCkAgE DaTaStReAm":
            parser_class = CpioParser

        fd.seek(257)
        if fd.read(5) == b"ustar":
            parser_class = TarParser

    # pre-posix tar doesn't have any standard file magic
    if archive.endswith(".tar"):
        parser_class = TarParser

    if parser_class is None:
        raise UnknownArchiveError("Wasn't able to identify: '%s'" % archive)

    return parser_class(archive, tempdir)
441