# -*- coding: UTF-8 -*- """Fetch seeds from a URL collection or from bzr.""" # Copyright (c) 2004, 2005, 2006, 2008, 2009, 2011 Canonical Ltd. # # Germinate is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2, or (at your option) any # later version. # # Germinate is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Germinate; see the file COPYING. If not, write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301, USA. from __future__ import print_function import sys import os import tempfile import atexit import logging try: from urllib.parse import urljoin from urllib.request import Request, URLError, urlopen except ImportError: from urlparse import urljoin from urllib2 import Request, URLError, urlopen import shutil import re import subprocess import codecs import io import collections import germinate.defaults from germinate.tsort import topo_sort __all__ = [ 'SeedError', 'Seed', 'SeedStructure', ] _logger = logging.getLogger(__name__) _bzr_cache_dir = None class AtomicFile(object): """Facilitate atomic writing of files. Forces UTF-8 encoding.""" def __init__(self, filename): self.filename = filename if sys.version_info[0] < 3: self.fd = codecs.open( '%s.new' % self.filename, 'w', 'UTF-8', 'replace') else: # io.open is available from Python 2.6, but we only use it with # Python 3 because it raises exceptions when passed bytes. self.fd = io.open( '%s.new' % self.filename, mode='w', encoding='UTF-8', errors='replace') def __enter__(self): return self.fd def __exit__(self, exc_type, unused_exc_value, unused_exc_tb): self.fd.close() if exc_type is None: os.rename('%s.new' % self.filename, self.filename) # Not really necessary, but reduces pychecker confusion. def write(self, s): self.fd.write(s) class SeedError(RuntimeError): """An error opening or parsing a seed.""" pass def _cleanup_bzr_cache(directory): shutil.rmtree(directory, ignore_errors=True) def _ensure_unicode(s): if isinstance(s, unicode): return s else: return unicode(s, "utf8", "replace") class Seed(object): """A single seed from a collection.""" def _open_seed(self, base, branch, name, bzr=False): path = os.path.join(base, branch) if not path.endswith('/'): path += '/' if bzr: global _bzr_cache_dir if _bzr_cache_dir is None: _bzr_cache_dir = tempfile.mkdtemp(prefix='germinate-') atexit.register(_cleanup_bzr_cache, _bzr_cache_dir) checkout = os.path.join(_bzr_cache_dir, branch) if not os.path.isdir(checkout): command = ['bzr'] # https://bugs.launchpad.net/bzr/+bug/39542 if path.startswith('http:'): command.append('branch') _logger.info("Fetching branch of %s", path) else: command.extend(['checkout', '--lightweight']) _logger.info("Checking out %s", path) command.extend([path, checkout]) status = subprocess.call(command) if status != 0: raise SeedError("Command failed with exit status %d:\n" " '%s'" % (status, ' '.join(command))) return open(os.path.join(checkout, name)) else: url = urljoin(path, name) _logger.info("Downloading %s", url) req = Request(url) req.add_header('Cache-Control', 'no-cache') req.add_header('Pragma', 'no-cache') return urlopen(req) def __init__(self, bases, branches, name, bzr=False): """Read a seed from a collection.""" if isinstance(branches, basestring): branches = [branches] self._name = name self._base = None self._branch = None self._file = None fd = None ssh_host = None for base in bases: for branch in branches: try: fd = self._open_seed(base, branch, name, bzr) self._base = base self._branch = branch break except SeedError: ssh_match = re.match( r'bzr\+ssh://(?:[^/]*?@)?(.*?)(?:/|$)', base) if ssh_match: ssh_host = ssh_match.group(1) except (OSError, IOError, URLError): pass if fd is not None: break if fd is None: if bzr: _logger.warning("Could not open %s from checkout of (any of):", name) for base in bases: for branch in branches: _logger.warning(' %s' % os.path.join(base, branch)) if ssh_host is not None: _logger.error("Do you need to set your user name on %s?", ssh_host) _logger.error("Try a section such as this in " "~/.ssh/config:") _logger.error("") _logger.error("Host %s", ssh_host) _logger.error(" User YOUR_USER_NAME") else: _logger.warning("Could not open (any of):") for base in bases: for branch in branches: path = os.path.join(base, branch) if not path.endswith('/'): path += '/' _logger.warning(' %s' % urljoin(path, name)) raise SeedError("Could not open %s" % name) try: self._text = fd.read() # In Python 3, we need to decode seed text read from URLs. if sys.version_info[0] >= 3 and isinstance(self._text, bytes): self._text = self._text.decode(errors="replace") finally: fd.close() def open(self): """Open a file object with the text of this seed.""" if sys.version_info[0] < 3: self._file = io.BytesIO(self._text) else: self._file = io.StringIO(self._text) return self._file def read(self, *args, **kwargs): """Read text from this seed.""" return self._file.read(*args, **kwargs) def readline(self, *args, **kwargs): """Read a line from this seed.""" return self._file.readline(*args, **kwargs) def readlines(self, *args, **kwargs): """Read a list of lines from this seed.""" return self._file.readlines(*args, **kwargs) def next(self): """Read the next line from this seed.""" return self._file.next() def close(self): """Close the file object for this seed.""" self._file.close() def __enter__(self): """Open a seed context, returning a file object.""" return self.open() def __exit__(self, unused_exc_type, unused_exc_value, unused_exc_tb): """Close a seed context.""" self.close() @property def name(self): """The seed's name.""" return self._name @property def base(self): """The base URL where this seed was found.""" return self._base @property def branch(self): """The name of the branch containing this seed.""" return self._branch @property def text(self): """The text of this seed.""" return self._text def __lt__(self, other): if not isinstance(other, Seed): return NotImplemented return self.text < other.text def __le__(self, other): if not isinstance(other, Seed): return NotImplemented return self.text <= other.text def __eq__(self, other): if not isinstance(other, Seed): return NotImplemented return self.text == other.text def __ne__(self, other): if not isinstance(other, Seed): return NotImplemented return self.text != other.text def __ge__(self, other): if not isinstance(other, Seed): return NotImplemented return self.text >= other.text def __gt__(self, other): if not isinstance(other, Seed): return NotImplemented return self.text > other.text __hash__ = None class CustomSeed(Seed): """A seed created from custom input data.""" def __init__(self, name, entries): self._name = name self._base = None self._branch = None self._text = '\n'.join(entries) + '\n' class SingleSeedStructure(object): """A single seed collection structure file. The input data is an ordered sequence of lines as follows: SEED:[ INHERITED] INHERITED is a space-separated list of seeds from which SEED inherits. For example, "ship: base desktop" indicates that packages in the "ship" seed may depend on packages in the "base" or "desktop" seeds without requiring those packages to appear in the "ship" output. INHERITED may be empty. The lines should be topologically sorted with respect to inheritance, with inherited-from seeds at the start. Any line as follows: include BRANCH causes another seed branch to be included. Seed names will be resolved in included branches if they cannot be found in the current branch. This is for internal use; applications should use the SeedStructure class instead. """ def __init__(self, branch, f): """Parse a single seed structure file.""" self.seed_order = [] self.inherit = {} self.branches = [branch] self.lines = [] self.features = set() for line in f: line = line.strip() if not line: continue if line.startswith('#'): continue words = line.split() if words[0].endswith(':'): seed = words[0][:-1] if '/' in seed: raise SeedError( "seed name '%s' may not contain '/'" % seed) self.seed_order.append(seed) self.inherit[seed] = list(words[1:]) self.lines.append(line) elif words[0] == 'include': self.branches.extend(words[1:]) elif words[0] == 'feature': self.features.update(words[1:]) else: _logger.error("Unparseable seed structure entry: %s", line) class SeedStructure(collections.Mapping, object): """The full structure of a seed collection. This deals with acquiring the seed structure files and recursively acquiring any seed structure files it includes. """ def __init__(self, branch, seed_bases=None, bzr=False): """Open a seed collection and read all the seeds it contains.""" if seed_bases is None: if bzr: seed_bases = germinate.defaults.seeds_bzr else: seed_bases = germinate.defaults.seeds seed_bases = seed_bases.split(',') self._seed_bases = seed_bases self._branch = branch self._bzr = bzr self._features = set() self._seed_order, self._inherit, branches, self._lines = \ self._parse(self._branch, set()) self._seeds = {} for seed in self._seed_order: self._seeds[seed] = Seed(seed_bases, branches, seed, bzr=bzr) self._expand_inheritance() def _parse(self, branch, got_branches): all_seed_order = [] all_inherit = {} all_branches = [] all_structure = [] # Fetch this one with Seed(self._seed_bases, branch, "STRUCTURE", self._bzr) as seed: structure = SingleSeedStructure(branch, seed) got_branches.add(branch) # Recursively expand included branches for child_branch in structure.branches: if child_branch in got_branches: continue (child_seed_order, child_inherit, child_branches, child_structure) = self._parse(child_branch, got_branches) all_seed_order.extend(child_seed_order) all_inherit.update(child_inherit) for grandchild_branch in child_branches: if grandchild_branch not in all_branches: all_branches.append(grandchild_branch) for child_structure_line in child_structure: child_structure_name = child_structure_line.split()[0][:-1] for i in range(len(all_structure)): if all_structure[i].split()[0][:-1] == child_structure_name: del all_structure[i] break all_structure.append(child_structure_line) # Attach the main branch's data to the end all_seed_order.extend(structure.seed_order) all_inherit.update(structure.inherit) for child_branch in structure.branches: if child_branch not in all_branches: all_branches.append(child_branch) for structure_line in structure.lines: structure_name = structure_line.split()[0][:-1] for i in range(len(all_structure)): if all_structure[i].split()[0][:-1] == structure_name: del all_structure[i] break all_structure.append(structure_line) self._features.update(structure.features) # We generally want to process branches in reverse order, so that # later branches can override seeds from earlier branches all_branches.reverse() return all_seed_order, all_inherit, all_branches, all_structure def _expand_inheritance(self): """Expand out incomplete inheritance lists.""" self._original_inherit = dict(self._inherit) self._names = topo_sort(self._inherit) for name in self._names: seen = set() new_inherit = [] for inheritee in self._inherit[name]: for expanded in self._inherit[inheritee]: if expanded not in seen: new_inherit.append(expanded) seen.add(expanded) if inheritee not in seen: new_inherit.append(inheritee) seen.add(inheritee) self._inherit[name] = new_inherit def limit(self, seeds): """Restrict the seeds we care about to this list.""" self._names = [] for name in seeds: for inherit in self._inherit[name]: if inherit not in self._names: self._names.append(inherit) if name not in self._names: self._names.append(name) def add(self, name, entries, parent): """Add a custom seed.""" self._names.append(name) self._inherit[name] = self._inherit[parent] + [parent] self._seeds[name] = CustomSeed(name, entries) def inner_seeds(self, seedname): """Return this seed and the seeds from which it inherits.""" innerseeds = list(self._inherit[seedname]) innerseeds.append(seedname) return innerseeds def strictly_outer_seeds(self, seedname): """Return the seeds that inherit from this seed.""" outerseeds = [] for seed in self._names: if seedname in self._inherit[seed]: outerseeds.append(seed) return outerseeds def outer_seeds(self, seedname): """Return this seed and the seeds that inherit from it.""" outerseeds = [seedname] outerseeds.extend(self.strictly_outer_seeds(seedname)) return outerseeds def __iter__(self): """Return an iterator over the seeds in this collection.""" return iter(self._seeds) def __len__(self): """Return the number of seeds in this collection.""" return len(self._seeds) def __getitem__(self, seedname): """Get a particular seed from this collection.""" return self._seeds[seedname] @property def branch(self): """The name of this seed collection branch.""" return self._branch @property def features(self): """The feature flags set for this seed collection.""" return set(self._features) @property def supported(self): """The name of the "supported" seed (the last one in the structure).""" return self._seed_order[-1] @property def names(self): """All the seed names in this collection.""" return list(self._names) def write(self, filename): """Write the text of the seed STRUCTURE file.""" with AtomicFile(filename) as f: for line in self._lines: print(_ensure_unicode(line), file=f) def write_dot(self, filename): """Write a dot file representing this structure.""" with AtomicFile(filename) as dotfile: print("digraph structure {", file=dotfile) print(" node [color=lightblue2, style=filled];", file=dotfile) for seed in self._seed_order: if seed not in self._original_inherit: continue for inherit in self._original_inherit[seed]: print(" \"%s\" -> \"%s\";" % (inherit, seed), file=dotfile) print("}", file=dotfile) def write_seed_text(self, filename, seedname): """Write the text of a seed in this collection.""" with AtomicFile(filename) as f: with self._seeds[seedname] as seed: for line in seed: print(_ensure_unicode(line.rstrip('\n')), file=f)