# Copyright (C) 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """Implementation of WebDAV for http transports. A Transport which complement http transport by implementing partially the WebDAV protocol to push files. This should enable remote push operations. """ from cStringIO import StringIO import os import random import re import sys import time import urllib2 import xml.sax import xml.sax.handler from bzrlib import ( errors, osutils, trace, transport, urlutils, ) from bzrlib.transport.http import ( _urllib, _urllib2_wrappers, ) class DavResponseHandler(xml.sax.handler.ContentHandler): """Handle a multi-status DAV response.""" def __init__(self): self.url = None self.elt_stack = None self.chars = None self.chars_wanted = False self.expected_content_handled = False def set_url(self, url): """Set the url used for error reporting when handling a response.""" self.url = url def startDocument(self): self.elt_stack = [] self.chars = None self.expected_content_handled = False def endDocument(self): self._validate_handling() if not self.expected_content_handled: raise errors.InvalidHttpResponse(self.url, msg='Unknown xml response') def startElement(self, name, attrs): self.elt_stack.append(self._strip_ns(name)) # The following is incorrect in the general case where elements are # intermixed with chars in a higher level element. That's not the case # here (otherwise the chars_wanted will have to be stacked too). if self.chars_wanted: self.chars = '' else: self.chars = None def endElement(self, name): self.chars = None self.chars_wanted = False self.elt_stack.pop() def characters(self, chrs): if self.chars_wanted: self.chars += chrs def _current_element(self): return self.elt_stack[-1] def _strip_ns(self, name): """Strip the leading namespace from name. We don't have namespaces clashes in our context, stripping it makes the code simpler. """ where = name.find(':') if where == -1: return name else: return name[where +1:] class DavStatHandler(DavResponseHandler): """Handle a PROPPFIND DAV response for a file or directory. The expected content is: - a multi-status element containing - a single response element containing - a href element - a propstat element containing - a status element (ignored) - a prop element containing at least (other are ignored) - a getcontentlength element (for files only) - an executable element (for files only) - a resourcetype element containing - a collection element (for directories only) """ def __init__(self): DavResponseHandler.__init__(self) # Flags defining the context for the actions self._response_seen = False self._init_response_attrs() def _init_response_attrs(self): self.href = None self.length = -1 self.executable = None self.is_dir = False def _validate_handling(self): if self.href is not None: self.expected_content_handled = True def startElement(self, name, attrs): sname = self._strip_ns(name) self.chars_wanted = sname in ('href', 'getcontentlength', 'executable') DavResponseHandler.startElement(self, name, attrs) def endElement(self, name): if self._response_seen: self._additional_response_starting(name) if self._href_end(): self.href = self.chars elif self._getcontentlength_end(): self.length = int(self.chars) elif self._executable_end(): self.executable = self.chars elif self._collection_end(): self.is_dir = True if self._strip_ns(name) == 'response': self._response_seen = True self._response_handled() DavResponseHandler.endElement(self, name) def _response_handled(self): """A response element inside a multistatus have been parsed.""" pass def _additional_response_starting(self, name): """A additional response element inside a multistatus begins.""" sname = self._strip_ns(name) if sname != 'multistatus': raise errors.InvalidHttpResponse( self.url, msg='Unexpected %s element' % name) def _href_end(self): stack = self.elt_stack return (len(stack) == 3 and stack[0] == 'multistatus' and stack[1] == 'response' and stack[2] == 'href') def _getcontentlength_end(self): stack = self.elt_stack return (len(stack) == 5 and stack[0] == 'multistatus' and stack[1] == 'response' and stack[2] == 'propstat' and stack[3] == 'prop' and stack[4] == 'getcontentlength') def _executable_end(self): stack = self.elt_stack return (len(stack) == 5 and stack[0] == 'multistatus' and stack[1] == 'response' and stack[2] == 'propstat' and stack[3] == 'prop' and stack[4] == 'executable') def _collection_end(self): stack = self.elt_stack return (len(stack) == 6 and stack[0] == 'multistatus' and stack[1] == 'response' and stack[2] == 'propstat' and stack[3] == 'prop' and stack[4] == 'resourcetype' and stack[5] == 'collection') class _DAVStat(object): """The stat info as it can be acquired with DAV.""" def __init__(self, size, is_dir, is_exec): self.st_size = size # We build a mode considering that: # - we have no idea about group or other chmod bits so we use a sane # default (bzr should not care anyway) # - we suppose that the user can write if is_dir: self.st_mode = 0040644 else: self.st_mode = 0100644 if is_exec: self.st_mode = self.st_mode | 0755 def _extract_stat_info(url, infile): """Extract the stat-like information from a DAV PROPFIND response. :param url: The url used for the PROPFIND request. :param infile: A file-like object pointing at the start of the response. """ parser = xml.sax.make_parser() handler = DavStatHandler() handler.set_url(url) parser.setContentHandler(handler) try: parser.parse(infile) except xml.sax.SAXParseException, e: raise errors.InvalidHttpResponse( url, msg='Malformed xml response: %s' % e) if handler.is_dir: size = -1 # directory sizes are meaningless for bzr is_exec = True else: size = handler.length is_exec = (handler.executable == 'T') return _DAVStat(size, handler.is_dir, is_exec) class DavListDirHandler(DavStatHandler): """Handle a PROPPFIND depth 1 DAV response for a directory.""" def __init__(self): DavStatHandler.__init__(self) self.dir_content = None def _validate_handling(self): if self.dir_content is not None: self.expected_content_handled = True def _make_response_tuple(self): if self.executable == 'T': is_exec = True else: is_exec = False return (self.href, self.is_dir, self.length, is_exec) def _response_handled(self): """A response element inside a multistatus have been parsed.""" if self.dir_content is None: self.dir_content = [] self.dir_content.append(self._make_response_tuple()) # Resest the attributes for the next response if any self._init_response_attrs() def _additional_response_starting(self, name): """A additional response element inside a multistatus begins.""" pass def _extract_dir_content(url, infile): """Extract the directory content from a DAV PROPFIND response. :param url: The url used for the PROPFIND request. :param infile: A file-like object pointing at the start of the response. """ parser = xml.sax.make_parser() handler = DavListDirHandler() handler.set_url(url) parser.setContentHandler(handler) try: parser.parse(infile) except xml.sax.SAXParseException, e: raise errors.InvalidHttpResponse( url, msg='Malformed xml response: %s' % e) # Reformat for bzr needs dir_content = handler.dir_content (dir_name, is_dir) = dir_content[0][:2] if not is_dir: raise errors.NotADirectory(url) dir_len = len(dir_name) elements = [] for (href, is_dir, size, is_exec) in dir_content[1:]: # Ignore first element if href.startswith(dir_name): name = href[dir_len:] if name.endswith('/'): # Get rid of final '/' name = name[0:-1] # We receive already url-encoded strings so down-casting is # safe. And bzr insists on getting strings not unicode strings. elements.append((str(name), is_dir, size, is_exec)) return elements class PUTRequest(_urllib2_wrappers.Request): def __init__(self, url, data, more_headers={}, accepted_errors=None): # FIXME: Accept */* ? Why ? *we* send, we do not receive :-/ headers = {'Accept': '*/*', 'Content-type': 'application/octet-stream', # FIXME: We should complete the # implementation of # htmllib.HTTPConnection, it's just a # shame (at least a waste) that we # can't use the following. # 'Expect': '100-continue', # 'Transfer-Encoding': 'chunked', } headers.update(more_headers) _urllib2_wrappers.Request.__init__(self, 'PUT', url, data, headers, accepted_errors=accepted_errors) class DavResponse(_urllib2_wrappers.Response): """Custom HTTPResponse. DAV have some reponses for which the body is of no interest. """ _body_ignored_responses = ( _urllib2_wrappers.Response._body_ignored_responses + [201, 405, 409, 412,] ) def begin(self): """Begin to read the response from the server. httplib incorrectly close the connection far too easily. Let's try to workaround that (as _urllib2 does, but for more cases...). """ _urllib2_wrappers.Response.begin(self) if self.status in (201, 204): self.will_close = False # Takes DavResponse into account: class DavHTTPConnection(_urllib2_wrappers.HTTPConnection): response_class = DavResponse class DavHTTPSConnection(_urllib2_wrappers.HTTPSConnection): response_class = DavResponse class DavConnectionHandler(_urllib2_wrappers.ConnectionHandler): """Custom connection handler. We need to use the DavConnectionHTTPxConnection class to take into account our own DavResponse objects, to be able to declare our own body ignored responses, sigh. """ def http_request(self, request): return self.capture_connection(request, DavHTTPConnection) def https_request(self, request): return self.capture_connection(request, DavHTTPSConnection) class DavOpener(_urllib2_wrappers.Opener): """Dav specific needs regarding HTTP(S)""" def __init__(self, report_activity=None): super(DavOpener, self).__init__(connection=DavConnectionHandler, report_activity=report_activity) class HttpDavTransport(_urllib.HttpTransport_urllib): """An transport able to put files using http[s] on a DAV server. We don't try to implement the whole WebDAV protocol. Just the minimum needed for bzr. """ _debuglevel = 0 _opener_class = DavOpener def is_readonly(self): """See Transport.is_readonly.""" return False def _raise_http_error(self, url, response, info=None): if info is None: msg = '' else: msg = ': ' + info raise errors.InvalidHttpResponse(url, 'Unable to handle http code %d%s' % (response.code, msg)) def _handle_common_errors(self, code, abspath): if code == 404: raise errors.NoSuchFile(abspath) def open_write_stream(self, relpath, mode=None): """See Transport.open_write_stream.""" # FIXME: this implementation sucks, we should really use chunk encoding # and buffers. self.put_bytes(relpath, "", mode) result = transport.AppendBasedFileStream(self, relpath) transport._file_streams[self.abspath(relpath)] = result return result def put_file(self, relpath, f, mode=None): """See Transport.put_file""" # FIXME: We read the whole file in memory, using chunked encoding and # counting bytes while sending them will be far better. Look at reusing # osutils.pumpfile ? # bytes = f.read() self.put_bytes(relpath, bytes, mode=None) return len(bytes) def put_bytes(self, relpath, bytes, mode=None): """Copy the bytes object into the location. Tests revealed that contrary to what is said in http://www.rfc.net/rfc2068.html, the put is not atomic. When putting a file, if the client died, a partial file may still exists on the server. So we first put a temp file and then move it. :param relpath: Location to put the contents, relative to base. :param f: File-like object. :param mode: Not supported by DAV. """ abspath = self._remote_path(relpath) # We generate a sufficiently random name to *assume* that # no collisions will occur and don't worry about it (nor # handle it). stamp = '.tmp.%.9f.%d.%d' % (time.time(), os.getpid(), random.randint(0,0x7FFFFFFF)) # A temporary file to hold all the data to guard against # client death tmp_relpath = relpath + stamp # Will raise if something gets wrong self.put_bytes_non_atomic(tmp_relpath, bytes) # Now move the temp file try: self.move(tmp_relpath, relpath) except Exception, e: # If we fail, try to clean up the temporary file # before we throw the exception but don't let another # exception mess things up. exc_type, exc_val, exc_tb = sys.exc_info() try: self.delete(tmp_relpath) except: raise exc_type, exc_val, exc_tb raise # raise the original with its traceback if we can. def put_file_non_atomic(self, relpath, f, mode=None, create_parent_dir=False, dir_mode=False): # Implementing put_bytes_non_atomic rather than put_file_non_atomic # because to do a put request, we must read all of the file into # RAM anyway. Better to do that than to have the contents, put # into a StringIO() and then read them all out again later. self.put_bytes_non_atomic(relpath, f.read(), mode=mode, create_parent_dir=create_parent_dir, dir_mode=dir_mode) def put_bytes_non_atomic(self, relpath, bytes, mode=None, create_parent_dir=False, dir_mode=False): """See Transport.put_file_non_atomic""" abspath = self._remote_path(relpath) request = PUTRequest(abspath, bytes, accepted_errors=[200, 201, 204, 403, 404, 409]) def bare_put_file_non_atomic(): response = self._perform(request) code = response.code if code in (403, 404, 409): # Intermediate directories missing raise errors.NoSuchFile(abspath) if code not in (200, 201, 204): self._raise_curl_http_error(abspath, response, 'expected 200, 201 or 204.') try: bare_put_file_non_atomic() except errors.NoSuchFile: if not create_parent_dir: raise parent_dir = osutils.dirname(relpath) if parent_dir: self.mkdir(parent_dir, mode=dir_mode) return bare_put_file_non_atomic() else: # Don't forget to re-raise if the parent dir doesn't exist raise def _put_bytes_ranged(self, relpath, bytes, at): """Append the file-like object part to the end of the location. :param relpath: Location to put the contents, relative to base. :param bytes: A string of bytes to upload :param at: The position in the file to add the bytes """ # Acquire just the needed data # TODO: jam 20060908 Why are we creating a StringIO to hold the # data, and then using data.read() to send the data # in the PUTRequest. Rather than just reading in and # uploading the data. # Also, if we have to read the whole file into memory anyway # it would be better to implement put_bytes(), and redefine # put_file as self.put_bytes(relpath, f.read()) # Once we teach httplib to do that, we will use file-like # objects (see handling chunked data and 100-continue). abspath = self._remote_path(relpath) # Content-Range is start-end/size. 'size' is the file size, not the # chunk size. We can't be sure about the size of the file so put '*' at # the end of the range instead. request = PUTRequest(abspath, bytes, {'Content-Range': 'bytes %d-%d/*' % (at, at+len(bytes)),}, accepted_errors=[200, 201, 204, 403, 404, 409]) response = self._perform(request) code = response.code if code in (403, 404, 409): raise errors.NoSuchFile(abspath) # Intermediate directories missing if code not in (200, 201, 204): self._raise_http_error(abspath, response, 'expected 200, 201 or 204.') def mkdir(self, relpath, mode=None): """See Transport.mkdir""" abspath = self._remote_path(relpath) request = _urllib2_wrappers.Request('MKCOL', abspath, accepted_errors=[201, 403, 405, 404, 409]) response = self._perform(request) code = response.code # jam 20060908: The error handling seems to be repeated for # each function. Is it possible to factor it out into # a helper rather than repeat it for each one? # (I realize there is some custom behavior) # Yes it is and will be done. if code == 403: # Forbidden (generally server misconfigured or not # configured for DAV) raise self._raise_http_error(abspath, response, 'mkdir failed') elif code == 405: # Not allowed (generally already exists) raise errors.FileExists(abspath) elif code in (404, 409): # Conflict (intermediate directories do not exist) raise errors.NoSuchFile(abspath) elif code != 201: # Created raise self._raise_http_error(abspath, response, 'mkdir failed') def rename(self, rel_from, rel_to): """Rename without special overwriting""" abs_from = self._remote_path(rel_from) abs_to = self._remote_path(rel_to) request = _urllib2_wrappers.Request('MOVE', abs_from, None, {'Destination': abs_to, 'Overwrite': 'F'}, accepted_errors=[201, 404, 409, 412]) response = self._perform(request) code = response.code if code == 404: raise errors.NoSuchFile(abs_from) if code == 412: raise errors.FileExists(abs_to) if code == 409: # More precisely some intermediate directories are missing raise errors.NoSuchFile(abs_to) if code != 201: # As we don't want to accept overwriting abs_to, 204 # (meaning abs_to was existing (but empty, the # non-empty case is 412)) will be an error, a server # bug even, since we require explicitely to not # overwrite. self._raise_http_error(abs_from, response, 'unable to rename to %r' % (abs_to)) def move(self, rel_from, rel_to): """See Transport.move""" abs_from = self._remote_path(rel_from) abs_to = self._remote_path(rel_to) request = _urllib2_wrappers.Request('MOVE', abs_from, None, {'Destination': abs_to}, accepted_errors=[201, 204, 404, 409]) response = self._perform(request) code = response.code if code == 404: raise errors.NoSuchFile(abs_from) if code == 409: raise errors.DirectoryNotEmpty(abs_to) # Overwriting allowed, 201 means abs_to did not exist, # 204 means it did exist. if code not in (201, 204): self._raise_http_error(abs_from, response, 'unable to move to %r' % (abs_to)) def delete(self, rel_path): """ Delete the item at relpath. Note that when a non-empty dir required to be deleted, a conforming DAV server will delete the dir and all its content. That does not normally happen in bzr. """ abs_path = self._remote_path(rel_path) request = _urllib2_wrappers.Request('DELETE', abs_path, accepted_errors=[200, 204, 404, 999]) response = self._perform(request) code = response.code if code == 404: raise errors.NoSuchFile(abs_path) if code != 204: self._raise_curl_http_error(curl, 'unable to delete') def copy(self, rel_from, rel_to): """See Transport.copy""" abs_from = self._remote_path(rel_from) abs_to = self._remote_path(rel_to) request = _urllib2_wrappers.Request( 'COPY', abs_from, None, {'Destination': abs_to}, accepted_errors=[201, 204, 404, 409]) response = self._perform(request) code = response.code if code in (404, 409): raise errors.NoSuchFile(abs_from) # XXX: our test server returns 201 but apache2 returns 204, need # investivation. if code not in(201, 204): self._raise_http_error(abs_from, response, 'unable to copy from %r to %r' % (abs_from,abs_to)) def copy_to(self, relpaths, other, mode=None, pb=None): """Copy a set of entries from self into another Transport. :param relpaths: A list/generator of entries to be copied. """ # DavTransport can be a target. So our simple implementation # just returns the Transport implementation. (Which just does # a put(get()) # We only override, because the default HttpTransportBase, explicitly # disabled it for HTTP return transport.Transport.copy_to(self, relpaths, other, mode=mode, pb=pb) def listable(self): """See Transport.listable.""" return True def list_dir(self, relpath): """ Return a list of all files at the given location. """ return [elt[0] for elt in self._list_tree(relpath, 1)] def _list_tree(self, relpath, depth): abspath = self._remote_path(relpath) propfind = """ """ request = _urllib2_wrappers.Request('PROPFIND', abspath, propfind, {'Depth': depth}, accepted_errors=[207, 404, 409,]) response = self._perform(request) code = response.code if code == 404: raise errors.NoSuchFile(abspath) if code == 409: # More precisely some intermediate directories are missing raise errors.NoSuchFile(abspath) if code != 207: self._raise_http_error(abspath, response, 'unable to list %r directory' % (abspath)) return _extract_dir_content(abspath, response) def lock_write(self, relpath): """Lock the given file for exclusive access. :return: A lock object, which should be passed to Transport.unlock() """ # We follow the same path as FTP, which just returns a BogusLock # object. We don't explicitly support locking a specific file. # TODO: jam 2006-09-08 SFTP implements this by opening exclusive # "relpath + '.lock_write'". Does DAV implement anything like # O_EXCL? # Alternatively, LocalTransport uses an OS lock to lock the file # and WebDAV supports some sort of locking. return self.lock_read(relpath) def rmdir(self, relpath): """See Transport.rmdir.""" content = self.list_dir(relpath) if len(content) > 0: raise errors.DirectoryNotEmpty(self._remote_path(relpath)) self.delete(relpath) def stat(self, relpath): """See Transport.stat. We provide a limited implementation for bzr needs. """ abspath = self._remote_path(relpath) propfind = """ """ request = _urllib2_wrappers.Request('PROPFIND', abspath, propfind, {'Depth': 0}, accepted_errors=[207, 404, 409,]) response = self._perform(request) code = response.code if code == 404: raise errors.NoSuchFile(abspath) if code == 409: # FIXME: Could this really occur ? # More precisely some intermediate directories are missing raise errors.NoSuchFile(abspath) if code != 207: self._raise_http_error(abspath, response, 'unable to list %r directory' % (abspath)) return _extract_stat_info(abspath, response) def iter_files_recursive(self): """Walk the relative paths of all files in this transport.""" # We get the whole tree with a single request tree = self._list_tree('.', 'Infinity') # Now filter out the directories for (name, is_dir, size, is_exex) in tree: if not is_dir: yield name def append_file(self, relpath, f, mode=None): """See Transport.append_file""" return self.append_bytes(relpath, f.read(), mode=mode) def append_bytes(self, relpath, bytes, mode=None): """See Transport.append_bytes""" if self._range_hint is not None: # TODO: We reuse the _range_hint handled by bzr core, # unless someone can show me a server implementing # range for write but not for read. But we may, on # our own, try to handle a similar flag for write # ranges supported by a given server. Or at least, # detect that ranges are not correctly handled and # fallback to no ranges. before = self._append_by_head_put(relpath, bytes) else: before = self._append_by_get_put(relpath, bytes) return before def _append_by_head_put(self, relpath, bytes): """Append without getting the whole file. When the server allows it, a 'Content-Range' header can be specified. """ response = self._head(relpath) code = response.code if code == 404: relpath_size = 0 else: # Consider the absence of Content-Length header as # indicating an existing but empty file (Apache 2.0 # does this, and there is even a comment in # modules/http/http_protocol.c calling that a *hack*, # I agree, it's a hack. On the other hand if the file # do not exist we get a 404, if the file does exist, # is not empty and we get no Content-Length header, # then the server is buggy :-/ ) relpath_size = int(response.headers.get('Content-Length', 0)) if relpath_size == 0: trace.mutter('if %s is not empty, the server is buggy' % relpath) if relpath_size: self._put_bytes_ranged(relpath, bytes, relpath_size) else: self.put_bytes(relpath, bytes) return relpath_size def _append_by_get_put(self, relpath, bytes): # So we need to GET the file first, append to it and # finally PUT back the result. full_data = StringIO() try: data = self.get(relpath) full_data.write(data.read()) except errors.NoSuchFile: # Good, just do the put then pass # Append the f content before = full_data.tell() full_data.write(bytes) full_data.seek(0) self.put_file(relpath, full_data) return before def get_smart_medium(self): # smart server and webdav are exclusive. There is really no point to # use webdav if a smart server is available raise errors.NoSmartMedium(self) def get_test_permutations(): """Return the permutations to be used in testing.""" import tests.dav_server return [(HttpDavTransport, tests.dav_server.DAVServer),]