/* This file is part of "reprepro" * Copyright (C) 2008 Bernhard R. Link * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301 USA */ #include #include #include #include #include #include #include #include #ifdef HAVE_LIBARCHIVE #include #include #endif #include "error.h" #include "filecntl.h" #include "chunks.h" #include "uncompression.h" #include "sourceextraction.h" struct sourceextraction { bool failed, completed; int difffile, tarfile, debiantarfile; enum compression diffcompression, tarcompression, debiancompression; /*@null@*/ char **section_p, **priority_p; }; struct sourceextraction *sourceextraction_init(char **section_p, char **priority_p) { struct sourceextraction *n; n = calloc(1, sizeof(struct sourceextraction)); if( FAILEDTOALLOC(n) ) return n; n->difffile = -1; n->tarfile = -1; n->debiantarfile = -1; n->section_p = section_p; n->priority_p = priority_p; return n; } void sourceextraction_abort(struct sourceextraction *e) { free(e); } /* with must be a string constant, no pointer! */ #define endswith(name, len, with) (len >= sizeof(with) && memcmp(name+(len+1-sizeof(with)), with, sizeof(with)-1) == 0 ) /* register a file part of this source */ void sourceextraction_setpart(struct sourceextraction *e, int i, const char *basefilename) { size_t bl = strlen(basefilename); enum compression c; if( e->failed ) return; c = compression_by_suffix(basefilename, &bl); if( endswith(basefilename, bl, ".dsc" ) ) return; else if( endswith(basefilename, bl, ".diff" ) ) { e->difffile = i; e->diffcompression = c; return; } else if( endswith(basefilename, bl, ".debian.tar" ) ) { e->debiantarfile = i; e->debiancompression = c; return; } else if( endswith(basefilename, bl, ".tar" ) ) { e->tarfile = i; e->tarcompression = c; return; } else { // TODO: errormessage e->failed = true; } } /* return the next needed file */ bool sourceextraction_needs(struct sourceextraction *e, int *ofs_p) { if( e->failed || e->completed ) return false; if( e->difffile >= 0 ) { if( !uncompression_supported(e->diffcompression) ) // TODO: errormessage return false; *ofs_p = e->difffile; return true; } else if( e->debiantarfile >= 0 ) { #ifdef HAVE_LIBARCHIVE if( !uncompression_supported(e->debiancompression) ) return false; *ofs_p = e->debiantarfile; return true; #else return false; #endif } else if( e->tarfile >= 0 ) { #ifdef HAVE_LIBARCHIVE if( !uncompression_supported(e->tarcompression) ) return false; *ofs_p = e->tarfile; return true; #else return false; #endif } else return false; } static retvalue parsediff(struct compressedfile *f, /*@null@*/char **section_p, /*@null@*/char **priority_p, bool *found_p) { size_t destlength, lines_in, lines_out; const char *p, *s; char *garbage; #define BUFSIZE 4096 char buffer[BUFSIZE]; int bytes_read, used = 0, filled = 0; auto inline bool u_getline(void); inline bool u_getline(void) { do { if( filled - used > 0 ) { char *n; p = buffer + used; n = memchr(p, '\n', filled - used); if( n != NULL ) { used += 1 + (n - p); *n = '\0'; while( --n >= p && *n == '\r' ) *n = '\0'; return true; } } else { assert( filled == used ); filled = 0; used = 0; } if( filled == BUFSIZE ) { if( used == 0 ) /* overlong line */ return false; memmove(buffer, buffer + used, filled - used); filled -= used; used = 0; } bytes_read = uncompress_read(f, buffer + filled, BUFSIZE - filled); if( bytes_read <= 0 ) return false; filled += bytes_read; } while( true ); } auto inline char u_overlinegetchar(void); inline char u_overlinegetchar(void) { const char *n; char ch; if( filled - used > 0 ) { ch = buffer[used]; } else { assert( filled == used ); used = 0; bytes_read = uncompress_read(f, buffer, BUFSIZE); if( bytes_read <= 0 ) { filled = 0; return '\0'; } filled = bytes_read; ch = buffer[0]; } if( ch == '\n' ) return '\0'; /* over rest of the line */ n = memchr(buffer + used, '\n', filled - used); if( n != NULL ) { used = 1 + (n - buffer); return ch; } used = 0; filled = 0; /* need to read more to get to the end of the line */ do { /* these lines can be long */ bytes_read = uncompress_read(f, buffer, BUFSIZE); if( bytes_read <= 0 ) return false; n = memchr(buffer, '\n', bytes_read); } while( n == NULL ); used = 1 + (n - buffer); filled = bytes_read; return ch; } /* we are assuming the exact format dpkg-source generates here... */ if( !u_getline() ) { /* empty or strange file */ *found_p = false; return RET_OK; } if( memcmp(p, "diff ", 4) == 0 ) { /* one exception is allowing diff lines, * as diff -ru adds them ... */ if( !u_getline() ) { /* strange file */ *found_p = false; return RET_OK; } } if( unlikely(memcmp(p, "--- ", 4) != 0) ) return RET_NOTHING; if( !u_getline() ) /* so short a file? */ return RET_NOTHING; if( unlikely(memcmp(p, "+++ ", 4) != 0) ) return RET_NOTHING; p += 4; s = strchr(p, '/'); if( unlikely(s == NULL) ) return RET_NOTHING; s++; /* another exception to allow diff output directly: * +++ lines might have garbage after a tab... */ garbage = strchr(s, '\t'); if( garbage != NULL ) *garbage = '\0'; destlength = s - p; /* ignore all files that are not x/debian/control */ while( strcmp(s, "debian/control") != 0 ) { if( unlikely(interrupted()) ) return RET_ERROR_INTERRUPTED; if( !u_getline() ) return RET_NOTHING; while( memcmp(p, "@@ -", 4) == 0) { if( unlikely(interrupted()) ) return RET_ERROR_INTERRUPTED; p += 4; while( *p != ',' && *p != ' ' ) { if( unlikely(*p == '\0') ) return RET_NOTHING; p++; } if( *p == ' ' ) lines_in = 1; else { p++; lines_in = 0; while( *p >= '0' && *p <= '9' ) { lines_in = 10*lines_in + (*p-'0'); p++; } } while( *p == ' ' ) p++; if( unlikely(*(p++) != '+') ) return RET_NOTHING; while( *p >= '0' && *p <= '9' ) p++; if( *p == ',' ) { p++; lines_out = 0; while( *p >= '0' && *p <= '9' ) { lines_out = 10*lines_out + (*p-'0'); p++; } } else if( *p == ' ' ) lines_out = 1; else return RET_NOTHING; while( *p == ' ' ) p++; if( unlikely(*p != '@') ) return RET_NOTHING; while( lines_in > 0 || lines_out > 0 ) { char ch; ch = u_overlinegetchar(); switch( ch ) { case '+': if( unlikely(lines_out == 0) ) return RET_NOTHING; lines_out--; break; case ' ': if( unlikely(lines_out == 0) ) return RET_NOTHING; lines_out--; /* no break */ case '-': if( unlikely(lines_in == 0) ) return RET_NOTHING; lines_in--; break; default: return RET_NOTHING; } } if( !u_getline() ) { *found_p = false; /* nothing found successfully */ return RET_OK; } } if( memcmp(p, "diff ", 4) == 0 ) { if( !u_getline() ) { /* strange file, but nothing explicitly wrong */ *found_p = false; return RET_OK; } } if( unlikely(memcmp(p, "--- ", 4) != 0) ) return RET_NOTHING; if( !u_getline() ) return RET_NOTHING; if( unlikely(memcmp(p, "+++ ", 4) != 0) ) return RET_NOTHING; p += 4; s = strchr(p, '/'); if( unlikely(s == NULL) ) return RET_NOTHING; /* another exception to allow diff output directly: * +++ lines might have garbage after a tab... */ garbage = strchr(s, '\t'); if( garbage != NULL ) *garbage = '\0'; /* if it does not always have the same directory, then * we cannot be sure it has no debian/control, so we * have to fail... */ s++; if( s != p + destlength ) return RET_NOTHING; } /* found debian/control */ if( !u_getline() ) return RET_NOTHING; if( unlikely(memcmp(p, "@@ -", 4) != 0) ) return RET_NOTHING; p += 4; p++; while( *p != ',' && *p != ' ' ) { if( unlikely(*p == '\0') ) return RET_NOTHING; p++; } if( *p == ',' ) { p++; while( *p >= '0' && *p <= '9' ) p++; } while( *p == ' ' ) p++; if( unlikely(*(p++) != '+') ) return RET_NOTHING; if( *(p++) != '1' || *(p++) != ',' ) { /* a diff not starting at the first line (or not being * more than one line) is not yet supported */ return RET_NOTHING; } lines_out = 0; while( *p >= '0' && *p <= '9' ) { lines_out = 10*lines_out + (*p-'0'); p++; } while( *p == ' ' ) p++; if( unlikely(*p != '@') ) return RET_NOTHING; while( lines_out > 0 ) { if( unlikely(interrupted()) ) return RET_ERROR_INTERRUPTED; if( !u_getline() ) return RET_NOTHING; switch( *(p++) ) { case '-': break; default: return RET_NOTHING; case ' ': case '+': if( unlikely(lines_out == 0) ) return RET_NOTHING; lines_out--; if( section_p != NULL && strncasecmp(p, "Section:", 8) == 0 ) { p += 8; while( *p == ' ' || *p == '\t' ) p++; s = p; while( *s != ' ' && *s != '\t' && *s != '\0' && *s != '\r' ) s++; if( s == p ) return RET_NOTHING; *section_p = strndup(p, s-p); if( FAILEDTOALLOC(*section_p) ) return RET_ERROR_OOM; while( *s == ' ' || *s == '\t' || *s == '\r' ) s++; if( *s != '\0' ) return RET_NOTHING; continue; } if( priority_p != NULL && strncasecmp(p, "Priority:", 9) == 0 ) { p += 9; while( *p == ' ' || *p == '\t' ) p++; s = p; while( *s != ' ' && *s != '\t' && *s != '\0' && *s != '\r' ) s++; if( s == p ) return RET_NOTHING; *priority_p = strndup(p, s-p); if( FAILEDTOALLOC(*priority_p) ) return RET_ERROR_OOM; while( *s == ' ' || *s == '\t' || *s == '\r' ) s++; if( *s != '\0' ) return RET_NOTHING; continue; } if( *p == '\0' ) { /* end of control data, we are * finished */ *found_p = true; return RET_OK; } break; } } /* cannot yet handle a .diff not containing the full control */ return RET_NOTHING; } #ifdef HAVE_LIBARCHIVE static retvalue read_source_control_file(struct sourceextraction *e, struct archive *tar, struct archive_entry *entry) { // TODO: implement... size_t size, len, controllen; ssize_t got; char *buffer, *aftercontrol; size = archive_entry_size(entry); if( size <= 0 ) return RET_NOTHING; if( size > 10*1024*1024 ) return RET_NOTHING; buffer = malloc(size+2); if( FAILEDTOALLOC(buffer) ) return RET_ERROR_OOM; len = 0; while( (got = archive_read_data(tar, buffer+len, ((size_t)size+1)-len)) > 0 && !interrupted() ) { len += got; if( len > size ) { free(buffer); return RET_NOTHING; } } if( unlikely(interrupted()) ) { free(buffer); return RET_ERROR_INTERRUPTED; } if( got < 0 ) { free(buffer); return RET_NOTHING; } buffer[len] = '\0'; // TODO: allow a saved .diff for this file applied here controllen = chunk_extract(buffer, buffer, &aftercontrol); (void)chunk_getvalue(buffer, "Section", e->section_p); (void)chunk_getvalue(buffer, "Priority", e->priority_p); free(buffer); return RET_OK; } static int compressedfile_open(UNUSED(struct archive *a), UNUSED(void *v)) { return ARCHIVE_OK; } static int compressedfile_close(UNUSED(struct archive *a), UNUSED(void *v)) { return ARCHIVE_OK; } static ssize_t compressedfile_read(UNUSED(struct archive *a), void *d, const void **buffer_p) { struct compressedfile *f = d; // TODO malloc buffer instead static char mybuffer[4096]; *buffer_p = mybuffer; return uncompress_read(f, mybuffer, 4096); } static retvalue parse_tarfile(struct sourceextraction *e, const char *filename, enum compression c, /*@out@*/bool *found_p) { struct archive *tar; struct archive_entry *entry; struct compressedfile *file; int a; retvalue r, r2; /* While an .tar, especially an .orig.tar can be very ugly (they should be * pristine upstream tars, so dpkg-source works around a lot of ugliness), * we are looking for debian/control. This is unlikely to be in an ugly * upstream tar verbatimly. */ if( !isregularfile(filename) ) return RET_NOTHING; tar = archive_read_new(); if( FAILEDTOALLOC(tar) ) { return RET_ERROR_OOM; } archive_read_support_format_tar(tar); archive_read_support_format_gnutar(tar); r = uncompress_open(&file, filename, c); if( !RET_IS_OK(r) ) { archive_read_finish(tar); return r; } a = archive_read_open(tar, file, compressedfile_open, compressedfile_read, compressedfile_close); if( a != ARCHIVE_OK ) { int err = archive_errno(tar); if( err != -EINVAL && err != 0 ) fprintf(stderr, "Error %d trying to extract control information from %s:\n" "%s\n", err, filename, archive_error_string(tar)); else fprintf(stderr, "Error trying to extract control information from %s:\n" "%s\n", filename, archive_error_string(tar)); archive_read_finish(tar); uncompress_abort(file); return RET_ERROR; } while( (a=archive_read_next_header(tar, &entry)) == ARCHIVE_OK ) { const char *name = archive_entry_pathname(entry); const char *s; bool iscontrol; if( name[0] == '.' && name[1] == '/' ) name += 2; s = strchr(name, '/'); if( s == NULL ) // TODO: is this already enough to give up totally? iscontrol = false; else iscontrol = strcmp(s+1, "debian/control") == 0 || strcmp(name, "debian/control") == 0; if( iscontrol ) { r = read_source_control_file(e, tar, entry); archive_read_finish(tar); r2 = uncompress_error(file); RET_UPDATE(r, r2); uncompress_abort(file); *found_p = true; return r; } a = archive_read_data_skip(tar); if( a != ARCHIVE_OK ) { int err = archive_errno(tar); printf("Error %d skipping %s within %s: %s\n", err, name, filename, archive_error_string(tar)); archive_read_finish(tar); if( err == 0 || err == -EINVAL ) r = RET_ERROR; else r = RET_ERRNO(err); r2 = uncompress_error(file); RET_UPDATE(r, r2); uncompress_abort(file); return r; } if( interrupted() ) return RET_ERROR_INTERRUPTED; } if( a != ARCHIVE_EOF ) { int err = archive_errno(tar); fprintf(stderr, "Error %d reading %s: %s\n", err, filename, archive_error_string(tar)); archive_read_finish(tar); if( err == 0 || err == -EINVAL ) r = RET_ERROR; else r = RET_ERRNO(err); r2 = uncompress_error(file); RET_UPDATE(r, r2); uncompress_abort(file); return r; } archive_read_finish(tar); *found_p = false; return uncompress_close(file); } #endif /* full file name of requested files ready to analyse */ retvalue sourceextraction_analyse(struct sourceextraction *e, const char *fullfilename) { retvalue r; bool found IFSTUPIDCC(= false); #ifndef HAVE_LIBARCHIVE assert( e->difffile >= 0 ); #endif if( e->difffile >= 0 ) { struct compressedfile *f; assert( uncompression_supported(e->diffcompression) ); e->difffile = -1; r = uncompress_open(&f, fullfilename, e->diffcompression); if( !RET_IS_OK(r) ) { e->failed = true; /* being unable to read a file is no hard error... */ return RET_NOTHING; } r = parsediff(f, e->section_p, e->priority_p, &found); if( RET_IS_OK(r) ) { if( !found ) r = uncompress_close(f); else { r = uncompress_error(f); uncompress_abort(f); } } else { uncompress_abort(f); } if( !RET_IS_OK(r) ) e->failed = true; else if( found ) /* do not look in the tar, we found debian/control */ e->completed = true; return r; } #ifdef HAVE_LIBARCHIVE if( e->debiantarfile >= 0 ) { e->debiantarfile = -1; r = parse_tarfile(e, fullfilename, e->debiancompression, &found); if( !RET_IS_OK(r) ) e->failed = true; else if( found ) /* do not look in the tar, we found debian/control */ e->completed = true; return r; } #endif /* if it's not the diff nor the .debian.tar, look into the .tar file: */ assert( e->tarfile >= 0 ); e->tarfile = -1; #ifdef HAVE_LIBARCHIVE r = parse_tarfile(e, fullfilename, e->tarcompression, &found); if( !RET_IS_OK(r) ) e->failed = true; else if( found ) /* do not look in the tar, we found debian/control */ e->completed = true; return r; #else return RET_NOTHING; #endif } retvalue sourceextraction_finish(struct sourceextraction *e) { if( e->completed ) { free(e); return RET_OK; } free(e); return RET_NOTHING; }