* Copyright (c) 2003-2006, PEAR * All rights reserved. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - Neither the name of the authors, nor the names of its contributors * may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * ----- * * This file contains code adapted from PEAR's PHP_Compat library (v1.6.0a3). * * http://pear.php.net/package/PHP_Compat * * This code appears in Horde_Mime::_uudecode(). * * This code was originally released under the LGPL 2.1 * * ----- * * Copyright 1999-2014 Horde LLC (http://www.horde.org/) * * See the enclosed file COPYING for license information (LGPL). If you * did not receive this file, see http://www.horde.org/licenses/lgpl21. * * @author Chuck Hagenbuch * @author Michael Slusarz * @category Horde * @license http://www.horde.org/licenses/lgpl21 LGPL 2.1 * @package Mime */ class Horde_Mime { /** * The RFC defined EOL string. * * @var string */ const EOL = "\r\n"; /** * The list of characters required to be quoted in MIME parameters * (regular expression). * * @since 2.1.0 * * @var string */ const MIME_PARAM_QUOTED = '/[\x01-\x20\x22\x28\x29\x2c\x2f\x3a-\x40\x5b-\x5d]/'; /** * Attempt to work around non RFC 2231-compliant MUAs by generating both * a RFC 2047-like parameter name and also the correct RFC 2231 * parameter. See: * http://lists.horde.org/archives/dev/Week-of-Mon-20040426/014240.html * * @var boolean */ static public $brokenRFC2231 = false; /** * Use windows-1252 charset when decoding ISO-8859-1 data? * * @var boolean */ static public $decodeWindows1252 = false; /** * Determines if a string contains 8-bit (non US-ASCII) characters. * * @param string $string The string to check. * @param string $charset The charset of the string. Defaults to * US-ASCII. * * @return boolean True if string contains non US-ASCII characters. */ static public function is8bit($string, $charset = null) { return ($string != Horde_String::convertCharset($string, $charset, 'US-ASCII')); } /** * MIME encodes a string (RFC 2047). * * @param string $text The text to encode (UTF-8). * @param string $charset The character set to encode to. * * @return string The MIME encoded string (US-ASCII). */ static public function encode($text, $charset = 'UTF-8') { /* The null character is valid US-ASCII, but was removed from the * allowed e-mail header characters in RFC 2822. */ if (!self::is8bit($text, 'UTF-8') && (strpos($text, null) === false)) { return $text; } $charset = Horde_String::lower($charset); $text = Horde_String::convertCharset($text, 'UTF-8', $charset); /* Get the list of elements in the string. */ $size = preg_match_all('/([^\s]+)([\s]*)/', $text, $matches, PREG_SET_ORDER); $line = ''; /* Return if nothing needs to be encoded. */ foreach ($matches as $key => $val) { if (self::is8bit($val[1], $charset)) { if ((($key + 1) < $size) && self::is8bit($matches[$key + 1][1], $charset)) { $line .= self::_encode($val[1] . $val[2], $charset) . ' '; } else { $line .= self::_encode($val[1], $charset) . $val[2]; } } else { $line .= $val[1] . $val[2]; } } return rtrim($line); } /** * Internal helper function to MIME encode a string. * * @param string $text The text to encode. * @param string $charset The character set of the text. * * @return string The MIME encoded text. */ static protected function _encode($text, $charset) { $encoded = trim(base64_encode($text)); $c_size = strlen($charset) + 7; if ((strlen($encoded) + $c_size) > 75) { $parts = explode(self::EOL, rtrim(chunk_split($encoded, intval((75 - $c_size) / 4) * 4))); } else { $parts[] = $encoded; } $p_size = count($parts); $out = ''; foreach ($parts as $key => $val) { $out .= '=?' . $charset . '?b?' . $val . '?='; if ($p_size > $key + 1) { /* RFC 2047 [2]: no encoded word can be more than 75 * characters long. If longer, you must split the word with * CRLF SPACE. */ $out .= self::EOL . ' '; } } return $out; } /** * Encodes a line via quoted-printable encoding. * * @param string $text The text to encode (UTF-8). * @param string $eol The EOL sequence to use. * @param integer $wrap Wrap a line at this many characters. * * @return string The quoted-printable encoded string. */ static public function quotedPrintableEncode($text, $eol = self::EOL, $wrap = 76) { $curr_length = 0; $output = ''; /* We need to go character by character through the data. */ for ($i = 0, $length = strlen($text); $i < $length; ++$i) { $char = $text[$i]; /* If we have reached the end of the line, reset counters. */ if ($char == "\n") { $output .= $eol; $curr_length = 0; continue; } elseif ($char == "\r") { continue; } /* Spaces or tabs at the end of the line are NOT allowed. Also, * ASCII characters below 32 or above 126 AND 61 must be * encoded. */ $ascii = ord($char); if ((($ascii === 32) && ($i + 1 != $length) && (($text[$i + 1] == "\n") || ($text[$i + 1] == "\r"))) || (($ascii < 32) || ($ascii > 126) || ($ascii === 61))) { $char_len = 3; $char = '=' . Horde_String::upper(sprintf('%02s', dechex($ascii))); } else { $char_len = 1; } /* Lines must be $wrap characters or less. */ $curr_length += $char_len; if ($curr_length > $wrap) { $output .= '=' . $eol; $curr_length = $char_len; } $output .= $char; } return $output; } /** * Decodes a MIME encoded (RFC 2047) string. * * @param string $string The MIME encoded text. * * @return string The decoded text. */ static public function decode($string) { /* Take out any spaces between multiple encoded words. */ $string = preg_replace('|\?=\s+=\?|', '?==?', $string); $out = ''; $old_pos = 0; while (($pos = strpos($string, '=?', $old_pos)) !== false) { /* Save any preceding text. */ $out .= substr($string, $old_pos, $pos - $old_pos); /* Search for first delimiting question mark (charset). */ if (($d1 = strpos($string, '?', $pos + 2)) === false) { break; } $orig_charset = substr($string, $pos + 2, $d1 - $pos - 2); if (self::$decodeWindows1252 && (Horde_String::lower($orig_charset) == 'iso-8859-1')) { $orig_charset = 'windows-1252'; } /* Search for second delimiting question mark (encoding). */ if (($d2 = strpos($string, '?', $d1 + 1)) === false) { break; } $encoding = substr($string, $d1 + 1, $d2 - $d1 - 1); /* Search for end of encoded data. */ if (($end = strpos($string, '?=', $d2 + 1)) === false) { break; } $encoded_text = substr($string, $d2 + 1, $end - $d2 - 1); switch ($encoding) { case 'Q': case 'q': $out .= Horde_String::convertCharset( preg_replace_callback( '/=([0-9a-f]{2})/i', function($ord) { return chr(hexdec($ord[1])); }, str_replace('_', ' ', $encoded_text)), $orig_charset, 'UTF-8' ); break; case 'B': case 'b': $out .= Horde_String::convertCharset( base64_decode($encoded_text), $orig_charset, 'UTF-8' ); break; default: // Ignore unknown encoding. break; } $old_pos = $end + 2; } return $out . substr($string, $old_pos); } /** * Encodes a MIME parameter string pursuant to RFC 2183 & 2231 * (Content-Type and Content-Disposition headers). * * @param string $name The parameter name. * @param string $val The parameter value (UTF-8). * @param array $opts Additional options: * - charset: (string) The charset to encode to. * DEFAULT: UTF-8 * - lang: (string) The language to use when encoding. * DEFAULT: None specified * * @return array The encoded parameter string (US-ASCII). */ static public function encodeParam($name, $val, array $opts = array()) { $curr = 0; $encode = $wrap = false; $output = array(); $charset = isset($opts['charset']) ? $opts['charset'] : 'UTF-8'; // 2 = '=', ';' $pre_len = strlen($name) + 2; /* Several possibilities: * - String is ASCII. Output as ASCII (duh). * - Language information has been provided. We MUST encode output * to include this information. * - String is non-ASCII, but can losslessly translate to ASCII. * Output as ASCII (most efficient). * - String is in non-ASCII, but doesn't losslessly translate to * ASCII. MUST encode output (duh). */ if (empty($opts['lang']) && !self::is8bit($val, 'UTF-8')) { $string = $val; } else { $cval = Horde_String::convertCharset($val, 'UTF-8', $charset); $string = Horde_String::lower($charset) . '\'' . (empty($opts['lang']) ? '' : Horde_String::lower($opts['lang'])) . '\'' . rawurlencode($cval); $encode = true; /* Account for trailing '*'. */ ++$pre_len; } if (($pre_len + strlen($string)) > 75) { /* Account for continuation '*'. */ ++$pre_len; $wrap = true; while ($string) { $chunk = 75 - $pre_len - strlen($curr); $pos = min($chunk, strlen($string) - 1); /* Don't split in the middle of an encoded char. */ if (($chunk == $pos) && ($pos > 2)) { for ($i = 0; $i <= 2; ++$i) { if ($string[$pos - $i] == '%') { $pos -= $i + 1; break; } } } $lines[] = substr($string, 0, $pos + 1); $string = substr($string, $pos + 1); ++$curr; } } else { $lines = array($string); } foreach ($lines as $i => $line) { $output[$name . (($wrap) ? ('*' . $i) : '') . (($encode) ? '*' : '')] = $line; } if (self::$brokenRFC2231 && !isset($output[$name])) { $output = array_merge(array( $name => self::encode($val, $charset) ), $output); } /* Escape certain characters in params (See RFC 2045 [Appendix A]). * Must be quoted-string if one of these exists. * Forbidden: SPACE, CTLs, ()<>@,;:\"/[]?= */ foreach ($output as $k => $v) { if (preg_match(self::MIME_PARAM_QUOTED, $v)) { $output[$k] = '"' . addcslashes($v, '\\"') . '"'; } } return $output; } /** * Decodes a MIME parameter string pursuant to RFC 2183 & 2231 * (Content-Type and Content-Disposition headers). * * @param string $type Either 'Content-Type' or 'Content-Disposition' * (case-insensitive). * @param mixed $data The text of the header or an array of param name * => param values. * * @return array An array with the following entries (all strings in * UTF-8): * - params: (array) The header's parameter values. * - val: (string) The header's "base" value. */ static public function decodeParam($type, $data) { $convert = array(); $ret = array('params' => array(), 'val' => ''); $splitRegex = '/([^;\'"]*[\'"]([^\'"]*([^\'"]*)*)[\'"][^;\'"]*|([^;]+))(;|$)/'; $type = Horde_String::lower($type); if (is_array($data)) { // Use dummy base values $ret['val'] = ($type == 'content-type') ? 'text/plain' : 'attachment'; $params = $data; } else { /* This code was adapted from PEAR's Mail_mimeDecode::. */ if (($pos = strpos($data, ';')) === false) { $ret['val'] = trim($data); return $ret; } $ret['val'] = trim(substr($data, 0, $pos)); $data = trim(substr($data, ++$pos)); $params = $tmp = array(); if (strlen($data) > 0) { /* This splits on a semi-colon, if there's no preceeding * backslash. */ preg_match_all($splitRegex, $data, $matches); for ($i = 0, $cnt = count($matches[0]); $i < $cnt; ++$i) { $param = $matches[0][$i]; while (substr($param, -2) == '\;') { $param .= $matches[0][++$i]; } $tmp[] = $param; } for ($i = 0, $cnt = count($tmp); $i < $cnt; ++$i) { $pos = strpos($tmp[$i], '='); $p_name = trim(substr($tmp[$i], 0, $pos), "'\";\t\\ "); $p_val = trim(str_replace('\;', ';', substr($tmp[$i], $pos + 1)), "'\";\t\\ "); if (strlen($p_val) && ($p_val[0] == '"')) { $p_val = substr($p_val, 1, -1); } $params[$p_name] = $p_val; } } /* End of code adapted from PEAR's Mail_mimeDecode::. */ } /* Sort the params list. Prevents us from having to manually keep * track of continuation values below. */ uksort($params, 'strnatcasecmp'); foreach ($params as $name => $val) { /* Asterisk at end indicates encoded value. */ if (substr($name, -1) == '*') { $name = substr($name, 0, -1); $encode = true; } else { $encode = false; } /* This asterisk indicates continuation parameter. */ if (($pos = strrpos($name, '*')) !== false) { $name = substr($name, 0, $pos); } if (!isset($ret['params'][$name]) || ($encode && !isset($convert[$name]))) { $ret['params'][$name] = ''; } $ret['params'][$name] .= $val; if ($encode) { $convert[$name] = true; } } foreach (array_keys($convert) as $name) { $val = $ret['params'][$name]; $quote = strpos($val, "'"); $orig_charset = substr($val, 0, $quote); if (self::$decodeWindows1252 && (Horde_String::lower($orig_charset) == 'iso-8859-1')) { $orig_charset = 'windows-1252'; } /* Ignore language. */ $quote = strpos($val, "'", $quote + 1); substr($val, $quote + 1); $ret['params'][$name] = Horde_String::convertCharset(urldecode(substr($val, $quote + 1)), $orig_charset, 'UTF-8'); } /* MIME parameters are supposed to be encoded via RFC 2231, but many * mailers do RFC 2045 encoding instead. However, if we see at least * one RFC 2231 encoding, then assume the sending mailer knew what * it was doing. */ if (empty($convert)) { foreach (array_diff(array_keys($ret['params']), array_keys($convert)) as $name) { $ret['params'][$name] = self::decode($ret['params'][$name]); } } return $ret; } /** * Generates a Message-ID string conforming to RFC 2822 [3.6.4] and the * standards outlined in 'draft-ietf-usefor-message-id-01.txt'. * * @param string A message ID string. */ static public function generateMessageId() { return '<' . strval(new Horde_Support_Guid(array('prefix' => 'Horde'))) . '>'; } /** * Performs MIME ID "arithmetic" on a given ID. * * @param string $id The MIME ID string. * @param string $action One of the following: * - down: ID of child. Note: down will first traverse to "$id.0" if * given an ID *NOT* of the form "$id.0". If given an ID of the * form "$id.0", down will traverse to "$id.1". This behavior * can be avoided if 'norfc822' option is set. * - next: ID of next sibling. * - prev: ID of previous sibling. * - up: ID of parent. Note: up will first traverse to "$id.0" if * given an ID *NOT* of the form "$id.0". If given an ID of the * form "$id.0", down will traverse to "$id". This behavior can be * avoided if 'norfc822' option is set. * @param array $options Additional options: * - count: (integer) How many levels to traverse. * DEFAULT: 1 * - norfc822: (boolean) Don't traverse rfc822 sub-levels * DEFAULT: false * * @return mixed The resulting ID string, or null if that ID can not * exist. */ static public function mimeIdArithmetic($id, $action, $options = array()) { $pos = strrpos($id, '.'); $end = ($pos === false) ? $id : substr($id, $pos + 1); switch ($action) { case 'down': if ($end == '0') { $id = ($pos === false) ? 1 : substr_replace($id, '1', $pos + 1); } else { $id .= empty($options['norfc822']) ? '.0' : '.1'; } break; case 'next': ++$end; $id = ($pos === false) ? $end : substr_replace($id, $end, $pos + 1); break; case 'prev': if (($end == '0') || (empty($options['norfc822']) && ($end == '1'))) { $id = null; } elseif ($pos === false) { $id = --$end; } else { $id = substr_replace($id, --$end, $pos + 1); } break; case 'up': if ($pos === false) { $id = ($end == '0') ? null : '0'; } elseif (!empty($options['norfc822']) || ($end == '0')) { $id = substr($id, 0, $pos); } else { $id = substr_replace($id, '0', $pos + 1); } break; } return (!is_null($id) && !empty($options['count']) && --$options['count']) ? self::mimeIdArithmetic($id, $action, $options) : $id; } /** * Determines if a given MIME ID lives underneath a base ID. * * @param string $base The base MIME ID. * @param string $id The MIME ID to query. * * @return boolean Whether $id lives underneath $base. */ static public function isChild($base, $id) { $base = (substr($base, -2) == '.0') ? substr($base, 0, -1) : rtrim($base, '.') . '.'; return ((($base == 0) && ($id != 0)) || (strpos(strval($id), strval($base)) === 0)); } /** * Scans $input for uuencoded data and converts it to unencoded data. * * @param string $input The input data * * @return array A list of arrays, with each array corresponding to * a file in the input and containing the following keys: * - data: (string) Unencoded data. * - name: (string) Filename. * - perms: (string) Octal permissions. */ static public function uudecode($input) { $data = array(); /* Find all uuencoded sections. */ if (preg_match_all("/begin ([0-7]{3}) (.+)\r?\n(.+)\r?\nend/Us", $input, $matches, PREG_SET_ORDER)) { reset($matches); while (list(,$v) = each($matches)) { $data[] = array( 'data' => self::_uudecode($v[3]), 'name' => $v[2], 'perm' => $v[1] ); } } return $data; } /** * PHP 5's built-in convert_uudecode() is broken. Need this wrapper. * * @param string $input UUencoded input. * * @return string Decoded string. */ static protected function _uudecode($input) { $decoded = ''; foreach (explode("\n", $input) as $line) { $c = count($bytes = unpack('c*', substr(trim($line,"\r\n\t"), 1))); while ($c % 4) { $bytes[++$c] = 0; } foreach (array_chunk($bytes, 4) as $b) { $b0 = ($b[0] == 0x60) ? 0 : $b[0] - 0x20; $b1 = ($b[1] == 0x60) ? 0 : $b[1] - 0x20; $b2 = ($b[2] == 0x60) ? 0 : $b[2] - 0x20; $b3 = ($b[3] == 0x60) ? 0 : $b[3] - 0x20; $b0 <<= 2; $b0 |= ($b1 >> 4) & 0x03; $b1 <<= 4; $b1 |= ($b2 >> 2) & 0x0F; $b2 <<= 6; $b2 |= $b3 & 0x3F; $decoded .= pack('c*', $b0, $b1, $b2); } } return rtrim($decoded, "\0"); } }