Mailinglist
 All Data Structures Files Functions Variables Pages
MailinglistMessage.class.php
Go to the documentation of this file.
1 <?php
/**
 * Decode RFC 2047 encoded-words ("=?charset?encoding?text?=") to UTF-8.
 *
 * Text outside encoded-words is copied through unchanged. An encoded-word that
 * is malformed, uses an encoding other than B or Q, or names a charset that
 * iconv cannot convert is passed through verbatim instead of being dropped.
 *
 * @param $string
 *   A header string, or an array of header strings (decoded element-wise,
 *   keys preserved).
 *
 * @return
 *   The decoded string, or an array of decoded strings when given an array.
 */
function mailinglist_message_rfc2047($string) {
  if (is_array($string)) {
    $result = array();
    foreach ($string as $idx => $value) {
      $result[$idx] = mailinglist_message_rfc2047($value);
    }
    return $result;
  }

  // Compare against "" rather than testing truthiness, so that the literal
  // string "0" is not mistaken for the end of the input.
  $string = (string) $string;
  $result = "";
  while ($string !== "") {
    $pos = strpos($string, '=?');
    if ($pos === FALSE || strpos($string, '?=', $pos + 2) === FALSE) {
      // No opener, or no terminator after it: no more encoded-words.
      $result .= $string;
      break;
    }
    // Copy the plain text in front of the candidate encoded-word.
    $result .= substr($string, 0, $pos);
    $string = substr($string, $pos);

    // Locate the delimiters in order: =?charset?encoding?text?=
    // The terminator is searched only after the second inner "?", because
    // Q-encoded text may itself start with "=" (e.g. "?Q?=E9...") and the
    // sequence "?=" would otherwise be mistaken for the end of the word.
    $pos1 = strpos($string, '?', 2);
    $pos2 = ($pos1 === FALSE) ? FALSE : strpos($string, '?', $pos1 + 1);
    $pos0 = ($pos2 === FALSE) ? FALSE : strpos($string, '?=', $pos2 + 1);
    $word = ($pos0 === FALSE) ? '' : substr($string, 0, $pos0 + 2);

    if ($pos0 === FALSE
      || $pos1 == 2                          // empty charset
      || $pos2 == $pos1 + 1                  // empty encoding
      || strpbrk($word, " \t") !== FALSE) {  // whitespace inside the word
      // Not a well-formed encoded-word: emit the "=?" literally and rescan
      // from just after it.
      $result .= substr($string, 0, 2);
      $string = (string) substr($string, 2);
      continue;
    }

    $charset = substr($string, 2, $pos1 - 2);
    $encoding = substr($string, $pos1 + 1, $pos2 - $pos1 - 1);
    $text = substr($string, $pos2 + 1, $pos0 - $pos2 - 1);
    $string = (string) substr($string, $pos0 + 2);

    switch (strtolower($encoding)) {
      case 'b':
        $decoded = base64_decode($text);
        break;
      case 'q':
        // In the Q encoding an underscore stands for a space.
        $decoded = quoted_printable_decode(str_replace('_', ' ', $text));
        break;
      default:
        // We don't know what to do, so just pass it on.
        $decoded = FALSE;
        break;
    }
    if ($decoded !== FALSE) {
      // iconv() returns FALSE (with a notice/warning) for an unknown charset.
      $decoded = @iconv($charset, 'UTF-8', $decoded);
    }
    $result .= ($decoded === FALSE) ? $word : $decoded;
  }
  return $result;
}
104 
105 
114  private $header;
115 
/**
 * Create a parser over one header.
 *
 * @param $str
 *   The header text this object will consume.
 */
function __construct($str) {
  $this->header = $str;
}
124 
/**
 * Read the next atom from the header.
 *
 * Leading whitespace is skipped, then the longest run of characters from the
 * atom alphabet below is consumed and passed through
 * mailinglist_message_rfc2047().
 *
 * @return
 *   The atom after RFC 2047 processing; an empty string when the next
 *   character is not part of the atom alphabet.
 */
function get_atom() {
  static $atext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&'*+-/=?^_`{}|~";
  $this->skip_ws();
  return mailinglist_message_rfc2047($this->get_chunk($atext));
}
137 
/**
 * Read a dot-atom: atoms joined by "." characters (e.g. "example.com").
 *
 * @return
 *   The concatenated atoms including the dots between them.
 */
function get_dot_atom() {
  $atom = $this->get_atom();
  // Only inspect the first character while there is one to inspect.
  while (is_string($this->header) && $this->header !== '' && $this->header[0] == '.') {
    // String concatenation; "+=" would perform numeric addition.
    $atom .= $this->get_char();
    $atom .= $this->get_atom();
  }
  return $atom;
}
151 
/**
 * Read the next token from the header.
 *
 * Leading whitespace is skipped, then the longest run of characters from the
 * token alphabet below is consumed. Unlike get_atom(), the result is not
 * passed through RFC 2047 decoding.
 *
 * @return
 *   The token; an empty string when the next character is not part of the
 *   token alphabet.
 */
function get_token() {
  static $ttext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&'*+-^_`{}|~.";
  $this->skip_ws();
  $token = $this->get_chunk($ttext);
  return $token;
}
/**
 * Read a parameter value: either a double-quoted string or a plain token.
 *
 * Inside a quoted string a backslash makes the following character literal.
 * A quoted string that is never closed yields everything up to the end of
 * the header.
 *
 * @return
 *   The value with surrounding quotes and escaping backslashes removed.
 */
function get_value() {
  $this->skip_ws();
  // Guard the offset read so an exhausted header falls through to
  // get_token(), which then returns an empty string.
  if (!is_string($this->header) || $this->header === '' || $this->header[0] != '"') {
    return $this->get_token();
  }

  $token = "";
  $this->get_char(); // Discard the opening quote.
  while (($char = $this->get_char()) !== FALSE && $char != '"') {
    if ($char == '\\') {
      $char = $this->get_char();
      if ($char === FALSE) {
        // Header ended right after the backslash.
        break;
      }
    }
    $token .= $char;
  }
  return $token;
}
183 
/**
 * Consume the longest leading run of characters drawn from a given set.
 *
 * @param $chars
 *   String containing every character allowed in the chunk.
 *
 * @return
 *   The consumed characters (possibly an empty string).
 */
private function get_chunk($chars) {
  $token = "";
  // Test for the empty string explicitly: empty() would also treat a
  // remaining "0" as the end of the header.
  while (is_string($this->header) && $this->header !== '' && strpos($chars, $this->header[0]) !== FALSE) {
    $token .= $this->header[0];
    $this->header = (string) substr($this->header, 1);
  }
  return $token;
}
/**
 * Skip White Space.
 *
 * Removes every leading whitespace character (as defined by ctype_space())
 * from the header. Does nothing when the header is already exhausted.
 */
function skip_ws() {
  if (!is_string($this->header) || $this->header === '') {
    // Nothing left; avoid reading offset 0 of an empty string.
    return;
  }
  $length = strlen($this->header);
  $skip = 0;
  while ($skip < $length && ctype_space($this->header[$skip])) {
    $skip++;
  }
  if ($skip > 0) {
    $this->header = (string) substr($this->header, $skip);
  }
}
206 
/**
 * Skip Comments or whitespace.
 *
 * Consumes leading whitespace and parenthesised comments until the next
 * character is neither, or the header is exhausted.
 */
function skip_cws() {
  while (is_string($this->header) && $this->header !== '') {
    $char = $this->header[0];
    if ($char != '(' && !ctype_space($char)) {
      break;
    }
    $this->get_char();
    if ($char == '(') {
      // The opening parenthesis is already consumed; skip to its match.
      $this->skip_comment();
    }
  }
}
220 
/**
 * Skip the remainder of a comment whose opening "(" was already consumed.
 *
 * Handles nested comments and backslash-quoted characters. An unterminated
 * comment consumes the rest of the header and then stops, rather than
 * looping forever on the FALSE that get_char() returns at end of input.
 */
function skip_comment() {
  while (($char = $this->get_char()) !== FALSE && $char != ')') {
    if ($char == '\\') {
      $this->get_char(); // Quoted char, so ignore the next
    }
    elseif ($char == '(') {
      $this->skip_comment(); // Nested comment
    }
  }
}
238 
/**
 * Remove and return the first character of the header.
 *
 * @return
 *   The character, or FALSE when the header is exhausted.
 */
function get_char() {
  // Compare with '' instead of using empty(): empty("0") is TRUE, which
  // would make a trailing "0" impossible to read.
  if (!is_string($this->header) || $this->header === '') {
    return FALSE;
  }
  $char = $this->header[0];
  $this->header = (string) substr($this->header, 1);
  return $char;
}
/**
 * Put text back at the front of the header so it is read again next.
 *
 * @param $char
 *   The character (or string) to prepend.
 */
function push_back($char) {
  $restored = $char . $this->header;
  $this->header = $restored;
}
262 }
263 
273  public $headers;
274 
280  public $body;
281 
/**
 * Return a header exactly as stored, without any decoding.
 *
 * @param $name
 *   Header name used as the key into the headers array.
 *
 * @return
 *   The stored value (whatever is stored under that key), or FALSE when no
 *   such header is set.
 */
function headerRaw($name) {
  return isset($this->headers[$name]) ? $this->headers[$name] : FALSE;
}
296 
/**
 * Return a header with RFC 2047 encoded-words decoded.
 *
 * @param $name
 *   Header name.
 *
 * @return
 *   The result of passing headerRaw($name) through
 *   mailinglist_message_rfc2047().
 */
function header($name) {
  return mailinglist_message_rfc2047($this->headerRaw($name));
}
313 
/**
 * Return header parsed as an email address.
 *
 * @param $name
 *   Header name.
 *
 * @return
 *   An array with indexes 'email' and 'name'; when the header is stored as
 *   an array of values, an array of such arrays with the same keys.
 */
function headerEmail($name) {
  $raw = $this->headerRaw($name);
  if (!is_array($raw)) {
    return $this->parseEmailAddr($raw);
  }

  $result = array();
  foreach ($raw as $key => $data) {
    // parseEmailAddr() is a method of this class, not a global function.
    $result[$key] = $this->parseEmailAddr($data);
  }
  return $result;
}
332 
/**
 * Return a decoded header with parenthesised comments removed.
 *
 * @param $name
 *   Header name.
 *
 * @return
 *   The header with comments stripped; an array of stripped values (keys
 *   preserved) when header($name) returns an array.
 */
function headerNoComments($name) {
  $header = $this->header($name);
  if (!is_array($header)) {
    return $this->stripComments($header);
  }

  // Initialise up front so an empty array yields an empty array instead of
  // an undefined variable.
  $result = array();
  foreach ($header as $key => $data) {
    $result[$key] = $this->stripComments($data);
  }
  return $result;
}
348 
/**
 * Add a given header to the message.
 *
 * If the header already exists, the stored value is turned into an array and
 * the new value appended. Values are trimmed before being stored.
 *
 * @param $header_name
 *   Header name.
 * @param $header_value
 *   A single value, or an array of values that are added one by one.
 */
function addHeader($header_name, $header_value) {
  if (is_array($header_value)) {
    // Recurse so each element goes through the single-value path below.
    foreach ($header_value as $value) {
      $this->addHeader($header_name, $value);
    }
    return;
  }

  $header_value = trim($header_value);
  if (!isset($this->headers[$header_name])) {
    // First occurrence is stored as a plain value.
    $this->headers[$header_name] = $header_value;
    return;
  }

  // Repeated header: promote the existing value to an array if needed.
  if (!is_array($this->headers[$header_name])) {
    $this->headers[$header_name] = array($this->headers[$header_name]);
  }
  $this->headers[$header_name][] = $header_value;
}
373 
/**
 * Set a header to a given value, replacing anything stored under that name.
 *
 * @param $header_name
 *   Header name.
 * @param $header_value
 *   Value to store; it is stored as given, without trimming.
 */
function setHeader($header_name, $header_value) {
  $this->headers[$header_name] = $header_value;
}
380 
/**
 * Return the body of the message.
 *
 * @return
 *   The current value of the body member.
 */
function body() {
  $body = $this->body;
  return $body;
}
387 
/**
 * Return array of all headers.
 *
 * @return
 *   The current value of the headers member.
 */
function headers() {
  $headers = $this->headers;
  return $headers;
}
394 
/**
 * Build a message part.
 *
 * @param $msg
 *   The text of the part: a header section, a blank line, then the body.
 *   Line endings are expected to be "\n". Anything that is not a string is
 *   ignored and leaves the object unpopulated.
 */
function __construct($msg = NULL) {
  if (is_string($msg)) {
    $break = strpos($msg, "\n\n");
    if ($break === FALSE) {
      // No blank line: the whole text is headers and there is no body.
      // (Using FALSE as an offset would cut the text at arbitrary places.)
      $this->headers = $msg;
      $this->body = '';
    }
    else {
      // Keep the newline that ends the last header line.
      $this->headers = substr($msg, 0, $break+1);
      $this->body = (string) substr($msg, $break+2);
    }

    $this->parseHeaders();
    $this->parseBody();
  }
}
412 
/**
 * Convert the headers member from a text string into an associative array.
 *
 * Folded header lines are unfolded, each "Name: value" line is split at the
 * first ": ", and the result is stored through addHeader(), so repeated
 * headers become arrays. References is split into one entry per id.
 * To, Cc, From and Reply-To are split into one entry per address at commas
 * that lie outside comments, angle brackets and quoted strings.
 *
 * If the raw header text is not valid UTF-8 it is converted from ISO-8859-1
 * and an 'XX-HeaderCharset' header records that.
 */
function parseHeaders() {
  // Fix any character problems. If real email message, should only be ascii,
  // but some mbox files will have been converted, see if UTF or ISO-8859
  $charset = "";
  $headers = @iconv('UTF-8//IGNORE', 'UTF-8', $this->headers);
  if ($headers != $this->headers) {
    $headers = iconv('ISO-8859-1//IGNORE', 'UTF-8', $this->headers);
    $charset = 'ISO-8859-1';
  }

  // Unfold headers newline-whitespace => space
  $headers = preg_replace('/\n\s+/', ' ', $headers);
  // Convert to an array of headers
  $headers = explode("\n", $headers);
  // Build up the associative array of the headers
  $this->headers = array();
  if (!empty($charset)) {
    $this->addHeader('XX-HeaderCharset', $charset);
  }
  foreach ($headers as $header) {
    $colon = strpos($header, ': ');
    if ($colon > 0) {
      $header_name = substr($header, 0, $colon);
      $header_value = substr($header, $colon+2);

      switch ($header_name) {
        case 'References':
          $header_value = str_replace("\t", " ", $header_value);
          $header_value = str_replace('><', '> <', $header_value);
          $header_value = explode(' ', $header_value);
          break;

        case 'To':
        case 'Cc':
        case 'From':
        case 'Reply-To':
          // $pos is where the scan for the next separating comma resumes.
          $pos = 0;
          // See if there is a comma to process, start at beginning
          while (FALSE !== ($posc = strpos($header_value, ',', $pos))) {
            // Find the first "protecting" character at or after $pos; a
            // missing one is treated as lying at the comma itself.
            $pos1 = strpos($header_value, '(', $pos);
            $pos2 = strpos($header_value, '<', $pos);
            $pos3 = strpos($header_value, '\\', $pos);
            $pos4 = strpos($header_value, '"', $pos);
            if ($pos1 === FALSE) $pos1 = $posc;
            if ($pos2 === FALSE) $pos2 = $posc;
            if ($pos3 === FALSE) $pos3 = $posc;
            if ($pos4 === FALSE) $pos4 = $posc;
            $pos1 = min($pos1, $pos2, $pos3, $pos4);
            if ($pos1 < $posc) {
              // Something opens before the comma: jump past the protected
              // region and look for a comma again from there.
              switch ($header_value[$pos1]) {
                case '(':
                  $pos = strpos($header_value, ')', $pos1);
                  if ($pos === FALSE) $pos = $pos1+1;
                  break;
                case '<':
                  $pos = strpos($header_value, '>', $pos1);
                  if ($pos === FALSE) $pos = $pos1+1;
                  break;
                case '\\':
                  $pos = $pos1+2;
                  break;
                case '"':
                  // Find the closing quote, stepping over backslash-escaped
                  // characters inside the quoted string.
                  $len = strlen($header_value);
                  $end = $pos1 + 1;
                  while ($end < $len && $header_value[$end] != '"') {
                    $end += ($header_value[$end] == '\\') ? 2 : 1;
                  }
                  // Resume AFTER the closing quote; resuming on it would make
                  // the next pass treat it as a new opening quote.
                  $pos = ($end < $len) ? $end + 1 : $pos1 + 1;
                  break;
                default:
                  $this->logError('Error Processing Header: ' . $header_value);
                  // Always make progress so the loop cannot spin.
                  $pos = $pos1 + 1;
                  break;
              }
            }
            else {
              // All quotes are after the comma, so process it.
              $value = substr($header_value, 0, $posc);
              $this->addHeader($header_name, $value);
              // Strip out value, and process after the ,
              $header_value = substr($header_value, $posc+1);
              $pos = 0;
            }
          }
          // What is left in $header_value is the last address; it is added
          // by the common addHeader() call below.
          break;

        default:
          break;
      }
      $this->addHeader($header_name, $header_value);
    }
  }
}
507 
/**
 * Convert the text in the body member into a processed body.
 *
 * First the Content-Transfer-Encoding (quoted-printable or base64) is
 * undone. Then, depending on the Content-Type:
 * - multipart: body becomes an array of MailinglistMessagePart objects, one
 *   per section between boundary lines.
 * - message/rfc822: body becomes a MailinglistMessage.
 * - text: body is converted to UTF-8 from the charset parameter, if given.
 * Any other type is logged and the body is left as it is.
 */
function parseBody() {

  if (isset($this->headers['Content-Transfer-Encoding'])) {
    $header = new MailinglistHeader($this->headers['Content-Transfer-Encoding']);
    $cte = $header->get_atom();

    switch (strtolower($cte)) {
      case 'quoted-printable':
        $this->body = quoted_printable_decode($this->body);
        break;
      case 'base64':
        $this->body = base64_decode($this->body);
        break;
      default:
        $this->logError('Unknown CTE "@cte"', array('@cte' => $cte));
        // Intentional fall-through: an unknown encoding is left untouched.
      case 'binary':
      case '8bit':
      case '7bit':
        // These encodings use a null transform, so we can ignore them
        break;
    }
  }

  if (!isset($this->headers['Content-Type'])) {
    return;
  }

  $header = new MailinglistHeader($this->headers['Content-Type']);
  $ct = $header->get_atom();
  $pos = strpos($ct, '/');
  if (!$pos) {
    // logError() expects an array of placeholder values.
    $this->logError('Bad Content-Type Format', array('@ct' => $ct));
    $pos = 0;
  }
  $type = substr($ct, 0, $pos);
  $subtype = substr($ct, $pos+1);

  // Get parameters to Content-Type
  $header->get_char(); // Remove the terminator (;) on the atom.
  $parms = array();
  while ($parm = $header->get_token()) {
    $header->get_char(); // eat the =
    // Parameter names are case-insensitive; store them lower-cased so the
    // lookups below ('boundary', 'charset') always match.
    $parms[strtolower($parm)] = $header->get_value();
    // Consume the ";" separating this parameter from the next; otherwise
    // get_token() stops at it and every parameter after the first is lost.
    $header->skip_ws();
    $sep = $header->get_char();
    if ($sep !== FALSE && $sep != ';') {
      $header->push_back($sep);
    }
  }

  switch (strtolower($type)) {
    case 'multipart':
      if (!isset($parms['boundary']) || $parms['boundary'] === '') {
        // Without a boundary the body cannot be split reliably.
        $this->logError('Multipart message without a boundary parameter');
        break;
      }
      $bodies = explode("--" . $parms['boundary'], $this->body);
      $this->body = array();
      foreach ($bodies as $body) {
        // Real sections start with the newline that ends the boundary line;
        // this drops the preamble and the "--" left by the closing boundary.
        if (!empty($body) && $body[0] == "\n" && strlen($body) > 2) {
          $this->body[] = new MailinglistMessagePart(substr($body, 1));
        }
      }
      break;

    case 'message':
      if ($subtype == 'rfc822') {
        $this->body = new MailinglistMessage($this->body);
      }
      break;

    case 'text':
      if (isset($parms['charset'])) {
        $charset = strtoupper($parms['charset']);
        if ($charset == 'US-ASCII') $charset = 'ASCII';
        // An unknown charset makes mb_convert_encoding() return FALSE with a
        // warning on older PHP and throw ValueError on PHP 8; in both cases
        // keep the body unchanged and log the problem.
        try {
          $converted = @mb_convert_encoding($this->body, 'UTF-8', $charset);
        }
        catch (ValueError $e) {
          $converted = FALSE;
        }
        if ($converted !== FALSE) {
          $this->body = $converted;
        }
        else {
          $this->logError('Unknown charset "@charset"', array('@charset' => $charset));
        }
      }
      break;

    default:
      $this->logError('Unkown Content Type ' . $type . ' / ' . $subtype);
      dpm(array($type, $subtype));
  }
}
601 
/**
 * Parse an Email address.
 *
 * Understands "name <email>", "email (name)" and a bare "email".
 *
 * @param $addr
 *   The address text.
 *
 * @return
 *   An array with indexes 'email' and 'name', both trimmed. 'name' is an
 *   empty string when the address carries no name.
 */
function parseEmailAddr($addr) {
  $addr = (string) $addr;
  $pos = strpos($addr, '<');
  if ($pos !== FALSE) {
    // Address uses name <email> format.
    $pos1 = strpos($addr, '>', $pos);
    if ($pos1 === FALSE) {
      // No closing bracket: the address runs to the end of the text.
      $pos1 = strlen($addr);
    }
    // Length excludes only the two brackets themselves.
    $email = substr($addr, $pos+1, $pos1-$pos-1);
    // The name is whatever surrounds the bracketed address.
    $name = substr($addr, 0, $pos) . substr($addr, $pos1+1);
  }
  else {
    $pos = strpos($addr, '(');
    if ($pos !== FALSE) {
      // Address uses email (name) format.
      $pos1 = strpos($addr, ')', $pos);
      if ($pos1 === FALSE) {
        $pos1 = strlen($addr);
      }
      $name = substr($addr, $pos+1, $pos1-$pos-1);
      $email = substr($addr, 0, $pos) . substr($addr, $pos1+1);
    }
    else {
      $email = $addr;
      $name = "";
    }
  }

  return array('email' => trim($email), 'name' => trim($name));
}
643 
/**
 * Remove parenthesised comments from a header value.
 *
 * Each comment, including any comments nested inside it, is replaced by a
 * single space. A backslash inside a comment makes the next character
 * literal. If a comment is never closed, the rest of the header is dropped.
 *
 * @param $header
 *   The header text.
 *
 * @return
 *   The header text without comments.
 */
function stripComments($header) {
  while (FALSE !== ($pos = strpos($header, '('))) {
    // Scan for the ")" that balances the "(" at $pos.
    $len = strlen($header);
    $depth = 0;
    $end = FALSE;
    for ($i = $pos; $i < $len; $i++) {
      $c = $header[$i];
      if ($c == '\\') {
        $i++; // Quoted char, so ignore the next
      }
      elseif ($c == '(') {
        $depth++;
      }
      elseif ($c == ')' && --$depth == 0) {
        $end = $i;
        break;
      }
    }
    if ($end !== FALSE) {
      $header = substr($header, 0, $pos) . ' ' . substr($header, $end+1);
    }
    else {
      // badly formatted, strip rest of header
      $header = substr($header, 0, $pos);
    }
  }
  return $header;
}
657 
/**
 * Report a problem found while parsing a message.
 *
 * The message is shown with dpm() when that function is available and is
 * recorded with watchdog() at WATCHDOG_ERROR severity.
 *
 * @param $str
 *   The message; may contain placeholders that are keys of $parm.
 * @param $parm
 *   Array of placeholder values handed to watchdog(). Anything that is not
 *   an array is replaced by an empty array.
 */
function logError($str, $parm = array()) {
  if (!is_array($parm)) {
    $parm = array();
  }
  // Only call dpm() when it is actually defined.
  if (function_exists('dpm')) {
    dpm($str, NULL, 'error');
  }
  // Pass the parameter that was received; the name "$parms" is undefined in
  // this scope.
  watchdog('MailinglistMessage', $str, $parm, WATCHDOG_ERROR);
}
667 }
668 
679  public $raw_message;
/**
 * Build a message from its raw text.
 *
 * Line endings are normalised to "\n", a UTF-8 rendering of the whole text
 * is kept in raw_message, and the text is then handed to the parent
 * constructor. When the text was not valid UTF-8 it is converted from
 * ISO-8859-1 for raw_message and an 'XX-RawCharset' header records that.
 *
 * @param $msg
 *   The raw message text. Anything that is not a string is ignored.
 */
function __construct($msg="") {
  if (!is_string($msg)) {
    return;
  }

  // Fix line endings, just in case
  $msg = str_replace(array("\n\r", "\r\n", "\r"), "\n", $msg);

  // Build up our best conversion of raw message. Problem is it may have a
  // mixed encoding
  $charset = "";
  $rawmsg = @iconv('UTF-8//IGNORE', 'UTF-8', $msg);
  if ($rawmsg != $msg) {
    $charset = 'ISO-8859-1';
    $rawmsg = iconv('ISO-8859-1//IGNORE', 'UTF-8', $msg);
  }
  $this->raw_message = $rawmsg;

  // The parent parses the normalised (but not charset-converted) text.
  parent::__construct($msg);

  // Set after the parent has run, because parsing rebuilds the headers.
  if ($charset !== "") {
    $this->setHeader('XX-RawCharset', $charset);
  }
}
706 
/**
 * Get contents of raw message.
 *
 * @return
 *   The current value of the raw_message member.
 */
function raw() {
  $raw = $this->raw_message;
  return $raw;
}
715 }
716 
717 
Interface MailinglistMessageInterface.
parseHeaders()
parseHeaders() Converts the headers member from a single text string with the contents of the part (o...
__construct($str)
__construct()
Class defining the contents of an e-mail message.
push_back($char)
push_back()
addHeader($header_name, $header_value)
Add a given header to the message, if header already exists, add as an array.
$headers
An array of the headers of the message part Header name is the index to the array.
$body
The body of the message, if it is a multi-part then it is an array of the various multi-parts...
body()
Return the body of the message.
setHeader($header_name, $header_value)
Set a header to a given value.
skip_cws()
Skip Comments or whitespace.
__construct($msg="")
__construct()
headerEmail($name)
Return header parsed as an email address into an array with indexes email and name.
$header
The header being processed.
parseBody()
parseBody() converts the text string in the body variable into a processed body. For MIME type multipart...
class MailinglistHeader
mailinglist_message_rfc2047($string)
mailinglist_message_rfc2047()
headers()
Return array of all headers.
skip_ws()
Skip White Space.
parseEmailAddr($addr)
Parse an Email address.
raw()
Get contents of raw message.
get_chunk($chars)
get_chunk()
__construct($msg=NULL)
Build a message part.
logError($str, $parm=array())
logError()
MailinglistMessagePart A class to represent a piece of an email message.