From 0f0e567cc06e822f75971af29680be3d8e0867fa Mon Sep 17 00:00:00 2001 From: Vincent Le Gallic Date: Sun, 29 Sep 2013 14:45:52 +0200 Subject: [PATCH] Scripts mail-related --- mails/filter_dirs.sh | 49 ++ mails/mailbox_to_maildir.pl | 1462 +++++++++++++++++++++++++++++++++++ 2 files changed, 1511 insertions(+) create mode 100755 mails/filter_dirs.sh create mode 100755 mails/mailbox_to_maildir.pl diff --git a/mails/filter_dirs.sh b/mails/filter_dirs.sh new file mode 100755 index 0000000..1aa7070 --- /dev/null +++ b/mails/filter_dirs.sh @@ -0,0 +1,49 @@ +#! /bin/bash +## Retrie des dossiers de mails +## Librement adapté de la version de Harry + + +RCFILE="$HOME/.procmail/rc/main" + +#cd $HOME/Mail + +for mydir in $@ +do +d=$(basename "$mydir") +if [[ "$mydir" != "./.Sent" ]] +then +echo "entering directory $d" + + +[ -d "$d/cur" ] || exit 1 + +for f in $(find "$d/cur") +do +# echo "file $f" + if [[ -f $f ]] && [[ $(cat $f | formail -X date | wc -l) != "0" ]]; then +# echo "filtering mail" + formail -s procmail $RCFILE < "$f" #&& +# echo "removing mail" && +# rm "$f" +# else +# echo "is not a mail" + fi +done + +#[ -d "$d/new" ] || exit 1 + +#for f in $(find "$d/new") +#do +# echo "file $f" +# if [[ -f $f ]] && [[ $(cat $f | formail -X date | wc -l) != "0" ]]; then +# echo "marking as read" +# mv "$f" "$d/cur/$(basename \"$f\":2,Sa)" +# else +# echo "is not a mail" +# fi +#done +# +#[ "$(ls -A $d/new)" ] || [ "$(ls -A $d/cur)" ] || [ "$(ls -A $d/tmp)" ] || mv "$d" "bkp$d" +# +fi +done diff --git a/mails/mailbox_to_maildir.pl b/mails/mailbox_to_maildir.pl new file mode 100755 index 0000000..7b30a21 --- /dev/null +++ b/mails/mailbox_to_maildir.pl @@ -0,0 +1,1462 @@ +#!/usr/bin/perl -w +# + +# +# Trouvé sur le web pour transformer un fichier mailbox en +# un dossier au format maildir +# + +# $Id: mb2md.pl,v 1.26 2004/03/28 00:09:46 juri Exp $ +# +# mb2md-3.20.pl Converts Mbox mailboxes to Maildir format. +# +# Public domain. +# +# currently maintained by: +# Juri Haberland +# initially wrote by: +# Robin Whittle +# +# This script's web abode is http://batleth.sapienti-sat.org/projects/mb2md/ . +# For a changelog see http://batleth.sapienti-sat.org/projects/mb2md/changelog.txt +# +# The Mbox -> Maildir inner loop is based on qmail's script mbox2maildir, which +# was kludged by Ivan Kohler in 1997 from convertandcreate (public domain) +# by Russel Nelson. Both these convert a single mailspool file. +# +# The qmail distribution has a maildir2mbox.c program. +# +# What is does: +# ============= +# +# Reads a directory full of Mbox format mailboxes and creates a set of +# Maildir format mailboxes. Some details of this are to suit Courier +# IMAP's naming conventions for Maildir mailboxes. +# +# http://www.inter7.com/courierimap/ +# +# This is intended to automate the conversion of the old +# /var/spool/mail/blah file - with one call of this script - and to +# convert one or more mailboxes in a specifed directory with separate +# calls with other command line arguments. +# +# Run this as the user - in these examples "blah". + +# This version supports conversion of: +# +# Date The date-time in the "From " line of the message in the +# Mbox format is the date when the message was *received*. +# This is transformed into the date-time of the file which +# contains the message in the Maildir mailbox. +# +# This relies on the Date::Parse perl module and the utime +# perl function. +# +# The script tries to cope with errant forms of the +# Mbox "From " line which it may encounter, but if +# there is something really screwy in a From line, +# then perhaps the script will fail when "touch" +# is given an invalid date. Please report the +# exact nature of any such "From " line! +# +# +# Flagged +# Replied +# Read = Seen +# Tagged for Deletion +# +# In the Mbox message, flags for these are found in the +# "Status: N" or "X-Status: N" headers, where "N" is 0 +# or more of the following characters in the left column. +# +# They are converted to characters in the right column, +# which become the last characters of the file name, +# following the ":2," which indicates IMAP message status. +# +# +# F -> F Flagged +# A -> R Replied +# R -> S Read = Seen +# D -> T Tagged for Deletion (Trash) +# +# This is based on the work of Philip Mak who wrote a +# completely separate Mbox -> Maildir converter called +# perfect_maildir and posted it to the Mutt-users mailing +# list on 25 December 2001: +# +# http://www.mail-archive.com/mutt-users@mutt.org/msg21872.html +# +# Michael Best originally integrated those changes into mb2md. +# +# +# In addition, the names of the message files in the Maildir are of a +# regular length and are of the form: +# +# 7654321.000123.mbox:2,xxx +# +# Where "7654321" is the Unix time in seconds when the script was +# run and "000123" is the six zeroes padded message number as +# messages are converted from the Mbox file. "xxx" represents zero or +# more of the above flags F, R, S or T. +# +# +# --------------------------------------------------------------------- +# +# +# USAGE +# ===== +# +# Run this as the user of the mailboxes, not as root. +# +# +# mb2md -h +# mb2md [-c] -m [-d destdir] +# mb2md [-c] -s sourcefile [-d destdir] +# mb2md [-c] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension] +# +# -c use the Content-Length: headers (if present) to find the +# beginning of the next message +# Use with caution! Results may be unreliable. I recommend to do +# a run without "-c" first and only use it if you are certain, +# that the mbox in question really needs the "-c" option +# +# -m If this is used then the source will +# be the single mailbox at /var/spool/mail/blah for +# user blah and the destination mailbox will be the +# "destdir" mailbox itself. +# +# +# -s source Directory or file relative to the user's home directory, +# which is where the the "somefolders" directories are located. +# Or if starting with a "/" it is taken as a +# absolute path, e.g. /mnt/oldmail/user +# +# or +# +# A single mbox file which will be converted to +# the destdir. +# +# -R If defined, do not skip directories found in a mailbox +# directory, but runs recursively into each of them, +# creating all wanted folders in Maildir. +# Incompatible with '-f' +# +# -f somefolder Directories, relative to "sourcedir" where the Mbox files +# are. All mailboxes in the "sourcedir" +# directory will be converted and placed in the +# "destdir" directory. (Typically the Inbox directory +# which in this instance is also functioning as a +# folder for other mailboxes.) +# +# The "somefolder" directory +# name will be encoded into the new mailboxes' names. +# See the examples below. +# +# This does not save an UW IMAP dummy message file +# at the start of the Mbox file. Small changes +# in the code could adapt it for looking for +# other distinctive patterns of dummy messages too. +# +# Don't let the source directory you give as "somefolders" +# contain any "."s in its name, unless you want to +# create subfolders from the IMAP user's point of +# view. See the example below. +# +# Incompatible with '-f' +# +# +# -d destdir Directory where the Maildir format directories will be created. +# If not given, then the destination will be ~/Maildir . +# Typically, this is what the IMAP server sees as the +# Inbox and the folder for all user mailboxes. +# If this begins with a '/' the path is considered to be +# absolute, otherwise it is relative to the users +# home directory. +# +# -r strip_ext If defined this extension will be stripped from +# the original mailbox file name before creating +# the corresponding maildir. The extension must be +# given without the leading dot ("."). See the example below. +# +# -l WU-file File containing the list of subscribed folders. If +# migrating from WU-IMAP the list of subscribed folders will +# be found in the file called .mailboxlist in the users +# home directory. This will convert all subscribed folders +# for a single user: +# /bin/mb2md -s mail -l .mailboxlist -R -d Maildir +# and for all users in a directory as root you can do the +# following: +# for i in *; do echo $i;su - $i -c "/bin/mb2md -s mail -l .mailboxlist -R -d Maildir";done +# +# +# Example +# ======= +# +# We have a bunch of directories of Mbox mailboxes located at +# /home/blah/oldmail/ +# +# /home/blah/oldmail/fffff +# /home/blah/oldmail/ggggg +# /home/blah/oldmail/xxx/aaaa +# /home/blah/oldmail/xxx/bbbb +# /home/blah/oldmail/xxx/cccc +# /home/blah/oldmail/xxx/dddd +# /home/blah/oldmail/yyyy/huey +# /home/blah/oldmail/yyyy/duey +# /home/blah/oldmail/yyyy/louie +# +# With the UW IMAP server, fffff and ggggg would have appeared in the root +# of this mail server, along with the Inbox. aaaa, bbbb etc, would have +# appeared in a folder called xxx from that root, and xxx was just a folder +# not a mailbox for storing messages. +# +# We also have the mailspool Inbox at: +# +# /var/spool/mail/blah +# +# +# To convert these, as user blah, we give the first command: +# +# mb2md -m +# +# The main Maildir directory will be created if it does not exist. +# (This is true of any argument options, not just "-m".) +# +# /home/blah/Maildir/ +# +# It has the following subdirectories: +# +# /home/blah/Maildir/tmp/ +# /home/blah/Maildir/new/ +# /home/blah/Maildir/cur/ +# +# Then /var/spool/blah file is read, split into individual files and +# written into /home/blah/Maildir/cur/ . +# +# Now we give the second command: +# +# mb2md -s oldmail -R +# +# This reads recursively all Mbox mailboxes and creates: +# +# /home/blah/Maildir/.fffff/ +# /home/blah/Maildir/.ggggg/ +# /home/blah/Maildir/.xxx/ +# /home/blah/Maildir/.xxx.aaaa/ +# /home/blah/Maildir/.xxx.bbbb/ +# /home/blah/Maildir/.xxx.cccc/ +# /home/blah/Maildir/.xxx.aaaa/ +# /home/blah/Maildir/.yyyy/ +# /home/blah/Maildir/.yyyy.huey/ +# /home/blah/Maildir/.yyyy.duey/ +# /home/blah/Maildir/.yyyy.louie/ +# +# The result, from the IMAP client's point of view is: +# +# Inbox ----------------- +# | +# | fffff ----------- +# | ggggg ----------- +# | +# - xxx ------------- +# | | aaaa -------- +# | | bbbb -------- +# | | cccc -------- +# | | dddd -------- +# | +# - yyyy ------------ +# | huey ------- +# | duey ------- +# | louie ------ +# +# Note that although ~/Maildir/.xxx/ and ~/Maildir/.yyyy may appear +# as folders to the IMAP client the above commands to not generate +# any Maildir folders of these names. These are simply elements +# of the names of other Maildir directories. (if you used '-R', they +# whill be able to act as normal folders, containing messages AND folders) +# +# With a separate run of this script, using just the "-s" option +# without "-f" nor "-R", it would be possible to create mailboxes which +# appear at the same location as far as the IMAP client is +# concerned. By having Mbox mailboxes in some directory: +# ~/oldmail/nnn/ of the form: +# +# /home/blah/oldmail/nn/xxxx +# /home/blah/oldmail/nn/yyyyy +# +# then the command: +# +# mb2md -s oldmail/nn +# +# will create two new Maildirs: +# +# /home/blah/Maildir/.xxx/ +# /home/blah/Maildir/.yyyy/ +# +# Then what used to be the xxx and yyyy folders now function as +# mailboxes too. Netscape 4.77 needed to be put to sleep and given ECT +# to recognise this - deleting the contents of (Win2k example): +# +# C:\Program Files\Netscape\Users\uu\ImapMail\aaa.bbb.ccc\ +# +# where "uu" is the user and "aaa.bbb.ccc" is the IMAP server +# +# I often find that deleting all this directory's contents, except +# "rules.dat", forces Netscape back to reality after its IMAP innards +# have become twisted. Then maybe use File > Subscribe - but this +# seems incapable of subscribing to folders. +# +# For Outlook Express, select the mail server, then click the +# "IMAP Folders" button and use "Reset list". In the "All" +# window, select the mailboxes you want to see in normal +# usage. +# +# +# This script did not recurse subdirectories or delete old mailboxes, before addition of the '-R' parameter :) +# +# Be sure not to be accessing the Mbox mailboxes while running this +# script. It does not attempt to lock them. Likewise, don't run two +# copies of this script either. +# +# +# Trickier usage . . . +# ==================== +# +# If you have a bunch of mailboxes in a directory ~/oldmail/doors/ +# and you want them to appear in folders such as: +# +# ~/Maildir/.music.bands.doors.Jim +# ~/Maildir/.music.bands.doors.John +# +# etc. so they appear in an IMAP folder: +# +# Inbox ----------------- +# | music +# | bands +# | doors +# | Jim +# | John +# | Robbie +# | Ray +# +# Then you could rename the source directory to: +# +# ~/oldmail/music.bands.doors/ +# +# then use: +# +# mb2md -s oldmail -f music.bands.doors +# +# +# Or simply use '-R' switch with: +# mb2md -s oldmail -R +# +# +# Stripping mailbox extensions: +# ============================= +# +# If you want to convert mailboxes that came for example from +# a Windows box than you might want to strip the extension of +# the mailbox name so that it won't create a subfolder in your +# mail clients view. +# +# Example: +# You have several mailboxes named Trash.mbx, Sent.mbx, Drafts.mbx +# If you don't strip the extension "mbx" you will get the following +# hierarchy: +# +# Inbox +# | +# - Trash +# | | mbx +# | +# - Sent +# | | mbx +# | +# - Drafts +# | mbx +# +# This is more than ugly! +# Just use: +# mb2md -s oldmail -r mbx +# +# Note: don't specify the dot! It will be stripped off +# automagically ;) +# +#------------------------------------------------------------------------------ + + +use strict; +use Getopt::Std; +use Date::Parse; +use IO::Handle; +use Fcntl; + + # print the usage message +sub usage() { + print "Usage:\n"; + print " mb2md -h\n"; + print " mb2md [-c] -m [-d destdir]\n"; + print " mb2md [-c] -s sourcefile [-d destdir]\n"; + die " mb2md [-c] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension]\n"; +} + # get options +my %opts; +getopts('d:f:chms:r:l:R', \%opts) || usage(); +usage() if ( defined($opts{h}) + || (!defined($opts{m}) && !defined($opts{s})) ); + +# Get uid, username and home dir +my ($name, $passwd, $uid, $gid, $quota, $comment, $gcos, $homedir, $shell) = getpwuid($<); + +# Get arguments and determine source +# and target directories. +my $mbroot = undef; # this is the base directory for the mboxes +my $mbdir = undef; # this is an mbox dir relative to the $mbroot +my $mbfile = undef; # this is an mbox file +my $dest = undef; +my $strip_ext = undef; +my $use_cl = undef; # defines whether we use the Content-Length: header if present + +# if option "-c" is given, we use the Content-Length: header if present +# dangerous! may be unreliable, as the whole CL stuff is a bad idea +if (defined($opts{c})) +{ + $use_cl = 1; +} else { + $use_cl = 0; +} + +# first, if the user has gone the -m option +# we simply convert their mailfile +if (defined($opts{m})) +{ + if (defined($ENV{'MAIL'})) { + $mbfile = $ENV{'MAIL'}; + } elsif ( -f "/var/spool/mail/$name" ) { + $mbfile = "/var/spool/mail/$name" + } elsif ( -f "/var/mail/$name" ) { + $mbfile = "/var/mail/$name" + } else { + die("I searched \$MAIL, /var/spool/mail/$name and /var/mail/$name, ". + "but I couldn't find your mail spool file - "); + } +} +# see if the user has specified a source directory +elsif (defined($opts{s})) +{ + # if opts{s} doesn't start with a "/" then + # it is a subdir of the users $home + # if it does start with a "/" then + # let's take $mbroot as a absolut path + $opts{s} = "$homedir/$opts{s}" if ($opts{s} !~ /^\//); + + # check if the given source is a mbox file + if (-f $opts{s}) + { + $mbfile = $opts{s}; + } + + # otherwise check if it is a directory + elsif (-d $opts{s}) + { + $mbroot = $opts{s}; + # get rid of trailing /'s + $mbroot =~ s/\/$//; + + # check if we have a specified sub directory, + # otherwise the sub directory is '.' + if (defined($opts{f})) + { + $mbdir = $opts{f}; + # get rid of trailing /'s + $mbdir =~ s/\/$//; + } + } + + # otherwise we have an error + else + { + die("Fatal: Source is not an mbox file or a directory!\n"); + } +} + + +# get the dest +defined($opts{d}) && ($dest = $opts{d}) || ($dest = "Maildir"); +# see if we have anything to strip +defined($opts{r}) && ($strip_ext = $opts{r}); +# No '-f' with '-R' +if((defined($opts{R}))&&(defined($opts{f}))) { die "No recursion with \"-f\"";} +# Get list of folders +my @flist; +if(defined($opts{l})) +{ + open (LIST,$opts{l}) or die "Could not open mailbox list $opts{l}: $!"; + @flist=; + close LIST; +} + +# if the destination is relative to the home dir, +# check that the home dir exists +die("Fatal: home dir $homedir doesn't exist.\n") if ($dest !~ /^\// && ! -e $homedir); + +# +# form the destination value +# slap the home dir on the front of the dest if the dest does not begin +# with a '/' +$dest = "$homedir/$dest" if ($dest !~ /^\//); +# get rid of trailing /'s +$dest =~ s/\/$//; + + +# Count the number of mailboxes, or +# at least files, we found. +my $mailboxcount = 0; + +# Since we'll be making sub directories of the main +# Maildir, we need to make sure that the main maildir +# exists +&maildirmake($dest); + +# Now we do different things depending on whether we convert one mbox +# file or a directory of mbox files +if (defined($mbfile)) +{ + if (!isamailboxfile($mbfile)) + { + print "Skipping $mbfile: not a mbox file\n"; + } + else + { + print "Converting $mbfile to maildir: $dest\n"; + # this is easy, we just run the convert function + &convert($mbfile, $dest); + } +} +# if '-f' was used ... +elsif (defined($mbdir)) +{ + print "Converting mboxdir/mbdir: $mbroot/$mbdir to maildir: $dest/\n"; + + # Now set our source directory + my $sourcedir = "$mbroot/$mbdir"; + + # check that the directory we are supposed to be finding mbox + # files in, exists and is a directory + -e $sourcedir or die("Fatal: MBDIR directory $sourcedir/ does not exist.\n"); + -d $sourcedir or die("Fatal: MBDIR $sourcedir is not a directory.\n"); + + + &convertit($mbdir,""); +} +# Else, let's work in $mbroot +else +{ + opendir(SDIR, $mbroot) + or die("Fatal: Cannot open source directory $mbroot/ \n"); + + + while (my $sourcefile = readdir(SDIR)) + { + if (-d "$mbroot/$sourcefile") { + # Recurse only if requested (to be changed ?) + if (defined($opts{R})) { + print "convertit($sourcefile,\"\")\n"; + &convertit($sourcefile,""); + } else { + print("$sourcefile is a directory, but '-R' was not used... skipping\n"); + } + } + elsif (!-f "$mbroot/$sourcefile") + { + print "Skipping $mbroot/$sourcefile : not a file nor a dir\n"; + next; + } + elsif (!isamailboxfile("$mbroot/$sourcefile")) + { + print "Skipping $mbroot/$sourcefile : not a mbox file\n"; + next; + } + else + { + &convertit($sourcefile,""); + } + } # end of "while ($sfile = readdir(SDIR))" loop. + closedir(SDIR); + printf("$mailboxcount files processed.\n"); +} +# + +exit 0; + +# My debbugging placeholder I can put somewhere to show how far the script ran. +# die("So far so good.\n\n"); + +# The isamailboxfile function +# ---------------------- +# +# Here we check if the file is a mailbox file, not an address-book or +# something else. +# If file is empty, we say it is a mbox, to create it empty. +# +# Returns 1 if file is said mbox, 0 else. +sub isamailboxfile { + my ($mbxfile) = @_; + return 1 if(-z $mbxfile); + sysopen(MBXFILE, "$mbxfile", O_RDONLY) or die "Could not open $mbxfile ! \n"; + while() { + if (/^From/) { + close(MBXFILE); + return 1; + } + else { + close(MBXFILE); + return 0; + } + } +} + +# The convertit function +# ----------------------- +# +# This function creates all subdirs in maildir, and calls convert() +# for each mbox file. +# Yes, it becomes the 'main loop' :) +sub convertit +{ + # Get subdir as argument + my ($dir,$oldpath) = @_; + + $oldpath =~ s/\/\///; + + # Skip files beginning with '.' since they are + # not normally mbox files nor dirs (includes '.' and '..') + if ($dir =~ /^\./) + { + print "Skipping $dir : name begins with a '.'\n"; + return; + } + my $destinationdir = $dir; + my $temppath = $oldpath; + + # We don't want to have .'s in the $targetfile file + # name because they will become directories in the + # Maildir. Therefore we convert them to _'s + $temppath =~ s/\./\_/g; + $destinationdir =~ s/\./\_/g; + + # Appending $oldpath => path is only missing $dest + $destinationdir = "$temppath.$destinationdir"; + + # Converting '/' to '.' in $destinationdir + $destinationdir =~s/\/+/\./g; + + # source dir + my $srcdir="$mbroot/$oldpath/$dir"; + + printf("convertit(): Converting $dir in $mbroot/$oldpath to $dest/$destinationdir\n"); + &maildirmake("$dest/$destinationdir"); + print("destination = $destinationdir\n"); + if (-d $srcdir) { + opendir(SUBDIR, "$srcdir") or die "can't open $srcdir !\n"; + my @subdirlist=readdir(SUBDIR); + closedir(SUBDIR); + foreach (@subdirlist) { + next if (/^\.+$/); + print("Sub: $_\n"); + print("convertit($_,\"$oldpath/$dir\")\n"); + &convertit($_,"$oldpath/$dir"); + } + } else { + # Source file verifs .... + # + return if(defined($opts{l}) && !inlist("$oldpath/$dir",@flist)); + + if (!isamailboxfile("$mbroot/$oldpath/$dir")) + { + print "Skipping $dir (is not mbox)\n"; + next; + } + + # target file verifs... + # + # if $strip_extension is defined, + # strip it off the $targetfile + defined($strip_ext) && ($destinationdir =~ s/\.$strip_ext$//); + &convert("$mbroot/$oldpath/$dir","$dest/$destinationdir"); + $mailboxcount++; + } +} +# The maildirmake function +# ------------------------ +# +# It does the same thing that the maildirmake binary that +# comes with courier-imap distribution +# +sub maildirmake +{ + foreach(@_) { + -d $_ or mkdir $_,0700 or die("Fatal: Directory $_ doesn't exist and can't be created.\n"); + + -d "$_/tmp" or mkdir("$_/tmp",0700) or die("Fatal: Unable to make $_/tmp/ subdirectory.\n"); + -d "$_/new" or mkdir("$_/new",0700) or die("Fatal: Unable to make $_/new/ subdirectory.\n"); + -d "$_/cur" or mkdir("$_/cur",0700) or die("Fatal: Unable to make $_/cur/ subdirectory.\n"); + } +} + +# The inlist function +# ------------------------ +# +# It checks that the folder to be converted is in the list of subscribed +# folders in WU-IMAP +# +sub inlist +{ + my ($file,@flist) = @_; + my $valid = 0; + # Get rid of the first / if any + $file =~ s/^\///; + foreach my $folder (@flist) { + chomp $folder; + if ($file eq $folder) { + $valid = 1; + last; + } + } + if (!$valid) { + print "$file is not in list\n"; + } + else { + print "$file is in list\n"; + } + + return $valid; +} + +# + +# The convert function +# --------------------- +# +# This function does the down and dirty work of +# actually converting the mbox to a maildir +# +sub convert +{ + # get the source and destination as arguments + my ($mbox, $maildir) = @_; + + printf("Source Mbox is $mbox\n"); + printf("Target Maildir is $maildir \n") ; + + # create the directories for the new maildir + # + # if it is the root maildir (ie. converting the inbox) + # these already exist but thats not a big issue + + &maildirmake($maildir); + + # Change to the target mailbox directory. + + chdir "$maildir" ; + + # Converts a Mbox to multiple files + # in a Maildir. + # This is adapted from mbox2maildir. + # + # Open the Mbox mailbox file. + + + if (sysopen(MBOX, "$mbox", O_RDONLY)) + { + #printf("Converting Mbox $mbox . . . \n"); + } + else + { + die("Fatal: unable to open input mailbox file: $mbox ! \n"); + } + + # This loop scans the input mailbox for + # a line starting with "From ". The + # "^" before it is pattern-matching + # lingo for it being at the start of a + # line. + # + # Each email in Mbox mailbox starts + # with such a line, which is why any + # such line in the body of the email + # has to have a ">" put in front of it. + # + # This is not required in a Maildir + # mailbox, and some majik below + # finds any such quoted "> From"s and + # gets rid of the "> " quote. + # + # Each email is put in a file + # in the cur/ subdirectory with a + # name of the form: + # + # nnnnnnnnn.cccc.mbox:2,XXXX + # + # where: + # "nnnnnnnnn" is the Unix time since + # 1970 when this script started + # running, incremented by 1 for + # every email. This is to ensure + # unique names for each message + # file. + # + # ".cccc" is the message count of + # messages from this mbox. + # + # ".mbox" is just to indicate that + # this message was converted from + # an Mbox mailbox. + # + # ":2," is the start of potentially + # multiple IMAP flag characters + # "XXXX", but may be followed by + # nothing. + # + # This is sort-of compliant with + # the Maildir naming conventions + # specified at: + # + # http://www.qmail.org/man/man5/maildir.html + # + # This approach does not involve the + # process ID or the hostname, but it is + # probably good enough. + # + # When the IMAP server looks at this + # mailbox, it will move the files to + # the cur/ directory and change their + # names as it pleases. In the case + # of Courier IMAP, the names will + # become like: + # + # 995096541.25351.mbox:2,S + # + # with 25351 being Courier IMAP's + # process ID. The :2, is the start + # of the flags, and the "S" means + # that this one has been seen by + # the user. (But is this the same + # meaning as the user actually + # having opened the message to see + # its contents, rather than just the + # IMAP server having been asked to + # list the message's Subject etc. + # so the client could list it in the + # visible Inbox?) + # + # This contrasts with a message + # created by Courier IMAP, say with + # a message copy, which is like: + # + # 995096541.25351.zair,S=14285:2,S + # + # where ",S=14285" is the size of the + # message in bytes. + # + # Courier Maildrop's names are similar + # but lack the ":2,XXXX" flags . . . + # except for my modified Maildrop + # which can deliver them with a + # ":2,T" - flagged for deletion. + # + # I have extended the logic of the + # per-message inner loop to stop + # saving a file for a message with: + # + # Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA + # + # This is the dummy message, always + # at the start of an Mbox format + # mailbox file - and is put there + # by UW IMAPD. Since quite a few + # people will use this for + # converting from a UW system, + # I figure it is worth it. + # + # I will not save any such message + # file for the dummy message. + # + # Plan + # ---- + # + # We want to read the entire Mbox file, whilst + # going through a loop for each message we find. + # + # We want to read all the headers of the message, + # starting with the "From " line. For that "From " + # line we want to get a date. + # + # For all other header lines, we want to store them + # in $headers whilst parsing them to find: + # + # 1 - Any flags in the "Status: " or "X-Status: " or + # "X-Mozilla-Status: " lines. + # + # 2 - A subject line indicating this is the dummy message + # at the start (typically, but not necessarily) of + # the Mbox. + # + # Once we reach the end of the headers, we will crunch any + # flags we found to create a file name. Then, unless this is + # the dummy message we create that file and write all the + # headers to it. + # + # Then we continue reading the Mbox, converting ">From " to + # "From " and writing it to the file, until we reach one of: + # + # 1 - Another "From " line (indicating the start of another + # message). + # + # or + # + # 2 - The end of the Mbox. + # + # In the former case, which we detect at the start of the loop + # we need to close the file and touch it to alter its date-time. + # + # In the later case, we also need to close the file and touch + # it to alter its date-time - but this is beyond the end of the + # loop. + + + # Variables + # --------- + + my $messagecount = 0; + + # For generating unique filenames for + # each message. Initialise it here with + # numeric time in seconds since 1970. + my $unique = time; + + # Name of message file to delete if we found that + # it was created by reading the Mbox dummy message. + + my $deletedummy = ''; + + # To store the complete "From (address) (date-time) + # which delineates the start of each message + # in the Mbox + my $fromline = ''; + + + # Set to 1 when we are reading the header lines, + # including the "From " line. + # + # 0 means we are reading the message body and looking + # for another "From " line. + + my $inheaders = 0; + + # Variable to hold all headers (apart from + # the first line "From ...." which is not + # part of the message itself. + my $headers = ''; + + # Variable to hold the accumulated characters + # we find in header lines of the type: + # + # Status: + # X-Status: + # X-Mozilla-Status: + # X-Evolution: + my $flags = ''; + + # To build the file name for the message in. + my $messagefn = ''; + + + # The date string from the "From " line of each + # message will be written here - and used by + # touch to alter the date-time of each message + # file. Put non-date text here to make it + # spit the dummy if my code fails to find a + # date to write into this. + + my $receivedate = 'Bogus'; + + # The subject of the message + my $subject = ''; + + my $previous_line_was_empty = 1; + + # We record the message start line here, for error + # reporting. + my $startline; + + # If defined, we use this as the number of bytes in the + # message body rather than looking for a /^From / line. + my $contentlength; + + # A From lines can either occur as the first + # line of a file, or after an empty line. + # Most mail systems will quote all From lines + # appearing in the message, but some will only + # do it when necessary. + # Since we initialise the variable to true, + # we don't need to check for beginning of file. + + while() + { + # exchange possible Windows EOL (CRLF) with Unix EOL (LF) + $_ =~ s/\r\n$/\n/; + + if ( /^From / + && $previous_line_was_empty + && (!defined $contentlength) + ) + { + # We are reading the "From " line which has an + # email address followed by a receive date. + # Turn on the $inheaders flag until we reach + # the end of the headers. + + $inheaders = 1; + + # record the message start line + + $startline = $.; + + # If this is not the first run through the loop + # then this means we have already been working + # on a message. + + if ($messagecount > 0) + { + # If so, then close that message file and then + # use utime to change its date-time. + # + # Note this code should be duplicated to do + # the same thing at the end of the while loop + # since we must close and touch the final message + # file we were writing when we hit the end of the + # Mbox file. + + close (OUT); + if ($messagefn ne '') { + my $t = str2time($receivedate); + utime $t, $t, $messagefn; + } + } + + # Because we opened the Mbox file without any + # variable, I think this means that we have its + # current line in Perl's default variable "$_". + # So all sorts of pattern matching magic works + # directly on it. + + # We are currently reading the first line starting with + # "From " which contains the date we want. + # + # This will be of the form: + # + # From dduck@test.org Wed Nov 24 11:05:35 1999 + # + # at least with UW-IMAP. + # + # However, I did find a nasty exception to this in my + # tests, of the form: + # + # "bounce-MusicNewsletter 5-rw=test.org"@announce2.mp3.com + # + # This makes it trickier to get rid of the email address, + # but I did find a way. I can't rule out that there would + # be some address like this with an "@" in the quoted + # portion too. + # + # Unfortunately, testing with an old Inbox Mbox file, + # I also found an instance where the email address + # had no @ sign at all. It was just an email + # account name, with no host. + # + # I could search for the day of the week. If I skipped + # at least one word of non-whitespace (1 or more contiguous + # non-whitespace characters) then searched for a day of + # the week, then I should be able to avoid almost + # every instance of a day of the week appearing in + # the email address. + # + # Do I need a failsafe arrangement to provide some + # other date to touch if I don't get what seems like + # a date in my resulting string? For now, no. + # + # I will take one approach if there is an @ in the + # "From " line and another (just skip the first word + # after "From ") if there is no @ in the line. + # + # If I knew more about Perl I would probably do it in + # a more elegant way. + + # Copy the current line into $fromline. + + $fromline = $_; + + # Now get rid of the "From ". " =~ s" means substitute. + # Find the word "From " at the start of the line and + # replace it with nothing. The nothing is what is + # between the second and third slash. + + $fromline =~ s/^From // ; + + + # Likewise get rid of the email address. + # This first section is if we determine there is one + # (or more . . . ) "@" characters in the line, which + # would normally be the case. + + if ($fromline =~ m/@/) + { + # The line has at least one "@" in it, so we assume + # this is in the middle of an email address. + # + # If the email address had no spaces, then we could + # get rid of the whole thing by searching for any number + # of non-whitespace characters (\S) contiguously, and + # then I think a space. Subsitute nothing for this. + # + # $fromline =~ s/(\S)+ // ; + # + # But we need something to match any number of non-@ + # characters, then the "@" and then all the non-whitespace + # characters from there (which takes us to the end of + # "test.org") and then the space following that. + # + # A tutorial on regular expressions is: + # + # http://www.perldoc.com/perl5.6.1/pod/perlretut.html + # + # Get rid of all non-@ characters up to the first "@": + + $fromline =~ s/[^@]+//; + + + # Get rid of the "@". + + $fromline =~ s/@//; + } + # If there was an "@" in the line, then we have now + # removed the first one (lets hope there aren't more!) + # and everything which preceded it. + # + # we now remove either something like + # '(foo bar)'. eg. '(no mail address)', + # or everything after the '@' up to the trailing + # timezone + # + # FIXME: all those regexp should be combined to just one single one + + $fromline =~ s/(\((\S*| )+\)|\S+) *//; + + chomp $fromline; + + # Stash the date-time for later use. We will use it + # to touch the file after we have closed it. + + $receivedate = $fromline; + + # Debugging lines: + # + # print "$receivedate is the receivedate of message $messagecount.\n"; + # $receivedate = "Wed Nov 24 11:05:35 1999"; + # + # To look at the exact date-time of files: + # + # ls -lFa --full-time + # + # End of handling the "From " line. + } + + + # Now process header lines which are not the "From " line. + + if ( ($inheaders eq 1) + && (! /^From /) + ) + { + # Now we are reading the header lines after the "From " line. + # Keep looking for the blank line which indicates the end of the + # headers. + + + # ".=" means append the current line to the $headers + # variable. + # + # For some reason, I was getting two blank lines + # at the end of the headers, rather than one, + # so I decided not to read in the blank line + # which terminates the headers. + # + # Delete the "unless ($_ eq "\n")" to get rid + # of this kludge. + + $headers .= $_ unless ($_ eq "\n"); + + # Now scan the line for various status flags + # and to fine the Subject line. + + $flags .= $1 if /^Status: ([A-Z]+)/; + $flags .= $1 if /^X-Status: ([A-Z]+)/; + if (/^X-Mozilla-Status: ([0-9a-f]{4})/i) + { + $flags .= 'R' if (hex($1) & 0x0001); + $flags .= 'A' if (hex($1) & 0x0002); + $flags .= 'D' if (hex($1) & 0x0008); + } + if(/^X\-Evolution:\s+\w{8}\-(\w{4})/oi) + { + $b = pack("H4", $1); #pack it as 4 digit hex (0x0000) + $b = unpack("B32", $b); #unpack into bit string + + # "usually" only the right most six bits are used + # however, I have come across a seventh bit in + # about 15 (out of 10,000) messages with this bit + # activated. + # I have not found any documentation in the source. + # If you find out what it does, please let me know. + + # Notes: + # Evolution 1.4 does mark forwarded messages. + # The sixth bit is to denote an attachment + + $flags .= 'A' if($b =~ /[01]{15}1/); #replied + $flags .= 'D' if($b =~ /[01]{14}1[01]{1}/); #deleted + $flags .= 'T' if($b =~ /[01]{13}1[01]{2}/); #draft + $flags .= 'F' if($b =~ /[01]{12}1[01]{3}/); #flagged + $flags .= 'R' if($b =~ /[01]{11}1[01]{4}/); #seen/read + } + $subject = $1 if /^Subject: (.*)$/; + if ($use_cl eq 1) + { + $contentlength = $1 if /^Content-Length: (\d+)$/; + } + + # Now look out for the end of the headers - a blank + # line. When we find it, create the file name and + # analyse the Subject line. + + if ($_ eq "\n") + { + # We are at the end of the headers. Set the + # $inheaders flag back to 0. + + $inheaders = 0; + + # Include the current newline in the content length + + ++$contentlength if defined $contentlength; + + # Create the file name for the current message. + # + # A simple version of this would be: + # + # $messagefn = "cur/$unique.$messagecount.mbox:2,"; + # + # This would create names with $messagecount values of + # 1, 2, etc. But for neatness when looking at a + # directory of such messages, sorted by filename, + # I want to have leading zeroes on message count, so + # that they would be 000001 etc. This makes them + # appear in message order rather than 1 being after + # 19 etc. So this is good for up to 999,999 messages + # in a mailbox. It is a cosmetic matter for a person + # looking into the Maildir directory manually. + # To do this, use sprintf instead with "%06d" for + # 6 characters of zero-padding: + + $messagefn = sprintf ("cur/%d.%06d.mbox:2,", $unique, $messagecount) ; + + + # Append flag characters to the end of the + # filename, according to flag characters + # collected from the message headers + + $messagefn .= 'F' if $flags =~ /F/; # Flagged. + $messagefn .= 'R' if $flags =~ /A/; # Replied to. + $messagefn .= 'S' if $flags =~ /R/; # Seen or Read. + $messagefn .= 'T' if $flags =~ /D/; # Tagged for deletion. + + + # Opens filename $messagefn for output (>) with filehandle OUT. + + open(OUT, ">$messagefn") or die("Fatal: unable to create new message $messagefn"); + + # Count the messages. + + $messagecount++; + + # Only for the first message, + # check to see if it is a dummy. + # Delete the message file we + # just created if it was for the + # dummy message at the start + # of the Mbox. + # + # Add search terms as required. + # The last 2 lines are for rent. + # + # "m" means match the regular expression, + # but we can do without it. + # + # Do I need to escape the ' in "DON'T"? + # I didn't in the original version. + + if ( (($messagecount == 1) && defined($subject)) + && ($subject =~ m/^DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA/) + ) + { + # Stash the file name of the dummy message so we + # can delete it later. + + $deletedummy = "$messagefn"; + } + + # Print the collected headers to the message file. + + print OUT "$headers"; + + + # Clear $headers and $flags ready for the next message. + + $headers = ''; + $flags = ''; + + # End of processing the headers once we found the + # blank line which terminated them + } + + # End of dealing with the headers. + } + + + if ( $inheaders eq 0) + { + + # We are now processing the message body. + # + # Now we have passed the headers to the + # output file, we scan until the while + # loop finds another "From " line. + + # Decrement our content length if we're + # using it to find the end of the message + # body + + if (defined $contentlength) { + + # Decrement our $contentlength variable + + $contentlength -= length($_); + + # The proper end for a message with Content-Length + # specified is the $contentlength variable should + # be exactly -1 and we should be on a bare + # newline. Note that the bare newline is not + # printed to the end of the current message as + # it's actually a message separator in the mbox + # format rather than part of the message. The + # next line _should_ be a From_ line, but just in + # case the Content-Length header is incorrect + # (e.g. a corrupt mailbox), we just continue + # putting lines into the current message until we + # see the next From_ line. + + if ($contentlength < 0) { + if ($contentlength == -1 && $_ eq "\n") { + $contentlength = undef; + next; + } + $contentlength = undef; + } + } + + # + # We want to copy every part of the message + # body to the output file, except for the + # quoted ">From " lines, which was the + # way the IMAP server encoded body lines + # starting with "From ". + # + # Pattern matching Perl majik to + # get rid of an Mbox quoted From. + # + # This works on the default variable "$_" which + # contains the text from the Mbox mailbox - I + # guess this is the case because of our + # (open(MBOX ....) line above, which did not + # assign this to anything else, so it would go + # to the default variable. This enables + # inscrutably terse Perlisms to follow. + # + # "s" means "Subsitute" and it looks for any + # occurrence of ">From" starting at the start + # of the line. When it finds this, it replaces + # it with "From". + # + # So this finds all instances in the Mbox message + # where the original line started with the word + # "From" but was converted to ">From" in order to + # not be mistaken for the "From ..." line which + # is used to demark each message in the Mbox. + # This was was a destructive conversion because + # any message which originally had ">From" at the + # start of the line, before being put into the + # Mbox, will now have that line without the ">". + + s/^>From /From /; + + # Glorious tersness here. Thanks Simon for + # explaining this. + # + # "print OUT" means print the default variable to + # the file of file handle OUT. This is where + # the bulk of the message text is written to + # the output file. + + print OUT or die("Fatal: unable to write to new message to $messagefn"); + + + # End of the if statement dealing with message body. + } + + $previous_line_was_empty = ( $_ eq "\n" ); + + # End of while (MBOX) loop. + } + # Close the input file. + + close(MBOX); + + # Close the output file, and duplicate the code + # from the start of the while loop which touches + # the date-time of the most recent message file. + + close(OUT); + if ($messagefn ne '') { + my $t = str2time($receivedate); + utime $t, $t, $messagefn; + } + + # After all the messages have been + # converted, check to see if the + # first one was a dummy. + # if so, delete it and make + # the message count one less. + + if ($deletedummy ne "") + { + printf("Dummy mail system first message detected and not saved.\n"); + unlink $deletedummy; + + $messagecount--; + + } + + printf("$messagecount messages.\n\n"); +} -- 2.39.2