Skip to content

Commit 03bcf5a

Browse files
committed
20050926
- Use antiword in preference to catdoc for translating msword documents
- Fixed deletion of temporary directory (broken since 20050520)
1 parent b44a6fa commit 03bcf5a

File tree

2 files changed

+23
-13
lines changed

2 files changed

+23
-13
lines changed

CHANGELOG

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
20050926
2+
3+
- Use antiword in preference to catdoc for translating msword documents
4+
- Fixed deletion of temporary directory (broken since 20050520)
5+
16
20050528
27

38
- Fixed translation of content with no file name extensions

textmail

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use strict;
2020
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2121
# or visit http://www.gnu.org/copyleft/gpl.html
2222
#
23-
# 20050528 raf <raf@raf.org>
23+
# 20050926 raf <raf@raf.org>
2424

2525
=head1 NAME
2626
@@ -272,10 +272,10 @@ delete windows executables (with output in mailbox format):
272272
273273
=head1 REQUIREMENTS
274274
275-
MS Word and RTF documents are translated into plain text using I<catdoc(1)>.
276-
If I<textmail(1)> can't find I<catdoc(1)>, then MS Word and RTF attachments
277-
are left intact. So make sure that I<catdoc(1)> is installed and in the
278-
C<$PATH>.
275+
MS Word and RTF documents are translated into plain text using
276+
I<antiword(1)> or I<catdoc(1)>. If I<textmail(1)> can't find I<antiword(1)> or
277+
I<catdoc(1)>, then MS Word and RTF attachments are left intact. So make sure
278+
that I<antiword(1)> or I<catdoc(1)> is installed and in the C<$PATH>.
279279
280280
MS Excel documents are translated into csv files using I<xls2csv(1)>. If
281281
I<textmail(1)> can't find I<xls2csv(1)>, then MS Excel attachments are left
@@ -308,6 +308,7 @@ temporary directory will be created.
308308
=head1 SEE ALSO
309309
310310
I<procmail(1)>,
311+
I<antiword(1)>,
311312
I<catdoc(1)>,
312313
I<xls2csv(1)>,
313314
I<lynx(1)>,
@@ -318,7 +319,7 @@ C<http://raf.org/minimail/>
318319
319320
=head1 AUTHOR
320321
321-
20050528 raf <raf@raf.org>
322+
20050926 raf <raf@raf.org>
322323
323324
=head1 URL
324325
@@ -891,14 +892,15 @@ nroff if exists $opt{r};
891892
html if exists $opt{w};
892893
my $mailbox = exists $opt{M};
893894
my $catdoc = find('catdoc');
895+
my $antiword = find('antiword') || $catdoc;
894896
my $xls2csv = find('xls2csv');
895897
my $lynx = find('lynx');
896898
my $pdftotext = find('pdftotext');
897899
my $mktemp = find('mktemp');
898900
paths() if exists $opt{'?'};
899901
my @exe = qw(com exe pif dll ocx scr vbs js);
900902
my $force = exists $opt{f};
901-
my $remove_word = (defined $catdoc || $force) && ! exists $opt{W};
903+
my $remove_word = (defined $antiword || $force) && ! exists $opt{W};
902904
my $remove_excel = (defined $xls2csv || $force) && ! exists $opt{E};
903905
my $remove_html = (defined $lynx || $force) && ! exists $opt{H};
904906
my $remove_rtf = (defined $catdoc || $force) && ! exists $opt{R};
@@ -931,15 +933,17 @@ formail(sub { <> }, sub
931933
{
932934
my $m = mail2singlepart(textmail(mail2multipart(shift)));
933935
delete_header($m, qr/(?:content-length|lines)/i);
934-
$m = mail2mbox($m) if $mailbox;
935-
print mail2str($m);
936+
print mail2str($mailbox ? mail2mbox($m) : $m);
936937
});
937938
939+
rmdir $tmp or system "rm -rf $tmp";
940+
938941
# Print paths to help applications then exit
939942
940943
sub paths
941944
{
942-
print(defined $catdoc ? $catdoc : "catdoc not found: MS Word and RTF will not be translated", "\n");
945+
print(defined $antiword ? $antiword : "antiword/catdoc not found: MS Word will not be translated", "\n");
946+
print(defined $catdoc ? $catdoc : "catdoc not found: MS RTF will not be translated", "\n");
943947
print(defined $xls2csv ? $xls2csv : "xls2csv not found: MS Excel with not be translated", "\n");
944948
print(defined $lynx ? $lynx : "lynx not found: HTML will not be translated", "\n");
945949
print(defined $pdftotext ? $pdftotext : "pdftotext not found: PDF will not be translated", "\n");
@@ -994,11 +998,11 @@ sub textmail
994998
995999
for (my $i = 0; $i < @parts; ++$i)
9961000
{
997-
# Replace MS Word attachments with plain text (via catdoc)
1001+
# Replace MS Word attachments with plain text (via antiword/catdoc)
9981002
9991003
if ($remove_word && isa($parts[$i], qr/.*ms-?word/i, qr/\.doc$/i))
10001004
{
1001-
$parts[$i] = translate($parts[$i], 'doc', 'txt', $catdoc);
1005+
$parts[$i] = translate($parts[$i], 'doc', 'txt', $antiword);
10021006
next;
10031007
}
10041008
@@ -1118,9 +1122,10 @@ sub translate
11181122
11191123
return $part if !defined $cmd && !$force;
11201124
my $origpath = filename($part);
1125+
$origpath .= '.' . $ext[0] unless $origpath =~ /\.(?:@{[join '|', @ext]})$/i;
11211126
my $textpath = $origpath;
11221127
$textpath =~ s/\.(?:@{[join '|', @ext]})$/.$fmt/i;
1123-
$textpath .= ".$fmt" unless $textpath =~ /\.\Q$fmt\E$/i;
1128+
$textpath .= ".$fmt" if $textpath eq $origpath;
11241129
return newmail(filename => $textpath, body => '') if !defined $cmd && $force;
11251130
my $origdata = body($part);
11261131
open A, ">$tmp/$origpath" and do { print A $origdata; close A };

0 commit comments

Comments
 (0)