diff options
author | Silvio Rhatto <rhatto@riseup.net> | 2014-09-18 18:11:07 -0300 |
---|---|---|
committer | Silvio Rhatto <rhatto@riseup.net> | 2014-09-18 18:11:07 -0300 |
commit | 466c31cd91e5576405af190f76c073e1d06a4482 (patch) | |
tree | 1a12427f1704b0b0085a8f686f0424d54eb3172f /email/eml2mbox/eml2mbox.rb | |
parent | f32ec8c49226dc860c38a2c36d7587d1d27419b8 (diff) | |
download | utils-mail-466c31cd91e5576405af190f76c073e1d06a4482.tar.gz utils-mail-466c31cd91e5576405af190f76c073e1d06a4482.tar.bz2 |
Sorting scripts
Diffstat (limited to 'email/eml2mbox/eml2mbox.rb')
-rwxr-xr-x | email/eml2mbox/eml2mbox.rb | 265 |
1 files changed, 0 insertions, 265 deletions
#!/usr/bin/ruby
#============================================================================================#
# eml2mbox.rb v0.08
# Last updated: Jan 23, 2004
#
# Converts a bunch of eml files into one mbox file.
#
# Usage: [ruby] eml2mbox.rb [-c] [-l] [-s] [-yz] [emlpath [trgtmbx]]
# Switches:
#   -c   Remove CRs (^M) appearing at end of lines (Unix)
#   -l   Remove LFs appearing at end of lines (old Mac) - not tested
#   -s   Don't use standard mbox postmark formatting (for From_ line)
#        This will force the use of original From and Date found in mail headers.
#        Not recommended, unless you really have problems importing emls.
#   -yz  Use this to force the order of the year and timezone in date in the From_
#        line from the default [year][timezone] to [timezone][year].
#   emlpath - Path of dir with eml files. Defaults to the current dir if not specified
#   trgtmbx - Name of the target mbox file. Defaults to "archive.mbox" in 'emlpath'.
#             A relative trgtmbx is resolved relative to 'emlpath'.
#
# Ruby homepage:        http://www.ruby-lang.org/en/
# Unix mailbox format:  http://www.broobles.com/eml2mbox/mbox.html
# This script:          http://www.broobles.com/eml2mbox
#
#============================================================================================#
# Licence:
#
# This script is free software; you can redistribute it and/or modify it under the terms of
# the GNU Lesser General Public License as published by the Free Software Foundation;
# either version 2.1 of the License, or (at your option) any later version.
#
# You should have received a copy of the GNU Lesser General Public License along with this
# script; if not, please visit http://www.gnu.org/copyleft/gpl.html for more information.
#============================================================================================#

# Date._parse replaces the old "parsedate" library, which no longer ships with Ruby.
require "date"

#=======================================================#
# Class that encapsulates the processing file in memory #
#=======================================================#

# Collects the lines of one eml message and derives the mbox "From_" postmark
# (sender + date) from its From: and Date: headers.
#
# Reads the global $switches hash (see extractSwitches) for the
# "noStandardFromLine", "removeCRs" and "removeLFs" options.
class FileInMemory

  # Named time zones mapped to the numeric offset written in the From_ line.
  ZoneOffset = {
    # Standard zones by RFC 2822
    'UTC' => '0000',
    'UT'  => '0000',  'GMT' => '0000',
    'EST' => '-0500', 'EDT' => '-0400',
    'CST' => '-0600', 'CDT' => '-0500',
    'MST' => '-0700', 'MDT' => '-0600',
    'PST' => '-0800', 'PDT' => '-0700',
  }.freeze

  def initialize()
    @lines = []   # message lines in input order; the From_ line is prepended on output
    @from = nil   # from part of the From_ line
    @date = nil   # date part of the From_ line
  end

  # Adds one raw line (including its line break) of the eml file.
  #
  # The first "From: ...@..." header supplies the sender and the first "Date:"
  # header supplies the date of the postmark. Once the sender is known, any
  # line starting with "From " is quoted with '>' so that mbox readers do not
  # mistake it for the start of a new message.
  def addLine(line)
    # Only "From " (with the space) is a message separator in mbox; lines such
    # as "From:" or "Fromage" must not be altered.
    line = '>' + line if @from && line =~ /\AFrom /

    # If the line is the first valid From: header, save it (without the line break)
    if @from.nil? && line =~ /\AFrom:\s.*@/
      # chomp (not chop) so a header without a trailing newline keeps its last character
      @from = line.sub(/From:/, 'From').chomp
      @from = standardizeFrom(@from) unless $switches["noStandardFromLine"]
    end

    # Get the date from the first Date: header
    if @date.nil? && line =~ /\ADate:\s/
      value = line.sub(/Date:\s/, '')
      if $switches["noStandardFromLine"]
        # Don't parse the content of the Date header
        @date = value
      else
        # Stays nil when the header cannot be parsed; getProcessedLines then
        # falls back to the current time.
        @date = parseMboxDate(value)
      end
    end

    # Now add the line to the array
    @lines << fixLineEndings(line)
  end

  # Forms the first line (from + date) and returns all the lines of the
  # message followed by an empty separator line.
  # Returns nil when no usable From: header was seen.
  def getProcessedLines()
    return nil if @from.nil?

    if @date.nil?
      puts "WARN: Failed to extract date. Will use current time in the From_ line"
      @date = formMboxDate(Time.now, nil)
    end
    postmark = fixLineEndings(@from + " " + @date)
    [postmark] + @lines + [""]
  end

  # Fixes CR/LFs according to the -c and -l switches
  def fixLineEndings(line)
    line = removeCR(line) if $switches["removeCRs"]
    line = removeLF(line) if $switches["removeLFs"]
    line
  end

  # emls usually have CR+LF (DOS) line endings, Unix uses LF as a line break,
  # so there's a hanging CR at the end of the line when viewed on Unix.
  # Turns a trailing CR+LF into a single LF.
  def removeCR(line)
    # A regexp is used because String#[] returns a String on Ruby >= 1.9,
    # so comparing line[-2] with the integer 0xD never matches there.
    line.sub(/\r\n\z/, "\n")
  end

  # Similar to the above: removes a trailing LF, if there is one.
  def removeLF(line)
    # Not chomp("\n"): that would strip a trailing CR or CR+LF as well.
    line.sub(/\n\z/, '')
  end

  private

  # Converts the content of a Date: header to the mbox postmark date.
  # Returns nil when the header holds no complete, valid date.
  def parseMboxDate(value)
    parts = Date._parse(value)
    return nil unless parts[:year] && parts[:mon] && parts[:mday]

    # Numeric offsets are used as they are; zone names are converted to an
    # offset (unknown names yield nil and the zone is left out).
    timezone = parts[:zone]
    timezone = ZoneOffset[timezone] unless timezone =~ /\A[+-]\d+/

    # The header's own clock fields are kept as written; the zone is only appended.
    time = Time.gm(parts[:year], parts[:mon], parts[:mday],
                   parts[:hour] || 0, parts[:min] || 0, parts[:sec] || 0)
    formMboxDate(time, timezone)
  rescue ArgumentError
    # Out-of-range fields (e.g. day 42) or an over-long header
    nil
  end

end

#================#
# Helper methods #
#================#

# Converts: 'From "some one <aa@aa.aa>" <aa@aa.aa>' -> 'From aa@aa.aa'
# A line without a '<'...'>' pair is returned unchanged - it is either
# already well formed or is invalid.
def standardizeFrom(fromLine)
  # Get indexes of last "<" and ">" in line
  openIndex = fromLine.rindex('<')
  closeIndex = fromLine.rindex('>')
  if openIndex && closeIndex && openIndex < closeIndex
    # fromLine[0..4] is the leading "From "
    return fromLine[0..4] + fromLine[(openIndex + 1)...closeIndex]
  end
  fromLine
end

# Returns a mbox postmark formatted date.
# If timezone is unknown (nil), it is skipped.
# With the -yz switch the timezone is placed before the year.
# mbox date format used is described here:
# http://www.broobles.com/eml2mbox/mbox.html
def formMboxDate(time, timezone)
  stamp = time.strftime("%a %b %d %H:%M:%S")
  year = time.strftime("%Y")
  return stamp + " " + year if timezone.nil?

  # The zone is appended after formatting so it is never read as a strftime directive.
  if $switches["zoneYearOrder"]
    stamp + " " + timezone.to_s + " " + year
  else
    stamp + " " + year + " " + timezone.to_s
  end
end

# Extracts all leading switches from the command line and returns
# a hashmap with valid switch names as keys and booleans as values.
# The switches are removed from ARGV, so the real params start at ARGV[0].
def extractSwitches()
  switches = Hash.new(false) # All switches (values) default to false
  count = 0
  while ARGV[count] =~ /^-/ # while arguments are switches
    case ARGV[count]
    when "-c"
      switches["removeCRs"] = true
      puts "\nWill fix lines ending with a CR"
    when "-l"
      switches["removeLFs"] = true
      puts "\nWill fix lines beggining with a LF"
    when "-s"
      switches["noStandardFromLine"] = true
      puts "\nWill use From and Date from mail headers in From_ line"
    when "-yz"
      switches["zoneYearOrder"] = true
      puts "\nTimezone will be placed before the year in From_ line"
    else
      puts "\nUnknown switch: " + ARGV[count] + ". Ignoring."
    end
    count += 1
  end
  # Drop the switches so that only the real arguments remain
  ARGV.shift(count)
  switches
end

#===============#
#     Main      #
#===============#

# Converts every *.eml file of the requested directory into the target mbox file.
def main
  $switches = extractSwitches()

  # Extract specified directory with emls and the target archive (if any)
  emlDir = ARGV[0] || "."
  givenArchive = ARGV[1]
  shownArchive = givenArchive || emlDir + "/archive.mbox"

  # Show specified settings
  puts "\nSpecified dir : " + emlDir
  puts "Specified file: " + shownArchive + "\n"

  # Check that the dir exists
  unless FileTest.directory?(emlDir)
    puts "\n[" + emlDir + "] is not a directory (might not exist). Please specify a valid dir"
    exit(1)
  end
  Dir.chdir(emlDir)

  # We are inside emlDir now, so the default target is a bare file name;
  # prefixing it with emlDir again would point at emlDir/emlDir/archive.mbox.
  mboxArchive = givenArchive || "archive.mbox"

  # Look for input before touching the target, so that an existing archive is
  # never truncated or deleted when there is nothing to convert.
  files = Dir["*.eml"].sort
  if files.empty?
    puts
    puts "No *.eml files in this directory. mbox file not created."
    return
  end

  # Check if destination file exists. If yes allow user to select an option.
  # Binary mode keeps the mail bytes and LF line breaks untouched.
  mode = "wb"
  if FileTest.exist?(mboxArchive)
    print "\nFile [" + shownArchive + "] exists! Please select: [A]ppend [O]verwrite [C]ancel (default) "
    answer = (STDIN.gets || "").chomp # EOF on stdin counts as cancel
    case answer
    when 'A', 'a' then mode = "ab"
    when 'O', 'o' then mode = "wb"
    else return
    end
  end

  puts
  File.open(mboxArchive, mode) do |aFile|
    # For each .eml file in the specified directory do the following
    files.each do |name|
      puts "Processing file: " + name
      thisFile = FileInMemory.new
      # Binary read: mails are not guaranteed to be valid UTF-8
      File.open(name, "rb") do |eml|
        eml.each_line { |item| thisFile.addLine(item) }
      end
      lines = thisFile.getProcessedLines
      if lines.nil?
        puts "WARN: File [" + name + "] doesn't seem to have a regular From: line. Not included in mbox"
      else
        lines.each { |line| aFile.puts line }
      end
    end
  end
end

main if __FILE__ == $0