#!/usr/bin/env ruby
# UsernameAnarchy - generate usernames
# by urbanadventurer
# 
# Anna Boom Key
#


## set up load paths. add the directory of the file currently being executed to the load path
script_dir = File.dirname(__FILE__)
absolute_script_dir = File.expand_path(script_dir)

# only prepend the directory when neither its relative nor its absolute form is already listed
already_on_load_path = $LOAD_PATH.include?(script_dir) || $LOAD_PATH.include?(absolute_script_dir)
$LOAD_PATH.unshift(absolute_script_dir) unless already_on_load_path

# absolute directory of this script, with a trailing slash
$ROOT_DIR = absolute_script_dir + "/"
$VERSION = "0.5"

require 'optparse'
require 'getoptlong'
require 'pp'
require 'set'
require 'format-plugins.rb'

# The parts of one person's name, plus the logic that expands a username
# format string into concrete usernames.
#
# Reads the global $options hash (:verbose, :substitute, :given_names,
# :family_names, :strip_spaces, :strip_apostrophes) and calls the top-level
# upcase_it helper.
class Name
  attr_accessor :firstname, :lastname, :firstinitial, :lastinitial, :middlename, :middleinitial

  # Placeholders that format leaves behind when a part of the name is unknown.
  # They can only be resolved by substitution from a range or a name dictionary.
  PLACEHOLDER = /%a-z|%A-Z|%[gG]ivenname|%[sS]urname|%DD|%D/

  # ABK ("Anna Boom Key") tokens and the format specifier each becomes.
  # This is an array, not a hash, to make the order of operations explicit:
  # whole names must be substituted before the single-letter initials.
  ABK_SUBSTITUTIONS = [
    ["Anna", "%F"], ["Boom", "%M"], ["Key", "%L"],
    ["anna", "%f"], ["boom", "%m"], ["key", "%l"],
    ["A", "%i.F"], ["B", "%i.M"], ["K", "%i.L"],
    ["a", "%i.f"], ["b", "%i.m"], ["k", "%i.l"]
  ]

  # Populate the name parts from a hash.
  #
  # s - Hash with any of the keys :firstname, :middlename, :lastname,
  #     :firstinitial, :middleinitial, :lastinitial. Empty strings are
  #     treated as missing. The hash is not modified.
  #
  # Initials default to the first character of the matching name; an
  # explicitly supplied initial overrides that default.
  def init(s)
    parts = {}
    s.each_pair { |key, value| parts[key] = (value == "" ? nil : value) }

    if parts[:firstname]
      @firstname = parts[:firstname]
      @firstinitial = @firstname[0..0]
    end

    if parts[:middlename]
      @middlename = parts[:middlename]
      @middleinitial = @middlename[0..0]
    end

    if parts[:lastname]
      @lastname = parts[:lastname]
      @lastinitial = @lastname[0..0]
    end

    # each explicit initial overrides its own default (not the first initial)
    @firstinitial = parts[:firstinitial] if parts[:firstinitial]
    @middleinitial = parts[:middleinitial] if parts[:middleinitial]
    @lastinitial = parts[:lastinitial] if parts[:lastinitial]

    pp [@firstname, @lastname, @firstinitial, @lastinitial, @middlename, @middleinitial] if $options[:verbose]
  end

  # Lower-cased given names read from the file named by $options[:given_names].
  def given_name_list
    # configure details like country, age range, gender, regex
    # regex is for stuff like 'starts with G'
    read_name_list($options[:given_names])
  end

  # Lower-cased family names read from the file named by $options[:family_names].
  def family_name_list
    read_name_list($options[:family_names])
  end

  # Expand a format written in ABK ("Anna Boom Key") notation.
  def format_anna(s)
    format(s, "abk")
  end

  # Expand a format string for this name.
  #
  # s    - the format string
  # type - "abk" when s uses the natural Anna Boom Key notation, nil when it
  #        already uses the username format below
  #
  # username format
  # %F - Firstname
  # %M - Middlename
  # %L - Lastname
  # %f - firstname
  # %m - middlename
  # %l - lastname
  # %i.f - first initial
  # %i.m - middle initial
  # %i.l - last initial
  # %i.F - First initial
  # %i.M - Middle initial
  # %i.L - Last initial
  # %DD - Digits, range of 00..99
  # %D - Digit, range of 0..9
  #
  # natural format
  # Anna Key
  # write whatever parts of the name you want. for Joe Bloggs to become jbloggs, write "akey". for JB, write "AK"
  # less flexible than the username format
  #
  # Returns an Array of usernames, or nil when the format needs a part of the
  # name that is unknown and $options[:substitute] is off.
  def format(s, type=nil)
    expanded = s.dup

    # convert from Anna Key format to username format
    if type == "abk"
      ABK_SUBSTITUTIONS.each { |from, to| expanded.gsub!(from, to) }
    end

    subs = {"%F" => @firstname ? upcase_it(@firstname) : "%Givenname",
            "%M" => @middlename ? upcase_it(@middlename) : "%Givenname",
            "%L" => @lastname ? upcase_it(@lastname) : "%Surname",

            "%f" => @firstname ? @firstname.downcase : "%givenname",
            "%m" => @middlename ? @middlename.downcase : "%givenname",
            "%l" => @lastname ? @lastname.downcase : "%surname",

            "%i.F" => @firstinitial ? upcase_it(@firstinitial) : "%A-Z",
            "%i.M" => @middleinitial ? upcase_it(@middleinitial) : "%A-Z",
            "%i.L" => @lastinitial ? upcase_it(@lastinitial) : "%A-Z",

            "%i.f" => @firstinitial ? @firstinitial.downcase : "%a-z",
            "%i.m" => @middleinitial ? @middleinitial.downcase : "%a-z",
            "%i.l" => @lastinitial ? @lastinitial.downcase : "%a-z",
           }

    # block form inserts the name literally, so a backslash in a name is not
    # read as a back-reference
    subs.each { |from, to| expanded.gsub!(from) { to } }

    expanded = [expanded]

    if $options[:substitute]
      # each pass resolves one placeholder per entry; repeat until none remain
      while expanded.any? { |x| x =~ PLACEHOLDER }
        expanded = expanded.map { |e| expand_one_placeholder(e) }.flatten
      end
    elsif expanded.any? { |x| x =~ PLACEHOLDER }
      # no substitution so remove it if it requires expansion
      return nil
    end

    # post process - remove spaces and apostrophes
    expanded = expanded.map { |x| x.delete(" ") } if $options[:strip_spaces]
    expanded = expanded.map { |x| x.delete("'") } if $options[:strip_apostrophes]
    expanded
  end

  private

  # Read a dictionary file once per instance: one lower-cased, stripped name
  # per line, blank lines dropped.
  def read_name_list(path)
    @name_lists ||= {}
    @name_lists[path] ||= File.readlines(path).map { |line| line.downcase.strip }.reject { |name| name.empty? }
  end

  # Replace the first occurrence of the highest-priority placeholder in e with
  # every candidate value. Returns an Array of Strings, or e itself when it
  # holds no placeholder.
  def expand_one_placeholder(e)
    case e
    # initials
    when /%a-z/
      ('a'..'z').map { |letter| e.sub('%a-z', letter) }
    when /%A-Z/
      ('A'..'Z').map { |letter| e.sub('%A-Z', letter) }
    # given names
    when /%givenname/
      given_name_list.map { |name| e.sub('%givenname') { name } }
    when /%Givenname/
      given_name_list.map { |name| e.sub('%Givenname') { upcase_it(name) } }
    # family names
    when /%surname/
      family_name_list.map { |name| e.sub('%surname') { name } }
    when /%Surname/
      family_name_list.map { |name| e.sub('%Surname') { upcase_it(name) } }
    # numbers, %DD keeps its leading zero and must be tried before %D
    when /%DD/
      (0..99).map { |num| e.sub('%DD', sprintf("%02d", num)) }
    when /%D/
      (0..9).map { |num| e.sub('%D', num.to_s) }
    else
      e
    end
  end
end


# Capitalise a name: first character upper case, the rest lower case.
# Returns nil when s is nil, false or the empty string.
def upcase_it(s)
  return nil if !s || s == ""

  head = s[0..0]
  tail = s[1..-1]
  head.upcase + tail.downcase
end


# Print a two-column table of every registered format plugin with an example
# generated for the sample person "anna boom key".
def list_plugins
  puts "#{"Plugin name".ljust(20)}\tExample"
  puts "-" * 80

  sample = Name.new
  sample.init({:firstname => "anna", :middlename => "boom", :lastname => "key"})

  Plugin.registered_plugins.each do |name_generator|
    generated = name_generator.generate(sample)

    example =
      if generated.nil?
        "No example"
      elsif generated.is_a?(Array)
        # show at most the first three results
        generated[0..2].join(",")
      else
        generated
      end

    puts "#{name_generator.plugin_name.ljust(20)}\t#{example}"
  end
end

# Load a list of people from a SPACE, comma or TAB delimited text file.
#
# filename - path of the file to read
#
# The first line may be a header naming the columns (firstname, lastname,
# firstinitial, lastinitial, middlename, middleinitial, in any letter case).
# Without a header the columns default to firstname, lastname. The delimiter
# is detected from the first line: TAB, then comma, then SPACE.
#
# Returns an Array of Hashes keyed by column symbol, with lower-cased,
# stripped values. Blank lines and values in unknown columns are skipped.
# Raises RuntimeError when the file is missing or unreadable, or when a
# header line contains no recognisable delimiter.
def load_names_list(filename)
  # File.exist? rather than File.exists?, which newer Rubies no longer provide
  raise "#{filename}: No such file" unless File.exist?(filename)
  raise "#{filename}: Cannot read file" unless File.readable?(filename)

  lines = File.readlines(filename)
  first_line = lines.first.to_s

  # is it SPACE, comma or TAB delimited?
  delimiter = ["\t", ",", " "].find { |candidate| first_line.include?(candidate) }

  if first_line =~ /(first|middle|last)(name|initial)/i
    raise "Cannot auto-detect delimiter." unless delimiter
    headers = first_line.split(delimiter)
    rows = lines[1..-1]
  else
    # no header: every line is data, columns default to firstname, lastname
    delimiter ||= " "
    headers = ["firstname", "lastname"]
    rows = lines
  end

  # learn columns
  column_names = { "firstname" => :firstname,
                   "lastname" => :lastname,
                   "firstinitial" => :firstinitial,
                   "lastinitial" => :lastinitial,
                   "middlename" => :middlename,
                   "middleinitial" => :middleinitial }

  # columns[i] is the symbol for column i, or nil when that column is ignored
  columns = headers.map do |header|
    header = header.downcase.strip
    puts "# Ignored unknown column: #{header}" unless column_names.key?(header)
    column_names[header]
  end

  people = rows.map do |line|
    next if line.strip.empty?

    person = {}
    line.split(delimiter).each_with_index do |value, i|
      column = columns[i]
      next unless column # value sits in an unknown or surplus column
      person[column] = value.downcase.strip
    end
    person unless person.empty? # any lines not interpreted become nil
  end

  people.compact # compact removes the nil's
end

# Find which registered format plugin produces the given username.
#
# username - the username to recognise; falls back to
#            $options[:format_to_recognise] when nil
#
# Every family name / given name pair from the dictionaries named by
# $options[:family_names] and $options[:given_names] is fed to every plugin
# until one generates exactly this username.
#
# Returns the matching plugin's name, or nil when no plugin matches.
def recognise_username_format(username)
  target = username || $options[:format_to_recognise]
  plugins_to_search = Plugin.registered_plugins

  # only search names whose first letter appears somewhere in the username;
  # compared case-insensitively so the filter never discards a real match
  letters = target.downcase.split('').uniq

  # read the given names once instead of once per family name
  given_names = File.readlines($options[:given_names]).map { |line| line.strip }
  given_names = given_names.select { |name| letters.include?(name[0..0].downcase) }

  File.readlines($options[:family_names]).each do |line|
    this_familyname = line.strip
    next unless letters.include?(this_familyname[0..0].downcase)

    puts "# Searching #{this_familyname}" if $options[:verbose]

    given_names.each do |this_givenname|
      thisname = Name.new
      thisname.init(:firstname => this_givenname, :lastname => this_familyname)

      plugins_to_search.each do |name_generator|
        ret = name_generator.generate(thisname)
        ret = [ret] if ret.is_a?(String)
        return name_generator.plugin_name if ret.is_a?(Array) && ret.include?(target)
      end
    end
  end

  nil # nothing matched
end

# Print an error message and terminate the program with exit status 1.
#
# s        - the message to print
# severity - label printed before "Error."; defaults to "Fatal" when nil
def error(s, severity = nil)
  sev = severity.nil? ? "Fatal" : severity
  puts "#{sev} Error. #{s}"
  puts
  exit 1 # an error must not report success to the calling shell
end


# Wrap text in the given ANSI escape sequence and reset the terminal
# attributes afterwards.
def colorize(text, color_code)
  reset = "\e[0m"
  "#{color_code}#{text}#{reset}"
end

# Colour helpers. The plain names emit the bold variant ("\e[1m" prefix);
# the dark_* names (and grey) emit the colour code on its own.
def red(text)
  colorize(text, "\e[1m\e[31m")
end

def dark_red(text)
  colorize(text, "\e[31m")
end

def green(text)
  colorize(text, "\e[1m\e[32m")
end

def dark_green(text)
  colorize(text, "\e[32m")
end

def yellow(text)
  colorize(text, "\e[1m\e[33m")
end

def dark_yellow(text)
  colorize(text, "\e[33m")
end

def blue(text)
  colorize(text, "\e[1m\e[34m")
end

def dark_blue(text)
  colorize(text, "\e[34m")
end

def magenta(text)
  colorize(text, "\e[1m\e[35m")
end

def dark_magenta(text)
  colorize(text, "\e[35m")
end

def cyan(text)
  colorize(text, "\e[1m\e[36m")
end

def dark_cyan(text)
  colorize(text, "\e[36m")
end

def white(text)
  colorize(text, "\e[1m\e[37m")
end

def grey(text)
  colorize(text, "\e[37m")
end



# Return the ASCII-art program banner wrapped in a randomly chosen colour
# (red, green, blue or yellow).
def banner
  art = '
 ___ ____                                                        
|   |    \ ______  ____ _______   ____  _____     __ __    ____  
|   :    //  ___/_/    \\\\_  __ \\ /    \\ \\__  \\   /  :  \\ _/    \\
\'   .   / \\___ \\ \\   o_/ |  | \\/|   :  \\ /  o \\ |  . .  \\\\   o_/ 
 \\_____/ /______) \\_____)|__|   |___:  /(______)|__: :  / \\_____)
       _____                         \\/       .__     \\/      
      /     \\    ____  _____  _______   ____  |  |__  ___.__.   
     /   o   \\  /    \\ \\__  \\ \\_  __ \\_/ ___\\ |  |  \\(   :  |   
    /    .    \\|   .  \\ /  o \\ |  | \\/\\  \\___ |   .  \\\\___  |   
    \\____:__  /|___:__/(______)|__|    \\_____)|___:__//_____|   
            \\/                                                  
'

  colours = ['red', 'green', 'blue', 'yellow']
  # index over the whole list so that every colour can be picked
  random_colour = colours[rand(colours.size)]
  # call the colour helper of that name directly instead of building code for eval
  send(random_colour, art)
end


# Global option defaults. The command line parser further down overrides them.
$options = {
  :substitute_country => nil,
  :given_names        => "#{$ROOT_DIR}/names/facebook/firstnames-top10000.txt", # used for recogniser
  :family_names       => "#{$ROOT_DIR}/names/facebook/lastnames-top10000.txt", # used for recogniser
  :strip_spaces       => true,
  :strip_apostrophes  => true,
  :case_insensitive   => true
}


# Command line definition. Handlers store their results in the global
# $options hash; --list-formats, --recognise and --help do their work
# immediately while parsing and then exit.
optparse = OptionParser.new do |opts|
  opts.banner = banner
  opts.banner += "Usage: ./username-anarchy [OPTIONS]... [firstname|first last|first middle last]\nAuthor: Andrew Horton (urbanadventurer). Version: #{$VERSION}"
  opts.summary_width = 25
  opts.summary_indent = " "
  opts.separator ""
  opts.separator "Names:"

  $options[:input_file] = nil
  opts.on('-i', '--input-file FILE', "Input list of names. Can be SPACE, CSV or TAB delimited.",
          "Defaults to firstname, lastname. Valid column headings are:",
          "firstinitial, firstname, lastinitial, lastname,",
          "middleinitial, middlename.") do |f|
    $options[:input_file] = f
  end

  $options[:no_input_names] = false
  opts.on('-a', '--auto', "Automatically generate names from a country/list") do
    $options[:no_input_names] = true
  end

  opts.on('--country COUNTRY', '-c' , "COUNTRY can be one of the following datasets:",
          "PublicProfiler:",
          "argentina, austria, belgium, canada, china,",
          "denmark, france, germany, hungary, india, ireland,",
          "italy, luxembourg, netherlands, newzealand, norway,",
          "poland, serbia, slovenia, spain, sweden,",
          "switzerland, uk, us",
          "Other:",
          "Facebook - uses the Facebook top 10,000 names") do |dataset|
    dataset = dataset.downcase
    if dataset == "facebook"
      given_file = "#{$ROOT_DIR}/names/facebook/firstnames-top10000.txt"
      family_file = "#{$ROOT_DIR}/names/facebook/lastnames-top10000.txt"
    else
      given_file = "#{$ROOT_DIR}/names/publicprofiler/#{dataset}-forenames.txt"
      family_file = "#{$ROOT_DIR}/names/publicprofiler/#{dataset}-surnames.txt"
    end

    # raise aborts the handler, so no exit is needed after it
    raise "Cannot read file: #{given_file}" unless File.readable?(given_file)
    $options[:given_names] = given_file

    raise "Cannot read file: #{family_file}" unless File.readable?(family_file)
    $options[:family_names] = family_file
  end

  opts.on('--given-names FILE', "Dictionary of given names") do |arg|
    # check the file that was passed in, not the current default
    raise "Cannot read file: #{arg}" unless File.readable?(arg)
    $options[:given_names] = arg
  end

  opts.on('--family-names FILE', "Dictionary of family names") do |arg|
    # check the file that was passed in, not the current default
    raise "Cannot read file: #{arg}" unless File.readable?(arg)
    $options[:family_names] = arg
  end

  $options[:substitute] = false
  opts.on('-s', '--substitute STATE', "Control name substitutions",
          "Valid values are 'on' and 'off'. Default: off",
          "Can substitute any part of a name not available") do |arg|
    if arg == "on"
      $options[:substitute] = true
    elsif arg == "off"
      $options[:substitute] = false
    else
      raise("invalid argument for --substitute")
    end
  end

  # the max is -1 because -1 is also the end element of an array
  $options[:max_substitutions] = -1
  opts.on('-m', '--max-sub NUM', Integer, "Limit quantity of substitutions per plugin.",
          "Default: -1 (Unlimited)") do |arg|
    raise("maximum substitutions must be a positive integer") unless arg > 0
    # stored as the last array index to keep, hence the -1
    $options[:max_substitutions] = arg - 1
  end

  opts.separator ""
  opts.separator "Username format:"

  opts.on('-l', '--list-formats', "List format plugins") do
    $options[:substitute] = true
    list_plugins
    exit
  end

  $options[:selected_plugins] = nil
  opts.on('-f', '--select-format LIST', Array, "Select format plugins by name. Comma delimited list") do |plugins|
    $options[:selected_plugins] = plugins
    known_names = Plugin.registered_plugins.map { |p| p.plugin_name }
    plugins.each do |x|
      error("Format plugin not found: #{x}.") unless known_names.include?(x)
    end
  end

  opts.on('-r', '--recognise USERNAME', "Recognise which format is in use for a username.",
          "This uses the Facebook dataset. Use verbose mode to", "show progress.") do |arg|
    $options[:format_to_recognise] = arg
    $options[:no_input_names] = true
    puts "Recognising #{arg}. This can take a while."
    recognised = recognise_username_format($options[:format_to_recognise])
    if recognised
      puts "Username format #{$options[:format_to_recognise]} recognised. Plugin name: #{recognised}"
    else
      puts "Username format #{$options[:format_to_recognise]} unrecognised."
    end
    exit
  end

  # set to nil above
  opts.on('-F', '--format FORMAT', "Define the user format using either format string or", "ABK format. See README.md for format details.") do |arg|
    # an Array, so the later plugin selection matches whole plugin names
    $options[:selected_plugins] = ["custom"]
    fmt = arg.delete('"')
    # NOTE: this code is eval'd. fmt.inspect embeds the user's text as an
    # escaped string literal so it cannot be interpreted as Ruby code.
    code = "
    Plugin.define \"custom\" do
      def generate(n)
        n.format_anna(#{fmt.inspect})
      end
    end"
    eval(code)
  end

  opts.separator ""
  opts.separator "Output:"

  $options[:suffix] = nil
  opts.on('-@', '--suffix SUFFIX', String, "Suffix. e.g. @example.com",
          "Default: None") do |arg|
    $options[:suffix] = arg.downcase
  end

  $options[:case_insensitive] = true
  opts.on('-C', '--case-insensitive BOOL', String, "Case insensitive usernames.",
          "Default: True (All lower case)") do |arg|
    case arg.downcase
    when 'true'
      $options[:case_insensitive] = true
    when 'false'
      $options[:case_insensitive] = false
    else
      raise("Invalid option. Must be true or false")
    end
  end

  opts.separator ""
  opts.separator "Miscellaneous:"

  opts.on('-v', '--verbose', "Display plugin format comments in output and displays", "last name searches in plugin format recogniser") do
    $options[:verbose] = true
  end

  opts.on_tail('-h', '--help') do
    puts opts
    exit
  end
  opts.on_tail
end

# Parse the command line. Any parse failure (unknown option, missing or
# invalid argument, ...) prints the usage followed by the reason, then exits
# with a failure status instead of showing a stack trace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  puts optparse
  puts
  puts e.to_s # Friendly output when parsing fails
  exit 1
end


people = [] # the array of people to generate usernames for

# get people from the cmdline arguments: 1, 2 or 3 arguments are read as
# first / first last / first middle last. Any other count adds nobody.
argv_fields = {
  1 => [:firstname],
  2 => [:firstname, :lastname],
  3 => [:firstname, :middlename, :lastname]
}[ARGV.size]
people << Hash[argv_fields.zip(ARGV)] if argv_fields

# get people from the input file
if $options[:input_file]
  people += load_names_list($options[:input_file])
elsif $options[:no_input_names]
  # no names supplied on purpose: add one person with no name parts set
  people << {}
elsif $options[:no_input_names] == false && people.empty?
  puts optparse # print usage
  exit
end

# Resolve the plugin selection into plugin objects: nil means every registered
# plugin, otherwise keep only the plugins whose name was selected.
wanted_plugins = $options[:selected_plugins]
$options[:selected_plugins] =
  if wanted_plugins.nil?
    Plugin.registered_plugins
  else
    Plugin.registered_plugins.select { |plugin| wanted_plugins.include?(plugin.plugin_name) }
  end


# if auto, set up people: one person for every family name / given name pair
if $options[:no_input_names] and not $options[:format_to_recognise]
  # read the given names once rather than once per family name; strip (not
  # strip!) so a line with nothing to strip does not turn into nil
  auto_given_names = File.readlines($options[:given_names]).map { |line| line.downcase.strip }

  File.readlines($options[:family_names]).each do |line|
    this_familyname = line.downcase.strip
    auto_given_names.each do |this_givenname|
      people << {:firstname => this_givenname, :lastname => this_familyname}
    end
  end
end

people = people - [{}] unless $options[:no_input_names] # remove the empty hash element in people.

# Run every selected plugin for every person and print each username once.
generated_names = Set.new # used to stop repetition. never repeat. Mary Pell and Mary Polonski only make maryp once
people.each do |person|
  thisname = Name.new
  thisname.init(person)

  $options[:selected_plugins].each do |name_generator|
    ret = name_generator.generate(thisname)

    if $options[:verbose]
      # comment note
      comment_note = "# Plugin: #{name_generator.plugin_name}"
      comment_note += " substituted #{ret.size}" if ret.is_a?(Array)
      puts comment_note
    end

    # normalise the plugin result to a list of candidate usernames
    candidates =
      if ret.is_a?(String)
        [ret]
      elsif ret.is_a?(Array)
        # max_substitutions is the last index to keep; -1 keeps everything
        ret[0..$options[:max_substitutions]]
      elsif ret.nil?
        # we're just skipping over this one. maybe substitute is off.
        []
      else
        raise("Unknown type returned from name generator")
      end

    candidates.each do |candidate|
      candidate.downcase! if $options[:case_insensitive]
      candidate += $options[:suffix] if $options[:suffix]
      # add? returns nil when the name was already generated
      puts candidate if generated_names.add?(candidate)
    end
  end
end

