ruby 领域特定语言

domain_specific_lanaguage.rb
#!/usr/bin/env ruby
#
#  Created by Reginald Braithwaite on 2007-03-11.
#  Copyright (c) 2007. All rights reserved.
#
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
#                    Version 2, December 2004
# 
# Copyright (C) 2004 Sam Hocevar
#  22 rue de Plaisance, 75014 Paris, France
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
# 
#            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
#   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
# 
#  0. You just DO WHAT THE FUCK YOU WANT TO.

# A Domain Specific Language is used to introduce a new scope with an embedded set of methods.
#
# The idea is to avoid polluting the global namespace. Instead of adding methods to Kernel, we
# add methods to a new DomainSpecificLanguage, and then we can evaluate code with the new language
# using the #eval class method or using the #with method added to Kernel.
#
# For a similar approach, see http://www.infoq.com/articles/eval-options-in-ruby

# Base class for small embedded languages. An instance is created for every
# block evaluated with the language; the block runs with +self+ set to that
# instance (via #instance_eval), and anything the instance does not define
# itself is looked up in the scope where the block was written.
class DomainSpecificLanguage
  
  # See http://whytheluckystiff.net/articles/seeingMetaclassesClearly.html
  
  # Returns this object's singleton class.
  def metaclass; class << self; self; end; end
  
  # Evaluates the block with the singleton class as +self+.
  def meta_eval(&blk); metaclass.instance_eval(&blk); end
  
  # Adds methods to a metaclass
  def meta_def(name, &blk)
    meta_eval { define_method name, &blk }
  end
  
  # See http://onestepback.org/index.cgi/Tech/Ruby/RubyBindings.rdoc
  
  # Captures an expression (a name) inside a binding and re-evaluates it
  # every time #value is called.
  class ReadOnlyReference
    def initialize(var_name, vars)
      @getter = eval "lambda { #{var_name} }", vars
    end
    def value
      @getter.call
    end
  end
  
  attr_reader :outer_binding, :outer_self
  
  # instances of a DomainSpecificLanguage are created each time
  # a block is evaluated with that language. The instance is
  # initialized with the block's binding.
  
  def initialize(given_binding)
    @outer_binding = given_binding
    @outer_self = ReadOnlyReference.new(:self, given_binding)
  end
  
  # some jiggery-pokery to access methods defined in the block's
  # scope, because when the block is evaluated with the DomainSpecificLanguage,
  # we use #instance_eval to set <tt>self</tt> to the DomainSpecificLanguage
  # instance.
  #
  # A bare name (no arguments, no block) is evaluated in the outer binding and
  # cached as a singleton method. Any call carrying arguments or a block is
  # sent to the outer self, so the block is no longer silently dropped.
  def method_missing(symbol, *args, &block)
    if args.empty? && block.nil?
      reference = ReadOnlyReference.new(symbol, outer_binding)
      receiver = outer_self
      # The cached method must still accept arguments and a block: the same
      # name may later be called as name(arg) or name { ... }.
      meta_def(symbol) do |*later_args, &later_block|
        if later_args.empty? && later_block.nil?
          reference.value
        else
          receiver.value.send(symbol, *later_args, &later_block)
        end
      end
      reference.value
    else
      outer_self.value.send(symbol, *args, &block)
    end
  end
  
  # Keeps #respond_to? in step with #method_missing: a name is answered when
  # the outer self has a method (public or private) of that name.
  def respond_to_missing?(symbol, include_private = false)
    (outer_self && outer_self.value.respond_to?(symbol, true)) || super
  end
  
  class << self
    
    # Evaluates a block in the context of a new DomainSpecificLanguage
    # instance and returns the block's result.
    def eval(&block)
      new(block.binding).instance_eval(&block)
    end
    
  end
  
end

# We open Kernel and add just one method for introducing DomainSpecificLanguages

module Kernel

  # Runs +block+ inside the language +dsl_class+ by handing it to that
  # class's #eval, and returns whatever that evaluation returns.
  def with(dsl_class, &block)
    dsl_class.eval(&block)
  end

end

# Let is a DomainSpecificLanguage that actually creates DomainSpecificLanguages.
#
# Let works a lot like <tt>let</tt> in Scheme. You provide a hash of names and value
# expressions. The value expressions are evaluated in the outer scope, and then we have
# a little domain specific language where the inner scope has the names all bound to the
# values. E.g.
# <tt>
# with Let do
#     let :x => 100, :y => 50 do
#         print "#{x + y} should equal one hundred and fifty"
#     end
# end
# </tt>
#
# Within the Let DomainSpecificLanguage, you can declare multiple <tt>let</tt> statements and nest
# them as you please.
#
# One important limitation: you cannot bind a value to a name that is already a local variable.

# A DomainSpecificLanguage whose only verb is #let, which opens a nested
# Scope with extra names bound to values.
class Let < DomainSpecificLanguage
  
  class Scope < DomainSpecificLanguage
    
    # initializes a Scope. In addition to the outer binding, we also pass in the
    # hash of names and values. Each name becomes an accessor on this scope's
    # singleton class, set to its value.
    #
    # Raises ArgumentError when a name is already a local variable of the
    # outer binding: the block would see that local, not our accessor.
    
    def initialize(given_binding, let_clauses = {})
      # local_variables yields Symbols on Ruby 1.9+ and Strings on 1.8, so
      # normalise to Strings; otherwise the guard below never matches.
      outer_locals = eval("local_variables", given_binding).map { |local| local.to_s }
      let_clauses.each do |symbol, value|
        var_name = symbol.to_s
        raise ArgumentError, "Cannot override local #{var_name}" if outer_locals.include?(var_name)
        meta_eval { attr_accessor(var_name) }
        send "#{var_name}=", value
      end
      super(given_binding)
    end
    
  end
  
  # Define a new Scope: you're really defining a new DomainSpecificLanguage.
  # Returns the value of the block evaluated in that scope.
  
  def let(let_clauses = {}, &block)
    Scope.new(block.binding, let_clauses).instance_eval(&block)
  end
  
  class << self
    
    # If you just want a one-off
    # def eval let_clauses = {}, &block
    #   Scope.new(block.binding, let_clauses).instance_eval(&block)
    # end
    
  end
  
end

# A DomainSpecificDelegator is a DSL that delegates methods to a class or object.
# The main use is to separate the mechanics of scoping from the methods of a utility
# class.

# A DSL whose verbs are forwarded to methods of other objects, as declared
# with .delegate_to in the class body.
class DomainSpecificDelegator < DomainSpecificLanguage
  
  class << self
    
    # insert one or more #delegate_to calls in the class definition, giving a receiver
    # and a hash. Each hash pair is of the form <tt>verb => method</tt> where verb is the
    # name you will use in the DSL and method is the method in the receiver that will handle
    # it.
    def delegate_to(receiver, method_hash)
      method_hash.each do |verb, method_name|
        delegations[verb.to_s] = [receiver, method_name.to_s]
      end
    end
    
    # Delegations declared directly on this class (verb => [receiver, method]).
    # Stored per class, so sibling delegators do not see each other's verbs and
    # a delegator with no declarations simply has an empty table.
    def delegations
      @delegations ||= {}
    end
    
    # Finds the [receiver, method_name] pair for +verb+, searching this class
    # first and then its delegator ancestors. Returns nil when undeclared.
    def delegation_for(verb)
      key = verb.to_s
      klass = self
      while klass.respond_to?(:delegations)
        found = klass.delegations[key]
        return found if found
        klass = klass.superclass
      end
      nil
    end
    
  end
  
  # Forwards declared verbs to their receiver; everything else falls through
  # to the superclass lookup.
  def method_missing(symbol, *args, &block)
    receiver, method_name = *self.class.delegation_for(symbol)
    if receiver
      receiver.send method_name, *args, &block
    else
      super(symbol, *args, &block)
    end
  end
  
  # Reports declared verbs as supported so #respond_to? agrees with
  # #method_missing.
  def respond_to_missing?(symbol, include_private = false)
    self.class.delegation_for(symbol) ? true : super
  end
  
end
pattern_match.rb
require 'ruby-debug'
require 'dsl' # get it at http://raganwald.com/source/dsl_and_let.html

# Mixin providing #match / #with / #otherwise style pattern matching.
module Matchable

  # Wraps +obj+ in a Literal pattern.
  def lit(obj)
    Literal.new(obj)
  end

  # Evaluates +block+ inside a fresh PatternMatch for +args+ and returns the
  # value produced by the first matching #with clause, or by #otherwise.
  def match(args, &block)
    matcher = PatternMatch.new(args, block.binding)
    matcher.instance_eval(&block)
    matcher.value
  end

  # The scope in which the clauses of a #match block are evaluated.
  class PatternMatch < Let::Scope

    # +args+ is the subject being matched; +binding+ is the binding of the
    # block passed to #match.
    def initialize(args, binding)
      @args = args
      super(binding)
    end

    # Tries +pattern+ against the subject unless an earlier clause has already
    # matched. On success the block is run (inside a Let scope when the
    # pattern captured any names) and its result is remembered.
    def with(pattern, &block)
      return nil if @matched

      captured = {}
      return nil unless pattern.patmatch(@args, captured)

      @matched = true
      @value =
        if captured.length > 0
          Kernel.with(Let) { let(captured, &block) }
        else
          block.call
        end
    end

    # Registers the block to fall back on when no clause matches.
    def otherwise(&by_default)
      @by_default = by_default
    end

    # The result of the matching clause, else the result of the #otherwise
    # block, else a NoMatchFoundError.
    def value
      return @value if @matched
      return @by_default.call if @by_default

      raise NoMatchFoundError, "The arguments did not match any of the supplied patterns and no otherwise clause was provided"
    end

  end

  class NoMatchFoundError < StandardError; end

end

class Object
  # Fallback pattern rule: an ordinary object used as a pattern matches a
  # subject it is == to. +mapping+ is left untouched.
  def patmatch(arg, mapping)
    (self == arg)
  end
end

class Class
  # A class used as a pattern matches any subject that is an instance of it
  # (as judged by the subject's is_a?). +mapping+ is left untouched.
  def patmatch(arg, mapping)
    (arg.is_a?(self))
  end
end

class Symbol

  # A symbol pattern matches anything. Unless it is the wildcard (:_) or no
  # mapping was supplied, the subject is recorded in +mapping+ under this
  # symbol.
  def patmatch(arg, mapping)
    mapping[self] = arg unless empty? || mapping.nil?
    true
  end

  # True only for the wildcard symbol :_ .
  # NOTE: this replaces the meaning of any existing Symbol#empty?.
  def empty?
    to_s == "_"
  end

  # :head % :tail builds a Destructurer over both names; the right-hand side
  # must be a Symbol, otherwise ArgumentError is raised.
  def %(other)
    raise ArgumentError unless other.is_a?(Symbol)
    Destructurer.new(self, other)
  end

  # :name & pattern builds a Namer binding this symbol to whatever +other+
  # matches.
  def &(other)
    Namer.new(self, other)
  end

end

# Pattern that delegates to another pattern and, when that one matches, also
# records the whole subject under a name.
class Namer

  # +sym+ is the name to bind; +obj+ is the wrapped pattern.
  def initialize(sym, obj)
    @sym = sym
    @obj = obj
  end

  # Returns true and stores +args+ in +mapping+ under the name when the
  # wrapped pattern matches; returns false otherwise.
  def patmatch(args, mapping)
    return false unless @obj.patmatch(args, mapping)

    mapping[@sym] = args
    true
  end

end

# Pattern that splits an array into leading elements and a remainder, e.g.
# :head % :tail binds head to the first element and tail to the rest.
class Destructurer
  
  # +names+ are the symbols to bind, in order; the last one receives the
  # remainder of the array.
  def initialize(*names)
    @names = names
  end

  # Matches only non-empty Arrays. Every name but the last is bound to the
  # next leading element; the last name is bound to an array of what is left.
  # Works on a copy so the caller's array is not consumed by matching.
  def patmatch(args,mapping)
    return false unless args.is_a?(Array)
    return false unless args.length>0
    remaining = args.dup
    @names[0...-1].each do |name|
      mapping[name] = remaining.shift
    end
    mapping[@names.last] = remaining
    true
  end

  # Appends another name to this pattern (so :a % :b % :c works) and returns
  # the same Destructurer. Raises ArgumentError for non-Symbols.
  def %(symbol)
    raise ArgumentError unless symbol.is_a?(Symbol)
    @names << symbol
    self
  end
  
end

module Enumerable

  # A collection used as a pattern matches another collection with the same
  # number of elements when each of its elements, taken in order, matches the
  # corresponding element of the subject.
  #
  # The Enumerable check comes first and sizes are taken with #count, so a
  # subject that is not a collection (or has no #length, like a Range) gives
  # a plain false or a normal comparison instead of a NoMethodError.
  def patmatch(args,mapping)
    return false unless args.is_a?(Enumerable)
    return false unless count == args.count
    zip(args).all? { |pattern, subject| pattern.patmatch(subject, mapping) }
  end

end

class String

  # Strings get their own rule so that a string pattern is always compared as
  # one whole value (with ==) rather than element by element.
  def patmatch(args,mapping)
    self == args
  end

end

# Pattern wrapper that matches by plain equality with the wrapped object,
# bypassing any patmatch rule the object itself might have.
class Literal

  # +obj+ is the value subjects are compared against.
  def initialize(obj)
    @obj = obj
  end

  # True when the wrapped object is == to +args+; +mapping+ is not used.
  def patmatch(args, mapping)
    (@obj == args)
  end

end


ruby 需要目录中的文件

.rb
class Dir
  # Requires every Ruby source file found directly inside +directory+
  # (not recursive).
  #
  # * Only names whose extension is exactly ".rb" are loaded, so files such
  #   as "foo.rb.bak" are ignored.
  # * The path is built with File.expand_path, so +directory+ works with or
  #   without a trailing slash.
  # * Files are loaded in sorted name order so the load order is stable.
  #
  # Returns the sorted list of directory entries.
  def self.require_all(directory)
    entries(directory).sort.each do |file|
      next unless File.extname(file) == '.rb'
      require File.expand_path(file, directory)
    end
  end
end

ruby 空白的石板

在Ruby中创建一个没有任何方法的对象(常规对象从Object类继承方法),对于使用缺少方法的代理很有用(http://onestepback.org/index.cgi/Tech/Ruby/BlankSlate.rdoc) 。 <br/> <br/>

blank_slate.rb
# A class whose instances have (almost) no public methods: every inherited
# public/protected instance method is undefined except those whose names
# start with "__" (such as __send__ and __id__). Subclasses can then route
# everything through method_missing.
class BlankSlate
    instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ }
end
 
# All methods will be passed to method_missing
# Logging proxy: wraps an object and forwards every call to it, printing a
# line describing the call first.
class Proxy < BlankSlate
  # +obj+ is the object all calls are forwarded to.
  def initialize(obj)
    @obj = obj
  end

  # Prints the method name and comma-joined arguments, then forwards the
  # call (with its block) to the wrapped object and returns its result.
  def method_missing(sym, *args, &block)
    rendered_args = args.join(',')
    puts "Sending #{sym}(#{rendered_args}) to obj"
    @obj.__send__(sym, *args, &block)
  end
end

ruby 之前和之后

<br/> 发布于:Ruby <br/>允许您在类中创建 before(前置)和 after(后置)方法<br/> <br/>示例:<br/>require "before_and_after"<br/> <br/>class Message<br/>  include BeforeAndAfter<br/> <br/>  def initialize(message)<br/>    @message = message<br/>  end<br/> <br/>  def display<br/>    puts @message<br/>  end<br/> <br/>  def before_display<br/>    puts "BEFORE DISPLAY"<br/>  end<br/> <br/>  def after_display<br/>    puts "AFTER DISPLAY"<br/>  end<br/> <br/>  use_method :display<br/>end<br/> <br/>Message.new("== MESSAGE ==").display<br/> <br/>输出:<br/>BEFORE DISPLAY<br/>== MESSAGE ==<br/>AFTER DISPLAY

before_and_after.rb
# Mixin that lets a class wrap chosen instance methods with
# before_<name> / after_<name> hooks (see ClassMethods#use_method).
module BeforeAndAfter
  # This extends the class that includes BeforeAndAfter with the methods in ClassMethods
  def self.included(base)
    base.extend(ClassMethods)
  end
 
  module ClassMethods
    # Wraps each named instance method so that calling it runs
    # before_<name>, then the original method, then after_<name>.
    #
    # * Call it after the methods (and any hooks) have been defined.
    # * A missing hook is created as a no-op, so hooks are optional.
    # * Arguments and the block are passed to the original method, and the
    #   original method's return value is returned to the caller.
    # * Hooks are called without arguments.
    def use_method(*methods)
      methods.each do |method|
        before_name = "before_#{method}".to_sym
        after_name = "after_#{method}".to_sym

        # Define missing hooks BEFORE looking them up; instance_method raises
        # NameError for a method that does not exist.
        [before_name, after_name].each do |hook|
          next if method_defined?(hook) || private_method_defined?(hook)
          define_method(hook) { }
        end

        # Capture the current implementations as unbound methods so the
        # redefinition below does not call itself.
        before_hook = instance_method(before_name)
        original = instance_method(method)
        after_hook = instance_method(after_name)

        define_method(method) do |*args, &block|
          before_hook.bind(self).call
          result = original.bind(self).call(*args, &block)
          after_hook.bind(self).call
          result
        end
        # Let keyword arguments pass through *args unchanged on Rubies that
        # separate them from positional arguments.
        ruby2_keywords(method) if respond_to?(:ruby2_keywords, true)
      end
    end
  end
end

ruby 递归符号化键

.rb
# Symbolizes the keys of +hash+ in place (using Hash#symbolize_keys!, which
# must be available, e.g. from ActiveSupport) and does the same for every
# hash nested inside it, including hashes held inside arrays.
#
# Returns +hash+ itself so the call can be chained.
def recursive_symbolize_keys!(hash)
  hash.symbolize_keys!
  hash.each_value { |value| symbolize_nested_keys!(value) }
  hash
end

# Helper: descends into a value that may be a Hash, an Array (possibly
# containing hashes) or anything else (ignored).
def symbolize_nested_keys!(value)
  case value
  when Hash then recursive_symbolize_keys!(value)
  when Array then value.each { |item| symbolize_nested_keys!(item) }
  end
end

ruby 朋友发推文

.rb
#!/usr/bin/env ruby

require "rubygems"
require "twitter"
require "active_support"
require "google_chart"


# Credentials for the Twitter account whose friends timeline is charted.
twitter_user = ""
twitter_pass = ""

httpauth = Twitter::HTTPAuth.new(twitter_user, twitter_pass)
client = Twitter::Base.new(httpauth)

# screen name => number of tweets seen in the fetched timeline
totals = {}

client.friends_timeline(:count => 200).each do |t|
  user = t.user.screen_name
  totals[user] = totals.fetch(user, 0) + 1
end

# One pie slice per user, sized by tweet count; print the chart URL.
pc = GoogleChart::PieChart.new('500x200', "Total Proportion of Recent Tweets", false)

totals.each { |key, val| pc.data key, val }

puts pc.to_url

ruby AWS文件上载

aws_file_upload.rb
#!/usr/bin/env ruby
 
require 'rubygems'
require 'aws/s3'
 
# Usage: aws_file_upload.rb LOCAL_FILE BUCKET [MIME_TYPE]
# Uploads LOCAL_FILE to BUCKET under its base name.
local_file = ARGV[0]
bucket = ARGV[1]
mime_type = ARGV[2] || "application/octet-stream"
 
# Fail with a readable message instead of a TypeError from File.basename(nil)
# or an Errno from File.open further down.
abort "Usage: #{File.basename($0)} LOCAL_FILE BUCKET [MIME_TYPE]" unless local_file && bucket
abort "No such file: #{local_file}" unless File.file?(local_file)
 
AWS::S3::Base.establish_connection!(
 :access_key_id => 'REPLACE_ME',
 :secret_access_key => 'REPLACE_ME'
)
 
base_name = File.basename(local_file)
 
puts "Uploading #{local_file} as '#{base_name}' to '#{bucket}'"
 
# Block form closes the file even if the upload raises; binary mode keeps the
# bytes untouched on platforms that translate line endings.
File.open(local_file, 'rb') do |file|
  AWS::S3::S3Object.store(
   base_name,
   file,
   bucket,
   :content_type => mime_type 
  )
end
 
puts "Uploaded!"

ruby 在Chunks中读取XML

xml_chunk.rb
#
# Chunk
# 
# Reads a large file in as chunks for easier parsing.
# 
# The chunks returned are whole <element>…</element> blocks found within the file, where "element" is the name given in the 'element' option.
# 
# Each call to read() returns the whole element including start and end tags.
# 
# Tested with a 1.8MB file, extracted 500 elements in 0.09s
# (with no work done, just extracting the elements)
# 
# Usage:
# <code>
#   # initialize the object
#   file = Chunk.new('chunk-test.xml', { 'element' => 'Chunk' })
#   
#   # loop through the file until all lines are read
#   while xml = file.read()
#     # do whatever you want with the string
#     puts xml
#   end
# </code>
# 
# Reads a file piece by piece and hands back one <element>...</element>
# block per call to #read (or the whole file when no element is configured).
#
# All state (options, file handle, read position) is kept per instance, so
# several Chunk objects can be used at the same time.
class Chunk
  # Option values used when the caller does not override them.
  #
  #   'path'      directory the file lives in
  #   'element'   name of the XML element to extract ('' = whole document)
  #   'chunkSize' number of bytes fetched per file read (minimum 64)
  DEFAULT_OPTIONS = {
    'path' => './',
    'element' => '',
    'chunkSize' => 512
  }.freeze

  # Open file handles keyed by the object_id of the owning Chunk, so that
  # Chunk.finalize can close the right file without referencing the Chunk.
  OPEN_HANDLES = {}

  # initialize
  #
  # Builds the Chunk object and opens the file for reading.
  #
  # @param string file    The filename to work with (only its base name is
  #                       used; the directory comes from options['path'])
  # @param hash   options The options with which to parse the file
  # @raise IOError when the file does not exist
  #
  def initialize(file, options = {})
    # merge the caller's options over the defaults (per instance)
    @options = DEFAULT_OPTIONS.merge(options.kind_of?(Hash) ? options : {})

    # check that the path ends with a /
    path = @options['path']
    path += '/' if path[-1, 1] != '/'
    @options['path'] = path

    # make sure chunkSize is an int of at least 64, else fall back to 512
    size = @options['chunkSize'].to_i
    @options['chunkSize'] = size >= 64 ? size : 512

    # set the filename (normalized to its base name inside 'path')
    @file = File.expand_path(path + File.basename(file))

    # check the file exists
    raise IOError, 'Cannot load file: ' + @file unless File.exist?(@file)

    # open the file
    @handle = File.new(@file, 'r')

    # byte offset in the file where the next element search starts
    @pointer = 0

    # add a __destruct style method
    OPEN_HANDLES[object_id] = @handle
    ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
  end

  # finalize
  #
  # Cleans up: closes the file handle registered for the given object id.
  # Does nothing when the id is unknown or the file is already closed.
  #
  # @return void
  #
  def Chunk.finalize(id)
    handle = OPEN_HANDLES.delete(id)
    handle.close if handle && !handle.closed?
  end

  # close
  #
  # Closes this object's file handle right away instead of waiting for
  # garbage collection.
  #
  # @return void
  #
  def close
    self.class.finalize(object_id)
  end

  # read
  #
  # Reads the next available occurrence of the configured XML element,
  # including its start and end tags. Only the exact tags <element> and
  # </element> are recognised. When no element is configured, returns the
  # rest of the file in one string.
  #
  # @return string|false The XML string, or false when nothing (more) was found
  #
  def read()
    element = @options['element'].to_s.strip
    found = element.empty? ? read_remaining_document : read_next_element(element)
    return false unless found

    # File#read(length) yields binary-tagged strings; tag the result with the
    # file's own encoding again.
    found.force_encoding(@handle.external_encoding || Encoding.default_external)
  end

  private

  # Returns everything from the handle's current position to the end of the
  # file, or nil when nothing is left.
  def read_remaining_document
    data = String.new
    data << @handle.read(@options['chunkSize']) until @handle.eof?
    data.empty? ? nil : data
  end

  # Scans forward from @pointer for the next <element>...</element> block.
  # Returns the block and moves @pointer just past it, or returns nil when
  # the end of the file is reached without finding a complete block.
  def read_next_element(element)
    open_tag = "<#{element}>".force_encoding(Encoding::BINARY)
    close_tag = "</#{element}>".force_encoding(Encoding::BINARY)

    @handle.seek(@pointer)

    window = String.new      # bytes read so far that may still matter
    window_offset = @pointer # file offset of window[0]
    open_at = nil            # index of the open tag inside window
    resume_at = 0            # where to continue looking for the close tag

    until @handle.eof?
      window << @handle.read(@options['chunkSize'])

      if open_at.nil?
        open_at = window.index(open_tag)
        if open_at.nil?
          # No open tag yet: forget the scanned bytes, but keep a tail long
          # enough to hold an open tag split across two reads.
          surplus = window.length - (open_tag.length - 1)
          if surplus > 0
            window.slice!(0, surplus)
            window_offset += surplus
          end
          next
        end
        resume_at = open_at + open_tag.length
      end

      close_at = window.index(close_tag, resume_at)
      if close_at
        finish = close_at + close_tag.length
        @pointer = window_offset + finish
        return window[open_at, finish - open_at]
      end

      # Not closed yet: next time only rescan the part that could contain a
      # close tag split across two reads.
      resume_at = [resume_at, window.length - close_tag.length + 1].max
    end

    # Nothing (complete) found; skip the already-scanned prefix next time.
    @pointer = window_offset if open_at.nil?
    nil
  end
end

ruby 读写CSV

read_write_csv.rb
#require 'csv'
require 'rubygems'
require 'fastercsv'

# Rows written as CSV through FCSV: a header row followed by three data rows
# that exercise embedded commas, double quotes and a line break.
rows = [
    [:Number,:One,:Two,:Three,:Four],
    [1,"first","second","Third one quoted with a, comma","fourth \"double quotes\"\n line break"],
    [2,"erst","zweite","Dritte,mit Komma","viertl"],
    [3,"primero","segundo","tercero","cuarto,con la coma"]
]

FCSV { |out| rows.each { |row| out << row } }

# Read sample.csv back using its first row as headers and print every field
# of every row.
FasterCSV.foreach("sample.csv", {:headers=>true}) do |r|
   puts "#{r.length} fields: >>#{r.inspect}<<"
   r.each do |header, value|
       puts "\t#{header}=#{value}"
   end
end

ruby 法国Stemmer

.rb
# -*- encoding: utf-8 -*-
#
# Implementation of the stemming algorithm at http://snowball.tartarus.org/algorithms/french/stemmer.html
# Based on the javascript port made by Kasun Gajasinghe http://snowball.tartarus.org/otherlangs/french_javascript.txt
# 
# Testing:
#   It uses the file voc.txt (http://snowball.tartarus.org/algorithms/french/voc.txt)
#   and compares results with output.txt (http://snowball.tartarus.org/algorithms/french/output.txt)
#   
# At the time being, it fails for 242 words on 20403, feel free to edit this gist.

# Reduces a French word to its stem, following the steps of the Snowball
# French stemming algorithm referenced in the file header.
#
# word - the String to stem. It is not modified: all work is done on a
#        downcased copy.
#
# Returns the stem as a downcased String with surrounding whitespace removed.
def stem(word)
    #    Letters in French include the following accented forms,
    #        â   à   ç   ë   é   ê   è   ï   î   ô   û   ù
    #    The following letters are vowels:
    #        a   e   i   o   u   y   â   à   ë   é   ê   è   ï   î   ô   û   ù
    
    # Kept to detect later whether any step has changed the word.
    original_word = word

    # Downcase it
    word = word.downcase
    # Scratch variable reused below for regex match positions.
    tmp = -1

    # Uppercase some part to exclude them later on
    # (the vowel character classes below are lowercase only, so U, I and Y
    # marked here no longer count as vowels)
    word.gsub!(/qu/, 'qU')
    word.gsub!(/([aeiouyâàëéêèïîôûù])u([aeiouyâàëéêèïîôûù])/, '\1U\2')
    word.gsub!(/([aeiouyâàëéêèïîôûù])i([aeiouyâàëéêèïîôûù])/, '\1I\2')
    word.gsub!(/([aeiouyâàëéêèïîôûù])y/, '\1Y')
    word.gsub!(/y([aeiouyâàëéêèïîôûù])/, 'Y\1')

    # Determine RV
    # rv is the text of the region, rv_index the offset in word where it starts.
    rv = '';
    rv_index = -1;

    # Words starting with par/col/tap or with two vowels: RV starts at offset 3.
    if word =~ /^(par|col|tap)/ || word =~ /^[aeiouyâàëéêèïîôûù]{2}/
        rv = word[3..word.length]
        rv_index = 3
    else
        # Otherwise RV starts right after the first vowel found from the
        # second character onwards (+2: one for the skipped first character,
        # one to step past the vowel itself).
        rv_index = (word[1..word.length]) =~ /[aeiouyâàëéêèïîôûù]/
        if rv_index
            rv_index += 2
            rv = word[rv_index..word.length]
        else
            # No such vowel: RV is empty and starts at the end of the word.
            rv_index = word.length
        end
    end

    # R1 is the region after the first non-vowel following a vowel, or the end of the word if there is no such non-vowel.
    # R2 is the region after the first non-vowel following a vowel in R1, or the end of the word if there is no such non-vowel
    r1_index = word =~ /[aeiouyâàëéêèïîôûù][^aeiouyâàëéêèïîôûù]/
    r1 = ''
    if r1_index
        # +2 steps past the vowel and the non-vowel that were matched.
        r1_index += 2
        r1 = word[r1_index..word.length]
    else
        r1_index = word.length
    end

    r2_index = -1
    r2 = ''
    if r1_index
        # Same search, but inside R1; the offset is then converted back to an
        # offset in word by adding r1_index.
        r2_index = r1 =~ /[aeiouyâàëéêèïîôûù][^aeiouyâàëéêèïîôûù]/
        if r2_index
            r2_index += 2
            r2 = r1[r2_index..r1.length]
            r2_index += r1_index
        else
            r2 = ''
            r2_index = word.length
        end
    end
    # R1 is never allowed to start before offset 3. Note that R2 was already
    # derived from the unadjusted R1 above.
    if r1_index && r1_index < 3
        r1_index = 3
        r1 = word[r1_index..word.length]
    end

    # Step 1: Standard suffix removal
    
    # Each aN_index is the offset where a suffix of family N starts, or nil
    # when the word does not end with one. For a12 and a15 the pattern also
    # includes the character before the suffix, so the offset points at that
    # character.
    a1_index = word =~ /(ance|iqUe|isme|able|iste|eux|ances|iqUes|ismes|ables|istes)$/
    a2_index = word =~ /(atrice|ateur|ation|atrices|ateurs|ations)$/
    a3_index = word =~ /(logie|logies)$/
    a4_index = word =~ /(usion|ution|usions|utions)$/
    a5_index = word =~ /(ence|ences)$/
    a6_index = word =~ /(ement|ements)$/
    a7_index = word =~ /(ité|ités)$/
    a8_index = word =~ /(if|ive|ifs|ives)$/
    a9_index = word =~ /(eaux)$/
    a10_index = word =~ /(aux)$/
    a11_index = word =~ /(euse|euses)$/
    a12_index = word =~ /[^aeiouyâàëéêèïîôûù](issement|issements)$/
    a13_index = word =~ /(amment)$/
    a14_index = word =~ /(emment)$/
    a15_index = word =~ /[aeiouyâàëéêèïîôûù](ment|ments)$/

    # Only the first family that applies is handled. "index >= rN_index"
    # means the suffix starts inside region RN (likewise for RV).
    if a1_index && a1_index >= r2_index
        # delete the suffix
        word = word[0..a1_index - 1]
    elsif a2_index && a2_index >= r2_index
        # delete the suffix; a preceding "ic" is deleted if in R2, else
        # replaced by "iqU"
        word = word[0..a2_index - 1]
        a2_index2 = word =~ /(ic)$/
        if a2_index2 && a2_index2 >= r2_index
            word = word[0..a2_index2 - 1]
        else
            word.gsub!(/(ic)$/, 'iqU')
        end
    elsif a3_index && a3_index >= r2_index
        word.gsub!(/(logie|logies)$/, 'log')
    elsif a4_index && a4_index >= r2_index
        word.gsub!(/(usion|ution|usions|utions)$/, 'u')
    elsif a5_index && a5_index >= r2_index
        word.gsub!(/(ence|ences)$/, 'ent')
    elsif a6_index && a6_index >= rv_index
        # delete "ement(s)" when in RV, then look at what now ends the word
        word = word[0..a6_index - 1]
        tmp = word =~ /(iv)$/
        if !tmp.nil? && tmp >= r2_index
            # "iv" in R2 is deleted, and then a preceding "at" in R2 as well
            word.gsub!(/(iv)$/, '')
            tmp = word =~ /(at)$/
            if !tmp.nil? && tmp >= r2_index
                word.gsub!(/(at)$/, '')
            end
        elsif word =~ /(eus)$/
            # "eus": deleted if in R2, else replaced by "eux" if in R1
            a6_index2 = word =~ /(eus)$/
            if a6_index2 >= r2_index
                word = word[0..a6_index2 - 1]
            elsif a6_index2 >= r1_index
                word = word[0..a6_index2 - 1] + 'eux';
            end
        elsif !(tmp = (word =~ /(abl|iqU)$/)).nil? && tmp >= r2_index
            word.gsub!(/(abl|iqU)$/, '')
        elsif !(tmp = (word =~ /(ièr|Ièr)$/)).nil? && tmp >= rv_index
            word.gsub!(/(ièr|Ièr)$/, 'i')
        end
    elsif a7_index && a7_index >= r2_index
        # delete "ité(s)", then treat a preceding "abil", "ic" or "iv"
        word = word[0..a7_index - 1]
        if word =~ /(abil)$/
            a7_index2 = word =~ /(abil)$/
            if a7_index2 >= r2_index
                word = word[0..a7_index2 - 1]
            else
                word = word[0..a7_index2 - 1] + 'abl'
            end
        elsif word =~ /(ic)$/
            a7_index3 = word =~ /(ic)$/
            if a7_index3 && a7_index3 >= r2_index
                word = word[0..a7_index3 - 1]
            else
                word.gsub!(/(ic)$/, 'iqU')
            end
        # NOTE(review): every other region test in this function uses >=;
        # this one uses != - confirm whether that is intended.
        elsif !(tmp = (word =~ /(iv)$/)).nil? && tmp != r2_index
            word.gsub!(/(iv)$/, '')
        end
    elsif a8_index && a8_index >= r2_index
        # delete "if/ive(s)", then a preceding "at" in R2, then treat "ic"
        word = word[0..a8_index - 1]
        tmp = word =~ /(at)$/
        if !tmp.nil? && tmp >= r2_index
            word.gsub!(/(at)$/, '')
            tmp = word =~ /(ic)$/
            if !tmp.nil? && tmp >= r2_index
                word.gsub!(/(ic)$/, '')
            else
                word.gsub!(/(ic)$/, 'iqU')
            end
        end
    elsif a9_index
        # no region test for "eaux"; the replacement pattern is not anchored
        # to the end of the word
        word.gsub!(/(eaux)/, 'eau')
    elsif a10_index && a10_index >= r1_index
        word.gsub!(/(aux)/, 'al')
    elsif a11_index
        # "euse(s)": deleted if in R2, else replaced by "eux" if in R1
        a11_index2 = word =~ /(euse|euses)$/
        if a11_index2 >= r2_index
            word = word[0..a11_index2 - 1]
        elsif a11_index2 >= r1_index
            word = word[0..a11_index2 - 1] + 'eux'
        end
    elsif a12_index && a12_index >= r1_index
        # keep the non-vowel the pattern matched before the suffix
        word = word[0..a12_index]
    elsif a13_index && a13_index >= rv_index
        word.gsub!(/(amment)$/, 'ant')
    elsif a14_index && a14_index >= rv_index
        word.gsub!(/(emment)$/, 'ent')
    elsif a15_index && a15_index >= rv_index
        # keep the vowel the pattern matched before the suffix
        word = word[0..a15_index]
    end

    # Step 2a: Verb suffixes beginning i
    
    # Snapshot used by step 2b to see whether step 2a changed anything.
    word_step1 = word.clone
    step_2a_done = false
    # Runs when the downcased working word still equals the original input,
    # or when the original ends in amment/emment/ment/ments.
    if original_word == word.downcase || original_word =~ /(amment|emment|ment|ments)$/
        step_2a_done = true
        # The suffix must follow a non-vowel, which is kept (\1).
        b1_regex = /([^aeiouyâàëéêèïîôûù])(îmes|ît|îtes|i|ie|ies|ir|ira|irai|iraIent|irais|irait|iras|irent|irez|iriez|irions|irons|iront|is|issaIent|issais|issait|issant|issante|issantes|issants|isse|issent|isses|issez|issiez|issions|issons|it)$/i
        tmp = word =~ b1_regex
        if !tmp.nil? && tmp >= rv_index
            word.gsub!(b1_regex, '\1')
        end
    end

    # Step 2b: Other verb suffixes
    # Only when step 2a ran and left the word unchanged.
    if step_2a_done && word_step1 == word
        b2_regex = /(é|ée|ées|és|èrent|er|era|erai|eraIent|erais|erait|eras|erez|eriez|erions|erons|eront|ez|iez)$/i
        tmp = word =~ b2_regex
        if tmp && tmp >= rv_index
            word.gsub!(b2_regex, '')
        else
            # "ions" is deleted only when it lies in R2
            tmp = word =~ /(ions)$/
            if tmp && tmp >= r2_index
                word.gsub!(/(ions)$/, '')
            else
                # a-suffixes: first try with a preceding "e" (removed too),
                # then without it
                b3_regex = /e(âmes|ât|âtes|a|ai|aIent|ais|ait|ant|ante|antes|ants|as|asse|assent|asses|assiez|assions)$/i
                tmp = word =~ b3_regex
                if tmp && tmp >= rv_index
                    word.gsub!(b3_regex, '')
                else
                    b3_regex2 = /(âmes|ât|âtes|a|ai|aIent|ais|ait|ant|ante|antes|ants|as|asse|assent|asses|assiez|assions)$/i
                    tmp = word =~ b3_regex2
                    if tmp && tmp >= rv_index
                        word.gsub!(b3_regex2, '')
                    end
                end
            end
        end
    end

    # Step 3 runs when the word has been altered so far, step 4 when it has not.
    if original_word != word.downcase
        # Step 3
        rep = ''
        # a final Y becomes i, a final ç becomes c
        if word =~ /Y$/
            word.gsub!(/Y$/, 'i')
        elsif word =~ /ç$/
            word.gsub!(/ç$/, 'c')
        end
    else
        # Step 4
        # If the word ends s, not preceded by a, i, o, u, è or s, delete it
        tmp = word =~ /([^aiouès])s$/
        if tmp && tmp >= rv_index
            word.gsub!(/([^aiouès])s$/, '\1')
        end
        # "ion" is deleted when it is in R2 and the "s"/"t" before it is in RV
        e1_index = word =~ /ion$/
        tmp = word =~ /[st]ion$/
        if e1_index && e1_index >= r2_index && tmp && tmp >= rv_index
            word = word[0..e1_index - 1]
        else
            # ier/ière in RV are replaced by "i"
            e2_index = word =~ /(ier|ière|Ier|Ière)$/
            if e2_index && e2_index >= rv_index
                word = word[0..e2_index - 1] + 'i'
            else
                # a final "e" in RV is deleted; "guë" in RV becomes "gu"
                tmp = word =~ /e$/
                if tmp && tmp >= rv_index
                    word.gsub!(/e$/, '')
                elsif !(tmp = (word =~ /guë$/)).nil? && tmp >= rv_index
                    word.gsub!(/guë$/, 'gu')
                end
            end
        end
    end

    # Step 5: Undouble
    # enn -> en, onn -> on, ett -> et, ell -> el, eill -> eil at the end of the word
    word.gsub!(/(en|on)(n)$/, '\1')
    word.gsub!(/(ett)$/, 'et')
    word.gsub!(/(el|eil)(l)$/, '\1')

    # Step 6: Un-accent
    # é or è followed only by non-vowels up to the end of the word becomes e
    word.gsub!(/[éè]([^aeiouyâàëéêèïîôûù]+)$/, 'e\1')
    # Undo the U/I/Y markers and trim whitespace (e.g. a trailing newline).
    word.downcase.strip

end

# TESTS
# Opens voc.txt and compare the stem result with output.txt

# Expected stems, one per line, in the same order as the words in voc.txt.
# File.readlines / File.foreach open and close the files themselves, and
# replace IO#lines, which current Ruby versions no longer provide.
expected_lines = File.readlines('output.txt', :encoding => 'UTF-8')

errors = 0

File.foreach('voc.txt', :encoding => 'UTF-8').each_with_index do |l, i|
    stemmed = stem(l)
    # to_s: a missing expected line counts as a mismatch instead of crashing
    expected = expected_lines[i].to_s.strip
    if stemmed != expected
        puts "Error: #{l} expected: #{expected} actual: #{stemmed}"
        errors += 1
    end
end

puts "#{errors} error(s) found, tested #{expected_lines.length} words/stems"