Google

"DTD/xhtml1-strict.dtd">
Module Amrita::Sanitizer
In: lib/amrita/format.rb

This module provides methods for avoiding XSS vulnerabilities, based on guidance from the IPA home page (Japanese): www.ipa.go.jp/security/awareness/vendor/programming/a01_02.html

Methods
Public Instance methods
sanitize_text(text)

escape &<>

# File lib/amrita/format.rb, line 21
    # Returns an escaped copy of +text+ for use as HTML text content.
    #
    # Substitutions are applied in this order: matches of
    # AMP_WITHOUT_REFRENCE become "&amp;", "<" becomes "&lt;" and ">" becomes
    # "&gt;". The argument itself is never modified; all work happens on a
    # duplicate.
    #
    # NOTE(review): AMP_WITHOUT_REFRENCE is defined elsewhere; judging by its
    # name it presumably matches "&" that is not already part of a character
    # reference -- confirm against its definition.
    def sanitize_text(text)
      escaped = text.dup
      [
        [AMP_WITHOUT_REFRENCE, '&amp;'],
        ['<', '&lt;'],
        ['>', '&gt;']
      ].each do |pattern, entity|
        escaped.gsub!(pattern, entity)
      end
      escaped
    end
sanitize_attribute_value(text)

escape &<>"'

# File lib/amrita/format.rb, line 30
    # Returns an escaped copy of +text+ for use inside an HTML attribute value.
    #
    # On top of the ampersand and angle-bracket substitutions, both quote
    # characters are replaced: '"' becomes "&quot;" and "'" becomes "&#39;".
    # The substitutions run in the order listed below, on a duplicate of the
    # argument, so the caller's string is left untouched.
    #
    # NOTE(review): AMP_WITHOUT_REFRENCE is defined elsewhere; judging by its
    # name it presumably matches "&" that is not already part of a character
    # reference -- confirm against its definition.
    def sanitize_attribute_value(text)
      substitutions = [
        [AMP_WITHOUT_REFRENCE, '&amp;'],
        ['<', '&lt;'],
        ['>', '&gt;'],
        ['"', '&quot;'],
        ["'", '&#39;']
      ]
      # each_with_object hands back the memo, i.e. the escaped duplicate.
      substitutions.each_with_object(text.dup) do |(pattern, entity), result|
        result.gsub!(pattern, entity)
      end
    end
sanitize_url(text, allowd_scheme = DefaultAllowedScheme)

sanitize_url accepts only these characters

    --- http://www.ietf.org/rfc/rfc2396.txt ---
    uric = reserved | unreserved | escaped
    reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
    unreserved = alphanum | mark
    mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    escaped = "%" hex hex

sanitize_url accepts only the schemes specified by allowd_scheme

The default is http: https: ftp: mailto:

# File lib/amrita/format.rb, line 62
    # Validates +text+ as a URL and returns a copy that is safe to embed in
    # HTML, or nil when the URL is rejected.
    #
    # text::          the candidate URL string.
    # allowd_scheme:: a collection indexed by scheme name (e.g. a Hash);
    #                 a scheme is accepted when <tt>allowd_scheme[scheme]</tt>
    #                 is truthy. Defaults to DefaultAllowedScheme.
    #
    # Returns nil when +text+ matches UrlInvalidChar or when it starts a line
    # with a scheme that is not allowed. Otherwise returns a duplicate of
    # +text+ with every "'" replaced by "&#39;". The argument is not modified.
    def sanitize_url(text, allowd_scheme = DefaultAllowedScheme)
      # Reject text containing characters that are not allowed in a URL.
      return nil if text =~ UrlInvalidChar

      # Reject text that carries an unknown scheme.
      # --- http://www.ietf.org/rfc/rfc2396.txt ---
      # scheme = alpha *( alpha | digit | "+" | "-" | "." )
      # Text without any scheme (e.g. a relative URL) skips this check.
      if text =~ %r{^([A-Za-z][A-Za-z0-9+\-.]*):}
        return nil unless allowd_scheme[Regexp.last_match(1)]
      end

      # Escape HTML.
      # special = "&" | "<" | ">" | '"' | "'"
      # "<", ">" and '"' were checked earlier (presumably rejected by
      # UrlInvalidChar -- confirm against its definition), and "&" is
      # deliberately left as-is, so only "'" needs replacing here.
      escaped = text.dup
      escaped.gsub!("'", '&#39;')
      escaped
    end