(*
* bibtex2html - A BibTeX to HTML translator
* Copyright (C) 1997-2000 Jean-Christophe Filliâtre and Claude Marché
*
* This software is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU General Public License version 2 for more details
* (enclosed in the file GPL).
*)
(*i $Id: latex_accents.mll,v 1.9 2006/02/22 10:05:44 filliatr Exp $ i*)
(* Normalize both ISO-latin characters and LaTeX accents to HTML entities *)
{
(* Output accumulator shared by every lexer rule below. *)
let string_buf = Buffer.create 79

(* [add_string s] appends [s] to the shared output buffer. *)
let add_string = Buffer.add_string string_buf

(* [add lexbuf] appends the text currently matched by [lexbuf]
   to the shared output buffer. *)
let add lexbuf = lexbuf |> Lexing.lexeme |> add_string

(* Flag stored by [normalize]; no rule in this file reads it. *)
let produce_regexp = ref false
}
(* Blank characters that may terminate a control word such as the
   dotless i: both the ordinary blank and the tab are accepted. *)
let space = [' ' '\t']
(* Main scanning loop. Each step consumes one character of the input:
   - a backslash hands over to [control];
   - braces are discarded;
   - the accented characters listed below are written with [add_string]
     using the replacement text given in their action;
   - any other character is copied unchanged;
   - end of input stops the scan. *)
rule next_char = parse
  (* structure *)
  | '\\'        { control lexbuf }
  | ['{' '}']   { next_char lexbuf }
  (* cedilla and tilde *)
  | 'ç'         { add_string "ç"; next_char lexbuf }
  | 'ñ'         { add_string "ñ"; next_char lexbuf }
  (* diaeresis *)
  | 'ä'         { add_string "ä"; next_char lexbuf }
  | 'ë'         { add_string "ë"; next_char lexbuf }
  | 'ï'         { add_string "ï"; next_char lexbuf }
  | 'ö'         { add_string "ö"; next_char lexbuf }
  | 'ü'         { add_string "ü"; next_char lexbuf }
  | 'Ä'         { add_string "Ä"; next_char lexbuf }
  | 'Ë'         { add_string "Ë"; next_char lexbuf }
  | 'Ï'         { add_string "Ï"; next_char lexbuf }
  | 'Ö'         { add_string "Ö"; next_char lexbuf }
  | 'Ü'         { add_string "Ü"; next_char lexbuf }
  (* acute accent *)
  | 'á'         { add_string "á"; next_char lexbuf }
  | 'é'         { add_string "é"; next_char lexbuf }
  | 'í'         { add_string "í"; next_char lexbuf }
  | 'ó'         { add_string "ó"; next_char lexbuf }
  | 'ú'         { add_string "ú"; next_char lexbuf }
  | 'Á'         { add_string "Á"; next_char lexbuf }
  | 'É'         { add_string "É"; next_char lexbuf }
  | 'Í'         { add_string "Í"; next_char lexbuf }
  | 'Ó'         { add_string "Ó"; next_char lexbuf }
  | 'Ú'         { add_string "Ú"; next_char lexbuf }
  (* grave accent *)
  | 'à'         { add_string "à"; next_char lexbuf }
  | 'è'         { add_string "è"; next_char lexbuf }
  | 'ì'         { add_string "ì"; next_char lexbuf }
  | 'ò'         { add_string "ò"; next_char lexbuf }
  | 'ù'         { add_string "ù"; next_char lexbuf }
  | 'À'         { add_string "À"; next_char lexbuf }
  | 'È'         { add_string "È"; next_char lexbuf }
  | 'Ì'         { add_string "Ì"; next_char lexbuf }
  | 'Ò'         { add_string "Ò"; next_char lexbuf }
  | 'Ù'         { add_string "Ù"; next_char lexbuf }
  (* circumflex *)
  | 'â'         { add_string "â"; next_char lexbuf }
  | 'ê'         { add_string "ê"; next_char lexbuf }
  | 'î'         { add_string "î"; next_char lexbuf }
  | 'ô'         { add_string "ô"; next_char lexbuf }
  | 'û'         { add_string "û"; next_char lexbuf }
  | 'Â'         { add_string "Â"; next_char lexbuf }
  | 'Ê'         { add_string "Ê"; next_char lexbuf }
  | 'Î'         { add_string "Î"; next_char lexbuf }
  | 'Ô'         { add_string "Ô"; next_char lexbuf }
  | 'Û'         { add_string "Û"; next_char lexbuf }
  (* termination and default copy *)
  | eof         { () }
  | _           { add lexbuf; next_char lexbuf }
(* Entered right after a backslash has been consumed. Accent commands
   are forwarded to the rule handling that accent; the cedilla and
   tilde-n forms are translated directly. Anything else is copied back
   together with the backslash, and a backslash ending the input is
   emitted alone. *)
and control = parse
  | '\''                { right_accent lexbuf }
  | '`'                 { left_accent lexbuf }
  | '^'                 { hat lexbuf }
  | '"'                 { quote_char lexbuf }
  | 'v'                 { czech lexbuf }
  | "c{c}"              { add_string "ç"; next_char lexbuf }
  | '~' ('n' | "{n}")   { add_string "ñ"; next_char lexbuf }
  | eof                 { add_string "\\" }
  | _                   { add_string "\\"; add lexbuf; next_char lexbuf }
(* Entered after a backslash followed by a double quote: the diaeresis
   command. The accented letter may be given bare or wrapped in braces;
   the letter i may also be given as the dotless-i control word, either
   braced or followed by at least one [space] character. *)
and quote_char = parse
    ('a'|"{a}")                   { add_string "ä" ; next_char lexbuf }
  | ('o'|"{o}")                   { add_string "ö" ; next_char lexbuf }
  | ('u'|"{u}")                   { add_string "ü" ; next_char lexbuf }
  | ('e'|"{e}")                   { add_string "ë" ; next_char lexbuf }
  | ('A'|"{A}")                   { add_string "Ä" ; next_char lexbuf }
  | ('O'|"{O}")                   { add_string "Ö" ; next_char lexbuf }
  | ('U'|"{U}")                   { add_string "Ü" ; next_char lexbuf }
  | ('E'|"{E}")                   { add_string "Ë" ; next_char lexbuf }
  (* the bare letter is accepted, as for upper-case I and as in the
     other accent rules *)
  | ('i'|"\\i" space+|"{\\i}")    { add_string "ï" ; next_char lexbuf }
  | ('I'|"\\I" space+|"{\\I}")    { add_string "Ï" ; next_char lexbuf }
  (* unknown argument: put the command back untouched and keep
     scanning, otherwise the remainder of the input would be lost *)
  | _                             { add_string "\\\"" ; add lexbuf ; next_char lexbuf }
  (* the command is the last thing in the input *)
  | eof                           { add_string "\\\"" }
(* Entered after a backslash followed by an apostrophe: the acute
   accent command. Letters may be bare or braced; i may also be the
   dotless-i control word (braced, or followed by [space] characters).
   A second apostrophe produces the closing double quotation mark.
   Unrecognised input is copied back with the command in front. *)
and right_accent = parse
    ("{a}" | 'a')                       { add_string "á"; next_char lexbuf }
  | ("{e}" | 'e')                       { add_string "é"; next_char lexbuf }
  | ("{\\i}" | "\\i" space+ | 'i')      { add_string "í"; next_char lexbuf }
  | ("{o}" | 'o')                       { add_string "ó"; next_char lexbuf }
  | ("{u}" | 'u')                       { add_string "ú"; next_char lexbuf }
  | ("{A}" | 'A')                       { add_string "Á"; next_char lexbuf }
  | ("{E}" | 'E')                       { add_string "É"; next_char lexbuf }
  | ("{\\I}" | "\\I" space+ | 'I')      { add_string "Í"; next_char lexbuf }
  | ("{O}" | 'O')                       { add_string "Ó"; next_char lexbuf }
  | ("{U}" | 'U')                       { add_string "Ú"; next_char lexbuf }
  | '\''                                { add_string "”"; next_char lexbuf }
  | eof                                 { add_string "\\'" }
  | _                                   { add_string "\\'"; add lexbuf; next_char lexbuf }
(* Entered after a backslash followed by a backquote: the grave accent
   command. Letters may be bare or braced; i may also be the dotless-i
   control word (braced, or followed by [space] characters). A second
   backquote produces the opening double quotation mark. Unrecognised
   input is copied back with the command in front. *)
and left_accent = parse
    ("{a}" | 'a')                       { add_string "à"; next_char lexbuf }
  | ("{e}" | 'e')                       { add_string "è"; next_char lexbuf }
  | ("{\\i}" | "\\i" space+ | 'i')      { add_string "ì"; next_char lexbuf }
  | ("{o}" | 'o')                       { add_string "ò"; next_char lexbuf }
  | ("{u}" | 'u')                       { add_string "ù"; next_char lexbuf }
  | ("{A}" | 'A')                       { add_string "À"; next_char lexbuf }
  | ("{E}" | 'E')                       { add_string "È"; next_char lexbuf }
  | ("{\\I}" | "\\I" space+ | 'I')      { add_string "Ì"; next_char lexbuf }
  | ("{O}" | 'O')                       { add_string "Ò"; next_char lexbuf }
  | ("{U}" | 'U')                       { add_string "Ù"; next_char lexbuf }
  | '`'                                 { add_string "“"; next_char lexbuf }
  | eof                                 { add_string "\\`" }
  | _                                   { add_string "\\`"; add lexbuf; next_char lexbuf }
(* Entered after a backslash followed by a caret: the circumflex
   command. Letters may be bare or braced; i may also be the dotless-i
   control word (braced, or followed by [space] characters).
   Unrecognised input is copied back with the command in front. *)
and hat = parse
    ("{a}" | 'a')                       { add_string "â"; next_char lexbuf }
  | ("{e}" | 'e')                       { add_string "ê"; next_char lexbuf }
  | ("{\\i}" | "\\i" space+ | 'i')      { add_string "î"; next_char lexbuf }
  | ("{o}" | 'o')                       { add_string "ô"; next_char lexbuf }
  | ("{u}" | 'u')                       { add_string "û"; next_char lexbuf }
  | ("{A}" | 'A')                       { add_string "Â"; next_char lexbuf }
  | ("{E}" | 'E')                       { add_string "Ê"; next_char lexbuf }
  | ("{\\I}" | "\\I" space+ | 'I')      { add_string "Î"; next_char lexbuf }
  | ("{O}" | 'O')                       { add_string "Ô"; next_char lexbuf }
  | ("{U}" | 'U')                       { add_string "Û"; next_char lexbuf }
  | eof                                 { add_string "\\^" }
  | _                                   { add_string "\\^"; add lexbuf; next_char lexbuf }
(* Entered after a backslash followed by the letter v. Recognises the
   accented forms of r, s and i (bare, braced, or for i the dotless-i
   control word followed by [space] characters). Every other control
   word that merely starts with v also lands here, so the fallback
   cases must restore the backslash and the v exactly as read. *)
and czech = parse
    ('r'|"{r}")                   { add_string "ř" ; next_char lexbuf }
  | ('R'|"{R}")                   { add_string "Ř" ; next_char lexbuf }
  | ('s'|"{s}")                   { add_string "š" ; next_char lexbuf }
  | ('S'|"{S}")                   { add_string "Š" ; next_char lexbuf }
  (* NOTE(review): the replacement used for i and I carries a breve,
     whereas r and s above carry a caron; kept as is, confirm the
     intended glyph. *)
  | ('i'|"\\i" space+ |"{\\i}")   { add_string "ĭ" ; next_char lexbuf }
  | ('I'|"\\I" space+ |"{\\I}")   { add_string "Ĭ" ; next_char lexbuf }
  (* not an accent we know: re-emit the command that was consumed,
     which is backslash-v and not the circumflex command *)
  | _                             { add_string "\\v" ; add lexbuf ; next_char lexbuf }
  | eof                           { add_string "\\v" }
{
(* [normalize to_regexp s] runs the [next_char] lexer over the whole of
   [s] and returns everything the rules wrote to the shared buffer.
   The buffer is emptied first, and [to_regexp] is stored in
   [produce_regexp] before scanning starts. *)
let normalize to_regexp s =
  let lexbuf = Lexing.from_string s in
  Buffer.clear string_buf;
  produce_regexp := to_regexp;
  next_char lexbuf;
  Buffer.contents string_buf
;;
}