(*
 * bibtex2html - A BibTeX to HTML translator
 * Copyright (C) 1997-2000 Jean-Christophe Filliâtre and Claude Marché
 *
 * This software is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU General Public License version 2 for more details
 * (enclosed in the file GPL).
 *)

(*i $Id: latex_accents.mll,v 1.9 2006/02/22 10:05:44 filliatr Exp $ i*)

(* Normalize both ISO-latin characters and LaTeX accents to HTML entities *)

{
  (* Output accumulator shared by every rule below; [normalize] clears it
     before each run and reads it back once the input is exhausted. *)
  let string_buf = Buffer.create 79

  (* Append the literal string [s] to the output. *)
  let add_string s = Buffer.add_string string_buf s

  (* Append the text just matched by the lexer, unchanged, to the output. *)
  let add lexbuf = Buffer.add_string string_buf (Lexing.lexeme lexbuf)

  (* Set by [normalize]; no rule in this file reads it. *)
  let produce_regexp = ref false
}

(* Blank following a control word such as \i : a space or a tab. *)
let space = [' ' '\t']

(* The accepted spellings of an i argument of an accent command: the plain
   letter, the control word followed by blanks, or the braced control word. *)
let small_i = 'i' | "\\i" space+ | "{\\i}"
let capital_i = 'I' | "\\I" space+ | "{\\I}"

(* Main rule.  Consumes the input up to eof and fills [string_buf]:
   - a backslash hands over to [control];
   - braces are dropped;
   - ISO-latin accented bytes, written as decimal escapes so that this file
     does not depend on its own encoding, become HTML entities;
   - any other character is copied unchanged. *)
rule next_char = parse
  | '\\'   { control lexbuf }
  | '{'    { next_char lexbuf }
  | '}'    { next_char lexbuf }
  | '\231' { add_string "&ccedil;"; next_char lexbuf }
  | '\241' { add_string "&ntilde;"; next_char lexbuf }
  | '\228' { add_string "&auml;"; next_char lexbuf }
  | '\246' { add_string "&ouml;"; next_char lexbuf }
  | '\252' { add_string "&uuml;"; next_char lexbuf }
  | '\235' { add_string "&euml;"; next_char lexbuf }
  | '\196' { add_string "&Auml;"; next_char lexbuf }
  | '\214' { add_string "&Ouml;"; next_char lexbuf }
  | '\220' { add_string "&Uuml;"; next_char lexbuf }
  | '\203' { add_string "&Euml;"; next_char lexbuf }
  | '\239' { add_string "&iuml;"; next_char lexbuf }
  | '\207' { add_string "&Iuml;"; next_char lexbuf }
  | '\225' { add_string "&aacute;"; next_char lexbuf }
  | '\243' { add_string "&oacute;"; next_char lexbuf }
  | '\250' { add_string "&uacute;"; next_char lexbuf }
  | '\233' { add_string "&eacute;"; next_char lexbuf }
  | '\193' { add_string "&Aacute;"; next_char lexbuf }
  | '\211' { add_string "&Oacute;"; next_char lexbuf }
  | '\218' { add_string "&Uacute;"; next_char lexbuf }
  | '\201' { add_string "&Eacute;"; next_char lexbuf }
  | '\237' { add_string "&iacute;"; next_char lexbuf }
  | '\205' { add_string "&Iacute;"; next_char lexbuf }
  | '\224' { add_string "&agrave;"; next_char lexbuf }
  | '\242' { add_string "&ograve;"; next_char lexbuf }
  | '\249' { add_string "&ugrave;"; next_char lexbuf }
  | '\232' { add_string "&egrave;"; next_char lexbuf }
  | '\192' { add_string "&Agrave;"; next_char lexbuf }
  | '\210' { add_string "&Ograve;"; next_char lexbuf }
  | '\217' { add_string "&Ugrave;"; next_char lexbuf }
  | '\200' { add_string "&Egrave;"; next_char lexbuf }
  | '\236' { add_string "&igrave;"; next_char lexbuf }
  | '\204' { add_string "&Igrave;"; next_char lexbuf }
  | '\226' { add_string "&acirc;"; next_char lexbuf }
  | '\244' { add_string "&ocirc;"; next_char lexbuf }
  | '\251' { add_string "&ucirc;"; next_char lexbuf }
  | '\234' { add_string "&ecirc;"; next_char lexbuf }
  | '\238' { add_string "&icirc;"; next_char lexbuf }
  | '\194' { add_string "&Acirc;"; next_char lexbuf }
  | '\212' { add_string "&Ocirc;"; next_char lexbuf }
  | '\219' { add_string "&Ucirc;"; next_char lexbuf }
  | '\202' { add_string "&Ecirc;"; next_char lexbuf }
  | '\206' { add_string "&Icirc;"; next_char lexbuf }
  | _      { add lexbuf; next_char lexbuf }
  | eof    { () }

(* called when we have seen "\\" *)
(* Dispatches on the character after the backslash.  An unknown control
   sequence is copied through with its backslash restored.
   NOTE(review): every control sequence whose name starts with v reaches
   [czech], so for instance \vspace is read as \v applied to s. *)
and control = parse
  | '"'             { quote_char lexbuf }
  | '\''            { right_accent lexbuf }
  | '`'             { left_accent lexbuf }
  | '^'             { hat lexbuf }
  | "c{c}"          { add_string "&ccedil;"; next_char lexbuf }
  | 'v'             { czech lexbuf }
  | ("~n"|"~{n}")   { add_string "&ntilde;"; next_char lexbuf }
  | _               { add_string "\\"; add lexbuf; next_char lexbuf }
  | eof             { add_string "\\" }

(* called when we have seen "\\\"" *)
(* Diaeresis.  The argument is a bare letter or a braced one. *)
and quote_char = parse
  | ('a'|"{a}")     { add_string "&auml;"; next_char lexbuf }
  | ('o'|"{o}")     { add_string "&ouml;"; next_char lexbuf }
  | ('u'|"{u}")     { add_string "&uuml;"; next_char lexbuf }
  | ('e'|"{e}")     { add_string "&euml;"; next_char lexbuf }
  | ('A'|"{A}")     { add_string "&Auml;"; next_char lexbuf }
  | ('O'|"{O}")     { add_string "&Ouml;"; next_char lexbuf }
  | ('U'|"{U}")     { add_string "&Uuml;"; next_char lexbuf }
  | ('E'|"{E}")     { add_string "&Euml;"; next_char lexbuf }
  (* the plain letter i is accepted here exactly as in the other accent rules *)
  | small_i         { add_string "&iuml;"; next_char lexbuf }
  | capital_i       { add_string "&Iuml;"; next_char lexbuf }
  (* Unknown argument: give the control sequence back verbatim and keep
     scanning, otherwise the remainder of the input would be lost. *)
  | _               { add_string "\\\""; add lexbuf; next_char lexbuf }
  | eof             { add_string "\\\"" }

(* called when we have seen "\\'" *)
(* Acute accent; a second quote character yields a closing double quote. *)
and right_accent = parse
  | ('a'|"{a}")     { add_string "&aacute;"; next_char lexbuf }
  | ('o'|"{o}")     { add_string "&oacute;"; next_char lexbuf }
  | ('u'|"{u}")     { add_string "&uacute;"; next_char lexbuf }
  | ('e'|"{e}")     { add_string "&eacute;"; next_char lexbuf }
  | ('A'|"{A}")     { add_string "&Aacute;"; next_char lexbuf }
  | ('O'|"{O}")     { add_string "&Oacute;"; next_char lexbuf }
  | ('U'|"{U}")     { add_string "&Uacute;"; next_char lexbuf }
  | ('E'|"{E}")     { add_string "&Eacute;"; next_char lexbuf }
  | ('\'')          { add_string "&rdquo;"; next_char lexbuf }
  | small_i         { add_string "&iacute;"; next_char lexbuf }
  | capital_i       { add_string "&Iacute;"; next_char lexbuf }
  | _               { add_string "\\'"; add lexbuf; next_char lexbuf }
  | eof             { add_string "\\'" }

(* called when we have seen "\\`" *)
(* Grave accent; a second backquote yields an opening double quote. *)
and left_accent = parse
  | ('a'|"{a}")     { add_string "&agrave;"; next_char lexbuf }
  | ('o'|"{o}")     { add_string "&ograve;"; next_char lexbuf }
  | ('u'|"{u}")     { add_string "&ugrave;"; next_char lexbuf }
  | ('e'|"{e}")     { add_string "&egrave;"; next_char lexbuf }
  | ('A'|"{A}")     { add_string "&Agrave;"; next_char lexbuf }
  | ('O'|"{O}")     { add_string "&Ograve;"; next_char lexbuf }
  | ('U'|"{U}")     { add_string "&Ugrave;"; next_char lexbuf }
  | ('E'|"{E}")     { add_string "&Egrave;"; next_char lexbuf }
  | ('`')           { add_string "&ldquo;"; next_char lexbuf }
  | small_i         { add_string "&igrave;"; next_char lexbuf }
  | capital_i       { add_string "&Igrave;"; next_char lexbuf }
  | _               { add_string "\\`"; add lexbuf; next_char lexbuf }
  | eof             { add_string "\\`" }

(* called when we have seen "\\^" *)
(* Circumflex accent. *)
and hat = parse
  | ('a'|"{a}")     { add_string "&acirc;"; next_char lexbuf }
  | ('o'|"{o}")     { add_string "&ocirc;"; next_char lexbuf }
  | ('u'|"{u}")     { add_string "&ucirc;"; next_char lexbuf }
  | ('e'|"{e}")     { add_string "&ecirc;"; next_char lexbuf }
  | ('A'|"{A}")     { add_string "&Acirc;"; next_char lexbuf }
  | ('O'|"{O}")     { add_string "&Ocirc;"; next_char lexbuf }
  | ('U'|"{U}")     { add_string "&Ucirc;"; next_char lexbuf }
  | ('E'|"{E}")     { add_string "&Ecirc;"; next_char lexbuf }
  | small_i         { add_string "&icirc;"; next_char lexbuf }
  | capital_i       { add_string "&Icirc;"; next_char lexbuf }
  | _               { add_string "\\^"; add lexbuf; next_char lexbuf }
  | eof             { add_string "\\^" }

(* called when we have seen "\\v" *)
(* The produced letters have no HTML 3.2 named entity, so numeric character
   references are emitted. *)
and czech = parse
  | ('r'|"{r}")     { add_string "&#345;"; next_char lexbuf }
  | ('R'|"{R}")     { add_string "&#344;"; next_char lexbuf }
  | ('s'|"{s}")     { add_string "&#353;"; next_char lexbuf }
  | ('S'|"{S}")     { add_string "&#352;"; next_char lexbuf }
  | small_i         { add_string "&#301;"; next_char lexbuf }
  | capital_i       { add_string "&#300;"; next_char lexbuf }
  (* the fallbacks restore the command that was actually read, which is v *)
  | _               { add_string "\\v"; add lexbuf; next_char lexbuf }
  | eof             { add_string "\\v" }

{
  (** [normalize to_regexp s] runs [next_char] over [s] and returns the
      accumulated output: [s] with ISO-latin accented characters and the
      recognised LaTeX accent commands replaced by HTML entities, braces
      removed, and everything else copied unchanged.

      [to_regexp] is only stored in [produce_regexp].  The function works on
      the shared buffer [string_buf], which it clears first. *)
  let normalize to_regexp s =
    Buffer.clear string_buf;
    produce_regexp := to_regexp;
    next_char (Lexing.from_string s);
    Buffer.contents string_buf
}