(* * bibtex2html - A BibTeX to HTML translator * Copyright (C) 1997 Jean-Christophe FILLIATRE * * This software is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU General Public License version 2 for more details * (enclosed in the file GPL). *) (*i $Id: latexscan.mll,v 1.32 2007-06-07 11:13:03 filliatr Exp $ i*) (*s This code is Copyright (C) 1997 Xavier Leroy. *) (* Overview: ocamllex scanner that walks LaTeX-flavoured text and emits output through print_s and print_c, which are not defined in this file (presumably they come from Latexmacros - TODO confirm). NOTE(review): in this copy many print_s arguments are empty strings, the sprintf calls in the url printers are given two arguments for a single %s directive, and the action of the \item[ rule looks truncated; markup may have been lost from the literals - compare with a pristine copy before relying on them. *) { open Printf open Latexmacros (* Count of opening braces not yet closed; main and skip_arg increment and decrement it. *) let brace_nesting = ref 0 (* Flag consulted by the ^, _ and letter rules of main; save_state clears it for the duration of a nested scan. No rule visible in this copy sets it - the toggling rule may be in the truncated part, TODO confirm. *) let math_mode = ref false (* When set, the \url rule reads a second raw argument and prints through print_hevea_url instead of print_latex_url. *) let hevea_url = ref false (* [save_nesting f arg] runs [f arg] with brace_nesting reset to 0 and puts the previous depth back afterwards. The depth is not restored if [f] raises. *) let save_nesting f arg = let n = !brace_nesting in brace_nesting := 0; f arg; brace_nesting := n (* [save_state f arg] is like save_nesting but also clears math_mode during the call and restores both values afterwards. *) let save_state f arg = let n = !brace_nesting and m = !math_mode in brace_nesting := 0; math_mode := false; f arg; brace_nesting := n; math_mode := m (* Delimiter character of the current \verb or \path argument; written by main, read by inverb. *) let verb_delim = ref (Char.chr 0) (* Matches one or more spaces, tabs or newlines. *) let r = Str.regexp "[ \t\n]+" (* [remove_whitespace u] deletes every match of [r] from [u]. *) let remove_whitespace u = Str.global_replace r "" u (* [print_latex_url u] strips whitespace from [u] and prints it via print_s, passing [u] twice to the format. *) let print_latex_url u = let u = remove_whitespace u in print_s (sprintf "%s" u u) (* [print_hevea_url u t] strips whitespace from [u] only and prints the format applied to [u] and [t]. *) let print_hevea_url u t = let u = remove_whitespace u in print_s (sprintf "%s" u t) (* [skip_n_args n] builds a list of [n] Skip_arg actions. Only 0 ends the recursion, so [n] must not be negative. *) let rec skip_n_args = function | 0 -> [] | n -> Skip_arg :: skip_n_args (pred n) (* [chop_last_space s] drops one trailing space from [s] if present. It reads s.[n-1] unconditionally, so an empty [s] raises Invalid_argument. *) let chop_last_space s = let n = String.length s in if s.[n-1] = ' ' then String.sub s 0 (n-1) else s } (* Whitespace accepted between the parts of an \hskip specification. *) let space = [' ' '\t' '\n' '\r'] (* Optionally signed decimal number; the second alternative also accepts a lone dot. *) let float = '-'? (['0'-'9']+ | ['0'-'9']* '.' ['0'-'9']*) (* A number immediately followed by one of the listed unit names. *) let dimension = float ("sp" | "pt" | "bp" | "dd" | "mm" | "pc" | "cc" | "cm" | "in" | "ex" | "em" | "mu") (* Main scanner. Consumes the buffer rule by rule, printing as it goes; returns unit at end of input or on a closing brace seen while brace_nesting is 0 or less. *) rule main = parse (* Comments *) '%' [^ '\n'] * '\n' { main lexbuf } (* Paragraphs *) | "\n\n" '\n' * { print_s "
\n"; main lexbuf } (* Font changes: the group body is scanned by a nested main call under save_state, which returns at the closing brace of the group; scanning then resumes in this main. *) | "{\\it" " "* | "{\\itshape" " "* { print_s ""; save_state main lexbuf; print_s ""; main lexbuf } | "{\\em" " "* | "{\\sl" " "* | "{\\slshape" " "* { print_s ""; save_state main lexbuf; print_s ""; main lexbuf } | "{\\bf" " "* | "{\\sf" " "* | "{\\bfseries" " "* | "{\\sffamily" " "* { print_s ""; save_state main lexbuf; print_s ""; main lexbuf } | "{\\sc" " "* | "{\\scshape" " "* | "{\\normalfont" " "* | "{\\upshape" " "* | "{\\mdseries" " "* | "{\\rmfamily" " "* { save_state main lexbuf; main lexbuf } | "{\\tt" " "* | "{\\ttfamily" " "* { print_s ""; save_state main lexbuf; print_s ""; main lexbuf } | "{\\small" " "* { print_s ""; save_state main lexbuf; print_s ""; main lexbuf } | "{\\cal" " "* { save_state main lexbuf; main lexbuf } | "\\cal" " "* { main lexbuf } (* Double quotes *) | '"' { print_s ""; indoublequote lexbuf; print_s ""; main lexbuf } (* Verb, verbatim *) | ("\\verb" | "\\path") _ { (* both command names are five characters long, so index 5 of the lexeme is the delimiter *) verb_delim := Lexing.lexeme_char lexbuf 5; print_s ""; inverb lexbuf; print_s ""; main lexbuf } | "\\begin{verbatim}" { print_s "
"; inverbatim lexbuf;
print_s ""; main lexbuf }
(* Raw html is copied through by rawhtml; latexonly content is discarded by latexonly *)
| "\\begin{rawhtml}"
{ rawhtml lexbuf; main lexbuf }
| "\\begin{latexonly}"
{ latexonly lexbuf; main lexbuf }
(* Itemize and similar environments *)
| "\\item[" [^ ']']* "]"
(* NOTE(review): the action below contains an else with no matching if, so text appears to be missing between the pattern above and its action - TODO restore from a pristine copy *)
{ print_s "" else print_s "\n"; main lexbuf } (* \hskip: the whole specification, including optional plus and minus parts, is replaced by one print_s call *) | "\\hskip" space* dimension (space* "plus" space* dimension)? (space* "minus" space* dimension)? { print_s " "; main lexbuf } (* Special characters *) (* \char followed by decimal digits prints the character with that code; the five-character command prefix is dropped before int_of_string, and Char.chr raises Invalid_argument for codes outside 0..255 *) | "\\char" ['0'-'9']+ { let lxm = Lexing.lexeme lexbuf in let code = String.sub lxm 5 (String.length lxm - 5) in print_c(Char.chr(int_of_string code)); main lexbuf } | "--" | "---" { print_s "-"; main lexbuf } | "<" { print_s "<"; main lexbuf } | ">" { print_s ">"; main lexbuf } | "~" { print_s " "; main lexbuf } | "``" { print_s "“"; main lexbuf } | "''" { print_s "”"; main lexbuf } (* NOTE(review): -- and --- are already matched by an earlier rule of this entry point; ocamllex keeps the first rule among matches of equal length, so the two rules below look unreachable - confirm *) | "--" { print_s "–"; main lexbuf } | "---" { print_s "—"; main lexbuf } (* ^ and _ : in math mode the next raw argument is scanned as a separate buffer between two print_s calls; otherwise the character itself is printed *) | "^" { if !math_mode then begin let buf = Lexing.from_string (raw_arg lexbuf) in print_s ""; save_state main buf; print_s"" end else print_s "^"; main lexbuf } | "_" { if !math_mode then begin let buf = Lexing.from_string (raw_arg lexbuf) in print_s ""; save_state main buf; print_s"" end else print_s "_"; main lexbuf } (* URLs: one raw argument, plus a second one used as the text when hevea_url is set *) | "\\url" { let url = raw_arg lexbuf in if !hevea_url then let text = raw_arg lexbuf in print_hevea_url url text else print_latex_url url; main lexbuf } (* General case for environments and commands *) | ("\\begin{" | "\\end{") ['A'-'Z' 'a'-'z']+ "}" | "\\" (['A'-'Z' 'a'-'z']+ '*'? " "? 
| [^ 'A'-'Z' 'a'-'z']) { (* exec_action runs one macro action; the action constructors and find_macro are not defined in this file (presumably Latexmacros - TODO confirm) *) let rec exec_action = function | Print str -> print_s str | Print_arg -> print_arg lexbuf | Raw_arg f -> f (raw_arg lexbuf) | Skip_arg -> save_nesting skip_arg lexbuf | Recursive s -> main (Lexing.from_string s) | Parameterized f -> List.iter exec_action (f (raw_arg lexbuf)) in (* strip the optional trailing space admitted by the pattern before the lookup *) let m = chop_last_space (Lexing.lexeme lexbuf) in List.iter exec_action (find_macro m); main lexbuf } (* Nesting of braces: a closing brace at depth 0 or less ends this call of main without recursing *) | '{' { incr brace_nesting; main lexbuf } | '}' { if !brace_nesting <= 0 then () else begin decr brace_nesting; main lexbuf end } (* Default rule for other characters *) | eof { () } | ['A'-'Z' 'a'-'z']+ { if !math_mode then print_s ""; print_s(Lexing.lexeme lexbuf); if !math_mode then print_s ""; main lexbuf } | _ { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf } (* Body of a double-quoted span: copies characters up to the closing quote or end of input, with dedicated rules for <, >, & and for a backslash followed by a quote or by another backslash. *) and indoublequote = parse '"' { () } | "<" { print_s "<"; indoublequote lexbuf } | ">" { print_s ">"; indoublequote lexbuf } | "&" { print_s "&"; indoublequote lexbuf } | "\\\"" { print_s "\""; indoublequote lexbuf } | "\\\\" { print_s "\\"; indoublequote lexbuf } | eof { () } | _ { print_c(Lexing.lexeme_char lexbuf 0); indoublequote lexbuf } (* Body of \verb or \path: copies characters until the one equal to verb_delim, which is consumed but not printed, or until end of input. The comparison is written with ==, physical equality. *) and inverb = parse "<" { print_s "<"; inverb lexbuf } | ">" { print_s ">"; inverb lexbuf } | "&" { print_s "&"; inverb lexbuf } | eof { () } | _ { let c = Lexing.lexeme_char lexbuf 0 in if c == !verb_delim then () else (print_c c; inverb lexbuf) } (* Body of a verbatim environment: copies characters up to the matching \end marker or end of input. *) and inverbatim = parse "<" { print_s "<"; inverbatim lexbuf } | ">" { print_s ">"; inverbatim lexbuf } | "&" { print_s "&"; inverbatim lexbuf } | "\\end{verbatim}" { () } | eof { () } | _ { print_c(Lexing.lexeme_char lexbuf 0); inverbatim lexbuf } (* Copies every character unchanged through print_c until the end of the rawhtml environment or end of input. *) and rawhtml = parse "\\end{rawhtml}" { () } | eof { () } | _ { print_c(Lexing.lexeme_char lexbuf 0); rawhtml lexbuf } (* Discards everything up to the end of the latexonly environment or end of input. *) and latexonly = parse "\\end{latexonly}" { () } | eof { () } | _ { latexonly lexbuf } (* Prints the next macro argument. A braced group is scanned by main under save_nesting; a bracketed optional argument is skipped first; spaces are ignored. Any other character is printed and scanning then continues in main, so the call only returns where that main call stops. *) and print_arg = parse "{" { save_nesting main lexbuf } | "[" { skip_optional_arg lexbuf; print_arg lexbuf } | " " { print_arg lexbuf } | eof { () } | _ { 
print_c(Lexing.lexeme_char lexbuf 0); main lexbuf } (* Skips one macro argument without printing. Braces adjust brace_nesting and scanning goes on while the depth stays positive; at depth 0 a bracketed optional argument is skipped first and a single other character counts as the whole argument. The caller visible in this file invokes it through save_nesting. *) and skip_arg = parse "{" { incr brace_nesting; skip_arg lexbuf } | "}" { decr brace_nesting; if !brace_nesting > 0 then skip_arg lexbuf } | "[" { if !brace_nesting = 0 then skip_optional_arg lexbuf; skip_arg lexbuf } | " " { skip_arg lexbuf } | eof { () } | _ { if !brace_nesting > 0 then skip_arg lexbuf } (* Returns the next argument as an unprocessed string: the contents of a braced group (via nested_arg), a backslash followed by letters, or a single character. Leading spaces and bracketed optional arguments are skipped; end of input yields the empty string. *) and raw_arg = parse " " { raw_arg lexbuf } | '{' { nested_arg lexbuf } | "[" { skip_optional_arg lexbuf; raw_arg lexbuf } | '\\' ['A'-'Z' 'a'-'z']+ { Lexing.lexeme lexbuf } | eof { "" } | _ { Lexing.lexeme lexbuf } (* Returns the text up to the closing brace matching an opening brace that was already consumed; inner groups are kept with their braces, the final closing brace is dropped. Stops silently at end of input. *) and nested_arg = parse '}' { "" } | '{' { let l = nested_arg lexbuf in "{" ^ l ^ "}" ^ (nested_arg lexbuf) } | eof { "" } | [^ '{' '}']+{ let x = Lexing.lexeme lexbuf in x ^ (nested_arg lexbuf) } (* Consumes input up to and including the first closing bracket, or to end of input; brackets are not nested. *) and skip_optional_arg = parse "]" { () } | eof { () } | _ { skip_optional_arg lexbuf } (* Personal addition: [read_macros] reads (La)TeX macro definitions. It scans the whole buffer and hands over to read_def after each \def or \newcommand; every other character is ignored. *) and read_macros = parse "\\def" | "\\newcommand" { read_def lexbuf; read_macros lexbuf } | eof { () } | _ { read_macros lexbuf } (* Parses what follows \def or \newcommand and registers it through def (not defined in this file). First form: a backslash plus letters, then a raw body. Second form: the name in braces, optionally followed by a bracketed argument count n, then a raw body; n Skip_arg actions are put in front of the body. Unless Options.quiet is set, each definition is reported on stderr. Leading whitespace is skipped and any other input ends the call. NOTE(review): s excludes the backslash in the first form but includes it in the second, since as binds looser than concatenation - confirm which spelling def expects. NOTE(review): the whitespace rule also matches the empty string, so at end of input it appears to call read_def again without consuming anything - confirm. *) and read_def = parse '\\' (['a'-'z' 'A'-'Z']+ as s) { let b = raw_arg lexbuf in if not !Options.quiet then begin eprintf "macro: %s = %s\n" s b; flush stderr end; def s [Recursive b] } | "{" ("\\" ['a'-'z' 'A'-'Z']+ as s) "}" ("[" (['0'-'9']+ as n) "]")? { let b = raw_arg lexbuf in if not !Options.quiet then begin eprintf "macro: %s = %s\n" s b; flush stderr end; let n = match n with None -> 0 | Some n -> int_of_string n in def s (skip_n_args n @ [Recursive b]) } | [' ' '\t' '\n']* { read_def lexbuf } | _ { () }