{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} {-# LANGUAGE OverloadedStrings #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Input.Bibtex -- Copyright : (c) John MacFarlane -- License : BSD-style (see LICENSE) -- -- Maintainer : John MacFarlane -- Stability : unstable-- Portability : unportable -- -- Modified : JH: Use just the bibtex parser ----------------------------------------------------------------------------- module Bibtex ( bibEntries , resolveCrossRefs , Item(..) ) where import Text.Parsec hiding (optional, (<|>), many, State) import Control.Applicative import Text.Pandoc import Data.List.Split (splitOn, splitWhen, wordsBy) import Data.List (intercalate) import Data.Maybe import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum) import Control.Monad import Control.Monad.RWS import System.Environment (getEnvironment) import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..)) import qualified Text.Pandoc.Walk as Walk import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.Text as T data Item = Item{ identifier :: String , entryType :: String , fields :: [(String, String)] } type BibParser = Parsec [Char] [(String, String)] bibEntries :: BibParser [Item] bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString bibSkip :: BibParser () bibSkip = skipMany1 (satisfy (/='@')) bibComment :: BibParser () bibComment = try $ do char '@' cistring "comment" skipMany (satisfy (/='\n')) bibPreamble :: BibParser () bibPreamble = try $ do char '@' cistring "preamble" spaces void inBraces return () bibString :: BibParser () bibString = try $ do char '@' cistring "string" spaces char '{' spaces f <- entField spaces char '}' updateState $ (f:) return () inBraces :: BibParser String inBraces = try $ do char '{' res <- manyTill ( many1 (noneOf "{}\\") <|> (char '\\' >> ( (char '{' >> return "\\{") <|> (char '}' >> return "\\}") <|> return "\\")) <|> (braced <$> inBraces) ) (char '}') return $ concat res braced :: String -> String braced s = "{" ++ s ++ "}" inQuotes :: BibParser String inQuotes = do char '"' concat <$> manyTill ( many1 (noneOf "\"\\{") <|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar) <|> braced <$> inBraces ) (char '"') fieldName :: BibParser String fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_") isBibtexKeyChar :: Char -> Bool isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String) bibItem :: BibParser Item bibItem = do char '@' enttype <- map toLower <$> many1 letter spaces char '{' spaces entid <- many (satisfy isBibtexKeyChar) spaces char ',' spaces entfields <- entField `sepEndBy` (char ',') spaces char '}' return $ Item entid enttype entfields entField :: BibParser (String, String) entField = try $ do spaces k <- fieldName spaces char '=' spaces vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy` (try $ spaces >> char '#' >> spaces) spaces return (k, concat vs) rawWord :: BibParser String rawWord = many1 alphaNum expandString :: BibParser String expandString = do k <- fieldName strs <- getState case lookup k strs of Just v -> return v Nothing -> return k -- return raw key if not found cistring :: String -> BibParser String cistring [] = return [] cistring (c:cs) = do x <- (char (toLower c) <|> char (toUpper c)) xs <- cistring cs return (x:xs) resolveCrossRefs :: Bool -> [Item] -> [Item] resolveCrossRefs isBibtex entries = map (resolveCrossRef isBibtex entries) entries splitKeys :: String -> [String] splitKeys = wordsBy (\c -> c == ' ' || c == ',') getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)] getXrefFields isBibtex baseEntry entries keys = do let keys' = splitKeys keys xrefEntry <- [e | e <- entries, identifier e `elem` keys'] (k, v) <- fields xrefEntry if k == "crossref" || k == "xdata" then do xs <- mapM (getXrefFields isBibtex baseEntry entries) (splitKeys v) (x, y) <- xs guard $ isNothing $ lookup x $ fields xrefEntry return (x, y) else do k' <- if isBibtex then return k else transformKey (entryType xrefEntry) (entryType baseEntry) k guard $ isNothing $ lookup k' $ fields baseEntry return (k',v) resolveCrossRef :: Bool -> [Item] -> Item -> Item resolveCrossRef isBibtex entries entry = foldl go entry (fields entry) where go entry' (key, val) = if key == "crossref" || key == "xdata" then entry'{ fields = fields entry' ++ getXrefFields isBibtex entry entries val } else entry' -- transformKey source target key -- derived from Appendix C of bibtex manual transformKey :: String -> String -> String -> [String] transformKey _ _ "ids" = [] transformKey _ _ "crossref" = [] transformKey _ _ "xref" = [] transformKey _ _ "entryset" = [] transformKey _ _ "entrysubtype" = [] transformKey _ _ "execute" = [] transformKey _ _ "label" = [] transformKey _ _ "options" = [] transformKey _ _ "presort" = [] transformKey _ _ "related" = [] transformKey _ _ "relatedoptions" = [] transformKey _ _ "relatedstring" = [] transformKey _ _ "relatedtype" = [] transformKey _ _ "shorthand" = [] transformKey _ _ "shorthandintro" = [] transformKey _ _ "sortkey" = [] transformKey x y "author" | x `elem` ["mvbook", "book"] && y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"] -- note: this next clause is not in the biblatex manual, but it makes -- sense in the context of CSL conversion: transformKey x y "author" | x == "mvbook" && y == "book" = ["bookauthor", "author"] transformKey "mvbook" y z | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z transformKey x y z | x `elem` ["mvcollection", "mvreference"] && y `elem` ["collection", "reference", "incollection", "inreference", "suppcollection"] = standardTrans z transformKey "mvproceedings" y z | y `elem` ["proceedings", "inproceedings"] = standardTrans z transformKey "book" y z | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z transformKey x y z | x `elem` ["collection", "reference"] && y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z transformKey "proceedings" "inproceedings" z = bookTrans z transformKey "periodical" y z | y `elem` ["article", "suppperiodical"] = case z of "title" -> ["journaltitle"] "subtitle" -> ["journalsubtitle"] "shorttitle" -> [] "sorttitle" -> [] "indextitle" -> [] "indexsorttitle" -> [] _ -> [z] transformKey _ _ x = [x] standardTrans :: String -> [String] standardTrans z = case z of "title" -> ["maintitle"] "subtitle" -> ["mainsubtitle"] "titleaddon" -> ["maintitleaddon"] "shorttitle" -> [] "sorttitle" -> [] "indextitle" -> [] "indexsorttitle" -> [] _ -> [z] bookTrans :: String -> [String] bookTrans z = case z of "title" -> ["booktitle"] "subtitle" -> ["booksubtitle"] "titleaddon" -> ["booktitleaddon"] "shorttitle" -> [] "sorttitle" -> [] "indextitle" -> [] "indexsorttitle" -> [] _ -> [z] data Lang = Lang String String -- e.g. "en" "US" langToLocale :: Lang -> String langToLocale (Lang x y) = x ++ ('-':y) resolveKey :: Lang -> Formatted -> Formatted resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils) where go (Str s) = Str $ T.pack $ resolveKey' lang $ T.unpack s go x = x resolveKey' :: Lang -> String -> String resolveKey' (Lang "en" "US") k = case map toLower k of "inpreparation" -> "in preparation" "submitted" -> "submitted" "forthcoming" -> "forthcoming" "inpress" -> "in press" "prepublished" -> "pre-published" "mathesis" -> "Master’s thesis" "phdthesis" -> "PhD thesis" "candthesis" -> "Candidate thesis" "techreport" -> "technical report" "resreport" -> "research report" "software" -> "computer software" "datacd" -> "data CD" "audiocd" -> "audio CD" "patent" -> "patent" "patentde" -> "German patent" "patenteu" -> "European patent" "patentfr" -> "French patent" "patentuk" -> "British patent" "patentus" -> "U.S. patent" "patreq" -> "patent request" "patreqde" -> "German patent request" "patreqeu" -> "European patent request" "patreqfr" -> "French patent request" "patrequk" -> "British patent request" "patrequs" -> "U.S. patent request" "countryde" -> "Germany" "countryeu" -> "European Union" "countryep" -> "European Union" "countryfr" -> "France" "countryuk" -> "United Kingdom" "countryus" -> "United States of America" "newseries" -> "new series" "oldseries" -> "old series" _ -> k resolveKey' _ k = resolveKey' (Lang "en" "US") k parseMonth :: String -> String parseMonth "jan" = "1" parseMonth "feb" = "2" parseMonth "mar" = "3" parseMonth "apr" = "4" parseMonth "may" = "5" parseMonth "jun" = "6" parseMonth "jul" = "7" parseMonth "aug" = "8" parseMonth "sep" = "9" parseMonth "oct" = "10" parseMonth "nov" = "11" parseMonth "dec" = "12" parseMonth x = x