justin-site/Text/CSL/Input/Bibtex.hs

325 lines
9.9 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
-----------------------------------------------------------------------------
-- |
-- Module : Text.CSL.Input.Bibtex
-- Copyright : (c) John MacFarlane
-- License : BSD-style (see LICENSE)
--
-- Maintainer : John MacFarlane <fiddlosopher@gmail.com>
-- Stability : unstable-- Portability : unportable
--
-- Modified : JH: Use just the bibtex parser
-----------------------------------------------------------------------------
module Text.CSL.Input.Bibtex
(
bibEntries
, resolveCrossRefs
, Item(..)
)
where
import Text.Parsec hiding (optional, (<|>), many, State)
import Control.Applicative
import Text.Pandoc
import Data.List.Split (splitOn, splitWhen, wordsBy)
import Data.List (intercalate)
import Data.Maybe
import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum)
import Control.Monad
import Control.Monad.RWS
import System.Environment (getEnvironment)
import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..))
import qualified Text.Pandoc.Walk as Walk
import qualified Text.Pandoc.UTF8 as UTF8
data Item = Item{ identifier :: String
, entryType :: String
, fields :: [(String, String)]
}
type BibParser = Parsec [Char] [(String, String)]
bibEntries :: BibParser [Item]
bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry
where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString
bibSkip :: BibParser ()
bibSkip = skipMany1 (satisfy (/='@'))
bibComment :: BibParser ()
bibComment = try $ do
char '@'
cistring "comment"
skipMany (satisfy (/='\n'))
bibPreamble :: BibParser ()
bibPreamble = try $ do
char '@'
cistring "preamble"
spaces
void inBraces
return ()
bibString :: BibParser ()
bibString = try $ do
char '@'
cistring "string"
spaces
char '{'
spaces
f <- entField
spaces
char '}'
updateState $ (f:)
return ()
inBraces :: BibParser String
inBraces = try $ do
char '{'
res <- manyTill
( many1 (noneOf "{}\\")
<|> (char '\\' >> ( (char '{' >> return "\\{")
<|> (char '}' >> return "\\}")
<|> return "\\"))
<|> (braced <$> inBraces)
) (char '}')
return $ concat res
braced :: String -> String
braced s = "{" ++ s ++ "}"
inQuotes :: BibParser String
inQuotes = do
char '"'
concat <$> manyTill ( many1 (noneOf "\"\\{")
<|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar)
<|> braced <$> inBraces
) (char '"')
fieldName :: BibParser String
fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_")
isBibtexKeyChar :: Char -> Bool
isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
bibItem :: BibParser Item
bibItem = do
char '@'
enttype <- map toLower <$> many1 letter
spaces
char '{'
spaces
entid <- many (satisfy isBibtexKeyChar)
spaces
char ','
spaces
entfields <- entField `sepEndBy` (char ',')
spaces
char '}'
return $ Item entid enttype entfields
entField :: BibParser (String, String)
entField = try $ do
spaces
k <- fieldName
spaces
char '='
spaces
vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
(try $ spaces >> char '#' >> spaces)
spaces
return (k, concat vs)
rawWord :: BibParser String
rawWord = many1 alphaNum
expandString :: BibParser String
expandString = do
k <- fieldName
strs <- getState
case lookup k strs of
Just v -> return v
Nothing -> return k -- return raw key if not found
cistring :: String -> BibParser String
cistring [] = return []
cistring (c:cs) = do
x <- (char (toLower c) <|> char (toUpper c))
xs <- cistring cs
return (x:xs)
resolveCrossRefs :: Bool -> [Item] -> [Item]
resolveCrossRefs isBibtex entries =
map (resolveCrossRef isBibtex entries) entries
splitKeys :: String -> [String]
splitKeys = wordsBy (\c -> c == ' ' || c == ',')
getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)]
getXrefFields isBibtex baseEntry entries keys = do
let keys' = splitKeys keys
xrefEntry <- [e | e <- entries, identifier e `elem` keys']
(k, v) <- fields xrefEntry
if k == "crossref" || k == "xdata"
then do
xs <- mapM (getXrefFields isBibtex baseEntry entries)
(splitKeys v)
(x, y) <- xs
guard $ isNothing $ lookup x $ fields xrefEntry
return (x, y)
else do
k' <- if isBibtex
then return k
else transformKey (entryType xrefEntry) (entryType baseEntry) k
guard $ isNothing $ lookup k' $ fields baseEntry
return (k',v)
resolveCrossRef :: Bool -> [Item] -> Item -> Item
resolveCrossRef isBibtex entries entry = foldl go entry (fields entry)
where go entry' (key, val) =
if key == "crossref" || key == "xdata"
then entry'{ fields = fields entry' ++
getXrefFields isBibtex entry entries val }
else entry'
-- transformKey source target key
-- derived from Appendix C of bibtex manual
transformKey :: String -> String -> String -> [String]
transformKey _ _ "ids" = []
transformKey _ _ "crossref" = []
transformKey _ _ "xref" = []
transformKey _ _ "entryset" = []
transformKey _ _ "entrysubtype" = []
transformKey _ _ "execute" = []
transformKey _ _ "label" = []
transformKey _ _ "options" = []
transformKey _ _ "presort" = []
transformKey _ _ "related" = []
transformKey _ _ "relatedoptions" = []
transformKey _ _ "relatedstring" = []
transformKey _ _ "relatedtype" = []
transformKey _ _ "shorthand" = []
transformKey _ _ "shorthandintro" = []
transformKey _ _ "sortkey" = []
transformKey x y "author"
| x `elem` ["mvbook", "book"] &&
y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
-- note: this next clause is not in the biblatex manual, but it makes
-- sense in the context of CSL conversion:
transformKey x y "author"
| x == "mvbook" && y == "book" = ["bookauthor", "author"]
transformKey "mvbook" y z
| y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
transformKey x y z
| x `elem` ["mvcollection", "mvreference"] &&
y `elem` ["collection", "reference", "incollection", "inreference",
"suppcollection"] = standardTrans z
transformKey "mvproceedings" y z
| y `elem` ["proceedings", "inproceedings"] = standardTrans z
transformKey "book" y z
| y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
transformKey x y z
| x `elem` ["collection", "reference"] &&
y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
transformKey "proceedings" "inproceedings" z = bookTrans z
transformKey "periodical" y z
| y `elem` ["article", "suppperiodical"] =
case z of
"title" -> ["journaltitle"]
"subtitle" -> ["journalsubtitle"]
"shorttitle" -> []
"sorttitle" -> []
"indextitle" -> []
"indexsorttitle" -> []
_ -> [z]
transformKey _ _ x = [x]
standardTrans :: String -> [String]
standardTrans z =
case z of
"title" -> ["maintitle"]
"subtitle" -> ["mainsubtitle"]
"titleaddon" -> ["maintitleaddon"]
"shorttitle" -> []
"sorttitle" -> []
"indextitle" -> []
"indexsorttitle" -> []
_ -> [z]
bookTrans :: String -> [String]
bookTrans z =
case z of
"title" -> ["booktitle"]
"subtitle" -> ["booksubtitle"]
"titleaddon" -> ["booktitleaddon"]
"shorttitle" -> []
"sorttitle" -> []
"indextitle" -> []
"indexsorttitle" -> []
_ -> [z]
data Lang = Lang String String -- e.g. "en" "US"
langToLocale :: Lang -> String
langToLocale (Lang x y) = x ++ ('-':y)
resolveKey :: Lang -> Formatted -> Formatted
resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils)
where go (Str s) = Str $ resolveKey' lang s
go x = x
resolveKey' :: Lang -> String -> String
resolveKey' (Lang "en" "US") k =
case map toLower k of
"inpreparation" -> "in preparation"
"submitted" -> "submitted"
"forthcoming" -> "forthcoming"
"inpress" -> "in press"
"prepublished" -> "pre-published"
"mathesis" -> "Masters thesis"
"phdthesis" -> "PhD thesis"
"candthesis" -> "Candidate thesis"
"techreport" -> "technical report"
"resreport" -> "research report"
"software" -> "computer software"
"datacd" -> "data CD"
"audiocd" -> "audio CD"
"patent" -> "patent"
"patentde" -> "German patent"
"patenteu" -> "European patent"
"patentfr" -> "French patent"
"patentuk" -> "British patent"
"patentus" -> "U.S. patent"
"patreq" -> "patent request"
"patreqde" -> "German patent request"
"patreqeu" -> "European patent request"
"patreqfr" -> "French patent request"
"patrequk" -> "British patent request"
"patrequs" -> "U.S. patent request"
"countryde" -> "Germany"
"countryeu" -> "European Union"
"countryep" -> "European Union"
"countryfr" -> "France"
"countryuk" -> "United Kingdom"
"countryus" -> "United States of America"
"newseries" -> "new series"
"oldseries" -> "old series"
_ -> k
resolveKey' _ k = resolveKey' (Lang "en" "US") k
parseMonth :: String -> String
parseMonth "jan" = "1"
parseMonth "feb" = "2"
parseMonth "mar" = "3"
parseMonth "apr" = "4"
parseMonth "may" = "5"
parseMonth "jun" = "6"
parseMonth "jul" = "7"
parseMonth "aug" = "8"
parseMonth "sep" = "9"
parseMonth "oct" = "10"
parseMonth "nov" = "11"
parseMonth "dec" = "12"
parseMonth x = x