327 lines
10 KiB
Haskell
327 lines
10 KiB
Haskell
|
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
|
|||
|
{-# LANGUAGE OverloadedStrings #-}
|
|||
|
-----------------------------------------------------------------------------
|
|||
|
-- |
|
|||
|
-- Module : Text.CSL.Input.Bibtex
|
|||
|
-- Copyright : (c) John MacFarlane
|
|||
|
-- License : BSD-style (see LICENSE)
|
|||
|
--
|
|||
|
-- Maintainer : John MacFarlane <fiddlosopher@gmail.com>
|
|||
|
-- Stability : unstable-- Portability : unportable
|
|||
|
--
|
|||
|
-- Modified : JH: Use just the bibtex parser
|
|||
|
-----------------------------------------------------------------------------
|
|||
|
|
|||
|
module Bibtex
|
|||
|
(
|
|||
|
bibEntries
|
|||
|
, resolveCrossRefs
|
|||
|
, Item(..)
|
|||
|
)
|
|||
|
where
|
|||
|
|
|||
|
import Text.Parsec hiding (optional, (<|>), many, State)
|
|||
|
import Control.Applicative
|
|||
|
import Text.Pandoc
|
|||
|
import Data.List.Split (splitOn, splitWhen, wordsBy)
|
|||
|
import Data.List (intercalate)
|
|||
|
import Data.Maybe
|
|||
|
import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum)
|
|||
|
import Control.Monad
|
|||
|
import Control.Monad.RWS
|
|||
|
import System.Environment (getEnvironment)
|
|||
|
import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..))
|
|||
|
import qualified Text.Pandoc.Walk as Walk
|
|||
|
import qualified Text.Pandoc.UTF8 as UTF8
|
|||
|
import qualified Data.Text as T
|
|||
|
|
|||
|
data Item = Item{ identifier :: String
|
|||
|
, entryType :: String
|
|||
|
, fields :: [(String, String)]
|
|||
|
}
|
|||
|
|
|||
|
type BibParser = Parsec [Char] [(String, String)]
|
|||
|
|
|||
|
bibEntries :: BibParser [Item]
|
|||
|
bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry
|
|||
|
where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString
|
|||
|
|
|||
|
bibSkip :: BibParser ()
|
|||
|
bibSkip = skipMany1 (satisfy (/='@'))
|
|||
|
|
|||
|
bibComment :: BibParser ()
|
|||
|
bibComment = try $ do
|
|||
|
char '@'
|
|||
|
cistring "comment"
|
|||
|
skipMany (satisfy (/='\n'))
|
|||
|
|
|||
|
bibPreamble :: BibParser ()
|
|||
|
bibPreamble = try $ do
|
|||
|
char '@'
|
|||
|
cistring "preamble"
|
|||
|
spaces
|
|||
|
void inBraces
|
|||
|
return ()
|
|||
|
|
|||
|
bibString :: BibParser ()
|
|||
|
bibString = try $ do
|
|||
|
char '@'
|
|||
|
cistring "string"
|
|||
|
spaces
|
|||
|
char '{'
|
|||
|
spaces
|
|||
|
f <- entField
|
|||
|
spaces
|
|||
|
char '}'
|
|||
|
updateState $ (f:)
|
|||
|
return ()
|
|||
|
|
|||
|
inBraces :: BibParser String
|
|||
|
inBraces = try $ do
|
|||
|
char '{'
|
|||
|
res <- manyTill
|
|||
|
( many1 (noneOf "{}\\")
|
|||
|
<|> (char '\\' >> ( (char '{' >> return "\\{")
|
|||
|
<|> (char '}' >> return "\\}")
|
|||
|
<|> return "\\"))
|
|||
|
<|> (braced <$> inBraces)
|
|||
|
) (char '}')
|
|||
|
return $ concat res
|
|||
|
|
|||
|
braced :: String -> String
|
|||
|
braced s = "{" ++ s ++ "}"
|
|||
|
|
|||
|
inQuotes :: BibParser String
|
|||
|
inQuotes = do
|
|||
|
char '"'
|
|||
|
concat <$> manyTill ( many1 (noneOf "\"\\{")
|
|||
|
<|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar)
|
|||
|
<|> braced <$> inBraces
|
|||
|
) (char '"')
|
|||
|
|
|||
|
fieldName :: BibParser String
|
|||
|
fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_")
|
|||
|
|
|||
|
isBibtexKeyChar :: Char -> Bool
|
|||
|
isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)
|
|||
|
|
|||
|
bibItem :: BibParser Item
|
|||
|
bibItem = do
|
|||
|
char '@'
|
|||
|
enttype <- map toLower <$> many1 letter
|
|||
|
spaces
|
|||
|
char '{'
|
|||
|
spaces
|
|||
|
entid <- many (satisfy isBibtexKeyChar)
|
|||
|
spaces
|
|||
|
char ','
|
|||
|
spaces
|
|||
|
entfields <- entField `sepEndBy` (char ',')
|
|||
|
spaces
|
|||
|
char '}'
|
|||
|
return $ Item entid enttype entfields
|
|||
|
|
|||
|
entField :: BibParser (String, String)
|
|||
|
entField = try $ do
|
|||
|
spaces
|
|||
|
k <- fieldName
|
|||
|
spaces
|
|||
|
char '='
|
|||
|
spaces
|
|||
|
vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
|
|||
|
(try $ spaces >> char '#' >> spaces)
|
|||
|
spaces
|
|||
|
return (k, concat vs)
|
|||
|
|
|||
|
rawWord :: BibParser String
|
|||
|
rawWord = many1 alphaNum
|
|||
|
|
|||
|
expandString :: BibParser String
|
|||
|
expandString = do
|
|||
|
k <- fieldName
|
|||
|
strs <- getState
|
|||
|
case lookup k strs of
|
|||
|
Just v -> return v
|
|||
|
Nothing -> return k -- return raw key if not found
|
|||
|
|
|||
|
cistring :: String -> BibParser String
|
|||
|
cistring [] = return []
|
|||
|
cistring (c:cs) = do
|
|||
|
x <- (char (toLower c) <|> char (toUpper c))
|
|||
|
xs <- cistring cs
|
|||
|
return (x:xs)
|
|||
|
|
|||
|
resolveCrossRefs :: Bool -> [Item] -> [Item]
|
|||
|
resolveCrossRefs isBibtex entries =
|
|||
|
map (resolveCrossRef isBibtex entries) entries
|
|||
|
|
|||
|
splitKeys :: String -> [String]
|
|||
|
splitKeys = wordsBy (\c -> c == ' ' || c == ',')
|
|||
|
|
|||
|
getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)]
|
|||
|
getXrefFields isBibtex baseEntry entries keys = do
|
|||
|
let keys' = splitKeys keys
|
|||
|
xrefEntry <- [e | e <- entries, identifier e `elem` keys']
|
|||
|
(k, v) <- fields xrefEntry
|
|||
|
if k == "crossref" || k == "xdata"
|
|||
|
then do
|
|||
|
xs <- mapM (getXrefFields isBibtex baseEntry entries)
|
|||
|
(splitKeys v)
|
|||
|
(x, y) <- xs
|
|||
|
guard $ isNothing $ lookup x $ fields xrefEntry
|
|||
|
return (x, y)
|
|||
|
else do
|
|||
|
k' <- if isBibtex
|
|||
|
then return k
|
|||
|
else transformKey (entryType xrefEntry) (entryType baseEntry) k
|
|||
|
guard $ isNothing $ lookup k' $ fields baseEntry
|
|||
|
return (k',v)
|
|||
|
|
|||
|
resolveCrossRef :: Bool -> [Item] -> Item -> Item
|
|||
|
resolveCrossRef isBibtex entries entry = foldl go entry (fields entry)
|
|||
|
where go entry' (key, val) =
|
|||
|
if key == "crossref" || key == "xdata"
|
|||
|
then entry'{ fields = fields entry' ++
|
|||
|
getXrefFields isBibtex entry entries val }
|
|||
|
else entry'
|
|||
|
|
|||
|
-- transformKey source target key
|
|||
|
-- derived from Appendix C of bibtex manual
|
|||
|
transformKey :: String -> String -> String -> [String]
|
|||
|
transformKey _ _ "ids" = []
|
|||
|
transformKey _ _ "crossref" = []
|
|||
|
transformKey _ _ "xref" = []
|
|||
|
transformKey _ _ "entryset" = []
|
|||
|
transformKey _ _ "entrysubtype" = []
|
|||
|
transformKey _ _ "execute" = []
|
|||
|
transformKey _ _ "label" = []
|
|||
|
transformKey _ _ "options" = []
|
|||
|
transformKey _ _ "presort" = []
|
|||
|
transformKey _ _ "related" = []
|
|||
|
transformKey _ _ "relatedoptions" = []
|
|||
|
transformKey _ _ "relatedstring" = []
|
|||
|
transformKey _ _ "relatedtype" = []
|
|||
|
transformKey _ _ "shorthand" = []
|
|||
|
transformKey _ _ "shorthandintro" = []
|
|||
|
transformKey _ _ "sortkey" = []
|
|||
|
transformKey x y "author"
|
|||
|
| x `elem` ["mvbook", "book"] &&
|
|||
|
y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
|
|||
|
-- note: this next clause is not in the biblatex manual, but it makes
|
|||
|
-- sense in the context of CSL conversion:
|
|||
|
transformKey x y "author"
|
|||
|
| x == "mvbook" && y == "book" = ["bookauthor", "author"]
|
|||
|
transformKey "mvbook" y z
|
|||
|
| y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
|
|||
|
transformKey x y z
|
|||
|
| x `elem` ["mvcollection", "mvreference"] &&
|
|||
|
y `elem` ["collection", "reference", "incollection", "inreference",
|
|||
|
"suppcollection"] = standardTrans z
|
|||
|
transformKey "mvproceedings" y z
|
|||
|
| y `elem` ["proceedings", "inproceedings"] = standardTrans z
|
|||
|
transformKey "book" y z
|
|||
|
| y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
|
|||
|
transformKey x y z
|
|||
|
| x `elem` ["collection", "reference"] &&
|
|||
|
y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
|
|||
|
transformKey "proceedings" "inproceedings" z = bookTrans z
|
|||
|
transformKey "periodical" y z
|
|||
|
| y `elem` ["article", "suppperiodical"] =
|
|||
|
case z of
|
|||
|
"title" -> ["journaltitle"]
|
|||
|
"subtitle" -> ["journalsubtitle"]
|
|||
|
"shorttitle" -> []
|
|||
|
"sorttitle" -> []
|
|||
|
"indextitle" -> []
|
|||
|
"indexsorttitle" -> []
|
|||
|
_ -> [z]
|
|||
|
transformKey _ _ x = [x]
|
|||
|
|
|||
|
standardTrans :: String -> [String]
|
|||
|
standardTrans z =
|
|||
|
case z of
|
|||
|
"title" -> ["maintitle"]
|
|||
|
"subtitle" -> ["mainsubtitle"]
|
|||
|
"titleaddon" -> ["maintitleaddon"]
|
|||
|
"shorttitle" -> []
|
|||
|
"sorttitle" -> []
|
|||
|
"indextitle" -> []
|
|||
|
"indexsorttitle" -> []
|
|||
|
_ -> [z]
|
|||
|
|
|||
|
bookTrans :: String -> [String]
|
|||
|
bookTrans z =
|
|||
|
case z of
|
|||
|
"title" -> ["booktitle"]
|
|||
|
"subtitle" -> ["booksubtitle"]
|
|||
|
"titleaddon" -> ["booktitleaddon"]
|
|||
|
"shorttitle" -> []
|
|||
|
"sorttitle" -> []
|
|||
|
"indextitle" -> []
|
|||
|
"indexsorttitle" -> []
|
|||
|
_ -> [z]
|
|||
|
|
|||
|
data Lang = Lang String String -- e.g. "en" "US"
|
|||
|
|
|||
|
langToLocale :: Lang -> String
|
|||
|
langToLocale (Lang x y) = x ++ ('-':y)
|
|||
|
|
|||
|
resolveKey :: Lang -> Formatted -> Formatted
|
|||
|
resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils)
|
|||
|
where go (Str s) = Str $ T.pack $ resolveKey' lang $ T.unpack s
|
|||
|
go x = x
|
|||
|
|
|||
|
resolveKey' :: Lang -> String -> String
|
|||
|
resolveKey' (Lang "en" "US") k =
|
|||
|
case map toLower k of
|
|||
|
"inpreparation" -> "in preparation"
|
|||
|
"submitted" -> "submitted"
|
|||
|
"forthcoming" -> "forthcoming"
|
|||
|
"inpress" -> "in press"
|
|||
|
"prepublished" -> "pre-published"
|
|||
|
"mathesis" -> "Master’s thesis"
|
|||
|
"phdthesis" -> "PhD thesis"
|
|||
|
"candthesis" -> "Candidate thesis"
|
|||
|
"techreport" -> "technical report"
|
|||
|
"resreport" -> "research report"
|
|||
|
"software" -> "computer software"
|
|||
|
"datacd" -> "data CD"
|
|||
|
"audiocd" -> "audio CD"
|
|||
|
"patent" -> "patent"
|
|||
|
"patentde" -> "German patent"
|
|||
|
"patenteu" -> "European patent"
|
|||
|
"patentfr" -> "French patent"
|
|||
|
"patentuk" -> "British patent"
|
|||
|
"patentus" -> "U.S. patent"
|
|||
|
"patreq" -> "patent request"
|
|||
|
"patreqde" -> "German patent request"
|
|||
|
"patreqeu" -> "European patent request"
|
|||
|
"patreqfr" -> "French patent request"
|
|||
|
"patrequk" -> "British patent request"
|
|||
|
"patrequs" -> "U.S. patent request"
|
|||
|
"countryde" -> "Germany"
|
|||
|
"countryeu" -> "European Union"
|
|||
|
"countryep" -> "European Union"
|
|||
|
"countryfr" -> "France"
|
|||
|
"countryuk" -> "United Kingdom"
|
|||
|
"countryus" -> "United States of America"
|
|||
|
"newseries" -> "new series"
|
|||
|
"oldseries" -> "old series"
|
|||
|
_ -> k
|
|||
|
resolveKey' _ k = resolveKey' (Lang "en" "US") k
|
|||
|
|
|||
|
parseMonth :: String -> String
|
|||
|
parseMonth "jan" = "1"
|
|||
|
parseMonth "feb" = "2"
|
|||
|
parseMonth "mar" = "3"
|
|||
|
parseMonth "apr" = "4"
|
|||
|
parseMonth "may" = "5"
|
|||
|
parseMonth "jun" = "6"
|
|||
|
parseMonth "jul" = "7"
|
|||
|
parseMonth "aug" = "8"
|
|||
|
parseMonth "sep" = "9"
|
|||
|
parseMonth "oct" = "10"
|
|||
|
parseMonth "nov" = "11"
|
|||
|
parseMonth "dec" = "12"
|
|||
|
parseMonth x = x
|