327 lines
10 KiB
Haskell
327 lines
10 KiB
Haskell
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
|
||
{-# LANGUAGE OverloadedStrings #-}
|
||
-----------------------------------------------------------------------------
|
||
-- |
|
||
-- Module : Text.CSL.Input.Bibtex
|
||
-- Copyright : (c) John MacFarlane
|
||
-- License : BSD-style (see LICENSE)
|
||
--
|
||
-- Maintainer : John MacFarlane <fiddlosopher@gmail.com>
|
||
-- Stability : unstable-- Portability : unportable
|
||
--
|
||
-- Modified : JH: Use just the bibtex parser
|
||
-----------------------------------------------------------------------------
|
||
|
||
module Bibtex
|
||
(
|
||
bibEntries
|
||
, resolveCrossRefs
|
||
, Item(..)
|
||
)
|
||
where
|
||
|
||
import Text.Parsec hiding (optional, (<|>), many, State)
|
||
import Control.Applicative
|
||
import Text.Pandoc
|
||
import Data.List.Split (splitOn, splitWhen, wordsBy)
|
||
import Data.List (intercalate)
|
||
import Data.Maybe
|
||
import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum)
|
||
import Control.Monad
|
||
import Control.Monad.RWS
|
||
import System.Environment (getEnvironment)
|
||
import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..))
|
||
import qualified Text.Pandoc.Walk as Walk
|
||
import qualified Text.Pandoc.UTF8 as UTF8
|
||
import qualified Data.Text as T
|
||
|
||
data Item = Item{ identifier :: String
|
||
, entryType :: String
|
||
, fields :: [(String, String)]
|
||
}
|
||
|
||
type BibParser = Parsec [Char] [(String, String)]
|
||
|
||
bibEntries :: BibParser [Item]
|
||
bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry
|
||
where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString
|
||
|
||
bibSkip :: BibParser ()
|
||
bibSkip = skipMany1 (satisfy (/='@'))
|
||
|
||
bibComment :: BibParser ()
|
||
bibComment = try $ do
|
||
char '@'
|
||
cistring "comment"
|
||
skipMany (satisfy (/='\n'))
|
||
|
||
bibPreamble :: BibParser ()
|
||
bibPreamble = try $ do
|
||
char '@'
|
||
cistring "preamble"
|
||
spaces
|
||
void inBraces
|
||
return ()
|
||
|
||
bibString :: BibParser ()
|
||
bibString = try $ do
|
||
char '@'
|
||
cistring "string"
|
||
spaces
|
||
char '{'
|
||
spaces
|
||
f <- entField
|
||
spaces
|
||
char '}'
|
||
updateState $ (f:)
|
||
return ()
|
||
|
||
inBraces :: BibParser String
|
||
inBraces = try $ do
|
||
char '{'
|
||
res <- manyTill
|
||
( many1 (noneOf "{}\\")
|
||
<|> (char '\\' >> ( (char '{' >> return "\\{")
|
||
<|> (char '}' >> return "\\}")
|
||
<|> return "\\"))
|
||
<|> (braced <$> inBraces)
|
||
) (char '}')
|
||
return $ concat res
|
||
|
||
braced :: String -> String
|
||
braced s = "{" ++ s ++ "}"
|
||
|
||
inQuotes :: BibParser String
|
||
inQuotes = do
|
||
char '"'
|
||
concat <$> manyTill ( many1 (noneOf "\"\\{")
|
||
<|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar)
|
||
<|> braced <$> inBraces
|
||
) (char '"')
|
||
|
||
fieldName :: BibParser String
|
||
fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_")
|
||
|
||
isBibtexKeyChar :: Char -> Bool
|
||
isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)
|
||
|
||
bibItem :: BibParser Item
|
||
bibItem = do
|
||
char '@'
|
||
enttype <- map toLower <$> many1 letter
|
||
spaces
|
||
char '{'
|
||
spaces
|
||
entid <- many (satisfy isBibtexKeyChar)
|
||
spaces
|
||
char ','
|
||
spaces
|
||
entfields <- entField `sepEndBy` (char ',')
|
||
spaces
|
||
char '}'
|
||
return $ Item entid enttype entfields
|
||
|
||
entField :: BibParser (String, String)
|
||
entField = try $ do
|
||
spaces
|
||
k <- fieldName
|
||
spaces
|
||
char '='
|
||
spaces
|
||
vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
|
||
(try $ spaces >> char '#' >> spaces)
|
||
spaces
|
||
return (k, concat vs)
|
||
|
||
rawWord :: BibParser String
|
||
rawWord = many1 alphaNum
|
||
|
||
expandString :: BibParser String
|
||
expandString = do
|
||
k <- fieldName
|
||
strs <- getState
|
||
case lookup k strs of
|
||
Just v -> return v
|
||
Nothing -> return k -- return raw key if not found
|
||
|
||
cistring :: String -> BibParser String
|
||
cistring [] = return []
|
||
cistring (c:cs) = do
|
||
x <- (char (toLower c) <|> char (toUpper c))
|
||
xs <- cistring cs
|
||
return (x:xs)
|
||
|
||
resolveCrossRefs :: Bool -> [Item] -> [Item]
|
||
resolveCrossRefs isBibtex entries =
|
||
map (resolveCrossRef isBibtex entries) entries
|
||
|
||
splitKeys :: String -> [String]
|
||
splitKeys = wordsBy (\c -> c == ' ' || c == ',')
|
||
|
||
getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)]
|
||
getXrefFields isBibtex baseEntry entries keys = do
|
||
let keys' = splitKeys keys
|
||
xrefEntry <- [e | e <- entries, identifier e `elem` keys']
|
||
(k, v) <- fields xrefEntry
|
||
if k == "crossref" || k == "xdata"
|
||
then do
|
||
xs <- mapM (getXrefFields isBibtex baseEntry entries)
|
||
(splitKeys v)
|
||
(x, y) <- xs
|
||
guard $ isNothing $ lookup x $ fields xrefEntry
|
||
return (x, y)
|
||
else do
|
||
k' <- if isBibtex
|
||
then return k
|
||
else transformKey (entryType xrefEntry) (entryType baseEntry) k
|
||
guard $ isNothing $ lookup k' $ fields baseEntry
|
||
return (k',v)
|
||
|
||
resolveCrossRef :: Bool -> [Item] -> Item -> Item
|
||
resolveCrossRef isBibtex entries entry = foldl go entry (fields entry)
|
||
where go entry' (key, val) =
|
||
if key == "crossref" || key == "xdata"
|
||
then entry'{ fields = fields entry' ++
|
||
getXrefFields isBibtex entry entries val }
|
||
else entry'
|
||
|
||
-- transformKey source target key
|
||
-- derived from Appendix C of bibtex manual
|
||
transformKey :: String -> String -> String -> [String]
|
||
transformKey _ _ "ids" = []
|
||
transformKey _ _ "crossref" = []
|
||
transformKey _ _ "xref" = []
|
||
transformKey _ _ "entryset" = []
|
||
transformKey _ _ "entrysubtype" = []
|
||
transformKey _ _ "execute" = []
|
||
transformKey _ _ "label" = []
|
||
transformKey _ _ "options" = []
|
||
transformKey _ _ "presort" = []
|
||
transformKey _ _ "related" = []
|
||
transformKey _ _ "relatedoptions" = []
|
||
transformKey _ _ "relatedstring" = []
|
||
transformKey _ _ "relatedtype" = []
|
||
transformKey _ _ "shorthand" = []
|
||
transformKey _ _ "shorthandintro" = []
|
||
transformKey _ _ "sortkey" = []
|
||
transformKey x y "author"
|
||
| x `elem` ["mvbook", "book"] &&
|
||
y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
|
||
-- note: this next clause is not in the biblatex manual, but it makes
|
||
-- sense in the context of CSL conversion:
|
||
transformKey x y "author"
|
||
| x == "mvbook" && y == "book" = ["bookauthor", "author"]
|
||
transformKey "mvbook" y z
|
||
| y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
|
||
transformKey x y z
|
||
| x `elem` ["mvcollection", "mvreference"] &&
|
||
y `elem` ["collection", "reference", "incollection", "inreference",
|
||
"suppcollection"] = standardTrans z
|
||
transformKey "mvproceedings" y z
|
||
| y `elem` ["proceedings", "inproceedings"] = standardTrans z
|
||
transformKey "book" y z
|
||
| y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
|
||
transformKey x y z
|
||
| x `elem` ["collection", "reference"] &&
|
||
y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
|
||
transformKey "proceedings" "inproceedings" z = bookTrans z
|
||
transformKey "periodical" y z
|
||
| y `elem` ["article", "suppperiodical"] =
|
||
case z of
|
||
"title" -> ["journaltitle"]
|
||
"subtitle" -> ["journalsubtitle"]
|
||
"shorttitle" -> []
|
||
"sorttitle" -> []
|
||
"indextitle" -> []
|
||
"indexsorttitle" -> []
|
||
_ -> [z]
|
||
transformKey _ _ x = [x]
|
||
|
||
standardTrans :: String -> [String]
|
||
standardTrans z =
|
||
case z of
|
||
"title" -> ["maintitle"]
|
||
"subtitle" -> ["mainsubtitle"]
|
||
"titleaddon" -> ["maintitleaddon"]
|
||
"shorttitle" -> []
|
||
"sorttitle" -> []
|
||
"indextitle" -> []
|
||
"indexsorttitle" -> []
|
||
_ -> [z]
|
||
|
||
bookTrans :: String -> [String]
|
||
bookTrans z =
|
||
case z of
|
||
"title" -> ["booktitle"]
|
||
"subtitle" -> ["booksubtitle"]
|
||
"titleaddon" -> ["booktitleaddon"]
|
||
"shorttitle" -> []
|
||
"sorttitle" -> []
|
||
"indextitle" -> []
|
||
"indexsorttitle" -> []
|
||
_ -> [z]
|
||
|
||
data Lang = Lang String String -- e.g. "en" "US"
|
||
|
||
langToLocale :: Lang -> String
|
||
langToLocale (Lang x y) = x ++ ('-':y)
|
||
|
||
resolveKey :: Lang -> Formatted -> Formatted
|
||
resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils)
|
||
where go (Str s) = Str $ T.pack $ resolveKey' lang $ T.unpack s
|
||
go x = x
|
||
|
||
resolveKey' :: Lang -> String -> String
|
||
resolveKey' (Lang "en" "US") k =
|
||
case map toLower k of
|
||
"inpreparation" -> "in preparation"
|
||
"submitted" -> "submitted"
|
||
"forthcoming" -> "forthcoming"
|
||
"inpress" -> "in press"
|
||
"prepublished" -> "pre-published"
|
||
"mathesis" -> "Master’s thesis"
|
||
"phdthesis" -> "PhD thesis"
|
||
"candthesis" -> "Candidate thesis"
|
||
"techreport" -> "technical report"
|
||
"resreport" -> "research report"
|
||
"software" -> "computer software"
|
||
"datacd" -> "data CD"
|
||
"audiocd" -> "audio CD"
|
||
"patent" -> "patent"
|
||
"patentde" -> "German patent"
|
||
"patenteu" -> "European patent"
|
||
"patentfr" -> "French patent"
|
||
"patentuk" -> "British patent"
|
||
"patentus" -> "U.S. patent"
|
||
"patreq" -> "patent request"
|
||
"patreqde" -> "German patent request"
|
||
"patreqeu" -> "European patent request"
|
||
"patreqfr" -> "French patent request"
|
||
"patrequk" -> "British patent request"
|
||
"patrequs" -> "U.S. patent request"
|
||
"countryde" -> "Germany"
|
||
"countryeu" -> "European Union"
|
||
"countryep" -> "European Union"
|
||
"countryfr" -> "France"
|
||
"countryuk" -> "United Kingdom"
|
||
"countryus" -> "United States of America"
|
||
"newseries" -> "new series"
|
||
"oldseries" -> "old series"
|
||
_ -> k
|
||
resolveKey' _ k = resolveKey' (Lang "en" "US") k
|
||
|
||
parseMonth :: String -> String
|
||
parseMonth "jan" = "1"
|
||
parseMonth "feb" = "2"
|
||
parseMonth "mar" = "3"
|
||
parseMonth "apr" = "4"
|
||
parseMonth "may" = "5"
|
||
parseMonth "jun" = "6"
|
||
parseMonth "jul" = "7"
|
||
parseMonth "aug" = "8"
|
||
parseMonth "sep" = "9"
|
||
parseMonth "oct" = "10"
|
||
parseMonth "nov" = "11"
|
||
parseMonth "dec" = "12"
|
||
parseMonth x = x
|