Move to Stackage.

No more cabal sandbox.
2018-02-17 18:16:38 +00:00
parent 4c400d62bc
commit 58491440ce
9 changed files with 158 additions and 14 deletions
@@ -0,0 +1,324 @@
+{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Text.CSL.Input.Bibtex
+-- Copyright   :  (c) John MacFarlane
+-- License     :  BSD-style (see LICENSE)
+--
+-- Maintainer  :  John MacFarlane <fiddlosopher@gmail.com>
+-- Stability   :  unstable-- Portability :  unportable
+--
+-- Modified    :  JH: Use just the bibtex parser
+-----------------------------------------------------------------------------
+
+module Bibtex
+    (
+      bibEntries
+    , resolveCrossRefs
+    , Item(..)
+    )
+    where
+
+import Text.Parsec hiding (optional, (<|>), many, State)
+import Control.Applicative
+import Text.Pandoc
+import Data.List.Split (splitOn, splitWhen, wordsBy)
+import Data.List (intercalate)
+import Data.Maybe
+import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum)
+import Control.Monad
+import Control.Monad.RWS
+import System.Environment (getEnvironment)
+import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..))
+import qualified Text.Pandoc.Walk as Walk
+import qualified Text.Pandoc.UTF8 as UTF8
+
+data Item = Item{ identifier :: String
+                , entryType  :: String
+                , fields     :: [(String, String)]
+                }
+
+type BibParser = Parsec [Char] [(String, String)]
+
+bibEntries :: BibParser [Item]
+bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry
+  where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString
+
+bibSkip :: BibParser ()
+bibSkip = skipMany1 (satisfy (/='@'))
+
+bibComment :: BibParser ()
+bibComment = try $ do
+  char '@'
+  cistring "comment"
+  skipMany (satisfy (/='\n'))
+
+bibPreamble :: BibParser ()
+bibPreamble = try $ do
+  char '@'
+  cistring "preamble"
+  spaces
+  void inBraces
+  return ()
+
+bibString :: BibParser ()
+bibString = try $ do
+  char '@'
+  cistring "string"
+  spaces
+  char '{'
+  spaces
+  f <- entField
+  spaces
+  char '}'
+  updateState $ (f:)
+  return ()
+
+inBraces :: BibParser String
+inBraces = try $ do
+  char '{'
+  res <- manyTill
+         (  many1 (noneOf "{}\\")
+        <|> (char '\\' >> (  (char '{' >> return "\\{")
+                         <|> (char '}' >> return "\\}")
+                         <|> return "\\"))
+        <|> (braced <$> inBraces)
+         ) (char '}')
+  return $ concat res
+
+braced :: String -> String
+braced s = "{" ++ s ++ "}"
+
+inQuotes :: BibParser String
+inQuotes = do
+  char '"'
+  concat <$> manyTill (  many1 (noneOf "\"\\{")
+                     <|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar)
+                     <|> braced <$> inBraces
+                      ) (char '"')
+
+fieldName :: BibParser String
+fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_")
+
+isBibtexKeyChar :: Char -> Bool
+isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
+
+bibItem :: BibParser Item
+bibItem = do
+  char '@'
+  enttype <- map toLower <$> many1 letter
+  spaces
+  char '{'
+  spaces
+  entid <- many (satisfy isBibtexKeyChar)
+  spaces
+  char ','
+  spaces
+  entfields <- entField `sepEndBy` (char ',')
+  spaces
+  char '}'
+  return $ Item entid enttype entfields
+
+entField :: BibParser (String, String)
+entField = try $ do
+  spaces
+  k <- fieldName
+  spaces
+  char '='
+  spaces
+  vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
+            (try $ spaces >> char '#' >> spaces)
+  spaces
+  return (k, concat vs)
+
+rawWord :: BibParser String
+rawWord = many1 alphaNum
+
+expandString :: BibParser String
+expandString = do
+  k <- fieldName
+  strs <- getState
+  case lookup k strs of
+       Just v  -> return v
+       Nothing -> return k -- return raw key if not found
+
+cistring :: String -> BibParser String
+cistring [] = return []
+cistring (c:cs) = do
+  x <- (char (toLower c) <|> char (toUpper c))
+  xs <- cistring cs
+  return (x:xs)
+
+resolveCrossRefs :: Bool -> [Item] -> [Item]
+resolveCrossRefs isBibtex entries =
+  map (resolveCrossRef isBibtex entries) entries
+
+splitKeys :: String -> [String]
+splitKeys = wordsBy (\c -> c == ' ' || c == ',')
+
+getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)]
+getXrefFields isBibtex baseEntry entries keys = do
+  let keys' = splitKeys keys
+  xrefEntry <- [e | e <- entries, identifier e `elem` keys']
+  (k, v) <- fields xrefEntry
+  if k == "crossref" || k == "xdata"
+     then do
+       xs <- mapM (getXrefFields isBibtex baseEntry entries)
+                   (splitKeys v)
+       (x, y) <- xs
+       guard $ isNothing $ lookup x $ fields xrefEntry
+       return (x, y)
+     else do
+       k' <- if isBibtex
+                then return k
+                else transformKey (entryType xrefEntry) (entryType baseEntry) k
+       guard $ isNothing $ lookup k' $ fields baseEntry
+       return (k',v)
+
+resolveCrossRef :: Bool -> [Item] -> Item -> Item
+resolveCrossRef isBibtex entries entry = foldl go entry (fields entry)
+  where go entry' (key, val) =
+          if key == "crossref" || key == "xdata"
+          then entry'{ fields = fields entry' ++
+                                    getXrefFields isBibtex entry entries val }
+          else entry'
+
+-- transformKey source target key
+-- derived from Appendix C of bibtex manual
+transformKey :: String -> String -> String -> [String]
+transformKey _ _ "ids"            = []
+transformKey _ _ "crossref"       = []
+transformKey _ _ "xref"           = []
+transformKey _ _ "entryset"       = []
+transformKey _ _ "entrysubtype"   = []
+transformKey _ _ "execute"        = []
+transformKey _ _ "label"          = []
+transformKey _ _ "options"        = []
+transformKey _ _ "presort"        = []
+transformKey _ _ "related"        = []
+transformKey _ _ "relatedoptions" = []
+transformKey _ _ "relatedstring"  = []
+transformKey _ _ "relatedtype"    = []
+transformKey _ _ "shorthand"      = []
+transformKey _ _ "shorthandintro" = []
+transformKey _ _ "sortkey"        = []
+transformKey x y "author"
+  | x `elem` ["mvbook", "book"] &&
+    y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
+-- note: this next clause is not in the biblatex manual, but it makes
+-- sense in the context of CSL conversion:
+transformKey x y "author"
+  | x == "mvbook" && y == "book" = ["bookauthor", "author"]
+transformKey "mvbook" y z
+  | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
+transformKey x y z
+  | x `elem` ["mvcollection", "mvreference"] &&
+    y `elem` ["collection", "reference", "incollection", "inreference",
+               "suppcollection"] = standardTrans z
+transformKey "mvproceedings" y z
+  | y `elem` ["proceedings", "inproceedings"] = standardTrans z
+transformKey "book" y z
+  | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
+transformKey x y z
+  | x `elem` ["collection", "reference"] &&
+    y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
+transformKey "proceedings" "inproceedings" z = bookTrans z
+transformKey "periodical" y z
+  | y `elem` ["article", "suppperiodical"] =
+  case z of
+       "title"          -> ["journaltitle"]
+       "subtitle"       -> ["journalsubtitle"]
+       "shorttitle"     -> []
+       "sorttitle"      -> []
+       "indextitle"     -> []
+       "indexsorttitle" -> []
+       _                -> [z]
+transformKey _ _ x                = [x]
+
+standardTrans :: String -> [String]
+standardTrans z =
+  case z of
+       "title"          -> ["maintitle"]
+       "subtitle"       -> ["mainsubtitle"]
+       "titleaddon"     -> ["maintitleaddon"]
+       "shorttitle"     -> []
+       "sorttitle"      -> []
+       "indextitle"     -> []
+       "indexsorttitle" -> []
+       _                -> [z]
+
+bookTrans :: String -> [String]
+bookTrans z =
+  case z of
+       "title"          -> ["booktitle"]
+       "subtitle"       -> ["booksubtitle"]
+       "titleaddon"     -> ["booktitleaddon"]
+       "shorttitle"     -> []
+       "sorttitle"      -> []
+       "indextitle"     -> []
+       "indexsorttitle" -> []
+       _                -> [z]
+
+data Lang = Lang String String  -- e.g. "en" "US"
+
+langToLocale :: Lang -> String
+langToLocale (Lang x y) = x ++ ('-':y)
+
+resolveKey :: Lang -> Formatted -> Formatted
+resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils)
+  where go (Str s) = Str $ resolveKey' lang s
+        go x       = x
+
+resolveKey' :: Lang -> String -> String
+resolveKey' (Lang "en" "US") k =
+  case map toLower k of
+       "inpreparation" -> "in preparation"
+       "submitted"     -> "submitted"
+       "forthcoming"   -> "forthcoming"
+       "inpress"       -> "in press"
+       "prepublished"  -> "pre-published"
+       "mathesis"      -> "Master’s thesis"
+       "phdthesis"     -> "PhD thesis"
+       "candthesis"    -> "Candidate thesis"
+       "techreport"    -> "technical report"
+       "resreport"     -> "research report"
+       "software"      -> "computer software"
+       "datacd"        -> "data CD"
+       "audiocd"       -> "audio CD"
+       "patent"        -> "patent"
+       "patentde"      -> "German patent"
+       "patenteu"      -> "European patent"
+       "patentfr"      -> "French patent"
+       "patentuk"      -> "British patent"
+       "patentus"      -> "U.S. patent"
+       "patreq"        -> "patent request"
+       "patreqde"      -> "German patent request"
+       "patreqeu"      -> "European patent request"
+       "patreqfr"      -> "French patent request"
+       "patrequk"      -> "British patent request"
+       "patrequs"      -> "U.S. patent request"
+       "countryde"     -> "Germany"
+       "countryeu"     -> "European Union"
+       "countryep"     -> "European Union"
+       "countryfr"     -> "France"
+       "countryuk"     -> "United Kingdom"
+       "countryus"     -> "United States of America"
+       "newseries"     -> "new series"
+       "oldseries"     -> "old series"
+       _               -> k
+resolveKey' _ k = resolveKey' (Lang "en" "US") k
+
+parseMonth :: String -> String
+parseMonth "jan" = "1"
+parseMonth "feb" = "2"
+parseMonth "mar" = "3"
+parseMonth "apr" = "4"
+parseMonth "may" = "5"
+parseMonth "jun" = "6"
+parseMonth "jul" = "7"
+parseMonth "aug" = "8"
+parseMonth "sep" = "9"
+parseMonth "oct" = "10"
+parseMonth "nov" = "11"
+parseMonth "dec" = "12"
+parseMonth x     = x
@@ -0,0 +1,138 @@
+-- JH: From hakyll-bibtex by jaspervdj, using the Pandoc parser.
+--------------------------------------------------------------------------------
+{-# LANGUAGE DeriveDataTypeable         #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+module HakyllBibTex
+    ( BibEntry (..)
+    , bibEntryContext
+    , BibFile (..)
+    , bibFileEntryNames
+    , parseBibFile
+    , lookupBibEntry
+    ) where
+
+
+--------------------------------------------------------------------------------
+import           Control.Applicative
+import           Data.Binary              (Binary (..))
+import           Data.Char                (isSpace)
+import           Data.List.Split          (splitOn)
+import           Data.List                (intercalate)
+import qualified Data.Map                 as M (empty)
+import           Data.Typeable            (Typeable)
+import           Hakyll
+import qualified Bibtex                   as B
+import qualified Text.Pandoc              as Pandoc
+import qualified Text.Parsec              as Parsec
+
+--------------------------------------------------------------------------------
+newtype BibEntry = BE B.Item
+    deriving (Typeable)
+
+unwrapBE :: BibEntry -> B.Item
+unwrapBE (BE it) = it
+
+--------------------------------------------------------------------------------
+instance Binary BibEntry where
+    put (BE t) = do
+        put $ B.entryType t
+        put $ B.identifier t
+        put $ B.fields t
+
+    get = BE <$> (B.Item <$> get <*> get <*> get)
+
+--------------------------------------------------------------------------------
+-- XXX refactor to use Context monoid ? XXX
+bibEntryContext :: Context BibEntry
+bibEntryContext = Context $ \key _ item ->
+    let BE t = itemBody item
+    in case key of
+        "identifier" -> return $ StringField $ B.identifier t
+        _            -> case lookup key (B.fields t) of
+            Just val -> return $ StringField $ case key of
+              k
+                | k == "author" -> formatAuthors val
+                | k == "month"  -> expandMonth val
+                | k `elem` [ "title", "booktitle", "journal", "note" ] -> latexToHtml val
+                | otherwise -> val
+            Nothing  -> empty
+
+--------------------------------------------------------------------------------
+newtype BibFile = BibFile [BibEntry]
+    deriving (Binary, Typeable)
+
+
+--------------------------------------------------------------------------------
+bibFileEntryNames :: BibFile -> [String]
+bibFileEntryNames (BibFile es) = [B.identifier t | BE t <- es]
+
+
+--------------------------------------------------------------------------------
+parseBibFile :: String -> BibFile
+parseBibFile string = case Parsec.runParser (B.bibEntries <* Parsec.eof)
+                           [] "<bib file>" string of
+    Left err -> error $ show err
+    Right xs -> BibFile $ map BE (B.resolveCrossRefs True xs)
+
+
+--------------------------------------------------------------------------------
+lookupBibEntry :: String -> BibFile -> BibEntry
+lookupBibEntry name (BibFile es) =
+    case [BE t | BE t <- es, B.identifier t == name] of
+        []      -> error $ name ++ " not found in BibFile"
+        (x : _) -> x
+
+
+--------------------------------------------------------------------------------
+formatAuthors :: String -> String
+formatAuthors auth =
+  let auths = splitOn "and" auth
+      swapped = fmap formatAuthor auths 
+      finalSep = case (length auths) of
+          1 -> ""
+          2 -> " and&nbsp;"
+          _ -> ", and&nbsp;"
+      in
+      (intercalate ", " (init swapped)) ++ finalSep ++ last swapped
+
+--------------------------------------------------------------------------------
+formatAuthor :: String -> String
+formatAuthor = intercalate "&nbsp;"
+               . (splitOn " ")
+               . latexToHtml
+               . trimSpace
+               . (intercalate " ")
+               . reverse
+               . (splitOn ",")
+
+--------------------------------------------------------------------------------
+expandMonth :: String -> String
+expandMonth mon = case mon of
+    "jan" -> "January"
+    "feb" -> "February"
+    "mar" -> "March"
+    "apr" -> "April"
+    "may" -> "May"
+    "jun" -> "June"
+    "jul" -> "July"
+    "aug" -> "August"
+    "sep" -> "September"
+    "oct" -> "October"
+    "nov" -> "November"
+    "dec" -> "December"
+    x     -> x
+
+--------------------------------------------------------------------------------
+-- Inefficient...
+trimSpace :: String -> String
+trimSpace = f . f
+    where f = reverse . dropWhile isSpace
+
+--------------------------------------------------------------------------------
+latexToHtml tex =
+    let p = case Pandoc.readLaTeX Pandoc.def tex of
+                Right (Pandoc.Pandoc meta [Pandoc.Para para]) ->
+                    Pandoc.Pandoc meta [Pandoc.Plain para]
+                Left _                                        ->
+                    Pandoc.Pandoc (Pandoc.Meta M.empty) []
+    in Pandoc.writeHtmlString Pandoc.def p