justin-site/app/Bibtex.hs

{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
{-# LANGUAGE OverloadedStrings #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.CSL.Input.Bibtex
-- Copyright   :  (c) John MacFarlane
-- License     :  BSD-style (see LICENSE)
--
-- Maintainer  :  John MacFarlane <fiddlosopher@gmail.com>
-- Stability   :  unstable-- Portability :  unportable
--
-- Modified    :  JH: Use just the bibtex parser
-----------------------------------------------------------------------------

module Bibtex
    (
      bibEntries
    , resolveCrossRefs
    , Item(..)
    )
    where

import Text.Parsec hiding (optional, (<|>), many, State)
import Control.Applicative
import Text.Pandoc
import Data.List.Split (splitOn, splitWhen, wordsBy)
import Data.List (intercalate)
import Data.Maybe
import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum)
import Control.Monad
import Control.Monad.RWS
import System.Environment (getEnvironment)
import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..))
import qualified Text.Pandoc.Walk as Walk
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T

data Item = Item{ identifier :: String
                , entryType  :: String
                , fields     :: [(String, String)]
                }

type BibParser = Parsec [Char] [(String, String)]

bibEntries :: BibParser [Item]
bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry
  where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString

bibSkip :: BibParser ()
bibSkip = skipMany1 (satisfy (/='@'))

bibComment :: BibParser ()
bibComment = try $ do
  char '@'
  cistring "comment"
  skipMany (satisfy (/='\n'))

bibPreamble :: BibParser ()
bibPreamble = try $ do
  char '@'
  cistring "preamble"
  spaces
  void inBraces
  return ()

bibString :: BibParser ()
bibString = try $ do
  char '@'
  cistring "string"
  spaces
  char '{'
  spaces
  f <- entField
  spaces
  char '}'
  updateState $ (f:)
  return ()

inBraces :: BibParser String
inBraces = try $ do
  char '{'
  res <- manyTill
         (  many1 (noneOf "{}\\")
        <|> (char '\\' >> (  (char '{' >> return "\\{")
                         <|> (char '}' >> return "\\}")
                         <|> return "\\"))
        <|> (braced <$> inBraces)
         ) (char '}')
  return $ concat res

braced :: String -> String
braced s = "{" ++ s ++ "}"

inQuotes :: BibParser String
inQuotes = do
  char '"'
  concat <$> manyTill (  many1 (noneOf "\"\\{")
                     <|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar)
                     <|> braced <$> inBraces
                      ) (char '"')

fieldName :: BibParser String
fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_")

isBibtexKeyChar :: Char -> Bool
isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)

bibItem :: BibParser Item
bibItem = do
  char '@'
  enttype <- map toLower <$> many1 letter
  spaces
  char '{'
  spaces
  entid <- many (satisfy isBibtexKeyChar)
  spaces
  char ','
  spaces
  entfields <- entField `sepEndBy` (char ',')
  spaces
  char '}'
  return $ Item entid enttype entfields

entField :: BibParser (String, String)
entField = try $ do
  spaces
  k <- fieldName
  spaces
  char '='
  spaces
  vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
            (try $ spaces >> char '#' >> spaces)
  spaces
  return (k, concat vs)

rawWord :: BibParser String
rawWord = many1 alphaNum

expandString :: BibParser String
expandString = do
  k <- fieldName
  strs <- getState
  case lookup k strs of
       Just v  -> return v
       Nothing -> return k -- return raw key if not found

cistring :: String -> BibParser String
cistring [] = return []
cistring (c:cs) = do
  x <- (char (toLower c) <|> char (toUpper c))
  xs <- cistring cs
  return (x:xs)

resolveCrossRefs :: Bool -> [Item] -> [Item]
resolveCrossRefs isBibtex entries =
  map (resolveCrossRef isBibtex entries) entries

splitKeys :: String -> [String]
splitKeys = wordsBy (\c -> c == ' ' || c == ',')

getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)]
getXrefFields isBibtex baseEntry entries keys = do
  let keys' = splitKeys keys
  xrefEntry <- [e | e <- entries, identifier e `elem` keys']
  (k, v) <- fields xrefEntry
  if k == "crossref" || k == "xdata"
     then do
       xs <- mapM (getXrefFields isBibtex baseEntry entries)
                   (splitKeys v)
       (x, y) <- xs
       guard $ isNothing $ lookup x $ fields xrefEntry
       return (x, y)
     else do
       k' <- if isBibtex
                then return k
                else transformKey (entryType xrefEntry) (entryType baseEntry) k
       guard $ isNothing $ lookup k' $ fields baseEntry
       return (k',v)

resolveCrossRef :: Bool -> [Item] -> Item -> Item
resolveCrossRef isBibtex entries entry = foldl go entry (fields entry)
  where go entry' (key, val) =
          if key == "crossref" || key == "xdata"
          then entry'{ fields = fields entry' ++
                                    getXrefFields isBibtex entry entries val }
          else entry'

-- transformKey source target key
-- derived from Appendix C of bibtex manual
transformKey :: String -> String -> String -> [String]
transformKey _ _ "ids"            = []
transformKey _ _ "crossref"       = []
transformKey _ _ "xref"           = []
transformKey _ _ "entryset"       = []
transformKey _ _ "entrysubtype"   = []
transformKey _ _ "execute"        = []
transformKey _ _ "label"          = []
transformKey _ _ "options"        = []
transformKey _ _ "presort"        = []
transformKey _ _ "related"        = []
transformKey _ _ "relatedoptions" = []
transformKey _ _ "relatedstring"  = []
transformKey _ _ "relatedtype"    = []
transformKey _ _ "shorthand"      = []
transformKey _ _ "shorthandintro" = []
transformKey _ _ "sortkey"        = []
transformKey x y "author"
  | x `elem` ["mvbook", "book"] &&
    y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
-- note: this next clause is not in the biblatex manual, but it makes
-- sense in the context of CSL conversion:
transformKey x y "author"
  | x == "mvbook" && y == "book" = ["bookauthor", "author"]
transformKey "mvbook" y z
  | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
transformKey x y z
  | x `elem` ["mvcollection", "mvreference"] &&
    y `elem` ["collection", "reference", "incollection", "inreference",
               "suppcollection"] = standardTrans z
transformKey "mvproceedings" y z
  | y `elem` ["proceedings", "inproceedings"] = standardTrans z
transformKey "book" y z
  | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
transformKey x y z
  | x `elem` ["collection", "reference"] &&
    y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
transformKey "proceedings" "inproceedings" z = bookTrans z
transformKey "periodical" y z
  | y `elem` ["article", "suppperiodical"] =
  case z of
       "title"          -> ["journaltitle"]
       "subtitle"       -> ["journalsubtitle"]
       "shorttitle"     -> []
       "sorttitle"      -> []
       "indextitle"     -> []
       "indexsorttitle" -> []
       _                -> [z]
transformKey _ _ x                = [x]

standardTrans :: String -> [String]
standardTrans z =
  case z of
       "title"          -> ["maintitle"]
       "subtitle"       -> ["mainsubtitle"]
       "titleaddon"     -> ["maintitleaddon"]
       "shorttitle"     -> []
       "sorttitle"      -> []
       "indextitle"     -> []
       "indexsorttitle" -> []
       _                -> [z]

bookTrans :: String -> [String]
bookTrans z =
  case z of
       "title"          -> ["booktitle"]
       "subtitle"       -> ["booksubtitle"]
       "titleaddon"     -> ["booktitleaddon"]
       "shorttitle"     -> []
       "sorttitle"      -> []
       "indextitle"     -> []
       "indexsorttitle" -> []
       _                -> [z]

data Lang = Lang String String  -- e.g. "en" "US"

langToLocale :: Lang -> String
langToLocale (Lang x y) = x ++ ('-':y)

resolveKey :: Lang -> Formatted -> Formatted
resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils)
  where go (Str s) = Str $ T.pack $ resolveKey' lang $ T.unpack s
        go x       = x

resolveKey' :: Lang -> String -> String
resolveKey' (Lang "en" "US") k =
  case map toLower k of
       "inpreparation" -> "in preparation"
       "submitted"     -> "submitted"
       "forthcoming"   -> "forthcoming"
       "inpress"       -> "in press"
       "prepublished"  -> "pre-published"
       "mathesis"      -> "Master’s thesis"
       "phdthesis"     -> "PhD thesis"
       "candthesis"    -> "Candidate thesis"
       "techreport"    -> "technical report"
       "resreport"     -> "research report"
       "software"      -> "computer software"
       "datacd"        -> "data CD"
       "audiocd"       -> "audio CD"
       "patent"        -> "patent"
       "patentde"      -> "German patent"
       "patenteu"      -> "European patent"
       "patentfr"      -> "French patent"
       "patentuk"      -> "British patent"
       "patentus"      -> "U.S. patent"
       "patreq"        -> "patent request"
       "patreqde"      -> "German patent request"
       "patreqeu"      -> "European patent request"
       "patreqfr"      -> "French patent request"
       "patrequk"      -> "British patent request"
       "patrequs"      -> "U.S. patent request"
       "countryde"     -> "Germany"
       "countryeu"     -> "European Union"
       "countryep"     -> "European Union"
       "countryfr"     -> "France"
       "countryuk"     -> "United Kingdom"
       "countryus"     -> "United States of America"
       "newseries"     -> "new series"
       "oldseries"     -> "old series"
       _               -> k
resolveKey' _ k = resolveKey' (Lang "en" "US") k

parseMonth :: String -> String
parseMonth "jan" = "1"
parseMonth "feb" = "2"
parseMonth "mar" = "3"
parseMonth "apr" = "4"
parseMonth "may" = "5"
parseMonth "jun" = "6"
parseMonth "jul" = "7"
parseMonth "aug" = "8"
parseMonth "sep" = "9"
parseMonth "oct" = "10"
parseMonth "nov" = "11"
parseMonth "dec" = "12"
parseMonth x     = x
-												Initial commit.

Hakyll site, generates bib from bibtex.

											
										
										
											2015-01-03 22:23:27 +00:00
+								{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
-												Switch to cabal builds.

No more stackage.

											
										
										
											2021-01-03 18:31:52 +00:00
+								{-# LANGUAGE OverloadedStrings #-}
-												Initial commit.

Hakyll site, generates bib from bibtex.

											
										
										
											2015-01-03 22:23:27 +00:00
+								-----------------------------------------------------------------------------
 								-- |
 								-- Module      :  Text.CSL.Input.Bibtex
 								-- Copyright   :  (c) John MacFarlane
 								-- License     :  BSD-style (see LICENSE)
 								--
 								-- Maintainer  :  John MacFarlane <fiddlosopher@gmail.com>
 								-- Stability   :  unstable-- Portability :  unportable
 								--
 								-- Modified    :  JH: Use just the bibtex parser
 								-----------------------------------------------------------------------------
-												Initial version of site almost ready.

											
										
										
											2015-01-05 04:43:45 +00:00
+								module Bibtex
-												Initial commit.

Hakyll site, generates bib from bibtex.

											
										
										
											2015-01-03 22:23:27 +00:00
+								    (
 								      bibEntries
 								    , resolveCrossRefs
 								    , Item(..)
 								    )
 								    where
 								import Text.Parsec hiding (optional, (<|>), many, State)
 								import Control.Applicative
 								import Text.Pandoc
 								import Data.List.Split (splitOn, splitWhen, wordsBy)
 								import Data.List (intercalate)
 								import Data.Maybe
 								import Data.Char (toLower, isUpper, toUpper, isDigit, isAlphaNum)
 								import Control.Monad
 								import Control.Monad.RWS
 								import System.Environment (getEnvironment)
 								import Text.CSL.Style (Formatted(..), Locale(..), CslTerm(..), Agent(..))
 								import qualified Text.Pandoc.Walk as Walk
 								import qualified Text.Pandoc.UTF8 as UTF8
-												Switch to cabal builds.

No more stackage.

											
										
										
											2021-01-03 18:31:52 +00:00
+								import qualified Data.Text as T
-												Initial commit.

Hakyll site, generates bib from bibtex.

											
										
										
											2015-01-03 22:23:27 +00:00
 								data Item = Item{ identifier :: String
 								                , entryType  :: String
 								                , fields     :: [(String, String)]
 								                }
 								type BibParser = Parsec [Char] [(String, String)]
 								bibEntries :: BibParser [Item]
 								bibEntries = many (try (skipMany nonEntry >> bibItem)) <* skipMany nonEntry
 								  where nonEntry = bibSkip <|> bibComment <|> bibPreamble <|> bibString
 								bibSkip :: BibParser ()
 								bibSkip = skipMany1 (satisfy (/='@'))
 								bibComment :: BibParser ()
 								bibComment = try $ do
 								  char '@'
 								  cistring "comment"
 								  skipMany (satisfy (/='\n'))
 								bibPreamble :: BibParser ()
 								bibPreamble = try $ do
 								  char '@'
 								  cistring "preamble"
 								  spaces
 								  void inBraces
 								  return ()
 								bibString :: BibParser ()
 								bibString = try $ do
 								  char '@'
 								  cistring "string"
 								  spaces
 								  char '{'
 								  spaces
 								  f <- entField
 								  spaces
 								  char '}'
 								  updateState $ (f:)
 								  return ()
 								inBraces :: BibParser String
 								inBraces = try $ do
 								  char '{'
 								  res <- manyTill
 								         (  many1 (noneOf "{}\\")
 								        <|> (char '\\' >> (  (char '{' >> return "\\{")
 								                         <|> (char '}' >> return "\\}")
 								                         <|> return "\\"))
 								        <|> (braced <$> inBraces)
 								         ) (char '}')
 								  return $ concat res
 								braced :: String -> String
 								braced s = "{" ++ s ++ "}"
 								inQuotes :: BibParser String
 								inQuotes = do
 								  char '"'
 								  concat <$> manyTill (  many1 (noneOf "\"\\{")
 								                     <|> (char '\\' >> (\c -> ['\\',c]) <$> anyChar)
 								                     <|> braced <$> inBraces
 								                      ) (char '"')
 								fieldName :: BibParser String
 								fieldName = (map toLower) <$> many1 (letter <|> digit <|> oneOf "-_")
 								isBibtexKeyChar :: Char -> Bool
-												Switch to cabal builds.

No more stackage.

											
										
										
											2021-01-03 18:31:52 +00:00
+								isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)
-												Initial commit.

Hakyll site, generates bib from bibtex.

											
										
										
											2015-01-03 22:23:27 +00:00
 								bibItem :: BibParser Item
 								bibItem = do
 								  char '@'
 								  enttype <- map toLower <$> many1 letter
 								  spaces
 								  char '{'
 								  spaces
 								  entid <- many (satisfy isBibtexKeyChar)
 								  spaces
 								  char ','
 								  spaces
 								  entfields <- entField `sepEndBy` (char ',')
 								  spaces
 								  char '}'
 								  return $ Item entid enttype entfields
 								entField :: BibParser (String, String)
 								entField = try $ do
 								  spaces
 								  k <- fieldName
 								  spaces
 								  char '='
 								  spaces
 								  vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
 								            (try $ spaces >> char '#' >> spaces)
 								  spaces
 								  return (k, concat vs)
 								rawWord :: BibParser String
 								rawWord = many1 alphaNum
 								expandString :: BibParser String
 								expandString = do
 								  k <- fieldName
 								  strs <- getState
 								  case lookup k strs of
 								       Just v  -> return v
 								       Nothing -> return k -- return raw key if not found
 								cistring :: String -> BibParser String
 								cistring [] = return []
 								cistring (c:cs) = do
 								  x <- (char (toLower c) <|> char (toUpper c))
 								  xs <- cistring cs
 								  return (x:xs)
 								resolveCrossRefs :: Bool -> [Item] -> [Item]
 								resolveCrossRefs isBibtex entries =
 								  map (resolveCrossRef isBibtex entries) entries
 								splitKeys :: String -> [String]
 								splitKeys = wordsBy (\c -> c == ' ' || c == ',')
 								getXrefFields :: Bool -> Item -> [Item] -> String -> [(String, String)]
 								getXrefFields isBibtex baseEntry entries keys = do
 								  let keys' = splitKeys keys
 								  xrefEntry <- [e | e <- entries, identifier e `elem` keys']
 								  (k, v) <- fields xrefEntry
 								  if k == "crossref" || k == "xdata"
 								     then do
 								       xs <- mapM (getXrefFields isBibtex baseEntry entries)
 								                   (splitKeys v)
 								       (x, y) <- xs
 								       guard $ isNothing $ lookup x $ fields xrefEntry
 								       return (x, y)
 								     else do
 								       k' <- if isBibtex
 								                then return k
 								                else transformKey (entryType xrefEntry) (entryType baseEntry) k
 								       guard $ isNothing $ lookup k' $ fields baseEntry
 								       return (k',v)
 								resolveCrossRef :: Bool -> [Item] -> Item -> Item
 								resolveCrossRef isBibtex entries entry = foldl go entry (fields entry)
 								  where go entry' (key, val) =
 								          if key == "crossref" || key == "xdata"
 								          then entry'{ fields = fields entry' ++
 								                                    getXrefFields isBibtex entry entries val }
 								          else entry'
 								-- transformKey source target key
 								-- derived from Appendix C of bibtex manual
 								transformKey :: String -> String -> String -> [String]
 								transformKey _ _ "ids"            = []
 								transformKey _ _ "crossref"       = []
 								transformKey _ _ "xref"           = []
 								transformKey _ _ "entryset"       = []
 								transformKey _ _ "entrysubtype"   = []
 								transformKey _ _ "execute"        = []
 								transformKey _ _ "label"          = []
 								transformKey _ _ "options"        = []
 								transformKey _ _ "presort"        = []
 								transformKey _ _ "related"        = []
 								transformKey _ _ "relatedoptions" = []
 								transformKey _ _ "relatedstring"  = []
 								transformKey _ _ "relatedtype"    = []
 								transformKey _ _ "shorthand"      = []
 								transformKey _ _ "shorthandintro" = []
 								transformKey _ _ "sortkey"        = []
 								transformKey x y "author"
 								  | x `elem` ["mvbook", "book"] &&
 								    y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
 								-- note: this next clause is not in the biblatex manual, but it makes
 								-- sense in the context of CSL conversion:
 								transformKey x y "author"
 								  | x == "mvbook" && y == "book" = ["bookauthor", "author"]
 								transformKey "mvbook" y z
 								  | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
 								transformKey x y z
 								  | x `elem` ["mvcollection", "mvreference"] &&
 								    y `elem` ["collection", "reference", "incollection", "inreference",
 								               "suppcollection"] = standardTrans z
 								transformKey "mvproceedings" y z
 								  | y `elem` ["proceedings", "inproceedings"] = standardTrans z
 								transformKey "book" y z
 								  | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
 								transformKey x y z
 								  | x `elem` ["collection", "reference"] &&
 								    y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
 								transformKey "proceedings" "inproceedings" z = bookTrans z
 								transformKey "periodical" y z
 								  | y `elem` ["article", "suppperiodical"] =
 								  case z of
 								       "title"          -> ["journaltitle"]
 								       "subtitle"       -> ["journalsubtitle"]
 								       "shorttitle"     -> []
 								       "sorttitle"      -> []
 								       "indextitle"     -> []
 								       "indexsorttitle" -> []
 								       _                -> [z]
 								transformKey _ _ x                = [x]
 								standardTrans :: String -> [String]
 								standardTrans z =
 								  case z of
 								       "title"          -> ["maintitle"]
 								       "subtitle"       -> ["mainsubtitle"]
 								       "titleaddon"     -> ["maintitleaddon"]
 								       "shorttitle"     -> []
 								       "sorttitle"      -> []
 								       "indextitle"     -> []
 								       "indexsorttitle" -> []
 								       _                -> [z]
 								bookTrans :: String -> [String]
 								bookTrans z =
 								  case z of
 								       "title"          -> ["booktitle"]
 								       "subtitle"       -> ["booksubtitle"]
 								       "titleaddon"     -> ["booktitleaddon"]
 								       "shorttitle"     -> []
 								       "sorttitle"      -> []
 								       "indextitle"     -> []
 								       "indexsorttitle" -> []
 								       _                -> [z]
 								data Lang = Lang String String  -- e.g. "en" "US"
 								langToLocale :: Lang -> String
 								langToLocale (Lang x y) = x ++ ('-':y)
 								resolveKey :: Lang -> Formatted -> Formatted
 								resolveKey lang (Formatted ils) = Formatted (Walk.walk go ils)
-												Switch to cabal builds.

No more stackage.

											
										
										
											2021-01-03 18:31:52 +00:00
+								  where go (Str s) = Str $ T.pack $ resolveKey' lang $ T.unpack s
-												Initial commit.

Hakyll site, generates bib from bibtex.

											
										
										
											2015-01-03 22:23:27 +00:00
+								        go x       = x
 								resolveKey' :: Lang -> String -> String
 								resolveKey' (Lang "en" "US") k =
 								  case map toLower k of
 								       "inpreparation" -> "in preparation"
 								       "submitted"     -> "submitted"
 								       "forthcoming"   -> "forthcoming"
 								       "inpress"       -> "in press"
 								       "prepublished"  -> "pre-published"
 								       "mathesis"      -> "Master’s thesis"
 								       "phdthesis"     -> "PhD thesis"
 								       "candthesis"    -> "Candidate thesis"
 								       "techreport"    -> "technical report"
 								       "resreport"     -> "research report"
 								       "software"      -> "computer software"
 								       "datacd"        -> "data CD"
 								       "audiocd"       -> "audio CD"
 								       "patent"        -> "patent"
 								       "patentde"      -> "German patent"
 								       "patenteu"      -> "European patent"
 								       "patentfr"      -> "French patent"
 								       "patentuk"      -> "British patent"
 								       "patentus"      -> "U.S. patent"
 								       "patreq"        -> "patent request"
 								       "patreqde"      -> "German patent request"
 								       "patreqeu"      -> "European patent request"
 								       "patreqfr"      -> "French patent request"
 								       "patrequk"      -> "British patent request"
 								       "patrequs"      -> "U.S. patent request"
 								       "countryde"     -> "Germany"
 								       "countryeu"     -> "European Union"
 								       "countryep"     -> "European Union"
 								       "countryfr"     -> "France"
 								       "countryuk"     -> "United Kingdom"
 								       "countryus"     -> "United States of America"
 								       "newseries"     -> "new series"
 								       "oldseries"     -> "old series"
 								       _               -> k
 								resolveKey' _ k = resolveKey' (Lang "en" "US") k
 								parseMonth :: String -> String
 								parseMonth "jan" = "1"
 								parseMonth "feb" = "2"
 								parseMonth "mar" = "3"
 								parseMonth "apr" = "4"
 								parseMonth "may" = "5"
 								parseMonth "jun" = "6"
 								parseMonth "jul" = "7"
 								parseMonth "aug" = "8"
 								parseMonth "sep" = "9"
 								parseMonth "oct" = "10"
 								parseMonth "nov" = "11"
 								parseMonth "dec" = "12"
 								parseMonth x     = x