Skip to content

Commit

Permalink
Add encoding support.
Browse files Browse the repository at this point in the history
  • Loading branch information
kohske committed Mar 6, 2014
1 parent 93645f8 commit 2434289
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 19 deletions.
19 changes: 18 additions & 1 deletion R/config.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,24 @@ slidifyDefaults <- function(){list(
get_config <- function(cfile = 'config.yml'){
# Build the configuration list: start from slidifyDefaults() and, if the
# YAML file `cfile` exists, overlay its values with modifyList().
# cfile: path to the YAML config file (defaults to 'config.yml').
# Returns the resulting configuration list.
config = slidifyDefaults()
if (file.exists(cfile)){
# NOTE(review): this direct yaml.load_file() overlay appears to be the
# pre-change line of the diff; the encoding-aware load below repeats the
# overlay from the same file - confirm which of the two is meant to stay.
config = modifyList(config, yaml::yaml.load_file(cfile))
# @kohske
# yaml only accepts UTF-8 (probably),
# so here we:
# 1. read the config file in .input.enc
# 2. convert the text to UTF-8
# 3. load the YAML
# 4. convert the results back to the native encoding
txt = read_file(cfile)
txt = enc2utf8(txt)
# NOTE(review): presumably yaml.load() yields NULL for an empty file, which
# modifyList() would not accept as a list - confirm and guard if needed.
config = modifyList(config, yaml.load(txt))
# Walk every leaf of the config list (how = "replace" keeps the list
# structure): character leaves are marked as UTF-8 and then converted to
# the native encoding; all other leaves are returned unchanged.
config = rapply(config, function(x) {
if (is.character(x)) {
Encoding(x) <- "UTF-8"
enc2native(x)
} else {
x
}
}, how = "replace")
}
return(config)
}
Expand Down
6 changes: 4 additions & 2 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ parse_page <- function(postFile, knit_deck = TRUE, envir){
opts_chunk$set(fig.path = "assets/fig/", cache.path = '.cache/', cache = TRUE)
outputFile <- gsub(".[r|R]md", ".md", inputFile)
deckFile <- ifelse(knit_deck,
knit(inputFile, outputFile, envir = envir), inputFile)
knit(inputFile, outputFile, envir = envir, encoding = .input.enc), inputFile)
post <- deckFile %|% parse_deck
post$file = postFile
post$filename = tools:::file_path_sans_ext(inputFile)
if (!is.null(post$date)) {
post$date = as.Date(post$date, '%Y-%m-%d')
}
post$link = gsub("*.Rmd", ".html", post$file)
# @kohske
# shouldn't be like this?
post$link = gsub("\\.Rmd$", ".html", post$file, fixed = TRUE)
post$raw = read_file(inputFile)
# saveRDS(post, file = "_payload.rds")
})
Expand Down
28 changes: 27 additions & 1 deletion R/process.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,35 @@
#' @keywords internal
#' @noRd
to_deck <- function(doc){
# @kohske
# read_file cares encoding and return native.enc char.
# txt should also be native.enc.
txt = str_split_fixed(read_file(doc), '\n---', 2)
meta = yaml.load(gsub("^---\n+", '', txt[1]))

# @kohske
# Here txt is native.enc. Probably yaml.load accepts only UTF8 (and ascii?).
# In MBCS locale, gsub/sub sometimes returns char marked as UTF8
# (when fixed = FALSE), but sometimes not (if there is no match).
# So just in case, convert it into utf8 before yaml.load.
meta = yaml.load(enc2utf8(gsub("^---\n+", '', txt[1])))
# Then mark meta as UTF8 (because yaml.load doesn't this)
# and convert it to native.enc.
meta = rapply(meta, function(x) {
if (is.character(x)) {
Encoding(x) <- "UTF-8"
enc2native(x)
} else {
x
}
}, how = "replace")

# custom config also care encoding
# Note that if custom config is MBCS, it MUST be same encoding
# as input Rmd.
cfile = ifelse(is.null(meta$config), 'config.yml', meta$config)

# Now all texts are native.enc

deck = modifyList(get_config(cfile), c(meta, slides = txt[2]))
deck$standalone = ifelse(deck$mode == "standalone", TRUE, FALSE)
return(deck)
Expand Down
29 changes: 25 additions & 4 deletions R/render.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,36 @@ render_page <- function(page, payload, return_page = FALSE, save_payload = FALSE
if (save_payload){
save(layout, payload, partials, file = "payload.RData")
}
cat(whisker.render(layout, payload, partials = partials), file = outputFile)


# @kohske
# output file should be UTF-8 definietely (right?),
# because almost all libraries put meta tag indicating utf-8 charaset in HTML.
# So here the encoding is hard-coded.
# In future, it may be better to accept output encoding.
outp = whisker.render(layout, payload, partials = partials) # native.enc

# create standalone deck if page mode is standalone
if (page$mode == 'standalone'){
outputFile = make_standalone(page, outputFile)
outp = make_standalone(page, outp)
}

con <- file(outputFile, "w", encoding = "UTF8")
cat(outp, file = outputFile)
writeLines(outp, con)
close(con)

# @kohske
# why not make standalone before the file?
# I changed these order, but if there is reason
# please check it.
# create standalone deck if page mode is standalone

#if (page$mode == 'standalone'){
# outputFile = make_standalone(page, outputFile)
#}

# Extract R Code from Page if purl = TRUE
if (page$purl %?=% TRUE) purl(page$file)
if (page$purl %?=% TRUE) purl(page$file, encoding = .input.enc)
})
if (return_page){ return(page) }
}
Expand Down
32 changes: 29 additions & 3 deletions R/slidify.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,39 @@
#' @param knit_deck whether the file needs to be run through knit
#' @param return_page should the function return the payload
#' @param save_payload should the payload be saved to the slide directory
#' @param encoding the encoding of the input file; see \code{\link{file}}
slidify <- pagify <- function(inputFile, knit_deck = TRUE,
return_page = FALSE, save_payload = FALSE, envir = parent.frame()){

return_page = FALSE, save_payload = FALSE, envir = parent.frame(),
encoding = getOption('encoding')){
# Entry point (also bound to the name `pagify`): parses `inputFile` with
# parse_page(), merges in the contents of .SLIDIFY_ENV and hands the result
# to render_page().
# NOTE(review): two parameter-list endings are shown above - the first
# without and the second with the `encoding` argument; they look like the
# before/after lines of the diff, so only one of them can be live code.

# @kohske
# To keep the changes as small as possible,
# a global variable is used here instead of
# passing the encoding through function parameters.
# NOTE(review): .input.enc is also defined at package level (initialised to
# NULL); if that binding is locked once the package is loaded, this `<<-`
# may fail - TODO confirm.
.input.enc <<- encoding

## REMOVE LINES AFTER KNITR IS UPDATED ------
# Sets the 'knitr.in.progress' option for the duration of the call; on exit
# it is set to FALSE (not restored to its previous value).
options('knitr.in.progress' = TRUE)
on.exit(options('knitr.in.progress' = FALSE))
## -------------------------------------------

# Fresh environment for this run; its contents are merged into `page` below.
.SLIDIFY_ENV <<- new.env()

# @kohske
# I have no idea what 'site.yml' is. Can it contain multibyte characters, or only ASCII?
# If multibyte characters are possible, this needs the same encoding fix.
# NOTE(review): ifelse() returns a value shaped like its test (length 1
# here), so with list arguments this likely keeps only the first top-level
# entry of site.yml rather than the whole list - a plain if/else may be
# what is intended; confirm.
site = ifelse(file.exists('site.yml'), yaml.load_file('site.yml'), list())

# @kohske
# parse_page was changed internally to take care of the encoding
page = parse_page(inputFile, knit_deck, envir = envir)


# @kohske
# What's this?
# (It overlays every object stored in .SLIDIFY_ENV onto `page`, as list
# elements, via modifyList().)
page = modifyList(page, as.list(.SLIDIFY_ENV))

# @kohske
# render_page was changed so that the output is always UTF-8
render_page(page, payload = list(site = site), return_page, save_payload)
}

Expand Down Expand Up @@ -80,3 +100,9 @@ check_slidifyLibraries <- function(){
}
return(invisible())
}

#' Encoding of input file
#'
#' Package-level variable holding the encoding of the input file. It starts
#' as \code{NULL}; \code{slidify()} assigns its \code{encoding} argument to it
#' with \code{<<-}, and \code{read_file()} uses it as the default encoding
#' when none is supplied. It is also passed as the \code{encoding} argument
#' to the \code{knit()} and \code{purl()} calls.
#'
#' @keywords internal
#' @noRd
.input.enc <- NULL
38 changes: 30 additions & 8 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,20 @@ minify_css <- function(css_file){
#' @return string with document contents
#' @keywords internal
#' @noRd
read_file <- function(doc, ...){
paste(readLines(doc, ...), collapse = '\n')
read_file <- function(doc, encoding = NULL, ...){
# NOTE(review): the two lines above this signature are the earlier
# definition (readLines() directly on `doc`); they appear to be the
# pre-change lines of the diff.
#
# Reads `doc` through a connection opened with the given encoding and
# returns its lines joined by '\n' as a single string. Extra arguments in
# `...` are forwarded to readLines().
# @kohske
# Read all files in the specified encoding, and then convert the text
# to the native encoding.
# So, if the input Rmd/md is CP932, all libraries etc. need to be
# written in CP932 as well. Maybe finer control is needed,
# but libraries are usually written in ASCII,
# so a problem is unlikely.
# Fall back to the global input encoding when none is given.
# NOTE(review): .input.enc is initialised to NULL, so calling this before
# slidify() has set it would pass encoding = NULL to file() - confirm that
# this cannot happen or add a default.
if (is.null(encoding)) encoding = .input.enc
con <- file(doc, "r", encoding = encoding)
# NOTE(review): if readLines() fails, close(con) below is never reached;
# an on.exit(close(con)) would cover that - confirm.
text <- paste(readLines(con, ...), collapse = '\n')
text <- enc2native(text) # this may be unnecessary...(?)
close(con)
return(text)
}

#' Capture patterns matched by regular expression
Expand Down Expand Up @@ -240,23 +252,33 @@ mgsub <- function(myrepl, mystring){
Reduce(gsub_, myrepl, init = mystring, right = T)
}

# @kohske
# I changed this function so that it no longer reads or writes a file:
# it takes the HTML text and returns the converted text.
# See also render_page().

#' Create a standalone version of an HTML File
#'
#' It works by embedding all images, switching links to use Slidify's googlecode
#' repository and inlining all user assets.
#'
#' @param deck parsed deck
#' @param html_in html file with library files linked locally
#' @param html output html text (native.enc)
#' @noRd
#' @keywords internal
make_standalone <- function(deck, html_in){
make_standalone <- function(deck, html){
# NOTE(review): two signatures are shown above (`html_in`, a file path, and
# `html`, the HTML text); together with the paired statements below they
# look like the before/after lines of the diff.
#
# Library base URL to switch to: deck$lib_cdn when set, otherwise the
# slidifylibraries2 googlecode URL; a trailing '/' is appended either way.
lib_cdn = paste0(deck$lib_cdn %||% 'http://slidifylibraries2.googlecode.com/git/inst/libraries', '/')
# Local library URL prefix (deck$url$lib plus '/') that gets replaced.
lib_url = paste0(deck$url$lib, '/')
# File-based variant: read the HTML from `html_in`, then pass it to
# markdown's internal .b64EncodeImages (accessed with `:::`, so it is not
# part of that package's exported API).
html = read_file(html_in, warn = FALSE) %|% markdown:::.b64EncodeImages
# Here lib_url is used as a regular expression pattern.
html = gsub(lib_url, lib_cdn, html)

# Text-based variant: the HTML text is passed in directly.
html = html %|% markdown:::.b64EncodeImages

# @kohske
# Shouldn't this use fixed = TRUE?
# (With fixed = TRUE, lib_url is matched as a literal string, not a regex.)
html = gsub(lib_url, lib_cdn, html, fixed = TRUE)
# html_out = sprintf('%s.html', basename(getwd()))
# File-based variant: overwrite `html_in` with the converted HTML and return
# the file path.
cat(html, file = html_in)
return(html_in)

# @kohske
# No need to write a file here; return the text instead.
return(html)
}


Expand Down

0 comments on commit 2434289

Please sign in to comment.