#! /usr/bin/env ruby
# encoding: UTF-8

# iolate.rb:
#
# Version 0.01
#
# Extract and reinsert the translatable text from
# the dokuwiki file cpage.dkw to the text file transfertext.utf8
#
# Copyright 18.11.2021
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#

# == syntax
#
# iolate.rb [-i inputfilename]
#           [-o throughputfilename]
#           [-k]
#           [-v]
#           [-h]
#
# -i
#   Name of the file in dokuwiki syntax. If none is given,
#   cpage.dkw is assumed
#
# -o
#   Name of the text file to which the translatable text is written.
#   If none is given, transfertext.utf8 is assumed
#
# -k
#   Keep tags in the output file transfertext.utf8
#
# -v
#   verbose
#
# -h
#   show help
#

# == file formats
#
# cpage.dkw
#   has to be formatted according to the dokuwiki cotan markup
#   (the reference link is missing from this copy of the comment).
#   Only the text in text boxes between the first tag pair
#
#     {{cotan>…}} … {{<cotan}}
#
#   is extracted into transfertext.utf8,
#   text boxes before or after this are left unchanged.
#   The first … here represents a filename which must not contain }}
#   The second … represents the string which is searched for translatable text
#   which must not itself contain cotan.
#   Only text in text boxes is extracted, i.e. the line before the text must
#   start with an @ which is followed by at least four
#   comma separated integers, e.g. @37,25,419,112
#   After the text there has to be a line containing exactly one ~ character
#   at the start of the line, marking the end of the text box.
#   The following holds true for the translated file cpage.dkw:
#   All <…> tags are removed from the result of the second run
#   All […] tags which are not at the start of the first line
#   are removed from the result of the second run
#   All soft hyphens (&shy;) are removed
#   All linefeeds and carriage returns are removed
#   The removing of <…> and […] tags can be suppressed with -k
#   This file is then also used as output to store the data
#   between passes in yaml format.
#
# cpage.yaml
#   stores all the data of the file cpage.dkw except for the text which
#   has to be translated
#
# transfertext.utf8
#   In the extracted text all text from one text box is concatenated into
#   one line, separated by a space. The text of all text boxes
#   is concatenated, separated by one LF character per box.
#   This output file is used as input in the second pass.
#   The translated text has to conform to the same specification
#   in order to be used for the construction of cpage1.dkw
#
# cpage1.dkw
#   The recreated file containing the markup from cpage.dkw and the text
#   from transfertext.utf8

# == usage in context
#
# 0. It is assumed that you have the current version of
#    the ruby interpreter installed as well as the gems
#    listed under "dependencies". Copy this script into
#    a directory on your computer. All further file handling
#    is assumed to take place there
#
# 1. copy the content of the strip you want to translate
#    or at least
#
#      {{cotan>…}} and {{<cotan}}
#
#    and what is between them
#    into the file called cpage.dkw in the directory where
#    the executable file iolate.rb is located
#
# 2. execute, e.g. call ruby iolate.rb
#
# 3. open the file transfertext.utf8 and copy its contents
#    into the input field of a translation service or program
#    of your choice (the examples named here are missing from this copy)
#
# 4. translate
#
# 5. overwrite the text in transfertext.utf8 with
#    the translation and save the file
#
# 6. execute iolate.rb again
#
# 7. cpage1.dkw should now contain the translation
#

# == todo
#
# 1. Add the capability to supply various translations
#    in transfertext.utf8 separated by ~ and produce files
#    cpage1.dkw, cpage2.dkw etc. from them
#
# 2. optionally add a loop and REST-api compatibility
#    to this script, to translate whole comics at once
#

# == constants
#
NEWLINECHAR = "\n".freeze # EOL standard char to indicate end of line
SYSLIMITFILENAME = 255 # maximal length of a file name for ext4

# == dependencies

# class for parsing commandline options
begin
  require 'optparse'
rescue LoadError # only a failed require is expected here; do not swallow signals or exits
  STDERR.puts "warnung: require of optparse failed."
  # exit 1 # nonfatal if no options are given, so cross your fingers
end

# class for serialization
begin
  require 'yaml'
rescue LoadError
  STDERR.puts "warnung: require of yaml failed."
  exit 1
end


# == methods
#
# to keep in line with a functional approach which might be easier
# to port to javascript with opal, we make no use of oo,
# but use the methods of this object as functions

# Find all tags which start with sBeginTag and are closed with sEndTag.
#
# sStr::      the string to search
# sBeginTag:: the string opening a tag (default "[")
# sEndTag::   the string closing a tag (default "]")
#
# Returns [iaStarts, iaEnds], two arrays of equal length holding the
# position of the opening and of the closing delimiter of each complete
# tag, or nil if no complete tag is found. An opening delimiter without
# a closing one ends the search and is not reported.
def tagspos(sStr, sBeginTag = "[", sEndTag = "]")
  iaStarts = []
  iaEnds = []
  iBegin = sStr.index(sBeginTag)
  until iBegin.nil?
    iEnd = sStr.index(sEndTag, iBegin + sBeginTag.length)
    break if iEnd.nil? # unterminated tag: nothing more to find
    # complete tag found, record it and search for the next one
    iaStarts << iBegin
    iaEnds << iEnd
    # continue behind the closing delimiter (a position, not the delimiter string)
    iBegin = sStr.index(sBeginTag, iEnd + sEndTag.length)
  end
  return nil if iaStarts.empty?
  [iaStarts, iaEnds]
end


# Remove everything between sBeginChar and sEndChar, including those
# delimiters, all over sStr.
#
# sStr::       the string to clean; it is modified in place
# sBeginChar:: the opening delimiter (default "<")
# sEndChar::   the closing delimiter (default ">")
#
# Returns sStr itself (the same object). An opening delimiter which is
# never closed stops the removal; the rest of the string stays untouched.
def removebetween(sStr, sBeginChar = "<", sEndChar = ">")
  iBegin = sStr.index(sBeginChar)
  until iBegin.nil?
    iEnd = sStr.index(sEndChar, iBegin + sBeginChar.length)
    break if iEnd.nil? # unterminated tag, leave it in place
    # cut from the opening delimiter up to and including the whole closing delimiter
    sStr.slice!(iBegin, iEnd - iBegin + sEndChar.length)
    iBegin = sStr.index(sBeginChar)
  end
  sStr
end

# Abbreviate a long value for display by keeping only its start and end.
#
# slongString:: the value to show; anything that is not a String is
#               converted with #inspect first
# icutoff::     number of characters kept at each end (default 70)
#
# Returns the string unchanged if it has at most 2 * icutoff characters,
# otherwise the first icutoff characters, " ... " and the last icutoff
# characters.
def startandendofdstring(slongString, icutoff = 70)
  sretString = slongString.is_a?(String) ? slongString : slongString.inspect
  return sretString if sretString.length <= icutoff * 2
  # take the tail by length so that the last character is included
  sretString[0, icutoff] + " ... " + sretString[-icutoff, icutoff]
end


# Returns true if sMayBeInteger is a string which represents an integer:
# an optional sign followed by decimal digits and nothing else.
# Returns false for everything else, including nil.
def is_i?(sMayBeInteger)
  # Regexp#=== is used on purpose: it yields false instead of raising
  # when the argument is nil or not a string
  /\A[-+]?\d+\z/ === sMayBeInteger
end


# Returns true if the string sLine is the first line of a textbox,
# i.e. it starts with an @ which is followed by at least four comma
# separated fields, of which the first three are integers and the
# fourth starts with an integer, e.g. @37,25,419,112
def isBoxStart(sLine)
  return false unless sLine.start_with?("@")
  # drop the leading @ and trailing whitespace only; the last character
  # of the line belongs to the data and must be kept
  asLine = sLine[1..-1].rstrip.split(",")
  return false unless asLine.length > 3
  return false unless asLine[0..2].all? { |sField| is_i?(sField) }
  # the fourth field only has to begin with a digit or with a signed digit
  is_i?(asLine[3][0]) || is_i?(asLine[3][0..1])
end


# == main
#
# First run (transfertext.utf8 does not exist yet): extract the text of
# all textboxes inside the cotan block of the dokuwiki file, write it to
# the transfer file and store everything else in a yaml file.
# Second run (transfertext.utf8 exists): read the yaml file and the
# translated transfer file and write the recombined dokuwiki file.
#
# NOTE(review): this section was reconstructed from a copy in which the
# wiki rendering had dropped tokens (end, else, [], File.open(...),
# OptionParser.new, ...). Please compare it with the original script.
#
# for the use of option parser see the OptionParser documentation
# of the ruby standard library
#
options = {
  :verbose => nil,
  :dokuinfilename => "cpage.dkw",
  :textoutputfilename => "transfertext.utf8",
  :fkeep => false
} # default values go here

opt_parser = OptionParser.new do |opts|
  opts.banner = "Usage: iolate.rb [-i dokuinfilename] [-o textoutputfilename] [-k] [-v] [-h]"
  opts.on("-i filename", "--inputfile", "name of the dokuwiki file from which to extract translateable text. The default is cpage.dkw.") do |anopt|
    options[:dokuinfilename] = anopt
  end
  opts.on("-o filename", "--outputtext", "name of the text file to which translateable text is written. The default is transfertext.utf8.") do |anopt|
    options[:textoutputfilename] = anopt
  end
  # -k is a plain flag, it takes no argument
  opts.on("-k", "--keeptags", "do not atempt to delete all tags. Default is to delete them.") do |anopt|
    options[:fkeep] = anopt
  end
  opts.on("-v", "--[no-]verbose", "show comments") do |anopt|
    options[:verbose] = anopt
  end
  opts.on("-h", "--help", "show this help.") do
    puts opts
    exit
  end
end

begin
  opt_parser.parse!
rescue OptionParser::InvalidOption
  puts "\nunknown option"
  puts "in line #{__LINE__}" # the current line number in the source file.
  puts $! # error message
  puts $@ # error position
end

# an option which was not given yields nil
# an option which was given without an argument yields true
# set verbosity flag: 1 if -v was given, 0 otherwise (also for --no-verbose)
$ivc = options[:verbose] ? 1 : 0

sDokuwikifile = "cpage.dkw" # default name
if options[:dokuinfilename].nil? # should be impossible, but well …
  puts "after the -i option there needs to be a filename" if $ivc > 0
  exit 1
else
  sDokuwikifile = options[:dokuinfilename]
end

sTransferfile = "transfertext.utf8"
if options[:textoutputfilename].nil? # also impossible
  puts "after the -o option there needs to be a filename" if $ivc > 0
  exit 1
else
  sTransferfile = options[:textoutputfilename]
end

fkeep = options[:fkeep]

# derive the names of the yaml file and of the final output file from the
# name of the dokuwiki file: cpage.dkw gives cpage.yaml and cpage1.dkw.
# File.extname only looks at the last path component and yields "" if
# there is no extension.
sExtension = File.extname(sDokuwikifile)
sBasename = sDokuwikifile[0, sDokuwikifile.length - sExtension.length]
sYamlFile = sBasename + ".yaml"
sOutputfile = sBasename + "1" + sExtension

bPreTranslation = !File.exist?(sTransferfile) # no transfer file yet means first run

if bPreTranslation

  unless File.exist?(sDokuwikifile)
    STDERR.puts "error: input file #{sDokuwikifile} not found."
    exit 1
  end

  bInCotan = false # flag
  bInTextbox = false # flag
  iNrTextboxes = 0 # the total number of textboxes which were read
  asPageAsLines = [] # the markup pieces of the file are collected here
  aiTextLineNumbersInPage = [] # the line numbers of the text box lines in the page
  asTextLines = [] # the text lines from the text boxes, one element per textbox contains all the lines from this textbox
  asTextboxTags = [] # the tags at the start of a text line
  asTextBoxLines = [] # collects all the text lines of the current textbox

  aText = File.read(sDokuwikifile, :encoding => "UTF-8").split("\n")
  aText.each_index { |iline|
    sline = aText[iline]
    if bInCotan # searching for textboxes
      if sline.include? "{{<cotan}}" # stop searching for textboxes, if you're out of the cotan block
        bInCotan = false
        asPageAsLines << sline # hand cotan block's tail through
      elsif bInTextbox
        if sline.strip == "~" # this is the way a textbox ends, not with a !, but with a ~
          # all the text from the text box has been collected and is stored
          # as one line: the lines separated by a blank, one LF per textbox
          asTextLines << asTextBoxLines.join(" ") + "\n"
          asTextBoxLines = [] # reset to empty
          asPageAsLines << "" # store an empty line, because the text goes to asTextLines
          asPageAsLines << sline # the textbox's tail is just handed through
          bInTextbox = false
        else
          if asTextBoxLines.empty? # this is the first text box line
            while sline.start_with?("[") # a tag is still at the start of the line
              iClose = sline.index "]"
              break if iClose.nil? # no closing bracket found, the rest of the line is supposed to contain the text
              asTextboxTags[iNrTextboxes - 1] += sline[0..iClose] # save the tag by appending it to previous tags in the same line
              sline = sline[iClose + 1..-1] # remove the tag from the line
            end # all starting tags removed
          end # the starting tags in non first text box lines in textboxes are not saved
          unless fkeep # remove other tags unless blocked
            removebetween sline
            removebetween sline, "[", "]"
          end
          asTextBoxLines << sline
        end
      else # not in a textbox
        asPageAsLines << sline # lines before and after a textbox, including the textbox's head and tail are just handed through
        if isBoxStart(sline) # a textbox starts with this line, but also a mask
          sNextLine = aText[iline + 1]
          unless sNextLine.nil? || sNextLine.start_with?("#") # a textbox seems to be ahead
            aiTextLineNumbersInPage << asPageAsLines.length - 1 # the line number in the output file after which the text belongs
            asTextboxTags << "" # initialize tag memory for this textbox
            iNrTextboxes += 1 # keep count of the textboxes
            bInTextbox = true
          end
        end
      end
    else
      asPageAsLines << sline # lines before and after the cotan block, including the cotan block's head and tail are just handed through
      bInCotan = sline.start_with?("{{cotan>")
    end
  } # next line of the dokuwiki file

  # collect all information for a second pass in an array
  oSerializedData = [fkeep, iNrTextboxes, asPageAsLines, aiTextLineNumbersInPage, asTextboxTags]

  # write intermediate file for second pass, using the original dokuwiki file name with a yaml extension
  File.open(sYamlFile, "w") do |file|
    file.puts YAML::dump(oSerializedData)
  end

  # write textfile to be translated
  File.open(sTransferfile, "w") { |file|
    asTextLines.each { |s|
      file.write s
    }
  }

else # post translation

  unless File.exist?(sYamlFile)
    STDERR.puts "error: intermediate file #{sYamlFile} not found."
    exit 1
  end

  # the element order has to match the array written in the first run
  oSerializedData = YAML.load_file(sYamlFile)
  fkeep = oSerializedData[0]
  iNrTextboxes = oSerializedData[1]
  asPageAsLines = oSerializedData[2]
  aiTextLineNumbersInPage = oSerializedData[3]
  asTextboxTags = oSerializedData[4]

  # load translated text
  asTranslatedText = File.read(sTransferfile, :encoding => "UTF-8").split("\n")
  if asTranslatedText.length != iNrTextboxes
    STDERR.puts "warning: the number of textboxes #{iNrTextboxes} does not equal the number of translated strings #{asTranslatedText.length}." if $ivc > 0
  end

  # make translation
  sOutputtext = "".dup
  asPageAsLines.each_index { |iline|
    iTextBoxCur = aiTextLineNumbersInPage.find_index(iline - 1) # lookup, if the line before this one is the head of a textbox
    if iTextBoxCur.nil? # no translation belongs here
      sOutputtext << asPageAsLines[iline] << "\n" # this line gets handed through unchanged
    else # there is a translation for this line which we take from the sTransferfile
      # to_s guards against a translation with fewer lines than textboxes
      sOutputtext << asTextboxTags[iTextBoxCur].to_s << asTranslatedText[iTextBoxCur].to_s << "\n"
    end
  }

  # remove trailing empty lines
  sOutputtext.sub!(/\n+\z/, "")

  # write final output
  File.open(sOutputfile, "w") do |file|
    file.puts sOutputtext
  end
  # File.delete sTransferfile # cleaning up
  # File.delete sYamlFile # cleaning up

end

__END__
The above __END__ ends the text interpreted as a ruby program. You can access this and all following text using the global IO object DATA, which contains all lines after the above marker


