diff --git a/convert.py b/convert.py index cf1649b..3a91e14 100755 --- a/convert.py +++ b/convert.py @@ -1,111 +1,96 @@ -#version 0.2 -#by John Wood - for Tech Advance -# This script reads through the monolithic .md files in a DokuWiki copy of OBS and splits them out into chunks in folders -# named like the .md files. The chunks are named 1.txt to n.txt where `n` is the last chunk. The folders and files are -# automatically named correctly. Because the script doesn't generate a manifest.json I create a project in translationStudio -# and then merge the folders created by this script into the translation folder created by translationStudio. +#convert.py +#version 0.3 +# by John Wood -- for Tech Advance +# This script reads through the monolithic .md files in a DokuWiki copy of OBS and splits them +# out into chunks in folders named like the .md files. The chunks are named 01.txt to nn.txt +# where `nn` is the last chunk. +# The folders and files are automatically named correctly. The script now creates a +# manifest.json file as well. 
# Usage: python convert.py -# Import necessary python components -import os -# os is used for file system commands -import re -# re is used for regular expressions -import sys -# sys is used for command line arguments -import shutil -# shutil is high-level file operations +#Import necessary python components -from subprocess import call -# to fork for git +import os # file system commands +import re # regular expressions +import sys # command line arguments +import shutil # high level file operations -# print "Starting the conversion process" +from subprocess import call # to fork for git -program_name=sys.argv[0]; -arguments=sys.argv[1:]; -count_args=len(arguments); -if count_args !=1 : - # print "Usage: convert old_dir" - sys.exit(1); - -convertdir = sys.argv[1]; -projectinfo = convertdir.split("_"); -oldpath=projectinfo[0]; -path=oldpath.split("/"); -language=path[len(path)-1]; -book=projectinfo[1]; - -#projectType=projectinfo[2]; -if len(projectinfo)!=2: - print"This may not be a DokuWiki OBS project, and I'm not sure how to handle it"; +program_name=sys.argv[0] +arguments=sys.argv[1:] +count_args=len(arguments) +if count_args !=1 : #If there is not exactly one argument, fail with a usage remark. + print ("convert.py script to convert DokuWiki OBS to translationStudio format") + print ("Usage: python converty.py ") sys.exit(1) -elif book=="obs": - # munge the filename to get the name of the resulting folder. 
- targetpath=convertdir+"_text_obs" -# print "The language is "+language; -# print "Source directory is "+convertdir; -# print "Target directory is "+targetpath; +convertdir=sys.argv[1] +projectinfo=convertdir.split("_") # splitting the argument on underscores will give us the + # information about the OBS project +oldpath=projectinfo[0] # The first element of the argument is the path to the old project +path=oldpath.split("/") # we can then learn more about the project from its path +language=path[len(path)-1] # The last element in the path is the language code. We have to + # subtract one because the first element is 0, rather than 1 -# Create the manifest file +book=projectinfo[1] # The book name, then, should be the second part of the project name -for filename in os.listdir(convertdir+"/content/"): - #run the script in the target directory - # Check for special folders first - if (filename=="front"): +if len(projectinfo)!=2: # tS projects look like lll_obs_text_obs rather than lll_obs + print("This may not be a DokuWiki OBS project and I'm not sure how to handle it") + sys.exit(1) + +elif book=="obs": + targetpath=convertdir+"_text_obs" #this gets the target name into the right format + +worksite=convertdir+"/content/" + +for filename in os.listdir(worksite): #the actual content is in a subdirectory + if(filename=="front"): if not os.path.exists(targetpath+"/front"): os.makedirs(targetpath+"/front") - for filename2 in os.listdir(convertdir+"/content/"+filename+"/"): - shutil.copyfile(convertdir+"/content/"+filename+"/"+filename2,targetpath+"/front/"+filename2.replace(".md",".txt")); - # Else, if the filename is a MarkDown file - elif filename.endswith(".md"): - filenum = 00; - # We start by making a folder/directory matching the name of the Markdown file - newpath = filename.replace(".md",""); - newpath=targetpath+"/"+newpath; + for filename2 in os.listdir(worksite+filename+"/"): + shutil.copyfile(worksite+filename+"/"+filename2, 
targetpath+"/front/"+filename2.replace(".md",".txt")) + #copy the file to the new location, changing its extension to .txt + elif filename.endswith(".md"): # all other files we deal with are MarkDown files + filenum=01 + # We start by making a folder/directory matching the name of the md file + newpath=filename.replace(".md","") + newpath=targetpath+"/"+newpath if not os.path.exists(newpath): os.makedirs(newpath) - - worksite = convertdir + "/content/"; - filename=worksite+filename; + filename=worksite+filename - # Then, opening the file to scan it - with open(filename) as mdfile: - # Parsing the file by line + with open(filename) as mdfile: #we open the old md to scan it for line in mdfile: - # print "Working with line: "+line.strip("\s") - # Lines with ![Image are image references not needed in the translationStudio project - if re.match('\!\[Image|\[\[https',line): - # print " Found a line matching ![Image" - filenum = filenum + 1 - # print " Filenumber is "+format(filenum) - elif re.match("#",line): - # print " Found a line Matching # -- this is a Title" - # print " Writing to "+newpath; - myTitle = newpath + "/title.txt" - with open(myTitle, "a+") as newfile: - # We write the same line to the new file, but erase the hashes (#) - newfile.write(line.replace("#","")); + if re.match("\!\[Image\]",line): + #line is an image: increment the file counter + newFileName = newpath + "/{:0>2d}".format(filenum)+".txt" + filenum = filenum+1 + writeLine="" + elif re.match(r'\[\[https',line): + #line is an image: increment the file counter + newFileName = newpath + "/{:0>2d}".format(filenum)+".txt" + filenum = filenum+1 + writeLine="" elif re.match("_",line): - # Lines with underscores are the references - myRef = newpath + "/reference.txt" - with open(myRef, "a+") as newfile: - #we write the same line to the new file, but erase the underscores - newfile.write(line.replace("_","")); - # Only process lines that aren't blank + #lines with underscores are the references + 
newFileName = newpath+"/reference.txt" + writeLine=line.replace("_","") + elif re.match("#",line): + #matching title + newFileName = newpath+"/title.txt" + writeLine=line.replace("#","") elif not line.strip("\s")=="\n": - # print "Line "+'"'+line.strip("\s")+'"'+" is blank" - #else: - # print " The current file number is "+format(filenum) - if filenum==0: filenum=1 - newFileName = "{:0>2d}".format(filenum)+".txt"; - # print " Writing the text file - "+newFileName - myNewFile = newpath + "/"+newFileName - with open(myNewFile, "a+") as newfile: - newfile.write(line) + # checking that the line isn't blank + writeLine=line + elif line.strip("\s")=="\n": + writeLine="" + with open(newFileName, "a+") as newfile: + newfile.write(writeLine) + +newfile.close() -print "Generating the manifest" with open(convertdir+"/manifest.yaml") as manfile: with open(targetpath+"/manifest.json","a+") as newmanfile: for manline in manfile: @@ -113,11 +98,11 @@ with open(convertdir+"/manifest.yaml") as manfile: direction_match = re.search(r" direction: (\w+)",manline) modified_date_match= re.search(r" modified: '(\d\d\d\d-\d\d-\d\d)'",manline) if modified_date_match: - modified_date=modified_date_match.group(1); + modified_date=modified_date_match.group(1) elif title_match: - target_language_name=title_match.group(1); + target_language_name=title_match.group(1) elif direction_match: - target_direction = direction_match.group(1); + target_direction = direction_match.group(1) newmanfile.write('\n'.join([ '{', @@ -156,7 +141,10 @@ with open(convertdir+"/manifest.yaml") as manfile: ' "parent_draft": {},', ' "translators": [],', ' "finished_chunks": []', - '}'])); + '}'])) -#gitCommand = "/usr/bin/git init "+targetpath; -#call("/usr/bin/git init", targetpath) \ No newline at end of file +os.chdir(targetpath) +call(["git","init"]) +call(["git","add","."]) +call(["git","commit","-m Initial commit"]) +print ("New project written in "+targetpath) \ No newline at end of file