#!/usr/local/bin/python3

import re
import sys
import os.path

# Start and end of a python environment in the LaTeX source.
re_beg_python_environ     = re.compile (r'^\s*\\begin{python}')
re_end_python_environ     = re.compile (r'^\s*\\end{python}')

# PySetup commands: action=verbatim opens a verbatim region,
# action=show or action=hide closes it.
re_beg_python_verbatim    = re.compile (r'^\s*\\PySetup{(.*?)action=verbatim')
re_end_python_verbatim    = re.compile (r'^\s*\\PySetup{(.*?)action=(show|hide)')

# Start and end of the LaTeX document body.
re_beg_latex_document     = re.compile (r'^\s*\\begin{document}')
re_end_latex_document     = re.compile (r'^\s*\\end{document}')

# Line-level patterns.  Note that \s also matches the newline, so re_indent
# includes the line terminator when the line holds nothing but whitespace.
re_indent         = re.compile (r'(^\s*)')
re_empty_line     = re.compile (r'(^\s*$)')
re_latex_comment  = re.compile (r'(^\s*%)')
re_python_comment = re.compile (r'(^\s*#)')
# Any in-line markup comment: "# py(", "# pyBeg(" or "# pyEnd".
re_python_markup  = re.compile (r'(#\s*(py\s*\(|pyBeg\s*\(|pyEnd))')
# Markup that sits inside a line which is itself a Python comment.
re_hidden_markup  = re.compile (r'(^\s*#.*#\s*(py\s*\(|pyBeg\s*\(|pyEnd))')
# pyBeg(name) / pyEnd(name); group 4 is the name.
re_capture        = re.compile (r'(#\s*((pyBeg|pyEnd)\s*\(\s*([a-zA-Z0-9_.]+)\)))')
re_beg_capture    = re.compile (r'(#\s*(pyBeg\s*\(\s*([a-zA-Z0-9_.]+)\)))')
# The "(name)" part of pyEnd is optional in this pattern.
re_end_capture    = re.compile (r'(#\s*(pyEnd\s*(\(\s*([a-zA-Z0-9_.]+)\))?))')
re_py_tag         = re.compile (r'(#\s*(py\s*\(\s*([a-zA-Z0-9_.]+)\s*(,\s*([a-zA-Z0-9_]+)\s*)?\)))')
                               # Two forms of tag are accepted: py(foo,bah) and py(bah).
                               # Group 3 is the first name (foo, or bah in the short form),
                               # group 5 is the optional second name (bah in the long form).
                               # In both cases bah is what gets printed; the pattern only admits
                               # letters, digits and '_' (plus '.' in the first name), so bah
                               # is limited to names of that shape.

def make_str (num,digits):
    """Return the integer num as a decimal string, zero-padded to at least digits characters."""
    spec = "0{}d".format (digits)
    return format (num,spec)

def grep (this_line, regex, the_group):
    """Search this_line with the compiled regex and locate one of its groups.

    Returns (begin, end, found).  When the regex does not match the result
    is (-1, -1, False); otherwise begin/end are the start and end offsets of
    group the_group and found is True.
    """
    match = regex.search (this_line)
    if not match:
        return -1, -1, False
    first, last = match.span (the_group)
    return first, last, True

def not_latex_comment (this_line):
    """Return True unless the first non-blank character of this_line is '%'."""
    return re_latex_comment.search (this_line) is None

def not_python_comment (this_line):
    """Return True unless the first non-blank character of this_line is '#'."""
    return re_python_comment.search (this_line) is None

def is_beg_latex_document (this_line):
    """Return a match object if this_line begins the LaTeX document body, else None."""
    hit = re_beg_latex_document.search (this_line)
    return hit

def is_end_latex_document (this_line):
    """Return a match object if this_line ends the LaTeX document body, else None."""
    hit = re_end_latex_document.search (this_line)
    return hit

def is_beg_python_environ (this_line):
    """Return a match object if this_line opens a python environment, else None."""
    hit = re_beg_python_environ.search (this_line)
    return hit

def is_end_python_environ (this_line):
    """Return a match object if this_line closes a python environment, else None."""
    hit = re_end_python_environ.search (this_line)
    return hit

def is_beg_python_verbatim (this_line):
    """Return a match object if this_line is a PySetup command with action=verbatim, else None."""
    hit = re_beg_python_verbatim.search (this_line)
    return hit

def is_end_python_verbatim (this_line):
    """Return a match object if this_line is a PySetup command with action=show or action=hide, else None."""
    hit = re_end_python_verbatim.search (this_line)
    return hit

def has_beg_python_capture (this_line):
    """Return a match object if this_line carries a '# pyBeg(name)' comment, else None."""
    hit = re_beg_capture.search (this_line)
    return hit

def has_end_python_capture (this_line):
    """Return a match object if this_line carries a '# pyEnd' comment (name optional), else None."""
    hit = re_end_capture.search (this_line)
    return hit

def has_python_tag (this_line):
    """Return a match object if this_line carries a '# py(...)' tag comment, else None."""
    hit = re_py_tag.search (this_line)
    return hit

def has_python_markup (this_line):
    """Return a match object if this_line contains '# py(', '# pyBeg(' or '# pyEnd', else None."""
    hit = re_python_markup.search (this_line)
    return hit

def not_hidden_markup (this_line):
    """Return True unless this_line is a Python comment line that itself contains markup."""
    return re_hidden_markup.search (this_line) is None

def filter_python_markup (this_line):
    """Return this_line without its in-line markup comment and without its newline.

    If the line contains markup ('# py(', '# pyBeg(' or '# pyEnd') that starts
    after column 0, everything from the '#' onwards is dropped and trailing
    blanks are removed.  A line whose markup starts at column 0, or a line
    with no markup, is returned unchanged apart from the trailing newline.
    An empty string yields an empty string.
    """
    if not this_line:
        return ""
    the_beg,the_end,found = grep (this_line,re_python_markup,1)
    if found and the_beg > 0:
        # Cut exactly at the '#'.  Cutting one character earlier would lose
        # the last code character when no blank separates the code from the
        # markup (e.g. "x=1# py(x)").
        return this_line[:the_beg].rstrip(" \t")
    return this_line.rstrip("\n")

# -----------------------------------------------------------------------------
# 1st pass: copy Python source from source.tex to source.py file
#           leave in-line comments in place, these will be removed in pass2

def pass1 (src_file_name, out_file_name, the_file_name):
    """Copy the Python code embedded in a LaTeX source file into a Python file.

    Only lines that lie inside a python environment of the document body are
    copied.  LaTeX comment lines and lines inside a PySetup action=verbatim
    region are skipped.  The indent of the first non-blank, non-comment Python
    line is measured and at most that many leading whitespace characters are
    removed from every copied line.  In-line markup comments are copied as is.

    src_file_name : LaTeX file to read
    out_file_name : Python file to write (overwritten); it starts with a
                    short header that defines a Print helper
    the_file_name : not used, retained so that existing callers keep working
    """

    def python_lines (src):
        # Yield, in order, the lines of src that are inside a python
        # environment of the document body and outside any verbatim region.
        # Reading stops at the end of the document body.
        in_latex_document = False
        in_python_environ = False
        in_python_verbatim = False
        for this_line in src:
            if not not_latex_comment (this_line):
                continue
            if not in_latex_document:
                if is_beg_latex_document (this_line):
                    in_latex_document = True
                continue
            if is_end_latex_document (this_line):
                return
            # the verbatim switch is tracked inside and outside python environments
            if is_beg_python_verbatim (this_line):
                in_python_verbatim = True
            elif is_end_python_verbatim (this_line):
                in_python_verbatim = False
            if not in_python_environ:
                if is_beg_python_environ (this_line):
                    in_python_environ = True
            elif is_end_python_environ (this_line):
                in_python_environ = False
            elif not in_python_verbatim:
                yield this_line

    # --------------------------------------------------------------------------
    # find the first non-blank, non-comment line in the Python blocks
    # record the indent of that line

    indent_num = 0 # number of leading spaces of first non-trivial Python line

    with open (src_file_name,"r") as src:
        for this_line in python_lines (src):
            # whitespace-only lines carry no usable indent (their "indent"
            # would include the newline), so they are skipped like empty lines
            if this_line.strip () and not_python_comment (this_line):
                the_beg,the_end,found = grep (this_line,re_indent,1)
                if not found:
                    print ("> read error in preproc/pass1")
                    print ("> line: "+this_line)
                    sys.exit (1)
                indent_num = the_end - the_beg  # number of leading spaces
                break

    # --------------------------------------------------------------------------
    # collect the Python begin/end blocks and write to a single file

    with open (src_file_name,"r") as src, open (out_file_name,"w") as out:

        out.write ("# ----------------------------------------------\n")
        out.write ("# auto-generated from " + src_file_name + "\n")
        out.write ("# ----------------------------------------------\n")

        out.write ("def Print (obj):\n")
        out.write ("    try:\n")
        out.write ("        print(latex(obj))\n")
        out.write ("    except:\n")
        out.write ("        print(obj)\n")
        out.write ("# ----------------------------------------------\n\n")

        for this_line in python_lines (src):
            the_beg,the_end,found = grep (this_line,re_indent,1)
            # never remove more than the line's own leading whitespace
            start = min (indent_num,the_end-the_beg)
            # strip the newline explicitly so that a final line without one
            # does not lose its last character
            out.write (this_line[start:].rstrip ("\n")+"\n")

# -----------------------------------------------------------------------------
# 2nd pass: use the source.py (from pass1) file to build the following files

# source_.py : annotated file, contains extra lines to generate output for later capture
# source.pyidx : a list of tags, one line per tag, written as: tag index = tag name

def pass2 (src_file_name, out_file_name, idx_file_name):
    """Build the annotated Python file and the tag index from a Python source file.

    Each line of src_file_name is inspected for in-line markup comments:

    * '# pyBeg(name)' : the code before the markup is written, followed by a
      print of a begin marker; the tag is recorded in the index file.
    * '# pyEnd(name)' : the code before the markup is written, followed by a
      print of the end marker of the matching pyBeg.
    * '# py(tag)' or '# py(tag,name)' : the code before the markup is written,
      followed by a begin marker, a Print(...) call and an end marker; the
      tag is recorded in the index file.
    * any other line is written with its markup (if any) filtered out.

    Finally src_file_name itself is rewritten with the markup filtered out.

    src_file_name : Python file to read, rewritten at the end
    out_file_name : annotated Python file to write (overwritten)
    idx_file_name : index file to write, one 'tagNNNN=name' line per tag

    Exits with status 1 when pyBeg/pyEnd pairs do not match or are nested
    more than five deep.
    """

    # -----------------------------------------------------------
    #  read functions, extract tag and expressions names

    def get_tag (this_line):
        # first name inside py(...): foo for py(foo,bah), bah for py(bah)
        the_beg,the_end,found = grep (this_line,re_py_tag,3)
        if found:
            return this_line[the_beg:the_end].strip(" ")
        return this_line.strip(" ")

    def get_exp (this_line):
        # group 5 is the optional second name; its offsets are -1 when absent
        the_beg,the_end,found = grep (this_line,re_py_tag,5)
        if found and (the_beg > 0) and (the_end > 0):
            return this_line[the_beg:the_end].strip(" ")     # returns bah given py (foo,bah)
        return get_tag (this_line)                           # returns bah given py (bah)

    def get_capture (this_line):
        # name inside pyBeg(name) / pyEnd(name)
        the_beg,the_end,found = grep (this_line,re_capture,4)
        if found:
            return this_line[the_beg:the_end].strip(" ")
        return this_line.strip(" ")

    def code_before (this_line, regex):
        # The part of the line that precedes the markup matched by regex.
        # The cut is made exactly at the '#', so code written right up
        # against the markup (no blank in between) is kept in full.
        the_beg,the_end,found = grep (this_line,regex,1)
        if found:
            return this_line[:the_beg].rstrip (" \t")
        return this_line.rstrip ()

    # ----------------------------------------------------------
    #  output functions, writes to index file, temp py file etc.

    def beg_tag (num):
        return 'print("beg_tag'+make_str(num,4)+'")'

    def end_tag (num):
        return 'print("end_tag'+make_str(num,4)+'")'

    def wrt_latex (this_line):
        return "Print("+get_exp (this_line)+")"

    # -------------------------------------------------------------------------
    #  stack operations, one entry (tag index, name) per open pyBeg

    max_stack_index = 5
    open_captures = []

    def push_stack (index, this_line):
        if len (open_captures) >= max_stack_index:
            print ("> Depth of nested pyBeg/pyEnd pairs exceeded, max = "+str(max_stack_index)+", exit")
            sys.exit (1)
        open_captures.append ((index, get_capture (this_line)))

    def pop_stack (this_line):
        # Close the innermost open pyBeg and return its tag index.
        # NOTE(review): re_end_capture accepts a bare "pyEnd", but get_capture
        # needs the "(name)" part, so a bare pyEnd never matches the open name
        # and is reported as an error here -- confirm whether that is intended.
        if not open_captures:
            print ("> Error with pyBeg/pyEnd pairs, check for missing pyBeg or pyEnd, exit")
            sys.exit (1)
        index, name = open_captures[-1]
        if get_capture (this_line) != name:
            # report the name of the pyBeg that is actually open
            print ("> Error in pyBeg/pyEnd pair for tag: "+str(name)+", exit")
            sys.exit (1)
        open_captures.pop ()
        return index

    # keep the original source lines in memory, the source file is rewritten
    # at the end (no temporary file is left behind)

    with open (src_file_name,"r") as src:
        src_lines = src.readlines ()

    # create the annotated _.py and .pyidx files

    tag_index = 0

    with open (out_file_name,"w") as out, open (idx_file_name,"w") as idx:

        for this_line in src_lines:

            is_markup = has_python_markup (this_line) and not_hidden_markup (this_line)

            if is_markup and has_beg_python_capture (this_line):

                tag_index = tag_index + 1

                out.write (code_before (this_line,re_beg_capture) + "\n")
                out.write (beg_tag (tag_index) + "\n")

                idx.write ("tag"+make_str(tag_index,4)+"=")
                idx.write (get_capture (this_line) + "\n")

                push_stack (tag_index,this_line)

            elif is_markup and has_end_python_capture (this_line):

                # check the pairing first so the end marker carries a valid index
                beg_index = pop_stack (this_line)

                out.write (code_before (this_line,re_end_capture) + "\n")
                out.write (end_tag (beg_index) + "\n")

            elif is_markup and has_python_tag (this_line):

                tag_index = tag_index + 1

                out.write (code_before (this_line,re_py_tag) + "\n")
                out.write (beg_tag   (tag_index) + "\n")
                out.write (wrt_latex (this_line) + "\n")
                out.write (end_tag   (tag_index) + "\n")

                idx.write ("tag"+make_str(tag_index,4) + "=")
                idx.write (get_tag (this_line) + "\n")

            else:

                out.write (filter_python_markup (this_line)+"\n")

    # write the source back with in-line comments removed
    # note: this clean copy is just for reference, it's never used

    with open (src_file_name,"w") as src:
        for this_line in src_lines:
            src.write (filter_python_markup(this_line)+"\n")  # note: could choose to retain in-line comments
                                                              #       just drop the filter_python_markup
                                                              #       just drop the filter_python_markup

# -----------------------------------------------------------------------------
# the main code

import argparse

parser = argparse.ArgumentParser(description="Pre-process LaTeX-Python source")
parser.add_argument("-i", dest="input", metavar="input", help="LaTeX-Python source file (without file extension)", required=True)
parser.add_argument("-m", dest="name", metavar="name", help="Merged LaTeX-Python source file (without file extension)")

args = parser.parse_args()
src_file_name = args.input
mrg_file_name = args.name

# The LaTeX file that is read: the merged file when -m is given, otherwise
# the source file itself.  The output files are always named after -i.
tex_base_name = mrg_file_name if mrg_file_name else src_file_name

if not os.path.isfile (tex_base_name + ".tex"):
   print ("> pre-process: Source file " + tex_base_name + ".tex" + " not found, exit.")
   sys.exit (1)

pass1 (tex_base_name + ".tex", src_file_name +  ".py", src_file_name)
pass2 (src_file_name + ".py",  src_file_name + "_.py", src_file_name + ".pyidx")
