Package src :: Package inspectors :: Module odt_inspector
Source Code for Module src.inspectors.odt_inspector

   1  #!/usr/bin/python 
   2  # -*- coding: UTF-8 -*- 
   3  # 
   4  #The MIT License 
   5  # 
   6  #Copyright (c) 2011 
   7  # 
   8  #Permission is hereby granted, free of charge, to any person obtaining a copy 
   9  #of this software and associated documentation files (the "Software"), to deal 
  10  #in the Software without restriction, including without limitation the rights 
  11  #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
  12  #copies of the Software, and to permit persons to whom the Software is 
  13  #furnished to do so, subject to the following conditions: 
  14  # 
  15  #The above copyright notice and this permission notice shall be included in 
  16  #all copies or substantial portions of the Software. 
  17  # 
  18  #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  19  #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  20  #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
  21  #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  22  #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
  23  #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
  24  #THE SOFTWARE. 
  25  # 
  26  #Authors: 
  27  #   Vili Auvinen (vili.k.auvinen@jyu.fi) 
  28  #   Olli Kauppinen (olli.kauppinen@jyu.fi) 
  29  #   Juho Tammela (juho.i.tammela@jyu.fi) 
  30   
  31  ''' 
  32  The module provides the methods for inspecting odt files. 
  33   
  34  @author: Vili Auvinen, Juho Tammela, Olli Kauppinen 
  35  ''' 
  36   
  37  import conversions 
  38  import string 
  39  import ooo_meta_inspector 
  40  import common_methods 
  41   
  42 -def _getStyleElementByStyleName (documentDict, styleName): 
  43      '''Gets style element by the given style name. 
  44      It searches first from the file content.xml if doesn't find then searches from the file style.xml.  
  45       
  46      @return: The style element if the style exists, otherwise returns None.    
  47      ''' 
  48      styleElements = documentDict['content.xml'].getElementsByTagName('style:style') 
  49      for element in styleElements: 
  50          if element.getAttribute('style:name') == styleName: 
  51              return element 
  52                   
  53      styleElements = documentDict['styles.xml'].getElementsByTagName('style:style') 
  54      styleElements += documentDict['styles.xml'].getElementsByTagName('text:list-style') 
  55      for element in styleElements: 
  56          if element.getAttribute('style:name') == styleName: 
  57              return element 
  58      return None   
  59   
  60   
  61 -def _getStyleElementByDisplayName (documentDict, styleName): 
  62      '''Gets the style element by the given style name. 
  63      It searches first from content.xml if doesn't find then searches from style.xml. 
  64        
  65      @note: XML: 
  66      <style:style style:name="Text_20_body" style:display-name="Text body"> 
  67      <style:style style:name="tutkielma"> 
  68       
  69      Display name --> style name 
  70      " "-->"_20_" 
  71      "_"-->"_5f_"   
  72        
  73      @return: The element of the style display name if it exists, otherwise returns None.   
  74      ''' 
  75      styleElements = documentDict['content.xml'].getElementsByTagName('style:style') 
  76      for element in styleElements: 
  77          if element.getAttribute('style:display-name').lower() == styleName.lower(): 
  78              return element 
  79                   
  80      styleElements = documentDict['styles.xml'].getElementsByTagName('style:style') 
  81      for element in styleElements: 
  82          if element.getAttribute('style:display-name').lower() == styleName.lower(): 
  83              return element 
  84      return None  
  85   
  86 -def _getStyleDisplayNameByStyleName (documentDict, styleName): 
  87      '''Gets the style display name by the given style name. 
  88      It uses _getStyleElementByStyleName method to find the style. 
  89      If the style doesn't have display-name attribute then the display name is just the style name.  
  90       
  91      @note: XML example: 
  92       
  93      <style:style style:name="Text_20_body" style:display-name="Text body"> 
  94       
  95      <style:style style:name="tutkielma"> 
  96       
  97      Display name --> style name 
  98      " "-->"_20_" 
  99      "_"-->"_5f_" 
 100       
 101      @return: The style display name.        
 102      ''' 
 103       
 104      element = _getStyleElementByStyleName(documentDict, styleName) 
 105      if element.hasAttribute('style:display-name'): 
 106          return element.getAttribute('style:display-name') 
 107      else: 
 108          return styleName 
 109       
 110 -def _getMasterPageStyleElement (documentDict, masterPageStyleName): 
 111      '''Get the master page style element by the given master page style name. 
 112       
 113      @return: The master page element.     
 114      ''' 
 115      masterPageStyles = documentDict['styles.xml'].getElementsByTagName ('style:master-page') 
 116      for element in masterPageStyles:         
 117          if masterPageStyleName == element.getAttribute('style:name'): 
 118              return element 
 119      return None 
 120           
 121 -def _getPageLayoutElement(documentDict, pageLayoutName): 
 122      '''Gets page layout element by the given page layout name. 
 123       
 124      @return: The page layout. 
 125      ''' 
 126      pageLayoutElements = documentDict['styles.xml'].getElementsByTagName ('style:page-layout') 
 127      for element in pageLayoutElements: 
 128          if pageLayoutName == element.getAttribute('style:name'): 
 129              return element 
 130      return None 
 131   
 132           
 133 -def getPageMarginals(documentDict): 
 134      '''Get the page marginals. 
 135      Searches for only from used master pages. 
 136       
 137      @return: The page marginals. If the marginals are different between the used pages, 
 138      then return false. 
 139       
 140      @see: convertCmOrInDictToString 
 141      ''' 
 142      margins = {'top': None, 'bottom': None, 'right': None, 'left': None} 
 143      usedMasterPages = _getUsedMasterPageElements(documentDict)  
 144       
 145      for element in usedMasterPages: 
 146          pageLayoutName = element.getAttribute('style:page-layout-name') 
 147          layoutElement = _getPageLayoutElement(documentDict, pageLayoutName) 
 148          for key in margins.keys():             
 149              margin = layoutElement.getElementsByTagName('style:page-layout-properties')[0].getAttribute('fo:margin-' + key) 
 150               
 151              if margins [key] is None: 
 152                  margins [key] = margin 
 153              elif margins [key] != margin:    
 154                  return False 
 155               
 156                   
 157               
 158      return conversions.convertCmOrInDictToString(margins) 
 159   
 160 -def getPageSize(documentDict): 
 161      '''Get the page size.   
 162       
 163      @return: The converted page size. If the size is different between the used pages, 
 164      then returns False.  
 165       
 166      @see: convertCmOrInDictToString 
 167      ''' 
 168      pageSize = {'height': None, 'width': None } 
 169       
 170      usedMasterPages = _getUsedMasterPageElements(documentDict)         
 171       
 172      for element in usedMasterPages: 
 173          pageLayoutName = element.getAttribute('style:page-layout-name') 
 174          layoutElement = _getPageLayoutElement(documentDict, pageLayoutName) 
 175          for key in pageSize.keys():             
 176              size = layoutElement.getElementsByTagName('style:page-layout-properties')[0].getAttribute('fo:page-' + key) 
 177               
 178              if pageSize [key] is None: 
 179                  pageSize [key] = size 
 180              if pageSize [key] != size: 
 181                  return False 
 182               
 183      return conversions.convertCmOrInDictToString(pageSize) 
 184   
 185 -def _getUsedMasterPageElements (documentDict): 
 186      ''' Get the all master page elements which are used in document. 
 187      'Standard' master page style used if there is no other definitions.  
 188       
 189      @return: The list of the used master page elements. 
 190      ''' 
 191      usedMasterPageElements = [] 
 192      usedMasterPageDict = _getAllStyleNamesWithDifferentMasterPage(documentDict) 
 193      for masterPageName in usedMasterPageDict.values():         
 194          masterPageElement = _getMasterPageStyleElement(documentDict, masterPageName) 
 195          usedMasterPageElements.append(masterPageElement) 
 196           
 197      if len(usedMasterPageElements) == 0: 
 198          usedMasterPageElements.append(_getMasterPageStyleElement(documentDict, 'Standard')) 
 199      return usedMasterPageElements     
 200       
 201 -def _getDefaultStyleElement(documentDict, styleFamily): 
 202      '''Get the default style element by the given style family. 
 203       
 204      @note: 
 205       
 206      Every style is based on style family. 
 207        
 208      <style:style style:name="Heading_20_1" style:display-name="Heading 1" style:family="paragraph"> 
 209       
 210      @param styleFamily: gets wanted default style.   
 211         
 212      Style family can be paragraph, graphic, table or table-row. 
 213       
 214      @return: default style element.    
 215      ''' 
 216      defaultStyleElements = documentDict['styles.xml'].getElementsByTagName('style:default-style') 
 217      for element in defaultStyleElements: 
 218          if element.getAttribute ('style:family') == styleFamily: 
 219              return element 
 220      return None 
 221   
 222   
 223 -def _getMasterPageStyleName(documentDict, styleElement): 
 224      '''Get the master page style name by the given style element. 
 225       
 226      @return: The master page name, if master page is '' then return 'Standard'. 
 227      ''' 
 228      masterPageName = styleElement.getAttribute ('style:master-page-name') 
 229      if masterPageName == '': 
 230          return 'Standard' 
 231      else: 
 232          return masterPageName 
 233   
 234 -def checkEmptyParagraphs(documentDict): 
 235      '''Checks the empty paragraphs from document. 
 236      getDocumentPararaphs method gets all paragraphs to be checked for. 
 237      An empty paragraph is permitted after the table of content and in page break elements.    
 238        
 239      @return: The number of the empty paragraphs if efound, otherwise returns False. 
 240      ''' 
 241      pageBreakStyles = _getPageBreakStyleNames(documentDict) 
 242      paragraphs = _getDocumentParagraphs(documentDict) 
 243      emptyParagraphs = 0 
 244      for element in paragraphs: 
 245          if not element.childNodes and element.previousSibling.tagName != 'text:table-of-content' and element.getAttribute('text:style-name')not in set(pageBreakStyles): 
 246              emptyParagraphs = emptyParagraphs + 1 
 247       
 248      if emptyParagraphs == 0: 
 249          return False 
 250      return emptyParagraphs 
 251   
 252 -def checkDoubleWhitespaces(documentDict): 
 253      '''Checks double spaces. 
 254      Checks if the document has text:s tag. 
 255       
 256      @note: XML example: 
 257       
 258      <text:s text:c="2"/> --> 3 spaces 
 259       
 260      <text:s/> --> 2 spaces 
 261       
 262      @return: The amount of the double spaces. 
 263      ''' 
 264      doubleSpaces = documentDict['content.xml'].getElementsByTagName('text:s') 
 265       
 266      if len(doubleSpaces) == 0: 
 267          return False 
 268       
 269      return len(doubleSpaces)     
 270   
 271 -def checkTabs(documentDict): 
 272      '''Checks tabulators from the document. 
 273      getDocumentPararaphs method gets the all paragraphs to be checked for. 
 274       
 275      @return: The number of the tabulators if found, otherwise returns False. 
 276      ''' 
 277      paragraphs = _getDocumentParagraphs(documentDict) 
 278      tabs = 0 
 279      for element in paragraphs: 
 280          elementListLength = len(element.getElementsByTagName('text:tab')) 
 281          if  elementListLength != 0: 
 282              tabs = tabs + elementListLength 
 283      if tabs == 0: 
 284          return False 
 285   
 286      return tabs 
 287   
 288 -def checkAsterisk(documentDict): 
 289      '''Checks asterisk from the document. 
 290      getDocumentPararaphs method gets all paragraphs to check for. 
 291       
 292      @return: The number of the asterisks if found, otherwise returns False. 
 293      ''' 
 294      paragraphs = _getDocumentParagraphs(documentDict) 
 295      asterisk = 0 
 296      for element in paragraphs: 
 297          if common_methods.checkStringFromContent(element, '*') is True: 
 298              asterisk += 1 
 299      if asterisk == 0: 
 300          return False 
 301      return asterisk 
 302   
 303 -def _getDocumentParagraphs(documentDict): 
 304      '''Gets all the paragraphs from the document. 
 305      Including all text-p(text paragraphs) and text-h (headings) elements. 
 306      It is used in checkTabs and checkEmptyParagraphs. 
 307       
 308      @return: The list of the used elements. 
 309      ''' 
 310      #FIXME: must append more paragraphs (like text:list) and deeper (all descendants) 
 311      elementList = [] 
 312      officeTextElement = documentDict['content.xml'].getElementsByTagName ('office:text')[0] 
 313      elements = officeTextElement.childNodes 
 314      for element in elements: 
 315          if element.nodeName == 'text:p' or element.nodeName == 'text:h': 
 316              elementList.append(element) 
 317      return elementList             
 318            
 319 -def _getListOfUsedStyleElements (documentDict): 
 320      '''Gets all used style elements. 
 321      In the file content.xml the element office:body contains the used styles. 
 322       
 323      @return: The element list of the used styles. 
 324      ''' 
 325       
 326      usedStyleElements = []    
 327       
 328      bodyElements = common_methods.getDescendants(documentDict['content.xml'].getElementsByTagName('office:body')[0], []) 
 329       
 330      for element in bodyElements: 
 331          if element.nodeType is not element.TEXT_NODE and element.hasAttribute('text:style-name'): 
 332              if len(common_methods.getTextContent(element)) != 0: 
 333                  usedStyleElements.append(element) 
 334               
 335      return usedStyleElements 
 336   
 337 -def _getListOfUsedStyleNames (documentDict): 
 338      '''Gets all used style names. 
 339      Gets the parent style of PI-style (I is integer value) like P1 is Heading_20_1. 
 340       
 341      @return: The list of all the style names. 
 342      ''' 
 343      usedStyleNames = [] 
 344      for element in _getListOfUsedStyleElements (documentDict): 
 345          styleName = element.getAttribute('text:style-name') 
 346          if styleName [0] == 'P' and styleName[1].isdigit(): 
 347              styleElement = _getStyleElementByStyleName(documentDict, styleName) 
 348              if styleElement.hasAttribute('style:parent-style-name'): # if P-style doesn't have parent-style-name, it's not real style. 
 349                  usedStyleNames.append(styleElement.getAttribute('style:parent-style-name')) 
 350          else: 
 351              usedStyleNames.append(element.getAttribute('text:style-name')) 
 352       
 353      usedStyleNames = list(set(usedStyleNames)) #deletes duplicates from list 
 354      usedStyleNames.sort() 
 355   
 356      return usedStyleNames    
 357       
 358 -def _checkPageBreakStyleElement (styleElement): 
 359      '''Checks if the given style element contains the page break. 
 360       
 361      @return: The style element if contains the page break, otherwise returns False.  
 362      ''' 
 363       
 364      hasParagraphProperties = styleElement.getElementsByTagName('style:paragraph-properties') 
 365      if hasParagraphProperties: 
 366          if hasParagraphProperties[0].getAttribute('fo:break-before') == 'page': 
 367              return styleElement 
 368      return False  
 369   
 370 -def _getPageBreakStyleNames(documentDict): 
 371      '''Gets the names of the styles which contains the page break. 
 372       
 373      @return: The list of page break style names.     
 374      ''' 
 375      pageBreakStylesNames = [] 
 376      styleElements = documentDict['content.xml'].getElementsByTagName('style:style') 
 377      styleElements += documentDict['styles.xml'].getElementsByTagName('style:style') 
 378      for element in styleElements: 
 379          pageBreak = _checkPageBreakStyleElement(element) 
 380          if pageBreak is not False: 
 381              pageBreakStylesNames.append(element.getAttribute('style:name')) 
 382         
 383      return pageBreakStylesNames 
 384   
 385 -def _getAllStyleNamesWithDifferentMasterPage(documentDict): 
 386      '''Gets all the style names which changes the master page. 
 387      The master page will change when a style has master-page-name attribute 
 388      and its is nonempty. If is empty ("") then master page is standard and 
 389      if has no attribute with same as previous master page. 
 390       
 391      @note: masterPageDict: contains a key as a style name and value as a master page name. 
 392       
 393      @return: The dictionary of the styles which changes master page.     
 394      ''' 
 395      styleElements = documentDict['content.xml'].getElementsByTagName('style:style') 
 396      styleElements += documentDict['styles.xml'].getElementsByTagName('style:style') 
 397       
 398      masterPageDict = {} 
 399       
 400      for element in styleElements: 
 401          if element.getAttribute('style:master-page-name') != '': 
 402              masterPageDict[element.getAttribute('style:name')] = element.getAttribute('style:master-page-name') 
 403      return masterPageDict 
 404       
 405 -def _getSectionBreakElements(documentDict): 
 406      '''Gets section break elements from the document.  
 407      Finds all the elements (including text, list, heading...) which chance the section.  
 408       
 409      @return: The list of the elements which changes the section.   
 410      ''' 
 411      usedStyleElementsInDocument = _getListOfUsedStyleElements(documentDict) 
 412      masterPagesDict = _getAllStyleNamesWithDifferentMasterPage(documentDict) 
 413      sectionBreakElements = [] 
 414      for element in usedStyleElementsInDocument: 
 415          if masterPagesDict.has_key(element.getAttribute('text:style-name')): 
 416              sectionBreakElements.append(element) 
 417      return sectionBreakElements         
 418   
 419 -def _getTOC(documentDict): 
 420      '''Gets table of content. 
 421      Each TOC entry is own entry in tocList. 
 422       
 423      @return: The list of the elements in table of content.      
 424      ''' 
 425      tocList = [] 
 426      if checkTOC(documentDict) is True: 
 427          toc = documentDict['content.xml'].getElementsByTagName('text:table-of-content') 
 428          tocTextElements = toc[0].getElementsByTagName ('text:p') 
 429          for element in tocTextElements: 
 430              if element.parentNode.nodeName == 'text:index-title': 
 431                  tocTitle = common_methods.getTextContent(element) 
 432              else: 
 433                  tocList.append(common_methods.getTextContent(element)) 
 434          return tocList 
 435      
 436 -def checkTocContent(documentDict): 
 437      '''Compares document headings to the TOC entries.  
 438       
 439      @return: True if all entries matches otherwise returns an error message. 
 440      ''' 
 441      #FIXME: add better error messages (not False) 
 442      tocList = _getTOC(documentDict) 
 443      headingList = _getHeadingList(documentDict)['headings'] 
 444   
 445      if len(tocList) != len(headingList): 
 446          return False#"Number of document headings doesn't match with number of TOC entries " 
 447      i = 0  
 448      while i < len(tocList): 
 449          #if string.find (tocList[i], headingList[i]) == -1: 
 450          if headingList[i] not in tocList[i]:  
 451              return False #headingList[i-1]+" -heading doesn't exist in table of content" 
 452          i += 1 
 453      return True 
 454       
 455       
 456           
 457 -def checkTOC(documentDict): 
 458      '''Checks if the document contains the table of contents. 
 459       
 460      @return: True if there is the table of content, otherwise returns False.  
 461      ''' 
 462      toc = documentDict['content.xml'].getElementsByTagName('text:table-of-content') 
 463      if len(toc) == 0: 
 464          return False 
 465      else: 
 466          return True 
 467       
 468 -def checkIndex(documentDict): 
 469      '''Checks if the document have the alphabetical index. 
 470       
 471      @return: True if the alphabetical index exists otherwise returns False. 
 472      ''' 
 473      aIndex = documentDict['content.xml'].getElementsByTagName('text:alphabetical-index') 
 474      if len(aIndex) == 0: return False     
 475      return True 
 476   
 477 -def _getIndexContentFromDocument (documentDict): 
 478      '''Gets marked alphabetical index entries from the document. 
 479       
 480      @return: The content list of the alphabetical index entries. 
 481      ''' 
 482      indexContentList = [] 
 483      indexMarks = documentDict['content.xml'].getElementsByTagName ('text:alphabetical-index-mark-start') 
 484      for element in indexMarks: 
 485          indexContentList.append (element.nextSibling.nodeValue) 
 486      return indexContentList 
 487   
 488 -def _getIndexContent(documentDict): 
 489      '''Gets the alphabetical index content. 
 490      Each alphabetical index entry is an own entry in the list. 
 491       
 492      @return: The list of the alphabetical index content. 
 493      ''' 
 494      alphabeticalIndexList = []  
 495      if checkIndex(documentDict) is True: 
 496          aIndex = documentDict['content.xml'].getElementsByTagName('text:alphabetical-index')[0] 
 497          aIndexTextElements = aIndex.getElementsByTagName('text:p') 
 498          for element in aIndexTextElements: 
 499              alphabeticalIndexList.append(common_methods.getTextContent(element)) 
 500      return alphabeticalIndexList        
 501   
 502 -def checkIndexContent(documentDict): 
 503      '''Compares the document marked texts to the alphabetical index entries. 
 504        
 505      @return: True if all entries matches otherwise returns an error code. 
 506      ''' 
 507      indexList = _getIndexContent(documentDict) 
 508      markedList = _getIndexContentFromDocument(documentDict) 
 509      if len(indexList) == 0: 
 510          return '3' # errorcode 3 = Index is empty 
 511       
 512      for markedItem in markedList: 
 513          for indexItem in indexList: 
 514              found = False 
 515              #if string.find (indexItem, markedItem) != -1: 
 516              if markedItem in indexItem: 
 517                  found = True 
 518                  break   
 519          if found is False: 
 520              return '4' #errorcode 4 = marked item not found from index 
 521                      
 522      return True 
 523   
 524 -def _getHeadingList(documentDict): 
 525      '''Gets all headings from the document and the used outline level. 
 526      Each heading is an own entry in the list. 
 527       
 528      @return: The dictionary ['headings'] contains a list of headings and ['level'] contains the value of the highest used heading outline level. 
 529      ''' 
 530      headingList = [] 
 531      headingOutlineLevel = 0 
 532      headings = documentDict['content.xml'].getElementsByTagName('text:h') 
 533      for element in headings: 
 534          heading = common_methods.getTextContent(element) 
 535          headingList.append(heading) 
 536          if headingOutlineLevel < element.getAttribute('text:outline-level'): 
 537              headingOutlineLevel = element.getAttribute('text:outline-level') 
 538      return {'headings':headingList, 'level':headingOutlineLevel} 
 539       
 540 -def checkTable (documentDict): 
 541      '''Checks if the document has a table. 
 542       
 543      @return: True if there is a table and False if not. 
 544      ''' 
 545      table = documentDict['content.xml'].getElementsByTagName('table:table') 
 546      if len(table) == 0: 
 547          return True # "There is no table at all." 
 548      else: 
 549          return True 
 550       
 551   
 552 -def _getTableDict (documentDict): 
 553      '''Gets tables in dictionary. 
 554      Every table is own entry in tablesDict (key = table1,table2...) 
 555      Every tableDict has table's cell address as key (A1,A2...) and cell value as dictionary's value. 
 556       
 557      @return: The dictionary of the table dictionaries. 
 558      '''  
 559      tablesDict={} 
 560      tableNumber = 0 
 561      if checkTable (documentDict) is True: 
 562          tableElements = documentDict['content.xml'].getElementsByTagName('table:table') 
 563          for tableElement in tableElements: 
 564              tableDict = {} 
 565               
 566              tableRowElements = tableElement.getElementsByTagName('table:table-row') 
 567              rowIndex = 0 
 568              for row in tableRowElements: 
 569                  rowIndex += 1 
 570                  rowCellElements = row.getElementsByTagName('table:table-cell') 
 571                  columnIndex = ord('A') - 1 
 572                  for cell in rowCellElements: 
 573                      columnIndex += 1 
 574                      index = chr(columnIndex) + str(rowIndex) 
 575                      tableDict[index] = common_methods.getTextContent(cell) 
 576              tableNumber +=1 
 577              tablesDict['table'+str(tableNumber)]=tableDict 
 578          return tablesDict 
 579      return False #"There is no table at all" 
 580                   
 581               
 582   
 583 -def checkPageNumberFromFooterAndHeader(documentDict, masterPageElement, element): 
 584      '''Checks page number format by given element and master page element. 
 585       
 586      @param masterPageElement: the master page element to look for. 
 587      @param element: a footer or a header element.  
 588      
 589      @return: The number format if it exists, otherwise returns False. 
 590       
 591      The number format is optionally in the element (footer or header). If the number format 
 592      is not in the element then the page-layout element defines number format. 
 593      ''' 
 594      pageLayoutElement = _getPageLayoutElement(documentDict, masterPageElement.getAttribute('style:page-layout-name')) 
 595      pageNumberElements = element.getElementsByTagName('text:page-number') 
 596      if pageNumberElements: 
 597          if pageNumberElements[0].hasAttribute('style:num-format'): 
 598              numFormat = pageNumberElements[0].getAttribute('style:num-format') 
 599          else: 
 600              numFormat = pageLayoutElement.getElementsByTagName('style:page-layout-properties')[0].getAttribute('style:num-format') 
 601          return numFormat 
 602       
 603      return False     
 604       
 605 -def getAuthorAndPageNumberFormat(documentDict, masterPageElement):  
 606      '''Gets the author and the number format from the header and the footer. 
 607       
 608      @param masterPageElement: the master page element to look for. 
 609       
 610      @return: The dictionary which contains the author and the page number format. 
 611      '''       
 612   
 613      meta = ooo_meta_inspector.getMeta(documentDict) 
 614      footer = masterPageElement.getElementsByTagName ('style:footer') 
 615      header = masterPageElement.getElementsByTagName ('style:header')   
 616       
 617      authorAndNumberDict = {'headerPageNumber':None, 'headerAuthor': None, 'footerPageNumber': None, 'footerAuthor': None} 
 618           
 619      if footer:   
 620          if checkPageNumberFromFooterAndHeader(documentDict, masterPageElement, footer[0]) is not False: 
 621              authorAndNumberDict['footerPageNumber'] = checkPageNumberFromFooterAndHeader(documentDict, masterPageElement, footer[0]) 
 622          if common_methods.checkStringFromContent(footer[0], meta['dc:creator']): 
 623              authorAndNumberDict['footerAuthor'] = meta['dc:creator'] 
 624   
 625      if header: 
 626          if checkPageNumberFromFooterAndHeader(documentDict, masterPageElement, header[0]) is not False: 
 627              authorAndNumberDict['headerPageNumber'] = checkPageNumberFromFooterAndHeader(documentDict, masterPageElement, header[0]) 
 628               
 629          if common_methods.checkStringFromContent(header[0], meta['dc:creator']): 
 630              authorAndNumberDict['headerAuthor'] = meta['dc:creator'] 
 631   
 632      return authorAndNumberDict        
 633       
 634   
 635 -def checkHeadingNumbering(documentDict, errorIdsAndPositions): 
 636      '''Checks the outline style. 
 637      Level is highest used headings outline level. Normally Heading 1 should be 1 and Heading 2 should be 2. 
 638       
 639      @note: XML example: 
 640       
 641      <text:outline-style style:name="Outline"> 
 642       
 643      <text:outline-level-style text:level="1" style:num-format="1"> 
 644       
 645      <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> 
 646       
 647      <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="0.762cm" fo:text-indent="-0.762cm" fo:margin-left="0.762cm"/> 
 648       
 649      </style:list-level-properties> 
 650       
 651      </text:outline-level-style> 
 652   
 653      <text:outline-level-style text:level="2" style:num-format="1" text:display-levels="2"> 
 654       
 655      <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> 
 656       
 657      <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.016cm" fo:text-indent="-1.016cm" fo:margin-left="1.016cm"/> 
 658       
 659      </style:list-level-properties> 
 660       
 661      </text:outline-level-style> 
 662   
 663      <text:outline-level-style text:level="3" style:num-format=""> 
 664       
 665      <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> 
 666       
 667      <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.27cm" fo:text-indent="-1.27cm" fo:margin-left="1.27cm"/> 
 668       
 669      </style:list-level-properties> 
 670       
 671      </text:outline-level-style>    
 672      
 673      ... 
 674       
 675      </text:outline-style> 
 676       
 677      @return: True if ok, False if not. 
 678      ''' 
 679       
 680      outlineStyleElements = documentDict['styles.xml'].getElementsByTagName('text:outline-style') 
 681       
 682      if len(outlineStyleElements) == 0: 
 683          outlineStyleElement = _getStyleElementByStyleName(documentDict, 'Outline') 
 684          outlineLevels = outlineStyleElement.getElementsByTagName('text:list-level-style-number') 
 685      else: 
 686          outlineStyleElement = outlineStyleElements[0] 
 687          outlineLevels = outlineStyleElement.getElementsByTagName('text:outline-level-style') 
 688       
 689       
 690      level = _getHeadingList(documentDict)['level'] 
 691      index = 0 
 692      for element in outlineLevels:   
 693          index = index + 1 
 694          if index > int(level): 
 695              return True          
 696          if int(element.getAttribute('text:level')) == index and element.getAttribute('style:num-format') != "": 
 697              "ok"#index"-level heading numbering is correctly made" 
 698          else: 
 699              return False#"numbering is not correct on "+ str(level)+ "-level" 
 700   
 701   
 702 -def _getImagePaths(documentDict): 
 703      '''Gets the image paths. 
 704      Checks if the document have an image. 
 705      Images are located in the picture folder. 
 706       
 707      @return: The founded paths of the images in the list, otherwise returnsFalse. 
 708      ''' 
 709      imagePathList = [] 
 710      if checkImages(documentDict) is True: 
 711          imageElements = documentDict['content.xml'].getElementsByTagName ('draw:image') 
 712          for element in imageElements: 
 713              imagePath = element.getAttribute ('xlink:href') 
 714              imagePathList.append(imagePath) 
 715       
 716      if len(imagePathList)==0: 
 717          return False    
 718      return imagePathList   
 719   
 720           
 721 -def checkImages(documentDict): 
 722      '''Checks if the document contains an image. 
 723       
 724      @return: True if there is an image, otherwise False.  
 725      ''' 
 726      imageElements = documentDict['content.xml'].getElementsByTagName ('draw:image') 
 727      if len(imageElements) == 0: 
 728          return False 
 729      else: 
 730          return True 
 731       
 732 -def checkList(documentDict): 
 733      '''Checks if the document contains a list. 
 734       
 735      @return: True if there is a list, otherwise False.  
 736      ''' 
 737      listElements = documentDict['content.xml'].getElementsByTagName ('text:list') 
 738      if len(listElements) == 0: 
 739          return False 
 740       
 741      return True 
 742   
 743   
 744 -def printLists(documentDict): 
 745      '''Prints the lists of the document. 
 746       
 747      @todo: getListContent 
 748      ''' 
 749      if checkList(documentDict) is True: 
 750          listElements = documentDict['content.xml'].getElementsByTagName ('text:list') 
 751          for element in listElements: 
 752              print 'Lista tehty', element.getAttribute('text:style-name'), '-tyylillä, lista:' 
 753              listContent = element.getElementsByTagName ('text:list-item') 
 754              for text in listContent: 
 755                  print "-", common_methods.getTextContent(text) 
 756       
 757       
 758 -def getObjectPaths(documentDict): 
 759      '''Gets objects paths. 
 760      Searches if the document have an image. 
 761       
 762      @return: The object path list if founds an image, otherwise an error message 
 763      ''' 
 764      objectPathList = [] 
 765      objectElements = documentDict['content.xml'].getElementsByTagName ('draw:object') 
 766      if objectElements: 
 767          for element in objectElements: 
 768              objectPath = element.getAttribute ('xlink:href') 
 769              objectPathList.append(objectPath)   
 770          return objectPathList  
 771      else: 
 772          return False #error: there is no objects at all" 
 773       
 774 -def _isStyleUsed(documentDict, styleName): 
 775      '''Checks if the given style is used in the document. 
 776       
 777      @return: True if used, otherwise return False. 
 778      ''' 
 779      usedStyleList = _getListOfUsedStyleNames (documentDict) 
 780      if styleName in set(usedStyleList): 
 781          return True 
 782      return False 
 783   
 784 -def getStyle(documentDict, styleName):  
 785      '''Get style defination attributes by given style name. 
 786      parentStyleList is for executing the inheritation of styles. 
 787       
 788      @note: Inheritation of the styles: 
 789       
 790      default paragraph -style-> standard-style -> style(Text body) -> P-style -> T-style 
 791       
 792      @note: XML example (styles.xml): 
 793       
 794      <style:style style:name="Standard" style:family="paragraph" style:class="text"> 
 795       
 796      <style:paragraph-properties fo:orphans="2" fo:widows="2" style:writing-mode="lr-tb"/> 
 797       
 798      <style:text-properties style:use-window-font-color="true" style:font-name="Courier New" fo:font-size="10pt" fo:language="fi" fo:country="FI" style:font-name-asian="Times New Roman" style:font-size-asian="10pt" style:font-name-complex="Times New Roman" style:font-size-complex="10pt" style:language-complex="ar" style:country-complex="SA"/> 
 799      
 800      </style:style> 
 801       
 802      <style:style style:name="Text_20_body" style:display-name="Text body" style:family="paragraph" style:parent-style-name="Standard" style:class="text" style:master-page-name=""> 
 803       
 804      <style:paragraph-properties fo:margin-left="1cm" fo:margin-right="0cm" fo:margin-top="0.247cm" fo:margin-bottom="0.247cm" fo:text-indent="0cm" style:auto-text-indent="false" style:page-number="auto" fo:break-before="auto" fo:break-after="auto"/> 
 805       
 806      <style:text-properties style:font-name="Tahoma"/> 
 807       
 808      </style:style 
 809       
 810      @note: XML example (content.xml): 
 811       
 812      <style:style style:name="P2" style:family="paragraph" style:parent-style-name="Text_20_body"> 
 813       
 814      <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> 
 815       
 816      </style:style> 
 817       
 818      @return: The style defination dictionary. 
 819       
 820      '''        
 821      originalStyleName = styleName 
 822      styleNameLower = styleName.lower() 
 823      #if string.find(styleNameLower, 'body') != -1: 
 824      if 'body' in styleNameLower: 
 825          styleName = 'Text_20_body'  
 826      if _getStyleElementByStyleName(documentDict, styleName) is None: 
 827          if _getStyleElementByDisplayName(documentDict, styleName) is None: 
 828              return False 
 829          else: styleName = _getStyleElementByDisplayName(documentDict, styleName).getAttribute ('style:name') 
 830       
 831      if _isStyleUsed(documentDict, styleName) is False: 
 832          return False 
 833       
 834      defaultStyleElement = _getDefaultStyleElement(documentDict, 'paragraph') 
 835   
 836      fontName = defaultStyleElement.getElementsByTagName('style:text-properties')[0].getAttribute('style:font-name') 
 837      fontSize = defaultStyleElement.getElementsByTagName('style:text-properties')[0].getAttribute('fo:font-size') 
 838      language = defaultStyleElement.getElementsByTagName('style:text-properties')[0].getAttribute('fo:language') 
 839           
 840       
 841      styleAttributeDict = { 
 842      'style:name':styleName, 
 843      'style:parent-style-name':None, 
 844      'fo:language':language, 
 845      'style:font-name':fontName, 
 846      'fo:font-size':fontSize, 
 847      'fo:text-transform':None, 
 848      'fo:text-indent':'0', 
 849      'fo:line-height':'100%', 
 850      'fo:margin-left':0, 
 851      'fo:margin-right':0, 
 852      'fo:margin-top':0, 
 853      'fo:margin-bottom':0, 
 854      'fo:keep-with-next':False, 
 855      'fo:text-align': 'start', 
 856      'fo:orphans':None, 
 857      'fo:widows': None, 
 858      'fo:font-style': False, 
 859      'fo:font-weight':False} 
 860       
 861      translateDict = { 
 862      'style:name': 'styleName', 
 863      'style:font-name':'fontName', 
 864      'fo:font-size':'fontSize', 
 865      'fo:text-transform':'transform', 
 866      'fo:margin-left':'indentLeft', 
 867      'fo:margin-right':'indentRight', 
 868      'fo:text-indent':'indentFirstLine', 
 869      'fo:line-height':'linespacing', 
 870      'fo:margin-top':'spacingBefore', 
 871      'fo:margin-bottom':'spacingAfter', 
 872      'fo:keep-with-next':'keepWithNext', 
 873      'fo:text-align': 'alignment', 
 874      'fo:font-style': 'italic', 
 875      'fo:font-weight':'bold', 
 876      'widowControl':'widowControl' 
 877      } 
 878      styleDict = {} 
 879       
 880      parentStyleList = _getParentStyleList (documentDict, styleName) 
 881       
 882      #Get style attribute values 
 883      while parentStyleList: 
 884          styleAttributeDict['style:name'] = parentStyleList.pop() 
 885          styleAttributeDict = _getStyleAttributes (documentDict, styleAttributeDict) 
 886       
 887      #Conversions:    
 888      styleAttributeDict['fo:margin-top'] = conversions.convertCmOrInToPt(styleAttributeDict['fo:margin-top']) 
 889      styleAttributeDict['fo:margin-bottom'] = conversions.convertCmOrInToPt(styleAttributeDict['fo:margin-bottom']) 
 890      styleAttributeDict['style:name'] = originalStyleName #_getStyleDisplayNameByStyleName(documentDict, styleAttributeDict['style:name']).lower() 
 891      styleAttributeDict['fo:line-height'] = conversions.convertPercentToDecimal(styleAttributeDict['fo:line-height']) 
 892      styleAttributeDict['fo:font-size'] = str(round(float(styleAttributeDict['fo:font-size'].split('pt')[0]), 1)) 
 893      styleAttributeDict['fo:text-indent'] = conversions.convertCmOrInToString(styleAttributeDict['fo:text-indent']) 
 894      styleAttributeDict['fo:margin-left'] = conversions.convertCmOrInToString(styleAttributeDict['fo:margin-left']) 
 895      styleAttributeDict['fo:margin-right'] = conversions.convertCmOrInToString(styleAttributeDict['fo:margin-right']) 
 896      if styleAttributeDict['fo:font-weight'] == 'bold': styleAttributeDict['fo:font-weight'] = True 
 897      elif styleAttributeDict['fo:font-weight'] == 'normal': styleAttributeDict['fo:font-weight'] = False 
 898      if styleAttributeDict['fo:font-style'] == 'italic': styleAttributeDict['fo:font-style'] = True 
 899      elif styleAttributeDict['fo:font-style'] == 'normal': styleAttributeDict['fo:font-style'] = False 
 900      if styleAttributeDict['fo:keep-with-next'] == 'always': styleAttributeDict['fo:keep-with-next'] = True 
 901      if styleAttributeDict['fo:text-align'] == 'justify': styleAttributeDict['fo:text-align'] = 'both' 
 902      elif styleAttributeDict['fo:text-align'] == 'start': styleAttributeDict['fo:text-align'] = 'left' 
 903      elif styleAttributeDict['fo:text-align'] == 'end': styleAttributeDict['fo:text-align'] = 'right' 
 904       
 905       
 906      if styleAttributeDict['fo:widows'] >= 2 and styleAttributeDict['fo:orphans'] >= 2: 
 907          styleAttributeDict['widowControl'] = True #same in docx 
 908      else: 
 909          styleAttributeDict['widowControl'] = False #same in docx 
 910       
 911       
 912      #Translate keys 
 913      for key in translateDict.keys(): 
 914          styleDict[translateDict[key]] = styleAttributeDict[key] 
 915           
 916      return styleDict  
 917   
 918 -def _getStyleAttributes (documentDict, styleAttributeList): 
 919      '''Searches if the style have the wanted attribute if it have then replace attribute value, otherwise keep old value.  
 920      The style attribute list contains all the relevant style information. 
 921       
 922      @return: The list of the styles attributes.   
 923      ''' 
 924      styleElement = _getStyleElementByStyleName(documentDict, styleAttributeList['style:name']) 
 925          
 926      if styleElement: 
 927          paragraphPropertiesElement = styleElement.getElementsByTagName('style:paragraph-properties') 
 928          textPropertiesElement = styleElement.getElementsByTagName('style:text-properties') 
 929       
 930          for attribute in styleAttributeList.keys(): 
 931              if styleElement.hasAttribute (attribute):  
 932                  styleAttributeList[attribute] = styleElement.getAttribute (attribute) 
 933       
 934          if paragraphPropertiesElement:         
 935              for attribute in styleAttributeList.keys():             
 936                  if paragraphPropertiesElement[0].hasAttribute (attribute): 
 937                      styleAttributeList[attribute] = paragraphPropertiesElement[0].getAttribute (attribute) 
 938       
 939          if textPropertiesElement: 
 940              for attribute in styleAttributeList.keys(): 
 941                  if textPropertiesElement[0].hasAttribute (attribute): 
 942                      if attribute == 'fo:font-size' and textPropertiesElement[0].getAttribute(attribute).endswith('%'): 
 943                          styleAttributeList[attribute] = str(int(styleAttributeList[attribute].split('pt')[0]) * int(textPropertiesElement[0].getAttribute(attribute).split('%')[0]) / 100)               
 944                      else: 
 945                          styleAttributeList[attribute] = textPropertiesElement[0].getAttribute(attribute) 
 946           
 947      return styleAttributeList 
 948   
 949       
 950 -def _getParentStyleList (documentDict, styleName): 
 951      '''Gets the parent style list for the given style name. 
 952       
 953      @return: The list of parent styles (lists first entry is style itself). 
 954      ''' 
 955      parentStyleList = [styleName] 
 956          
 957      while _checkParentStyle(documentDict, styleName): 
 958          styleName = _checkParentStyle(documentDict, styleName) 
 959          parentStyleList.append(styleName)     
 960       
 961      return parentStyleList 
 962   
 963   
 964 -def _checkParentStyle (documentDict, styleName): 
 965      '''Checks if the style have a parent style. 
 966       
 967      @return: The parent style name. 
 968      ''' 
 969      try: 
 970          return _getStyleElementByStyleName(documentDict, styleName).getAttribute('style:parent-style-name') 
 971      except: 
 972          return None 
 973     
 974 -def checkEndnotesAndFootnotes(documentDict): 
 975      '''Checks the end- and the footnotes. 
 976       
 977      @return: True if there is endnote or footnote in the document, otherwise False. 
 978      ''' 
 979      noteElements = documentDict['content.xml'].getElementsByTagName('text:note') 
 980      if len(noteElements) == 0: 
 981          return False 
 982  #        for element in noteElements: 
 983  #            if element.getAttribute ('text:note-class')=='endnote': 
 984  #                return True 
 985  #            elif element.getAttribute ('text:note-class')=='footnote': 
 986  #                return True 
 987      return True 
 988   
 989                   
 990 -def checkImageCaptions(documentDict): 
 991      '''Checks the caption and the reference of the image. 
 992       
 993      @return: True if the document images have caption and reference, otherwise False. 
 994      ''' 
 995      caption = False 
 996      reference = False 
 997      imageElements = documentDict['content.xml'].getElementsByTagName ('draw:image') 
 998      if imageElements: 
 999          for element in imageElements: 
1000              captionNode = element.parentNode.parentNode 
1001              if len(common_methods.getTextContent(captionNode)) > 0: 
1002                  caption = True 
1003              imagesReferenceElements = captionNode.getElementsByTagName('text:sequence') 
1004              if imagesReferenceElements: 
1005                  reference = True 
1006                   
1007      if caption is False and reference is False: 
1008          return False 
1009      elif caption is False: 
1010          return False #could replace with caption error message 
1011      elif reference is False: 
1012          return False #could replace with reference error message 
1013      return True 
1014        
1015   
def checkCoverPage(documentDict):
    '''Checks what the cover section of the document contains.

    Every element of the cover section is tested for an e-mail address and for
    the dc:creator and dc:title values of the document meta.

    @return: The cover findings in a dictionary, or False if the cover section
    elements are not available.

    @keyword title: True if the title in cover page is the same as in the document meta.
    @keyword name: True if the cover page contains the same author name as in the document meta.
    @keyword email: True if the cover page contains e-mail address.
    '''
    coverElements = _getSectionElements(documentDict, 'cover')
    if coverElements is None:
        return False

    meta = ooo_meta_inspector.getMeta(documentDict)
    findings = dict.fromkeys(('title', 'name', 'email'), False)

    for coverElement in coverElements:
        # A finding stays True once any element of the cover has matched.
        if common_methods.checkIfEmailAddress (coverElement):
            findings['email'] = True
        if common_methods.checkStringFromContent(coverElement, meta['dc:creator']):
            findings['name'] = True
        if common_methods.checkStringFromContent(coverElement, meta['dc:title']):
            findings['title'] = True
    return findings
1043                       
1044   
1045   
1046        
def getPageNumberFormatAndAuthor (documentDict, section):
    '''Gets the page number format and the author information of a section.

    The first three section break elements are taken as the starts of the
    cover, toc and text sections. The style of the chosen break element leads
    to the master page style, from which getAuthorAndPageNumberFormat reads
    the author and page number information.

    @param section: can have a value 'cover', 'toc' or 'text'.

    @return: The dictionary returned by getAuthorAndPageNumberFormat, extended
    with the key 'numStart' (the style:page-number attribute of the section style).
    '''
    breakElements = _getSectionBreakElements(documentDict)
    breakBySection = {'cover':breakElements[0], 'toc':breakElements[1], 'text':breakElements[2]}

    sectionStyleName = breakBySection[section].getAttribute('text:style-name')
    sectionStyleElement = _getStyleElementByStyleName(documentDict, sectionStyleName)

    masterPageElement = _getMasterPageStyleElement (
        documentDict, _getMasterPageStyleName(documentDict, sectionStyleElement))
    result = getAuthorAndPageNumberFormat(documentDict, masterPageElement)

    paragraphProperties = sectionStyleElement.getElementsByTagName('style:paragraph-properties')[0]
    result['numStart'] = paragraphProperties.getAttribute('style:page-number')
    return result
1068   
1069   
def checkHeadersAndFooters(documentDict):
    '''Checks that the headers and the footers of the document are made correctly.

    Assumes that the document has three sections:
        1. the cover section,
        2. the table of contents section or the toc section and
        3. the actual content section or the text section.

    @see: checkSections method must pass in order to run this method

    @return: The findings in a dictionary as key-boolean pairs:

      - 'frontPage' was there headers or footers in the cover section.

      - 'tocPageNumbering' is there a page numbering in the toc section.

      - 'differentPageNumbering' is the page numbering different in the toc and text sections
        (only the footer page numbers are compared).

      - 'nameInToc' is the last modifiers name in toc section header or footer.

      - 'nameInText' is the last modifiers name in text section header or footer.

      - 'pageNumbering' is there a page numbering in the text section.

      - 'tocNumStart' does the toc section page numbering start at 1.

      - 'textNumStart' does the text section page numbering start at 1.
    '''
    cover = getPageNumberFormatAndAuthor (documentDict, 'cover')
    toc = getPageNumberFormatAndAuthor (documentDict, 'toc')
    text = getPageNumberFormatAndAuthor (documentDict, 'text')

    # NOTE(review): in the conditions below "is not None" applies to the last
    # operand only; the other operands are tested for truthiness. This is how
    # the checks have always been evaluated - confirm against the values
    # returned by getAuthorAndPageNumberFormat whether it is intended.
    frontPage = bool(cover['footerAuthor'] or cover['headerAuthor'] or cover['headerPageNumber']
                     or (cover['footerPageNumber'] is not None))
    nameInToc = bool(toc['footerAuthor'] or (toc['headerAuthor'] is not None))
    tocPageNumbering = bool(toc['headerPageNumber'] or (toc['footerPageNumber'] is not None))
    nameInText = bool(text['footerAuthor'] or (text['headerAuthor'] is not None))
    pageNumbering = bool(text['headerPageNumber'] or (text['footerPageNumber'] is not None))

    #TODO: also check the header (what if there are several page numbers?)
    differentPageNumbering = toc['footerPageNumber'] != text['footerPageNumber']

    tocNumStart = toc['numStart'] == '1'
    textNumStart = text['numStart'] == '1'

    return {'frontPage': frontPage, 'tocPageNumbering': tocPageNumbering,
            'differentPageNumbering': differentPageNumbering,
            'nameInToc': nameInToc, 'nameInText': nameInText,
            'pageNumbering': pageNumbering, 'tocNumStart': tocNumStart,
            'textNumStart': textNumStart}
1131   
1132   
def _getSectionElements (documentDict, section):
    '''Gets the elements of the wanted section.

    Walks through the child nodes of the first child of the office:body
    element and splits them into three lists:
      - 'cover': the nodes before the second section break element,
      - 'toc': the nodes from the second section break element up to the third,
      - 'text': the third section break element and everything after it.
    Document have to have at least 3 section break elements.

    @param section: 'cover', 'toc' or 'text'.

    @return: The elements of the section in a list, or None if there are less
    than three section break elements.
    '''
    breakElements = _getSectionBreakElements(documentDict)
    officeBody = documentDict['content.xml'].getElementsByTagName ('office:body')[0] #always exact 1 element
    bodyNodes = officeBody.firstChild.childNodes
    if len(breakElements) < 3:
        return None

    collected = {}
    position = 0
    # NOTE(review): the scan assumes that the stop nodes are direct children
    # of the walked node; otherwise the index runs past the end of bodyNodes.
    for sectionName, stopNode in (('cover', breakElements[1]), ('toc', breakElements[2])):
        sectionNodes = []
        while not bodyNodes[position].isSameNode(stopNode):
            sectionNodes.append(bodyNodes[position])
            position += 1
        collected[sectionName] = sectionNodes

    # Everything that is left belongs to the text section.
    collected['text'] = list(bodyNodes[position:])

    return collected[section]
1171   
1172       
def checkSections(documentDict, errorList):
    '''Checks that the document sections have been made correctly.

    The cover section must not contain a table of contents and the toc
    section must contain one. 'cover' is appended to errorList for every
    table of contents found in the cover section, and 'toc' is appended when
    the toc section has no table of contents.

    @param errorList: The list where the error identifiers are appended.

    @return: False if there are less than three section breaks, True if the
    sections are ok, otherwise errorList.
    '''
    tocTagName = 'text:table-of-content'

    if len(_getSectionBreakElements (documentDict)) < 3:
        return False

    coverIsClean = True
    for coverNode in _getSectionElements(documentDict, 'cover'):
        if coverNode.nodeName == tocTagName:
            coverIsClean = False
            errorList.append('cover')

    tocFound = any(tocNode.nodeName == tocTagName
                   for tocNode in _getSectionElements(documentDict, 'toc'))
    if not tocFound:
        errorList.append('toc')

    if tocFound and coverIsClean:
        return True
    return errorList
1202       
1203   
def _getMeta(documentDict):
    '''Gets the meta information of the document.

    Delegates to ooo_meta_inspector.getMeta.

    @see: ooo_meta_inspector.getMeta

    @return: The result of ooo_meta_inspector.getMeta (all the meta in the dictionary).
    '''
    metaDict = ooo_meta_inspector.getMeta(documentDict)
    return metaDict
1212   
def getMetaAuthor (documentDict):
    '''Gets the author, who have last modified the document.

    @return: The 'dc:creator' value of the document meta.
    '''
    return ooo_meta_inspector.getMeta(documentDict)['dc:creator']
1221   
def getMetaTitle (documentDict):
    '''Gets the document title from the meta information.

    @return: The 'dc:title' value of the document meta.
    '''
    return ooo_meta_inspector.getMeta(documentDict)['dc:title']
1229   
def getMetaEdited (documentDict):
    '''Gets the last modified date and time from the meta.

    @return: The 'dc:date' value of the document meta
    (ISO 8601 standard, yyyy-mm-ddThh:mm:ss).
    '''
    return ooo_meta_inspector.getMeta(documentDict)['dc:date']
1237   
#FIXME: manual changes applied to a whole paragraph end up in P-styles (not in T-styles), so they go unnoticed;
#a check is needed that separates the legitimate P-styles from the illegal ones.
def checkStyleUsage (documentDict, errorIdsAndPositions):
    '''Goes through all the elements in the document which have used any style.
    Checks that elements are using the correct styles (i.e. not the Standard style)
    and that no manual style definitions are made (like T1).

    A style is considered a manual definition when its name is 'T' followed by
    a digit, or when its name is 'P' followed by a digit and one of the child
    elements of its definition sets fo:font-style, fo:text-align or fo:font-weight.

    @param errorIdsAndPositions: The dictionary of lists. The first 30 characters
    of the text content of each offending element are appended to the list under
    'manualChanges' (manual style definition) or 'styleNotUsed' (Standard style).
    '''
    manualAttributes = ('fo:font-style', 'fo:text-align', 'fo:font-weight')

    # Materialised once so that both passes below see the same elements.
    usedElements = list(_getListOfUsedStyleElements(documentDict))

    illegalStyles = set()
    for usedElement in usedElements:
        styleName = usedElement.getAttribute('text:style-name')
        # Automatic style names are a letter followed by a number; shorter
        # names (including an empty one) can not match and are skipped.
        if len(styleName) < 2 or not styleName[1].isdigit():
            continue

        if styleName[0] == 'T':
            illegalStyles.add(styleName)
        elif styleName[0] == 'P':
            styleElement = _getStyleElementByStyleName(documentDict, styleName)
            if styleElement is None:
                continue
            for child in styleElement.childNodes:
                # Text and comment nodes have no attributes to inspect.
                if child.nodeType != child.ELEMENT_NODE:
                    continue
                if any(child.hasAttribute(name) for name in manualAttributes):
                    illegalStyles.add(styleName)
                    break

    for usedElement in usedElements:
        styleName = usedElement.getAttribute('text:style-name')
        if styleName in illegalStyles:
            errorIdsAndPositions['manualChanges'].append(common_methods.getTextContent(usedElement)[:30])
        elif styleName == 'Standard':
            errorIdsAndPositions['styleNotUsed'].append(common_methods.getTextContent(usedElement)[:30])
1263