Package src :: Module word_processing
[hide private]
[frames | no frames]

Source Code for Module src.word_processing

  1  #!/usr/bin/python 
  2  # -*- coding: UTF-8 -*- 
  3  # 
  4  #The MIT License 
  5  # 
  6  #Copyright (c) 2011 
  7  # 
  8  #Permission is hereby granted, free of charge, to any person obtaining a copy 
  9  #of this software and associated documentation files (the "Software"), to deal 
 10  #in the Software without restriction, including without limitation the rights 
 11  #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
 12  #copies of the Software, and to permit persons to whom the Software is 
 13  #furnished to do so, subject to the following conditions: 
 14  # 
 15  #The above copyright notice and this permission notice shall be included in 
 16  #all copies or substantial portions of the Software. 
 17  # 
 18  #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 19  #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 20  #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 21  #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 22  #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
 23  #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
 24  #THE SOFTWARE. 
 25  # 
 26  #Authors: 
 27  #   Vili Auvinen (vili.k.auvinen@jyu.fi) 
 28  #   Olli Kauppinen (olli.kauppinen@jyu.fi) 
 29  #   Juho Tammela (juho.i.tammela@jyu.fi) 
 30   
 31  ''' 
 32  The module makes the comparisons between the office document properties and requirements specified for each user. 
 33   
 34  @author: Vili Auvinen, Olli Kauppinen, Juho Tammela 
 35   
 36  @todo: Change the name of the module to word_inspector. 
 37  ''' 
 38   
 39  from inspectors import docx_inspector, odt_inspector 
 40  import xml.dom.minidom 
 41   
 42   
 43  inspectorDict = {'odt': odt_inspector, 'docx': docx_inspector, 'docm': docx_inspector} 
 44  # @see: more attributes before inspect 
 45  #checkers = { 'TOC': word_processing.checkToc } 
 46   
def makeDocumentDict(documentFile, fileExtension):
    '''Creates a document dict which contains the files of a document archive.

    Every member listed in documentFile.filelist is read exactly once. Members
    that parse as XML are stored as xml.dom.minidom documents; members that
    fail to parse (for example images or other binary parts) are stored as the
    raw data returned by documentFile.read.

    @note: Code example:
        docXml = document['word/document.xml']

    @see: _checkers

    @param documentFile: an opened archive object exposing filelist and
        read(name) - presumably a zipfile.ZipFile; verify against the caller.
    @param fileExtension: can be docx or odt or odp etc.

    @return: The document dictionary. The key 'fileExtension' holds the given
        fileExtension, every other key is the file name of an archive member.
    '''
    document = {'fileExtension': fileExtension}

    for data in documentFile.filelist:
        # Read the member once so the fallback below does not hit the
        # archive a second time.
        content = documentFile.read(data.filename)
        try:
            document[data.filename] = xml.dom.minidom.parseString(content)
        except Exception:
            # Best effort: not every member is XML, keep the raw data then.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            document[data.filename] = content
    return document
70
def processRequirements(inspector, document, requirements, results):
    '''Runs the checker registered for every requirement.

    Each requirement's name is looked up in the module level _checkers dict
    and the checker found there is called for that requirement.

    @param inspector: defines the given inspector.
    @param document: defines document dictionary which contains the XML files.
    @param requirements: defines Requirements object whose requirements
        attribute is iterated.
    @param results: defines the results dictionary which the checkers fill
        with error messages.
    '''
    for req in requirements.requirements:
        checker = _checkers[req.name]
        checker(inspector, req, document, results)
82
83 -def printResults(resultsDict):
84 ''' For testing. ''' 85 print "\nERRORS:" 86 for feed in resultsDict.keys(): 87 print feed.upper(), 88 if resultsDict[feed]: 89 print '' 90 for i in resultsDict[feed]: 91 print " -", i 92 else: 93 print '...OK'
94
def checkBooleanRequirement(function, requirement, document, results):
    ''' Checks a requirement whose inspector method returns a boolean.

    The inspector result and the expected value are compared as strings. On
    a mismatch the requirement's default error message is appended to the
    requirement's category in results.

    @param function: inspector method which is called with the document.
    @see: processRequirements for the other parameters.

    @return: True if the inspector method returned the expected value,
        False otherwise.
    '''
    expected = requirement.expectedValue
    actual = function(document)

    if str(actual) != str(expected):
        results[requirement.category].append(requirement.getErrorMessage())
        return False
    return True
109
def checkDictRequirement(function, requirement, document, results):
    ''' Checks a requirement whose inspector method returns a dictionary.

    If the inspector method returns False, the default error message is
    appended and nothing else is checked. Otherwise every key of the expected
    value is compared (as strings) with the same key of the inspector data,
    and a key specific error message is appended for each mismatch.

    @param function: inspector method which is called with the document.
    @see: processRequirements for the other parameters.
    '''
    #FIXME: requirement's expectedValue is always a string and not the same
    # type as inspector return value. Handle the types!!
    found = function(document)
    if found is False:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    wanted = requirement.expectedValue
    for key in wanted.keys():
        if str(found[key]) == str(wanted[key]):
            continue
        results[requirement.category].append(requirement.getErrorMessage(key))
127 #elif function(document) is not 'True': 128 # results[requirement.category].append(requirement.getErrorMessage()) 129
def checkRequirementEndNoteFootNote(inspector, requirement, document, results):
    ''' Checks the endnote and footnote requirement with
    checkBooleanRequirement, which also records the error message.

    @see: checkBooleanRequirement
    @see: processRequirements for parameters.
    '''
    noteCheck = inspector.checkEndnotesAndFootnotes
    checkBooleanRequirement(noteCheck, requirement, document, results)
134
def checkRequirementSections(inspector, requirement, document, results):
    ''' Checks the sections requirement.

    inspector.checkSections is called with the document and an empty list
    which it may fill with error ids. The outcome is reported like this:

      - True: nothing is reported.
      - False: one error message per collected error id, followed by the
        default error message. The default message is reported even when no
        error ids were collected, so a failed check is never silent.
      - anything else: one error message per collected error id.

    @see: processRequirements for parameters.

    @return: Nothing.
    '''
    errorList = []
    inspectorData = inspector.checkSections(document, errorList)

    if inspectorData is True:
        return

    for element in errorList:
        results[requirement.category].append(requirement.getErrorMessage(element))

    if inspectorData is False:
        # Sections are wrong: always leave the default message, also when the
        # inspector collected no specific error ids.
        results[requirement.category].append(requirement.getErrorMessage())
159 160 161 #if inspectorData is True: 162 # check different page numbering etc. 163 164 #FIXME: not implemented 165 # checkDictRequirement(inspector.checkSections, requirement, document, results) 166
def checkRequirementMargins(inspector, requirement, document, results):
    ''' Checks the page margin requirement: the dictionary returned by
    inspector.getPageMarginals is compared by checkDictRequirement.

    @see: processRequirements for parameters and checkDictRequirement for the actual method.
    '''
    marginGetter = inspector.getPageMarginals
    checkDictRequirement(marginGetter, requirement, document, results)
173
def checkRequirementPageSize(inspector, requirement, document, results):
    ''' Checks the page size requirement: the dictionary returned by
    inspector.getPageSize is compared by checkDictRequirement.

    @see: processRequirements for parameters and checkDictRequirement for the actual method.
    '''
    pageSizeGetter = inspector.getPageSize
    checkDictRequirement(pageSizeGetter, requirement, document, results)
176
def checkRequirementCoverPage(inspector, requirement, document, results):
    ''' Checks the cover page requirement: the value returned by
    inspector.checkCoverPage is compared by checkDictRequirement.

    @see: processRequirements for parameters and checkDictRequirement for the actual method.
    '''
    coverPageCheck = inspector.checkCoverPage
    checkDictRequirement(coverPageCheck, requirement, document, results)
179
def checkRequirementHeadingNumbering(inspector, requirement, document, results):
    ''' Checks the heading numbering requirement.

    A dict of known error ids, all initialised to None, is handed to
    inspector.checkHeadingNumbering(document, errorIdsAndPositions). Every
    error id whose value is no longer None afterwards is reported with its
    own error message followed by a space and that value. The default error
    message is reported when the return value differs (as a string) from the
    expected value.

    @see: docx_inspector.checkHeadingNumbering(document, errorIdsAndPositions).
    @see: processRequirements for parameters.
    '''
    expected = requirement.expectedValue

    # @see requirements.xml - <errorvalue>
    errorIdsAndPositions = { 'manualNumbering': None,
                             'styleNotUsed': None,
                             'differentNumbering': None,
                             'notMultilevel': None,
                             'outlineLvl': None,
                             'numStart': None,
                             'numWrong': None,
                             'numFormat': None,
                             'notSequential': None}

    outcome = inspector.checkHeadingNumbering(document, errorIdsAndPositions)

    if str(outcome) != str(expected):
        results[requirement.category].append(requirement.getErrorMessage()) #DEFAULT

    for errorId, position in errorIdsAndPositions.items():
        if position is None:
            continue
        # e.g. - The multilevel numbering of the following heading style is
        # wrong: Otsikko2
        results[requirement.category].append(requirement.getErrorMessage(errorId) + ' ' + position)
def checkRequirementStyles(inspector, requirement, document, results):
    ''' Checks a style requirement.

    The style named by expectedValue['styleName'] is fetched with
    inspector.getStyle. If that returns False, the default error message is
    appended and the check ends. Otherwise every key of the expected value is
    compared (as strings) with the style data, and the error messages of the
    mismatching keys are appended as one dict {styleName: [messages]}.

    @return: Nothing.

    @see: processRequirements for parameters.
    '''
    expected = requirement.expectedValue
    styleName = expected['styleName']
    styleData = inspector.getStyle(document, styleName)

    if styleData is False:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    mismatches = [requirement.getErrorMessage(key)
                  for key in expected.keys()
                  if str(styleData[key]) != str(expected[key])]

    if mismatches:
        results[requirement.category].append({styleName: mismatches})
238
def checkRequirementTOC(inspector, requirement, document, results):
    ''' Checks the table of contents requirement.

    If inspector.checkTOC does not return True, the default error message is
    appended. Otherwise inspector.checkTocContent is consulted and the error
    message with the id '2' is appended when it returns False.

    @see: processRequirements for parameters.
    '''
    if inspector.checkTOC(document) is not True:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    if inspector.checkTocContent(document) is False:
        #FIXME: is there a better way to get the error message than hard code?
        results[requirement.category].append(requirement.getErrorMessage('2'))
253
def checkRequirementImages(inspector, requirement, document, results):
    ''' Checks the image requirement by handing inspector.checkImages to
    checkBooleanRequirement.

    @see: checkBooleanRequirement(function, requirement, document, results).
    '''
    imageCheck = inspector.checkImages
    checkBooleanRequirement(imageCheck, requirement, document, results)
261
def checkRequirementEmptyParagraphs(inspector, requirement, document, results):
    ''' Checks the empty paragraphs requirement.

    When the result of inspector.checkEmptyParagraphs differs (as a string)
    from the expected value, the default error message followed by that
    result is appended.

    @see: processRequirements for parameters.
    '''
    expected = requirement.expectedValue
    found = inspector.checkEmptyParagraphs(document)

    if str(found) != str(expected):
        results[requirement.category].append(requirement.getErrorMessage() + str(found))
275
def checkRequirementList(inspector, requirement, document, results):
    ''' Checks the list requirement by handing inspector.checkList to
    checkBooleanRequirement.

    @see: processRequirements for parameters.
    '''
    listCheck = inspector.checkList
    checkBooleanRequirement(listCheck, requirement, document, results)
283
def checkRequirementStyleUsage(inspector, requirement, document, results):
    ''' Checks the style usage requirement.

    An error dict with the two keys below is handed to
    inspector.checkStyleUsage, which may fill the lists:

        errorIdsAndPositions = {'styleNotUsed': [], 'manualChanges': []}

    When the result differs (as a string) from the expected value, the
    collected items of each key are converted with unicode() and appended as
    a dict {error message of the key: [items]}. A key without items is not
    reported.

    @see: processRequirements for parameters.
    '''
    # @see: requirements.xml - <errorvalue>
    errorIdsAndPositions = {'styleNotUsed': [], 'manualChanges': []}

    result = inspector.checkStyleUsage(document, errorIdsAndPositions)
    notUsedMessage = requirement.getErrorMessage('styleNotUsed')
    manualMessage = requirement.getErrorMessage('manualChanges')

    if str(result) == str(requirement.expectedValue):
        return

    notUsedItems = [unicode(item) for item in errorIdsAndPositions['styleNotUsed']]
    manualItems = [unicode(item) for item in errorIdsAndPositions['manualChanges']]

    if notUsedItems:
        results[requirement.category].append({notUsedMessage: notUsedItems})
    if manualItems:
        results[requirement.category].append({manualMessage: manualItems})
317
def checkRequirementTabs(inspector, requirement, document, results):
    ''' Checks the tabs requirement.

    When the result of inspector.checkTabs differs (as a string) from the
    expected value, the default error message followed by that result is
    appended. Otherwise nothing is done.

    @see: processRequirements for parameters.
    '''
    expected = requirement.expectedValue
    tabData = inspector.checkTabs(document)

    if str(tabData) != str(expected):
        results[requirement.category].append(requirement.getErrorMessage() + str(tabData))
331
def checkRequirementDoubleWhitespace(inspector, requirement, document, results):
    ''' Checks the double whitespace requirement.

    When the result of inspector.checkDoubleWhitespaces differs (as a string)
    from the expected value, the default error message followed by that
    result is appended. Otherwise nothing is done.

    @see: processRequirements for parameters
    '''
    expected = requirement.expectedValue
    whitespaceData = inspector.checkDoubleWhitespaces(document)

    if str(whitespaceData) != str(expected):
        results[requirement.category].append(requirement.getErrorMessage() + str(whitespaceData))
346
def checkRequirementAsterisk(inspector, requirement, document, results):
    ''' Checks the asterisk requirement.

    When the result of inspector.checkAsterisk differs (as a string) from the
    expected value, an error is appended. The default error message is used
    as a format string here and the result is substituted into it. Could be
    useful in other methods as well.

    @see: processRequirements for parameters.
    '''
    expected = requirement.expectedValue
    asteriskData = inspector.checkAsterisk(document)

    if str(asteriskData) != str(expected):
        # % is used for print formatting
        results[requirement.category].append(requirement.getErrorMessage() % str(asteriskData))
def checkRequirementImageCaptions(inspector, requirement, document, results):
    ''' Checks the image captions requirement by handing
    inspector.checkImageCaptions to checkBooleanRequirement.

    @see: checkBooleanRequirement
    @see: processRequirements for parameters.
    '''
    captionCheck = inspector.checkImageCaptions
    checkBooleanRequirement(captionCheck, requirement, document, results)
371
def checkRequirementHeadersAndFooters(inspector, requirement, document, results):
    ''' Checks the headers and the footers requirement.

    The sections are checked first with inspector.checkSections. If that
    returns False, the default error message is appended and the check ends.
    If it returns a non-empty value other than True, the check ends without
    a message. Only otherwise is the data of inspector.checkHeadersAndFooters
    compared (as strings) key by key with the expected value, appending the
    key specific error message for each mismatch.

    The function is hard to implement in a smart way.

    @see: checkSections
    @see: processRequirements for parameters.
    '''
    errorList = []
    sections = inspector.checkSections(document, errorList)

    if sections is False:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    if sections is not True and len(sections) != 0:
        # if a list is returned: the sections are wrong, so stop silently
        return

    headerFooterData = inspector.checkHeadersAndFooters(document)
    expected = requirement.expectedValue

    for key in expected.keys():
        if str(headerFooterData[key]) != str(expected[key]):
            results[requirement.category].append(requirement.getErrorMessage(key))
398
def checkRequirementIndex(inspector, requirement, document, results):
    ''' Checks the index requirement.

    inspector.checkIndex is called first; inspector.checkIndexContent is only
    called when the first result equals (as a string) the expected value. The
    first result that differs from the expected value is used as the error id
    of the appended error message.

    @see: processRequirements for parameters.
    '''
    #FIXME: error codes are hard coded in the inspector and in the requirements XML file.
    outcome = inspector.checkIndex(document)
    if str(outcome) == str(requirement.expectedValue):
        outcome = inspector.checkIndexContent(document)
        if str(outcome) == str(requirement.expectedValue):
            return

    results[requirement.category].append(requirement.getErrorMessage(outcome))
# Dispatch table used by processRequirements: maps the requirement names
# described in the XML requirement file to the functions that check them.
_checkers = {
    'Styles': checkRequirementStyles,
    'TOC': checkRequirementTOC,
    'Image': checkRequirementImages,
    'CoverPage': checkRequirementCoverPage,
    'PageSize': checkRequirementPageSize,
    'Margins': checkRequirementMargins,
    'EndNoteFootNote': checkRequirementEndNoteFootNote,
    'HeadingNumbering': checkRequirementHeadingNumbering,
    'EmptyParagraphs': checkRequirementEmptyParagraphs,
    'CheckSections': checkRequirementSections,
    'List': checkRequirementList,
    'StyleUsage': checkRequirementStyleUsage,
    'Tabs': checkRequirementTabs,
    'DoubleWhitespaces': checkRequirementDoubleWhitespace,
    'ImageCaptions': checkRequirementImageCaptions,
    'Asterisk': checkRequirementAsterisk,
    'Index': checkRequirementIndex,
    'CheckHeadersAndFooters': checkRequirementHeadersAndFooters,
}
def inspect(documentFile, requirements, fileExtension):
    ''' Inspects a document against the given requirements.

    The document dictionary is built with makeDocumentDict, the inspector is
    picked from inspectorDict by the file extension and every requirement is
    then processed with processRequirements.

    @param documentFile: docx or odt file, as expected by makeDocumentDict.
    @param requirements: the requirements specified in the XML requirement file.
    @param fileExtension: a key of inspectorDict (odt, docx or docm).

    @return: The results dictionary which maps each error category to the
        list of errors found in it.
    '''
    results = {'common':[], 'structure':[], 'content':[], 'styles':[], 'numbers':[], 'formatting':[], 'objects':[]}
    documentDict = makeDocumentDict(documentFile, fileExtension)
    inspectorModule = inspectorDict[documentDict['fileExtension']]

    #FIXME: hard coded for gui testing:
    #if requirements == "":
    #    reqFilename = urlopen('http://sovellusprojektit.it.jyu.fi/parsi/sovellus/sampleFiles/xml/requirements_test1.xml')

    #    reqDom = xml.dom.minidom.parse(reqFilename)
    #    requirements = Requirements(reqDom)

    processRequirements(inspectorModule, documentDict, requirements, results)

    return results
if __name__ == '__main__':
    # For testing.
    # inspect("foo", "vaatimukset.xml", "foobar")

    #filename = 'sampleFiles/docx/sivunumerointi_oikein_hdr_ftr.docx'
    #filename = 'sampleFiles/docx/tekstinkasittely.docx'
    #filename = 'sampleFiles/docx/teemakokeilu.docx'
    #filename = 'sampleFiles/docx/docx_demo.docx'
    #filename = 'sampleFiles/docx/otsikko_numerointi_ei_otsikko1.docx'
    #filename = 'sampleFiles/docx/docx_demo_ascii.docx'
    #filename = 'sampleFiles/docx/otsikko2ja4_numerointi.docx'
    #filename ='sampleFiles/odt/Yliopisto_opiskelu.odt'
    #filename ='sampleFiles/odt/toc_handmade.odt'

    #filename = 'sampleFiles/docx/index_lorem1.docx'
    #filename = 'sampleFiles/docx/index_lorem2.docx'
    #filename = 'sampleFiles/docx/hakemisto_kasin.docx'
    #filename = 'sampleFiles/docx/index_lorem2_whitespace_asterix.docx'
    filename = 'sampleFiles/docx/Teija_Holtta.docx' # asetus19.xml, see below
    #filename = 'sampleFiles/docx/Harjoitustyo_elina_asikainen.docx'
    #filename = 'sampleFiles/docx/HarjoitustyoD.docx'
    #filename = 'sampleFiles/docx/Harjoitustyo.docx'

    reqFilename = 'sampleFiles/xml/requirements_test1.xml'
    #zipFile = zipfile.ZipFile(filename)

    # NOTE(review): plain path strings are passed here, while inspect() reads
    # documentFile.filelist and requirements.requirements - presumably an
    # opened archive and a parsed requirements object are needed; confirm
    # before relying on this harness.
    inspectionResults = inspect(filename, reqFilename, "docx")
    printResults(inspectionResults)