1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 '''
32 The module makes the comparisons between the office document properties and requirements specified for each user.
33
34 @author: Vili Auvinen, Olli Kauppinen, Juho Tammela
35
36 @todo: Change the name of the module to word_inspector.
37 '''
38
39 from inspectors import docx_inspector, odt_inspector
40 import xml.dom.minidom
41
42
# Maps a file extension to the inspector module that handles it;
# docx and docm share the same inspector.
inspectorDict = {
    'odt': odt_inspector,
    'docx': docx_inspector,
    'docm': docx_inspector,
}
44
45
46
def makeDocumentDict(documentFile, fileExtension):
    '''Creates a document dict which contains the files from a document file.

    Every member listed in documentFile.filelist is stored under its filename:
    as a parsed xml.dom.minidom document when the content parses as XML,
    otherwise as the raw data returned by documentFile.read.

    @note: Code example:
        docXml = document['word/document.xml']

    @see: _checkers

    @param documentFile: archive object exposing filelist and read(filename).
    @param fileExtension: can be docx or odt or odp etc.; stored under the
        'fileExtension' key of the returned dict.

    @return: The document dictionary.
    '''
    document = {'fileExtension': fileExtension}

    for data in documentFile.filelist:
        # Read each member once and reuse the data for the raw fallback.
        content = documentFile.read(data.filename)
        try:
            document[data.filename] = xml.dom.minidom.parseString(content)
        except Exception:
            # Best effort: members that do not parse as XML are kept unparsed.
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            document[data.filename] = content
    return document
70
def processRequirements(inspector, document, requirements, results):
    '''Processes requirements by dispatching each one through the _checkers dict,
    which is keyed by the requirement names described in the XML requirement file.

    @param inspector: defines the given inspector.
    @param document: defines document dictionary which contains the XML files.
    @param requirements: defines Requirements object; its requirements attribute is iterated.
    @param results: defines the results dictionary the checkers append errors to.
    '''
    for requirement in requirements.requirements:
        # A requirement name without a registered checker raises KeyError.
        checker = _checkers[requirement.name]
        checker(inspector, requirement, document, results)
82
def printResults(resultsDict):
    ''' For testing. Prints every result category followed by its errors,
    or '...OK' when the category has no errors.

    @param resultsDict: dict mapping a category name to a list of errors.
    '''
    # Single-argument print(...) emits the same text under Python 2 and Python 3.
    print("\nERRORS:")
    for feed in resultsDict:
        errors = resultsDict[feed]
        if errors:
            # The trailing space matches what the former "print x," form emitted.
            print(feed.upper() + " ")
            for error in errors:
                print(" - %s" % (error,))
        else:
            print(feed.upper() + " ...OK")
94
def checkBooleanRequirement(function, requirement, document, results):
    ''' Checks the boolean requirements. The inspector methods return a boolean.

    The inspector result and requirement.expectedValue are compared as strings,
    so True and 'True' count as equal.

    @param function: inspector method called with the document.
    @param requirement: the requirement being checked.
    @param document: the document dictionary.
    @param results: the results dictionary; the default error message is appended
        under requirement.category on a mismatch.

    @return: True if expectedValue is returned from the inspector method, False otherwise.
    '''
    if str(function(document)) == str(requirement.expectedValue):
        return True

    results[requirement.category].append(requirement.getErrorMessage())
    return False
109
def checkDictRequirement(function, requirement, document, results):
    ''' Checks the dictionary requirement. The inspector methods return a dictionary of which
    the values are compared (as strings) to the ones in the XML requirement file.

    If the inspector method returns False, only the default error message is appended.
    Otherwise one error message is appended per expected key whose value differs.

    @param function: inspector method called with the document.
    @param requirement: the requirement; its expectedValue is a dict.
    @param document: the document dictionary.
    @param results: the results dictionary; errors go under requirement.category.
    '''
    inspectorData = function(document)
    if inspectorData is False:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    for key, expected in requirement.expectedValue.items():
        # A key missing from the inspector data raises KeyError, as before.
        if str(inspectorData[key]) != str(expected):
            results[requirement.category].append(requirement.getErrorMessage(key))
127
128
129
134
def checkRequirementSections(inspector, requirement, document, results):
    ''' Checks the requirement sections. The inspector method takes an empty list as an argument.
    If its length is not zero after the inspector method, it means that errors were found.

    Nothing is appended when the error list stays empty or the inspector returns True.
    Otherwise one error message per list element is appended; when the inspector
    returned False the default error message is appended after them as well.

    @param inspector: the inspector; its checkSections(document, errorList) is called.
    @param requirement: the requirement being checked.
    @param document: the document dictionary.
    @param results: the results dictionary; errors go under requirement.category.
    '''
    # NOTE(review): the source indentation was lost. The only syntactically valid
    # reading of the original "for ... else ... return ... elif" sequence is a
    # for/else without break, whose else branch always runs. That effective
    # behaviour is kept here; confirm it is the intended one.
    errorList = []
    inspectorData = inspector.checkSections(document, errorList)

    if len(errorList) == 0 or inspectorData is True:
        return

    for element in errorList:
        results[requirement.category].append(requirement.getErrorMessage(element))

    if inspectorData is False:
        results[requirement.category].append(requirement.getErrorMessage())
159
160
161
162
163
164
165
166
def checkRequirementMargins(inspector, requirement, document, results):
    ''' Checks the requirement margins by calling the checkDictRequirement
    with the inspector's getPageMarginals method.

    @see: processRequirements for parameters and checkDictRequirement for the actual method.
    '''
    checkDictRequirement(inspector.getPageMarginals, requirement, document, results)
173
def checkRequirementPageSize(inspector, requirement, document, results):
    ''' Checks the page size requirement by calling the checkDictRequirement
    with the inspector's getPageSize method.

    @see: processRequirements for parameters and checkDictRequirement for the actual method.
    '''
    pageSizeGetter = inspector.getPageSize
    checkDictRequirement(pageSizeGetter, requirement, document, results)
176
def checkRequirementCoverPage(inspector, requirement, document, results):
    ''' Checks the cover page requirement by calling the checkDictRequirement
    with the inspector's checkCoverPage method.

    @see: processRequirements for parameters and checkDictRequirement for the actual method.
    '''
    coverPageChecker = inspector.checkCoverPage
    checkDictRequirement(coverPageChecker, requirement, document, results)
179
def checkRequirementHeadingNumbering(inspector, requirement, document, results):
    ''' Checks the heading numbering requirement.

    Error ids and positions are defined in a dict which is then used in the method
    inspector.checkHeadingNumbering(document, errorIdsAndPositions). Every key whose
    value is no longer None afterwards produces an error message followed by a space
    and that value.

    @see: docx_inspector.checkHeadingNumbering(document, errorIdsAndPositions).
    @see: processRequirements for parameters.
    '''
    errorIdsAndPositions = {'manualNumbering': None,
                            'styleNotUsed': None,
                            'differentNumbering': None,
                            'notMultilevel': None,
                            'outlineLvl': None,
                            'numStart': None,
                            'numWrong': None,
                            'numFormat': None,
                            'notSequential': None}

    returnValue = inspector.checkHeadingNumbering(document, errorIdsAndPositions)

    if str(returnValue) != str(requirement.expectedValue):
        results[requirement.category].append(requirement.getErrorMessage())

    # NOTE(review): source indentation was lost; the detail loop is assumed to run
    # independently of the summary check above -- confirm.
    for key, position in errorIdsAndPositions.items():
        if position is not None:
            # String concatenation: assumes the inspector stores positions as strings.
            results[requirement.category].append(requirement.getErrorMessage(key) + ' ' + position)
212
213
def checkRequirementStyles(inspector, requirement, document, results):
    ''' Checks the style requirements. Compares (as strings) the style requirements described
    in the XML file to the style data returned by inspector.getStyle for the expected styleName.
    Appends a default error if inspector.getStyle returns False.

    Mismatches are collected into one dict {styleName: [error messages]} which is
    appended to the results only when it contains at least one message.

    @return: Nothing.

    @see: processRequirements for parameters.
    '''
    expected = requirement.expectedValue
    styleName = expected['styleName']
    inspectorData = inspector.getStyle(document, styleName)

    if inspectorData is False:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    # Every expected key (including 'styleName' itself) must be present in the
    # inspector data; a missing key raises KeyError, as before.
    mismatches = [requirement.getErrorMessage(key)
                  for key in expected
                  if str(inspectorData[key]) != str(expected[key])]

    if mismatches:
        results[requirement.category].append({styleName: mismatches})
238
def checkRequirementTOC(inspector, requirement, document, results):
    ''' Checks if the table of the contents exists. If it does not exist, appends a default error message.

    If it exists, checks if the table of contents is correctly made. If not, appends
    the error message with id '2'.

    @see: processRequirements for parameters.
    '''
    # Identity checks are deliberate: only the exact values True / False count.
    if inspector.checkTOC(document) is not True:
        results[requirement.category].append(requirement.getErrorMessage())
        return

    if inspector.checkTocContent(document) is False:
        results[requirement.category].append(requirement.getErrorMessage('2'))
253
def checkRequirementImages(inspector, requirement, document, results):
    ''' Checks if there are images in the document. Calls the checkBooleanRequirement function
    with the inspector's checkImages method; its return value is not propagated.

    @see: checkBooleanRequirement(function, requirement, document, results).
    '''
    checkBooleanRequirement(inspector.checkImages, requirement, document, results)
261
def checkRequirementEmptyParagraphs(inspector, requirement, document, results):
    ''' Checks empty paragraphs from the document. Appends the default error message followed by
    the inspector result when that result differs (as a string) from the expected value.

    @see: processRequirements for parameters.
    '''
    inspectorData = inspector.checkEmptyParagraphs(document)

    if str(inspectorData) != str(requirement.expectedValue):
        results[requirement.category].append(requirement.getErrorMessage() + str(inspectorData))
275
def checkRequirementList(inspector, requirement, document, results):
    ''' Checks if there are lists in the document. Calls the checkBooleanRequirement function
    with the inspector's checkList method; its return value is not propagated.

    @see: processRequirements for parameters.
    '''
    checkBooleanRequirement(inspector.checkList, requirement, document, results)
283
def checkRequirementStyleUsage(inspector, requirement, document, results):
    ''' Checks the style usage. An error dict (below) with two key-value pairs is passed to
    inspector.checkStyleUsage, which fills the lists with the positions it reports.

    errorIdsAndPositions = {'styleNotUsed': [], 'manualChanges': []}

    When the inspector result differs (as a string) from the expected value, each
    non-empty list is appended to the results as {error message: [positions as text]}.

    @see: processRequirements for parameters.
    '''
    # NOTE(review): source indentation was lost; all feedback handling is assumed
    # to belong to the mismatch branch -- confirm.
    try:
        toText = unicode  # Python 2 text type, as used originally
    except NameError:
        toText = str  # Python 3: str is the text type

    errorIdsAndPositions = {'styleNotUsed': [], 'manualChanges': []}
    result = inspector.checkStyleUsage(document, errorIdsAndPositions)

    # Look the messages up once instead of on every use.
    messages = [(errorId, requirement.getErrorMessage(errorId))
                for errorId in ('styleNotUsed', 'manualChanges')]

    if str(result) == str(requirement.expectedValue):
        return

    # Convert everything first so nothing is appended if a conversion fails.
    feedback = [(message, [toText(item) for item in errorIdsAndPositions[errorId]])
                for errorId, message in messages]

    for message, positions in feedback:
        if positions:
            results[requirement.category].append({message: positions})
317
def checkRequirementTabs(inspector, requirement, document, results):
    ''' Checks if the tabs have been used in the document. Does nothing if the inspector result
    matches the expected value (compared as strings), otherwise appends the default error
    message followed by the inspector result.

    @see: processRequirements for parameters.
    '''
    inspectorData = inspector.checkTabs(document)

    if str(inspectorData) != str(requirement.expectedValue):
        results[requirement.category].append(requirement.getErrorMessage() + str(inspectorData))
331
def checkRequirementDoubleWhitespace(inspector, requirement, document, results):
    ''' Checks if double whitespaces are found in the document. Does nothing if the inspector
    result matches the expected value (compared as strings), otherwise appends the default
    error message followed by the inspector result.

    @see: processRequirements for parameters
    '''
    inspectorData = inspector.checkDoubleWhitespaces(document)

    if str(inspectorData) != str(requirement.expectedValue):
        results[requirement.category].append(requirement.getErrorMessage() + str(inspectorData))
346
def checkRequirementAsterisk(inspector, requirement, document, results):
    ''' Checks if asterisks are found in the document. Does nothing if the inspector result
    matches the expected value (compared as strings), otherwise appends an error.

    Unlike the sibling checkers, the default error message is used as a %-format
    template and the inspector result is substituted into it.

    @see: processRequirements for parameters.
    '''
    inspectorData = inspector.checkAsterisk(document)

    if str(inspectorData) != str(requirement.expectedValue):
        results[requirement.category].append(requirement.getErrorMessage() % str(inspectorData))
361
362
def checkRequirementImageCaptions(inspector, requirement, document, results):
    ''' Checks if image captions are used, by calling checkBooleanRequirement with the
    inspector's checkImageCaptions method; its return value is not propagated.

    @see: checkBooleanRequirement
    @see: processRequirements for parameters.
    '''
    checkBooleanRequirement(inspector.checkImageCaptions, requirement, document, results)
371
398
def checkRequirementIndex(inspector, requirement, document, results):
    ''' Checks if index is found in the document. If the index check matches the expected value,
    checks the index content as well.

    Each inspector result is compared (as a string) to the expected value; on a mismatch
    the error message keyed by that result is appended. The content check is skipped
    when the index check itself fails.

    @see: processRequirements for parameters.
    '''
    expected = str(requirement.expectedValue)

    # Run the checks in order and stop at the first one that fails.
    for check in (inspector.checkIndex, inspector.checkIndexContent):
        result = check(document)
        if str(result) != expected:
            results[requirement.category].append(requirement.getErrorMessage(result))
            return
415
416
417
# Dispatch table used by processRequirements: requirement name -> checker function.
# Every checker is called as checker(inspector, requirement, document, results).
_checkers = {
    'Styles': checkRequirementStyles,
    'TOC': checkRequirementTOC,
    'Image': checkRequirementImages,
    'CoverPage': checkRequirementCoverPage,
    'PageSize': checkRequirementPageSize,
    'Margins': checkRequirementMargins,
    'EndNoteFootNote': checkRequirementEndNoteFootNote,
    'HeadingNumbering': checkRequirementHeadingNumbering,
    'EmptyParagraphs': checkRequirementEmptyParagraphs,
    'CheckSections': checkRequirementSections,
    'List': checkRequirementList,
    'StyleUsage': checkRequirementStyleUsage,
    'Tabs': checkRequirementTabs,
    'DoubleWhitespaces': checkRequirementDoubleWhitespace,
    'ImageCaptions': checkRequirementImageCaptions,
    'Asterisk': checkRequirementAsterisk,
    'Index': checkRequirementIndex,
    'CheckHeadersAndFooters': checkRequirementHeadersAndFooters,
}
454
def inspect(documentFile, requirements, fileExtension):
    ''' Inspects a document: runs every requirement check and collects the errors.

    @param documentFile: the document archive handed to makeDocumentDict.
    @param requirements: the requirements specified in the XML requirement file.
    @param fileExtension: key into inspectorDict (odt, docx or docm).

    @return: dict mapping each result category to the list of errors the checkers appended.
    '''
    categories = ('common', 'structure', 'content', 'styles', 'numbers', 'formatting', 'objects')
    results = dict((category, []) for category in categories)

    document = makeDocumentDict(documentFile, fileExtension)
    # An extension without a registered inspector raises KeyError here.
    inspector = inspectorDict[document['fileExtension']]

    processRequirements(inspector, document, requirements, results)
    return results
477
if __name__ == '__main__':
    # For testing.
    # NOTE(review): inspect() is handed two path strings here, but the code it
    # calls reads documentFile.filelist and requirements.requirements. The lines
    # that opened these files appear to have been commented out -- confirm
    # before relying on this entry point.
    filename = 'sampleFiles/docx/Teija_Holtta.docx'
    reqFilename = 'sampleFiles/xml/requirements_test1.xml'

    inspectionResults = inspect(filename, reqFilename, "docx")
    printResults(inspectionResults)
504