How to use the _info_filter method in autotest

Best Python code snippet using autotest_python

catimages.py

Source:catimages.py Github

copy

Full Screen

1#!/usr/bin/python2# -*- coding: utf-8 -*-3"""4Image by content categorization derived from 'checkimages.py'.5Script to check uncategorized files. This script checks if a file6has some content that allows to assign it to a category.7This script runs on commons only. It needs also external libraries8(see imports and comments there) and additional configuration/data9files in order to run properly. Most of them can be checked-out at:10 http://svn.toolserver.org/svnroot/drtrigon/11(some code might get compiled on-the-fly, so a GNU compiler along12with library header files is needed too)13This script understands the following command-line arguments:14-cat[:#] Use a category as recursive generator15 (if no given 'Category:Media_needing_categories' is used)16-start[:#] Start after File:[:#] or if no file given start from top17 (instead of resuming last run).18-limit The number of images to check (default: 80)19-noguesses If given, this option will disable all guesses (which are20 less reliable than true searches).21-single:# Run for one (any) single page only.22-train Train classifiers on good (homegenous) categories.23X-sendemail Send an email after tagging.24X-untagged[:#] Use daniel's tool as generator:25X http://toolserver.org/~daniel/WikiSense/UntaggedImages.php26"""27#28# (C) Kyle/Orgullomoore, 2006-2007 (newimage.py)29# (C) Pywikipedia team, 2007-2011 (checkimages.py)30# (C) DrTrigon, 201231#32# Distributed under the terms of the MIT license.33#34__version__ = '$Id$'35#36# python default packages37import re, urllib2, os, locale, sys, datetime, math, shutil, mimetypes, shelve38import StringIO, json # fallback: simplejson39from subprocess import Popen, PIPE40import Image41#import ImageFilter42scriptdir = os.path.dirname(sys.argv[0])43if not os.path.isabs(scriptdir):44 scriptdir = os.path.abspath(os.path.join(os.curdir, scriptdir))45# additional python packages (non-default but common)46try:47 import numpy as np48 from scipy import ndimage, fftpack#, signal49 import 
cv50 # TS: nonofficial cv2.so backport of the testing-version of51 # python-opencv because of missing build-host, done by DaB52 sys.path.append('/usr/local/lib/python2.6/')53 import cv254 sys.path.remove('/usr/local/lib/python2.6/')55 import pyexiv256 import warnings57 with warnings.catch_warnings():58 warnings.simplefilter("ignore")59 import gtk # ignore warning: "GtkWarning: could not open display"60 import rsvg # gnome-python2-rsvg (binding to librsvg)61 import cairo62 import magic # python-magic (binding to libmagic)63except:64 # either raise the ImportError later or skip it65 pass66# pywikipedia framework python packages67import wikipedia as pywikibot68import pagegenerators, catlib69import checkimages70import externals # allow import from externals71# additional python packages (more exotic and problematic ones)72# modules needing compilation are imported later on request:73# (see https://jira.toolserver.org/browse/TS-1452)74# e.g. opencv, jseg, slic, pydmtx, zbar, (pyml or equivalent)75# binaries: exiftool, pdftotext/pdfimages (poppler), ffprobe (ffmpeg),76# convert/identify (ImageMagick), (ocropus)77# TODO:78# (pdfminer not used anymore/at the moment...)79# python-djvulibre or python-djvu for djvu support80externals.check_setup('colormath') # check for and install needed81externals.check_setup('jseg') # 'externals' modules82externals.check_setup('jseg/jpeg-6b') #83#externals.check_setup('_mlpy') #84externals.check_setup('_music21') #85externals.check_setup('opencv/haarcascades') #86externals.check_setup('pydmtx') # <<< !!! 
test OS package management here !!!87externals.check_setup('py_w3c') #88externals.check_setup('_zbar') #89import pycolorname90#import _mlpy as mlpy91from colormath.color_objects import RGBColor92from py_w3c.validators.html.validator import HTMLValidator, ValidationFault93#from pdfminer import pdfparser, pdfinterp, pdfdevice, converter, cmapdb, layout94#externals.check_setup('_ocropus')95locale.setlocale(locale.LC_ALL, '')96###############################################################################97# <--------------------------- Change only below! --------------------------->#98###############################################################################99# NOTE: in the messages used by the Bot if you put __botnick__ in the text, it100# will automatically replaced with the bot's nickname.101# Add your project (in alphabetical order) if you want that the bot start102project_inserted = [u'commons',]103# Ok, that's all. What is below, is the rest of code, now the code is fixed and it will run correctly in your project.104################################################################################105# <--------------------------- Change only above! 
---------------------------> #106################################################################################107tmpl_FileContentsByBot = u"""}}108{{FileContentsByBot109| botName = ~~~110|"""111# this list is auto-generated during bot run (may be add notifcation about NEW templates)112#tmpl_available_spec = [ u'Properties', u'ColorRegions', u'Faces', u'ColorAverage' ]113tmpl_available_spec = [] # auto-generated114# global115useGuesses = True # Use guesses which are less reliable than true searches116# all detection and recognition methods - bindings to other classes, modules and libs117class _UnknownFile(object):118 def __init__(self, file_name, file_mime, *args, **kwargs):119 self.file_name = file_name120 self.file_mime = file_mime121 self.image_size = (None, None)122 # available file properties and metadata123 self._properties = { 'Properties': [{'Format': u'-', 'Pages': 0}],124 'Metadata': [], }125 # available feature to extract126 self._features = { 'ColorAverage': [],127 'ColorRegions': [],128 'Faces': [],129 'People': [],130 'OpticalCodes': [],131 'Chessboard': [],132 'History': [],133 'Text': [],134 'Streams': [],135 'Audio': [],136 'Legs': [],137 'Hands': [],138 'Torsos': [],139 'Ears': [],140 'Eyes': [],141 'Automobiles': [],142 'Classify': [], }143 def __enter__(self):144 return self145 def __exit__(self, type, value, traceback):146 pass147 def getProperties(self):148 self._detect_HeaderAndMetadata() # Metadata149 self._detect_Properties() # Properties150 return self._properties151 def getFeatures(self):152 pywikibot.warning(u"File format '%s/%s' not supported (yet)!" 
% tuple(self.file_mime[:2]))153 return self._features154 def _detect_HeaderAndMetadata(self):155 # check/look into the file by midnight commander (mc)156 # use exif as first hint - in fact gives also image-size, streams, ...157 exif = self._util_get_DataTags_EXIF()158 #print exif159 result = { 'Software': exif['Software'] if 'Software' in exif else u'-',160 'Output_Extension': exif['Output_extension'] if 'Output_extension' in exif else u'-',161 'Desc': exif['Desc'] if 'Desc' in exif else u'-',162 'DescProducer': exif['DescProducer'] if 'DescProducer' in exif else u'-',163 'DescCreator': exif['DescCreator'] if 'DescCreator' in exif else u'-',164 'Comment': exif['Comment'] if 'Comment' in exif else u'-',165 'Producer': exif['Producer'] if 'Producer' in exif else u'-',}166 #'Comments': exif['Comments'] if 'Comments' in exif else u'-',167 #'WorkDesc': exif['WorkDescription'] if 'WorkDescription' in exif else u'-',168 ##'Dimensions': tuple(map(int, exif['ImageSize'].split(u'x'))),}169 #'Dimensions': tuple(exif['ImageSize'].split(u'x')) if 'ImageSize' in exif else (None, None),}170 #'Mode': exif['ColorType'], }171# TODO: vvv172#* metadata template in commons has to be worked out and code adopted173#* like in 'Streams' a nice content listing of MIDI (exif or music21 - if needed at all?)174#* docu all this stuff in commons175#* docu and do all open things on "commons TODO list"176#177#178#179#(* initial audio midi support (music21))180#[TODO: docu on Commons ... 
/ template ...]181# TODO: if '_detect_History' is not needed here, moveit back into _JpegFile !!!182 #print "self._detect_History()"183 #print self._detect_History()184 # https://pypi.python.org/pypi/hachoir-metadata (needs 'core' and 'parser')185 #186 #from hachoir_core.error import HachoirError187 #from hachoir_core.stream import InputStreamError188 #from hachoir_parser import createParser189 #import hachoir_core.config as hachoir_config190 #191 #from hachoir_metadata import extractMetadata192 #193 #hachoir_config.debug = True194 #hachoir_config.verbose = True195 #hachoir_config.quiet = True196 #197 ## Create parser198 #try:199 # parser = createParser(self.file_name.decode('utf-8'),200 # real_filename=self.file_name.encode('utf-8'),201 # tags=None)202 # #print [val for val in enumerate(parser.createFields())]203 # desc = parser.description204 # ptags = parser.getParserTags()205 #except (InputStreamError, AttributeError):206 # desc = u'-'207 # ptags = {}208 #209 ## Extract metadata210 #try:211 # # quality: 0.0 fastest, 1.0 best, and default is 0.5212 # metadata = extractMetadata(parser, 0.5)213 # #mtags = dict([(key, metadata.getValues(key))214 # mtags = dict([(key, metadata.getValues(key)) # get, getItem, getItems, getText215 # for key in metadata._Metadata__data.keys()#])216 # if metadata.getValues(key)])217 #except (HachoirError, AttributeError):218 # mtags = {}219 #220 ##result = {'parser_desc': desc, 'parserdata': ptags, 'metadata': mtags}221 ##print result222 #print {'parser_desc': desc, 'parserdata': ptags, 'metadata': mtags}223 #224 ### Display metadatas on stdout225 ##text = metadata.exportPlaintext(priority=None, human=False)226 ##if not text:227 ## text = [u"(no metadata, priority may be too small, try priority=999)"]228 ##print u'\n'.join(text)229 self._properties['Metadata'] = [result]230 #print self._properties['Metadata']231 return232 def _detect_Properties(self):233 # get mime-type file-size, ...234 pass235 def _util_get_DataTags_EXIF(self):236 # 
http://tilloy.net/dev/pyexiv2/tutorial.html237 # (is UNFORTUNATELY NOT ABLE to handle all tags, e.g. 'FacesDetected', ...)238 239 if hasattr(self, '_buffer_EXIF'):240 return self._buffer_EXIF241 res = {}242 enable_recovery() # enable recovery from hard crash243 try:244 if hasattr(pyexiv2, 'ImageMetadata'):245 metadata = pyexiv2.ImageMetadata(self.file_name)246 metadata.read()247 248 for key in metadata.exif_keys:249 res[key] = metadata[key]250 251 for key in metadata.iptc_keys:252 res[key] = metadata[key]253 254 for key in metadata.xmp_keys:255 res[key] = metadata[key]256 else:257 image = pyexiv2.Image(self.file_name)258 image.readMetadata()259 260 for key in image.exifKeys():261 res[key] = image[key]262 263 for key in image.iptcKeys():264 res[key] = image[key]265 266 #for key in image.xmpKeys():267 # res[key] = image[key]268 except IOError:269 pass270 except RuntimeError:271 pass272 disable_recovery() # disable since everything worked out fine273 274 275 # http://www.sno.phy.queensu.ca/~phil/exiftool/276 # MIGHT BE BETTER TO USE AS PYTHON MODULE; either by wrapper or perlmodule:277 # http://search.cpan.org/~gaas/pyperl-1.0/perlmodule.pod278 # (or use C++ with embbedded perl to write a python module)279 data = Popen("exiftool -j %s" % self.file_name, 280 shell=True, stdout=PIPE).stdout.read()281 if not data:282 raise ImportError("exiftool not found!")283 try: # work-a-round for badly encoded exif data (from pywikibot/comms/http.py)284 data = unicode(data, 'utf-8', errors = 'strict')285 except UnicodeDecodeError:286 data = unicode(data, 'utf-8', errors = 'replace')287 #res = {}288 data = re.sub("(?<!\")\(Binary data (?P<size>\d*) bytes\)", "\"(Binary data \g<size> bytes)\"", data) # work-a-round some issue289 for item in json.loads(data):290 res.update( item )291 #print res292 self._buffer_EXIF = res293 294 return self._buffer_EXIF295 def _detect_History(self):296 res = self._util_get_DataTags_EXIF()297 #a = []298 #for k in res.keys():299 # if 'history' in 
k.lower():300 # a.append( k )301 #for item in sorted(a):302 # print item303 # http://tilloy.net/dev/pyexiv2/api.html#pyexiv2.xmp.XmpTag304 #print [getattr(res['Xmp.xmpMM.History'], item) for item in ['key', 'type', 'name', 'title', 'description', 'raw_value', 'value', ]]305 result = []306 i = 1307 while (('Xmp.xmpMM.History[%i]' % i) in res):308 data = { 'ID': i,309 'Software': u'-',310 'Timestamp': u'-',311 'Action': u'-',312 'Info': u'-', }313 if ('Xmp.xmpMM.History[%i]/stEvt:softwareAgent'%i) in res:314 data['Software'] = res['Xmp.xmpMM.History[%i]/stEvt:softwareAgent'%i].value315 data['Timestamp'] = res['Xmp.xmpMM.History[%i]/stEvt:when'%i].value316 data['Action'] = res['Xmp.xmpMM.History[%i]/stEvt:action'%i].value317 if ('Xmp.xmpMM.History[%i]/stEvt:changed'%i) in res:318 data['Info'] = res['Xmp.xmpMM.History[%i]/stEvt:changed'%i].value319 #print res['Xmp.xmpMM.History[%i]/stEvt:instanceID'%i].value320 result.append( data )321 elif ('Xmp.xmpMM.History[%i]/stEvt:parameters'%i) in res:322 data['Action'] = res['Xmp.xmpMM.History[%i]/stEvt:action'%i].value323 data['Info'] = res['Xmp.xmpMM.History[%i]/stEvt:parameters'%i].value324 #data['Action'] = data['Info'].split(' ')[0]325 result.append( data )326 else:327 pass328 i += 1329 330 self._features['History'] = result331 return332class _JpegFile(_UnknownFile):333 # for '_detect_Trained'334 cascade_files = [(u'Legs', 'haarcascade_lowerbody.xml'),335 (u'Torsos', 'haarcascade_upperbody.xml'),336 (u'Ears', 'haarcascade_mcs_leftear.xml'),337 (u'Ears', 'haarcascade_mcs_rightear.xml'),338 (u'Eyes', 'haarcascade_lefteye_2splits.xml'), # (http://yushiqi.cn/research/eyedetection)339 (u'Eyes', 'haarcascade_righteye_2splits.xml'), # (http://yushiqi.cn/research/eyedetection)340 #externals/opencv/haarcascades/haarcascade_mcs_lefteye.xml341 #externals/opencv/haarcascades/haarcascade_mcs_righteye.xml342 # (others include indifferent (left and/or right) and pair)343 (u'Automobiles', 'cars3.xml'), # 
http://www.youtube.com/watch?v=c4LobbqeKZc344 (u'Hands', '1256617233-2-haarcascade-hand.xml', 300.),] # http://www.andol.info/345 # ('Hands' does not behave very well, in fact it detects any kind of skin and other things...)346 #(u'Aeroplanes', 'haarcascade_aeroplane.xml'),] # e.g. for 'Category:Unidentified aircraft'347 def __init__(self, file_name, file_mime, *args, **kwargs):348 _UnknownFile.__init__(self, file_name, file_mime)349 self.image_filename = os.path.split(self.file_name)[-1]350 self.image_path = self.file_name351 self.image_path_JPEG = self.image_path + '.jpg'352 self._convert()353 def __exit__(self, type, value, traceback):354 #if os.path.exists(self.image_path):355 # os.remove( self.image_path )356 if os.path.exists(self.image_path_JPEG):357 os.remove( self.image_path_JPEG )358 #image_path_new = self.image_path_JPEG.replace(u"cache/", u"cache/0_DETECTED_")359 #if os.path.exists(image_path_new):360 # os.remove( image_path_new )361 def getFeatures(self):362 # Faces (extract EXIF data)363 self._detect_Faces_EXIF()364 # Faces and eyes (opencv pre-trained haar)365 self._detect_Faces()366# TODO: test and use or switch off367 # Face via Landmark(s)368# self._detect_FaceLandmark_xBOB()369 # exclude duplicates (CV and EXIF)370 faces = [item['Position'] for item in self._features['Faces']]371 for i in self._util_merge_Regions(faces)[1]:372 del self._features['Faces'][i]373 # Segments and colors374 self._detect_SegmentColors()375 # Average color376 self._detect_AverageColor()377 # People/Pedestrian (opencv pre-trained hog and haarcascade)378 self._detect_People()379 # Geometric object (opencv hough line, circle, edges, corner, ...)380 self._detect_Geometry()381 # general (opencv pre-trained, third-party and self-trained haar382 # and cascade) classification383 # http://www.computer-vision-software.com/blog/2009/11/faq-opencv-haartraining/384 for cf in self.cascade_files:385 self._detect_Trained(*cf)386 # barcode and Data Matrix recognition (libdmtx/pydmtx, 
zbar, gocr?)387 self._recognize_OpticalCodes()388 # Chessboard (opencv reference detector)389 self._detect_Chessboard()390 # general (self-trained) detection WITH classification391 # BoW: uses feature detection (SIFT, SURF, ...) AND classification (SVM, ...)392# self._detectclassify_ObjectAll()393 # Wavelet: uses wavelet transformation AND classification (machine learning)394# self._detectclassify_ObjectAll_PYWT()395 # general file EXIF history information396 self._detect_History()397 398 return self._features399 # supports a lot of different file types thanks to PIL400 def _convert(self):401 try:402 im = Image.open(self.image_path) # might be png, gif etc, for instance403 #im.thumbnail(size, Image.ANTIALIAS) # size is 640x480404 im.convert('RGB').save(self.image_path_JPEG, "JPEG")405 self.image_size = im.size406 except IOError, e:407 if 'image file is truncated' in str(e):408 # im object has changed due to exception raised409 im.convert('RGB').save(self.image_path_JPEG, "JPEG")410 self.image_size = im.size411 else:412 try:413 # since opencv might still work, try this as fall-back414 img = cv2.imread( self.image_path, cv.CV_LOAD_IMAGE_COLOR )415 cv2.imwrite(self.image_path_JPEG, img)416 self.image_size = (img.shape[1], img.shape[0])417 except:418 if os.path.exists(self.image_path_JPEG):419 os.remove(self.image_path_JPEG)420 self.image_path_JPEG = self.image_path421 except:422 self.image_path_JPEG = self.image_path423 # FULL TIFF support (e.g. group4)424 # http://code.google.com/p/pylibtiff/425 # MIME: 'image/jpeg; charset=binary', ...426 def _detect_Properties(self):427 """Retrieve as much file property info possible, especially the same428 as commons does in order to compare if those libraries (ImageMagick,429 ...) 
are buggy (thus explicitely use other software for independence)"""430 result = {'Format': u'-', 'Pages': 0}431 try:432 i = Image.open(self.image_path)433 except IOError:434 pywikibot.warning(u'unknown file type [_JpegFile]')435 return436 # http://mail.python.org/pipermail/image-sig/1999-May/000740.html437 pc=0 # count number of pages438 while True:439 try:440 i.seek(pc)441 except EOFError:442 break443 pc+=1444 i.seek(0) # restore default445 # http://grokbase.com/t/python/image-sig/082psaxt6k/embedded-icc-profiles446 # python-lcms (littlecms) may be freeimage library447 #icc = i.app['APP2'] # jpeg448 #icc = i.tag[34675] # tiff449 #icc = re.sub('[^%s]'%string.printable, ' ', icc)450 ## more image formats and more post-processing needed...451 #self.image_size = i.size452 result.update({ #'bands': i.getbands(),453 #'bbox': i.getbbox(),454 'Format': i.format,455 'Mode': i.mode,456 #'info': i.info,457 #'stat': os.stat(self.image_path),458 'Palette': str(len(i.palette.palette)) if i.palette else u'-',459 'Pages': pc,460 'Dimensions': self.image_size,461 'Filesize': os.path.getsize(self.file_name),462 'MIME': u'%s/%s' % tuple(self.file_mime[:2]), })463 #self._properties['Properties'] = [result]464 self._properties['Properties'][0].update(result)465 return466 # .../opencv/samples/c/facedetect.cpp467 # http://opencv.willowgarage.com/documentation/python/genindex.html468 def _detect_Faces(self):469 """Converts an image to grayscale and prints the locations of any470 faces found"""471 # http://python.pastebin.com/m76db1d6b472 # http://creatingwithcode.com/howto/face-detection-in-static-images-with-python/473 # http://opencv.willowgarage.com/documentation/python/objdetect_cascade_classification.html474 # http://opencv.willowgarage.com/wiki/FaceDetection475 # http://blog.jozilla.net/2008/06/27/fun-with-python-opencv-and-face-detection/476 # http://www.cognotics.com/opencv/servo_2007_series/part_4/index.html477 # 
https://code.ros.org/trac/opencv/browser/trunk/opencv_extra/testdata/gpu/haarcascade?rev=HEAD478 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_eye_tree_eyeglasses.xml')479 #xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_eye.xml')480 if not os.path.exists(xml):481 raise IOError(u"No such file: '%s'" % xml)482 #nestedCascade = cv.Load(483 nestedCascade = cv2.CascadeClassifier(xml)484 # http://tutorial-haartraining.googlecode.com/svn/trunk/data/haarcascades/485 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_frontalface_alt.xml')486 # MAY BE USE 'haarcascade_frontalface_alt_tree.xml' ALSO / INSTEAD...?!!487 if not os.path.exists(xml):488 raise IOError(u"No such file: '%s'" % xml)489 #cascade = cv.Load(490 cascade = cv2.CascadeClassifier(xml)491 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_profileface.xml')492 if not os.path.exists(xml):493 raise IOError(u"No such file: '%s'" % xml)494 cascadeprofil = cv2.CascadeClassifier(xml)495 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_mcs_mouth.xml')496 if not os.path.exists(xml):497 raise IOError(u"No such file: '%s'" % xml)498 cascademouth = cv2.CascadeClassifier(xml)499 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_mcs_nose.xml')500 if not os.path.exists(xml):501 raise IOError(u"No such file: '%s'" % xml)502 cascadenose = cv2.CascadeClassifier(xml)503 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_lefteye_2splits.xml')504 if not os.path.exists(xml):505 raise IOError(u"No such file: '%s'" % xml)506 cascadelefteye = cv2.CascadeClassifier(xml) # (http://yushiqi.cn/research/eyedetection)507 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_righteye_2splits.xml')508 if not os.path.exists(xml):509 raise IOError(u"No such file: '%s'" % xml)510 cascaderighteye = cv2.CascadeClassifier(xml) # 
(http://yushiqi.cn/research/eyedetection)511 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_mcs_leftear.xml')512 if not os.path.exists(xml):513 raise IOError(u"No such file: '%s'" % xml)514 cascadeleftear = cv2.CascadeClassifier(xml)515 xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_mcs_rightear.xml')516 if not os.path.exists(xml):517 raise IOError(u"No such file: '%s'" % xml)518 cascaderightear = cv2.CascadeClassifier(xml)519 scale = 1.520 # So, to find an object of an unknown size in the image the scan521 # procedure should be done several times at different scales.522 # http://opencv.itseez.com/modules/objdetect/doc/cascade_classification.html523 try:524 #image = cv.LoadImage(self.image_path)525 #img = cv2.imread( self.image_path, cv.CV_LOAD_IMAGE_COLOR )526 img = cv2.imread( self.image_path_JPEG, cv.CV_LOAD_IMAGE_COLOR )527 #image = cv.fromarray(img)528 if img == None:529 raise IOError530 531 # !!! the 'scale' here IS RELEVANT FOR THE DETECTION RATE;532 # how small and how many features are detected as faces (or eyes)533 scale = max([1., np.average(np.array(img.shape)[0:2]/500.)])534 except IOError:535 pywikibot.warning(u'unknown file type [_detect_Faces]')536 return537 except AttributeError:538 pywikibot.warning(u'unknown file type [_detect_Faces]')539 return540 #detectAndDraw( image, cascade, nestedCascade, scale );541 # http://nullege.com/codes/search/cv.CvtColor542 #smallImg = cv.CreateImage( (cv.Round(img.shape[0]/scale), cv.Round(img.shape[1]/scale)), cv.CV_8UC1 )543 #smallImg = cv.fromarray(np.empty( (cv.Round(img.shape[0]/scale), cv.Round(img.shape[1]/scale)), dtype=np.uint8 ))544 smallImg = np.empty( (cv.Round(img.shape[1]/scale), cv.Round(img.shape[0]/scale)), dtype=np.uint8 )545 #cv.CvtColor( image, gray, cv.CV_BGR2GRAY )546 gray = cv2.cvtColor( img, cv.CV_BGR2GRAY )547 #cv.Resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ) 548 smallImg = cv2.resize( gray, smallImg.shape, 
interpolation=cv2.INTER_LINEAR )549 #cv.EqualizeHist( smallImg, smallImg )550 smallImg = cv2.equalizeHist( smallImg )551 t = cv.GetTickCount()552 faces = list(cascade.detectMultiScale( smallImg,553 1.1, 2, 0554 #|cv.CV_HAAR_FIND_BIGGEST_OBJECT555 #|cv.CV_HAAR_DO_ROUGH_SEARCH556 |cv.CV_HAAR_SCALE_IMAGE,557 (30, 30) ))558 #faces = cv.HaarDetectObjects(grayscale, cascade, storage, 1.2, 2,559 # cv.CV_HAAR_DO_CANNY_PRUNING, (50,50))560 facesprofil = list(cascadeprofil.detectMultiScale( smallImg,561 1.1, 2, 0562 #|cv.CV_HAAR_FIND_BIGGEST_OBJECT563 #|cv.CV_HAAR_DO_ROUGH_SEARCH564 |cv.CV_HAAR_SCALE_IMAGE,565 (30, 30) ))566 #faces = self._util_merge_Regions(faces + facesprofil)[0]567 faces = self._util_merge_Regions(faces + facesprofil, overlap=True)[0]568 faces = np.array(faces)569 #if faces:570 # self._drawRect(faces) #call to a python pil571 t = cv.GetTickCount() - t572 #print( "detection time = %g ms\n" % (t/(cv.GetTickFrequency()*1000.)) )573 #colors = [ (0,0,255),574 # (0,128,255),575 # (0,255,255),576 # (0,255,0),577 # (255,128,0),578 # (255,255,0),579 # (255,0,0),580 # (255,0,255) ]581 result = []582 for i, r in enumerate(faces):583 #color = colors[i%8]584 (rx, ry, rwidth, rheight) = r585 #cx = cv.Round((rx + rwidth*0.5)*scale)586 #cy = cv.Round((ry + rheight*0.5)*scale)587 #radius = cv.Round((rwidth + rheight)*0.25*scale)588 #cv2.circle( img, (cx, cy), radius, color, 3, 8, 0 )589 #if nestedCascade.empty():590 # continue591 # Wilson, Fernandez: FACIAL FEATURE DETECTION USING HAAR CLASSIFIERS592 # http://nichol.as/papers/Wilson/Facial%20feature%20detection%20using%20Haar.pdf593 #dx, dy = cv.Round(rwidth*0.5), cv.Round(rheight*0.5)594 dx, dy = cv.Round(rwidth/8.), cv.Round(rheight/8.)595 (rx, ry, rwidth, rheight) = (max([rx-dx,0]), max([ry-dy,0]), min([rwidth+2*dx,img.shape[1]]), min([rheight+2*dy,img.shape[0]]))596 #smallImgROI = smallImg597 #print r, (rx, ry, rwidth, rheight)598 #smallImgROI = smallImg[ry:(ry+rheight),rx:(rx+rwidth)]599 smallImgROI = 
smallImg[ry:(ry+6*dy),rx:(rx+rwidth)] # speed up by setting instead of extracting ROI600 nestedObjects = nestedCascade.detectMultiScale( smallImgROI,601 1.1, 2, 0602 #|CV_HAAR_FIND_BIGGEST_OBJECT603 #|CV_HAAR_DO_ROUGH_SEARCH604 #|CV_HAAR_DO_CANNY_PRUNING605 |cv.CV_HAAR_SCALE_IMAGE,606 (30, 30) )607 nestedObjects = self._util_merge_Regions(list(nestedObjects), overlap=True)[0]608 if len(nestedObjects) < 2:609 nestedLeftEye = cascadelefteye.detectMultiScale( smallImgROI,610 1.1, 2, 0611 #|CV_HAAR_FIND_BIGGEST_OBJECT612 #|CV_HAAR_DO_ROUGH_SEARCH613 #|CV_HAAR_DO_CANNY_PRUNING614 |cv.CV_HAAR_SCALE_IMAGE,615 (30, 30) )616 nestedRightEye = cascaderighteye.detectMultiScale( smallImgROI,617 1.1, 2, 0618 #|CV_HAAR_FIND_BIGGEST_OBJECT619 #|CV_HAAR_DO_ROUGH_SEARCH620 #|CV_HAAR_DO_CANNY_PRUNING621 |cv.CV_HAAR_SCALE_IMAGE,622 (30, 30) )623 nestedObjects = self._util_merge_Regions(list(nestedObjects) +624 list(nestedLeftEye) + 625 list(nestedRightEye), overlap=True)[0]626 #if len(nestedObjects) > 2:627 # nestedObjects = self._util_merge_Regions(list(nestedObjects), close=True)[0]628 smallImgROI = smallImg[(ry+4*dy):(ry+rheight),rx:(rx+rwidth)]629 nestedMouth = cascademouth.detectMultiScale( smallImgROI,630 1.1, 2, 0631 |cv.CV_HAAR_FIND_BIGGEST_OBJECT632 |cv.CV_HAAR_DO_ROUGH_SEARCH633 #|CV_HAAR_DO_CANNY_PRUNING634 |cv.CV_HAAR_SCALE_IMAGE,635 (30, 30) )636 smallImgROI = smallImg[(ry+(5*dy)/2):(ry+5*dy+(5*dy)/2),(rx+(5*dx)/2):(rx+5*dx+(5*dx)/2)]637 nestedNose = cascadenose.detectMultiScale( smallImgROI,638 1.1, 2, 0639 |cv.CV_HAAR_FIND_BIGGEST_OBJECT640 |cv.CV_HAAR_DO_ROUGH_SEARCH641 #|CV_HAAR_DO_CANNY_PRUNING642 |cv.CV_HAAR_SCALE_IMAGE,643 (30, 30) )644 smallImgROI = smallImg[(ry+2*dy):(ry+6*dy),rx:(rx+rwidth)]645 nestedEars = list(cascadeleftear.detectMultiScale( smallImgROI,646 1.1, 2, 0647 |cv.CV_HAAR_FIND_BIGGEST_OBJECT648 |cv.CV_HAAR_DO_ROUGH_SEARCH649 #|CV_HAAR_DO_CANNY_PRUNING650 |cv.CV_HAAR_SCALE_IMAGE,651 (30, 30) ))652 nestedEars += list(cascaderightear.detectMultiScale( 
smallImgROI,653 1.1, 2, 0654 |cv.CV_HAAR_FIND_BIGGEST_OBJECT655 |cv.CV_HAAR_DO_ROUGH_SEARCH656 #|CV_HAAR_DO_CANNY_PRUNING657 |cv.CV_HAAR_SCALE_IMAGE,658 (30, 30) ))659 data = { 'ID': (i+1),660 'Position': tuple(np.int_(r*scale)), 661 'Type': u'-',662 'Eyes': [],663 'Mouth': (),664 'Nose': (),665 'Ears': [],666 'Pose': (), }667 data['Coverage'] = float(data['Position'][2]*data['Position'][3])/(self.image_size[0]*self.image_size[1])668 #if (c >= confidence):669 # eyes = nestedObjects670 # if not (type(eyes) == type(tuple())):671 # eyes = tuple((eyes*scale).tolist())672 # result.append( {'Position': r*scale, 'eyes': eyes, 'confidence': c} )673 #print {'Position': r, 'eyes': nestedObjects, 'confidence': c}674 for nr in nestedObjects:675 (nrx, nry, nrwidth, nrheight) = nr676 cx = cv.Round((rx + nrx + nrwidth*0.5)*scale)677 cy = cv.Round((ry + nry + nrheight*0.5)*scale)678 radius = cv.Round((nrwidth + nrheight)*0.25*scale)679 #cv2.circle( img, (cx, cy), radius, color, 3, 8, 0 )680 data['Eyes'].append( (cx-radius, cy-radius, 2*radius, 2*radius) )681 if len(nestedMouth):682 (nrx, nry, nrwidth, nrheight) = nestedMouth[0]683 cx = cv.Round((rx + nrx + nrwidth*0.5)*scale)684 cy = cv.Round(((ry+4*dy) + nry + nrheight*0.5)*scale)685 radius = cv.Round((nrwidth + nrheight)*0.25*scale)686 #cv2.circle( img, (cx, cy), radius, color, 3, 8, 0 )687 data['Mouth'] = (cx-radius, cy-radius, 2*radius, 2*radius)688 if len(nestedNose):689 (nrx, nry, nrwidth, nrheight) = nestedNose[0]690 cx = cv.Round(((rx+(5*dx)/2) + nrx + nrwidth*0.5)*scale)691 cy = cv.Round(((ry+(5*dy)/2) + nry + nrheight*0.5)*scale)692 radius = cv.Round((nrwidth + nrheight)*0.25*scale)693 #cv2.circle( img, (cx, cy), radius, color, 3, 8, 0 )694 data['Nose'] = (cx-radius, cy-radius, 2*radius, 2*radius)695 for nr in nestedEars:696 (nrx, nry, nrwidth, nrheight) = nr697 cx = cv.Round((rx + nrx + nrwidth*0.5)*scale)698 cy = cv.Round((ry + nry + nrheight*0.5)*scale)699 radius = cv.Round((nrwidth + nrheight)*0.25*scale)700 
#cv2.circle( img, (cx, cy), radius, color, 3, 8, 0 )701 data['Ears'].append( (cx-radius, cy-radius, 2*radius, 2*radius) )702 if data['Mouth'] and data['Nose'] and data['Eyes'] and (len(data['Eyes']) == 2):703 # head model "little girl" for use in "MeshLab":704 # http://www.turbosquid.com/FullPreview/Index.cfm/ID/302581705 # http://meshlab.sourceforge.net/706 D3points = [[ 70.0602, 109.898, 20.8234], # left eye707 [ 2.37427, 110.322, 21.7776], # right eye708 [ 36.8301, 78.3185, 52.0345], # nose709 [ 36.6391, 51.1675, 38.5903],] # mouth710 #[ 119.268, 91.3111, -69.6397], # left ear711 #[-49.1328, 91.3111, -67.2481],] # right ear712 D2points = [np.array(data['Eyes'][0]), np.array(data['Eyes'][1]),713 np.array(data['Nose']), np.array(data['Mouth']),]714 D2points = [ item[:2] + item[2:]/2. for item in D2points ]715 neutral = np.array([[np.pi],[0.],[0.]])716 # calculate pose717 rvec, tvec, cm, err = self._util_get_Pose_solvePnP(D3points, D2points, self.image_size)718 #data['Pose'] = tuple(rvec[:,0])719 check = not (err[:,0,:].max() > 0.5)720 if not check:721 rvec = neutral # reset to neutral pose722 tvec = np.array([[0.],[0.],[100.]]) # reset to neutral position (same order as max of D3points)723 pywikibot.warning(u'Could not calculate pose of face, too big errors. 
                                      u'(looks like neutral pose/position is somehow singular)')
                ## debug: draw pose
                ##rvec *= 0
                #mat, perp = self._util_getD2coords_calc(np.eye(3), cm, rvec, tvec, hacky=False)
                ## from '_util_drawAxes(...)'
                #for i, item in enumerate(mat.transpose()):
                #    p = tuple((50+10*item).astype(int))[:2]
                #    cv2.line(img, (50, 50), p, (0., 0., 255.), 1)
                #    cv2.putText(img, str(i), p, cv2.FONT_HERSHEY_PLAIN, 1., (0., 0., 255.))
                #cv2.imshow("win", img)
                #cv2.waitKey()
                # calculate delta to neutral pose (rotation relative to frontal view)
                drv = -cv2.composeRT(-rvec, np.zeros((3,1)),
                                     neutral, np.zeros((3,1)))[0]
                rvec = cv2.Rodrigues(cv2.Rodrigues(rvec)[0])[0] # NOT unique!!!
                #nrv = cv2.composeRT(neutral, np.zeros((3,1)),
                #                    drv, np.zeros((3,1)))[0]
                #print (rvec - nrv < 1E-12) # compare
                # NOTE(review): Python 2 'map' returns a list here; under Python 3
                # this would store a lazy map object — confirm interpreter version.
                data['Pose'] = map(float, tuple(drv[:,0]))
# TODO: POSIT has to be tested and compared; draw both results!
                # POSIT: http://www.cfar.umd.edu/~daniel/daniel_papersfordownload/Pose25Lines.pdf
                if False:
                    # debug-only comparison of the two pose estimators (disabled)
                    pywikibot.output("solvePnP:")
                    pywikibot.output(str(rvec[:,0]))
                    pywikibot.output(str(tvec[:,0]))
                    pywikibot.output(str(err[:,0,:]))
                    rvec, tvec, cm, err = self._util_get_Pose_POSIT(D3points, D2points)
                    pywikibot.output("POSIT:")
                    pywikibot.output(str(rvec[:,0]))
                    pywikibot.output(str(tvec))
                    pywikibot.output(str(np.array(err)[:,0,:]/max(self.image_size)))
            result.append( data )
        ## see '_drawRect'
        #if result:
        #    #image_path_new = os.path.join(scriptdir, 'cache/0_DETECTED_' + self.image_filename)
        #    image_path_new = self.image_path_JPEG.replace(u"cache/", u"cache/0_DETECTED_")
        #    cv2.imwrite( image_path_new, img )
        #return faces.tolist()
        self._features['Faces'] += result
        return

    def _util_get_Pose_solvePnP(self, D3points, D2points, shape):
        """ Calculate pose from head model "little girl" w/o camera or other
        calibrations needed.

        D3points: model points - left eye, right eye, nose, mouth (3D)
        D2points: detected points - left eye, right eye, nose, mouth (2D)
        shape:    image size (width, height) used to fake the camera matrix
        Returns (rvec, tvec, cameraMatrix, err) where err is the per-point
        reprojection difference normalized by max(shape).
        """
        # howto (credits to "Roy"):
        # http://www.youtube.com/watch?v=ZDNH4BT5Do4
        # http://www.morethantechnical.com/2010/03/19/quick-and-easy-head-pose-estimation-with-opencv-w-code/
        # http://www.morethantechnical.com/2012/10/17/head-pose-estimation-with-opencv-opengl-revisited-w-code/
        # e.g. with head model "little girl" for use in "MeshLab":
        # http://www.turbosquid.com/FullPreview/Index.cfm/ID/302581
        # http://meshlab.sourceforge.net/
        # set-up camera matrix (no calibration needed!)
        max_d = max(shape)
        cameraMatrix = [[max_d,     0, shape[0]/2.0],
                        [    0, max_d, shape[1]/2.0],
                        [    0,     0,          1.0],]
        # calculate pose
        # NOTE(review): this unpacks TWO return values; OpenCV 2.4+ Python
        # bindings return (retval, rvec, tvec) — confirm the binding version.
        rvec, tvec = cv2.solvePnP(np.array(D3points).astype('float32'), np.array(D2points).astype('float32'), np.array(cameraMatrix).astype('float32'), None)
        # compare to 2D points: re-project each model point with the found pose
        err = []
        for i, vec in enumerate(np.array(D3points)):
            nvec = np.dot(cameraMatrix, (np.dot(cv2.Rodrigues(rvec)[0], vec) + tvec[:,0]))
            err.append(((D2points[i] - nvec[:2]/nvec[2]), D2points[i], nvec[:2]/nvec[2]))
        pywikibot.output(u'result for UN-calibrated camera:\n rot=%s' % rvec.transpose()[0])
        return rvec, tvec, np.array(cameraMatrix), (np.array(err)/max_d)

    #def _util_get_Pose_POSIT(self, D3points, D2points, shape):
    def _util_get_Pose_POSIT(self, D3points, D2points):
        """ Calculate pose from head model "little girl" w/o camera or other
        calibrations needed.
        Method similar to '_util_get_Pose_solvePnP', please compare.

        D2points: left eye, right eye, nose, mouth
        Returns (rvec, tvec, cameraMatrix, err) analogous to solvePnP variant
        (err here is NOT normalized — divided by 1.0).
        """
        # calculate pose
        import opencv
        #opencv.unit_test()
        (rmat, tvec, mdl) = opencv.posit(D3points, D2points, (100, 1.0e-4))
        rvec = cv2.Rodrigues(rmat)[0]
        # Project the model points with the estimated pose
        # http://opencv.willowgarage.com/documentation/cpp/camera_calibration_and_3d_reconstruction.html
        # intrinsic: camera matrix
        # extrinsic: rotation-translation matrix [R|t]
        # CV_32F, principal point in the centre of the image is (0, 0) instead of (self.image_size[0]*0.5)
        FOCAL_LENGTH = 760.0 # hard-coded in posit_python.cpp, should be changed...
        cameraMatrix = [[FOCAL_LENGTH,          0.0, 0.0],#shape[0]*0.0],
                        [         0.0, FOCAL_LENGTH, 0.0],#shape[1]*0.0],
                        [         0.0,          0.0, 1.0],]
        # compare to 2D points
        err = []
        for i, vec in enumerate(np.array(mdl)):
            nvec = np.dot(cameraMatrix, (np.dot(rmat, vec) + tvec))
            err.append(((D2points[i] - nvec[:2]/nvec[2]), D2points[i], nvec[:2]/nvec[2]))
        #pywikibot.output(u'result for UN-calibrated camera:\n rot=%s' % rvec.transpose()[0])
        return rvec, tvec, np.array(cameraMatrix), (np.array(err)/1.0)

    # https://pypi.python.org/pypi/xbob.flandmark
    # http://cmp.felk.cvut.cz/~uricamic/flandmark/
    def _detect_FaceLandmark_xBOB(self):
        """Prints the locations of any face landmark(s) found, respective
        converts them to usual face position data"""
        scale = 1.
        try:
            #video = bob.io.VideoReader(self.image_path_JPEG.encode('utf-8'))
            video = [cv2.imread( self.image_path_JPEG, cv.CV_LOAD_IMAGE_COLOR )]
            #if img == None:
            #    raise IOError

            # !!! the 'scale' here IS RELEVANT FOR THE DETECTION RATE;
            # how small and how many features are detected as faces (or eyes)
            scale = max([1., np.average(np.array(video[0].shape)[0:2]/750.)])
        except IOError:
            pywikibot.warning(u'unknown file type [_detect_FaceLandmark_xBOB]')
            return
        except AttributeError:
            pywikibot.warning(u'unknown file type [_detect_FaceLandmark_xBOB]')
            return
        smallImg = np.empty( (cv.Round(video[0].shape[1]/scale), cv.Round(video[0].shape[0]/scale)), dtype=np.uint8 )
        video = [ cv2.resize( img, smallImg.shape, interpolation=cv2.INTER_LINEAR ) for img in video ]
        sys.path.append(os.path.join(scriptdir, 'dtbext'))
        import _bob as bob
        import xbob_flandmark as xbob
        localize = xbob.flandmark.Localizer()
        result = []
        for frame in video: # currently ALWAYS contains ONE (1!) entry
            frame = np.transpose(frame, (2,0,1))
            img = np.transpose(frame, (1,2,0))
            for i, flm in enumerate(localize(frame)):
                #for pi, point in enumerate(flm['landmark']):
                #    cv2.circle(img, tuple(map(int, point)), 3, ( 0, 0, 255))
                #    cv2.circle(img, tuple(map(int, point)), 5, ( 0, 255, 0))
                #    cv2.circle(img, tuple(map(int, point)), 7, (255, 0, 0))
                #    cv2.putText(img, str(pi), tuple(map(int, point)), cv2.FONT_HERSHEY_PLAIN, 1.0, (0,255,0))
                #cv2.rectangle(img, tuple(map(int, flm['bbox'][:2])), tuple(map(int, (flm['bbox'][0]+flm['bbox'][2], flm['bbox'][1]+flm['bbox'][3]))), (0, 255, 0))
                # mouth bounding box spanned by landmarks 3 and 4
                mat = np.array([flm['landmark'][3], flm['landmark'][4]])
                mi = np.min(mat, axis=0)
                mouth = tuple(mi.astype(int)) + tuple((np.max(mat, axis=0)-mi).astype(int))
                #cv2.rectangle(img, tuple(mi.astype(int)), tuple(np.max(mat, axis=0).astype(int)), (0, 255, 0))
                # (first) eye bounding box spanned by landmarks 5 and 1
                mat = np.array([flm['landmark'][5], flm['landmark'][1]])
                mi = np.min(mat, axis=0)
                leye = tuple(mi.astype(int)) + tuple((np.max(mat, axis=0)-mi).astype(int))
                #cv2.rectangle(img, tuple(mi.astype(int)), tuple(np.max(mat, axis=0).astype(int)), (0, 255, 0))
                # (second) eye bounding box spanned by landmarks 2 and 6
                mat = np.array([flm['landmark'][2], flm['landmark'][6]])
                mi = np.min(mat, axis=0)
                reye = tuple(mi.astype(int)) + tuple((np.max(mat, axis=0)-mi).astype(int))
                #cv2.rectangle(img, tuple(mi.astype(int)), tuple(np.max(mat, axis=0).astype(int)), (0, 255, 0))
                data = { 'ID': (i+1),
                         'Position': flm['bbox'],
                         'Type': u'Landmark',
                         'Eyes': [leye, reye],
                         'Mouth': mouth,
                         'Nose': tuple(np.array(flm['landmark'][7]).astype(int)) + (0, 0),
                         'Ears': [],
                         'Landmark': [tuple(lm) for lm in np.array(flm['landmark']).astype(int)], }
                data['Coverage'] = float(data['Position'][2]*data['Position'][3])/(self.image_size[0]*self.image_size[1])
                result.append(data)
        #img = img.astype('uint8')
        #cv2.imshow("people detector", img)
        #cv2.waitKey()
        self._features['Faces'] += result
        return
    #
    # .../opencv/samples/cpp/peopledetect.cpp
    # + Haar/Cascade detection
    def _detect_People(self):
        """Detect people (full bodies) via a HOG descriptor combined with a
        Haar cascade; stores regions in self._features['People']."""
        # http://stackoverflow.com/questions/10231380/graphic-recognition-of-people
        # https://code.ros.org/trac/opencv/ticket/1298
        # http://opencv.itseez.com/modules/gpu/doc/object_detection.html
        # http://opencv.willowgarage.com/documentation/cpp/basic_structures.html
        # http://www.pygtk.org/docs/pygtk/class-gdkrectangle.html
        scale = 1.
        try:
            img = cv2.imread(self.image_path_JPEG, cv.CV_LOAD_IMAGE_COLOR)
            # skip tiny/broken images — nothing reliable can be detected there
            if (img == None) or (min(img.shape[:2]) < 100) or (not img.data) \
               or (self.image_size[0] is None):
                return
            # !!! the 'scale' here IS RELEVANT FOR THE DETECTION RATE;
            # how small and how many features are detected
            #scale = max([1., np.average(np.array(img.shape)[0:2]/500.)])
            scale = max([1., np.average(np.array(img.shape)[0:2]/400.)])
            #scale = max([1., np.average(np.array(img.shape)[0:2]/300.)])
        except IOError:
            pywikibot.warning(u'unknown file type [_detect_People]')
            return
        except AttributeError:
            pywikibot.warning(u'unknown file type [_detect_People]')
            return
        # similar to face detection
        smallImg = np.empty( (cv.Round(img.shape[1]/scale), cv.Round(img.shape[0]/scale)), dtype=np.uint8 )
        #gray = cv2.cvtColor( img, cv.CV_BGR2GRAY )
        gray = img
        smallImg = cv2.resize( gray, smallImg.shape, interpolation=cv2.INTER_LINEAR )
        #smallImg = cv2.equalizeHist( smallImg )
        img = smallImg

        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        #cv2.namedWindow("people detector", 1)

        # NOTE(review): both names are bound to the SAME list object here, but
        # each is rebound below before use, so no aliasing issue in practice.
        found = found_filtered = []
        #t = time.time()
        # run the detector with default parameters. to get a higher hit-rate
        # (and more false alarms, respectively), decrease the hitThreshold and
        # groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
        # detectMultiScale(img, hit_threshold=0, win_stride=Size(),
        #                  padding=Size(), scale0=1.05, group_threshold=2)
        enable_recovery()   # enable recovery from hard crash
        found = list(hog.detectMultiScale(img, 0.25, (8,8), (32,32), 1.05, 2))
        disable_recovery()  # disable since everything worked out fine
        # people haar/cascaded classifier
        # use 'haarcascade_fullbody.xml', ... also (like face detection)
        xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_fullbody.xml')
        #xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_lowerbody.xml')
        #xml = os.path.join(scriptdir, 'externals/opencv/haarcascades/haarcascade_upperbody.xml')
        if not os.path.exists(xml):
            raise IOError(u"No such file: '%s'" % xml)
        cascade = cv2.CascadeClassifier(xml)
        objects = list(cascade.detectMultiScale( smallImg,
            1.1, 3, 0
            #|cv.CV_HAAR_FIND_BIGGEST_OBJECT
            #|cv.CV_HAAR_DO_ROUGH_SEARCH
            |cv.CV_HAAR_SCALE_IMAGE,
            (30, 30) ))
        found += objects
        #t = time.time() - t
        #print("tdetection time = %gms\n", t*1000.)
        bbox = gtk.gdk.Rectangle(*(0,0,img.shape[1],img.shape[0]))
        # exclude duplicates (see also in 'classifyFeatures()')
        found_filtered = [gtk.gdk.Rectangle(*f) for f in self._util_merge_Regions(found, sub=True)[0]]
        result = []
        for i in range(len(found_filtered)):
            r = found_filtered[i]
            # the HOG detector returns slightly larger rectangles than the real objects.
            # so we slightly shrink the rectangles to get a nicer output.
            r.x += cv.Round(r.width*0.1)
            r.width = cv.Round(r.width*0.8)
            r.y += cv.Round(r.height*0.07)
            r.height = cv.Round(r.height*0.8)
            data = { 'ID': (i+1), }
            #'Center': (int(r.x + r.width*0.5), int(r.y + r.height*0.5)), }
            # crop to image size (because of the slightly bigger boxes)
            r = bbox.intersect(r)
            #cv2.rectangle(img, (r.x, r.y), (r.x+r.width, r.y+r.height), cv.Scalar(0,255,0), 3)
            # re-scale coordinates back to the original (unscaled) image
            data['Position'] = tuple(np.int_(np.array(r)*scale))
            data['Coverage'] = float(data['Position'][2]*data['Position'][3])/(self.image_size[0]*self.image_size[1])
            result.append( data )
        #cv2.imshow("people detector", img)
        #c = cv2.waitKey(0) & 255
        self._features['People'] = result
        return

    def _detect_Geometry(self):
        """Condense cached geometry measures into self._features['Geometry']."""
        result = self._util_get_Geometry_CVnSCIPY()
        self._features['Geometry'] = [{'Lines': result['Lines'],
                                       'Circles': result['Circles'],
                                       'Corners': result['Corners'],}]
        return

    # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python/houghlines.py?rev=2770
    def _util_get_Geometry_CVnSCIPY(self):
        """Compute line/circle/corner counts, edge ratio and an FFT/SVD peak
        measure for the image; results are cached in self._buffer_Geometry."""
        # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#cornerharris
        # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#houghcircles
        # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#houghlines
        # http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#houghlinesp
        if hasattr(self, '_buffer_Geometry'):
            # already computed for this image — return the cached result
            return self._buffer_Geometry
        self._buffer_Geometry = {'Lines': '-', 'Circles': '-', 'Edge_Ratio': '-', 'Corners': '-',
                                 'FFT_Peaks': '-'}
        scale = 1.
        try:
            img = cv2.imread(self.image_path_JPEG, cv.CV_LOAD_IMAGE_COLOR)
            if (img == None):
                raise IOError
            # !!!
            # the 'scale' here IS RELEVANT FOR THE DETECTION RATE;
            # how small and how many features are detected
            scale = max([1., np.average(np.array(img.shape)[0:2]/500.)])
        except IOError:
            pywikibot.warning(u'unknown file type [_detect_Geometry]')
            return self._buffer_Geometry
        except AttributeError:
            pywikibot.warning(u'unknown file type [_detect_Geometry]')
            return self._buffer_Geometry
        # similar to face or people detection
        smallImg = np.empty( (cv.Round(img.shape[1]/scale), cv.Round(img.shape[0]/scale)), dtype=np.uint8 )
        _gray = cv2.cvtColor( img, cv.CV_BGR2GRAY )
        # smooth it, otherwise a lot of false circles may be detected
        #gray = cv2.GaussianBlur( _gray, (9, 9), 2 )
        gray = cv2.GaussianBlur( _gray, (5, 5), 2 )
        smallImg = cv2.resize( gray, smallImg.shape, interpolation=cv2.INTER_LINEAR )
        #smallImg = cv2.equalizeHist( smallImg )
        src = smallImg
        # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python/houghlines.py?rev=2770
        #dst = cv2.Canny(src, 50, 200)
        dst = cv2.Canny(src, 10, 10)
        edges = cv2.Canny(src, 10, 10)
        #color_dst = cv2.cvtColor(dst, cv.CV_GRAY2BGR)
        # edges (in this sensitve form a meassure for color gradients)
        data = {}
        data['Edge_Ratio'] = float((edges != 0).sum())/(edges.shape[0]*edges.shape[1])
        # lines
        USE_STANDARD = True
        if USE_STANDARD:
            #lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_STANDARD, 1, pi / 180, 100, 0, 0)
            #lines = cv2.HoughLines(dst, 1, math.pi / 180, 100)
            lines = cv2.HoughLines(dst, 1, math.pi / 180, 200)
            if (lines is not None) and len(lines):
                lines = lines[0]
                data['Lines'] = len(lines)
            #for (rho, theta) in lines[:100]:
            #    a = math.cos(theta)
            #    b = math.sin(theta)
            #    x0 = a * rho
            #    y0 = b * rho
            #    pt1 = (cv.Round(x0 + 1000*(-b)), cv.Round(y0 + 1000*(a)))
            #    pt2 = (cv.Round(x0 - 1000*(-b)), cv.Round(y0 - 1000*(a)))
            #    cv2.line(color_dst, pt1, pt2, cv.RGB(255, 0, 0), 3, 8)
        else:
            #lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_PROBABILISTIC, 1, pi / 180, 50, 50, 10)
            lines = cv2.HoughLinesP(dst, 1, math.pi / 180, 100)
            #for line in lines:
            #    cv2.line(color_dst, line[0], line[1], cv.CV_RGB(255, 0, 0), 3, 8)
        # circles
        try:
            #circles = cv2.HoughCircles(src, cv.CV_HOUGH_GRADIENT, 2, src.shape[0]/4)#, 200, 100 )
            circles = cv2.HoughCircles(src, cv.CV_HOUGH_GRADIENT, 2, src.shape[0]/4, param2=200)
        except cv2.error:
            circles = None
        if (circles is not None) and len(circles):
            circles = circles[0]
            data['Circles'] = len(circles)
        #for c in circles:
        #    center = (cv.Round(c[0]), cv.Round(c[1]))
        #    radius = cv.Round(c[2])
        #    # draw the circle center
        #    cv2.circle( color_dst, center, 3, cv.CV_RGB(0,255,0), -1, 8, 0 )
        #    # draw the circle outline
        #    cv2.circle( color_dst, center, radius, cv.CV_RGB(0,0,255), 3, 8, 0 )
        # corners
        corner_dst = cv2.cornerHarris( edges, 2, 3, 0.04 )
        # Normalizing
        cv2.normalize( corner_dst, corner_dst, 0, 255, cv2.NORM_MINMAX, cv.CV_32FC1 )
        #dst_norm_scaled = cv2.convertScaleAbs( corner_dst )
        # Drawing a circle around corners
        corner = []
        for j in range(corner_dst.shape[0]):
            for i in range(corner_dst.shape[1]):
                if corner_dst[j,i] > 200:
                    #circle( dst_norm_scaled, Point( i, j ), 5, Scalar(0), 2, 8, 0 );
                    corner.append( (j,i) )
        data['Corners'] = len(corner)
        #cv2.imshow("people detector", color_dst)
        #c = cv2.waitKey(0) & 255
        # fft spectral/frequency/momentum analysis with svd peak detection
        gray = cv2.resize( _gray, smallImg.shape, interpolation=cv2.INTER_LINEAR )
        ##s = (self.image_size[1], self.image_size[0])
        #s = gray.shape
        fft = fftpack.fftn(gray)
        #fft = np.fft.fftn(gray)
        #Image.fromarray(fft.real).show()
        # shift quadrants so that low spatial frequencies are in the center
        fft = fftpack.fftshift(fft)
        #Image.fromarray(fft.real).show()
        ##Image.fromarray(fftpack.ifftn(fft).real).show()
        ##Image.fromarray(fftpack.ifftn(fftpack.ifftshift(fft)).real).show()
        ##Image.fromarray(fftpack.ifftn(fftpack.ifftshift(fft.real)).real).show()
        # (scipy svd has more options...)
        #U, S, Vh = linalg.svd(np.matrix(fft)) # scipy; unstable, crashes with C core dump
        #U, S, Vh = np.linalg.svd(np.matrix(fft)) # numpy (full matrix); unstable, ----"-----
        #U, S, Vh = np.linalg.svd(np.matrix(fft), full_matrices=False) # less memory; more stable
        S = np.linalg.svd(np.matrix(fft), compute_uv=False) # less memory, faster; more stable
        # count singular values above 1% of the largest one
        ma = 0.01*max(S)
        count = sum([int(c > ma) for c in S])
        #SS = np.zeros(s)
        #ss = min(s)
        #for i in range(0, len(S)-1, max( int(len(S)/100.), 1 )): # (len(S)==ss) -> else; problem!
        #    #SS = np.zeros(s)
        #    #SS[:(ss-i),:(ss-i)] = np.diag(S[:(ss-i)])
        #    SS[:(i+1),:(i+1)] = np.diag(S[:(i+1)])
        #    #Image.fromarray((np.dot(np.dot(U, SS), Vh) - fft).real).show()
        #    #Image.fromarray(fftpack.ifftn(fftpack.ifftshift(np.dot(np.dot(U, SS), Vh))).real - gray).show()
        #    print i, ((np.dot(np.dot(U, SS), Vh) - fft).real).max()
        #    print i, (fftpack.ifftn(fftpack.ifftshift(np.dot(np.dot(U, SS), Vh))).real - gray).max()
        #    #if ((np.dot(np.dot(U, SS), Vh) - fft).max() < (255/4.)):
        #    #    break
        #data['SVD_Comp'] = float(i)/ss
        #data['SVD_Min'] = S[:(i+1)].min()
        data['FFT_Peaks'] = float(count)/len(S)
        #pywikibot.output( u'FFT_Peaks: %s' % data['FFT_Peaks'] )
        # use wavelet transformation (FWT) from e.g. pywt, scipy signal or mlpy
        # (may be other) in addition to FFT and compare the spectra with FFT...
        # confer; "A Practical Guide to Wavelet Analysis" (http://journals.ametsoc.org/doi/pdf/10.1175/1520-0477%281998%29079%3C0061%3AAPGTWA%3E2.0.CO%3B2)
        # on how to convert and adopt FFT and wavlet spectra frequency scales
        if data:
            self._buffer_Geometry.update(data)
        return self._buffer_Geometry

    # .../opencv/samples/cpp/bagofwords_classification.cpp
    def _detectclassify_ObjectAll(self):
        """Uses the 'The Bag of Words model' for detection and classification"""
        # CAN ALSO BE USED FOR: TEXT, ...
        # http://app-solut.com/blog/2011/07/the-bag-of-words-model-in-opencv-2-2/
        # http://app-solut.com/blog/2011/07/using-the-normal-bayes-classifier-for-image-categorization-in-opencv/
        # http://authors.library.caltech.edu/7694/
        # http://www.vision.caltech.edu/Image_Datasets/Caltech256/
        # http://opencv.itseez.com/modules/features2d/doc/object_categorization.html

        # http://www.morethantechnical.com/2011/08/25/a-simple-object-classifier-with-bag-of-words-using-opencv-2-3-w-code/
        # source: https://github.com/royshil/FoodcamClassifier
        # http://app-solut.com/blog/2011/07/using-the-normal-bayes-classifier-for-image-categorization-in-opencv/
        # source: http://code.google.com/p/open-cv-bow-demo/downloads/detail?name=bowdemo.tar.gz&can=2&q=
        # parts of code here should/have to be placed into e.g.
        # a own class in 'dtbext/opencv/__init__.py' script/module

        # the 20 PASCAL VOC 2007 object classes the classifier was trained on
        trained = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
                   'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
                   'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
                   'sofa', 'train', 'tvmonitor',]
        bowDescPath = os.path.join(scriptdir, 'dtbext/opencv/data/bowImageDescriptors/000000.xml.gz')
        # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/cpp/bagofwords_classification.cpp?rev=3714
        # stand-alone (in shell) for training e.g. with:
        #   BoWclassify /data/toolserver/pywikipedia/dtbext/opencv/VOC2007 /data/toolserver/pywikipedia/dtbext/opencv/data FAST SURF BruteForce | tee run.log
        #   BoWclassify /data/toolserver/pywikipedia/dtbext/opencv/VOC2007 /data/toolserver/pywikipedia/dtbext/opencv/data HARRIS SIFT BruteForce | tee run.log
        # http://experienceopencv.blogspot.com/2011/02/object-recognition-bag-of-keypoints.html
        import opencv
        #opencv.unit_test()
        # remove stale descriptor cache left over from a previous run
        if os.path.exists(bowDescPath):
            os.remove(bowDescPath)
        # silence the C++ tool's console output by redirecting stdout
        stdout = sys.stdout
        sys.stdout = StringIO.StringIO()
        #result = opencv.BoWclassify.main(0, '', '', '', '', '')
        result = opencv.BoWclassify(6,
                                    os.path.join(scriptdir, 'dtbext/opencv/VOC2007'),
                                    os.path.join(scriptdir, 'dtbext/opencv/data'),
                                    'HARRIS',     # not important; given by training
                                    'SIFT',       # not important; given by training
                                    'BruteForce', # not important; given by training
                                    [str(os.path.abspath(self.image_path).encode('latin-1'))])
        #out = sys.stdout.getvalue()
        sys.stdout = stdout
        #print out
        if not result:
            raise ImportError("BoW did not resolve; no results found!")
        os.remove(bowDescPath)
        # now make the algo working; confer also
        # http://www.xrce.xerox.com/layout/set/print/content/download/18763/134049/file/2004_010.pdf
        # http://people.csail.mit.edu/torralba/shortCourseRLOC/index.html
        # one score per trained class, keyed by class name
        self._features['Classify'] = [dict([ (trained[i], r) for i, r in enumerate(result) ])]
        return

    def _detectclassify_ObjectAll_PYWT(self):
        """Uses the 'Fast Wavelet-Based Visual Classification' for detection
        and classification"""
        # Fast Wavelet-Based Visual Classification
        # http://www.cmap.polytechnique.fr/~yu/publications/ICPR08Final.pdf
        # CAN ALSO BE USED FOR: TEXT, AUDIO, (VIDEO), ...
# TODO: for audio and video (time-based) also...!!!
        import pywt # python-pywt
# TODO: improve (honestly; truly apply) wavelet in a meaningful and USEFUL (correct) way/manner!
# TODO: truly apply FFT and SVD (used before)
        # NOTE(review): 'gray' is never defined in this method — calling it
        # as-is raises NameError; this is experimental/unfinished code.
        # wavelet transformation
        # https://github.com/nigma/pywt/tree/master/demo
        # image_blender, dwt_signal_decomposition.py, wp_scalogram.py, dwt_multidim.py, user_filter_banks.py:
        #coeffs = pywt.dwtn(gray, 'db1')     # Single-level n-dimensional Discrete Wavelet Transform
        coeffs = pywt.dwt2(gray, 'db1')      # 2D Discrete Wavelet Transform
        #coeffs = pywt.wavedec2(gray, 'db1') # Multilevel 2D Discrete Wavelet Transform
        pass
        result = pywt.idwt2(coeffs, 'db1')   # 2D Inverse Discrete Wavelet Transform
        #result = pywt.waverec2(coeffs, 'db1') # Multilevel 2D Inverse Discrete Wavelet Transform
        result = result[:gray.shape[0],:gray.shape[1]]
        # consider 'swt' (2D Stationary Wavelet Transform) instead of 'dwt' too
        pywikibot.output(u'%s' % coeffs)
        pywikibot.output(u'%s' % np.abs(result - gray).max())
        #data['Wavelet_Comp'] = coeffs
        # https://github.com/nigma/pywt/blob/master/demo/image_blender.py
        # http://www.ncbi.nlm.nih.gov/pubmed/18713675
        # https://github.com/nigma/pywt/blob/master/demo/wp_scalogram.py
        # https://github.com/nigma/pywt/blob/master/demo/swt2.py
        return

    # a lot more paper and possible algos exist; (those with code are...)
    # http://www.lix.polytechnique.fr/~schwander/python-srm/
    # http://library.wolfram.com/infocenter/Demos/5725/#downloads
    # http://code.google.com/p/pymeanshift/wiki/Examples
    #
    # (http://pythonvision.org/basic-tutorial, http://luispedro.org/software/mahotas, http://packages.python.org/pymorph/)
    def _detect_SegmentColors(self): # may be SLIC other other too...
        """Segment the image into color regions (JSEG) and store per-region
        average color data in self._features['ColorRegions']."""
        try:
            #im = Image.open(self.image_path).convert(mode = 'RGB')
            im = Image.open(self.image_path_JPEG)
            ## crop 25% of the image in order to give the bot a more human eye
            ## (needed for categorization only and thus should be done there/later)
            #scale = 0.75 # crop 25% percent (area) bounding box
            #(w, h) = ( self.image_size[0]*math.sqrt(scale), self.image_size[1]*math.sqrt(scale) )
            #(l, t) = ( (self.image_size[0]-w)/2, (self.image_size[1]-h)/2 )
            #i = im.crop( (int(l), int(t), int(l+w), int(t+h)) )
            (l, t) = (0, 0)
            i = im
        except IOError:
            pywikibot.warning(u'unknown file type [_detect_SegmentColors]')
            return
        result = []
        try:
            #h = i.histogram() # average over WHOLE IMAGE
            (pic, scale) = self._util_detect_ColorSegments_JSEG(i)  # split image into segments first
            #(pic, scale) = self._util_detect_ColorSegments_SLIC(i) # split image into superpixel first
            hist = self._util_get_ColorSegmentsHist_PIL(i, pic, scale)
            #pic = self._util_merge_ColorSegments(pic, hist)           # iteratively in order to MERGE similar regions
            #(pic, scale_) = self._util_detect_ColorSegments_JSEG(pic) # (final split)
            ##(pic, scale) = self._util_detect_ColorSegments_JSEG(pic) # (final split)
            #hist = self._util_get_ColorSegmentsHist_PIL(i, pic, scale)
        except TypeError:
            pywikibot.warning(u'unknown file type [_detect_SegmentColors]')
            return
        i = 0
        # (may be do an additional region merge according to same color names...)
        for (h, coverage, (center, bbox)) in hist:
            if (coverage < 0.05): # at least 5% coverage needed (help for debugging/log_output)
                continue
            data = self._util_average_Color_colormath(h)
            data['Coverage'] = float(coverage)
            data['ID'] = (i+1)
            data['Center'] = (int(center[0]+l), int(center[1]+t))
            data['Position'] = (int(bbox[0]+l), int(bbox[1]+t), int(bbox[2]), int(bbox[3]))
            # distance of the region center from the image center
            data['Delta_R'] = math.sqrt( (self.image_size[0]/2 - center[0])**2 + \
                                         (self.image_size[1]/2 - center[1])**2 )
            result.append( data )
            i += 1
        self._features['ColorRegions'] = result
        return

    # http://stackoverflow.com/questions/2270874/image-color-detection-using-python
    # https://gist.github.com/1246268
    # colormath-1.0.8/examples/delta_e.py, colormath-1.0.8/examples/conversions.py
    # http://code.google.com/p/python-colormath/
    # http://en.wikipedia.org/wiki/Color_difference
    # http://www.farb-tabelle.de/en/table-of-color.htm
    def _detect_AverageColor(self):
        """Compute the weighted average color over the whole image and store
        it (plus geometry-derived measures) in self._features['ColorAverage']."""
        try:
            # we need to have 3 channels (but e.g. grayscale 'P' has only 1)
            #i = Image.open(self.image_path).convert(mode = 'RGB')
            i = Image.open(self.image_path_JPEG)
            h = i.histogram()
        except IOError:
            pywikibot.warning(u'unknown file type [_detect_AverageColor]')
            return
        result = self._util_average_Color_colormath(h)
        result['Gradient'] = self._util_get_Geometry_CVnSCIPY().get('Edge_Ratio', None) or '-'
        result['FFT_Peaks'] = self._util_get_Geometry_CVnSCIPY().get('FFT_Peaks', None) or '-'
        self._features['ColorAverage'] = [result]
        return

    # http://stackoverflow.com/questions/2270874/image-color-detection-using-python
    # https://gist.github.com/1246268
    # colormath-1.0.8/examples/delta_e.py, colormath-1.0.8/examples/conversions.py
    # http://code.google.com/p/python-colormath/
    # http://en.wikipedia.org/wiki/Color_difference
    # http://www.farb-tabelle.de/en/table-of-color.htm
    # http://www5.konicaminolta.eu/de/messinstrumente/color-light-language.html
    def _util_average_Color_colormath(self, h):
        """Return average color info computed from a PIL histogram 'h'
        (256 bins per channel, R then G then B): weighted-average RGB,
        peak ratio, nearest named reference color and its Delta_E."""
        # split into red, green, blue
        r = h[0:256]
        g = h[256:256*2]
        b = h[256*2: 256*3]

        # perform the weighted average of each channel:
        # the *index* 'i' is the channel value, and the *value*
        # 'w' is its weight
        rgb = (
            sum( i*w for i, w in enumerate(r) ) / max(1, sum(r)),
            sum( i*w for i, w in enumerate(g) ) / max(1, sum(g)),
            sum( i*w for i, w in enumerate(b) ) / max(1, sum(b))
        )
        # count number of colors used more than 1% of maximum
        ma = 0.01*max(h)
        count = sum([int(c > ma) for c in h])
#        # TODO: peak detection (not supported by my local scipy version yet)
#        # http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks_cwt.html
#        peakind = signal.find_peaks_cwt(fft, np.arange(1,10))
#        print peaks
#        print len(peakind), peakind
        data = { #'histogram': h,
                 'RGB': rgb,
                 'Peaks': float(count)/len(h), }
        # reference palette used for the nearest-named-color lookup
        #colors = pycolorname.RAL.colors
        #colors = pycolorname.pantone.Formula_Guide_Solid
        colors = pycolorname.pantone.Fashion_Home_paper

        #print "=== RGB Example: RGB->LAB ==="
        # Instantiate an Lab color object with the given values.
        rgb = RGBColor(rgb[0], rgb[1], rgb[2], rgb_type='sRGB')
        # Show a string representation.
        #print rgb
        # Convert RGB to LAB using a D50 illuminant.
        lab = rgb.convert_to('lab', target_illuminant='D65')
        #print lab
        #print "=== End Example ===\n"

        # Reference color.
        #color1 = LabColor(lab_l=0.9, lab_a=16.3, lab_b=-2.22)
        # Color to be compared to the reference.
        #color2 = LabColor(lab_l=0.7, lab_a=14.2, lab_b=-1.80)
        color2 = lab
        # linear search for the palette entry with minimal CMC delta-E
        res = (1.E100, '')
        for c in colors:
            rgb = colors[c]
            rgb = RGBColor(rgb[0], rgb[1], rgb[2], rgb_type='sRGB')
            color1 = rgb.convert_to('lab', target_illuminant='D65')
            #print "== Delta E Colors =="
            #print " COLOR1: %s" % color1
            #print " COLOR2: %s" % color2
            #print "== Results =="
            #print " CIE2000: %.3f" % color1.delta_e(color2, mode='cie2000')
            ## Typically used for acceptability.
            #print " CMC:     %.3f (2:1)" % color1.delta_e(color2, mode='cmc', pl=2, pc=1)
            ## Typically used to more closely model human percetion.
            #print " CMC:     %.3f (1:1)" % color1.delta_e(color2, mode='cmc', pl=1, pc=1)
            r = color1.delta_e(color2, mode='cmc', pl=2, pc=1)
            if (r < res[0]):
                res = (r, c, colors[c])
        data['Color'] = res[1]
        data['Delta_E'] = res[0]
        data['RGBref'] = res[2]
        return data

    def _util_detect_ColorSegments_JSEG(self, im):
        """Segment PIL image 'im' with the external JSEG 'segdist' tool;
        returns (pic, scale) where 'pic' is the segment-label GIF opened
        with PIL and 'scale' the downscale factor that was applied."""
        tmpjpg = os.path.join(scriptdir, "cache/jseg_buf.jpg")
        tmpgif = os.path.join(scriptdir, "cache/jseg_buf.gif")
        # same scale func as in '_detect_Faces'
        scale = max([1., np.average(np.array(im.size)[0:2]/200.)])
        #print np.array(im.size)/scale, scale
        try:
            smallImg = im.resize( tuple(np.int_(np.array(im.size)/scale)), Image.ANTIALIAS )
        except IOError:
            pywikibot.warning(u'unknown file type [_util_detect_ColorSegments_JSEG]')
            return

        #im.thumbnail(size, Image.ANTIALIAS) # size is 640x480
        smallImg.convert('RGB').save(tmpjpg, "JPEG", quality=100, optimize=True)

        # Program limitation: The total number of regions in the image must be less
        # than 256 before the region merging process. This works for most images
        # smaller than 512x512.

        # Processing time will be about 10 seconds for an 192x128 image and 60 seconds
        # for a 352x240 image. It will take several minutes for a 512x512 image.
        # Minimum image size is 64x64.

        # ^^^ THUS RESCALING TO ABOUT 200px ABOVE ^^^
        # sys.stdout handeled, but with freopen which could give issues
        import jseg
        # e.g.
"segdist -i test3.jpg -t 6 -r9 test3.map.gif"1362 enable_recovery() # enable recovery from hard crash1363 jseg.segdist_cpp.main( [ item.encode('utf-8') for item in 1364 ("segdist -i %s -t 6 -r9 %s"%(tmpjpg, tmpgif)).split(" ") ] )1365 disable_recovery() # disable since everything worked out fine1366 #out = open((tmpgif + ".stdout"), "r").read() # reading stdout1367 #print out1368 os.remove(tmpgif + ".stdout")1369 1370 os.remove( tmpjpg )1371 1372 # http://stackoverflow.com/questions/384759/pil-and-numpy1373 pic = Image.open(tmpgif)1374 #pix = np.array(pic)1375 #Image.fromarray(10*pix).show()1376 1377 os.remove( tmpgif )1378 return (pic, scale)1379 # http://planet.scipy.org/1380 # http://peekaboo-vision.blogspot.ch/2012/05/superpixels-for-python-pretty-slic.html1381 # http://ivrg.epfl.ch/supplementary_material/RK_SLICSuperpixels/index.html1382 def _util_detect_ColorSegments_SLIC(self, img):1383 import slic1384 im = np.array(img)1385 image_argb = np.dstack([im[:, :, :1], im]).copy("C")1386 #region_labels = slic.slic_n(image_argb, 1000, 10)1387 region_labels = slic.slic_n(image_argb, 1000, 50)1388 slic.contours(image_argb, region_labels, 10)1389 #import matplotlib.pyplot as plt1390 #plt.imshow(image_argb[:, :, 1:].copy())1391 #plt.show()1392 #pic = Image.fromarray(region_labels)1393 #pic.show()1394 #return (pic, 1.)1395 return (region_labels, 1.)1396 def _util_get_ColorSegmentsHist_PIL(self, im, pic, scale):1397 if not (type(np.ndarray(None)) == type(pic)):1398 pix = np.array(pic)1399 #Image.fromarray(10*pix).show()1400 else:1401 pix = pic1402 #Image.fromarray(255*pix/np.max(pix)).show()1403 try:1404 smallImg = im.resize( tuple(np.int_(np.array(im.size)/scale)), Image.ANTIALIAS )1405 except IOError:1406 pywikibot.warning(u'unknown file type [_util_get_ColorSegmentsHist_PIL]')1407 return1408 imgsize = float(smallImg.size[0]*smallImg.size[1])1409 hist = []1410 for i in range(np.max(pix)+1):1411 mask = np.uint8(pix == i)*2551412 (y, x) = np.where(mask != 0)1413 center = 
    def _util_merge_ColorSegments(self, im, hist):
        """Flatten every segmented region to its average color and smooth
        the result, so that a JSEG re-run can merge similar regions.

        @param im:   PIL image OR numpy label array (one label per region)
        @param hist: per-region data as produced by
                     '_util_get_ColorSegmentsHist_PIL': list of
                     (histogram, coverage, (center, bbox)) tuples
        @return: smoothed RGB PIL image with each region painted in its
                 average color
        """
        # merge regions by simplifying through average color and re-running
        # JSEG again...
        if not (type(np.ndarray(None)) == type(im)):
            # NOTE(review): isinstance(im, np.ndarray) would be the idiomatic test
            pix = np.array(im)
        else:
            pix = im
            # label array -> grayscale image (labels spread over 0..255)
            im = Image.fromarray(255*pix/np.max(pix))
        im = im.convert('RGB')
        for j, (h, coverage, (center, bbox)) in enumerate(hist):
            # split the 768-bin RGB histogram into red, green, blue
            r = h[0:256]
            g = h[256:256*2]
            b = h[256*2: 256*3]

            # perform the weighted average of each channel:
            # the *index* 'i' is the channel value, and the *value* 'w' is its weight
            # (Python 2 integer division keeps these as ints for paste())
            rgb = (
                sum( i*w for i, w in enumerate(r) ) / max(1, sum(r)),
                sum( i*w for i, w in enumerate(g) ) / max(1, sum(g)),
                sum( i*w for i, w in enumerate(b) ) / max(1, sum(b))
            )
            # (disabled) color frequency analysis; the idea was to skip
            # averaging regions with high fluctuations:
            #rgb2 = (
            #    sum( i*i*w for i, w in enumerate(r) ) / max(1, sum(r)),
            #    sum( i*i*w for i, w in enumerate(g) ) / max(1, sum(g)),
            #    sum( i*i*w for i, w in enumerate(b) ) / max(1, sum(b))
            #)
            #if ( 500. < np.average( (
            #    rgb2[0] - rgb[0]**2,
            #    rgb2[1] - rgb[1]**2,
            #    rgb2[2] - rgb[2]**2, ) ) ):
            #    continue
            # paint region 'j' with its average color through a binary mask
            mask = np.uint8(pix == j)*255
            mask = Image.fromarray( mask )
            im.paste(rgb, mask=mask)
        # slight per-channel gaussian blur to soften region borders
        pix = np.array(im)
        pix[:,:,0] = ndimage.gaussian_filter(pix[:,:,0], .5)
        pix[:,:,1] = ndimage.gaussian_filter(pix[:,:,1], .5)
        pix[:,:,2] = ndimage.gaussian_filter(pix[:,:,2], .5)
        im = Image.fromarray( pix, mode='RGB' )
        #im = im.filter(ImageFilter.BLUR) # or 'SMOOTH'
        return im
    def _recognize_OpticalCodes(self):
        """Recognize optical codes: Data Matrix (pydmtx/libdmtx) and the
        symbologies supported by zbar (barcodes, QR, ...).

        Appends one dict per found code ('ID', 'Data', 'Position' as
        (left, top, width, height), 'Type', 'Quality') to
        self._features['OpticalCodes'].

        # http://libdmtx.wikidot.com/libdmtx-python-wrapper
        # http://blog.globalstomp.com/2011/09/decoding-qr-code-code-128-code-39.html
        # http://zbar.sourceforge.net/
        # http://pypi.python.org/pypi/zbar
        """
        # DataMatrix
        from pydmtx import DataMatrix # linux distro package (fedora) / TS (debian)
        scale = 1.
        try:
            # Read a Data Matrix barcode
            dm_read = DataMatrix()
            img = Image.open(self.image_path_JPEG)
            if (self.image_size[0] is None):
                raise IOError
            # http://libdmtx.wikidot.com/libdmtx-python-wrapper
            if img.mode != 'RGB':
                img = img.convert('RGB')
            # downscale to ~200px average dimension to bound decode time
            scale = max([1., np.average(np.array(img.size)/200.)])
        except IOError:
            pywikibot.warning(u'unknown file type [_recognize_OpticalCodes]')
            return
        smallImg = img.resize( (int(img.size[0]/scale), int(img.size[1]/scale)) )
        img = smallImg
        # NOTE(review): the actual decode call below is commented out, so
        # dm_read.count() presumably reports no (or stale) results and the
        # DataMatrix loop never fires - confirm whether this is deliberate
        # (e.g. disabled because of hard crashes in libdmtx)
        enable_recovery()   # enable recovery from hard crash
        #res = dm_read.decode(img.size[0], img.size[1], buffer(img.tostring()))
        disable_recovery()  # disable since everything worked out fine
        result = []
        i = -1
        for i in range(dm_read.count()):
            data, bbox = dm_read.stats(i+1)
            bbox = np.array(bbox)
            x, y = bbox[:,0], bbox[:,1]
            # bounding box of the code region: (left, top, width, height)
            pos = (np.min(x), np.min(y), np.max(x)-np.min(x), np.max(y)-np.min(y))
            result.append({ 'ID':       (i+1),
                            'Data':     data,
                            'Position': pos,
                            'Type':     u'DataMatrix',
                            'Quality':  10, })

        self._features['OpticalCodes'] = result

        # zbar supports many popular symbologies
        # NOTE(review): bare 'except' kept as-is - it is a deliberate
        # import fallback for differently-named distro packages
        try:
            import zbar # TS (debian)
        except:
            import _zbar as zbar # other distros (fedora)

        try:
            img = Image.open(self.image_path_JPEG).convert('L')
            width, height = img.size
        except IOError:
            pywikibot.warning(u'unknown file type [_recognize_OpticalCodes]')
            return

        scanner = zbar.ImageScanner()
        scanner.parse_config('enable')
        # 'Y800' = 8-bit grayscale raw data
        zbar_img = zbar.Image(width, height, 'Y800', img.tostring())

        # scan the image for barcodes
        # http://zbar.sourceforge.net/api/zbar_8h.html
        scanner.scan(zbar_img)
        for symbol in zbar_img:
            i += 1    # continue the ID sequence after the DataMatrix results
            p = np.array(symbol.location) # list of points within code region/area
            p = (min(p[:,0]), min(p[:,1]), (max(p[:,0])-min(p[:,0])), (max(p[:,1])-min(p[:,1])))
            result.append({ 'ID':       (i+1),
                            'Data':     symbol.data or u'-',
                            'Position': p, # (left, top, width, height)
                            'Quality':  symbol.quality, # usable for 'Confidence'
                            'Type':     symbol.type, })

        self._features['OpticalCodes'] = result
        return
    def _detect_Chessboard(self):
        """Detect a chessboard pattern (7x7 inner corners) and estimate its
        3D pose with the OpenCV reference detector.

        Stores one result dict in self._features['Chessboard'] containing
        'Corners' (corner coordinates in the downscaled image) and - if the
        full board was found - 'Rotation', 'Perp_Dir' and 'Perp_Dir_2D'
        from a solvePnP pose estimation against a shelve-cached (virtual)
        camera calibration.

        # http://www.c-plusplus.de/forum/273920-full
        # http://www.youtube.com/watch?v=bV-jAnQ-tvw
        """
        scale = 1.
        try:
            im = cv2.imread( self.image_path_JPEG, cv2.CV_LOAD_IMAGE_GRAYSCALE )
            chessboard_dim = ( 7, 7 )
            # NOTE(review): 'im == None' should be 'im is None'; with newer
            # numpy the '==' form broadcasts elementwise instead of testing None
            if im == None:
                raise IOError
            # downscale to ~1000px average dimension before detection
            scale = max([1., np.average(np.array(im.shape)[0:2]/1000.)])
        except IOError:
            pywikibot.warning(u'unknown file type [_detect_Chessboard]')
            return
        except AttributeError:
            pywikibot.warning(u'unknown file type [_detect_Chessboard]')
            return
        smallImg = np.empty( (cv.Round(im.shape[1]/scale), cv.Round(im.shape[0]/scale)), dtype=np.uint8 )
        smallImg = cv2.resize( im, smallImg.shape, interpolation=cv2.INTER_LINEAR )
        im = smallImg
        found_all = False
        corners = None
        try:
            found_all, corners = cv2.findChessboardCorners( im, chessboard_dim )
        except cv2.error:
            pywikibot.exception(tb=True)
        result = {}
        if corners is not None:
            result = { 'Corners': [tuple(item[0]) for item in corners], }
        self._features['Chessboard'] = [result]
# TODO: improve chessboard detection (make it more tolerant); a disabled
# experiment followed here in the original: Otsu thresholding + Canny edges
# + (probabilistic) Hough lines to recover the board grid, cf.
# http://codebazaar.blogspot.ch/2011/08/chess-board-recognition-project-part-1.html
# http://dsp.stackexchange.com/questions/2420/alternatives-to-hough-transform-for-detecting-a-grid-like-structure
        if found_all:
            # pose detection
            # http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html
            # http://stackoverflow.com/questions/10022568/opencv-2-3-camera-calibration
            # (virtual) camera calibration is computed once from a reference
            # image and cached in a shelve db
            d = shelve.open( os.path.join(scriptdir, 'externals/opencv/camera_virtual_default') )
            if ('retval' not in d):
                # http://commons.wikimedia.org/wiki/File:Mutilated_checkerboard_3.jpg
                pywikibot.output(u"Doing (virtual) camera calibration onto reference image 'File:Mutilated_checkerboard_3.jpg'")
                im3 = cv2.imread( 'Mutilated_checkerboard_3.jpg', cv2.CV_LOAD_IMAGE_GRAYSCALE )
                im3 = cv2.resize( im3, (cv.Round(im3.shape[1]/scale), cv.Round(im3.shape[0]/scale)), interpolation=cv2.INTER_LINEAR )
                # Compute the the three dimensional world-coordinates
                tmp = []
                for h in range(chessboard_dim[0]):
                    for w in range(chessboard_dim[1]):
                        tmp.append( (float(h), float(w), 0.0) )
                objectPoints = np.array(tmp)
                # Compute matrices
                _found_all, _corners = cv2.findChessboardCorners( im3, chessboard_dim, flags=cv.CV_CALIB_CB_ADAPTIVE_THRESH | cv.CV_CALIB_CB_FILTER_QUADS )
                retval, cameraMatrix, distCoeffs, rvecs, tvecs = cv2.calibrateCamera([objectPoints.astype('float32')], [_corners.astype('float32')], im3.shape, np.eye(3), np.zeros((5, 1)))
                fovx, fovy, focalLength, principalPoint, aspectRatio = cv2.calibrationMatrixValues(cameraMatrix, im3.shape, 1.0, 1.0)
                d['objectPoints'] = [objectPoints.astype('float32')] # shape: (49, 3) in a list of 1 item
                d['imagePoints'] = [_corners.astype('float32')] # shape: (49, 1, 2) in a list of 1 item
                d['cameraMatrix'] = cameraMatrix
                d['distCoeffs'] = distCoeffs
                d['rvecs'] = rvecs
                d['tvecs'] = tvecs
                d['imageSize'] = im3.shape
                d['apertureWidth'] = 1.0
                d['apertureHeight'] = 1.0
                d['fovx'] = fovx
                d['fovy'] = fovy
                d['focalLength'] = focalLength
                d['principalPoint'] = principalPoint
                d['aspectRatio'] = aspectRatio
                d['retval'] = retval
            else:
                objectPoints = d['objectPoints'][0]
                cameraMatrix, distCoeffs = d['cameraMatrix'], d['distCoeffs']
                # would be nice to use simple defaults (identity camera,
                # zero distortion)... else the calibration actually used
                # has to be documented as "used calibration" !!!
            d.close()
            # http://answers.opencv.org/question/1073/what-format-does-cv2solvepnp-use-for-points-in/
            # NOTE(review): newer OpenCV returns (retval, rvec, tvec) from
            # solvePnP - this 2-tuple unpacking matches the old cv2 API only
            rvec, tvec = cv2.solvePnP(objectPoints, corners, cameraMatrix, distCoeffs)
            # (todo) draw the rotated 3D object (projected down to 2D)
            im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
            # (disabled debug drawings of projected axis-crosses were here;
            # the self-calculated variant below gives the numerical results)
            rot = rvec
            mat, perp = self._util_getD2coords_calc(np.eye(3), cameraMatrix, rvec, tvec)
            # 2D projection of the z-axis = perpendicular of the board plane
            ortho = mat[:2,2]
            ortho = ortho/np.linalg.norm(ortho)
# TODO: compare face and chessboard pose estimations and unify them, then document everything (template in wiki, ...)
            pywikibot.output(u'result for calibrated camera:\n rot=%s\n perp=%s\n perp2D=%s' % (rot.transpose()[0], perp[:,2], ortho))
            pywikibot.output(u'nice would be to do the same for uncalibrated/default cam settings')
            result.update({ 'Rotation':    tuple(rot.transpose()[0]),
                            'Perp_Dir' :   tuple(perp[:,2]),
                            'Perp_Dir_2D': tuple(ortho), })
            self._features['Chessboard'] = [result]
        return
and [R|t]: rotation-1817 translation matrix.1818 @see http://docs.opencv.org/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html1819 """1820 # cv2.decomposeProjectionMatrix(...)1821 cm = cameraMatrix.copy()1822 cm[0:2,2] = [0., 0.]1823 rmat = np.zeros((3,4))1824 # http://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula1825 rmat[:,0:3] = cv2.Rodrigues(rvec)[0]1826 #rmat[:,0:3] = np.eye(3)1827 rmat[:,3] = tvec[:,0]1828 origin = np.dot(rmat, cv2.convertPointsToHomogeneous(np.zeros((3,3)).astype('float32')).transpose()[:,0,:])1829 origin2D = np.dot((cm), origin) # np.linalg.inv(cm)1830 #coords = np.dot(cv2.Rodrigues(rvec)[0], D3coords)1831 coords = np.dot(rmat, cv2.convertPointsToHomogeneous(D3coords.astype('float32')).transpose()[:,0,:])1832 coords2D = np.dot((cm), coords)1833 perp = coords - origin1834 if hacky:1835 # for '_detect_Chessboard' but looks a bit strange ... may be wrong?!1836 mat = coords2D - origin2D1837 mat = mat/max([np.linalg.norm(mat[:,i]) for i in range(3)])1838 else:1839 for i in range(3): # rescale with s1840 coords2D[:,i] /= coords2D[2,i]1841 origin2D[:,i] /= origin2D[2,i]1842 mat = coords2D - origin2D1843 # simple'n'fast solution, if just 2D results are needed1844 #mat, jacobian = cv2.projectPoints(np.append(np.zeros((1,3)), 1845 # D3coords, 1846 # axis=0),1847 # rvec, tvec, cm, np.zeros((5,1)))1848 #mat = mat[:,0,:]1849 #mat = (mat[1:,:] - mat[0,:]).transpose()1850 return (mat, perp)1851# def _util_drawAxes(self, mat, x, y, im):1852# color = [(0., 0., 255.), (0., 255., 0.), (255., 0., 0.)]1853# label = ['x', 'y', 'z']1854# for i in range(3):1855# D2norm = 40*mat[:,i]1856# cv2.line(im, (x,y), (x+D2norm[0].astype(int),y+D2norm[1].astype(int)), color[i], 1)1857# cv2.putText(im, label[i], (x+D2norm[0].astype(int),y+D2norm[1].astype(int)), cv2.FONT_HERSHEY_PLAIN, 1., color[i])1858 def _detect_Faces_EXIF(self):1859 res = self._util_get_DataTags_EXIF()1860 1861 # http://u88.n24.queensu.ca/exiftool/forum/index.php?topic=3156.01862 # 
    def _detect_Faces_EXIF(self):
        """Extract maker-specific face-detection regions from EXIF data.

        Reads the EXIF tags gathered by '_util_get_DataTags_EXIF' and
        converts the per-maker face/AF-point encodings into pixel
        rectangles, rotating them back if the image was stored rotated.
        Appends the resulting region dicts ('Position' as (x, y, w, h),
        'ID', 'Type'=u'Exif', empty 'Eyes'/'Mouth'/'Nose', 'Coverage')
        to self._features['Faces'].

        Ported/simplified from exiftool's facetest.pl, all scaling stuff
        ignored (!) and some strongly simplified (!):
        # http://u88.n24.queensu.ca/exiftool/forum/index.php?topic=3156.0
        # http://u88.n24.queensu.ca/pub/facetest.pl
        Example: 'File:Annagrah-2 041.JPG' (canon)
        NOTE(review): Python 2 semantics are relied upon throughout
        ('map' returning a list, integer division).
        """
        res = self._util_get_DataTags_EXIF()

        if 'Make' in res:
            make = res['Make'].lower()
        else:
            make = ''
        found = set(res.keys())
        data = []
        # frame size the face coordinates refer to; EXIF values may carry
        # 'pt'/'px' units which are stripped here
        if 'ImageWidth' in res:
            (width, height) = (str(res['ImageWidth']), str(res['ImageHeight']))
            (width, height) = (re.sub(u'p[tx]', u'', width), re.sub(u'p[tx]', u'', height))
            try:
                (width, height) = (int(float(width)+0.5), int(float(height)+0.5))
            except ValueError:
                pywikibot.warning(u'%s contains incompatible unit(s), skipped' % ((width, height),))
                return
        else:
            (width, height) = self.image_size
        wasRotated = (height > width)

        if True in [item in make for item in ['sony', 'nikon', 'panasonic', 'casio', 'ricoh']]:
            # UNTESTED: ['sony', 'nikon', 'casio', 'ricoh']
            # TESTED: ['panasonic']
            if set(['FacesDetected', 'Face1Position']).issubset(found):
                i = 1
                if 'FaceOrientation' in res:
                    pywikibot.output(res['FaceOrientation']) # for rotation 'rot'
                # 'crop' for 'casio' omitted here...
                aspect = float(height)/width
                if (aspect <= 3./4):
                    (fw, fh) = (320, 320 * aspect)
                else:
                    (fw, fh) = (240 / aspect, 240)
                (sx, sy) = (1./fw, 1./fh)
                if 'FaceDetectFrameSize' in res:
                    (width, height) = map(int, res['FaceDetectFrameSize'].split(' '))
                    (sx, sy) = (1./width, 1./height)
                # positions are center + size; convert to corner coordinates
                while (('Face%iPosition'%i) in res) and (i <= int(res['FacesDetected'])):
                    buf = map(int, res['Face%iPosition'%i].split(' '))
                    (x1, y1) = ((buf[0]-buf[2]/2)*sx, (buf[1]-buf[3]/2)*sy) # 'panasonic'
                    (x2, y2) = (x1+buf[2]*sx, y1+buf[3]*sy) #
                    data.append({ 'Position': (x1, y1, x2, y2) })
                    if ('RecognizedFace%iName'%i) in res:
                        pywikibot.output(str((res['RecognizedFace%iName'%i], res['RecognizedFace%iAge'%i])))
                    i += 1
        elif 'fujifilm' in make:
            # UNTESTED: 'fujifilm'
            if set(['FacesDetected', 'FacePositions']).issubset(found):
                buf = map(int, res['FacePositions'].split(' '))
                (sx, sy) = (1./width, 1./height)
                for i in range(int(res['FacesDetected'])):
                    data.append({ 'Position': [buf[i*4]*sx, buf[i*4+1]*sy,
                                               buf[i*4+2]*sx, buf[i*4+3]*sy] })
                    if ('Face%iName'%i) in res:
                        pywikibot.output(str((res['Face%iName'%i], res['Face%iCategory'%i], res['Face%iBirthday'%i])))
        elif 'olympus' in make:
            # UNTESTED: 'olympus'
            if set(['FacesDetected', 'FaceDetectArea']).issubset(found):
                buf = map(int, res['FacesDetected'].split(' '))
                if buf[0] or buf[1]:
                    buf = map(int, res['FaceDetectArea'].split(' '))
                    for i in range(int(res['MaxFaces'])):
                        data.append({ 'Position': [buf[i*4], buf[i*4+1], buf[i*4+2], buf[i*4+3]] })
        elif True in [item in make for item in ['pentax', 'sanyo']]:
            # UNTESTED: ['pentax', 'sanyo']
            if set(['FacesDetected']).issubset(found):
                i = 1
                (sx, sy) = (1./width, 1./height)
                while ('Face%iPosition'%i) in res:
                    buf = map(int, res['Face%iPosition'%i].split(' ') + \
                                   res['Face%iSize'%i].split(' '))
                    (x1, y1) = ((buf[0] - buf[2]/2.)*sx, (buf[1] - buf[3]/2.)*sy)
                    (x2, y2) = (x1+buf[2]*sx, y1+buf[3]*sy)
                    data.append({ 'Position': (x1, y1, x2, y2) })
                    i += 1
                if 'FacePosition' in res:
                    buf = map(int, res['FacePosition'].split(' ') + ['100', '100']) # how big is the face?
                    (x1, y1) = (buf[0]*sx, buf[1]*sy)
                    (x2, y2) = (buf[2]*sx, buf[3]*sy)
                    data.append({ 'Position': (x1, y1, x2, y2) })
        elif 'canon' in make:
            if set(['FacesDetected', 'FaceDetectFrameSize']).issubset(found) \
               and (int(res['FacesDetected'])):
                # TESTED: older models store face detect information
                (width, height) = map(int, res['FaceDetectFrameSize'].split(' ')) # default: (320,240)
                (sx, sy) = (1./width, 1./height)
                fw = res['FaceWidth'] or 35
                i = 1
                # positions are face centers relative to frame center
                while ('Face%iPosition'%i) in res:
                    buf = map(int, res['Face%iPosition'%i].split(' '))
                    (x1, y1) = ((buf[0] + width/2. - fw)*sx, (buf[1] + height/2. - fw)*sy)
                    (x2, y2) = (x1 + fw*2*sx, y1 + fw*2*sy)
                    data.append({ 'Position': (x1, y1, x2, y2) })
                    i += 1
            elif set(['ValidAFPoints', 'AFImageWidth', 'AFImageHeight',
                      'AFAreaXPositions', 'AFAreaYPositions', 'PrimaryAFPoint']).issubset(found):
                # TESTED: newer models use AF points
                (width, height) = (int(res['AFImageWidth']), int(res['AFImageHeight']))
                if ('AFAreaMode' in res) and ('Face' in res['AFAreaMode']):
                    buf_x = res['AFAreaXPositions'].split(' ')
                    buf_y = res['AFAreaYPositions'].split(' ')
                    buf_w = buf_h = [100] * len(buf_x) # how big is the face? (else)
                    if 'AFAreaWidths' in res:
                        buf_w = map(int, res['AFAreaWidths'].split(' '))
                        buf_h = map(int, res['AFAreaHeights'].split(' '))
                    elif 'AFAreaWidth' in res:
                        buf_w = [int(res['AFAreaWidth'])] * len(buf_x)
                        buf_h = [int(res['AFAreaHeight'])] * len(buf_x)
                    else:
                        pywikibot.output(u'No AF area size')
                    # conversion to positive coordinates
                    buf_x = [ int(x) + width/2. for x in buf_x ]
                    buf_y = [ int(y) + height/2. for y in buf_y ]
                    # EOS models have Y flipped
                    if ('Model' in res) and ('EOS' in res['Model']):
                        buf_y = [ height - y for y in buf_y ]
                    (sx, sy) = (1./width, 1./height)
                    for i in range(int(res['ValidAFPoints'])):
                        (x1, y1) = ((buf_x[i]-buf_w[i]/2)*sx, (buf_y[i]-buf_h[i]/2)*sy)
                        (x2, y2) = (x1+buf_w[i]*sx, y1+buf_h[i]*sy)
                        data.append({ 'Position': (x1, y1, x2, y2) })
        else:
            # not supported (yet...); warn if face data seems to be present
            available = [item in res for item in ['FacesDetected', 'ValidAFPoints']]
            unknown = ['face' in item.lower() for item in res.keys()]
            if make and (True in (available+unknown)):
                pywikibot.warning(u"skipped '%s' since not supported (yet) [_detect_Faces_EXIF]" % make)
                pywikibot.warning(u"FacesDetected: %s - ValidAFPoints: %s" % tuple(available))

        # finally, rotate face coordinates if image was rotated
        if wasRotated:
            rot = 270
            # variable rotation omitted here... ($$faceInfo{Rotation})
        for i, d in enumerate(data):
            # rotate face coordinates
            p = data[i]['Position']
            if wasRotated:
                if (rot == 90):
                    p = (p[1], 1-p[0], p[3], 1-p[2])
                else:
                    p = (1-p[1], p[0], 1-p[3], p[2])
                if 'Rotation' in data[i]:
                    data[i]['Rotation'] -= rot
                    data[i]['Rotation'] += 360 if data[i]['Rotation'] < 0 else 0
            # rescale relative sizes to real pixel values
            p = (p[0]*self.image_size[0] + 0.5, p[1]*self.image_size[1] + 0.5,
                 p[2]*self.image_size[0] + 0.5, p[3]*self.image_size[1] + 0.5)
            # change from (x1, y1, x2, y2) to (x, y, w, h)
            data[i]['Position'] = (min(p[0],p[2]), min(p[1],p[3]),
                                   abs(p[0]-p[2]), abs(p[3]-p[1]))
            data[i] = { 'Position': tuple(map(int, data[i]['Position'])),
                        'ID':       (i+1),
                        'Type':     u'Exif',
                        'Eyes':     [],
                        'Mouth':    (),
                        'Nose':     (), }
            data[i]['Coverage'] = float(data[i]['Position'][2]*data[i]['Position'][3])/(self.image_size[0]*self.image_size[1])
        # (exclusion of duplicates is done later by '_util_merge_Regions')
        self._features['Faces'] += data
        return
how is the analytic relation?)2057 # add the first match (first is assumed to be the best one) / drop second one2058 #print check, np.average(check), np.std(check)2059 if (np.average(check) >= 0.9) and (np.std(check) <= 0.1):2060 #if (np.average(check) >= 0.85) and (np.std(check) <= 0.1):2061 drop.append( i1 )2062 # remove all sub-rect/-regions (all regions fully contained in other)2063 if sub:2064 #drop.append( [i1, i2][check[0:2].index(1.0)] )2065 if (ar1 >= thsr) and (i2 not in drop):2066 drop.append( i1 )2067 elif (ar2 >= thsr) and (i1 not in drop):2068 drop.append( i2 )2069 # from '_detect_Faces()'2070 if overlap:2071 if (r2[0] <= c1[0] <= (r2[0] + r2[2])) and \2072 (r2[1] <= c1[1] <= (r2[1] + r2[3])) and (i2 not in drop):2073 drop.append( i1 )2074 if close:2075 if (check[0] >= 0.985) and (i2 not in drop): # at least (!)2076 drop.append( i1 )2077 i2 += 12078 drop = sorted(list(set(drop)))2079 drop.reverse()2080 for i in drop:2081 del regs[i]2082 return (regs, drop)2083class _PngFile(_JpegFile):2084 pass2085class _GifFile(_JpegFile):2086 pass2087class _TiffFile(_JpegFile):2088 pass2089class _XcfFile(_JpegFile):2090 def _convert(self):2091 # Very few programs other than GIMP read XCF files. This is by design2092 # from the GIMP developers, the format is not really documented or2093 # supported as a general-purpose file format.2094 # Commons uses ImageMagick, thus we have EXACTLY THE SAME support!2095 # (can also be a drawback, e.g. 
when the library is buggy...)2096 proc = Popen("convert %s %s" % (self.image_path, self.image_path_JPEG),2097 shell=True, stderr=PIPE)#.stderr.read()2098 proc.wait()2099 if proc.returncode != 0:2100 raise ImportError("convert (ImageMagick) not found (may be other error occured)!")2101 elif proc.returncode:2102 self.image_path_JPEG = self.image_path2103 #data = Popen("identify -verbose info: %s" % self.image_path,2104 # shell=True, stderr=PIPE).stderr.read()2105 #print data2106 if not os.path.exists(self.image_path_JPEG):2107 # xcf can have more than 1 layer/page like gif, tiff, and movies...2108 self.image_path_JPEG = self.image_path_JPEG.replace('.jpg', '-0.jpg')2109 self.image_size = Image.open(self.image_path_JPEG).size2110 # MIME: 'image/x-xcf; charset=binary'2111 def _detect_Properties(self):2112 """Retrieve as much file property info possible, especially the same2113 as commons does in order to compare if those libraries (ImageMagick,2114 ...) are buggy (thus explicitely use other software for independence)"""2115 result = { 'Format': u'%s' % self.file_mime[1].upper(),2116 # DO NOT use ImageMagick (identify) instead of PIL to get these info !!2117 'Pages': 0,2118 'Dimensions': self.image_size,2119 'Filesize': os.path.getsize(self.file_name),2120 'MIME': u'%s/%s' % tuple(self.file_mime[:2]), }2121 #self._properties['Properties'] = [result]2122 self._properties['Properties'][0].update(result)2123 return2124class _SvgFile(_JpegFile):2125 def _convert(self):2126 # SVG: rasterize the SVG to bitmap (MAY BE GET FROM WIKI BY DOWNLOAD?...)2127 # (Mediawiki uses librsvg too: http://commons.wikimedia.org/wiki/SVG#SVGs_in_MediaWiki)2128 # http://stackoverflow.com/questions/6589358/convert-svg-to-png-in-python2129 # http://cairographics.org/pythoncairopil/2130 # http://cairographics.org/pyrsvg/2131 # http://stackoverflow.com/questions/9166400/convert-rgba-png-to-rgb-with-pil2132 try:2133 svg = rsvg.Handle(self.image_path)2134 img = cairo.ImageSurface(cairo.FORMAT_ARGB32, 
svg.props.width, svg.props.height)2135 ctx = cairo.Context(img)2136 svg.render_cairo(ctx)2137 #img.write_to_png("svg.png")2138 #Image.frombuffer("RGBA",( img.get_width(),img.get_height() ),2139 # img.get_data(),"raw","RGBA",0,1).save(self.image_path_JPEG, "JPEG")2140 png = Image.frombuffer("RGBA",( img.get_width(),img.get_height() ),2141 img.get_data(),"raw","RGBA",0,1)2142 background = Image.new("RGB", png.size, (255, 255, 255))2143 background.paste(png, mask=png.split()[3]) # 3 is the alpha channel2144 background.save(self.image_path_JPEG, "JPEG")2145 self.image_size = (svg.props.width, svg.props.height)2146 except MemoryError:2147 self.image_path_JPEG = self.image_path2148 except SystemError:2149 self.image_path_JPEG = self.image_path2150 # MIME: 'application/xml; charset=utf-8'2151 def _detect_Properties(self):2152 """Retrieve as much file property info possible, especially the same2153 as commons does in order to compare if those libraries (ImageMagick,2154 ...) are buggy (thus explicitely use other software for independence)"""2155 result = {'Format': u'-', 'Pages': 0}2156 # similar to PDF page count OR use BeautifulSoup2157 svgcountpages = re.compile("<page>")2158 pc = len(svgcountpages.findall( file(self.image_path,"r").read() ))2159 #svg = rsvg.Handle(self.image_path)2160 # http://validator.w3.org/docs/api.html#libs2161 # http://pypi.python.org/pypi/py_w3c/2162 vld = HTMLValidator()2163 valid = u'SVG'2164 try:2165 vld.validate(self.image.fileUrl())2166 valid = (u'Valid SVG' if vld.result.validity == 'true' else u'Invalid SVG')2167 except urllib2.URLError:2168 pass2169 except ValidationFault:2170 pass2171 #print vld.errors, vld.warnings2172 #self.image_size = (svg.props.width, svg.props.height)2173 result.update({ 'Format': valid,2174 'Mode': u'-',2175 'Palette': u'-',2176 'Pages': pc,2177 # may be set {{validSVG}} also or do something in bot template to2178 # recognize 'Format=SVG (valid)' ...2179 'Dimensions': self.image_size,2180 'Filesize': 
os.path.getsize(self.file_name),2181 'MIME': u'%s/%s' % tuple(self.file_mime[:2]), })2182 #self._properties['Properties'] = [result]2183 self._properties['Properties'][0].update(result)2184 return2185class _PdfFile(_JpegFile):2186 def getFeatures(self):2187 # optical and other text recognition (tesseract & ocropus, ...)2188 self._detect_EmbeddedText()2189# self._recognize_OpticalText()2190 # (may be just classify as 'contains text', may be store text, e.g. to wikisource)2191 return self._features2192 def _convert(self):2193# self._wikidata = self.image._latestInfo # all info wikimedia got from content (mime, sha1, ...)2194 # PDF: support extract text and images2195 # (Mediawiki uses ghostscript: https://www.mediawiki.org/wiki/Extension:PdfHandler#Pre-requisites)2196 # http://vermeulen.ca/python-pdf.html2197 # http://code.activestate.com/recipes/511465-pure-python-pdf-to-text-converter/2198 # http://stackoverflow.com/questions/25665/python-module-for-converting-pdf-to-text2199 if os.path.splitext(self.image_filename)[1].lower() == u'.pdf':2200 pass2201 # MIME: 'application/pdf; charset=binary'2202 def _detect_Properties(self):2203 """Retrieve as much file property info possible, especially the same2204 as commons does in order to compare if those libraries (ImageMagick,2205 ...) are buggy (thus explicitely use other software for independence)"""2206 # http://code.activestate.com/recipes/496837-count-pdf-pages/2207 #rxcountpages = re.compile(r"$\s*/Type\s*/Page[/\s]", re.MULTILINE|re.DOTALL)2208 rxcountpages = re.compile(r"/Type\s*/Page([^s]|$)", re.MULTILINE|re.DOTALL) # PDF v. 
        #  1.3,1.4,1.5,1.6 (PDF versions covered by the page-count regex above)
        pc = len(rxcountpages.findall( file(self.image_path,"rb").read() ))
        result = { 'Format':     u'PDF',
                   'Mode':       u'-',
                   'Palette':    u'-',
                   'Pages':      pc,
                   'Dimensions': self.image_size,
                   'Filesize':   os.path.getsize(self.file_name),
                   'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
        #self._properties['Properties'] = [result]
        self._properties['Properties'][0].update(result)
        return

    # ./run-test (ocropus/ocropy)
    # (in fact all scripts/executables used here are pure python scripts!!!)
    def _recognize_OpticalText(self):
        """Run the full ocropus OCR pipeline on the rasterized page(s).

        Currently disabled in getFeatures(); the pipeline shells out to the
        ocropus scripts and works in a scratch 'temp' directory under the
        bundled ocropy checkout.  NOTE(review): this chdir()s into that
        directory and relies on every step succeeding to chdir back.
        """
        # optical text recognition (tesseract & ocropus, ...)
        # (no full recognition but - at least - just classify as 'contains text')
        # http://www.claraocr.org/de/ocr/ocr-software/open-source-ocr.html
        # https://github.com/edsu/ocropy
        # http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot/Doku#Categorization
        # Usage: tesseract imagename outputbase [-l lang] [configfile [[+|-]varfile]...]
        #        tesseract imagename.tif output
        # (it's simpler to run the scripts/executables in own environment/interpreter...)
        path = os.path.join(scriptdir, 'dtbext/_ocropus/ocropy')
        curdir = os.path.abspath(os.curdir)
        os.chdir(path)
        # binarization (start from a clean scratch directory)
        if os.path.exists(os.path.join(path, "temp")):
            shutil.rmtree(os.path.join(path, "temp"))
        if os.system("ocropus-nlbin %s -o %s" % (self.image_path_JPEG, os.path.join(path, "temp"))):
            raise ImportError("ocropus not found!")
        # page level segmentation
        if os.system("ocropus-gpageseg --minscale 6.0 '%s'" % os.path.join(path, "temp/????.bin.png")):
            # detection error
            return
        # raw text line recognition
        if os.system("ocropus-lattices --writebestpath '%s'" % os.path.join(path, "temp/????/??????.bin.png")):
            # detection error
            return
        # language model application
        # (optional - improve the raw results by applying a pretrained model)
        os.environ['OCROPUS_DATA'] = os.path.join(path, "models/")
        if os.system("ocropus-ngraphs '%s'" % os.path.join(path, "temp/????/??????.lattice")):
            # detection error
            return
        # create hOCR output
        if os.system("ocropus-hocr '%s' -o %s" % (os.path.join(path, "temp/????.bin.png"), os.path.join(path, "temp.html"))):
            # detection error
            return
        ## 'create HTML for debugging (use "firefox temp/index.html" to view)'
        ## (optional - generate human readable debug output)
        #if os.system("ocropus-visualize-results %s" % os.path.join(path, "temp")):
        #    # detection error
        #    return
        # "to see recognition results, type: firefox temp.html"
        # "to see details on the recognition process, type: firefox temp/index.html"
        tmpfile = open(os.path.join(path, "temp.html"), 'r')
        data = tmpfile.read()
        tmpfile.close()
        shutil.rmtree(os.path.join(path, "temp"))
        os.remove(os.path.join(path, "temp.html"))
        os.chdir(curdir)
        #print data
        pywikibot.output(data)

    def _detect_EmbeddedText(self):
        """Extract embedded text and count embedded images via poppler tools.

        Stores a 'Text' feature entry with the extracted text size, line
        count and number of embedded images.
        """
        # may be also: http://www.reportlab.com/software/opensource/rl-toolkit/
        # poppler pdftotext/pdfimages
        # (similar as in '_util_get_DataTags_EXIF' but with stderr and no json output)
        # http://poppler.freedesktop.org/
        # http://www.izzycode.com/bash/how-to-install-pdf2text-on-centos-fedora-redhat.html
        # MIGHT BE BETTER TO USE AS PYTHON MODULE:
        # https://launchpad.net/poppler-python/
        # http://stackoverflow.com/questions/2732178/extracting-text-from-pdf-with-poppler-c
        # http://stackoverflow.com/questions/25665/python-module-for-converting-pdf-to-text
        #proc = Popen("pdftotext -layout %s %s" % (self.image_path, self.image_path+'.txt'),
        proc = Popen("pdftotext %s %s" % (self.image_path, self.image_path+'.txt'),
                     shell=True, stderr=PIPE)#.stderr.readlines()
        proc.wait()
        if proc.returncode:
            raise ImportError("pdftotext not found!")
        data = open(self.image_path+'.txt', 'r').readlines()
        os.remove( self.image_path+'.txt' )
#        self._content_text = data
        # s1: total character count, l1: line count of the extracted text
        (s1, l1) = (len(u''.join(data)), len(data))
        tmp_path = os.path.join(os.environ.get('TMP', '/tmp'), 'DrTrigonBot/')
        os.mkdir( tmp_path )
# switch this part off since 'pdfimages' (on toolserver) is too old; TS-1449
#        proc = Popen("pdfimages -p %s %s/" % (self.image_path, tmp_path),
        proc = Popen("pdfimages %s %s/" % (self.image_path, tmp_path),
                     shell=True, stderr=PIPE)#.stderr.readlines()
        proc.wait()
        if proc.returncode:
            raise ImportError("pdfimages not found!")
        # only the number of extracted images is kept; the files are discarded
        images = os.listdir( tmp_path )
#        pages = set()
        for f in images:
#            pages.add( int(f.split('-')[1]) )
            os.remove( os.path.join(tmp_path, f) )
        os.rmdir( tmp_path )

        ## pdfminer (tools/pdf2txt.py)
        ## http://denis.papathanasiou.org/?p=343 (for layout and images)
        #debug = 0
        #laparams = layout.LAParams()
        ##
        #pdfparser.PDFDocument.debug = debug
        #pdfparser.PDFParser.debug = debug
        #cmapdb.CMapDB.debug = debug
        #pdfinterp.PDFResourceManager.debug = debug
        #pdfinterp.PDFPageInterpreter.debug = debug
        #pdfdevice.PDFDevice.debug = debug
        ##
        #rsrcmgr = pdfinterp.PDFResourceManager(caching=True)
        #outfp = StringIO.StringIO()
        #device = converter.TextConverter(rsrcmgr, outfp, codec='utf-8', laparams=laparams)
        ##device = converter.XMLConverter(rsrcmgr, outfp, codec='utf-8', laparams=laparams, outdir=None)
        ##device = converter.HTMLConverter(rsrcmgr, outfp, codec='utf-8', scale=1,
        ##                                layoutmode='normal', laparams=laparams, outdir=None)
        ##device = pdfdevice.TagExtractor(rsrcmgr, outfp, codec='utf-8')
        #fp = file(self.image_path, 'rb')
        #try:
        #    pdfinterp.process_pdf(rsrcmgr, device, fp, set(), maxpages=0, password='',
        #                          caching=True, check_extractable=False)
        #except AssertionError:
        #    pywikibot.warning(u'pdfminer missed, may be corrupt [_detect_EmbeddedText]')
        #    return
        #except TypeError:
        #
        #    pywikibot.warning(u'pdfminer missed, may be corrupt [_detect_EmbeddedText]')
        #    return
        #fp.close()
        #device.close()
        #data = outfp.getvalue().splitlines(True)
        #
        #(s2, l2) = (len(u''.join(data)), len(data))
        result = { 'Size':  s1,
                   'Lines': l1,
                   #'Data': data,
                   #'Position': pos,
#                   'Images': u'%s (on %s page(s))' % (len(images), len(list(pages))),   # pages containing images
                   'Images': u'%s' % len(images),
                   'Type':  u'-', }   # 'Type' could be u'OCR' above...
        self._features['Text'] = [result]
        return


#class DjvuFile(_JpegFile):
#    pass


class _OggFile(_JpegFile):
    def getFeatures(self):
        """Extract features from ogg containers (audio/video streams)."""
        # general handling of all audio and video formats
        self._detect_Streams()
        # general audio feature extraction
#        self._detect_AudioFeatures()
        return self._features

    # MIME: 'application/ogg; charset=binary'
    def _detect_Properties(self):
        """Retrieve as much file property info possible, especially the same
        as commons does in order to compare if those libraries (ImageMagick,
        ...) are buggy (thus explicitely use other software for independence)"""
        # 'ffprobe' (ffmpeg); audio and video streams files (ogv, oga, ...)
        d = self._util_get_DataStreams_FFMPEG()
        #print d
        result = { 'Format':     u'%s' % d['format']['format_name'].upper(),
                   'Pages':      0,
                   'Dimensions': self.image_size,
                   'Filesize':   os.path.getsize(self.file_name),
                   'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
        #self._properties['Properties'] = [result]
        self._properties['Properties'][0].update(result)
        return

    def _detect_Streams(self):
        """Enumerate the container's audio/video/data streams into
        self._features['Streams'] (one dict per stream)."""
        # audio and video streams files (ogv, oga, ...)
        d = self._util_get_DataStreams_FFMPEG()
        if not d:
            return
        result = []
        for s in d['streams']:
            #print s
            if (s["codec_type"] == "video"):
                rate = s["avg_frame_rate"]
                dim  = (int(s["width"]), int(s["height"]))
                #asp = s["display_aspect_ratio"]
            elif (s["codec_type"] == "audio"):
# switch this part off since 'ffprobe' (on toolserver) is too old
#                rate = u'%s/%s/%s' % (s["channels"], s["sample_fmt"], s["sample_rate"])
                rate = u'%s/%s/%s' % (s["channels"], u'-', int(float(s["sample_rate"])))
                dim  = None
            elif (s["codec_type"] == "data"):
                rate = None
                dim  = None
            # NOTE(review): an unknown codec_type would reuse rate/dim from the
            # previous iteration (no final else) - confirm whether that can occur
            result.append({ 'ID':         int(s["index"]) + 1,
                            'Format':     u'%s/%s' % (s["codec_type"], s.get("codec_name",u'?')),
                            'Rate':       rate or u'-',
                            'Dimensions': dim or (None, None),
                            })
        # a bare image muxed as a stream is not a real audio/video file
        if 'image' in d["format"]["format_name"]:
            result = []
        self._features['Streams'] = result
        return

    def _util_get_DataStreams_FFMPEG(self):
        """Run ffprobe once and cache its parsed output on the instance.

        Returns {'streams': [...], 'format': {...}} (or {} when ffprobe
        produced no sections).  The plain-text [STREAM]/[FORMAT] sections are
        parsed by hand because the toolserver ffprobe predates -print_format.
        """
        if hasattr(self, '_buffer_FFMPEG'):
            return self._buffer_FFMPEG
        # (similar as in '_util_get_DataTags_EXIF')
# switch this part off since 'ffprobe' (on toolserver) is too old; TS-1449
#        data = Popen("ffprobe -v quiet -print_format json -show_format -show_streams %s" % self.image_path,
        proc = Popen("ffprobe -v quiet -show_format -show_streams %s" % self.image_path,#.replace('%', '%%'),
                     shell=True, stdout=PIPE)#.stdout.read()
        proc.wait()
        # 127 = shell could not find the executable
        if proc.returncode == 127:
            raise ImportError("ffprobe (ffmpeg) not found!")
        data = proc.stdout.read().strip()
#        self._buffer_FFMPEG = json.loads(data)
        # hand-rolled parser for '[SECTION] ... key=value ... [/SECTION]' text
        res, key, cur = {}, '', {}
        for item in data.splitlines():
            if (item[0] == '['):
                if not (item[1] == '/'):
                    # section opener: start collecting a new key=value dict
                    key = item[1:-1]
                    cur = {}
                    if key not in res:
                        res[key] = []
                else:
                    # section closer: commit the collected dict
                    res[key].append( cur )
            else:
                val = item.split('=')
                cur[val[0].strip()] = val[1].strip()
        if res:
            res = { 'streams': res['STREAM'], 'format': res['FORMAT'][0] }
        self._buffer_FFMPEG = res

        return self._buffer_FFMPEG

    def _detect_AudioFeatures(self):
        # http://yaafe.sourceforge.net/manual/tools.html
        # http://yaafe.sourceforge.net/manual/quickstart.html - yaafe.py
        # ( help: yaafe.py -h / features: yaafe.py -l )
        #
        # compile yaafe on fedora:
        #  1.) get and compile 'argtable2' (2-13)
        #  1.1 download from http://argtable.sourceforge.net/
        #  1.2 unpack and cd to directory
        #  1.3 $ ccmake .
        #  1.4 set: CMAKE_BUILD_TYPE = Release
        #  1.5 press: c, g (in order to configure and generate)
        #  1.6 $ make
        #  2.)
        #      get and compile 'yaafe'
        #  1.1 download from http://yaafe.sourceforge.net/
        #  1.2 unpack and cd to directory
        #  1.3 $ ccmake .
        #  1.4 set: ARGTABLE2_INCLUDE_DIR = /home/ursin/Desktop/argtable2-13/src
        #           ARGTABLE2_LIBRARY = /home/ursin/Desktop/argtable2-13/src/libargtable2.a
        #           ...
        #           DL_INCLUDE_DIR = /usr/include
        #           DL_LIBRARY = /usr/lib64/libdl.so
        #           FFTW3_INCLUDE_DIR = /usr/include
        #           FFTW3_LIBRARY = /usr/lib64/libfftw3.so
        #           HDF5_HL_LIBRARY = /usr/lib64/libhdf5_hl.so
        #           HDF5_INCLUDE_DIR = /usr/include
        #           HDF5_LIBRARY = /usr/lib64/libhdf5.so
        #           LAPACK_LIBRARY = /usr/lib64/liblapack.so
        #           MATLAB_ROOT = MATLAB_ROOT-NOTFOUND
        #           MPG123_INCLUDE_DIR = /usr/include
        #           MPG123_LIBRARY = /usr/lib64/libmpg123.so
        #           RT_LIBRARY = /usr/lib64/librt.so
        #           SNDFILE_INCLUDE_DIR = /usr/include
        #           SNDFILE_LIBRARY = /usr/lib64/libsndfile.so
        #           ...
        #           WITH_FFTW3 = ON
        #           WITH_HDF5 = ON
        #           WITH_LAPACK = ON
        #           WITH_MATLAB_MEX = OFF
        #           WITH_MPG123 = ON
        #           WITH_SNDFILE = ON
        #           WITH_TIMERS = ON
        #           (use t to toggle to more advanced options)
        #           CMAKE_CXX_FLAGS = -fpermissive
        #           CMAKE_C_FLAGS = -fpermissive
        #           (install all needed dependencies/packages into the OS also)
        #  1.5 press: c, g (in order to configure and generate)
        #  1.6 $ make
        #  1.7 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ursin/Desktop/yaafe-v0.64/src_cpp/yaafe-python/:/home/ursin/Desktop/yaafe-v0.64/src_cpp/yaafe-io/:/home/ursin/Desktop/yaafe-v0.64/src_cpp/yaafe-core/:/home/ursin/Desktop/yaafe-v0.64/src_cpp/yaafe-components/
        #      $ export YAAFE_PATH=/home/ursin/Desktop/yaafe-v0.64/src_python/
        #      $ export PYTHONPATH=/home/ursin/Desktop/yaafe-v0.64/src_python
        # local import: yaafe is an optional, locally-built dependency
        import yaafelib as yaafe
        # use WAV, OGG, MP3 (and others) audio file formats
        #audiofile = '/home/ursin/data/09Audio_UNS/Amy MacDonald - This Is The Life (2007) - Pop/01-amy_macdonald-mr_rock_and_roll.mp3'
        audiofile = self.image_path
        yaafe.setVerbose(True)
        #print 'Yaafe v%s'%yaafe.getYaafeVersion()
        # Load important components
        if (yaafe.loadComponentLibrary('yaafe-io')!=0):
            pywikibot.warning(u'cannot load yaafe-io component library !')   # ! needed, else it will crash !
        # Build a DataFlow object using FeaturePlan
        fp = yaafe.FeaturePlan(sample_rate=44100, normalize=0.98, resample=False)
        #fp.addFeature('am: AmplitudeModulation blockSize=512 stepSize=256')
        #fp.addFeature('ac: AutoCorrelation blockSize=512 stepSize=256')
        #fp.addFeature('cdod: ComplexDomainOnsetDetection blockSize=512 stepSize=256')
        #fp.addFeature('erg: Energy blockSize=512 stepSize=256')
        #fp.addFeature('e: Envelope blockSize=512 stepSize=256')
        fp.addFeature('ess: EnvelopeShapeStatistics blockSize=512 stepSize=256')
        #fp.addFeature('f: Frames blockSize=512 stepSize=256')
        #fp.addFeature('lpc: LPC blockSize=512 stepSize=256')
        #fp.addFeature('lsf: LSF blockSize=512 stepSize=256')
        #fp.addFeature('l: Loudness blockSize=512 stepSize=256')
        #fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
        ## features: AutoCorrelationPeaksIntegrator, Cepstrum, Derivate, HistogramIntegrator, SlopeIntegrator, StatisticalIntegrator
        #fp.addFeature('mfcc_d1: MFCC blockSize=512 stepSize=256 > Derivate DOrder=1')
        #fp.addFeature('mfcc_d2: MFCC blockSize=512 stepSize=256 > Derivate DOrder=2')
        #fp.addFeature('mas: MagnitudeSpectrum blockSize=512 stepSize=256')
        #fp.addFeature('mes: MelSpectrum blockSize=512 stepSize=256')
        #fp.addFeature('obsi: OBSI blockSize=512 stepSize=256')
        #fp.addFeature('obsir: OBSIR blockSize=512 stepSize=256')
        #fp.addFeature('psh: PerceptualSharpness blockSize=512 stepSize=256')
        #fp.addFeature('psp: PerceptualSpread blockSize=512 stepSize=256')
        #fp.addFeature('scfpb: SpectralCrestFactorPerBand blockSize=512 stepSize=256')
        #fp.addFeature('sd: SpectralDecrease blockSize=512 stepSize=256')
        #fp.addFeature('sfa: SpectralFlatness blockSize=512 stepSize=256')
        #fp.addFeature('sfpb: SpectralFlatnessPerBand blockSize=512 stepSize=256')
        #fp.addFeature('sfu: SpectralFlux blockSize=512 stepSize=256')
        #fp.addFeature('sr: SpectralRolloff blockSize=512 stepSize=256')
        fp.addFeature('sss: SpectralShapeStatistics blockSize=512 stepSize=256')
        #fp.addFeature('ss: SpectralSlope blockSize=512 stepSize=256')
        #fp.addFeature('sv: SpectralVariation blockSize=512 stepSize=256')
        fp.addFeature('tss: TemporalShapeStatistics blockSize=512 stepSize=256')
        fp.addFeature('zcr: ZCR blockSize=512 stepSize=256')
        df = fp.getDataFlow()
        ## or load a DataFlow from dataflow file.
        #df = DataFlow()
        #df.load(dataflow_file)
        #fp.getDataFlow().save('')
        #print df.display()
        # configure an Engine
        engine = yaafe.Engine()
        engine.load(df)
        # extract features from an audio file using AudioFileProcessor
        afp = yaafe.AudioFileProcessor()
        #afp.setOutputFormat('csv','',{})   # ! needed, else it will crash ! (but now produces file output)
        #afp.processFile(engine,audiofile)
        #feats = engine.readAllOutputs()
        ## and play with your features
        #print feats
        # extract features from an audio file and write results to csv files
        afp.setOutputFormat('csv','output',{'Precision':'8'})
        afp.processFile(engine,audiofile)
        # this creates output/myaudio.wav.mfcc.csv, .mfcc_d1.csv and .mfcc_d2.csv files.
        ## extract features from a numpy array
        #audio = np.random.randn(1,100000)
        #feats = engine.processAudio(audio)
        ## and play with your features
        #print feats
        # read the csv files yaafe wrote back in and average each feature
        import csv
        data = {}
        for ext in ['ess', 'sss', 'tss', 'zcr']:
            fn = 'output' + audiofile + ('.%s.csv' % ext)
            with open(fn, 'rb') as csvfile:
                reader = csv.reader(csvfile, delimiter=',')
                d = [row for row in reader]
            d = np.array(d[5:])   # cut header and convert to numpy
            d = np.float_(d)
            # column-wise mean over all analysis frames
            d = tuple(np.average(d, axis=0))
            pywikibot.output(ext)
            #if ext in ['ess', 'sss', 'tss']:
            #    pywikibot.output(u"centroid: %s\nspread: %s\nskewness: %s\nkurtosis: %s\n" % d)
            #elif ext in ['zcr']:
            #    pywikibot.output(u"zero-crossing rate: %s\n" % d)
            data[ext.upper()] = d
            os.remove(fn)
        # remove folder too...
        self._features['Audio'] = [data]
        return


class _MidiFile(_UnknownFile):
    def getFeatures(self):
        """Extract features from a MIDI file (symbolic audio features)."""
        self._detect_AudioFeatures()   # Audio
        return self._features

    def _detect_HeaderAndMetadata(self):
        """Collect textual metadata embedded in the MIDI file into
        self._properties['Metadata'] (meta events plus music21 metadata)."""
        #_UnknownFile._detect_HeaderAndMetadata(self)
        #result = {'Desc': self._properties['Metadata'][0]['Desc'].splitlines()}
        result = {'Desc': []}
        # extract data from midi file
        # http://valentin.dasdeck.com/midi/midifile.htm
        # http://stackoverflow.com/questions/3943149/reading-and-interpreting-data-from-a-binary-file-in-python
        ba = bytearray(open(self.file_name, 'rb').read())
        i = -1
        # scan for FF 01 (text) and FF 02 (copyright) meta events
        for key, data in [('Text', '\x01'), ('Copyright', '\x02')]:#, ('Lyrics', '\x05')]:
            key = 'Desc'   # everything is collected under one key
            #result[key] =
            #result[key] = []
            while True:
                # find the next FF <type> meta event after position i
                i = ba.find('\xff%s' % data, i+1)
                if i < 0:   # something found?
                    break
                # ba[i+2] is the event's length byte; e = end of its payload
                e = (i+3+ba[i+2])
                if ba[e] != 0:   # length match with string end (00)?
                    e = ba.find('\x00', (i+3+ba[i+2]))
                result[key].append(ba[i+3:e].decode('latin-1').strip())
            #result[key] = u'\n'.join(result[key])
        result[key] = u'\n'.join(result[key])
        if not result['Desc']:
            result['Desc'] = u'-'
        ## find specific info in extracted data
        #print [item.strip() for item in re.findall('Generated .*?\n', result['Text'])]
        ##u"Cr'eateur: GNU LilyPond 2.0.1"
        #import dateutil.parser
        #dates = []
        #for line in result['Text'].splitlines():
        #    # http://stackoverflow.com/questions/3276180/extracting-date-from-a-string-in-python
        #    try:
        #        dates.append(dateutil.parser.parse(line, fuzzy=True).isoformat(' ').decode('utf-8'))
        #    except ValueError:
        #        pass
        #print dates
        # merge in whatever metadata music21 can parse (best effort)
        import _music21 as music21
        try:
            s = music21.converter.parse(self.file_name)
            if s.metadata:
                pywikibot.output(unicode(s.metadata))
                result.update(s.metadata)
        except music21.midi.base.MidiException:
            pass
        self._properties['Metadata'] = [result]
        return

    # MIME: 'audio/midi; charset=binary'
    def _detect_Properties(self):
        """Retrieve as much file property info possible, especially the same
        as commons does in order to compare if those libraries (ImageMagick,
        ...) are buggy (thus explicitely use other software for independence)"""
        result = { 'Format':     u'%s' % self.file_mime[1].upper(),
                   'Pages':      0,
                   'Dimensions': self.image_size,
                   'Filesize':   os.path.getsize(self.file_name),
                   'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
        #self._properties['Properties'] = [result]
        self._properties['Properties'][0].update(result)
        return

    # midi audio feature extraction
    def _detect_AudioFeatures(self):
        """Extract symbolic (jSymbolic) audio features via music21 into
        self._features['Audio']."""
        import _music21 as music21
        #music21.features.jSymbolic.getCompletionStats()
        try:
            #audiofile = '/home/ursin/Desktop/3_Ships.mid'
            #s = music21.midi.translate.midiFilePathToStream(self.file_name)
            s = music21.converter.parse(self.file_name)
        except music21.midi.base.MidiException:
            pywikibot.warning(u'unknown file type [_detect_AudioFeatures]')
            return
        #fs = music21.features.jSymbolic.extractorsById
        #for k in fs:
        #    for i in range(len(fs[k])):
        #        if fs[k][i] is not None:
        #            n = fs[k][i].__name__
        #            if fs[k][i] not in music21.features.jSymbolic.featureExtractors:
        #                n += " (not implemented)"
        #                print k, i, n
        #            else:
        #                fe = fs[k][i](s)
        #                print k, i, n,
        #                try:
        #                    f = fe.extract()
        #                    print f.name, f.vector
        #                except AttributeError:
        #                    print "ERROR"
        data = {'RegisterImportance': (music21.features.jSymbolic.ImportanceOfBassRegisterFeature(s).extract().vector[0],
                                       music21.features.jSymbolic.ImportanceOfMiddleRegisterFeature(s).extract().vector[0],
                                       music21.features.jSymbolic.ImportanceOfHighRegisterFeature(s).extract().vector[0],),
                'NoteDuration':       (music21.features.jSymbolic.AverageNoteDurationFeature(s).extract().vector[0],
                                       music21.features.jSymbolic.MaximumNoteDurationFeature(s).extract().vector[0],),
                'IndependentVoices':  (music21.features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s).extract().vector[0],
                                       music21.features.jSymbolic.MaximumNumberOfIndependentVoicesFeature(s).extract().vector[0],),
                'MostCommonPitch':    music21.features.jSymbolic.MostCommonPitchFeature(s).extract().vector[0],
                'Tempo':              music21.features.jSymbolic.InitialTempoFeature(s).extract().vector[0],
                'Duration':           s.highestTime,
                #'Metadata':          s.metadata if s.metadata else u'',
                'Lyrics':             s.lyrics(recurse=True) if s.lyrics(recurse=True) else u'',}
        #print music21.text.assembleLyrics(s)
        #print s.duration
        #print s.offsetMap
        #print s.measureOffsetMap()
        #print s.seconds
        #print s.secondsMap
        self._features['Audio'] = [data]
        return


# dispatch table: (mime maintype, subtype) -> handler class; '*' is the fallback
# http://commons.wikimedia.org/wiki/File_formats
_FILETYPES = { '*':                       _UnknownFile,
               (       'image', 'jpeg'):  _JpegFile,
               (       'image', 'png'):   _PngFile,
               (       'image', 'gif'):   _GifFile,
               (       'image', 'tiff'):  _TiffFile,
               (       'image', 'x-xcf'): _XcfFile,
               (       'image', 'svg+xml'): _SvgFile,   # unify/merge them?
               ('application', 'xml'):    _SvgFile,     #
               ('application', 'pdf'):    _PdfFile,
#  djvu: python-djvulibre or python-djvu for djvu support
#  http://pypi.python.org/pypi/python-djvulibre/0.3.9
#               (       'image', 'vnd.djvu'): DjvuFile,
               (       'audio', 'midi'):  _MidiFile,
               ('application', 'ogg'):    _OggFile,}
#               (           '?', '?'):    _WebMFile,}


def GenericFile(file_name):
    """Factory: detect the file's MIME type via libmagic and return the
    matching handler instance (falls back to _UnknownFile)."""
    # 'magic' (libmagic)
    m = magic.open(magic.MAGIC_MIME)   # or 'magic.MAGIC_NONE'
    m.load()
    # NOTE(review): pattern should be a raw string r'[/;\s]' - '\s' relies on
    # Python passing unknown escapes through unchanged
    file_mime = re.split('[/;\s]', m.file(file_name))
    mime = mimetypes.guess_all_extensions('%s/%s' % tuple(file_mime[0:2]))
    if mime and (os.path.splitext(file_name)[1].lower() not in mime):
        pywikibot.warning(u'File extension does not match MIME type! File extension should be %s.'
                          % mime)
    # split detection and extraction according to file types; _JpegFile, ...
    GenericFile = _FILETYPES.get(tuple(file_mime[:2]), _FILETYPES['*'])
    return GenericFile(file_name, file_mime)


# all classification and categorization methods and definitions - default variation
# use simplest classification I can think of (self-made) and do categorization
# mostly based on filtered/reported features
class CatImages_Default(object):
    # feature groups to skip during reporting
    #ignore = []
    ignore = ['color']

    # minimal number of detected people/faces that counts as a 'group'
    _thrhld_group_size = 4
    #_thrshld_guesses = 0.1
    # default confidence assigned when no better estimate exists
    _thrshld_default = 0.75

    # for '_detect_Trained': (category label, opencv cascade file[, scale])
    cascade_files = [(u'Legs',   'haarcascade_lowerbody.xml'),
                     (u'Torsos', 'haarcascade_upperbody.xml'),
                     (u'Ears',   'haarcascade_mcs_leftear.xml'),
                     (u'Ears',   'haarcascade_mcs_rightear.xml'),
                     (u'Eyes',   'haarcascade_lefteye_2splits.xml'),    # (http://yushiqi.cn/research/eyedetection)
                     (u'Eyes',   'haarcascade_righteye_2splits.xml'),   # (http://yushiqi.cn/research/eyedetection)
                     #externals/opencv/haarcascades/haarcascade_mcs_lefteye.xml
                     #externals/opencv/haarcascades/haarcascade_mcs_righteye.xml
                     # (others include indifferent (left and/or right) and pair)
                     (u'Automobiles', 'cars3.xml'),   # http://www.youtube.com/watch?v=c4LobbqeKZc
                     (u'Hands', '1256617233-2-haarcascade-hand.xml', 300.),]   # http://www.andol.info/
    # ('Hands' does not behave very well, in fact it detects any kind of skin and other things...)
    #(u'Aeroplanes', 'haarcascade_aeroplane.xml'),]   # e.g. for 'Category:Unidentified aircraft'

    # very simple / rought / poor-man's min. thresshold classification
    # (done by guessing, does not need to be trained)
    # replace/improve this with RTrees, KNearest, Boost, SVM, MLP, NBayes, ...
    def classifyFeatures(self):
        """Assign a heuristic 'Confidence' in [0,1] to every detected feature
        entry in self._info (faces, color regions, people, cascades, optical
        codes, chessboards)."""
        # classification of detected features (should use RTrees, KNearest, Boost, SVM, MLP, NBayes, ...)
        # ??? (may be do this in '_cat_...()' or '_filter_...()' ?!?...)
        # Faces and eyes (opencv pre-trained haar and extracted EXIF data)
        for i in range(len(self._info['Faces'])):
            if self._info['Faces'][i]['Type'] == u'Exif':
                # camera-reported face: trust with the default confidence
                c = self._thrshld_default
            else:
                # each detected eye adds 0.25 on top of a 0.5 base
                c = (len(self._info['Faces'][i]['Eyes']) + 2.) / 4.
            self._info['Faces'][i]['Confidence'] = c
            self._info['Faces'][i]['ID'] = i+1
        # Segments and colors / Average color
        #max_dim = max(self.image_size)
        for i in range(len(self._info['ColorRegions'])):
            data = self._info['ColorRegions'][i]
            # has to be in descending order since only 1 resolves (!)
            #if (data['Coverage'] >= 0.40) and (data['Delta_E'] <= 5.0):
            #    c = 1.0
            ##elif (data['Coverage'] >= 0.20) and (data['Delta_E'] <= 15.0):
            ##elif (data['Coverage'] >= 0.20) and (data['Delta_E'] <= 10.0):
            #elif (data['Coverage'] >= 0.25) and (data['Delta_E'] <= 10.0):
            #    c = 0.75
            #elif (data['Coverage'] >= 0.10) and (data['Delta_E'] <= 20.0):
            #    c = 0.5
            #else:
            #    c = 0.1
            # soft power-law mapping of coverage to confidence
            ca = (data['Coverage'])**(1./7)   # 0.15 -> ~0.75
            #ca = (data['Coverage'])**(1./6)  # 0.20 -> ~0.75
            #ca = (data['Coverage'])**(1./5)  # 0.25 -> ~0.75
            #ca = (data['Coverage'])**(1./4)  # 0.35 -> ~0.75
            ##cb = (0.02 * (50. - data['Delta_E']))**(1.2)    # 10.0 -> ~0.75
            #cb = (0.02 * (50. - data['Delta_E']))**(1./2)    # 20.0 -> ~0.75
            ##cb = (0.02 * (50. - data['Delta_E']))**(1./3)   # 25.0 -> ~0.75
            #cc = (1. - (data['Delta_R']/max_dim))**(1.)      # 0.25 -> ~0.75
            #c = ( 3*ca + cb ) / 4
            #c = ( cc + 6*ca + 2*cb ) / 9
            c = ca
            self._info['ColorRegions'][i]['Confidence'] = c
        # People/Pedestrian (opencv pre-trained hog and haarcascade)
        for i in range(len(self._info['People'])):
            data = self._info['People'][i]
            if (data['Coverage'] >= 0.20):
                c = 0.75
            # NOTE(review): this second 'if' should almost certainly be 'elif';
            # as written, Coverage >= 0.20 first sets c = 0.75 and is then
            # immediately overwritten with c = 0.5 by the branch below
            if (data['Coverage'] >= 0.10):   # at least 10% coverage needed
                c = 0.5
            else:
                c = 0.1
            self._info['People'][i]['Confidence'] = c
        # general (opencv pre-trained, third-party and self-trained haar
        # and cascade) classification
        for cf in self.cascade_files:
            cat = cf[0]
            for i in range(len(self._info[cat])):
                data = self._info[cat][i]
                # detect images with this as one of the main contents only thus
                # high coverage requested as a minimal confidence estimation
                self._info[cat][i]['Confidence'] = (data['Coverage'])**(1./5)   # 0.25 -> ~0.75
        # barcode and Data Matrix recognition (libdmtx/pydmtx, zbar, gocr?)
        for i in range(len(self._info['OpticalCodes'])):
            self._info['OpticalCodes'][i]['Confidence'] = min(0.75*self._info['OpticalCodes'][i]['Quality']/10., 1.)
        # Chessboard (opencv reference detector); 49 = full 7x7 inner-corner grid
        for i in range(len(self._info['Chessboard'])):
            self._info['Chessboard'][i]['Confidence'] = len(self._info['Chessboard'][i]['Corners'])/49.
        ## Geometric object (opencv hough line, circle, edges, corner, ...)
        #if self._info['Geometry']:
        #    self._info['Geometry'][0]['Confidence'] = 1. - self._info['Geometry'][0]['Edge_Ratio']

    # Each '_cat_*' method below returns (category name, relevance) where a
    # truthy relevance proposes the category for the current file.

    # Category:Unidentified people
    def _cat_people_People(self):
        #relevance = bool(self._info_filter['People'])
        relevance = self._cat_people_Groups()[1]
        return (u'Unidentified people', relevance)

    # Category:Unidentified people
    #def _cat_multi_People(self):
    def _cat_face_People(self):
        relevance = bool(self._info_filter['Faces'])
        #relevance = bool(self._info_filter['People']) or relevance
        return (u'Unidentified people', relevance)

    # Category:Groups
    def _cat_people_Groups(self):
        result = self._info_filter['People']
        # enough people AND not a graphics/drawing (those give false positives)
        relevance = (len(result) >= self._thrhld_group_size) and \
                    (not self._cat_coloraverage_Graphics()[1])
        return (u'Groups', relevance)

    # Category:Groups
    def _cat_face_Groups(self):
        result = self._info_filter['Faces']
        #if not (len(result) > 1):   # 5 should give 0.75 and get reported
        #    relevance = 0.
        #else:
        #    relevance = 1 - 1./(len(result)-1)
        relevance = (len(result) >= self._thrhld_group_size)
        return (u'Groups', relevance)

    # Category:Faces
    def _cat_face_Faces(self):
        result = self._info_filter['Faces']
        #return (u'Faces', ((len(result) == 1) and (result[0]['Coverage'] >= .50)))
        return (u'Faces', ((len(result) == 1) and (result[0]['Coverage'] >= .40)))

    # Category:Portraits
    def _cat_face_Portraits(self):
        result = self._info_filter['Faces']
        #return (u'Portraits', ((len(result) == 1) and (result[0]['Coverage'] >= .25)))
        return (u'Portraits', ((len(result) == 1) and (result[0]['Coverage'] >= .20)))

    # Category:Barcode
    def _cat_code_Barcode(self):
        relevance = bool(self._info_filter['OpticalCodes'])
        return (u'Barcode', relevance)

    # Category:Chessboards
    def _cat_chess_Chessboards(self):
        relevance = bool(self._info_filter['Chessboard'])
        return (u'Chessboards', relevance)

    # Category:Books (literature) in PDF
    def _cat_text_BooksPDF(self):
        pdf = u'PDF' in self._info_filter['Properties'][0]['Format']
        result = self._info_filter['Text']
        # book heuristic: many pages with a substantial amount of text
        relevance = pdf and len(result) and \
                    (self._info_filter['Properties'][0]['Pages'] >= 10) and \
                    (result[0]['Size'] >= 5E4) and (result[0]['Lines'] >= 1000)
        return (u'Books (literature) in PDF', relevance)

    # Category:Animated GIF
    # Category:Animated PNG
    # (Category:Animated SVG)
    def _cat_prop_Animated_general(self):
        result = self._info_filter['Properties']
        # multi-page GIF/PNG means animation
        relevance = result and (result[0]['Pages'] > 1) and \
                    (result[0]['Format'] in [u'GIF', u'PNG'])
        return (u'Animated %s' % result[0]['Format'], relevance)

    # Category:Human ears
    def _cat_ears_HumanEars(self):
        relevance = bool(self._info_filter['Ears'])
        return (u'Human ears', relevance)

    # Category:Human eyes
    def _cat_eyes_HumanEyes(self):
        relevance = bool(self._info_filter['Eyes'])
        return (u'Human eyes', relevance)

    # Category:Ogg sound files
    def _cat_streams_OggSoundFiles(self):
        result = self._info_filter['Streams']
        # a single audio stream, nothing else
        return (u'Ogg sound files', ((len(result) == 1) and (u'audio/' in result[0]['Format'])))

    # Category:Videos
    def _cat_streams_Videos(self):
        result = self._info_filter['Streams']
        # any video stream present
        return (u'Videos', (True in [u'video/' in s['Format'] for s in result]))

    # Category:Graphics
    def _cat_coloraverage_Graphics(self):
        result = self._info_filter['ColorAverage']
        # flat gradient with few histogram peaks indicates drawn graphics
        relevance = (result and result[0]['Gradient'] < 0.1) and \
                    (0.005 < result[0]['Peaks'] < 0.1)   # black/white texts are below that
        #(result[0]['FFT_Peaks'] < 0.2)   # has to be tested first !!!
        return (u'Graphics', bool(relevance))

    # Category:MIDI files created with GNU LilyPond
    def _cat_meta_MIDIfilescreatedwithGNULilyPond(self):
        result = self._info_filter['Metadata']
        relevance = len(result) and ('Desc' in result[0]) and \
                    (u"Generated automatically by: GNU LilyPond" in
                     result[0]['Desc'])
        # NOTE(review): the remainder of this method is truncated in this chunk
        return (u'MIDI files
created with GNU LilyPond', bool(relevance))2901 # Category:Bitmap_from_Inkscape (png)2902 def _cat_meta_BitmapfromInkscape(self):2903 result = self._info_filter['Metadata']2904 relevance = len(result) and ('Software' in result[0]) and \2905 (u"www.inkscape.org" in2906 result[0]['Software'].lower())2907 return (u'Bitmap from Inkscape', bool(relevance))2908 # Category:Created_with_Inkscape (svg)2909 def _cat_meta_CreatedwithInkscape(self):2910 result = self._info_filter['Metadata']2911 relevance = len(result) and ('Output_Extension' in result[0]) and \2912 (u"org.inkscape.output.svg.inkscape" in2913 result[0]['Output_Extension'].lower())2914 return (u'Created with Inkscape', bool(relevance))2915 # Category:Created_with_MATLAB (png)2916 # Category:Created_with_MATLAB (svg)2917 def _cat_meta_CreatedwithMATLAB(self):2918 result = self._info_filter['Metadata']2919 relevance = len(result) and \2920 ((('Software' in result[0]) and \2921 (u"MATLAB, The Mathworks, Inc." in 2922 result[0]['Software'])) \2923 or \2924 (('Desc' in result[0]) and \2925 (u"Matlab Figure" in 2926 result[0]['Desc'])) )2927 return (u'Created with MATLAB', bool(relevance))2928 # Category:Created_with_PLOT2SVG (svg) [new]2929 def _cat_meta_CreatedwithPLOT2SVG(self):2930 result = self._info_filter['Metadata']2931 relevance = len(result) and ('Desc' in result[0]) and \2932 (u"Converted by PLOT2SVG" in2933 result[0]['Desc'])2934 return (u'Created with PLOT2SVG', bool(relevance))2935 # Category:Created_with_ImageMagick (jpg)2936 def _cat_meta_CreatedwithImageMagick(self):2937 result = self._info_filter['Metadata']2938 relevance = len(result) and ('Software' in result[0]) and \2939 (u"ImageMagick" in2940 result[0]['Software'])2941 return (u'Created with ImageMagick', bool(relevance))2942 # Category:Created_with_Adobe_ImageReady (png)2943 def _cat_meta_CreatedwithAdobeImageReady(self):2944 result = self._info_filter['Metadata']2945 relevance = len(result) and ('Software' in result[0]) and \2946 (u"Adobe 
ImageReady" in2947 result[0]['Software'])2948 return (u'Created with Adobe ImageReady', bool(relevance))2949 # Category:Created_with_Adobe_Photoshop (jpg)2950 def _cat_meta_CreatedwithAdobePhotoshop(self):2951 result = self._info_filter['Metadata']2952 relevance = len(result) and ('Software' in result[0]) and \2953 (u"Adobe Photoshop" in2954 result[0]['Software'])2955 return (u'Created with Adobe Photoshop', bool(relevance))2956 # Category:Created_with_Picasa (jpg)2957 def _cat_meta_CreatedwithPicasa(self):2958 result = self._info_filter['Metadata']2959 relevance = len(result) and ('Software' in result[0]) and \2960 (u"Picasa" in2961 result[0]['Software'])2962 return (u'Created with Picasa', bool(relevance))2963 # Category:Created_with_Qtpfsgui (jpg)2964 def _cat_meta_CreatedwithQtpfsgui(self):2965 result = self._info_filter['Metadata']2966 relevance = len(result) and ('Software' in result[0]) and \2967 (u"Created with opensource tool Qtpfsgui" in2968 result[0]['Software'])2969 return (u'Created with Qtpfsgui', bool(relevance))2970 # Category:Created_with_Autopano (jpg)2971 def _cat_meta_CreatedwithAutopano(self):2972 result = self._info_filter['Metadata']2973 relevance = len(result) and ('Software' in result[0]) and \2974 (u"Autopano" in2975 result[0]['Software'])2976 return (u'Created with Autopano', bool(relevance))2977 # Category:Created_with_Xmgrace (png)2978 def _cat_meta_CreatedwithXmgrace(self):2979 result = self._info_filter['Metadata']2980 relevance = len(result) and ('Software' in result[0]) and \2981 (u"Grace" in2982 result[0]['Software'])2983 return (u'Created with Xmgrace', bool(relevance))2984 # Category:Created_with_darktable (jpg)2985 def _cat_meta_Createdwithdarktable(self):2986 result = self._info_filter['Metadata']2987 relevance = len(result) and ('Software' in result[0]) and \2988 (u"darktable" in2989 result[0]['Software'].lower())2990 return (u'Created with darktable', bool(relevance))2991 # Category:Created_with_easyHDR (jpg)2992 def 
_cat_meta_CreatedwitheasyHDR(self):2993 result = self._info_filter['Metadata']2994 relevance = len(result) and \2995 ((('Software' in result[0]) and \2996 (u"easyHDR" in2997 result[0]['Software'])) \2998 or \2999 (('Comment' in result[0]) and \3000 (u"easyHDR" in3001 result[0]['Comment'])) )3002 return (u'Created with easyHDR', bool(relevance))3003 # Category:Created_with_GIMP (jpg) [new]3004 def _cat_meta_CreatedwithGIMP(self):3005 result = self._info_filter['Metadata']3006 relevance = len(result) and \3007 ((('Software' in result[0]) and \3008 (u"GIMP" in3009 result[0]['Software'])) \3010 or \3011 (('Comment' in result[0]) and \3012 (u"Created with GIMP" in3013 result[0]['Comment'])) )3014 return (u'Created with GIMP', bool(relevance))3015 # Category:Created_with_R (svg)3016 def _cat_meta_CreatedwithR(self):3017 result = self._info_filter['Metadata']3018 relevance = len(result) and ('Desc' in result[0]) and \3019 (u"R SVG" in3020 result[0]['Desc'])3021 return (u'Created with R', bool(relevance))3022 # Category:Created_with_VectorFieldPlot (svg)3023 def _cat_meta_CreatedwithVectorFieldPlot(self):3024 result = self._info_filter['Metadata']3025 relevance = len(result) and ('Desc' in result[0]) and \3026 (u"created with VectorFieldPlot" in3027 result[0]['Desc'])3028 return (u'Created with VectorFieldPlot', bool(relevance))3029 # Category:Created_with_Chemtool (svg)3030 def _cat_meta_CreatedwithChemtool(self):3031 result = self._info_filter['Metadata']3032 relevance = len(result) and ('Desc' in result[0]) and \3033 (u"Created with Chemtool" in3034 result[0]['Desc'])3035 return (u'Created with Chemtool', bool(relevance))3036 # Category:Created_with_GNU_Octave (svg)3037 def _cat_meta_CreatedwithGNUOctave(self):3038 result = self._info_filter['Metadata']3039 relevance = len(result) and ('Desc' in result[0]) and \3040 (u"Produced by GNUPLOT" in3041 result[0]['Desc'])3042 return (u'Created with GNU Octave', bool(relevance))3043 # Category:Created_with_GeoGebra (svg)3044 
def _cat_meta_CreatedwithGeoGebra(self):3045 result = self._info_filter['Metadata']3046 relevance = len(result) and ('DescProducer' in result[0]) and \3047 (u"geogebra.d.W" in3048 result[0]['DescProducer']) #and \3049 #(u"FreeHEP Graphics2D Driver" in3050 # result[0]['DescCreator'])3051 return (u'Created with GeoGebra', bool(relevance))3052 # Category:Created_with_Stella (png)3053 def _cat_meta_CreatedwithStella(self):3054 result = self._info_filter['Metadata']3055 relevance = len(result) and ('Comment' in result[0]) and \3056 (u"Created using Stella4D" in3057 result[0]['Comment'])3058 return (u'Created with Stella', bool(relevance))3059 # Category:Created_with_PhotoStitch (jpg)3060 def _cat_meta_CreatedwithPhotoStitch(self):3061 result = self._info_filter['Metadata']3062 relevance = len(result) and ('Comment' in result[0]) and \3063 (u"LEAD Technologies Inc." in3064 result[0]['Comment'])3065 return (u'Created with PhotoStitch', bool(relevance))3066 # Category:Created_with_Scribus (pdf)3067 def _cat_meta_CreatedwithScribus(self):3068 result = self._info_filter['Metadata']3069 relevance = len(result) and ('Producer' in result[0]) and \3070 (u"Scribus PDF Library" in3071 result[0]['Producer'])3072 return (u'Created with Scribus', bool(relevance))3073 # Category:Created_with_OpenOffice.org (pdf)3074 def _cat_meta_CreatedwithOpenOfficeorg(self):3075 result = self._info_filter['Metadata']3076 relevance = len(result) and ('Producer' in result[0]) and \3077 (u"OpenOffice.org" in3078 result[0]['Producer'])3079 return (u'Created with OpenOffice.org', bool(relevance))3080 # Category:Created_with_Tux_Paint (pdf)3081 def _cat_meta_CreatedwithTuxPaint(self):3082 result = self._info_filter['Metadata']3083 relevance = len(result) and ('Software' in result[0]) and \3084 (u"Tux Paint" in3085 result[0]['Software'])3086 return (u'Created with Tux Paint', bool(relevance))3087 # Category:Created_with_Microsoft_Image_Composite_Editor (jpg)3088 def 
_cat_meta_CreatedwithMicrosoftImageCompositeEditor(self):3089 result = self._info_filter['Metadata']3090 relevance = len(result) and ('Software' in result[0]) and \3091 (u"Microsoft ICE" in3092 result[0]['Software'])3093 return (u'Created with Microsoft Image Composite Editor', bool(relevance))3094# TODO: make '_cat_meta_general(self)'3095 # Category:Categorized by DrTrigonBot3096 def _addcat_BOT(self):3097 # - ALWAYS -3098 return (u"Categorized by DrTrigonBot", True)3099 # (Category:BMP)3100 # (Category:PNG)3101 # (Category:JPEG)3102 # Category:TIFF files3103 # (may be more image formats/extensions according to PIL, e.g. SVG, ...)3104 # Category:PDF files3105 def _addcat_prop_general(self):3106 fmt = self._info_filter['Properties'][0]['Format']3107 if u'TIFF' in fmt:3108 fmt = u'TIFF images'3109 #elif u'SVG' in fmt:3110 # # additional to PIL (rsvg, ...)3111 # # should be added as template instead of category (!)3112 # fmt = u''3113 elif u'PDF' in fmt:3114 # additional to PIL (...)3115 fmt = u'PDF files'3116 else:3117 # disable ALL categorization, except the listed exceptions above3118 # (BMP, PNG, JPEG, OGG; no general catgeory available, ...)3119 fmt = u''3120 # PIL: http://www.pythonware.com/library/pil/handbook/index.htm3121 return (fmt, bool(fmt))3122# # TODO: add templates (conditional/additional like 'addcat')3123# # Category:SVG - Category:Valid SVG‎ - Category:Invalid SVG3124# # {{ValidSVG}} - {{InvalidSVG}}3125# def _addtempl_prop_SVN(self):3126# fmt = self._info_filter['Properties'][0]['Format']3127# d = { u'Valid SVG': u'{{ValidSVG}}',3128# u'Invalid SVG': u'{{InvalidSVG}}', }3129# fmt = d.get(fmt, u'')3130#3131# return (fmt, bool(fmt))3132# # Category:Unidentified people3133# def _guess_Classify_People(self):3134# pass3135# # Category:Unidentified maps3136# def _guess_Classify_Maps(self):3137# pass3138# # Category:Unidentified flags3139# def _guess_Classify_Flags(self):3140# pass3141# # Category:Unidentified plants3142# def 
#    def _guess_Classify_Plants(self):
#        pass
#    # Category:Unidentified coats of arms
#    def _guess_Classify_CoatsOfArms(self):
#        pass
#    # Category:Unidentified buildings
#    def _guess_Classify_Buildings(self):
#        pass
#    # Category:Unidentified trains
#    def _guess_Classify_Trains(self):
#        pass
#    # Category:Unidentified automobiles
#    def _guess_Classify_Automobiles(self):
#        pass
#    # Category:Unidentified buses
#    def _guess_Classify_Buses(self):
#        pass

    # Category:Human legs
    def _guess_legs_HumanLegs(self):
        """Guess 'Human legs': single detection with high image coverage."""
        result = self._info_filter['Legs']

        return (u'Human legs', ((len(result) == 1) and (result[0]['Coverage'] >= .40)))

    # Category:Human torsos
    def _guess_torsos_HumanTorsos(self):
        """Guess 'Human torsos': single detection with high image coverage."""
        result = self._info_filter['Torsos']

        return (u'Human torsos', ((len(result) == 1) and (result[0]['Coverage'] >= .40)))

    # Category:Automobiles
    def _guess_automobiles_Automobiles(self):
        """Guess 'Automobiles': single detection with high image coverage."""
        result = self._info_filter['Automobiles']

        return (u'Automobiles', ((len(result) == 1) and (result[0]['Coverage'] >= .40)))

    ## Category:Hands
    #def _guess_hands_Hands(self):
    #    result = self._info_filter['Hands']
    #
    #    return (u'Hands', ((len(result) == 1) and (result[0]['Coverage'] >= .50)))

    # Category:Black     (  0,   0,   0)
    # Category:Blue‎      (  0,   0, 255)
    # Category:Brown     (165,  42,  42)
    # Category:Green     (  0, 255,   0)
    # Category:Orange    (255, 165,   0)
    # Category:Pink‎      (255, 192, 203)
    # Category:Purple    (160,  32, 240)
    # Category:Red‎       (255,   0,   0)
    # Category:Turquoise ( 64, 224, 208)
    # Category:White‎     (255, 255, 255)
    # Category:Yellow    (255, 255,   0)
    # http://www.farb-tabelle.de/en/table-of-color.htm
    #def _collectColor(self):
    #def _cat_color_Black(self):
    #    info = self._info_filter['ColorRegions']
    #    for item in info:
    #        if (u'Black' == item[u'Color']):
    #            return (u'Black', True)
    #    return (u'Black', False)
    # NOTE: double-underscore name is deliberate; name mangling keeps this
    # helper out of the '_cat_*' pattern scan done in 'load_licenses'.
    def __cat_color_general(self, col):
        """Return (col, True) if any detected color region is named 'col'."""
        info = self._info_filter['ColorRegions']
        for item in info:
            if (col == item[u'Color']):
                return (col, True)
        return (col, False)

    # one '_cat_color_<Name>' entry point per supported color name
    _cat_color_Black     = lambda self: self.__cat_color_general(u'Black')
    _cat_color_Blue      = lambda self: self.__cat_color_general(u'Blue')
    _cat_color_Brown     = lambda self: self.__cat_color_general(u'Brown')
    _cat_color_Green     = lambda self: self.__cat_color_general(u'Green')
    _cat_color_Orange    = lambda self: self.__cat_color_general(u'Orange')
    _cat_color_Pink      = lambda self: self.__cat_color_general(u'Pink')
    _cat_color_Purple    = lambda self: self.__cat_color_general(u'Purple')
    _cat_color_Red       = lambda self: self.__cat_color_general(u'Red')
    _cat_color_Turquoise = lambda self: self.__cat_color_general(u'Turquoise')
    _cat_color_White     = lambda self: self.__cat_color_general(u'White')
    _cat_color_Yellow    = lambda self: self.__cat_color_general(u'Yellow')


# all classification and categorization methods and definitions - SVM variation
# use 'pyml' SVM (libsvm) classifier
# may be 'scikit-learn' or 'opencv' (svm, a.o.) could be of some use too
class CatImages_SVM(CatImages_Default):
    # class labels of the pre-trained model (1-based index in 'pred' output)
    trained_cat = [u'Human_ears', u'Male faces']

    # dummy: deactivated
    def classifyFeatures(self):
        """Assign a fixed confidence of 1.0 to every gathered feature."""
        for key in self._info:
            for i in range(len(self._info[key])):
                self._info[key][i]['Confidence'] = 1.0

    # (all trained categories)
    # http://scipy-lectures.github.com/advanced/scikit-learn/index.html
    # http://mlpy.sourceforge.net/docs/3.5/index.html
    # http://docs.opencv.org/modules/ml/doc/ml.html
    def _cat_multi_generic(self):
        """Predict one trained category with a pre-built libsvm model.

        The feature vector is simply the per-key detection count; the model
        is loaded from 'cache/test.csf'.  Always returns relevance True.
        """
        # IT LOOKS LIKE (MAY BE) scikit-learn IS BETTER AND HAS MORE OPTIONS THAN pyml ... ?!!!
        # create classifier feature set
        # !!!currently number of detected features is used only -> lots of room for improvements!!!
        features = []
        for key in sorted(self._info):
            #print key, len(self._info[key]), self._info[key]
            features.append( len(self._info[key]) )
        features = np.array(features)

        linear_svm = mlpy.LibSvm().load_model('cache/test.csf')
        yp  = linear_svm.pred(features)
        cat = self.trained_cat[int(yp)-1]
        #print linear_svm.labels()
        # confidence of match?

        return (cat, True)


# Image by content categorization derived from 'checkimages.py'.
class CatImagesBot(checkimages.checkImagesBot, CatImages_Default):
#class CatImagesBot(checkimages.checkImagesBot, CatImages_SVM):
#    def __init__(self, site, logFulNumber = 25000, sendemailActive = False,
#                 duplicatesReport = False, logFullError = True): pass
#    def setParameters(self, imageName): pass

    # or may be '__init__' ... ???
    def load_licenses(self):
        """Collect the '_<kind>_...' categorization methods by name pattern
        and look up available specialized 'FileContentsByBot' sub-templates.

        Returns an empty list (kept for checkimages API compatibility).
        """
        #pywikibot.output(u'\n\t...Listing the procedures available...\n')
        pywikibot.output(u'\n\t...Listing the procedures used...\n')

        # dispatcher tables: method names grouped by their '_<kind>_' infix
        self._funcs = {'filter': [], 'cat': [], 'addcat': [], 'guess': []}
        for item in dir(self):
            s = item.split('_')
            # needs at least '_<kind>_<name>'; skip unknown kinds and ignores
            if (len(s) < 3) or (s[1] not in self._funcs) or (s[2] in self.ignore):
                continue
            pywikibot.output( item )
            self._funcs[s[1]].append( item )

        self.tmpl_available_spec = tmpl_available_spec
        gen = pagegenerators.PrefixingPageGenerator(prefix = u'Template:FileContentsByBot/')
        buf = []
        for item in gen:
            item = item.title()
            if (item[-4:] == "/doc"):   # all docs
                continue
            item = os.path.split(item)[1]
            if (item[0].lower() == item[0]):   # e.g. 'generic'
                continue
            buf.append( item )
        if buf:
            self.tmpl_available_spec = buf
            pywikibot.output( u'\n\t...Following specialized templates found, check them since they are used now...\n' )
            pywikibot.output( u'tmpl_available_spec = [ %s ]\n' % u", ".join(buf) )

        return []

    def downloadImage(self):
        """Fetch the current media file into the local 'cache/' directory
        (skipped when a cached copy already exists) and keep the server-side
        file info in 'self._wikidata'.
        """
        #print self.image_path
        pywikibot.output(u'Processing media %s ...' % self.image.title(asLink=True))

        image_filename  = os.path.split(self.image.fileUrl())[-1]
        # cache path limited to the last 128 chars of the file name
        self.image_path = urllib2.quote(os.path.join(scriptdir, ('cache/' + image_filename[-128:])))

        self._wikidata = self.image._latestInfo   # all info wikimedia got from content (mime, sha1, ...)
        #print self._wikidata
        #print self._wikidata['mime']
        #print self._wikidata['sha1']
        #print self._wikidata['metadata']
        #for item in self._wikidata['metadata']:
        #    print item['name'], item['value']

        if not os.path.exists(self.image_path):
            pywikibot.get_throttle()
            f_url, data = self.site.getUrl(self.image.fileUrl(), no_hostname=True,
                                           back_response=True)
            # needed patch for 'getUrl' applied upstream in r10441
            # (allows to re-read from back_response)
            data = f_url.read()
            del f_url   # free some memory (no need to keep a copy...)

            f = open(self.image_path, 'wb')
            f.write( data )
            f.close()

    # LOOK ALSO AT: checkimages.CatImagesBot.checkStep
    # (and category scripts/bots too...)
    def checkStep(self):
        """Run the full pipeline for one file: gather features, classify,
        filter, then evaluate all 'cat'/'addcat'/'guess' category tests.

        Returns the list of reliably detected categories; guesses go to
        'self._result_guess' (only when the global 'useGuesses' is set).
        """
        self.thrshld = self._thrshld_default

        self._info         = {}   # used for LOG/DEBUG OUTPUT ONLY
        self._info_filter  = {}   # used for CATEGORIZATION
        self._result_check = []
        self._result_add   = []
        self._result_guess = []

        # flush internal buffers
        for attr in ['_buffer_EXIF', '_buffer_FFMPEG', '_buffer_Geometry']:#, '_content_text']:
            if hasattr(self, attr):
                delattr(self, attr)

        # gather all features (information) related to current image
        self.gatherFeatures()

        # classification of detected features (should use RTrees, KNearest, Boost, SVM, MLP, NBayes, ...)
        # ??? (may be do this in '_cat_...()' or '_filter_...()' ?!?...)
        # http://opencv.itseez.com/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.html
        # http://stackoverflow.com/questions/8687885/python-opencv-svm-implementation
        # https://code.ros.org/trac/opencv/browser/trunk/opencv/samples/python2/letter_recog.py?rev=6480
        self.classifyFeatures()   # assign confidences
        # replace/improve this with RTrees, KNearest, Boost, SVM, MLP, NBayes, ...

        # information template: use filter to select from gathered features
        # the ones that get reported
        self._info_filter = {}
        for item in self._funcs['filter']:
            self._info_filter.update( getattr(self, item)() )

        # categorization: use explicit searches for classification (rel = ?)
        for item in self._funcs['cat']:
            (cat, rel) = getattr(self, item)()
            #print cat, result, len(result)
            if rel:
                self._result_check.append( cat )
        self._result_check = list(set(self._result_check))

        # categorization: conditional (only if the ones before are present)
        # (does not trigger report to page)
        for item in self._funcs['addcat']:
            (cat, rel) = getattr(self, item)()
            #print cat, result, len(result)
            if rel:
                self._result_add.append( cat )
        self._result_add = list(set(self._result_add))

        # categorization: use guesses for unreliable classification (rel = 0.1)
        if not useGuesses:
            return self._result_check
        for item in self._funcs['guess']:
            (cat, rel) = getattr(self, item)()
            #print cat, result, len(result)
            if rel:
                self._result_guess.append( cat )

        return self._result_check

    def tag_image(self):
        """Write results back to the file page: append the info template,
        add detected categories and category guesses, then save the page.

        Returns False when nothing was found or no Information/Artwork
        template could be located; True after a successful save.
        """
        self.clean_cache()

        #if not self._existInformation(self._info_filter):  # information available?
        if not (self._result_check + self._result_guess):   # category available?
            return False

        pywikibot.get_throttle()
        content = self.image.get()

        # check the type of template used on page; Information, Artwork, ...
        for temp in [u"Information", u"Artwork"]:
            pos = content.find(u'{{%s' % temp) + 2
            if pos > 1:   # 'find' returned -1 -> pos == 1 when not present
                break
        if pos > 1:
            # cosmetic changes: format the page well to have '\n\n' after the template
            # (brace counting walks to the '}}' that closes the found template)
            diff = content[:(pos-2)].count(u'{{') - content[:(pos-2)].count(u'}}')
            while (content[:pos].count(u'{{') - content[:pos].count(u'}}')) != diff:
                pos = content.find(u'}}', pos) + 2
            if content[pos:(pos+2)] != (u"\n"*2):
                content = content[:pos] + (u"\n"*2) + content[pos:].lstrip()
        else:
            pywikibot.warning(u'Page layout issue; Information template could '
                              u'not be found and thus the data not appended!')
            return False

        # append template and fill it with data
        content = self._append_to_template(content, temp, tmpl_FileContentsByBot)
        for i, key in enumerate(self._info_filter):
            item = self._info_filter[key]
            info = self._make_infoblock(key, item)
            if info:
                content = self._append_to_template(content, u"FileContentsByBot", info)

        # append categories
        tags = set([])
        for i, cat in enumerate(list(set(self._result_check + self._result_add))):
            tags.add( u"[[:Category:%s]]" % cat )
            content = pywikibot.replaceCategoryLinks(content, [cat], site=self.site, addOnly=True)

        # cleanup double categories, remove obsolete ones and add templates
        content = pywikibot.replaceCategoryLinks( content,
                      list(set(pywikibot.getCategoryLinks(content, site=self.site))),
                      site=self.site )
        content = self._remove_category_or_template(content, u"Uncategorized")   # template
        content = self._add_template(content, u"Check categories|year={{subst:#time:Y}}|month={{subst:#time:F}}|day={{subst:#time:j}}|category=[[Category:Categorized by DrTrigonBot]]", top=True)

        # add category guesses (as hidden comments only)
        for i, cat in enumerate(self._result_guess):
            content += u"\n<!--DrTrigonBot-guess-- [[Category:%s]] -->" % cat

        # verbosely output info about changes and apply them
        pywikibot.output(u"--- " * 20)
        pywikibot.output(content)
        pywikibot.output(u"--- " * 20)

        pywikibot.put_throttle()
        self.image.put( content, comment="bot automatic categorization; adding %s" % u", ".join(tags),
                        botflag=False )
# TODO: (work-a-round if https://bugzilla.wikimedia.org/show_bug.cgi?id=6421 not solved)
#        if hasattr(self, '_content_text'):
#            textpage = pywikibot.Page(self.site, os.path.join(self.image.title(), u'Contents/Text'))
#            textpage.put( self._content_text, comment="bot adding content from %s" % textpage.title(asLink=True),
#                          botflag=False )

        return True

    def log_output(self):
        # NOTE(review): method continues beyond this chunk of the file.
        # ColorRegions always applies here since there is at least 1 (THE average) color...
        ignore = ['Properties', 'Metadata', 'ColorAverage', 'ColorRegions', 'Geometry']

        #if not self._existInformation(self._info):  # information available?
        # information available? AND/OR category available?
        if not (self._existInformation(self._info, ignore = ignore) or self._result_check):
            return u""

        ret = []
        ret.append( u"" )
        ret.append( u"== [[:%s]] ==" % self.image.title() )
        ret.append( u'{|' )
        ret.append( u'|<div style="position:relative;">' )
        ret.append( u"[[%s|200px]]" % self.image.title() )
        ret.append( self._make_markerblock(self._info[u'Faces'], 200.,
                                           structure=['Position', 'Eyes', 'Mouth', 'Nose']) )
        ret.append( self._make_markerblock(self._info[u'People'], 200.,
                                           line='dashed') )
        ret.append( u"</div>" )
        ret.append( u'|<div style="position:relative;">' )
        ret.append( u"[[%s|200px]]" % self.image.title() )
        ret.append( self._make_markerblock(self._info[u'ColorRegions'], 200.)
)3432 ret.append( self._make_markerblock(self._info[u'OpticalCodes'], 200.,3433 line='dashed') )3434 ret.append( u"</div>" )3435 ret.append( u'|<div style="position:relative;">' )3436 ret.append( u"[[%s|200px]]" % self.image.title() )3437 ret.append( self._make_markerblock(self._info[u'Ears'], 200.) )3438 ret.append( self._make_markerblock(self._info[u'Eyes'], 200.) )3439 ret.append( self._make_markerblock(self._info[u'Legs'], 200.,3440 line='dashed') )3441 ret.append( self._make_markerblock(self._info[u'Torsos'], 200.,3442 line='dashed') )3443 ret.append( self._make_markerblock(self._info[u'Automobiles'], 200.,3444 line='dashed') )3445 #ret.append( self._make_markerblock(self._info[u'Hands'], 200.,3446 # line='dashed') )3447 ret.append( u"</div>" )3448 ret.append( u'|}' )3449 color = {True: "rgb(0,255,0)", False: "rgb(255,0,0)"}[bool(self._result_check + self._result_guess)]3450 ret.append( u"<div style='background:%s'>'''automatic categorization''': %s</div>" % (color, u", ".join(list(set(self._result_check + self._result_add)))) )3451 buf = []3452 for i, key in enumerate(self._info):3453 item = self._info[key]3454 info = self._make_infoblock(key, item, [])3455 if info:3456 buf.append( info )3457 ret.append( tmpl_FileContentsByBot[3:] + u"\n" + u"\n".join( buf ) + u"\n}}" )3458 return u"\n".join( ret )3459 def clean_cache(self):3460 if os.path.exists(self.image_path):3461 os.remove( self.image_path )3462 #if os.path.exists(self.image_path_JPEG):3463 # os.remove( self.image_path_JPEG )3464 ##image_path_new = self.image_path_JPEG.replace(u"cache/", u"cache/0_DETECTED_")3465 ##if os.path.exists(image_path_new):3466 ## os.remove( image_path_new )3467 # LOOK ALSO AT: checkimages.CatImagesBot.report3468 def report(self):3469 tagged = self.tag_image()3470 logged = self.log_output()3471 return (tagged, logged)3472 def _make_infoblock(self, cat, res, tmpl_available=None):3473 if not res:3474 return u''3475 if (tmpl_available == None):3476 tmpl_available = 
self.tmpl_available_spec3477 generic = (cat not in tmpl_available)3478 titles = res[0].keys()3479 if not titles:3480 return u''3481 result = []3482 #result.append( u'{{(!}}style="background:%s;"' % {True: 'green', False: 'red'}[report] )3483 if generic:3484 result.append( u"{{FileContentsByBot/generic|name=%s|" % cat )3485 buf = dict([ (key, []) for key in titles ])3486 for item in res:3487 for key in titles:3488 buf[key].append( self._output_format(item[key]) )3489 for key in titles:3490 result.append( u" {{FileContentsByBot/generic|name=%s|value=%s}}" % (key, u"; ".join(buf[key])) )3491 else:3492 result.append( u"{{FileContentsByBot/%s|" % cat )3493 for item in res:3494 result.append( u" {{FileContentsByBot/%s" % cat )3495 for key in titles:3496 if item[key]: # (work-a-round for empty 'Eyes')3497 result.append( self._output_format_flatten(key, item[key]) )3498 result.append( u" }}" )3499 result.append( u"}}" )3500 return u"\n".join( result )3501 def _output_format(self, value):3502 if (type(value) == type(float())):3503 # round/strip floats3504 return "%.3f" % value3505 else:3506 # output string representation of variable3507 return str(value)3508 def _output_format_flatten(self, key, value):3509 # flatten structured varible recursively3510 if (type(value) == type(tuple())) or (type(value) == type(list())):3511 buf = []3512 for i, t in enumerate(value):3513 buf.append( self._output_format_flatten(key + (u"-%02i" % i), t) )3514 return u"\n".join( buf )3515 else:3516 # end of recursion3517 return u" | %s = %s" % (key, self._output_format(value))3518 def _make_markerblock(self, res, size, structure=['Position'], line='solid'):3519 # same as in '_detect_Faces'3520 colors = [ (0,0,255),3521 (0,128,255),3522 (0,255,255),3523 (0,255,0),3524 (255,128,0),3525 (255,255,0),3526 (255,0,0),3527 (255,0,255) ]3528 result = []3529 for i, r in enumerate(res):3530 if ('RGB' in r):3531 color = list(np.array((255,255,255))-np.array(r['RGBref']))3532 else:3533 color = 
list(colors[i%8])3534 color.reverse()3535 color = u"%02x%02x%02x" % tuple(color)3536 3537 #scale = r['size'][0]/size3538 scale = self.image_size[0]/size3539 f = list(np.array(r[structure[0]])/scale)3540 3541 result.append( u'<div class="%s-marker" style="position:absolute; left:%ipx; top:%ipx; width:%ipx; height:%ipx; border:2px %s #%s;"></div>' % tuple([structure[0].lower()] + f + [line, color]) )3542 for ei in range(len(structure)-1):3543 data = r[structure[ei+1]]3544 if data and (not hasattr(data[0], '__iter__')): # Mouth and Nose are not lists3545 data = [ r[structure[ei+1]] ]3546 for e in data:3547 e = list(np.array(e)/scale)3548 3549 result.append( u'<div class="%s-marker" style="position:absolute; left:%ipx; top:%ipx; width:%ipx; height:%ipx; border:2px solid #%s;"></div>' % tuple([structure[ei+1].lower()] + e + [color]) )3550 return u"\n".join( result )3551 # place into 'textlib' (or else e.g. 'catlib'/'templib'...)3552 def _remove_category_or_template(self, text, name):3553 text = re.sub(u"[\{\[]{2}%s.*?[\}\]]{2}\n?" % name, u"", text)3554 return text3555 # place into 'textlib'3556 def _add_template(self, text, name, params={}, top=False, raw=False):3557 if top:3558 buf = [(u"{{%s}}" % name), text]3559 else:3560 if raw:3561 buf = [text, name]3562 else:3563 buf = [text, (u"{{%s}}" % name)]3564 return u"\n".join( buf )3565 # place into 'textlib' (or else e.g. 
'catlib'/'templib'...)3566 def _append_to_template(self, text, name, append):3567 # mask/search template to append to3568 pattern = re.compile(u"(\{\{%s.*?\n)(\s*\}\}\n{2})" % name, flags=re.S)3569 template = pattern.search(text).groups()3570 # append to template3571 template = u"".join( [template[0], append, u"\n", template[1]] )3572 # apply changes3573 text = pattern.sub(template, text)3574 return text3575 # gather data from all information interfaces3576 def gatherFeatures(self):3577 # split detection and extraction according to file types; _JpegFile, ...3578 with GenericFile(self.image_path) as gf:3579 gf.image = self.image # patch for _SvgFile needing url3580 for func in ['getProperties', 'getFeatures']:3581 result = getattr(gf, func)()3582 self._info.update(result)3583 self.image_size = gf.image_size3584 def _existInformation(self, info, ignore = ['Properties', 'Metadata', 'ColorAverage']):3585 result = []3586 for item in info:3587 if item in ignore:3588 continue3589 if info[item]:3590 result.append( item )3591 return result3592 def _filter_Properties(self):3593 # >>> never drop <<<3594 result = self._info['Properties']3595 return {'Properties': result}3596 def _filter_Metadata(self):3597 # >>> never drop <<<3598 result = self._info['Metadata']3599 return {'Metadata': result}3600 def _filter_Faces(self):3601 result = self._info['Faces']3602 if (len(result) < self._thrhld_group_size):3603 buf = []3604 for item in self._info['Faces']:3605 # >>> drop if below thrshld <<<3606 if (item['Confidence'] >= self.thrshld):3607 buf.append( item )3608 result = buf3609 return {'Faces': result}3610 def _filter_People(self):3611 result = self._info['People']3612 if (len(result) < self._thrhld_group_size):3613 buf = []3614 for item in self._info['People']:3615 # >>> drop if below thrshld <<<3616 if (item['Confidence'] >= self.thrshld):3617 buf.append( item )3618 result = buf3619 return {'People': result}3620 def _filter_ColorRegions(self):3621 #result = {}3622 result = []3623 
for item in self._info['ColorRegions']:3624 ## >>> drop wrost ones... (ignore all below 0.2) <<<3625 #if (result.get(item['Color'], {'Confidence': 0.2})['Confidence'] < item['Confidence']):3626 # result[item['Color']] = item3627 # >>> drop if below thrshld <<<3628 if (item['Confidence'] >= self.thrshld):3629 result.append( item )3630 #return {'ColorRegions': [result[item] for item in result]}3631 return {'ColorRegions': result}3632 def _filter_ColorAverage(self):3633 # >>> never drop <<<3634 result = self._info['ColorAverage']3635 return {'ColorAverage': result}3636 def _filter_OpticalCodes(self):3637 # use all, since detection should be very reliable3638 #result = self._info['OpticalCodes']3639 result = []3640 for item in self._info['OpticalCodes']:3641 # >>> drop if below thrshld <<<3642 if (item['Confidence'] >= self.thrshld):3643 result.append( item )3644 return {'OpticalCodes': result}3645 def _filter_Chessboard(self):3646 # use all, since detection should be very reliable3647 result = self._info['Chessboard']3648 return {'Chessboard': result}3649 def _filter_Text(self):3650 # use all, since detection should be very reliable3651 result = self._info['Text']3652 return {'Text': result}3653 def _filter_Legs(self):3654 result = []3655 for item in self._info['Legs']:3656 # >>> drop if below thrshld <<<3657 if (item['Confidence'] >= self.thrshld):3658 result.append( item )3659 return {'Legs': result}3660 def _filter_Torsos(self):3661 result = []3662 for item in self._info['Torsos']:3663 # >>> drop if below thrshld <<<3664 if (item['Confidence'] >= self.thrshld):3665 result.append( item )3666 return {'Torsos': result}3667 def _filter_Ears(self):3668 result = []3669 for item in self._info['Ears']:3670 # >>> drop if below thrshld <<<3671 if (item['Confidence'] >= self.thrshld):3672 result.append( item )3673 return {'Ears': result}3674 def _filter_Eyes(self):3675 result = []3676 for item in self._info['Eyes']:3677 # >>> drop if below thrshld <<<3678 if 
(item['Confidence'] >= self.thrshld):3679 result.append( item )3680 return {'Eyes': result}3681 def _filter_Automobiles(self):3682 result = []3683 for item in self._info['Automobiles']:3684 # >>> drop if below thrshld <<<3685 if (item['Confidence'] >= self.thrshld):3686 result.append( item )3687 return {'Automobiles': result}3688 def _filter_Streams(self):3689 # use all, (should be reliable)3690 result = self._info['Streams']3691 return {'Streams': result}3692# def _filter_Audio(self):3693# # use all, (should be reliable)3694# result = self._info['Audio']3695# return {'Audio': result}3696 #def _filter_Geometry(self):3697 # result = []3698 # for item in self._info['Geometry']:3699 # # >>> drop if below thrshld <<<3700 # if (item['Confidence'] >= self.thrshld):3701 # result.append( item )3702 # return {'Geometry': result}3703 #def _filter_Hands(self):3704 # result = []3705 # for item in self._info['Hands']:3706 # # >>> drop if below thrshld <<<3707 # if (item['Confidence'] >= self.thrshld):3708 # result.append( item )3709 # return {'Hands': result}3710# def _filter_Classify(self):3711# from operator import itemgetter3712# result = sorted(self._info['Classify'][0].items(), key=itemgetter(1))3713# result.reverse()3714# pywikibot.output(u' Best: %s' % result[:3] )3715# pywikibot.output(u'Worst: %s' % result[-3:] )3716#3717# # >>> dummy: drop all (not reliable yet since untrained) <<<3718# return {'Classify': []}3719def main():3720 """ Main function """3721 global useGuesses3722 # Command line configurable parameters3723 limit = 150 # How many images to check?3724# untagged = False # Use the untagged generator3725 sendemailActive = False # Use the send-email3726 train = False3727 generator = None3728 # default3729 if len(sys.argv) < 2:3730 sys.argv += ['-cat']3731 # debug: 'python catimages.py -debug'3732 # run/test: 'python catimages.py [-start:File:abc]'3733 sys.argv += ['-family:commons', '-lang:commons']3734 #sys.argv += ['-noguesses']3735 # try to resume last run 
and continue3736 if os.path.exists( os.path.join(scriptdir, 'cache/catimages_start') ):3737 shutil.copy2(os.path.join(scriptdir, 'cache/catimages_start'), os.path.join(scriptdir, 'cache/catimages_start.bak'))3738 posfile = open(os.path.join(scriptdir, 'cache/catimages_start'), "r")3739 firstPageTitle = posfile.read().decode('utf-8')3740 posfile.close()3741 else:3742 firstPageTitle = None3743 # Here below there are the parameters.3744 for arg in pywikibot.handleArgs():3745 if arg.startswith('-limit'):3746 if len(arg) == 7:3747 limit = int(pywikibot.input(u'How many files do you want to check?'))3748 else:3749 limit = int(arg[7:])3750# elif arg == '-sendemail':3751# sendemailActive = True3752 elif arg.startswith('-start'):3753 if len(arg) == 6:3754 firstPageTitle = None3755 elif len(arg) > 6:3756 firstPageTitle = arg[7:]3757 #firstPageTitle = firstPageTitle.split(":")[1:]3758 #generator = pywikibot.getSite().allpages(start=firstPageTitle, namespace=6)3759 elif arg.startswith('-cat'):3760 if len(arg) == 4:3761 catName = u'Media_needing_categories'3762 elif len(arg) > 4:3763 catName = str(arg[5:])3764 catSelected = catlib.Category(pywikibot.getSite(), 'Category:%s' % catName)3765 generator = pagegenerators.CategorizedPageGenerator(catSelected, recurse = True)3766# elif arg.startswith('-untagged'):3767# untagged = True3768# if len(arg) == 9:3769# projectUntagged = str(pywikibot.input(u'In which project should I work?'))3770# elif len(arg) > 9:3771# projectUntagged = str(arg[10:])3772 elif arg == '-noguesses':3773 useGuesses = False3774 elif arg.startswith('-single'):3775 if len(arg) > 7:3776 pageName = unicode(arg[8:])3777 if 'File:' not in pageName:3778 pageName = 'File:%s' % pageName3779 generator = [ pywikibot.Page(pywikibot.getSite(), pageName) ]3780 firstPageTitle = None3781 elif arg.startswith('-train'):3782 train = True3783 generator = None3784 # Understand if the generator is present or not.3785 if not generator:3786 pywikibot.output(u'no generator defined... 
EXIT.')3787 sys.exit()3788 3789 # Define the site.3790 site = pywikibot.getSite()3791 # Block of text to translate the parameters set above.3792 image_old_namespace = u"%s:" % site.image_namespace()3793 image_namespace = u"File:"3794 # A little block-statement to ensure that the bot will not start with en-parameters3795 if site.lang not in project_inserted:3796 pywikibot.output(u"Your project is not supported by this script. You have to edit the script and add it!")3797 return3798 # Defing the Main Class.3799 Bot = CatImagesBot(site, sendemailActive = sendemailActive,3800 duplicatesReport = False, logFullError = False)3801# # Untagged is True? Let's take that generator3802# if untagged == True:3803# generator = Bot.untaggedGenerator(projectUntagged, limit)3804 # Ok, We (should) have a generator, so let's go on.3805 # Take the additional settings for the Project3806 Bot.takesettings()3807 # do classifier training on good (homgenous) commons categories3808 if train:3809 trainbot(generator, Bot, image_old_namespace, image_namespace)3810 return3811 # Not the main, but the most important loop.3812 outresult = []3813 for image in generator:3814 if firstPageTitle:3815 if (image.title() == firstPageTitle):3816 pywikibot.output( u"found last page '%s' ..." % image.title() )3817 firstPageTitle = None3818 continue3819 else:3820 #pywikibot.output( u"skipping page '%s' ..." % image.title() )3821 continue3822 # recover from hard crash in the run before, thus skip one more page3823 if os.path.exists( os.path.join(scriptdir, 'cache/catimages_recovery') ):3824 pywikibot.output( u"trying to recover from hard crash, skipping page '%s' ..." 
% image.title() )3825 disable_recovery()3826 # in case the next one has a hard-crash too...3827 posfile = open(os.path.join(scriptdir, 'cache/catimages_start'), "w")3828 posfile.write( image.title().encode('utf-8') )3829 posfile.close()3830 continue3831 #comment = None # useless, also this, let it here for further developments3832 try:3833 imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here)3834 except IndexError:# Namespace image not found, that's not an image! Let's skip...3835 try:3836 imageName = image.title().split(image_old_namespace)[1]3837 except IndexError:3838 pywikibot.output(u"%s is not a file, skipping..." % image.title())3839 continue3840 Bot.setParameters(imageName) # Setting the image for the main class3841 try:3842 Bot.downloadImage()3843 except IOError, err:3844 # skip if download not possible3845 pywikibot.warning(u"%s, skipped..." % err)3846 continue3847 except:3848 # skip on any unexpected error, but report it3849 pywikibot.exception(tb=True)3850 pywikibot.error(u"was not able to process page %s !!!\n" %\3851 image.title(asLink=True))3852 continue3853 resultCheck = Bot.checkStep()3854 tagged = False3855 try:3856 (tagged, ret) = Bot.report()3857 if ret:3858 outresult.append( ret )3859 except AttributeError:3860 pywikibot.exception(tb=True)3861 pywikibot.error(u"was not able to process page %s !!!\n" %\3862 image.title(asLink=True))3863 limit += -13864 if not tagged:3865 posfile = open(os.path.join(scriptdir, 'cache/catimages_start'), "w")3866 posfile.write( image.title().encode('utf-8') )3867 posfile.close()3868 if limit <= 0:3869 break3870 if resultCheck:3871 continue3872 if outresult:3873 outpage = pywikibot.Page(site, u"User:DrTrigon/User:DrTrigonBot/logging")3874 #outresult = [ outpage.get() ] + outresult # append to page3875 outresult = u"\n".join(outresult)3876 pywikibot.output(u"Size of log page data: %s byte(s)" % len(outresult))3877 # work-a-round: write pages mutliple times if content is too large 
in order to circumvent3878 # "HTTPError: 504 Gateway Time-out" leading finally to "MaxTriesExceededError"3879 # (why is that...?!?? FIX THIS in the framework core e.g. 'postForm'!)3880 tmp = outresult3881 while tmp:3882 i = np.array([m.start() for m in re.finditer(u"\n\n==", tmp)]3883 + [len(tmp)])3884 #pos = i[ np.where((i - 2048*1024) <= 0)[0][-1] ] # $wgMaxArticleSize3885 pos = i[ np.where((i - 500*1024) <= 0)[0][-1] ]3886 pywikibot.output(u"Size of bunch to write: %s byte(s)" % len(tmp[:pos]))3887 outpage.put( tmp[:pos], comment="bot writing log for last run" )3888 tmp = tmp[pos:]3889 if pywikibot.simulate:3890 #print u"--- " * 203891 #print u"--- " * 203892 #print outresult3893 posfile = open(os.path.join(scriptdir, 'cache/catimages.log'), "a")3894 posfile.write( outresult )3895 posfile.close()3896# http://scipy-lectures.github.com/advanced/scikit-learn/index.html3897# http://mlpy.sourceforge.net/docs/3.5/index.html3898# http://docs.opencv.org/modules/ml/doc/ml.html3899# train pyml (svm), opencv BoW and haarcascade classifiers3900# choose a good and meaningful featureset from extracted (better than actual one)3901def trainbot(generator, Bot, image_old_namespace, image_namespace):3902 # IT LOOKS LIKE (MAY BE) scikit-learn IS BETTER AND HAS MORE OPTIONS THAN pyml ... ?!!!3903 # gather training dataset from wiki commons categories3904 trainset = []3905 for i, catName in enumerate(Bot.trained_cat):3906 catSelected = catlib.Category(pywikibot.getSite(), 'Category:%s' % catName)3907 generator = pagegenerators.CategorizedPageGenerator(catSelected)3908 for image in generator:3909 try:3910 imageName = image.title().split(image_namespace)[1] # Deleting the namespace (useless here)3911 except IndexError:# Namespace image not found, that's not an image! Let's skip...3912 try:3913 imageName = image.title().split(image_old_namespace)[1]3914 except IndexError:3915 pywikibot.output(u"%s is not a file, skipping..." 
% image.title())3916 continue3917 Bot.setParameters(imageName) # Setting the image for the main class3918 try:3919 Bot.downloadImage()3920 except IOError, err:3921 # skip if download not possible3922 pywikibot.warning(u"%s, skipped..." % err)3923 continue3924 except Exception, err:3925 # skip on any unexpected error, but report it3926 pywikibot.error(u"%s" % err)3927 pywikibot.error(u"was not able to process page %s !!!\n" %\3928 image.title(asLink=True))3929 continue3930 # gather all features (information) related to current image3931 Bot._info = {}3932 Bot.gatherFeatures()3933 3934 # create classifier feature set3935 # !!!currently number of detected features is used only -> lots of room for improvements!!!3936 # choose a good and meaningful featureset from extracted (better than actual one)3937 features = []3938 for key in sorted(Bot._info):3939 #print key, len(self._info[key]), self._info[key]3940 features.append( len(Bot._info[key]) )3941 features.append( i+1 ) # category id (returned by predictor later)3942 #print features3943 trainset.append( features )3944 trainset = np.array(trainset)3945 cols = trainset.shape[1]3946 # http://mlpy.sourceforge.net/docs/3.5/tutorial.html3947 import matplotlib.pyplot as plt # required for plotting3948 ##iris = np.loadtxt('iris.csv', delimiter=',')3949 ##x, y = iris[:, :4], iris[:, 4].astype(np.int) # x: (observations x attributes) matrix, y: classes (1: setosa, 2: versicolor, 3: virginica)3950 #trainset = np.loadtxt('cache/test.csv', delimiter=' ')3951 #cols = trainset.shape[1]3952 #print trainset3953 x, y = trainset[:, :(cols-1)], trainset[:, (cols-1)].astype(np.int) # x: (observations x attributes) matrix, y: classes (1: setosa, 2: versicolor, 3: virginica)3954 pywikibot.output(x.shape)3955 pywikibot.output(y.shape)3956 3957 # Dimensionality reduction by Principal Component Analysis (PCA)3958 pca = mlpy.PCA() # new PCA instance3959 pca.learn(x) # learn from data3960 z = pca.transform(x, k=2) # embed x into the k=2 
dimensional subspace3961 pywikibot.output(z.shape)3962 3963 plt.set_cmap(plt.cm.Paired)3964 fig1 = plt.figure(1)3965 title = plt.title("PCA on dataset")3966 plot = plt.scatter(z[:, 0], z[:, 1], c=y)3967 labx = plt.xlabel("First component")3968 laby = plt.ylabel("Second component")3969 plt.show()3970 3971 # Learning by Kernel Support Vector Machines (SVMs) on principal components3972 linear_svm = mlpy.LibSvm(kernel_type='linear') # new linear SVM instance3973 linear_svm.learn(z, y) # learn from principal components3974 3975 # !!! train also BoW (bag-of-words) in '_detectclassify_ObjectAll' resp. 'opencv.BoWclassify.main' !!!3976 3977 xmin, xmax = z[:,0].min()-0.1, z[:,0].max()+0.13978 ymin, ymax = z[:,1].min()-0.1, z[:,1].max()+0.13979 xx, yy = np.meshgrid(np.arange(xmin, xmax, 0.01), np.arange(ymin, ymax, 0.01))3980 zgrid = np.c_[xx.ravel(), yy.ravel()]3981 yp = linear_svm.pred(zgrid)3982 3983 plt.set_cmap(plt.cm.Paired)3984 fig2 = plt.figure(2)3985 title = plt.title("SVM (linear kernel) on principal components")3986 plot1 = plt.pcolormesh(xx, yy, yp.reshape(xx.shape))3987 plot2 = plt.scatter(z[:, 0], z[:, 1], c=y)3988 labx = plt.xlabel("First component")3989 laby = plt.ylabel("Second component")3990 limx = plt.xlim(xmin, xmax)3991 limy = plt.ylim(ymin, ymax)3992 plt.show()3993 3994 linear_svm.save_model('cache/test.csf')3995 pywikibot.output(u'Linear SVM model stored to %s.' % 'cache/test.csf')3996# for functions in C/C++ that might crash hard without any exception throwed3997# e.g. 
# for functions in C/C++ that might crash hard without any exception thrown,
# e.g. an abort due to an assert or something else
def enable_recovery():
    """Create the marker file signalling that a run is in progress.

    Only the file's existence matters; a later run finding it knows the
    previous run hard-crashed and can skip the offending page.
    """
    # 'with' guarantees the handle is closed even if the write fails
    with open(os.path.join(scriptdir, 'cache/catimages_recovery'), "w") as recoveryfile:
        recoveryfile.write('')


def disable_recovery():
    """Remove the hard-crash recovery marker file, if present."""
    marker = os.path.join(scriptdir, 'cache/catimages_recovery')
    if os.path.exists(marker):
        os.remove(marker)


# Main loop will take all the (names of the) images and then check them.
if __name__ == "__main__":
    # truncate to whole seconds; timezones are UTC
    # (replaces the old strptime(str(utcnow())) round-trip, same effect)
    old = datetime.datetime.utcnow().replace(microsecond=0)
    if sys.exc_info()[0]:  # re-raise ImportError
        raise
    try:
        main()
    finally:
        final = datetime.datetime.utcnow().replace(microsecond=0)
        delta = final - old
        # BUGFIX: use total_seconds() instead of 'delta.seconds', which
        # silently dropped the days component for runs longer than one day
        secs_of_diff = int(delta.total_seconds())
        pywikibot.output("Execution time: %s" % secs_of_diff)

Full Screen

Full Screen

Excel.py

Source:Excel.py Github

copy

Full Screen

...27 sheet = sheets[0]28 rows = sheet.rows29 for row in rows:30 row_value = [col.value for col in row]31 self._info_filter(row_value)32 if not self._header:33 self._header = self._info.pop(0)34 else:35 self._info.pop(0) # 删除开头的数据也即表头36 def _info_filter(self, info):37 pass38 def _write(self):39 if self._type == 'w':40 pass41 else:42 self._read()43 self._wb_write = openpyxl.Workbook()44 ws = self._wb_write.active45 ws.append(self._header)46 print('已经写入header了')47 print(len(self._info))48 for row in self._info:49 ws.append(row)50 print('全部写入')...

Full Screen

Full Screen

Copyfile.py

Source:Copyfile.py Github

copy

Full Screen

...8import openpyxl9from Classes.Excel import Excel10log_path = r'C:\Users\dell\Desktop\IV.xlsx'11class ReadLog(Excel):12 def _info_filter(self, info):13 self._info.append([info[-2] + '.png', info[-1] + '.png'])14 def _read(self):15 if not self._info:16 print('到这一步了')17 self._wb_read = openpyxl.load_workbook(self._path_read)18 sheets = self._wb_read.worksheets # 获取当前所有的sheet19 sheet = sheets[4]20 rows = sheet.rows21 for row in rows:22 row_value = [col.value for col in row]23 self._info_filter(row_value)24 if not self._header:25 self._header = self._info.pop(0)26 else:27 self._info.pop(0) # 删除开头的数据也即表头28excel = ReadLog(log_path, None, types='r')29excel.read()30paths = excel.info31base_path = r'G:\LearmonthData\learmonth_pics'32new_path = r'G:\LearmonthData\IV\5-12h'33for path in paths:34 full_path = os.path.join(base_path, path[-1])35 new_full_path = os.path.join(new_path, path[-2])36 try:37 shutil.copyfile(full_path, new_full_path)...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.

Run autotest automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation testing FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful