Modernize testing

- Switch to using nose
- Calculate coverage with coveralls
- Use conda for setting up Python since Travis is flaky about that
chardet · dan-blanchard · Jan 9, 2015 · Oct 11, 2014 · Oct 11, 2014 · Dec 1, 2014
commit 9b8b12c61f24100509ea0d2d06776a3eb55d7c8a
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,9 @@
+[run]
+source = chardet
+omit =
+    */python?.?/*
+    */lib-python/?.?/*.py
+    */lib_pypy/_*.py
+    */site-packages/ordereddict.py
+    */site-packages/nose/*
+    */unittest2/*
diff --git a/.travis.yml b/.travis.yml
@@ -5,8 +5,34 @@ python:
   - 3.3
   - 3.4
 
-script: python test.py
+before_install:
+  - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+  - chmod +x miniconda.sh
+  - ./miniconda.sh -b
+  - export PATH=/home/travis/miniconda/bin:$PATH
+  - conda config --add channels https://conda.binstar.org/dan_blanchard
+  - conda update --yes conda
+
+install:
+  # Set up the desired Python in a conda environment with python-coveralls
+  - conda install --yes pip python=$TRAVIS_PYTHON_VERSION python-coveralls
+  # Have to use pip for nose-cov because its entry points are not supported by conda yet
+  - pip install nose-cov
+  # Multiprocessing fix for Travis
+  - sudo rm -rf /dev/shm
+  - sudo ln -s /run/shm /dev/shm
+  # Actually install chardet
+  - python setup.py install
+
+# Run the test suite with nose, measuring coverage of the chardet package
+script:
+  - nosetests -v --with-cov --cov chardet --cov-config .coveragerc --logging-level=DEBUG
+
+# Calculate coverage
+after_success:
+  - coveralls --config_file .coveragerc
 
 notifications:
-   on_success: change
-   on_failure: always
+  email:
+    on_success: change
+    on_failure: always
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+nose
diff --git a/setup.py b/setup.py
@@ -9,20 +9,25 @@ def readme():
         return f.read()
 
 
+def requirements():
+    with open(req_path) as f:
+        reqs = f.read().splitlines()
+    return reqs
+
 setup(name='chardet',
       version=__version__,
       description='Universal encoding detector for Python 2 and 3',
       long_description=readme(),
       author='Mark Pilgrim',
       author_email='mark@diveintomark.org',
-      maintainer='Ian Cordasco',
-      maintainer_email='graffatcolmingov@gmail.com',
+      maintainer='Daniel Blanchard',
+      maintainer_email='dblanchard@ets.org',
       url='https://github.com/chardet/chardet',
       license="LGPL",
       keywords=['encoding', 'i18n', 'xml'],
       classifiers=["Development Status :: 4 - Beta",
                    "Intended Audience :: Developers",
-                   ("License :: OSI Approved :: GNU Library or Lesser General" +
+                   ("License :: OSI Approved :: GNU Library or Lesser General"
                     " Public License (LGPL)"),
                    "Operating System :: OS Independent",
                    "Programming Language :: Python",
@@ -32,8 +37,10 @@ def readme():
                    'Programming Language :: Python :: 3',
                    'Programming Language :: Python :: 3.2',
                    'Programming Language :: Python :: 3.3',
-                   ("Topic :: Software Development :: Libraries :: Python " +
+                   ("Topic :: Software Development :: Libraries :: Python "
                     "Modules"),
                    "Topic :: Text Processing :: Linguistic"],
       packages=['chardet'],
-      entry_points={'console_scripts': ['chardetect = chardet.chardetect:main']})
+      install_requires=requirements(),
+      entry_points={'console_scripts':
+                    ['chardetect = chardet.chardetect:main']})
diff --git a/test.py b/test.py
@@ -1,56 +1,47 @@
+"""
+Run chardet on a bunch of documents and see that we get the correct encodings.
+
+:author: Dan Blanchard
+:author: Ian Cordasco
+"""
+
 from __future__ import with_statement
 
-import os
 import sys
 import unittest
+from os import listdir
+from os.path import dirname, isdir, join, realpath, relpath, splitext
+
+from nose.tools import eq_
 
 import chardet
 
 
-class TestCase(unittest.TestCase):
-    def __init__(self, file_name, encoding):
-        unittest.TestCase.__init__(self)
-        self.file_name = file_name
-        encoding = encoding.lower()
-        for postfix in ['-arabic',
-                        '-bulgarian',
-                        '-cyrillic',
-                        '-greek',
-                        '-hebrew',
-                        '-hungarian',
-                        '-turkish']:
-            if encoding.endswith(postfix):
-                encoding, _, _ = encoding.rpartition(postfix)
-                break
-        self.encoding = encoding
-
-    def runTest(self):
-        with open(self.file_name, 'rb') as f:
-            result = chardet.detect(f.read())
-        self.assertEqual(result['encoding'].lower(), self.encoding,
-                         "Expected %s, but got %s in %s" %
-                         (self.encoding, result['encoding'],
-                          self.file_name))
-
-
-def main():
-    suite = unittest.TestSuite()
-    if len(sys.argv) > 1:
-        base_path = sys.argv[1]
-    else:
-        base_path = os.path.join(
-            os.path.dirname(os.path.abspath(__file__)), 'tests')
-    for encoding in os.listdir(base_path):
-        path = os.path.join(base_path, encoding)
-        if not os.path.isdir(path):
+def check_file_encoding(file_name, encoding):
+    """ Ensure that we detect the encoding for file_name correctly. """
+    encoding = encoding.lower()
+    for postfix in ['-arabic', '-bulgarian', '-cyrillic', '-greek', '-hebrew',
+                    '-hungarian', '-turkish']:
+        if encoding.endswith(postfix):
+            encoding = encoding.rpartition(postfix)[0]
+            break
+
+    with open(file_name, 'rb') as f:
+        result = chardet.detect(f.read())
+    eq_(result['encoding'].lower(), encoding, ("Expected %s, but got %s for "
+                                               "%s" % (encoding,
+                                                       result['encoding'],
+                                                       file_name)))
+
+
+def test_encoding_detection():
+    base_path = relpath(join(dirname(realpath(__file__)), 'tests'))
+    for encoding in listdir(base_path):
+        path = join(base_path, encoding)
+        if not isdir(path):
             continue
-        for file_name in os.listdir(path):
-            _, ext = os.path.splitext(file_name)
+        for file_name in listdir(path):
+            ext = splitext(file_name)[1].lower()
             if ext not in ['.html', '.txt', '.xml', '.srt']:
                 continue
-            suite.addTest(TestCase(os.path.join(path, file_name), encoding))
-    unittest.TextTestRunner().run(suite)
-
-
-if __name__ == '__main__':
-    main()
+            yield check_file_encoding, join(path, file_name), encoding