[go: up one dir, main page]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add upstream changes and clean up where possible #42

Merged
merged 38 commits into from
Jan 9, 2015
Merged
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
8bc4b89
Comment out sections of tables that weren't used to save memory.
dan-blanchard Oct 11, 2014
20cad49
Add 3.4 to list for Travis testing and remove 3.2
dan-blanchard Oct 11, 2014
44752d7
Bunch of little clean up things
dan-blanchard Dec 1, 2014
7251430
Merge branch 'master' into feature/upstream-changes-and-overhaul
dan-blanchard Dec 2, 2014
0ecaf05
Add if __name__... to test.py and a break to speed things up in loop.
dan-blanchard Dec 2, 2014
9b8b12c
Modernize testing
dan-blanchard Dec 2, 2014
3e13cc7
Fix missing req_path in setup.py
dan-blanchard Dec 2, 2014
59f30b7
Simplify Travis setup and just use pip. conda was overkill for our s…
dan-blanchard Dec 2, 2014
a5c7484
Make tests slightly more efficient.
dan-blanchard Dec 2, 2014
07a5849
Merge branch 'master' into feature/upstream-changes-and-overhaul
dan-blanchard Dec 21, 2014
267c5d8
Switch to new Travis docker VMs and add PyPy testing.
dan-blanchard Dec 29, 2014
c665459
Add C-equivalent implementation of filter_english_letters.
dan-blanchard Dec 30, 2014
7cfa45c
Fix some pylint warnings in universaldetector.py
dan-blanchard Dec 30, 2014
125575f
Made latin1 equivalent to windows-1252 when running unit tests.
dan-blanchard Dec 30, 2014
d9c42c7
A bunch of little clean up changes.
dan-blanchard Dec 30, 2014
04398ff
Comment out pypy line in .travis.yml. It's 10x slower, which is ridi…
dan-blanchard Dec 30, 2014
475ffa6
Re-enable PyPy on Travis, but disable coverage for it
dan-blanchard Dec 30, 2014
2eae0d6
Fix syntax error in .travis.yml
dan-blanchard Dec 30, 2014
b45c331
Fix coverage logic reversal in .travis.yml
dan-blanchard Dec 30, 2014
b382f22
Fix TypeError on PyPy in utf8prober.py
dan-blanchard Dec 30, 2014
be09612
Switch to using enums instead of constants, and a bunch of cleanup st…
dan-blanchard Jan 2, 2015
431bd39
Get rid of set literal to appease Python 2.6
dan-blanchard Jan 2, 2015
3fb82c9
Some minor PEP8 name changes
dan-blanchard Jan 5, 2015
6058456
Merge branch 'master' into feature/upstream-changes-and-overhaul
dan-blanchard Jan 5, 2015
bd9951f
Loads of PEP8 naming convention fixes.
dan-blanchard Jan 5, 2015
4317be7
Fix some NOTES.rst formatting issues
dan-blanchard Jan 6, 2015
01e82e3
Update MANIFEST.in to include test files and docs
dan-blanchard Jan 6, 2015
b8f8b24
Remove PyCharm stuff from .gitignore
dan-blanchard Jan 6, 2015
50f701c
Remove flake8: noqa lines.
dan-blanchard Jan 6, 2015
e42c4d1
Add missing __version__ import to __init__.py
dan-blanchard Jan 6, 2015
0913a91
Remove unnecessary `import sys` from conf.py
dan-blanchard Jan 6, 2015
c7f01c1
Switch to using pip for installation in .travis.yml
dan-blanchard Jan 6, 2015
1e0f1a5
Rename SMState to MachineState
dan-blanchard Jan 6, 2015
4a8084d
Get rid of messy ternary operator in charsetprober.py
dan-blanchard Jan 6, 2015
5449248
Fix __version typo in __init__.py
dan-blanchard Jan 6, 2015
369875d
Add comment about why we're slicing in filter_with_english_letters
dan-blanchard Jan 6, 2015
8e3fc03
Made more attributes public.
dan-blanchard Jan 6, 2015
da6c0a0
Temporarily disable Hungarian probers, and update missing encodings list
dan-blanchard Jan 7, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix some pylint warnings in universaldetector.py
  • Loading branch information
dan-blanchard committed Dec 30, 2014
commit 7cfa45c5bb7ea28c555b411b83ff8de23e8753b5
42 changes: 23 additions & 19 deletions chardet/universaldetector.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def __init__(self):
self._escDetector = re.compile(b'(\033|~{)')
self._mEscCharSetProber = None
self._mCharSetProbers = []
self.result = None
self.done = None
self._mStart = None
self._mGotData = None
self._mInputState = None
self._mLastChar = None
self.reset()

def reset(self):
Expand All @@ -72,31 +78,27 @@ def feed(self, aBuf):

if not self._mGotData:
# If the data starts with BOM, we know it is UTF
if aBuf[:3] == codecs.BOM_UTF8:
if aBuf.startswith(codecs.BOM_UTF8):
# EF BB BF UTF-8 with BOM
self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
elif aBuf[:4] == codecs.BOM_UTF32_LE:
elif aBuf.startswith(codecs.BOM_UTF32_LE):
# FF FE 00 00 UTF-32, little-endian BOM
self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
elif aBuf[:4] == codecs.BOM_UTF32_BE:
elif aBuf.startswith(codecs.BOM_UTF32_BE):
# 00 00 FE FF UTF-32, big-endian BOM
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
elif aBuf[:4] == b'\xFE\xFF\x00\x00':
elif aBuf.startswith(b'\xFE\xFF\x00\x00'):
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {
'encoding': "X-ISO-10646-UCS-4-3412",
'confidence': 1.0
}
elif aBuf[:4] == b'\x00\x00\xFF\xFE':
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
'confidence': 1.0}
elif aBuf.startswith(b'\x00\x00\xFF\xFE'):
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
self.result = {
'encoding': "X-ISO-10646-UCS-4-2143",
'confidence': 1.0
}
elif aBuf[:2] == codecs.BOM_LE:
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
'confidence': 1.0}
elif aBuf.startswith(codecs.BOM_LE):
# FF FE UTF-16, little endian BOM
self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
elif aBuf[:2] == codecs.BOM_BE:
elif aBuf.startswith(codecs.BOM_BE):
# FE FF UTF-16, big endian BOM
self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}

Expand All @@ -108,8 +110,8 @@ def feed(self, aBuf):
if self._mInputState == ePureAscii:
if self._highBitDetector.search(aBuf):
self._mInputState = eHighbyte
elif ((self._mInputState == ePureAscii) and
self._escDetector.search(self._mLastChar + aBuf)):
elif self._mInputState == ePureAscii and \
self._escDetector.search(self._mLastChar + aBuf):
self._mInputState = eEscAscii

self._mLastChar = aBuf[-1:]
Expand All @@ -118,8 +120,10 @@ def feed(self, aBuf):
if not self._mEscCharSetProber:
self._mEscCharSetProber = EscCharSetProber()
if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
'confidence': self._mEscCharSetProber.get_confidence()}
self.result = {'encoding':
self._mEscCharSetProber.get_charset_name(),
'confidence':
self._mEscCharSetProber.get_confidence()}
self.done = True
elif self._mInputState == eHighbyte:
if not self._mCharSetProbers:
Expand Down