third_party/requests/packages/charade/sbcharsetprober.py - Issue 24076010: Add 'requests' library to third_party.

Side by Side Diff: third_party/requests/packages/charade/sbcharsetprober.py

Issue 24076010: Add 'requests' library to third_party. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/swarm_client

Patch Set: Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 ######################## BEGIN LICENSE BLOCK ########################

	2 # The Original Code is Mozilla Universal charset detector code.

	3 #

	4 # The Initial Developer of the Original Code is

	5 # Netscape Communications Corporation.

	6 # Portions created by the Initial Developer are Copyright (C) 2001

	7 # the Initial Developer. All Rights Reserved.

	8 #

	9 # Contributor(s):

	10 # Mark Pilgrim - port to Python

	11 # Shy Shalom - original C code

	12 #

	13 # This library is free software; you can redistribute it and/or

	14 # modify it under the terms of the GNU Lesser General Public

	15 # License as published by the Free Software Foundation; either

	16 # version 2.1 of the License, or (at your option) any later version.

	17 #

	18 # This library is distributed in the hope that it will be useful,

	19 # but WITHOUT ANY WARRANTY; without even the implied warranty of

	20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

	21 # Lesser General Public License for more details.

	22 #

	23 # You should have received a copy of the GNU Lesser General Public

	24 # License along with this library; if not, write to the Free Software

	25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA

	26 # 02110-1301 USA

	27 ######################### END LICENSE BLOCK #########################

	28

	29 import sys

	30 from . import constants

	31 from .charsetprober import CharSetProber

	32 from .compat import wrap_ord

	33

	34 SAMPLE_SIZE = 64

	35 SB_ENOUGH_REL_THRESHOLD = 1024

	36 POSITIVE_SHORTCUT_THRESHOLD = 0.95

	37 NEGATIVE_SHORTCUT_THRESHOLD = 0.05

	38 SYMBOL_CAT_ORDER = 250

	39 NUMBER_OF_SEQ_CAT = 4

	40 POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1

	41 #NEGATIVE_CAT = 0

	42

	43

	44 class SingleByteCharSetProber(CharSetProber):

	45 def __init__(self, model, reversed=False, nameProber=None):

	46 CharSetProber.__init__(self)

	47 self._mModel = model

	48 # TRUE if we need to reverse every pair in the model lookup

	49 self._mReversed = reversed

	50 # Optional auxiliary prober for name decision

	51 self._mNameProber = nameProber

	52 self.reset()

	53

	54 def reset(self):

	55 CharSetProber.reset(self)

	56 # char order of last character

	57 self._mLastOrder = 255

	58 self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT

	59 self._mTotalSeqs = 0

	60 self._mTotalChar = 0

	61 # characters that fall in our sampling range

	62 self._mFreqChar = 0

	63

	64 def get_charset_name(self):

	65 if self._mNameProber:

	66 return self._mNameProber.get_charset_name()

	67 else:

	68 return self._mModel['charsetName']

	69

	70 def feed(self, aBuf):

	71 if not self._mModel['keepEnglishLetter']:

	72 aBuf = self.filter_without_english_letters(aBuf)

	73 aLen = len(aBuf)

	74 if not aLen:

	75 return self.get_state()

	76 for c in aBuf:

	77 order = self._mModel['charToOrderMap'][wrap_ord(c)]

	78 if order < SYMBOL_CAT_ORDER:

	79 self._mTotalChar += 1

	80 if order < SAMPLE_SIZE:

	81 self._mFreqChar += 1

	82 if self._mLastOrder < SAMPLE_SIZE:

	83 self._mTotalSeqs += 1

	84 if not self._mReversed:

	85 i = (self._mLastOrder * SAMPLE_SIZE) + order

	86 model = self._mModel['precedenceMatrix'][i]

	87 else: # reverse the order of the letters in the lookup

	88 i = (order * SAMPLE_SIZE) + self._mLastOrder

	89 model = self._mModel['precedenceMatrix'][i]

	90 self._mSeqCounters[model] += 1

	91 self._mLastOrder = order

	92

	93 if self.get_state() == constants.eDetecting:

	94 if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:

	95 cf = self.get_confidence()

	96 if cf > POSITIVE_SHORTCUT_THRESHOLD:

	97 if constants._debug:

	98 sys.stderr.write('%s confidence = %s, we have a'

	99 'winner\n' %

	100 (self._mModel['charsetName'], cf))

	101 self._mState = constants.eFoundIt

	102 elif cf < NEGATIVE_SHORTCUT_THRESHOLD:

	103 if constants._debug:

	104 sys.stderr.write('%s confidence = %s, below negative'

	105 'shortcut threshhold %s\n' %

	106 (self._mModel['charsetName'], cf,

	107 NEGATIVE_SHORTCUT_THRESHOLD))

	108 self._mState = constants.eNotMe

	109

	110 return self.get_state()

	111

	112 def get_confidence(self):

	113 r = 0.01

	114 if self._mTotalSeqs > 0:

	115 r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs

	116 / self._mModel['mTypicalPositiveRatio'])

	117 r = r * self._mFreqChar / self._mTotalChar

	118 if r >= 1.0:

	119 r = 0.99

	120 return r

OLD	NEW