[go: up one dir, main page]

Skip to content
This repository has been archived by the owner on Apr 15, 2024. It is now read-only.

Commit

Permalink
Fixed: password handling.
Browse files Browse the repository at this point in the history
  • Loading branch information
euske committed Oct 12, 2019
1 parent 2908c77 commit 481e9db
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion pdfminer/cmapdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def add_cid2unichr(self, cid, code):
# Interpret as UTF-16BE.
self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
elif isinstance(code, int):
- self.cid2unichr[cid] = unichr(code)
+ self.cid2unichr[cid] = chr(code)
else:
raise TypeError(code)
return
Expand Down
2 changes: 1 addition & 1 deletion pdfminer/encodingdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def name2unicode(name):
m = STRIP_NAME.search(name)
if not m:
raise KeyError(name)
- return unichr(int(m.group(0)))
+ return chr(int(m.group(0)))


## EncodingDB
Expand Down
12 changes: 6 additions & 6 deletions pdfminer/pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from .pdftypes import PDFObjectNotFound
from .pdftypes import decipher_all
from .pdftypes import int_value
- from .pdftypes import str_value
+ from .pdftypes import bytes_value
from .pdftypes import list_value
from .pdftypes import dict_value
from .pdftypes import stream_value
Expand Down Expand Up @@ -311,8 +311,8 @@ def init_params(self):
self.v = int_value(self.param.get('V', 0))
self.r = int_value(self.param['R'])
self.p = int_value(self.param['P'])
- self.o = str_value(self.param['O'])
- self.u = str_value(self.param['U'])
+ self.o = bytes_value(self.param['O'])
+ self.u = bytes_value(self.param['U'])
self.length = int_value(self.param.get('Length', 40))
return

Expand Down Expand Up @@ -472,8 +472,8 @@ class PDFStandardSecurityHandlerV5(PDFStandardSecurityHandlerV4):
def init_params(self):
super(PDFStandardSecurityHandlerV5, self).init_params()
self.length = 256
- self.oe = str_value(self.param['OE'])
- self.ue = str_value(self.param['UE'])
+ self.oe = bytes_value(self.param['OE'])
+ self.ue = bytes_value(self.param['UE'])
self.o_hash = self.o[:32]
self.o_validation_salt = self.o[32:40]
self.o_key_salt = self.o[40:]
Expand Down Expand Up @@ -698,7 +698,7 @@ def search(entry, level):
entry = dict_value(entry)
if 'Title' in entry:
if 'A' in entry or 'Dest' in entry:
- title = decode_text(str_value(entry['Title']))
+ title = decode_text(bytes_value(entry['Title']))
dest = entry.get('Dest')
action = entry.get('A')
se = entry.get('SE')
Expand Down
4 changes: 2 additions & 2 deletions pdfminer/pdfinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,12 @@ def get_inline_data(self, pos, target=b'EI'):
while i <= len(target):
self.fillbuf()
if i:
- c = self.buf[self.charpos]
+ c = self.buf[self.charpos:self.charpos+1]
data += c
self.charpos += 1
if len(target) <= i and c.isspace():
i += 1
- elif i < len(target) and c == target[i]:
+ elif i < len(target) and c == target[i:i+1]:
i += 1
else:
i = 0
Expand Down
8 changes: 4 additions & 4 deletions pdfminer/pdftypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,12 @@ def num_value(x):
return x


- def str_value(x):
+ def bytes_value(x):
x = resolve1(x)
- if not isinstance(x, str):
+ if not isinstance(x, bytes):
if STRICT:
- raise PDFTypeError('String required: %r' % x)
- return ''
+ raise PDFTypeError('Bytes required: %r' % x)
+ return b''
return x


Expand Down
4 changes: 2 additions & 2 deletions pdfminer/psparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def nextline(self):
while 1:
self.fillbuf()
if eol:
- c = self.buf[self.charpos]
+ c = self.buf[self.charpos:self.charpos+1]
# handle b'\r\n'
if c == b'\n':
linebuf += c
Expand All @@ -238,7 +238,7 @@ def nextline(self):
if m:
linebuf += self.buf[self.charpos:m.end(0)]
self.charpos = m.end(0)
- if linebuf[-1] == b'\r':
+ if linebuf[-1:] == b'\r':
eol = True
else:
break
Expand Down
2 changes: 1 addition & 1 deletion pdfminer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def nunpack(s, default=0):


def decode_text(s):
- """Decodes a PDFDocEncoding string to Unicode."""
+ """Decodes a PDFDocEncoding bytes to Unicode."""
if s.startswith(b'\xfe\xff'):
return s[2:].decode('utf-16be', 'ignore')
else:
Expand Down
10 changes: 5 additions & 5 deletions tools/dumppdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def dumpallobjs(out, doc, mode=None):
return

# dumpoutline
- def dumpoutline(outfp, fname, objids, pagenos, password='',
+ def dumpoutline(outfp, fname, objids, pagenos, password=b'',
dumpall=False, mode=None, extractdir=None):
with open(fname, 'rb') as fp:
parser = PDFParser(fp)
Expand Down Expand Up @@ -165,7 +165,7 @@ def resolve_dest(dest):
# extractembedded
LITERAL_FILESPEC = LIT('Filespec')
LITERAL_EMBEDDEDFILE = LIT('EmbeddedFile')
- def extractembedded(outfp, fname, objids, pagenos, password='',
+ def extractembedded(outfp, fname, objids, pagenos, password=b'',
dumpall=False, mode=None, extractdir=None):
def extract1(obj):
filename = os.path.basename(obj['UF'] or obj['F'])
Expand Down Expand Up @@ -198,7 +198,7 @@ def extract1(obj):
return

# dumppdf
- def dumppdf(outfp, fname, objids, pagenos, password='',
+ def dumppdf(outfp, fname, objids, pagenos, password=b'',
dumpall=False, mode=None, extractdir=None):
with open(fname, 'rb') as fp:
parser = PDFParser(fp)
Expand Down Expand Up @@ -240,7 +240,7 @@ def usage():
objids = []
pagenos = set()
mode = None
- password = ''
+ password = b''
dumpall = False
proc = dumppdf
outfp = sys.stdout
Expand All @@ -250,7 +250,7 @@ def usage():
elif k == '-o': outfp = open(v, 'wb')
elif k == '-i': objids.extend( int(x) for x in v.split(',') )
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
- elif k == '-P': password = v
+ elif k == '-P': password = v.encode('ascii')
elif k == '-a': dumpall = True
elif k == '-r': mode = 'raw'
elif k == '-b': mode = 'binary'
Expand Down
4 changes: 2 additions & 2 deletions tools/pdf2txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def usage():
# debug option
debug = 0
# input option
- password = ''
+ password = b''
pagenos = set()
maxpages = 0
# output option
Expand All @@ -48,7 +48,7 @@ def usage():
if k == '-d': debug += 1
elif k == '-p': pagenos.update( int(x)-1 for x in v.split(',') )
elif k == '-m': maxpages = int(v)
- elif k == '-P': password = v
+ elif k == '-P': password = v.encode('ascii')
elif k == '-o': outfile = v
elif k == '-C': caching = False
elif k == '-n': laparams = None
Expand Down

0 comments on commit 481e9db

Please sign in to comment.