| Index: third_party/logilab/common/fileutils.py
|
| diff --git a/third_party/logilab/common/fileutils.py b/third_party/logilab/common/fileutils.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..4ac927023d2a6ed06e0afab3a376b5e7fbf576fa
|
| --- /dev/null
|
| +++ b/third_party/logilab/common/fileutils.py
|
| @@ -0,0 +1,402 @@
|
| +# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
|
| +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
|
| +#
|
| +# This file is part of logilab-common.
|
| +#
|
| +# logilab-common is free software: you can redistribute it and/or modify it under
|
| +# the terms of the GNU Lesser General Public License as published by the Free
|
| +# Software Foundation, either version 2.1 of the License, or (at your option) any
|
| +# later version.
|
| +#
|
| +# logilab-common is distributed in the hope that it will be useful, but WITHOUT
|
| +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
| +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
| +# details.
|
| +#
|
| +# You should have received a copy of the GNU Lesser General Public License along
|
| +# with logilab-common. If not, see <http://www.gnu.org/licenses/>.
|
| +"""File and file-path manipulation utilities.
|
| +
|
| +:group path manipulation: first_level_directory, relative_path, is_binary,\
|
| +get_by_ext, remove_dead_links
|
| +:group file manipulation: norm_read, norm_open, lines, stream_lines,\
|
| +write_open_mode, ensure_fs_mode, export
|
| +:sort: path manipulation, file manipulation
|
| +"""
|
| +__docformat__ = "restructuredtext en"
|
| +
|
| +import sys
|
| +import shutil
|
| +import mimetypes
|
| +from os.path import isabs, isdir, islink, split, exists, normpath, join
|
| +from os.path import abspath
|
| +from os import sep, mkdir, remove, listdir, stat, chmod, walk
|
| +from stat import ST_MODE, S_IWRITE
|
| +from cStringIO import StringIO
|
| +
|
| +from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS
|
| +from logilab.common.shellutils import find
|
| +from logilab.common.deprecation import deprecated
|
| +from logilab.common.compat import FileIO, any
|
| +
|
def first_level_directory(path):
    """Return the outermost (leading) component of a path.

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'

    :type path: str
    :param path: the path whose first component is wanted

    :rtype: str
    :return:
      the first component found in `path`; for an absolute path this is
      the file system root
    """
    remaining, component = split(path)
    # Keep peeling the last component off until one side becomes empty.
    while remaining and component:
        remaining, component = split(remaining)
    # When the final split produced no tail (absolute path or trailing
    # separator), whatever is left in the head is the answer.
    return component or remaining
|
| +
|
def abspath_listdir(path):
    """Return the entries of directory `path` as absolute paths.

    >>> os.listdir('/home')
    ['adim', 'alf', 'arthur', 'auc']
    >>> abspath_listdir('/home')
    ['/home/adim', '/home/alf', '/home/arthur', '/home/auc']

    :type path: str
    :param path: the directory to list

    :rtype: list
    :return: one absolute path per entry, in the order given by `listdir`
    """
    base = abspath(path)
    entries = []
    for name in listdir(base):
        entries.append(join(base, name))
    return entries
|
| +
|
| +
|
def is_binary(filename):
    """Return True if `filename` may be a binary file, according to its
    extension.

    The decision relies only on the MIME type guessed from the file name;
    the file itself is never opened.

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      True if the guessed MIME type does not start with ``text``, or if no
      MIME type could be guessed at all; False otherwise
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # unknown extension: be conservative and treat the file as binary
        return True
    return not mime_type.startswith('text')
|
| +
|
| +
|
def write_open_mode(filename):
    """Return the mode that should be used to open `filename` for writing.

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return:
      'wb' when `is_binary` considers the file binary, 'w' otherwise
    """
    mode = 'w'
    if is_binary(filename):
        mode += 'b'
    return mode
|
| +
|
| +
|
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """Check that the given file has the given mode(s) set, else try to
    set them.

    Existing permission bits are never removed: the missing bits are ORed
    into the current mode.

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes

    :raise OSError: if the file cannot be stat'ed or its mode changed
    """
    mode = stat(filepath)[ST_MODE]
    # Every requested bit must be present; testing for a non-zero
    # intersection would stop as soon as a single one of them is set.
    if (mode & desired_mode) != desired_mode:
        chmod(filepath, mode | desired_mode)
|
| +
|
| +
|
| +# XXX (syt) unused? kill?
|
class ProtectedFile(FileIO):
    """A special file-object class that automatically does a 'chmod +w' when
    needed.

    The file must already exist: its current permission bits are read when
    the object is created, write permission is added if the requested mode
    writes to the file, and the original bits are put back on `close()`.

    XXX: for now, the way it is done allows 'normal file-objects' to be
    created during the ProtectedFile object lifetime.
    One way to circumvent this would be to chmod / unchmod on each
    write operation.

    One other way would be to :

    - catch the IOError in the __init__

    - if IOError, then create a StringIO object

    - each write operation writes in this StringIO object

    - on close()/del(), write/append the StringIO content to the file and
      do the chmod only once
    """
    def __init__(self, filepath, mode):
        """Open `filepath` with `mode`, adding write permission first if the
        mode writes to the file and the permission is missing.

        :type filepath: str
        :param filepath: path of an existing file

        :type mode: str
        :param mode: open mode, as accepted by the `FileIO` base class
        """
        self.original_mode = stat(filepath)[ST_MODE]
        self.mode_changed = False
        # 'w', 'a' and every update mode ('r+', 'w+', 'a+', ...) write to
        # the file, with or without a 'b' flag.
        if 'w' in mode or 'a' in mode or '+' in mode:
            if not self.original_mode & S_IWRITE:
                chmod(filepath, self.original_mode | S_IWRITE)
                self.mode_changed = True
        try:
            FileIO.__init__(self, filepath, mode)
        except Exception:
            # Opening failed: do not leave the file with altered permissions.
            if self.mode_changed:
                chmod(filepath, self.original_mode)
                self.mode_changed = False
            raise

    def _restore_mode(self):
        """restores the original mode if needed"""
        if self.mode_changed:
            chmod(self.name, self.original_mode)
            # Don't re-chmod in case of several restore
            self.mode_changed = False

    def close(self):
        """restore mode before closing"""
        try:
            self._restore_mode()
        finally:
            # Always release the underlying handle, even if chmod failed.
            FileIO.close(self)

    def __del__(self):
        if not self.closed:
            self.close()
|
| +
|
| +
|
class UnresolvableError(Exception):
    """Raised by `relative_path` when no relative path can be computed
    between the two paths it was given.
    """
|
| +
|
def relative_path(from_file, to_file):
    """Compute the path leading from `from_file` to `to_file`.

    The result is what should be written inside `from_file` in order to
    link to `to_file`. It is relative whenever possible; when `to_file` is
    absolute and `from_file` is not, `to_file` is returned as is.

    If both files are relative, they're expected to be relative to the same
    directory.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='toto/index.html')
    Traceback (most recent call last):
      File "<string>", line 1, in ?
      File "<stdin>", line 37, in relative_path
    UnresolvableError
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError:
      if `from_file` is absolute while `to_file` is relative

    :rtype: str
    :return: the path of `to_file` as seen from `from_file`
    """
    from_file = normpath(from_file)
    to_file = normpath(to_file)
    if from_file == to_file:
        return ''
    if isabs(to_file):
        if not isabs(from_file):
            return to_file
    elif isabs(from_file):
        raise UnresolvableError()
    # Only the directories containing the source file matter.
    source_dirs = from_file.split(sep)[:-1]
    target_parts = to_file.split(sep)
    # Count the leading directories both paths share. The last component
    # of the target is never consumed, hence the "- 1".
    limit = min(len(source_dirs), len(target_parts) - 1)
    shared = 0
    while shared < limit and source_dirs[shared] == target_parts[shared]:
        shared += 1
    # One '..' for every source directory that is not shared.
    climb = ['..'] * (len(source_dirs) - shared)
    return sep.join(climb + target_parts[shared:])
|
| +
|
| +
|
def norm_read(path):
    """Return the content of the file with normalized line feeds.

    The file is opened in universal newlines mode ('U') and is closed
    before returning, whether or not reading succeeded.

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    stream = open(path, 'U')
    try:
        return stream.read()
    finally:
        # do not rely on garbage collection to release the handle
        stream.close()
norm_read = deprecated("use \"open(path, 'U').read()\"")(norm_read)
|
| +
|
def norm_open(path):
    """Open the file at `path` for reading with normalized line feeds.

    The file is opened in universal newlines mode ('U'); closing the
    returned stream is up to the caller.

    :type path: str
    :param path: path to the file to open

    :rtype: file
    :return: the opened file with normalized line feeds
    """
    stream = open(path, 'U')
    return stream
norm_open = deprecated("use \"open(path, 'U')\"")(norm_open)
|
| +
|
def lines(path, comments=None):
    """Return a list of non empty lines in the file located at `path`.

    The file is opened in universal newlines mode ('U') and is always
    closed before returning, even if reading it fails.

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    stream = open(path, 'U')
    try:
        return stream_lines(stream, comments)
    finally:
        # release the handle even when stream_lines raises
        stream.close()
|
| +
|
| +
|
def stream_lines(stream, comments=None):
    """Collect the meaningful lines of `stream` into a list.

    Every line is stripped of surrounding whitespace; lines that end up
    empty are dropped, as are lines starting with `comments` when given.

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # Prefer 'xreadlines' when the stream has it, else use 'readlines'.
    try:
        iter_lines = stream.xreadlines
    except AttributeError:
        iter_lines = stream.readlines
    stripped = (raw.strip() for raw in iter_lines())
    if comments is None:
        return [text for text in stripped if text]
    return [text for text in stripped
            if text and not text.startswith(comments)]
|
| +
|
| +
|
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=0):
    """Make a mirror of `from_dir` in `to_dir`, omitting directories
    listed in the black list and files ending with one of the given
    extensions.

    Black-listed names are only matched against directory names: such
    directories are neither created nor descended into. Files already
    present in the destination are replaced.

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory

    :type blacklist: list or tuple
    :param blacklist:
      list of directories to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False

    :raise OSError:
      if `to_dir` does not exist and cannot be created
    """
    try:
        mkdir(to_dir)
    except OSError:
        # An already existing destination directory is fine; any other
        # failure (permissions, missing parent...) is a real error.
        if not isdir(to_dir):
            raise
    prefix_len = len(from_dir)
    for directory, dirnames, filenames in walk(from_dir):
        # Prune black-listed directories in place so walk() skips them.
        for norecurs in blacklist:
            try:
                dirnames.remove(norecurs)
            except ValueError:
                continue
        for dirname in dirnames:
            src = join(directory, dirname)
            # Strip the leading separator so the result does not depend on
            # `from_dir` ending with a separator or not.
            dest = join(to_dir, src[prefix_len:].lstrip(sep))
            if isdir(src):
                if not exists(dest):
                    mkdir(dest)
        for filename in filenames:
            # don't include binary files
            # endswith does not accept tuple in 2.4
            if any(filename.endswith(ext) for ext in ignore_ext):
                continue
            src = join(directory, filename)
            dest = join(to_dir, src[prefix_len:].lstrip(sep))
            if verbose:
                sys.stderr.write('%s -> %s\n' % (src, dest))
            if exists(dest):
                remove(dest)
            shutil.copy2(src, dest)
|
| +
|
| +
|
def remove_dead_links(directory, verbose=0):
    """Recursively traverse directory and remove all dead links.

    A dead link is a symbolic link whose target does not exist. Both links
    listed among the sub-directories and among the files of each visited
    directory are checked.

    :type directory: str
    :param directory: directory to cleanup

    :type verbose: bool
    :param verbose:
      flag indicating whether information about deleted links should be
      printed to standard output, default to False
    """
    for dirpath, dirnames, filenames in walk(directory):
        for filename in dirnames + filenames:
            src = join(dirpath, filename)
            # exists() follows the link, so it is False for a dangling one
            if islink(src) and not exists(src):
                if verbose:
                    sys.stdout.write('remove dead link %s\n' % src)
                remove(src)
|
| +
|
|
|