| Index: content/renderer/hyphenator/hyphenator.cc
|
| diff --git a/content/renderer/hyphenator/hyphenator.cc b/content/renderer/hyphenator/hyphenator.cc
|
| deleted file mode 100644
|
| index b94ba3cc6981a6aa227b1b6a7122c9ed34f9c860..0000000000000000000000000000000000000000
|
| --- a/content/renderer/hyphenator/hyphenator.cc
|
| +++ /dev/null
|
| @@ -1,270 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "content/renderer/hyphenator/hyphenator.h"
|
| -
|
| -#include "base/files/memory_mapped_file.h"
|
| -#include "base/logging.h"
|
| -#include "base/memory/scoped_ptr.h"
|
| -#include "base/strings/string_util.h"
|
| -#include "base/strings/utf_string_conversions.h"
|
| -#include "content/common/hyphenator_messages.h"
|
| -#include "content/public/renderer/render_thread.h"
|
| -#include "third_party/hyphen/hyphen.h"
|
| -#include "third_party/icu/source/common/unicode/uscript.h"
|
| -
|
| -namespace {
|
| -
|
| -// A class that converts a sequence of UTF-8 characters to UTF-16 ones and holds
|
| -// only the length of converted UTF-16 characters. This class is used for
|
| -// creating a mapping from the position of a UTF-8 string to a position of a
|
| -// UTF-16 string without unnecessary conversions. Even though the following
|
| -// snippet produces the same mapping, it needs to convert same characters many
|
| -// times. This class incrementally counts the number of converted UTF-16
|
| -// characters to avoid this problem.
|
| -//
|
| -// scoped_ptr<size_t[]> position(new size_t[text.length()]);
|
| -// for (size_t i = 0; i < text.length(); ++i)
|
| -// position[i] = UTF8ToUTF16(text.substr(0, i)).length();
|
| -//
|
| -class UTF16TextLength {
|
| - public:
|
| - UTF16TextLength();
|
| - ~UTF16TextLength();
|
| -
|
| - // Returns the current position.
|
| - int utf16_length() const { return utf16_length_; }
|
| -
|
| - // Appends one UTF-8 character to this converter and advances the converted
|
| - // position. This converter increases the position by one when it finishes
|
| - // reading a BMP character and increases by two when it finish reading a
|
| - // non-BMP character.
|
| - void Append(char c);
|
| -
|
| - private:
|
| - // The length of the converted UTF-16 text.
|
| - int utf16_length_;
|
| -
|
| - // The buffer that stores UTF-8 characters being converted.
|
| - std::string utf8_text_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(UTF16TextLength);
|
| -};
|
| -
|
| -UTF16TextLength::UTF16TextLength()
|
| - : utf16_length_(0) {
|
| -}
|
| -
|
| -UTF16TextLength::~UTF16TextLength() {
|
| -}
|
| -
|
| -void UTF16TextLength::Append(char c) {
|
| - // Append the given character and try converting the UTF-8 characters in this
|
| - // buffer to Unicode codepoints. If this buffer includes a Unicode codepoint,
|
| - // get the number of UTF-16 characters representing this codepoint and advance
|
| - // the position.
|
| - int code = 0;
|
| - int index = 0;
|
| - utf8_text_.push_back(c);
|
| - U8_NEXT(utf8_text_.data(), index, static_cast<int>(utf8_text_.length()),
|
| - code);
|
| - if (code != U_SENTINEL) {
|
| - utf8_text_.clear();
|
| - utf16_length_ += U16_LENGTH(code);
|
| - }
|
| -}
|
| -
|
| -// A class that encapsulates a hyphenation query. This class owns resources
|
| -// temporarily needed for hyphenating one word, and deletes them when it is
|
| -// deleted as listed in the following snippet.
|
| -//
|
| -// std::vector<int> hyphens;
|
| -// QUery query(UTF8ToUTF16("hyphenate"));
|
| -// query.Hyphenate(dict, &hyphens);
|
| -//
|
| -class Query {
|
| - public:
|
| - explicit Query(const string16& word);
|
| - ~Query();
|
| -
|
| - // Hyphenates a word with the specified dictionary. This function hyphenates
|
| - // the word provided to its constructor and returns a list of hyphenation
|
| - // points, positions where we can insert hyphens.
|
| - bool Hyphenate(HyphenDict* dictionary, std::vector<int>* hyphen_offsets);
|
| -
|
| - private:
|
| - // A word to be hyphenated.
|
| - std::string word_utf8_;
|
| -
|
| - // Return variables from the hyphen library.
|
| - scoped_ptr<char[]> hyphen_vector_;
|
| - char** rep_;
|
| - int* pos_;
|
| - int* cut_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(Query);
|
| -};
|
| -
|
| -Query::Query(const string16& word)
|
| - : rep_(NULL),
|
| - pos_(NULL),
|
| - cut_(NULL) {
|
| - // Remove trailing punctuation characters. WebKit does not remove these
|
| - // characters when it hyphenates a word. These characters prevent the hyphen
|
| - // library from applying some rules, i.e. they prevent the library from adding
|
| - // hyphens.
|
| - DCHECK(!word.empty());
|
| - const char16* data = word.data();
|
| - int length = static_cast<int>(word.length());
|
| - while (length > 0) {
|
| - int previous = length;
|
| - int code = 0;
|
| - U16_PREV(data, 0, previous, code);
|
| - UErrorCode error = U_ZERO_ERROR;
|
| - if (uscript_getScript(code, &error) != USCRIPT_COMMON)
|
| - break;
|
| - length = previous;
|
| - }
|
| - UTF16ToUTF8(word.c_str(), length, &word_utf8_);
|
| - // Create a hyphen vector used by hnj_hyphen_hyphenate2(). We allocate a
|
| - // buffer of |word_.length()| + 5 as written in Line 112 of
|
| - // <http://cs.chromium.org/src/third_party/hyphen/hyphen.h>.
|
| - hyphen_vector_.reset(new char[word_utf8_.length() + 5]);
|
| -}
|
| -
|
| -Query::~Query() {
|
| - if (rep_) {
|
| - for (size_t i = 0; i < word_utf8_.length(); ++i) {
|
| - if (rep_[i])
|
| - free(rep_[i]);
|
| - }
|
| - free(rep_);
|
| - }
|
| - if (pos_)
|
| - free(pos_);
|
| - if (cut_)
|
| - free(cut_);
|
| -}
|
| -
|
| -bool Query::Hyphenate(HyphenDict* dictionary,
|
| - std::vector<int>* hyphen_offsets) {
|
| - DCHECK(dictionary);
|
| - DCHECK(hyphen_offsets);
|
| -
|
| - int error_code = hnj_hyphen_hyphenate2(dictionary,
|
| - word_utf8_.data(),
|
| - static_cast<int>(word_utf8_.length()),
|
| - hyphen_vector_.get(),
|
| - NULL,
|
| - &rep_,
|
| - &pos_,
|
| - &cut_);
|
| - if (error_code)
|
| - return false;
|
| -
|
| - // WebKit needs hyphenation points counted in UTF-16 characters. On the other
|
| - // hand, the hyphen library returns hyphenation points counted in UTF-8
|
| - // characters. We increamentally convert hyphenation points in UTF-8
|
| - // characters to hyphenation points in UTF-16 characters and write the
|
| - // converted hyphenation points to the output vector.
|
| - UTF16TextLength text_length;
|
| - hyphen_offsets->clear();
|
| - for (size_t i = 0; i < word_utf8_.length(); ++i) {
|
| - text_length.Append(word_utf8_[i]);
|
| - if (hyphen_vector_[i] & 1)
|
| - hyphen_offsets->push_back(text_length.utf16_length());
|
| - }
|
| - return !hyphen_offsets->empty();
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -namespace content {
|
| -
|
| -Hyphenator::Hyphenator(base::PlatformFile file)
|
| - : dictionary_(NULL),
|
| - dictionary_file_(base::FdopenPlatformFile(file, "r")),
|
| - result_(0) {
|
| -}
|
| -
|
| -Hyphenator::~Hyphenator() {
|
| - if (dictionary_)
|
| - hnj_hyphen_free(dictionary_);
|
| -}
|
| -
|
| -bool Hyphenator::Initialize() {
|
| - if (dictionary_)
|
| - return true;
|
| -
|
| - if (!dictionary_file_.get())
|
| - return false;
|
| - dictionary_ = hnj_hyphen_load_file(dictionary_file_.get());
|
| - return !!dictionary_;
|
| -}
|
| -
|
| -bool Hyphenator::Attach(RenderThread* thread, const string16& locale) {
|
| - if (!thread)
|
| - return false;
|
| - locale_.assign(locale);
|
| - thread->AddObserver(this);
|
| - return thread->Send(new HyphenatorHostMsg_OpenDictionary(locale));
|
| -}
|
| -
|
| -bool Hyphenator::CanHyphenate(const string16& locale) {
|
| - return !locale_.compare(locale);
|
| -}
|
| -
|
| -size_t Hyphenator::ComputeLastHyphenLocation(const string16& word,
|
| - size_t before_index) {
|
| - if (!Initialize() || word.empty())
|
| - return 0;
|
| -
|
| - // Call the hyphen library to get all hyphenation points, i.e. positions where
|
| - // we can insert hyphens. When WebKit finds a line-break, it calls this
|
| - // function twice or more with the same word to find the best hyphenation
|
| - // point. To avoid calling the hyphen library twice or more with the same
|
| - // word, we cache the last query.
|
| - if (word_ != word) {
|
| - word_ = word;
|
| - Query query(word);
|
| - result_ = query.Hyphenate(dictionary_, &hyphen_offsets_);
|
| - }
|
| - if (!result_)
|
| - return 0;
|
| - for (std::vector<int>::reverse_iterator it = hyphen_offsets_.rbegin();
|
| - it != hyphen_offsets_.rend(); ++it) {
|
| - if (static_cast<size_t>(*it) < before_index)
|
| - return *it;
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -bool Hyphenator::OnControlMessageReceived(const IPC::Message& message) {
|
| - bool handled = true;
|
| - IPC_BEGIN_MESSAGE_MAP(Hyphenator, message)
|
| - IPC_MESSAGE_HANDLER(HyphenatorMsg_SetDictionary, OnSetDictionary)
|
| - IPC_MESSAGE_UNHANDLED(handled = false)
|
| - IPC_END_MESSAGE_MAP()
|
| - return handled;
|
| -}
|
| -
|
| -void Hyphenator::OnSetDictionary(IPC::PlatformFileForTransit file) {
|
| - base::PlatformFile rule_file =
|
| - IPC::PlatformFileForTransitToPlatformFile(file);
|
| - if (rule_file == base::kInvalidPlatformFileValue)
|
| - return;
|
| - // Delete the current dictionary and save the given file to this object. We
|
| - // initialize the hyphen library the first time when WebKit actually
|
| - // hyphenates a word, i.e. when WebKit calls the ComputeLastHyphenLocation
|
| - // function. (WebKit does not always hyphenate words even when it calls the
|
| - // CanHyphenate function, e.g. WebKit does not have to hyphenate words when it
|
| - // does not have to break text into lines.)
|
| - if (dictionary_) {
|
| - hnj_hyphen_free(dictionary_);
|
| - dictionary_ = NULL;
|
| - }
|
| - dictionary_file_.Set(base::FdopenPlatformFile(rule_file, "r"));
|
| -}
|
| -
|
| -} // namespace content
|
|
|