Chromium Code Reviews| Index: chrome/browser/metrics/metrics_service.cc |
| diff --git a/chrome/browser/metrics/metrics_service.cc b/chrome/browser/metrics/metrics_service.cc |
| index fef349d7b40345cacbbd1b1211308e31cdb039f1..7da0645e910daea91119553543a78ff2c10ffea4 100644 |
| --- a/chrome/browser/metrics/metrics_service.cc |
| +++ b/chrome/browser/metrics/metrics_service.cc |
| @@ -239,11 +239,6 @@ const size_t kUploadLogAvoidRetransmitSize = 50000; |
| // Interval, in minutes, between state saves. |
| const int kSaveStateIntervalMinutes = 5; |
| -// Used to indicate that the response code is currently not set at all -- |
| -// RESPONSE_CODE_INVALID can sometimes be returned in response to a request if, |
| -// e.g., the server is down. |
| -const int kNoResponseCode = net::URLFetcher::RESPONSE_CODE_INVALID - 1; |
| - |
| enum ResponseStatus { |
| UNKNOWN_FAILURE, |
| SUCCESS, |
| @@ -1017,8 +1012,12 @@ void MetricsService::PushPendingLogsToPersistentStorage() { |
| // We may race here, and send second copy of initial log later. |
| if (state_ == INITIAL_LOG_READY) |
| state_ = SENDING_OLD_LOGS; |
| - MetricsLogManager::StoreType store_type = current_fetch_xml_.get() ? |
| - MetricsLogManager::PROVISIONAL_STORE : MetricsLogManager::NORMAL_STORE; |
| + |
| + MetricsLogManager::StoreType store_type; |
| + if (current_fetch_xml_.get() || current_fetch_proto_.get()) |
| + store_type = MetricsLogManager::PROVISIONAL_STORE; |
| + else |
| + store_type = MetricsLogManager::NORMAL_STORE; |
| log_manager_.StoreStagedLogAsUnsent(store_type); |
| } |
| DCHECK(!log_manager_.has_staged_log()); |
| @@ -1121,14 +1120,6 @@ void MetricsService::OnFinalLogInfoCollectionDone() { |
| } |
| MakeStagedLog(); |
| - |
| - // MakeStagedLog should have prepared log text; if it didn't, skip this |
| - // upload and hope things work out next time. |
| - if (log_manager_.staged_log_text().empty()) { |
| - scheduler_->UploadCancelled(); |
| - return; |
| - } |
| - |
| SendStagedLog(); |
| } |
| @@ -1139,7 +1130,7 @@ void MetricsService::MakeStagedLog() { |
| switch (state_) { |
| case INITIALIZED: |
| case INIT_TASK_SCHEDULED: // We should be further along by now. |
| - DCHECK(false); |
| + NOTREACHED(); |
| return; |
| case INIT_TASK_DONE: |
| @@ -1207,26 +1198,21 @@ void MetricsService::SendStagedLog() { |
| PrepareFetchWithStagedLog(); |
| - if (!current_fetch_xml_.get()) { |
| - DCHECK(!current_fetch_proto_.get()); |
| + if (!current_fetch_xml_.get() && !current_fetch_proto_.get()) { |
| // Compression failed, and log discarded :-/. |
| + // Skip this upload and hope things work out next time. |
| log_manager_.DiscardStagedLog(); |
| scheduler_->UploadCancelled(); |
| // TODO(jar): If compression failed, we should have created a tiny log and |
| // compressed that, so that we can signal that we're losing logs. |
| return; |
| } |
| - // Currently, the staged log for the protobuf version of the data is discarded |
| - // after we create the URL request, so that there is no chance for |
| - // re-transmission in case the corresponding XML request fails. We will |
| - // handle protobuf failures more carefully once that becomes the main |
| - // pipeline, i.e. once we switch away from the XML pipeline. |
| - DCHECK(current_fetch_proto_.get() || !log_manager_.has_staged_log_proto()); |
| DCHECK(!waiting_for_asynchronous_reporting_step_); |
| - |
| waiting_for_asynchronous_reporting_step_ = true; |
| - current_fetch_xml_->Start(); |
| + |
| + if (current_fetch_xml_.get()) |
| + current_fetch_xml_->Start(); |
| if (current_fetch_proto_.get()) |
| current_fetch_proto_->Start(); |
| @@ -1234,20 +1220,22 @@ void MetricsService::SendStagedLog() { |
| } |
| void MetricsService::PrepareFetchWithStagedLog() { |
| - DCHECK(!log_manager_.staged_log_text().empty()); |
| + DCHECK(log_manager_.has_staged_log()); |
| // Prepare the XML version. |
| DCHECK(!current_fetch_xml_.get()); |
| - current_fetch_xml_.reset(content::URLFetcher::Create( |
| - GURL(server_url_xml_), net::URLFetcher::POST, this)); |
| - current_fetch_xml_->SetRequestContext( |
| - g_browser_process->system_request_context()); |
| - current_fetch_xml_->SetUploadData(kMimeTypeXml, |
| - log_manager_.staged_log_text().xml); |
| - // We already drop cookies server-side, but we might as well strip them out |
| - // client-side as well. |
| - current_fetch_xml_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | |
| - net::LOAD_DO_NOT_SEND_COOKIES); |
| + if (log_manager_.has_staged_log_xml()) { |
| + current_fetch_xml_.reset(content::URLFetcher::Create( |
| + GURL(server_url_xml_), net::URLFetcher::POST, this)); |
| + current_fetch_xml_->SetRequestContext( |
| + g_browser_process->system_request_context()); |
| + current_fetch_xml_->SetUploadData(kMimeTypeXml, |
| + log_manager_.staged_log_text().xml); |
| + // We already drop cookies server-side, but we might as well strip them out |
| + // client-side as well. |
| + current_fetch_xml_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | |
| + net::LOAD_DO_NOT_SEND_COOKIES); |
| + } |
| // Prepare the protobuf version. |
| DCHECK(!current_fetch_proto_.get()); |
| @@ -1262,12 +1250,6 @@ void MetricsService::PrepareFetchWithStagedLog() { |
| // client-side as well. |
| current_fetch_proto_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | |
| net::LOAD_DO_NOT_SEND_COOKIES); |
| - |
| - // Discard the protobuf version of the staged log, so that we will avoid |
| - // re-uploading it even if we need to re-upload the XML version. |
| - // TODO(isherman): Handle protobuf upload failures more gracefully once we |
| - // transition away from the XML-based pipeline. |
| - log_manager_.DiscardStagedLogProto(); |
| } |
| } |
| @@ -1295,22 +1277,9 @@ static const char* StatusToString(const net::URLRequestStatus& status) { |
| } |
| // We need to wait for two responses: the response to the XML upload, and the |
| -// response to the protobuf upload. For now, only the XML upload's response |
| -// affects decisions like whether to retry the upload, whether to abandon the |
| -// upload because it is too large, etc. However, we still need to wait for the |
| -// protobuf upload, as we cannot reset |current_fetch_proto_| until we have |
| -// confirmation that the network request was sent; and the easiest way to do |
| -// that is to wait for the response. In case the XML upload's response arrives |
| -// first, we cache that response until the protobuf upload's response also |
| -// arrives. |
| -// |
| -// Note that if the XML upload succeeds but the protobuf upload fails, we will |
| -// not retry the protobuf upload. If the XML upload fails while the protobuf |
| -// upload succeeds, we will still avoid re-uploading the protobuf data because |
| -// we "zap" the data after the first upload attempt. This means that we might |
| -// lose protobuf uploads when XML ones succeed; but we will never duplicate any |
| -// protobuf uploads. Protobuf failures should be rare enough to where this |
| -// should be ok while we have the two pipelines running in parallel. |
| +// response to the protobuf upload. In the case that exactly one of the uploads |
| +// fails and needs to be retried, we "zap" the other pipeline's staged log to |
| +// avoid incorrectly re-uploading it as well. |
| void MetricsService::OnURLFetchComplete(const net::URLFetcher* source) { |
| DCHECK(waiting_for_asynchronous_reporting_step_); |
| @@ -1318,72 +1287,76 @@ void MetricsService::OnURLFetchComplete(const net::URLFetcher* source) { |
| // Note however that |source| is aliased to one of these, so we should be |
| // careful not to delete it too early. |
| scoped_ptr<net::URLFetcher> s; |
| + bool is_xml; |
| if (source == current_fetch_xml_.get()) { |
| s.reset(current_fetch_xml_.release()); |
| - |
| - // Cache the XML responses, in case we still need to wait for the protobuf |
| - // response. |
| - response_code_ = source->GetResponseCode(); |
| - response_status_ = StatusToString(source->GetStatus()); |
| - source->GetResponseAsString(&response_data_); |
| - |
| - // Log a histogram to track response success vs. failure rates. |
| - UMA_HISTOGRAM_ENUMERATION("UMA.UploadResponseStatus.XML", |
| - ResponseCodeToStatus(response_code_), |
| - NUM_RESPONSE_STATUSES); |
| + is_xml = true; |
| } else if (source == current_fetch_proto_.get()) { |
| s.reset(current_fetch_proto_.release()); |
| - |
| - // Log a histogram to track response success vs. failure rates. |
| - UMA_HISTOGRAM_ENUMERATION("UMA.UploadResponseStatus.Protobuf", |
| - ResponseCodeToStatus(source->GetResponseCode()), |
| - NUM_RESPONSE_STATUSES); |
| + is_xml = false; |
| } else { |
| NOTREACHED(); |
| return; |
| } |
| - // If we're still waiting for one of the responses, keep waiting... |
| - if (current_fetch_xml_.get() || current_fetch_proto_.get()) |
| - return; |
| + int response_code = source->GetResponseCode(); |
| - // We should only be able to reach here once we've received responses to both |
| - // the XML and the protobuf requests. We should always have the response code |
| - // available. |
| - DCHECK_NE(response_code_, kNoResponseCode); |
| - waiting_for_asynchronous_reporting_step_ = false; |
| + // Log a histogram to track response success vs. failure rates. |
| + if (is_xml) { |
| + UMA_HISTOGRAM_ENUMERATION("UMA.UploadResponseStatus.XML", |
| + ResponseCodeToStatus(response_code), |
| + NUM_RESPONSE_STATUSES); |
| + } else { |
| + UMA_HISTOGRAM_ENUMERATION("UMA.UploadResponseStatus.Protobuf", |
| + ResponseCodeToStatus(response_code), |
| + NUM_RESPONSE_STATUSES); |
| + } |
| // If the upload was provisionally stored, drop it now that the upload is |
| // known to have gone through. |
| log_manager_.DiscardLastProvisionalStore(); |
| // Confirm send so that we can move on. |
| - VLOG(1) << "Metrics response code: " << response_code_ |
| - << " status=" << response_status_; |
| + VLOG(1) << "Metrics response code: " << response_code |
|
jar (doing other things)
2012/06/11 21:19:35
nit: I think there was a move to reduce the verbosity of these logs [comment truncated in source]
|
| + << " status=" << StatusToString(source->GetStatus()) << " for " |
| + << (is_xml ? "xml" : "protobuf") << " upload."; |
| - bool upload_succeeded = response_code_ == 200; |
| + bool upload_succeeded = response_code == 200; |
| // Provide boolean for error recovery (allow us to ignore response_code). |
| bool discard_log = false; |
| - |
| - if (!upload_succeeded && |
| - log_manager_.staged_log_text().xml.length() > |
| - kUploadLogAvoidRetransmitSize) { |
| - UMA_HISTOGRAM_COUNTS( |
| - "UMA.Large Rejected Log was Discarded", |
| - static_cast<int>(log_manager_.staged_log_text().xml.length())); |
| + const size_t log_size = is_xml ? |
| + log_manager_.staged_log_text().xml.length() : |
| + log_manager_.staged_log_text().proto.length(); |
| + if (!upload_succeeded && log_size > kUploadLogAvoidRetransmitSize) { |
| + UMA_HISTOGRAM_COUNTS("UMA.Large Rejected Log was Discarded", |
| + static_cast<int>(log_size)); |
| discard_log = true; |
| - } else if (response_code_ == 400) { |
| + } else if (response_code == 400) { |
| // Bad syntax. Retransmission won't work. |
| discard_log = true; |
| } |
| if (!upload_succeeded && !discard_log) { |
| - VLOG(1) << "Metrics: transmission attempt returned a failure code: " |
| - << response_code_ << ". Verify network connectivity"; |
| - LogBadResponseCode(); |
| + LogBadResponseCode(response_code, is_xml); |
| } else { // Successful receipt (or we are discarding log). |
| - VLOG(1) << "Metrics response data: " << response_data_; |
| + std::string response_data; |
| + source->GetResponseAsString(&response_data); |
| + VLOG(1) << "Metrics response data: " << response_data; |
|
jar (doing other things)
2012/06/11 21:19:35
nit: can/should this be debug-only logging?
|
| + |
| + if (is_xml) |
| + log_manager_.DiscardStagedLogXml(); |
| + else |
| + log_manager_.DiscardStagedLogProto(); |
| + } |
| + |
| + // If we're still waiting for one of the responses, keep waiting... |
| + if (current_fetch_xml_.get() || current_fetch_proto_.get()) |
| + return; |
| + |
| + waiting_for_asynchronous_reporting_step_ = false; |
| + |
| + if (!log_manager_.has_staged_log()) { |
| switch (state_) { |
| case INITIAL_LOG_READY: |
| state_ = SENDING_OLD_LOGS; |
| @@ -1402,18 +1375,23 @@ void MetricsService::OnURLFetchComplete(const net::URLFetcher* source) { |
| break; |
| } |
| - log_manager_.DiscardStagedLog(); |
| - |
| if (log_manager_.has_unsent_logs()) |
| DCHECK_LT(state_, SENDING_CURRENT_LOGS); |
| } |
| // Error 400 indicates a problem with the log, not with the server, so |
| // don't consider that a sign that the server is in trouble. |
| - bool server_is_healthy = upload_succeeded || response_code_ == 400; |
| - |
| - scheduler_->UploadFinished(server_is_healthy, |
| - log_manager_.has_unsent_logs()); |
| + bool server_is_healthy = upload_succeeded || response_code == 400; |
| + |
| + // Note that we are essentially randomly choosing to report either the XML or |
| + // the protobuf server's health status, but this is ok: In the case that the |
| + // two statuses are not the same, one of the uploads succeeded but the other |
| + // did not. In this case, we'll re-try only the upload that failed. This first |
| + // re-try might ignore the server's unhealthiness; but the response to the |
| + // re-tried upload will correctly propagate the server's health status for any |
| + // subsequent re-tries. Hence, we'll at most delay slowing the upload rate by |
| + // one re-try, which is fine. |
| + scheduler_->UploadFinished(server_is_healthy, log_manager_.has_unsent_logs()); |
| // Collect network stats if UMA upload succeeded. |
| IOThread* io_thread = g_browser_process->io_thread(); |
| @@ -1422,21 +1400,25 @@ void MetricsService::OnURLFetchComplete(const net::URLFetcher* source) { |
| chrome_browser_net::CollectPipeliningCapabilityStatsOnUIThread( |
| http_pipelining_test_server_, io_thread); |
| } |
| - |
| - // Reset the cached response data. |
| - response_code_ = kNoResponseCode; |
| - response_data_ = std::string(); |
| - response_status_ = std::string(); |
| } |
| -void MetricsService::LogBadResponseCode() { |
| - VLOG(1) << "Verify your metrics logs are formatted correctly. Verify server " |
| - "is active at " << server_url_xml_; |
| +void MetricsService::LogBadResponseCode(int response_code, bool is_xml) { |
| + VLOG(1) << "Metrics: transmission attempt returned a failure code: " |
|
jar (doing other things)
2012/06/11 21:19:35
nit: debug only? Will folks ever notice this in output? [comment truncated in source]
|
| + << response_code << ". Verify network connectivity and that your " |
| + << "metrics logs are formatted correctly. Verify server is active at " |
| + << (is_xml ? server_url_xml_ : server_url_proto_) << "."; |
| + |
| if (!log_manager_.has_staged_log()) { |
| - VLOG(1) << "METRICS: Recorder shutdown during log transmission."; |
| + // TODO(isherman): I don't think this code is reachable... |
| + VLOG(1) << "Metrics: Recorder shutdown during log transmission."; |
| } else { |
| - VLOG(1) << "METRICS: transmission retry being scheduled for " |
| - << log_manager_.staged_log_text().xml; |
| + if (is_xml) { |
| + VLOG(1) << "Metrics: transmission retry being scheduled for \n" |
| + << log_manager_.staged_log_text().xml; |
| + } else { |
| + VLOG(1) |
| + << "Metrics: transmission retry being scheduled for protobuf upload."; |
|
jar (doing other things)
2012/06/11 21:19:35
nit: vlogs throughout.
|
| + } |
| } |
| } |