OLD | NEW |
(Empty) | |
| 1 # Copyright 2012 Google Inc. All Rights Reserved. |
| 2 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at |
| 6 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # |
| 9 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. |
| 14 |
| 15 """Contains the perfdiag gsutil command.""" |
| 16 |
| 17 import calendar |
| 18 from collections import defaultdict |
| 19 import contextlib |
| 20 import datetime |
| 21 import json |
| 22 import math |
| 23 import multiprocessing |
| 24 import os |
| 25 import re |
| 26 import socket |
| 27 import string |
| 28 import subprocess |
| 29 import tempfile |
| 30 import time |
| 31 |
| 32 import boto.gs.connection |
| 33 |
| 34 from gslib.command import Command |
| 35 from gslib.command import COMMAND_NAME |
| 36 from gslib.command import COMMAND_NAME_ALIASES |
| 37 from gslib.command import CONFIG_REQUIRED |
| 38 from gslib.command import FILE_URIS_OK |
| 39 from gslib.command import MAX_ARGS |
| 40 from gslib.command import MIN_ARGS |
| 41 from gslib.command import PROVIDER_URIS_OK |
| 42 from gslib.command import SUPPORTED_SUB_ARGS |
| 43 from gslib.command import URIS_START_ARG |
| 44 from gslib.commands import config |
| 45 from gslib.exception import CommandException |
| 46 from gslib.help_provider import HELP_NAME |
| 47 from gslib.help_provider import HELP_NAME_ALIASES |
| 48 from gslib.help_provider import HELP_ONE_LINE_SUMMARY |
| 49 from gslib.help_provider import HELP_TEXT |
| 50 from gslib.help_provider import HELP_TYPE |
| 51 from gslib.help_provider import HelpType |
| 52 from gslib.util import IS_LINUX |
| 53 from gslib.util import MakeBitsHumanReadable |
| 54 from gslib.util import MakeHumanReadable |
| 55 from gslib.util import Percentile |
| 56 |
# Full help text for perfdiag, rendered by gsutil's help system (wired into
# help_spec below as HELP_TEXT).
_detailed_help_text = ("""
<B>SYNOPSIS</B>
  gsutil perfdiag [-i in.json] [-o out.json]
      [-n iterations] [-c concurrency] [-s size] [-t tests] uri...


<B>DESCRIPTION</B>
  The perfdiag command runs a suite of diagnostic tests for a given Google
  Storage bucket.

  The 'uri' parameter must name an existing bucket (e.g. gs://foo) to which
  the user has write permission. Several test files will be uploaded to and
  downloaded from this bucket. All test files will be deleted at the completion
  of the diagnostic if it finishes successfully.

  gsutil performance can be impacted by many factors at the client, server,
  and in-between, such as: CPU speed; available memory; the access path to the
  local disk; network bandwidth; contention and error rates along the path
  between gsutil and Google; operating system buffering configuration; and
  firewalls and other network elements. The perfdiag command is provided so
  that customers can run a known measurement suite when troubleshooting
  performance problems.


<B>PROVIDING DIAGNOSTIC OUTPUT TO GOOGLE CLOUD STORAGE TEAM</B>
  If the Google Cloud Storage Team asks you to run a performance diagnostic
  please use the following command, and email the output file (output.json)
  to gs-team@google.com:

    gsutil perfdiag -o output.json gs://your-bucket


<B>OPTIONS</B>
  -n          Sets the number of iterations performed when downloading and
              uploading files during latency and throughput tests. Defaults to
              5.

  -c          Sets the level of concurrency to use while running throughput
              experiments. The default value of 1 will only run a single read
              or write operation concurrently.

  -s          Sets the size (in bytes) of the test file used to perform read
              and write throughput tests. The default is 1 MiB.

  -t          Sets the list of diagnostic tests to perform. The default is to
              run all diagnostic tests. Must be a comma-separated list
              containing one or more of the following:

              lat: Runs N iterations (set with -n) of writing the file,
                   retrieving its metadata, reading the file, and deleting
                   the file. Records the latency of each operation.

              rthru: Runs N (set with -n) read operations, with at most C
                     (set with -c) reads outstanding at any given time.

              wthru: Runs N (set with -n) write operations, with at most C
                     (set with -c) writes outstanding at any given time.

  -o          Writes the results of the diagnostic to an output file. The output
              is a JSON file containing system information and performance
              diagnostic results. The file can be read and reported later using
              the -i option.

  -i          Reads the JSON output file created using the -o command and prints
              a formatted description of the results.


<B>NOTE</B>
  The perfdiag command collects system information. It collects your IP address,
  executes DNS queries to Google servers and collects the results, and collects
  network statistics information from the output of netstat -s. None of this
  information will be sent to Google unless you choose to send it.
""")
| 130 |
| 131 |
class PerfDiagCommand(Command):
  """Implementation of gsutil perfdiag command."""

  # Command specification (processed by parent class).
  command_spec = {
    # Name of command.
    COMMAND_NAME: 'perfdiag',
    # List of command name aliases.
    COMMAND_NAME_ALIASES: ['diag', 'diagnostic', 'perf', 'performance'],
    # Min number of args required by this command.
    MIN_ARGS: 0,
    # Max number of args required by this command, or NO_MAX.
    MAX_ARGS: 1,
    # Getopt-style string specifying acceptable sub args.
    SUPPORTED_SUB_ARGS: 'n:c:s:t:i:o:',
    # True if file URIs acceptable for this command.
    FILE_URIS_OK: False,
    # True if provider-only URIs acceptable for this command.
    PROVIDER_URIS_OK: False,
    # Index in args of first URI arg.
    URIS_START_ARG: 0,
    # True if must configure gsutil before running command.
    CONFIG_REQUIRED: True,
  }
  help_spec = {
    # Name of command or auxiliary help info for which this help applies.
    HELP_NAME: 'perfdiag',
    # List of help name aliases.
    HELP_NAME_ALIASES: [],
    # Type of help:
    HELP_TYPE: HelpType.COMMAND_HELP,
    # One line summary of this help.
    HELP_ONE_LINE_SUMMARY: 'Run performance diagnostic',
    # The full help text.
    HELP_TEXT: _detailed_help_text,
  }

  # Byte sizes to use for testing files.
  # TODO: Consider letting the user specify these sizes with a configuration
  # parameter.
  test_file_sizes = (
    0,  # 0 bytes
    1024,  # 1 KB
    102400,  # 100 KB
    1048576,  # 1MB
  )

  # List of all diagnostic tests. Names here are the canonical (lowercase)
  # names accepted by the -t option.
  ALL_DIAG_TESTS = ('rthru', 'wthru', 'lat')

  # Google Cloud Storage API endpoint host.
  GOOGLE_API_HOST = boto.gs.connection.GSConnection.DefaultHost
| 184 |
| 185 def _WindowedExec(self, cmd, n, w, raise_on_error=True): |
| 186 """Executes a command n times with a window size of w. |
| 187 |
| 188 Up to w instances of the command will be executed and left outstanding at a |
| 189 time until n instances of the command have completed. |
| 190 |
| 191 Args: |
| 192 cmd: List containing the command to execute. |
| 193 n: Number of times the command will be executed. |
| 194 w: Window size of outstanding commands being executed. |
| 195 raise_on_error: See _Exec. |
| 196 |
| 197 Raises: |
| 198 Exception: If raise_on_error is set to True and any process exits with a |
| 199 non-zero return code. |
| 200 """ |
| 201 if self.debug: |
| 202 print 'Running command:', cmd |
| 203 devnull_f = open(os.devnull, 'w') |
| 204 num_finished = 0 |
| 205 running = [] |
| 206 while len(running) or num_finished < n: |
| 207 # Fires off new commands that can be executed. |
| 208 while len(running) < w and num_finished + len(running) < n: |
| 209 print 'Starting concurrent command: %s' % (' '.join(cmd)) |
| 210 p = subprocess.Popen(cmd, stdout=devnull_f, stderr=devnull_f) |
| 211 running.append(p) |
| 212 |
| 213 # Checks for finished commands. |
| 214 prev_running = running |
| 215 running = [] |
| 216 for p in prev_running: |
| 217 retcode = p.poll() |
| 218 if retcode is None: |
| 219 running.append(p) |
| 220 elif raise_on_error and retcode: |
| 221 raise CommandException("Received non-zero return code (%d) from " |
| 222 "subprocess '%s'." % (retcode, ' '.join(cmd))) |
| 223 else: |
| 224 num_finished += 1 |
| 225 |
| 226 def _Exec(self, cmd, raise_on_error=True, return_output=False, |
| 227 mute_stderr=False): |
| 228 """Executes a command in a subprocess. |
| 229 |
| 230 Args: |
| 231 cmd: List containing the command to execute. |
| 232 raise_on_error: Whether or not to raise an exception when a process exits |
| 233 with a non-zero return code. |
| 234 return_output: If set to True, the return value of the function is the |
| 235 stdout of the process. |
| 236 mute_stderr: If set to True, the stderr of the process is not printed to |
| 237 the console. |
| 238 |
| 239 Returns: |
| 240 The return code of the process or the stdout if return_output is set. |
| 241 |
| 242 Raises: |
| 243 Exception: If raise_on_error is set to True and any process exits with a |
| 244 non-zero return code. |
| 245 """ |
| 246 if self.debug: |
| 247 print 'Running command:', cmd |
| 248 stderr = subprocess.PIPE if mute_stderr else None |
| 249 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=stderr) |
| 250 (stdoutdata, stderrdata) = p.communicate() |
| 251 if raise_on_error and p.returncode: |
| 252 raise CommandException("Received non-zero return code (%d) from " |
| 253 "subprocess '%s'." % (p.returncode, ' '.join(cmd))) |
| 254 return stdoutdata if return_output else p.returncode |
| 255 |
| 256 def _GsUtil(self, cmd, raise_on_error=True, return_output=False, |
| 257 mute_stderr=False): |
| 258 """Executes a gsutil command in a subprocess. |
| 259 |
| 260 Args: |
| 261 cmd: A list containing the arguments to the gsutil program, e.g. ['ls', |
| 262 'gs://foo']. |
| 263 raise_on_error: see _Exec. |
| 264 return_output: see _Exec. |
| 265 mute_stderr: see _Exec. |
| 266 |
| 267 Returns: |
| 268 The return code of the process or the stdout if return_output is set. |
| 269 """ |
| 270 cmd = self.gsutil_exec_list + cmd |
| 271 return self._Exec(cmd, raise_on_error=raise_on_error, |
| 272 return_output=return_output, mute_stderr=mute_stderr) |
| 273 |
| 274 def _SetUp(self): |
| 275 """Performs setup operations needed before diagnostics can be run.""" |
| 276 |
| 277 # Stores test result data. |
| 278 self.results = {} |
| 279 # List of test files in a temporary location on disk for latency ops. |
| 280 self.latency_files = [] |
| 281 # Maps each test file path to its size in bytes. |
| 282 self.file_sizes = {} |
| 283 # Maps each test file to its contents as a string. |
| 284 self.file_contents = {} |
| 285 |
| 286 def _MakeFile(file_size): |
| 287 """Creates a temporary file of the given size and returns its path.""" |
| 288 fd, fpath = tempfile.mkstemp(suffix='.bin', prefix='gsutil_test_file', |
| 289 text=False) |
| 290 self.file_sizes[fpath] = file_size |
| 291 f = os.fdopen(fd, 'wb') |
| 292 f.write(os.urandom(file_size)) |
| 293 f.close() |
| 294 f = open(fpath, 'rb') |
| 295 self.file_contents[fpath] = f.read() |
| 296 f.close() |
| 297 return fpath |
| 298 |
| 299 # Create files for latency tests. |
| 300 for file_size in self.test_file_sizes: |
| 301 fpath = _MakeFile(file_size) |
| 302 self.latency_files.append(fpath) |
| 303 |
| 304 # Local file on disk for write throughput tests. |
| 305 self.thru_local_file = _MakeFile(self.thru_filesize) |
| 306 # Remote file to write/read from during throughput tests. |
| 307 self.thru_remote_file = (str(self.bucket_uri) + |
| 308 os.path.basename(self.thru_local_file)) |
| 309 |
| 310 def _TearDown(self): |
| 311 """Performs operations to clean things up after performing diagnostics.""" |
| 312 for fpath in self.latency_files + [self.thru_local_file]: |
| 313 try: |
| 314 os.remove(fpath) |
| 315 except OSError: |
| 316 pass |
| 317 |
| 318 self._GsUtil(['rm', self.thru_remote_file], raise_on_error=False, |
| 319 mute_stderr=True) |
| 320 |
| 321 @contextlib.contextmanager |
| 322 def _Time(self, key, bucket): |
| 323 """A context manager that measures time. |
| 324 |
| 325 A context manager that prints a status message before and after executing |
| 326 the inner command and times how long the inner command takes. Keeps track of |
| 327 the timing, aggregated by the given key. |
| 328 |
| 329 Args: |
| 330 key: The key to insert the timing value into a dictionary bucket. |
| 331 bucket: A dictionary to place the timing value in. |
| 332 |
| 333 Yields: |
| 334 For the context manager. |
| 335 """ |
| 336 print key, 'starting...' |
| 337 t0 = time.time() |
| 338 yield |
| 339 t1 = time.time() |
| 340 bucket[key].append(t1 - t0) |
| 341 print key, 'done.' |
| 342 |
  def _RunLatencyTests(self):
    """Runs latency tests.

    For each test file and each iteration, times a full upload / metadata /
    download / delete cycle against the target bucket, recording results in
    self.results['latency'] keyed by 'OPERATION_NUMBYTES' (e.g. 'UPLOAD_1024').
    """
    # Stores timing information for each category of operation.
    self.results['latency'] = defaultdict(list)

    for i in range(self.num_iterations):
      print
      print 'Running latency iteration %d...' % (i+1)
      for fpath in self.latency_files:
        basename = os.path.basename(fpath)
        gsbucket = str(self.bucket_uri)
        gsuri = gsbucket + basename
        file_size = self.file_sizes[fpath]
        readable_file_size = MakeHumanReadable(file_size)

        print
        print ("File of size %(size)s located on disk at '%(fpath)s' being "
               "diagnosed in the cloud at '%(gsuri)s'."
               % {'size': readable_file_size,
                  'fpath': fpath,
                  'gsuri': gsuri})

        # A fresh boto key object per file so each timed call below measures
        # exactly one operation against the bucket.
        k = self.bucket.key_class(self.bucket)
        k.key = basename

        with self._Time('UPLOAD_%d' % file_size, self.results['latency']):
          k.set_contents_from_string(self.file_contents[fpath])
        with self._Time('METADATA_%d' % file_size, self.results['latency']):
          k.exists()
        with self._Time('DOWNLOAD_%d' % file_size, self.results['latency']):
          k.get_contents_as_string()
        with self._Time('DELETE_%d' % file_size, self.results['latency']):
          k.delete()
| 376 |
| 377 def _RunReadThruTests(self): |
| 378 """Runs read throughput tests.""" |
| 379 self.results['read_throughput'] = {'file_size': self.thru_filesize, |
| 380 'num_times': self.num_iterations, |
| 381 'concurrency': self.concurrency} |
| 382 |
| 383 # Copy the file to remote location before reading. |
| 384 self._GsUtil(['cp', self.thru_local_file, self.thru_remote_file]) |
| 385 |
| 386 if self.concurrency == 1: |
| 387 k = self.bucket.key_class(self.bucket) |
| 388 k.key = os.path.basename(self.thru_local_file) |
| 389 # Warm up the TCP connection by transferring a couple times first. |
| 390 for i in range(2): |
| 391 k.get_contents_as_string() |
| 392 t0 = time.time() |
| 393 for i in range(self.num_iterations): |
| 394 k.get_contents_as_string() |
| 395 t1 = time.time() |
| 396 else: |
| 397 cmd = self.gsutil_exec_list + ['cp', self.thru_remote_file, os.devnull] |
| 398 t0 = time.time() |
| 399 self._WindowedExec(cmd, self.num_iterations, self.concurrency) |
| 400 t1 = time.time() |
| 401 |
| 402 time_took = t1 - t0 |
| 403 total_bytes_copied = self.thru_filesize * self.num_iterations |
| 404 bytes_per_second = total_bytes_copied / time_took |
| 405 |
| 406 self.results['read_throughput']['time_took'] = time_took |
| 407 self.results['read_throughput']['total_bytes_copied'] = total_bytes_copied |
| 408 self.results['read_throughput']['bytes_per_second'] = bytes_per_second |
| 409 |
| 410 def _RunWriteThruTests(self): |
| 411 """Runs write throughput tests.""" |
| 412 self.results['write_throughput'] = {'file_size': self.thru_filesize, |
| 413 'num_copies': self.num_iterations, |
| 414 'concurrency': self.concurrency} |
| 415 |
| 416 if self.concurrency == 1: |
| 417 k = self.bucket.key_class(self.bucket) |
| 418 k.key = os.path.basename(self.thru_local_file) |
| 419 # Warm up the TCP connection by transferring a couple times first. |
| 420 for i in range(2): |
| 421 k.set_contents_from_string(self.file_contents[self.thru_local_file]) |
| 422 t0 = time.time() |
| 423 for i in range(self.num_iterations): |
| 424 k.set_contents_from_string(self.file_contents[self.thru_local_file]) |
| 425 t1 = time.time() |
| 426 else: |
| 427 cmd = self.gsutil_exec_list + ['cp', self.thru_local_file, |
| 428 self.thru_remote_file] |
| 429 t0 = time.time() |
| 430 self._WindowedExec(cmd, self.num_iterations, self.concurrency) |
| 431 t1 = time.time() |
| 432 |
| 433 time_took = t1 - t0 |
| 434 total_bytes_copied = self.thru_filesize * self.num_iterations |
| 435 bytes_per_second = total_bytes_copied / time_took |
| 436 |
| 437 self.results['write_throughput']['time_took'] = time_took |
| 438 self.results['write_throughput']['total_bytes_copied'] = total_bytes_copied |
| 439 self.results['write_throughput']['bytes_per_second'] = bytes_per_second |
| 440 |
| 441 def _GetDiskCounters(self): |
| 442 """Retrieves disk I/O statistics for all disks. |
| 443 |
| 444 Adapted from the psutil module's psutil._pslinux.disk_io_counters: |
| 445 http://code.google.com/p/psutil/source/browse/trunk/psutil/_pslinux.py |
| 446 |
| 447 Originally distributed under under a BSD license. |
| 448 Original Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. |
| 449 |
| 450 Returns: |
| 451 A dictionary containing disk names mapped to the disk counters from |
| 452 /disk/diskstats. |
| 453 """ |
| 454 # iostat documentation states that sectors are equivalent with blocks and |
| 455 # have a size of 512 bytes since 2.4 kernels. This value is needed to |
| 456 # calculate the amount of disk I/O in bytes. |
| 457 sector_size = 512 |
| 458 |
| 459 partitions = [] |
| 460 with open('/proc/partitions', 'r') as f: |
| 461 lines = f.readlines()[2:] |
| 462 for line in lines: |
| 463 _, _, _, name = line.split() |
| 464 if name[-1].isdigit(): |
| 465 partitions.append(name) |
| 466 |
| 467 retdict = {} |
| 468 with open('/proc/diskstats', 'r') as f: |
| 469 for line in f: |
| 470 values = line.split()[:11] |
| 471 _, _, name, reads, _, rbytes, rtime, writes, _, wbytes, wtime = values |
| 472 if name in partitions: |
| 473 rbytes = int(rbytes) * sector_size |
| 474 wbytes = int(wbytes) * sector_size |
| 475 reads = int(reads) |
| 476 writes = int(writes) |
| 477 rtime = int(rtime) |
| 478 wtime = int(wtime) |
| 479 retdict[name] = (reads, writes, rbytes, wbytes, rtime, wtime) |
| 480 return retdict |
| 481 |
| 482 def _GetTcpStats(self): |
| 483 """Tries to parse out TCP packet information from netstat output. |
| 484 |
| 485 Returns: |
| 486 A dictionary containing TCP information |
| 487 """ |
| 488 # netstat return code is non-zero for -s on Linux, so don't raise on error. |
| 489 netstat_output = self._Exec(['netstat', '-s'], return_output=True, |
| 490 raise_on_error=False) |
| 491 netstat_output = netstat_output.strip().lower() |
| 492 found_tcp = False |
| 493 tcp_retransmit = None |
| 494 tcp_received = None |
| 495 tcp_sent = None |
| 496 for line in netstat_output.split('\n'): |
| 497 # Header for TCP section is "Tcp:" in Linux/Mac and |
| 498 # "TCP Statistics for" in Windows. |
| 499 if 'tcp:' in line or 'tcp statistics' in line: |
| 500 found_tcp = True |
| 501 |
| 502 # Linux == "segments retransmited" (sic), Mac == "retransmit timeouts" |
| 503 # Windows == "segments retransmitted". |
| 504 if (found_tcp and tcp_retransmit is None and |
| 505 ('segments retransmited' in line or 'retransmit timeouts' in line or |
| 506 'segments retransmitted' in line)): |
| 507 tcp_retransmit = ''.join(c for c in line if c in string.digits) |
| 508 |
| 509 # Linux+Windows == "segments received", Mac == "packets received". |
| 510 if (found_tcp and tcp_received is None and |
| 511 ('segments received' in line or 'packets received' in line)): |
| 512 tcp_received = ''.join(c for c in line if c in string.digits) |
| 513 |
| 514 # Linux == "segments send out" (sic), Mac+Windows == "packets sent". |
| 515 if (found_tcp and tcp_sent is None and |
| 516 ('segments send out' in line or 'packets sent' in line or |
| 517 'segments sent' in line)): |
| 518 tcp_sent = ''.join(c for c in line if c in string.digits) |
| 519 |
| 520 result = {} |
| 521 try: |
| 522 result['tcp_retransmit'] = int(tcp_retransmit) |
| 523 result['tcp_received'] = int(tcp_received) |
| 524 result['tcp_sent'] = int(tcp_sent) |
| 525 except (ValueError, TypeError): |
| 526 result['tcp_retransmit'] = None |
| 527 result['tcp_received'] = None |
| 528 result['tcp_sent'] = None |
| 529 |
| 530 return result |
| 531 |
  def _CollectSysInfo(self):
    """Collects system information.

    Populates self.results['sysinfo'] with host networking details, Google
    server routing info, CPU/memory statistics, and gsutil configuration.
    """
    sysinfo = {}

    # Get the local IP address from socket lib.
    sysinfo['ip_address'] = socket.gethostbyname(socket.gethostname())
    # Record the temporary directory used since it can affect performance, e.g.
    # when on a networked filesystem.
    sysinfo['tempdir'] = tempfile.gettempdir()

    # Produces an RFC 2822 compliant GMT timestamp.
    sysinfo['gmt_timestamp'] = time.strftime('%a, %d %b %Y %H:%M:%S +0000',
                                             time.gmtime())

    # Execute a CNAME lookup on Google DNS to find what Google server
    # it's routing to.
    cmd = ['nslookup', '-type=CNAME', self.GOOGLE_API_HOST]
    nslookup_cname_output = self._Exec(cmd, return_output=True)
    m = re.search(r' = (?P<googserv>[^.]+)\.', nslookup_cname_output)
    sysinfo['googserv_route'] = m.group('googserv') if m else None

    # Look up IP addresses for Google Server.
    (hostname, aliaslist, ipaddrlist) = socket.gethostbyname_ex(
        self.GOOGLE_API_HOST)
    sysinfo['googserv_ips'] = ipaddrlist

    # Reverse lookup the hostnames for the Google Server IPs.
    sysinfo['googserv_hostnames'] = []
    for googserv_ip in ipaddrlist:
      (hostname, aliaslist, ipaddrlist) = socket.gethostbyaddr(googserv_ip)
      sysinfo['googserv_hostnames'].append(hostname)

    # Query o-o to find out what the Google DNS thinks is the user's IP.
    cmd = ['nslookup', '-type=TXT', 'o-o.myaddr.google.com.']
    nslookup_txt_output = self._Exec(cmd, return_output=True)
    m = re.search(r'text\s+=\s+"(?P<dnsip>[\.\d]+)"', nslookup_txt_output)
    sysinfo['dns_o-o_ip'] = m.group('dnsip') if m else None

    # Try and find the number of CPUs in the system if available.
    try:
      sysinfo['cpu_count'] = multiprocessing.cpu_count()
    except NotImplementedError:
      sysinfo['cpu_count'] = None

    # For *nix platforms, obtain the CPU load.
    try:
      sysinfo['load_avg'] = list(os.getloadavg())
    except (AttributeError, OSError):
      sysinfo['load_avg'] = None

    # Try and collect memory information from /proc/meminfo if possible.
    # NOTE(review): /proc/meminfo reports values in kB; the * 1000 below
    # approximates bytes — confirm whether * 1024 was intended.
    mem_total = None
    mem_free = None
    mem_buffers = None
    mem_cached = None

    try:
      with open('/proc/meminfo', 'r') as f:
        for line in f:
          if line.startswith('MemTotal'):
            mem_total = (int(''.join(c for c in line if c in string.digits))
                         * 1000)
          elif line.startswith('MemFree'):
            mem_free = (int(''.join(c for c in line if c in string.digits))
                        * 1000)
          elif line.startswith('Buffers'):
            mem_buffers = (int(''.join(c for c in line if c in string.digits))
                           * 1000)
          elif line.startswith('Cached'):
            mem_cached = (int(''.join(c for c in line if c in string.digits))
                          * 1000)
    except (IOError, ValueError):
      pass

    sysinfo['meminfo'] = {'mem_total': mem_total,
                          'mem_free': mem_free,
                          'mem_buffers': mem_buffers,
                          'mem_cached': mem_cached}

    # Get configuration attributes from config module.
    sysinfo['gsutil_config'] = {}
    for attr in dir(config):
      attr_value = getattr(config, attr)
      # Filter out multiline strings that are not useful.
      if attr.isupper() and not (isinstance(attr_value, basestring) and
                                 '\n' in attr_value):
        sysinfo['gsutil_config'][attr] = attr_value

    self.results['sysinfo'] = sysinfo
| 621 |
  def _DisplayStats(self, trials):
    """Prints out mean, standard deviation, median, and 90th percentile.

    Args:
      trials: Sorted list of timing samples in seconds for one operation.
    """
    n = len(trials)
    mean = float(sum(trials)) / n
    # Population standard deviation (divides by n, not n-1).
    stdev = math.sqrt(sum((x - mean)**2 for x in trials) / n)

    # Samples are in seconds; * 1000 converts to milliseconds for display.
    # The trailing commas keep every column of the row on a single line,
    # completing a row started by the caller (_DisplayResults).
    print str(n).rjust(6), '',
    print ('%.1f' % (mean * 1000)).rjust(9), '',
    print ('%.1f' % (stdev * 1000)).rjust(12), '',
    print ('%.1f' % (Percentile(trials, 0.5) * 1000)).rjust(11), '',
    print ('%.1f' % (Percentile(trials, 0.9) * 1000)).rjust(11), ''
| 633 |
  def _DisplayResults(self):
    """Displays results collected from diagnostic run.

    Prints each section (latency, write/read throughput, system information)
    only if the corresponding key is present in self.results, so it works both
    for a fresh run and for results loaded from a -i input file. Also writes
    the raw results as JSON if -o was given.
    """
    print
    print '=' * 78
    print 'DIAGNOSTIC RESULTS'.center(78)
    print '=' * 78

    if 'latency' in self.results:
      print
      print '-' * 78
      print 'Latency'.center(78)
      print '-' * 78
      # Column widths match the rjust() calls below and in _DisplayStats.
      print ('Operation       Size  Trials  Mean (ms)  Std Dev (ms)  '
             'Median (ms)  90th % (ms)')
      print ('=========  =========  ======  =========  ============  '
             '===========  ===========')
      for key in sorted(self.results['latency']):
        trials = sorted(self.results['latency'][key])
        # Keys have the form 'OPERATION_NUMBYTES', e.g. 'UPLOAD_1024'.
        op, numbytes = key.split('_')
        numbytes = int(numbytes)
        if op == 'METADATA':
          print 'Metadata'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)
        if op == 'DOWNLOAD':
          print 'Download'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)
        if op == 'UPLOAD':
          print 'Upload'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)
        if op == 'DELETE':
          print 'Delete'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)

    if 'write_throughput' in self.results:
      print
      print '-' * 78
      print 'Write Throughput'.center(78)
      print '-' * 78
      write_thru = self.results['write_throughput']
      print 'Copied a %s file %d times for a total transfer size of %s.' % (
          MakeHumanReadable(write_thru['file_size']),
          write_thru['num_copies'],
          MakeHumanReadable(write_thru['total_bytes_copied']))
      # * 8 converts bytes/s to bits/s for the human-readable rate.
      print 'Write throughput: %s/s.' % (
          MakeBitsHumanReadable(write_thru['bytes_per_second'] * 8))

    if 'read_throughput' in self.results:
      print
      print '-' * 78
      print 'Read Throughput'.center(78)
      print '-' * 78
      read_thru = self.results['read_throughput']
      print 'Copied a %s file %d times for a total transfer size of %s.' % (
          MakeHumanReadable(read_thru['file_size']),
          read_thru['num_times'],
          MakeHumanReadable(read_thru['total_bytes_copied']))
      print 'Read throughput: %s/s.' % (
          MakeBitsHumanReadable(read_thru['bytes_per_second'] * 8))

    if 'sysinfo' in self.results:
      print
      print '-' * 78
      print 'System Information'.center(78)
      print '-' * 78
      info = self.results['sysinfo']
      print 'IP Address: \n %s' % info['ip_address']
      print 'Temporary Directory: \n %s' % info['tempdir']
      print 'Bucket URI: \n %s' % self.results['bucket_uri']

      if 'gmt_timestamp' in info:
        ts_string = info['gmt_timestamp']
        timetuple = None
        try:
          # Convert RFC 2822 string to Linux timestamp.
          timetuple = time.strptime(ts_string, '%a, %d %b %Y %H:%M:%S +0000')
        except ValueError:
          pass

        if timetuple:
          # Converts the GMT time tuple to local Linux timestamp.
          localtime = calendar.timegm(timetuple)
          localdt = datetime.datetime.fromtimestamp(localtime)
          print 'Measurement time: \n %s' % localdt.strftime(
              '%Y-%m-%d %I-%M-%S %p %Z')

      print 'Google Server: \n %s' % info['googserv_route']
      print ('Google Server IP Addresses: \n %s' %
             ('\n '.join(info['googserv_ips'])))
      print ('Google Server Hostnames: \n %s' %
             ('\n '.join(info['googserv_hostnames'])))
      print 'Google DNS thinks your IP is: \n %s' % info['dns_o-o_ip']
      print 'CPU Count: \n %s' % info['cpu_count']
      print 'CPU Load Average: \n %s' % info['load_avg']
      try:
        print ('Total Memory: \n %s' %
               MakeHumanReadable(info['meminfo']['mem_total']))
        # Free memory is really MemFree + Buffers + Cached.
        print 'Free Memory: \n %s' % MakeHumanReadable(
            info['meminfo']['mem_free'] +
            info['meminfo']['mem_buffers'] +
            info['meminfo']['mem_cached'])
      except TypeError:
        # Any meminfo value may be None if /proc/meminfo was unavailable;
        # skip the memory report in that case.
        pass

      netstat_after = info['netstat_end']
      netstat_before = info['netstat_start']
      for tcp_type in ('sent', 'received', 'retransmit'):
        try:
          delta = (netstat_after['tcp_%s' % tcp_type] -
                   netstat_before['tcp_%s' % tcp_type])
          print 'TCP segments %s during test:\n %d' % (tcp_type, delta)
        except TypeError:
          # Counters are None when netstat output could not be parsed.
          pass

      if 'disk_counters_end' in info and 'disk_counters_start' in info:
        print 'Disk Counter Deltas:\n',
        disk_after = info['disk_counters_end']
        disk_before = info['disk_counters_start']
        print '', 'disk'.rjust(6),
        for colname in ['reads', 'writes', 'rbytes', 'wbytes', 'rtime',
                        'wtime']:
          print colname.rjust(8),
        print
        for diskname in sorted(disk_after):
          before = disk_before[diskname]
          after = disk_after[diskname]
          (reads1, writes1, rbytes1, wbytes1, rtime1, wtime1) = before
          (reads2, writes2, rbytes2, wbytes2, rtime2, wtime2) = after
          print '', diskname.rjust(6),
          deltas = [reads2-reads1, writes2-writes1, rbytes2-rbytes1,
                    wbytes2-wbytes1, rtime2-rtime1, wtime2-wtime1]
          for delta in deltas:
            print str(delta).rjust(8),
          print

    if self.output_file:
      with open(self.output_file, 'w') as f:
        json.dump(self.results, f, indent=2)
      print
      print "Output file written to '%s'." % self.output_file

    print
| 780 |
| 781 def _ParsePositiveInteger(self, val, msg): |
| 782 """Tries to convert val argument to a positive integer. |
| 783 |
| 784 Args: |
| 785 val: The value (as a string) to convert to a positive integer. |
| 786 msg: The error message to place in the CommandException on an error. |
| 787 |
| 788 Returns: |
| 789 A valid positive integer. |
| 790 |
| 791 Raises: |
| 792 CommandException: If the supplied value is not a valid positive integer. |
| 793 """ |
| 794 try: |
| 795 val = int(val) |
| 796 if val < 1: |
| 797 raise CommandException(msg) |
| 798 return val |
| 799 except ValueError: |
| 800 raise CommandException(msg) |
| 801 |
| 802 def _ParseArgs(self): |
| 803 """Parses arguments for perfdiag command.""" |
| 804 # From -n. |
| 805 self.num_iterations = 5 |
| 806 # From -c. |
| 807 self.concurrency = 1 |
| 808 # From -s. |
| 809 self.thru_filesize = 1048576 |
| 810 # From -t. |
| 811 self.diag_tests = self.ALL_DIAG_TESTS |
| 812 # From -o. |
| 813 self.output_file = None |
| 814 # From -i. |
| 815 self.input_file = None |
| 816 |
| 817 if self.sub_opts: |
| 818 for o, a in self.sub_opts: |
| 819 if o == '-n': |
| 820 self.num_iterations = self._ParsePositiveInteger( |
| 821 a, 'The -n parameter must be a positive integer.') |
| 822 if o == '-c': |
| 823 self.concurrency = self._ParsePositiveInteger( |
| 824 a, 'The -c parameter must be a positive integer.') |
| 825 if o == '-s': |
| 826 self.thru_filesize = self._ParsePositiveInteger( |
| 827 a, 'The -s parameter must be a positive integer.') |
| 828 if o == '-t': |
| 829 self.diag_tests = [] |
| 830 for test_name in a.strip().split(','): |
| 831 if test_name.lower() not in self.ALL_DIAG_TESTS: |
| 832 raise CommandException("List of test names (-t) contains invalid " |
| 833 "test name '%s'." % test_name) |
| 834 self.diag_tests.append(test_name) |
| 835 if o == '-o': |
| 836 self.output_file = os.path.abspath(a) |
| 837 if o == '-i': |
| 838 self.input_file = os.path.abspath(a) |
| 839 if not os.path.isfile(self.input_file): |
| 840 raise CommandException("Invalid input file (-i): '%s'." % a) |
| 841 try: |
| 842 with open(self.input_file, 'r') as f: |
| 843 self.results = json.load(f) |
| 844 print "Read input file: '%s'." % self.input_file |
| 845 except ValueError: |
| 846 raise CommandException("Could not decode input file (-i): '%s'." % |
| 847 a) |
| 848 return |
| 849 |
| 850 if not self.args: |
| 851 raise CommandException('Wrong number of arguments for "perfdiag" ' |
| 852 'command.') |
| 853 self.bucket_uri = self.suri_builder.StorageUri(self.args[0]) |
| 854 if not self.bucket_uri.names_bucket(): |
| 855 raise CommandException('The perfdiag command requires a URI that ' |
| 856 'specifies a bucket.\n"%s" is not ' |
| 857 'valid.' % self.bucket_uri) |
| 858 self.bucket = self.bucket_uri.get_bucket() |
| 859 |
  # Command entry point.
  def RunCommand(self):
    """Called by gsutil when the command is being invoked.

    Returns:
      0 on success (exceptions propagate to the gsutil framework otherwise).
    """
    self._ParseArgs()

    # With -i, previously saved results are displayed and no tests are run.
    if self.input_file:
      self._DisplayResults()
      return 0

    print 'Number of iterations to run: %d' % self.num_iterations
    print 'Base bucket URI: %s' % self.bucket_uri
    print 'Concurrency level: %d' % self.concurrency
    print 'Throughput file size: %s' % MakeHumanReadable(self.thru_filesize)
    print 'Diagnostics to run: %s' % (', '.join(self.diag_tests))

    try:
      self._SetUp()

      # Collect generic system info.
      self._CollectSysInfo()
      # Collect netstat info and disk counters before tests (and again later).
      self.results['sysinfo']['netstat_start'] = self._GetTcpStats()
      if IS_LINUX:
        self.results['sysinfo']['disk_counters_start'] = self._GetDiskCounters()
      # Record bucket URI.
      self.results['bucket_uri'] = str(self.bucket_uri)

      if 'lat' in self.diag_tests:
        self._RunLatencyTests()
      if 'rthru' in self.diag_tests:
        self._RunReadThruTests()
      if 'wthru' in self.diag_tests:
        self._RunWriteThruTests()

      # Collect netstat info and disk counters after tests.
      self.results['sysinfo']['netstat_end'] = self._GetTcpStats()
      if IS_LINUX:
        self.results['sysinfo']['disk_counters_end'] = self._GetDiskCounters()

      self._DisplayResults()
    finally:
      # Always clean up local temp files and the remote test object, even if
      # a diagnostic failed partway through.
      self._TearDown()

    return 0
OLD | NEW |