engine.py 13.7 KB
Newer Older
1 2 3
# -*- coding: utf-8 -*-
"""The processing engine."""

4 5 6 7 8 9 10 11
from __future__ import unicode_literals

import os

from artifacts import definitions as artifact_types
from artifacts import errors as artifacts_errors
from artifacts import reader as artifacts_reader
from artifacts import registry as artifacts_registry
12

13
from dfvfs.helpers import file_system_searcher
14
from dfvfs.lib import errors as dfvfs_errors
15
from dfvfs.path import factory as path_spec_factory
16 17
from dfvfs.resolver import resolver as path_spec_resolver

18 19 20
from plaso.containers import sessions
from plaso.engine import artifact_filters
from plaso.engine import filter_file
21
from plaso.engine import knowledge_base
22
from plaso.engine import logger
23
from plaso.engine import processing_status
24 25 26
from plaso.engine import profilers
from plaso.lib import definitions
from plaso.lib import errors
27 28 29 30
from plaso.preprocessors import manager as preprocess_manager


class BaseEngine(object):
  """Processing engine interface.

  Attributes:
    knowledge_base (KnowledgeBase): knowledge base.
  """

  # The interval of status updates in number of seconds.
  _STATUS_UPDATE_INTERVAL = 0.5

  # Locations that are probed, case insensitive, to determine the operating
  # system of a source.
  _OPERATING_SYSTEM_PROBE_LOCATIONS = (
      '/etc',
      '/System/Library',
      '/Windows/System32',
      '/WINNT/System32',
      '/WINNT35/System32',
      '/WTSRV/System32')

  # Normalized (lower case, forward slash) locations that indicate
  # a Windows NT family operating system.
  _WINDOWS_LOCATIONS = frozenset([
      '/windows/system32',
      '/winnt/system32',
      '/winnt35/system32',
      '/wtsrv/system32'])

  def __init__(self):
    """Initializes an engine."""
    super(BaseEngine, self).__init__()
    self._abort = False
    self._guppy_memory_profiler = None
    self._memory_profiler = None
    self._name = 'Main'
    self._processing_status = processing_status.ProcessingStatus()
    self._processing_profiler = None
    self._serializers_profiler = None
    self._storage_profiler = None
    self._task_queue_profiler = None

    self.knowledge_base = knowledge_base.KnowledgeBase()

  @classmethod
  def _NormalizeLocation(cls, location):
    """Normalizes a location for operating system detection.

    The relative path of a path specification is operating system dependent,
    as in running the tool on Windows will return Windows paths (backward
    slash) versus forward slash on *NIX systems. Normalizing allows every
    location to be compared against a single forward slash representation.

    Args:
      location (str): location, for example "\\Windows\\System32".

    Returns:
      str: location in lower case with forward slashes as path segment
          separator, for example "/windows/system32".
    """
    return location.lower().replace('\\', '/')

  def _DetermineOperatingSystem(self, searcher):
    """Tries to determine the underlying operating system.

    Args:
      searcher (dfvfs.FileSystemSearcher): file system searcher.

    Returns:
      str: operating system for example "Windows". This should be one of
          the values in definitions.OPERATING_SYSTEM_FAMILIES.
    """
    find_specs = [
        file_system_searcher.FindSpec(location=location, case_sensitive=False)
        for location in self._OPERATING_SYSTEM_PROBE_LOCATIONS]

    locations = set()
    for path_spec in searcher.Find(find_specs=find_specs):
      relative_path = searcher.GetRelativePath(path_spec)
      if relative_path:
        locations.add(self._NormalizeLocation(relative_path))

    # The order of the checks matters: a Windows location takes precedence
    # over a macOS location, which takes precedence over a Linux location.
    if not self._WINDOWS_LOCATIONS.isdisjoint(locations):
      return definitions.OPERATING_SYSTEM_FAMILY_WINDOWS_NT

    if '/system/library' in locations:
      return definitions.OPERATING_SYSTEM_FAMILY_MACOS

    if '/etc' in locations:
      return definitions.OPERATING_SYSTEM_FAMILY_LINUX

    return definitions.OPERATING_SYSTEM_FAMILY_UNKNOWN

  def _StartProfiling(self, configuration):
    """Starts profiling.

    A profiler is only created and started if the corresponding option is
    enabled in the profiling configuration.

    Args:
      configuration (ProfilingConfiguration): profiling configuration.
    """
    if not configuration:
      return

    if configuration.HaveProfileMemoryGuppy():
      self._guppy_memory_profiler = profilers.GuppyMemoryProfiler(
          self._name, configuration)
      self._guppy_memory_profiler.Start()

    if configuration.HaveProfileMemory():
      self._memory_profiler = profilers.MemoryProfiler(
          self._name, configuration)
      self._memory_profiler.Start()

    if configuration.HaveProfileProcessing():
      identifier = '{0:s}-processing'.format(self._name)
      self._processing_profiler = profilers.ProcessingProfiler(
          identifier, configuration)
      self._processing_profiler.Start()

    if configuration.HaveProfileSerializers():
      identifier = '{0:s}-serializers'.format(self._name)
      self._serializers_profiler = profilers.SerializersProfiler(
          identifier, configuration)
      self._serializers_profiler.Start()

    if configuration.HaveProfileStorage():
      self._storage_profiler = profilers.StorageProfiler(
          self._name, configuration)
      self._storage_profiler.Start()

    if configuration.HaveProfileTaskQueue():
      self._task_queue_profiler = profilers.TaskQueueProfiler(
          self._name, configuration)
      self._task_queue_profiler.Start()

  def _StopProfiling(self):
    """Stops profiling.

    Every profiler that was started is stopped and its reference is reset
    to None, so that calling this method again has no further effect.
    """
    if self._guppy_memory_profiler:
      # A final sample is taken before the guppy memory profiler is stopped.
      self._guppy_memory_profiler.Sample()
      self._guppy_memory_profiler.Stop()
      self._guppy_memory_profiler = None

    if self._memory_profiler:
      self._memory_profiler.Stop()
      self._memory_profiler = None

    if self._processing_profiler:
      self._processing_profiler.Stop()
      self._processing_profiler = None

    if self._serializers_profiler:
      self._serializers_profiler.Stop()
      self._serializers_profiler = None

    if self._storage_profiler:
      self._storage_profiler.Stop()
      self._storage_profiler = None

    if self._task_queue_profiler:
      self._task_queue_profiler.Stop()
      self._task_queue_profiler = None

  @classmethod
  def CreateSession(
      cls, artifact_filter_names=None, command_line_arguments=None,
      debug_mode=False, filter_file_path=None, preferred_encoding='utf-8',
      preferred_time_zone=None, preferred_year=None):
    """Creates a session attribute container.

    Args:
      artifact_filter_names (Optional[list[str]]): names of artifact definitions
          that are used for filtering file system and Windows Registry
          key paths.
      command_line_arguments (Optional[str]): the command line arguments.
      debug_mode (bool): True if debug mode was enabled.
      filter_file_path (Optional[str]): path to a file with find specifications.
      preferred_encoding (Optional[str]): preferred encoding.
      preferred_time_zone (Optional[str]): preferred time zone.
      preferred_year (Optional[int]): preferred year.

    Returns:
      Session: session attribute container.
    """
    session = sessions.Session()

    session.artifact_filters = artifact_filter_names
    session.command_line_arguments = command_line_arguments
    session.debug_mode = debug_mode
    session.filter_file = filter_file_path
    session.preferred_encoding = preferred_encoding
    session.preferred_time_zone = preferred_time_zone
    session.preferred_year = preferred_year

    return session

  def GetSourceFileSystem(self, source_path_spec, resolver_context=None):
    """Retrieves the file system of the source.

    Args:
      source_path_spec (dfvfs.PathSpec): path specification of the source
          to process.
      resolver_context (Optional[dfvfs.Context]): resolver context.

    Returns:
      tuple: containing:

        dfvfs.FileSystem: file system
        dfvfs.PathSpec: mount point path specification. The mount point path
            specification refers to either a directory or a volume on a storage
            media device or image. It is needed by the dfVFS file system
            searcher (FileSystemSearcher) to indicate the base location of
            the file system.

    Raises:
      RuntimeError: if source file system path specification is not set.
    """
    if not source_path_spec:
      raise RuntimeError('Missing source path specification.')

    file_system = path_spec_resolver.Resolver.OpenFileSystem(
        source_path_spec, resolver_context=resolver_context)

    # A system-level path specification is its own mount point, otherwise
    # the parent path specification is used.
    type_indicator = source_path_spec.type_indicator
    if path_spec_factory.Factory.IsSystemLevelTypeIndicator(type_indicator):
      mount_point = source_path_spec
    else:
      mount_point = source_path_spec.parent

    return file_system, mount_point

  def PreprocessSources(
      self, artifacts_registry_object, source_path_specs,
      resolver_context=None):
    """Preprocesses the sources.

    Sources for which the file system cannot be opened are logged and
    skipped. The preprocessing plugins are only run on sources for which
    an operating system could be determined. The first detected operating
    system is stored in the knowledge base as "operating_system".

    Args:
      artifacts_registry_object (artifacts.ArtifactDefinitionsRegistry):
          artifact definitions registry.
      source_path_specs (list[dfvfs.PathSpec]): path specifications of
          the sources to process.
      resolver_context (Optional[dfvfs.Context]): resolver context.
    """
    detected_operating_systems = []
    for source_path_spec in source_path_specs:
      try:
        file_system, mount_point = self.GetSourceFileSystem(
            source_path_spec, resolver_context=resolver_context)
      except (RuntimeError, dfvfs_errors.BackEndError) as exception:
        logger.error(exception)
        continue

      try:
        searcher = file_system_searcher.FileSystemSearcher(
            file_system, mount_point)

        operating_system = self._DetermineOperatingSystem(searcher)
        if operating_system != definitions.OPERATING_SYSTEM_FAMILY_UNKNOWN:
          preprocess_manager.PreprocessPluginsManager.RunPlugins(
              artifacts_registry_object, file_system, mount_point,
              self.knowledge_base)

          detected_operating_systems.append(operating_system)

      finally:
        # Make sure the file system is closed, also when preprocessing fails.
        file_system.Close()

    if detected_operating_systems:
      logger.info('Preprocessing detected operating systems: {0:s}'.format(
          ', '.join(detected_operating_systems)))
      self.knowledge_base.SetValue(
          'operating_system', detected_operating_systems[0])

  @classmethod
  def SupportsGuppyMemoryProfiling(cls):
    """Determines if memory profiling with guppy is supported.

    Returns:
      bool: True if memory profiling with guppy is supported.
    """
    return profilers.GuppyMemoryProfiler.IsSupported()

  @classmethod
  def BuildFilterFindSpecs(
      cls, artifact_definitions_path, custom_artifacts_path,
      knowledge_base_object, artifact_filter_names=None, filter_file_path=None):
    """Builds find specifications from artifacts or filter file if available.

    The artifact filter names take precedence over the filter file, the filter
    file is only used if no artifact filter names were provided.

    Args:
      artifact_definitions_path (str): path to artifact definitions file.
      custom_artifacts_path (str): path to custom artifact definitions file.
      knowledge_base_object (KnowledgeBase): knowledge base.
      artifact_filter_names (Optional[list[str]]): names of artifact
          definitions that are used for filtering file system and Windows
          Registry key paths.
      filter_file_path (Optional[str]): path of filter file.

    Returns:
      list[dfvfs.FindSpec]: find specifications for the file source type or
          None if neither artifact filter names nor a filter file were
          provided.

    Raises:
      InvalidFilter: if no valid FindSpecs are built.
    """
    environment_variables = knowledge_base_object.GetEnvironmentVariables()
    find_specs = None
    if artifact_filter_names:
      artifacts_registry_object = cls.BuildArtifactsRegistry(
          artifact_definitions_path, custom_artifacts_path)
      artifact_filters_object = (
          artifact_filters.ArtifactDefinitionsFilterHelper(
              artifacts_registry_object, artifact_filter_names,
              knowledge_base_object))
      artifact_filters_object.BuildFindSpecs(environment_variables)
      # The find specifications are read back from the knowledge base value
      # of the filter helper, of which only the file type ones are used.
      find_specs = knowledge_base_object.GetValue(
          artifact_filters_object.KNOWLEDGE_BASE_VALUE)[
              artifact_types.TYPE_INDICATOR_FILE]
    elif filter_file_path:
      filter_file_object = filter_file.FilterFile(filter_file_path)
      find_specs = filter_file_object.BuildFindSpecs(
          environment_variables=environment_variables)

    if (artifact_filter_names or filter_file_path) and not find_specs:
      raise errors.InvalidFilter(
          'Error processing filters, no valid specifications built.')

    return find_specs

  @classmethod
  def BuildArtifactsRegistry(
      cls, artifact_definitions_path, custom_artifacts_path):
    """Builds an artifact definitions registry.

    The registry is read from the artifact definitions directory and, if
    provided, extended with the custom artifact definitions file.

    Args:
      artifact_definitions_path (str): path to the directory containing
          the artifact definitions.
      custom_artifacts_path (str): path to custom artifact definitions file.

    Returns:
      artifacts.ArtifactDefinitionsRegistry: artifact definitions registry.

    Raises:
      BadConfigOption: if the artifact definitions directory or the custom
          artifact definitions file does not exist or if the artifact
          definitions cannot be read.
    """
    if artifact_definitions_path and not os.path.isdir(
        artifact_definitions_path):
      raise errors.BadConfigOption(
          'No such artifact definitions directory: {0:s}.'.format(
              artifact_definitions_path))

    if custom_artifacts_path and not os.path.isfile(custom_artifacts_path):
      raise errors.BadConfigOption(
          'No such custom artifact definitions file: {0:s}.'.format(
              custom_artifacts_path))

    registry = artifacts_registry.ArtifactDefinitionsRegistry()
    reader = artifacts_reader.YamlArtifactsReader()

    try:
      registry.ReadFromDirectory(reader, artifact_definitions_path)

    except (KeyError, artifacts_errors.FormatError) as exception:
      raise errors.BadConfigOption((
          'Unable to read artifact definitions from: {0:s} with error: '
          '{1!s}').format(artifact_definitions_path, exception))

    if custom_artifacts_path:
      try:
        registry.ReadFromFile(reader, custom_artifacts_path)

      except (KeyError, artifacts_errors.FormatError) as exception:
        raise errors.BadConfigOption((
            'Unable to read artifact definitions from: {0:s} with error: '
            '{1!s}').format(custom_artifacts_path, exception))

    return registry