#define DATAMANAGER_H

#include "includes.h"

#include "rccconsts.h"

#include "data_structures/pagedata.h"

#include "data_structures/pagearc.h"

#include "data_structures/pdlogitem.h"

#include "data_structures/pdandpacreatedata.h"

// Container aliases used throughout the DataManager interface.

// Sequence of page arcs (PA).
using PAContainer = QVector<PageArc>;

// Queue of page-data log items.
using PDLIContainer = QQueue<PDLogItem>;

// Sequence of combined "page data + page arc" creation records.
using PDPAContainer = QVector<PDPACreateData>;

class DataManager

{

public:

  enum LogType

  {

  CommonLog,

  ErrorLog

  };

  DataManager();

  virtual ~DataManager();

  virtual bool addHost(HostData* phD) = 0;

  virtual bool allWorkIsDone() = 0;

  virtual QVector<HostData*> getHosts() = 0;

  virtual void getPDsStartingFromId(PDContainer &result, const ulong &id,

  const int& countMultiplier = RCCConsts::PAGEDATA_CHUNK_SIZE_COUNT_MULTIPLIER) = 0;

  virtual void getPAsStartingFromId(PAContainer &result, const ulong &id,

  const int& count = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

  virtual void getFreePDsForDownloading(PDContainer &result, HostData *phD,

  const int& countMultiplier = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

  virtual void getFreePDsForParsing(PDContainer &result, const int& count = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

  virtual void insertPDs(const PDContainer &pDs) = 0;

  virtual void addPDsAndPAs(const PDPAContainer &pDPAs) = 0;

  virtual void updatePDs(const PDContainer &pDs) = 0;//метод должен удалять, если стоит remove

НЕ нашли? Не то? Что вы ищете?

  virtual std::pair<bool, PageData> contentSeen(HostData* phD, const QString& content) = 0;

  virtual void insertPAs(const PAContainer& pAs) = 0;

  virtual void changePAsByTo(const int &to, const int &newTo) = 0;

  //Лог

  virtual PDLIContainer* getLog(const DataManager::LogType &logType, const int size = INT_MAX) = 0;

  virtual void insertLogItem(const DataManager::LogType &logType, const PDLogItem& pDLI) = 0;

  virtual void insertLogItems(const DataManager::LogType &logType, const QVector<PDLogItem> &items) = 0;

};

#endif // DATAMANAGER_H

data_structures/pdstore.h

#ifndef PDSTORE_H

#define PDSTORE_H

#include "includes.h"

#include "pagedata.h"

using namespace boost::multi_index;

namespace PD

{

struct ById{};

struct ByBlocked{};

struct ByPhDAndBlockedAndDownloaded{};

struct ByBlockedAndDownloadedAndParsedAndErrorCode{};

struct ByPhDAndContentHash{};

struct ByUrlAndPhD{};

struct ByPhDAndNormalizedUrl{};

struct ByIdFrom{};

struct BlockedChange : public std::unary_function<PageData, void>

{

  bool b;

  BlockedChange(const bool& _b) : b(_b) {}

  void operator()(PageData& pd)

  {

  pd. blocked = b;

  }

};

struct LevelChange : public std::unary_function<PageData, void>

{

  uint l;

  LevelChange(const uint& _l) : l(_l) {}

  void operator()(PageData& pd)

  {

  pd. level = l;

  }

};

}

// In-memory store of PageData records with seven ordered indices.
// Only the (phD, normalizedUrl) index is unique; every other index allows
// duplicate keys.
using PDStore = boost::multi_index_container<
  PageData,
  boost::multi_index::indexed_by<
    // PD::ById: keyed on PageData::id.
    boost::multi_index::ordered_non_unique<
      boost::multi_index::tag<PD::ById>,
      boost::multi_index::member<PageData, ulong, &PageData::id>
    >,
    // PD::ByBlocked: keyed on PageData::blocked.
    boost::multi_index::ordered_non_unique<
      boost::multi_index::tag<PD::ByBlocked>,
      boost::multi_index::member<PageData, bool, &PageData::blocked>
    >,
    // PD::ByPhDAndBlockedAndDownloaded: composite (phD, blocked, downloaded).
    boost::multi_index::ordered_non_unique<
      boost::multi_index::tag<PD::ByPhDAndBlockedAndDownloaded>,
      boost::multi_index::composite_key<
        PageData,
        boost::multi_index::member<PageData, HostData*, &PageData::phD>,
        boost::multi_index::member<PageData, bool, &PageData::blocked>,
        boost::multi_index::member<PageData, bool, &PageData::downloaded>
      >
    >,
    // PD::ByBlockedAndDownloadedAndParsedAndErrorCode:
    // composite (blocked, downloaded, parsed, errorCode).
    boost::multi_index::ordered_non_unique<
      boost::multi_index::tag<PD::ByBlockedAndDownloadedAndParsedAndErrorCode>,
      boost::multi_index::composite_key<
        PageData,
        boost::multi_index::member<PageData, bool, &PageData::blocked>,
        boost::multi_index::member<PageData, bool, &PageData::downloaded>,
        boost::multi_index::member<PageData, bool, &PageData::parsed>,
        boost::multi_index::member<PageData, uint, &PageData::errorCode>
      >
    >,
    // PD::ByPhDAndContentHash: composite (phD, contentHash).
    boost::multi_index::ordered_non_unique<
      boost::multi_index::tag<PD::ByPhDAndContentHash>,
      boost::multi_index::composite_key<
        PageData,
        boost::multi_index::member<PageData, HostData*, &PageData::phD>,
        boost::multi_index::member<PageData, uint, &PageData::contentHash>
      >
    >,
    // PD::ByPhDAndNormalizedUrl: composite (phD, normalizedUrl); unique.
    boost::multi_index::ordered_unique<
      boost::multi_index::tag<PD::ByPhDAndNormalizedUrl>,
      boost::multi_index::composite_key<
        PageData,
        boost::multi_index::member<PageData, HostData*, &PageData::phD>,
        boost::multi_index::member<PageData, QString, &PageData::normalizedUrl>
      >
    >,
    // PD::ByIdFrom: keyed on PageData::idFrom.
    boost::multi_index::ordered_non_unique<
      boost::multi_index::tag<PD::ByIdFrom>,
      boost::multi_index::member<PageData, ulong, &PageData::idFrom>
    >
  >
>;

// View of PDStore through its PD::ByBlocked index.
using BlockedList = PDStore::index<PD::ByBlocked>::type;

#endif // PDSTORE_H

application_managers/textfileam.cpp

#include "textfileam.h"

// Constructs the application manager by running the setup steps in a fixed
// order: data manager, threads, application finishers, then thread start.
TextFileAM::TextFileAM()

{

  // Store the data manager returned by the setup helper in _pdM.
  _pdM = _setupDataManager();

  _setupThreads();

  _setupApplicationFinishers();

  // Threads are started last, after all other setup calls have returned.
  _startThreads();

}

int TextFileAM::run()

{

  RCCSettings *psett = RCCSettings::instance();

  QElapsedTimer runTimer;

  runTimer. start();

  while(true)

  {

  _app->processEvents();

  _app->thread()->msleep(1000);

  bool finish = false;

  for (auto pfinisher: _appFs)

  {

  if (pfinisher->needToFinishApplication())

  {

  finish = true;

  break;

  }

  }

  if (finish)

  {

  _stopThreads();

  int crawlingTime = runTimer. elapsed() / 1000;

  QStringList rUNames = psett->value( "ResultUnloaders",

                       "r").toString().split(",");

  for (auto rUName: rUNames)

  {

  ResultUnloader *prU =

                       ObjectCreator::resultUnloader(rUName. trimmed(), _pdM);

  prU->unloadResult();

  delete prU;

  }

  int unloadTime = runTimer. elapsed() / 1000 - crawlingTime;

  qDebug() << "Work is done." << endl

  << "Crawling time: " << crawlingTime << "s." << endl

  << "Unload time: " << unloadTime << "s." << endl

  << "Press Enter to exit";

  std::cin. get();

  break;

  }

  }

  return 0;

}


1 http://www.qt.io

2 http://www.boost.org/doc/libs/1_58_0/libs/multi_index/doc/

3 Триал-версии STATISTICA. http://www.statsoft.ru/products/trial.

Из-за большого объема этот материал размещен на нескольких страницах:
1 2 3 4 5 6 7 8 9 10 11