}

  _mutexPA. unlock();

}

void BMICDM::_removePAsByToAndFrom(const QVector<ulong> &pDIdsToRemove)

{

  _mutexPA. lock();

  typedef PAStore::index<PA::ByTo>::type::iterator ByToIt;

  PAStore::index<PA::ByTo>::type & to = _storePA. get<PA::ByTo>();

  for (auto& id: pDIdsToRemove)

  {

  ByToIt it0, it1;

  std::tie(it0,it1) = to. equal_range(id);

  to. erase(it0, it1);

  }

  typedef PAStore::index<PA::ByFrom>::type::iterator ByFromIt;

  PAStore::index<PA::ByFrom>::type & from = _storePA. get<PA::ByFrom>();

  for (auto& id: pDIdsToRemove)

  {

  ByFromIt it0, it1;

  std::tie(it0,it1) = from. equal_range(id);

  from. erase(it0, it1);

  }

  _mutexPA. unlock();

}

// Looks `id` up in the PD::ById index of _storePD.
// Returns a copy of the stored PageData when an entry is found, otherwise a
// default-constructed PageData. This function takes no lock itself.
PageData BMICDM::_findById(ulong id)
{
  auto &byId = _storePD.get<PD::ById>();

  const auto hit = byId.find(id);
  if (hit == byId.end())
  {
    return PageData();
  }

  return *hit;
}

void BMICDM::insertPAs(const QVector<PageArc> &pAs)

{

  _mutexPA. lock();

  for (const PageArc &pA: pAs)

  {

  _storePA. insert(pA);

  }

  _mutexPA. unlock();

}

data_managers/datamanager.h

#ifndef DATAMANAGER_H

#define DATAMANAGER_H

#include "includes. h"

#include "rccconsts. h"

Не нашли? Не то? Что вы ищете?

#include "data_structures/pagedata. h"

#include "data_structures/pagearc. h"

#include "data_structures/pdlogitem. h"

#include "data_structures/pdandpacreatedata. h"

// Container aliases used in the DataManager interface.
using PAContainer = QVector<PageArc>;          // sequence of PageArc
using PDLIContainer = QQueue<PDLogItem>;       // queue of PDLogItem
using PDPAContainer = QVector<PDPACreateData>; // sequence of PDPACreateData

class DataManager

{

public:

  enum LogType

  {

  CommonLog,

  ErrorLog

  };

  DataManager();

  virtual ~DataManager();

  virtual bool addHost(HostData* phD) = 0;

  virtual bool allWorkIsDone() = 0;

  virtual QVector<HostData*> getHosts() = 0;

  virtual void getPDsStartingFromId(PDContainer &result, const ulong &id,

  const int& countMultiplier = RCCConsts::PAGEDATA_CHUNK_SIZE_COUNT_MULTIPLIER) = 0;

  virtual void getPAsStartingFromId(PAContainer &result, const ulong &id,

  const int& count = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

  virtual void getFreePDsForDownloading(PDContainer &result, HostData *phD,

  const int& countMultiplier = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

  virtual void getFreePDsForParsing(PDContainer &result, const int& count = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

  virtual void insertPDs(const PDContainer &pDs) = 0;

  virtual void addPDsAndPAs(const PDPAContainer &pDPAs) = 0;

  virtual void updatePDs(const PDContainer &pDs) = 0;//метод должен удалять, если стоит remove

  virtual std::pair<bool, PageData> contentSeen(HostData* phD, const QString& content) = 0;

  virtual void insertPAs(const PAContainer& pAs) = 0;

  virtual void changePAsByTo(const int &to, const int &newTo) = 0;

  //Лог

  virtual PDLIContainer* getLog(const DataManager::LogType &logType, const int size = INT_MAX) = 0;

  virtual void insertLogItem(const DataManager::LogType &logType, const PDLogItem& pDLI) = 0;

  virtual void insertLogItems(const DataManager::LogType &logType, const QVector<PDLogItem> &items) = 0;

};

#endif // DATAMANAGER_H

data_managers/datamanager.cpp

#include "datamanager. h"

// Nothing to set up: the constructor performs no work of its own.
DataManager::DataManager() = default;

// Out-of-line definition of the virtual destructor; performs no work of its own.
DataManager::~DataManager() = default;

data_structures/hostdata.h

#ifndef HOSTDATA_H

#define HOSTDATA_H

#include <QString>

#include <QUrl>

#include "rccconsts. h"

#include "robotstxtrule. h"

struct HostData

{

  HostData();

  HostData(const QString& url, const int& maxDownloadsAtTime,

  const int& crawlDelay, const int& maxCrawlLevel);

  QString host;

  uint crawlDelay;

  QString protocol;

  QString str;

  uint maxDownloadsAtTime;

  RTRContainer rules;

  int maxCrawlLevel;

  int port;

};

#endif // HOSTDATA_H

data_structures/hostdata.cpp

#include "hostdata. h"

// Default constructor.
// The built-in members are zeroed explicitly so that a default-constructed
// HostData never exposes indeterminate values; the remaining members are left
// to their own default initialization.
HostData::HostData():
  crawlDelay(0),
  maxDownloadsAtTime(0),
  maxCrawlLevel(0),
  port(0)
{
}

// Builds a HostData from a URL string and per-host limits.
//
// url                - parsed with QUrl; scheme, host and port are taken from it
// maxDownloadsAtTime - stored in the member of the same name
// downloadDelay      - stored in crawlDelay
// maxCrawlLevel      - stored in the member of the same name
//
// `str` becomes "<protocol>://<host>", with ":<port>" appended only when the
// URL carries an explicit port other than 80.
HostData::HostData(const QString& url, const int& maxDownloadsAtTime,
                   const int& downloadDelay, const int& maxCrawlLevel):
  crawlDelay(downloadDelay),
  maxDownloadsAtTime(maxDownloadsAtTime),
  maxCrawlLevel(maxCrawlLevel)
{
  const QUrl parsed(url);

  protocol = parsed.scheme();
  host = parsed.host().replace("/", "");

  // QUrl::port() yields -1 when the URL has no explicit port.
  port = parsed.port();

  str = protocol + "://" + host;

  // Require a positive port: the previous `port && port != 80` test was true
  // for -1 and produced strings such as "http://example.com:-1".
  if (port > 0 && port != 80)
  {
    str += ":" + QString::number(port);
  }
}

data_structures/hostdownloaddata.h

#ifndef HOSTDOWNLOADDATA_H

#define HOSTDOWNLOADDATA_H

#include "pagedata. h"

struct HostDownloadData

{

  HostDownloadData();


  int curDownloadsCount;

  PDContainer pDs;

  PDContainer::iterator pDsIt;

};

#endif // HOSTDOWNLOADDATA_H

data_structures/hostdownloaddata.cpp

#include "hostdownloaddata. h"

// Starts with no downloads in progress and an empty pDs container.
// pDsIt is value-initialized so it never holds an indeterminate value
// (where PDContainer::iterator is a raw pointer this makes it null).
// It still has to be assigned from pDs before it is dereferenced.
HostDownloadData::HostDownloadData():
  curDownloadsCount(0),
  pDsIt()
{
}

data_structures/pagearc.h

#ifndef PAGEARC_H

#define PAGEARC_H

#include "includes. h"

struct PageArc

{

  PageArc();

  PageArc(const ulong &from, const ulong &to);

  PageArc(const ulong &id, const ulong &from, const ulong &to);

  ulong id;

  ulong from;

  ulong to;

  static ulong newIdUnsafe();

  static ulong newIdSafe();

  static ulong lastId;

  QString toString() const;

};

#endif // PAGEARC_H

data_structures/pagearc.cpp

#include "pagearc. h"

// Counter behind newIdUnsafe()/newIdSafe(); both pre-increment it, so the
// first id handed out is 1.
ulong PageArc::lastId = 0UL;

// Default constructor: all three ids start at 0.
// `from` and `to` were previously left uninitialized, so reading them from a
// default-constructed arc yielded indeterminate values.
PageArc::PageArc():
  id(0),
  from(0),
  to(0)
{}

PageArc::PageArc(const ulong &from, const ulong &to):

  id(0),

  from(from),

  to(to)

{

}

PageArc::PageArc(const ulong &id, const ulong &from, const ulong &to):

  id(id),

  from(from),

  to(to)

{

}

// Advances lastId by one and returns the new value. No locking is performed.
ulong PageArc::newIdUnsafe()
{
  return ++lastId;
}

// Advances lastId by one under a function-local mutex and returns the new
// value. Callers of newIdSafe() are serialized against each other only;
// newIdUnsafe() does not take this mutex.
ulong PageArc::newIdSafe()
{
  static QMutex mut;

  // RAII guard: the mutex is released when `guard` goes out of scope, after
  // the return value has been computed.
  QMutexLocker guard(&mut);

  return ++lastId;
}

// Formats the arc as "<id>;<from>;<to>" using decimal numbers.
QString PageArc::toString() const
{
  QString text = QString::number(id);
  text += ";";
  text += QString::number(from);
  text += ";";
  text += QString::number(to);
  return text;
}

data_structures/pagedata.h

#ifndef PAGEDATA_H

#define PAGEDATA_H

#include "includes. h"

#include "hostdata. h"

struct PageData

{

  PageData();//не присваивает id

  PageData(const ulong &id, const QString& url, HostData *phD);

  PageData(const ulong &id, const QString& url, const QString& normalizedUrl, HostData *phD);

  QString toString() const;


  static ulong newIdUnsafe();

  static ulong newIdSafe();

  static uint hashContent(const QString& content);

  static ulong lastId;

  static QString normalizeUrl(const QString &urlStr);

  ulong id;

  HostData* phD;

  QString url;

  QString normalizedUrl;

  ulong idFrom;

  uint level;

  uint outDegree;

  bool blocked;

  bool downloaded;

  bool parsed;

  QString content;

  uint contentHash;

  uint errorCode;

  ulong replaceId;

  uint downloadAttempts;

  bool remove;

};

// Sequence of PageData records.
using PDContainer = QVector<PageData>;

// Hash function for QString; declared here, defined elsewhere.
std::size_t hash_value(const QString &x);

#endif // PAGEDATA_H

data_structures/pagedata.cpp

#include "pagedata. h"

// Counter behind newIdUnsafe()/newIdSafe(); both pre-increment it, so the
// first id handed out is 1.
ulong PageData::lastId = 0UL;

// Default constructor: does not assign an id (id stays 0), leaves phD null,
// and sets strings to "", counters to 0 and flags to false.
// The initializers are listed in member declaration order, which is the order
// in which they actually run (the previous list differed and triggered
// -Wreorder).
PageData::PageData():
  id(0),
  phD(nullptr),
  url(""),
  normalizedUrl(""),
  idFrom(0),
  level(0),
  outDegree(0),
  blocked(false),
  downloaded(false),
  parsed(false),
  content(""),
  contentHash(0),
  errorCode(0),
  replaceId(0),
  downloadAttempts(0),
  remove(false)
{
}

// Creates a page with the given id, URL and host pointer.
// normalizedUrl and content start as "", counters as 0, flags as false.
// The initializers are listed in member declaration order, which is the order
// in which they actually run (the previous list differed and triggered
// -Wreorder).
PageData::PageData(const ulong &id, const QString &url, HostData *phD):
  id(id),
  phD(phD),
  url(url),
  normalizedUrl(""),
  idFrom(0),
  level(0),
  outDegree(0),
  blocked(false),
  downloaded(false),
  parsed(false),
  content(""),
  contentHash(0),
  errorCode(0),
  replaceId(0),
  downloadAttempts(0),
  remove(false)
{
}

// Creates a page with the given id, URL, normalized URL and host pointer.
// content starts as "", counters as 0, flags as false.
// The initializers are listed in member declaration order, which is the order
// in which they actually run (the previous list differed and triggered
// -Wreorder).
PageData::PageData(const ulong &id, const QString &url, const QString &normalizedUrl, HostData *phD):
  id(id),
  phD(phD),
  url(url),
  normalizedUrl(normalizedUrl),
  idFrom(0),
  level(0),
  outDegree(0),
  blocked(false),
  downloaded(false),
  parsed(false),
  content(""),
  contentHash(0),
  errorCode(0),
  replaceId(0),
  downloadAttempts(0),
  remove(false)
{
}

// Formats the page as  <id>;<host str>;"<normalizedUrl>";<idFrom>
//
// phD is nullptr for default-constructed pages, so the host string falls back
// to an empty string instead of dereferencing a null pointer. The output for
// pages with a host is unchanged.
QString PageData::toString() const
{
  const QString hostStr = phD ? phD->str : QString();

  return QString::number(id) + ";" + hostStr + ";\"" + normalizedUrl
         + "\";" + QString::number(idFrom);
}

// Advances lastId by one and returns the new value. No locking is performed.
ulong PageData::newIdUnsafe()
{
  return ++lastId;
}

ulong PageData::newIdSafe()

{

  static QMutex mut;

  mut. lock();

  ulong res = ++lastId;

  mut. unlock();

Из-за большого объема этот материал размещен на нескольких страницах:
1 2 3 4 5 6 7 8 9