#define DATAMANAGER_H
#include "includes. h"
#include "rccconsts. h"
#include "data_structures/pagedata. h"
#include "data_structures/pagearc. h"
#include "data_structures/pdlogitem. h"
#include "data_structures/pdandpacreatedata. h"
// Container aliases used throughout the DataManager interface.
using PAContainer = QVector<PageArc>;           // sequence of PageArc values
using PDLIContainer = QQueue<PDLogItem>;        // queue of PDLogItem values
using PDPAContainer = QVector<PDPACreateData>;  // sequence of PDPACreateData values
/**
 * Abstract interface to the crawler's data storage.
 *
 * Every operation is pure virtual, so a concrete subclass must supply the
 * storage. The interface covers hosts, page data (PD), page arcs (PA) and
 * two logs selected by LogType.
 *
 * Several methods carry default arguments. C++ binds default arguments from
 * the static type used at the call site, so overriders should repeat the
 * same defaults to avoid surprises.
 */
class DataManager
{
public:
    /// Selects which log a log-related call operates on.
    enum LogType
    {
        CommonLog,
        ErrorLog
    };

    DataManager();
    virtual ~DataManager();

    // ---- Hosts and overall progress -------------------------------------

    virtual bool addHost(HostData *phD) = 0;
    virtual bool allWorkIsDone() = 0;
    virtual QVector<HostData *> getHosts() = 0;

    // ---- Chunked queries --------------------------------------------------
    // These return void and take a non-const `result` container reference,
    // presumably filled by the implementation.
    //
    // NOTE(review): getPDsStartingFromId defaults `countMultiplier` to
    // PAGEDATA_CHUNK_SIZE_COUNT_MULTIPLIER, while getFreePDsForDownloading
    // defaults a parameter of the same name to PAGEDATA_DEFAULT_CHUNK_SIZE.
    // Confirm which of the two is intended.

    virtual void getPDsStartingFromId(
        PDContainer &result,
        const ulong &id,
        const int &countMultiplier = RCCConsts::PAGEDATA_CHUNK_SIZE_COUNT_MULTIPLIER) = 0;

    virtual void getPAsStartingFromId(
        PAContainer &result,
        const ulong &id,
        const int &count = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

    virtual void getFreePDsForDownloading(
        PDContainer &result,
        HostData *phD,
        const int &countMultiplier = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

    virtual void getFreePDsForParsing(
        PDContainer &result,
        const int &count = RCCConsts::PAGEDATA_DEFAULT_CHUNK_SIZE) = 0;

    // ---- Page data / page arc modification -------------------------------

    virtual void insertPDs(const PDContainer &pDs) = 0;
    virtual void addPDsAndPAs(const PDPAContainer &pDPAs) = 0;

    // Original author's requirement: the method must delete an entry when
    // its "remove" flag is set.
    virtual void updatePDs(const PDContainer &pDs) = 0;

    // Returns a (bool, PageData) pair for the given host and content.
    // NOTE(review): presumably (true, matching page) when the same content
    // was already stored for this host — confirm against an implementation.
    virtual std::pair<bool, PageData> contentSeen(HostData *phD, const QString &content) = 0;

    virtual void insertPAs(const PAContainer &pAs) = 0;
    virtual void changePAsByTo(const int &to, const int &newTo) = 0;

    // ---- Log ---------------------------------------------------------------

    // NOTE(review): ownership of the returned pointer is not visible from
    // this declaration — confirm before deleting or caching it.
    virtual PDLIContainer *getLog(const LogType &logType, const int size = INT_MAX) = 0;
    virtual void insertLogItem(const LogType &logType, const PDLogItem &pDLI) = 0;
    virtual void insertLogItems(const LogType &logType, const QVector<PDLogItem> &items) = 0;
};
#endif // DATAMANAGER_H
data_structures/pdstore.h
#ifndef PDSTORE_H
#define PDSTORE_H
#include "includes. h"
#include "pagedata. h"
using namespace boost::multi_index;
namespace PD
{
struct ById{};
struct ByBlocked{};
struct ByPhDAndBlockedAndDownloaded{};
struct ByBlockedAndDownloadedAndParsedAndErrorCode{};
struct ByPhDAndContentHash{};
struct ByUrlAndPhD{};
struct ByPhDAndNormalizedUrl{};
struct ByIdFrom{};
struct BlockedChange : public std::unary_function<PageData, void>
{
bool b;
BlockedChange(const bool& _b) : b(_b) {}
void operator()(PageData& pd)
{
pd. blocked = b;
}
};
struct LevelChange : public std::unary_function<PageData, void>
{
uint l;
LevelChange(const uint& _l) : l(_l) {}
void operator()(PageData& pd)
{
pd. level = l;
}
};
}
/**
 * Multi-index container of PageData records.
 *
 * Seven ordered indices are declared, each named by a tag from namespace PD.
 * Only the (phD, normalizedUrl) index is unique; all others allow duplicate
 * keys. Keep the declaration order stable: positional access (get<N>()) by
 * any caller would depend on it.
 */
using PDStore = boost::multi_index_container<
    PageData,
    indexed_by<
        // PageData::id, duplicates allowed.
        ordered_non_unique<
            tag<PD::ById>,
            member<PageData, ulong, &PageData::id>
        >,
        // PageData::blocked.
        ordered_non_unique<
            tag<PD::ByBlocked>,
            member<PageData, bool, &PageData::blocked>
        >,
        // Composite key: (phD, blocked, downloaded).
        ordered_non_unique<
            tag<PD::ByPhDAndBlockedAndDownloaded>,
            composite_key<
                PageData,
                member<PageData, HostData*, &PageData::phD>,
                member<PageData, bool, &PageData::blocked>,
                member<PageData, bool, &PageData::downloaded>
            >
        >,
        // Composite key: (blocked, downloaded, parsed, errorCode).
        ordered_non_unique<
            tag<PD::ByBlockedAndDownloadedAndParsedAndErrorCode>,
            composite_key<
                PageData,
                member<PageData, bool, &PageData::blocked>,
                member<PageData, bool, &PageData::downloaded>,
                member<PageData, bool, &PageData::parsed>,
                member<PageData, uint, &PageData::errorCode>
            >
        >,
        // Composite key: (phD, contentHash).
        ordered_non_unique<
            tag<PD::ByPhDAndContentHash>,
            composite_key<
                PageData,
                member<PageData, HostData*, &PageData::phD>,
                member<PageData, uint, &PageData::contentHash>
            >
        >,
        // Composite key: (phD, normalizedUrl) — the only unique index.
        ordered_unique<
            tag<PD::ByPhDAndNormalizedUrl>,
            composite_key<
                PageData,
                member<PageData, HostData*, &PageData::phD>,
                member<PageData, QString, &PageData::normalizedUrl>
            >
        >,
        // PageData::idFrom.
        ordered_non_unique<
            tag<PD::ByIdFrom>,
            member<PageData, ulong, &PageData::idFrom>
        >
    >
>;

// View of PDStore through its ByBlocked index.
using BlockedList = PDStore::index<PD::ByBlocked>::type;
#endif // PDSTORE_H
application_managers/textfileam.cpp
#include "textfileam. h"
// Constructor: stores the result of _setupDataManager() in _pdM, then calls
// _setupThreads(), _setupApplicationFinishers() and _startThreads(), in that
// order.
// NOTE(review): the helpers are defined elsewhere. The order presumably
// matters (the threads and finishers likely need _pdM) — confirm before
// reordering or moving the assignment into a member-initializer list.
TextFileAM::TextFileAM()
{
_pdM = _setupDataManager();
_setupThreads();
_setupApplicationFinishers();
// Threads are started only after every setup call above has returned.
_startThreads();
}
int TextFileAM::run()
{
RCCSettings *psett = RCCSettings::instance();
QElapsedTimer runTimer;
runTimer. start();
while(true)
{
_app->processEvents();
_app->thread()->msleep(1000);
bool finish = false;
for (auto pfinisher: _appFs)
{
if (pfinisher->needToFinishApplication())
{
finish = true;
break;
}
}
if (finish)
{
_stopThreads();
int crawlingTime = runTimer. elapsed() / 1000;
QStringList rUNames = psett->value( "ResultUnloaders",
"r").toString().split(",");
for (auto rUName: rUNames)
{
ResultUnloader *prU =
ObjectCreator::resultUnloader(rUName. trimmed(), _pdM);
prU->unloadResult();
delete prU;
}
int unloadTime = runTimer. elapsed() / 1000 - crawlingTime;
qDebug() << "Work is done." << endl
<< "Crawling time: " << crawlingTime << "s." << endl
<< "Unload time: " << unloadTime << "s." << endl
<< "Press Enter to exit";
std::cin. get();
break;
}
}
return 0;
}
1 http://www.qt.io
2 http://www.boost.org/doc/libs/1_58_0/libs/multi_index/doc/
3 Триал-версии STATISTICA. http://www.statsoft.ru/products/trial.
|
Из-за большого объема этот материал размещен на нескольких страницах:
1 2 3 4 5 6 7 8 9 10 11 |


