Console.WriteLine("\t-t\tвыполнить распознавание t-текстов");

Console.WriteLine("\t-help\tвызвать справку по команде");

}

else

Console.WriteLine("Программа успешно выполнена!");

}

}

}

Файл TextNormalizer.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;

namespace JSM_VS

{

class TextNormalizer
{
    /// <summary>
    /// File name of the external mystem executable that performs the normalization.
    /// </summary>
    private const string MystemExecutable = "mystem.exe";

    // Options passed to mystem:
    //   n - line-by-line mode; every word is printed on its own line
    //   l - do not print the original word forms, only lemmas and grammemes
    //   i - print grammatical information
    private const string MystemOptions = "-nli";

    /// <summary>
    /// Normalizes texts: runs mystem once for every file found directly in
    /// <paramref name="source"/> and writes the result to
    /// <paramref name="destination"/> under the name <c>norm_&lt;original file name&gt;</c>.
    /// </summary>
    /// <remarks>
    /// Each mystem run is awaited before the next one starts, so all output files
    /// have been written (or mystem has failed) by the time this method returns.
    /// The exit code of mystem is not inspected.
    /// </remarks>
    /// <param name="source">Directory containing the texts to normalize.</param>
    /// <param name="destination">Directory that receives the normalized texts; created if missing.</param>
    /// <exception cref="System.ComponentModel.Win32Exception">
    /// The mystem executable could not be started (for example, it was not found).
    /// </exception>
    public void Normalize(string source, string destination)
    {
        DirectoryInfo diSource = new DirectoryInfo(source);

        // mystem is handed a full output path, so make sure the target directory exists.
        Directory.CreateDirectory(destination);

        foreach (FileInfo file in diSource.GetFiles())
        {
            string fileInPath = Path.Combine(source, file.Name);
            string fileOutPath = Path.Combine(destination, "norm_" + file.Name);

            ProcessStartInfo startInfo = new ProcessStartInfo();
            startInfo.FileName = MystemExecutable;
            // Paths are quoted so that directories or file names containing spaces
            // are still passed to mystem as single arguments.
            startInfo.Arguments = Quote(fileInPath) + " " + MystemOptions + " " + Quote(fileOutPath);
            startInfo.UseShellExecute = false;

            // Disposing the Process releases the native process handle.
            using (Process mystem = Process.Start(startInfo))
            {
                if (mystem != null)
                {
                    mystem.WaitForExit();
                }
            }
        }
    }

    /// <summary>
    /// Wraps a path in double quotes for use on a command line.
    /// </summary>
    private static string Quote(string path)
    {
        return "\"" + path + "\"";
    }
}

}

Файл CrossValidation.cs

using System;

using System. Collections. Generic;

using System. Linq;

using System. Text;

namespace JSM_VS

{

class CrossValidator
{
    /// <summary>
    /// Divides <paramref name="numerator"/> by <paramref name="denominator"/>,
    /// returning 0 instead of NaN when the denominator is zero.
    /// </summary>
    private static double SafeRatio(double numerator, double denominator)
    {
        // Denominators here are sums of non-negative values, so an exact
        // comparison with zero is sufficient.
        return denominator == 0 ? 0.0 : numerator / denominator;
    }

    /// <summary>
    /// Splits <paramref name="samples"/> into a test part
    /// (<paramref name="testCount"/> items starting at <paramref name="testStart"/>)
    /// and a training part (every other item, in the original order).
    /// </summary>
    private static void SplitFold(TextInfo[] samples, int testStart, int testCount,
        out TextInfo[] test, out TextInfo[] training)
    {
        int testEnd = testStart + testCount;

        test = new TextInfo[testCount];
        Array.Copy(samples, testStart, test, 0, testCount);

        training = new TextInfo[samples.Length - testCount];
        // Items before the test block keep their index ...
        Array.Copy(samples, 0, training, 0, testStart);
        // ... items after it are shifted left by the size of the test block.
        Array.Copy(samples, testEnd, training, testStart, samples.Length - testEnd);
    }

    /// <summary>
    /// Computes the effectiveness metrics (precision, recall, accuracy, F1)
    /// from the classifier verdicts on the two test collections.
    /// </summary>
    /// <remarks>
    /// Only the verdicts '+' and '-' are counted; any other verdict is left out
    /// of every metric. A metric whose denominator is zero is reported as 0.
    /// </remarks>
    /// <param name="resForPlusTestCollect">Verdicts for the positive test examples.</param>
    /// <param name="resForMinusTestCollect">Verdicts for the negative test examples.</param>
    /// <param name="koeff">Receives the computed metrics.</param>
    static void CalcKoeff(char[] resForPlusTestCollect, char[] resForMinusTestCollect, ref EffectMeasure koeff)
    {
        int TP = 0, FP = 0, TN = 0, FN = 0;

        // Positive examples: '+' is a hit, '-' is a missed positive (false negative).
        foreach (char verdict in resForPlusTestCollect)
        {
            if (verdict == '+')
                TP++;
            else if (verdict == '-')
                FN++;
        }

        // Negative examples: '-' is a hit, '+' is a false alarm (false positive).
        foreach (char verdict in resForMinusTestCollect)
        {
            if (verdict == '-')
                TN++;
            else if (verdict == '+')
                FP++;
        }

        koeff.precission = SafeRatio(TP, TP + FP);
        koeff.recall = SafeRatio(TP, TP + FN);
        koeff.accuracy = SafeRatio(TP + TN, TP + TN + FN + FP);
        koeff.f1_measure = SafeRatio(2 * koeff.precission * koeff.recall, koeff.precission + koeff.recall);
    }

    /// <summary>
    /// Runs the cross-validation procedure: for each of <paramref name="n"/> folds
    /// one block of every sample set is held out for testing, hypotheses are
    /// induced from the remaining samples, the held-out block is classified and
    /// the resulting metrics are accumulated.
    /// </summary>
    /// <remarks>
    /// The block size is <c>Length / n</c>; the last fold also takes the remainder.
    /// Does nothing when <paramref name="n"/> is not positive.
    /// </remarks>
    /// <param name="plusSamples">Positive examples.</param>
    /// <param name="minusSamples">Negative examples.</param>
    /// <param name="averAffectMeasure">
    /// The metrics of every fold are added to the values already stored here and
    /// the sums are then divided by <paramref name="n"/>.
    /// NOTE(review): the result is an average only if the caller passes zeroed
    /// values - confirm against the caller.
    /// </param>
    /// <param name="n">Number of folds.</param>
    /// <param name="imbalance">Passed unchanged to <c>JsmProcessor.Classification</c>.</param>
    /// <param name="typeFuncResolution">Passed unchanged to <c>JsmProcessor.Classification</c>.</param>
    public void ProcessTexts(TextInfo[] plusSamples, TextInfo[] minusSamples,
        ref EffectMeasure averAffectMeasure, int n, double imbalance, int typeFuncResolution)
    {
        // With no folds there is nothing to run; this also protects the divisions by n below.
        if (n <= 0)
            return;

        int sizePlusBlock = plusSamples.Length / n;
        int sizeMinusBlock = minusSamples.Length / n;

        for (int i = 0; i < n; i++)
        {
            int plusStart = i * sizePlusBlock;
            int minusStart = i * sizeMinusBlock;

            // True while at least one more full block follows the current one in
            // BOTH sample sets; otherwise this fold takes everything up to the end.
            bool fullBlockFollows =
                plusSamples.Length - (i + 1) * sizePlusBlock >= sizePlusBlock &&
                minusSamples.Length - (i + 1) * sizeMinusBlock >= sizeMinusBlock;

            int plusTestCount = fullBlockFollows ? sizePlusBlock : plusSamples.Length - plusStart;
            int minusTestCount = fullBlockFollows ? sizeMinusBlock : minusSamples.Length - minusStart;

            // Test and training collections
            TextInfo[] plusTestCollection;
            TextInfo[] plusTrainingCollection;
            SplitFold(plusSamples, plusStart, plusTestCount, out plusTestCollection, out plusTrainingCollection);

            TextInfo[] minusTestCollection;
            TextInfo[] minusTrainingCollection;
            SplitFold(minusSamples, minusStart, minusTestCount, out minusTestCollection, out minusTrainingCollection);

            // Hypothesis lists
            List<Hypothesis> plusHypotheses = new List<Hypothesis>();
            List<Hypothesis> minusHypotheses = new List<Hypothesis>();

            JsmProcessor jp = new JsmProcessor();

            // Induction
            jp.Induction(plusHypotheses, plusTrainingCollection);
            jp.Induction(minusHypotheses, minusTrainingCollection);

            // Analogy
            List<Hypothesis>[] coincidedHypForPlusSamples;
            List<Hypothesis>[] coincidedHypForMinusSamples;
            jp.Analogy(plusHypotheses, minusHypotheses, plusTestCollection, out coincidedHypForPlusSamples);
            jp.Analogy(plusHypotheses, minusHypotheses, minusTestCollection, out coincidedHypForMinusSamples);

            // Classifier verdict per test example:
            //   '+' - the example is positive
            //   '-' - the example is negative
            //   'n' - the class of the example is undefined
            char[] resForPlusTestCollection = new char[plusTestCollection.Length];
            char[] resForMinusTestCollection = new char[minusTestCollection.Length];
            jp.Classification(resForPlusTestCollection, coincidedHypForPlusSamples, imbalance, typeFuncResolution);
            jp.Classification(resForMinusTestCollection, coincidedHypForMinusSamples, imbalance, typeFuncResolution);

            EffectMeasure koeff = new EffectMeasure();
            CalcKoeff(resForPlusTestCollection, resForMinusTestCollection, ref koeff);

            averAffectMeasure.precission += koeff.precission;
            averAffectMeasure.recall += koeff.recall;
            averAffectMeasure.f1_measure += koeff.f1_measure;
            averAffectMeasure.accuracy += koeff.accuracy;
        }

        // Turn the accumulated sums into per-fold averages.
        averAffectMeasure.precission = averAffectMeasure.precission / n;
        averAffectMeasure.recall = averAffectMeasure.recall / n;
        averAffectMeasure.f1_measure = averAffectMeasure.f1_measure / n;
        averAffectMeasure.accuracy = averAffectMeasure.accuracy / n;
    }
}

}

Файл JsmProcessor.cs

Из-за большого объёма этот материал размещён на нескольких страницах:
1 2 3 4 5 6 7 8 9