سلام وقتتون بخیر ...
ببخشید من کدی برای دستهبندی متن زدم منتها یک اروری بهم میده! برای component هر مقداری قرار میدم با این ارور مواجه میشم لطفا راهنماییم کنید با تشکر
کد:
import sklearn
from sklearn import datasets
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import make_pipeline
from sklearn.random_projection import sparse_random_matrix
print("linear svm")
data_train =sklearn.datasets.load_files('F:/arshad/out put1', description=None, categories=None, load_content=True, shuffle=True, encoding='utf-8', decode_error='strict', random_state=0)
data_test = sklearn.datasets.load_files('C:/Users/parya/Desktop/data/100-1/test', description=None, categories=None, load_content=True, shuffle=True, encoding='utf-8', decode_error='strict', random_state=0)
categories = data_train.target_names
y_train, y_test = data_train.target, data_test.target
import codecs
file = codecs.open('F:/s.txt','r','utf-8')
stopwords = file.read().split('\n')
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(sublinear_tf=True,max_df = 0.5 ,stop_words=stopwords)
X_train = vectorizer.fit_transform(data_train.data)
X_train
svd = TruncatedSVD(n_components=380, algorithm='randomized', n_iter=100, random_state=None, tol=0.0)
normalizer = Normalizer(copy=False)
lsa = make_pipeline(svd, normalizer)
X = lsa.fit_transform(X_train)
from sklearn.svm import LinearSVC
LinearSVC(loss='l2', penalty='l2', dual=False, tol=1e-3)
clf = LinearSVC().fit(X, y_train)
X_test = vectorizer.transform(data_test.data)
predicted = clf.predict(X_test)
import numpy as np
print(np.mean(predicted == y_test))
from sklearn import metrics
print(metrics.classification_report(y_test, predicted,target_names=data_test.target_names))
metrics.confusion_matrix(y_test, predicted)
ارور برنامه:
ValueError: X has 2430 features per sample; expecting 150