با سلام و وقت بخیر
کدی در پایتون نوشته‌ام و از سایکیت‌لرن استفاده می‌کنم (آناکوندا نسخهٔ ۳ را نصب کرده‌ام)، منتها چنین خطایی به من می‌دهد:
AttributeError: module 'lda' has no attribute 'LDA'
lda روی سیستم نصب است (pip install lda) و همین کد روی لپ‌تاپ دیگری جواب می‌دهد، منتها روی لپ‌تاپ من نه!
from __future__ import print_function
from time import time
import codecs
import sklearn
import gensim
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
import numpy as np
# Script-wide settings.
# n_samples is only echoed in the "Fitting LDA models" progress message.
n_samples = 2000
# Passed as max_features to the CountVectorizer and echoed in the progress message.
n_features = 1000
# NOTE(review): not read anywhere in the visible script; the LDA call passes a
# literal n_topics=20 instead — confirm which value is intended.
n_topics = 10
# Number of highest-weighted words printed per topic by print_top_words.
n_top_words = 20
def print_top_words(model, feature_names, n_top_words):
    """Print the highest-weighted words of every topic in a fitted model.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight arrays that support ``argsort()``.
        feature_names: Sequence mapping a column index to its word.
        n_top_words: Number of words to print per topic; if it exceeds the
            number of words available, all of them are printed.
    """
    for topic_idx, topic in enumerate(model.components_):
        print("Topic #%d:" % topic_idx)
        # argsort() is ascending, so walk it backwards to get the
        # n_top_words largest weights first.
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        print(" ".join(feature_names[i] for i in top_indices))
# Load the corpus: every file under the folder becomes one document.
print("Loading dataset...")
t0 = time()
dataset = sklearn.datasets.load_files(
    'C:/Users/parya/Desktop/hhhhhh',
    description=None,
    categories=None,
    load_content=True,
    shuffle=True,
    encoding='utf-8',
    decode_error='strict',
    random_state=0,
)
data_samples = dataset.data
print("done in %0.3fs." % (time() - t0))

# Read the stop-word list, one word per line. The context manager closes the
# file; splitlines()/strip() drop the '\r' and empty entries that a plain
# split('\n') leaves behind, which would otherwise never match a token.
with codecs.open('E:/stop2 (2).txt', 'r', 'utf-8') as stopword_file:
    stopwords = [
        word.strip()
        for word in stopword_file.read().splitlines()
        if word.strip()
    ]

# Use tf-idf features for NMF.
print("Extracting tf-idf features for NMF...")
# NOTE(review): the end of this call was lost in the pasted source.
# stop_words=stopwords is reconstructed from the otherwise unused stop-word
# list loaded above — confirm against the original script.
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2,  # max_features=n_features,
                                   stop_words=stopwords)
t0 = time()
tfidf = tfidf_vectorizer.fit_transform(data_samples)
print("done in %0.3fs." % (time() - t0))

# Use tf (raw term count) features for LDA.
print("Extracting tf features for LDA...")
# NOTE(review): same reconstruction as for the tf-idf vectorizer — confirm.
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, max_features=n_features,
                                stop_words=stopwords)
t0 = time()
tf = tf_vectorizer.fit_transform(data_samples)
print("done in %0.3fs." % (time() - t0))

# print("\nTopics in NMF model:")
# Newer scikit-learn releases only provide get_feature_names_out(); fall back
# to the older get_feature_names() when it is not available.
if hasattr(tfidf_vectorizer, "get_feature_names_out"):
    tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
else:
    tfidf_feature_names = tfidf_vectorizer.get_feature_names()
# print_top_words(nmf, tfidf_feature_names, n_top_words)
print("Fitting LDA models with tf features, n_samples=%d and n_features=%d..."
      % (n_samples, n_features))

# Import under an alias so the package stays reachable after the model is
# bound to the name `lda` below.
import lda as lda_package

# "module 'lda' has no attribute 'LDA'" means the module Python imported is not
# the expected package — presumably a local lda.py or another install shadows
# it. Report where the module came from instead of a bare AttributeError.
if not hasattr(lda_package, "LDA"):
    raise ImportError(
        "The imported 'lda' module (%s) does not provide LDA; check for a "
        "local lda.py shadowing the installed package or reinstall it with "
        "'pip install lda'." % getattr(lda_package, "__file__", "unknown location")
    )

# NOTE(review): the topic count is hard-coded to 20 here, not taken from the
# n_topics setting — confirm which is intended.
lda = lda_package.LDA(n_topics=20, n_iter=1500, random_state=1)
t0 = time()
# Fit on the raw term counts; without this step the model has no topics and
# the timing printed below measures nothing.
lda.fit(tf)
print("done in %0.3fs." % (time() - t0))

print("\nTopics in LDA model:")
# Newer scikit-learn releases only provide get_feature_names_out(); fall back
# to the older get_feature_names() when it is not available.
if hasattr(tf_vectorizer, "get_feature_names_out"):
    tf_feature_names = tf_vectorizer.get_feature_names_out()
else:
    tf_feature_names = tf_vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)
from time import time
import codecs
import sklearn
import gensim
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
import numpy as np
# Script-wide settings.
# n_samples is only echoed in the "Fitting LDA models" progress message.
n_samples = 2000
# Passed as max_features to the CountVectorizer and echoed in the progress message.
n_features = 1000
# NOTE(review): not read anywhere in the visible script; the LDA call passes a
# literal n_topics=20 instead — confirm which value is intended.
n_topics = 10
# Number of highest-weighted words printed per topic by print_top_words.
n_top_words = 20
def print_top_words(model, feature_names, n_top_words):
    """Print the highest-weighted words of every topic in a fitted model.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight arrays that support ``argsort()``.
        feature_names: Sequence mapping a column index to its word.
        n_top_words: Number of words to print per topic; if it exceeds the
            number of words available, all of them are printed.
    """
    for topic_idx, topic in enumerate(model.components_):
        print("Topic #%d:" % topic_idx)
        # argsort() is ascending, so walk it backwards to get the
        # n_top_words largest weights first.
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        print(" ".join(feature_names[i] for i in top_indices))
# Load the corpus: every file under the folder becomes one document.
print("Loading dataset...")
t0 = time()
dataset = sklearn.datasets.load_files(
    'C:/Users/parya/Desktop/hhhhhh',
    description=None,
    categories=None,
    load_content=True,
    shuffle=True,
    encoding='utf-8',
    decode_error='strict',
    random_state=0,
)
data_samples = dataset.data
print("done in %0.3fs." % (time() - t0))

# Read the stop-word list, one word per line. The context manager closes the
# file; splitlines()/strip() drop the '\r' and empty entries that a plain
# split('\n') leaves behind, which would otherwise never match a token.
with codecs.open('E:/stop2 (2).txt', 'r', 'utf-8') as stopword_file:
    stopwords = [
        word.strip()
        for word in stopword_file.read().splitlines()
        if word.strip()
    ]

# Use tf-idf features for NMF.
print("Extracting tf-idf features for NMF...")
# NOTE(review): the end of this call was lost in the pasted source.
# stop_words=stopwords is reconstructed from the otherwise unused stop-word
# list loaded above — confirm against the original script.
tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2,  # max_features=n_features,
                                   stop_words=stopwords)
t0 = time()
tfidf = tfidf_vectorizer.fit_transform(data_samples)
print("done in %0.3fs." % (time() - t0))

# Use tf (raw term count) features for LDA.
print("Extracting tf features for LDA...")
# NOTE(review): same reconstruction as for the tf-idf vectorizer — confirm.
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, max_features=n_features,
                                stop_words=stopwords)
t0 = time()
tf = tf_vectorizer.fit_transform(data_samples)
print("done in %0.3fs." % (time() - t0))

# print("\nTopics in NMF model:")
# Newer scikit-learn releases only provide get_feature_names_out(); fall back
# to the older get_feature_names() when it is not available.
if hasattr(tfidf_vectorizer, "get_feature_names_out"):
    tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
else:
    tfidf_feature_names = tfidf_vectorizer.get_feature_names()
# print_top_words(nmf, tfidf_feature_names, n_top_words)
print("Fitting LDA models with tf features, n_samples=%d and n_features=%d..."
      % (n_samples, n_features))

# Import under an alias so the package stays reachable after the model is
# bound to the name `lda` below.
import lda as lda_package

# "module 'lda' has no attribute 'LDA'" means the module Python imported is not
# the expected package — presumably a local lda.py or another install shadows
# it. Report where the module came from instead of a bare AttributeError.
if not hasattr(lda_package, "LDA"):
    raise ImportError(
        "The imported 'lda' module (%s) does not provide LDA; check for a "
        "local lda.py shadowing the installed package or reinstall it with "
        "'pip install lda'." % getattr(lda_package, "__file__", "unknown location")
    )

# NOTE(review): the topic count is hard-coded to 20 here, not taken from the
# n_topics setting — confirm which is intended.
lda = lda_package.LDA(n_topics=20, n_iter=1500, random_state=1)
t0 = time()
# Fit on the raw term counts; without this step the model has no topics and
# the timing printed below measures nothing.
lda.fit(tf)
print("done in %0.3fs." % (time() - t0))

print("\nTopics in LDA model:")
# Newer scikit-learn releases only provide get_feature_names_out(); fall back
# to the older get_feature_names() when it is not available.
if hasattr(tf_vectorizer, "get_feature_names_out"):
    tf_feature_names = tf_vectorizer.get_feature_names_out()
else:
    tf_feature_names = tf_vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)
دوستان، من خیلی وقت است که دیگر با پایتون کار نمی‌کنم و این وبلاگ مربوط به چند سال پیش است، ولی همچنان سؤال مطرح می‌شود... لطفاً اگر جواب را می‌دانید، پاسخ بدهید.