Keras One-Hot Encoding Memory Management - Best Possible Way Out

2024-02-21

I know this question has already been answered in different ways in the past (https://stackoverflow.com/questions/41058780/python-one-hot-encoding-for-huge-data), but I could not work out how to fit those answers to my code and need help. I am using the Cornell Movie Corpus (https://www.cs.cornell.edu/~cristian/memorability_files/cornell_movie_quotes_corpus.zip) as my dataset. The end goal is to train an LSTM model for a chatbot, but I am stuck at the initial one-hot encoding step and running out of memory. Note that the VM I am training on has 86GB of memory, yet the problem still occurs. In nmt_special_utils_mod.py the one-hot encoding exceeds the allocated memory and I cannot get past that stage. Any alternative way of executing these lines without losing functionality would help:

Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))
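
For scale, a rough back-of-envelope estimate of what a dense Xoh occupies (the corpus and vocabulary sizes below are purely illustrative assumptions, not measured values) shows why even 86 GB is not enough once the full corpus is loaded:

# Illustrative numbers only - not measured from the corpus
m = 200000           # hypothetical number of question/answer pairs in the full corpus
Tx = 8               # sequence length used in the code below
vocab = 20000        # hypothetical size of human_vocab
bytes_per_value = 8  # assuming NumPy's default float64 elements

print(m * Tx * vocab * bytes_per_value / 2**30, "GiB")   # ~238 GiB for Xoh alone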

The full code is given below to make the problem clear.

import_corpus_mod.py - Change 1: updated the removal of less frequent words

def data_load():
    TrainDataSetPath = 'D:\\Script\\Python\\NLP\\chatbotSeq2SeqWithAtt\\ChatBot\\'

    ####initializing libraries####
    #import numpy as np
    #import tensorflow as tf
    import re
    #import time

    ########### Data Pre-processing Part 1##########

    def clean_text(text):
        '''The function will clean known texts and make them more meaningful'''
        text = text.lower()
        text = re.sub(r"i'm", "i am", text)
        text = re.sub(r"he's", "he is", text)
        text = re.sub(r"she's", "she is", text)
        text = re.sub(r"it's", "it is", text)
        text = re.sub(r"let's", "let us", text)
        text = re.sub(r"that's", "that is", text)
        text = re.sub(r"what's", "what is", text)
        text = re.sub(r"where's", "where is", text)
        text = re.sub(r"how's", "how is", text)
        text = re.sub(r"howz", "how is", text)
        text = re.sub(r"\'ll", " will", text)
        text = re.sub(r"\'ve", " have", text)
        text = re.sub(r"\'re", " are", text)
        text = re.sub(r"\'d", " would", text)
        text = re.sub(r"don't", "do not", text)
        text = re.sub(r"won't", "will not", text)
        text = re.sub(r"can't", "cannot", text)
        text = re.sub(r"wouldn't", "would not", text)
        text = re.sub(r"wasn't", "was not", text)
        text = re.sub(r"haven't", "have not", text)
        text = re.sub(r"\s+", " ", text)
        text = re.sub(r"[-()\"#/@;:<>+=~|{}.?,]", "", text)
        #####Add more below this line######
        #####Add more above this line######
        return text

    lines = open(TrainDataSetPath + 'movie_lines.txt', encoding='utf-8', errors='ignore').read().split('\n')
    conversations = open(TrainDataSetPath + 'movie_conversations_short.txt', encoding='utf-8', errors='ignore').read().split('\n')

    # Create a dictionary which maps each line to its corresponding ID
    id2line = {}
    for line in lines:
        _line = line.split(' +++$+++ ')
        if len(_line) == 5:
            id2line[_line[0]] = _line[4]

    # Create a list of all conversations
    conversations_ids = []
    for conversation in conversations[:-1]:             # the last line in conversations is blank, hence [:-1]
        # Split, then pick the last part [-1], which is the conversation. Remove the square brackets with [1:-1], then strip quotes and spaces
        _conversation = conversation.split(' +++$+++ ')[-1][1:-1].replace("'", "").replace(" ", "")
        # Append to form a list of lists, splitting on commas
        conversations_ids.append(_conversation.split(","))

    # Separating the questions and answers - assuming the first line is the question and the next one is the answer in a conversation
    questions = []
    answers = []
    threshold = 5   # keep only words that occur at least this many times

    for conversation in conversations_ids:
        for i in range(len(conversation) - 1):
            questions.append(id2line[conversation[i]])
            answers.append(id2line[conversation[i + 1]])

    # Cleaning all questions
    clean_questions = []
    for question in questions:
        clean_questions.append(clean_text(question))

    # Cleaning all answers
    clean_answers = []
    for answer in answers:
        clean_answers.append(clean_text(answer))

    # Creating a dictionary that maps each word to its number of occurrences
    word2count = {}
    for question in clean_questions:
        for word in question.split():
            if word not in word2count:
                word2count[word] = 1
            else:
                word2count[word] += 1
    for answer in clean_answers:
        for word in answer.split():
            if word not in word2count:
                word2count[word] = 1
            else:
                word2count[word] += 1

    # Keep only the words that occur at least `threshold` times
    for k in list(word2count):
        if word2count[k] < threshold:
            del word2count[k]

    cleanest_questions, cleanest_answers, keys_list = [], [], list(word2count.keys())

    for answer in clean_answers:
        ans = []
        for word in answer.split():
            if word in keys_list:
                ans.append(word)
            else:
                ans.append('<unk>')
        cleanest_answers.append(' '.join(ans))

    for question in clean_questions:
        ques = []
        for word in question.split():
            if word in keys_list:
                ques.append(word)
            else:
                ques.append('<unk>')
        cleanest_questions.append(' '.join(ques))

    return cleanest_questions, cleanest_answers

nmt_data_load_asmain_words.py - Change 1: updated the removal of less frequent words

from tqdm import tqdm
from import_corpus_mod import data_load

def load_dataset(clean_questions, clean_answers):
    """
        Loads a dataset with m examples and vocabularies
        :m: the number of examples to generate
    """
    human_vocab = set()
    machine_vocab = set()
    dataset = []
    lines = len(clean_questions)

    for i in tqdm(range(lines)):
        hu, mc = clean_questions[i], clean_answers[i]
        if hu is not None:
            dataset.append((hu, mc))
            human_vocab.update(set(hu.split()))
            machine_vocab.update(set(mc.split()))

    human = dict(zip(sorted(human_vocab) + ['<pad>'], 
                     list(range(len(human_vocab) + 1))))
    #human = dict(zip(sorted(human_vocab) + ['<pad>'], 
                     #list(range(len(human_vocab) + 1))))
    #human = dict(zip(sorted(human_vocab), 
                     #list(range(len(human_vocab)))))
    machine = dict(zip(sorted(machine_vocab) + ['<pad>'], 
                     list(range(len(machine_vocab) + 1))))
    #machine = dict(zip(sorted(machine_vocab) + ['<pad>'], 
                     #list(range(len(machine_vocab) + 1))))
    inv_machine = {v:k for k,v in machine.items()}
    inv_human = {p:q for q,p in human.items()}

    return dataset, human, machine, inv_machine, inv_human




clean_questions, clean_answers = data_load()
dataset, human_vocab, machine_vocab, inv_machine_vocab, inv_human_vocab = load_dataset(clean_questions, clean_answers)

nmt_special_utils_mod.py

import numpy as np
from keras.utils import to_categorical
import keras.backend as K
import matplotlib.pyplot as plt
import sys

# Initiate a list to store integer version of sentences
X_into_int = []
Y_into_int = []

def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):

    X, Y = zip(*dataset)

    X = np.asarray([string_to_int(i, Tx, human_vocab) for i in X])

    Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
    Xoh, Yoh = [], []
    Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

    return X, np.array(Y), Xoh, Yoh

def string_to_int(line, length, vocab):
    #print("hello- inside function")
    """
    Converts all strings in the vocabulary into a list of integers representing the positions of the
    input string's characters in the "vocab"

    Arguments:
    string -- input string, e.g. 'Hello how are you'
    length -- the number of time steps you'd like, determines if the output will be padded or cut
    vocab -- vocabulary, dictionary used to index every character of your "string"

    Returns:
    rep -- list of integers (or '<unk>') (size = length) representing the position of the string's character in the vocabulary
    """
    '''    
    #make lower to standardize
    for string in listofstring:
        string = string.lower()
        string = string.replace(',','')

        if len(string) > length:
            string = string[:length]

        rep = list(map(lambda x: vocab.get(x, '<unk>'), string))

        if len(string) < length:
            rep += [vocab['<pad>']] * (length - len(string))

    #print (rep)
    return rep
    '''
    newlist = []
    if len(line.split()) > length:
        line = line.split()
        for i in range(length):
            newlist.append(line[i])
        line = ' '.join(newlist)

    else:
        line = line + ' <pad>' * (length - len(line.split()))
        #print(line)
        #print("hello- inside padded")
    #words_into_int = []
    ints = []
    for word in line.split():
        if word not in vocab:
            ints.append(vocab['<unk>'])
        else:
            ints.append(vocab[word])
            #print("hello- inside append if loop")
    #words_into_int.append(ints)
    #words_into_int = ",".join(x for x in words_into_int)
    return ints        

def int_to_string(ints, inv_vocab):
    """
    Output a machine readable list of characters based on a list of indexes in the machine's vocabulary

    Arguments:
    ints -- list of integers representing indexes in the machine's vocabulary
    inv_vocab -- dictionary mapping machine readable indexes to machine readable characters 

    Returns:
    l -- list of characters corresponding to the indexes of ints thanks to the inv_vocab mapping
    """

    l = [inv_vocab[i] for i in ints]
    return l


EXAMPLES = ['3 May 1979', '5 Apr 09', '20th February 2016', 'Wed 10 Jul 2007']

def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')


def plot_attention_map(model, input_vocabulary, inv_output_vocabulary, text, n_s = 128, num = 6, Tx = 30, Ty = 10):
    """
    Plot the attention map.

    """
    attention_map = np.zeros((10, 30))
    Ty, Tx = attention_map.shape

    s0 = np.zeros((1, n_s))
    c0 = np.zeros((1, n_s))
    layer = model.layers[num]

    encoded = np.array(string_to_int(text, Tx, input_vocabulary)).reshape((1, 30))
    encoded = np.array(list(map(lambda x: to_categorical(x, num_classes=len(input_vocabulary)), encoded)))

    f = K.function(model.inputs, [layer.get_output_at(t) for t in range(Ty)])
    r = f([encoded, s0, c0])

    for t in range(Ty):
        for t_prime in range(Tx):
            attention_map[t][t_prime] = r[t][0,t_prime,0]

    # Normalize attention map
#     row_max = attention_map.max(axis=1)
#     attention_map = attention_map / row_max[:, None]

    prediction = model.predict([encoded, s0, c0])

    predicted_text = []
    for i in range(len(prediction)):
        predicted_text.append(int(np.argmax(prediction[i], axis=1)))

    predicted_text = list(predicted_text)
    predicted_text = int_to_string(predicted_text, inv_output_vocabulary)
    text_ = list(text)

    # get the lengths of the string
    input_length = len(text)
    output_length = Ty

    # Plot the attention_map
    plt.clf()
    f = plt.figure(figsize=(8, 8.5))
    ax = f.add_subplot(1, 1, 1)

    # add image
    i = ax.imshow(attention_map, interpolation='nearest', cmap='Blues')

    # add colorbar
    cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
    cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
    cbar.ax.set_xlabel('Alpha value (Probability output of the "softmax")', labelpad=2)

    # add labels
    ax.set_yticks(range(output_length))
    ax.set_yticklabels(predicted_text[:output_length])

    ax.set_xticks(range(input_length))
    ax.set_xticklabels(text_[:input_length], rotation=45)

    ax.set_xlabel('Input Sequence')
    ax.set_ylabel('Output Sequence')

    # add grid and legend
    ax.grid()

    #f.show()

    return attention_map

nmt_code_mod.py - the main code

# -*- coding: utf-8 -*-
"""
Created on Tue Apr 10 16:31:44 2018

@author: Anirban
"""

from keras.layers import Bidirectional, Concatenate, Dot, Input, LSTM
from keras.layers import RepeatVector, Dense, Activation
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import Model
import keras.backend as K
import numpy as np
from nmt_data_load_asmain_words import load_dataset
from import_corpus_mod import data_load
from nmt_special_utils_mod import *

epochs = 50


clean_questions, clean_answers = data_load()
dataset, human_vocab, machine_vocab, inv_machine_vocab, inv_human_vocab = load_dataset(clean_questions, clean_answers)

m = len(clean_questions)
Tx = 8
Ty = 8
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

print("X.shape:", X.shape)
print("Y.shape:", Y.shape)
print("Xoh.shape:", Xoh.shape)
print("Yoh.shape:", Yoh.shape)

# Defined shared layers as global variables
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(20, activation = "tanh")
densor2 = Dense(1, activation = "relu")
activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded from nmt_special_utils
dotor = Dot(axes = 1)

def one_step_attention(a, s_prev):
    """ 
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights 
    "alphas" and the hidden states "a" of the Bi-LSTM. 

    Arguments: 
    a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a) 
    s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s) 

    Returns: 
    context -- context vector, input of the next (post-attetion) LSTM cell 
    """  

    ### START CODE HERE ###  
    # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
    s_prev = repeator(s_prev)
    # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
    concat = concatenator([a,s_prev]) 
    # Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈1 lines)  
    e = densor1(concat)  
    # Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈1 lines)  
    energies = densor2(e)  
    # Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line)  
    alphas = activator(energies)  
    # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)  
    context = dotor([alphas,a])  
    ### END CODE HERE ###  

    return context 

n_a = 32
n_s = 64
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(len(machine_vocab), activation=softmax)

def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model instance
    """

    # Define the inputs of your model with a shape (Tx,)
    # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0

    # Initialize empty list of outputs
    outputs = []

    ### START CODE HERE ###

    # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
    a = Bidirectional(LSTM(n_a, return_sequences=True),input_shape=(m, Tx, n_a*2))(X)

    # Step 2: Iterate for Ty steps
    for t in range(Ty):

        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
        context = one_step_attention(a, s)

        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
        # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
        s, _, c = post_activation_LSTM_cell(context,initial_state = [s, c])

        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
        out = output_layer(s)

        # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
        outputs.append(out)

    # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
    model = Model(inputs=[X,s0,c0],outputs=outputs)

    ### END CODE HERE ###

    return model

model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
opt = Adam(lr=0.05, beta_1=0.9, beta_2=0.999,decay=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt,metrics=['accuracy'])

s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
outputs = list(Yoh.swapaxes(0,1))

model.fit([Xoh, s0, c0], outputs, epochs=epochs, batch_size=5)

EXAMPLES = ['can we make this quick  roxanne korrine and andrew barrett are having an incredibly horrendous public break up on the quad  again'
            ,'the thing is cameron  i am at the mercy of a particularly hideous breed of loser  my sister  i cannot date until she does'
            ,'Hello how are you']
#EXAMPLES = ['13 May 1979', 'Tue 11 Jul 2007','Saturday May 9 2018', 'March 3 2001','March 3rd 2001', '1 March 2001','23 May 2017']
for example in EXAMPLES:

    source = np.asarray([string_to_int(example, Tx, human_vocab)])
    #need a try block here to prevent errors if vocab is small and example has characters not in the vocab
    source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source)))   #.swapaxes(0,1)
    prediction = model.predict([source, s0, c0])
    prediction = np.argmax(prediction, axis = -1)
    output = [inv_machine_vocab[int(i)] for i in prediction]
    pads = output.count('<pad>')
    output = output[0:(len(output)-pads)]
    print("source:", example)
    print("output:", ' '.join(output))

Note: this code comes from a well-known 2016 research paper that translates any human-readable date into a machine-readable date. I tried to repurpose it for our chatbot - a Seq2Seq model with (bidirectional) attention. The code does work: if only 1000 conversations of the movie corpus are loaded, it runs. When the full corpus is loaded, it fails because memory is overloaded.

EDIT

Thank you for the collaborative effort on this question - I really appreciate the work you put into going through the code and trying to find the best solution. As per your direction, I have updated import_corpus_mod.py to incorporate threshold = 5 and to convert the less frequent words (fewer than 5 occurrences) to <unk> right at the beginning, without any extra spaces. This change forced another small change in nmt_data_load_asmain_words.py to remove the <unk> addition there.

Now, following your other point and the code you shared, I commented out the following lines in nmt_special_utils_mod.py:

#Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
#Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

and then changed the inputs right away as per your guidance:

Xi = Input(shape=(Tx,))
X  = Embedding( human_vocab_size, 100, embeddings_initializer='uniform', input_length=Tx , trainable=True )(Xi)
s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
s = s0
c = c0

But this produced a lot of errors:

runfile('D:/Script/Python/NLP/chatbotSeq2SeqWithAtt/ChatBot/nmt_code_mod.py', wdir='D:/Script/Python/NLP/chatbotSeq2SeqWithAtt/ChatBot')
Reloaded modules: nmt_data_load_asmain_words, import_corpus_mod, nmt_special_utils_mod
100%|██████████| 384/384 [00:00<00:00, 24615.06it/s]
100%|██████████| 384/384 [00:00<?, ?it/s]
X.shape: (384, 8)
Y.shape: (384, 8)
D:\Python\Anaconda3\lib\site-packages\keras\engine\topology.py:1592: UserWarning: Model inputs must come from a Keras Input layer, they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "model_2" was not an Input tensor, it was generated by layer embedding_1.
Note that input tensors are instantiated via `tensor = Input(shape)`.
The tensor that caused the issue was: embedding_1/Gather:0
  str(x.name))
Traceback (most recent call last):

  File "<ipython-input-44-addb6f9e6bc1>", line 1, in <module>
    runfile('D:/Script/Python/NLP/chatbotSeq2SeqWithAtt/ChatBot/nmt_code_mod.py', wdir='D:/Script/Python/NLP/chatbotSeq2SeqWithAtt/ChatBot')

  File "D:\Python\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
    execfile(filename, namespace)

  File "D:\Python\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "D:/Script/Python/NLP/chatbotSeq2SeqWithAtt/ChatBot/nmt_code_mod.py", line 138, in <module>
    model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))

  File "D:/Script/Python/NLP/chatbotSeq2SeqWithAtt/ChatBot/nmt_code_mod.py", line 132, in model
    model = Model(inputs=[X,s0,c0],outputs=outputs)

  File "D:\Python\Anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)

  File "D:\Python\Anaconda3\lib\site-packages\keras\engine\topology.py", line 1652, in __init__
    layer.__class__.__name__))

TypeError: Input layers to a `Model` must be `InputLayer` objects. Received inputs: [<tf.Tensor 'embedding_1/Gather:0' shape=(?, 8, 100) dtype=float32>, <tf.Tensor 's0_1:0' shape=(?, 64) dtype=float32>, <tf.Tensor 'c0_1:0' shape=(?, 64) dtype=float32>]. Input 0 (0-based) originates from layer type `Embedding`
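
For reference, the TypeError is raised because X here is the output tensor of the Embedding layer rather than a Keras Input, and Model(inputs=...) only accepts Input tensors. A minimal sketch of the usual fix, assuming the rest of model() in nmt_code_mod.py stays unchanged, keeps Xi as the model input:

from keras.layers import Input, Embedding
from keras.models import Model

Xi = Input(shape=(Tx,))                                   # integer-encoded sequence goes in here
X  = Embedding(human_vocab_size, 100, embeddings_initializer='uniform', input_length=Tx, trainable=True)(Xi)
s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
# ... build the Bi-LSTM / attention stack on X exactly as before ...
model = Model(inputs=[Xi, s0, c0], outputs=outputs)       # pass Xi (the Input), not X (the Embedding output)

With this variant, model.fit would then be fed the integer-encoded X of shape (m, Tx) instead of Xoh.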

Hence, reverting the code of nmt_code_mod.py and nmt_special_utils_mod.py back here.


The problem is not the one-hot encoding but storing the whole dataset in memory. The wise choice is a generator, or a Sequence (https://keras.io/utils/#sequence), which lets you load and encode the data on the fly. This is commonly done for large image datasets, for example.

I suggest doing all the preprocessing and saving the input/output pairs, without encoding them, to a csv file; then you can create a generator that loads and encodes them lazily:

import numpy as np
from keras.utils import Sequence

class MySequence(Sequence):
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.data) / float(self.batch_size)))

    def __getitem__(self, batch_id):
        # Get the corresponding batch data...
        # ...one-hot encode it here and return the encoded arrays
        return X, Y

Note that a generator (or Sequence[i]) returns a single batch.
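
A fuller sketch of how this could be wired to the code above (the class name, constructor arguments and the fit_generator call are illustrative, not part of the original answer; it assumes preprocess_data is changed to return only the integer-encoded X and Y, and that the attention model is kept as-is, so every batch must supply s0/c0 and a per-timestep list of targets):

import numpy as np
from keras.utils import Sequence, to_categorical

class OneHotBatches(Sequence):
    """Hypothetical Sequence that one-hot encodes a single batch at a time."""
    def __init__(self, X_int, Y_int, human_vocab, machine_vocab, n_s, batch_size):
        self.X_int, self.Y_int = X_int, Y_int        # integer-encoded arrays, shapes (m, Tx) and (m, Ty)
        self.human_vocab, self.machine_vocab = human_vocab, machine_vocab
        self.n_s = n_s
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.X_int) / float(self.batch_size)))

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        # Only batch_size * T * vocab floats are materialised here, never the full corpus
        Xoh = np.array([to_categorical(x, num_classes=len(self.human_vocab)) for x in self.X_int[sl]])
        Yoh = np.array([to_categorical(y, num_classes=len(self.machine_vocab)) for y in self.Y_int[sl]])
        m_b = Xoh.shape[0]
        s0 = np.zeros((m_b, self.n_s))
        c0 = np.zeros((m_b, self.n_s))
        # The model emits one tensor per decoder step, so split Yoh along the time axis
        return [Xoh, s0, c0], list(Yoh.swapaxes(0, 1))

# Replacing model.fit([Xoh, s0, c0], outputs, epochs=epochs, batch_size=5):
# seq = OneHotBatches(X, Y, human_vocab, machine_vocab, n_s=64, batch_size=5)
# model.fit_generator(seq, epochs=epochs)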
