如何在python的频谱中编码图像? [英] How to encode an image in frequency spectrum at python?

查看:83
本文介绍了如何在python的频谱中编码图像?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我在将图像编码到某个.wav文件的频谱中时遇到问题,因此结果看起来类似于:

或者如果您不想下载SOX,则可以使用以下程序来创建图像声波文件的频谱图.

#!/usr/bin/env python
#coding: utf-8
""" This work is licensed under a Creative Commons Attribution 3.0 Unported License.
    Frank Zalkow, 2012-2013 """

import numpy as np
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from numpy.lib import stride_tricks

""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig)
    # cols for windowing
    cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()
    frames *= win

    return np.fft.rfft(frames)

""" scale frequency axis logarithmically """
def logscale_spec(spec, sr=44100, factor=20.):
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    scale = np.unique(np.round(scale))

    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1)
        else:
            newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1)

    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]
    return newspec, freqs

""" plot spectrogram"""
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel

    timebins, freqbins = np.shape(ims)

    plt.figure(figsize=(15, 7.5))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
    plt.colorbar()

    plt.xlabel("time (s)")
    plt.ylabel("frequency (hz)")
    plt.xlim([0, timebins-1])
    plt.ylim([0, freqbins])

    xlocs = np.float32(np.linspace(0, timebins-1, 5))
    plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate])
    ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    if plotpath:
        plt.savefig(plotpath, bbox_inches="tight")
    else:
        plt.show()

    plt.clf()

plotstft("spectrogram.png.wav")

图像光谱图如下:

I have a problem with encoding an image into the spectrum of a .wav file, so that the outcome looks similar to this: http://www.bastwood.com/projects/aphex_face/aphex.png

I am just getting started with programming, so I'm looking for a solution that is fairly easy to understand.

Can someone help?

解决方案

To encode an image into the spectrum of a WAV file, you can use the program below, downloaded from GitHub. This Python code converts an image into an audio WAV file whose spectrogram shows the image.

#!/usr/bin/python
import numpy as np
import matplotlib.image as mpimg
import wave
from array import array


def make_wav(image_filename):
    """Write a WAV file whose spectrogram resembles an image.

    The image's channels are summed into one brightness value per pixel.
    Each image column is then treated as a magnitude spectrum and converted
    into a burst of samples with an inverse real FFT. The bursts are
    concatenated and saved as a mono, 16-bit, 44100 Hz WAV named
    ``image_filename + ".wav"``.

    Args:
        image_filename: Path of the image to encode; it is loaded with
            ``matplotlib.image.imread``.
    """
    # Load image
    image = np.asarray(mpimg.imread(image_filename), dtype=np.float64)
    if image.ndim == 3:
        # Collapse the channel axis; grayscale images are already 2-D.
        image = image.sum(axis=2)
    # Transpose so each row is one image column (one time slice) and flip it
    # so the bottom image row lands in the lowest frequency bin.
    image = image.T[:, ::-1]
    # Cubing widens the gap between bright and dark pixels (the original
    # author left the rationale unexplained).
    image = image ** 3
    w, h = image.shape

    # Fourier transform, normalize, remove DC bias
    data = np.fft.irfft(image, h * 2, axis=1).reshape(w * h * 2)
    data -= np.average(data)
    # Scale by the absolute peak: after removing the mean the most negative
    # sample can be larger in magnitude than the most positive one, and a
    # uniform image yields an all-zero signal that must not be divided.
    peak = np.max(np.abs(data))
    if peak > 0:
        data *= (2 ** 15 - 1.) / peak
    # Native-endian 16-bit samples, as the wave module expects.
    frames = data.astype(np.int16).tobytes()

    # Write to disk
    output_file = wave.open(image_filename + ".wav", "w")
    try:
        output_file.setparams((1, 2, 44100, 0, "NONE", "not compressed"))
        output_file.writeframes(frames)
    finally:
        # Close the handle even if writing fails.
        output_file.close()
    print("Wrote %s.wav" % image_filename)


if __name__ == "__main__":
    # Script entry point: encode the sample image into "spectrogram.png.wav".
    make_wav("spectrogram.png")

To display the WAV file as a spectrogram, you have a couple of choices. Depending on your platform, you can download SoX and run:

sox <yourImage>.jpg.wav -n spectrogram

SoX, short for Sound eXchange, will then render the audio WAV file generated from the image as a spectrogram image.

Or, if you don't want to download SoX, you can use the following program to create a spectrogram of the image's audio WAV file.

#!/usr/bin/env python
#coding: utf-8
""" This work is licensed under a Creative Commons Attribution 3.0 Unported License.
    Frank Zalkow, 2012-2013 """

import numpy as np
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from numpy.lib import stride_tricks

""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Compute the short-time Fourier transform of a 1-D signal.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of a frame shared with the next frame; the hop
            is ``frameSize - floor(overlapFac * frameSize)`` samples.
        window: Callable returning a window of the requested length.

    Returns:
        Complex array of shape ``(frames, frameSize // 2 + 1)`` holding the
        real FFT of every windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # Integer division: np.zeros rejects a float length.
    samples = np.append(np.zeros(frameSize // 2), sig)
    # cols for windowing; must be an int to be usable as an array shape
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    # Overlapping view of the padded signal, one frame per row; copied so the
    # in-place windowing below does not write through aliased memory.
    step = samples.strides[0]
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(step * hopSize, step),
    ).copy()
    frames *= win

    return np.fft.rfft(frames)

""" scale frequency axis logarithmically """    
def logscale_spec(spec, sr=44100, factor=20.):
    """Merge the frequency bins of a spectrogram onto a warped axis.

    Bin edges are taken from ``linspace(0, 1, freqbins) ** factor`` scaled to
    ``0 .. freqbins - 1``, rounded and de-duplicated. With ``factor=1.0`` the
    bins are left as they are; larger factors merge more of the low bins.

    Args:
        spec: 2-D array indexed as ``[time, frequency]``.
        sr: Sample rate used to label the bins.
        factor: Exponent of the warping curve.

    Returns:
        Tuple ``(newspec, freqs)``: ``newspec`` is a complex array with one
        column per merged bin (the sum of the source bins), and ``freqs`` is
        a list with the mean frequency of each merged bin.
    """
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins - 1) / max(scale)
    # Cast to int: the edges are used as slice bounds, which must be integers.
    starts = np.unique(np.round(scale)).astype(int)
    # Each bin runs up to the next edge; the last one runs to the end.
    stops = list(starts[1:]) + [None]

    # abs() because the entry at index ``freqbins`` is the negative Nyquist.
    allfreqs = np.abs(np.fft.fftfreq(freqbins * 2, 1. / sr)[:freqbins + 1])

    # create spectrogram with new freq bins and list center freq of bins
    newspec = np.zeros((timebins, len(starts)), dtype=np.complex128)
    freqs = []
    for i, (lo, hi) in enumerate(zip(starts, stops)):
        newspec[:, i] = np.sum(spec[:, lo:hi], axis=1)
        freqs.append(np.mean(allfreqs[lo:hi]))
    return newspec, freqs

""" plot spectrogram"""
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
    """Plot the spectrogram of a WAV file.

    Args:
        audiopath: Path of the WAV file to read.
        binsize: Frame size passed to ``stft``.
        plotpath: If truthy, the figure is saved to this path; otherwise it
            is shown on screen.
        colormap: Matplotlib colormap name used for the image.
    """
    samplerate, samples = wav.read(audiopath)
    spectrum = stft(samples, binsize)
    rescaled, freq = logscale_spec(spectrum, factor=1.0, sr=samplerate)

    # amplitude to decibel
    magnitude = np.abs(rescaled)
    ims = 20. * np.log10(magnitude / 10e-6)
    timebins, freqbins = np.shape(ims)

    plt.figure(figsize=(15, 7.5))
    plt.imshow(
        np.transpose(ims),
        origin="lower",
        aspect="auto",
        cmap=colormap,
        interpolation="none",
    )
    plt.colorbar()

    plt.xlabel("time (s)")
    plt.ylabel("frequency (hz)")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])

    # Five evenly spaced time ticks, labelled in seconds.
    xlocs = np.float32(np.linspace(0, timebins - 1, 5))
    tick_seconds = ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate
    plt.xticks(xlocs, ["%.02f" % sec for sec in tick_seconds])

    # Ten evenly spaced frequency ticks, labelled with the bin frequencies.
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 10)))
    plt.yticks(ylocs, ["%.02f" % freq[idx] for idx in ylocs])

    if not plotpath:
        plt.show()
    else:
        plt.savefig(plotpath, bbox_inches="tight")

    plt.clf()

# Plot the spectrogram of the WAV produced from spectrogram.png; with no
# plotpath given, the figure is shown on screen rather than saved.
plotstft("spectrogram.png.wav")
#

The image spectrogram is shown below:

这篇关于如何在python的频谱中编码图像?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆