在处理医学图像时,常常会遇到以Dicom格式保存的医学图像,如CT、MRI等。Dicom文件是需要专门的软件或者通过编程,应用相应的库进行处理。为了能够更好地服务下游任务,例如分割或检测腹腔CT图像中某个病灶组织,需要先将Dicom图像进行读取,脱敏,调窗等步骤,以便于后续的编辑。本文使用python对医学Dicom文件进行相应的处理,相比于封装好的软件,笔者认为自己动手的可操作性更强。
1 导入相应的包
2 读取Dicom图像数据
3 设置CT图像的窗宽和窗位
4 获取Dicom图像的tag信息
5 结果保存及可视化
# load necessary packagesimport matplotlib.pyplot as pltimport pydicom.uidimport sysfrom PyQt5 import QtGuiimport osimport pydicomimport globfrom PIL import *import matplotlib.pyplot as pltfrom pylab import *from tkinter.filedialog import *import PIL.Image as Image
其核心是使用了python中的pydicom库来处理dicom文件。
have_numpy = Truetry: import numpyexcept ImportError: have_numpy = False raisesys_is_little_endian = (sys.byteorder == 'little')NumpySupportedTransferSyntaxes = [ pydicom.uid.ExplicitVRLittleEndian, pydicom.uid.ImplicitVRLittleEndian, pydicom.uid.DeflatedExplicitVRLittleEndian, pydicom.uid.ExplicitVRBigEndian,]# 支持"传输"语法def supports_transfer_syntax(dicom_dataset): return (dicom_dataset.file_meta.TransferSyntaxUID in NumpySupportedTransferSyntaxes)def needs_to_convert_to_RGB(dicom_dataset): return Falsedef should_change_PhotometricInterpretation_to_RGB(dicom_dataset): return False# 加载 Dicom图像def get_pixeldata(dicom_dataset): """If NumPy is available, return an ndarray of the Pixel Data. Raises ------ TypeError If there is no Pixel Data or not a supported data type. ImportError If NumPy isn't found NotImplementedError if the transfer syntax is not supported AttributeError if the decoded amount of data does not match the expected amount Returns ------- numpy.ndarray The contents of the Pixel Data element (7FE0,0010) as an ndarray. """ if (dicom_dataset.file_meta.TransferSyntaxUID not in NumpySupportedTransferSyntaxes): raise NotImplementedError("Pixel Data is compressed in a " "format pydicom does not yet handle. " "Cannot return array. Pydicom might " "be able to convert the pixel data " "using GDCM if it is installed.") if not have_numpy: msg = ("The Numpy package is required to use pixel_array, and " "numpy could not be imported.") raise ImportError(msg) if 'PixelData' not in dicom_dataset: raise TypeError("No pixel data found in this dataset.") # Make NumPy format code, e.g. "uint16", "int32" etc # from two pieces of info: # dicom_dataset.PixelRepresentation -- 0 for unsigned, 1 for signed; # dicom_dataset.BitsAllocated -- 8, 16, or 32 if dicom_dataset.BitsAllocated == 1: # single bits are used for representation of binary data format_str = 'uint8' elif dicom_dataset.PixelRepresentation == 0: format_str = 'uint{}'.format(dicom_dataset.BitsAllocated) elif dicom_dataset.PixelRepresentation == 1: format_str = 'int{}'.format(dicom_dataset.BitsAllocated) else: format_str = 'bad_pixel_representation' try: numpy_dtype = numpy.dtype(format_str) except TypeError: msg = ("Data type not understood by NumPy: " "format='{}', PixelRepresentation={}, " "BitsAllocated={}".format( format_str, dicom_dataset.PixelRepresentation, dicom_dataset.BitsAllocated)) raise TypeError(msg) if dicom_dataset.is_little_endian != sys_is_little_endian: numpy_dtype = numpy_dtype.newbyteorder('S') pixel_bytearray = dicom_dataset.PixelData if dicom_dataset.BitsAllocated == 1: # if single bits are used for binary representation, a uint8 array # has to be converted to a binary-valued array (that is 8 times bigger) try: pixel_array = numpy.unpackbits( numpy.frombuffer(pixel_bytearray, dtype='uint8')) except NotImplementedError: # PyPy2 does not implement numpy.unpackbits raise NotImplementedError( 'Cannot handle BitsAllocated == 1 on this platform') else: pixel_array = numpy.frombuffer(pixel_bytearray, dtype=numpy_dtype) length_of_pixel_array = pixel_array.nbytes expected_length = dicom_dataset.Rows * dicom_dataset.Columns if ('NumberOfFrames' in dicom_dataset and dicom_dataset.NumberOfFrames > 1): expected_length *= dicom_dataset.NumberOfFrames if ('SamplesPerPixel' in dicom_dataset and dicom_dataset.SamplesPerPixel > 1): expected_length *= dicom_dataset.SamplesPerPixel if dicom_dataset.BitsAllocated > 8: expected_length *= (dicom_dataset.BitsAllocated // 8) padded_length = expected_length if expected_length & 1: padded_length += 1 if length_of_pixel_array != padded_length: raise AttributeError( "Amount of pixel data %d does not " "match the expected data %d" % (length_of_pixel_array, padded_length)) if expected_length != padded_length: pixel_array = pixel_array[:expected_length] if should_change_PhotometricInterpretation_to_RGB(dicom_dataset): dicom_dataset.PhotometricInterpretation = "RGB" if dicom_dataset.Modality.lower().find('ct') >= 0: # CT图像需要得到其CT值图像 pixel_array = pixel_array * dicom_dataset.RescaleSlope + dicom_dataset.RescaleIntercept # 获得图像的CT值 pixel_array = pixel_array.reshape(dicom_dataset.Rows, dicom_dataset.Columns*dicom_dataset.SamplesPerPixel) return pixel_array, dicom_dataset.Rows, dicom_dataset.Columns
读取到dicom文件中的数据后,实质上是几个图像矩阵,这个过程同时也处理了“脱敏”问题。
def setDicomWinWidthWinCenter(img_data, winwidth, wincenter, rows, cols): img_temp = img_data img_temp.flags.writeable = True min = (2 * wincenter - winwidth) / 2.0 + 0.5 max = (2 * wincenter + winwidth) / 2.0 + 0.5 dFactor = 255.0 / (max - min) for i in numpy.arange(rows): for j in numpy.arange(cols): img_temp[i, j] = int((img_temp[i, j]-min)*dFactor) min_index = img_temp < min img_temp[min_index] = 0 max_index = img_temp > max img_temp[max_index] = 255 return img_temp
该函数的输入变量winwidth和wincenter即为需要设置的窗宽和窗位,这两个值根据研究的问题(不同的组织器官对应不同的窗宽和窗位,有时候也要根据图像效果进行一定的调整)调整不同的值。网上有很多关于相关的窗位和窗宽对应值,这里给出一些参考资料,如果遇到不确定的,最好借鉴查阅相应领域的论文。
点击链接:https://radiopaedia.org/articles/windowing-ct
def loadFileInformation(filename): information = {} ds = pydicom.read_file(filename) information['PatientID'] = ds.PatientID information['PatientName'] = ds.PatientName information['PatientBirthDate'] = ds.PatientBirthDate information['PatientSex'] = ds.PatientSex information['StudyID'] = ds.StudyID information['StudyDate'] = ds.StudyDate information['StudyTime'] = ds.StudyTime information['InstitutionName'] = ds.InstitutionName information['Manufacturer'] = ds.Manufacturer print(dir(ds)) print(type(information)) return information
这个步骤视需求而定,如果不需要查看dicom文件的具体tag信息,此步骤可以跳过。
可以单张保存,或者批量处理。
读取单张dicom文件
def main_single(): dcm = dicom.read_file('81228816') # load dicom_file # 得到 CT 值,图像的 长, 宽 pixel_array, dcm.Rows, dcm.Columns = get_pixeldata(dcm) # 调整窗位、窗宽 img_data = pixel_array winwidth = 500 wincenter = 50 rows = dcm.Rows cols = dcm.Columns dcm_temp = setDicomWinWidthWinCenter(img_data, winwidth, wincenter, rows, cols) # 可视化 dcm_img = Image.fromarray(dcm_temp) # 将Numpy转换为PIL.Image dcm_img = dcm_img.convert('L') # plt.imshow(img, cmap=plt.cm.bone) # 保存为jpg文件,用作后面的生成label用 dcm_img.save('../output/temp.jpg') # 显示图像 dcm_img.show()
同时读取一个文件夹中的 dicom 文件,并处理保存 (写成循环即可)
defmain_mulit(path): names = os.listdir(path) # 读取文件夹中的所有文件名for i inrange(len(names)): dicom_name = path+names[i] dcm = pydicom.read_file(dicom_name) # 读取 dicom 文件 pixel_array, dcm.Rows, dcm.Columns = get_pixeldata(dcm) # 得到 dicom文件的 CT 值 img_data = pixel_array winwidth =500 wincenter =50 rows = dcm.Rows cols = dcm.Columns dcm_temp = setDicomWinWidthWinCenter(img_data, winwidth, wincenter, rows, cols) # 调整窗位、窗宽# 可视化 dcm_img = Image.fromarray(dcm_temp) # 将 Numpy转换为 PIL.Image dcm_img = dcm_img.convert('L')# 批量保存 dcm_img.save("C:/output/%s_%s.png"% (path1, names[i]))
注意,以上都写成函数的形式,运行时需要调用,并注意文件路径的修改。
单张dicom文件处理
main_single()
批量处理
main_mulit(path)
欢迎关注,不时更新数码科技技术和学术前沿。